Skip to content

Commit

Permalink
Caffeine-based caching parser for the user_agent processor (#4619)
Browse files Browse the repository at this point in the history
Adds and uses a Caffeine-based caching parser for the user_agent processor. Resolves #4618

Signed-off-by: David Venable <[email protected]>
  • Loading branch information
dlvenable authored Jun 13, 2024
1 parent af7d1b5 commit bc51e67
Show file tree
Hide file tree
Showing 7 changed files with 233 additions and 5 deletions.
2 changes: 1 addition & 1 deletion data-prepper-plugins/opensearch/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ dependencies {
implementation 'software.amazon.awssdk:s3'
implementation 'software.amazon.awssdk:opensearchserverless'
implementation libs.commons.lang3
implementation 'com.github.ben-manes.caffeine:caffeine:3.1.8'
implementation libs.caffeine
implementation 'software.amazon.awssdk:apache-client'
implementation 'software.amazon.awssdk:netty-nio-client'
implementation 'co.elastic.clients:elasticsearch-java:7.17.0'
Expand Down
2 changes: 1 addition & 1 deletion data-prepper-plugins/otel-trace-raw-processor/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ dependencies {
implementation libs.armeria.grpc
implementation 'com.fasterxml.jackson.core:jackson-databind'
implementation 'com.fasterxml.jackson.dataformat:jackson-dataformat-yaml'
implementation 'com.github.ben-manes.caffeine:caffeine:3.1.8'
implementation libs.caffeine
testImplementation 'org.assertj:assertj-core:3.25.3'
testImplementation testLibs.mockito.inline
}
Expand Down
3 changes: 2 additions & 1 deletion data-prepper-plugins/user-agent-processor/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@ dependencies {
implementation project(':data-prepper-api')
implementation project(':data-prepper-plugins:common')
implementation 'com.fasterxml.jackson.core:jackson-databind'
implementation "com.github.ua-parser:uap-java:1.6.1"
implementation 'com.github.ua-parser:uap-java:1.6.1'
implementation libs.caffeine
}

jacocoTestCoverageVerification {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.dataprepper.plugins.processor.useragent;

import com.github.benmanes.caffeine.cache.Cache;
import com.github.benmanes.caffeine.cache.Caffeine;
import ua_parser.Client;
import ua_parser.Device;
import ua_parser.OS;
import ua_parser.Parser;
import ua_parser.UserAgent;

import java.util.function.Function;

/**
 * A subclass of {@link Parser} that stores parse results in Caffeine caches.
 *
 * <p>Each of the four parse methods ({@link #parse(String)},
 * {@link #parseUserAgent(String)}, {@link #parseDevice(String)} and
 * {@link #parseOS(String)}) is backed by its own cache keyed by the agent string.
 */
class CaffeineCachingParser extends Parser {
    private final Cache<String, Client> clientCache;
    private final Cache<String, UserAgent> userAgentCache;
    private final Cache<String, Device> deviceCache;
    private final Cache<String, OS> osCache;

    /**
     * Creates a parser whose caches are each limited to the given size.
     *
     * @param cacheSize The maximum size, as a count of items, applied to
     *                  every one of the per-method caches.
     */
    CaffeineCachingParser(final long cacheSize) {
        userAgentCache = boundedCache(cacheSize);
        clientCache = boundedCache(cacheSize);
        deviceCache = boundedCache(cacheSize);
        osCache = boundedCache(cacheSize);
    }

    /**
     * Returns the {@link Client} for the agent string, consulting the client cache first.
     *
     * @param agentString The raw user agent string; may be null.
     * @return The parsed client, or null when {@code agentString} is null.
     */
    @Override
    public Client parse(final String agentString) {
        return lookup(clientCache, agentString, super::parse);
    }

    /**
     * Returns the {@link UserAgent} for the agent string, consulting the user agent cache first.
     *
     * @param agentString The raw user agent string; may be null.
     * @return The parsed user agent, or null when {@code agentString} is null.
     */
    @Override
    public UserAgent parseUserAgent(final String agentString) {
        return lookup(userAgentCache, agentString, super::parseUserAgent);
    }

    /**
     * Returns the {@link Device} for the agent string, consulting the device cache first.
     *
     * @param agentString The raw user agent string; may be null.
     * @return The parsed device, or null when {@code agentString} is null.
     */
    @Override
    public Device parseDevice(final String agentString) {
        return lookup(deviceCache, agentString, super::parseDevice);
    }

    /**
     * Returns the {@link OS} for the agent string, consulting the OS cache first.
     *
     * @param agentString The raw user agent string; may be null.
     * @return The parsed OS, or null when {@code agentString} is null.
     */
    @Override
    public OS parseOS(final String agentString) {
        return lookup(osCache, agentString, super::parseOS);
    }

    /**
     * Shared lookup used by all parse methods. A null key short-circuits to
     * null without touching the cache; otherwise the cache is asked for the
     * value and given the loader to compute it.
     */
    private <V> V lookup(
            final Cache<String, V> store,
            final String key,
            final Function<String, V> loader) {
        return key == null ? null : store.get(key, loader);
    }

    /**
     * Builds a Caffeine cache configured only with a maximum size.
     */
    private static <V> Cache<String, V> boundedCache(final long maxEntries) {
        final Caffeine<Object, Object> builder = Caffeine.newBuilder();
        builder.maximumSize(maxEntries);
        return builder.build();
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
import org.opensearch.dataprepper.model.record.Record;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import ua_parser.CachingParser;
import ua_parser.Client;
import ua_parser.Parser;

Expand All @@ -36,7 +35,7 @@ public class UserAgentProcessor extends AbstractProcessor<Record<Event>, Record<
public UserAgentProcessor(final PluginMetrics pluginMetrics, final UserAgentProcessorConfig config) {
super(pluginMetrics);
this.config = config;
this.userAgentParser = new CachingParser(config.getCacheSize());
this.userAgentParser = new CaffeineCachingParser(config.getCacheSize());
}

@Override
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.dataprepper.plugins.processor.useragent;

import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import ua_parser.Client;
import ua_parser.Device;
import ua_parser.OS;
import ua_parser.UserAgent;

import static org.hamcrest.CoreMatchers.equalTo;
import static org.hamcrest.CoreMatchers.notNullValue;
import static org.hamcrest.CoreMatchers.nullValue;
import static org.hamcrest.CoreMatchers.sameInstance;
import static org.hamcrest.MatcherAssert.assertThat;

/**
 * Tests for {@link CaffeineCachingParser}. For each of the four parse methods
 * there are three checks: a known agent string parses to the expected fields,
 * a null input yields null, and repeated calls with equal strings return the
 * very same result object.
 */
@SuppressWarnings("StringOperationCanBeSimplified")
class CaffeineCachingParserTest {
    private static final String KNOWN_USER_AGENT_STRING = "Mozilla/5.0 (iPhone; CPU iPhone OS 13_5_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.1.1 Mobile/15E148 Safari/604.1";
    long cacheSize;

    @BeforeEach
    void setUp() {
        cacheSize = 1000;
    }

    /** Builds a fresh parser using the currently configured cache size. */
    private CaffeineCachingParser createObjectUnderTest() {
        return new CaffeineCachingParser(cacheSize);
    }

    @Test
    void parse_returns_expected_results() {
        final CaffeineCachingParser parser = createObjectUnderTest();
        final Client parsed = parser.parse(KNOWN_USER_AGENT_STRING);

        assertThat(parsed, notNullValue());
        assertThat(parsed.userAgent, notNullValue());
        assertThat(parsed.userAgent.family, equalTo("Mobile Safari"));
        assertThat(parsed.userAgent.major, equalTo("13"));
        assertThat(parsed.device.family, equalTo("iPhone"));
        assertThat(parsed.os.family, equalTo("iOS"));
    }

    @Test
    void parse_with_null_returns_null() {
        final Client parsed = createObjectUnderTest().parse(null);

        assertThat(parsed, nullValue());
    }

    @Test
    void parse_called_multiple_times_returns_same_instance() {
        final CaffeineCachingParser parser = createObjectUnderTest();
        final Client first = parser.parse(KNOWN_USER_AGENT_STRING);

        assertThat(first, notNullValue());

        for (int attempt = 0; attempt < 3; attempt++) {
            // new String(...) yields an equal but distinct String object.
            final String equalCopy = new String(KNOWN_USER_AGENT_STRING);
            assertThat(parser.parse(equalCopy), sameInstance(first));
        }
    }

    @Test
    void parseUserAgent_returns_expected_results() {
        final CaffeineCachingParser parser = createObjectUnderTest();
        final UserAgent parsed = parser.parseUserAgent(KNOWN_USER_AGENT_STRING);

        assertThat(parsed, notNullValue());
        assertThat(parsed.family, equalTo("Mobile Safari"));
        assertThat(parsed.major, equalTo("13"));
    }

    @Test
    void parseUserAgent_with_null_returns_null() {
        final UserAgent parsed = createObjectUnderTest().parseUserAgent(null);

        assertThat(parsed, nullValue());
    }

    @Test
    void parseUserAgent_called_multiple_times_returns_same_instance() {
        final CaffeineCachingParser parser = createObjectUnderTest();
        final UserAgent first = parser.parseUserAgent(KNOWN_USER_AGENT_STRING);

        assertThat(first, notNullValue());

        for (int attempt = 0; attempt < 3; attempt++) {
            // new String(...) yields an equal but distinct String object.
            final String equalCopy = new String(KNOWN_USER_AGENT_STRING);
            assertThat(parser.parseUserAgent(equalCopy), sameInstance(first));
        }
    }

    @Test
    void parseDevice_returns_expected_results() {
        final CaffeineCachingParser parser = createObjectUnderTest();
        final Device parsed = parser.parseDevice(KNOWN_USER_AGENT_STRING);

        assertThat(parsed, notNullValue());
        assertThat(parsed.family, equalTo("iPhone"));
    }

    @Test
    void parseDevice_with_null_returns_null() {
        final Device parsed = createObjectUnderTest().parseDevice(null);

        assertThat(parsed, nullValue());
    }

    @Test
    void parseDevice_called_multiple_times_returns_same_instance() {
        final CaffeineCachingParser parser = createObjectUnderTest();
        final Device first = parser.parseDevice(KNOWN_USER_AGENT_STRING);

        assertThat(first, notNullValue());

        for (int attempt = 0; attempt < 3; attempt++) {
            // new String(...) yields an equal but distinct String object.
            final String equalCopy = new String(KNOWN_USER_AGENT_STRING);
            assertThat(parser.parseDevice(equalCopy), sameInstance(first));
        }
    }

    @Test
    void parseOS_returns_expected_results() {
        final CaffeineCachingParser parser = createObjectUnderTest();
        final OS parsed = parser.parseOS(KNOWN_USER_AGENT_STRING);

        assertThat(parsed, notNullValue());
        assertThat(parsed.family, equalTo("iOS"));
        assertThat(parsed.major, equalTo("13"));
    }

    @Test
    void parseOS_with_null_returns_null() {
        final OS parsed = createObjectUnderTest().parseOS(null);

        assertThat(parsed, nullValue());
    }

    @Test
    void parseOS_called_multiple_times_returns_same_instance() {
        final CaffeineCachingParser parser = createObjectUnderTest();
        final OS first = parser.parseOS(KNOWN_USER_AGENT_STRING);

        assertThat(first, notNullValue());

        for (int attempt = 0; attempt < 3; attempt++) {
            // new String(...) yields an equal but distinct String object.
            final String equalCopy = new String(KNOWN_USER_AGENT_STRING);
            assertThat(parser.parseOS(equalCopy), sameInstance(first));
        }
    }
}
1 change: 1 addition & 0 deletions settings.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ dependencyResolutionManagement {
library('hadoop-mapreduce', 'org.apache.hadoop', 'hadoop-mapreduce-client-core').versionRef('hadoop')
version('avro', '1.11.3')
library('avro-core', 'org.apache.avro', 'avro').versionRef('avro')
library('caffeine', 'com.github.ben-manes.caffeine', 'caffeine').version('3.1.8')
}
testLibs {
version('junit', '5.8.2')
Expand Down

0 comments on commit bc51e67

Please sign in to comment.