Skip to content

Commit

Permalink
Duplicate values (#3026)
Browse files Browse the repository at this point in the history
* implement transform_key feature

Signed-off-by: Kat Shen <[email protected]>

* fix unit tests

Signed-off-by: Kat Shen <[email protected]>

* fix unit tests

Signed-off-by: Kat Shen <[email protected]>

* remove bin files

Signed-off-by: Kat Shen <[email protected]>

* add static final variable for string comparison

Signed-off-by: Kat Shen <[email protected]>

* add whitespace description to readme, add configs

Signed-off-by: Kat Shen <[email protected]>

* writing whitespace impl

Signed-off-by: Kat Shen <[email protected]>

* add whitespace impl

Signed-off-by: Kat Shen <[email protected]>

* fix impl, writing tests

Signed-off-by: Kat Shen <[email protected]>

* write whitespace test

Signed-off-by: Kat Shen <[email protected]>

* fix formatting, whitespace() -> trimWhitespace()

Signed-off-by: Kat Shen <[email protected]>

* edit readme, add config

Signed-off-by: Kat Shen <[email protected]>

* update logic to valid values set

Signed-off-by: Kat Shen <[email protected]>

* correct return value

Signed-off-by: Kat Shen <[email protected]>

* update variables to static

Signed-off-by: Kat Shen <[email protected]>

* correct convention for private variables

Signed-off-by: Kat Shen <[email protected]>

* impl allow duplicate values, writing tests

Signed-off-by: Kat Shen <[email protected]>

* allow duplicate values impl + tests

Signed-off-by: Kat Shen <[email protected]>

* modify regex portion to final variables, remove some whitespace

Signed-off-by: Kat Shen <[email protected]>

* rerun checks

Signed-off-by: Kat Shen <[email protected]>

* rename methods/variables for more clarity, change default bool value to be false

Signed-off-by: Kat Shen <[email protected]>

* rerun checks

Signed-off-by: Kat Shen <[email protected]>

* change logic to reflect skip_duplicate_values

Signed-off-by: Kat Shen <[email protected]>

* modify tests according to changed logic

Signed-off-by: Kat Shen <[email protected]>

* remove include keys content (accidentally included it oops)

Signed-off-by: Kat Shen <[email protected]>

---------

Signed-off-by: Kat Shen <[email protected]>
Signed-off-by: Katherine Shen <[email protected]>
Co-authored-by: Kat Shen <[email protected]>
  • Loading branch information
shenkw1 and shenkw1 committed Jul 17, 2023
1 parent cc0d527 commit 2476813
Show file tree
Hide file tree
Showing 4 changed files with 77 additions and 0 deletions.
4 changes: 4 additions & 0 deletions data-prepper-plugins/key-value-processor/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,10 @@ When run, the processor will parse the message into the following output:
* Default: `lenient`
* Example: `whitespace` is `"lenient"`. `{"key1 = value1"}` will parse into `{"key1 ": " value1"}`
* Example: `whitespace` is `"strict"`. `{"key1 = value1"}` will parse into `{"key1": "value1"}`
* `skip_duplicate_values` - A boolean option for removing duplicate key/value pairs. When set to `true`, a value that repeats for the same key is kept only once.
* Default: `false`
* Example: `skip_duplicate_values` is `false`. `{"key1=value1&key1=value1"}` will parse into `{"key1": ["value1", "value1"]}`
* Example: `skip_duplicate_values` is `true`. `{"key1=value1&key1=value1"}` will parse into `{"key1": "value1"}`

## Developer Guide
This plugin is compatible with Java 14. See
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import java.util.Set;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import java.util.regex.Matcher;

@DataPrepperPlugin(name = "key_value", pluginType = Processor.class, pluginConfigurationType = KeyValueProcessorConfig.class)
public class KeyValueProcessor extends AbstractProcessor<Record<Event>, Record<Event>> {
Expand Down Expand Up @@ -109,6 +110,12 @@ public KeyValueProcessor(final PluginMetrics pluginMetrics, final KeyValueProces
if (!(validWhitespaceSet.contains(keyValueProcessorConfig.getWhitespace()))) {
throw new IllegalArgumentException(String.format("The whitespace value: %s is not a valid option", keyValueProcessorConfig.getWhitespace()));
}

// Validates the skip_duplicate_values setting by matching its string form against "true" or "false" (case-insensitive).
// NOTE(review): getSkipDuplicateValues() is used directly as an `if` condition elsewhere in this class, so it
// appears to return a boolean; String.valueOf(...) of a boolean is always "true" or "false", which would make
// this check impossible to fail. Confirm and consider removing it.
final Pattern duplicateValueBoolCheck = Pattern.compile("true|false", Pattern.CASE_INSENSITIVE);
final Matcher duplicateValueBoolMatch = duplicateValueBoolCheck.matcher(String.valueOf(keyValueProcessorConfig.getSkipDuplicateValues()));
if (!duplicateValueBoolMatch.matches()) {
throw new IllegalArgumentException(String.format("The skip_duplicate_values value: %s is not a valid option", keyValueProcessorConfig.getSkipDuplicateValues()));
}
}

private String buildRegexFromCharacters(String s) {
Expand Down Expand Up @@ -220,8 +227,20 @@ private void addKeyValueToMap(final Map<String, Object> parsedMap, final String
}

if (parsedMap.get(key) instanceof List) {
if (keyValueProcessorConfig.getSkipDuplicateValues()) {
if (((List<Object>) parsedMap.get(key)).contains(value)) {
return;
}
}

((List<Object>) parsedMap.get(key)).add(value);
} else {
// Compare against this key's current value only. Checking the whole map
// (containsValue) would also match values stored under other keys and
// drop a legitimate new value for this key.
if (keyValueProcessorConfig.getSkipDuplicateValues()
        && java.util.Objects.equals(parsedMap.get(key), value)) {
    return;
}

final LinkedList<Object> combinedList = new LinkedList<>();
combinedList.add(parsedMap.get(key));
combinedList.add(value);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ public class KeyValueProcessorConfig {
static final String DEFAULT_DELETE_VALUE_REGEX = "";
static final String DEFAULT_TRANSFORM_KEY = "";
static final String DEFAULT_WHITESPACE = "lenient";
static final boolean DEFAULT_SKIP_DUPLICATE_VALUES = false;

@NotEmpty
private String source = DEFAULT_SOURCE;
Expand Down Expand Up @@ -70,6 +71,10 @@ public class KeyValueProcessorConfig {
@NotNull
private String whitespace = DEFAULT_WHITESPACE;

/**
 * Whether a value that repeats for the same key is skipped instead of being stored again.
 * Bound to the {@code skip_duplicate_values} option. A primitive {@code boolean} can never be null,
 * so no {@code @NotNull} constraint is declared on it.
 */
@JsonProperty("skip_duplicate_values")
private boolean skipDuplicateValues = DEFAULT_SKIP_DUPLICATE_VALUES;

/**
 * Returns the configured {@code source} value.
 *
 * @return the current value of the {@code source} field
 */
public String getSource() {
    return this.source;
}
Expand Down Expand Up @@ -121,4 +126,8 @@ public String getTransformKey() {
/**
 * Returns the configured {@code whitespace} value.
 *
 * @return the current value of the {@code whitespace} field
 */
public String getWhitespace() {
    return this.whitespace;
}

/**
 * Returns the {@code skip_duplicate_values} setting.
 *
 * @return the current value of the {@code skipDuplicateValues} field
 */
public boolean getSkipDuplicateValues() {
    return this.skipDuplicateValues;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ void setup() {
lenient().when(mockConfig.getDeleteValueRegex()).thenReturn(defaultConfig.getDeleteValueRegex());
lenient().when(mockConfig.getTransformKey()).thenReturn(defaultConfig.getTransformKey());
lenient().when(mockConfig.getWhitespace()).thenReturn(defaultConfig.getWhitespace());
lenient().when(mockConfig.getSkipDuplicateValues()).thenReturn(defaultConfig.getSkipDuplicateValues());

keyValueProcessor = new KeyValueProcessor(pluginMetrics, mockConfig);
}
Expand Down Expand Up @@ -421,6 +422,50 @@ void testStrictWhitespaceKvProcessor() {
assertThatKeyEquals(parsed_message, "key1", "value1");
}

/**
 * With skip_duplicate_values disabled, a key/value pair that appears twice
 * is expected to yield a two-element list holding both occurrences.
 */
@Test
void testFalseSkipDuplicateValuesKvProcessor() {
    when(mockConfig.getSkipDuplicateValues()).thenReturn(false);

    final Record<Event> record = getMessage("key1=value1&key1=value1");
    final List<Record<Event>> editedRecords = (List<Record<Event>>) keyValueProcessor.doExecute(Collections.singletonList(record));
    final LinkedHashMap<String, Object> parsed_message = getLinkedHashMap(editedRecords);

    // Diamond operator instead of a raw ArrayList, so the list is type-checked.
    final ArrayList<Object> expectedValue = new ArrayList<>();
    expectedValue.add("value1");
    expectedValue.add("value1");

    assertThat(parsed_message.size(), equalTo(1));
    assertThatKeyEquals(parsed_message, "key1", expectedValue);
}

/**
 * With skip_duplicate_values enabled, a key/value pair that appears twice
 * is expected to collapse to the single scalar value.
 */
@Test
void testTrueSkipDuplicateValuesKvProcessor() {
    when(mockConfig.getSkipDuplicateValues()).thenReturn(true);

    final Record<Event> inputRecord = getMessage("key1=value1&key1=value1");
    final List<Record<Event>> processedRecords =
            (List<Record<Event>>) keyValueProcessor.doExecute(Collections.singletonList(inputRecord));
    final LinkedHashMap<String, Object> parsedMessage = getLinkedHashMap(processedRecords);

    assertThat(parsedMessage.size(), equalTo(1));
    assertThatKeyEquals(parsedMessage, "key1", "value1");
}

/**
 * With skip_duplicate_values enabled and three pairs for one key, where the
 * third repeats the first, only the two distinct values are expected, in
 * first-seen order.
 */
@Test
void testTrueThreeInputsDuplicateValuesKvProcessor() {
    when(mockConfig.getSkipDuplicateValues()).thenReturn(true);

    final Record<Event> record = getMessage("key1=value1&key1=value2&key1=value1");
    final List<Record<Event>> editedRecords = (List<Record<Event>>) keyValueProcessor.doExecute(Collections.singletonList(record));
    final LinkedHashMap<String, Object> parsed_message = getLinkedHashMap(editedRecords);

    // Diamond operator instead of a raw ArrayList, so the list is type-checked.
    final ArrayList<Object> expectedValue = new ArrayList<>();
    expectedValue.add("value1");
    expectedValue.add("value2");

    assertThat(parsed_message.size(), equalTo(1));
    assertThatKeyEquals(parsed_message, "key1", expectedValue);
}

@Test
void testShutdownIsReady() {
assertThat(keyValueProcessor.isReadyForShutdown(), is(true));
Expand Down

0 comments on commit 2476813

Please sign in to comment.