Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add LogPatternTool #413

Merged
merged 6 commits into from
Sep 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion src/main/java/org/opensearch/agent/ToolPlugin.java
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

import org.opensearch.agent.tools.CreateAlertTool;
import org.opensearch.agent.tools.CreateAnomalyDetectorTool;
import org.opensearch.agent.tools.LogPatternTool;
import org.opensearch.agent.tools.NeuralSparseSearchTool;
import org.opensearch.agent.tools.PPLTool;
import org.opensearch.agent.tools.RAGTool;
Expand Down Expand Up @@ -71,6 +72,7 @@
SearchMonitorsTool.Factory.getInstance().init(client);
CreateAlertTool.Factory.getInstance().init(client);
CreateAnomalyDetectorTool.Factory.getInstance().init(client);
LogPatternTool.Factory.getInstance().init(client, xContentRegistry);

Check warning on line 75 in src/main/java/org/opensearch/agent/ToolPlugin.java

View check run for this annotation

Codecov / codecov/patch

src/main/java/org/opensearch/agent/ToolPlugin.java#L75

Added line #L75 was not covered by tests
return Collections.emptyList();
}

Expand All @@ -87,7 +89,8 @@
SearchAnomalyResultsTool.Factory.getInstance(),
SearchMonitorsTool.Factory.getInstance(),
CreateAlertTool.Factory.getInstance(),
CreateAnomalyDetectorTool.Factory.getInstance()
CreateAnomalyDetectorTool.Factory.getInstance(),
LogPatternTool.Factory.getInstance()

Check warning on line 93 in src/main/java/org/opensearch/agent/ToolPlugin.java

View check run for this annotation

Codecov / codecov/patch

src/main/java/org/opensearch/agent/ToolPlugin.java#L92-L93

Added lines #L92 - L93 were not covered by tests
);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ private static Map<String, Object> processResponse(SearchHit hit) {
return docContent;
}

private <T> SearchRequest buildSearchRequest(Map<String, String> parameters) throws IOException {
protected <T> SearchRequest buildSearchRequest(Map<String, String> parameters) throws IOException {
String question = parameters.get(INPUT_FIELD);
if (StringUtils.isBlank(question)) {
throw new IllegalArgumentException("[" + INPUT_FIELD + "] is null or empty, can not process it.");
Expand All @@ -89,8 +89,7 @@ private <T> SearchRequest buildSearchRequest(Map<String, String> parameters) thr
searchSourceBuilder.parseXContent(queryParser);
searchSourceBuilder.fetchSource(sourceFields, null);
searchSourceBuilder.size(docSize);
SearchRequest searchRequest = new SearchRequest().source(searchSourceBuilder).indices(index);
return searchRequest;
return new SearchRequest().source(searchSourceBuilder).indices(parameters.getOrDefault(INDEX_FIELD, index));
}

@Override
Expand Down
294 changes: 294 additions & 0 deletions src/main/java/org/opensearch/agent/tools/LogPatternTool.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,294 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.agent.tools;

import static org.opensearch.ml.common.utils.StringUtils.gson;

import java.security.AccessController;
import java.security.PrivilegedExceptionAction;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;

import org.apache.commons.lang3.StringUtils;
import org.opensearch.action.search.SearchRequest;
import org.opensearch.action.search.SearchResponse;
import org.opensearch.client.Client;
import org.opensearch.core.action.ActionListener;
import org.opensearch.core.common.logging.LoggerMessageFormat;
import org.opensearch.core.xcontent.NamedXContentRegistry;
import org.opensearch.ml.common.spi.tools.ToolAnnotation;
import org.opensearch.search.SearchHit;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.ImmutableSet;

import lombok.Builder;
import lombok.EqualsAndHashCode;
import lombok.Getter;
import lombok.Setter;
import lombok.extern.log4j.Log4j2;

/**
* This tool supports generating log patterns on the input dsl and index. It's implemented by
* several steps:
* 1. Retrival [[${DOC_SIZE_FIELD}]] logs from index
qianheng-aws marked this conversation as resolved.
Show resolved Hide resolved
* 2. Extract patterns for each retrieved log
* 2.1 Find Pattern Field: If users provide parameter [[${PATTERN_FIELD}]], use it as the pattern
* field; Otherwise, find the string field with the longest length on the first log.
* 2.2 Extract Pattern: If users provide parameter [[${PATTERN}]], compile it as a pattern;
* Otherwise, use [[${DEFAULT_IGNORED_CHARS}]]. It will remove all chars matching the pattern.
* 3. Group logs by their extracted patterns.
* 4. Find top N patterns with the largest sample log size.
* 5. For each found top N patterns, return [[${SAMPLE_LOG_SIZE}]] sample logs.
*/
@Log4j2
@Getter
@Setter
@ToolAnnotation(LogPatternTool.TYPE)
public class LogPatternTool extends AbstractRetrieverTool {
public static final String TYPE = "LogPatternTool";

public static final String DEFAULT_DESCRIPTION = "Log Pattern Tool";
public static final String TOP_N_PATTERN = "top_n_pattern";
public static final String SAMPLE_LOG_SIZE = "sample_log_size";
public static final String PATTERN_FIELD = "pattern_field";
public static final String PATTERN = "pattern";
public static final int LOG_PATTERN_DEFAULT_DOC_SIZE = 1000;
public static final int DEFAULT_TOP_N_PATTERN = 3;
public static final int DEFAULT_SAMPLE_LOG_SIZE = 20;
private static final ImmutableSet<Character> DEFAULT_IGNORED_CHARS = ImmutableSet
.copyOf("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789".chars().mapToObj(c -> (char) c).toArray(Character[]::new));

private String name = TYPE;
private int topNPattern;
private int sampleLogSize;
@EqualsAndHashCode.Exclude
private Pattern pattern;

@Builder
public LogPatternTool(
Client client,
NamedXContentRegistry xContentRegistry,
int docSize,
int topNPattern,
int sampleLogSize,
String patternStr
) {
super(client, xContentRegistry, null, null, docSize);
checkPositive(topNPattern, TOP_N_PATTERN);
checkPositive(sampleLogSize, SAMPLE_LOG_SIZE);
this.topNPattern = topNPattern;
this.sampleLogSize = sampleLogSize;
if (patternStr != null)
this.pattern = Pattern.compile(patternStr);

Check warning on line 90 in src/main/java/org/opensearch/agent/tools/LogPatternTool.java

View check run for this annotation

Codecov / codecov/patch

src/main/java/org/opensearch/agent/tools/LogPatternTool.java#L90

Added line #L90 was not covered by tests
}

@Override
protected String getQueryBody(String queryText) {
return queryText;
}

@Override
public <T> void run(Map<String, String> parameters, ActionListener<T> listener) {
int topNPattern = parameters.containsKey(TOP_N_PATTERN) ? getPositiveInteger(parameters, TOP_N_PATTERN) : this.topNPattern;
int sampleLogSize = parameters.containsKey(SAMPLE_LOG_SIZE) ? getPositiveInteger(parameters, SAMPLE_LOG_SIZE) : this.sampleLogSize;
Pattern pattern = parameters.containsKey(PATTERN) ? Pattern.compile(parameters.get(PATTERN)) : this.pattern;

SearchRequest searchRequest;
try {
searchRequest = buildSearchRequest(parameters);
} catch (Exception e) {
log.error("Failed to build search request.", e);
listener.onFailure(e);
return;
}

Check warning on line 111 in src/main/java/org/opensearch/agent/tools/LogPatternTool.java

View check run for this annotation

Codecov / codecov/patch

src/main/java/org/opensearch/agent/tools/LogPatternTool.java#L106-L111

Added lines #L106 - L111 were not covered by tests

ActionListener<SearchResponse> actionListener = ActionListener.wrap(r -> {
SearchHit[] hits = r.getHits().getHits();

Check warning on line 114 in src/main/java/org/opensearch/agent/tools/LogPatternTool.java

View check run for this annotation

Codecov / codecov/patch

src/main/java/org/opensearch/agent/tools/LogPatternTool.java#L113-L114

Added lines #L113 - L114 were not covered by tests

if (hits != null && hits.length > 0) {
String patternField = parameters.containsKey(PATTERN_FIELD)
? parameters.get(PATTERN_FIELD)
: findLongestField(hits[0].getSourceAsMap());

Check warning on line 119 in src/main/java/org/opensearch/agent/tools/LogPatternTool.java

View check run for this annotation

Codecov / codecov/patch

src/main/java/org/opensearch/agent/tools/LogPatternTool.java#L118-L119

Added lines #L118 - L119 were not covered by tests
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we stop if longestField is fewer than maybe 30 chars or continue to search for 10 more records in case the first one has outlier data?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It just imitates the logic of Discover log pattern.

It sounds good to avoid outliner, but it could also make the logic more complex and inconsistent with Discover.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The above suggestion is not ideal, just want to trigger some discussion. It's ok to leave it as it is if we don't have better idea.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What if the longest field is missing in hits[0] ? Shall we iterate though all the hits?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, that's possible. And also, in some cases, the longest field in the first log maybe not the most longest overall.

But I think firstly we just imitate what current log pattern feature do, and do enhancement later. Better to discuss with Observability team as well. You can check the current implementation here Code Ref

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we add one more check to see whether parseField is one of the columns inside the index? Sometimes customer could type wrong parse field name, in the PPL, it will raise an error to show the parse field doesn't exist. We could do the same logic.

if (patternField == null) {
listener.onResponse((T) "Pattern field is not set and this index doesn't contain any string field");
qianheng-aws marked this conversation as resolved.
Show resolved Hide resolved
return;

Check warning on line 122 in src/main/java/org/opensearch/agent/tools/LogPatternTool.java

View check run for this annotation

Codecov / codecov/patch

src/main/java/org/opensearch/agent/tools/LogPatternTool.java#L121-L122

Added lines #L121 - L122 were not covered by tests
}
Map<String, List<Map<String, Object>>> patternGroups = new HashMap<>();

Check warning on line 124 in src/main/java/org/opensearch/agent/tools/LogPatternTool.java

View check run for this annotation

Codecov / codecov/patch

src/main/java/org/opensearch/agent/tools/LogPatternTool.java#L124

Added line #L124 was not covered by tests
for (SearchHit hit : hits) {
Map<String, Object> source = hit.getSourceAsMap();
String patternValue = extractPattern((String) source.getOrDefault(patternField, ""), pattern);
List<Map<String, Object>> group = patternGroups.computeIfAbsent(patternValue, k -> new ArrayList<>());
group.add(source);

Check warning on line 129 in src/main/java/org/opensearch/agent/tools/LogPatternTool.java

View check run for this annotation

Codecov / codecov/patch

src/main/java/org/opensearch/agent/tools/LogPatternTool.java#L126-L129

Added lines #L126 - L129 were not covered by tests
}
List<Map<String, Object>> sortedEntries = patternGroups
.entrySet()
.stream()
.sorted(Comparator.comparingInt(entry -> -entry.getValue().size()))
.limit(topNPattern)
.map(
entry -> Map
.of(

Check warning on line 138 in src/main/java/org/opensearch/agent/tools/LogPatternTool.java

View check run for this annotation

Codecov / codecov/patch

src/main/java/org/opensearch/agent/tools/LogPatternTool.java#L131-L138

Added lines #L131 - L138 were not covered by tests
"total count",
entry.getValue().size(),

Check warning on line 140 in src/main/java/org/opensearch/agent/tools/LogPatternTool.java

View check run for this annotation

Codecov / codecov/patch

src/main/java/org/opensearch/agent/tools/LogPatternTool.java#L140

Added line #L140 was not covered by tests
"pattern",
entry.getKey(),

Check warning on line 142 in src/main/java/org/opensearch/agent/tools/LogPatternTool.java

View check run for this annotation

Codecov / codecov/patch

src/main/java/org/opensearch/agent/tools/LogPatternTool.java#L142

Added line #L142 was not covered by tests
"sample logs",
entry.getValue().subList(0, Math.min(entry.getValue().size(), sampleLogSize))

Check warning on line 144 in src/main/java/org/opensearch/agent/tools/LogPatternTool.java

View check run for this annotation

Codecov / codecov/patch

src/main/java/org/opensearch/agent/tools/LogPatternTool.java#L144

Added line #L144 was not covered by tests
)
)
.toList();

Check warning on line 147 in src/main/java/org/opensearch/agent/tools/LogPatternTool.java

View check run for this annotation

Codecov / codecov/patch

src/main/java/org/opensearch/agent/tools/LogPatternTool.java#L147

Added line #L147 was not covered by tests

listener
.onResponse((T) AccessController.doPrivileged((PrivilegedExceptionAction<String>) () -> gson.toJson(sortedEntries)));
} else {
listener.onResponse((T) "Can not get any match from search result.");

Check warning on line 152 in src/main/java/org/opensearch/agent/tools/LogPatternTool.java

View check run for this annotation

Codecov / codecov/patch

src/main/java/org/opensearch/agent/tools/LogPatternTool.java#L149-L152

Added lines #L149 - L152 were not covered by tests
}
}, e -> {
log.error("Failed to search index.", e);
listener.onFailure(e);
});
client.search(searchRequest, actionListener);
}

Check warning on line 159 in src/main/java/org/opensearch/agent/tools/LogPatternTool.java

View check run for this annotation

Codecov / codecov/patch

src/main/java/org/opensearch/agent/tools/LogPatternTool.java#L154-L159

Added lines #L154 - L159 were not covered by tests

/**
* Extract a pattern from the value of a field by removing chars in the pattern. This function
* imitates the same logic of Observability log pattern feature here:
* <a href="https://github.com/opensearch-project/sql/blob/4303a2ab755d53903094dd94a5100572677a27a1/core/src/main/java/org/opensearch/sql/expression/parse/PatternsExpression.java#L53">parseValue</a>
* @param rawString string value of the field to generate a pattern
* @param pattern @Nullable the specified pattern to remove, use DEFAULT_IGNORED_CHARS if null
* @return the generated pattern value
*/
@VisibleForTesting
static String extractPattern(String rawString, Pattern pattern) {
if (pattern != null)
return pattern.matcher(rawString).replaceAll("");
char[] chars = rawString.toCharArray();
int pos = 0;
for (int i = 0; i < chars.length; i++) {
if (!DEFAULT_IGNORED_CHARS.contains(chars[i])) {
chars[pos++] = chars[i];
}
}
Comment on lines +173 to +179
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For better readability, please try regex: [a-zA-Z0-9]

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For this version, I think we just need to copy the code from sql plugin without doing too much change. So developer can better track the source and diff.

What you suggested could be an enhancement in a separate PR.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure

return new String(chars, 0, pos);
}

/**
* Find the longest field in the sample log source. This function imitates the same logic of
* Observability log pattern feature here:
* <a href="https://github.com/opensearch-project/dashboards-observability/blob/279463ad0c8e00780bf57f85e6d2bf30e8c7c93d/public/components/event_analytics/hooks/use_fetch_patterns.ts#L137C2-L164C6">setDefaultPatternsField</a>
* @param sampleLogSource sample log source map
* @return the longest field name
*/
@VisibleForTesting
static String findLongestField(Map<String, Object> sampleLogSource) {
String longestField = null;
int maxLength = 0;

for (Map.Entry<String, Object> entry : sampleLogSource.entrySet()) {
Object value = entry.getValue();
if (value instanceof String) {
String stringValue = (String) value;
int length = stringValue.length();
if (length > maxLength) {
maxLength = length;
longestField = entry.getKey();
}
}
}
return longestField;
}

@Override
public String getType() {
return TYPE;
}

@Override
public boolean validate(Map<String, String> parameters) {
// LogPatternTool needs to pass index and input as parameter in runtime.
return super.validate(parameters) && parameters.containsKey(INDEX_FIELD) && !StringUtils.isBlank(parameters.get(INDEX_FIELD));
}

private static int getPositiveInteger(Map<String, ?> params, String paramName) {
int value = getInteger(params, paramName);
checkPositive(value, paramName);
return value;

Check warning on line 223 in src/main/java/org/opensearch/agent/tools/LogPatternTool.java

View check run for this annotation

Codecov / codecov/patch

src/main/java/org/opensearch/agent/tools/LogPatternTool.java#L221-L223

Added lines #L221 - L223 were not covered by tests
}

private static int getInteger(Map<String, ?> params, String paramName) {
int value;
try {
value = Integer.parseInt((String) params.get(paramName));
} catch (NumberFormatException e) {
throw new IllegalArgumentException(
LoggerMessageFormat.format("Invalid value {} for parameter {}, it should be a number", params.get(paramName), paramName)

Check warning on line 232 in src/main/java/org/opensearch/agent/tools/LogPatternTool.java

View check run for this annotation

Codecov / codecov/patch

src/main/java/org/opensearch/agent/tools/LogPatternTool.java#L229-L232

Added lines #L229 - L232 were not covered by tests
);
}
return value;

Check warning on line 235 in src/main/java/org/opensearch/agent/tools/LogPatternTool.java

View check run for this annotation

Codecov / codecov/patch

src/main/java/org/opensearch/agent/tools/LogPatternTool.java#L234-L235

Added lines #L234 - L235 were not covered by tests
}

private static void checkPositive(int value, String paramName) {
if (value <= 0) {
throw new IllegalArgumentException(
LoggerMessageFormat.format("Invalid value {} for parameter {}, it should be positive", value, paramName)

Check warning on line 241 in src/main/java/org/opensearch/agent/tools/LogPatternTool.java

View check run for this annotation

Codecov / codecov/patch

src/main/java/org/opensearch/agent/tools/LogPatternTool.java#L240-L241

Added lines #L240 - L241 were not covered by tests
);
}
}

public static class Factory extends AbstractRetrieverTool.Factory<LogPatternTool> {
private static LogPatternTool.Factory INSTANCE;

public static LogPatternTool.Factory getInstance() {
if (INSTANCE != null) {
return INSTANCE;
}
synchronized (LogPatternTool.class) {
if (INSTANCE != null) {
return INSTANCE;

Check warning on line 255 in src/main/java/org/opensearch/agent/tools/LogPatternTool.java

View check run for this annotation

Codecov / codecov/patch

src/main/java/org/opensearch/agent/tools/LogPatternTool.java#L255

Added line #L255 was not covered by tests
}
INSTANCE = new LogPatternTool.Factory();
return INSTANCE;
}
}

@Override
public LogPatternTool create(Map<String, Object> params) {
int docSize = params.containsKey(DOC_SIZE_FIELD) ? getInteger(params, DOC_SIZE_FIELD) : LOG_PATTERN_DEFAULT_DOC_SIZE;
int topNPattern = params.containsKey(TOP_N_PATTERN) ? getInteger(params, TOP_N_PATTERN) : DEFAULT_TOP_N_PATTERN;
int sampleLogSize = params.containsKey(SAMPLE_LOG_SIZE) ? getInteger(params, SAMPLE_LOG_SIZE) : DEFAULT_SAMPLE_LOG_SIZE;
String patternStr = params.containsKey(PATTERN) ? (String) params.get(PATTERN) : null;
return LogPatternTool
.builder()
.client(client)
.xContentRegistry(xContentRegistry)
.docSize(docSize)
.topNPattern(topNPattern)
.sampleLogSize(sampleLogSize)
.patternStr(patternStr)
.build();
}

@Override
public String getDefaultType() {
return TYPE;

Check warning on line 281 in src/main/java/org/opensearch/agent/tools/LogPatternTool.java

View check run for this annotation

Codecov / codecov/patch

src/main/java/org/opensearch/agent/tools/LogPatternTool.java#L281

Added line #L281 was not covered by tests
}

@Override
public String getDefaultVersion() {
return null;

Check warning on line 286 in src/main/java/org/opensearch/agent/tools/LogPatternTool.java

View check run for this annotation

Codecov / codecov/patch

src/main/java/org/opensearch/agent/tools/LogPatternTool.java#L286

Added line #L286 was not covered by tests
}

@Override
public String getDefaultDescription() {
return DEFAULT_DESCRIPTION;
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.agent.tools;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNull;

import java.util.HashMap;
import java.util.Map;
import java.util.regex.Pattern;

import org.junit.Before;
import org.junit.Test;

import lombok.SneakyThrows;

public class LogPatternToolTests {

public static final String TEST_QUERY_TEXT = "123fsd23134sdfouh";
private Map<String, Object> params = new HashMap<>();

@Before
public void setup() {}

@Test
@SneakyThrows
public void testCreateTool() {
LogPatternTool tool = LogPatternTool.Factory.getInstance().create(params);
assertEquals(LogPatternTool.LOG_PATTERN_DEFAULT_DOC_SIZE, (int) tool.docSize);
assertEquals(LogPatternTool.DEFAULT_TOP_N_PATTERN, tool.getTopNPattern());
assertEquals(LogPatternTool.DEFAULT_SAMPLE_LOG_SIZE, tool.getSampleLogSize());
assertNull(tool.getPattern());
assertEquals("LogPatternTool", tool.getType());
assertEquals("LogPatternTool", tool.getName());
assertEquals(LogPatternTool.DEFAULT_DESCRIPTION, LogPatternTool.Factory.getInstance().getDefaultDescription());
}

@Test
public void testGetQueryBody() {
LogPatternTool tool = LogPatternTool.Factory.getInstance().create(params);
assertEquals(TEST_QUERY_TEXT, tool.getQueryBody(TEST_QUERY_TEXT));
}

@Test
public void testFindLongestField() {
assertEquals("field2", LogPatternTool.findLongestField(Map.of("field1", "123", "field2", "1234", "filed3", 1234)));
}

@Test
public void testExtractPattern() {
assertEquals("././", LogPatternTool.extractPattern("123.abc/.AB/", null));
assertEquals("123.c/.AB/", LogPatternTool.extractPattern("123.abc/.AB/", Pattern.compile("ab")));
assertEquals(".abc/.AB/", LogPatternTool.extractPattern("123.abc/.AB/", Pattern.compile("[0-9]")));
}
}
Loading
Loading