diff --git a/src/main/java/org/opensearch/agent/tools/NeuralSparseSearchTool.java b/src/main/java/org/opensearch/agent/tools/NeuralSparseSearchTool.java index cbe0d393..60168603 100644 --- a/src/main/java/org/opensearch/agent/tools/NeuralSparseSearchTool.java +++ b/src/main/java/org/opensearch/agent/tools/NeuralSparseSearchTool.java @@ -33,10 +33,12 @@ public class NeuralSparseSearchTool extends AbstractRetrieverTool { public static final String TYPE = "NeuralSparseSearchTool"; public static final String MODEL_ID_FIELD = "model_id"; public static final String EMBEDDING_FIELD = "embedding_field"; + public static final String NESTED_PATH_FIELD = "nested_path"; private String name = TYPE; private String modelId; private String embeddingField; + private String nestedPath; @Builder public NeuralSparseSearchTool( @@ -46,11 +48,13 @@ public NeuralSparseSearchTool( String embeddingField, String[] sourceFields, Integer docSize, - String modelId + String modelId, + String nestedPath ) { super(client, xContentRegistry, index, sourceFields, docSize); this.modelId = modelId; this.embeddingField = embeddingField; + this.nestedPath = nestedPath; } @Override @@ -61,8 +65,29 @@ protected String getQueryBody(String queryText) { ); } - Map queryBody = Map - .of("query", Map.of("neural_sparse", Map.of(embeddingField, Map.of("query_text", queryText, "model_id", modelId)))); + Map queryBody; + if (StringUtils.isBlank(nestedPath)) { + queryBody = Map + .of("query", Map.of("neural_sparse", Map.of(embeddingField, Map.of("query_text", queryText, "model_id", modelId)))); + } else { + queryBody = Map + .of( + "query", + Map + .of( + "nested", + Map + .of( + "path", + nestedPath, + "score_mode", + "max", + "query", + Map.of("neural_sparse", Map.of(embeddingField, Map.of("query_text", queryText, "model_id", modelId))) + ) + ) + ); + } try { return AccessController.doPrivileged((PrivilegedExceptionAction) () -> gson.toJson(queryBody)); @@ -99,6 +124,7 @@ public NeuralSparseSearchTool create(Map params) { String[] sourceFields = gson.fromJson((String) params.get(SOURCE_FIELD), String[].class); String modelId = (String) params.get(MODEL_ID_FIELD); Integer docSize = params.containsKey(DOC_SIZE_FIELD) ? Integer.parseInt((String) params.get(DOC_SIZE_FIELD)) : DEFAULT_DOC_SIZE; + String nestedPath = (String) params.get(NESTED_PATH_FIELD); return NeuralSparseSearchTool .builder() .client(client) @@ -108,6 +134,7 @@ public NeuralSparseSearchTool create(Map params) { .sourceFields(sourceFields) .modelId(modelId) .docSize(docSize) + .nestedPath(nestedPath) .build(); } diff --git a/src/main/java/org/opensearch/agent/tools/VectorDBTool.java b/src/main/java/org/opensearch/agent/tools/VectorDBTool.java index 4b5b41fa..d397060e 100644 --- a/src/main/java/org/opensearch/agent/tools/VectorDBTool.java +++ b/src/main/java/org/opensearch/agent/tools/VectorDBTool.java @@ -38,11 +38,13 @@ public class VectorDBTool extends AbstractRetrieverTool { public static final String EMBEDDING_FIELD = "embedding_field"; public static final String K_FIELD = "k"; public static final Integer DEFAULT_K = 10; + public static final String NESTED_PATH_FIELD = "nested_path"; private String name = TYPE; private String modelId; private String embeddingField; private Integer k; + private String nestedPath; @Builder public VectorDBTool( @@ -53,12 +55,14 @@ public VectorDBTool( String[] sourceFields, Integer docSize, String modelId, - Integer k + Integer k, + String nestedPath ) { super(client, xContentRegistry, index, sourceFields, docSize); this.modelId = modelId; this.embeddingField = embeddingField; this.k = k; + this.nestedPath = nestedPath; } @Override @@ -69,8 +73,30 @@ protected String getQueryBody(String queryText) { ); } - Map queryBody = Map - .of("query", Map.of("neural", Map.of(embeddingField, Map.of("query_text", queryText, "model_id", modelId, "k", k)))); + Map queryBody; + if (StringUtils.isBlank(nestedPath)) { + queryBody = Map + .of("query", Map.of("neural", Map.of(embeddingField, Map.of("query_text", queryText, "model_id", modelId, "k", k)))); + + } else { + queryBody = Map + .of( + "query", + Map + .of( + "nested", + Map + .of( + "path", + nestedPath, + "score_mode", + "max", + "query", + Map.of("neural", Map.of(embeddingField, Map.of("query_text", queryText, "model_id", modelId, "k", k))) + ) + ) + ); + } try { return AccessController.doPrivileged((PrivilegedExceptionAction) () -> gson.toJson(queryBody)); @@ -108,6 +134,7 @@ public VectorDBTool create(Map params) { String modelId = (String) params.get(MODEL_ID_FIELD); Integer docSize = params.containsKey(DOC_SIZE_FIELD) ? Integer.parseInt((String) params.get(DOC_SIZE_FIELD)) : DEFAULT_DOC_SIZE; Integer k = params.containsKey(K_FIELD) ? Integer.parseInt((String) params.get(K_FIELD)) : DEFAULT_K; + String nestedPath = (String) params.get(NESTED_PATH_FIELD); return VectorDBTool .builder() .client(client) @@ -118,6 +145,7 @@ public VectorDBTool create(Map params) { .modelId(modelId) .docSize(docSize) .k(k) + .nestedPath(nestedPath) .build(); } diff --git a/src/test/java/org/opensearch/agent/tools/NeuralSparseSearchToolTests.java b/src/test/java/org/opensearch/agent/tools/NeuralSparseSearchToolTests.java index 4491db43..d6d14991 100644 --- a/src/test/java/org/opensearch/agent/tools/NeuralSparseSearchToolTests.java +++ b/src/test/java/org/opensearch/agent/tools/NeuralSparseSearchToolTests.java @@ -23,6 +23,7 @@ public class NeuralSparseSearchToolTests { public static final String TEST_QUERY_TEXT = "123fsd23134sdfouh"; public static final String TEST_EMBEDDING_FIELD = "test embedding"; public static final String TEST_MODEL_ID = "123fsd23134"; + public static final String TEST_NESTED_PATH = "nested_path"; private Map params = new HashMap<>(); @Before @@ -60,6 +61,22 @@ public void testGetQueryBody() { assertEquals("123fsd23134", queryBody.get("query").get("neural_sparse").get("test embedding").get("model_id")); } + @Test + @SneakyThrows + public void testGetQueryBodyWithNestedPath() { + params.put(NeuralSparseSearchTool.NESTED_PATH_FIELD, TEST_NESTED_PATH); + NeuralSparseSearchTool tool = NeuralSparseSearchTool.Factory.getInstance().create(params); + Map>> nestedQueryBody = gson.fromJson(tool.getQueryBody(TEST_QUERY_TEXT), Map.class); + assertEquals("nested_path", nestedQueryBody.get("query").get("nested").get("path")); + assertEquals("max", nestedQueryBody.get("query").get("nested").get("score_mode")); + Map>> queryBody = (Map>>) nestedQueryBody + .get("query") + .get("nested") + .get("query"); + assertEquals("123fsd23134sdfouh", queryBody.get("neural_sparse").get("test embedding").get("query_text")); + assertEquals("123fsd23134", queryBody.get("neural_sparse").get("test embedding").get("model_id")); + } + @Test @SneakyThrows public void testGetQueryBodyWithJsonObjectString() { @@ -110,6 +127,11 @@ public void testCreateToolsParseParams() { () -> NeuralSparseSearchTool.Factory.getInstance().create(Map.of(NeuralSparseSearchTool.MODEL_ID_FIELD, 123)) ); + assertThrows( + ClassCastException.class, + () -> NeuralSparseSearchTool.Factory.getInstance().create(Map.of(NeuralSparseSearchTool.NESTED_PATH_FIELD, 123)) + ); + assertThrows( JsonSyntaxException.class, () -> NeuralSparseSearchTool.Factory.getInstance().create(Map.of(NeuralSparseSearchTool.SOURCE_FIELD, "123")) diff --git a/src/test/java/org/opensearch/agent/tools/RAGToolTests.java b/src/test/java/org/opensearch/agent/tools/RAGToolTests.java index 4696c12c..0f19f91a 100644 --- a/src/test/java/org/opensearch/agent/tools/RAGToolTests.java +++ b/src/test/java/org/opensearch/agent/tools/RAGToolTests.java @@ -55,6 +55,7 @@ public class RAGToolTests { public static final String TEST_INFERENCE_MODEL_ID = "1234"; public static final String TEST_NEURAL_QUERY_TYPE = "neural"; public static final String TEST_NEURAL_SPARSE_QUERY_TYPE = "neural_sparse"; + public static final String TEST_NESTED_PATH = "nested_path"; static public final NamedXContentRegistry TEST_XCONTENT_REGISTRY_FOR_NEURAL_QUERY = getQueryNamedXContentRegistry(); private RAGTool ragTool; @@ -422,6 +423,7 @@ public void testFactoryNeuralQuery() { assertEquals(factoryMock.getDefaultVersion(), null); assertNotNull(RAGTool.Factory.getInstance()); + params.put(VectorDBTool.NESTED_PATH_FIELD, TEST_NESTED_PATH); RAGTool rAGtool1 = factoryMock.create(params); VectorDBTool.Factory.getInstance().init(client, TEST_XCONTENT_REGISTRY_FOR_NEURAL_QUERY); params.put(VectorDBTool.MODEL_ID_FIELD, TEST_EMBEDDING_MODEL_ID); @@ -436,6 +438,7 @@ public void testFactoryNeuralQuery() { assertEquals(rAGtool1.getQueryTool().getSourceFields(), rAGtool2.getQueryTool().getSourceFields()); assertEquals(rAGtool1.getXContentRegistry(), rAGtool2.getXContentRegistry()); assertEquals(rAGtool1.getQueryType(), rAGtool2.getQueryType()); + assertEquals(((VectorDBTool) rAGtool1.getQueryTool()).getNestedPath(), ((VectorDBTool) rAGtool2.getQueryTool()).getNestedPath()); } @Test @@ -450,6 +453,8 @@ public void testFactoryNeuralSparseQuery() { assertEquals(factoryMock.getDefaultType(), RAGTool.TYPE); assertEquals(factoryMock.getDefaultVersion(), null); + params.put(NeuralSparseSearchTool.NESTED_PATH_FIELD, TEST_NESTED_PATH); + params.put("query_type", "neural_sparse"); RAGTool rAGtool1 = factoryMock.create(params); NeuralSparseSearchTool.Factory.getInstance().init(client, TEST_XCONTENT_REGISTRY_FOR_NEURAL_QUERY); NeuralSparseSearchTool queryTool = NeuralSparseSearchTool.Factory.getInstance().create(params); @@ -463,7 +468,10 @@ public void testFactoryNeuralSparseQuery() { assertEquals(rAGtool1.getQueryTool().getSourceFields(), rAGtool2.getQueryTool().getSourceFields()); assertEquals(rAGtool1.getXContentRegistry(), rAGtool2.getXContentRegistry()); assertEquals(rAGtool1.getQueryType(), rAGtool2.getQueryType()); - + assertEquals( + ((NeuralSparseSearchTool) rAGtool1.getQueryTool()).getNestedPath(), + ((NeuralSparseSearchTool) rAGtool2.getQueryTool()).getNestedPath() + ); } private static NamedXContentRegistry getQueryNamedXContentRegistry() { diff --git a/src/test/java/org/opensearch/agent/tools/VectorDBToolTests.java b/src/test/java/org/opensearch/agent/tools/VectorDBToolTests.java index 849f9254..635724a7 100644 --- a/src/test/java/org/opensearch/agent/tools/VectorDBToolTests.java +++ b/src/test/java/org/opensearch/agent/tools/VectorDBToolTests.java @@ -24,6 +24,7 @@ public class VectorDBToolTests { public static final String TEST_EMBEDDING_FIELD = "test embedding"; public static final String TEST_MODEL_ID = "123fsd23134"; public static final Integer TEST_K = 123; + public static final String TEST_NESTED_PATH = "nested_path"; private Map params = new HashMap<>(); @Before @@ -61,6 +62,22 @@ public void testGetQueryBody() { assertEquals(123.0, queryBody.get("query").get("neural").get("test embedding").get("k")); } + @Test + @SneakyThrows + public void testGetQueryBodyWithNestedPath() { + params.put(VectorDBTool.NESTED_PATH_FIELD, TEST_NESTED_PATH); + VectorDBTool tool = VectorDBTool.Factory.getInstance().create(params); + Map>> nestedQueryBody = gson.fromJson(tool.getQueryBody(TEST_QUERY_TEXT), Map.class); + assertEquals("nested_path", nestedQueryBody.get("query").get("nested").get("path")); + assertEquals("max", nestedQueryBody.get("query").get("nested").get("score_mode")); + Map>> queryBody = (Map>>) nestedQueryBody + .get("query") + .get("nested") + .get("query"); + assertEquals("123fsd23134sdfouh", queryBody.get("neural").get("test embedding").get("query_text")); + assertEquals("123fsd23134", queryBody.get("neural").get("test embedding").get("model_id")); + } + @Test @SneakyThrows public void testGetQueryBodyWithJsonObjectString() { @@ -103,6 +120,11 @@ public void testCreateToolsParseParams() { assertThrows(ClassCastException.class, () -> VectorDBTool.Factory.getInstance().create(Map.of(VectorDBTool.MODEL_ID_FIELD, 123))); + assertThrows( + ClassCastException.class, + () -> VectorDBTool.Factory.getInstance().create(Map.of(VectorDBTool.NESTED_PATH_FIELD, 123)) + ); + assertThrows(JsonSyntaxException.class, () -> VectorDBTool.Factory.getInstance().create(Map.of(VectorDBTool.SOURCE_FIELD, "123"))); // although it will be parsed as integer, but the parameters value should always be String diff --git a/src/test/java/org/opensearch/integTest/NeuralSparseSearchToolIT.java b/src/test/java/org/opensearch/integTest/NeuralSparseSearchToolIT.java index 3758c84d..b7618468 100644 --- a/src/test/java/org/opensearch/integTest/NeuralSparseSearchToolIT.java +++ b/src/test/java/org/opensearch/integTest/NeuralSparseSearchToolIT.java @@ -7,7 +7,6 @@ import static org.hamcrest.Matchers.allOf; import static org.hamcrest.Matchers.containsString; -import static org.opensearch.ml.common.utils.StringUtils.gson; import java.nio.file.Files; import java.nio.file.Path; @@ -22,6 +21,7 @@ public class NeuralSparseSearchToolIT extends BaseAgentToolsIT { public static String TEST_INDEX_NAME = "test_index"; + public static String TEST_NESTED_INDEX_NAME = "test_index_nested"; private String modelId; private String registerAgentRequestBody; @@ -64,12 +64,55 @@ private void prepareIndex() { addDocToIndex(TEST_INDEX_NAME, "2", List.of("text", "embedding"), List.of("text doc 3", Map.of("test", 5, "a", 6))); } + @SneakyThrows + private void prepareNestedIndex() { + createIndexWithConfiguration( + TEST_NESTED_INDEX_NAME, + "{\n" + + " \"mappings\": {\n" + + " \"properties\": {\n" + + " \"text\": {\n" + + " \"type\": \"text\"\n" + + " },\n" + + " \"embedding\": {\n" + + " \"type\": \"nested\",\n" + + " \"properties\":{\n" + + " \"sparse\":{\n" + + " \"type\":\"rank_features\"\n" + + " }\n" + + " }\n" + + " }\n" + + " }\n" + + " }\n" + + "}" + ); + addDocToIndex( + TEST_NESTED_INDEX_NAME, + "0", + List.of("text", "embedding"), + List.of("text doc 1", Map.of("sparse", List.of(Map.of("hello", 1, "world", 2)))) + ); + addDocToIndex( + TEST_NESTED_INDEX_NAME, + "1", + List.of("text", "embedding"), + List.of("text doc 2", Map.of("sparse", List.of(Map.of("a", 3, "b", 4)))) + ); + addDocToIndex( + TEST_NESTED_INDEX_NAME, + "2", + List.of("text", "embedding"), + List.of("text doc 3", Map.of("sparse", List.of(Map.of("test", 5, "a", 6)))) + ); + } + @Before @SneakyThrows public void setUp() { super.setUp(); prepareModel(); prepareIndex(); + prepareNestedIndex(); registerAgentRequestBody = Files .readString( Path @@ -127,6 +170,23 @@ public void testNeuralSparseSearchToolInFlowAgent() { ); } + public void testNeuralSparseSearchToolInFlowAgent_withNestedIndex() { + String registerAgentRequestBodyNested = registerAgentRequestBody; + registerAgentRequestBodyNested = registerAgentRequestBodyNested.replace("\"nested_path\": \"\"", "\"nested_path\": \"embedding\""); + registerAgentRequestBodyNested = registerAgentRequestBodyNested + .replace("\"embedding_field\": \"embedding\"", "\"embedding_field\": \"embedding.sparse\""); + registerAgentRequestBodyNested = registerAgentRequestBodyNested + .replace("\"index\": \"test_index\"", "\"index\": \"test_index_nested\""); + String agentId = createAgent(registerAgentRequestBodyNested); + String result = executeAgent(agentId, "{\"parameters\": {\"question\": \"a\"}}"); + assertEquals( + "The agent execute response not equal with expected.", + "{\"_index\":\"test_index_nested\",\"_source\":{\"text\":\"text doc 3\"},\"_id\":\"2\",\"_score\":2.4136734}\n" + + "{\"_index\":\"test_index_nested\",\"_source\":{\"text\":\"text doc 2\"},\"_id\":\"1\",\"_score\":1.2068367}\n", + result + ); + } + public void testNeuralSparseSearchToolInFlowAgent_withIllegalSourceField_thenGetEmptySource() { String agentId = createAgent(registerAgentRequestBody.replace("text", "text2")); String result = executeAgent(agentId, "{\"parameters\": {\"question\": \"a\"}}"); diff --git a/src/test/java/org/opensearch/integTest/VectorDBToolIT.java b/src/test/java/org/opensearch/integTest/VectorDBToolIT.java index c494371a..3f7fc77e 100644 --- a/src/test/java/org/opensearch/integTest/VectorDBToolIT.java +++ b/src/test/java/org/opensearch/integTest/VectorDBToolIT.java @@ -22,6 +22,7 @@ public class VectorDBToolIT extends BaseAgentToolsIT { public static String TEST_INDEX_NAME = "test_index"; + public static String TEST_NESTED_INDEX_NAME = "test_index_nested"; private String modelId; private String registerAgentRequestBody; @@ -99,12 +100,75 @@ private void prepareIndex() { addDocToIndex(TEST_INDEX_NAME, "1", List.of("text"), List.of("a b")); } + @SneakyThrows + private void prepareNestedIndex() { + String pipelineConfig = "{\n" + + " \"description\": \"text embedding pipeline\",\n" + + " \"processors\": [\n" + + " {\n" + + " \"text_embedding\": {\n" + + " \"model_id\": \"" + + modelId + + "\",\n" + + " \"field_map\": {\n" + + " \"text\": \"embedding\"\n" + + " }\n" + + " }\n" + + " }\n" + + " ]\n" + + "}"; + createIngestPipelineWithConfiguration("test-embedding-model", pipelineConfig); + + String indexMapping = "{\n" + + " \"mappings\": {\n" + + " \"properties\": {\n" + + " \"text\": {\n" + + " \"type\": \"text\"\n" + + " },\n" + + " \"embedding\": {\n" + + " \"type\":\"nested\",\n" + + " \"properties\":{\n" + + " \"knn\":{\n" + + " \"type\": \"knn_vector\",\n" + + " \"dimension\": 768,\n" + + " \"method\": {\n" + + " \"name\": \"hnsw\",\n" + + " \"space_type\": \"l2\",\n" + + " \"engine\": \"lucene\",\n" + + " \"parameters\": {\n" + + " \"ef_construction\": 128,\n" + + " \"m\": 24\n" + + " }\n" + + " }\n" + + " }\n" + + " }\n" + + " \n" + + " }\n" + + " }\n" + + " },\n" + + " \"settings\": {\n" + + " \"index\": {\n" + + " \"knn.space_type\": \"cosinesimil\",\n" + + " \"default_pipeline\": \"test-embedding-model\",\n" + + " \"knn\": \"true\"\n" + + " }\n" + + " }\n" + + "}"; + + createIndexWithConfiguration(TEST_NESTED_INDEX_NAME, indexMapping); + + addDocToIndex(TEST_NESTED_INDEX_NAME, "0", List.of("text"), List.of(List.of("hello world"))); + + addDocToIndex(TEST_NESTED_INDEX_NAME, "1", List.of("text"), List.of(List.of("a b"))); + } + @Before @SneakyThrows public void setUp() { super.setUp(); prepareModel(); prepareIndex(); + prepareNestedIndex(); registerAgentRequestBody = Files .readString( Path @@ -157,6 +221,22 @@ public void testVectorDBToolInFlowAgent() { ); } + public void testVectorDBToolInFlowAgent_withNestedIndex() { + String registerAgentRequestBodyNested = registerAgentRequestBody; + registerAgentRequestBodyNested = registerAgentRequestBodyNested.replace("\"nested_path\": \"\"", "\"nested_path\": \"embedding\""); + registerAgentRequestBodyNested = registerAgentRequestBodyNested + .replace("\"embedding_field\": \"embedding\"", "\"embedding_field\": \"embedding.knn\""); + registerAgentRequestBodyNested = registerAgentRequestBodyNested + .replace("\"index\": \"test_index\"", "\"index\": \"test_index_nested\""); + String agentId = createAgent(registerAgentRequestBodyNested); + String result = executeAgent(agentId, "{\"parameters\": {\"question\": \"a\"}}"); + // To allow digits variation from model output, using string contains to match + assertTrue( + result.contains("{\"_index\":\"test_index_nested\",\"_source\":{\"text\":[\"hello world\"]},\"_id\":\"0\",\"_score\":0.7") + ); + assertTrue(result.contains("{\"_index\":\"test_index_nested\",\"_source\":{\"text\":[\"a b\"]},\"_id\":\"1\",\"_score\":0.2")); + } + public void testVectorDBToolInFlowAgent_withIllegalSourceField_thenGetEmptySource() { String agentId = createAgent(registerAgentRequestBody.replace("text", "text2")); String result = executeAgent(agentId, "{\"parameters\": {\"question\": \"a\"}}"); diff --git a/src/test/resources/org/opensearch/agent/tools/register_flow_agent_of_neural_sparse_search_tool_request_body.json b/src/test/resources/org/opensearch/agent/tools/register_flow_agent_of_neural_sparse_search_tool_request_body.json index ac2a2987..579f0778 100644 --- a/src/test/resources/org/opensearch/agent/tools/register_flow_agent_of_neural_sparse_search_tool_request_body.json +++ b/src/test/resources/org/opensearch/agent/tools/register_flow_agent_of_neural_sparse_search_tool_request_body.json @@ -10,7 +10,8 @@ "index": "test_index", "embedding_field": "embedding", "source_field": ["text"], - "input": "${parameters.question}" + "input": "${parameters.question}", + "nested_path": "" } } ] diff --git a/src/test/resources/org/opensearch/agent/tools/register_flow_agent_of_vectordb_tool_request_body.json b/src/test/resources/org/opensearch/agent/tools/register_flow_agent_of_vectordb_tool_request_body.json index 3b13e443..b1488388 100644 --- a/src/test/resources/org/opensearch/agent/tools/register_flow_agent_of_vectordb_tool_request_body.json +++ b/src/test/resources/org/opensearch/agent/tools/register_flow_agent_of_vectordb_tool_request_body.json @@ -10,7 +10,8 @@ "index": "test_index", "embedding_field": "embedding", "source_field": ["text"], - "input": "${parameters.question}" + "input": "${parameters.question}", + "nested_path": "" } } ]