From c4eefdd9e3bfe90d750002766b71bea2649b44e1 Mon Sep 17 00:00:00 2001 From: Amit Galitzky Date: Mon, 10 Jun 2024 09:00:27 -0700 Subject: [PATCH] [Backport 2.x] Adding additional default use cases (#731) (#734) Adding additional default use cases (#731) * adding pretrained model templates * adding reindex * changing file structure for bwc --------- Signed-off-by: Amit Galitzky --- CHANGELOG.md | 1 + build.gradle | 26 +++- .../flowframework/common/CommonValue.java | 2 + .../flowframework/common/DefaultUseCases.java | 23 +++ .../flowframework/workflow/ReindexStep.java | 16 ++- .../defaults/hybrid-search-defaults.json | 3 +- ...brid-search-with-local-model-defaults.json | 23 +++ .../defaults/multi-modal-search-defaults.json | 4 +- ...timodal-search-bedrock-titan-defaults.json | 4 +- ...ntic-search-with-local-model-defaults.json | 20 +++ ...semantic-search-with-reindex-defaults.json | 31 ++++ .../hybrid-search-template.json | 8 +- ...brid-search-with-local-model-template.json | 109 ++++++++++++++ .../multi-modal-search-template.json | 7 +- ...al-search-with-bedrock-titan-template.json | 7 +- ...eural-sparse-local-biencoder-template.json | 3 - .../semantic-search-template.json | 3 - ...ntic-search-with-local-model-template.json | 86 +++++++++++ ...ith-model-and-query-enricher-template.json | 3 - .../semantic-search-with-model-template.json | 3 - ...c-search-with-query-enricher-template.json | 3 - ...semantic-search-with-reindex-template.json | 135 ++++++++++++++++++ .../FlowFrameworkRestTestCase.java | 72 +++++++++- .../rest/FlowFrameworkRestApiIT.java | 84 ++++++++++- 24 files changed, 631 insertions(+), 45 deletions(-) create mode 100644 src/main/resources/defaults/hybrid-search-with-local-model-defaults.json create mode 100644 src/main/resources/defaults/semantic-search-with-local-model-defaults.json create mode 100644 src/main/resources/defaults/semantic-search-with-reindex-defaults.json create mode 100644 src/main/resources/substitutionTemplates/hybrid-search-with-local-model-template.json create mode 100644 src/main/resources/substitutionTemplates/semantic-search-with-local-model-template.json create mode 100644 src/main/resources/substitutionTemplates/semantic-search-with-reindex-template.json diff --git a/CHANGELOG.md b/CHANGELOG.md index 080555235..17e37ff0c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,7 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.1.0/) ### Enhancements - Add Workflow Step for Reindex from source index to destination ([#718](https://github.com/opensearch-project/flow-framework/pull/718)) - Add param to delete workflow API to clear status even if resources exist ([#719](https://github.com/opensearch-project/flow-framework/pull/719)) +- Add additional default use cases ([#731](https://github.com/opensearch-project/flow-framework/pull/731)) ### Bug Fixes - Add user mapping to Workflow State index ([#705](https://github.com/opensearch-project/flow-framework/pull/705)) diff --git a/build.gradle b/build.gradle index b08074c74..9d69dcb0d 100644 --- a/build.gradle +++ b/build.gradle @@ -180,6 +180,8 @@ dependencies { // ZipArchive dependencies used for integration tests zipArchive group: 'org.opensearch.plugin', name:'opensearch-ml-plugin', version: "${opensearch_build}" + zipArchive group: 'org.opensearch.plugin', name:'opensearch-knn', version: "${opensearch_build}" + zipArchive group: 'org.opensearch.plugin', name:'neural-search', version: "${opensearch_build}" secureIntegTestPluginArchive group: 'org.opensearch.plugin', name:'opensearch-security', version: "${opensearch_build}" configurations.all { @@ -491,7 +493,29 @@ List> plugins = [ return new RegularFile() { @Override File getAsFile() { - return configurations.zipArchive.asFileTree.getSingleFile() + return configurations.zipArchive.asFileTree.matching{include "**/opensearch-ml-plugin-${opensearch_build}.zip"}.getSingleFile() + } + } + } + }), + provider(new Callable(){ + @Override + RegularFile call() throws Exception { + return new RegularFile() { + @Override + File getAsFile() { + return configurations.zipArchive.asFileTree.matching{include "**/opensearch-knn-${opensearch_build}.zip"}.getSingleFile() + } + } + } + }), + provider(new Callable(){ + @Override + RegularFile call() throws Exception { + return new RegularFile() { + @Override + File getAsFile() { + return configurations.zipArchive.asFileTree.matching{include "**/neural-search-${opensearch_build}.zip"}.getSingleFile() } } } diff --git a/src/main/java/org/opensearch/flowframework/common/CommonValue.java b/src/main/java/org/opensearch/flowframework/common/CommonValue.java index 87c2f2180..10a23357a 100644 --- a/src/main/java/org/opensearch/flowframework/common/CommonValue.java +++ b/src/main/java/org/opensearch/flowframework/common/CommonValue.java @@ -225,4 +225,6 @@ private CommonValue() {} public static final String CREATE_CONNECTOR_CREDENTIAL_SESSION_TOKEN = "create_connector.credential.session_token"; /** The field name for ingest pipeline model ID substitution */ public static final String CREATE_INGEST_PIPELINE_MODEL_ID = "create_ingest_pipeline.model_id"; + /** The field name for reindex source index substitution */ + public static final String REINDEX_SOURCE_INDEX = "reindex.source_index"; } diff --git a/src/main/java/org/opensearch/flowframework/common/DefaultUseCases.java b/src/main/java/org/opensearch/flowframework/common/DefaultUseCases.java index bc88f2b4d..7b8d06f1a 100644 --- a/src/main/java/org/opensearch/flowframework/common/DefaultUseCases.java +++ b/src/main/java/org/opensearch/flowframework/common/DefaultUseCases.java @@ -22,6 +22,7 @@ import static org.opensearch.flowframework.common.CommonValue.CREATE_CONNECTOR_CREDENTIAL_SECRET_KEY; import static org.opensearch.flowframework.common.CommonValue.CREATE_CONNECTOR_CREDENTIAL_SESSION_TOKEN; import static org.opensearch.flowframework.common.CommonValue.CREATE_INGEST_PIPELINE_MODEL_ID; +import static org.opensearch.flowframework.common.CommonValue.REINDEX_SOURCE_INDEX; /** * Enum encapsulating the different default use cases and templates we have stored @@ -132,6 +133,28 @@ public enum DefaultUseCases { "defaults/conversational-search-defaults.json", "substitutionTemplates/conversational-search-with-cohere-model-template.json", List.of(CREATE_CONNECTOR_CREDENTIAL_KEY) + ), + /** defaults file and substitution ready template for semantic search with a local pretrained model*/ + SEMANTIC_SEARCH_WITH_LOCAL_MODEL( + "semantic_search_with_local_model", + "defaults/semantic-search-with-local-model-defaults.json", + "substitutionTemplates/semantic-search-with-local-model-template.json", + Collections.emptyList() + + ), + /** defaults file and substitution ready template for hybrid search with a local pretrained model*/ + HYBRID_SEARCH_WITH_LOCAL_MODEL( + "hybrid_search_with_local_model", + "defaults/hybrid-search-with-local-model-defaults.json", + "substitutionTemplates/hybrid-search-with-local-model-template.json", + Collections.emptyList() + ), + /** defaults file and substitution ready template for semantic search with reindex command*/ + SEMANTIC_SEARCH_WITH_REINDEX( + "semantic_search_with_reindex", + "defaults/semantic-search-with-reindex-defaults.json", + "substitutionTemplates/semantic-search-with-reindex-template.json", + List.of(CREATE_CONNECTOR_CREDENTIAL_KEY, REINDEX_SOURCE_INDEX) ); private final String useCaseName; diff --git a/src/main/java/org/opensearch/flowframework/workflow/ReindexStep.java b/src/main/java/org/opensearch/flowframework/workflow/ReindexStep.java index bc335db97..b46ddecab 100644 --- a/src/main/java/org/opensearch/flowframework/workflow/ReindexStep.java +++ b/src/main/java/org/opensearch/flowframework/workflow/ReindexStep.java @@ -95,10 +95,20 @@ public PlainActionFuture execute( Float requestsPerSecond = inputs.containsKey(REQUESTS_PER_SECOND) ? Float.parseFloat(inputs.get(REQUESTS_PER_SECOND).toString()) : null; + requestsPerSecond = requestsPerSecond < 0 ? Float.POSITIVE_INFINITY : requestsPerSecond; Boolean requireAlias = inputs.containsKey(REQUIRE_ALIAS) ? Booleans.parseBoolean(inputs.get(REQUIRE_ALIAS).toString()) : null; - Integer slices = (Integer) inputs.get(SLICES); - Integer maxDocs = (Integer) inputs.get(MAX_DOCS); - + Integer slices; + Integer maxDocs; + if (inputs.get(SLICES) != null) { + slices = Integer.parseInt(String.valueOf(inputs.get(SLICES))); + } else { + slices = (Integer) inputs.get(SLICES); + } + if (inputs.get(MAX_DOCS) != null) { + maxDocs = Integer.parseInt(String.valueOf(inputs.get(MAX_DOCS))); + } else { + maxDocs = (Integer) inputs.get(MAX_DOCS); + } ReindexRequest reindexRequest = new ReindexRequest().setSourceIndices(Strings.splitStringByCommaToArray(sourceIndices)) .setDestIndex(destinationIndex); diff --git a/src/main/resources/defaults/hybrid-search-defaults.json b/src/main/resources/defaults/hybrid-search-defaults.json index cf9fb584b..b64bce6ae 100644 --- a/src/main/resources/defaults/hybrid-search-defaults.json +++ b/src/main/resources/defaults/hybrid-search-defaults.json @@ -14,6 +14,5 @@ "text_embedding.field_map.output.dimension": "1024", "create_search_pipeline.pipeline_id": "nlp-search-pipeline", "normalization-processor.normalization.technique": "min_max", - "normalization-processor.combination.technique": "arithmetic_mean", - "normalization-processor.combination.parameters.weights": "[0.3, 0.7]" + "normalization-processor.combination.technique": "arithmetic_mean" } diff --git a/src/main/resources/defaults/hybrid-search-with-local-model-defaults.json b/src/main/resources/defaults/hybrid-search-with-local-model-defaults.json new file mode 100644 index 000000000..d07cc918d --- /dev/null +++ b/src/main/resources/defaults/hybrid-search-with-local-model-defaults.json @@ -0,0 +1,23 @@ +{ + "template.name": "hybrid-search", + "template.description": "Setting up hybrid search, ingest pipeline and index", + "register_local_pretrained_model.name": "huggingface/sentence-transformers/paraphrase-MiniLM-L3-v2", + "register_local_pretrained_model.description": "This is a sentence transformer model", + "register_local_pretrained_model.model_format": "TORCH_SCRIPT", + "register_local_pretrained_model.deploy": "true", + "register_local_pretrained_model.version": "1.0.1", + "create_ingest_pipeline.pipeline_id": "nlp-ingest-pipeline", + "create_ingest_pipeline.description": "A text embedding pipeline", + "create_ingest_pipeline.model_id": "123", + "text_embedding.field_map.input": "passage_text", + "text_embedding.field_map.output": "passage_embedding", + "create_index.name": "my-nlp-index", + "create_index.settings.number_of_shards": "2", + "create_index.mappings.method.engine": "lucene", + "create_index.mappings.method.space_type": "l2", + "create_index.mappings.method.name": "hnsw", + "text_embedding.field_map.output.dimension": "768", + "create_search_pipeline.pipeline_id": "nlp-search-pipeline", + "normalization-processor.normalization.technique": "min_max", + "normalization-processor.combination.technique": "arithmetic_mean" +} diff --git a/src/main/resources/defaults/multi-modal-search-defaults.json b/src/main/resources/defaults/multi-modal-search-defaults.json index 0588e7182..4e0f86449 100644 --- a/src/main/resources/defaults/multi-modal-search-defaults.json +++ b/src/main/resources/defaults/multi-modal-search-defaults.json @@ -11,5 +11,7 @@ "create_index.settings.number_of_shards": "2", "text_image_embedding.field_map.output.dimension": "1024", "create_index.mappings.method.engine": "lucene", - "create_index.mappings.method.name": "hnsw" + "create_index.mappings.method.name": "hnsw", + "text_image_embedding.field_map.image.type": "text", + "text_image_embedding.field_map.text.type": "text" } diff --git a/src/main/resources/defaults/multimodal-search-bedrock-titan-defaults.json b/src/main/resources/defaults/multimodal-search-bedrock-titan-defaults.json index b6d6a0ff9..3a6a09b21 100644 --- a/src/main/resources/defaults/multimodal-search-bedrock-titan-defaults.json +++ b/src/main/resources/defaults/multimodal-search-bedrock-titan-defaults.json @@ -24,5 +24,7 @@ "create_index.settings.number_of_shards": "2", "text_image_embedding.field_map.output.dimension": "1024", "create_index.mappings.method.engine": "lucene", - "create_index.mappings.method.name": "hnsw" + "create_index.mappings.method.name": "hnsw", + "text_image_embedding.field_map.image.type": "text", + "text_image_embedding.field_map.text.type": "text" } diff --git a/src/main/resources/defaults/semantic-search-with-local-model-defaults.json b/src/main/resources/defaults/semantic-search-with-local-model-defaults.json new file mode 100644 index 000000000..89fad8465 --- /dev/null +++ b/src/main/resources/defaults/semantic-search-with-local-model-defaults.json @@ -0,0 +1,20 @@ +{ + "template.name": "semantic search with local pretrained model", + "template.description": "Setting up semantic search, with a local pretrained embedding model", + "register_local_pretrained_model.name": "huggingface/sentence-transformers/paraphrase-MiniLM-L3-v2", + "register_local_pretrained_model.description": "This is a sentence transformer model", + "register_local_pretrained_model.model_format": "TORCH_SCRIPT", + "register_local_pretrained_model.deploy": "true", + "register_local_pretrained_model.version": "1.0.1", + "create_ingest_pipeline.pipeline_id": "nlp-ingest-pipeline", + "create_ingest_pipeline.description": "A text embedding pipeline", + "text_embedding.field_map.input": "passage_text", + "text_embedding.field_map.output": "passage_embedding", + "create_index.name": "my-nlp-index", + "create_index.settings.number_of_shards": "2", + "create_index.mappings.method.engine": "lucene", + "create_index.mappings.method.space_type": "l2", + "create_index.mappings.method.name": "hnsw", + "text_embedding.field_map.output.dimension": "768", + "create_search_pipeline.pipeline_id": "default_model_pipeline" +} diff --git a/src/main/resources/defaults/semantic-search-with-reindex-defaults.json b/src/main/resources/defaults/semantic-search-with-reindex-defaults.json new file mode 100644 index 000000000..b59780ee9 --- /dev/null +++ b/src/main/resources/defaults/semantic-search-with-reindex-defaults.json @@ -0,0 +1,31 @@ +{ + "template.name": "semantic search with cohere embedding", + "template.description": "Setting up semantic search, with a Cohere embedding model", + "create_connector.name": "cohere-embedding-connector", + "create_connector.description": "The connector to Cohere's public embed API", + "create_connector.protocol": "http", + "create_connector.model": "embed-english-v3.0", + "create_connector.input_type": "search_document", + "create_connector.truncate": "end", + "create_connector.credential.key": "123", + "create_connector.actions.url": "https://api.cohere.ai/v1/embed", + "create_connector.actions.request_body": "{ \"texts\": ${parameters.texts}, \"truncate\": \"${parameters.truncate}\", \"model\": \"${parameters.model}\", \"input_type\": \"${parameters.input_type}\" }", + "create_connector.actions.pre_process_function": "connector.pre_process.cohere.embedding", + "create_connector.actions.post_process_function": "connector.post_process.cohere.embedding", + "register_remote_model.name": "Cohere english embed model", + "register_remote_model.description": "cohere-embedding-model", + "create_ingest_pipeline.pipeline_id": "nlp-ingest-pipeline", + "create_ingest_pipeline.description": "A text embedding pipeline", + "text_embedding.field_map.input": "passage_text", + "text_embedding.field_map.output": "passage_embedding", + "create_index.name": "my-nlp-index", + "create_index.settings.number_of_shards": "2", + "create_index.mappings.method.engine": "lucene", + "create_index.mappings.method.space_type": "l2", + "create_index.mappings.method.name": "hnsw", + "text_embedding.field_map.output.dimension": "1024", + "create_search_pipeline.pipeline_id": "default_model_pipeline", + "reindex.source_index": "", + "reindex.requests_per_second": "-1", + "reindex.slices": "1" +} diff --git a/src/main/resources/substitutionTemplates/hybrid-search-template.json b/src/main/resources/substitutionTemplates/hybrid-search-template.json index c25748ff7..daaf102e6 100644 --- a/src/main/resources/substitutionTemplates/hybrid-search-template.json +++ b/src/main/resources/substitutionTemplates/hybrid-search-template.json @@ -49,9 +49,6 @@ }, "mappings": { "properties": { - "id": { - "type": "text" - }, "${{text_embedding.field_map.output}}": { "type": "knn_vector", "dimension": "${{text_embedding.field_map.output.dimension}}", @@ -84,10 +81,7 @@ "technique": "${{normalization-processor.normalization.technique}}" }, "combination": { - "technique": "${{normalization-processor.combination.technique}}", - "parameters": { - "weights": "${{normalization-processor.combination.parameters.weights}}" - } + "technique": "${{normalization-processor.combination.technique}}" } } } diff --git a/src/main/resources/substitutionTemplates/hybrid-search-with-local-model-template.json b/src/main/resources/substitutionTemplates/hybrid-search-with-local-model-template.json new file mode 100644 index 000000000..457746ab4 --- /dev/null +++ b/src/main/resources/substitutionTemplates/hybrid-search-with-local-model-template.json @@ -0,0 +1,109 @@ +{ + "name": "${{template.name}}", + "description": "${{template.description}}", + "use_case": "HYBRID_SEARCH", + "version": { + "template": "1.0.0", + "compatibility": [ + "2.12.0", + "3.0.0" + ] + }, + "workflows": { + "provision": { + "nodes": [ + { + "id": "register_local_pretrained_model", + "type": "register_local_pretrained_model", + "user_inputs": { + "name": "${{register_local_pretrained_model.name}}", + "version": "${{register_local_pretrained_model.version}}", + "description": "${{register_local_pretrained_model.description}}", + "model_format": "${{register_local_pretrained_model.model_format}}", + "deploy": true + } + }, + { + "id": "create_ingest_pipeline", + "type": "create_ingest_pipeline", + "previous_node_inputs": { + "register_local_pretrained_model": "model_id" + }, + "user_inputs": { + "pipeline_id": "${{create_ingest_pipeline.pipeline_id}}", + "configurations": { + "description": "${{create_ingest_pipeline.description}}", + "processors": [ + { + "text_embedding": { + "model_id": "${{register_local_pretrained_model.model_id}}", + "field_map": { + "${{text_embedding.field_map.input}}": "${{text_embedding.field_map.output}}" + } + } + } + ] + } + } + }, + { + "id": "create_index", + "type": "create_index", + "previous_node_inputs": { + "create_ingest_pipeline": "pipeline_id" + }, + "user_inputs": { + "index_name": "${{create_index.name}}", + "configurations": { + "settings": { + "index.knn": true, + "default_pipeline": "${{create_ingest_pipeline.pipeline_id}}", + "number_of_shards": "${{create_index.settings.number_of_shards}}", + "index.search.default_pipeline": "${{create_search_pipeline.pipeline_id}}" + }, + "mappings": { + "properties": { + "${{text_embedding.field_map.output}}": { + "type": "knn_vector", + "dimension": "${{text_embedding.field_map.output.dimension}}", + "method": { + "engine": "${{create_index.mappings.method.engine}}", + "space_type": "${{create_index.mappings.method.space_type}}", + "name": "${{create_index.mappings.method.name}}", + "parameters": {} + } + }, + "${{text_embedding.field_map.input}}": { + "type": "text" + } + } + } + } + } + }, + { + "id": "create_search_pipeline", + "type": "create_search_pipeline", + "user_inputs": { + "pipeline_id": "${{create_search_pipeline.pipeline_id}}", + "configurations": { + "description": "Post processor for hybrid search", + "phase_results_processors": [ + { + "normalization-processor": { + "normalization": { + "technique": "${{normalization-processor.normalization.technique}}" + }, + "combination": { + "technique": "${{normalization-processor.combination.technique}}" + } + } + } + ] + } + } + } + ] + } + } +} diff --git a/src/main/resources/substitutionTemplates/multi-modal-search-template.json b/src/main/resources/substitutionTemplates/multi-modal-search-template.json index b8acfed47..c7358615d 100644 --- a/src/main/resources/substitutionTemplates/multi-modal-search-template.json +++ b/src/main/resources/substitutionTemplates/multi-modal-search-template.json @@ -50,9 +50,6 @@ }, "mappings": { "properties": { - "id": { - "type": "text" - }, "${{text_image_embedding.embedding}}": { "type": "knn_vector", "dimension": "${{text_image_embedding.field_map.output.dimension}}", @@ -63,10 +60,10 @@ } }, "${{text_image_embedding.field_map.text}}": { - "type": "text" + "type": "${{text_image_embedding.field_map.text.type}}" }, "${{text_image_embedding.field_map.image}}": { - "type": "binary" + "type": "${{text_image_embedding.field_map.image.type}}" } } } diff --git a/src/main/resources/substitutionTemplates/multi-modal-search-with-bedrock-titan-template.json b/src/main/resources/substitutionTemplates/multi-modal-search-with-bedrock-titan-template.json index a19965aa3..08567b267 100644 --- a/src/main/resources/substitutionTemplates/multi-modal-search-with-bedrock-titan-template.json +++ b/src/main/resources/substitutionTemplates/multi-modal-search-with-bedrock-titan-template.json @@ -100,9 +100,6 @@ }, "mappings": { "properties": { - "id": { - "type": "text" - }, "${{text_image_embedding.embedding}}": { "type": "knn_vector", "dimension": "${{text_image_embedding.field_map.output.dimension}}", @@ -113,10 +110,10 @@ } }, "${{text_image_embedding.field_map.text}}": { - "type": "text" + "type": "${{text_image_embedding.field_map.text.type}}" }, "${{text_image_embedding.field_map.image}}": { - "type": "binary" + "type": "${{text_image_embedding.field_map.image.type}}" } } } diff --git a/src/main/resources/substitutionTemplates/neural-sparse-local-biencoder-template.json b/src/main/resources/substitutionTemplates/neural-sparse-local-biencoder-template.json index 71b2a2421..c10012233 100644 --- a/src/main/resources/substitutionTemplates/neural-sparse-local-biencoder-template.json +++ b/src/main/resources/substitutionTemplates/neural-sparse-local-biencoder-template.json @@ -60,9 +60,6 @@ }, "mappings": { "properties": { - "id": { - "type": "text" - }, "${{create_ingest_pipeline.text_embedding.field_map.output}}": { "type": "rank_features" }, diff --git a/src/main/resources/substitutionTemplates/semantic-search-template.json b/src/main/resources/substitutionTemplates/semantic-search-template.json index 5e7ea4dc7..90fc35c45 100644 --- a/src/main/resources/substitutionTemplates/semantic-search-template.json +++ b/src/main/resources/substitutionTemplates/semantic-search-template.json @@ -48,9 +48,6 @@ }, "mappings": { "properties": { - "id": { - "type": "text" - }, "${{text_embedding.field_map.output}}": { "type": "knn_vector", "dimension": "${{text_embedding.field_map.output.dimension}}", diff --git a/src/main/resources/substitutionTemplates/semantic-search-with-local-model-template.json b/src/main/resources/substitutionTemplates/semantic-search-with-local-model-template.json new file mode 100644 index 000000000..125554b78 --- /dev/null +++ b/src/main/resources/substitutionTemplates/semantic-search-with-local-model-template.json @@ -0,0 +1,86 @@ +{ + "name": "${{template.name}}", + "description": "${{template.description}}", + "use_case": "SEMANTIC_SEARCH", + "version": { + "template": "1.0.0", + "compatibility": [ + "2.12.0", + "3.0.0" + ] + }, + "workflows": { + "provision": { + "nodes": [ + { + "id": "register_local_pretrained_model", + "type": "register_local_pretrained_model", + "user_inputs": { + "name": "${{register_local_pretrained_model.name}}", + "version": "${{register_local_pretrained_model.version}}", + "description": "${{register_local_pretrained_model.description}}", + "model_format": "${{register_local_pretrained_model.model_format}}", + "deploy": true + } + }, + { + "id": "create_ingest_pipeline", + "type": "create_ingest_pipeline", + "previous_node_inputs": { + "register_local_pretrained_model": "model_id" + }, + "user_inputs": { + "pipeline_id": "${{create_ingest_pipeline.pipeline_id}}", + "configurations": { + "description": "${{create_ingest_pipeline.description}}", + "processors": [ + { + "text_embedding": { + "model_id": "${{register_local_pretrained_model.model_id}}", + "field_map": { + "${{text_embedding.field_map.input}}": "${{text_embedding.field_map.output}}" + } + } + } + ] + } + } + }, + { + "id": "create_index", + "type": "create_index", + "previous_node_inputs": { + "create_ingest_pipeline": "pipeline_id" + }, + "user_inputs": { + "index_name": "${{create_index.name}}", + "configurations": { + "settings": { + "index.knn": true, + "default_pipeline": "${{create_ingest_pipeline.pipeline_id}}", + "number_of_shards": "${{create_index.settings.number_of_shards}}" + }, + "mappings": { + "properties": { + "${{text_embedding.field_map.output}}": { + "type": "knn_vector", + "dimension": "${{text_embedding.field_map.output.dimension}}", + "method": { + "engine": "${{create_index.mappings.method.engine}}", + "space_type": "${{create_index.mappings.method.space_type}}", + "name": "${{create_index.mappings.method.name}}", + "parameters": {} + } + }, + "${{text_embedding.field_map.input}}": { + "type": "text" + } + } + } + } + } + } + ] + } + } +} diff --git a/src/main/resources/substitutionTemplates/semantic-search-with-model-and-query-enricher-template.json b/src/main/resources/substitutionTemplates/semantic-search-with-model-and-query-enricher-template.json index 932afac9e..42a20867c 100644 --- a/src/main/resources/substitutionTemplates/semantic-search-with-model-and-query-enricher-template.json +++ b/src/main/resources/substitutionTemplates/semantic-search-with-model-and-query-enricher-template.json @@ -98,9 +98,6 @@ }, "mappings": { "properties": { - "id": { - "type": "text" - }, "${{text_embedding.field_map.output}}": { "type": "knn_vector", "dimension": "${{text_embedding.field_map.output.dimension}}", diff --git a/src/main/resources/substitutionTemplates/semantic-search-with-model-template.json b/src/main/resources/substitutionTemplates/semantic-search-with-model-template.json index 9bda5da1a..713fb472f 100644 --- a/src/main/resources/substitutionTemplates/semantic-search-with-model-template.json +++ b/src/main/resources/substitutionTemplates/semantic-search-with-model-template.json @@ -97,9 +97,6 @@ }, "mappings": { "properties": { - "id": { - "type": "text" - }, "${{text_embedding.field_map.output}}": { "type": "knn_vector", "dimension": "${{text_embedding.field_map.output.dimension}}", diff --git a/src/main/resources/substitutionTemplates/semantic-search-with-query-enricher-template.json b/src/main/resources/substitutionTemplates/semantic-search-with-query-enricher-template.json index 5ecca1bd4..c963c1912 100644 --- a/src/main/resources/substitutionTemplates/semantic-search-with-query-enricher-template.json +++ b/src/main/resources/substitutionTemplates/semantic-search-with-query-enricher-template.json @@ -66,9 +66,6 @@ }, "mappings": { "properties": { - "id": { - "type": "text" - }, "${{text_embedding.field_map.output}}": { "type": "knn_vector", "dimension": "${{text_embedding.field_map.output.dimension}}", diff --git a/src/main/resources/substitutionTemplates/semantic-search-with-reindex-template.json b/src/main/resources/substitutionTemplates/semantic-search-with-reindex-template.json new file mode 100644 index 000000000..6460eabdc --- /dev/null +++ b/src/main/resources/substitutionTemplates/semantic-search-with-reindex-template.json @@ -0,0 +1,135 @@ +{ + "name": "${{template.name}}", + "description": "${{template.description}}", + "use_case": "SEMANTIC_SEARCH", + "version": { + "template": "1.0.0", + "compatibility": [ + "2.12.0", + "3.0.0" + ] + }, + "workflows": { + "provision": { + "nodes": [ + { + "id": "create_connector", + "type": "create_connector", + "user_inputs": { + "name": "${{create_connector.name}}", + "description": "${{create_connector.description}}", + "version": "1", + "protocol": "${{create_connector.protocol}}", + "parameters": { + "endpoint": "${{create_connector.endpoint}}", + "model": "${{create_connector.model}}", + "input_type": "search_document", + "truncate": "END" + }, + "credential": { + "key": "${{create_connector.credential.key}}" + }, + "actions": [ + { + "action_type": "predict", + "method": "POST", + "url": "${{create_connector.actions.url}}", + "headers": { + "Authorization": "Bearer ${credential.key}", + "Request-Source": "unspecified:opensearch" + }, + "request_body": "${{create_connector.actions.request_body}}", + "pre_process_function": "${{create_connector.actions.pre_process_function}}", + "post_process_function": "${{create_connector.actions.post_process_function}}" + } + ] + } + }, + { + "id": "register_model", + "type": "register_remote_model", + "previous_node_inputs": { + "create_connector": "connector_id" + }, + "user_inputs": { + "name": "${{register_remote_model.name}}", + "function_name": "remote", + "description": "${{register_remote_model.description}}", + "deploy": true + } + }, + { + "id": "create_ingest_pipeline", + "type": "create_ingest_pipeline", + "previous_node_inputs": { + "register_model": "model_id" + }, + "user_inputs": { + "pipeline_id": "${{create_ingest_pipeline.pipeline_id}}", + "configurations": { + "description": "${{create_ingest_pipeline.description}}", + "processors": [ + { + "text_embedding": { + "model_id": "${{register_model.model_id}}", + "field_map": { + "${{text_embedding.field_map.input}}": "${{text_embedding.field_map.output}}" + } + } + } + ] + } + } + }, + { + "id": "create_index", + "type": "create_index", + "previous_node_inputs": { + "create_ingest_pipeline": "pipeline_id" + }, + "user_inputs": { + "index_name": "${{create_index.name}}", + "configurations": { + "settings": { + "index.knn": true, + "default_pipeline": "${{create_ingest_pipeline.pipeline_id}}", + "number_of_shards": "${{create_index.settings.number_of_shards}}" + }, + "mappings": { + "properties": { + "${{text_embedding.field_map.output}}": { + "type": "knn_vector", + "dimension": "${{text_embedding.field_map.output.dimension}}", + "method": { + "engine": "${{create_index.mappings.method.engine}}", + "space_type": "${{create_index.mappings.method.space_type}}", + "name": "${{create_index.mappings.method.name}}", + "parameters": {} + } + }, + "${{text_embedding.field_map.input}}": { + "type": "text" + } + } + } + } + } + }, + { + "id": "reindex", + "type": "reindex", + "previous_node_inputs": { + "create_index": "index_name" + }, + "user_inputs": { + "source_index": "${{reindex.source_index}}", + "destination_index": "${{create_index.name}}", + "refresh": false, + "requests_per_second": "${{reindex.requests_per_second}}", + "slices": "${{reindex.slices}}" + } + } + ] + } + } +} diff --git a/src/test/java/org/opensearch/flowframework/FlowFrameworkRestTestCase.java b/src/test/java/org/opensearch/flowframework/FlowFrameworkRestTestCase.java index 0684279a2..e190a42ca 100644 --- a/src/test/java/org/opensearch/flowframework/FlowFrameworkRestTestCase.java +++ b/src/test/java/org/opensearch/flowframework/FlowFrameworkRestTestCase.java @@ -43,6 +43,8 @@ import org.opensearch.flowframework.model.State; import org.opensearch.flowframework.model.Template; import org.opensearch.flowframework.model.WorkflowState; +import org.opensearch.flowframework.util.ParseUtils; +import org.opensearch.ml.repackage.com.google.common.collect.ImmutableList; import org.opensearch.test.rest.OpenSearchRestTestCase; import org.junit.After; import org.junit.Before; @@ -340,7 +342,7 @@ protected Response createWorkflow(RestClient client, Template template) throws E * @throws Exception if the request fails * @return a rest response */ - protected Response createWorkflowWithUseCase(RestClient client, String useCase, List params) throws Exception { + protected Response createWorkflowWithUseCaseWithNoValidation(RestClient client, String useCase, List params) throws Exception { StringBuilder sb = new StringBuilder(); for (String param : params) { @@ -360,6 +362,28 @@ protected Response createWorkflowWithUseCase(RestClient client, String useCase, ); } + /** + * Helper method to invoke the create workflow API with a use case and also the provision param as true + * @param client the rest client + * @param useCase the usecase to create + * @param defaults the defaults to override given through the request payload + * @throws Exception if the request fails + * @return a rest response + */ + protected Response createAndProvisionWorkflowWithUseCaseWithContent(RestClient client, String useCase, Map defaults) + throws Exception { + String payload = ParseUtils.parseArbitraryStringToObjectMapToString(defaults); + + return TestHelpers.makeRequest( + client, + "POST", + WORKFLOW_URI + "?provision=true&use_case=" + useCase, + Collections.emptyMap(), + payload, + null + ); + } + /** * Helper method to invoke the Create Workflow Rest Action with provision * @param client the rest client @@ -717,6 +741,52 @@ protected GetPipelineResponse getPipelines(String pipelineId) throws IOException } } + protected void ingestSingleDoc(String payload, String indexName) throws IOException { + try { + TestHelpers.makeRequest( + client(), + "PUT", + indexName + "/_doc/1", + null, + payload, + ImmutableList.of(new BasicHeader(HttpHeaders.USER_AGENT, "")) + ); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + protected SearchResponse neuralSearchRequest(String indexName, String modelId) throws IOException { + String searchRequest = + "{\"_source\":{\"excludes\":[\"passage_embedding\"]},\"query\":{\"neural\":{\"passage_embedding\":{\"query_text\":\"world\",\"k\":5,\"model_id\":\"" + + modelId + + "\"}}}}"; + try { + Response restSearchResponse = TestHelpers.makeRequest( + client(), + "POST", + indexName + "/_search", + null, + searchRequest, + ImmutableList.of(new BasicHeader(HttpHeaders.USER_AGENT, "")) + ); + // Parse entity content into SearchResponse + MediaType mediaType = MediaType.fromMediaType(restSearchResponse.getEntity().getContentType().getValue()); + try ( + XContentParser parser = mediaType.xContent() + .createParser( + NamedXContentRegistry.EMPTY, + DeprecationHandler.THROW_UNSUPPORTED_OPERATION, + restSearchResponse.getEntity().getContent() + ) + ) { + return SearchResponse.fromXContent(parser); + } + } catch (Exception e) { + throw new RuntimeException(e); + } + } + @SuppressWarnings("unchecked") protected List catPlugins() throws IOException { Response response = TestHelpers.makeRequest( diff --git a/src/test/java/org/opensearch/flowframework/rest/FlowFrameworkRestApiIT.java b/src/test/java/org/opensearch/flowframework/rest/FlowFrameworkRestApiIT.java index b587d4611..6eb89fb79 100644 --- a/src/test/java/org/opensearch/flowframework/rest/FlowFrameworkRestApiIT.java +++ b/src/test/java/org/opensearch/flowframework/rest/FlowFrameworkRestApiIT.java @@ -34,6 +34,7 @@ import java.time.Instant; import java.util.Collections; import java.util.EnumSet; +import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; @@ -429,7 +430,11 @@ public void testCreateAndProvisionIngestAndSearchPipeline() throws Exception { public void testDefaultCohereUseCase() throws Exception { // Hit Create Workflow API with original template - Response response = createWorkflowWithUseCase(client(), "cohere_embedding_model_deploy", List.of(CREATE_CONNECTOR_CREDENTIAL_KEY)); + Response response = createWorkflowWithUseCaseWithNoValidation( + client(), + "cohere_embedding_model_deploy", + List.of(CREATE_CONNECTOR_CREDENTIAL_KEY) + ); assertEquals(RestStatus.CREATED, TestHelpers.restStatus(response)); Map responseMap = entityAsMap(response); @@ -468,7 +473,7 @@ public void testDefaultSemanticSearchUseCaseWithFailureExpected() throws Excepti // Hit Create Workflow API with original template without required params ResponseException exception = expectThrows( ResponseException.class, - () -> createWorkflowWithUseCase(client(), "semantic_search", Collections.emptyList()) + () -> createWorkflowWithUseCaseWithNoValidation(client(), "semantic_search", Collections.emptyList()) ); assertTrue( exception.getMessage() @@ -476,7 +481,11 @@ public void testDefaultSemanticSearchUseCaseWithFailureExpected() throws Excepti ); // Pass in required params - Response response = createWorkflowWithUseCase(client(), "semantic_search", List.of(CREATE_INGEST_PIPELINE_MODEL_ID)); + Response response = createWorkflowWithUseCaseWithNoValidation( + client(), + "semantic_search", + List.of(CREATE_INGEST_PIPELINE_MODEL_ID) + ); assertEquals(RestStatus.CREATED, TestHelpers.restStatus(response)); Map responseMap = entityAsMap(response); @@ -502,7 +511,7 @@ public void testAllDefaultUseCasesCreation() throws Exception { .collect(Collectors.toSet()); for (String useCaseName : allUseCaseNames) { - Response response = createWorkflowWithUseCase( + Response response = createWorkflowWithUseCaseWithNoValidation( client(), useCaseName, DefaultUseCases.getRequiredParamsByUseCaseName(useCaseName) @@ -514,4 +523,71 @@ public void testAllDefaultUseCasesCreation() throws Exception { getAndAssertWorkflowStatus(client(), workflowId, State.NOT_STARTED, ProvisioningProgress.NOT_STARTED); } } + + public void testSemanticSearchWithLocalModelEndToEnd() throws Exception { + // Checking if plugins are part of the integration test cluster so we can continue with this test + List plugins = catPlugins(); + if (!plugins.contains("opensearch-knn") && plugins.contains("opensearch-neural-search")) { + return; + } + Map defaults = new HashMap<>(); + defaults.put("register_local_pretrained_model.name", "huggingface/sentence-transformers/all-MiniLM-L6-v2"); + defaults.put("register_local_pretrained_model.version", "1.0.1"); + defaults.put("text_embedding.field_map.output.dimension", 384); + + Response response = createAndProvisionWorkflowWithUseCaseWithContent(client(), "semantic_search_with_local_model", defaults); + assertEquals(RestStatus.CREATED, TestHelpers.restStatus(response)); + + Map responseMap = entityAsMap(response); + String workflowId = (String) responseMap.get(WORKFLOW_ID); + getAndAssertWorkflowStatus(client(), workflowId, State.PROVISIONING, ProvisioningProgress.IN_PROGRESS); + + // Wait until provisioning has completed successfully before attempting to retrieve created resources + List resourcesCreated = getResourcesCreated(client(), workflowId, 45); + + // This template should create 4 resources, registered model_id, deployed model_id, ingest pipeline, and index name + assertEquals(4, resourcesCreated.size()); + String modelId = resourcesCreated.get(1).resourceId(); + String indexName = resourcesCreated.get(3).resourceId(); + + // Short wait before ingesting data + Thread.sleep(30000); + + String docContent = "{\"passage_text\": \"Hello planet\"\n}"; + ingestSingleDoc(docContent, indexName); + // Short wait before neural search + Thread.sleep(500); + SearchResponse neuralSearchResponse = neuralSearchRequest(indexName, modelId); + assertEquals(neuralSearchResponse.getHits().getHits().length, 1); + Thread.sleep(500); + deleteIndex(indexName); + + // Hit Deprovision API + // By design, this may not completely deprovision the first time if it takes >2s to process removals + Response deprovisionResponse = deprovisionWorkflow(client(), workflowId); + try { + assertBusy( + () -> { getAndAssertWorkflowStatus(client(), workflowId, State.NOT_STARTED, ProvisioningProgress.NOT_STARTED); }, + 30, + TimeUnit.SECONDS + ); + } catch (ComparisonFailure e) { + // 202 return if still processing + assertEquals(RestStatus.ACCEPTED, TestHelpers.restStatus(deprovisionResponse)); + } + if (TestHelpers.restStatus(deprovisionResponse) == RestStatus.ACCEPTED) { + // Short wait before we try again + Thread.sleep(10000); + deprovisionResponse = deprovisionWorkflow(client(), workflowId); + assertBusy( + () -> { getAndAssertWorkflowStatus(client(), workflowId, State.NOT_STARTED, ProvisioningProgress.NOT_STARTED); }, + 30, + TimeUnit.SECONDS + ); + } + assertEquals(RestStatus.OK, TestHelpers.restStatus(deprovisionResponse)); + // Hit Delete API + Response deleteResponse = deleteWorkflow(client(), workflowId); + assertEquals(RestStatus.OK, TestHelpers.restStatus(deleteResponse)); + } }