opensearch-project · amitgalitz · Mar 18, 2024 · Mar 18, 2024
@@ -180,7 +180,6 @@ dependencies {
 
     // ZipArchive dependencies used for integration tests
     zipArchive group: 'org.opensearch.plugin', name:'opensearch-ml-plugin', version: "${opensearch_build}"
-
     secureIntegTestPluginArchive group: 'org.opensearch.plugin', name:'opensearch-security', version: "${opensearch_build}"
 
     configurations.all {

@@ -21,21 +21,79 @@ public enum DefaultUseCases {
     /** defaults file and substitution ready template for OpenAI embedding model */
     OPEN_AI_EMBEDDING_MODEL_DEPLOY(
         "open_ai_embedding_model_deploy",
-        "defaults/open-ai-embedding-defaults.json",
+        "defaults/openai-embedding-defaults.json",
         "substitutionTemplates/deploy-remote-model-template.json"
     ),
-    /** defaults file and substitution ready template for cohere embedding model */
+    /** defaults file and substitution ready template for Cohere embedding model */
     COHERE_EMBEDDING_MODEL_DEPLOY(
         "cohere-embedding_model_deploy",
         "defaults/cohere-embedding-defaults.json",
         "substitutionTemplates/deploy-remote-model-template-extra-params.json"
     ),
+    /** defaults file and substitution ready template for Bedrock Titan embedding model */
+    BEDROCK_TITAN_EMBEDDING_MODEL_DEPLOY(
+        "bedrock-titan-embedding_model_deploy",
+        "defaults/bedrock-titan-embedding-defaults.json",
+        "substitutionTemplates/deploy-remote-bedrock-model-template.json"
+    ),
+    /** defaults file and substitution ready template for Bedrock Titan multimodal embedding model */
+    BEDROCK_TITAN_MULTIMODAL_MODEL_DEPLOY(
+        "bedrock-titan-multimodal_model_deploy",
+        "defaults/bedrock-titan-multimodal-defaults.json",
+        "substitutionTemplates/deploy-remote-bedrock-model-template.json"
+    ),
+    /** defaults file and substitution ready template for Cohere chat model */
+    COHERE_CHAT_MODEL_DEPLOY(
+        "cohere-chat_model_deploy",
+        "defaults/cohere-chat-defaults.json",
+        "substitutionTemplates/deploy-remote-model-chat-template.json"
+    ),
+    /** defaults file and substitution ready template for OpenAI chat model */
+    OPENAI_CHAT_MODEL_DEPLOY(
+        "openai-chat_model_deploy",
+        "defaults/openai-chat-defaults.json",
+        "substitutionTemplates/deploy-remote-model-chat-template.json"
+    ),
     /** defaults file and substitution ready template for local neural sparse model and ingest pipeline*/
-    LOCAL_NEURAL_SPARSE_SEARCH(
-        "local_neural_sparse_search",
-        "defaults/local-sparse-search-defaults.json",
-        "substitutionTemplates/neural-sparse-local-template.json"
-    );
+    LOCAL_NEURAL_SPARSE_SEARCH_BI_ENCODER(
+        "local_neural_sparse_search_bi_encoder",
+        "defaults/local-sparse-search-biencoder-defaults.json",
+        "substitutionTemplates/neural-sparse-local-biencoder-template.json"
+    ),
+    /** defaults file and substitution ready template for semantic search, no model creation*/
+    SEMANTIC_SEARCH("semantic_search", "defaults/semantic-search-defaults.json", "substitutionTemplates/semantic-search-template.json"),
+    /** defaults file and substitution ready template for multimodal search, no model creation*/
+    MULTI_MODAL_SEARCH(
+        "multi_modal_search",
+        "defaults/multi-modal-search-defaults.json",
+        "substitutionTemplates/multi-modal-search-template.json"
+    ),
+    /** defaults file and substitution ready template for multimodal search with Bedrock Titan multimodal embedding model*/
+    MULTI_MODAL_SEARCH_WITH_BEDROCK_TITAN(
+        "multi_modal_search_with_bedrock_titan_multi_modal",
+        "defaults/multimodal-search-bedrock-titan-defaults.json",
+        "substitutionTemplates/multi-modal-search-with-bedrock-titan-template.json"
+    ),
+    /** defaults file and substitution ready template for semantic search with query enricher processor attached, no model creation*/
+    SEMANTIC_SEARCH_WITH_QUERY_ENRICHER(
+        "semantic_search_with_query_enricher",
+        "defaults/semantic-search-defaults.json",
+        "substitutionTemplates/semantic-search-with-query-enricher-template.json"
+    ),
+    /** defaults file and substitution ready template for semantic search with cohere embedding model*/
+    SEMANTIC_SEARCH_WITH_COHERE_EMBEDDING(
+        "semantic_search_with_cohere_embedding",
+        "defaults/cohere-embedding-semantic-search-defaults.json",
+        "substitutionTemplates/semantic-search-with-model-template.json"
+    ),
+    /** defaults file and substitution ready template for semantic search with query enricher processor attached and cohere embedding model*/
+    SEMANTIC_SEARCH_WITH_COHERE_EMBEDDING_AND_QUERY_ENRICHER(
+        "semantic_search_with_cohere_embedding_query_enricher",
+        "defaults/cohere-embedding-semantic-search-defaults.json",
+        "substitutionTemplates/semantic-search-with-model-and-query-enricher-template.json"
+    ),
+    /** defaults file and substitution ready template for hybrid search, no model creation*/
+    HYBRID_SEARCH("hybrid_search", "defaults/hybrid-search-defaults.json", "substitutionTemplates/hybrid-search-template.json");
 
     private final String useCaseName;
     private final String defaultsFile;

@@ -378,8 +378,11 @@ public static Object conditionallySubstitute(Object value, Map<String, WorkflowD
                     String regex = "\\$\\{\\{\\s*" + Pattern.quote(e.getKey()) + "\\s*\\}\\}";
                     String replacement = e.getValue();
 
-                    // Special handling for JSON strings that contain placeholders (connectors action)
-                    replacement = Matcher.quoteReplacement(replacement.replace("\"", "\\\""));
+                    // Correctly escape backslashes, newlines, and quotes for JSON compatibility
+                    replacement = replacement.replace("\\", "\\\\").replace("\"", "\\\"").replace("\n", "\\n");
+
+                    // Use Matcher.quoteReplacement to handle special replacement characters like $ and \ that weren't previously handled
+                    replacement = Matcher.quoteReplacement(replacement);
                     value = ((String) value).replaceAll(regex, replacement);
                 }
             }

@@ -84,7 +84,12 @@ public PlainActionFuture<WorkflowData> execute(
             String pipelineId = (String) inputs.get(PIPELINE_ID);
             String configurations = (String) inputs.get(CONFIGURATIONS);
 
-            byte[] byteArr = configurations.getBytes(StandardCharsets.UTF_8);
+            // Special case for processors whose array values arrive as quoted strings and must have the quotes removed
+            // (e.g. "weights": "[0.7, 0.3]" -> "weights": [0.7, 0.3]).
+            // The regular expression below matches such stringified arrays and keeps only the bracketed content.
+            String transformedJsonString = configurations.replaceAll("\"\\[(.*?)]\"", "[$1]");
+
+            byte[] byteArr = transformedJsonString.getBytes(StandardCharsets.UTF_8);
             BytesReference configurationsBytes = new BytesArray(byteArr);
 
             String pipelineToBeCreated = this.getName();

@@ -160,6 +160,7 @@ public void onFailure(Exception e) {
                 parameters = getParameterMap(inputs.get(PARAMETERS_FIELD));
                 credentials = getStringToStringMap(inputs.get(CREDENTIAL_FIELD), CREDENTIAL_FIELD);
                 actions = getConnectorActionList(inputs.get(ACTIONS_FIELD));
+                // TODO: check for unneeded ${{create_connector.*}} substitutions and remove those fields, so we don't need nearly duplicate templates
             } catch (IllegalArgumentException iae) {
                 logger.error("IllegalArgumentException in connector configuration", iae);
                 throw new FlowFrameworkException("IllegalArgumentException in connector configuration", RestStatus.BAD_REQUEST);

@@ -52,12 +52,12 @@ public RegisterLocalSparseEncodingModelStep(
 
     @Override
     protected Set<String> getRequiredKeys() {
-        return Set.of(NAME_FIELD, VERSION_FIELD, MODEL_FORMAT, FUNCTION_NAME, MODEL_CONTENT_HASH_VALUE, URL);
+        return Set.of(NAME_FIELD, VERSION_FIELD, MODEL_FORMAT);
     }
 
     @Override
     protected Set<String> getOptionalKeys() {
-        return Set.of(DESCRIPTION_FIELD, MODEL_GROUP_ID, DEPLOY_FIELD);
+        return Set.of(DESCRIPTION_FIELD, MODEL_GROUP_ID, DEPLOY_FIELD, MODEL_CONTENT_HASH_VALUE, URL, FUNCTION_NAME);
     }
 
     @Override

@@ -156,8 +156,8 @@ public enum WorkflowSteps {
         /** Register Local Sparse Encoding Model Step */
         REGISTER_LOCAL_SPARSE_ENCODING_MODEL(
             RegisterLocalSparseEncodingModelStep.NAME,
-            List.of(NAME_FIELD, VERSION_FIELD, MODEL_FORMAT, FUNCTION_NAME, MODEL_CONTENT_HASH_VALUE, URL),
-            List.of(MODEL_ID, REGISTER_MODEL_STATUS),
+            List.of(NAME_FIELD, VERSION_FIELD, MODEL_FORMAT),
+            List.of(MODEL_ID, REGISTER_MODEL_STATUS, FUNCTION_NAME, MODEL_CONTENT_HASH_VALUE, URL),
             List.of(OPENSEARCH_ML),
             TimeValue.timeValueSeconds(60)
         ),

@@ -0,0 +1,17 @@
+{
+    "template.name": "deploy-bedrock-titan-embedding-model",
+    "template.description": "Deploying Amazon Bedrock Titan embedding model ",
+    "create_connector.name": "Amazon Bedrock Connector: embedding",
+    "create_connector.description": "The connector to bedrock Titan embedding model",
+    "create_connector.region": "us-east-1",
+    "create_connector.endpoint": "api.openai.com",
+    "create_connector.credential.access_key": "123",
+    "create_connector.credential.secret_key": "123",
+    "create_connector.credential.session_token": "123",
+    "create_connector.actions.url": "https://bedrock-runtime.us-east-1.amazonaws.com/model/amazon.titan-embed-text-v1/invoke",
+    "create_connector.actions.request_body": "{ \"inputText\": \"${parameters.inputText}\" }",
+    "create_connector.actions.pre_process_function": "\n    StringBuilder builder = new StringBuilder();\n    builder.append(\"\\\"\");\n    String first = params.text_docs[0];\n    builder.append(first);\n    builder.append(\"\\\"\");\n    def parameters = \"{\" +\"\\\"inputText\\\":\" + builder + \"}\";\n    return  \"{\" +\"\\\"parameters\\\":\" + parameters + \"}\";",
+    "create_connector.actions.post_process_function": "\n      def name = \"sentence_embedding\";\n      def dataType = \"FLOAT32\";\n      if (params.embedding == null || params.embedding.length == 0) {\n        return params.message;\n      }\n      def shape = [params.embedding.length];\n      def json = \"{\" +\n                 \"\\\"name\\\":\\\"\" + name + \"\\\",\" +\n                 \"\\\"data_type\\\":\\\"\" + dataType + \"\\\",\" +\n                 \"\\\"shape\\\":\" + shape + \",\" +\n                 \"\\\"data\\\":\" + params.embedding +\n                 \"}\";\n      return json;\n    ",
+    "register_remote_model.name": "Bedrock embedding model",
+    "register_remote_model.description": "bedrock-embedding-model"
+}
@@ -0,0 +1,18 @@
+{
+    "template.name": "deploy-bedrock-titan-multimodal-embedding-model",
+    "template.description": "deploying Amazon Bedrock Titan multimodal embedding model ",
+    "create_connector.name": "Amazon Bedrock Connector: multi-modal embedding",
+    "create_connector.description": "The connector to bedrock Titan multi-modal embedding model",
+    "create_connector.region": "us-east-1",
+    "create_connector.input_docs_processed_step_size": 2,
+    "create_connector.endpoint": "api.openai.com",
+    "create_connector.credential.access_key": "123",
+    "create_connector.credential.secret_key": "123",
+    "create_connector.credential.session_token": "123",
+    "create_connector.actions.url": "https://bedrock-runtime.us-east-1.amazonaws.com/model/amazon.titan-embed-image-v1/invoke",
+    "create_connector.actions.request_body": "{ \"inputText\": \"${parameters.inputText:-null}\", \"inputImage\": \"${parameters.inputImage:-null}\" }",
+    "create_connector.actions.pre_process_function": "\n    StringBuilder parametersBuilder = new StringBuilder(\"{\");\n    if (params.text_docs.length > 0 && params.text_docs[0] != null) {\n      parametersBuilder.append(\"\\\"inputText\\\":\");\n      parametersBuilder.append(\"\\\"\");\n      parametersBuilder.append(params.text_docs[0]);\n      parametersBuilder.append(\"\\\"\");\n      \n      if (params.text_docs.length > 1 && params.text_docs[1] != null) {\n        parametersBuilder.append(\",\");\n      }\n    }\n    \n    \n    if (params.text_docs.length > 1 && params.text_docs[1] != null) {\n      parametersBuilder.append(\"\\\"inputImage\\\":\");\n      parametersBuilder.append(\"\\\"\");\n      parametersBuilder.append(params.text_docs[1]);\n      parametersBuilder.append(\"\\\"\");\n    }\n    parametersBuilder.append(\"}\");\n    \n    return  \"{\" +\"\\\"parameters\\\":\" + parametersBuilder + \"}\";",
+    "create_connector.actions.post_process_function": "\n      def name = \"sentence_embedding\";\n      def dataType = \"FLOAT32\";\n      if (params.embedding == null || params.embedding.length == 0) {\n          return null;\n      }\n      def shape = [params.embedding.length];\n      def json = \"{\" +\n                 \"\\\"name\\\":\\\"\" + name + \"\\\",\" +\n                 \"\\\"data_type\\\":\\\"\" + dataType + \"\\\",\" +\n                 \"\\\"shape\\\":\" + shape + \",\" +\n                 \"\\\"data\\\":\" + params.embedding +\n                 \"}\";\n      return json;\n    ",
+    "register_remote_model.name": "Bedrock multi-modal embedding model",
+    "register_remote_model.description": "bedrock-multi-modal-embedding-model"
+}
@@ -0,0 +1,14 @@
+{
+    "template.name": "deploy-cohere-chat-model",
+    "template.description": "deploying cohere chat model",
+    "create_connector.name": "Cohere Chat Model",
+    "create_connector.description": "The connector to Cohere's public chat API",
+    "create_connector.protocol": "http",
+    "create_connector.model": "command",
+    "create_connector.endpoint": "api.cohere.ai",
+    "create_connector.credential.key": "123",
+    "create_connector.actions.url": "https://api.cohere.ai/v1/chat",
+    "create_connector.actions.request_body": "{ \"message\": \"${parameters.message}\", \"model\": \"${parameters.model}\" }",
+    "register_remote_model.name": "Cohere chat model",
+    "register_remote_model.description": "cohere-chat-model"
+}
@@ -7,7 +7,6 @@
     "create_connector.model": "embed-english-v3.0",
     "create_connector.input_type": "search_document",
     "create_connector.truncate": "end",
-    "create_connector.endpoint": "api.openai.com",
     "create_connector.credential.key": "123",
     "create_connector.actions.url": "https://api.cohere.ai/v1/embed",
     "create_connector.actions.request_body": "{ \"texts\": ${parameters.texts}, \"truncate\": \"${parameters.truncate}\", \"model\": \"${parameters.model}\", \"input_type\": \"${parameters.input_type}\" }",

@@ -0,0 +1,28 @@
+{
+    "template.name": "semantic search with cohere embedding",
+    "template.description": "Setting up semantic search, with cohere embedding model",
+    "create_connector.name": "cohere-embedding-connector",
+    "create_connector.description": "The connector to Cohere's public embed API",
+    "create_connector.protocol": "http",
+    "create_connector.model": "embed-english-v3.0",
+    "create_connector.input_type": "search_document",
+    "create_connector.truncate": "end",
+    "create_connector.credential.key": "123",
+    "create_connector.actions.url": "https://api.cohere.ai/v1/embed",
+    "create_connector.actions.request_body": "{ \"texts\": ${parameters.texts}, \"truncate\": \"${parameters.truncate}\", \"model\": \"${parameters.model}\", \"input_type\": \"${parameters.input_type}\" }",
+    "create_connector.actions.pre_process_function": "connector.pre_process.cohere.embedding",
+    "create_connector.actions.post_process_function": "connector.post_process.cohere.embedding",
+    "register_remote_model.name": "Cohere english embed model",
+    "register_remote_model.description": "cohere-embedding-model",
+    "create_ingest_pipeline.pipeline_id": "nlp-ingest-pipeline",
+    "create_ingest_pipeline.description": "A text embedding pipeline",
+    "text_embedding.field_map.input": "passage_text",
+    "text_embedding.field_map.output": "passage_embedding",
+    "create_index.name": "my-nlp-index",
+    "create_index.settings.number_of_shards": "2",
+    "create_index.mappings.method.engine": "lucene",
+    "create_index.mappings.method.space_type": "l2",
+    "create_index.mappings.method.name": "hnsw",
+    "text_embedding.field_map.output.dimension": "1024",
+    "create_search_pipeline.pipeline_id": "default_model_pipeline"
+}
@@ -0,0 +1,19 @@
+{
+    "template.name": "hybrid-search",
+    "template.description": "Setting up hybrid search, ingest pipeline and index",
+    "create_ingest_pipeline.pipeline_id": "nlp-ingest-pipeline",
+    "create_ingest_pipeline.description": "A text embedding pipeline",
+    "create_ingest_pipeline.model_id": "123",
+    "text_embedding.field_map.input": "passage_text",
+    "text_embedding.field_map.output": "passage_embedding",
+    "create_index.name": "my-nlp-index",
+    "create_index.settings.number_of_shards": "2",
+    "create_index.mappings.method.engine": "lucene",
+    "create_index.mappings.method.space_type": "l2",
+    "create_index.mappings.method.name": "hnsw",
+    "text_embedding.field_map.output.dimension": "1024",
+    "create_search_pipeline.pipeline_id": "nlp-search-pipeline",
+    "normalization-processor.normalization.technique": "min_max",
+    "normalization-processor.combination.technique": "arithmetic_mean",
+    "normalization-processor.combination.parameters.weights": "[0.3, 0.7]"
+}
@@ -0,0 +1,14 @@
+{
+    "template.name": "local-model-neural-sparse-search",
+    "template.description": "setting up neural sparse search with local model",
+    "register_local_sparse_encoding_model.name": "amazon/neural-sparse/opensearch-neural-sparse-encoding-v1",
+    "register_local_sparse_encoding_model.description": "This is a neural sparse encoding model",
+    "register_local_sparse_encoding_model.model_format": "TORCH_SCRIPT",
+    "register_local_sparse_encoding_model.deploy": "true",
+    "register_local_sparse_encoding_model.version": "1.0.1",
+    "create_ingest_pipeline.pipeline_id": "nlp-ingest-pipeline-sparse",
+    "create_ingest_pipeline.description": "A sparse encoding ingest pipeline",
+    "create_ingest_pipeline.text_embedding.field_map.input": "passage_text",
+    "create_ingest_pipeline.text_embedding.field_map.output": "passage_embedding",
+    "create_index.name": "my-nlp-index"
+}