Skip to content

Commit

Permalink
adding new use cases
Browse files Browse the repository at this point in the history
Signed-off-by: Amit Galitzky <[email protected]>
  • Loading branch information
amitgalitz committed Mar 18, 2024
1 parent b148eb5 commit c0efddb
Show file tree
Hide file tree
Showing 38 changed files with 1,064 additions and 181 deletions.
1 change: 0 additions & 1 deletion build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,6 @@ dependencies {

// ZipArchive dependencies used for integration tests
zipArchive group: 'org.opensearch.plugin', name:'opensearch-ml-plugin', version: "${opensearch_build}"

secureIntegTestPluginArchive group: 'org.opensearch.plugin', name:'opensearch-security', version: "${opensearch_build}"

configurations.all {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,21 +21,79 @@ public enum DefaultUseCases {
/** defaults file and substitution ready template for OpenAI embedding model */
OPEN_AI_EMBEDDING_MODEL_DEPLOY(
"open_ai_embedding_model_deploy",
"defaults/open-ai-embedding-defaults.json",
"defaults/openai-embedding-defaults.json",
"substitutionTemplates/deploy-remote-model-template.json"
),
/** defaults file and substitution ready template for cohere embedding model */
/** defaults file and substitution ready template for Cohere embedding model */
COHERE_EMBEDDING_MODEL_DEPLOY(
"cohere-embedding_model_deploy",
"defaults/cohere-embedding-defaults.json",
"substitutionTemplates/deploy-remote-model-template-extra-params.json"
),
/** defaults file and substitution ready template for Bedrock Titan embedding model */
BEDROCK_TITAN_EMBEDDING_MODEL_DEPLOY(
"bedrock-titan-embedding_model_deploy",
"defaults/bedrock-titan-embedding-defaults.json",
"substitutionTemplates/deploy-remote-bedrock-model-template.json"
),
/** defaults file and substitution ready template for Bedrock Titan multimodal embedding model */
BEDROCK_TITAN_MULTIMODAL_MODEL_DEPLOY(
"bedrock-titan-multimodal_model_deploy",
"defaults/bedrock-titan-multimodal-defaults.json",
"substitutionTemplates/deploy-remote-bedrock-model-template.json"
),
/** defaults file and substitution ready template for Cohere chat model */
COHERE_CHAT_MODEL_DEPLOY(
"cohere-chat_model_deploy",
"defaults/cohere-chat-defaults.json",
"substitutionTemplates/deploy-remote-model-chat-template.json"
),
/** defaults file and substitution ready template for OpenAI chat model */
OPENAI_CHAT_MODEL_DEPLOY(
"openai-chat_model_deploy",
"defaults/openai-chat-defaults.json",
"substitutionTemplates/deploy-remote-model-chat-template.json"
),
/** defaults file and substitution ready template for local neural sparse model and ingest pipeline*/
LOCAL_NEURAL_SPARSE_SEARCH(
"local_neural_sparse_search",
"defaults/local-sparse-search-defaults.json",
"substitutionTemplates/neural-sparse-local-template.json"
);
LOCAL_NEURAL_SPARSE_SEARCH_BI_ENCODER(
"local_neural_sparse_search_bi_encoder",
"defaults/local-sparse-search-biencoder-defaults.json",
"substitutionTemplates/neural-sparse-local-biencoder-template.json"
),
/** defaults file and substitution ready template for semantic search, no model creation*/
SEMANTIC_SEARCH("semantic_search", "defaults/semantic-search-defaults.json", "substitutionTemplates/semantic-search-template.json"),
/** defaults file and substitution ready template for multimodal search, no model creation*/
MULTI_MODAL_SEARCH(
"multi_modal_search",
"defaults/multi-modal-search-defaults.json",
"substitutionTemplates/multi-modal-search-template.json"
),
/** defaults file and substitution ready template for multimodal search with Bedrock Titan multimodal embedding model */
MULTI_MODAL_SEARCH_WITH_BEDROCK_TITAN(
"multi_modal_search_with_bedrock_titan_multi_modal",
"defaults/multimodal-search-bedrock-titan-defaults.json",
"substitutionTemplates/multi-modal-search-with-bedrock-titan-template.json"
),
/** defaults file and substitution ready template for semantic search with query enricher processor attached, no model creation*/
SEMANTIC_SEARCH_WITH_QUERY_ENRICHER(
"semantic_search_with_query_enricher",
"defaults/semantic-search-defaults.json",
"substitutionTemplates/semantic-search-with-query-enricher-template.json"
),
/** defaults file and substitution ready template for semantic search with cohere embedding model*/
SEMANTIC_SEARCH_WITH_COHERE_EMBEDDING(
"semantic_search_with_cohere_embedding",
"defaults/cohere-embedding-semantic-search-defaults.json",
"substitutionTemplates/semantic-search-with-model-template.json"
),
/** defaults file and substitution ready template for semantic search with query enricher processor attached and cohere embedding model*/
SEMANTIC_SEARCH_WITH_COHERE_EMBEDDING_AND_QUERY_ENRICHER(
"semantic_search_with_cohere_embedding_query_enricher",
"defaults/cohere-embedding-semantic-search-defaults.json",
"substitutionTemplates/semantic-search-with-model-and-query-enricher-template.json"
),
/** defaults file and substitution ready template for hybrid search, no model creation*/
HYBRID_SEARCH("hybrid_search", "defaults/hybrid-search-defaults.json", "substitutionTemplates/hybrid-search-template.json");

private final String useCaseName;
private final String defaultsFile;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -378,8 +378,11 @@ public static Object conditionallySubstitute(Object value, Map<String, WorkflowD
String regex = "\\$\\{\\{\\s*" + Pattern.quote(e.getKey()) + "\\s*\\}\\}";
String replacement = e.getValue();

// Special handling for JSON strings that contain placeholders (connectors action)
replacement = Matcher.quoteReplacement(replacement.replace("\"", "\\\""));
// Correctly escape backslashes, newlines, and quotes for JSON compatibility
replacement = replacement.replace("\\", "\\\\").replace("\"", "\\\"").replace("\n", "\\n");

// Use Matcher.quoteReplacement to handle special replacement characters like $ and \ that weren't previously handled
replacement = Matcher.quoteReplacement(replacement);
value = ((String) value).replaceAll(regex, replacement);
}
}
Expand Down Expand Up @@ -407,6 +410,7 @@ public static String parseArbitraryStringToObjectMapToString(Map<String, Object>
*/
public static Map<String, String> parseJsonFileToStringToStringMap(String path) throws IOException {
    // Load the resource at `path` as raw JSON text.
    String jsonContent = resourceToString(path);

    // The file content is intentionally NOT logged: defaults files carry credential fields
    // (access/secret keys), and dumping whole files at INFO on every call only adds noise.

    // Binding to the raw Map class yields untyped values (presumably via a Jackson ObjectMapper —
    // confirm against the `mapper` declaration). A numeric, boolean, array or object JSON value
    // would therefore arrive as a non-String object; assigning that straight to Map<String, String>
    // compiles (unchecked) but fails later with a ClassCastException at the first String read.
    Map<?, ?> rawJson = mapper.readValue(jsonContent, Map.class);

    // Insertion-ordered copy so iteration order follows the order of keys in the file.
    Map<String, String> mappedJsonFile = new java.util.LinkedHashMap<>();
    if (rawJson == null) {
        // A file whose whole content is the JSON literal `null` maps to "no defaults".
        return mappedJsonFile;
    }
    for (Map.Entry<?, ?> entry : rawJson.entrySet()) {
        Object rawValue = entry.getValue();
        // Keep JSON null as null; stringify everything else so the declared value type holds.
        mappedJsonFile.put(String.valueOf(entry.getKey()), rawValue == null ? null : rawValue.toString());
    }
    return mappedJsonFile;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,12 @@ public PlainActionFuture<WorkflowData> execute(
String pipelineId = (String) inputs.get(PIPELINE_ID);
String configurations = (String) inputs.get(CONFIGURATIONS);

byte[] byteArr = configurations.getBytes(StandardCharsets.UTF_8);
// Special case for processors that have arrays that need to have the quotes removed
// (e.g. "weights": "[0.7, 0.3]" -> "weights": [0.7, 0.3])
// Define a regular expression pattern to match stringified arrays
String transformedJsonString = configurations.replaceAll("\"\\[(.*?)]\"", "[$1]");

byte[] byteArr = transformedJsonString.getBytes(StandardCharsets.UTF_8);
BytesReference configurationsBytes = new BytesArray(byteArr);

String pipelineToBeCreated = this.getName();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,7 @@ public void onFailure(Exception e) {
parameters = getParameterMap(inputs.get(PARAMETERS_FIELD));
credentials = getStringToStringMap(inputs.get(CREDENTIAL_FIELD), CREDENTIAL_FIELD);
actions = getConnectorActionList(inputs.get(ACTIONS_FIELD));
// TODO: check for unneeded substitutions (e.g. ${{create_connector.*}}) and remove those fields so we don't need nearly duplicate templates
} catch (IllegalArgumentException iae) {
logger.error("IllegalArgumentException in connector configuration", iae);
throw new FlowFrameworkException("IllegalArgumentException in connector configuration", RestStatus.BAD_REQUEST);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,12 +52,12 @@ public RegisterLocalSparseEncodingModelStep(

@Override
protected Set<String> getRequiredKeys() {
return Set.of(NAME_FIELD, VERSION_FIELD, MODEL_FORMAT, FUNCTION_NAME, MODEL_CONTENT_HASH_VALUE, URL);
return Set.of(NAME_FIELD, VERSION_FIELD, MODEL_FORMAT);
}

@Override
protected Set<String> getOptionalKeys() {
return Set.of(DESCRIPTION_FIELD, MODEL_GROUP_ID, DEPLOY_FIELD);
return Set.of(DESCRIPTION_FIELD, MODEL_GROUP_ID, DEPLOY_FIELD, MODEL_CONTENT_HASH_VALUE, URL, FUNCTION_NAME);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -156,8 +156,8 @@ public enum WorkflowSteps {
/** Register Local Sparse Encoding Model Step */
REGISTER_LOCAL_SPARSE_ENCODING_MODEL(
RegisterLocalSparseEncodingModelStep.NAME,
List.of(NAME_FIELD, VERSION_FIELD, MODEL_FORMAT, FUNCTION_NAME, MODEL_CONTENT_HASH_VALUE, URL),
List.of(MODEL_ID, REGISTER_MODEL_STATUS),
List.of(NAME_FIELD, VERSION_FIELD, MODEL_FORMAT),
List.of(MODEL_ID, REGISTER_MODEL_STATUS, FUNCTION_NAME, MODEL_CONTENT_HASH_VALUE, URL),
List.of(OPENSEARCH_ML),
TimeValue.timeValueSeconds(60)
),
Expand Down
17 changes: 17 additions & 0 deletions src/main/resources/defaults/bedrock-titan-embedding-defaults.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
{
"template.name": "deploy-bedrock-titan-embedding-model",
"template.description": "Deploying Amazon Bedrock Titan embedding model",
"create_connector.name": "Amazon Bedrock Connector: embedding",
"create_connector.description": "The connector to bedrock Titan embedding model",
"create_connector.region": "us-east-1",
"create_connector.endpoint": "bedrock-runtime.us-east-1.amazonaws.com",
"create_connector.credential.access_key": "123",
"create_connector.credential.secret_key": "123",
"create_connector.credential.session_token": "123",
"create_connector.actions.url": "https://bedrock-runtime.us-east-1.amazonaws.com/model/amazon.titan-embed-text-v1/invoke",
"create_connector.actions.request_body": "{ \"inputText\": \"${parameters.inputText}\" }",
"create_connector.actions.pre_process_function": "\n StringBuilder builder = new StringBuilder();\n builder.append(\"\\\"\");\n String first = params.text_docs[0];\n builder.append(first);\n builder.append(\"\\\"\");\n def parameters = \"{\" +\"\\\"inputText\\\":\" + builder + \"}\";\n return \"{\" +\"\\\"parameters\\\":\" + parameters + \"}\";",
"create_connector.actions.post_process_function": "\n def name = \"sentence_embedding\";\n def dataType = \"FLOAT32\";\n if (params.embedding == null || params.embedding.length == 0) {\n return params.message;\n }\n def shape = [params.embedding.length];\n def json = \"{\" +\n \"\\\"name\\\":\\\"\" + name + \"\\\",\" +\n \"\\\"data_type\\\":\\\"\" + dataType + \"\\\",\" +\n \"\\\"shape\\\":\" + shape + \",\" +\n \"\\\"data\\\":\" + params.embedding +\n \"}\";\n return json;\n ",
"register_remote_model.name": "Bedrock embedding model",
"register_remote_model.description": "bedrock-embedding-model"
}
18 changes: 18 additions & 0 deletions src/main/resources/defaults/bedrock-titan-multimodal-defaults.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
{
"template.name": "deploy-bedrock-titan-multimodal-embedding-model",
"template.description": "Deploying Amazon Bedrock Titan multimodal embedding model",
"create_connector.name": "Amazon Bedrock Connector: multi-modal embedding",
"create_connector.description": "The connector to bedrock Titan multi-modal embedding model",
"create_connector.region": "us-east-1",
"create_connector.input_docs_processed_step_size": "2",
"create_connector.endpoint": "bedrock-runtime.us-east-1.amazonaws.com",
"create_connector.credential.access_key": "123",
"create_connector.credential.secret_key": "123",
"create_connector.credential.session_token": "123",
"create_connector.actions.url": "https://bedrock-runtime.us-east-1.amazonaws.com/model/amazon.titan-embed-image-v1/invoke",
"create_connector.actions.request_body": "{ \"inputText\": \"${parameters.inputText:-null}\", \"inputImage\": \"${parameters.inputImage:-null}\" }",
"create_connector.actions.pre_process_function": "\n StringBuilder parametersBuilder = new StringBuilder(\"{\");\n if (params.text_docs.length > 0 && params.text_docs[0] != null) {\n parametersBuilder.append(\"\\\"inputText\\\":\");\n parametersBuilder.append(\"\\\"\");\n parametersBuilder.append(params.text_docs[0]);\n parametersBuilder.append(\"\\\"\");\n \n if (params.text_docs.length > 1 && params.text_docs[1] != null) {\n parametersBuilder.append(\",\");\n }\n }\n \n \n if (params.text_docs.length > 1 && params.text_docs[1] != null) {\n parametersBuilder.append(\"\\\"inputImage\\\":\");\n parametersBuilder.append(\"\\\"\");\n parametersBuilder.append(params.text_docs[1]);\n parametersBuilder.append(\"\\\"\");\n }\n parametersBuilder.append(\"}\");\n \n return \"{\" +\"\\\"parameters\\\":\" + parametersBuilder + \"}\";",
"create_connector.actions.post_process_function": "\n def name = \"sentence_embedding\";\n def dataType = \"FLOAT32\";\n if (params.embedding == null || params.embedding.length == 0) {\n return null;\n }\n def shape = [params.embedding.length];\n def json = \"{\" +\n \"\\\"name\\\":\\\"\" + name + \"\\\",\" +\n \"\\\"data_type\\\":\\\"\" + dataType + \"\\\",\" +\n \"\\\"shape\\\":\" + shape + \",\" +\n \"\\\"data\\\":\" + params.embedding +\n \"}\";\n return json;\n ",
"register_remote_model.name": "Bedrock multi-modal embedding model",
"register_remote_model.description": "bedrock-multi-modal-embedding-model"
}
14 changes: 14 additions & 0 deletions src/main/resources/defaults/cohere-chat-defaults.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
{
"template.name": "deploy-cohere-chat-model",
"template.description": "deploying cohere chat model",
"create_connector.name": "Cohere Chat Model",
"create_connector.description": "The connector to Cohere's public chat API",
"create_connector.protocol": "http",
"create_connector.model": "command",
"create_connector.endpoint": "api.cohere.ai",
"create_connector.credential.key": "123",
"create_connector.actions.url": "https://api.cohere.ai/v1/chat",
"create_connector.actions.request_body": "{ \"message\": \"${parameters.message}\", \"model\": \"${parameters.model}\" }",
"register_remote_model.name": "Cohere chat model",
"register_remote_model.description": "cohere-chat-model"
}
1 change: 0 additions & 1 deletion src/main/resources/defaults/cohere-embedding-defaults.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
"create_connector.model": "embed-english-v3.0",
"create_connector.input_type": "search_document",
"create_connector.truncate": "end",
"create_connector.endpoint": "api.openai.com",
"create_connector.credential.key": "123",
"create_connector.actions.url": "https://api.cohere.ai/v1/embed",
"create_connector.actions.request_body": "{ \"texts\": ${parameters.texts}, \"truncate\": \"${parameters.truncate}\", \"model\": \"${parameters.model}\", \"input_type\": \"${parameters.input_type}\" }",
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
{
"template.name": "semantic search with cohere embedding",
"template.description": "Setting up semantic search, with cohere embedding model",
"create_connector.name": "cohere-embedding-connector",
"create_connector.description": "The connector to Cohere's public embed API",
"create_connector.protocol": "http",
"create_connector.model": "embed-english-v3.0",
"create_connector.input_type": "search_document",
"create_connector.truncate": "end",
"create_connector.credential.key": "123",
"create_connector.actions.url": "https://api.cohere.ai/v1/embed",
"create_connector.actions.request_body": "{ \"texts\": ${parameters.texts}, \"truncate\": \"${parameters.truncate}\", \"model\": \"${parameters.model}\", \"input_type\": \"${parameters.input_type}\" }",
"create_connector.actions.pre_process_function": "connector.pre_process.cohere.embedding",
"create_connector.actions.post_process_function": "connector.post_process.cohere.embedding",
"register_remote_model.name": "Cohere english embed model",
"register_remote_model.description": "cohere-embedding-model",
"create_ingest_pipeline.pipeline_id": "nlp-ingest-pipeline",
"create_ingest_pipeline.description": "A text embedding pipeline",
"text_embedding.field_map.input": "passage_text",
"text_embedding.field_map.output": "passage_embedding",
"create_index.name": "my-nlp-index",
"create_index.settings.number_of_shards": "2",
"create_index.mappings.method.engine": "lucene",
"create_index.mappings.method.space_type": "l2",
"create_index.mappings.method.name": "hnsw",
"text_embedding.field_map.output.dimension": "1024",
"create_search_pipeline.pipeline_id": "default_model_pipeline"
}
19 changes: 19 additions & 0 deletions src/main/resources/defaults/hybrid-search-defaults.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
{
"template.name": "hybrid-search",
"template.description": "Setting up hybrid search, ingest pipeline and index",
"create_ingest_pipeline.pipeline_id": "nlp-ingest-pipeline",
"create_ingest_pipeline.description": "A text embedding pipeline",
"create_ingest_pipeline.model_id": "123",
"text_embedding.field_map.input": "passage_text",
"text_embedding.field_map.output": "passage_embedding",
"create_index.name": "my-nlp-index",
"create_index.settings.number_of_shards": "2",
"create_index.mappings.method.engine": "lucene",
"create_index.mappings.method.space_type": "l2",
"create_index.mappings.method.name": "hnsw",
"text_embedding.field_map.output.dimension": "1024",
"create_search_pipeline.pipeline_id": "nlp-search-pipeline",
"normalization-processor.normalization.technique": "min_max",
"normalization-processor.combination.technique": "arithmetic_mean",
"normalization-processor.combination.parameters.weights": "[0.3, 0.7]"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
{
"template.name": "local-model-neural-sparse-search",
"template.description": "setting up neural sparse search with local model",
"register_local_sparse_encoding_model.name": "amazon/neural-sparse/opensearch-neural-sparse-encoding-v1",
"register_local_sparse_encoding_model.description": "This is a neural sparse encoding model",
"register_local_sparse_encoding_model.model_format": "TORCH_SCRIPT",
"register_local_sparse_encoding_model.deploy": "true",
"register_local_sparse_encoding_model.version": "1.0.1",
"create_ingest_pipeline.pipeline_id": "nlp-ingest-pipeline-sparse",
"create_ingest_pipeline.description": "A sparse encoding ingest pipeline",
"create_ingest_pipeline.text_embedding.field_map.input": "passage_text",
"create_ingest_pipeline.text_embedding.field_map.output": "passage_embedding",
"create_index.name": "my-nlp-index"
}
Loading

0 comments on commit c0efddb

Please sign in to comment.