Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding new use cases #588

Merged
merged 1 commit into from
Mar 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,6 @@ dependencies {

// ZipArchive dependencies used for integration tests
zipArchive group: 'org.opensearch.plugin', name:'opensearch-ml-plugin', version: "${opensearch_build}"

amitgalitz marked this conversation as resolved.
Show resolved Hide resolved
secureIntegTestPluginArchive group: 'org.opensearch.plugin', name:'opensearch-security', version: "${opensearch_build}"

configurations.all {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,21 +21,79 @@ public enum DefaultUseCases {
/** defaults file and substitution ready template for OpenAI embedding model */
OPEN_AI_EMBEDDING_MODEL_DEPLOY(
"open_ai_embedding_model_deploy",
"defaults/open-ai-embedding-defaults.json",
"defaults/openai-embedding-defaults.json",
"substitutionTemplates/deploy-remote-model-template.json"
),
/** defaults file and substitution ready template for cohere embedding model */
/** defaults file and substitution ready template for Cohere embedding model */
COHERE_EMBEDDING_MODEL_DEPLOY(
"cohere-embedding_model_deploy",
"defaults/cohere-embedding-defaults.json",
"substitutionTemplates/deploy-remote-model-template-extra-params.json"
),
/** defaults file and substitution ready template for Bedrock Titan embedding model */
BEDROCK_TITAN_EMBEDDING_MODEL_DEPLOY(
"bedrock-titan-embedding_model_deploy",
"defaults/bedrock-titan-embedding-defaults.json",
"substitutionTemplates/deploy-remote-bedrock-model-template.json"
),
/** defaults file and substitution ready template for Bedrock Titan multimodal embedding model */
BEDROCK_TITAN_MULTIMODAL_MODEL_DEPLOY(
"bedrock-titan-multimodal_model_deploy",
"defaults/bedrock-titan-multimodal-defaults.json",
"substitutionTemplates/deploy-remote-bedrock-model-template.json"
),
/** defaults file and substitution ready template for Cohere chat model */
COHERE_CHAT_MODEL_DEPLOY(
"cohere-chat_model_deploy",
"defaults/cohere-chat-defaults.json",
"substitutionTemplates/deploy-remote-model-chat-template.json"
),
/** defaults file and substitution ready template for OpenAI chat model */
OPENAI_CHAT_MODEL_DEPLOY(
"openai-chat_model_deploy",
"defaults/openai-chat-defaults.json",
"substitutionTemplates/deploy-remote-model-chat-template.json"
),
/** defaults file and substitution ready template for local neural sparse model and ingest pipeline*/
LOCAL_NEURAL_SPARSE_SEARCH(
"local_neural_sparse_search",
"defaults/local-sparse-search-defaults.json",
"substitutionTemplates/neural-sparse-local-template.json"
);
LOCAL_NEURAL_SPARSE_SEARCH_BI_ENCODER(
"local_neural_sparse_search_bi_encoder",
"defaults/local-sparse-search-biencoder-defaults.json",
"substitutionTemplates/neural-sparse-local-biencoder-template.json"
),
/** defaults file and substitution ready template for semantic search, no model creation*/
SEMANTIC_SEARCH("semantic_search", "defaults/semantic-search-defaults.json", "substitutionTemplates/semantic-search-template.json"),
/** defaults file and substitution ready template for multimodal search, no model creation*/
MULTI_MODAL_SEARCH(
"multi_modal_search",
"defaults/multi-modal-search-defaults.json",
"substitutionTemplates/multi-modal-search-template.json"
),
/** defaults file and substitution ready template for multimodal search with Bedrock Titan multimodal embedding model */
MULTI_MODAL_SEARCH_WITH_BEDROCK_TITAN(
"multi_modal_search_with_bedrock_titan_multi_modal",
"defaults/multimodal-search-bedrock-titan-defaults.json",
"substitutionTemplates/multi-modal-search-with-bedrock-titan-template.json"
),
/** defaults file and substitution ready template for semantic search with query enricher processor attached, no model creation*/
SEMANTIC_SEARCH_WITH_QUERY_ENRICHER(
"semantic_search_with_query_enricher",
"defaults/semantic-search-defaults.json",
"substitutionTemplates/semantic-search-with-query-enricher-template.json"
),
/** defaults file and substitution ready template for semantic search with cohere embedding model*/
SEMANTIC_SEARCH_WITH_COHERE_EMBEDDING(
"semantic_search_with_cohere_embedding",
"defaults/cohere-embedding-semantic-search-defaults.json",
"substitutionTemplates/semantic-search-with-model-template.json"
),
/** defaults file and substitution ready template for semantic search with query enricher processor attached and cohere embedding model*/
SEMANTIC_SEARCH_WITH_COHERE_EMBEDDING_AND_QUERY_ENRICHER(
"semantic_search_with_cohere_embedding_query_enricher",
"defaults/cohere-embedding-semantic-search-defaults.json",
"substitutionTemplates/semantic-search-with-model-and-query-enricher-template.json"
),
/** defaults file and substitution ready template for hybrid search, no model creation*/
HYBRID_SEARCH("hybrid_search", "defaults/hybrid-search-defaults.json", "substitutionTemplates/hybrid-search-template.json");

private final String useCaseName;
private final String defaultsFile;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -378,8 +378,11 @@ public static Object conditionallySubstitute(Object value, Map<String, WorkflowD
String regex = "\\$\\{\\{\\s*" + Pattern.quote(e.getKey()) + "\\s*\\}\\}";
String replacement = e.getValue();

// Special handling for JSON strings that contain placeholders (connectors action)
replacement = Matcher.quoteReplacement(replacement.replace("\"", "\\\""));
// Correctly escape backslashes, newlines, and quotes for JSON compatibility
replacement = replacement.replace("\\", "\\\\").replace("\"", "\\\"").replace("\n", "\\n");

// Use Matcher.quoteReplacement to handle special replacement characters like $ and \ that weren't previously handled
replacement = Matcher.quoteReplacement(replacement);
value = ((String) value).replaceAll(regex, replacement);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,12 @@ public PlainActionFuture<WorkflowData> execute(
String pipelineId = (String) inputs.get(PIPELINE_ID);
String configurations = (String) inputs.get(CONFIGURATIONS);

byte[] byteArr = configurations.getBytes(StandardCharsets.UTF_8);
// Special case for processors that have arrays that need to have the quotes removed
// (e.g. "weights": "[0.7, 0.3]" -> "weights": [0.7, 0.3])
// Define a regular expression pattern to match stringified arrays
String transformedJsonString = configurations.replaceAll("\"\\[(.*?)]\"", "[$1]");

byte[] byteArr = transformedJsonString.getBytes(StandardCharsets.UTF_8);
BytesReference configurationsBytes = new BytesArray(byteArr);

String pipelineToBeCreated = this.getName();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,7 @@ public void onFailure(Exception e) {
parameters = getParameterMap(inputs.get(PARAMETERS_FIELD));
credentials = getStringToStringMap(inputs.get(CREDENTIAL_FIELD), CREDENTIAL_FIELD);
actions = getConnectorActionList(inputs.get(ACTIONS_FIELD));
// TODO: check for un-needed substitution? ${{create_connector. and remove field so we don't need almost duplicate templates
} catch (IllegalArgumentException iae) {
logger.error("IllegalArgumentException in connector configuration", iae);
throw new FlowFrameworkException("IllegalArgumentException in connector configuration", RestStatus.BAD_REQUEST);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,12 +52,12 @@ public RegisterLocalSparseEncodingModelStep(

@Override
protected Set<String> getRequiredKeys() {
return Set.of(NAME_FIELD, VERSION_FIELD, MODEL_FORMAT, FUNCTION_NAME, MODEL_CONTENT_HASH_VALUE, URL);
return Set.of(NAME_FIELD, VERSION_FIELD, MODEL_FORMAT);
}

@Override
protected Set<String> getOptionalKeys() {
return Set.of(DESCRIPTION_FIELD, MODEL_GROUP_ID, DEPLOY_FIELD);
return Set.of(DESCRIPTION_FIELD, MODEL_GROUP_ID, DEPLOY_FIELD, MODEL_CONTENT_HASH_VALUE, URL, FUNCTION_NAME);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -156,8 +156,8 @@ public enum WorkflowSteps {
/** Register Local Sparse Encoding Model Step */
REGISTER_LOCAL_SPARSE_ENCODING_MODEL(
RegisterLocalSparseEncodingModelStep.NAME,
List.of(NAME_FIELD, VERSION_FIELD, MODEL_FORMAT, FUNCTION_NAME, MODEL_CONTENT_HASH_VALUE, URL),
List.of(MODEL_ID, REGISTER_MODEL_STATUS),
List.of(NAME_FIELD, VERSION_FIELD, MODEL_FORMAT),
List.of(MODEL_ID, REGISTER_MODEL_STATUS, FUNCTION_NAME, MODEL_CONTENT_HASH_VALUE, URL),
List.of(OPENSEARCH_ML),
TimeValue.timeValueSeconds(60)
),
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
{
"template.name": "deploy-bedrock-titan-embedding-model",
"template.description": "Deploying Amazon Bedrock Titan embedding model ",
"create_connector.name": "Amazon Bedrock Connector: embedding",
"create_connector.description": "The connector to bedrock Titan embedding model",
"create_connector.region": "us-east-1",
"create_connector.endpoint": "api.openai.com",
"create_connector.credential.access_key": "123",
joshpalis marked this conversation as resolved.
Show resolved Hide resolved
"create_connector.credential.secret_key": "123",
"create_connector.credential.session_token": "123",
"create_connector.actions.url": "https://bedrock-runtime.us-east-1.amazonaws.com/model/amazon.titan-embed-text-v1/invoke",
"create_connector.actions.request_body": "{ \"inputText\": \"${parameters.inputText}\" }",
"create_connector.actions.pre_process_function": "\n StringBuilder builder = new StringBuilder();\n builder.append(\"\\\"\");\n String first = params.text_docs[0];\n builder.append(first);\n builder.append(\"\\\"\");\n def parameters = \"{\" +\"\\\"inputText\\\":\" + builder + \"}\";\n return \"{\" +\"\\\"parameters\\\":\" + parameters + \"}\";",
"create_connector.actions.post_process_function": "\n def name = \"sentence_embedding\";\n def dataType = \"FLOAT32\";\n if (params.embedding == null || params.embedding.length == 0) {\n return params.message;\n }\n def shape = [params.embedding.length];\n def json = \"{\" +\n \"\\\"name\\\":\\\"\" + name + \"\\\",\" +\n \"\\\"data_type\\\":\\\"\" + dataType + \"\\\",\" +\n \"\\\"shape\\\":\" + shape + \",\" +\n \"\\\"data\\\":\" + params.embedding +\n \"}\";\n return json;\n ",
"register_remote_model.name": "Bedrock embedding model",
"register_remote_model.description": "bedrock-embedding-model"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
{
"template.name": "deploy-bedrock-titan-multimodal-embedding-model",
"template.description": "deploying Amazon Bedrock Titan multimodal embedding model ",
"create_connector.name": "Amazon Bedrock Connector: multi-modal embedding",
"create_connector.description": "The connector to bedrock Titan multi-modal embedding model",
"create_connector.region": "us-east-1",
"create_connector.input_docs_processed_step_size": 2,
"create_connector.endpoint": "api.openai.com",
"create_connector.credential.access_key": "123",
"create_connector.credential.secret_key": "123",
"create_connector.credential.session_token": "123",
"create_connector.actions.url": "https://bedrock-runtime.us-east-1.amazonaws.com/model/amazon.titan-embed-image-v1/invoke",
"create_connector.actions.request_body": "{ \"inputText\": \"${parameters.inputText:-null}\", \"inputImage\": \"${parameters.inputImage:-null}\" }",
"create_connector.actions.pre_process_function": "\n StringBuilder parametersBuilder = new StringBuilder(\"{\");\n if (params.text_docs.length > 0 && params.text_docs[0] != null) {\n parametersBuilder.append(\"\\\"inputText\\\":\");\n parametersBuilder.append(\"\\\"\");\n parametersBuilder.append(params.text_docs[0]);\n parametersBuilder.append(\"\\\"\");\n \n if (params.text_docs.length > 1 && params.text_docs[1] != null) {\n parametersBuilder.append(\",\");\n }\n }\n \n \n if (params.text_docs.length > 1 && params.text_docs[1] != null) {\n parametersBuilder.append(\"\\\"inputImage\\\":\");\n parametersBuilder.append(\"\\\"\");\n parametersBuilder.append(params.text_docs[1]);\n parametersBuilder.append(\"\\\"\");\n }\n parametersBuilder.append(\"}\");\n \n return \"{\" +\"\\\"parameters\\\":\" + parametersBuilder + \"}\";",
"create_connector.actions.post_process_function": "\n def name = \"sentence_embedding\";\n def dataType = \"FLOAT32\";\n if (params.embedding == null || params.embedding.length == 0) {\n return null;\n }\n def shape = [params.embedding.length];\n def json = \"{\" +\n \"\\\"name\\\":\\\"\" + name + \"\\\",\" +\n \"\\\"data_type\\\":\\\"\" + dataType + \"\\\",\" +\n \"\\\"shape\\\":\" + shape + \",\" +\n \"\\\"data\\\":\" + params.embedding +\n \"}\";\n return json;\n ",
"register_remote_model.name": "Bedrock multi-modal embedding model",
"register_remote_model.description": "bedrock-multi-modal-embedding-model"
}
14 changes: 14 additions & 0 deletions src/main/resources/defaults/cohere-chat-defaults.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
{
"template.name": "deploy-cohere-chat-model",
"template.description": "deploying cohere chat model",
"create_connector.name": "Cohere Chat Model",
"create_connector.description": "The connector to Cohere's public chat API",
"create_connector.protocol": "http",
"create_connector.model": "command",
"create_connector.endpoint": "api.cohere.ai",
"create_connector.credential.key": "123",
joshpalis marked this conversation as resolved.
Show resolved Hide resolved
"create_connector.actions.url": "https://api.cohere.ai/v1/chat",
"create_connector.actions.request_body": "{ \"message\": \"${parameters.message}\", \"model\": \"${parameters.model}\" }",
"register_remote_model.name": "Cohere chat model",
"register_remote_model.description": "cohere-chat-model"
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
"create_connector.model": "embed-english-v3.0",
"create_connector.input_type": "search_document",
"create_connector.truncate": "end",
"create_connector.endpoint": "api.openai.com",
"create_connector.credential.key": "123",
"create_connector.actions.url": "https://api.cohere.ai/v1/embed",
"create_connector.actions.request_body": "{ \"texts\": ${parameters.texts}, \"truncate\": \"${parameters.truncate}\", \"model\": \"${parameters.model}\", \"input_type\": \"${parameters.input_type}\" }",
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
{
"template.name": "semantic search with cohere embedding",
"template.description": "Setting up semantic search, with cohere embedding model",
"create_connector.name": "cohere-embedding-connector",
"create_connector.description": "The connector to Cohere's public embed API",
"create_connector.protocol": "http",
"create_connector.model": "embed-english-v3.0",
"create_connector.input_type": "search_document",
"create_connector.truncate": "end",
"create_connector.credential.key": "123",
"create_connector.actions.url": "https://api.cohere.ai/v1/embed",
"create_connector.actions.request_body": "{ \"texts\": ${parameters.texts}, \"truncate\": \"${parameters.truncate}\", \"model\": \"${parameters.model}\", \"input_type\": \"${parameters.input_type}\" }",
"create_connector.actions.pre_process_function": "connector.pre_process.cohere.embedding",
"create_connector.actions.post_process_function": "connector.post_process.cohere.embedding",
"register_remote_model.name": "Cohere english embed model",
"register_remote_model.description": "cohere-embedding-model",
"create_ingest_pipeline.pipeline_id": "nlp-ingest-pipeline",
"create_ingest_pipeline.description": "A text embedding pipeline",
"text_embedding.field_map.input": "passage_text",
"text_embedding.field_map.output": "passage_embedding",
"create_index.name": "my-nlp-index",
"create_index.settings.number_of_shards": "2",
"create_index.mappings.method.engine": "lucene",
"create_index.mappings.method.space_type": "l2",
"create_index.mappings.method.name": "hnsw",
"text_embedding.field_map.output.dimension": "1024",
"create_search_pipeline.pipeline_id": "default_model_pipeline"
}
19 changes: 19 additions & 0 deletions src/main/resources/defaults/hybrid-search-defaults.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
{
"template.name": "hybrid-search",
"template.description": "Setting up hybrid search, ingest pipeline and index",
"create_ingest_pipeline.pipeline_id": "nlp-ingest-pipeline",
"create_ingest_pipeline.description": "A text embedding pipeline",
"create_ingest_pipeline.model_id": "123",
"text_embedding.field_map.input": "passage_text",
"text_embedding.field_map.output": "passage_embedding",
"create_index.name": "my-nlp-index",
"create_index.settings.number_of_shards": "2",
"create_index.mappings.method.engine": "lucene",
"create_index.mappings.method.space_type": "l2",
"create_index.mappings.method.name": "hnsw",
"text_embedding.field_map.output.dimension": "1024",
"create_search_pipeline.pipeline_id": "nlp-search-pipeline",
"normalization-processor.normalization.technique": "min_max",
"normalization-processor.combination.technique": "arithmetic_mean",
"normalization-processor.combination.parameters.weights": "[0.3, 0.7]"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
{
"template.name": "local-model-neural-sparse-search",
"template.description": "setting up neural sparse search with local model",
"register_local_sparse_encoding_model.name": "amazon/neural-sparse/opensearch-neural-sparse-encoding-v1",
"register_local_sparse_encoding_model.description": "This is a neural sparse encoding model",
"register_local_sparse_encoding_model.model_format": "TORCH_SCRIPT",
"register_local_sparse_encoding_model.deploy": "true",
"register_local_sparse_encoding_model.version": "1.0.1",
"create_ingest_pipeline.pipeline_id": "nlp-ingest-pipeline-sparse",
"create_ingest_pipeline.description": "A sparse encoding ingest pipeline",
"create_ingest_pipeline.text_embedding.field_map.input": "passage_text",
"create_ingest_pipeline.text_embedding.field_map.output": "passage_embedding",
"create_index.name": "my-nlp-index"
}
17 changes: 0 additions & 17 deletions src/main/resources/defaults/local-sparse-search-defaults.json

This file was deleted.

Loading
Loading