Skip to content

Commit

Permalink
adding pretrained model templates
Browse files Browse the repository at this point in the history
Signed-off-by: Amit Galitzky <[email protected]>
  • Loading branch information
amitgalitz committed Jun 6, 2024
1 parent 13b32f1 commit 7ce4099
Show file tree
Hide file tree
Showing 19 changed files with 413 additions and 41 deletions.
2 changes: 2 additions & 0 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,8 @@ dependencies {

// ZipArchive dependencies used for integration tests
zipArchive group: 'org.opensearch.plugin', name:'opensearch-ml-plugin', version: "${opensearch_build}"
zipArchive group: 'org.opensearch.plugin', name:'opensearch-knn', version: "${opensearch_build}"
zipArchive group: 'org.opensearch.plugin', name:'neural-search', version: "${opensearch_build}"
secureIntegTestPluginArchive group: 'org.opensearch.plugin', name:'opensearch-security', version: "${opensearch_build}"

configurations.all {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,21 @@ public enum DefaultUseCases {
"defaults/conversational-search-defaults.json",
"substitutionTemplates/conversational-search-with-cohere-model-template.json",
List.of(CREATE_CONNECTOR_CREDENTIAL_KEY)
),
/** defaults file and substitution ready template for semantic search with a local pretrained model */
SEMANTIC_SEARCH_WITH_LOCAL_MODEL(
"semantic_search_with_local_model",
"defaults/semantic-search-with-local-model-defaults.json",
"substitutionTemplates/semantic-search-with-local-model-template.json",
Collections.emptyList()

),
/** defaults file and substitution ready template for hybrid search with a local pretrained model */
HYBRID_SEARCH_WITH_LOCAL_MODEL(
"hybrid_search_with_local_model",
"defaults/hybrid-search-with-local-model-defaults.json",
"substitutionTemplates/hybrid-search-with-local-model-template.json",
Collections.emptyList()
);

private final String useCaseName;
Expand Down
3 changes: 1 addition & 2 deletions src/main/resources/defaults/hybrid-search-defaults.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,5 @@
"text_embedding.field_map.output.dimension": "1024",
"create_search_pipeline.pipeline_id": "nlp-search-pipeline",
"normalization-processor.normalization.technique": "min_max",
"normalization-processor.combination.technique": "arithmetic_mean",
"normalization-processor.combination.parameters.weights": "[0.3, 0.7]"
"normalization-processor.combination.technique": "arithmetic_mean"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
{
"template.name": "hybrid search with local pretrained model",
"template.description": "Setting up hybrid search, with a local pretrained embedding model",
"register_local_pretrained_model.name": "huggingface/sentence-transformers/msmarco-distilbert-base-tas-b",
"register_local_pretrained_model.description": "This is a sentence transformer model",
"register_local_pretrained_model.model_format": "TORCH_SCRIPT",
"register_local_pretrained_model.deploy": "true",
"register_local_pretrained_model.version": "1.0.2",
"create_ingest_pipeline.pipeline_id": "nlp-ingest-pipeline",
"create_ingest_pipeline.description": "A text embedding pipeline",
"create_ingest_pipeline.model_id": "123",
"text_embedding.field_map.input": "passage_text",
"text_embedding.field_map.output": "passage_embedding",
"create_index.name": "my-nlp-index",
"create_index.settings.number_of_shards": "2",
"create_index.mappings.method.engine": "lucene",
"create_index.mappings.method.space_type": "l2",
"create_index.mappings.method.name": "hnsw",
"text_embedding.field_map.output.dimension": "768",
"create_search_pipeline.pipeline_id": "nlp-search-pipeline",
"normalization-processor.normalization.technique": "min_max",
"normalization-processor.combination.technique": "arithmetic_mean"
}
4 changes: 3 additions & 1 deletion src/main/resources/defaults/multi-modal-search-defaults.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,5 +11,7 @@
"create_index.settings.number_of_shards": "2",
"text_image_embedding.field_map.output.dimension": "1024",
"create_index.mappings.method.engine": "lucene",
"create_index.mappings.method.name": "hnsw"
"create_index.mappings.method.name": "hnsw",
"text_image_embedding.field_map.image.type": "text",
"text_image_embedding.field_map.text.type": "text"
}
Original file line number Diff line number Diff line change
Expand Up @@ -24,5 +24,7 @@
"create_index.settings.number_of_shards": "2",
"text_image_embedding.field_map.output.dimension": "1024",
"create_index.mappings.method.engine": "lucene",
"create_index.mappings.method.name": "hnsw"
"create_index.mappings.method.name": "hnsw",
"text_image_embedding.field_map.image.type": "text",
"text_image_embedding.field_map.text.type": "text"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
{
"template.name": "semantic search with local pretrained model",
"template.description": "Setting up semantic search, with a local pretrained embedding model",
"register_local_pretrained_model.name": "huggingface/sentence-transformers/msmarco-distilbert-base-tas-b",
"register_local_pretrained_model.description": "This is a sentence transformer model",
"register_local_pretrained_model.model_format": "TORCH_SCRIPT",
"register_local_pretrained_model.deploy": "true",
"register_local_pretrained_model.version": "1.0.2",
"create_ingest_pipeline.pipeline_id": "nlp-ingest-pipeline",
"create_ingest_pipeline.description": "A text embedding pipeline",
"text_embedding.field_map.input": "passage_text",
"text_embedding.field_map.output": "passage_embedding",
"create_index.name": "my-nlp-index",
"create_index.settings.number_of_shards": "2",
"create_index.mappings.method.engine": "lucene",
"create_index.mappings.method.space_type": "l2",
"create_index.mappings.method.name": "hnsw",
"text_embedding.field_map.output.dimension": "768",
"create_search_pipeline.pipeline_id": "default_model_pipeline"
}
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,6 @@
"mappings": {
"_doc": {
"properties": {
"id": {
"type": "text"
},
"${{text_embedding.field_map.output}}": {
"type": "knn_vector",
"dimension": "${{text_embedding.field_map.output.dimension}}",
Expand Down Expand Up @@ -86,10 +83,7 @@
"technique": "${{normalization-processor.normalization.technique}}"
},
"combination": {
"technique": "${{normalization-processor.combination.technique}}",
"parameters": {
"weights": "${{normalization-processor.combination.parameters.weights}}"
}
"technique": "${{normalization-processor.combination.technique}}"
}
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
{
"name": "${{template.name}}",
"description": "${{template.description}}",
"use_case": "HYBRID_SEARCH",
"version": {
"template": "1.0.0",
"compatibility": [
"2.12.0",
"3.0.0"
]
},
"workflows": {
"provision": {
"nodes": [
{
"id": "register_local_pretrained_model",
"type": "register_local_pretrained_model",
"user_inputs": {
"name": "${{register_local_pretrained_model.name}}",
"version": "${{register_local_pretrained_model.version}}",
"description": "${{register_local_pretrained_model.description}}",
"model_format": "${{register_local_pretrained_model.model_format}}",
"deploy": true
}
},
{
"id": "create_ingest_pipeline",
"type": "create_ingest_pipeline",
"previous_node_inputs": {
"register_local_pretrained_model": "model_id"
},
"user_inputs": {
"pipeline_id": "${{create_ingest_pipeline.pipeline_id}}",
"configurations": {
"description": "${{create_ingest_pipeline.description}}",
"processors": [
{
"text_embedding": {
"model_id": "${{register_local_pretrained_model.model_id}}",
"field_map": {
"${{text_embedding.field_map.input}}": "${{text_embedding.field_map.output}}"
}
}
}
]
}
}
},
{
"id": "create_index",
"type": "create_index",
"previous_node_inputs": {
"create_ingest_pipeline": "pipeline_id"
},
"user_inputs": {
"index_name": "${{create_index.name}}",
"configurations": {
"settings": {
"index.knn": true,
"default_pipeline": "${{create_ingest_pipeline.pipeline_id}}",
"number_of_shards": "${{create_index.settings.number_of_shards}}",
"index.search.default_pipeline": "${{create_search_pipeline.pipeline_id}}"
},
"mappings": {
"properties": {
"${{text_embedding.field_map.output}}": {
"type": "knn_vector",
"dimension": "${{text_embedding.field_map.output.dimension}}",
"method": {
"engine": "${{create_index.mappings.method.engine}}",
"space_type": "${{create_index.mappings.method.space_type}}",
"name": "${{create_index.mappings.method.name}}",
"parameters": {}
}
},
"${{text_embedding.field_map.input}}": {
"type": "text"
}
}
}
}
}
},
{
"id": "create_search_pipeline",
"type": "create_search_pipeline",
"user_inputs": {
"pipeline_id": "${{create_search_pipeline.pipeline_id}}",
"configurations": {
"description": "Post processor for hybrid search",
"phase_results_processors": [
{
"normalization-processor": {
"normalization": {
"technique": "${{normalization-processor.normalization.technique}}"
},
"combination": {
"technique": "${{normalization-processor.combination.technique}}"
}
}
}
]
}
}
}
]
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,6 @@
"mappings": {
"_doc": {
"properties": {
"id": {
"type": "text"
},
"${{text_image_embedding.embedding}}": {
"type": "knn_vector",
"dimension": "${{text_image_embedding.field_map.output.dimension}}",
Expand All @@ -64,10 +61,10 @@
}
},
"${{text_image_embedding.field_map.text}}": {
"type": "text"
"type": "${{text_image_embedding.field_map.text.type}}"
},
"${{text_image_embedding.field_map.image}}": {
"type": "binary"
"type": "${{text_image_embedding.field_map.image.type}}"
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -101,9 +101,6 @@
"mappings": {
"_doc": {
"properties": {
"id": {
"type": "text"
},
"${{text_image_embedding.embedding}}": {
"type": "knn_vector",
"dimension": "${{text_image_embedding.field_map.output.dimension}}",
Expand All @@ -114,10 +111,10 @@
}
},
"${{text_image_embedding.field_map.text}}": {
"type": "text"
"type": "${{text_image_embedding.field_map.text.type}}"
},
"${{text_image_embedding.field_map.image}}": {
"type": "binary"
"type": "${{text_image_embedding.field_map.image.type}}"
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,9 +61,6 @@
"mappings": {
"_doc": {
"properties": {
"id": {
"type": "text"
},
"${{create_ingest_pipeline.text_embedding.field_map.output}}": {
"type": "rank_features"
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,6 @@
"mappings": {
"_doc": {
"properties": {
"id": {
"type": "text"
},
"${{text_embedding.field_map.output}}": {
"type": "knn_vector",
"dimension": "${{text_embedding.field_map.output.dimension}}",
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
{
"name": "${{template.name}}",
"description": "${{template.description}}",
"use_case": "SEMANTIC_SEARCH",
"version": {
"template": "1.0.0",
"compatibility": [
"2.12.0",
"3.0.0"
]
},
"workflows": {
"provision": {
"nodes": [
{
"id": "register_local_pretrained_model",
"type": "register_local_pretrained_model",
"user_inputs": {
"name": "${{register_local_pretrained_model.name}}",
"version": "${{register_local_pretrained_model.version}}",
"description": "${{register_local_pretrained_model.description}}",
"model_format": "${{register_local_pretrained_model.model_format}}",
"deploy": true
}
},
{
"id": "create_ingest_pipeline",
"type": "create_ingest_pipeline",
"previous_node_inputs": {
"register_local_pretrained_model": "model_id"
},
"user_inputs": {
"pipeline_id": "${{create_ingest_pipeline.pipeline_id}}",
"configurations": {
"description": "${{create_ingest_pipeline.description}}",
"processors": [
{
"text_embedding": {
"model_id": "${{register_local_pretrained_model.model_id}}",
"field_map": {
"${{text_embedding.field_map.input}}": "${{text_embedding.field_map.output}}"
}
}
}
]
}
}
},
{
"id": "create_index",
"type": "create_index",
"previous_node_inputs": {
"create_ingest_pipeline": "pipeline_id"
},
"user_inputs": {
"index_name": "${{create_index.name}}",
"configurations": {
"settings": {
"index.knn": true,
"default_pipeline": "${{create_ingest_pipeline.pipeline_id}}",
"number_of_shards": "${{create_index.settings.number_of_shards}}"
},
"mappings": {
"properties": {
"${{text_embedding.field_map.output}}": {
"type": "knn_vector",
"dimension": "${{text_embedding.field_map.output.dimension}}",
"method": {
"engine": "${{create_index.mappings.method.engine}}",
"space_type": "${{create_index.mappings.method.space_type}}",
"name": "${{create_index.mappings.method.name}}",
"parameters": {}
}
},
"${{text_embedding.field_map.input}}": {
"type": "text"
}
}
}
}
}
}
]
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -99,9 +99,6 @@
"mappings": {
"_doc": {
"properties": {
"id": {
"type": "text"
},
"${{text_embedding.field_map.output}}": {
"type": "knn_vector",
"dimension": "${{text_embedding.field_map.output.dimension}}",
Expand Down
Loading

0 comments on commit 7ce4099

Please sign in to comment.