adding reindex
Signed-off-by: Amit Galitzky <[email protected]>
amitgalitz committed Jun 7, 2024
1 parent bca3abe commit c7b533e
Showing 8 changed files with 194 additions and 8 deletions.
2 changes: 1 addition & 1 deletion build.gradle
@@ -494,7 +494,7 @@ List<Provider<RegularFile>> plugins = [
return new RegularFile() {
@Override
File getAsFile() {
- return configurations.zipArchive.asFileTree.getSingleFile()
+ return configurations.zipArchive.asFileTree.getFiles()
}
}
}

CommonValue.java
@@ -225,4 +225,6 @@ private CommonValue() {}
public static final String CREATE_CONNECTOR_CREDENTIAL_SESSION_TOKEN = "create_connector.credential.session_token";
/** The field name for ingest pipeline model ID substitution */
public static final String CREATE_INGEST_PIPELINE_MODEL_ID = "create_ingest_pipeline.model_id";
+ /** The field name for reindex source index substitution */
+ public static final String REINDEX_SOURCE_INDEX = "reindex.source_index";
}

DefaultUseCases.java
@@ -22,6 +22,7 @@
import static org.opensearch.flowframework.common.CommonValue.CREATE_CONNECTOR_CREDENTIAL_SECRET_KEY;
import static org.opensearch.flowframework.common.CommonValue.CREATE_CONNECTOR_CREDENTIAL_SESSION_TOKEN;
import static org.opensearch.flowframework.common.CommonValue.CREATE_INGEST_PIPELINE_MODEL_ID;
+ import static org.opensearch.flowframework.common.CommonValue.REINDEX_SOURCE_INDEX;

/**
* Enum encapsulating the different default use cases and templates we have stored
@@ -147,6 +148,13 @@ public enum DefaultUseCases {
"defaults/hybrid-search-with-local-model-defaults.json",
"substitutionTemplates/hybrid-search-with-local-model-template.json",
Collections.emptyList()
+ ),
+ /** defaults file and substitution ready template for semantic search with reindex command */
+ SEMANTIC_SEARCH_WITH_REINDEX(
+     "semantic_search_with_reindex",
+     "defaults/semantic-search-with-reindex-defaults.json",
+     "substitutionTemplates/semantic-search-with-reindex-template.json",
+     List.of(CREATE_CONNECTOR_CREDENTIAL_KEY, REINDEX_SOURCE_INDEX)
+ );

private final String useCaseName;
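The new SEMANTIC_SEARCH_WITH_REINDEX entry ties the use case name to its defaults file, its substitution-ready template, and the two values the caller must supply: the Cohere connector credential key and the reindex source index. A minimal stand-alone sketch of that required-key check follows; the class and method names here are hypothetical and only illustrate the idea, not the plugin's actual API.

```java
// Illustrative sketch only: verify that user-supplied defaults cover the keys a use case
// marks as required. RequiredKeyCheck and validate() are hypothetical names.
import java.util.List;
import java.util.Map;

public class RequiredKeyCheck {
    public static void validate(Map<String, String> userDefaults, List<String> requiredKeys) {
        for (String key : requiredKeys) {
            String value = userDefaults.get(key);
            if (value == null || value.isBlank()) {
                throw new IllegalArgumentException("Missing required substitution value for: " + key);
            }
        }
    }

    public static void main(String[] args) {
        // The semantic_search_with_reindex use case requires both of these keys.
        List<String> required = List.of("create_connector.credential.key", "reindex.source_index");
        Map<String, String> supplied = Map.of(
            "create_connector.credential.key", "<cohere-api-key>",
            "reindex.source_index", "my-source-index"
        );
        validate(supplied, required); // passes; omitting either key would throw
    }
}
```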

ReindexStep.java
@@ -95,10 +95,20 @@ public PlainActionFuture<WorkflowData> execute(
Float requestsPerSecond = inputs.containsKey(REQUESTS_PER_SECOND)
? Float.parseFloat(inputs.get(REQUESTS_PER_SECOND).toString())
: null;
+ requestsPerSecond = requestsPerSecond < 0 ? Float.POSITIVE_INFINITY : requestsPerSecond;
Boolean requireAlias = inputs.containsKey(REQUIRE_ALIAS) ? Booleans.parseBoolean(inputs.get(REQUIRE_ALIAS).toString()) : null;
- Integer slices = (Integer) inputs.get(SLICES);
- Integer maxDocs = (Integer) inputs.get(MAX_DOCS);

+ Integer slices;
+ Integer maxDocs;
+ if (inputs.get(SLICES) != null) {
+     slices = Integer.parseInt(String.valueOf(inputs.get(SLICES)));
+ } else {
+     slices = (Integer) inputs.get(SLICES);
+ }
+ if (inputs.get(MAX_DOCS) != null) {
+     maxDocs = Integer.parseInt(String.valueOf(inputs.get(MAX_DOCS)));
+ } else {
+     maxDocs = (Integer) inputs.get(MAX_DOCS);
+ }
ReindexRequest reindexRequest = new ReindexRequest().setSourceIndices(Strings.splitStringByCommaToArray(sourceIndices))
.setDestIndex(destinationIndex);

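The hunk above replaces the direct Integer casts with null-safe parsing, so slices and max_docs still work when they arrive as strings (as they do from the new defaults file), and it maps a negative requests_per_second to Float.POSITIVE_INFINITY, i.e. unthrottled. Below is a self-contained sketch of that parsing behavior, with an explicit null guard added for the standalone example; the map keys simply mirror the step's input names.

```java
// Minimal sketch of the lenient parsing the hunk introduces: numeric inputs may arrive
// as Integer or as String (e.g. "1" from the defaults JSON), and a requests_per_second
// of -1 means "do not throttle".
import java.util.Map;

public class ReindexInputParsing {
    public static void main(String[] args) {
        Map<String, Object> inputs = Map.of(
            "requests_per_second", "-1",
            "slices", "1",
            "max_docs", 500
        );

        Float requestsPerSecond = inputs.containsKey("requests_per_second")
            ? Float.parseFloat(inputs.get("requests_per_second").toString())
            : null;
        if (requestsPerSecond != null && requestsPerSecond < 0) {
            requestsPerSecond = Float.POSITIVE_INFINITY; // -1 disables throttling
        }

        Integer slices = inputs.get("slices") != null
            ? Integer.parseInt(String.valueOf(inputs.get("slices")))
            : null;
        Integer maxDocs = inputs.get("max_docs") != null
            ? Integer.parseInt(String.valueOf(inputs.get("max_docs")))
            : null;

        System.out.printf("requests_per_second=%s slices=%d max_docs=%d%n", requestsPerSecond, slices, maxDocs);
    }
}
```

With this in place, the string values "1" and "-1" from the defaults JSON parse cleanly instead of failing the earlier (Integer) cast.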

defaults/hybrid-search-with-local-model-defaults.json
@@ -1,11 +1,11 @@
{
"template.name": "hybrid-search",
"template.description": "Setting up hybrid search, ingest pipeline and index",
"register_local_pretrained_model.name": "huggingface/sentence-transformers/msmarco-distilbert-base-tas-b",
"register_local_pretrained_model.name": "huggingface/sentence-transformers/paraphrase-MiniLM-L3-v2",
"register_local_pretrained_model.description": "This is a sentence transformer model",
"register_local_pretrained_model.model_format": "TORCH_SCRIPT",
"register_local_pretrained_model.deploy": "true",
"register_local_pretrained_model.version": "1.0.2",
"register_local_pretrained_model.version": "1.0.1",
"create_ingest_pipeline.pipeline_id": "nlp-ingest-pipeline",
"create_ingest_pipeline.description": "A text embedding pipeline",
"create_ingest_pipeline.model_id": "123",

defaults/semantic-search-with-local-model-defaults.json
@@ -1,11 +1,11 @@
{
"template.name": "semantic search with local pretrained model",
"template.description": "Setting up semantic search, with a local pretrained embedding model",
"register_local_pretrained_model.name": "huggingface/sentence-transformers/msmarco-distilbert-base-tas-b",
"register_local_pretrained_model.name": "huggingface/sentence-transformers/paraphrase-MiniLM-L3-v2",
"register_local_pretrained_model.description": "This is a sentence transformer model",
"register_local_pretrained_model.model_format": "TORCH_SCRIPT",
"register_local_pretrained_model.deploy": "true",
"register_local_pretrained_model.version": "1.0.2",
"register_local_pretrained_model.version": "1.0.1",
"create_ingest_pipeline.pipeline_id": "nlp-ingest-pipeline",
"create_ingest_pipeline.description": "A text embedding pipeline",
"text_embedding.field_map.input": "passage_text",

defaults/semantic-search-with-reindex-defaults.json (new file)
@@ -0,0 +1,31 @@
{
"template.name": "semantic search with cohere embedding",
"template.description": "Setting up semantic search, with a Cohere embedding model",
"create_connector.name": "cohere-embedding-connector",
"create_connector.description": "The connector to Cohere's public embed API",
"create_connector.protocol": "http",
"create_connector.model": "embed-english-v3.0",
"create_connector.input_type": "search_document",
"create_connector.truncate": "end",
"create_connector.credential.key": "123",
"create_connector.actions.url": "https://api.cohere.ai/v1/embed",
"create_connector.actions.request_body": "{ \"texts\": ${parameters.texts}, \"truncate\": \"${parameters.truncate}\", \"model\": \"${parameters.model}\", \"input_type\": \"${parameters.input_type}\" }",
"create_connector.actions.pre_process_function": "connector.pre_process.cohere.embedding",
"create_connector.actions.post_process_function": "connector.post_process.cohere.embedding",
"register_remote_model.name": "Cohere english embed model",
"register_remote_model.description": "cohere-embedding-model",
"create_ingest_pipeline.pipeline_id": "nlp-ingest-pipeline",
"create_ingest_pipeline.description": "A text embedding pipeline",
"text_embedding.field_map.input": "passage_text",
"text_embedding.field_map.output": "passage_embedding",
"create_index.name": "my-nlp-index",
"create_index.settings.number_of_shards": "2",
"create_index.mappings.method.engine": "lucene",
"create_index.mappings.method.space_type": "l2",
"create_index.mappings.method.name": "hnsw",
"text_embedding.field_map.output.dimension": "1024",
"create_search_pipeline.pipeline_id": "default_model_pipeline",
"reindex.source_index": "",
"reindex.requests_per_second": "-1",
"reindex.slices": "1"
}
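These defaults feed the ${{...}} placeholders in the substitution template that follows; reindex.source_index is intentionally empty because it is one of the required values the caller must provide, and a requests_per_second of "-1" means the reindex runs unthrottled. The sketch below shows the substitution idea in isolation; it is a simplified stand-in, not the plugin's actual template engine.

```java
// Illustration of the substitution idea: every "${{key}}" token in a template body is
// replaced by the matching entry from the defaults map (plus any user-supplied overrides).
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class PlaceholderSubstitution {
    private static final Pattern PLACEHOLDER = Pattern.compile("\\$\\{\\{([^}]+)}}");

    public static String substitute(String template, Map<String, String> values) {
        Matcher m = PLACEHOLDER.matcher(template);
        StringBuilder out = new StringBuilder();
        while (m.find()) {
            // Unknown keys are left untouched so missing required values are easy to spot.
            String replacement = values.getOrDefault(m.group(1), m.group(0));
            m.appendReplacement(out, Matcher.quoteReplacement(replacement));
        }
        m.appendTail(out);
        return out.toString();
    }

    public static void main(String[] args) {
        String snippet = "\"source_index\": \"${{reindex.source_index}}\", \"slices\": \"${{reindex.slices}}\"";
        Map<String, String> values = Map.of("reindex.source_index", "my-source-index", "reindex.slices", "1");
        System.out.println(substitute(snippet, values));
    }
}
```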

substitutionTemplates/semantic-search-with-reindex-template.json (new file)
@@ -0,0 +1,135 @@
{
"name": "${{template.name}}",
"description": "${{template.description}}",
"use_case": "SEMANTIC_SEARCH",
"version": {
"template": "1.0.0",
"compatibility": [
"2.12.0",
"3.0.0"
]
},
"workflows": {
"provision": {
"nodes": [
{
"id": "create_connector",
"type": "create_connector",
"user_inputs": {
"name": "${{create_connector.name}}",
"description": "${{create_connector.description}}",
"version": "1",
"protocol": "${{create_connector.protocol}}",
"parameters": {
"endpoint": "${{create_connector.endpoint}}",
"model": "${{create_connector.model}}",
"input_type": "search_document",
"truncate": "END"
},
"credential": {
"key": "${{create_connector.credential.key}}"
},
"actions": [
{
"action_type": "predict",
"method": "POST",
"url": "${{create_connector.actions.url}}",
"headers": {
"Authorization": "Bearer ${credential.key}",
"Request-Source": "unspecified:opensearch"
},
"request_body": "${{create_connector.actions.request_body}}",
"pre_process_function": "${{create_connector.actions.pre_process_function}}",
"post_process_function": "${{create_connector.actions.post_process_function}}"
}
]
}
},
{
"id": "register_model",
"type": "register_remote_model",
"previous_node_inputs": {
"create_connector": "connector_id"
},
"user_inputs": {
"name": "${{register_remote_model.name}}",
"function_name": "remote",
"description": "${{register_remote_model.description}}",
"deploy": true
}
},
{
"id": "create_ingest_pipeline",
"type": "create_ingest_pipeline",
"previous_node_inputs": {
"register_model": "model_id"
},
"user_inputs": {
"pipeline_id": "${{create_ingest_pipeline.pipeline_id}}",
"configurations": {
"description": "${{create_ingest_pipeline.description}}",
"processors": [
{
"text_embedding": {
"model_id": "${{register_model.model_id}}",
"field_map": {
"${{text_embedding.field_map.input}}": "${{text_embedding.field_map.output}}"
}
}
}
]
}
}
},
{
"id": "create_index",
"type": "create_index",
"previous_node_inputs": {
"create_ingest_pipeline": "pipeline_id"
},
"user_inputs": {
"index_name": "${{create_index.name}}",
"configurations": {
"settings": {
"index.knn": true,
"default_pipeline": "${{create_ingest_pipeline.pipeline_id}}",
"number_of_shards": "${{create_index.settings.number_of_shards}}"
},
"mappings": {
"properties": {
"${{text_embedding.field_map.output}}": {
"type": "knn_vector",
"dimension": "${{text_embedding.field_map.output.dimension}}",
"method": {
"engine": "${{create_index.mappings.method.engine}}",
"space_type": "${{create_index.mappings.method.space_type}}",
"name": "${{create_index.mappings.method.name}}",
"parameters": {}
}
},
"${{text_embedding.field_map.input}}": {
"type": "text"
}
}
}
}
}
},
{
"id": "reindex",
"type": "reindex",
"previous_node_inputs": {
"create_index": "index_name"
},
"user_inputs": {
"source_index": "${{reindex.source_index}}",
"destination_index": "${{create_index.name}}",
"refresh": false,
"requests_per_second": "${{reindex.requests_per_second}}",
"slices": "${{reindex.slices}}"
}
}
]
}
}
}
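The provision workflow chains five nodes through previous_node_inputs: the connector's connector_id feeds model registration, the model_id feeds the ingest pipeline, the pipeline_id feeds index creation, and the new index_name feeds the final reindex node, which copies documents from the user-supplied reindex.source_index into the freshly created k-NN index. A small conceptual sketch of resolving that dependency order follows; it only illustrates the relation and is not the plugin's actual scheduler.

```java
// Conceptual sketch: the template's provision nodes form a dependency chain via
// "previous_node_inputs". Resolving them in dependency order yields the execution order.
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

public class ProvisionOrder {
    public static void main(String[] args) {
        // node id -> ids it depends on (taken from the previous_node_inputs above)
        Map<String, List<String>> deps = new LinkedHashMap<>();
        deps.put("create_connector", List.of());
        deps.put("register_model", List.of("create_connector"));
        deps.put("create_ingest_pipeline", List.of("register_model"));
        deps.put("create_index", List.of("create_ingest_pipeline"));
        deps.put("reindex", List.of("create_index"));

        List<String> order = new ArrayList<>();
        while (order.size() < deps.size()) {
            for (Map.Entry<String, List<String>> e : deps.entrySet()) {
                if (!order.contains(e.getKey()) && order.containsAll(e.getValue())) {
                    order.add(e.getKey());
                }
            }
        }
        System.out.println(order); // [create_connector, register_model, create_ingest_pipeline, create_index, reindex]
    }
}
```

For a plain chain like this one the order is simply the node listing itself; the sketch just makes the dependency relation explicit.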
