From 621ba8bee2a09c1177a9bfaefdfcc6391130cc67 Mon Sep 17 00:00:00 2001 From: Amit Galitzky Date: Wed, 20 Mar 2024 15:27:58 -0700 Subject: [PATCH] Fixing create index and use case input parsing bugs (#600) * fixing create index step and array input for processors Signed-off-by: Amit Galitzky * fixing test and enhancing error message Signed-off-by: Amit Galitzky * adding release notes change and some comments addressed Signed-off-by: Amit Galitzky * addressed comments and cleaned up defaults/templates Signed-off-by: Amit Galitzky --------- Signed-off-by: Amit Galitzky --- ...h-flow-framework.release-notes-2.13.0.0.md | 5 +- .../flowframework/common/DefaultUseCases.java | 10 ++- .../rest/RestCreateWorkflowAction.java | 10 +-- .../flowframework/util/ParseUtils.java | 74 +++++++++++++---- .../workflow/AbstractCreatePipelineStep.java | 9 +- .../workflow/CreateIndexStep.java | 44 +++++++++- .../bedrock-titan-embedding-defaults.json | 1 - ...=> bedrock-titan-multimodal-defaults.json} | 5 +- .../defaults/cohere-chat-defaults.json | 2 +- .../defaults/cohere-embedding-defaults.json | 2 +- ...re-embedding-semantic-search-defaults.json | 2 +- .../conversational-search-defaults.json | 20 +++++ ...ocal-sparse-search-biencoder-defaults.json | 2 +- .../defaults/multi-modal-search-defaults.json | 6 +- ...timodal-search-bedrock-titan-defaults.json | 8 +- .../defaults/openai-chat-defaults.json | 2 +- .../defaults/openai-embedding-defaults.json | 2 +- ...nal-search-with-cohere-model-template.json | 83 +++++++++++++++++++ .../deploy-remote-bedrock-model-template.json | 2 +- .../deploy-remote-model-chat-template.json | 4 +- ...y-remote-model-extra-params-template.json} | 0 .../deploy-remote-model-template.json | 4 +- .../multi-modal-search-template.json | 4 +- ...al-search-with-bedrock-titan-template.json | 6 +- .../rest/FlowFrameworkRestApiIT.java | 17 ++++ .../flowframework/util/ParseUtilsTests.java | 10 +++ .../workflow/CreateIndexStepTests.java | 3 +- 27 files changed, 278 insertions(+), 59 deletions(-) rename src/main/resources/defaults/{bedrock-titan-mulitmodal-defaults.json => bedrock-titan-multimodal-defaults.json} (93%) create mode 100644 src/main/resources/defaults/conversational-search-defaults.json create mode 100644 src/main/resources/substitutionTemplates/conversational-search-with-cohere-model-template.json rename src/main/resources/substitutionTemplates/{deploy-remote-model-template-extra-params.json => deploy-remote-model-extra-params-template.json} (100%) diff --git a/release-notes/opensearch-flow-framework.release-notes-2.13.0.0.md b/release-notes/opensearch-flow-framework.release-notes-2.13.0.0.md index d3f5d1e14..428c9c69a 100644 --- a/release-notes/opensearch-flow-framework.release-notes-2.13.0.0.md +++ b/release-notes/opensearch-flow-framework.release-notes-2.13.0.0.md @@ -17,4 +17,7 @@ Compatible with OpenSearch 2.13.0 ### Refactoring * Moved workflow-steps.json to Enum ([#523](https://github.com/opensearch-project/flow-framework/pull/523)) -* Refactored logging for consistency ([#524](https://github.com/opensearch-project/flow-framework/pull/524)) \ No newline at end of file +* Refactored logging for consistency ([#524](https://github.com/opensearch-project/flow-framework/pull/524)) + +### Bug Fixes +* Fixing create index and use case input parsing bugs ([#600](https://github.com/opensearch-project/flow-framework/pull/600)) diff --git a/src/main/java/org/opensearch/flowframework/common/DefaultUseCases.java b/src/main/java/org/opensearch/flowframework/common/DefaultUseCases.java index 71be18a65..126ef0731 100644 --- a/src/main/java/org/opensearch/flowframework/common/DefaultUseCases.java +++ b/src/main/java/org/opensearch/flowframework/common/DefaultUseCases.java @@ -28,7 +28,7 @@ public enum DefaultUseCases { COHERE_EMBEDDING_MODEL_DEPLOY( "cohere-embedding_model_deploy", "defaults/cohere-embedding-defaults.json", - "substitutionTemplates/deploy-remote-model-template-extra-params.json" + "substitutionTemplates/deploy-remote-model-extra-params-template.json" ), /** defaults file and substitution ready template for Bedrock Titan embedding model */ BEDROCK_TITAN_EMBEDDING_MODEL_DEPLOY( @@ -93,7 +93,13 @@ public enum DefaultUseCases { "substitutionTemplates/semantic-search-with-model-and-query-enricher-template.json" ), /** defaults file and substitution ready template for hybrid search, no model creation*/ - HYBRID_SEARCH("hybrid_search", "defaults/hybrid-search-defaults.json", "substitutionTemplates/hybrid-search-template.json"); + HYBRID_SEARCH("hybrid_search", "defaults/hybrid-search-defaults.json", "substitutionTemplates/hybrid-search-template.json"), + /** defaults file and substitution ready template for conversational search with cohere chat model*/ + CONVERSATIONAL_SEARCH_WITH_COHERE_DEPLOY( + "conversational_search_with_llm_deploy", + "defaults/conversational-search-defaults.json", + "substitutionTemplates/conversational-search-with-cohere-model-template.json" + ); private final String useCaseName; private final String defaultsFile; diff --git a/src/main/java/org/opensearch/flowframework/rest/RestCreateWorkflowAction.java b/src/main/java/org/opensearch/flowframework/rest/RestCreateWorkflowAction.java index c2ec444c4..59c8a3b59 100644 --- a/src/main/java/org/opensearch/flowframework/rest/RestCreateWorkflowAction.java +++ b/src/main/java/org/opensearch/flowframework/rest/RestCreateWorkflowAction.java @@ -131,18 +131,19 @@ protected RestChannelConsumer prepareRequest(RestRequest request, NodeClient cli try { XContentParser parser = request.contentParser(); ensureExpectedToken(XContentParser.Token.START_OBJECT, parser.nextToken(), parser); - Map userDefaults = ParseUtils.parseStringToStringMap(parser); + Map userDefaults = ParseUtils.parseStringToObjectMap(parser); // updates the default params with anything user has given that matches - for (Map.Entry userDefaultsEntry : userDefaults.entrySet()) { + for (Map.Entry userDefaultsEntry : userDefaults.entrySet()) { String key = userDefaultsEntry.getKey(); - String value = userDefaultsEntry.getValue(); + String value = userDefaultsEntry.getValue().toString(); if (useCaseDefaultsMap.containsKey(key)) { useCaseDefaultsMap.put(key, value); } } } catch (Exception ex) { RestStatus status = ex instanceof IOException ? RestStatus.BAD_REQUEST : ExceptionsHelper.status(ex); - String errorMessage = "failure parsing request body when a use case is given"; + String errorMessage = + "failure parsing request body when a use case is given, make sure to provide a map with values that are either Strings, Arrays, or Map of Strings to Strings"; logger.error(errorMessage, ex); throw new FlowFrameworkException(errorMessage, status); } @@ -154,7 +155,6 @@ protected RestChannelConsumer prepareRequest(RestRequest request, NodeClient cli null, useCaseDefaultsMap ); - XContentParser parserTestJson = ParseUtils.jsonToParser(useCaseTemplateFileInStringFormat); ensureExpectedToken(XContentParser.Token.START_OBJECT, parserTestJson.currentToken(), parserTestJson); template = Template.parse(parserTestJson); diff --git a/src/main/java/org/opensearch/flowframework/util/ParseUtils.java b/src/main/java/org/opensearch/flowframework/util/ParseUtils.java index 0a923721f..40b4ed43e 100644 --- a/src/main/java/org/opensearch/flowframework/util/ParseUtils.java +++ b/src/main/java/org/opensearch/flowframework/util/ParseUtils.java @@ -34,6 +34,7 @@ import java.io.IOException; import java.io.InputStream; import java.time.Instant; +import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; import java.util.List; @@ -58,6 +59,7 @@ public class ParseUtils { // Matches ${{ foo.bar }} (whitespace optional) with capturing groups 1=foo, 2=bar // private static final Pattern SUBSTITUTION_PATTERN = Pattern.compile("\\$\\{\\{\\s*(.+)\\.(.+?)\\s*\\}\\}"); private static final Pattern SUBSTITUTION_PATTERN = Pattern.compile("\\$\\{\\{\\s*([\\w_]+)\\.([\\w_]+)\\s*\\}\\}"); + private static final Pattern JSON_ARRAY_DOUBLE_QUOTES_PATTERN = Pattern.compile("\"\\[(.*?)]\""); private ParseUtils() {} @@ -69,7 +71,7 @@ private ParseUtils() {} * @param json the json string * @return The XContent parser for the json string * @throws IOException on failure to create the parser - */ + */ public static XContentParser jsonToParser(String json) throws IOException { XContentParser parser = JsonXContent.jsonXContent.createParser( NamedXContentRegistry.EMPTY, @@ -103,7 +105,7 @@ public static String resourceToString(String path) throws IOException { * Builds an XContent object representing a map of String keys to String values. * * @param xContentBuilder An XContent builder whose position is at the start of the map object to build - * @param map A map as key-value String pairs. + * @param map A map as key-value String pairs. * @throws IOException on a build failure */ public static void buildStringToStringMap(XContentBuilder xContentBuilder, Map map) throws IOException { @@ -118,7 +120,7 @@ public static void buildStringToStringMap(XContentBuilder xContentBuilder, Map map) throws IOException { @@ -137,7 +139,7 @@ public static void buildStringToObjectMap(XContentBuilder xContentBuilder, Map parseStringToStringMap(XContentParser parser) /** * Parses an XContent object representing a map of String keys to Object values. * The Object value here can either be a string or a map + * If an array is found in the given parser we conver the array to a string representation of the array + * * @param parser An XContent parser whose position is at the start of the map object to parse * @return A map as identified by the key-value pairs in the XContent * @throws IOException on a parse failure @@ -182,6 +186,18 @@ public static Map parseStringToObjectMap(XContentParser parser) if (parser.currentToken() == XContentParser.Token.START_OBJECT) { // If the current token is a START_OBJECT, parse it as Map map.put(fieldName, parseStringToStringMap(parser)); + } else if (parser.currentToken() == XContentParser.Token.START_ARRAY) { + // If an array, parse it to a string + // Handle array: convert it to a string representation + List elements = new ArrayList<>(); + while (parser.nextToken() != XContentParser.Token.END_ARRAY) { + if (parser.currentToken().equals(XContentParser.Token.VALUE_NUMBER)) { + elements.add(String.valueOf(parser.numberValue())); // If number value don't add escaping quotes + } else { + elements.add("\"" + parser.text() + "\""); // Adding escaped quotes around each element + } + } + map.put(fieldName, elements.toString()); } else { // Otherwise, parse it as a string map.put(fieldName, parser.text()); @@ -209,6 +225,7 @@ public static Instant parseInstant(XContentParser parser) throws IOException { * (e.g., john||own_index,testrole|__user__, no backend role so you see two verticle line after john.). * This is the user string format used internally in the OPENSEARCH_SECURITY_USER_INFO_THREAD_CONTEXT and may be * parsed using User.parse(string). + * * @param client Client containing user info. A public API request will fill in the user info in the thread context. * @return parsed user object */ @@ -222,7 +239,7 @@ public static User getUserContext(Client client) { * Creates a XContentParser from a given Registry * * @param xContentRegistry main registry for serializable content - * @param bytesReference given bytes to be parsed + * @param bytesReference given bytes to be parsed * @return bytesReference of {@link java.time.Instant} * @throws IOException IOException if content can't be parsed correctly */ @@ -233,7 +250,8 @@ public static XContentParser createXContentParserFromRegistry(NamedXContentRegis /** * Generates a string to string Map - * @param map content map + * + * @param map content map * @param fieldName fieldName * @return instance of the map */ @@ -249,15 +267,15 @@ public static Map getStringToStringMap(Object map, String fieldN * Creates a map containing the specified input keys, with values derived from template data or previous node * output. * - * @param requiredInputKeys A set of keys that must be present, or will cause an exception to be thrown - * @param optionalInputKeys A set of keys that may be present, or will be absent in the returned map - * @param currentNodeInputs Input params and content for this node, from workflow parsing - * @param outputs WorkflowData content of previous steps + * @param requiredInputKeys A set of keys that must be present, or will cause an exception to be thrown + * @param optionalInputKeys A set of keys that may be present, or will be absent in the returned map + * @param currentNodeInputs Input params and content for this node, from workflow parsing + * @param outputs WorkflowData content of previous steps * @param previousNodeInputs Input params for this node that come from previous steps - * @param params Params that came from REST path + * @param params Params that came from REST path * @return A map containing the requiredInputKeys with their corresponding values, - * and optionalInputKeys with their corresponding values if present. - * Throws a {@link FlowFrameworkException} if a required key is not present. + * and optionalInputKeys with their corresponding values if present. + * Throws a {@link FlowFrameworkException} if a required key is not present. */ public static Map getInputsFromPreviousSteps( Set requiredInputKeys, @@ -346,9 +364,10 @@ public static Map getInputsFromPreviousSteps( /** * Executes substitution on the given value by looking at any matching values in either the ouputs or params map - * @param value the Object that will have the substitution done on + * + * @param value the Object that will have the substitution done on * @param outputs potential location of values to be substituted in - * @param params potential location of values to be subsituted in + * @param params potential location of values to be subsituted in * @return the substituted object back */ public static Object conditionallySubstitute(Object value, Map outputs, Map params) { @@ -392,6 +411,7 @@ public static Object conditionallySubstitute(Object value, Map /** * Generates a String to String map based on a Json File + * * @param path file path * @return instance of the string * @throws JsonProcessingException JsonProcessingException from Jackson for issues processing map @@ -413,4 +434,27 @@ public static Map parseJsonFileToStringToStringMap(String path) Map mappedJsonFile = mapper.readValue(jsonContent, Map.class); return mappedJsonFile; } + + /** + * Takes an input string, then checks if there is an array in the string with backslashes around strings + * (e.g. "[\"text\", \"hello\"]" to "["text", "hello"]"), this is needed for processors that take in string arrays, + * This also removes the quotations around the array making the array valid to consume + * (e.g. "weights": "[0.7, 0.3]" to "weights": [0.7, 0.3]) + * + * @param input The inputString given to be transformed + * @return the transformed string + */ + public static String removingBackslashesAndQuotesInArrayInJsonString(String input) { + Matcher matcher = JSON_ARRAY_DOUBLE_QUOTES_PATTERN.matcher(input); + StringBuffer result = new StringBuffer(); + while (matcher.find()) { + // Extract matched content and remove backslashes before quotes + String withoutEscapes = matcher.group(1).replaceAll("\\\\\"", "\""); + // Return the transformed string with the brackets but without the outer quotes + matcher.appendReplacement(result, "[" + withoutEscapes + "]"); + } + // Append remaining input after the last match + matcher.appendTail(result); + return result.toString(); + } } diff --git a/src/main/java/org/opensearch/flowframework/workflow/AbstractCreatePipelineStep.java b/src/main/java/org/opensearch/flowframework/workflow/AbstractCreatePipelineStep.java index 0689ce4e4..ce9bca27e 100644 --- a/src/main/java/org/opensearch/flowframework/workflow/AbstractCreatePipelineStep.java +++ b/src/main/java/org/opensearch/flowframework/workflow/AbstractCreatePipelineStep.java @@ -85,12 +85,11 @@ public PlainActionFuture execute( String pipelineId = (String) inputs.get(PIPELINE_ID); String configurations = (String) inputs.get(CONFIGURATIONS); - // Special case for processors that have arrays that need to have the quotes removed - // (e.g. "weights": "[0.7, 0.3]" -> "weights": [0.7, 0.3] - // Define a regular expression pattern to match stringified arrays - String transformedJsonString = configurations.replaceAll("\"\\[(.*?)]\"", "[$1]"); + // Special case for processors that have arrays that need to have the quotes around or + // backslashes around strings in array removed + String transformedJsonStringForStringArray = ParseUtils.removingBackslashesAndQuotesInArrayInJsonString(configurations); - byte[] byteArr = transformedJsonString.getBytes(StandardCharsets.UTF_8); + byte[] byteArr = transformedJsonStringForStringArray.getBytes(StandardCharsets.UTF_8); BytesReference configurationsBytes = new BytesArray(byteArr); String pipelineToBeCreated = this.getName(); diff --git a/src/main/java/org/opensearch/flowframework/workflow/CreateIndexStep.java b/src/main/java/org/opensearch/flowframework/workflow/CreateIndexStep.java index 509d4b417..f28fa64d4 100644 --- a/src/main/java/org/opensearch/flowframework/workflow/CreateIndexStep.java +++ b/src/main/java/org/opensearch/flowframework/workflow/CreateIndexStep.java @@ -14,20 +14,27 @@ import org.opensearch.action.admin.indices.create.CreateIndexRequest; import org.opensearch.action.support.PlainActionFuture; import org.opensearch.client.Client; -import org.opensearch.common.xcontent.XContentType; +import org.opensearch.common.xcontent.LoggingDeprecationHandler; +import org.opensearch.common.xcontent.XContentHelper; import org.opensearch.core.action.ActionListener; import org.opensearch.core.common.bytes.BytesArray; import org.opensearch.core.common.bytes.BytesReference; +import org.opensearch.core.rest.RestStatus; +import org.opensearch.core.xcontent.MediaTypeRegistry; import org.opensearch.flowframework.exception.FlowFrameworkException; +import org.opensearch.flowframework.exception.WorkflowStepException; import org.opensearch.flowframework.indices.FlowFrameworkIndicesHandler; import org.opensearch.flowframework.util.ParseUtils; +import org.opensearch.index.mapper.MapperService; import java.io.IOException; import java.nio.charset.StandardCharsets; import java.util.Collections; +import java.util.HashMap; import java.util.Map; import java.util.Set; +import static java.util.Collections.singletonMap; import static org.opensearch.flowframework.common.CommonValue.CONFIGURATIONS; import static org.opensearch.flowframework.common.WorkflowResources.INDEX_NAME; import static org.opensearch.flowframework.common.WorkflowResources.getResourceByWorkflowStep; @@ -85,8 +92,20 @@ public PlainActionFuture execute( byte[] byteArr = configurations.getBytes(StandardCharsets.UTF_8); BytesReference configurationsBytes = new BytesArray(byteArr); + CreateIndexRequest createIndexRequest = new CreateIndexRequest(indexName); + + try { + if (!configurations.isEmpty()) { + Map sourceAsMap = XContentHelper.convertToMap(configurationsBytes, false, MediaTypeRegistry.JSON).v2(); + sourceAsMap = prepareMappings(sourceAsMap); + createIndexRequest.source(sourceAsMap, LoggingDeprecationHandler.INSTANCE); + } + } catch (Exception ex) { + String errorMessage = "Failed to create the index" + indexName + ", _doc is not permitted in mapping"; + logger.error(errorMessage, ex); + createIndexFuture.onFailure(new WorkflowStepException(errorMessage, RestStatus.BAD_REQUEST)); + } - CreateIndexRequest createIndexRequest = new CreateIndexRequest(indexName).source(configurationsBytes, XContentType.JSON); client.admin().indices().create(createIndexRequest, ActionListener.wrap(acknowledgedResponse -> { String resourceName = getResourceByWorkflowStep(getName()); logger.info("Created index: {}", indexName); @@ -129,6 +148,27 @@ public PlainActionFuture execute( return createIndexFuture; } + // This method to check if the mapping contains a type `_doc` and if yes we fail the request + // is to duplicate the behavior we have today through create index rest API, we want users + // to encounter the same behavior and not suddenly have to add `_doc` while using our create_index step + // related bug: https://github.com/opensearch-project/OpenSearch/issues/12775 + private static Map prepareMappings(Map source) { + if (source.containsKey("mappings") == false || (source.get("mappings") instanceof Map) == false) { + return source; + } + + Map newSource = new HashMap<>(source); + + @SuppressWarnings("unchecked") + Map mappings = (Map) source.get("mappings"); + if (MapperService.isMappingSourceTyped(MapperService.SINGLE_MAPPING_NAME, mappings)) { + throw new WorkflowStepException("The mapping definition cannot be nested under a type", RestStatus.BAD_REQUEST); + } + + newSource.put("mappings", singletonMap(MapperService.SINGLE_MAPPING_NAME, mappings)); + return newSource; + } + @Override public String getName() { return NAME; diff --git a/src/main/resources/defaults/bedrock-titan-embedding-defaults.json b/src/main/resources/defaults/bedrock-titan-embedding-defaults.json index 20baf867b..401924ea2 100644 --- a/src/main/resources/defaults/bedrock-titan-embedding-defaults.json +++ b/src/main/resources/defaults/bedrock-titan-embedding-defaults.json @@ -4,7 +4,6 @@ "create_connector.name": "Amazon Bedrock Connector: embedding", "create_connector.description": "The connector to bedrock Titan embedding model", "create_connector.region": "us-east-1", - "create_connector.endpoint": "api.openai.com", "create_connector.credential.access_key": "123", "create_connector.credential.secret_key": "123", "create_connector.credential.session_token": "123", diff --git a/src/main/resources/defaults/bedrock-titan-mulitmodal-defaults.json b/src/main/resources/defaults/bedrock-titan-multimodal-defaults.json similarity index 93% rename from src/main/resources/defaults/bedrock-titan-mulitmodal-defaults.json rename to src/main/resources/defaults/bedrock-titan-multimodal-defaults.json index b1666bec5..65f3b44d9 100644 --- a/src/main/resources/defaults/bedrock-titan-mulitmodal-defaults.json +++ b/src/main/resources/defaults/bedrock-titan-multimodal-defaults.json @@ -1,11 +1,10 @@ { "template.name": "deploy-bedrock-titan-multimodal-embedding-model", - "template.description": "deploying Amazon Bedrock Titan multimodal embedding model ", + "template.description": "Deploying Amazon Bedrock Titan multimodal embedding model ", "create_connector.name": "Amazon Bedrock Connector: multi-modal embedding", "create_connector.description": "The connector to bedrock Titan multi-modal embedding model", "create_connector.region": "us-east-1", - "create_connector.input_docs_processed_step_size": 2, - "create_connector.endpoint": "api.openai.com", + "create_connector.input_docs_processed_step_size": "2", "create_connector.credential.access_key": "123", "create_connector.credential.secret_key": "123", "create_connector.credential.session_token": "123", diff --git a/src/main/resources/defaults/cohere-chat-defaults.json b/src/main/resources/defaults/cohere-chat-defaults.json index bc200d335..7ac6eb0ef 100644 --- a/src/main/resources/defaults/cohere-chat-defaults.json +++ b/src/main/resources/defaults/cohere-chat-defaults.json @@ -1,6 +1,6 @@ { "template.name": "deploy-cohere-chat-model", - "template.description": "deploying cohere chat model", + "template.description": "Deploying a Cohere chat model", "create_connector.name": "Cohere Chat Model", "create_connector.description": "The connector to Cohere's public chat API", "create_connector.protocol": "http", diff --git a/src/main/resources/defaults/cohere-embedding-defaults.json b/src/main/resources/defaults/cohere-embedding-defaults.json index 53a402f60..3a6c7d043 100644 --- a/src/main/resources/defaults/cohere-embedding-defaults.json +++ b/src/main/resources/defaults/cohere-embedding-defaults.json @@ -1,6 +1,6 @@ { "template.name": "deploy-cohere-model", - "template.description": "deploying cohere embedding model", + "template.description": "Deploying a Cohere embedding model", "create_connector.name": "cohere-embedding-connector", "create_connector.description": "The connector to Cohere's public embed API", "create_connector.protocol": "http", diff --git a/src/main/resources/defaults/cohere-embedding-semantic-search-defaults.json b/src/main/resources/defaults/cohere-embedding-semantic-search-defaults.json index 439f905f2..56fc2ba73 100644 --- a/src/main/resources/defaults/cohere-embedding-semantic-search-defaults.json +++ b/src/main/resources/defaults/cohere-embedding-semantic-search-defaults.json @@ -1,6 +1,6 @@ { "template.name": "semantic search with cohere embedding", - "template.description": "Setting up semantic search, with cohere embedding model", + "template.description": "Setting up semantic search, with a Cohere embedding model", "create_connector.name": "cohere-embedding-connector", "create_connector.description": "The connector to Cohere's public embed API", "create_connector.protocol": "http", diff --git a/src/main/resources/defaults/conversational-search-defaults.json b/src/main/resources/defaults/conversational-search-defaults.json new file mode 100644 index 000000000..a983fec95 --- /dev/null +++ b/src/main/resources/defaults/conversational-search-defaults.json @@ -0,0 +1,20 @@ +{ + "template.name": "deploy-cohere-chat-model", + "template.description": "A template to deploy a Cohere chat model", + "create_connector.name": "Cohere Chat Model", + "create_connector.description": "The connector to Cohere's public chat API", + "create_connector.protocol": "http", + "create_connector.model": "command", + "create_connector.endpoint": "api.cohere.ai", + "create_connector.credential.key": "123", + "create_connector.actions.url": "https://api.cohere.ai/v1/chat", + "create_connector.actions.request_body": "{ \"message\": \"${parameters.message}\", \"model\": \"${parameters.model}\" }", + "register_remote_model.name": "Cohere chat model", + "register_remote_model.description": "cohere-chat-model", + "create_search_pipeline.pipeline_id": "rag-pipeline", + "create_search_pipeline.retrieval_augmented_generation.tag": "openai_pipeline_demo", + "create_search_pipeline.retrieval_augmented_generation.description": "Demo pipeline using a Cohere chat model", + "create_search_pipeline.retrieval_augmented_generation.context_field_list": "[\"text\"]", + "create_search_pipeline.retrieval_augmented_generation.system_prompt": "You are a helpful assistant", + "create_search_pipeline.retrieval_augmented_generation.user_instructions": "Generate a concise and informative answer in less than 100 words for the given question" +} diff --git a/src/main/resources/defaults/local-sparse-search-biencoder-defaults.json b/src/main/resources/defaults/local-sparse-search-biencoder-defaults.json index aa4ae0512..f39dde616 100644 --- a/src/main/resources/defaults/local-sparse-search-biencoder-defaults.json +++ b/src/main/resources/defaults/local-sparse-search-biencoder-defaults.json @@ -1,6 +1,6 @@ { "template.name": "local-model-neural-sparse-search", - "template.description": "setting up neural sparse search with local model", + "template.description": "Setting up neural sparse search with pretrained local model", "register_local_sparse_encoding_model.name": "amazon/neural-sparse/opensearch-neural-sparse-encoding-v1", "register_local_sparse_encoding_model.description": "This is a neural sparse encoding model", "register_local_sparse_encoding_model.model_format": "TORCH_SCRIPT", diff --git a/src/main/resources/defaults/multi-modal-search-defaults.json b/src/main/resources/defaults/multi-modal-search-defaults.json index 3bd47f625..2a05cc5bf 100644 --- a/src/main/resources/defaults/multi-modal-search-defaults.json +++ b/src/main/resources/defaults/multi-modal-search-defaults.json @@ -4,12 +4,12 @@ "create_ingest_pipeline.pipeline_id": "nlp-multimodal-ingest-pipeline", "create_ingest_pipeline.description": "A text/image embedding pipeline", "create_ingest_pipeline.model_id": "123", - "create_ingest_pipeline.embedding": "vector_embedding", + "text_image_embedding.embedding": "vector_embedding", "text_image_embedding.field_map.text": "image_description", "text_image_embedding.field_map.image": "image_binary", "create_index.name": "my-multimodal-nlp-index", - "create_index.settings.number_of_shards": 2, - "text_image_embedding.field_map.output.dimension": 1024, + "create_index.settings.number_of_shards": "2", + "text_image_embedding.field_map.output.dimension": "1024", "create_index.mappings.method.engine": "lucene", "create_index.mappings.method.name": "hnsw" } diff --git a/src/main/resources/defaults/multimodal-search-bedrock-titan-defaults.json b/src/main/resources/defaults/multimodal-search-bedrock-titan-defaults.json index 222053db1..afd3b5da5 100644 --- a/src/main/resources/defaults/multimodal-search-bedrock-titan-defaults.json +++ b/src/main/resources/defaults/multimodal-search-bedrock-titan-defaults.json @@ -4,7 +4,7 @@ "create_connector.name": "Amazon Bedrock Connector: multi-modal embedding", "create_connector.description": "The connector to bedrock Titan multi-modal embedding model", "create_connector.region": "us-east-1", - "create_connector.input_docs_processed_step_size": 2, + "create_connector.input_docs_processed_step_size": "2", "create_connector.endpoint": "api.openai.com", "create_connector.credential.access_key": "123", "create_connector.credential.secret_key": "123", @@ -17,12 +17,12 @@ "register_remote_model.description": "bedrock-multi-modal-embedding-model", "create_ingest_pipeline.pipeline_id": "nlp-multimodal-ingest-pipeline", "create_ingest_pipeline.description": "A text/image embedding pipeline", - "create_ingest_pipeline.embedding": "vector_embedding", + "text_image_embedding.embedding": "vector_embedding", "text_image_embedding.field_map.text": "image_description", "text_image_embedding.field_map.image": "image_binary", "create_index.name": "my-multimodal-nlp-index", - "create_index.settings.number_of_shards": 2, - "text_image_embedding.field_map.output.dimension": 1024, + "create_index.settings.number_of_shards": "2", + "text_image_embedding.field_map.output.dimension": "1024", "create_index.mappings.method.engine": "lucene", "create_index.mappings.method.name": "hnsw" } diff --git a/src/main/resources/defaults/openai-chat-defaults.json b/src/main/resources/defaults/openai-chat-defaults.json index 2b28088db..e495079f6 100644 --- a/src/main/resources/defaults/openai-chat-defaults.json +++ b/src/main/resources/defaults/openai-chat-defaults.json @@ -1,6 +1,6 @@ { "template.name": "deploy-openai-chat-model", - "template.description": "deploying openAI chat model", + "template.description": "Deploying an OpenAI chat model", "create_connector.name": "OpenAI Chat Connector", "create_connector.description": "Connector to public OpenAI model", "create_connector.protocol": "http", diff --git a/src/main/resources/defaults/openai-embedding-defaults.json b/src/main/resources/defaults/openai-embedding-defaults.json index 4775e1c27..6951cedaa 100644 --- a/src/main/resources/defaults/openai-embedding-defaults.json +++ b/src/main/resources/defaults/openai-embedding-defaults.json @@ -1,6 +1,6 @@ { "template.name": "deploy-openai-model", - "template.description": "deploying openAI embedding model", + "template.description": "Deploying an OpenAI embedding model", "create_connector.name": "OpenAI-embedding-connector", "create_connector.description": "Connector to public OpenAI model", "create_connector.protocol": "http", diff --git a/src/main/resources/substitutionTemplates/conversational-search-with-cohere-model-template.json b/src/main/resources/substitutionTemplates/conversational-search-with-cohere-model-template.json new file mode 100644 index 000000000..9c919f553 --- /dev/null +++ b/src/main/resources/substitutionTemplates/conversational-search-with-cohere-model-template.json @@ -0,0 +1,83 @@ +{ + "name": "${{template.name}}", + "description": "${{template.description}}", + "use_case": "SEMANTIC_SEARCH", + "version": { + "template": "1.0.0", + "compatibility": [ + "2.12.0", + "3.0.0" + ] + }, + "workflows": { + "provision": { + "nodes": [ + { + "id": "create_connector", + "type": "create_connector", + "user_inputs": { + "name": "${{create_connector}}", + "description": "${{create_connector.description}}", + "version": "1", + "protocol": "${{create_connector.protocol}}", + "parameters": { + "endpoint": "${{create_connector.endpoint}}", + "model": "${{create_connector.model}}" + }, + "credential": { + "key": "${{create_connector.credential.key}}" + }, + "actions": [ + { + "action_type": "predict", + "method": "POST", + "url": "${{create_connector.actions.url}}", + "headers": { + "Authorization": "Bearer ${credential.key}" + }, + "request_body": "${{create_connector.actions.request_body}}" + } + ] + } + }, + { + "id": "register_model", + "type": "register_remote_model", + "previous_node_inputs": { + "create_connector": "parameters" + }, + "user_inputs": { + "name": "${{register_remote_model.name}}", + "function_name": "remote", + "description": "${{register_remote_model.description}}", + "deploy": true + } + }, + { + "id": "create_search_pipeline", + "type": "create_search_pipeline", + "previous_node_inputs": { + "register_model": "model_id" + }, + "user_inputs": { + "pipeline_id": "${{create_search_pipeline.pipeline_id}}", + "configurations": { + "response_processors": [ + { + "retrieval_augmented_generation": { + "tag": "${{create_search_pipeline.retrieval_augmented_generation.tag}}", + "description": "${{create_search_pipeline.retrieval_augmented_generation.description}}", + "model_id": "${{register_model.model_id}}", + "context_field_list": "${{create_search_pipeline.retrieval_augmented_generation.context_field_list}}", + "system_prompt": "${{create_search_pipeline.retrieval_augmented_generation.system_prompt}}", + "user_instructions": "${{create_search_pipeline.retrieval_augmented_generation.user_instructions}}" + } + } + ] + } + } + } + ] + } + } +} diff --git a/src/main/resources/substitutionTemplates/deploy-remote-bedrock-model-template.json b/src/main/resources/substitutionTemplates/deploy-remote-bedrock-model-template.json index 4f9ea5911..bf073b2b8 100644 --- a/src/main/resources/substitutionTemplates/deploy-remote-bedrock-model-template.json +++ b/src/main/resources/substitutionTemplates/deploy-remote-bedrock-model-template.json @@ -26,7 +26,7 @@ "input_docs_processed_step_size": "${{create_connector.input_docs_processed_step_size}}" }, "credential": { - "access_ key": "${{create_connector.credential.access_key}}", + "access_key": "${{create_connector.credential.access_key}}", "secret_key": "${{create_connector.credential.secret_key}}", "session_token": "${{create_connector.credential.session_token}}" }, diff --git a/src/main/resources/substitutionTemplates/deploy-remote-model-chat-template.json b/src/main/resources/substitutionTemplates/deploy-remote-model-chat-template.json index 6c2f7cc05..f03f8e6c6 100644 --- a/src/main/resources/substitutionTemplates/deploy-remote-model-chat-template.json +++ b/src/main/resources/substitutionTemplates/deploy-remote-model-chat-template.json @@ -44,7 +44,7 @@ "id": "register_model", "type": "register_remote_model", "previous_node_inputs": { - "create_connector_step_1": "parameters" + "create_connector": "parameters" }, "user_inputs": { "name": "${{register_remote_model.name}}", @@ -56,7 +56,7 @@ "id": "deploy_model", "type": "deploy_model", "previous_node_inputs": { - "register_model_1": "model_id" + "register_model": "model_id" } } ], diff --git a/src/main/resources/substitutionTemplates/deploy-remote-model-template-extra-params.json b/src/main/resources/substitutionTemplates/deploy-remote-model-extra-params-template.json similarity index 100% rename from src/main/resources/substitutionTemplates/deploy-remote-model-template-extra-params.json rename to src/main/resources/substitutionTemplates/deploy-remote-model-extra-params-template.json diff --git a/src/main/resources/substitutionTemplates/deploy-remote-model-template.json b/src/main/resources/substitutionTemplates/deploy-remote-model-template.json index bc1c9eebc..4e74a9d2c 100644 --- a/src/main/resources/substitutionTemplates/deploy-remote-model-template.json +++ b/src/main/resources/substitutionTemplates/deploy-remote-model-template.json @@ -46,7 +46,7 @@ "id": "register_model", "type": "register_remote_model", "previous_node_inputs": { - "create_connector_step_1": "parameters" + "create_connector": "parameters" }, "user_inputs": { "name": "${{register_remote_model.name}}", @@ -58,7 +58,7 @@ "id": "deploy_model", "type": "deploy_model", "previous_node_inputs": { - "register_model_1": "model_id" + "register_model": "model_id" } } ], diff --git a/src/main/resources/substitutionTemplates/multi-modal-search-template.json b/src/main/resources/substitutionTemplates/multi-modal-search-template.json index ff8623a9e..f6a14dc75 100644 --- a/src/main/resources/substitutionTemplates/multi-modal-search-template.json +++ b/src/main/resources/substitutionTemplates/multi-modal-search-template.json @@ -23,7 +23,7 @@ { "text_image_embedding": { "model_id": "${{create_ingest_pipeline.model_id}}", - "embedding": "${{create_ingest_pipeline.embedding}}", + "embedding": "${{text_image_embedding.embedding}}", "field_map": { "text": "${{text_image_embedding.field_map.text}}", "image": "${{text_image_embedding.field_map.image}}" @@ -54,7 +54,7 @@ "id": { "type": "text" }, - "${{text_embedding.field_map.output}}": { + "${{text_image_embedding.embedding}}": { "type": "knn_vector", "dimension": "${{text_image_embedding.field_map.output.dimension}}", "method": { diff --git a/src/main/resources/substitutionTemplates/multi-modal-search-with-bedrock-titan-template.json b/src/main/resources/substitutionTemplates/multi-modal-search-with-bedrock-titan-template.json index 87adf082e..da85a9387 100644 --- a/src/main/resources/substitutionTemplates/multi-modal-search-with-bedrock-titan-template.json +++ b/src/main/resources/substitutionTemplates/multi-modal-search-with-bedrock-titan-template.json @@ -26,7 +26,7 @@ "input_docs_processed_step_size": "${{create_connector.input_docs_processed_step_size}}" }, "credential": { - "access_ key": "${{create_connector.credential.access_key}}", + "access_key": "${{create_connector.credential.access_key}}", "secret_key": "${{create_connector.credential.secret_key}}", "session_token": "${{create_connector.credential.session_token}}" }, @@ -73,7 +73,7 @@ { "text_image_embedding": { "model_id": "${{register_model.model_id}}", - "embedding": "${{create_ingest_pipeline.embedding}}", + "embedding": "${{text_image_embedding.embedding}}", "field_map": { "text": "${{text_image_embedding.field_map.text}}", "image": "${{text_image_embedding.field_map.image}}" @@ -104,7 +104,7 @@ "id": { "type": "text" }, - "${{text_embedding.field_map.output}}": { + "${{text_image_embedding.embedding}}": { "type": "knn_vector", "dimension": "${{text_image_embedding.field_map.output.dimension}}", "method": { diff --git a/src/test/java/org/opensearch/flowframework/rest/FlowFrameworkRestApiIT.java b/src/test/java/org/opensearch/flowframework/rest/FlowFrameworkRestApiIT.java index 67e21150b..096d0fac7 100644 --- a/src/test/java/org/opensearch/flowframework/rest/FlowFrameworkRestApiIT.java +++ b/src/test/java/org/opensearch/flowframework/rest/FlowFrameworkRestApiIT.java @@ -18,6 +18,7 @@ import org.opensearch.core.rest.RestStatus; import org.opensearch.flowframework.FlowFrameworkRestTestCase; import org.opensearch.flowframework.TestHelpers; +import org.opensearch.flowframework.common.DefaultUseCases; import org.opensearch.flowframework.model.ProvisioningProgress; import org.opensearch.flowframework.model.ResourceCreated; import org.opensearch.flowframework.model.State; @@ -32,6 +33,7 @@ import java.nio.charset.StandardCharsets; import java.time.Instant; import java.util.Collections; +import java.util.EnumSet; import java.util.HashSet; import java.util.List; import java.util.Map; @@ -472,4 +474,19 @@ public void testDefaultSemanticSearchUseCaseWithFailureExpected() throws Excepti } } + public void testAllDefaultUseCasesCreation() throws Exception { + Set allUseCaseNames = EnumSet.allOf(DefaultUseCases.class) + .stream() + .map(DefaultUseCases::getUseCaseName) + .collect(Collectors.toSet()); + + for (String useCaseName : allUseCaseNames) { + Response response = createWorkflowWithUseCase(client(), useCaseName); + assertEquals(RestStatus.CREATED, TestHelpers.restStatus(response)); + + Map responseMap = entityAsMap(response); + String workflowId = (String) responseMap.get(WORKFLOW_ID); + getAndAssertWorkflowStatus(client(), workflowId, State.NOT_STARTED, ProvisioningProgress.NOT_STARTED); + } + } } diff --git a/src/test/java/org/opensearch/flowframework/util/ParseUtilsTests.java b/src/test/java/org/opensearch/flowframework/util/ParseUtilsTests.java index 7ae057d24..92406b3e7 100644 --- a/src/test/java/org/opensearch/flowframework/util/ParseUtilsTests.java +++ b/src/test/java/org/opensearch/flowframework/util/ParseUtilsTests.java @@ -110,6 +110,16 @@ public void testConditionallySubstituteWithUnmatchedPlaceholders() { assertEquals("This string has unmatched ${{placeholder}}", result); } + public void testRemovingBackslashesAndQuotesInArrayInJsonString() { + String inputNumArray = "normalization-processor.combination.parameters.weights: \"[0.3, 0.7]\""; + String outputNumArray = ParseUtils.removingBackslashesAndQuotesInArrayInJsonString(inputNumArray); + assertEquals("normalization-processor.combination.parameters.weights: [0.3, 0.7]", outputNumArray); + String inputStringArray = + "create_search_pipeline.retrieval_augmented_generation.context_field_list: \"[\\\"text\\\", \\\"hello\\\"]\""; + String outputStringArray = ParseUtils.removingBackslashesAndQuotesInArrayInJsonString(inputStringArray); + assertEquals("create_search_pipeline.retrieval_augmented_generation.context_field_list: [\"text\", \"hello\"]", outputStringArray); + } + public void testConditionallySubstituteWithOutputsSubstitution() { String input = "This string contains ${{node.step}}"; Map outputs = new HashMap<>(); diff --git a/src/test/java/org/opensearch/flowframework/workflow/CreateIndexStepTests.java b/src/test/java/org/opensearch/flowframework/workflow/CreateIndexStepTests.java index 56a26429a..3b6af8ffa 100644 --- a/src/test/java/org/opensearch/flowframework/workflow/CreateIndexStepTests.java +++ b/src/test/java/org/opensearch/flowframework/workflow/CreateIndexStepTests.java @@ -70,8 +70,7 @@ public void setUp() throws Exception { this.flowFrameworkIndicesHandler = mock(FlowFrameworkIndicesHandler.class); MockitoAnnotations.openMocks(this); String configurations = - "{\"settings\":{\"index\":{\"number_of_shards\":2,\"number_of_replicas\":1}},\"mappings\":{\"_doc\":{\"properties\":{\"age\":{\"type\":\"integer\"}}}},\"aliases\":{\"sample-alias1\":{}}}"; - + "{\"settings\":{\"index\":{\"number_of_shards\":2,\"number_of_replicas\":1}},\"mappings\":{\"properties\":{\"age\":{\"type\":\"integer\"}}},\"aliases\":{\"sample-alias1\":{}}}"; inputData = new WorkflowData( Map.ofEntries(Map.entry(INDEX_NAME, "demo"), Map.entry(CONFIGURATIONS, configurations)), "test-id",