Skip to content

Commit

Permalink
Fixing create index and use case input parsing bugs (#600)
Browse files Browse the repository at this point in the history
* fixing create index step and array input for processors

Signed-off-by: Amit Galitzky <[email protected]>

* fixing test and enhancing error message

Signed-off-by: Amit Galitzky <[email protected]>

* adding release notes change and some comments addressed

Signed-off-by: Amit Galitzky <[email protected]>

* addressed comments and cleaned up defaults/templates

Signed-off-by: Amit Galitzky <[email protected]>

---------

Signed-off-by: Amit Galitzky <[email protected]>
  • Loading branch information
amitgalitz committed Mar 26, 2024
1 parent 871b0c5 commit 621ba8b
Show file tree
Hide file tree
Showing 27 changed files with 278 additions and 59 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,7 @@ Compatible with OpenSearch 2.13.0

### Refactoring
* Moved workflow-steps.json to Enum ([#523](https://github.com/opensearch-project/flow-framework/pull/523))
* Refactored logging for consistency ([#524](https://github.com/opensearch-project/flow-framework/pull/524))
* Refactored logging for consistency ([#524](https://github.com/opensearch-project/flow-framework/pull/524))

### Bug Fixes
* Fixing create index and use case input parsing bugs ([#600](https://github.com/opensearch-project/flow-framework/pull/600))
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ public enum DefaultUseCases {
COHERE_EMBEDDING_MODEL_DEPLOY(
"cohere-embedding_model_deploy",
"defaults/cohere-embedding-defaults.json",
"substitutionTemplates/deploy-remote-model-template-extra-params.json"
"substitutionTemplates/deploy-remote-model-extra-params-template.json"
),
/** defaults file and substitution ready template for Bedrock Titan embedding model */
BEDROCK_TITAN_EMBEDDING_MODEL_DEPLOY(
Expand Down Expand Up @@ -93,7 +93,13 @@ public enum DefaultUseCases {
"substitutionTemplates/semantic-search-with-model-and-query-enricher-template.json"
),
/** defaults file and substitution ready template for hybrid search, no model creation*/
HYBRID_SEARCH("hybrid_search", "defaults/hybrid-search-defaults.json", "substitutionTemplates/hybrid-search-template.json");
HYBRID_SEARCH("hybrid_search", "defaults/hybrid-search-defaults.json", "substitutionTemplates/hybrid-search-template.json"),
/** defaults file and substitution ready template for conversational search with cohere chat model*/
CONVERSATIONAL_SEARCH_WITH_COHERE_DEPLOY(
"conversational_search_with_llm_deploy",
"defaults/conversational-search-defaults.json",
"substitutionTemplates/conversational-search-with-cohere-model-template.json"
);

private final String useCaseName;
private final String defaultsFile;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -131,18 +131,19 @@ protected RestChannelConsumer prepareRequest(RestRequest request, NodeClient cli
try {
XContentParser parser = request.contentParser();
ensureExpectedToken(XContentParser.Token.START_OBJECT, parser.nextToken(), parser);
Map<String, String> userDefaults = ParseUtils.parseStringToStringMap(parser);
Map<String, Object> userDefaults = ParseUtils.parseStringToObjectMap(parser);
// updates the default params with anything user has given that matches
for (Map.Entry<String, String> userDefaultsEntry : userDefaults.entrySet()) {
for (Map.Entry<String, Object> userDefaultsEntry : userDefaults.entrySet()) {
String key = userDefaultsEntry.getKey();
String value = userDefaultsEntry.getValue();
String value = userDefaultsEntry.getValue().toString();
if (useCaseDefaultsMap.containsKey(key)) {
useCaseDefaultsMap.put(key, value);
}
}
} catch (Exception ex) {
RestStatus status = ex instanceof IOException ? RestStatus.BAD_REQUEST : ExceptionsHelper.status(ex);
String errorMessage = "failure parsing request body when a use case is given";
String errorMessage =
"failure parsing request body when a use case is given, make sure to provide a map with values that are either Strings, Arrays, or Map of Strings to Strings";
logger.error(errorMessage, ex);
throw new FlowFrameworkException(errorMessage, status);
}
Expand All @@ -154,7 +155,6 @@ protected RestChannelConsumer prepareRequest(RestRequest request, NodeClient cli
null,
useCaseDefaultsMap
);

XContentParser parserTestJson = ParseUtils.jsonToParser(useCaseTemplateFileInStringFormat);
ensureExpectedToken(XContentParser.Token.START_OBJECT, parserTestJson.currentToken(), parserTestJson);
template = Template.parse(parserTestJson);
Expand Down
74 changes: 59 additions & 15 deletions src/main/java/org/opensearch/flowframework/util/ParseUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
import java.io.IOException;
import java.io.InputStream;
import java.time.Instant;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
Expand All @@ -58,6 +59,7 @@ public class ParseUtils {
// Matches ${{ foo.bar }} (whitespace optional) with capturing groups 1=foo, 2=bar
// private static final Pattern SUBSTITUTION_PATTERN = Pattern.compile("\\$\\{\\{\\s*(.+)\\.(.+?)\\s*\\}\\}");
private static final Pattern SUBSTITUTION_PATTERN = Pattern.compile("\\$\\{\\{\\s*([\\w_]+)\\.([\\w_]+)\\s*\\}\\}");
private static final Pattern JSON_ARRAY_DOUBLE_QUOTES_PATTERN = Pattern.compile("\"\\[(.*?)]\"");

private ParseUtils() {}

Expand All @@ -69,7 +71,7 @@ private ParseUtils() {}
* @param json the json string
* @return The XContent parser for the json string
* @throws IOException on failure to create the parser
*/
*/
public static XContentParser jsonToParser(String json) throws IOException {
XContentParser parser = JsonXContent.jsonXContent.createParser(
NamedXContentRegistry.EMPTY,
Expand Down Expand Up @@ -103,7 +105,7 @@ public static String resourceToString(String path) throws IOException {
* Builds an XContent object representing a map of String keys to String values.
*
* @param xContentBuilder An XContent builder whose position is at the start of the map object to build
* @param map A map as key-value String pairs.
* @param map A map as key-value String pairs.
* @throws IOException on a build failure
*/
public static void buildStringToStringMap(XContentBuilder xContentBuilder, Map<?, ?> map) throws IOException {
Expand All @@ -118,7 +120,7 @@ public static void buildStringToStringMap(XContentBuilder xContentBuilder, Map<?
* Builds an XContent object representing a map of String keys to Object values.
*
* @param xContentBuilder An XContent builder whose position is at the start of the map object to build
* @param map A map as key-value String to Object.
* @param map A map as key-value String to Object.
* @throws IOException on a build failure
*/
public static void buildStringToObjectMap(XContentBuilder xContentBuilder, Map<?, ?> map) throws IOException {
Expand All @@ -137,7 +139,7 @@ public static void buildStringToObjectMap(XContentBuilder xContentBuilder, Map<?
* Builds an XContent object representing a LLMSpec.
*
* @param xContentBuilder An XContent builder whose position is at the start of the map object to build
* @param llm LLMSpec
* @param llm LLMSpec
* @throws IOException on a build failure
*/
public static void buildLLMMap(XContentBuilder xContentBuilder, LLMSpec llm) throws IOException {
Expand Down Expand Up @@ -169,6 +171,8 @@ public static Map<String, String> parseStringToStringMap(XContentParser parser)
/**
* Parses an XContent object representing a map of String keys to Object values.
* The Object value here can either be a string or a map
* If an array is found in the given parser we conver the array to a string representation of the array
*
* @param parser An XContent parser whose position is at the start of the map object to parse
* @return A map as identified by the key-value pairs in the XContent
* @throws IOException on a parse failure
Expand All @@ -182,6 +186,18 @@ public static Map<String, Object> parseStringToObjectMap(XContentParser parser)
if (parser.currentToken() == XContentParser.Token.START_OBJECT) {
// If the current token is a START_OBJECT, parse it as Map<String, String>
map.put(fieldName, parseStringToStringMap(parser));
} else if (parser.currentToken() == XContentParser.Token.START_ARRAY) {
// If an array, parse it to a string
// Handle array: convert it to a string representation
List<String> elements = new ArrayList<>();
while (parser.nextToken() != XContentParser.Token.END_ARRAY) {
if (parser.currentToken().equals(XContentParser.Token.VALUE_NUMBER)) {
elements.add(String.valueOf(parser.numberValue())); // If number value don't add escaping quotes
} else {
elements.add("\"" + parser.text() + "\""); // Adding escaped quotes around each element
}
}
map.put(fieldName, elements.toString());
} else {
// Otherwise, parse it as a string
map.put(fieldName, parser.text());
Expand Down Expand Up @@ -209,6 +225,7 @@ public static Instant parseInstant(XContentParser parser) throws IOException {
* (e.g., john||own_index,testrole|__user__, no backend role so you see two verticle line after john.).
* This is the user string format used internally in the OPENSEARCH_SECURITY_USER_INFO_THREAD_CONTEXT and may be
* parsed using User.parse(string).
*
* @param client Client containing user info. A public API request will fill in the user info in the thread context.
* @return parsed user object
*/
Expand All @@ -222,7 +239,7 @@ public static User getUserContext(Client client) {
* Creates a XContentParser from a given Registry
*
* @param xContentRegistry main registry for serializable content
* @param bytesReference given bytes to be parsed
* @param bytesReference given bytes to be parsed
* @return bytesReference of {@link java.time.Instant}
* @throws IOException IOException if content can't be parsed correctly
*/
Expand All @@ -233,7 +250,8 @@ public static XContentParser createXContentParserFromRegistry(NamedXContentRegis

/**
* Generates a string to string Map
* @param map content map
*
* @param map content map
* @param fieldName fieldName
* @return instance of the map
*/
Expand All @@ -249,15 +267,15 @@ public static Map<String, String> getStringToStringMap(Object map, String fieldN
* Creates a map containing the specified input keys, with values derived from template data or previous node
* output.
*
* @param requiredInputKeys A set of keys that must be present, or will cause an exception to be thrown
* @param optionalInputKeys A set of keys that may be present, or will be absent in the returned map
* @param currentNodeInputs Input params and content for this node, from workflow parsing
* @param outputs WorkflowData content of previous steps
* @param requiredInputKeys A set of keys that must be present, or will cause an exception to be thrown
* @param optionalInputKeys A set of keys that may be present, or will be absent in the returned map
* @param currentNodeInputs Input params and content for this node, from workflow parsing
* @param outputs WorkflowData content of previous steps
* @param previousNodeInputs Input params for this node that come from previous steps
* @param params Params that came from REST path
* @param params Params that came from REST path
* @return A map containing the requiredInputKeys with their corresponding values,
* and optionalInputKeys with their corresponding values if present.
* Throws a {@link FlowFrameworkException} if a required key is not present.
* and optionalInputKeys with their corresponding values if present.
* Throws a {@link FlowFrameworkException} if a required key is not present.
*/
public static Map<String, Object> getInputsFromPreviousSteps(
Set<String> requiredInputKeys,
Expand Down Expand Up @@ -346,9 +364,10 @@ public static Map<String, Object> getInputsFromPreviousSteps(

/**
* Executes substitution on the given value by looking at any matching values in either the ouputs or params map
* @param value the Object that will have the substitution done on
*
* @param value the Object that will have the substitution done on
* @param outputs potential location of values to be substituted in
* @param params potential location of values to be subsituted in
* @param params potential location of values to be subsituted in
* @return the substituted object back
*/
public static Object conditionallySubstitute(Object value, Map<String, WorkflowData> outputs, Map<String, String> params) {
Expand Down Expand Up @@ -392,6 +411,7 @@ public static Object conditionallySubstitute(Object value, Map<String, WorkflowD

/**
* Generates a string based on an arbitrary String to object map using Jackson
*
* @param map content map
* @return instance of the string
* @throws JsonProcessingException JsonProcessingException from Jackson for issues processing map
Expand All @@ -404,6 +424,7 @@ public static String parseArbitraryStringToObjectMapToString(Map<String, Object>

/**
* Generates a String to String map based on a Json File
*
* @param path file path
* @return instance of the string
* @throws JsonProcessingException JsonProcessingException from Jackson for issues processing map
Expand All @@ -413,4 +434,27 @@ public static Map<String, String> parseJsonFileToStringToStringMap(String path)
Map<String, String> mappedJsonFile = mapper.readValue(jsonContent, Map.class);
return mappedJsonFile;
}

/**
* Takes an input string, then checks if there is an array in the string with backslashes around strings
* (e.g. "[\"text\", \"hello\"]" to "["text", "hello"]"), this is needed for processors that take in string arrays,
* This also removes the quotations around the array making the array valid to consume
* (e.g. "weights": "[0.7, 0.3]" to "weights": [0.7, 0.3])
*
* @param input The inputString given to be transformed
* @return the transformed string
*/
public static String removingBackslashesAndQuotesInArrayInJsonString(String input) {
Matcher matcher = JSON_ARRAY_DOUBLE_QUOTES_PATTERN.matcher(input);
StringBuffer result = new StringBuffer();
while (matcher.find()) {
// Extract matched content and remove backslashes before quotes
String withoutEscapes = matcher.group(1).replaceAll("\\\\\"", "\"");
// Return the transformed string with the brackets but without the outer quotes
matcher.appendReplacement(result, "[" + withoutEscapes + "]");
}
// Append remaining input after the last match
matcher.appendTail(result);
return result.toString();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -85,12 +85,11 @@ public PlainActionFuture<WorkflowData> execute(
String pipelineId = (String) inputs.get(PIPELINE_ID);
String configurations = (String) inputs.get(CONFIGURATIONS);

// Special case for processors that have arrays that need to have the quotes removed
// (e.g. "weights": "[0.7, 0.3]" -> "weights": [0.7, 0.3]
// Define a regular expression pattern to match stringified arrays
String transformedJsonString = configurations.replaceAll("\"\\[(.*?)]\"", "[$1]");
// Special case for processors that have arrays that need to have the quotes around or
// backslashes around strings in array removed
String transformedJsonStringForStringArray = ParseUtils.removingBackslashesAndQuotesInArrayInJsonString(configurations);

byte[] byteArr = transformedJsonString.getBytes(StandardCharsets.UTF_8);
byte[] byteArr = transformedJsonStringForStringArray.getBytes(StandardCharsets.UTF_8);
BytesReference configurationsBytes = new BytesArray(byteArr);

String pipelineToBeCreated = this.getName();
Expand Down
Loading

0 comments on commit 621ba8b

Please sign in to comment.