Avoid infinite loop in flat_object parsing (#15985)

* Avoid infinite loop in flat_object parsing We had logic in flat_object parsing that would: 1. Try parsing a flat object field that is not an object or null. 2. Would see an END_ARRAY token, ignore it, and not advance the parser. Combined, this would create a scenario where passing an array of strings for a flat_object would parse the string values, then loop infinitely on the END_ARRAY token. Signed-off-by: Michael Froh <[email protected]> * Remove some unused code and add more tests The removed code does not actually seem to affect the logic. Also, I want to be 100% sure that every call to parseToken is guaranteed to call parser.nextToken() at some point. Signed-off-by: Michael Froh <[email protected]> * Remove unused parameter from parseToken Thanks for the reminder, @kkewwei! Signed-off-by: Michael Froh <[email protected]> * Add skip for newly-added test The test fails on MixedClusterClientYamlTestSuiteIT because 2.x still has the infinite loop until backport. Signed-off-by: Michael Froh <[email protected]> --------- Signed-off-by: Michael Froh <[email protected]>
opensearch-project · Sep 20, 2024 · 05dab3b · 05dab3b
1 parent 4963792
commit 05dab3b
Show file tree

Hide file tree

Showing 4 changed files with 90 additions and 53 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -34,6 +34,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 - Fix wildcard query containing escaped character ([#15737](https://github.com/opensearch-project/OpenSearch/pull/15737))
 - Fix case-insensitive query on wildcard field ([#15882](https://github.com/opensearch-project/OpenSearch/pull/15882))
 - Add validation for the search backpressure cancellation settings ([#15501](https://github.com/opensearch-project/OpenSearch/pull/15501))
+- Avoid infinite loop when `flat_object` field contains invalid token ([#15985](https://github.com/opensearch-project/OpenSearch/pull/15985))
 - Fix infinite loop in nested agg ([#15931](https://github.com/opensearch-project/OpenSearch/pull/15931))
 
 ### Security

diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/index/90_flat_object.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/index/90_flat_object.yml
@@ -62,7 +62,6 @@ setup:
           },
           "required_matches": 1
         }
-
   # Do index refresh
   - do:
       indices.refresh:
@@ -74,7 +73,52 @@ teardown:
   - do:
       indices.delete:
         index: test
-
+---
+"Invalid docs":
+  - skip:
+      version: "- 2.99.99"
+      reason: "parsing of these objects would infinite loop prior to 2.18"
+  # The following documents are invalid.
+  - do:
+      catch: /parsing_exception/
+      index:
+        index: test
+        id: 3
+        body: {
+          "ISBN13": "V12154942123242",
+          "catalog": [ "Arrays in Action" ],
+          "required_matches": 1
+        }
+  - do:
+      catch: /parsing_exception/
+      index:
+        index: test
+        id: 3
+        body: {
+          "ISBN13": "V12154942123242",
+          "catalog": "Strings in Action",
+          "required_matches": 1
+        }
+  - do:
+      catch: /parsing_exception/
+      index:
+        index: test
+        id: 3
+        body: {
+          "ISBN13": "V12154942123242",
+          "catalog": 12345,
+          "required_matches": 1
+        }
+  - do:
+      catch: /parsing_exception/
+      index:
+        index: test
+        id: 3
+        body: {
+          "ISBN13": "V12154942123242",
+          "catalog": [ 12345 ],
+          "required_matches": 1
+        }
 ---
 # Verify that mappings under the catalog field did not expand
 # and no dynamic fields were created.

diff --git a/server/src/main/java/org/opensearch/common/xcontent/JsonToStringXContentParser.java b/server/src/main/java/org/opensearch/common/xcontent/JsonToStringXContentParser.java
@@ -9,6 +9,7 @@
 package org.opensearch.common.xcontent;
 
 import org.opensearch.common.xcontent.json.JsonXContent;
+import org.opensearch.core.common.ParsingException;
 import org.opensearch.core.common.Strings;
 import org.opensearch.core.common.bytes.BytesReference;
 import org.opensearch.core.xcontent.AbstractXContentParser;
@@ -73,7 +74,7 @@ public XContentParser parseObject() throws IOException {
         builder.startObject();
         LinkedList<String> path = new LinkedList<>(Collections.singleton(fieldTypeName));
         while (currentToken() != Token.END_OBJECT) {
-            parseToken(path, null);
+            parseToken(path);
         }
         // deduplication the fieldName,valueList,valueAndPathList
         builder.field(this.fieldTypeName, new HashSet<>(keyList));
@@ -87,14 +88,11 @@ public XContentParser parseObject() throws IOException {
     /**
      * @return true if the child object contains no_null value, false otherwise
      */
-    private boolean parseToken(Deque<String> path, String currentFieldName) throws IOException {
-        if (path.size() == 1 && processNoNestedValue()) {
-            return true;
-        }
+    private boolean parseToken(Deque<String> path) throws IOException {
         boolean isChildrenValueValid = false;
         boolean visitFieldName = false;
         if (this.parser.currentToken() == Token.FIELD_NAME) {
-            currentFieldName = this.parser.currentName();
+            final String currentFieldName = this.parser.currentName();
             path.addLast(currentFieldName); // Pushing onto the stack *must* be matched by pop
             visitFieldName = true;
             String parts = currentFieldName;
@@ -106,23 +104,21 @@ private boolean parseToken(Deque<String> path, String currentFieldName) throws I
             }
             this.keyList.add(parts); // parts has no dot, so either it's the original fieldName or it's the last part
             this.parser.nextToken(); // advance to the value of fieldName
-            isChildrenValueValid = parseToken(path, currentFieldName); // parse the value for fieldName (which will be an array, an object,
-                                                                       // or a primitive value)
+            isChildrenValueValid = parseToken(path); // parse the value for fieldName (which will be an array, an object,
+                                                     // or a primitive value)
             path.removeLast(); // Here is where we pop fieldName from the stack (since we're done with the value of fieldName)
             // Note that whichever other branch we just passed through has already ended with nextToken(), so we
             // don't need to call it.
         } else if (this.parser.currentToken() == Token.START_ARRAY) {
             parser.nextToken();
             while (this.parser.currentToken() != Token.END_ARRAY) {
-                isChildrenValueValid |= parseToken(path, currentFieldName);
+                isChildrenValueValid |= parseToken(path);
             }
             this.parser.nextToken();
-        } else if (this.parser.currentToken() == Token.END_ARRAY) {
-            // skip
         } else if (this.parser.currentToken() == Token.START_OBJECT) {
             parser.nextToken();
             while (this.parser.currentToken() != Token.END_OBJECT) {
-                isChildrenValueValid |= parseToken(path, currentFieldName);
+                isChildrenValueValid |= parseToken(path);
             }
             this.parser.nextToken();
         } else {
@@ -148,21 +144,6 @@ public void removeKeyOfNullValue() {
         this.keyList.remove(keyList.size() - 1);
     }
 
-    private boolean processNoNestedValue() throws IOException {
-        if (parser.currentToken() == Token.VALUE_NULL) {
-            return true;
-        } else if (this.parser.currentToken() == Token.VALUE_STRING
-            || this.parser.currentToken() == Token.VALUE_NUMBER
-            || this.parser.currentToken() == Token.VALUE_BOOLEAN) {
-                String value = this.parser.textOrNull();
-                if (value != null) {
-                    this.valueList.add(value);
-                }
-                return true;
-            }
-        return false;
-    }
-
     private String parseValue() throws IOException {
         switch (this.parser.currentToken()) {
             case VALUE_BOOLEAN:
@@ -172,7 +153,7 @@ private String parseValue() throws IOException {
                 return this.parser.textOrNull();
             // Handle other token types as needed
             default:
-                throw new IOException("Unsupported value token type [" + parser.currentToken() + "]");
+                throw new ParsingException(parser.getTokenLocation(), "Unexpected value token type [" + parser.currentToken() + "]");
         }
     }
 

diff --git a/server/src/main/java/org/opensearch/index/mapper/FlatObjectFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/FlatObjectFieldMapper.java
@@ -30,6 +30,7 @@
 import org.opensearch.common.lucene.Lucene;
 import org.opensearch.common.lucene.search.AutomatonQueries;
 import org.opensearch.common.xcontent.JsonToStringXContentParser;
+import org.opensearch.core.common.ParsingException;
 import org.opensearch.core.xcontent.DeprecationHandler;
 import org.opensearch.core.xcontent.NamedXContentRegistry;
 import org.opensearch.core.xcontent.XContentParser;
@@ -568,31 +569,41 @@ protected void parseCreateField(ParseContext context) throws IOException {
         if (context.externalValueSet()) {
             String value = context.externalValue().toString();
             parseValueAddFields(context, value, fieldType().name());
-        } else if (context.parser().currentToken() != XContentParser.Token.VALUE_NULL) {
-            JsonToStringXContentParser jsonToStringParser = new JsonToStringXContentParser(
-                NamedXContentRegistry.EMPTY,
-                DeprecationHandler.IGNORE_DEPRECATIONS,
-                context.parser(),
-                fieldType().name()
-            );
-            /*
-              JsonToStringParser is the main parser class to transform JSON into stringFields in a XContentParser
-              It reads the JSON object and parsed to a list of string
-             */
-            XContentParser parser = jsonToStringParser.parseObject();
-
-            XContentParser.Token currentToken;
-            while ((currentToken = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
-                switch (currentToken) {
-                    case FIELD_NAME:
-                        fieldName = parser.currentName();
-                        break;
-                    case VALUE_STRING:
-                        String value = parser.textOrNull();
-                        parseValueAddFields(context, value, fieldName);
-                        break;
+        } else {
+            XContentParser ctxParser = context.parser();
+            if (ctxParser.currentToken() != XContentParser.Token.VALUE_NULL) {
+                if (ctxParser.currentToken() != XContentParser.Token.START_OBJECT) {
+                    throw new ParsingException(
+                        ctxParser.getTokenLocation(),
+                        "[" + this.name() + "] unexpected token [" + ctxParser.currentToken() + "] in flat_object field value"
+                    );
                 }
 
+                JsonToStringXContentParser jsonToStringParser = new JsonToStringXContentParser(
+                    NamedXContentRegistry.EMPTY,
+                    DeprecationHandler.IGNORE_DEPRECATIONS,
+                    ctxParser,
+                    fieldType().name()
+                );
+                /*
+                  JsonToStringParser is the main parser class to transform JSON into stringFields in a XContentParser
+                  It reads the JSON object and parsed to a list of string
+                 */
+                XContentParser parser = jsonToStringParser.parseObject();
+
+                XContentParser.Token currentToken;
+                while ((currentToken = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
+                    switch (currentToken) {
+                        case FIELD_NAME:
+                            fieldName = parser.currentName();
+                            break;
+                        case VALUE_STRING:
+                            String value = parser.textOrNull();
+                            parseValueAddFields(context, value, fieldName);
+                            break;
+                    }
+
+                }
             }
         }