Skip to content

Commit

Permalink
Avoid infinite loop in flat_object parsing (#15985)
Browse files Browse the repository at this point in the history
* Avoid infinite loop in flat_object parsing

We had logic in flat_object parsing that would:

1. Try parsing a flat object field that is not an object or null.
2. Would see an END_ARRAY token, ignore it, and not advance the parser.

Combined, this would create a scenario where passing an array of
strings for a flat_object would parse the string values, then loop
infinitely on the END_ARRAY token.

Signed-off-by: Michael Froh <[email protected]>

* Remove some unused code and add more tests

The removed code does not actually seem to affect the logic. Also, I
want to be 100% sure that every call to parseToken is guaranteed to
call parser.nextToken() at some point.

Signed-off-by: Michael Froh <[email protected]>

* Remove unused parameter from parseToken

Thanks for the reminder, @kkewwei!

Signed-off-by: Michael Froh <[email protected]>

* Add skip for newly-added test

The test fails on MixedClusterClientYamlTestSuiteIT because 2.x still
has the infinite loop until backport.

Signed-off-by: Michael Froh <[email protected]>

---------

Signed-off-by: Michael Froh <[email protected]>
  • Loading branch information
msfroh committed Sep 20, 2024
1 parent 4963792 commit 05dab3b
Show file tree
Hide file tree
Showing 4 changed files with 90 additions and 53 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Fix wildcard query containing escaped character ([#15737](https://github.com/opensearch-project/OpenSearch/pull/15737))
- Fix case-insensitive query on wildcard field ([#15882](https://github.com/opensearch-project/OpenSearch/pull/15882))
- Add validation for the search backpressure cancellation settings ([#15501](https://github.com/opensearch-project/OpenSearch/pull/15501))
- Avoid infinite loop when `flat_object` field contains invalid token ([#15985](https://github.com/opensearch-project/OpenSearch/pull/15985))
- Fix infinite loop in nested agg ([#15931](https://github.com/opensearch-project/OpenSearch/pull/15931))

### Security
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,6 @@ setup:
},
"required_matches": 1
}

# Do index refresh
- do:
indices.refresh:
Expand All @@ -74,7 +73,52 @@ teardown:
- do:
indices.delete:
index: test

---
"Invalid docs":
- skip:
version: "- 2.99.99"
reason: "parsing of these objects would infinite loop prior to 2.18"
# The following documents are invalid.
- do:
catch: /parsing_exception/
index:
index: test
id: 3
body: {
"ISBN13": "V12154942123242",
"catalog": [ "Arrays in Action" ],
"required_matches": 1
}
- do:
catch: /parsing_exception/
index:
index: test
id: 3
body: {
"ISBN13": "V12154942123242",
"catalog": "Strings in Action",
"required_matches": 1
}
- do:
catch: /parsing_exception/
index:
index: test
id: 3
body: {
"ISBN13": "V12154942123242",
"catalog": 12345,
"required_matches": 1
}
- do:
catch: /parsing_exception/
index:
index: test
id: 3
body: {
"ISBN13": "V12154942123242",
"catalog": [ 12345 ],
"required_matches": 1
}
---
# Verify that mappings under the catalog field did not expand
# and no dynamic fields were created.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
package org.opensearch.common.xcontent;

import org.opensearch.common.xcontent.json.JsonXContent;
import org.opensearch.core.common.ParsingException;
import org.opensearch.core.common.Strings;
import org.opensearch.core.common.bytes.BytesReference;
import org.opensearch.core.xcontent.AbstractXContentParser;
Expand Down Expand Up @@ -73,7 +74,7 @@ public XContentParser parseObject() throws IOException {
builder.startObject();
LinkedList<String> path = new LinkedList<>(Collections.singleton(fieldTypeName));
while (currentToken() != Token.END_OBJECT) {
parseToken(path, null);
parseToken(path);
}
// deduplication the fieldName,valueList,valueAndPathList
builder.field(this.fieldTypeName, new HashSet<>(keyList));
Expand All @@ -87,14 +88,11 @@ public XContentParser parseObject() throws IOException {
/**
* @return true if the child object contains no_null value, false otherwise
*/
private boolean parseToken(Deque<String> path, String currentFieldName) throws IOException {
if (path.size() == 1 && processNoNestedValue()) {
return true;
}
private boolean parseToken(Deque<String> path) throws IOException {
boolean isChildrenValueValid = false;
boolean visitFieldName = false;
if (this.parser.currentToken() == Token.FIELD_NAME) {
currentFieldName = this.parser.currentName();
final String currentFieldName = this.parser.currentName();
path.addLast(currentFieldName); // Pushing onto the stack *must* be matched by pop
visitFieldName = true;
String parts = currentFieldName;
Expand All @@ -106,23 +104,21 @@ private boolean parseToken(Deque<String> path, String currentFieldName) throws I
}
this.keyList.add(parts); // parts has no dot, so either it's the original fieldName or it's the last part
this.parser.nextToken(); // advance to the value of fieldName
isChildrenValueValid = parseToken(path, currentFieldName); // parse the value for fieldName (which will be an array, an object,
// or a primitive value)
isChildrenValueValid = parseToken(path); // parse the value for fieldName (which will be an array, an object,
// or a primitive value)
path.removeLast(); // Here is where we pop fieldName from the stack (since we're done with the value of fieldName)
// Note that whichever other branch we just passed through has already ended with nextToken(), so we
// don't need to call it.
} else if (this.parser.currentToken() == Token.START_ARRAY) {
parser.nextToken();
while (this.parser.currentToken() != Token.END_ARRAY) {
isChildrenValueValid |= parseToken(path, currentFieldName);
isChildrenValueValid |= parseToken(path);
}
this.parser.nextToken();
} else if (this.parser.currentToken() == Token.END_ARRAY) {
// skip
} else if (this.parser.currentToken() == Token.START_OBJECT) {
parser.nextToken();
while (this.parser.currentToken() != Token.END_OBJECT) {
isChildrenValueValid |= parseToken(path, currentFieldName);
isChildrenValueValid |= parseToken(path);
}
this.parser.nextToken();
} else {
Expand All @@ -148,21 +144,6 @@ public void removeKeyOfNullValue() {
this.keyList.remove(keyList.size() - 1);
}

private boolean processNoNestedValue() throws IOException {
if (parser.currentToken() == Token.VALUE_NULL) {
return true;
} else if (this.parser.currentToken() == Token.VALUE_STRING
|| this.parser.currentToken() == Token.VALUE_NUMBER
|| this.parser.currentToken() == Token.VALUE_BOOLEAN) {
String value = this.parser.textOrNull();
if (value != null) {
this.valueList.add(value);
}
return true;
}
return false;
}

private String parseValue() throws IOException {
switch (this.parser.currentToken()) {
case VALUE_BOOLEAN:
Expand All @@ -172,7 +153,7 @@ private String parseValue() throws IOException {
return this.parser.textOrNull();
// Handle other token types as needed
default:
throw new IOException("Unsupported value token type [" + parser.currentToken() + "]");
throw new ParsingException(parser.getTokenLocation(), "Unexpected value token type [" + parser.currentToken() + "]");
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import org.opensearch.common.lucene.Lucene;
import org.opensearch.common.lucene.search.AutomatonQueries;
import org.opensearch.common.xcontent.JsonToStringXContentParser;
import org.opensearch.core.common.ParsingException;
import org.opensearch.core.xcontent.DeprecationHandler;
import org.opensearch.core.xcontent.NamedXContentRegistry;
import org.opensearch.core.xcontent.XContentParser;
Expand Down Expand Up @@ -568,31 +569,41 @@ protected void parseCreateField(ParseContext context) throws IOException {
if (context.externalValueSet()) {
String value = context.externalValue().toString();
parseValueAddFields(context, value, fieldType().name());
} else if (context.parser().currentToken() != XContentParser.Token.VALUE_NULL) {
JsonToStringXContentParser jsonToStringParser = new JsonToStringXContentParser(
NamedXContentRegistry.EMPTY,
DeprecationHandler.IGNORE_DEPRECATIONS,
context.parser(),
fieldType().name()
);
/*
JsonToStringParser is the main parser class to transform JSON into stringFields in a XContentParser
It reads the JSON object and parsed to a list of string
*/
XContentParser parser = jsonToStringParser.parseObject();

XContentParser.Token currentToken;
while ((currentToken = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
switch (currentToken) {
case FIELD_NAME:
fieldName = parser.currentName();
break;
case VALUE_STRING:
String value = parser.textOrNull();
parseValueAddFields(context, value, fieldName);
break;
} else {
XContentParser ctxParser = context.parser();
if (ctxParser.currentToken() != XContentParser.Token.VALUE_NULL) {
if (ctxParser.currentToken() != XContentParser.Token.START_OBJECT) {
throw new ParsingException(
ctxParser.getTokenLocation(),
"[" + this.name() + "] unexpected token [" + ctxParser.currentToken() + "] in flat_object field value"
);
}

JsonToStringXContentParser jsonToStringParser = new JsonToStringXContentParser(
NamedXContentRegistry.EMPTY,
DeprecationHandler.IGNORE_DEPRECATIONS,
ctxParser,
fieldType().name()
);
/*
JsonToStringParser is the main parser class to transform JSON into stringFields in a XContentParser
It reads the JSON object and parsed to a list of string
*/
XContentParser parser = jsonToStringParser.parseObject();

XContentParser.Token currentToken;
while ((currentToken = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
switch (currentToken) {
case FIELD_NAME:
fieldName = parser.currentName();
break;
case VALUE_STRING:
String value = parser.textOrNull();
parseValueAddFields(context, value, fieldName);
break;
}

}
}
}

Expand Down

0 comments on commit 05dab3b

Please sign in to comment.