Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[GR-52906] Re-enable fast-forward past empty mandatory iterations in JS. #8643

Merged
merged 4 commits into from
Mar 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions regex/ci/ci.jsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,9 @@
["git", "clone", ["mx", "urlrewrite", "https://github.com/graalvm/js-tests.git"], "../../js-tests"],
["mx", "-p", "../vm", "--dynamicimports", "/graal-js,js-tests", "checkout-downstream", "graal-js", "js-tests"],
# run downstream gate from js-tests suite.
["mx", "-p", "../../js-tests", "sversions"],
["mx", "-p", "../../js-tests", "gate", "--no-warning-as-error", "--all-suites", "--tags", "build,Test262-default,TestV8-default,regex"],
["cd", "../../js-tests"],
["mx", "sversions"],
["mx", "gate", "--no-warning-as-error", "--all-suites", "--tags", "build,Test262-default,TestV8-default,regex"],
],
targets: ["gate"],
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -289,4 +289,17 @@ public void innerLiteralSurrogates() {
test("\\udf06", "u", "\uD834\uDF06", 0, false);
test("x?\\udf06", "u", "\uD834\uDF06", 0, false);
}

@Test
public void gr52906() {
// Original test case
test("\\b(((.*?)){67108860})\\b|(?=(?=(?!.).\\b(\\d))){0,4}", "yi", "L1O\n\n\n11\n \n\n11\n \uD091 1aa\uFCDB=\n ", 0, true, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1);
// Minimized version
test("(.*?){67108863}", "", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxx", 0, true, 0, 0, 0, 0);

// Linked issue
test("(?=(?=(\\W)\u008e+|\\uC47A|(\\s)))+?|((((?:(\\\u0015)))+?))|(?:\\r|[^]+?[^])|\\3{3,}", "gyim", "", 0, true, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
// Minimized version
test("()\\1{3,}", "", "", 0, true, 0, 0, 0, 0);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@
import com.oracle.truffle.regex.tregex.util.json.Json;
import com.oracle.truffle.regex.tregex.util.json.JsonValue;

import java.util.Arrays;

/**
* Represents a transition of a {@link PureNFA}.
*/
Expand Down Expand Up @@ -122,6 +124,7 @@ public JsonValue toJson(RegexAST ast) {
Json.prop("source", source.getId()),
Json.prop("target", target.getId()),
Json.prop("groupBoundaries", groupBoundaries),
Json.prop("sourceSections", groupBoundaries.indexUpdateSourceSectionsToJson(ast)));
Json.prop("sourceSections", groupBoundaries.indexUpdateSourceSectionsToJson(ast)),
Json.prop("quantifierGuards", Arrays.stream(quantifierGuards).map(QuantifierGuard::toJson)));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@
import com.oracle.truffle.regex.tregex.parser.Token.Quantifier;
import com.oracle.truffle.regex.tregex.parser.ast.ConditionalBackReferenceGroup;
import com.oracle.truffle.regex.tregex.parser.flavors.RegexFlavor;
import com.oracle.truffle.regex.tregex.util.json.Json;
import com.oracle.truffle.regex.tregex.util.json.JsonValue;

import java.util.Objects;

Expand Down Expand Up @@ -228,4 +230,9 @@ public String toString() {
return kind + " " + index;
}
}

@TruffleBoundary
public JsonValue toJson() {
return Json.val(toString());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -89,12 +89,13 @@ public final class TRegexBacktrackingNFAExecutorNode extends TRegexBacktrackerSu
private static final int FLAG_BACKREF_WITH_NULL_TARGET_FAILS = 1 << 2;
private static final int FLAG_MONITOR_CAPTURE_GROUPS_IN_EMPTY_CHECK = 1 << 3;
private static final int FLAG_TRANSITION_MATCHES_STEP_BY_STEP = 1 << 4;
private static final int FLAG_TRACK_LAST_GROUP = 1 << 5;
private static final int FLAG_RETURNS_FIRST_GROUP = 1 << 6;
private static final int FLAG_MUST_ADVANCE = 1 << 7;
private static final int FLAG_LONE_SURROGATES = 1 << 8;
private static final int FLAG_LOOPBACK_INITIAL_STATE = 1 << 9;
private static final int FLAG_USE_MERGE_EXPLODE = 1 << 10;
private static final int FLAG_EMPTY_CHECKS_ON_MANDATORY_LOOP_ITERATIONS = 1 << 5;
private static final int FLAG_TRACK_LAST_GROUP = 1 << 6;
private static final int FLAG_RETURNS_FIRST_GROUP = 1 << 7;
private static final int FLAG_MUST_ADVANCE = 1 << 8;
private static final int FLAG_LONE_SURROGATES = 1 << 9;
private static final int FLAG_LOOPBACK_INITIAL_STATE = 1 << 10;
private static final int FLAG_USE_MERGE_EXPLODE = 1 << 11;
private static final int FLAG_RECURSIVE_BACK_REFERENCES = 1 << 12;

private final PureNFA nfa;
Expand Down Expand Up @@ -199,6 +200,7 @@ private static int createFlags(RegexAST ast, PureNFA nfa, boolean mustAdvance, R
flags = setFlag(flags, FLAG_BACKREF_WITH_NULL_TARGET_FAILS, ast.getOptions().getFlavor().backreferencesToUnmatchedGroupsFail());
flags = setFlag(flags, FLAG_MONITOR_CAPTURE_GROUPS_IN_EMPTY_CHECK, ast.getOptions().getFlavor().emptyChecksMonitorCaptureGroups());
flags = setFlag(flags, FLAG_TRANSITION_MATCHES_STEP_BY_STEP, ast.getOptions().getFlavor().matchesTransitionsStepByStep());
flags = setFlag(flags, FLAG_EMPTY_CHECKS_ON_MANDATORY_LOOP_ITERATIONS, ast.getOptions().getFlavor().emptyChecksOnMandatoryLoopIterations());
flags = setFlag(flags, FLAG_TRACK_LAST_GROUP, ast.getOptions().getFlavor().usesLastGroupResultField());
flags = setFlag(flags, FLAG_RETURNS_FIRST_GROUP, !isFlagSet(flags, FLAG_FORWARD) && ast.getOptions().getFlavor().lookBehindsRunLeftToRight());
flags = setFlag(flags, FLAG_MUST_ADVANCE, mustAdvance);
Expand Down Expand Up @@ -250,6 +252,10 @@ public boolean isTransitionMatchesStepByStep() {
return isFlagSet(FLAG_TRANSITION_MATCHES_STEP_BY_STEP);
}

public boolean isEmptyChecksOnMandatoryLoopIterations() {
return isFlagSet(FLAG_EMPTY_CHECKS_ON_MANDATORY_LOOP_ITERATIONS);
}

public boolean isTrackLastGroup() {
return isFlagSet(FLAG_TRACK_LAST_GROUP);
}
Expand Down Expand Up @@ -749,7 +755,10 @@ protected boolean transitionMatches(VirtualFrame frame, TRegexBacktrackingNFAExe
case exitZeroWidth:
if (locals.getZeroWidthQuantifierGuardIndex(q) == index &&
(!isMonitorCaptureGroupsInEmptyCheck() || locals.isResultUnmodifiedByZeroWidthQuantifier(q)) &&
(!q.hasIndex() || locals.getQuantifierCount(q) > q.getMin())) {
// In JS, we allow this guard to pass if we are still in the
// optional part of the quantifier. This allows JS to fast-
// forward past all the empty mandatory iterations.
(isEmptyChecksOnMandatoryLoopIterations() || !q.hasIndex() || locals.getQuantifierCount(q) > q.getMin())) {
return false;
}
break;
Expand Down Expand Up @@ -827,6 +836,29 @@ protected void updateState(TRegexBacktrackingNFAExecutorLocals locals, PureNFATr
locals.setZeroWidthQuantifierGuardIndex(q);
locals.setZeroWidthQuantifierResults(q);
break;
case exitZeroWidth:
boolean emptyCheckFailed = locals.getZeroWidthQuantifierGuardIndex(q) == index &&
(!isMonitorCaptureGroupsInEmptyCheck() || locals.isResultUnmodifiedByZeroWidthQuantifier(q));
boolean advancePastOptionalIterations = !isEmptyChecksOnMandatoryLoopIterations() && q.hasIndex() && locals.getQuantifierCount(q) < q.getMin();
if (emptyCheckFailed && advancePastOptionalIterations && !transition.hasCaretGuard() && !transition.hasDollarGuard()) {
// We advance the counter to min - 1 to skip past all but one mandatory
// iteration. We do not skip the last mandatory iteration and set the
// counter to min, because of the way JavaScript regexes are executed. The
// JavaScript flavor does not set matchesTransitionStepByStep and therefore
// all guards are tested against the same original state. In the case of the
// last mandatory iteration, we would like it to be possible to match the
// exitZeroWidth guard followed by the exit guard, so that it is possible to
// hit the exact minimum number of iterations. However, this relies on first
// updating the state with exitZeroWidth and then testing this new state
// with the exit guard. This would mean having to enable
// matchesTransitionStepByStep for JavaScript and implementing this logic in
// tryUpdateState instead, which would lead to degraded performance for JS
// regexps. Instead, we choose to advance the counter to just before the
// last mandatory iteration so that this fast-forwarding behavior does not
// coincide with an exit guard that should pass.
locals.setQuantifierCount(q, q.getMin() - 1);
}
break;
default:
break;
}
Expand Down