Skip to content

Commit

Permalink
TRegex: fix PE bailout in backtracking engine
Browse files Browse the repository at this point in the history
  • Loading branch information
djoooooe committed May 31, 2024
1 parent c2fa2d8 commit f5b8bd7
Show file tree
Hide file tree
Showing 4 changed files with 39 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@
*/
package com.oracle.truffle.regex.tregex.nfa;

import static com.oracle.truffle.api.CompilerDirectives.CompilationFinal;

import java.util.Arrays;
import java.util.EnumSet;

import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary;
Expand Down Expand Up @@ -122,6 +125,8 @@ public enum Kind {
checkGroupNotMatched
}

// Cached copy of the Kind constants, indexed by ordinal. getKind(long) reads from this array
// instead of calling Kind.values(), which returns a freshly allocated array on every call.
// NOTE(review): @CompilationFinal(dimensions = 1) is presumably what lets partial evaluation
// treat the array elements as constants - confirm against the Truffle documentation.
@CompilationFinal(dimensions = 1) private static final Kind[] KIND_VALUES = Arrays.copyOf(Kind.values(), Kind.values().length);

private static final EnumSet<Kind> QUANTIFIER_GUARDS = EnumSet.of(Kind.loop, Kind.loopInc, Kind.exit, Kind.exitReset);
private static final EnumSet<Kind> ZERO_WIDTH_QUANTIFIER_GUARDS = EnumSet.of(Kind.enterZeroWidth, Kind.exitZeroWidth, Kind.escapeZeroWidth);
private static final EnumSet<Kind> GROUP_NUMBER_GUARDS = EnumSet.of(Kind.updateRecursiveBackrefPointer, Kind.checkGroupMatched, Kind.checkGroupNotMatched);
Expand Down Expand Up @@ -150,6 +155,7 @@ public static long createEnterZeroWidth(Quantifier quantifier) {
}

/**
 * Builds an {@code enterZeroWidth} guard that refers to the same zero-width quantifier as the
 * given guard.
 *
 * @param guard a guard of kind {@code exitZeroWidth} or {@code escapeZeroWidth} (checked by
 *            assertion only)
 * @return a new {@code enterZeroWidth} guard carrying the zero-width quantifier index of
 *         {@code guard}
 */
public static long createEnterZeroWidthFromExit(long guard) {
    assert is(guard, Kind.exitZeroWidth) || is(guard, Kind.escapeZeroWidth);
    final int zeroWidthQuantifierIndex = getZeroWidthQuantifierIndex(guard);
    return create(Kind.enterZeroWidth, zeroWidthQuantifierIndex);
}

Expand Down Expand Up @@ -196,7 +202,7 @@ private static int getKindOrdinal(long guard) {
}

/**
 * Decodes the {@link Kind} stored in the given guard.
 * <p>
 * The lookup goes through the cached {@code KIND_VALUES} array rather than
 * {@code Kind.values()}, because {@code values()} returns a newly allocated copy of the
 * constant array on every call.
 *
 * @param guard an encoded transition guard
 * @return the kind whose ordinal is encoded in {@code guard}
 */
public static Kind getKind(long guard) {
    return KIND_VALUES[getKindOrdinal(guard)];
}

public static boolean is(long guard, Kind kind) {
Expand All @@ -213,14 +219,14 @@ public static int getZeroWidthQuantifierIndex(long guard) {
return (int) guard;
}

/**
 * Returns the group number stored in the given guard.
 * <p>
 * Only valid for guard kinds contained in {@code GROUP_NUMBER_GUARDS}
 * ({@code updateRecursiveBackrefPointer}, {@code checkGroupMatched} and
 * {@code checkGroupNotMatched}); this is checked by assertion only.
 *
 * @param guard an encoded transition guard
 * @return the value of {@code guard} narrowed to {@code int}
 */
public static int getGroupNumber(long guard) {
assert GROUP_NUMBER_GUARDS.contains(getKind(guard));
return (int) guard;
}

/**
* Returns the capture group boundary index for {@code updateCG} guards.
*/
public static int getGroupBoundaryIndex(long guard) {
assert GROUP_BOUNDARY_INDEX_GUARDS.contains(getKind(guard));
return (int) guard;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,9 @@
import com.oracle.truffle.regex.tregex.buffer.IntRingBuffer;
import com.oracle.truffle.regex.tregex.nfa.PureNFATransition;
import com.oracle.truffle.regex.tregex.nodes.TRegexExecutorLocals;
import com.oracle.truffle.regex.tregex.parser.CaseFoldData;
import com.oracle.truffle.regex.tregex.parser.ast.Group;
import com.oracle.truffle.regex.tregex.util.MathUtil;
import com.oracle.truffle.regex.util.BitSets;

/**
Expand Down Expand Up @@ -80,6 +82,8 @@
*/
public final class TRegexBacktrackingNFAExecutorLocals extends TRegexExecutorLocals {

// Ceiling of log2 of CaseFoldData.MAX_MULTI_CHAR_SEQUENCE_LENGTH; passed as the constructor
// argument of the IntRingBuffer instances created for back-reference multi-character expansion.
// NOTE(review): presumably IntRingBuffer interprets its argument as a power-of-two exponent for
// its capacity - confirm against IntRingBuffer.
private static final int MULTI_CHAR_EXPANSION_LENGTH_POWER_OF_2 = MathUtil.log2ceil(CaseFoldData.MAX_MULTI_CHAR_SEQUENCE_LENGTH);

private final int stackFrameSize;
private final int nQuantifierCounts;
private final int nZeroWidthQuantifiers;
Expand Down Expand Up @@ -175,8 +179,8 @@ public static TRegexBacktrackingNFAExecutorLocals create(
trackLastGroup,
dontOverwriteLastGroup,
recursiveBackrefs,
backrefMultiCharExpansion ? new IntRingBuffer(7) : null,
backrefMultiCharExpansion ? new IntRingBuffer(7) : null);
backrefMultiCharExpansion ? new IntRingBuffer(MULTI_CHAR_EXPANSION_LENGTH_POWER_OF_2) : null,
backrefMultiCharExpansion ? new IntRingBuffer(MULTI_CHAR_EXPANSION_LENGTH_POWER_OF_2) : null);
ret.setIndex(fromIndex);
ret.clearCaptureGroups();
if (recursiveBackrefs) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -308,11 +308,21 @@ private boolean isFlagSet(int flag) {
}

/**
 * Looks up the quantifier referenced by the given guard in {@code quantifiers}.
 * <p>
 * Both the guard and the quantifier index decoded from it are asserted to be
 * partial-evaluation constants before the array is read.
 *
 * @param guard an encoded transition guard carrying a quantifier index
 * @return the entry of {@code quantifiers} at the index encoded in {@code guard}
 */
private Quantifier getQuantifier(long guard) {
    CompilerAsserts.partialEvaluationConstant(guard);
    int quantifierIndex = TransitionGuard.getQuantifierIndex(guard);
    CompilerAsserts.partialEvaluationConstant(quantifierIndex);
    return quantifiers[quantifierIndex];
}

/**
 * Looks up the zero-width quantifier referenced by the given guard in
 * {@code zeroWidthQuantifiers}.
 * <p>
 * The receiver, the guard, the array, the decoded index and the resulting quantifier are each
 * asserted to be partial-evaluation constants, so a non-constant value is reported at the
 * point where it first appears.
 *
 * @param guard an encoded transition guard carrying a zero-width quantifier index
 * @return the entry of {@code zeroWidthQuantifiers} at the index encoded in {@code guard}
 */
private Quantifier getZeroWidthQuantifier(long guard) {
    CompilerAsserts.partialEvaluationConstant(this);
    CompilerAsserts.partialEvaluationConstant(guard);
    int zeroWidthQuantifierIndex = TransitionGuard.getZeroWidthQuantifierIndex(guard);
    CompilerAsserts.partialEvaluationConstant(zeroWidthQuantifiers);
    CompilerAsserts.partialEvaluationConstant(zeroWidthQuantifierIndex);
    Quantifier zeroWidthQuantifier = zeroWidthQuantifiers[zeroWidthQuantifierIndex];
    CompilerAsserts.partialEvaluationConstant(zeroWidthQuantifier);
    return zeroWidthQuantifier;
}

@Override
Expand Down Expand Up @@ -767,9 +777,15 @@ protected boolean transitionMatches(VirtualFrame frame, TRegexBacktrackingNFAExe
if (transition.hasDollarGuard() && index < locals.getRegionTo()) {
return false;
}
for (long guard : transition.getGuards()) {
long[] guards = transition.getGuards();
CompilerAsserts.partialEvaluationConstant(guards);
for (int i = 0; i < guards.length; i++) {
CompilerAsserts.partialEvaluationConstant(i);
long guard = guards[i];
CompilerAsserts.partialEvaluationConstant(guard);
switch (TransitionGuard.getKind(guard)) {
TransitionGuard.Kind kind = TransitionGuard.getKind(guard);
CompilerAsserts.partialEvaluationConstant(kind);
switch (kind) {
case loop -> {
// retreat if quantifier count is at maximum
if (locals.getQuantifierCount(TransitionGuard.getQuantifierIndex(guard)) == getQuantifier(guard).getMax()) {
Expand All @@ -784,6 +800,7 @@ protected boolean transitionMatches(VirtualFrame frame, TRegexBacktrackingNFAExe
}
case exitZeroWidth -> {
Quantifier q = getZeroWidthQuantifier(guard);
CompilerAsserts.partialEvaluationConstant(q);
if (locals.getZeroWidthQuantifierGuardIndex(TransitionGuard.getZeroWidthQuantifierIndex(guard)) == index &&
(!isMonitorCaptureGroupsInEmptyCheck() || locals.isResultUnmodifiedByZeroWidthQuantifier(TransitionGuard.getZeroWidthQuantifierIndex(guard))) &&
// In JS, we allow this guard to pass if we are still in the
Expand Down Expand Up @@ -865,6 +882,7 @@ protected void updateState(TRegexBacktrackingNFAExecutorLocals locals, PureNFATr
}
case exitZeroWidth -> {
Quantifier q = getZeroWidthQuantifier(guard);
CompilerAsserts.partialEvaluationConstant(q);
boolean emptyCheckFailed = locals.getZeroWidthQuantifierGuardIndex(TransitionGuard.getZeroWidthQuantifierIndex(guard)) == index &&
(!isMonitorCaptureGroupsInEmptyCheck() || locals.isResultUnmodifiedByZeroWidthQuantifier(TransitionGuard.getZeroWidthQuantifierIndex(guard)));
boolean advancePastOptionalIterations = !isEmptyChecksOnMandatoryLoopIterations() && q.hasIndex() && locals.getQuantifierCount(q.getIndex()) < q.getMin();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -624,6 +624,7 @@ public void appendRangesTo(RangesBuffer buffer, int startIndex, int endIndex) {
"xii",
"fo",
};
// NOTE(review): presumably the length of the longest multi-character case-fold sequence in the
// tables of this class - confirm, and keep in sync if the generated data changes.
public static final int MAX_MULTI_CHAR_SEQUENCE_LENGTH = 4;
private static final CaseFoldEquivalenceTable UNICODE_15_1_0_SIMPLE = new CaseFoldEquivalenceTable(null, new CodePointSet[]{
rangeSet(0x00004b, 0x00004b, 0x00006b, 0x00006b, 0x00212a, 0x00212a),
rangeSet(0x000053, 0x000053, 0x000073, 0x000073, 0x00017f, 0x00017f),
Expand Down

0 comments on commit f5b8bd7

Please sign in to comment.