Skip to content

Commit

Permalink
Improve the identifier detection regex to support non-ascii identifiers (and also support identifiers that start with @) (#291)
Browse files Browse the repository at this point in the history

Fixes #269
  • Loading branch information
metoule authored Jul 5, 2023
1 parent dfa3277 commit f919582
Show file tree
Hide file tree
Showing 2 changed files with 69 additions and 39 deletions.
19 changes: 12 additions & 7 deletions src/DynamicExpresso.Core/Detector.cs
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,11 @@ internal class Detector
{
private readonly ParserSettings _settings;

private static readonly string Type = @"\b(?<type>[a-zA-Z_]\w*)\b";
private static readonly string Id = @"\b(?<id>[a-zA-Z_]\w*)\b";
private static readonly Regex LambdaDetectionRegex = new Regex($@"(\((((?<withtype>({Type}\s+)?{Id}))(\s*,\s*)?)+\)|(?<withtype>{Id}))\s*=>", RegexOptions.Compiled);
private static readonly Regex IdentifiersDetectionRegex = new Regex(@"(?<id>@?[\p{L}\p{Nl}_][\p{L}\p{Nl}\p{Nd}\p{Mn}\p{Mc}\p{Pc}\p{Cf}_]*)", RegexOptions.Compiled);

private static readonly Regex IdentifiersDetectionRegex = new Regex(@"([^\.]|^)\b(?<id>[a-zA-Z_]\w*)\b", RegexOptions.Compiled);
private static readonly string Id = IdentifiersDetectionRegex.ToString();
private static readonly string Type = Id.Replace("<id>", "<type>");
private static readonly Regex LambdaDetectionRegex = new Regex($@"(\((((?<withtype>({Type}\s+)?{Id}))(\s*,\s*)?)+\)|(?<withtype>{Id}))\s*=>", RegexOptions.Compiled);

private static readonly Regex StringDetectionRegex = new Regex(@"(?<!\\)?"".*?(?<!\\)""", RegexOptions.Compiled);
private static readonly Regex CharDetectionRegex = new Regex(@"(?<!\\)?'.{1,2}?(?<!\\)'", RegexOptions.Compiled);
Expand Down Expand Up @@ -57,8 +57,8 @@ public IdentifiersInfo DetectIdentifiers(string expression)
t++;
}

// there might be several lambda parameters with the same name;
// in that case, we ignore the detected type
// there might be several lambda parameters with the same name
// -> in that case, we ignore the detected type
if (lambdaParameters.TryGetValue(identifier, out Identifier already) && already.Expression.Type != type)
type = typeof(object);

Expand All @@ -70,11 +70,16 @@ public IdentifiersInfo DetectIdentifiers(string expression)

foreach (Match match in IdentifiersDetectionRegex.Matches(expression))
{
var identifier = match.Groups["id"].Value;
var idGroup = match.Groups["id"];
var identifier = idGroup.Value;

if (IsReservedKeyword(identifier))
continue;

// don't consider member accesses as identifiers (e.g. "x.Length" will only return x but not Length)
if (idGroup.Index > 0 && expression[idGroup.Index - 1] == '.')
continue;

if (_settings.Identifiers.TryGetValue(identifier, out Identifier knownIdentifier))
knownIdentifiers.Add(knownIdentifier);
else if (lambdaParameters.TryGetValue(identifier, out Identifier knownLambdaParam))
Expand Down
89 changes: 57 additions & 32 deletions test/DynamicExpresso.UnitTest/DetectIdentifiersTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ public void Detect_unknown_identifiers()
var detectedIdentifiers = target.DetectIdentifiers("x + y");

CollectionAssert.AreEqual(
new []{ "x", "y"},
new[] { "x", "y" },
detectedIdentifiers.UnknownIdentifiers.ToArray());
}

Expand Down Expand Up @@ -161,36 +161,30 @@ public void Detect_known_identifiers_types()
}

[Test]
public void Detect_identifiers_inside_other_expressions()
[TestCase("x + y")]
[TestCase("x + y + 654")]
[TestCase("x + y + 654.564")]
[TestCase("x.method + y[0]")]
[TestCase("x+y")]
[TestCase("x[y]")]
[TestCase("x.method1.method2(y)")]
[TestCase("x + y + \"z\"")]
[TestCase("x + y + \"lorem ipsum\"")]
[TestCase(@"x + y + ""literal \""2""")]
[TestCase("x + y + \"\"")]
[TestCase("x + y + 'z'")]
[TestCase("x + y + '\\a'")]
[TestCase("x + y + '\\''")]
[TestCase("x+y")]
public void Detect_identifiers_inside_other_expressions(string testCase)
{
var testCases = new[] {
"x + y",
"x + y + 654",
"x + y + 654.564",
"x.method + y[0]",
"x+y",
"x[y]",
"x.method1.method2(y)",
"x + y + \"z\"",
"x + y + \"lorem ipsum\"",
@"x + y + ""literal \""2""",
"x + y + \"\"",
"x + y + 'z'",
"x + y + '\\a'",
"x + y + '\\''",
"x+y",
};

foreach (var testCase in testCases)
{
var target = new Interpreter();
var target = new Interpreter();

var detectedIdentifiers = target.DetectIdentifiers(testCase);
var detectedIdentifiers = target.DetectIdentifiers(testCase);

Assert.AreEqual("x", detectedIdentifiers.UnknownIdentifiers.ElementAt(0));
Assert.AreEqual("y", detectedIdentifiers.UnknownIdentifiers.ElementAt(1));
Assert.AreEqual(2, detectedIdentifiers.UnknownIdentifiers.Count());
}
Assert.AreEqual(2, detectedIdentifiers.UnknownIdentifiers.Count());
Assert.AreEqual("x", detectedIdentifiers.UnknownIdentifiers.ElementAt(0));
Assert.AreEqual("y", detectedIdentifiers.UnknownIdentifiers.ElementAt(1));
}

[Test]
Expand Down Expand Up @@ -233,15 +227,15 @@ public void Detect_identifiers_inside_lambda_expression_multiple_params()
{
var target = new Interpreter(InterpreterOptions.Default | InterpreterOptions.LambdaExpressions);

var detectedIdentifiers = target.DetectIdentifiers("(x, y) => x + y");
var detectedIdentifiers = target.DetectIdentifiers("(x, _1y) => x + _1y");
Assert.IsEmpty(detectedIdentifiers.UnknownIdentifiers);

Assert.AreEqual(2, detectedIdentifiers.Identifiers.Count());

Assert.AreEqual("x", detectedIdentifiers.Identifiers.ElementAt(0).Name);
Assert.AreEqual(typeof(object), detectedIdentifiers.Identifiers.ElementAt(0).Expression.Type);

Assert.AreEqual("y", detectedIdentifiers.Identifiers.ElementAt(1).Name);
Assert.AreEqual("_1y", detectedIdentifiers.Identifiers.ElementAt(1).Name);
Assert.AreEqual(typeof(object), detectedIdentifiers.Identifiers.ElementAt(1).Expression.Type);
}

Expand All @@ -250,7 +244,7 @@ public void Detect_identifiers_inside_lambda_expression_multiple_params_with_typ
{
var target = new Interpreter(InterpreterOptions.Default | InterpreterOptions.LambdaExpressions);

var detectedIdentifiers = target.DetectIdentifiers("(int x, string y) => x + y");
var detectedIdentifiers = target.DetectIdentifiers("(int x, string @class) => x + @class");
Assert.IsEmpty(detectedIdentifiers.UnknownIdentifiers);

Assert.AreEqual(2, detectedIdentifiers.Types.Count());
Expand All @@ -264,7 +258,7 @@ public void Detect_identifiers_inside_lambda_expression_multiple_params_with_typ
Assert.AreEqual("x", detectedIdentifiers.Identifiers.ElementAt(0).Name);
Assert.AreEqual(typeof(int), detectedIdentifiers.Identifiers.ElementAt(0).Expression.Type);

Assert.AreEqual("y", detectedIdentifiers.Identifiers.ElementAt(1).Name);
Assert.AreEqual("@class", detectedIdentifiers.Identifiers.ElementAt(1).Name);
Assert.AreEqual(typeof(string), detectedIdentifiers.Identifiers.ElementAt(1).Expression.Type);
}

Expand Down Expand Up @@ -299,5 +293,36 @@ public void Detect_identifiers_inside_lambda_expression_duplicate_param_name()
Assert.AreEqual("b", detectedIdentifiers.Identifiers.ElementAt(4).Name);
Assert.AreEqual(typeof(string), detectedIdentifiers.Identifiers.ElementAt(4).Expression.Type);
}

[Test]
[TestCase("@class")]
[TestCase("français_holé")]
[TestCase("中文")]
[TestCase("_1中0文")]
[TestCase("日本語")]
[TestCase("русский")]
public void Detect_all_identifiers_including_not_ascii(string identifier)
{
	var interpreter = new Interpreter(InterpreterOptions.Default | InterpreterOptions.LambdaExpressions);

	// The identifier is used as the target of a member call: exactly one unknown
	// identifier is expected back, and it must be the identifier under test.
	var detection = interpreter.DetectIdentifiers($"1 + {identifier}.Method()");
	var unknown = detection.UnknownIdentifiers.ToArray();

	Assert.AreEqual(1, unknown.Length);
	Assert.AreEqual(identifier, unknown[0]);
}

[Test]
public void Dont_detect_members_with_at()
{
	var interpreter = new Interpreter(InterpreterOptions.Default | InterpreterOptions.LambdaExpressions);

	var unknown = interpreter.DetectIdentifiers("@class.@if()").UnknownIdentifiers.ToArray();

	// "@if" comes right after a dot, so it is a member access and must not be reported;
	// only "@class" is expected among the unknown identifiers.
	Assert.AreEqual(1, unknown.Length);
	Assert.AreEqual("@class", unknown[0]);
}
}
}

0 comments on commit f919582

Please sign in to comment.