From 2238588c7d54f1a49cab3eee16c7e52d781d0a87 Mon Sep 17 00:00:00 2001 From: Sung-Shik Jongmans Date: Fri, 23 Aug 2024 16:35:41 +0200 Subject: [PATCH 01/17] Add `applyEndPatternLast` parameter to `beginEnd` constructor --- rascal-textmate-core/src/main/rascal/lang/textmate/Grammar.rsc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/rascal-textmate-core/src/main/rascal/lang/textmate/Grammar.rsc b/rascal-textmate-core/src/main/rascal/lang/textmate/Grammar.rsc index 736c49e..381daa7 100644 --- a/rascal-textmate-core/src/main/rascal/lang/textmate/Grammar.rsc +++ b/rascal-textmate-core/src/main/rascal/lang/textmate/Grammar.rsc @@ -58,7 +58,8 @@ data TmRule Captures beginCaptures = (), Captures endCaptures = (), list[TmRule] patterns = [], - Repository repository = ()) + Repository repository = (), + bool applyEndPatternLast = false) | include( str include, From 3b11353c0bfe5a34a8a2a3930a05c9b5de2a2825 Mon Sep 17 00:00:00 2001 From: Sung-Shik Jongmans Date: Fri, 23 Aug 2024 16:39:27 +0200 Subject: [PATCH 02/17] Add utility function to check if a symbol is recursive --- .../src/main/rascal/lang/rascal/grammar/Util.rsc | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/Util.rsc b/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/Util.rsc index a7ad3ad..440acfe 100644 --- a/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/Util.rsc +++ b/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/Util.rsc @@ -31,6 +31,22 @@ bool tryParse(Grammar g, Symbol s, str input, bool allowAmbiguity = false) { return false; } +@synopsis{ + Checks if symbol `s` is recursive in grammar `g` +} + +bool isRecursive(Grammar g, Symbol s) { + set[Symbol] getChildren(Symbol s) + = {s | p <- lookup(g, s), /Symbol s := p.symbols}; + + bool check(set[Symbol] checking, Symbol s) + = s in checking + ? 
true + : any(child <- getChildren(s), check(checking + s, child)); + + return check({}, s); +} + @synopsis{ Lookups a list of productions for symbol `s` in grammar `g`, replacing formal parameters with actual parameters when needed From f6a2803c996701a057baf82ed8ec75901e199f74 Mon Sep 17 00:00:00 2001 From: Sung-Shik Jongmans Date: Fri, 23 Aug 2024 16:40:46 +0200 Subject: [PATCH 03/17] Add utility function to remove the conditional from a symbol --- .../src/main/rascal/lang/rascal/grammar/Util.rsc | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/Util.rsc b/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/Util.rsc index 440acfe..0e71bab 100644 --- a/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/Util.rsc +++ b/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/Util.rsc @@ -115,7 +115,14 @@ Symbol destar(\alt({symbol})) default Symbol destar(Symbol s) = s; @synopsis{ - Retain from set `symbols` each symbol that is a strict prefix of any other + Removes the conditional from symbol `s`, if any +} + +Symbol decond(\conditional(Symbol s, _)) = decond(s); +default Symbol decond(Symbol s) = s; + +@synopsis{ + Retains from set `symbols` each symbol that is a strict prefix of any other symbol in `symbols` } From 667ae4b64a098cfb27ad498a1ce33c2d68340757 Mon Sep 17 00:00:00 2001 From: Sung-Shik Jongmans Date: Fri, 23 Aug 2024 16:43:37 +0200 Subject: [PATCH 04/17] Add overloaded version of `getInnerDelimiterPair` for symbols --- .../main/rascal/lang/rascal/grammar/analyze/Delimiters.rsc | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/analyze/Delimiters.rsc b/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/analyze/Delimiters.rsc index a4b882b..55ce0ad 100644 --- a/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/analyze/Delimiters.rsc +++ 
b/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/analyze/Delimiters.rsc @@ -59,6 +59,12 @@ list[&T] reorder(list[&T] l, backward()) = reverse(l); rightmost delimiter. } +DelimiterPair getInnerDelimiterPair(Grammar g, Symbol s, bool getOnlyFirst = false) { + Maybe[Symbol] begin = getInnerDelimitersBySymbol(g, forward(), getOnlyFirst = getOnlyFirst)[s]; + Maybe[Symbol] end = getInnerDelimitersBySymbol(g, backward(), getOnlyFirst = getOnlyFirst)[s]; + return ; +} + DelimiterPair getInnerDelimiterPair(Grammar g, Production p, bool getOnlyFirst = false) { Maybe[Symbol] begin = getInnerDelimiterByProduction(g, forward() , getOnlyFirst = getOnlyFirst)[p]; Maybe[Symbol] end = getInnerDelimiterByProduction(g, backward(), getOnlyFirst = getOnlyFirst)[p]; @@ -79,6 +85,7 @@ private map[Production, Maybe[Symbol]] getInnerDelimiterByProduction(Grammar g, for (p <- ret, ret[p] == nothing()) { for (s <- reorder(p.symbols, direction)) { s = delabel(s); + s = decond(s); if (isDelimiter(s)) { ret[p] = just(s); break; From 22ea04575a1c338f5850c44739163fd08be9f2af Mon Sep 17 00:00:00 2001 From: Sung-Shik Jongmans Date: Fri, 23 Aug 2024 16:47:25 +0200 Subject: [PATCH 05/17] Add `\conditional` to `destar` --- .../src/main/rascal/lang/rascal/grammar/Util.rsc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/Util.rsc b/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/Util.rsc index 0e71bab..b93d6c5 100644 --- a/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/Util.rsc +++ b/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/Util.rsc @@ -112,6 +112,9 @@ Symbol destar(\seq([symbol])) Symbol destar(\alt({symbol})) = \alt({destar(symbol)}); +Symbol destar(\conditional(symbol, conditions)) + = \conditional(destar(symbol), conditions); + default Symbol destar(Symbol s) = s; @synopsis{ From 217252dcc202d04826cce7eb89618af8363688a4 Mon Sep 17 00:00:00 2001 From: Sung-Shik Jongmans Date: Mon, 2 Sep 2024 
15:28:41 +0200 Subject: [PATCH 06/17] Fix typo --- .../main/rascal/lang/rascal/grammar/analyze/Delimiters.rsc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/analyze/Delimiters.rsc b/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/analyze/Delimiters.rsc index 55ce0ad..93f1c93 100644 --- a/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/analyze/Delimiters.rsc +++ b/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/analyze/Delimiters.rsc @@ -60,8 +60,8 @@ list[&T] reorder(list[&T] l, backward()) = reverse(l); } DelimiterPair getInnerDelimiterPair(Grammar g, Symbol s, bool getOnlyFirst = false) { - Maybe[Symbol] begin = getInnerDelimitersBySymbol(g, forward(), getOnlyFirst = getOnlyFirst)[s]; - Maybe[Symbol] end = getInnerDelimitersBySymbol(g, backward(), getOnlyFirst = getOnlyFirst)[s]; + Maybe[Symbol] begin = getInnerDelimiterBySymbol(g, forward(), getOnlyFirst = getOnlyFirst)[s]; + Maybe[Symbol] end = getInnerDelimiterBySymbol(g, backward(), getOnlyFirst = getOnlyFirst)[s]; return ; } From 4d59dac4bf7b8b872b77cd47307457f25197eb89 Mon Sep 17 00:00:00 2001 From: Sung-Shik Jongmans Date: Mon, 2 Sep 2024 15:47:42 +0200 Subject: [PATCH 07/17] Add `recursive` parameter to `ConversionUnit` --- .../src/main/rascal/lang/textmate/ConversionUnit.rsc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rascal-textmate-core/src/main/rascal/lang/textmate/ConversionUnit.rsc b/rascal-textmate-core/src/main/rascal/lang/textmate/ConversionUnit.rsc index 0519a6d..c6e9c94 100644 --- a/rascal-textmate-core/src/main/rascal/lang/textmate/ConversionUnit.rsc +++ b/rascal-textmate-core/src/main/rascal/lang/textmate/ConversionUnit.rsc @@ -43,7 +43,7 @@ data ConversionUnit = unit( // The following parameters are set when a unit is created during analysis: Grammar rsc, Production prod, - /* bool recursive, */ // TODO: Add support for recursive productions + bool recursive, bool 
multiLine, DelimiterPair outerDelimiters, DelimiterPair innerDelimiters, From bd14940e874a12b42131410cb0f1ad0495cbeecd Mon Sep 17 00:00:00 2001 From: Sung-Shik Jongmans Date: Mon, 2 Sep 2024 16:24:21 +0200 Subject: [PATCH 08/17] Add function to remove prefix conversion units from a list --- .../rascal/lang/textmate/ConversionUnit.rsc | 35 ++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/rascal-textmate-core/src/main/rascal/lang/textmate/ConversionUnit.rsc b/rascal-textmate-core/src/main/rascal/lang/textmate/ConversionUnit.rsc index c6e9c94..b1daceb 100644 --- a/rascal-textmate-core/src/main/rascal/lang/textmate/ConversionUnit.rsc +++ b/rascal-textmate-core/src/main/rascal/lang/textmate/ConversionUnit.rsc @@ -137,4 +137,37 @@ private list[tuple[Keygen, Compare]] sorters = [ // Sort by stringified production -]; \ No newline at end of file +]; + +@synopsis{ + Retains from set `units` each unit that is a prefix (i.e., the symbols of + its production) of any other unit in `units` +} + +set[ConversionUnit] retainStrictPrefixes(set[ConversionUnit] units) + = {u1 | u1 <- units, any(u2 <- units, u1 != u2, isStrictPrefix(u1, u2))}; + +@synopsis{ + Removes from set `units` each units that is a prefix (i.e., the symbols of + its production) of any other unit in `units` +} + +set[ConversionUnit] removeStrictPrefixes(set[ConversionUnit] units) + = units - retainStrictPrefixes(units); + +@synopsis{ + Checks if unit `u1` is a strict prefix of unit `u2` +} + +bool isStrictPrefix(ConversionUnit u1, ConversionUnit u2) + = isStrictPrefix(u1.prod.symbols, u2.prod.symbols); + +// TODO: This function could be moved to a separate, generic module +private bool isStrictPrefix([], []) + = false; +private bool isStrictPrefix([], [_, *_]) + = true; +private bool isStrictPrefix([_, *_], []) + = false; +private bool isStrictPrefix([head1, *tail1], [head2, *tail2]) + = head1 == head2 && isStrictPrefix(tail1, tail2); \ No newline at end of file From 
1dcd56eb5e8cadca2e508744fd1c7a491b88ef71 Mon Sep 17 00:00:00 2001 From: Sung-Shik Jongmans Date: Tue, 3 Sep 2024 17:40:24 +0200 Subject: [PATCH 09/17] Widen applicability of function `getInnerDelimiterPair` for symbols to also support the case when the argument symbol is itself a delimiter --- .../rascal/grammar/analyze/Delimiters.rsc | 30 ++++++++++++++----- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/analyze/Delimiters.rsc b/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/analyze/Delimiters.rsc index 93f1c93..70a0e6a 100644 --- a/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/analyze/Delimiters.rsc +++ b/rascal-textmate-core/src/main/rascal/lang/rascal/grammar/analyze/Delimiters.rsc @@ -26,6 +26,28 @@ data Direction // Traverse lists of symbols (in productions)... list[&T] reorder(list[&T] l, forward()) = l; list[&T] reorder(list[&T] l, backward()) = reverse(l); +@synopsis{ + Gets the unique leftmost delimiter (`begin`) and the unique rightmost + delimiter `end`, if any, that occur **inside** productions of symbol `s` + (when `s` is a non-terminal) or `s` itself (when `s` is a delimiter). If + `getOnlyFirst` is `true` (default: `false`), then only the first (resp. + last) symbol of the productions can be considered as leftmost (resp. + rightmost). 
+} + +DelimiterPair getInnerDelimiterPair(Grammar g, Symbol s, bool getOnlyFirst = false) { + s = delabel(s); + if (isDelimiter(s)) { + return ; + } else if (isNonTerminalType(s)) { + Maybe[Symbol] begin = getInnerDelimiterBySymbol(g, forward(), getOnlyFirst = getOnlyFirst)[s]; + Maybe[Symbol] end = getInnerDelimiterBySymbol(g, backward(), getOnlyFirst = getOnlyFirst)[s]; + return ; + } else { + return ; + } +} + @synopsis{ Gets the unique leftmost delimiter (`begin`) and the unique rightmost delimiter (`end`), if any, that occur **inside** production `p` in grammar @@ -59,14 +81,8 @@ list[&T] reorder(list[&T] l, backward()) = reverse(l); rightmost delimiter. } -DelimiterPair getInnerDelimiterPair(Grammar g, Symbol s, bool getOnlyFirst = false) { - Maybe[Symbol] begin = getInnerDelimiterBySymbol(g, forward(), getOnlyFirst = getOnlyFirst)[s]; - Maybe[Symbol] end = getInnerDelimiterBySymbol(g, backward(), getOnlyFirst = getOnlyFirst)[s]; - return ; -} - DelimiterPair getInnerDelimiterPair(Grammar g, Production p, bool getOnlyFirst = false) { - Maybe[Symbol] begin = getInnerDelimiterByProduction(g, forward() , getOnlyFirst = getOnlyFirst)[p]; + Maybe[Symbol] begin = getInnerDelimiterByProduction(g, forward(), getOnlyFirst = getOnlyFirst)[p]; Maybe[Symbol] end = getInnerDelimiterByProduction(g, backward(), getOnlyFirst = getOnlyFirst)[p]; return ; } From 044d10c1747679d04e093906eea829aaedd94b39 Mon Sep 17 00:00:00 2001 From: Sung-Shik Jongmans Date: Tue, 3 Sep 2024 17:54:40 +0200 Subject: [PATCH 10/17] Add function to decompose lists of units into prefix/suffixes --- .../rascal/lang/textmate/ConversionUnit.rsc | 60 +++++++++++++++++-- 1 file changed, 55 insertions(+), 5 deletions(-) diff --git a/rascal-textmate-core/src/main/rascal/lang/textmate/ConversionUnit.rsc b/rascal-textmate-core/src/main/rascal/lang/textmate/ConversionUnit.rsc index b1daceb..6efe8d6 100644 --- a/rascal-textmate-core/src/main/rascal/lang/textmate/ConversionUnit.rsc +++ 
b/rascal-textmate-core/src/main/rascal/lang/textmate/ConversionUnit.rsc @@ -7,8 +7,10 @@ module lang::textmate::ConversionUnit import Grammar; import ParseTree; +import util::Math; import util::Maybe; +import lang::rascal::grammar::Util; import lang::rascal::grammar::analyze::Delimiters; import lang::textmate::ConversionConstants; import lang::textmate::Grammar; @@ -140,16 +142,16 @@ private list[tuple[Keygen, Compare]] sorters = [ ]; @synopsis{ - Retains from set `units` each unit that is a prefix (i.e., the symbols of - its production) of any other unit in `units` + Retains from set `units` each unit that is a prefix (i.e., the list of + symbols of its production) of any other unit in `units` } set[ConversionUnit] retainStrictPrefixes(set[ConversionUnit] units) = {u1 | u1 <- units, any(u2 <- units, u1 != u2, isStrictPrefix(u1, u2))}; @synopsis{ - Removes from set `units` each units that is a prefix (i.e., the symbols of - its production) of any other unit in `units` + Removes from set `units` each units that is a prefix (i.e., the list of + symbols of its production) of any other unit in `units` } set[ConversionUnit] removeStrictPrefixes(set[ConversionUnit] units) @@ -170,4 +172,52 @@ private bool isStrictPrefix([], [_, *_]) private bool isStrictPrefix([_, *_], []) = false; private bool isStrictPrefix([head1, *tail1], [head2, *tail2]) - = head1 == head2 && isStrictPrefix(tail1, tail2); \ No newline at end of file + = head1 == head2 && isStrictPrefix(tail1, tail2); + +@synopsis{ + Representation of a decomposition of a list of units (i.e., the lists of + symbols of their productions) into their maximally common non-recursive + prefix and their minimally disjoint suffixes. See also function `decompose`. +} + +alias Decomposition = tuple[ + list[Symbol] prefix, + list[list[Symbol]] suffixes +]; + +@synopsis{ + Decomposes list `units`. See also type `Decomposition`. 
+} + +Decomposition decompose(list[ConversionUnit] units) { + list[Symbol] prefix = []; + list[list[Symbol]] suffixes = []; + + list[Production] prods = [u.prod | u <- units]; + set[Grammar] grammars = {u.rsc | u <- units}; + + if (_ <- prods && {rsc} := grammars) { + list[int] sizes = [size(p.symbols) | p <- prods]; + int n = (sizes[0] | min(it, size) | size <- sizes[1..]); + + // Compute prefix (at most of size `n`) + prefix = for (i <- [0..n]) { + set[Symbol] iths = {p.symbols[i] | p <- prods}; + if ({ith} := iths && !isRecursive(rsc, delabel(ith))) { + append ith; + } else { + break; + } + } + + // Compute suffixes + suffixes = for (p <- prods) { + list[Symbol] suffix = p.symbols[size(prefix)..]; + if (_ <- suffix) { + append suffix; + } + } + } + + return ; +} \ No newline at end of file From 6ba991ddcc68696c4982c9ea4e263a4a93f373d4 Mon Sep 17 00:00:00 2001 From: Sung-Shik Jongmans Date: Tue, 3 Sep 2024 17:54:58 +0200 Subject: [PATCH 11/17] Add support for recursive productions to the converter --- .../main/rascal/lang/textmate/Conversion.rsc | 142 +++++++++++++----- 1 file changed, 107 insertions(+), 35 deletions(-) diff --git a/rascal-textmate-core/src/main/rascal/lang/textmate/Conversion.rsc b/rascal-textmate-core/src/main/rascal/lang/textmate/Conversion.rsc index 81b19b0..f597b56 100644 --- a/rascal-textmate-core/src/main/rascal/lang/textmate/Conversion.rsc +++ b/rascal-textmate-core/src/main/rascal/lang/textmate/Conversion.rsc @@ -112,12 +112,10 @@ list[ConversionUnit] analyze(RscGrammar rsc) { // Analyze dependencies among productions println("[LOG] Analyzing dependencies among productions"); - Dependencies dependencies = deps(toGraph(rsc)); - list[Production] prods = dependencies - .removeProds(isCyclic, true) // `true` means "also remove ancestors" - .retainProds(isNonEmpty) - .retainProds(hasCategory) - .getProds(); + Graph[Production] graph = toGraph(rsc); + list[Production] prods = 
deps(graph).retainProds(isNonEmpty).retainProds(hasCategory).getProds(); + list[Production] prodsNonRecursive = prods & deps(graph).removeProds(isCyclic, true).getProds(); + list[Production] prodsRecursive = prods - prodsNonRecursive; // Analyze delimiters println("[LOG] Analyzing delimiters"); @@ -134,12 +132,13 @@ list[ConversionUnit] analyze(RscGrammar rsc) { // Return bool isEmptyProd(prod(_, [\alt(alternatives)], _)) = alternatives == {}; - list[ConversionUnit] units - = [unit(rsc, p, hasNewline(rsc, p), getOuterDelimiterPair(rsc, p), getInnerDelimiterPair(rsc, p, getOnlyFirst = true)) | p <- prods] - + [unit(rsc, p, false, , ) | p <- prodsDelimiters, !isEmptyProd(p)] - + [unit(rsc, p, false, , ) | p <- prodsKeywords, !isEmptyProd(p)]; + set[ConversionUnit] units + = {unit(rsc, p, false, hasNewline(rsc, p), getOuterDelimiterPair(rsc, p), getInnerDelimiterPair(rsc, p, getOnlyFirst = true)) | p <- prodsNonRecursive} + + {unit(rsc, p, true, hasNewline(rsc, p), getOuterDelimiterPair(rsc, p), getInnerDelimiterPair(rsc, p, getOnlyFirst = true)) | p <- prodsRecursive} + + {unit(rsc, p, false, false, , ) | p <- prodsDelimiters, !isEmptyProd(p)} + + {unit(rsc, p, false, false, , ) | p <- prodsKeywords, !isEmptyProd(p)}; - return sort(units); + return sort([*removeStrictPrefixes(units)]); } @synopsis{ @@ -195,7 +194,7 @@ private list[ConversionUnit] addInnerRules(list[ConversionUnit] units) { // Convert all units in the group to match patterns (including, // optimistically, multi-line units as-if they are single-line) - for (u <- group) { + for (u <- group, !u.recursive) { TmRule r = toTmRule(toRegExp(u.rsc, u.prod, guard = true)) [name = "/inner/single/"]; @@ -215,41 +214,93 @@ private list[ConversionUnit] addInnerRules(list[ConversionUnit] units) { // Simple case: each unit does have an `end` inner delimiter if (_ <- group && all(u <- group, just(_) := u.innerDelimiters.end)) { - // Compute a list of segments that need to be consumed between + // Compute a set of 
segments that need to be consumed between // the `begin` delimiter and the `end` delimiters. Each of these // segments will be converted to a match pattern. - list[Segment] segments = [*getSegments(rsc, u.prod) | u <- group]; - - Segment removeBeginEnd(Segment seg) { - list[Symbol] symbols = seg.symbols; - if (seg.initial, _ <- symbols, symbols[0] == begin) { - symbols = symbols[1..]; - } - if (seg.final, _ <- symbols, symbols[-1] in ends) { - symbols = symbols[..-1]; - } - - return seg[symbols = symbols]; - } + set[Segment] segs = {*getSegments(rsc, u.prod) | u <- group}; + segs = {removeBeginEnd(seg, begins, ends) | seg <- segs}; - list[Symbol] terminals = [\seq(removeBeginEnd(seg).symbols) | seg <- segments]; - terminals = [s | s <- terminals, [] != s.symbols]; - terminals = [destar(s) | s <- terminals]; // The tokenization engine always tries to apply rules repeatedly - terminals = dup(terminals); - terminals = terminals + \char-class([range(1,0x10FFFF)]); // Any char (as a fallback) - TmRule r = toTmRule( toRegExp(rsc, [begin], {t}), toRegExp(rsc, [\alt(ends)], {t}), - [toTmRule(toRegExp(rsc, [s], {t})) | s <- terminals]) + [toTmRule(toRegExp(rsc, [s], {t})) | s <- toTerminals(segs)]) [name = "/inner/multi/"]; rules = insertIn(rules, (u: r | u <- group)); } - // Complex case: some unit doesn't have an `end` inner delimiter + // Complex case: some unit doesn't have an `end` inner delimiter. + // This requires (substantial) extra care, as there is no obvious + // marker to close the begin/end pattern with. else { - ; // TODO (part of future support for *recursive* multi-line units) + Decomposition decomposition = decompose([*group]); + + // TODO: The following condition can be true (even though there + // has to be a `begin` delimiter) because `decompose` doesn't + // expand non-terminals. Consider if it should, to maybe improve + // accuracy. 
+ if ([] == decomposition.prefix) { + continue; + } + + RegExp reBegin = toRegExp(rsc, decomposition.prefix, {t}); + RegExp reEnd = regExp("(?=.)", []); + + patterns = for (suffix <- decomposition.suffixes) { + if (just(Symbol begin) := getInnerDelimiterPair(rsc, suffix[0], getOnlyFirst = true).begin) { + if (just(Symbol end) := getInnerDelimiterPair(rsc, suffix[-1], getOnlyFirst = true).end) { + set[Segment] segs = getSegments(rsc, suffix); + segs = {removeBeginEnd(seg, {begin}, {end}) | seg <- segs}; + + append toTmRule( + toRegExp(rsc, [begin], {t}), + toRegExp(rsc, [end], {t}), + [toTmRule(toRegExp(rsc, [s], {t})) | s <- toTerminals(segs)]); + } + + else { + append toTmRule(toRegExp(rsc, [begin], {t})); + } + } + } + + TmRule r = toTmRule(reBegin, reEnd, patterns); + r = r[name = "/inner/multi/"]; + r = r[applyEndPatternLast = true]; + + rules = insertIn(rules, (u: r | u <- group)); + + // TODO: The current approach produces "partially" + // newline-sensitive rules, in the sense that newlines are + // accepted between the prefix and the suffixes, but not between + // symbols in the prefix. 
This approach could be improved to + // produce "totally" newline-sensitive rules (at the cost of + // much more complicated rule generation and generated rules) by + // adopting an approach in which the rules for each symbol in + // the prefix looks something like the following three: + // + // ``` + // "foo": { + // "name": "foo", + // "begin": "(\\@)", + // "end": "(?!\\G)|(?:(?!$)(?![a-z]+))", + // "patterns": [{ "include": "#foo.$" }, { "match": "[a-z]+" }], + // "contentName": "comment", + // "beginCaptures": { "1": { "name": "comment" } } + // }, + // "foo.$": { + // "begin": "$", + // "end": "(?<=^.+)|(?:(?!$)(?![a-z]+))", + // "name": "foo.$", + // "patterns": [ { "include": "#foo.^" }] + // }, + // "foo.^": { + // "begin": "^", + // "end": "(?!\\G)|(?:(?!$)(?![a-z]+))", + // "name": "foo.^", + // "patterns": [{ "include": "#foo.$" }, { "match": "[a-z]+" }] + // } + // ``` } } } @@ -302,6 +353,27 @@ private list[ConversionUnit] addOuterRules(list[ConversionUnit] units) { // precision than a unit-driven approach; I suspect it might. } +private Segment removeBeginEnd(Segment seg, set[Symbol] begins, set[Symbol] ends) { + list[Symbol] symbols = seg.symbols; + if (seg.initial, _ <- symbols, symbols[0] in begins) { + symbols = symbols[1..]; + } + if (seg.final, _ <- symbols, symbols[-1] in ends) { + symbols = symbols[..-1]; + } + + return seg[symbols = symbols]; +} + +private list[Symbol] toTerminals(set[Segment] segs) { + list[Symbol] terminals = [\seq(seg.symbols) | seg <- segs]; + terminals = [s | s <- terminals, [] != s.symbols]; + terminals = [destar(s) | s <- terminals]; // The tokenization engine always tries to apply rules repeatedly + terminals = dup(terminals); + terminals = terminals + \char-class([range(1,0x10FFFF)]); // Any char (as a fallback) + return terminals; +} + // TODO: This function could be moved to a separate, generic module private list[&T] dupLast(list[&T] l) = reverse(dup(reverse(l))); // TODO: Optimize/avoid `reverse`-ing? 
From 5e1f917bd8f079375f0c369bd33bec071b252144 Mon Sep 17 00:00:00 2001 From: Sung-Shik Jongmans Date: Fri, 6 Sep 2024 11:26:39 +0200 Subject: [PATCH 12/17] Update tests to support recursive productions --- .../rascal/lang/textmate/ConversionTests.rsc | 2 +- .../lang/textmate/conversiontests/Emoji.rsc | 6 +-- .../lang/textmate/conversiontests/Pico.rsc | 8 ++-- .../conversiontests/PicoWithCategories.rsc | 20 ++++---- .../lang/textmate/conversiontests/Rascal.rsc | 46 ++++++++++--------- .../textmate/conversiontests/RascalClass.rsc | 10 ++-- .../conversiontests/RascalComment.rsc | 4 +- .../conversiontests/RascalConcrete.rsc | 14 +++--- .../conversiontests/RascalStringConstant.rsc | 6 +-- .../conversiontests/RascalStringLiteral.rsc | 12 ++--- .../textmate/conversiontests/Walkthrough.rsc | 14 +++--- 11 files changed, 72 insertions(+), 70 deletions(-) diff --git a/rascal-textmate-core/src/main/rascal/lang/textmate/ConversionTests.rsc b/rascal-textmate-core/src/main/rascal/lang/textmate/ConversionTests.rsc index d7a8918..913c0f0 100644 --- a/rascal-textmate-core/src/main/rascal/lang/textmate/ConversionTests.rsc +++ b/rascal-textmate-core/src/main/rascal/lang/textmate/ConversionTests.rsc @@ -38,7 +38,7 @@ bool doAnalyzeTest(RscGrammar rsc, list[ConversionUnit] expect, bool printActual println(); for (i <- [0..size(actual)]) { ConversionUnit u = actual[i]; - println(" unit(rsc, , , , )"); + println(" unit(rsc, , , , , )"); } println(); } diff --git a/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/Emoji.rsc b/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/Emoji.rsc index 997851d..9798e4a 100644 --- a/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/Emoji.rsc +++ b/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/Emoji.rsc @@ -19,9 +19,9 @@ lexical Boolean Grammar rsc = preprocess(grammar(#Boolean)); list[ConversionUnit] units = [ - unit(rsc, 
prod(lex("Boolean"),[lit("🙂")],{\tag("category"("constant.language"))}), false, , ), - unit(rsc, prod(lex("Boolean"),[lit("🙁")],{\tag("category"("constant.language"))}), false, , ), - unit(rsc, prod(lex("Unit"),[lit("🌊")],{\tag("category"("constant.language"))}), false, , ) + unit(rsc, prod(lex("Boolean"),[lit("🙂")],{\tag("category"("constant.language"))}), false, false, , ), + unit(rsc, prod(lex("Boolean"),[lit("🙁")],{\tag("category"("constant.language"))}), false, false, , ), + unit(rsc, prod(lex("Unit"),[lit("🌊")],{\tag("category"("constant.language"))}), false, false, , ) ]; test bool analyzeTest() = doAnalyzeTest(rsc, units); diff --git a/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/Pico.rsc b/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/Pico.rsc index 24c6ff3..2dc1d7b 100644 --- a/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/Pico.rsc +++ b/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/Pico.rsc @@ -13,10 +13,10 @@ import lang::pico::\syntax::Main; Grammar rsc = preprocess(grammar(#Program)); list[ConversionUnit] units = [ - unit(rsc, prod(lex(DELIMITERS_PRODUCTION_NAME),[alt({lit("-"),lit(","),lit(")"),lit("("),lit("+"),lit("||"),lit(":="),lit("\""),lit(";"),lit("nil-type")})],{}), false, , ), - unit(rsc, prod(lex("WhitespaceAndComment"),[lit("%%"),conditional(\iter-star(\char-class([range(1,9),range(11,1114111)])),{\end-of-line()})],{\tag("category"("Comment"))}), false, , ), - unit(rsc, prod(lex("WhitespaceAndComment"),[lit("%"),iter(\char-class([range(1,36),range(38,1114111)])),lit("%")],{\tag("category"("Comment"))}), true, , ), - unit(rsc, prod(lex(KEYWORDS_PRODUCTION_NAME),[alt({lit("do"),lit("declare"),lit("fi"),lit("else"),lit("end"),lit("od"),lit("begin"),lit("natural"),lit("then"),lit("if"),lit("while"),lit("string")})],{\tag("category"("keyword.control"))}), false, , ) + unit(rsc, 
prod(lex(DELIMITERS_PRODUCTION_NAME),[alt({lit("-"),lit(","),lit(")"),lit("("),lit("+"),lit("||"),lit(":="),lit("\""),lit(";"),lit("nil-type")})],{}), false, false, , ), + unit(rsc, prod(lex("WhitespaceAndComment"),[lit("%%"),conditional(\iter-star(\char-class([range(1,9),range(11,1114111)])),{\end-of-line()})],{\tag("category"("Comment"))}), false, false, , ), + unit(rsc, prod(lex("WhitespaceAndComment"),[lit("%"),iter(\char-class([range(1,36),range(38,1114111)])),lit("%")],{\tag("category"("Comment"))}), false, true, , ), + unit(rsc, prod(lex(KEYWORDS_PRODUCTION_NAME),[alt({lit("do"),lit("declare"),lit("fi"),lit("else"),lit("end"),lit("od"),lit("begin"),lit("natural"),lit("then"),lit("if"),lit("while"),lit("string")})],{\tag("category"("keyword.control"))}), false, false, , ) ]; test bool analyzeTest() = doAnalyzeTest(rsc, units); diff --git a/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/PicoWithCategories.rsc b/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/PicoWithCategories.rsc index 9a81068..d8cb623 100644 --- a/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/PicoWithCategories.rsc +++ b/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/PicoWithCategories.rsc @@ -74,16 +74,16 @@ lexical WhitespaceAndComment Grammar rsc = preprocess(grammar(#Program)); list[ConversionUnit] units = [ - unit(rsc, prod(lex(DELIMITERS_PRODUCTION_NAME),[alt({lit("-"),lit(","),lit(")"),lit("("),lit("+"),lit("||"),lit(":="),lit("\\")})],{}), false, , ), - unit(rsc, prod(label("natural",sort("Type")),[lit("natural")],{\tag("category"("storage.type"))}), false, , ), - unit(rsc, prod(label("nil",sort("Type")),[lit("nil-type")],{\tag("category"("storage.type"))}), false, , ), - unit(rsc, prod(label("string",sort("Type")),[lit("string")],{\tag("category"("storage.type"))}), false, , ), - unit(rsc, 
prod(lex("WhitespaceAndComment"),[lit("%%"),conditional(\iter-star(\char-class([range(1,9),range(11,1114111)])),{\end-of-line()})],{\tag("category"("comment.line"))}), false, , ), - unit(rsc, prod(lex("WhitespaceAndComment"),[lit("%"),iter(\char-class([range(1,36),range(38,1114111)])),lit("%")],{\tag("category"("comment.block"))}), true, , ), - unit(rsc, prod(label("strcon",sort("Expression")),[label("string",lex("String"))],{\tag("category"("string.quoted.double"))}), true, , ), - unit(rsc, prod(label("id",sort("Expression")),[label("name",lex("Id"))],{\tag("category"("variable.other"))}), false, , ), - unit(rsc, prod(label("natcon",sort("Expression")),[label("natcon",lex("Natural"))],{\tag("category"("constant.numeric"))}), false, , ), - unit(rsc, prod(lex(KEYWORDS_PRODUCTION_NAME),[alt({lit("do"),lit("declare"),lit("fi"),lit("else"),lit("end"),lit("od"),lit("begin"),lit("natural"),lit("then"),lit("if"),lit("while"),lit("string")})],{\tag("category"("keyword.control"))}), false, , ) + unit(rsc, prod(lex(DELIMITERS_PRODUCTION_NAME),[alt({lit("-"),lit(","),lit(")"),lit("("),lit("+"),lit("||"),lit(":="),lit("\\")})],{}), false, false, , ), + unit(rsc, prod(label("natural",sort("Type")),[lit("natural")],{\tag("category"("storage.type"))}), false, false, , ), + unit(rsc, prod(label("nil",sort("Type")),[lit("nil-type")],{\tag("category"("storage.type"))}), false, false, , ), + unit(rsc, prod(label("string",sort("Type")),[lit("string")],{\tag("category"("storage.type"))}), false, false, , ), + unit(rsc, prod(lex("WhitespaceAndComment"),[lit("%%"),conditional(\iter-star(\char-class([range(1,9),range(11,1114111)])),{\end-of-line()})],{\tag("category"("comment.line"))}), false, false, , ), + unit(rsc, prod(lex("WhitespaceAndComment"),[lit("%"),iter(\char-class([range(1,36),range(38,1114111)])),lit("%")],{\tag("category"("comment.block"))}), false, true, , ), + unit(rsc, 
prod(label("strcon",sort("Expression")),[label("string",lex("String"))],{\tag("category"("string.quoted.double"))}), false, true, , ), + unit(rsc, prod(label("id",sort("Expression")),[label("name",lex("Id"))],{\tag("category"("variable.other"))}), false, false, , ), + unit(rsc, prod(label("natcon",sort("Expression")),[label("natcon",lex("Natural"))],{\tag("category"("constant.numeric"))}), false, false, , ), + unit(rsc, prod(lex(KEYWORDS_PRODUCTION_NAME),[alt({lit("do"),lit("declare"),lit("fi"),lit("else"),lit("end"),lit("od"),lit("begin"),lit("natural"),lit("then"),lit("if"),lit("while"),lit("string")})],{\tag("category"("keyword.control"))}), false, false, , ) ]; test bool analyzeTest() = doAnalyzeTest(rsc, units); diff --git a/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/Rascal.rsc b/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/Rascal.rsc index daed47d..70d94c5 100644 --- a/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/Rascal.rsc +++ b/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/Rascal.rsc @@ -13,28 +13,30 @@ import lang::rascal::\syntax::Rascal; Grammar rsc = preprocess(grammar(#Module)); list[ConversionUnit] units = [ - unit(rsc, prod(lex(DELIMITERS_PRODUCTION_NAME),[alt({lit("bottom-up-break"),lit(")"),lit("("),lit("≫"),lit("%"),lit("!:="),lit("\<==\>"),lit("!="),lit("\>="),lit("://"),lit("non-assoc"),lit("&="),lit("\<-"),lit("*="),lit("+="),lit("top-down-break"),lit(","),lit("..."),lit("/="),lit("!\<\<"),lit("=\>"),lit("!\>\>"),lit("||"),lit("\>\>"),lit("::"),lit("⚠"),lit("&&"),lit(":="),lit("#"),lit("\<\<="),lit("}"),lit("?="),lit("\<:"),lit("==\>"),lit("^"),lit(";"),lit("{"),lit("-="),lit("$T")})],{}), false, , ), - unit(rsc, prod(label("resultOutput",lex("Output")),[lit("⇨"),\iter-star(\char-class([range(1,9),range(11,12),range(14,1114111)])),lit("\n")],{\tag("category"("Result"))}), false, , ), - unit(rsc, 
prod(label("bq",lex("ConcretePart")),[lit("\\`")],{\tag("category"("MetaSkipped"))}), false, , ), - unit(rsc, prod(label("bs",lex("ConcretePart")),[lit("\\\\")],{\tag("category"("MetaSkipped"))}), false, , ), - unit(rsc, prod(label("gt",lex("ConcretePart")),[lit("\\\>")],{\tag("category"("MetaSkipped"))}), false, , ")),just(lit("\\\>"))>), - unit(rsc, prod(label("lt",lex("ConcretePart")),[lit("\\\<")],{\tag("category"("MetaSkipped"))}), false, , ), - unit(rsc, prod(label("text",lex("ConcretePart")),[conditional(iter(\char-class([range(1,9),range(11,59),range(61,61),range(63,91),range(93,95),range(97,1114111)])),{\not-follow(\char-class([range(1,9),range(11,59),range(61,61),range(63,91),range(93,95),range(97,1114111)]))})],{\tag("category"("MetaSkipped"))}), false, , ), - unit(rsc, prod(lex("Char"),[\char-class([range(1,31),range(33,33),range(35,38),range(40,44),range(46,59),range(61,61),range(63,90),range(94,1114111)])],{\tag("category"("Constant"))}), true, , ), - unit(rsc, prod(lex("Char"),[lex("UnicodeEscape")],{\tag("category"("Constant"))}), false, , ), - unit(rsc, prod(lex("Char"),[lit("\\"),\char-class([range(32,32),range(34,34),range(39,39),range(45,45),range(60,60),range(62,62),range(91,93),range(98,98),range(102,102),range(110,110),range(114,114),range(116,116)])],{\tag("category"("Constant"))}), false, , ), - unit(rsc, prod(label("empty",sort("Tag")),[lit("@"),layouts("LAYOUTLIST"),label("name",lex("Name"))],{\tag("Folded"()),\tag("category"("Comment"))}), true, , ), - unit(rsc, prod(lex("MidStringChars"),[lit("\>"),\iter-star(lex("StringCharacter")),lit("\<")],{\tag("category"("Constant"))}), true, , ")),just(lit("\<"))>), - unit(rsc, prod(lex("PostStringChars"),[lit("\>"),\iter-star(lex("StringCharacter")),lit("\"")],{\tag("category"("Constant"))}), true, , ")),just(lit("\""))>), - unit(rsc, 
prod(lex("Comment"),[lit("//"),conditional(\iter-star(\char-class([range(1,9),range(11,1114111)])),{\not-follow(\char-class([range(9,9),range(13,13),range(32,32),range(160,160),range(5760,5760),range(8192,8202),range(8239,8239),range(8287,8287),range(12288,12288)])),\end-of-line()})],{\tag("category"("Comment"))}), false, , ), - unit(rsc, prod(lex("Comment"),[lit("/*"),\iter-star(alt({\char-class([range(1,41),range(43,1114111)]),conditional(lit("*"),{\not-follow(lit("/"))})})),lit("*/")],{\tag("category"("Comment"))}), true, , ), - unit(rsc, prod(lex("CaseInsensitiveStringConstant"),[lit("\'"),label("chars",\iter-star(lex("StringCharacter"))),lit("\'")],{\tag("category"("Constant"))}), true, , ), - unit(rsc, prod(lex("PreStringChars"),[lit("\""),\iter-star(lex("StringCharacter")),lit("\<")],{\tag("category"("Constant"))}), true, , ), - unit(rsc, prod(lex("StringConstant"),[lit("\""),label("chars",\iter-star(lex("StringCharacter"))),lit("\"")],{\tag("category"("Constant"))}), true, , ), - unit(rsc, prod(label("stderrOutput",lex("Output")),[conditional(lit("⚠"),{\begin-of-line()}),\iter-star(\char-class([range(1,9),range(11,12),range(14,1114111)])),lit("\n")],{\tag("category"("StdErr"))}), false, , ), - unit(rsc, prod(label("stdoutOutput",lex("Output")),[conditional(lit("≫"),{\begin-of-line()}),\iter-star(\char-class([range(1,9),range(11,12),range(14,1114111)])),lit("\n")],{\tag("category"("StdOut"))}), false, , ), - unit(rsc, 
prod(lex(KEYWORDS_PRODUCTION_NAME),[alt({lit("lexical"),lit("loc"),lit("if"),lit("assoc"),lit("test"),lit("lrel"),lit("throws"),lit("clear"),lit("module"),lit("any"),lit("int"),lit("quit"),lit("o"),lit("anno"),lit("true"),lit("public"),lit("keyword"),lit("for"),lit("tuple"),lit("bracket"),lit("bag"),lit("it"),lit("visit"),lit("do"),lit("data"),lit("layout"),lit("bool"),lit("edit"),lit("join"),lit("is"),lit("import"),lit("view"),lit("in"),lit("rat"),lit("modules"),lit("continue"),lit("left"),lit("num"),lit("assert"),lit("throw"),lit("one"),lit("help"),lit("default"),lit("all"),lit("global"),lit("syntax"),lit("false"),lit("finally"),lit("private"),lit("mod"),lit("java"),lit("node"),lit("start"),lit("set"),lit("right"),lit("variable"),lit("map"),lit("10"),lit("on"),lit("break"),lit("dynamic"),lit("solve"),lit("fail"),lit("unimport"),lit("outermost"),lit("real"),lit("list"),lit("insert"),lit("innermost"),lit("declarations"),lit("else"),lit("rel"),lit("function"),lit("notin"),lit("filter"),lit("datetime"),lit("catch"),lit("try"),lit("renaming"),lit("tag"),lit("has"),lit("Z"),lit("when"),lit("type"),lit("append"),lit("extend"),lit("switch"),lit("void"),lit("history"),lit("T"),lit("while"),lit("str"),lit("value"),lit("undeclare"),lit("case"),lit("alias"),lit("return"),lit("0")})],{\tag("category"("keyword.control"))}), false, , ) + unit(rsc, prod(lex(DELIMITERS_PRODUCTION_NAME),[alt({lit("bottom-up-break"),lit(")"),lit("("),lit("%"),lit("!:="),lit("\<==\>"),lit("\<\<="),lit("!="),lit("\>="),lit("://"),lit("non-assoc"),lit("&="),lit("\<-"),lit("*="),lit("+="),lit("top-down-break"),lit(","),lit("..."),lit("/="),lit("!\<\<"),lit("=\>"),lit("!\>\>"),lit("||"),lit("\>\>"),lit("::"),lit("&&"),lit(":="),lit("#"),lit("?="),lit("\<:"),lit("==\>"),lit("^"),lit(";"),lit("{"),lit("-="),lit("$T")})],{}), false, false, , ), + unit(rsc, 
prod(label("stderrOutput",lex("Output")),[conditional(lit("⚠"),{\begin-of-line()}),\iter-star(\char-class([range(1,9),range(11,12),range(14,1114111)])),lit("\n")],{\tag("category"("StdErr"))}), false, false, , ), + unit(rsc, prod(label("stdoutOutput",lex("Output")),[conditional(lit("≫"),{\begin-of-line()}),\iter-star(\char-class([range(1,9),range(11,12),range(14,1114111)])),lit("\n")],{\tag("category"("StdOut"))}), false, false, , ), + unit(rsc, prod(label("resultOutput",lex("Output")),[lit("⇨"),\iter-star(\char-class([range(1,9),range(11,12),range(14,1114111)])),lit("\n")],{\tag("category"("Result"))}), false, false, , ), + unit(rsc, prod(label("bq",lex("ConcretePart")),[lit("\\`")],{\tag("category"("MetaSkipped"))}), false, false, , ), + unit(rsc, prod(label("bs",lex("ConcretePart")),[lit("\\\\")],{\tag("category"("MetaSkipped"))}), false, false, , ), + unit(rsc, prod(label("gt",lex("ConcretePart")),[lit("\\\>")],{\tag("category"("MetaSkipped"))}), false, false, , ")),just(lit("\\\>"))>), + unit(rsc, prod(label("hole",lex("ConcretePart")),[label("hole",sort("ConcreteHole"))],{\tag("category"("MetaVariable"))}), true, true, , "))>), + unit(rsc, prod(label("lt",lex("ConcretePart")),[lit("\\\<")],{\tag("category"("MetaSkipped"))}), false, false, , ), + unit(rsc, prod(label("text",lex("ConcretePart")),[conditional(iter(\char-class([range(1,9),range(11,59),range(61,61),range(63,91),range(93,95),range(97,1114111)])),{\not-follow(\char-class([range(1,9),range(11,59),range(61,61),range(63,91),range(93,95),range(97,1114111)]))})],{\tag("category"("MetaSkipped"))}), false, false, , ), + unit(rsc, prod(lex("Char"),[\char-class([range(1,31),range(33,33),range(35,38),range(40,44),range(46,59),range(61,61),range(63,90),range(94,1114111)])],{\tag("category"("Constant"))}), false, true, , ), + unit(rsc, prod(lex("Char"),[lex("UnicodeEscape")],{\tag("category"("Constant"))}), false, false, , ), + unit(rsc, 
prod(lex("Char"),[lit("\\"),\char-class([range(32,32),range(34,34),range(39,39),range(45,45),range(60,60),range(62,62),range(91,93),range(98,98),range(102,102),range(110,110),range(114,114),range(116,116)])],{\tag("category"("Constant"))}), false, false, , ), + unit(rsc, prod(label("default",sort("Tag")),[lit("@"),layouts("LAYOUTLIST"),label("name",lex("Name")),layouts("LAYOUTLIST"),label("contents",lex("TagString"))],{\tag("Folded"()),\tag("category"("Comment"))}), true, true, , ), + unit(rsc, prod(label("expression",sort("Tag")),[lit("@"),layouts("LAYOUTLIST"),label("name",lex("Name")),layouts("LAYOUTLIST"),lit("="),layouts("LAYOUTLIST"),conditional(label("expression",sort("Expression")),{\not-follow(lit("@"))})],{\tag("Folded"()),\tag("category"("Comment"))}), true, true, , ), + unit(rsc, prod(lex("MidStringChars"),[lit("\>"),\iter-star(lex("StringCharacter")),lit("\<")],{\tag("category"("Constant"))}), false, true, , ")),just(lit("\<"))>), + unit(rsc, prod(lex("PostStringChars"),[lit("\>"),\iter-star(lex("StringCharacter")),lit("\"")],{\tag("category"("Constant"))}), false, true, , ")),just(lit("\""))>), + unit(rsc, prod(lex("Comment"),[lit("//"),conditional(\iter-star(\char-class([range(1,9),range(11,1114111)])),{\not-follow(\char-class([range(9,9),range(13,13),range(32,32),range(160,160),range(5760,5760),range(8192,8202),range(8239,8239),range(8287,8287),range(12288,12288)])),\end-of-line()})],{\tag("category"("Comment"))}), false, false, , ), + unit(rsc, prod(lex("Comment"),[lit("/*"),\iter-star(alt({\char-class([range(1,41),range(43,1114111)]),conditional(lit("*"),{\not-follow(lit("/"))})})),lit("*/")],{\tag("category"("Comment"))}), false, true, , ), + unit(rsc, prod(lex("CaseInsensitiveStringConstant"),[lit("\'"),label("chars",\iter-star(lex("StringCharacter"))),lit("\'")],{\tag("category"("Constant"))}), false, true, , ), + unit(rsc, prod(lex("PreStringChars"),[lit("\""),\iter-star(lex("StringCharacter")),lit("\<")],{\tag("category"("Constant"))}), 
false, true, , ), + unit(rsc, prod(lex("StringConstant"),[lit("\""),label("chars",\iter-star(lex("StringCharacter"))),lit("\"")],{\tag("category"("Constant"))}), false, true, , ), + unit(rsc, prod(lex(KEYWORDS_PRODUCTION_NAME),[alt({lit("lexical"),lit("loc"),lit("if"),lit("assoc"),lit("test"),lit("lrel"),lit("throws"),lit("clear"),lit("module"),lit("any"),lit("int"),lit("quit"),lit("o"),lit("anno"),lit("true"),lit("public"),lit("keyword"),lit("for"),lit("tuple"),lit("bracket"),lit("bag"),lit("it"),lit("visit"),lit("do"),lit("data"),lit("layout"),lit("bool"),lit("edit"),lit("join"),lit("is"),lit("import"),lit("view"),lit("in"),lit("rat"),lit("modules"),lit("continue"),lit("left"),lit("num"),lit("assert"),lit("throw"),lit("one"),lit("help"),lit("default"),lit("all"),lit("global"),lit("syntax"),lit("false"),lit("finally"),lit("private"),lit("mod"),lit("java"),lit("node"),lit("start"),lit("set"),lit("right"),lit("variable"),lit("map"),lit("10"),lit("on"),lit("break"),lit("dynamic"),lit("solve"),lit("fail"),lit("unimport"),lit("outermost"),lit("real"),lit("list"),lit("insert"),lit("innermost"),lit("declarations"),lit("else"),lit("rel"),lit("function"),lit("notin"),lit("filter"),lit("datetime"),lit("catch"),lit("try"),lit("renaming"),lit("tag"),lit("has"),lit("Z"),lit("when"),lit("type"),lit("append"),lit("extend"),lit("switch"),lit("void"),lit("history"),lit("T"),lit("while"),lit("str"),lit("value"),lit("undeclare"),lit("case"),lit("alias"),lit("return"),lit("0")})],{\tag("category"("keyword.control"))}), false, false, , ) ]; test bool analyzeTest() = doAnalyzeTest(rsc, units); -test bool transformTest() = doTransformTest(units, <21, 6, 0>); +test bool transformTest() = doTransformTest(units, <20, 8, 0>); diff --git a/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/RascalClass.rsc b/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/RascalClass.rsc index e7f7738..1dde5fb 100644 --- 
a/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/RascalClass.rsc +++ b/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/RascalClass.rsc @@ -40,11 +40,11 @@ lexical UnicodeEscape Grammar rsc = preprocess(grammar(#Class)); list[ConversionUnit] units = [ - unit(rsc, prod(lex(DELIMITERS_PRODUCTION_NAME),[alt({lit("-"),lit(")"),lit("("),lit("!"),lit("||"),lit("&&")})],{}), false, , ), - unit(rsc, prod(lex("Char"),[\char-class([range(1,31),range(33,33),range(35,38),range(40,44),range(46,59),range(61,61),range(63,90),range(94,1114111)])],{\tag("category"("Constant"))}), true, , ), - unit(rsc, prod(lex("Char"),[lex("UnicodeEscape")],{\tag("category"("Constant"))}), false, , ), - unit(rsc, prod(lex("Char"),[lit("\\"),\char-class([range(32,32),range(34,34),range(39,39),range(45,45),range(60,60),range(62,62),range(91,93),range(98,98),range(102,102),range(110,110),range(114,114),range(116,116)])],{\tag("category"("Constant"))}), false, , ), - unit(rsc, prod(lex(KEYWORDS_PRODUCTION_NAME),[alt({lit("10"),lit("0")})],{\tag("category"("keyword.control"))}), false, , ) + unit(rsc, prod(lex(DELIMITERS_PRODUCTION_NAME),[alt({lit("-"),lit(")"),lit("("),lit("!"),lit("||"),lit("&&")})],{}), false, false, , ), + unit(rsc, prod(lex("Char"),[\char-class([range(1,31),range(33,33),range(35,38),range(40,44),range(46,59),range(61,61),range(63,90),range(94,1114111)])],{\tag("category"("Constant"))}), false, true, , ), + unit(rsc, prod(lex("Char"),[lex("UnicodeEscape")],{\tag("category"("Constant"))}), false, false, , ), + unit(rsc, prod(lex("Char"),[lit("\\"),\char-class([range(32,32),range(34,34),range(39,39),range(45,45),range(60,60),range(62,62),range(91,93),range(98,98),range(102,102),range(110,110),range(114,114),range(116,116)])],{\tag("category"("Constant"))}), false, false, , ), + unit(rsc, prod(lex(KEYWORDS_PRODUCTION_NAME),[alt({lit("10"),lit("0")})],{\tag("category"("keyword.control"))}), false, false, , ) ]; test bool analyzeTest() = 
doAnalyzeTest(rsc, units); diff --git a/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/RascalComment.rsc b/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/RascalComment.rsc index 9ed9f83..703b9f1 100644 --- a/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/RascalComment.rsc +++ b/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/RascalComment.rsc @@ -18,8 +18,8 @@ lexical Comment Grammar rsc = preprocess(grammar(#Comment)); list[ConversionUnit] units = [ - unit(rsc, prod(lex("Comment"),[lit("//"),conditional(\iter-star(\char-class([range(1,9),range(11,1114111)])),{\not-follow(\char-class([range(9,9),range(13,13),range(32,32),range(160,160),range(5760,5760),range(8192,8202),range(8239,8239),range(8287,8287),range(12288,12288)])),\end-of-line()})],{\tag("category"("Comment"))}), false, , ), - unit(rsc, prod(lex("Comment"),[lit("/*"),\iter-star(alt({\char-class([range(1,41),range(43,1114111)]),conditional(lit("*"),{\not-follow(lit("/"))})})),lit("*/")],{\tag("category"("Comment"))}), true, , ) + unit(rsc, prod(lex("Comment"),[lit("//"),conditional(\iter-star(\char-class([range(1,9),range(11,1114111)])),{\not-follow(\char-class([range(9,9),range(13,13),range(32,32),range(160,160),range(5760,5760),range(8192,8202),range(8239,8239),range(8287,8287),range(12288,12288)])),\end-of-line()})],{\tag("category"("Comment"))}), false, false, , ), + unit(rsc, prod(lex("Comment"),[lit("/*"),\iter-star(alt({\char-class([range(1,41),range(43,1114111)]),conditional(lit("*"),{\not-follow(lit("/"))})})),lit("*/")],{\tag("category"("Comment"))}), false, true, , ) ]; test bool analyzeTest() = doAnalyzeTest(rsc, units); diff --git a/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/RascalConcrete.rsc b/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/RascalConcrete.rsc index af7ce68..ef1efb6 100644 --- 
a/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/RascalConcrete.rsc +++ b/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/RascalConcrete.rsc @@ -30,13 +30,13 @@ syntax ConcreteHole Grammar rsc = preprocess(grammar(#Concrete)); list[ConversionUnit] units = [ - unit(rsc, prod(lex(DELIMITERS_PRODUCTION_NAME),[alt({lit("\n"),lit("\'")})],{}), false, , ), - unit(rsc, prod(label("bq",lex("ConcretePart")),[lit("\\`")],{\tag("category"("MetaSkipped"))}), false, , ), - unit(rsc, prod(label("bs",lex("ConcretePart")),[lit("\\\\")],{\tag("category"("MetaSkipped"))}), false, , ), - unit(rsc, prod(label("gt",lex("ConcretePart")),[lit("\\\>")],{\tag("category"("MetaSkipped"))}), false, , ")),just(lit("\\\>"))>), - unit(rsc, prod(label("hole",lex("ConcretePart")),[label("hole",sort("ConcreteHole"))],{\tag("category"("MetaVariable"))}), false, , "))>), - unit(rsc, prod(label("lt",lex("ConcretePart")),[lit("\\\<")],{\tag("category"("MetaSkipped"))}), false, , ), - unit(rsc, prod(label("text",lex("ConcretePart")),[conditional(iter(\char-class([range(1,9),range(11,59),range(61,61),range(63,91),range(93,95),range(97,1114111)])),{\not-follow(\char-class([range(1,9),range(11,59),range(61,61),range(63,91),range(93,95),range(97,1114111)]))})],{\tag("category"("MetaSkipped"))}), false, , ) + unit(rsc, prod(lex(DELIMITERS_PRODUCTION_NAME),[alt({lit("\n"),lit("\'")})],{}), false, false, , ), + unit(rsc, prod(label("bq",lex("ConcretePart")),[lit("\\`")],{\tag("category"("MetaSkipped"))}), false, false, , ), + unit(rsc, prod(label("bs",lex("ConcretePart")),[lit("\\\\")],{\tag("category"("MetaSkipped"))}), false, false, , ), + unit(rsc, prod(label("gt",lex("ConcretePart")),[lit("\\\>")],{\tag("category"("MetaSkipped"))}), false, false, , ")),just(lit("\\\>"))>), + unit(rsc, prod(label("hole",lex("ConcretePart")),[label("hole",sort("ConcreteHole"))],{\tag("category"("MetaVariable"))}), false, false, , "))>), + unit(rsc, 
prod(label("lt",lex("ConcretePart")),[lit("\\\<")],{\tag("category"("MetaSkipped"))}), false, false, , ), + unit(rsc, prod(label("text",lex("ConcretePart")),[conditional(iter(\char-class([range(1,9),range(11,59),range(61,61),range(63,91),range(93,95),range(97,1114111)])),{\not-follow(\char-class([range(1,9),range(11,59),range(61,61),range(63,91),range(93,95),range(97,1114111)]))})],{\tag("category"("MetaSkipped"))}), false, false, , ) ]; test bool analyzeTest() = doAnalyzeTest(rsc, units); diff --git a/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/RascalStringConstant.rsc b/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/RascalStringConstant.rsc index 482a873..d6af0fc 100644 --- a/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/RascalStringConstant.rsc +++ b/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/RascalStringConstant.rsc @@ -30,9 +30,9 @@ lexical UnicodeEscape Grammar rsc = preprocess(grammar(#StringConstant)); list[ConversionUnit] units = [ - unit(rsc, prod(lex(DELIMITERS_PRODUCTION_NAME),[alt({lit("\n"),lit("\'"),lit("\\")})],{}), false, , ), - unit(rsc, prod(lex("StringConstant"),[lit("\""),label("chars",\iter-star(lex("StringCharacter"))),lit("\"")],{\tag("category"("Constant"))}), true, , ), - unit(rsc, prod(lex(KEYWORDS_PRODUCTION_NAME),[alt({lit("10"),lit("0")})],{\tag("category"("keyword.control"))}), false, , ) + unit(rsc, prod(lex(DELIMITERS_PRODUCTION_NAME),[alt({lit("\n"),lit("\'"),lit("\\")})],{}), false, false, , ), + unit(rsc, prod(lex("StringConstant"),[lit("\""),label("chars",\iter-star(lex("StringCharacter"))),lit("\"")],{\tag("category"("Constant"))}), false, true, , ), + unit(rsc, prod(lex(KEYWORDS_PRODUCTION_NAME),[alt({lit("10"),lit("0")})],{\tag("category"("keyword.control"))}), false, false, , ) ]; test bool analyzeTest() = doAnalyzeTest(rsc, units); diff --git a/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/RascalStringLiteral.rsc 
b/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/RascalStringLiteral.rsc index 172e1d1..fb39460 100644 --- a/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/RascalStringLiteral.rsc +++ b/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/RascalStringLiteral.rsc @@ -69,12 +69,12 @@ syntax Expression Grammar rsc = preprocess(grammar(#StringLiteral)); list[ConversionUnit] units = [ - unit(rsc, prod(lex(DELIMITERS_PRODUCTION_NAME),[alt({lit("-"),lit(","),lit(")"),lit("("),lit("\n"),lit("\'"),lit("\<="),lit("\\"),lit("\>="),lit(";"),lit("{")})],{}), false, , ), - unit(rsc, prod(lex("PostStringChars"),[lit("\>"),\iter-star(lex("StringCharacter")),lit("\"")],{\tag("category"("Constant"))}), true, , ")),just(lit("\""))>), - unit(rsc, prod(lex("MidStringChars"),[lit("\>"),\iter-star(lex("StringCharacter")),lit("\<")],{\tag("category"("Constant"))}), true, , ")),just(lit("\<"))>), - unit(rsc, prod(lex("PreStringChars"),[lit("\""),\iter-star(lex("StringCharacter")),lit("\<")],{\tag("category"("Constant"))}), true, , ), - unit(rsc, prod(lex("StringConstant"),[lit("\""),label("chars",\iter-star(lex("StringCharacter"))),lit("\"")],{\tag("category"("Constant"))}), true, , ), - unit(rsc, prod(lex(KEYWORDS_PRODUCTION_NAME),[alt({lit("for"),lit("do"),lit("if"),lit("10"),lit("else"),lit("while"),lit("0")})],{\tag("category"("keyword.control"))}), false, , ) + unit(rsc, prod(lex(DELIMITERS_PRODUCTION_NAME),[alt({lit("-"),lit(","),lit(")"),lit("("),lit("\n"),lit("\'"),lit("\<="),lit("\\"),lit("\>="),lit(";"),lit("{")})],{}), false, false, , ), + unit(rsc, prod(lex("PostStringChars"),[lit("\>"),\iter-star(lex("StringCharacter")),lit("\"")],{\tag("category"("Constant"))}), false, true, , ")),just(lit("\""))>), + unit(rsc, prod(lex("MidStringChars"),[lit("\>"),\iter-star(lex("StringCharacter")),lit("\<")],{\tag("category"("Constant"))}), false, true, , ")),just(lit("\<"))>), + unit(rsc, 
prod(lex("PreStringChars"),[lit("\""),\iter-star(lex("StringCharacter")),lit("\<")],{\tag("category"("Constant"))}), false, true, , ), + unit(rsc, prod(lex("StringConstant"),[lit("\""),label("chars",\iter-star(lex("StringCharacter"))),lit("\"")],{\tag("category"("Constant"))}), false, true, , ), + unit(rsc, prod(lex(KEYWORDS_PRODUCTION_NAME),[alt({lit("for"),lit("do"),lit("if"),lit("10"),lit("else"),lit("while"),lit("0")})],{\tag("category"("keyword.control"))}), false, false, , ) ]; test bool analyzeTest() = doAnalyzeTest(rsc, units); diff --git a/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/Walkthrough.rsc b/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/Walkthrough.rsc index d5f443c..5281407 100644 --- a/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/Walkthrough.rsc +++ b/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/Walkthrough.rsc @@ -288,13 +288,13 @@ lexical Boolean Grammar rsc = preprocess(grammar(#Value)); list[ConversionUnit] units = [ - unit(rsc, prod(lex(DELIMITERS_PRODUCTION_NAME),[alt({lit(","),lit("+"),lit("}"),lit("|"),lit("?"),lit("{"),lit("://")})],{}), false, , ), - unit(rsc, prod(label("line",lex("Comment")),[lit("//"),conditional(\iter-star(alt({lex("Blank"),lex("Alnum")})),{\end-of-line()})],{\tag("category"("comment.line.double-slash"))}), false, , ), - unit(rsc, prod(label("block",lex("Comment")),[lit("/*"),\iter-star(alt({lex("Alnum"),lex("Space")})),lit("*/")],{\tag("category"("comment.block"))}), true, , ), - unit(rsc, prod(label("alnum",lex("RegExpBody")),[conditional(iter(lex("Alnum")),{\not-follow(\char-class([range(48,57),range(65,90),range(97,122)]))})],{\tag("category"("markup.italic"))}), false, , ), - unit(rsc, prod(lex("String"),[lit("\""),\iter-star(lex("Alnum")),lit("\"")],{\tag("category"("string.quoted.double"))}), false, , ), - unit(rsc, 
prod(lex("Number"),[conditional(iter(lex("Digit")),{\not-follow(\char-class([range(48,57)]))})],{\tag("category"("constant.numeric"))}), false, , ), - unit(rsc, prod(lex(KEYWORDS_PRODUCTION_NAME),[alt({lit("true"),lit("false")})],{\tag("category"("keyword.control"))}), false, , ) + unit(rsc, prod(lex(DELIMITERS_PRODUCTION_NAME),[alt({lit(","),lit("+"),lit("}"),lit("|"),lit("?"),lit("{"),lit("://")})],{}), false, false, , ), + unit(rsc, prod(label("line",lex("Comment")),[lit("//"),conditional(\iter-star(alt({lex("Blank"),lex("Alnum")})),{\end-of-line()})],{\tag("category"("comment.line.double-slash"))}), false, false, , ), + unit(rsc, prod(label("block",lex("Comment")),[lit("/*"),\iter-star(alt({lex("Alnum"),lex("Space")})),lit("*/")],{\tag("category"("comment.block"))}), false, true, , ), + unit(rsc, prod(label("alnum",lex("RegExpBody")),[conditional(iter(lex("Alnum")),{\not-follow(\char-class([range(48,57),range(65,90),range(97,122)]))})],{\tag("category"("markup.italic"))}), false, false, , ), + unit(rsc, prod(lex("String"),[lit("\""),\iter-star(lex("Alnum")),lit("\"")],{\tag("category"("string.quoted.double"))}), false, false, , ), + unit(rsc, prod(lex("Number"),[conditional(iter(lex("Digit")),{\not-follow(\char-class([range(48,57)]))})],{\tag("category"("constant.numeric"))}), false, false, , ), + unit(rsc, prod(lex(KEYWORDS_PRODUCTION_NAME),[alt({lit("true"),lit("false")})],{\tag("category"("keyword.control"))}), false, false, , ) ]; test bool analyzeTest() = doAnalyzeTest(rsc, units); From c310743a633616cdf6664842e7bab4eefe82b0c5 Mon Sep 17 00:00:00 2001 From: Sung-Shik Jongmans Date: Fri, 6 Sep 2024 14:19:57 +0200 Subject: [PATCH 13/17] Add new test module (`RascalTag`) to test support for recursive productions --- .../textmate/conversiontests/RascalTag.rsc | 49 +++++++++++++++++++ .../textmate/conversiontests/RascalTag.test | 41 ++++++++++++++++ 2 files changed, 90 insertions(+) create mode 100644 
rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/RascalTag.rsc create mode 100644 rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/RascalTag.test diff --git a/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/RascalTag.rsc b/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/RascalTag.rsc new file mode 100644 index 0000000..5658888 --- /dev/null +++ b/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/RascalTag.rsc @@ -0,0 +1,49 @@ +module lang::textmate::conversiontests::RascalTag + +import Grammar; +import ParseTree; +import util::Maybe; + +import lang::textmate::Conversion; +import lang::textmate::ConversionConstants; +import lang::textmate::ConversionTests; +import lang::textmate::ConversionUnit; + +// Based on `lang::rascal::\syntax::Rascal` + +syntax Tag + = @Folded @category="Comment" \default : "@" Name name TagString contents + | @Folded @category="Comment" empty : "@" Name name + | @Folded @category="Comment" expression: "@" Name name "=" Expression expression !>> "@" + ; + +lexical Name + = ([A-Z a-z _] !<< [A-Z _ a-z] [0-9 A-Z _ a-z]* !>> [0-9 A-Z _ a-z]) /* \ RascalKeywords */ + | [\\] [A-Z _ a-z] [\- 0-9 A-Z _ a-z]* !>> [\- 0-9 A-Z _ a-z] + ; + +lexical TagString + = "\\" !<< "{" ( ![{}] | ("\\" [{}]) | TagString)* contents "\\" !<< "}"; + +syntax Expression + = nonEmptyBlock: "{" Statement+ statements "}"; + +syntax Statement + = emptyStatement: ";"; + +lexical LAYOUT + = [\u0009-\u000D \u0020 \u0085 \u00A0 \u1680 \u180E \u2000-\u200A \u2028 \u2029 \u202F \u205F \u3000]; + +layout LAYOUTLIST + = LAYOUT* !>> [\u0009-\u000D \u0020 \u0085 \u00A0 \u1680 \u180E \u2000-\u200A \u2028 \u2029 \u202F \u205F \u3000] /* !>> "//" !>> "/*" */; + +Grammar rsc = preprocess(grammar(#Tag)); + +list[ConversionUnit] units = [ + unit(rsc, prod(lex(DELIMITERS_PRODUCTION_NAME),[alt({lit("="),lit("\\"),lit(";"),lit("{")})],{}), false, false, , ), + unit(rsc, 
prod(label("default",sort("Tag")),[lit("@"),layouts("LAYOUTLIST"),label("name",lex("Name")),layouts("LAYOUTLIST"),label("contents",lex("TagString"))],{\tag("Folded"()),\tag("category"("Comment"))}), true, true, , ), + unit(rsc, prod(label("expression",sort("Tag")),[lit("@"),layouts("LAYOUTLIST"),label("name",lex("Name")),layouts("LAYOUTLIST"),lit("="),layouts("LAYOUTLIST"),conditional(label("expression",sort("Expression")),{\not-follow(lit("@"))})],{\tag("Folded"()),\tag("category"("Comment"))}), false, true, , ) +]; + +test bool analyzeTest() = doAnalyzeTest(rsc, units); +test bool transformTest() = doTransformTest(units, <2, 1, 0>, name = "RascalTag"); diff --git a/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/RascalTag.test b/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/RascalTag.test new file mode 100644 index 0000000..c75087b --- /dev/null +++ b/rascal-textmate-core/src/main/rascal/lang/textmate/conversiontests/RascalTag.test @@ -0,0 +1,41 @@ +# SYNTAX TEST "RascalTag" + + @memo foo +# ^^^^^ Comment +# ^^^ -Comment + + @synopsis{foo} bar +# ^^^^^^^^^^^^^^ Comment +# ^^^^ -Comment + + @synopsis{ +# ^^^^^^^^^^ Comment + foo bar +# ^^^^^^^^^ Comment + } baz +# ^ Comment +# ^^^^ -Comment + + @category={;} +# ^^^^^^^^^^^^^ Comment + +### TODO: The following tests show that, currently, no newlines are allowed +### between `@` and the tag name. 
Starting from commit 6ba991d, there is a TODO +### comment in module `lang::textmate::Conversion` that outlines an approach to +### improve this, but "at the cost of much more complicated rule generation and +### generated rules" + + @ +# ^ -Comment + memo +# ^^^^ -Comment + + @ +# ^ -Comment + synopsis{foo} +# ^^^^^^^^^^^^^ -Comment + + @ +# ^ -Comment + category={;} +# ^^^^^^^^^^^^^ -Comment \ No newline at end of file From 7fa2df69ab6ab4a4b2be2cfa5a7889acc96b2077 Mon Sep 17 00:00:00 2001 From: Sung-Shik Jongmans Date: Mon, 9 Sep 2024 08:59:52 +0200 Subject: [PATCH 14/17] Add a few more comments --- .../src/main/rascal/lang/textmate/Conversion.rsc | 5 +++++ .../main/rascal/lang/textmate/ConversionUnit.rsc | 14 ++++++++++++-- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/rascal-textmate-core/src/main/rascal/lang/textmate/Conversion.rsc b/rascal-textmate-core/src/main/rascal/lang/textmate/Conversion.rsc index 4e922b8..bfd20f1 100644 --- a/rascal-textmate-core/src/main/rascal/lang/textmate/Conversion.rsc +++ b/rascal-textmate-core/src/main/rascal/lang/textmate/Conversion.rsc @@ -302,6 +302,11 @@ private list[ConversionUnit] addInnerRules(list[ConversionUnit] units) { // "patterns": [{ "include": "#foo.$" }, { "match": "[a-z]+" }] // } // ``` + // + // Note: This alternative approach would likely render the + // present distinction between the "simple case" and the + // "complex case" unneeded, so in that sense, rule generation + // would actually become simpler. 
} } } diff --git a/rascal-textmate-core/src/main/rascal/lang/textmate/ConversionUnit.rsc b/rascal-textmate-core/src/main/rascal/lang/textmate/ConversionUnit.rsc index 6efe8d6..519eafe 100644 --- a/rascal-textmate-core/src/main/rascal/lang/textmate/ConversionUnit.rsc +++ b/rascal-textmate-core/src/main/rascal/lang/textmate/ConversionUnit.rsc @@ -176,8 +176,18 @@ private bool isStrictPrefix([head1, *tail1], [head2, *tail2]) @synopsis{ Representation of a decomposition of a list of units (i.e., the lists of - symbols of their productions) into their maximally common non-recursive - prefix and their minimally disjoint suffixes. See also function `decompose`. + symbols of their productions) into their maximally common prefix + (non-recursive) and their minimally disjoint suffixes. See also function + `decompose`. +} + +@description{ + For instance, consider the following lists of symbols: + - `[lit("foo"), lit("bar"), lit("baz")]`; + - `[lit("foo"), lit("bar"), lit("qux"), lit("quux")]`. + + The maximally common prefix is `[lit("foo"), lit("bar")]`. The minimally + disjoint suffixes are `[lit("baz")]` and `[lit("qux"), lit("quux")]`.
} alias Decomposition = tuple[ From df909e5462d5523aca17b903b9056457a0ea01c3 Mon Sep 17 00:00:00 2001 From: Sung-Shik Jongmans Date: Mon, 9 Sep 2024 09:34:46 +0200 Subject: [PATCH 15/17] Add a few more comments --- .../main/rascal/lang/textmate/Conversion.rsc | 18 ++++++++++++++++++ .../rascal/lang/textmate/ConversionUnit.rsc | 6 +++--- 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/rascal-textmate-core/src/main/rascal/lang/textmate/Conversion.rsc b/rascal-textmate-core/src/main/rascal/lang/textmate/Conversion.rsc index bfd20f1..af75c9e 100644 --- a/rascal-textmate-core/src/main/rascal/lang/textmate/Conversion.rsc +++ b/rascal-textmate-core/src/main/rascal/lang/textmate/Conversion.rsc @@ -250,6 +250,11 @@ private list[ConversionUnit] addInnerRules(list[ConversionUnit] units) { patterns = for (suffix <- decomposition.suffixes) { if (just(Symbol begin) := getInnerDelimiterPair(rsc, suffix[0], getOnlyFirst = true).begin) { if (just(Symbol end) := getInnerDelimiterPair(rsc, suffix[-1], getOnlyFirst = true).end) { + // If the suffix has both a `begin` delimiter + // and an `end` delimiter, then generate a + // begin/end pattern to highlight these delimiters + // and all content in between. + set[Segment] segs = getSegments(rsc, suffix); segs = {removeBeginEnd(seg, {begin}, {end}) | seg <- segs}; @@ -260,9 +265,22 @@ private list[ConversionUnit] addInnerRules(list[ConversionUnit] units) { } else { + // If the suffix has a `begin` delimiter, but not + // an `end` delimiter, then generate a match pattern + // just to highlight that `begin` delimiter. Ignore + // the remainder of the suffix (because it's + // recursive, so no regular expression can be + // generated for it). append toTmRule(toRegExp(rsc, [begin], {t})); } } + + else { + // If the suffix doesn't have a `begin` delimiter, then + // ignore it (because it's recursive, so no regular + // expression can be generated for it).
+ ; + } } TmRule r = toTmRule(reBegin, reEnd, patterns); diff --git a/rascal-textmate-core/src/main/rascal/lang/textmate/ConversionUnit.rsc b/rascal-textmate-core/src/main/rascal/lang/textmate/ConversionUnit.rsc index 519eafe..9cfe506 100644 --- a/rascal-textmate-core/src/main/rascal/lang/textmate/ConversionUnit.rsc +++ b/rascal-textmate-core/src/main/rascal/lang/textmate/ConversionUnit.rsc @@ -175,9 +175,9 @@ private bool isStrictPrefix([head1, *tail1], [head2, *tail2]) = head1 == head2 && isStrictPrefix(tail1, tail2); @synopsis{ - Representation of a decomposition of a list of units (i.e., the lists of - symbols of their productions) into their maximally common prefix - (non-recursive) and their minimally disjoint suffixes. See also function + Representation of a *decomposition* of a list of units (i.e., the lists of + symbols of their productions) into their maximally common *prefix* + (non-recursive) and their minimally disjoint *suffixes*. See also function `decompose`. } From 546bb1393f0197bea929a5688056df819779f9ba Mon Sep 17 00:00:00 2001 From: Sung-Shik Jongmans Date: Mon, 9 Sep 2024 09:38:38 +0200 Subject: [PATCH 16/17] Update generated TextMate grammar for Rascal/Pico --- .../syntaxes/rascal.tmLanguage.json | 497 +++++++++++------- 1 file changed, 298 insertions(+), 199 deletions(-) diff --git a/vscode-extension/syntaxes/rascal.tmLanguage.json b/vscode-extension/syntaxes/rascal.tmLanguage.json index c462663..2d2d627 100644 --- a/vscode-extension/syntaxes/rascal.tmLanguage.json +++ b/vscode-extension/syntaxes/rascal.tmLanguage.json @@ -9,6 +9,79 @@ } } }, + "/inner/multi/midstringchars,poststringchars": { + "begin": "(\\>)", + "end": "((?:\\\")|(?:\\<))", + "patterns": [ + { + "match": "((?:\\\\)U(?:(?:\\b10\\b)|(?:(?:\\b0\\b)[0-9A-Fa-f]))[0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f])", + "captures": { + "1": { + "name": "string.quoted.double" + } + } + }, + { + "match": "((?:\\\\)u[0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f])", + "captures": { + "1": { + 
"name": "string.quoted.double" + } + } + }, + { + "match": "((?:\\\\)a[0-7][0-9A-Fa-f])", + "captures": { + "1": { + "name": "string.quoted.double" + } + } + }, + { + "match": "((?:\\\\)[\\\"\\'\\<\\>\\\\bfnrt])", + "captures": { + "1": { + "name": "string.quoted.double" + } + } + }, + { + "match": "((?:\\n)[\\t\\x{20}\\x{A0}\\x{1680}\\x{2000}-\\x{200A}\\x{202F}\\x{205F}\\x{3000}]*?(?:\\'))", + "captures": { + "1": { + "name": "string.quoted.double" + } + } + }, + { + "match": "([\\x{01}-\\!\\#-\\&\\(-\\;\\=\\?-\\[\\]-\\x{10FFFF}])", + "captures": { + "1": { + "name": "string.quoted.double" + } + } + }, + { + "match": "([\\x{01}-\\x{10FFFF}])", + "captures": { + "1": { + "name": "string.quoted.double" + } + } + } + ], + "endCaptures": { + "1": { + "name": "string.quoted.double" + } + }, + "name": "/inner/multi/midstringchars,poststringchars", + "beginCaptures": { + "1": { + "name": "string.quoted.double" + } + } + }, "/inner/multi/comment.2": { "begin": "(\\/\\*)", "end": "(\\*\\/)", @@ -42,15 +115,6 @@ } } }, - "/inner/single/concretepart.bq": { - "match": "(\\\\\\`)", - "name": "/inner/single/concretepart.bq", - "captures": { - "1": { - "name": "string" - } - } - }, "/inner/single/literal.regExp": { "match": 
"((?<=(?:[\\t-\\r\\x{20}\\x{85}\\x{A0}\\x{1680}\\x{180E}\\x{2000}-\\x{200A}\\x{2028}-\\x{2029}\\x{202F}\\x{205F}\\x{3000}]|(?:\\/\\/)|(?:(?:^))|(?:\\/\\*)))(?:(?:\\/)(?:(?:(?:\\\\)(?![\\/\\<\\>\\\\]))|[\\x{01}-\\.0-\\;\\=\\?-\\[\\]-\\x{10FFFF}]|(?:(?:\\\\)[\\/\\<\\>\\\\])|(?:(?:\\<)(?:(?=(?(?:(?:(?.*)$)(?!(?:(?:(?:\\bbreak\\b)|(?:\\bfor\\b)|(?:\\bstr\\b)|(?:\\bnode\\b)|(?:\\btuple\\b)|(?:\\bsolve\\b)|(?:\\brat\\b)|(?:\\bdynamic\\b)|(?:\\bassoc\\b)|(?:\\bbag\\b)|(?:\\bset\\b)|(?:\\bo\\b)|(?:\\bstart\\b)|(?:(?:\\bint\\b)|(?:\\blrel\\b)|(?:\\bbool\\b)|(?:\\btype\\b)|(?:\\bset\\b)|(?:\\bbag\\b)|(?:\\brat\\b)|(?:\\breal\\b)|(?:\\bnode\\b)|(?:\\btuple\\b)|(?:\\bmap\\b)|(?:\\bloc\\b)|(?:\\bnum\\b)|(?:\\blist\\b)|(?:\\bvalue\\b)|(?:\\bvoid\\b)|(?:\\brel\\b)|(?:\\bdatetime\\b)|(?:\\bstr\\b))|(?:\\blrel\\b)|(?:\\bcontinue\\b)|(?:\\bbracket\\b)|(?:\\brel\\b)|(?:\\blist\\b)|(?:\\btest\\b)|(?:\\breturn\\b)|(?:\\bfalse\\b)|(?:\\bjoin\\b)|(?:\\belse\\b)|(?:\\bit\\b)|(?:\\bin\\b)|(?:\\bif\\b)|(?:non\\-assoc)|(?:\\blexical\\b)|(?:\\bvalue\\b)|(?:\\bmap\\b)|(?:\\bvisit\\b)|(?:\\ball\\b)|(?:\\btry\\b)|(?:\\bprivate\\b)|(?:\\btrue\\b)|(?:\\bfinally\\b)|(?:\\breal\\b)|(?:\\bvoid\\b)|(?:\\bkeyword\\b)|(?:\\bany\\b)|(?:\\bone\\b)|(?:\\bmodule\\b)|(?:\\bpublic\\b)|(?:\\bthrows\\b)|(?:\\balias\\b)|(?:\\bdefault\\b)|(?:\\bcatch\\b)|(?:\\binsert\\b)|(?:\\banno\\b)|(?:\\bthrow\\b)|(?:\\bbool\\b)|(?:\\bswitch\\b)|(?:\\btype\\b)|(?:\\bwhile\\b)|(?:\\bnotin\\b)|(?:\\bcase\\b)|(?:\\blayout\\b)|(?:\\bmod\\b)|(?:\\bextend\\b)|(?:\\bappend\\b)|(?:\\bfail\\b)|(?:\\bdatetime\\b)|(?:\\bfilter\\b)|(?:\\bloc\\b)|(?:\\bassert\\b)|(?:\\bdata\\b)|(?:\\bimport\\b)|(?:\\bnum\\b)|(?:\\btag\\b)|(?:\\bsyntax\\b)|(?:\\bint\\b)))\\k$)\\k|(?:(?:\\\\)[A-Z\\_a-z](?:[\\-0-9A-Z\\_a-z]*?(?![\\-0-9A-Z\\_a-z]))))(?:\\>))|(?:(?:\\<)(?:(?=(?(?:(?:(?.*)$)(?!(?:(?:(?:\\bbreak\\b)|(?:\\bfor\\b)|(?:\\bstr\\b)|(?:\\bnode\\b)|(?:\\btuple\\b)|(?:\\bsolve\\b)|(?:\\brat\\b)|(?:\\bdynamic\\b)|(?:\\bassoc\\b)|(?:\\bbag\\b)|(?:\\bset\\b
)|(?:\\bo\\b)|(?:\\bstart\\b)|(?:(?:\\bint\\b)|(?:\\blrel\\b)|(?:\\bbool\\b)|(?:\\btype\\b)|(?:\\bset\\b)|(?:\\bbag\\b)|(?:\\brat\\b)|(?:\\breal\\b)|(?:\\bnode\\b)|(?:\\btuple\\b)|(?:\\bmap\\b)|(?:\\bloc\\b)|(?:\\bnum\\b)|(?:\\blist\\b)|(?:\\bvalue\\b)|(?:\\bvoid\\b)|(?:\\brel\\b)|(?:\\bdatetime\\b)|(?:\\bstr\\b))|(?:\\blrel\\b)|(?:\\bcontinue\\b)|(?:\\bbracket\\b)|(?:\\brel\\b)|(?:\\blist\\b)|(?:\\btest\\b)|(?:\\breturn\\b)|(?:\\bfalse\\b)|(?:\\bjoin\\b)|(?:\\belse\\b)|(?:\\bit\\b)|(?:\\bin\\b)|(?:\\bif\\b)|(?:non\\-assoc)|(?:\\blexical\\b)|(?:\\bvalue\\b)|(?:\\bmap\\b)|(?:\\bvisit\\b)|(?:\\ball\\b)|(?:\\btry\\b)|(?:\\bprivate\\b)|(?:\\btrue\\b)|(?:\\bfinally\\b)|(?:\\breal\\b)|(?:\\bvoid\\b)|(?:\\bkeyword\\b)|(?:\\bany\\b)|(?:\\bone\\b)|(?:\\bmodule\\b)|(?:\\bpublic\\b)|(?:\\bthrows\\b)|(?:\\balias\\b)|(?:\\bdefault\\b)|(?:\\bcatch\\b)|(?:\\binsert\\b)|(?:\\banno\\b)|(?:\\bthrow\\b)|(?:\\bbool\\b)|(?:\\bswitch\\b)|(?:\\btype\\b)|(?:\\bwhile\\b)|(?:\\bnotin\\b)|(?:\\bcase\\b)|(?:\\blayout\\b)|(?:\\bmod\\b)|(?:\\bextend\\b)|(?:\\bappend\\b)|(?:\\bfail\\b)|(?:\\bdatetime\\b)|(?:\\bfilter\\b)|(?:\\bloc\\b)|(?:\\bassert\\b)|(?:\\bdata\\b)|(?:\\bimport\\b)|(?:\\bnum\\b)|(?:\\btag\\b)|(?:\\bsyntax\\b)|(?:\\bint\\b)))\\k$)\\k|(?:(?:\\\\)[A-Z\\_a-z](?:[\\-0-9A-Z\\_a-z]*?(?![\\-0-9A-Z\\_a-z]))))(?:\\:)(?:(?:(?:\\\\)[\\/\\<\\>\\\\])|(?:(?:\\<)(?:(?=(?(?:(?:(?.*)$)(?!(?:(?:(?:\\bbreak\\b)|(?:\\bfor\\b)|(?:\\bstr\\b)|(?:\\bnode\\b)|(?:\\btuple\\b)|(?:\\bsolve\\b)|(?:\\brat\\b)|(?:\\bdynamic\\b)|(?:\\bassoc\\b)|(?:\\bbag\\b)|(?:\\bset\\b)|(?:\\bo\\b)|(?:\\bstart\\b)|(?:(?:\\bint\\b)|(?:\\blrel\\b)|(?:\\bbool\\b)|(?:\\btype\\b)|(?:\\bset\\b)|(?:\\bbag\\b)|(?:\\brat\\b)|(?:\\breal\\b)|(?:\\bnode\\b)|(?:\\btuple\\b)|(?:\\bmap\\b)|(?:\\bloc\\b)|(?:\\bnum\\b)|(?:\\blist\\b)|(?:\\bvalue\\b)|(?:\\bvoid\\b)|(?:\\brel\\b)|(?:\\bdatetime\\b)|(?:\\bstr\\b))|(?:\\blrel\\b)|(?:\\bcontinue\\b)|(?:\\bbracket\\b)|(?:\\brel\\b)|(?:\\blist\\b)|(?:\\btest\\b)|(?:\\breturn\\b)|(?:\\bfalse\\b)|(?:\
\bjoin\\b)|(?:\\belse\\b)|(?:\\bit\\b)|(?:\\bin\\b)|(?:\\bif\\b)|(?:non\\-assoc)|(?:\\blexical\\b)|(?:\\bvalue\\b)|(?:\\bmap\\b)|(?:\\bvisit\\b)|(?:\\ball\\b)|(?:\\btry\\b)|(?:\\bprivate\\b)|(?:\\btrue\\b)|(?:\\bfinally\\b)|(?:\\breal\\b)|(?:\\bvoid\\b)|(?:\\bkeyword\\b)|(?:\\bany\\b)|(?:\\bone\\b)|(?:\\bmodule\\b)|(?:\\bpublic\\b)|(?:\\bthrows\\b)|(?:\\balias\\b)|(?:\\bdefault\\b)|(?:\\bcatch\\b)|(?:\\binsert\\b)|(?:\\banno\\b)|(?:\\bthrow\\b)|(?:\\bbool\\b)|(?:\\bswitch\\b)|(?:\\btype\\b)|(?:\\bwhile\\b)|(?:\\bnotin\\b)|(?:\\bcase\\b)|(?:\\blayout\\b)|(?:\\bmod\\b)|(?:\\bextend\\b)|(?:\\bappend\\b)|(?:\\bfail\\b)|(?:\\bdatetime\\b)|(?:\\bfilter\\b)|(?:\\bloc\\b)|(?:\\bassert\\b)|(?:\\bdata\\b)|(?:\\bimport\\b)|(?:\\bnum\\b)|(?:\\btag\\b)|(?:\\bsyntax\\b)|(?:\\bint\\b)))\\k$)\\k|(?:(?:\\\\)[A-Z\\_a-z](?:[\\-0-9A-Z\\_a-z]*?(?![\\-0-9A-Z\\_a-z]))))(?:\\>))|(?:(?:\\\\)(?![\\<\\>\\\\]))|[\\x{01}-\\.0-\\;\\=\\?-\\[\\]-\\x{10FFFF}])*?(?:\\>)))*?(?:\\/)[dims]*?))", "name": "/inner/single/literal.regExp", @@ -60,6 +124,64 @@ } } }, + "/inner/multi/tag.default,tag.expression": { + "begin": 
"((?:\\@)(?:(?:[\\t-\\r\\x{20}\\x{85}\\x{A0}\\x{1680}\\x{180E}\\x{2000}-\\x{200A}\\x{2028}-\\x{2029}\\x{202F}\\x{205F}\\x{3000}]|(?:((?:\\/\\*)(?:[\\x{01}-\\)\\+-\\x{10FFFF}]|(?:(?:\\*)(?!(?:\\/))))*?(?:\\*\\/))|((?:\\/\\/)(?:[\\x{01}-\\t\\x{0B}-\\x{10FFFF}]*?(?![\\t\\r\\x{20}\\x{A0}\\x{1680}\\x{2000}-\\x{200A}\\x{202F}\\x{205F}\\x{3000}])(?:$)))))*?(?![\\t-\\r\\x{20}\\x{85}\\x{A0}\\x{1680}\\x{180E}\\x{2000}-\\x{200A}\\x{2028}-\\x{2029}\\x{202F}\\x{205F}\\x{3000}])(?!(?:\\/\\/))(?!(?:\\/\\*)))(?:(?=(?(?:(?:(?.*)$)(?!(?:(?:(?:\\bbreak\\b)|(?:\\bfor\\b)|(?:\\bstr\\b)|(?:\\bnode\\b)|(?:\\btuple\\b)|(?:\\bsolve\\b)|(?:\\brat\\b)|(?:\\bdynamic\\b)|(?:\\bassoc\\b)|(?:\\bbag\\b)|(?:\\bset\\b)|(?:\\bo\\b)|(?:\\bstart\\b)|(?:(?:\\bint\\b)|(?:\\blrel\\b)|(?:\\bbool\\b)|(?:\\btype\\b)|(?:\\bset\\b)|(?:\\bbag\\b)|(?:\\brat\\b)|(?:\\breal\\b)|(?:\\bnode\\b)|(?:\\btuple\\b)|(?:\\bmap\\b)|(?:\\bloc\\b)|(?:\\bnum\\b)|(?:\\blist\\b)|(?:\\bvalue\\b)|(?:\\bvoid\\b)|(?:\\brel\\b)|(?:\\bdatetime\\b)|(?:\\bstr\\b))|(?:\\blrel\\b)|(?:\\bcontinue\\b)|(?:\\bbracket\\b)|(?:\\brel\\b)|(?:\\blist\\b)|(?:\\btest\\b)|(?:\\breturn\\b)|(?:\\bfalse\\b)|(?:\\bjoin\\b)|(?:\\belse\\b)|(?:\\bit\\b)|(?:\\bin\\b)|(?:\\bif\\b)|(?:non\\-assoc)|(?:\\blexical\\b)|(?:\\bvalue\\b)|(?:\\bmap\\b)|(?:\\bvisit\\b)|(?:\\ball\\b)|(?:\\btry\\b)|(?:\\bprivate\\b)|(?:\\btrue\\b)|(?:\\bfinally\\b)|(?:\\breal\\b)|(?:\\bvoid\\b)|(?:\\bkeyword\\b)|(?:\\bany\\b)|(?:\\bone\\b)|(?:\\bmodule\\b)|(?:\\bpublic\\b)|(?:\\bthrows\\b)|(?:\\balias\\b)|(?:\\bdefault\\b)|(?:\\bcatch\\b)|(?:\\binsert\\b)|(?:\\banno\\b)|(?:\\bthrow\\b)|(?:\\bbool\\b)|(?:\\bswitch\\b)|(?:\\btype\\b)|(?:\\bwhile\\b)|(?:\\bnotin\\b)|(?:\\bcase\\b)|(?:\\blayout\\b)|(?:\\bmod\\b)|(?:\\bextend\\b)|(?:\\bappend\\b)|(?:\\bfail\\b)|(?:\\bdatetime\\b)|(?:\\bfilter\\b)|(?:\\bloc\\b)|(?:\\bassert\\b)|(?:\\bdata\\b)|(?:\\bimport\\b)|(?:\\bnum\\b)|(?:\\btag\\b)|(?:\\bsyntax\\b)|(?:\\bint\\b)))\\k$)\\k|(?:(?:\\\\)[A-Z\\_a-z](?:[\\-0-9A-Z\\_a-z]*?(?![\\-0-9A-Z\\_a-z])))
)(?:(?:[\\t-\\r\\x{20}\\x{85}\\x{A0}\\x{1680}\\x{180E}\\x{2000}-\\x{200A}\\x{2028}-\\x{2029}\\x{202F}\\x{205F}\\x{3000}]|(?:((?:\\/\\*)(?:[\\x{01}-\\)\\+-\\x{10FFFF}]|(?:(?:\\*)(?!(?:\\/))))*?(?:\\*\\/))|((?:\\/\\/)(?:[\\x{01}-\\t\\x{0B}-\\x{10FFFF}]*?(?![\\t\\r\\x{20}\\x{A0}\\x{1680}\\x{2000}-\\x{200A}\\x{202F}\\x{205F}\\x{3000}])(?:$)))))*?(?![\\t-\\r\\x{20}\\x{85}\\x{A0}\\x{1680}\\x{180E}\\x{2000}-\\x{200A}\\x{2028}-\\x{2029}\\x{202F}\\x{205F}\\x{3000}])(?!(?:\\/\\/))(?!(?:\\/\\*))))", + "end": "(?=.)", + "patterns": [ + { + "begin": "(\\{)", + "end": "(\\})", + "patterns": [ + { + "match": "([\\x{01}-\\x{10FFFF}])", + "captures": { + "1": { + "name": "comment" + } + } + } + ], + "endCaptures": { + "1": { + "name": "comment" + } + }, + "beginCaptures": { + "1": { + "name": "comment" + } + } + }, + { + "match": "(\\=)", + "captures": { + "1": { + "name": "comment" + } + } + } + ], + "endCaptures": {}, + "name": "/inner/multi/tag.default,tag.expression", + "beginCaptures": { + "1": { + "name": "comment" + }, + "2": { + "name": "comment" + }, + "3": { + "name": "comment" + }, + "6": { + "name": "comment" + }, + "7": { + "name": "comment" + } + }, + "applyEndPatternLast": true + }, "/inner/single/poststringchars": { "match": "((?<=(?:[\\t-\\r\\x{20}\\x{85}\\x{A0}\\x{1680}\\x{180E}\\x{2000}-\\x{200A}\\x{2028}-\\x{2029}\\x{202F}\\x{205F}\\x{3000}]|(?:\\/\\/)|(?:(?:^))|(?:\\/\\*)))(?:(?:\\>)(?:(?:(?:\\\\)[\\\"\\'\\<\\>\\\\bfnrt])|[\\x{01}-\\!\\#-\\&\\(-\\;\\=\\?-\\[\\]-\\x{10FFFF}]|(?:(?:\\n)[\\t\\x{20}\\x{A0}\\x{1680}\\x{2000}-\\x{200A}\\x{202F}\\x{205F}\\x{3000}]*?(?:\\'))|(?:(?:(?:\\\\)U(?:(?:\\b10\\b)|(?:(?:\\b0\\b)[0-9A-Fa-f]))[0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f])|(?:(?:\\\\)u[0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f])|(?:(?:\\\\)a[0-7][0-9A-Fa-f])))*?(?:\\\")))", "name": "/inner/single/poststringchars", @@ -121,6 +243,12 @@ { "include": "#/inner/single/$delimiters" }, + { + "include": "#/inner/single/output.stderrOutput" + }, + { + "include": 
"#/inner/single/output.stdoutOutput" + }, { "include": "#/inner/single/output.resultOutput" }, @@ -133,6 +261,9 @@ { "include": "#/inner/single/concretepart.gt" }, + { + "include": "#/inner/multi/concretepart.hole" + }, { "include": "#/inner/single/concretepart.lt" }, @@ -140,7 +271,10 @@ "include": "#/inner/single/concretepart.text" }, { - "include": "#/inner/single/tag.empty" + "include": "#/inner/multi/tag.default,tag.expression" + }, + { + "include": "#/inner/multi/tag.default,tag.expression" }, { "include": "#/inner/single/midstringchars" @@ -166,6 +300,9 @@ { "include": "#/inner/single/literal.regExp" }, + { + "include": "#/inner/multi/literal.regExp" + }, { "include": "#/inner/single/caseinsensitivestringconstant" }, @@ -176,13 +313,13 @@ "include": "#/inner/single/prestringchars" }, { - "include": "#/inner/multi/stringconstant,prestringchars" + "include": "#/inner/multi/prestringchars,stringconstant" }, { "include": "#/inner/single/stringconstant" }, { - "include": "#/inner/multi/stringconstant,prestringchars" + "include": "#/inner/multi/prestringchars,stringconstant" }, { "include": "#/inner/single/literal.integer" @@ -193,12 +330,6 @@ { "include": "#/inner/single/literal.real" }, - { - "include": "#/inner/single/output.stderrOutput" - }, - { - "include": "#/inner/single/output.stdoutOutput" - }, { "include": "#/inner/single/$keywords" } @@ -216,152 +347,6 @@ } } }, - "/inner/multi/midstringchars,poststringchars": { - "begin": "(\\>)", - "end": "((?:\\\")|(?:\\<))", - "patterns": [ - { - "match": "((?:\\\\)U(?:(?:\\b10\\b)|(?:(?:\\b0\\b)[0-9A-Fa-f]))[0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f])", - "captures": { - "1": { - "name": "string.quoted.double" - } - } - }, - { - "match": "((?:\\\\)u[0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f])", - "captures": { - "1": { - "name": "string.quoted.double" - } - } - }, - { - "match": "((?:\\\\)a[0-7][0-9A-Fa-f])", - "captures": { - "1": { - "name": "string.quoted.double" - } - } - }, - { - "match": 
"((?:\\n)[\\t\\x{20}\\x{A0}\\x{1680}\\x{2000}-\\x{200A}\\x{202F}\\x{205F}\\x{3000}]*?(?:\\'))", - "captures": { - "1": { - "name": "string.quoted.double" - } - } - }, - { - "match": "((?:\\\\)[\\\"\\'\\<\\>\\\\bfnrt])", - "captures": { - "1": { - "name": "string.quoted.double" - } - } - }, - { - "match": "([\\x{01}-\\!\\#-\\&\\(-\\;\\=\\?-\\[\\]-\\x{10FFFF}])", - "captures": { - "1": { - "name": "string.quoted.double" - } - } - }, - { - "match": "([\\x{01}-\\x{10FFFF}])", - "captures": { - "1": { - "name": "string.quoted.double" - } - } - } - ], - "endCaptures": { - "1": { - "name": "string.quoted.double" - } - }, - "name": "/inner/multi/midstringchars,poststringchars", - "beginCaptures": { - "1": { - "name": "string.quoted.double" - } - } - }, - "/inner/multi/stringconstant,prestringchars": { - "begin": "(\\\")", - "end": "((?:\\\")|(?:\\<))", - "patterns": [ - { - "match": "((?:\\\\)U(?:(?:\\b10\\b)|(?:(?:\\b0\\b)[0-9A-Fa-f]))[0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f])", - "captures": { - "1": { - "name": "string.quoted.double" - } - } - }, - { - "match": "((?:\\\\)u[0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f])", - "captures": { - "1": { - "name": "string.quoted.double" - } - } - }, - { - "match": "((?:\\\\)a[0-7][0-9A-Fa-f])", - "captures": { - "1": { - "name": "string.quoted.double" - } - } - }, - { - "match": "((?:\\\\)[\\\"\\'\\<\\>\\\\bfnrt])", - "captures": { - "1": { - "name": "string.quoted.double" - } - } - }, - { - "match": "((?:\\n)[\\t\\x{20}\\x{A0}\\x{1680}\\x{2000}-\\x{200A}\\x{202F}\\x{205F}\\x{3000}]*?(?:\\'))", - "captures": { - "1": { - "name": "string.quoted.double" - } - } - }, - { - "match": "([\\x{01}-\\!\\#-\\&\\(-\\;\\=\\?-\\[\\]-\\x{10FFFF}])", - "captures": { - "1": { - "name": "string.quoted.double" - } - } - }, - { - "match": "([\\x{01}-\\x{10FFFF}])", - "captures": { - "1": { - "name": "string.quoted.double" - } - } - } - ], - "endCaptures": { - "1": { - "name": "string.quoted.double" - } - }, - "name": 
"/inner/multi/stringconstant,prestringchars", - "beginCaptures": { - "1": { - "name": "string.quoted.double" - } - } - }, "/inner/single/comment.1": { "match": "((?:\\/\\/)(?:[\\x{01}-\\t\\x{0B}-\\x{10FFFF}]*?(?![\\t\\r\\x{20}\\x{A0}\\x{1680}\\x{2000}-\\x{200A}\\x{202F}\\x{205F}\\x{3000}])(?:$)))", "name": "/inner/single/comment.1", @@ -407,6 +392,28 @@ } } }, + "/inner/multi/literal.regExp": { + "begin": "((?:\\/)(?:(?:(?:\\\\)(?![\\/\\<\\>\\\\]))|[\\x{01}-\\.0-\\;\\=\\?-\\[\\]-\\x{10FFFF}]|(?:(?:\\\\)[\\/\\<\\>\\\\])|(?:(?:\\<)(?:(?=(?(?:(?:(?.*)$)(?!(?:(?:(?:\\bbreak\\b)|(?:\\bfor\\b)|(?:\\bstr\\b)|(?:\\bnode\\b)|(?:\\btuple\\b)|(?:\\bsolve\\b)|(?:\\brat\\b)|(?:\\bdynamic\\b)|(?:\\bassoc\\b)|(?:\\bbag\\b)|(?:\\bset\\b)|(?:\\bo\\b)|(?:\\bstart\\b)|(?:(?:\\bint\\b)|(?:\\blrel\\b)|(?:\\bbool\\b)|(?:\\btype\\b)|(?:\\bset\\b)|(?:\\bbag\\b)|(?:\\brat\\b)|(?:\\breal\\b)|(?:\\bnode\\b)|(?:\\btuple\\b)|(?:\\bmap\\b)|(?:\\bloc\\b)|(?:\\bnum\\b)|(?:\\blist\\b)|(?:\\bvalue\\b)|(?:\\bvoid\\b)|(?:\\brel\\b)|(?:\\bdatetime\\b)|(?:\\bstr\\b))|(?:\\blrel\\b)|(?:\\bcontinue\\b)|(?:\\bbracket\\b)|(?:\\brel\\b)|(?:\\blist\\b)|(?:\\btest\\b)|(?:\\breturn\\b)|(?:\\bfalse\\b)|(?:\\bjoin\\b)|(?:\\belse\\b)|(?:\\bit\\b)|(?:\\bin\\b)|(?:\\bif\\b)|(?:non\\-assoc)|(?:\\blexical\\b)|(?:\\bvalue\\b)|(?:\\bmap\\b)|(?:\\bvisit\\b)|(?:\\ball\\b)|(?:\\btry\\b)|(?:\\bprivate\\b)|(?:\\btrue\\b)|(?:\\bfinally\\b)|(?:\\breal\\b)|(?:\\bvoid\\b)|(?:\\bkeyword\\b)|(?:\\bany\\b)|(?:\\bone\\b)|(?:\\bmodule\\b)|(?:\\bpublic\\b)|(?:\\bthrows\\b)|(?:\\balias\\b)|(?:\\bdefault\\b)|(?:\\bcatch\\b)|(?:\\binsert\\b)|(?:\\banno\\b)|(?:\\bthrow\\b)|(?:\\bbool\\b)|(?:\\bswitch\\b)|(?:\\btype\\b)|(?:\\bwhile\\b)|(?:\\bnotin\\b)|(?:\\bcase\\b)|(?:\\blayout\\b)|(?:\\bmod\\b)|(?:\\bextend\\b)|(?:\\bappend\\b)|(?:\\bfail\\b)|(?:\\bdatetime\\b)|(?:\\bfilter\\b)|(?:\\bloc\\b)|(?:\\bassert\\b)|(?:\\bdata\\b)|(?:\\bimport\\b)|(?:\\bnum\\b)|(?:\\btag\\b)|(?:\\bsyntax\\b)|(?:\\bint\\b)))\\k$)\\k|(?:(?:\\\\)[A-Z\\_a-z](?:[\\-0
-9A-Z\\_a-z]*?(?![\\-0-9A-Z\\_a-z]))))(?:\\>))|(?:(?:\\<)(?:(?=(?(?:(?:(?.*)$)(?!(?:(?:(?:\\bbreak\\b)|(?:\\bfor\\b)|(?:\\bstr\\b)|(?:\\bnode\\b)|(?:\\btuple\\b)|(?:\\bsolve\\b)|(?:\\brat\\b)|(?:\\bdynamic\\b)|(?:\\bassoc\\b)|(?:\\bbag\\b)|(?:\\bset\\b)|(?:\\bo\\b)|(?:\\bstart\\b)|(?:(?:\\bint\\b)|(?:\\blrel\\b)|(?:\\bbool\\b)|(?:\\btype\\b)|(?:\\bset\\b)|(?:\\bbag\\b)|(?:\\brat\\b)|(?:\\breal\\b)|(?:\\bnode\\b)|(?:\\btuple\\b)|(?:\\bmap\\b)|(?:\\bloc\\b)|(?:\\bnum\\b)|(?:\\blist\\b)|(?:\\bvalue\\b)|(?:\\bvoid\\b)|(?:\\brel\\b)|(?:\\bdatetime\\b)|(?:\\bstr\\b))|(?:\\blrel\\b)|(?:\\bcontinue\\b)|(?:\\bbracket\\b)|(?:\\brel\\b)|(?:\\blist\\b)|(?:\\btest\\b)|(?:\\breturn\\b)|(?:\\bfalse\\b)|(?:\\bjoin\\b)|(?:\\belse\\b)|(?:\\bit\\b)|(?:\\bin\\b)|(?:\\bif\\b)|(?:non\\-assoc)|(?:\\blexical\\b)|(?:\\bvalue\\b)|(?:\\bmap\\b)|(?:\\bvisit\\b)|(?:\\ball\\b)|(?:\\btry\\b)|(?:\\bprivate\\b)|(?:\\btrue\\b)|(?:\\bfinally\\b)|(?:\\breal\\b)|(?:\\bvoid\\b)|(?:\\bkeyword\\b)|(?:\\bany\\b)|(?:\\bone\\b)|(?:\\bmodule\\b)|(?:\\bpublic\\b)|(?:\\bthrows\\b)|(?:\\balias\\b)|(?:\\bdefault\\b)|(?:\\bcatch\\b)|(?:\\binsert\\b)|(?:\\banno\\b)|(?:\\bthrow\\b)|(?:\\bbool\\b)|(?:\\bswitch\\b)|(?:\\btype\\b)|(?:\\bwhile\\b)|(?:\\bnotin\\b)|(?:\\bcase\\b)|(?:\\blayout\\b)|(?:\\bmod\\b)|(?:\\bextend\\b)|(?:\\bappend\\b)|(?:\\bfail\\b)|(?:\\bdatetime\\b)|(?:\\bfilter\\b)|(?:\\bloc\\b)|(?:\\bassert\\b)|(?:\\bdata\\b)|(?:\\bimport\\b)|(?:\\bnum\\b)|(?:\\btag\\b)|(?:\\bsyntax\\b)|(?:\\bint\\b)))\\k$)\\k|(?:(?:\\\\)[A-Z\\_a-z](?:[\\-0-9A-Z\\_a-z]*?(?![\\-0-9A-Z\\_a-z]))))(?:\\:)(?:(?:(?:\\\\)[\\/\\<\\>\\\\])|(?:(?:\\<)(?:(?=(?(?:(?:(?.*)$)(?!(?:(?:(?:\\bbreak\\b)|(?:\\bfor\\b)|(?:\\bstr\\b)|(?:\\bnode\\b)|(?:\\btuple\\b)|(?:\\bsolve\\b)|(?:\\brat\\b)|(?:\\bdynamic\\b)|(?:\\bassoc\\b)|(?:\\bbag\\b)|(?:\\bset\\b)|(?:\\bo\\b)|(?:\\bstart\\b)|(?:(?:\\bint\\b)|(?:\\blrel\\b)|(?:\\bbool\\b)|(?:\\btype\\b)|(?:\\bset\\b)|(?:\\bbag\\b)|(?:\\brat\\b)|(?:\\breal\\b)|(?:\\bnode\\b)|(?:\\btuple\\b)|(?:\\bmap\\b)|(?:
\\bloc\\b)|(?:\\bnum\\b)|(?:\\blist\\b)|(?:\\bvalue\\b)|(?:\\bvoid\\b)|(?:\\brel\\b)|(?:\\bdatetime\\b)|(?:\\bstr\\b))|(?:\\blrel\\b)|(?:\\bcontinue\\b)|(?:\\bbracket\\b)|(?:\\brel\\b)|(?:\\blist\\b)|(?:\\btest\\b)|(?:\\breturn\\b)|(?:\\bfalse\\b)|(?:\\bjoin\\b)|(?:\\belse\\b)|(?:\\bit\\b)|(?:\\bin\\b)|(?:\\bif\\b)|(?:non\\-assoc)|(?:\\blexical\\b)|(?:\\bvalue\\b)|(?:\\bmap\\b)|(?:\\bvisit\\b)|(?:\\ball\\b)|(?:\\btry\\b)|(?:\\bprivate\\b)|(?:\\btrue\\b)|(?:\\bfinally\\b)|(?:\\breal\\b)|(?:\\bvoid\\b)|(?:\\bkeyword\\b)|(?:\\bany\\b)|(?:\\bone\\b)|(?:\\bmodule\\b)|(?:\\bpublic\\b)|(?:\\bthrows\\b)|(?:\\balias\\b)|(?:\\bdefault\\b)|(?:\\bcatch\\b)|(?:\\binsert\\b)|(?:\\banno\\b)|(?:\\bthrow\\b)|(?:\\bbool\\b)|(?:\\bswitch\\b)|(?:\\btype\\b)|(?:\\bwhile\\b)|(?:\\bnotin\\b)|(?:\\bcase\\b)|(?:\\blayout\\b)|(?:\\bmod\\b)|(?:\\bextend\\b)|(?:\\bappend\\b)|(?:\\bfail\\b)|(?:\\bdatetime\\b)|(?:\\bfilter\\b)|(?:\\bloc\\b)|(?:\\bassert\\b)|(?:\\bdata\\b)|(?:\\bimport\\b)|(?:\\bnum\\b)|(?:\\btag\\b)|(?:\\bsyntax\\b)|(?:\\bint\\b)))\\k$)\\k|(?:(?:\\\\)[A-Z\\_a-z](?:[\\-0-9A-Z\\_a-z]*?(?![\\-0-9A-Z\\_a-z]))))(?:\\>))|(?:(?:\\\\)(?![\\<\\>\\\\]))|[\\x{01}-\\.0-\\;\\=\\?-\\[\\]-\\x{10FFFF}])*?(?:\\>)))*?(?:\\/)[dims]*?)", + "end": "(?=.)", + "patterns": [], + "endCaptures": {}, + "name": "/inner/multi/literal.regExp", + "beginCaptures": { + "1": { + "name": "constant.regexp" + } + }, + "applyEndPatternLast": true + }, + "/inner/single/concretepart.bq": { + "match": "(\\\\\\`)", + "name": "/inner/single/concretepart.bq", + "captures": { + "1": { + "name": "string" + } + } + }, "/inner/single/prestringchars": { "match": 
"((?<=(?:[\\t-\\r\\x{20}\\x{85}\\x{A0}\\x{1680}\\x{180E}\\x{2000}-\\x{200A}\\x{2028}-\\x{2029}\\x{202F}\\x{205F}\\x{3000}]|(?:\\/\\/)|(?:(?:^))|(?:\\/\\*)))(?:(?:\\\")(?:(?:(?:\\\\)[\\\"\\'\\<\\>\\\\bfnrt])|[\\x{01}-\\!\\#-\\&\\(-\\;\\=\\?-\\[\\]-\\x{10FFFF}]|(?:(?:\\n)[\\t\\x{20}\\x{A0}\\x{1680}\\x{2000}-\\x{200A}\\x{202F}\\x{205F}\\x{3000}]*?(?:\\'))|(?:(?:(?:\\\\)U(?:(?:\\b10\\b)|(?:(?:\\b0\\b)[0-9A-Fa-f]))[0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f])|(?:(?:\\\\)u[0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f])|(?:(?:\\\\)a[0-7][0-9A-Fa-f])))*?(?:\\<)))", "name": "/inner/single/prestringchars", @@ -444,7 +451,7 @@ } }, "/inner/single/$delimiters": { - "match": "(?:(?:bottom\\-up\\-break)|(?:\\))|(?:\\()|(?:\\x{226B})|(?:\\%)|(?:\\!\\:\\=)|(?:\\<\\=\\=\\>)|(?:\\!\\=)|(?:\\>\\=)|(?:\\:\\/\\/)|(?:non\\-assoc)|(?:\\&\\=)|(?:\\<\\-)|(?:\\*\\=)|(?:\\+\\=)|(?:top\\-down\\-break)|(?:\\,)|(?:\\.\\.\\.)|(?:\\/\\=)|(?:\\!\\<\\<)|(?:\\=\\>)|(?:\\!\\>\\>)|(?:\\|\\|)|(?:\\>\\>)|(?:\\:\\:)|(?:\\x{26A0})|(?:\\&\\&)|(?:\\:\\=)|(?:\\#)|(?:\\<\\<\\=)|(?:\\})|(?:\\?\\=)|(?:\\<\\:)|(?:\\=\\=\\>)|(?:\\^)|(?:\\;)|(?:\\{)|(?:\\-\\=)|(?:\\$T))", + "match": "(?:(?:bottom\\-up\\-break)|(?:\\))|(?:\\()|(?:\\%)|(?:\\!\\:\\=)|(?:\\<\\=\\=\\>)|(?:\\<\\<\\=)|(?:\\!\\=)|(?:\\>\\=)|(?:\\:\\/\\/)|(?:non\\-assoc)|(?:\\&\\=)|(?:\\<\\-)|(?:\\*\\=)|(?:\\+\\=)|(?:top\\-down\\-break)|(?:\\,)|(?:\\.\\.\\.)|(?:\\/\\=)|(?:\\!\\<\\<)|(?:\\=\\>)|(?:\\!\\>\\>)|(?:\\|\\|)|(?:\\>\\>)|(?:\\:\\:)|(?:\\&\\&)|(?:\\:\\=)|(?:\\#)|(?:\\?\\=)|(?:\\<\\:)|(?:\\=\\=\\>)|(?:\\^)|(?:\\;)|(?:\\{)|(?:\\-\\=)|(?:\\$T))", "name": "/inner/single/$delimiters", "captures": {} }, @@ -464,6 +471,12 @@ { "include": "#/inner/single/$delimiters" }, + { + "include": "#/inner/single/output.stderrOutput" + }, + { + "include": "#/inner/single/output.stdoutOutput" + }, { "include": "#/inner/single/output.resultOutput" }, @@ -477,7 +490,10 @@ "include": "#/inner/single/char.3" }, { - "include": "#/inner/single/tag.empty" + "include": 
"#/inner/multi/tag.default,tag.expression" + }, + { + "include": "#/inner/multi/tag.default,tag.expression" }, { "include": "#/inner/single/midstringchars" @@ -503,6 +519,9 @@ { "include": "#/inner/single/literal.regExp" }, + { + "include": "#/inner/multi/literal.regExp" + }, { "include": "#/inner/single/caseinsensitivestringconstant" }, @@ -513,13 +532,13 @@ "include": "#/inner/single/prestringchars" }, { - "include": "#/inner/multi/stringconstant,prestringchars" + "include": "#/inner/multi/prestringchars,stringconstant" }, { "include": "#/inner/single/stringconstant" }, { - "include": "#/inner/multi/stringconstant,prestringchars" + "include": "#/inner/multi/prestringchars,stringconstant" }, { "include": "#/inner/single/literal.integer" @@ -530,12 +549,6 @@ { "include": "#/inner/single/literal.real" }, - { - "include": "#/inner/single/output.stderrOutput" - }, - { - "include": "#/inner/single/output.stdoutOutput" - }, { "include": "#/inner/single/$keywords" } @@ -573,7 +586,7 @@ } }, { - "match": "((?:\\n)[\\t\\x{20}\\x{A0}\\x{1680}\\x{2000}-\\x{200A}\\x{202F}\\x{205F}\\x{3000}]*?(?:\\'))", + "match": "((?:\\\\)[\\\"\\'\\<\\>\\\\bfnrt])", "captures": { "1": { "name": "string.quoted.single" @@ -581,7 +594,7 @@ } }, { - "match": "((?:\\\\)[\\\"\\'\\<\\>\\\\bfnrt])", + "match": "((?:\\n)[\\t\\x{20}\\x{A0}\\x{1680}\\x{2000}-\\x{200A}\\x{202F}\\x{205F}\\x{3000}]*?(?:\\'))", "captures": { "1": { "name": "string.quoted.single" @@ -626,27 +639,110 @@ } } }, - "/inner/single/output.stdoutOutput": { - "match": "((?<=(?:[\\t-\\r\\x{20}\\x{85}\\x{A0}\\x{1680}\\x{180E}\\x{2000}-\\x{200A}\\x{2028}-\\x{2029}\\x{202F}\\x{205F}\\x{3000}]|(?:\\/\\/)|(?:(?:^))|(?:\\/\\*)))(?:(?:(?:^)(?:\\x{226B}))[\\x{01}-\\t\\x{0B}-\\x{0C}\\x{0E}-\\x{10FFFF}]*?(?:\\n)))", - "name": "/inner/single/output.stdoutOutput", - "captures": { + "/inner/multi/concretepart.hole": { + "begin": "(\\<)", + "end": "(\\>)", + "patterns": [ + { + "match": "([\\x{01}-\\x{10FFFF}])", + "captures": { + "1": { + 
"name": "variable" + } + } + } + ], + "endCaptures": { "1": { - "name": "string" + "name": "variable" + } + }, + "name": "/inner/multi/concretepart.hole", + "beginCaptures": { + "1": { + "name": "variable" } } }, - "/inner/single/tag.empty": { - "match": "((?:\\@)(?:(?:[\\t-\\r\\x{20}\\x{85}\\x{A0}\\x{1680}\\x{180E}\\x{2000}-\\x{200A}\\x{2028}-\\x{2029}\\x{202F}\\x{205F}\\x{3000}]|(?:((?:\\/\\*)(?:[\\x{01}-\\)\\+-\\x{10FFFF}]|(?:(?:\\*)(?!(?:\\/))))*?(?:\\*\\/))|((?:\\/\\/)(?:[\\x{01}-\\t\\x{0B}-\\x{10FFFF}]*?(?![\\t\\r\\x{20}\\x{A0}\\x{1680}\\x{2000}-\\x{200A}\\x{202F}\\x{205F}\\x{3000}])(?:$)))))*?(?![\\t-\\r\\x{20}\\x{85}\\x{A0}\\x{1680}\\x{180E}\\x{2000}-\\x{200A}\\x{2028}-\\x{2029}\\x{202F}\\x{205F}\\x{3000}])(?!(?:\\/\\/))(?!(?:\\/\\*)))(?:(?=(?(?:(?:(?.*)$)(?!(?:(?:(?:\\bbreak\\b)|(?:\\bfor\\b)|(?:\\bstr\\b)|(?:\\bnode\\b)|(?:\\btuple\\b)|(?:\\bsolve\\b)|(?:\\brat\\b)|(?:\\bdynamic\\b)|(?:\\bassoc\\b)|(?:\\bbag\\b)|(?:\\bset\\b)|(?:\\bo\\b)|(?:\\bstart\\b)|(?:(?:\\bint\\b)|(?:\\blrel\\b)|(?:\\bbool\\b)|(?:\\btype\\b)|(?:\\bset\\b)|(?:\\bbag\\b)|(?:\\brat\\b)|(?:\\breal\\b)|(?:\\bnode\\b)|(?:\\btuple\\b)|(?:\\bmap\\b)|(?:\\bloc\\b)|(?:\\bnum\\b)|(?:\\blist\\b)|(?:\\bvalue\\b)|(?:\\bvoid\\b)|(?:\\brel\\b)|(?:\\bdatetime\\b)|(?:\\bstr\\b))|(?:\\blrel\\b)|(?:\\bcontinue\\b)|(?:\\bbracket\\b)|(?:\\brel\\b)|(?:\\blist\\b)|(?:\\btest\\b)|(?:\\breturn\\b)|(?:\\bfalse\\b)|(?:\\bjoin\\b)|(?:\\belse\\b)|(?:\\bit\\b)|(?:\\bin\\b)|(?:\\bif\\b)|(?:non\\-assoc)|(?:\\blexical\\b)|(?:\\bvalue\\b)|(?:\\bmap\\b)|(?:\\bvisit\\b)|(?:\\ball\\b)|(?:\\btry\\b)|(?:\\bprivate\\b)|(?:\\btrue\\b)|(?:\\bfinally\\b)|(?:\\breal\\b)|(?:\\bvoid\\b)|(?:\\bkeyword\\b)|(?:\\bany\\b)|(?:\\bone\\b)|(?:\\bmodule\\b)|(?:\\bpublic\\b)|(?:\\bthrows\\b)|(?:\\balias\\b)|(?:\\bdefault\\b)|(?:\\bcatch\\b)|(?:\\binsert\\b)|(?:\\banno\\b)|(?:\\bthrow\\b)|(?:\\bbool\\b)|(?:\\bswitch\\b)|(?:\\btype\\b)|(?:\\bwhile\\b)|(?:\\bnotin\\b)|(?:\\bcase\\b)|(?:\\blayout\\b)|(?:\\bmod\\b)|(?:\\bextend\\b)|(?:\\bappend
\\b)|(?:\\bfail\\b)|(?:\\bdatetime\\b)|(?:\\bfilter\\b)|(?:\\bloc\\b)|(?:\\bassert\\b)|(?:\\bdata\\b)|(?:\\bimport\\b)|(?:\\bnum\\b)|(?:\\btag\\b)|(?:\\bsyntax\\b)|(?:\\bint\\b)))\\k$)\\k|(?:(?:\\\\)[A-Z\\_a-z](?:[\\-0-9A-Z\\_a-z]*?(?![\\-0-9A-Z\\_a-z])))))", - "name": "/inner/single/tag.empty", - "captures": { - "1": { - "name": "comment" + "/inner/multi/prestringchars,stringconstant": { + "begin": "(\\\")", + "end": "((?:\\\")|(?:\\<))", + "patterns": [ + { + "match": "((?:\\\\)U(?:(?:\\b10\\b)|(?:(?:\\b0\\b)[0-9A-Fa-f]))[0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f])", + "captures": { + "1": { + "name": "string.quoted.double" + } + } }, - "2": { - "name": "comment" + { + "match": "((?:\\\\)u[0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f])", + "captures": { + "1": { + "name": "string.quoted.double" + } + } }, - "3": { - "name": "comment" + { + "match": "((?:\\\\)a[0-7][0-9A-Fa-f])", + "captures": { + "1": { + "name": "string.quoted.double" + } + } + }, + { + "match": "((?:\\n)[\\t\\x{20}\\x{A0}\\x{1680}\\x{2000}-\\x{200A}\\x{202F}\\x{205F}\\x{3000}]*?(?:\\'))", + "captures": { + "1": { + "name": "string.quoted.double" + } + } + }, + { + "match": "((?:\\\\)[\\\"\\'\\<\\>\\\\bfnrt])", + "captures": { + "1": { + "name": "string.quoted.double" + } + } + }, + { + "match": "([\\x{01}-\\!\\#-\\&\\(-\\;\\=\\?-\\[\\]-\\x{10FFFF}])", + "captures": { + "1": { + "name": "string.quoted.double" + } + } + }, + { + "match": "([\\x{01}-\\x{10FFFF}])", + "captures": { + "1": { + "name": "string.quoted.double" + } + } + } + ], + "endCaptures": { + "1": { + "name": "string.quoted.double" + } + }, + "name": "/inner/multi/prestringchars,stringconstant", + "beginCaptures": { + "1": { + "name": "string.quoted.double" + } + } + }, + "/inner/single/output.stdoutOutput": { + "match": 
"((?<=(?:[\\t-\\r\\x{20}\\x{85}\\x{A0}\\x{1680}\\x{180E}\\x{2000}-\\x{200A}\\x{2028}-\\x{2029}\\x{202F}\\x{205F}\\x{3000}]|(?:\\/\\/)|(?:(?:^))|(?:\\/\\*)))(?:(?:(?:^)(?:\\x{226B}))[\\x{01}-\\t\\x{0B}-\\x{0C}\\x{0E}-\\x{10FFFF}]*?(?:\\n)))", + "name": "/inner/single/output.stdoutOutput", + "captures": { + "1": { + "name": "string" } } }, @@ -665,6 +761,12 @@ { "include": "#/inner/single/$delimiters" }, + { + "include": "#/inner/single/output.stderrOutput" + }, + { + "include": "#/inner/single/output.stdoutOutput" + }, { "include": "#/inner/single/output.resultOutput" }, @@ -675,7 +777,7 @@ "include": "#/outer/[" }, { - "include": "#/inner/single/tag.empty" + "include": "#/inner/multi/tag.default,tag.expression" }, { "include": "#/inner/single/midstringchars" @@ -698,6 +800,9 @@ { "include": "#/inner/single/literal.regExp" }, + { + "include": "#/inner/multi/literal.regExp" + }, { "include": "#/inner/single/caseinsensitivestringconstant" }, @@ -711,7 +816,7 @@ "include": "#/inner/single/stringconstant" }, { - "include": "#/inner/multi/stringconstant,prestringchars" + "include": "#/inner/multi/prestringchars,stringconstant" }, { "include": "#/inner/single/literal.integer" @@ -722,12 +827,6 @@ { "include": "#/inner/single/literal.real" }, - { - "include": "#/inner/single/output.stderrOutput" - }, - { - "include": "#/inner/single/output.stdoutOutput" - }, { "include": "#/inner/single/$keywords" } From e8a887cdd0921161b6ac728f51452e63d671a57b Mon Sep 17 00:00:00 2001 From: Sung-Shik Jongmans Date: Mon, 9 Sep 2024 16:21:09 +0200 Subject: [PATCH 17/17] Simplify a few expressions to improve readability --- .../main/rascal/lang/textmate/Conversion.rsc | 17 +++++++++-------- .../rascal/lang/textmate/ConversionUnit.rsc | 10 ++-------- 2 files changed, 11 insertions(+), 16 deletions(-) diff --git a/rascal-textmate-core/src/main/rascal/lang/textmate/Conversion.rsc b/rascal-textmate-core/src/main/rascal/lang/textmate/Conversion.rsc index af75c9e..62286c9 100644 --- 
a/rascal-textmate-core/src/main/rascal/lang/textmate/Conversion.rsc +++ b/rascal-textmate-core/src/main/rascal/lang/textmate/Conversion.rsc @@ -55,7 +55,7 @@ RscGrammar preprocess(RscGrammar rsc) { // Replace occurrences of singleton ranges with just the corresponding // literal. This makes it easier to identify delimiters. return visit (rsc) { - case s: \char-class([range(char, char)]) => d + case \char-class([range(char, char)]) => d when d := \lit("<stringChar(char)>"), isDelimiter(d) } } @@ -132,13 +132,14 @@ list[ConversionUnit] analyze(RscGrammar rsc) { list[Production] prodsKeywords = [prod(lex(KEYWORDS_PRODUCTION_NAME), [\alt(keywords)], {\tag("category"("keyword.control"))})]; // Return - bool isEmptyProd(prod(_, [\alt(alternatives)], _)) = alternatives == {}; - set[ConversionUnit] units - = {unit(rsc, p, false, hasNewline(rsc, p), getOuterDelimiterPair(rsc, p), getInnerDelimiterPair(rsc, p, getOnlyFirst = true)) | p <- prodsNonRecursive} - + {unit(rsc, p, true, hasNewline(rsc, p), getOuterDelimiterPair(rsc, p), getInnerDelimiterPair(rsc, p, getOnlyFirst = true)) | p <- prodsRecursive} - + {unit(rsc, p, false, false, <nothing(), nothing()>, <nothing(), nothing()>) | p <- prodsDelimiters, !isEmptyProd(p)} - + {unit(rsc, p, false, false, <nothing(), nothing()>, <nothing(), nothing()>) | p <- prodsKeywords, !isEmptyProd(p)}; - + bool isRecursive(Production p) + = p in prodsRecursive; + bool isEmptyProd(prod(_, [\alt(alternatives)], _)) + = alternatives == {}; + + set[ConversionUnit] units = {}; + units += {unit(rsc, p, isRecursive(p), hasNewline(rsc, p), getOuterDelimiterPair(rsc, p), getInnerDelimiterPair(rsc, p, getOnlyFirst = true)) | p <- prods}; + units += {unit(rsc, p, false, false, <nothing(), nothing()>, <nothing(), nothing()>) | p <- prodsDelimiters + prodsKeywords, !isEmptyProd(p)}; return sort([*removeStrictPrefixes(units)]); } diff --git a/rascal-textmate-core/src/main/rascal/lang/textmate/ConversionUnit.rsc b/rascal-textmate-core/src/main/rascal/lang/textmate/ConversionUnit.rsc index 9cfe506..9ddbf31 100644 --- a/rascal-textmate-core/src/main/rascal/lang/textmate/ConversionUnit.rsc +++ 
b/rascal-textmate-core/src/main/rascal/lang/textmate/ConversionUnit.rsc @@ -165,14 +165,8 @@ bool isStrictPrefix(ConversionUnit u1, ConversionUnit u2) = isStrictPrefix(u1.prod.symbols, u2.prod.symbols); // TODO: This function could be moved to a separate, generic module -private bool isStrictPrefix([], []) - = false; -private bool isStrictPrefix([], [_, *_]) - = true; -private bool isStrictPrefix([_, *_], []) - = false; -private bool isStrictPrefix([head1, *tail1], [head2, *tail2]) - = head1 == head2 && isStrictPrefix(tail1, tail2); +private bool isStrictPrefix(list[&T] l1, list[&T] l2) + = size(l1) < size(l2) && !any(i <- [0..size(l1)], l1[i] != l2[i]); @synopsis{ Representation of a *decomposition* of a list of units (i.e., the lists of