From c93888998b589174ac0920ab1ce58557099e5bb7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabi=C3=A1n=20Heredia=20Montiel?= Date: Tue, 14 Apr 2020 23:17:01 -0500 Subject: [PATCH] Add unicode operator support for Mathematical Operators and Arrows --- src/Idris/Parser.idr | 36 +++++++++------- src/Parser/Lexer.idr | 64 ++++++++++++++++++----------- tests/Main.idr | 2 + tests/idris2/unicode001/Unicode.idr | 9 ++++ tests/idris2/unicode001/expected | 1 + tests/idris2/unicode001/run | 3 ++ 6 files changed, 78 insertions(+), 37 deletions(-) create mode 100644 tests/idris2/unicode001/Unicode.idr create mode 100644 tests/idris2/unicode001/expected create mode 100644 tests/idris2/unicode001/run diff --git a/src/Idris/Parser.idr b/src/Idris/Parser.idr index 0893ae7ce..e5b7da540 100644 --- a/src/Idris/Parser.idr +++ b/src/Idris/Parser.idr @@ -39,6 +39,15 @@ export plhs : ParseOpts plhs = MkParseOpts False False +leftArrow : Rule () +leftArrow = symbol "<-" <|> symbol "←" + +rightArrow : Rule () +rightArrow = symbol "->" <|> symbol "→" + +doubleRightArrow : Rule () +doubleRightArrow = symbol "=>" <|> symbol "⇒" + atom : FileName -> Rule PTerm atom fname = do start <- location @@ -418,12 +427,11 @@ mutual bindSymbol : Rule (PiInfo PTerm) bindSymbol - = do symbol "->" + = do rightArrow pure Explicit - <|> do symbol "=>" + <|> do doubleRightArrow pure AutoImplicit - explicitPi : FileName -> IndentInfo -> Rule PTerm explicitPi fname indents = do start <- location @@ -443,7 +451,7 @@ mutual commit binders <- pibindList fname start indents symbol "}" - symbol "->" + rightArrow scope <- typeExpr pdef fname indents end <- location pure (pibindAll (MkFC fname start end) AutoImplicit binders scope) @@ -457,7 +465,7 @@ mutual t <- simpleExpr fname indents binders <- pibindList fname start indents symbol "}" - symbol "->" + rightArrow scope <- typeExpr pdef fname indents end <- location pure (pibindAll (MkFC fname start end) (DefImplicit t) binders scope) @@ -483,7 +491,7 @@ mutual symbol "{" binders <- pibindList fname start indents symbol "}" - symbol "->" + rightArrow scope <- typeExpr pdef fname indents end <- location pure (pibindAll (MkFC fname start end) Implicit binders scope) @@ -493,7 +501,7 @@ mutual = do start <- location symbol "\\" binders <- bindList fname start indents - symbol "=>" + doubleRightArrow mustContinue indents Nothing scope <- expr pdef fname indents end <- location @@ -596,7 +604,7 @@ mutual caseRHS : FileName -> FilePos -> IndentInfo -> PTerm -> Rule PClause caseRHS fname start indents lhs - = do symbol "=>" + = do doubleRightArrow mustContinue indents Nothing rhs <- expr pdef fname indents atEnd indents @@ -633,7 +641,7 @@ mutual field : FileName -> IndentInfo -> Rule PFieldUpdate field fname indents - = do path <- sepBy1 (symbol "->") unqualifiedName + = do path <- sepBy1 rightArrow unqualifiedName upd <- (do symbol "="; pure PSetField) <|> (do symbol "$="; pure PSetFieldApp) @@ -675,7 +683,7 @@ mutual -- If the name doesn't begin with a lower case letter, we should -- treat this as a pattern, so fail validPatternVar n - symbol "<-" + leftArrow val <- expr pdef fname indents atEnd indents end <- location @@ -701,7 +709,7 @@ mutual (do atEnd indents end <- location pure [DoExp (MkFC fname start end) e]) - <|> (do symbol "<-" + <|> (do leftArrow val <- expr pnowith fname indents alts <- block (patAlt fname) atEnd indents @@ -1151,7 +1159,7 @@ getRight (Right v) = Just v constraints : FileName -> IndentInfo -> EmptyRule (List (Maybe Name, PTerm)) constraints fname indents = do tm <- appExpr pdef fname indents - symbol "=>" + doubleRightArrow more <- constraints fname indents pure ((Nothing, tm) :: more) <|> do symbol "(" @@ -1159,7 +1167,7 @@ constraints fname indents symbol ":" tm <- expr pdef fname indents symbol ")" - symbol "=>" + doubleRightArrow more <- constraints fname indents pure ((Just n, tm) :: more) <|> pure [] @@ -1173,7 +1181,7 @@ implBinds fname indents symbol ":" tm <- expr pdef fname indents symbol "}" - symbol "->" + rightArrow more <- implBinds fname indents pure ((n, rig, tm) :: more) <|> pure [] diff --git a/src/Parser/Lexer.idr b/src/Parser/Lexer.idr index 48dbfb434..5700ca9ee 100644 --- a/src/Parser/Lexer.idr +++ b/src/Parser/Lexer.idr @@ -40,6 +40,10 @@ export Show (TokenData Token) where show t = show (line t, col t, tok t) +-- +-- Comment Lexers +-- + comment : Lexer comment = is '-' <+> is '-' <+> many (isNot '\n') @@ -61,6 +65,35 @@ blockComment = is '{' <+> is '-' <+> toEndComment 1 docComment : Lexer docComment = is '|' <+> is '|' <+> is '|' <+> many (isNot '\n') +-- +-- Symbols +-- + +export +isOpChar : Char -> Bool +isOpChar c = inLatin || inArrows || inMathematicalOperators + where + inRange : (Int, Int) -> Lazy Bool + inRange (lowerBound, upperBound) = (c >= chr lowerBound && c <= chr upperBound) + inLatin = c `elem` (unpack ":!#$%&*+./<=>?@\\^|-~") + inArrows = inRange (8592, 8703) + inMathematicalOperators = inRange (8704, 8959) + +validSymbol : Lexer +validSymbol = some (pred isOpChar) + +||| Special symbols - things which can't be a prefix of another symbol, and +||| don't match 'validSymbol' +export +symbols : List String +symbols + = [".(", -- for things such as Foo.Bar.(+) + "@{", + "[|", "|]", + "(", ")", "{", "}", "[", "]", ",", ";", "_", + "`(", "`"] + +-- -- Identifier Lexer -- -- There are two variants, a strict ident and a relaxed ident. @@ -68,14 +101,14 @@ docComment = is '|' <+> is '|' <+> is '|' <+> many (isNot '\n') startIdent : Char -> Bool startIdent '_' = True -startIdent x = isAlpha x || x > chr 127 +startIdent c = isAlpha c || ((c > chr 127) && not (isOpChar c)) %inline validIdent' : Bool -> Char -> Bool validIdent' _ '_' = True validIdent' r '-' = r validIdent' _ '\'' = True -validIdent' _ x = isAlphaNum x || x > chr 127 +validIdent' _ c = isAlphaNum c || ((c > chr 127) && not (isOpChar c)) %inline ident' : Bool -> Lexer @@ -95,6 +128,10 @@ identRelaxed = ident' True holeIdent : Lexer holeIdent = is '?' <+> identStrict +-- +-- Literal Lexers +-- + doubleLit : Lexer doubleLit = digits <+> is '.' <+> digits <+> opt @@ -129,32 +166,13 @@ keywords = ["data", "module", "where", "let", "in", "do", "record", special : List String special = ["%lam", "%pi", "%imppi", "%let"] --- Special symbols - things which can't be a prefix of another symbol, and --- don't match 'validSymbol' -export -symbols : List String -symbols - = [".(", -- for things such as Foo.Bar.(+) - "@{", - "[|", "|]", - "(", ")", "{", "}", "[", "]", ",", ";", "_", - "`(", "`"] - - -export -isOpChar : Char -> Bool -isOpChar c = c `elem` (unpack ":!#$%&*+./<=>?@\\^|-~") - -validSymbol : Lexer -validSymbol = some (pred isOpChar) - -- Valid symbols which have a special meaning so can't be operators export reservedSymbols : List String reservedSymbols = symbols ++ - ["%", "\\", ":", "=", "|", "|||", "<-", "->", "=>", "?", "!", - "&", "**", ".."] + ["%", "\\", ":", "=", "|", "|||", "?", "!", "&", "**", + "..", "<-", "->", "=>", "←", "→", "⇒"] fromHexLit : String -> Integer fromHexLit str diff --git a/tests/Main.idr b/tests/Main.idr index 3f76cc352..a5ba1795c 100644 --- a/tests/Main.idr +++ b/tests/Main.idr @@ -51,6 +51,8 @@ idrisTests "literate001", "literate002", "literate003", "literate004", "literate005", "literate006", "literate007", "literate008", "literate009", + -- Unicode + "unicode001", -- Interfaces "interface001", "interface002", "interface003", "interface004", "interface005", "interface006", "interface007", "interface008", diff --git a/tests/idris2/unicode001/Unicode.idr b/tests/idris2/unicode001/Unicode.idr new file mode 100644 index 000000000..caea1bd70 --- /dev/null +++ b/tests/idris2/unicode001/Unicode.idr @@ -0,0 +1,9 @@ +module Unicode + +infix 6 ≡, ≢ + +public export +interface UnicodeEq a where + (≡) : a → a → Bool + (≢) : a → a → Bool + diff --git a/tests/idris2/unicode001/expected b/tests/idris2/unicode001/expected new file mode 100644 index 000000000..0143f0f45 --- /dev/null +++ b/tests/idris2/unicode001/expected @@ -0,0 +1 @@ +1/1: Building Unicode (Unicode.idr) diff --git a/tests/idris2/unicode001/run b/tests/idris2/unicode001/run new file mode 100644 index 000000000..a6a0124a8 --- /dev/null +++ b/tests/idris2/unicode001/run @@ -0,0 +1,3 @@ +$1 -c Unicode.idr + +rm -rf build/