Skip to content

Commit

Permalink
Add unicode operator support for Mathematical Operators and Arrows
Browse files Browse the repository at this point in the history
  • Loading branch information
fabianhjr committed Apr 16, 2020
1 parent 05298b4 commit c938889
Show file tree
Hide file tree
Showing 6 changed files with 78 additions and 37 deletions.
36 changes: 22 additions & 14 deletions src/Idris/Parser.idr
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,15 @@ export
plhs : ParseOpts
plhs = MkParseOpts False False

leftArrow : Rule ()
leftArrow = symbol "<-" <|> symbol ""

rightArrow : Rule ()
rightArrow = symbol "->" <|> symbol ""

doubleRightArrow : Rule ()
doubleRightArrow = symbol "=>" <|> symbol ""

atom : FileName -> Rule PTerm
atom fname
= do start <- location
Expand Down Expand Up @@ -418,12 +427,11 @@ mutual

bindSymbol : Rule (PiInfo PTerm)
bindSymbol
= do symbol "->"
= do rightArrow
pure Explicit
<|> do symbol "=>"
<|> do doubleRightArrow
pure AutoImplicit


explicitPi : FileName -> IndentInfo -> Rule PTerm
explicitPi fname indents
= do start <- location
Expand All @@ -443,7 +451,7 @@ mutual
commit
binders <- pibindList fname start indents
symbol "}"
symbol "->"
rightArrow
scope <- typeExpr pdef fname indents
end <- location
pure (pibindAll (MkFC fname start end) AutoImplicit binders scope)
Expand All @@ -457,7 +465,7 @@ mutual
t <- simpleExpr fname indents
binders <- pibindList fname start indents
symbol "}"
symbol "->"
rightArrow
scope <- typeExpr pdef fname indents
end <- location
pure (pibindAll (MkFC fname start end) (DefImplicit t) binders scope)
Expand All @@ -483,7 +491,7 @@ mutual
symbol "{"
binders <- pibindList fname start indents
symbol "}"
symbol "->"
rightArrow
scope <- typeExpr pdef fname indents
end <- location
pure (pibindAll (MkFC fname start end) Implicit binders scope)
Expand All @@ -493,7 +501,7 @@ mutual
= do start <- location
symbol "\\"
binders <- bindList fname start indents
symbol "=>"
doubleRightArrow
mustContinue indents Nothing
scope <- expr pdef fname indents
end <- location
Expand Down Expand Up @@ -596,7 +604,7 @@ mutual

caseRHS : FileName -> FilePos -> IndentInfo -> PTerm -> Rule PClause
caseRHS fname start indents lhs
= do symbol "=>"
= do doubleRightArrow
mustContinue indents Nothing
rhs <- expr pdef fname indents
atEnd indents
Expand Down Expand Up @@ -633,7 +641,7 @@ mutual

field : FileName -> IndentInfo -> Rule PFieldUpdate
field fname indents
= do path <- sepBy1 (symbol "->") unqualifiedName
= do path <- sepBy1 rightArrow unqualifiedName
upd <- (do symbol "="; pure PSetField)
<|>
(do symbol "$="; pure PSetFieldApp)
Expand Down Expand Up @@ -675,7 +683,7 @@ mutual
-- If the name doesn't begin with a lower case letter, we should
-- treat this as a pattern, so fail
validPatternVar n
symbol "<-"
leftArrow
val <- expr pdef fname indents
atEnd indents
end <- location
Expand All @@ -701,7 +709,7 @@ mutual
(do atEnd indents
end <- location
pure [DoExp (MkFC fname start end) e])
<|> (do symbol "<-"
<|> (do leftArrow
val <- expr pnowith fname indents
alts <- block (patAlt fname)
atEnd indents
Expand Down Expand Up @@ -1151,15 +1159,15 @@ getRight (Right v) = Just v
constraints : FileName -> IndentInfo -> EmptyRule (List (Maybe Name, PTerm))
constraints fname indents
= do tm <- appExpr pdef fname indents
symbol "=>"
doubleRightArrow
more <- constraints fname indents
pure ((Nothing, tm) :: more)
<|> do symbol "("
n <- name
symbol ":"
tm <- expr pdef fname indents
symbol ")"
symbol "=>"
doubleRightArrow
more <- constraints fname indents
pure ((Just n, tm) :: more)
<|> pure []
Expand All @@ -1173,7 +1181,7 @@ implBinds fname indents
symbol ":"
tm <- expr pdef fname indents
symbol "}"
symbol "->"
rightArrow
more <- implBinds fname indents
pure ((n, rig, tm) :: more)
<|> pure []
Expand Down
64 changes: 41 additions & 23 deletions src/Parser/Lexer.idr
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,10 @@ export
Show (TokenData Token) where
show t = show (line t, col t, tok t)

--
-- Comment Lexers
--

comment : Lexer
comment = is '-' <+> is '-' <+> many (isNot '\n')

Expand All @@ -61,21 +65,50 @@ blockComment = is '{' <+> is '-' <+> toEndComment 1
docComment : Lexer
docComment = is '|' <+> is '|' <+> is '|' <+> many (isNot '\n')

--
-- Symbols
--

export
isOpChar : Char -> Bool
isOpChar c = inLatin || inArrows || inMathematicalOperators
where
inRange : (Int, Int) -> Lazy Bool
inRange (lowerBound, upperBound) = (c >= chr lowerBound && c <= chr upperBound)
inLatin = c `elem` (unpack ":!#$%&*+./<=>?@\\^|-~")
inArrows = inRange (8592, 8703)
inMathematicalOperators = inRange (8704, 8959)

validSymbol : Lexer
validSymbol = some (pred isOpChar)

||| Special symbols - things which can't be a prefix of another symbol, and
||| don't match 'validSymbol'
export
symbols : List String
symbols
= [".(", -- for things such as Foo.Bar.(+)
"@{",
"[|", "|]",
"(", ")", "{", "}", "[", "]", ",", ";", "_",
"`(", "`"]

--
-- Identifier Lexer
--
-- There are two variants, a strict ident and a relaxed ident.
-- Prime definitions recieve a boolean determining if it is relaxed.

startIdent : Char -> Bool
startIdent '_' = True
startIdent x = isAlpha x || x > chr 127
startIdent c = isAlpha c || ((c > chr 127) && not (isOpChar c))

%inline
validIdent' : Bool -> Char -> Bool
validIdent' _ '_' = True
validIdent' r '-' = r
validIdent' _ '\'' = True
validIdent' _ x = isAlphaNum x || x > chr 127
validIdent' _ c = isAlphaNum c || ((c > chr 127) && not (isOpChar c))

%inline
ident' : Bool -> Lexer
Expand All @@ -95,6 +128,10 @@ identRelaxed = ident' True
holeIdent : Lexer
holeIdent = is '?' <+> identStrict

--
-- Literal Lexers
--

doubleLit : Lexer
doubleLit
= digits <+> is '.' <+> digits <+> opt
Expand Down Expand Up @@ -129,32 +166,13 @@ keywords = ["data", "module", "where", "let", "in", "do", "record",
special : List String
special = ["%lam", "%pi", "%imppi", "%let"]

-- Special symbols - things which can't be a prefix of another symbol, and
-- don't match 'validSymbol'
export
symbols : List String
symbols
= [".(", -- for things such as Foo.Bar.(+)
"@{",
"[|", "|]",
"(", ")", "{", "}", "[", "]", ",", ";", "_",
"`(", "`"]


export
isOpChar : Char -> Bool
isOpChar c = c `elem` (unpack ":!#$%&*+./<=>?@\\^|-~")

validSymbol : Lexer
validSymbol = some (pred isOpChar)

-- Valid symbols which have a special meaning so can't be operators
export
reservedSymbols : List String
reservedSymbols
= symbols ++
["%", "\\", ":", "=", "|", "|||", "<-", "->", "=>", "?", "!",
"&", "**", ".."]
["%", "\\", ":", "=", "|", "|||", "?", "!", "&", "**",
"..", "<-", "->", "=>", "", "", ""]

fromHexLit : String -> Integer
fromHexLit str
Expand Down
2 changes: 2 additions & 0 deletions tests/Main.idr
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ idrisTests
"literate001", "literate002", "literate003", "literate004",
"literate005", "literate006", "literate007", "literate008",
"literate009",
-- Unicode
"unicode001",
-- Interfaces
"interface001", "interface002", "interface003", "interface004",
"interface005", "interface006", "interface007", "interface008",
Expand Down
9 changes: 9 additions & 0 deletions tests/idris2/unicode001/Unicode.idr
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
module Unicode

infix 6 ≡, ≢

public export
interface UnicodeEq a where
(≡) : a → a → Bool
(≢) : a → a → Bool

1 change: 1 addition & 0 deletions tests/idris2/unicode001/expected
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
1/1: Building Unicode (Unicode.idr)
3 changes: 3 additions & 0 deletions tests/idris2/unicode001/run
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
$1 -c Unicode.idr

rm -rf build/

0 comments on commit c938889

Please sign in to comment.