From 0e83d6c7c6ad8b3b98758d6b5ab875121992c44c Mon Sep 17 00:00:00 2001 From: "Boyd Stephen Smith Jr." Date: Tue, 24 Sep 2024 10:26:30 -0500 Subject: [PATCH] Handle multiline comments in Package (ipkg) (#3386) --- CHANGELOG_NEXT.md | 2 + CONTRIBUTORS | 11 ++-- src/Parser/Lexer/Common.idr | 50 +++++++++++++++++++ src/Parser/Lexer/Package.idr | 1 + src/Parser/Lexer/Source.idr | 49 ------------------ tests/idris2/pkg/multiline-comments/Main.idr | 4 ++ tests/idris2/pkg/multiline-comments/expected | 2 + tests/idris2/pkg/multiline-comments/run | 4 ++ tests/idris2/pkg/multiline-comments/test.ipkg | 12 +++++ 9 files changed, 81 insertions(+), 54 deletions(-) create mode 100644 tests/idris2/pkg/multiline-comments/Main.idr create mode 100644 tests/idris2/pkg/multiline-comments/expected create mode 100755 tests/idris2/pkg/multiline-comments/run create mode 100644 tests/idris2/pkg/multiline-comments/test.ipkg diff --git a/CHANGELOG_NEXT.md b/CHANGELOG_NEXT.md index 440660c3f6..2cb67aff00 100644 --- a/CHANGELOG_NEXT.md +++ b/CHANGELOG_NEXT.md @@ -49,6 +49,8 @@ This CHANGELOG describes the merged but unreleased changes. Please see [CHANGELO * Support for macOS PowerPC added. +* Multiline comments `{- text -}` are now supported in ipkg files. + ### Language changes * Autobind and Typebind modifier on operators allow the user to diff --git a/CONTRIBUTORS b/CONTRIBUTORS index 3648ef9be7..5e36d08394 100644 --- a/CONTRIBUTORS +++ b/CONTRIBUTORS @@ -15,9 +15,11 @@ André Videla Andy Lok Anthony Lodi Arnaud Bailly +Boyd Stephen Smith Jr. Brian Wignall Bryn Keller Christian Rasmussen +Claudio Etterli Cyrill Brunner Danylo Lapirov David Smith @@ -41,11 +43,11 @@ Johann Rudloff Kamil Shakirov Kevin Boulain LuoChen -Marc Petit-Huguenin MarcelineVQ +Marc Petit-Huguenin Marshall Bowers -Matthew Mosior Mathew Polzin +Matthew Mosior Matthew Wilson Matus Tejiscak Michael Messer @@ -58,9 +60,9 @@ Nicolas Biri Niklas Larsson Ohad Kammar Peter Hajdu +rhiannon morris Ricardo Prado Cunha Robert Walter -rhiannon morris Rodrigo Oliveira Rohit Grover Rui Barreiro @@ -74,14 +76,13 @@ then0rTh Theo Butler Thomas Dziedzic Thomas E. Hansen -Tim Süberkrüb Timmy Jose +Tim Süberkrüb Tom Harley troiganto Wen Kokke Wind Wong Zoe Stafford -Claudio Etterli Apologies to anyone we've missed - let us know and we'll correct it (or just send a PR with the correction). diff --git a/src/Parser/Lexer/Common.idr b/src/Parser/Lexer/Common.idr index 2523f3eea7..d8fef64ad5 100644 --- a/src/Parser/Lexer/Common.idr +++ b/src/Parser/Lexer/Common.idr @@ -14,6 +14,56 @@ comment <+> many (is '-') <+> reject (is '}') -- not a closing delimiter <+> many (isNot '\n') -- till the end of line +mutual + ||| The mutually defined functions represent different states in a + ||| small automaton. + ||| `toEndComment` is the default state and it will munch through + ||| the input until we detect a special character (a dash, an + ||| opening brace, or a double quote) and then switch to the + ||| appropriate state. + toEndComment : (k : Nat) -> Recognise (k /= 0) + toEndComment Z = empty + toEndComment (S k) + = some (pred (\c => c /= '-' && c /= '{' && c /= '"' && c /= '\'')) + <+> (eof <|> toEndComment (S k)) + <|> is '{' <+> singleBrace k + <|> is '-' <+> singleDash k + <|> (charLit <|> pred (== '\'')) <+> toEndComment (S k) + <|> stringLit <+> toEndComment (S k) + + ||| After reading a single brace, we may either finish reading an + ||| opening delimiter or ignore it (e.g. it could be an implicit + ||| binder). + singleBrace : (k : Nat) -> Lexer + singleBrace k + = is '-' <+> many (is '-') -- opening delimiter + <+> (eof <|> singleDash (S k)) -- `singleDash` handles the {----} special case + -- `eof` handles the file ending with {--- + <|> toEndComment (S k) -- not a valid comment + + ||| After reading a single dash, we may either find another one, + ||| meaning we may have started reading a line comment, or find + ||| a closing brace meaning we have found a closing delimiter. + singleDash : (k : Nat) -> Lexer + singleDash k + = is '-' <+> doubleDash k -- comment or closing delimiter + <|> is '}' <+> toEndComment k -- closing delimiter + <|> toEndComment (S k) -- not a valid comment + + ||| After reading a double dash, we are potentially reading a line + ||| comment unless the series of uninterrupted dashes is ended with + ||| a closing brace in which case it is a closing delimiter. + doubleDash : (k : Nat) -> Lexer + doubleDash k = with Prelude.(::) + many (is '-') <+> choice -- absorb all dashes + [ is '}' <+> toEndComment k -- closing delimiter + , many (isNot '\n') <+> toEndComment (S k) -- line comment + ] + +export +blockComment : Lexer +blockComment = is '{' <+> is '-' <+> many (is '-') <+> (eof <|> toEndComment 1) + -- Identifier Lexer -- There are multiple variants. diff --git a/src/Parser/Lexer/Package.idr b/src/Parser/Lexer/Package.idr index bf83360e08..6b89d48150 100644 --- a/src/Parser/Lexer/Package.idr +++ b/src/Parser/Lexer/Package.idr @@ -99,6 +99,7 @@ andop = is '&' <+> is '&' rawTokens : TokenMap Token rawTokens = [ (comment, Comment . drop 2) + , (blockComment, Comment . shrink 2) , (namespacedIdent, uncurry DotSepIdent . mkNamespacedIdent) , (identAllowDashes, DotSepIdent Nothing) , (separator, const Separator) diff --git a/src/Parser/Lexer/Source.idr b/src/Parser/Lexer/Source.idr index b3acb22f04..595c539a4a 100644 --- a/src/Parser/Lexer/Source.idr +++ b/src/Parser/Lexer/Source.idr @@ -140,55 +140,6 @@ Pretty Void Token where pretty (MagicDebugInfo di) = pretty (show di) pretty (Unrecognised x) = pretty "Unrecognised" <++> pretty x -mutual - ||| The mutually defined functions represent different states in a - ||| small automaton. - ||| `toEndComment` is the default state and it will munch through - ||| the input until we detect a special character (a dash, an - ||| opening brace, or a double quote) and then switch to the - ||| appropriate state. - toEndComment : (k : Nat) -> Recognise (k /= 0) - toEndComment Z = empty - toEndComment (S k) - = some (pred (\c => c /= '-' && c /= '{' && c /= '"' && c /= '\'')) - <+> (eof <|> toEndComment (S k)) - <|> is '{' <+> singleBrace k - <|> is '-' <+> singleDash k - <|> (charLit <|> pred (== '\'')) <+> toEndComment (S k) - <|> stringLit <+> toEndComment (S k) - - ||| After reading a single brace, we may either finish reading an - ||| opening delimiter or ignore it (e.g. it could be an implicit - ||| binder). - singleBrace : (k : Nat) -> Lexer - singleBrace k - = is '-' <+> many (is '-') -- opening delimiter - <+> (eof <|> singleDash (S k)) -- `singleDash` handles the {----} special case - -- `eof` handles the file ending with {--- - <|> toEndComment (S k) -- not a valid comment - - ||| After reading a single dash, we may either find another one, - ||| meaning we may have started reading a line comment, or find - ||| a closing brace meaning we have found a closing delimiter. - singleDash : (k : Nat) -> Lexer - singleDash k - = is '-' <+> doubleDash k -- comment or closing delimiter - <|> is '}' <+> toEndComment k -- closing delimiter - <|> toEndComment (S k) -- not a valid comment - - ||| After reading a double dash, we are potentially reading a line - ||| comment unless the series of uninterrupted dashes is ended with - ||| a closing brace in which case it is a closing delimiter. - doubleDash : (k : Nat) -> Lexer - doubleDash k = with Prelude.(::) - many (is '-') <+> choice -- absorb all dashes - [ is '}' <+> toEndComment k -- closing delimiter - , many (isNot '\n') <+> toEndComment (S k) -- line comment - ] - -blockComment : Lexer -blockComment = is '{' <+> is '-' <+> many (is '-') <+> (eof <|> toEndComment 1) - docComment : Lexer docComment = is '|' <+> is '|' <+> is '|' <+> many (isNot '\n') diff --git a/tests/idris2/pkg/multiline-comments/Main.idr b/tests/idris2/pkg/multiline-comments/Main.idr new file mode 100644 index 0000000000..4bd334760b --- /dev/null +++ b/tests/idris2/pkg/multiline-comments/Main.idr @@ -0,0 +1,4 @@ +module Main + +main : IO () +main = putStrLn "CouCou!" diff --git a/tests/idris2/pkg/multiline-comments/expected b/tests/idris2/pkg/multiline-comments/expected new file mode 100644 index 0000000000..803d4e86e1 --- /dev/null +++ b/tests/idris2/pkg/multiline-comments/expected @@ -0,0 +1,2 @@ +1/1: Building Main (Main.idr) +Now compiling the executable: test diff --git a/tests/idris2/pkg/multiline-comments/run b/tests/idris2/pkg/multiline-comments/run new file mode 100755 index 0000000000..0e6359d6c0 --- /dev/null +++ b/tests/idris2/pkg/multiline-comments/run @@ -0,0 +1,4 @@ +. ../../../testutils.sh + +# Previously reported: Uncaught error: Error: Can't recognise token. +idris2 --build test.ipkg diff --git a/tests/idris2/pkg/multiline-comments/test.ipkg b/tests/idris2/pkg/multiline-comments/test.ipkg new file mode 100644 index 0000000000..c59e007c20 --- /dev/null +++ b/tests/idris2/pkg/multiline-comments/test.ipkg @@ -0,0 +1,12 @@ +package test +{- +A test of +multiline +comments. +-} +version = 0.1.0 +authors = "Boyd Stephen Smith Jr." +main = Main +executable = "test" +{- "." is the top-level package directory -} +sourcedir = "."