Merge pull request #874 from Apteryks/fix-lisp-comments

Allow specifying a regexp for matching a single-line comment.
fsfe · Jan 16, 2024 · e632207 · e632207
2 parents e51e337 + 59d9c94
commit e632207
Show file tree

Hide file tree

Showing 4 changed files with 193 additions and 22 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -100,6 +100,11 @@ CLI command and its behaviour. There are no guarantees of stability for the
 - Fix issue in `annotate` where `--single-line` and `--multi-line` would not
   correctly raise an error with an incompatible comment style. (#853)
 - Fix parsing existing copyright lines when they do not have a year (#861)
+- Better handling of Lisp comment styles. Now, any number of ";" characters is
+  recognised as the prefix to a Lisp comment, and ";;;" is used when inserting
+  comment headers, as per
+  <https://www.gnu.org/software/emacs/manual/html_node/elisp/Comment-Tips.html>.
+  (#874)
 
 ### Security
 

diff --git a/src/reuse/comment.py b/src/reuse/comment.py
@@ -15,6 +15,7 @@
 # SPDX-FileCopyrightText: 2023 Mathias Dannesbo <[email protected]>
 # SPDX-FileCopyrightText: 2023 Shun Sakai <[email protected]>
 # SPDX-FileCopyrightText: 2023 Juelich Supercomputing Centre, Forschungszentrum Juelich GmbH
+# SPDX-FileCopyrightText: 2023 Maxim Cournoyer <[email protected]>
 #
 # SPDX-License-Identifier: GPL-3.0-or-later
 
@@ -24,8 +25,9 @@
 
 import logging
 import operator
+import re
 from textwrap import dedent
-from typing import List, NamedTuple, Type
+from typing import List, NamedTuple, Optional, Type
 
 _LOGGER = logging.getLogger(__name__)
 
@@ -53,6 +55,7 @@ class CommentStyle:
 
     SHORTHAND = ""
     SINGLE_LINE = ""
+    SINGLE_LINE_REGEXP: Optional[re.Pattern] = None
     INDENT_AFTER_SINGLE = ""
     # (start, middle, end)
     # e.g., ("/*", "*", "*/")
@@ -157,12 +160,20 @@ def _parse_comment_single(cls, text: str) -> str:
         result_lines = []
 
         for line in text.splitlines():
+            # TODO: When Python 3.8 is dropped, consider using str.removeprefix
+            if cls.SINGLE_LINE_REGEXP:
+                if match := cls.SINGLE_LINE_REGEXP.match(line):
+                    line = line.lstrip(match.group(0))
+                    result_lines.append(line)
+                    continue
+
             if not line.startswith(cls.SINGLE_LINE):
                 raise CommentParseError(
                     f"'{line}' does not start with a comment marker"
                 )
             line = line.lstrip(cls.SINGLE_LINE)
             result_lines.append(line)
+
         result = "\n".join(result_lines)
         return dedent(result)
 
@@ -246,23 +257,31 @@ def comment_at_first_character(cls, text: str) -> str:
             raise CommentParseError(f"{cls} cannot parse comments")
 
         lines = text.splitlines()
+        end: Optional[int] = None
 
-        if cls.can_handle_single() and text.startswith(cls.SINGLE_LINE):
-            end = 0
+        if cls.can_handle_single():
             for i, line in enumerate(lines):
-                if not line.startswith(cls.SINGLE_LINE):
+                if (
+                    cls.SINGLE_LINE_REGEXP
+                    and cls.SINGLE_LINE_REGEXP.match(line)
+                ) or line.startswith(cls.SINGLE_LINE):
+                    end = i
+                else:
                     break
-                end = i
-            return "\n".join(lines[0 : end + 1])
-        if cls.can_handle_multi() and text.startswith(cls.MULTI_LINE.start):
-            end = 0
+        if (
+            end is None
+            and cls.can_handle_multi()
+            and text.startswith(cls.MULTI_LINE.start)
+        ):
             for i, line in enumerate(lines):
                 end = i
                 if line.endswith(cls.MULTI_LINE.end):
                     break
             else:
                 raise CommentParseError("Comment block never delimits")
-            return "\n".join(lines[0 : end + 1])
+
+        if end is not None:
+            return "\n".join(lines[: end + 1])
 
         raise CommentParseError(
             "Could not find a parseable comment block at the first character"
@@ -321,6 +340,15 @@ class CCommentStyle(CommentStyle):
     ]
 
 
+class CSingleCommentStyle(CommentStyle):
+    """C-style comments written only with the single-line ``//`` marker."""
+
+    SHORTHAND = "csingle"
+
+    SINGLE_LINE = "//"
+    INDENT_AFTER_SINGLE = " "
+
+
 class CssCommentStyle(CommentStyle):
     """CSS comment style."""
 
@@ -424,7 +452,8 @@ class LispCommentStyle(CommentStyle):
 
     SHORTHAND = "lisp"
 
-    SINGLE_LINE = ";"
+    SINGLE_LINE = ";;;"
+    SINGLE_LINE_REGEXP = re.compile(r"^;+\s*")
     INDENT_AFTER_SINGLE = " "
 
 
@@ -480,6 +509,15 @@ class ReStructedTextCommentStyle(CommentStyle):
     INDENT_AFTER_SINGLE = " "
 
 
+class SemicolonCommentStyle(CommentStyle):
+    """Comment style whose single-line marker is one ``;`` character."""
+
+    SHORTHAND = "semicolon"
+
+    SINGLE_LINE = ";"
+    INDENT_AFTER_SINGLE = " "
+
+
 class TexCommentStyle(CommentStyle):
     """TeX comment style."""
 
@@ -524,23 +562,14 @@ class XQueryCommentStyle(CommentStyle):
     INDENT_BEFORE_END = " "
 
 
-class CSingleCommentStyle(CommentStyle):
-    """C single-only comment style."""
-
-    SHORTHAND = "csingle"
-
-    SINGLE_LINE = "//"
-    INDENT_AFTER_SINGLE = " "
-
-
 #: A map of (common) file extensions against comment types.
 EXTENSION_COMMENT_STYLE_MAP = {
     ".adb": HaskellCommentStyle,
     ".adoc": CCommentStyle,
     ".ads": HaskellCommentStyle,
     ".aes": UncommentableCommentStyle,
-    ".ahk": LispCommentStyle,
-    ".ahkl": LispCommentStyle,
+    ".ahk": SemicolonCommentStyle,
+    ".ahkl": SemicolonCommentStyle,
     ".aidl": CCommentStyle,
     ".applescript": AppleScriptCommentStyle,
     ".arb": UncommentableCommentStyle,
@@ -619,7 +648,7 @@ class CSingleCommentStyle(CommentStyle):
     ".html": HtmlCommentStyle,
     ".hx": CCommentStyle,
     ".hxsl": CCommentStyle,
-    ".ini": LispCommentStyle,
+    ".ini": SemicolonCommentStyle,
     ".ino": CCommentStyle,
     ".ipynb": UncommentableCommentStyle,
     ".iuml": PlantUmlCommentStyle,

diff --git a/tests/test_comment.py b/tests/test_comment.py
@@ -1,4 +1,5 @@
 # SPDX-FileCopyrightText: 2019 Free Software Foundation Europe e.V. <https://fsfe.org>
+# SPDX-FileCopyrightText: 2023 Maxim Cournoyer <[email protected]>
 #
 # SPDX-License-Identifier: GPL-3.0-or-later
 
@@ -17,6 +18,7 @@
     CommentParseError,
     CommentStyle,
     HtmlCommentStyle,
+    LispCommentStyle,
     PythonCommentStyle,
     _all_style_classes,
 )
@@ -667,3 +669,23 @@ def test_comment_at_first_character_c_multi_never_ends():
 
     with pytest.raises(CommentParseError):
         CCommentStyle.comment_at_first_character(text)
+
+
+def test_parse_comment_lisp():
+    """Parse a ``;;``-prefixed Lisp comment that contains a bare ``;;`` line."""
+    text = cleandoc(
+        """
+        ;; Hello
+        ;;
+        ;; world
+        """
+    )
+    expected = cleandoc(
+        """
+        Hello
+
+        world
+        """
+    )
+
+    assert LispCommentStyle.parse_comment(text) == expected
diff --git a/tests/test_main_annotate.py b/tests/test_main_annotate.py
@@ -3,6 +3,7 @@
 # SPDX-FileCopyrightText: © 2020 Liferay, Inc. <https://liferay.com>
 # SPDX-FileCopyrightText: 2022 Florian Snow <[email protected]>
 # SPDX-FileCopyrightText: 2022 Carmen Bianca Bakker <[email protected]>
+# SPDX-FileCopyrightText: 2023 Maxim Cournoyer <[email protected]>
 #
 # SPDX-License-Identifier: GPL-3.0-or-later
 
@@ -52,6 +53,120 @@ def test_annotate_simple(fake_repository, stringio, mock_date_today):
     assert simple_file.read_text() == expected
 
 
+def test_annotate_simple_scheme(fake_repository, stringio, mock_date_today):
+    """Add a ``;;;``-prefixed header to a Scheme file that has no header."""
+    simple_file = fake_repository / "foo.scm"
+    simple_file.write_text("#t")
+    expected = cleandoc(
+        """
+        ;;; SPDX-FileCopyrightText: 2018 Jane Doe
+        ;;;
+        ;;; SPDX-License-Identifier: GPL-3.0-or-later
+
+        #t
+        """
+    )
+
+    result = main(
+        [
+            "annotate",
+            "--license",
+            "GPL-3.0-or-later",
+            "--copyright",
+            "Jane Doe",
+            "foo.scm",
+        ],
+        out=stringio,
+    )
+
+    assert result == 0
+    assert simple_file.read_text() == expected
+
+
+def test_annotate_scheme_standardised(
+    fake_repository, stringio, mock_date_today
+):
+    """An existing ``;``-prefixed header is rewritten to use ``;;;``."""
+    simple_file = fake_repository / "foo.scm"
+    simple_file.write_text(
+        cleandoc(
+            """
+            ; SPDX-FileCopyrightText: 2018 Jane Doe
+            ;
+            ; SPDX-License-Identifier: GPL-3.0-or-later
+
+            #t
+            """
+        )
+    )
+    expected = cleandoc(
+        """
+        ;;; SPDX-FileCopyrightText: 2018 Jane Doe
+        ;;;
+        ;;; SPDX-License-Identifier: GPL-3.0-or-later
+
+        #t
+        """
+    )
+
+    result = main(
+        [
+            "annotate",
+            "--license",
+            "GPL-3.0-or-later",
+            "--copyright",
+            "Jane Doe",
+            "foo.scm",
+        ],
+        out=stringio,
+    )
+
+    assert result == 0
+    assert simple_file.read_text() == expected
+
+
+def test_annotate_scheme_standardised2(
+    fake_repository, stringio, mock_date_today
+):
+    """An existing ``;;``-prefixed header is rewritten to use ``;;;``."""
+    simple_file = fake_repository / "foo.scm"
+    simple_file.write_text(
+        cleandoc(
+            """
+            ;; SPDX-FileCopyrightText: 2018 Jane Doe
+            ;;
+            ;; SPDX-License-Identifier: GPL-3.0-or-later
+
+            #t
+            """
+        )
+    )
+    expected = cleandoc(
+        """
+        ;;; SPDX-FileCopyrightText: 2018 Jane Doe
+        ;;;
+        ;;; SPDX-License-Identifier: GPL-3.0-or-later
+
+        #t
+        """
+    )
+
+    result = main(
+        [
+            "annotate",
+            "--license",
+            "GPL-3.0-or-later",
+            "--copyright",
+            "Jane Doe",
+            "foo.scm",
+        ],
+        out=stringio,
+    )
+
+    assert result == 0
+    assert simple_file.read_text() == expected
+
+
 def test_annotate_simple_no_replace(fake_repository, stringio, mock_date_today):
     """Add a header to a file without replacing the existing header."""
     simple_file = fake_repository / "foo.py"