From 9a3f5af3ef9c65612e617b25b3f22fba7f9c6b0f Mon Sep 17 00:00:00 2001 From: Maxim Cournoyer Date: Mon, 4 Dec 2023 22:44:07 -0500 Subject: [PATCH 1/2] Allow specifying a regexp for matching a single line comment. It allows specifying a new SINGLE_LINE_REGEXP property on a CommentStyle subclass, which provides more flexibility in parsing single line comments. Fixes: Signed-off-by: Carmen Bianca BAKKER --- CHANGELOG.md | 5 ++ src/reuse/comment.py | 55 +++++++++++++---- tests/test_comment.py | 22 +++++++ tests/test_main_annotate.py | 115 ++++++++++++++++++++++++++++++++++++ 4 files changed, 184 insertions(+), 13 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4a08a49e..46fedbf8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -100,6 +100,11 @@ CLI command and its behaviour. There are no guarantees of stability for the - Fix issue in `annotate` where `--single-line` and `--multi-line` would not correctly raise an error with an incompatible comment style. (#853) - Fix parsing existing copyright lines when they do not have a year (#861) +- Better handling of Lisp comment styles. Now, any number of ";" characters is + recognised as the prefix to a Lisp comment, and ";;;" is used when inserting + comment headers, as per + <https://www.gnu.org/software/emacs/manual/html_node/elisp/Comment-Tips.html>. 
+ (#874) ### Security diff --git a/src/reuse/comment.py b/src/reuse/comment.py index 5463b934..371fbbb8 100644 --- a/src/reuse/comment.py +++ b/src/reuse/comment.py @@ -15,6 +15,7 @@ # SPDX-FileCopyrightText: 2023 Mathias Dannesbo # SPDX-FileCopyrightText: 2023 Shun Sakai # SPDX-FileCopyrightText: 2023 Juelich Supercomputing Centre, Forschungszentrum Juelich GmbH +# SPDX-FileCopyrightText: 2023 Maxim Cournoyer # # SPDX-License-Identifier: GPL-3.0-or-later @@ -24,8 +25,9 @@ import logging import operator +import re from textwrap import dedent -from typing import List, NamedTuple, Type +from typing import List, NamedTuple, Optional, Type _LOGGER = logging.getLogger(__name__) @@ -53,6 +55,7 @@ class CommentStyle: SHORTHAND = "" SINGLE_LINE = "" + SINGLE_LINE_REGEXP: Optional[re.Pattern] = None INDENT_AFTER_SINGLE = "" # (start, middle, end) # e.g., ("/*", "*", "*/") @@ -157,12 +160,20 @@ def _parse_comment_single(cls, text: str) -> str: result_lines = [] for line in text.splitlines(): + # TODO: When Python 3.8 is dropped, consider using str.removeprefix + if cls.SINGLE_LINE_REGEXP: + if match := cls.SINGLE_LINE_REGEXP.match(line): + line = line.lstrip(match.group(0)) + result_lines.append(line) + continue + if not line.startswith(cls.SINGLE_LINE): raise CommentParseError( f"'{line}' does not start with a comment marker" ) line = line.lstrip(cls.SINGLE_LINE) result_lines.append(line) + result = "\n".join(result_lines) return dedent(result) @@ -246,23 +257,31 @@ def comment_at_first_character(cls, text: str) -> str: raise CommentParseError(f"{cls} cannot parse comments") lines = text.splitlines() + end: Optional[int] = None - if cls.can_handle_single() and text.startswith(cls.SINGLE_LINE): - end = 0 + if cls.can_handle_single(): for i, line in enumerate(lines): - if not line.startswith(cls.SINGLE_LINE): + if ( + cls.SINGLE_LINE_REGEXP + and cls.SINGLE_LINE_REGEXP.match(line) + ) or line.startswith(cls.SINGLE_LINE): + end = i + else: break - end = i - return 
"\n".join(lines[0 : end + 1]) - if cls.can_handle_multi() and text.startswith(cls.MULTI_LINE.start): - end = 0 + if ( + end is None + and cls.can_handle_multi() + and text.startswith(cls.MULTI_LINE.start) + ): for i, line in enumerate(lines): end = i if line.endswith(cls.MULTI_LINE.end): break else: raise CommentParseError("Comment block never delimits") - return "\n".join(lines[0 : end + 1]) + + if end is not None: + return "\n".join(lines[: end + 1]) raise CommentParseError( "Could not find a parseable comment block at the first character" @@ -424,7 +443,8 @@ class LispCommentStyle(CommentStyle): SHORTHAND = "lisp" - SINGLE_LINE = ";" + SINGLE_LINE = ";;;" + SINGLE_LINE_REGEXP = re.compile(r"^;+\s*") INDENT_AFTER_SINGLE = " " @@ -480,6 +500,15 @@ class ReStructedTextCommentStyle(CommentStyle): INDENT_AFTER_SINGLE = " " +class SemicolonCommentStyle(CommentStyle): + """Semicolon comment style.""" + + SHORTHAND = "semicolon" + + SINGLE_LINE = ";" + INDENT_AFTER_SINGLE = " " + + class TexCommentStyle(CommentStyle): """TeX comment style.""" @@ -539,8 +568,8 @@ class CSingleCommentStyle(CommentStyle): ".adoc": CCommentStyle, ".ads": HaskellCommentStyle, ".aes": UncommentableCommentStyle, - ".ahk": LispCommentStyle, - ".ahkl": LispCommentStyle, + ".ahk": SemicolonCommentStyle, + ".ahkl": SemicolonCommentStyle, ".aidl": CCommentStyle, ".applescript": AppleScriptCommentStyle, ".arb": UncommentableCommentStyle, @@ -619,7 +648,7 @@ class CSingleCommentStyle(CommentStyle): ".html": HtmlCommentStyle, ".hx": CCommentStyle, ".hxsl": CCommentStyle, - ".ini": LispCommentStyle, + ".ini": SemicolonCommentStyle, ".ino": CCommentStyle, ".ipynb": UncommentableCommentStyle, ".iuml": PlantUmlCommentStyle, diff --git a/tests/test_comment.py b/tests/test_comment.py index e1a8ac06..72ea2d62 100644 --- a/tests/test_comment.py +++ b/tests/test_comment.py @@ -1,4 +1,5 @@ # SPDX-FileCopyrightText: 2019 Free Software Foundation Europe e.V. 
+# SPDX-FileCopyrightText: 2023 Maxim Cournoyer # # SPDX-License-Identifier: GPL-3.0-or-later @@ -17,6 +18,7 @@ CommentParseError, CommentStyle, HtmlCommentStyle, + LispCommentStyle, PythonCommentStyle, _all_style_classes, ) @@ -667,3 +669,23 @@ def test_comment_at_first_character_c_multi_never_ends(): with pytest.raises(CommentParseError): CCommentStyle.comment_at_first_character(text) + + +def test_parse_comment_lisp(): + """Parse a simple Lisp comment.""" + text = cleandoc( + """ + ;; Hello + ;; + ;; world + """ + ) + expected = cleandoc( + """ + Hello + + world + """ + ) + + assert LispCommentStyle.parse_comment(text) == expected diff --git a/tests/test_main_annotate.py b/tests/test_main_annotate.py index 9a1deb54..781ecc8f 100644 --- a/tests/test_main_annotate.py +++ b/tests/test_main_annotate.py @@ -3,6 +3,7 @@ # SPDX-FileCopyrightText: © 2020 Liferay, Inc. # SPDX-FileCopyrightText: 2022 Florian Snow # SPDX-FileCopyrightText: 2022 Carmen Bianca Bakker +# SPDX-FileCopyrightText: 2023 Maxim Cournoyer # # SPDX-License-Identifier: GPL-3.0-or-later @@ -52,6 +53,120 @@ def test_annotate_simple(fake_repository, stringio, mock_date_today): assert simple_file.read_text() == expected +def test_annotate_simple_scheme(fake_repository, stringio, mock_date_today): + "Add a header to a Scheme file." 
+ simple_file = fake_repository / "foo.scm" + simple_file.write_text("#t") + expected = cleandoc( + """ + ;;; SPDX-FileCopyrightText: 2018 Jane Doe + ;;; + ;;; SPDX-License-Identifier: GPL-3.0-or-later + + #t + """ + ) + + result = main( + [ + "annotate", + "--license", + "GPL-3.0-or-later", + "--copyright", + "Jane Doe", + "foo.scm", + ], + out=stringio, + ) + + assert result == 0 + assert simple_file.read_text() == expected + + +def test_annotate_scheme_standardised( + fake_repository, stringio, mock_date_today +): + """The comment block is rewritten/standardised.""" + simple_file = fake_repository / "foo.scm" + simple_file.write_text( + cleandoc( + """ + ; SPDX-FileCopyrightText: 2018 Jane Doe + ; + ; SPDX-License-Identifier: GPL-3.0-or-later + + #t + """ + ) + ) + expected = cleandoc( + """ + ;;; SPDX-FileCopyrightText: 2018 Jane Doe + ;;; + ;;; SPDX-License-Identifier: GPL-3.0-or-later + + #t + """ + ) + + result = main( + [ + "annotate", + "--license", + "GPL-3.0-or-later", + "--copyright", + "Jane Doe", + "foo.scm", + ], + out=stringio, + ) + + assert result == 0 + assert simple_file.read_text() == expected + + +def test_annotate_scheme_standardised2( + fake_repository, stringio, mock_date_today +): + """The comment block is rewritten/standardised.""" + simple_file = fake_repository / "foo.scm" + simple_file.write_text( + cleandoc( + """ + ;; SPDX-FileCopyrightText: 2018 Jane Doe + ;; + ;; SPDX-License-Identifier: GPL-3.0-or-later + + #t + """ + ) + ) + expected = cleandoc( + """ + ;;; SPDX-FileCopyrightText: 2018 Jane Doe + ;;; + ;;; SPDX-License-Identifier: GPL-3.0-or-later + + #t + """ + ) + + result = main( + [ + "annotate", + "--license", + "GPL-3.0-or-later", + "--copyright", + "Jane Doe", + "foo.scm", + ], + out=stringio, + ) + + assert result == 0 + assert simple_file.read_text() == expected + + def test_annotate_simple_no_replace(fake_repository, stringio, mock_date_today): """Add a header to a file without replacing the existing header.""" 
simple_file = fake_repository / "foo.py" From 59d9c94c86e9202883a8f3acdd42a0402ebfd929 Mon Sep 17 00:00:00 2001 From: Carmen Bianca BAKKER Date: Tue, 16 Jan 2024 19:03:01 +0100 Subject: [PATCH 2/2] Move CSingleCommentStyle to alphabetic position Signed-off-by: Carmen Bianca BAKKER --- src/reuse/comment.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/reuse/comment.py b/src/reuse/comment.py index 371fbbb8..8b67bfb0 100644 --- a/src/reuse/comment.py +++ b/src/reuse/comment.py @@ -340,6 +340,15 @@ class CCommentStyle(CommentStyle): ] +class CSingleCommentStyle(CommentStyle): + """C single-only comment style.""" + + SHORTHAND = "csingle" + + SINGLE_LINE = "//" + INDENT_AFTER_SINGLE = " " + + class CssCommentStyle(CommentStyle): """CSS comment style.""" @@ -553,15 +562,6 @@ class XQueryCommentStyle(CommentStyle): INDENT_BEFORE_END = " " -class CSingleCommentStyle(CommentStyle): - """C single-only comment style.""" - - SHORTHAND = "csingle" - - SINGLE_LINE = "//" - INDENT_AFTER_SINGLE = " " - - #: A map of (common) file extensions against comment types. EXTENSION_COMMENT_STYLE_MAP = { ".adb": HaskellCommentStyle,