Skip to content

Commit

Permalink
Merge pull request #874 from Apteryks/fix-lisp-comments
Browse files Browse the repository at this point in the history
Allow specifying a regexp for matching a single line comment.
  • Loading branch information
carmenbianca authored Jan 16, 2024
2 parents e51e337 + 59d9c94 commit e632207
Show file tree
Hide file tree
Showing 4 changed files with 193 additions and 22 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,11 @@ CLI command and its behaviour. There are no guarantees of stability for the
- Fix issue in `annotate` where `--single-line` and `--multi-line` would not
correctly raise an error with an incompatible comment style. (#853)
- Fix parsing existing copyright lines when they do not have a year (#861)
- Better handling of Lisp comment styles. Now, any number of ";" characters is
recognised as the prefix to a Lisp comment, and ";;;" is used when inserting
comment headers, as per
<https://www.gnu.org/software/emacs/manual/html_node/elisp/Comment-Tips.html>.
(#874)

### Security

Expand Down
73 changes: 51 additions & 22 deletions src/reuse/comment.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
# SPDX-FileCopyrightText: 2023 Mathias Dannesbo <[email protected]>
# SPDX-FileCopyrightText: 2023 Shun Sakai <[email protected]>
# SPDX-FileCopyrightText: 2023 Juelich Supercomputing Centre, Forschungszentrum Juelich GmbH
# SPDX-FileCopyrightText: 2023 Maxim Cournoyer <[email protected]>
#
# SPDX-License-Identifier: GPL-3.0-or-later

Expand All @@ -24,8 +25,9 @@

import logging
import operator
import re
from textwrap import dedent
from typing import List, NamedTuple, Type
from typing import List, NamedTuple, Optional, Type

_LOGGER = logging.getLogger(__name__)

Expand Down Expand Up @@ -53,6 +55,7 @@ class CommentStyle:

SHORTHAND = ""
SINGLE_LINE = ""
SINGLE_LINE_REGEXP: Optional[re.Pattern] = None
INDENT_AFTER_SINGLE = ""
# (start, middle, end)
# e.g., ("/*", "*", "*/")
Expand Down Expand Up @@ -157,12 +160,20 @@ def _parse_comment_single(cls, text: str) -> str:
result_lines = []

for line in text.splitlines():
# TODO: When Python 3.8 is dropped, consider using str.removeprefix
if cls.SINGLE_LINE_REGEXP:
if match := cls.SINGLE_LINE_REGEXP.match(line):
line = line.lstrip(match.group(0))
result_lines.append(line)
continue

if not line.startswith(cls.SINGLE_LINE):
raise CommentParseError(
f"'{line}' does not start with a comment marker"
)
line = line.lstrip(cls.SINGLE_LINE)
result_lines.append(line)

result = "\n".join(result_lines)
return dedent(result)

Expand Down Expand Up @@ -246,23 +257,31 @@ def comment_at_first_character(cls, text: str) -> str:
raise CommentParseError(f"{cls} cannot parse comments")

lines = text.splitlines()
end: Optional[int] = None

if cls.can_handle_single() and text.startswith(cls.SINGLE_LINE):
end = 0
if cls.can_handle_single():
for i, line in enumerate(lines):
if not line.startswith(cls.SINGLE_LINE):
if (
cls.SINGLE_LINE_REGEXP
and cls.SINGLE_LINE_REGEXP.match(line)
) or line.startswith(cls.SINGLE_LINE):
end = i
else:
break
end = i
return "\n".join(lines[0 : end + 1])
if cls.can_handle_multi() and text.startswith(cls.MULTI_LINE.start):
end = 0
if (
end is None
and cls.can_handle_multi()
and text.startswith(cls.MULTI_LINE.start)
):
for i, line in enumerate(lines):
end = i
if line.endswith(cls.MULTI_LINE.end):
break
else:
raise CommentParseError("Comment block never delimits")
return "\n".join(lines[0 : end + 1])

if end is not None:
return "\n".join(lines[: end + 1])

raise CommentParseError(
"Could not find a parseable comment block at the first character"
Expand Down Expand Up @@ -321,6 +340,15 @@ class CCommentStyle(CommentStyle):
]


class CSingleCommentStyle(CommentStyle):
    """C comment style restricted to single-line comments."""

    # Short identifier of this style.
    SHORTHAND = "csingle"

    # Indentation placed after the marker, then the marker itself.
    INDENT_AFTER_SINGLE = " "
    SINGLE_LINE = "//"


class CssCommentStyle(CommentStyle):
"""CSS comment style."""

Expand Down Expand Up @@ -424,7 +452,8 @@ class LispCommentStyle(CommentStyle):

SHORTHAND = "lisp"

SINGLE_LINE = ";"
SINGLE_LINE = ";;;"
SINGLE_LINE_REGEXP = re.compile(r"^;+\s*")
INDENT_AFTER_SINGLE = " "


Expand Down Expand Up @@ -480,6 +509,15 @@ class ReStructedTextCommentStyle(CommentStyle):
INDENT_AFTER_SINGLE = " "


class SemicolonCommentStyle(CommentStyle):
    """Comment style whose single-line marker is one semicolon."""

    # Short identifier of this style.
    SHORTHAND = "semicolon"

    # Indentation placed after the marker, then the marker itself.
    INDENT_AFTER_SINGLE = " "
    SINGLE_LINE = ";"


class TexCommentStyle(CommentStyle):
"""TeX comment style."""

Expand Down Expand Up @@ -524,23 +562,14 @@ class XQueryCommentStyle(CommentStyle):
INDENT_BEFORE_END = " "


class CSingleCommentStyle(CommentStyle):
    """C comment style restricted to single-line comments."""

    # Short identifier of this style.
    SHORTHAND = "csingle"

    # Indentation placed after the marker, then the marker itself.
    INDENT_AFTER_SINGLE = " "
    SINGLE_LINE = "//"


#: A map of (common) file extensions against comment types.
EXTENSION_COMMENT_STYLE_MAP = {
".adb": HaskellCommentStyle,
".adoc": CCommentStyle,
".ads": HaskellCommentStyle,
".aes": UncommentableCommentStyle,
".ahk": LispCommentStyle,
".ahkl": LispCommentStyle,
".ahk": SemicolonCommentStyle,
".ahkl": SemicolonCommentStyle,
".aidl": CCommentStyle,
".applescript": AppleScriptCommentStyle,
".arb": UncommentableCommentStyle,
Expand Down Expand Up @@ -619,7 +648,7 @@ class CSingleCommentStyle(CommentStyle):
".html": HtmlCommentStyle,
".hx": CCommentStyle,
".hxsl": CCommentStyle,
".ini": LispCommentStyle,
".ini": SemicolonCommentStyle,
".ino": CCommentStyle,
".ipynb": UncommentableCommentStyle,
".iuml": PlantUmlCommentStyle,
Expand Down
22 changes: 22 additions & 0 deletions tests/test_comment.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# SPDX-FileCopyrightText: 2019 Free Software Foundation Europe e.V. <https://fsfe.org>
# SPDX-FileCopyrightText: 2023 Maxim Cournoyer <[email protected]>
#
# SPDX-License-Identifier: GPL-3.0-or-later

Expand All @@ -17,6 +18,7 @@
CommentParseError,
CommentStyle,
HtmlCommentStyle,
LispCommentStyle,
PythonCommentStyle,
_all_style_classes,
)
Expand Down Expand Up @@ -667,3 +669,23 @@ def test_comment_at_first_character_c_multi_never_ends():

with pytest.raises(CommentParseError):
CCommentStyle.comment_at_first_character(text)


def test_parse_comment_lisp():
    """Parse a simple Lisp comment.

    The input uses two-semicolon markers and contains a bare ``;;`` line.
    Each line has its marker (and the whitespace after it) removed, so the
    bare marker line becomes an empty line in the parsed text.
    """
    text = cleandoc(
        """
        ;; Hello
        ;;
        ;; world
        """
    )
    # The empty middle line corresponds to the bare ";;" line of the input;
    # single-line parsing keeps one output line per input line.
    expected = cleandoc(
        """
        Hello

        world
        """
    )

    assert LispCommentStyle.parse_comment(text) == expected
115 changes: 115 additions & 0 deletions tests/test_main_annotate.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
# SPDX-FileCopyrightText: © 2020 Liferay, Inc. <https://liferay.com>
# SPDX-FileCopyrightText: 2022 Florian Snow <[email protected]>
# SPDX-FileCopyrightText: 2022 Carmen Bianca Bakker <[email protected]>
# SPDX-FileCopyrightText: 2023 Maxim Cournoyer <[email protected]>
#
# SPDX-License-Identifier: GPL-3.0-or-later

Expand Down Expand Up @@ -52,6 +53,120 @@ def test_annotate_simple(fake_repository, stringio, mock_date_today):
assert simple_file.read_text() == expected


def test_annotate_simple_scheme(fake_repository, stringio, mock_date_today):
    """Add a header to a Scheme file."""
    scheme_path = fake_repository / "foo.scm"
    scheme_path.write_text("#t")

    # Command line handed to the entry point.
    cli_args = [
        "annotate",
        "--license",
        "GPL-3.0-or-later",
        "--copyright",
        "Jane Doe",
        "foo.scm",
    ]
    exit_code = main(cli_args, out=stringio)

    # File content expected after annotation: a ";;;" header above the code.
    wanted = cleandoc(
        """
        ;;; SPDX-FileCopyrightText: 2018 Jane Doe
        ;;;
        ;;; SPDX-License-Identifier: GPL-3.0-or-later
        #t
        """
    )

    assert exit_code == 0
    assert scheme_path.read_text() == wanted


def test_annotate_scheme_standardised(
    fake_repository, stringio, mock_date_today
):
    """The comment block is rewritten/standardised.

    The existing header uses a single ``;`` per line; after annotating, the
    file is expected to carry the same header with ``;;;`` markers.
    """
    original_text = cleandoc(
        """
        ; SPDX-FileCopyrightText: 2018 Jane Doe
        ;
        ; SPDX-License-Identifier: GPL-3.0-or-later
        #t
        """
    )
    wanted = cleandoc(
        """
        ;;; SPDX-FileCopyrightText: 2018 Jane Doe
        ;;;
        ;;; SPDX-License-Identifier: GPL-3.0-or-later
        #t
        """
    )
    scheme_path = fake_repository / "foo.scm"
    scheme_path.write_text(original_text)

    # Command line handed to the entry point.
    cli_args = [
        "annotate",
        "--license",
        "GPL-3.0-or-later",
        "--copyright",
        "Jane Doe",
        "foo.scm",
    ]
    exit_code = main(cli_args, out=stringio)

    assert exit_code == 0
    assert scheme_path.read_text() == wanted


def test_annotate_scheme_standardised2(
    fake_repository, stringio, mock_date_today
):
    """The comment block is rewritten/standardised.

    The existing header uses ``;;`` per line; after annotating, the file is
    expected to carry the same header with ``;;;`` markers.
    """
    original_text = cleandoc(
        """
        ;; SPDX-FileCopyrightText: 2018 Jane Doe
        ;;
        ;; SPDX-License-Identifier: GPL-3.0-or-later
        #t
        """
    )
    wanted = cleandoc(
        """
        ;;; SPDX-FileCopyrightText: 2018 Jane Doe
        ;;;
        ;;; SPDX-License-Identifier: GPL-3.0-or-later
        #t
        """
    )
    scheme_path = fake_repository / "foo.scm"
    scheme_path.write_text(original_text)

    # Command line handed to the entry point.
    cli_args = [
        "annotate",
        "--license",
        "GPL-3.0-or-later",
        "--copyright",
        "Jane Doe",
        "foo.scm",
    ]
    exit_code = main(cli_args, out=stringio)

    assert exit_code == 0
    assert scheme_path.read_text() == wanted


def test_annotate_simple_no_replace(fake_repository, stringio, mock_date_today):
"""Add a header to a file without replacing the existing header."""
simple_file = fake_repository / "foo.py"
Expand Down

0 comments on commit e632207

Please sign in to comment.