From cb94206dd86e5904fe3a52564dbc7500f7012264 Mon Sep 17 00:00:00 2001
From: linozen <linus@sehn.tech>
Date: Wed, 11 Jan 2023 18:06:15 +0100
Subject: [PATCH 01/60] Rework `lint`

Add functions to retrieve and format the data from a `ProjectReport` object.
---
 src/reuse/lint.py | 187 +++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 185 insertions(+), 2 deletions(-)

diff --git a/src/reuse/lint.py b/src/reuse/lint.py
index 8ff9657b..e2ed566d 100644
--- a/src/reuse/lint.py
+++ b/src/reuse/lint.py
@@ -8,6 +8,7 @@
 """
 
 import contextlib
+import json
 import os
 import sys
 from gettext import gettext as _
@@ -26,6 +27,13 @@ def _write_element(element, out=sys.stdout):
 
 def lint(report: ProjectReport, out=sys.stdout) -> bool:
     """Lint the entire project."""
+
+    # Collect data from report
+    # save into data structure (if report is not suitable)
+
+    # Write output formatting functions (dynamic output formats)
+    # Write output writing functions (stdout[, file, webrequest, ...])
+
     bad_licenses_result = lint_bad_licenses(report, out)
     deprecated_result = lint_deprecated_licenses(report, out)
     extensionless = lint_licenses_without_extension(report, out)
@@ -330,7 +338,178 @@ def add_arguments(parser):
     )
 
 
-def run(args, project: Project, out=sys.stdout):
+def collect_data_from_report(report: ProjectReport) -> dict:
+    """Collects and formats data from report and returns it as a dictionary
+
+    :param report: ProjectReport object
+    :return: Formatted dictionary containing data from the ProjectReport object
+    """
+    # Setup report data container
+    data = {
+        "json_version": "1.0",
+        "reuse_version": __REUSE_version__,
+        "non_compliant": {},
+        "files": {},
+        "summary": {
+            "used_licenses": [],
+        },
+    }
+
+    # Populate 'non_compliant'
+    data["non_compliant"] = {
+        "missing_licenses": [str(f) for f in report.missing_licenses],
+        "unused_licenses": [str(f) for f in report.unused_licenses],
+        "deprecated_licenses": [str(f) for f in report.deprecated_licenses],
+        "bad_licenses": [str(f) for f in report.bad_licenses],
+        "licenses_without_extension": [
+            str(f) for f in report.licenses_without_extension
+        ],
+        "missing_copyright_info": [
+            str(f) for f in report.files_without_copyright
+        ],
+        "missing_licensing_info": [str(f) for f in report.missing_licenses],
+        "read_error": [str(f) for f in report.read_errors],
+    }
+
+    # Populate 'files'
+    for file in report.file_reports:
+        copyrights = file.spdxfile.copyright.split("\n")
+        data["files"][str(file.path)] = {
+            "copyrights": [
+                {"value": copyright, "source": file.spdxfile.name}
+                for copyright in copyrights
+            ],
+            "licenses": [
+                {"value": license, "source": file.spdxfile.name}
+                for license in file.spdxfile.licenses_in_file
+            ],
+        }
+
+    # Populate 'summary'
+    number_of_files = len(report.file_reports)
+    is_compliant = not any(
+        any(result)
+        for result in (
+            data["non_compliant"]["missing_licenses"],
+            data["non_compliant"]["unused_licenses"],
+            data["non_compliant"]["bad_licenses"],
+            data["non_compliant"]["deprecated_licenses"],
+            data["non_compliant"]["licenses_without_extension"],
+            data["non_compliant"]["missing_copyright_info"],
+            data["non_compliant"]["missing_licensing_info"],
+            data["non_compliant"]["read_error"],
+        )
+    )
+    data["summary"] = {
+        "used_licenses": list(report.used_licenses),
+        "files_total": number_of_files,
+        "files_with_copyright_info": number_of_files
+        - len(report.files_without_copyright),
+        "files_with_licensing_info": number_of_files
+        - len(report.files_without_licenses),
+        "compliant": is_compliant,
+    }
+    return data
+
+
+def format_json(data) -> str:
+    """Formats data dictionary as JSON string ready to be printed to std.out
+
+    :param data: Dictionary containing formatted ProjectReport data
+    :return: String (representing JSON) that can be output to std.out
+    """
+    return json.dumps(data, indent=2)
+
+
+def format_plain(data) -> str:
+    """Formats data dictionary as plaintext string to be printed to std.out
+
+    :param data: Dictionary containing formatted ProjectReport data
+    :return: String (in plaintext) that can be output to std.out
+    """
+    output = ""
+    if not data["summary"]["compliant"]:
+
+        output += "# " + _("MISSING COPYRIGHT AND LICENSING INFORMATION")
+        output += "\n\n"
+        files_without_copyright = set(
+            data["non_compliant"]["missing_copyright_info"]
+        )
+        files_without_license = set(
+            data["non_compliant"]["missing_licensing_info"]
+        )
+        files_without_both = files_without_license.intersection(
+            files_without_license
+        )
+
+        if files_without_both:
+            output += _(
+                "The following files have no copyright and licensing "
+                "information:"
+            )
+            output += "\n"
+            for file in sorted(files_without_both):
+                output += f"* {file}\n"
+
+        if files_without_copyright - files_without_both:
+            output += _("The following files have no copyright information:")
+            output += "\n"
+            for file in sorted(files_without_copyright - files_without_both):
+                output += f"* {file}\n"
+
+        if files_without_license - files_without_both:
+            output += _("The following files have no licensing information:")
+            output += "\n"
+            for file in sorted(files_without_license - files_without_both):
+                output += f"* {file}\n"
+
+        # bad licenses
+        # deprecated licenses
+
+        output += "\n"
+
+    output += "# " + _("SUMMARY")
+    output += "\n\n"
+    summary_contents = [
+        (_("Bad licenses:"), ", ".join(data["non_compliant"]["bad_licenses"])),
+        (
+            _("Deprecated licenses:"),
+            ", ".join(data["non_compliant"]["deprecated_licenses"]),
+        ),
+        (
+            _("Licenses without file extension:"),
+            ", ".join(data["non_compliant"]["licenses_without_extension"]),
+        ),
+        (
+            _("Missing licenses:"),
+            ", ".join(data["non_compliant"]["missing_licensing_info"]),
+        ),
+        (
+            _("Unused licenses:"),
+            ", ".join(data["non_compliant"]["unused_licenses"]),
+        ),
+        (_("Used licenses:"), ", ".join(data["summary"]["used_licenses"])),
+    ]
+
+    for key, value in summary_contents:
+        if not value:
+            value = "0"
+        output += "* " + key + ": " + value + "\n"
+
+    return output
+
+
+def output_data(data: dict, formatter, out=sys.stdout):
+    """Outputs data to stdout
+
+    :param data:
+    :param formatter:
+    :param out:
+    """
+    out.write(formatter(data))
+
+
+def run(args, project: Project):
     """List all non-compliant files."""
     report = ProjectReport.generate(
         project, do_checksum=False, multiprocessing=not args.no_multiprocessing
@@ -338,7 +517,11 @@ def run(args, project: Project, out=sys.stdout):
 
     with contextlib.ExitStack() as stack:
         if args.quiet:
+            # TODO Rework quiet flag
             out = stack.enter_context(open(os.devnull, "w", encoding="utf-8"))
-        result = lint(report, out=out)
+        # TODO Toggle JSON formatter via flag
+        data = collect_data_from_report(report)
+        output_data(data, format_json())
+        result = data["summary"]["compliant"]
 
     return 0 if result else 1

From 3ae8af7e63b84051d5d195860604e7461a199d8c Mon Sep 17 00:00:00 2001
From: linozen <linus@sehn.tech>
Date: Tue, 24 Jan 2023 17:08:00 +0100
Subject: [PATCH 02/60] Add `--json` argument

---
 src/reuse/lint.py | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/src/reuse/lint.py b/src/reuse/lint.py
index e2ed566d..91661542 100644
--- a/src/reuse/lint.py
+++ b/src/reuse/lint.py
@@ -336,6 +336,9 @@ def add_arguments(parser):
     parser.add_argument(
         "-q", "--quiet", action="store_true", help=_("prevents output")
     )
+    parser.add_argument(
+        "-j", "--json", action="store_true", help=_("formats output as JSON")
+    )
 
 
 def collect_data_from_report(report: ProjectReport) -> dict:
@@ -509,7 +512,8 @@ def output_data(data: dict, formatter, out=sys.stdout):
     out.write(formatter(data))
 
 
-def run(args, project: Project):
+
+def run(args, project: Project, out=sys.stdout, formatter=format_plain):
     """List all non-compliant files."""
     report = ProjectReport.generate(
         project, do_checksum=False, multiprocessing=not args.no_multiprocessing
@@ -517,11 +521,13 @@ def run(args, project: Project):
 
     with contextlib.ExitStack() as stack:
         if args.quiet:
-            # TODO Rework quiet flag
             out = stack.enter_context(open(os.devnull, "w", encoding="utf-8"))
-        # TODO Toggle JSON formatter via flag
+
+        if args.json:
+            formatter = format_json
+
         data = collect_data_from_report(report)
-        output_data(data, format_json())
+        lint(data, formatter=formatter, out=out)
         result = data["summary"]["compliant"]
 
     return 0 if result else 1

From 2e6805e7a17058b171882b3ab04a577f39fb876f Mon Sep 17 00:00:00 2001
From: linozen <linus@sehn.tech>
Date: Tue, 24 Jan 2023 17:08:29 +0100
Subject: [PATCH 03/60] Slim down data dictionary creation

---
 src/reuse/lint.py | 36 +++++++++++++++---------------------
 1 file changed, 15 insertions(+), 21 deletions(-)

diff --git a/src/reuse/lint.py b/src/reuse/lint.py
index 91661542..499b7b20 100644
--- a/src/reuse/lint.py
+++ b/src/reuse/lint.py
@@ -351,40 +351,34 @@ def collect_data_from_report(report: ProjectReport) -> dict:
     data = {
         "json_version": "1.0",
         "reuse_version": __REUSE_version__,
-        "non_compliant": {},
+        "non_compliant": {
+            "missing_licenses": report.missing_licenses,
+            "unused_licenses": [str(f) for f in report.unused_licenses],
+            "deprecated_licenses": [str(f) for f in report.deprecated_licenses],
+            "bad_licenses": report.bad_licenses,
+            "licenses_without_extension": [
+                str(f) for f in report.licenses_without_extension.values()
+            ],
+            "missing_copyright_info": [str(f) for f in report.files_without_copyright],
+            "missing_licensing_info": [str(f) for f in report.files_without_licenses],
+            "read_error": [str(f) for f in report.read_errors],
+        },
         "files": {},
         "summary": {
             "used_licenses": [],
         },
     }
 
-    # Populate 'non_compliant'
-    data["non_compliant"] = {
-        "missing_licenses": [str(f) for f in report.missing_licenses],
-        "unused_licenses": [str(f) for f in report.unused_licenses],
-        "deprecated_licenses": [str(f) for f in report.deprecated_licenses],
-        "bad_licenses": [str(f) for f in report.bad_licenses],
-        "licenses_without_extension": [
-            str(f) for f in report.licenses_without_extension
-        ],
-        "missing_copyright_info": [
-            str(f) for f in report.files_without_copyright
-        ],
-        "missing_licensing_info": [str(f) for f in report.missing_licenses],
-        "read_error": [str(f) for f in report.read_errors],
-    }
-
     # Populate 'files'
     for file in report.file_reports:
         copyrights = file.spdxfile.copyright.split("\n")
         data["files"][str(file.path)] = {
             "copyrights": [
-                {"value": copyright, "source": file.spdxfile.name}
-                for copyright in copyrights
+                {"value": cop, "source": file.spdxfile.name} for cop in copyrights
             ],
             "licenses": [
-                {"value": license, "source": file.spdxfile.name}
-                for license in file.spdxfile.licenses_in_file
+                {"value": lic, "source": file.spdxfile.name}
+                for lic in file.spdxfile.licenses_in_file
             ],
         }
 

From 5a03e9eedc2db0cae184f105d09f73be20c6a980 Mon Sep 17 00:00:00 2001
From: linozen <linus@sehn.tech>
Date: Tue, 24 Jan 2023 17:09:15 +0100
Subject: [PATCH 04/60] Rework plaintext formatter

---
 src/reuse/lint.py | 135 ++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 113 insertions(+), 22 deletions(-)

diff --git a/src/reuse/lint.py b/src/reuse/lint.py
index 499b7b20..cbbde78d 100644
--- a/src/reuse/lint.py
+++ b/src/reuse/lint.py
@@ -419,54 +419,107 @@ def format_json(data) -> str:
 
 
 def format_plain(data) -> str:
-    """Formats data dictionary as plaintext string to be printed to std.out
+    """Formats data dictionary as plaintext string to be printed to sys.stdout
 
     :param data: Dictionary containing formatted ProjectReport data
-    :return: String (in plaintext) that can be output to std.out
+    :return: String (in plaintext) that can be output to sys.stdout
     """
     output = ""
+
+    # If the project is not compliant:
     if not data["summary"]["compliant"]:
 
-        output += "# " + _("MISSING COPYRIGHT AND LICENSING INFORMATION")
-        output += "\n\n"
-        files_without_copyright = set(
-            data["non_compliant"]["missing_copyright_info"]
-        )
-        files_without_license = set(
-            data["non_compliant"]["missing_licensing_info"]
-        )
-        files_without_both = files_without_license.intersection(
-            files_without_license
-        )
+        # Missing copyright and licensing information
+        files_without_copyright = set(data["non_compliant"]["missing_copyright_info"])
+        files_without_license = set(data["non_compliant"]["missing_licensing_info"])
+        files_without_both = files_without_license.intersection(files_without_copyright)
 
+        header = "# " + _("MISSING COPYRIGHT AND LICENSING INFORMATION") + "\n\n"
         if files_without_both:
+            output += header
             output += _(
-                "The following files have no copyright and licensing "
-                "information:"
+                "The following files have no copyright and licensing " "information:"
             )
             output += "\n"
             for file in sorted(files_without_both):
                 output += f"* {file}\n"
+            output += "\n\n"
 
         if files_without_copyright - files_without_both:
+            output += header
             output += _("The following files have no copyright information:")
             output += "\n"
             for file in sorted(files_without_copyright - files_without_both):
                 output += f"* {file}\n"
+            output += "\n\n"
 
         if files_without_license - files_without_both:
+            output += header
             output += _("The following files have no licensing information:")
             output += "\n"
             for file in sorted(files_without_license - files_without_both):
                 output += f"* {file}\n"
+            output += "\n\n"
+
+        # Bad licenses
+        bad_licenses = data["non_compliant"]["bad_licenses"]
+        if bad_licenses:
+            output += "# " + _("BAD LICENSES") + "\n\n"
+            for lic, files in sorted(bad_licenses.items()):
+                output += f"'{lic}' found in:" + "\n"
+                for f in sorted(files):
+                    output += f"* {f}\n"
+            output += "\n\n"
+
+        # Deprecated licenses
+        deprecated_licenses = data["non_compliant"]["deprecated_licenses"]
+        if deprecated_licenses:
+            output += "# " + _("DEPRECATED LICENSES") + "\n\n"
+            output += _("The following licenses are deprecated by SPDX:") + "\n"
+            for lic in sorted(deprecated_licenses):
+                output += f"* {lic}\n"
+            output += "\n\n"
+
+        # Licenses without extension
+        licenses_without_extension = data["non_compliant"]["licenses_without_extension"]
+        if licenses_without_extension:
+            output += "# " + _("LICENSES WITHOUT FILE EXTENSION") + "\n\n"
+            output += _("The following licenses have no file extension:") + "\n"
+            for path in sorted(licenses_without_extension):
+                output += f"* {str(path)}" + "\n"
+            output += "\n\n"
+
+        # Missing licenses
+        missing_licenses = data["non_compliant"]["missing_licenses"]
+        if missing_licenses:
+            output += "# " + _("MISSING LICENSES") + "\n\n"
+            for lic, files in sorted(missing_licenses.items()):
+                output += f"'{lic}' found in:" + "\n"
+                for f in sorted(files):
+                    output += f"* {f}\n"
+            output += "\n"
 
-        # bad licenses
-        # deprecated licenses
-
-        output += "\n"
+        # Unused licenses
+        unused_licenses = data["non_compliant"]["unused_licenses"]
+        if unused_licenses:
+            output += "# " + _("UNUSED LICENSES") + "\n\n"
+            output += _("The following licenses are not used:") + "\n"
+            for lic in sorted(deprecated_licenses):
+                output += f"* {lic}\n"
+            output += "\n\n"
+
+        # Read errors
+        read_errors = data["non_compliant"]["read_error"]
+        if read_errors:
+            output += "# " + _("READ ERRORS") + "\n\n"
+            output += _("Could not read:") + "\n"
+            for path in sorted(read_errors):
+                output += f"* {str(path)}" + "\n"
+            output += "\n\n"
 
     output += "# " + _("SUMMARY")
     output += "\n\n"
+
     summary_contents = [
         (_("Bad licenses:"), ", ".join(data["non_compliant"]["bad_licenses"])),
         (
@@ -475,23 +528,61 @@ def format_plain(data) -> str:
         ),
         (
             _("Licenses without file extension:"),
-            ", ".join(data["non_compliant"]["licenses_without_extension"]),
+            ", ".join(
+                [
+                    lic.split("/")[1]
+                    for lic in data["non_compliant"]["licenses_without_extension"]
+                ]
+            ),
         ),
         (
             _("Missing licenses:"),
-            ", ".join(data["non_compliant"]["missing_licensing_info"]),
+            ", ".join(data["non_compliant"]["missing_licenses"]),
         ),
         (
             _("Unused licenses:"),
             ", ".join(data["non_compliant"]["unused_licenses"]),
         ),
         (_("Used licenses:"), ", ".join(data["summary"]["used_licenses"])),
+        (
+            _("Read errors: {count}").format(
+                count=len(data["non_compliant"]["read_error"])
+            ),
+            "empty",
+        ),
+        (
+            _("files with copyright information: {count} / {total}").format(
+                count=data["summary"]["files_with_copyright_info"],
+                total=data["summary"]["files_total"],
+            ),
+            "empty",
+        ),
+        (
+            _("files with license information: {count} / {total}").format(
+                count=data["summary"]["files_with_licensing_info"],
+                total=data["summary"]["files_total"],
+            ),
+            "empty",
+        ),
     ]
 
     for key, value in summary_contents:
         if not value:
             value = "0"
-        output += "* " + key + ": " + value + "\n"
+        if value == "empty":
+            value = ""
+        output += "* " + key + " " + value + "\n"
+
+    if data["summary"]["compliant"]:
+        output += _(
+            "Congratulations! Your project is compliant with version"
+            " {} of the REUSE Specification :-)"
+        ).format(__REUSE_version__)
+    else:
+        output += _(
+            "Unfortunately, your project is not compliant with version "
+            "{} of the REUSE Specification :-("
+        ).format(__REUSE_version__)
 
     return output
 

From 21d00266c54c62dc953b90bc8b6a48b39c0f09c5 Mon Sep 17 00:00:00 2001
From: linozen <linus@sehn.tech>
Date: Tue, 24 Jan 2023 17:09:47 +0100
Subject: [PATCH 05/60] Move JSON formatter and re-add `lint` function

---
 src/reuse/lint.py | 36 ++++++++++++++++++++++--------------
 1 file changed, 22 insertions(+), 14 deletions(-)

diff --git a/src/reuse/lint.py b/src/reuse/lint.py
index cbbde78d..15880861 100644
--- a/src/reuse/lint.py
+++ b/src/reuse/lint.py
@@ -409,15 +409,6 @@ def collect_data_from_report(report: ProjectReport) -> dict:
     return data
 
 
-def format_json(data) -> str:
-    """Formats data dictionary as JSON string ready to be printed to std.out
-
-    :param data: Dictionary containing formatted ProjectReport data
-    :return: String (representing JSON) that can be output to std.out
-    """
-    return json.dumps(data, indent=2)
-
-
 def format_plain(data) -> str:
     """Formats data dictionary as plaintext string to be printed to sys.stdout
 
@@ -587,15 +578,32 @@ def format_plain(data) -> str:
     return output
 
 
-def output_data(data: dict, formatter, out=sys.stdout):
-    """Outputs data to stdout
+def format_json(data) -> str:
+    """Formats data dictionary as JSON string ready to be printed to sys.stdout
 
-    :param data:
-    :param formatter:
-    :param out:
+    :param data: Dictionary containing formatted ProjectReport data
+    :return: String (representing JSON) that can be output to sys.stdout
     """
+
+    def set_default(obj):
+        if isinstance(obj, set):
+            return list(obj)
+
+    return json.dumps(data, indent=2, default=set_default)
+
+
+def lint(data: dict, formatter=format_plain, out=sys.stdout):
+    """Lints the entire project
+
+    :param data: Dictionary holding formatted ProjectReport data
+    :param formatter: Callable that formats the data dictionary
+    :param out: Where to output
+    """
+
     out.write(formatter(data))
 
+    result = data["summary"]["compliant"]
+    return result
 
 
 def run(args, project: Project, out=sys.stdout, formatter=format_plain):

From 0b473f0861b383ea8ad3533cd4584046f6cdea81 Mon Sep 17 00:00:00 2001
From: linozen <linus@sehn.tech>
Date: Tue, 24 Jan 2023 17:10:11 +0100
Subject: [PATCH 06/60] Remove old output generation functions

---
 src/reuse/lint.py | 312 ----------------------------------------------
 1 file changed, 312 deletions(-)

diff --git a/src/reuse/lint.py b/src/reuse/lint.py
index 15880861..24327d1c 100644
--- a/src/reuse/lint.py
+++ b/src/reuse/lint.py
@@ -19,318 +19,6 @@
 from .report import ProjectReport
 
 
-def _write_element(element, out=sys.stdout):
-    out.write("* ")
-    out.write(str(element))
-    out.write("\n")
-
-
-def lint(report: ProjectReport, out=sys.stdout) -> bool:
-    """Lint the entire project."""
-
-    # Collect data from report
-    # save into data structure (if report is not suitable)
-
-    # Write output formatting functions (dynamic output formats)
-    # Write output writing functions (stdout[, file, webrequest, ...])
-
-    bad_licenses_result = lint_bad_licenses(report, out)
-    deprecated_result = lint_deprecated_licenses(report, out)
-    extensionless = lint_licenses_without_extension(report, out)
-    missing_licenses_result = lint_missing_licenses(report, out)
-    unused_licenses_result = lint_unused_licenses(report, out)
-    read_errors_result = lint_read_errors(report, out)
-    files_without_cali = lint_files_without_copyright_and_licensing(report, out)
-
-    lint_summary(report, out=out)
-
-    success = not any(
-        any(result)
-        for result in (
-            bad_licenses_result,
-            deprecated_result,
-            extensionless,
-            missing_licenses_result,
-            unused_licenses_result,
-            read_errors_result,
-            files_without_cali,
-        )
-    )
-
-    out.write("\n")
-    if success:
-        out.write(
-            _(
-                "Congratulations! Your project is compliant with version"
-                " {} of the REUSE Specification :-)"
-            ).format(__REUSE_version__)
-        )
-    else:
-        out.write(
-            _(
-                "Unfortunately, your project is not compliant with version "
-                "{} of the REUSE Specification :-("
-            ).format(__REUSE_version__)
-        )
-    out.write("\n")
-
-    return success
-
-
-def lint_bad_licenses(report: ProjectReport, out=sys.stdout) -> Iterable[str]:
-    """Lint for bad licenses. Bad licenses are licenses that are not in the
-    SPDX License List or do not start with LicenseRef-.
-    """
-    bad_files = []
-
-    if report.bad_licenses:
-        out.write("# ")
-        out.write(_("BAD LICENSES"))
-        out.write("\n")
-        for lic, files in sorted(report.bad_licenses.items()):
-            out.write("\n")
-            out.write(_("'{}' found in:").format(lic))
-            out.write("\n")
-            for file_ in sorted(files):
-                bad_files.append(file_)
-                _write_element(file_, out=out)
-        out.write("\n\n")
-
-    return bad_files
-
-
-def lint_deprecated_licenses(
-    report: ProjectReport, out=sys.stdout
-) -> Iterable[str]:
-    """Lint for deprecated licenses."""
-    deprecated = []
-
-    if report.deprecated_licenses:
-        out.write("# ")
-        out.write(_("DEPRECATED LICENSES"))
-        out.write("\n\n")
-        out.write(_("The following licenses are deprecated by SPDX:"))
-        out.write("\n")
-        for lic in sorted(report.deprecated_licenses):
-            deprecated.append(lic)
-            _write_element(lic, out=out)
-        out.write("\n\n")
-
-    return deprecated
-
-
-def lint_licenses_without_extension(
-    report: ProjectReport, out=sys.stdout
-) -> Iterable[str]:
-    """Lint for licenses without extensions."""
-    extensionless = []
-
-    if report.licenses_without_extension:
-        out.write("# ")
-        out.write(_("LICENSES WITHOUT FILE EXTENSION"))
-        out.write("\n\n")
-        out.write(_("The following licenses have no file extension:"))
-        out.write("\n")
-        for __, path in sorted(report.licenses_without_extension.items()):
-            extensionless.append(path)
-            _write_element(path, out=out)
-        out.write("\n\n")
-
-    return extensionless
-
-
-def lint_missing_licenses(
-    report: ProjectReport, out=sys.stdout
-) -> Iterable[str]:
-    """Lint for missing licenses. A license is missing when it is referenced
-    in a file, but cannot be found.
-    """
-    bad_files = []
-
-    if report.missing_licenses:
-        out.write("# ")
-        out.write(_("MISSING LICENSES"))
-        out.write("\n")
-
-        for lic, files in sorted(report.missing_licenses.items()):
-            out.write("\n")
-            out.write(_("'{}' found in:").format(lic))
-            out.write("\n")
-            for file_ in sorted(files):
-                bad_files.append(file_)
-                _write_element(file_, out=out)
-        out.write("\n\n")
-
-    return bad_files
-
-
-def lint_unused_licenses(
-    report: ProjectReport, out=sys.stdout
-) -> Iterable[str]:
-    """Lint for unused licenses."""
-    unused_licenses = []
-
-    if report.unused_licenses:
-        out.write("# ")
-        out.write(_("UNUSED LICENSES"))
-        out.write("\n\n")
-        out.write(_("The following licenses are not used:"))
-        out.write("\n")
-        for lic in sorted(report.unused_licenses):
-            unused_licenses.append(lic)
-            _write_element(lic, out=out)
-        out.write("\n\n")
-
-    return unused_licenses
-
-
-def lint_read_errors(report: ProjectReport, out=sys.stdout) -> Iterable[str]:
-    """Lint for read errors."""
-    bad_files = []
-
-    if report.read_errors:
-        out.write("# ")
-        out.write(_("READ ERRORS"))
-        out.write("\n\n")
-        out.write(_("Could not read:"))
-        out.write("\n")
-        for file_ in report.read_errors:
-            bad_files.append(file_)
-            _write_element(file_, out=out)
-        out.write("\n\n")
-
-    return bad_files
-
-
-def lint_files_without_copyright_and_licensing(
-    report: ProjectReport, out=sys.stdout
-) -> Iterable[str]:
-    """Lint for files that do not have copyright or licensing information."""
-    # TODO: The below three operations can probably be optimised.
-    both = set(report.files_without_copyright) & set(
-        report.files_without_licenses
-    )
-    only_copyright = set(report.files_without_copyright) - both
-    only_licensing = set(report.files_without_licenses) - both
-
-    if any((both, only_copyright, only_licensing)):
-        out.write("# ")
-        out.write(_("MISSING COPYRIGHT AND LICENSING INFORMATION"))
-        out.write("\n\n")
-        if both:
-            out.write(
-                _(
-                    "The following files have no copyright and licensing "
-                    "information:"
-                )
-            )
-            out.write("\n")
-            for file_ in sorted(both):
-                _write_element(file_, out=out)
-            out.write("\n")
-        if only_copyright:
-            out.write(_("The following files have no copyright information:"))
-            out.write("\n")
-            for file_ in sorted(only_copyright):
-                _write_element(file_, out=out)
-            out.write("\n")
-        if only_licensing:
-            out.write(_("The following files have no licensing information:"))
-            out.write("\n")
-            for file_ in sorted(only_licensing):
-                _write_element(file_, out=out)
-            out.write("\n")
-        out.write("\n")
-
-    return both | only_copyright | only_licensing
-
-
-def lint_summary(report: ProjectReport, out=sys.stdout) -> None:
-    """Print a summary for linting."""
-    # pylint: disable=too-many-statements
-    out.write("# ")
-    out.write(_("SUMMARY"))
-    out.write("\n\n")
-
-    file_total = len(report.file_reports)
-
-    out.write("* ")
-    out.write(_("Bad licenses:"))
-    for i, lic in enumerate(sorted(report.bad_licenses)):
-        if i:
-            out.write(",")
-        out.write(" ")
-        out.write(lic)
-    out.write("\n")
-
-    out.write("* ")
-    out.write(_("Deprecated licenses:"))
-    for i, lic in enumerate(sorted(report.deprecated_licenses)):
-        if i:
-            out.write(",")
-        out.write(" ")
-        out.write(lic)
-    out.write("\n")
-
-    out.write("* ")
-    out.write(_("Licenses without file extension:"))
-    for i, lic in enumerate(sorted(report.licenses_without_extension)):
-        if i:
-            out.write(",")
-        out.write(" ")
-        out.write(lic)
-    out.write("\n")
-
-    out.write("* ")
-    out.write(_("Missing licenses:"))
-    for i, lic in enumerate(sorted(report.missing_licenses)):
-        if i:
-            out.write(",")
-        out.write(" ")
-        out.write(lic)
-    out.write("\n")
-
-    out.write("* ")
-    out.write(_("Unused licenses:"))
-    for i, lic in enumerate(sorted(report.unused_licenses)):
-        if i:
-            out.write(",")
-        out.write(" ")
-        out.write(lic)
-    out.write("\n")
-
-    out.write("* ")
-    out.write(_("Used licenses:"))
-    for i, lic in enumerate(sorted(report.used_licenses)):
-        if i:
-            out.write(",")
-        out.write(" ")
-        out.write(lic)
-    out.write("\n")
-
-    out.write("* ")
-    out.write(_("Read errors: {count}").format(count=len(report.read_errors)))
-    out.write("\n")
-
-    out.write("* ")
-    out.write(
-        _("Files with copyright information: {count} / {total}").format(
-            count=file_total - len(report.files_without_copyright),
-            total=file_total,
-        )
-    )
-    out.write("\n")
-
-    out.write("* ")
-    out.write(
-        _("Files with license information: {count} / {total}").format(
-            count=file_total - len(report.files_without_licenses),
-            total=file_total,
-        )
-    )
-    out.write("\n")
-
-
 def add_arguments(parser):
     """Add arguments to parser."""
     parser.add_argument(

From 74fd00ee9c9d21012ef45c3e0c4a50398eede4ee Mon Sep 17 00:00:00 2001
From: linozen <linus@sehn.tech>
Date: Tue, 24 Jan 2023 17:16:44 +0100
Subject: [PATCH 07/60] Adapt tests

---
 tests/test_lint.py | 63 +++++++++++++++++++++++++---------------------
 1 file changed, 35 insertions(+), 28 deletions(-)

diff --git a/tests/test_lint.py b/tests/test_lint.py
index 99ec3932..322c09ec 100644
--- a/tests/test_lint.py
+++ b/tests/test_lint.py
@@ -5,20 +5,12 @@
 
 """All tests for reuse.lint"""
 
-
 import shutil
 import sys
 
 import pytest
 
-from reuse.lint import (
-    lint,
-    lint_bad_licenses,
-    lint_files_without_copyright_and_licensing,
-    lint_missing_licenses,
-    lint_read_errors,
-    lint_summary,
-)
+from reuse.lint import lint, collect_data_from_report
 from reuse.project import Project
 from reuse.report import ProjectReport
 
@@ -40,7 +32,8 @@ def test_lint_simple(fake_repository):
     """Extremely simple test for lint."""
     project = Project(fake_repository)
     report = ProjectReport.generate(project)
-    result = lint(report)
+    data = collect_data_from_report(report)
+    result = lint(data)
     assert result
 
 
@@ -48,7 +41,8 @@ def test_lint_git(git_repository):
     """Extremely simple test for lint with a git repository."""
     project = Project(git_repository)
     report = ProjectReport.generate(project)
-    result = lint(report)
+    data = collect_data_from_report(report)
+    result = lint(data)
     assert result
 
 
@@ -57,7 +51,8 @@ def test_lint_submodule(submodule_repository):
     project = Project(submodule_repository)
     (submodule_repository / "submodule/foo.c").write_text("foo")
     report = ProjectReport.generate(project)
-    result = lint(report)
+    data = collect_data_from_report(report)
+    result = lint(data)
     assert result
 
 
@@ -66,7 +61,8 @@ def test_lint_submodule_included(submodule_repository):
     project = Project(submodule_repository, include_submodules=True)
     (submodule_repository / "submodule/foo.c").write_text("foo")
     report = ProjectReport.generate(project)
-    result = lint(report)
+    data = collect_data_from_report(report)
+    result = lint(data)
     assert not result
 
 
@@ -74,7 +70,8 @@ def test_lint_empty_directory(empty_directory):
     """An empty directory is compliant."""
     project = Project(empty_directory)
     report = ProjectReport.generate(project)
-    result = lint(report)
+    data = collect_data_from_report(report)
+    result = lint(data)
     assert result
 
 
@@ -90,7 +87,8 @@ def test_lint_deprecated(fake_repository, stringio):
 
     project = Project(fake_repository)
     report = ProjectReport.generate(project)
-    result = lint(report, out=stringio)
+    data = collect_data_from_report(report)
+    result = lint(data, out=stringio)
 
     assert not result
     assert "GPL-3.0" in stringio.getvalue()
@@ -98,14 +96,13 @@ def test_lint_deprecated(fake_repository, stringio):
 
 def test_lint_bad_license(fake_repository, stringio):
     """A bad license is detected."""
-    (fake_repository / "foo.py").write_text(
-        "SPDX-License-Identifier: bad-license"
-    )
+    (fake_repository / "foo.py").write_text("SPDX-License-Identifier: bad-license")
     project = Project(fake_repository)
     report = ProjectReport.generate(project)
-    result = lint_bad_licenses(report, out=stringio)
+    data = collect_data_from_report(report)
+    result = lint(data, out=stringio)
 
-    assert "foo.py" in str(list(result)[0])
+    assert not result
     assert "foo.py" in stringio.getvalue()
     assert "bad-license" in stringio.getvalue()
 
@@ -115,9 +112,10 @@ def test_lint_missing_licenses(fake_repository, stringio):
     (fake_repository / "foo.py").write_text("SPDX-License-Identifier: MIT")
     project = Project(fake_repository)
     report = ProjectReport.generate(project)
-    result = lint_missing_licenses(report, out=stringio)
+    data = collect_data_from_report(report)
+    result = lint(data, out=stringio)
 
-    assert "foo.py" in str(list(result)[0])
+    assert not result
     assert "foo.py" in stringio.getvalue()
     assert "MIT" in stringio.getvalue()
 
@@ -127,9 +125,11 @@ def test_lint_unused_licenses(fake_repository, stringio):
     (fake_repository / "LICENSES/MIT.txt").write_text("foo")
     project = Project(fake_repository)
     report = ProjectReport.generate(project)
-    lint_summary(report, out=stringio)
+    data = collect_data_from_report(report)
+    result = lint(data, out=stringio)
 
-    assert "MIT" in stringio.getvalue()
+    assert not result
+    assert "Unused licenses: MIT" in stringio.getvalue()
 
 
 @cpython
@@ -140,9 +140,11 @@ def test_lint_read_errors(fake_repository, stringio):
     (fake_repository / "foo.py").chmod(0o000)
     project = Project(fake_repository)
     report = ProjectReport.generate(project)
-    result = lint_read_errors(report, out=stringio)
+    data = collect_data_from_report(report)
+    result = lint(data, out=stringio)
 
-    assert "foo.py" in str(list(result)[0])
+    assert not result
+    assert "Could not read:" in stringio.getvalue()
     assert "foo.py" in stringio.getvalue()
 
 
@@ -151,9 +153,14 @@ def test_lint_files_without_copyright_and_licensing(fake_repository, stringio):
     (fake_repository / "foo.py").write_text("foo")
     project = Project(fake_repository)
     report = ProjectReport.generate(project)
-    result = lint_files_without_copyright_and_licensing(report, out=stringio)
+    data = collect_data_from_report(report)
+    result = lint(data, out=stringio)
 
-    assert "foo.py" in str(list(result)[0])
+    assert not result
+    assert (
+        "The following files have no copyright and licensing information:"
+        in stringio.getvalue()
+    )
     assert "foo.py" in stringio.getvalue()
 
 

From fb32eadad3b818b2987c1ff5d5c4608f9f6ce352 Mon Sep 17 00:00:00 2001
From: linozen <linus@sehn.tech>
Date: Tue, 24 Jan 2023 17:16:57 +0100
Subject: [PATCH 08/60] Add new tests

---
 tests/test_main.py | 26 +++++++++++++++++++++++++-
 1 file changed, 25 insertions(+), 1 deletion(-)

diff --git a/tests/test_main.py b/tests/test_main.py
index 51949159..4bfdb230 100644
--- a/tests/test_main.py
+++ b/tests/test_main.py
@@ -11,6 +11,7 @@
 # pylint: disable=redefined-outer-name,unused-argument
 
 import errno
+import json
 import os
 import re
 from inspect import cleandoc
@@ -25,6 +26,7 @@
 from reuse._main import main
 from reuse._util import GIT_EXE, HG_EXE
 
+
 # REUSE-IgnoreStart
 
 
@@ -215,6 +217,29 @@ def test_lint_fail_quiet(fake_repository, stringio):
     assert stringio.getvalue() == ""
 
 
+def test_lint_json(fake_repository, stringio):
+    """Run a successful lint and check the JSON output."""
+    result = main(["lint", "--json"], out=stringio)
+    output = json.loads(stringio.getvalue())
+
+    assert result == 0
+    assert output["json_version"] == "1.0"
+    assert len(output["files"]) == 7
+
+
+def test_lint_json_fail(fake_repository, stringio):
+    """Run a failed lint."""
+    (fake_repository / "foo.py").write_text("foo")
+    result = main(["lint", "--json"], out=stringio)
+    output = json.loads(stringio.getvalue())
+
+    assert result > 0
+    assert output["json_version"] == "1.0"
+    assert len(output["non_compliant"]["missing_licensing_info"]) == 1
+    assert len(output["non_compliant"]["missing_copyright_info"]) == 1
+    assert len(output["files"]) == 8
+
+
 def test_lint_no_file_extension(fake_repository, stringio):
     """If a license has no file extension, the lint fails."""
     (fake_repository / "LICENSES/CC0-1.0.txt").rename(
@@ -432,5 +457,4 @@ def test_supported_licenses(stringio):
         stringio.getvalue(),
     )
 
-
 # REUSE-IgnoreEnd

From ff3d3c39aa546043d9b12ad6c535f39d8e8b8cbe Mon Sep 17 00:00:00 2001
From: linozen <linus@sehn.tech>
Date: Tue, 24 Jan 2023 17:31:36 +0100
Subject: [PATCH 09/60] Satisfy pylint

---
 src/reuse/lint.py | 68 ++++++++++++++++++++++++++++++-----------------
 1 file changed, 43 insertions(+), 25 deletions(-)

diff --git a/src/reuse/lint.py b/src/reuse/lint.py
index 24327d1c..6c4e1c0a 100644
--- a/src/reuse/lint.py
+++ b/src/reuse/lint.py
@@ -12,7 +12,7 @@
 import os
 import sys
 from gettext import gettext as _
-from typing import Iterable
+from typing import Dict
 
 from . import __REUSE_version__
 from .project import Project
@@ -47,8 +47,12 @@ def collect_data_from_report(report: ProjectReport) -> dict:
             "licenses_without_extension": [
                 str(f) for f in report.licenses_without_extension.values()
             ],
-            "missing_copyright_info": [str(f) for f in report.files_without_copyright],
-            "missing_licensing_info": [str(f) for f in report.files_without_licenses],
+            "missing_copyright_info": [
+                str(f) for f in report.files_without_copyright
+            ],
+            "missing_licensing_info": [
+                str(f) for f in report.files_without_licenses
+            ],
             "read_error": [str(f) for f in report.read_errors],
         },
         "files": {},
@@ -62,7 +66,8 @@ def collect_data_from_report(report: ProjectReport) -> dict:
         copyrights = file.spdxfile.copyright.split("\n")
         data["files"][str(file.path)] = {
             "copyrights": [
-                {"value": cop, "source": file.spdxfile.name} for cop in copyrights
+                {"value": cop, "source": file.spdxfile.name}
+                for cop in copyrights
             ],
             "licenses": [
                 {"value": lic, "source": file.spdxfile.name}
@@ -89,15 +94,16 @@ def collect_data_from_report(report: ProjectReport) -> dict:
         "used_licenses": list(report.used_licenses),
         "files_total": number_of_files,
         "files_with_copyright_info": number_of_files
-        - len(report.files_without_copyright),
+                                     - len(report.files_without_copyright),
         "files_with_licensing_info": number_of_files
-        - len(report.files_without_licenses),
+                                     - len(report.files_without_licenses),
         "compliant": is_compliant,
     }
     return data
 
 
-def format_plain(data) -> str:
+# pylint: disable=too-many-locals, too-many-branches, too-many-statements
+def format_plain(data: Dict) -> str:
     """Formats data dictionary as plaintext string to be printed to sys.stdout
 
     :param data: Dictionary containing formatted ProjectReport data
@@ -109,15 +115,24 @@ def format_plain(data) -> str:
     if not data["summary"]["compliant"]:
 
         # Missing copyright and licensing information
-        files_without_copyright = set(data["non_compliant"]["missing_copyright_info"])
-        files_without_license = set(data["non_compliant"]["missing_licensing_info"])
-        files_without_both = files_without_license.intersection(files_without_copyright)
+        files_without_copyright = set(
+            data["non_compliant"]["missing_copyright_info"]
+        )
+        files_without_license = set(
+            data["non_compliant"]["missing_licensing_info"]
+        )
+        files_without_both = files_without_license.intersection(
+            files_without_copyright
+        )
 
-        header = "# " + _("MISSING COPYRIGHT AND LICENSING INFORMATION") + "\n\n"
+        header = (
+            "# " + _("MISSING COPYRIGHT AND LICENSING INFORMATION") + "\n\n"
+        )
         if files_without_both:
             output += header
             output += _(
-                "The following files have no copyright and licensing " "information:"
+                "The following files have no copyright and licensing "
+                "information:"
             )
             output += "\n"
             for file in sorted(files_without_both):
@@ -146,8 +161,8 @@ def format_plain(data) -> str:
             output += "# " + _("BAD LICENSES") + "\n\n"
             for lic, files in sorted(bad_licenses.items()):
                 output += f"'{lic}' found in:" + "\n"
-                for f in sorted(files):
-                    output += f"* {f}\n"
+                for file in sorted(files):
+                    output += f"* {file}\n"
             output += "\n\n"
 
         # Deprecated licenses
@@ -160,7 +175,9 @@ def format_plain(data) -> str:
             output += "\n\n"
 
         # Licenses without extension
-        licenses_without_extension = data["non_compliant"]["licenses_without_extension"]
+        licenses_without_extension = data["non_compliant"][
+            "licenses_without_extension"
+        ]
         if licenses_without_extension:
             output += "# " + _("LICENSES WITHOUT FILE EXTENSION") + "\n\n"
             output += _("The following licenses have no file extension:") + "\n"
@@ -174,8 +191,8 @@ def format_plain(data) -> str:
             output += "# " + _("MISSING LICENSES") + "\n\n"
             for lic, files in sorted(missing_licenses.items()):
                 output += f"'{lic}' found in:" + "\n"
-                for f in sorted(files):
-                    output += f"* {f}\n"
+                for file in sorted(files):
+                    output += f"* {file}\n"
             output += "\n"
 
         # Unused licenses
@@ -210,7 +227,9 @@ def format_plain(data) -> str:
             ", ".join(
                 [
                     lic.split("/")[1]
-                    for lic in data["non_compliant"]["licenses_without_extension"]
+                    for lic in data["non_compliant"][
+                    "licenses_without_extension"
+                ]
                 ]
             ),
         ),
@@ -266,21 +285,20 @@ def format_plain(data) -> str:
     return output
 
 
-def format_json(data) -> str:
+def format_json(data: Dict) -> str:
     """Formats data dictionary as JSON string ready to be printed to sys.stdout
 
     :param data: Dictionary containing formatted ProjectReport data
     :return: String (representing JSON) that can be output to sys.stdout
     """
 
-    def set_default(obj):
-        if isinstance(obj, set):
-            return list(obj)
-
-    return json.dumps(data, indent=2, default=set_default)
+    return json.dumps(
+        # Serialize sets to lists
+        data, indent=2, default=lambda x: list(x) if isinstance(x, set) else x
+    )
 
 
-def lint(data: dict, formatter=format_plain, out=sys.stdout):
+def lint(data: Dict, formatter=format_plain, out=sys.stdout):
     """Lints the entire project
 
     :param data: Dictionary holding formatted ProjectReport data

From cfcac8eb6db2f7446d1e9140062685840342de84 Mon Sep 17 00:00:00 2001
From: linozen <linus@sehn.tech>
Date: Tue, 24 Jan 2023 17:39:24 +0100
Subject: [PATCH 10/60] Satisfy isort

---
 tests/test_lint.py | 6 ++++--
 tests/test_main.py | 2 +-
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/tests/test_lint.py b/tests/test_lint.py
index 322c09ec..0cc41dce 100644
--- a/tests/test_lint.py
+++ b/tests/test_lint.py
@@ -10,7 +10,7 @@
 
 import pytest
 
-from reuse.lint import lint, collect_data_from_report
+from reuse.lint import collect_data_from_report, lint
 from reuse.project import Project
 from reuse.report import ProjectReport
 
@@ -96,7 +96,9 @@ def test_lint_deprecated(fake_repository, stringio):
 
 def test_lint_bad_license(fake_repository, stringio):
     """A bad license is detected."""
-    (fake_repository / "foo.py").write_text("SPDX-License-Identifier: bad-license")
+    (fake_repository / "foo.py").write_text(
+        "SPDX-License-Identifier: bad-license"
+    )
     project = Project(fake_repository)
     report = ProjectReport.generate(project)
     data = collect_data_from_report(report)
diff --git a/tests/test_main.py b/tests/test_main.py
index 4bfdb230..c2c6d148 100644
--- a/tests/test_main.py
+++ b/tests/test_main.py
@@ -26,7 +26,6 @@
 from reuse._main import main
 from reuse._util import GIT_EXE, HG_EXE
 
-
 # REUSE-IgnoreStart
 
 
@@ -457,4 +456,5 @@ def test_supported_licenses(stringio):
         stringio.getvalue(),
     )
 
+
 # REUSE-IgnoreEnd

From d749d51d15e4c0bdbfc2aee55fea26c3636d400d Mon Sep 17 00:00:00 2001
From: linozen <linus@sehn.tech>
Date: Tue, 24 Jan 2023 17:49:35 +0100
Subject: [PATCH 11/60] Make showing of license (not path) work on Windows

---
 src/reuse/lint.py | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/src/reuse/lint.py b/src/reuse/lint.py
index 6c4e1c0a..99b38826 100644
--- a/src/reuse/lint.py
+++ b/src/reuse/lint.py
@@ -45,7 +45,7 @@ def collect_data_from_report(report: ProjectReport) -> dict:
             "deprecated_licenses": [str(f) for f in report.deprecated_licenses],
             "bad_licenses": report.bad_licenses,
             "licenses_without_extension": [
-                str(f) for f in report.licenses_without_extension.values()
+                f for f in report.licenses_without_extension.values()
             ],
             "missing_copyright_info": [
                 str(f) for f in report.files_without_copyright
@@ -94,9 +94,9 @@ def collect_data_from_report(report: ProjectReport) -> dict:
         "used_licenses": list(report.used_licenses),
         "files_total": number_of_files,
         "files_with_copyright_info": number_of_files
-                                     - len(report.files_without_copyright),
+        - len(report.files_without_copyright),
         "files_with_licensing_info": number_of_files
-                                     - len(report.files_without_licenses),
+        - len(report.files_without_licenses),
         "compliant": is_compliant,
     }
     return data
@@ -226,10 +226,9 @@ def format_plain(data: Dict) -> str:
             _("Licenses without file extension:"),
             ", ".join(
                 [
-                    lic.split("/")[1]
-                    for lic in data["non_compliant"][
-                    "licenses_without_extension"
-                ]
+                    lic.parts[-1] for lic in data["non_compliant"][
+                        "licenses_without_extension"
+                    ]
                 ]
             ),
         ),
@@ -294,7 +293,9 @@ def format_json(data: Dict) -> str:
 
     return json.dumps(
         # Serialize sets to lists
-        data, indent=2, default=lambda x: list(x) if isinstance(x, set) else x
+        data,
+        indent=2,
+        default=lambda x: list(x) if isinstance(x, set) else x,
     )
 
 

From 5dcd872cbe5137a0a2ee8ffac5c8b80f4ec0d56c Mon Sep 17 00:00:00 2001
From: linozen <linus@sehn.tech>
Date: Tue, 24 Jan 2023 17:53:01 +0100
Subject: [PATCH 12/60] Satisfy pylint (again)

---
 src/reuse/lint.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/reuse/lint.py b/src/reuse/lint.py
index 99b38826..836458df 100644
--- a/src/reuse/lint.py
+++ b/src/reuse/lint.py
@@ -45,7 +45,7 @@ def collect_data_from_report(report: ProjectReport) -> dict:
             "deprecated_licenses": [str(f) for f in report.deprecated_licenses],
             "bad_licenses": report.bad_licenses,
             "licenses_without_extension": [
-                f for f in report.licenses_without_extension.values()
+                list(report.licenses_without_extension.values())
             ],
             "missing_copyright_info": [
                 str(f) for f in report.files_without_copyright

From e03fb3ed56c7a32050ad39a2561e61555c2e7ac2 Mon Sep 17 00:00:00 2001
From: linozen <linus@sehn.tech>
Date: Tue, 24 Jan 2023 17:54:52 +0100
Subject: [PATCH 13/60] Add changelog entry

---
 CHANGELOG.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 946fc020..7384b1d4 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -61,6 +61,7 @@ CLI command and its behaviour. There are no guarantees of stability for the
   SPDX license expression remains the same:
   `Apache-2.0 AND CC0-1.0 AND CC-BY-SA-4.0 AND GPL-3.0-or-later`. (#733)
 - Added `--contributor` option to `annotate`. (#669)
+- Added `--json` flag to `lint` command. (#654)
 
 ### Changed
 

From 89dbd8b10d69d7b0c5ba939e02232ae3d1369ed4 Mon Sep 17 00:00:00 2001
From: linozen <linus@sehn.tech>
Date: Tue, 24 Jan 2023 17:59:17 +0100
Subject: [PATCH 14/60] Fix typo

---
 src/reuse/lint.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/reuse/lint.py b/src/reuse/lint.py
index 836458df..f755a00a 100644
--- a/src/reuse/lint.py
+++ b/src/reuse/lint.py
@@ -44,9 +44,9 @@ def collect_data_from_report(report: ProjectReport) -> dict:
             "unused_licenses": [str(f) for f in report.unused_licenses],
             "deprecated_licenses": [str(f) for f in report.deprecated_licenses],
             "bad_licenses": report.bad_licenses,
-            "licenses_without_extension": [
-                list(report.licenses_without_extension.values())
-            ],
+            "licenses_without_extension": list(
+                report.licenses_without_extension.values()
+            ),
             "missing_copyright_info": [
                 str(f) for f in report.files_without_copyright
             ],
@@ -226,7 +226,8 @@ def format_plain(data: Dict) -> str:
             _("Licenses without file extension:"),
             ", ".join(
                 [
-                    lic.parts[-1] for lic in data["non_compliant"][
+                    file.parts[-1]
+                    for file in data["non_compliant"][
                         "licenses_without_extension"
                     ]
                 ]

From 034a9af5555517f49470121085ac6425faf3e9e0 Mon Sep 17 00:00:00 2001
From: linozen <linus@sehn.tech>
Date: Wed, 15 Feb 2023 11:24:04 +0100
Subject: [PATCH 15/60] Move `collect_data_from_report` to
 `ProjectReport.to_dict`

---
 src/reuse/lint.py   | 98 ++++++---------------------------------------
 src/reuse/report.py | 92 ++++++++++++++++++++++++++++++++----------
 2 files changed, 83 insertions(+), 107 deletions(-)

diff --git a/src/reuse/lint.py b/src/reuse/lint.py
index f755a00a..624f75d5 100644
--- a/src/reuse/lint.py
+++ b/src/reuse/lint.py
@@ -14,7 +14,6 @@
 from gettext import gettext as _
 from typing import Dict
 
-from . import __REUSE_version__
 from .project import Project
 from .report import ProjectReport
 
@@ -29,87 +28,15 @@ def add_arguments(parser):
     )
 
 
-def collect_data_from_report(report: ProjectReport) -> dict:
-    """Collects and formats data from report and returns it as a dictionary
-
-    :param report: ProjectReport object
-    :return: Formatted dictionary containing data from the ProjectReport object
-    """
-    # Setup report data container
-    data = {
-        "json_version": "1.0",
-        "reuse_version": __REUSE_version__,
-        "non_compliant": {
-            "missing_licenses": report.missing_licenses,
-            "unused_licenses": [str(f) for f in report.unused_licenses],
-            "deprecated_licenses": [str(f) for f in report.deprecated_licenses],
-            "bad_licenses": report.bad_licenses,
-            "licenses_without_extension": list(
-                report.licenses_without_extension.values()
-            ),
-            "missing_copyright_info": [
-                str(f) for f in report.files_without_copyright
-            ],
-            "missing_licensing_info": [
-                str(f) for f in report.files_without_licenses
-            ],
-            "read_error": [str(f) for f in report.read_errors],
-        },
-        "files": {},
-        "summary": {
-            "used_licenses": [],
-        },
-    }
-
-    # Populate 'files'
-    for file in report.file_reports:
-        copyrights = file.spdxfile.copyright.split("\n")
-        data["files"][str(file.path)] = {
-            "copyrights": [
-                {"value": cop, "source": file.spdxfile.name}
-                for cop in copyrights
-            ],
-            "licenses": [
-                {"value": lic, "source": file.spdxfile.name}
-                for lic in file.spdxfile.licenses_in_file
-            ],
-        }
-
-    # Populate 'summary'
-    number_of_files = len(report.file_reports)
-    is_compliant = not any(
-        any(result)
-        for result in (
-            data["non_compliant"]["missing_licenses"],
-            data["non_compliant"]["unused_licenses"],
-            data["non_compliant"]["bad_licenses"],
-            data["non_compliant"]["deprecated_licenses"],
-            data["non_compliant"]["licenses_without_extension"],
-            data["non_compliant"]["missing_copyright_info"],
-            data["non_compliant"]["missing_licensing_info"],
-            data["non_compliant"]["read_error"],
-        )
-    )
-    data["summary"] = {
-        "used_licenses": list(report.used_licenses),
-        "files_total": number_of_files,
-        "files_with_copyright_info": number_of_files
-        - len(report.files_without_copyright),
-        "files_with_licensing_info": number_of_files
-        - len(report.files_without_licenses),
-        "compliant": is_compliant,
-    }
-    return data
-
-
 # pylint: disable=too-many-locals, too-many-branches, too-many-statements
-def format_plain(data: Dict) -> str:
+def format_plain(report: ProjectReport) -> str:
     """Formats data dictionary as plaintext string to be printed to sys.stdout
 
-    :param data: Dictionary containing formatted ProjectReport data
+    :param report: ProjectReport data
     :return: String (in plaintext) that can be output to sys.stdout
     """
     output = ""
+    data = report.to_dict()
 
     # If the project is not compliant:
     if not data["summary"]["compliant"]:
@@ -285,31 +212,32 @@ def format_plain(data: Dict) -> str:
     return output
 
 
-def format_json(data: Dict) -> str:
+def format_json(report: ProjectReport) -> str:
     """Formats data dictionary as JSON string ready to be printed to sys.stdout
 
-    :param data: Dictionary containing formatted ProjectReport data
+    :param report: ProjectReport data
     :return: String (representing JSON) that can be output to sys.stdout
     """
 
     return json.dumps(
-        # Serialize sets to lists
-        data,
+        report.to_dict(),
         indent=2,
+        # Serialize sets to lists
         default=lambda x: list(x) if isinstance(x, set) else x,
     )
 
 
-def lint(data: Dict, formatter=format_plain, out=sys.stdout):
+def lint(report: ProjectReport, formatter=format_plain, out=sys.stdout) -> bool:
     """Lints the entire project
 
-    :param data: Dictionary holding formatted ProjectReport data
+    :param report: ProjectReport data
     :param formatter: Callable that formats the data dictionary
     :param out: Where to output
     """
 
-    out.write(formatter(data))
+    out.write(formatter(report))
 
+    data = report.to_dict()
     result = data["summary"]["compliant"]
     return result
 
@@ -327,8 +255,6 @@ def run(args, project: Project, out=sys.stdout, formatter=format_plain):
         if args.json:
             formatter = format_json
 
-        data = collect_data_from_report(report)
-        lint(data, formatter=formatter, out=out)
-        result = data["summary"]["compliant"]
+        result = lint(report, formatter=formatter, out=out)
 
     return 0 if result else 1
diff --git a/src/reuse/report.py b/src/reuse/report.py
index 8426d306..fda7803f 100644
--- a/src/reuse/report.py
+++ b/src/reuse/report.py
@@ -18,7 +18,7 @@
 from typing import Iterable, List, NamedTuple, Optional, Set
 from uuid import uuid4
 
-from . import __version__
+from . import __version__, __REUSE_version__
 from ._util import _LICENSING, _checksum
 from .project import Project
 
@@ -79,30 +79,79 @@ def __init__(self, do_checksum: bool = True):
         self._files_without_copyright = None
 
     def to_dict(self):
-        """Turn the report into a json-like dictionary."""
-        return {
-            "path": str(Path(self.path).resolve()),
-            "licenses": {
-                identifier: str(path)
-                for identifier, path in self.licenses.items()
-            },
-            "bad_licenses": {
-                lic: [str(file_) for file_ in files]
-                for lic, files in self.bad_licenses.items()
-            },
-            "deprecated_licenses": sorted(self.deprecated_licenses),
-            "licenses_without_extension": {
-                identifier: str(path)
-                for identifier, path in self.licenses_without_extension.items()
+        """Collects and formats data from report and returns it as a dictionary
+
+        :param report: ProjectReport object
+        :return: Formatted dictionary containing data from the ProjectReport object
+        """
+        # Setup report data container
+        data = {
+            "json_version": "1.0",
+            "reuse_version": __REUSE_version__,
+            "non_compliant": {
+                "missing_licenses": self.missing_licenses,
+                "unused_licenses": [str(f) for f in self.unused_licenses],
+                "deprecated_licenses": [str(f) for f in self.deprecated_licenses],
+                "bad_licenses": self.bad_licenses,
+                "licenses_without_extension": list(
+                    self.licenses_without_extension.values()
+                ),
+                "missing_copyright_info": [
+                    str(f) for f in self.files_without_copyright
+                ],
+                "missing_licensing_info": [
+                    str(f) for f in self.files_without_licenses
+                ],
+                "read_error": [str(f) for f in self.read_errors],
             },
-            "missing_licenses": {
-                lic: [str(file_) for file_ in files]
-                for lic, files in self.missing_licenses.items()
+            "files": {},
+            "summary": {
+                "used_licenses": [],
             },
-            "read_errors": list(map(str, self.read_errors)),
-            "file_reports": [report.to_dict() for report in self.file_reports],
         }
 
+        # Populate 'files'
+        for file in self.file_reports:
+            copyrights = file.spdxfile.copyright.split("\n")
+            data["files"][str(file.path)] = {
+                "copyrights": [
+                    # TODO Find correct source file for copyrights info
+                    {"value": cop, "source": file.spdxfile.name}
+                    for cop in copyrights
+                ],
+                "licenses": [
+                    # TODO Find correct source file for licensing info
+                    {"value": lic, "source": file.spdxfile.name}
+                    for lic in file.spdxfile.licenses_in_file
+                ],
+            }
+
+        # Populate 'summary'
+        number_of_files = len(self.file_reports)
+        is_compliant = not any(
+            any(result)
+            for result in (
+                data["non_compliant"]["missing_licenses"],
+                data["non_compliant"]["unused_licenses"],
+                data["non_compliant"]["bad_licenses"],
+                data["non_compliant"]["deprecated_licenses"],
+                data["non_compliant"]["licenses_without_extension"],
+                data["non_compliant"]["missing_copyright_info"],
+                data["non_compliant"]["missing_licensing_info"],
+                data["non_compliant"]["read_error"],
+            )
+        )
+        data["summary"] = {
+            "used_licenses": list(self.used_licenses),
+            "files_total": number_of_files,
+            "files_with_copyright_info": number_of_files
+                                         - len(self.files_without_copyright),
+            "files_with_licensing_info": number_of_files
+                                         - len(self.files_without_licenses),
+            "compliant": is_compliant,
+        }
+        return data
+
     def bill_of_materials(
         self,
         creator_person: Optional[str] = None,
@@ -379,6 +428,7 @@ def generate(
         report.spdxfile.spdx_id = f"SPDXRef-{spdx_id.hexdigest()}"
 
         spdx_info = project.spdx_info_of(path)
+        # TODO Return source of licensing and copyright info together with SPDX info. Depends on #669
         for expression in spdx_info.spdx_expressions:
             for identifier in _LICENSING.license_keys(expression):
                 # A license expression akin to Apache-1.0+ should register

From 6bb2a4765a5dfd04653142d96daaa36bfb85afaa Mon Sep 17 00:00:00 2001
From: linozen <linus@sehn.tech>
Date: Wed, 15 Feb 2023 11:33:13 +0100
Subject: [PATCH 16/60] (convenience) Add `--format` to easily switch between
 plain and JSON output

---
 src/reuse/lint.py | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/src/reuse/lint.py b/src/reuse/lint.py
index 624f75d5..9ac33f65 100644
--- a/src/reuse/lint.py
+++ b/src/reuse/lint.py
@@ -23,9 +23,11 @@ def add_arguments(parser):
     parser.add_argument(
         "-q", "--quiet", action="store_true", help=_("prevents output")
     )
-    parser.add_argument(
-        "-j", "--json", action="store_true", help=_("formats output as JSON")
-    )
+    mutex_group = parser.add_mutually_exclusive_group()
+    mutex_group.add_argument("-j", "--json", action="store_true", help=_("formats output as JSON"))
+    mutex_group.add_argument("-p", "--plain", action="store_true", help=_("formats output as plain text"))
+    mutex_group.add_argument("--format", nargs="?", choices=("json", "plain"),
+                             help=_("formats output using the chosen formatter"))
 
 
 # pylint: disable=too-many-locals, too-many-branches, too-many-statements
@@ -252,8 +254,12 @@ def run(args, project: Project, out=sys.stdout, formatter=format_plain):
         if args.quiet:
             out = stack.enter_context(open(os.devnull, "w", encoding="utf-8"))
 
-        if args.json:
+        if args.json or args.format == "json":
             formatter = format_json
+        elif args.plain or args.format == "plain":
+            formatter = format_plain
+        else:
+            formatter = format_plain
 
         result = lint(report, formatter=formatter, out=out)
 

From 6bc172a924bb3b9e9190aed97730885b38da5e77 Mon Sep 17 00:00:00 2001
From: linozen <linus@sehn.tech>
Date: Wed, 15 Feb 2023 11:33:47 +0100
Subject: [PATCH 17/60] Get `__REUSE_version__` from report object

---
 src/reuse/lint.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/reuse/lint.py b/src/reuse/lint.py
index 9ac33f65..c13be85f 100644
--- a/src/reuse/lint.py
+++ b/src/reuse/lint.py
@@ -204,12 +204,12 @@ def format_plain(report: ProjectReport) -> str:
         output += _(
             "Congratulations! Your project is compliant with version"
             " {} of the REUSE Specification :-)"
-        ).format(__REUSE_version__)
+        ).format(data["reuse_version"])
     else:
         output += _(
             "Unfortunately, your project is not compliant with version "
             "{} of the REUSE Specification :-("
-        ).format(__REUSE_version__)
+        ).format(data["reuse_version"])
 
     return output
 

From 2a2e44fd28e59f318cd04719b98ff9801b1ec319 Mon Sep 17 00:00:00 2001
From: linozen <linus@sehn.tech>
Date: Wed, 15 Feb 2023 11:33:59 +0100
Subject: [PATCH 18/60] Add newline at the end of the output

---
 src/reuse/lint.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/reuse/lint.py b/src/reuse/lint.py
index c13be85f..21af472d 100644
--- a/src/reuse/lint.py
+++ b/src/reuse/lint.py
@@ -211,6 +211,7 @@ def format_plain(report: ProjectReport) -> str:
             "{} of the REUSE Specification :-("
         ).format(data["reuse_version"])
 
+    output += "\n"
     return output
 
 

From 5c25198cb1ff41f2656c47b4875afb08897043d9 Mon Sep 17 00:00:00 2001
From: linozen <linus@sehn.tech>
Date: Wed, 15 Feb 2023 11:39:47 +0100
Subject: [PATCH 19/60] Update tests according to code changes

---
 tests/test_lint.py | 35 ++++++++++++-----------------------
 1 file changed, 12 insertions(+), 23 deletions(-)

diff --git a/tests/test_lint.py b/tests/test_lint.py
index 0cc41dce..807ef000 100644
--- a/tests/test_lint.py
+++ b/tests/test_lint.py
@@ -10,7 +10,7 @@
 
 import pytest
 
-from reuse.lint import collect_data_from_report, lint
+from reuse.lint import lint
 from reuse.project import Project
 from reuse.report import ProjectReport
 
@@ -32,8 +32,7 @@ def test_lint_simple(fake_repository):
     """Extremely simple test for lint."""
     project = Project(fake_repository)
     report = ProjectReport.generate(project)
-    data = collect_data_from_report(report)
-    result = lint(data)
+    result = lint(report)
     assert result
 
 
@@ -41,8 +40,7 @@ def test_lint_git(git_repository):
     """Extremely simple test for lint with a git repository."""
     project = Project(git_repository)
     report = ProjectReport.generate(project)
-    data = collect_data_from_report(report)
-    result = lint(data)
+    result = lint(report)
     assert result
 
 
@@ -51,8 +49,7 @@ def test_lint_submodule(submodule_repository):
     project = Project(submodule_repository)
     (submodule_repository / "submodule/foo.c").write_text("foo")
     report = ProjectReport.generate(project)
-    data = collect_data_from_report(report)
-    result = lint(data)
+    result = lint(report)
     assert result
 
 
@@ -61,8 +58,7 @@ def test_lint_submodule_included(submodule_repository):
     project = Project(submodule_repository, include_submodules=True)
     (submodule_repository / "submodule/foo.c").write_text("foo")
     report = ProjectReport.generate(project)
-    data = collect_data_from_report(report)
-    result = lint(data)
+    result = lint(report)
     assert not result
 
 
@@ -70,8 +66,7 @@ def test_lint_empty_directory(empty_directory):
     """An empty directory is compliant."""
     project = Project(empty_directory)
     report = ProjectReport.generate(project)
-    data = collect_data_from_report(report)
-    result = lint(data)
+    result = lint(report)
     assert result
 
 
@@ -87,8 +82,7 @@ def test_lint_deprecated(fake_repository, stringio):
 
     project = Project(fake_repository)
     report = ProjectReport.generate(project)
-    data = collect_data_from_report(report)
-    result = lint(data, out=stringio)
+    result = lint(report, out=stringio)
 
     assert not result
     assert "GPL-3.0" in stringio.getvalue()
@@ -101,8 +95,7 @@ def test_lint_bad_license(fake_repository, stringio):
     )
     project = Project(fake_repository)
     report = ProjectReport.generate(project)
-    data = collect_data_from_report(report)
-    result = lint(data, out=stringio)
+    result = lint(report, out=stringio)
 
     assert not result
     assert "foo.py" in stringio.getvalue()
@@ -114,8 +107,7 @@ def test_lint_missing_licenses(fake_repository, stringio):
     (fake_repository / "foo.py").write_text("SPDX-License-Identifier: MIT")
     project = Project(fake_repository)
     report = ProjectReport.generate(project)
-    data = collect_data_from_report(report)
-    result = lint(data, out=stringio)
+    result = lint(report, out=stringio)
 
     assert not result
     assert "foo.py" in stringio.getvalue()
@@ -127,8 +119,7 @@ def test_lint_unused_licenses(fake_repository, stringio):
     (fake_repository / "LICENSES/MIT.txt").write_text("foo")
     project = Project(fake_repository)
     report = ProjectReport.generate(project)
-    data = collect_data_from_report(report)
-    result = lint(data, out=stringio)
+    result = lint(report, out=stringio)
 
     assert not result
     assert "Unused licenses: MIT" in stringio.getvalue()
@@ -142,8 +133,7 @@ def test_lint_read_errors(fake_repository, stringio):
     (fake_repository / "foo.py").chmod(0o000)
     project = Project(fake_repository)
     report = ProjectReport.generate(project)
-    data = collect_data_from_report(report)
-    result = lint(data, out=stringio)
+    result = lint(report, out=stringio)
 
     assert not result
     assert "Could not read:" in stringio.getvalue()
@@ -155,8 +145,7 @@ def test_lint_files_without_copyright_and_licensing(fake_repository, stringio):
     (fake_repository / "foo.py").write_text("foo")
     project = Project(fake_repository)
     report = ProjectReport.generate(project)
-    data = collect_data_from_report(report)
-    result = lint(data, out=stringio)
+    result = lint(report, out=stringio)
 
     assert not result
     assert (

From f697c7375d0c901cdf8903d131b967303ea74166 Mon Sep 17 00:00:00 2001
From: linozen <linus@sehn.tech>
Date: Wed, 5 Apr 2023 10:41:24 +0200
Subject: [PATCH 20/60] Fix minor issues in `report.py`

---
 src/reuse/report.py | 28 ++++++++++++----------------
 1 file changed, 12 insertions(+), 16 deletions(-)

diff --git a/src/reuse/report.py b/src/reuse/report.py
index fda7803f..b5dbbdd4 100644
--- a/src/reuse/report.py
+++ b/src/reuse/report.py
@@ -93,16 +93,10 @@ def to_dict(self):
                 "unused_licenses": [str(f) for f in self.unused_licenses],
                 "deprecated_licenses": [str(f) for f in self.deprecated_licenses],
                 "bad_licenses": self.bad_licenses,
-                "licenses_without_extension": list(
-                    self.licenses_without_extension.values()
-                ),
-                "missing_copyright_info": [
-                    str(f) for f in self.files_without_copyright
-                ],
-                "missing_licensing_info": [
-                    str(f) for f in self.files_without_licenses
-                ],
-                "read_error": [str(f) for f in self.read_errors],
+                "licenses_without_extension": self.licenses_without_extension,
+                "missing_copyright_info": [str(f) for f in self.files_without_copyright],
+                "missing_licensing_info": [str(f) for f in self.files_without_licenses],
+                "read_errors": [str(f) for f in self.read_errors],
             },
             "files": {},
             "summary": {
@@ -111,7 +105,7 @@ def to_dict(self):
         }
 
         # Populate 'files'
-        for file in self.file_reports:
+        for file in self.file_reports.copy():
             copyrights = file.spdxfile.copyright.split("\n")
             data["files"][str(file.path)] = {
                 "copyrights": [
@@ -138,7 +132,7 @@ def to_dict(self):
                 data["non_compliant"]["licenses_without_extension"],
                 data["non_compliant"]["missing_copyright_info"],
                 data["non_compliant"]["missing_licensing_info"],
-                data["non_compliant"]["read_error"],
+                data["non_compliant"]["read_errors"],
             )
         )
         data["summary"] = {
@@ -279,11 +273,13 @@ def generate(
             # File report.
             project_report.file_reports.add(file_report)
 
-            # Bad and missing licenses.
+            # Missing licenses.
             for missing_license in file_report.missing_licenses:
-                project_report.missing_licenses.setdefault(
-                    missing_license, set()
-                ).add(file_report.path)
+                project_report.missing_licenses.setdefault(missing_license, set()).add(
+                    file_report.path
+                )
+
+            # Bad licenses
             for bad_license in file_report.bad_licenses:
                 project_report.bad_licenses.setdefault(bad_license, set()).add(
                     file_report.path

From f20429f4ed23872c92444dacf5d30b19bc19af5e Mon Sep 17 00:00:00 2001
From: linozen <linus@sehn.tech>
Date: Wed, 5 Apr 2023 10:41:55 +0200
Subject: [PATCH 21/60] Fix plaintext output

---
 src/reuse/lint.py | 117 ++++++++++++++++++++++------------------------
 1 file changed, 57 insertions(+), 60 deletions(-)

diff --git a/src/reuse/lint.py b/src/reuse/lint.py
index 21af472d..07b2ffc4 100644
--- a/src/reuse/lint.py
+++ b/src/reuse/lint.py
@@ -12,7 +12,7 @@
 import os
 import sys
 from gettext import gettext as _
-from typing import Dict
+from pathlib import PosixPath
 
 from .project import Project
 from .report import ProjectReport
@@ -29,7 +29,6 @@ def add_arguments(parser):
     mutex_group.add_argument("--format", nargs="?", choices=("json", "plain"),
                              help=_("formats output using the chosen formatter"))
 
-
 # pylint: disable=too-many-locals, too-many-branches, too-many-statements
 def format_plain(report: ProjectReport) -> str:
     """Formats data dictionary as plaintext string to be printed to sys.stdout
@@ -42,57 +41,14 @@ def format_plain(report: ProjectReport) -> str:
 
     # If the project is not compliant:
     if not data["summary"]["compliant"]:
-
-        # Missing copyright and licensing information
-        files_without_copyright = set(
-            data["non_compliant"]["missing_copyright_info"]
-        )
-        files_without_license = set(
-            data["non_compliant"]["missing_licensing_info"]
-        )
-        files_without_both = files_without_license.intersection(
-            files_without_copyright
-        )
-
-        header = (
-            "# " + _("MISSING COPYRIGHT AND LICENSING INFORMATION") + "\n\n"
-        )
-        if files_without_both:
-            output += header
-            output += _(
-                "The following files have no copyright and licensing "
-                "information:"
-            )
-            output += "\n"
-            for file in sorted(files_without_both):
-                output += f"* {file}\n"
-            output += "\n\n"
-
-        if files_without_copyright - files_without_both:
-            output += header
-            output += _("The following files have no copyright information:")
-            output += "\n"
-            for file in sorted(files_without_copyright - files_without_both):
-                output += f"* {file}\n"
-            output += "\n\n"
-
-        if files_without_license - files_without_both:
-            output += header
-            output += _("The following files have no licensing information:")
-            output += "\n"
-            for file in sorted(files_without_license - files_without_both):
-                output += f"* {file}\n"
-            output += "\n\n"
-
         # Bad licenses
         bad_licenses = data["non_compliant"]["bad_licenses"]
         if bad_licenses:
             output += "# " + _("BAD LICENSES") + "\n\n"
-            for lic, files in sorted(bad_licenses.items()):
-                output += f"'{lic}' found in:" + "\n"
-                for file in sorted(files):
-                    output += f"* {file}\n"
-            output += "\n\n"
+            for lic in sorted(bad_licenses.keys()):
+                output += _("'{}' found in:").format(lic) + "\n"
+                output += f"* {list(bad_licenses[lic])[0]}" + "\n\n"
+            output += "\n"
 
         # Deprecated licenses
         deprecated_licenses = data["non_compliant"]["deprecated_licenses"]
@@ -110,31 +66,31 @@ def format_plain(report: ProjectReport) -> str:
         if licenses_without_extension:
             output += "# " + _("LICENSES WITHOUT FILE EXTENSION") + "\n\n"
             output += _("The following licenses have no file extension:") + "\n"
-            for path in sorted(licenses_without_extension):
-                output += f"* {str(path)}" + "\n"
+            for lic in sorted(licenses_without_extension):
+                output += f"* {str(licenses_without_extension[lic])}" + "\n"
             output += "\n\n"
 
         # Missing licenses
         missing_licenses = data["non_compliant"]["missing_licenses"]
         if missing_licenses:
             output += "# " + _("MISSING LICENSES") + "\n\n"
-            for lic, files in sorted(missing_licenses.items()):
-                output += f"'{lic}' found in:" + "\n"
-                for file in sorted(files):
+            for lic in zip(missing_licenses.keys()):
+                output += _("'{}' found in:").format(lic[0]) + "\n"
+                for file in missing_licenses[lic[0]]:
                     output += f"* {file}\n"
-            output += "\n"
+            output += "\n\n"
 
         # Unused licenses
         unused_licenses = data["non_compliant"]["unused_licenses"]
         if unused_licenses:
             output += "# " + _("UNUSED LICENSES") + "\n\n"
             output += _("The following licenses are not used:") + "\n"
-            for lic in sorted(deprecated_licenses):
+            for lic in sorted(unused_licenses):
                 output += f"* {lic}\n"
             output += "\n\n"
 
         # Read errors
-        read_errors = data["non_compliant"]["read_error"]
+        read_errors = data["non_compliant"]["read_errors"]
         if read_errors:
             output += "# " + _("READ ERRORS") + "\n\n"
             output += _("Could not read:") + "\n"
@@ -142,6 +98,46 @@ def format_plain(report: ProjectReport) -> str:
                 output += f"* {str(path)}" + "\n"
             output += "\n\n"
 
+        # Missing copyright and licensing information
+        files_without_copyright = set(
+            data["non_compliant"]["missing_copyright_info"]
+        )
+        files_without_license = set(
+            data["non_compliant"]["missing_licensing_info"]
+        )
+        files_without_both = files_without_license.intersection(
+            files_without_copyright
+        )
+
+        header = (
+            "# " + _("MISSING COPYRIGHT AND LICENSING INFORMATION") + "\n\n"
+        )
+        output += header
+        if files_without_both:
+            output += _(
+                "The following files have no copyright and licensing "
+                "information:"
+            )
+            output += "\n"
+            for file in sorted(files_without_both):
+                output += f"* {file}\n"
+            output += "\n"
+
+        if files_without_copyright - files_without_both:
+            output += _("The following files have no copyright information:")
+            output += "\n"
+            for file in sorted(files_without_copyright - files_without_both):
+                output += f"* {file}\n"
+            output += "\n"
+
+        if files_without_license - files_without_both:
+            output += _("The following files have no licensing information:")
+            output += "\n"
+            for file in sorted(files_without_license - files_without_both):
+                output += f"* {file}\n"
+            output += "\n"
+
+    output += "\n"
     output += "# " + _("SUMMARY")
     output += "\n\n"
 
@@ -155,7 +151,7 @@ def format_plain(report: ProjectReport) -> str:
             _("Licenses without file extension:"),
             ", ".join(
                 [
-                    file.parts[-1]
+                    file
                     for file in data["non_compliant"][
                         "licenses_without_extension"
                     ]
@@ -173,7 +169,7 @@ def format_plain(report: ProjectReport) -> str:
         (_("Used licenses:"), ", ".join(data["summary"]["used_licenses"])),
         (
             _("Read errors: {count}").format(
-                count=len(data["non_compliant"]["read_error"])
+                count=len(data["non_compliant"]["read_errors"])
             ),
             "empty",
         ),
@@ -200,6 +196,7 @@ def format_plain(report: ProjectReport) -> str:
             value = ""
         output += "* " + key + " " + value + "\n"
 
+    output += "\n"
     if data["summary"]["compliant"]:
         output += _(
             "Congratulations! Your project is compliant with version"
@@ -210,8 +207,8 @@ def format_plain(report: ProjectReport) -> str:
             "Unfortunately, your project is not compliant with version "
             "{} of the REUSE Specification :-("
         ).format(data["reuse_version"])
-
     output += "\n"
+
     return output
 
 

From 1b2bd90d059618edbe6f5ba26b4cd28a3bf8ed30 Mon Sep 17 00:00:00 2001
From: linozen <linus@sehn.tech>
Date: Wed, 5 Apr 2023 10:42:03 +0200
Subject: [PATCH 22/60] Add custom JSON serializer

---
 src/reuse/lint.py | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/src/reuse/lint.py b/src/reuse/lint.py
index 07b2ffc4..71ddbb73 100644
--- a/src/reuse/lint.py
+++ b/src/reuse/lint.py
@@ -219,11 +219,24 @@ def format_json(report: ProjectReport) -> str:
     :return: String (representing JSON) that can be output to sys.stdout
     """
 
+    output = report.to_dict()
+
+    def custom_serializer(obj):
+        """Custom serializer for the dictionary output of ProjectReport
+
+        :param obj: Object to be serialized
+        """
+        if isinstance(obj, PosixPath):
+            return str(obj)
+        if isinstance(obj, set):
+            return list(obj)
+        raise TypeError(f"Object of type {obj.__class__.__name__} is not JSON serializable")
+
     return json.dumps(
         report.to_dict(),
         indent=2,
         # Serialize sets to lists
-        default=lambda x: list(x) if isinstance(x, set) else x,
+        default=custom_serializer
     )
 
 

From d98d162715ed0710362a7b9243af24cc5f311610 Mon Sep 17 00:00:00 2001
From: linozen <linus@sehn.tech>
Date: Wed, 5 Apr 2023 11:18:45 +0200
Subject: [PATCH 23/60] Adapt `SpdxInfo` dataclass to contain source path

---
 src/reuse/__init__.py |  3 ++-
 src/reuse/_util.py    |  5 +++--
 src/reuse/header.py   |  3 ++-
 src/reuse/project.py  |  8 +++++---
 tests/test_header.py  | 36 ++++++++++++++++++------------------
 tests/test_util.py    |  2 +-
 6 files changed, 31 insertions(+), 26 deletions(-)

diff --git a/src/reuse/__init__.py b/src/reuse/__init__.py
index 70b09745..dd770bae 100644
--- a/src/reuse/__init__.py
+++ b/src/reuse/__init__.py
@@ -20,7 +20,7 @@
 import os
 import re
 from dataclasses import dataclass, field
-from typing import NamedTuple, Set
+from typing import NamedTuple, Optional, Set
 
 try:
     from importlib.metadata import PackageNotFoundError, version
@@ -95,6 +95,7 @@ class SpdxInfo:
     spdx_expressions: Set[Expression] = field(default_factory=set)
     copyright_lines: Set[str] = field(default_factory=set)
     contributor_lines: Set[str] = field(default_factory=set)
+    license_path: Optional[str] = None
 
     def __bool__(self):
         return any(self.__dict__.values())
diff --git a/src/reuse/_util.py b/src/reuse/_util.py
index b50fb080..19780a3b 100644
--- a/src/reuse/_util.py
+++ b/src/reuse/_util.py
@@ -208,11 +208,12 @@ def _copyright_from_dep5(path: PathLike, dep5_copyright: Copyright) -> SpdxInfo:
     result = dep5_copyright.find_files_paragraph(Path(path).as_posix())
 
     if result is None:
-        return SpdxInfo(set(), set())
+        return SpdxInfo(set(), set(), str(path))
 
     return SpdxInfo(
         set(map(_LICENSING.parse, [result.license.synopsis])),
         set(map(str.strip, result.copyright.splitlines())),
+        str(path)
     )
 
 
@@ -316,7 +317,7 @@ def extract_spdx_info(text: str) -> SpdxInfo:
                 copyright_matches.add(match.groupdict()["copyright"].strip())
                 break
 
-    return SpdxInfo(expressions, copyright_matches)
+    return SpdxInfo(expressions, copyright_matches, "")
 
 
 def find_license_identifiers(text: str) -> Iterator[str]:
diff --git a/src/reuse/header.py b/src/reuse/header.py
index 520fcaf2..8454c283 100644
--- a/src/reuse/header.py
+++ b/src/reuse/header.py
@@ -178,6 +178,7 @@ def create_header(
             spdx_info.spdx_expressions.union(existing_spdx.spdx_expressions),
             spdx_copyrights,
             spdx_info.contributor_lines.union(existing_spdx.contributor_lines),
+            "",
         )
 
     new_header += _create_new_header(
@@ -784,7 +785,7 @@ def run(args, project: Project, out=sys.stdout) -> int:
         set(args.contributor) if args.contributor is not None else set()
     )
 
-    spdx_info = SpdxInfo(expressions, copyright_lines, contributors)
+    spdx_info = SpdxInfo(expressions, copyright_lines, contributors, "")
 
     result = 0
     for path in paths:
diff --git a/src/reuse/project.py b/src/reuse/project.py
index 7801a129..b47175fa 100644
--- a/src/reuse/project.py
+++ b/src/reuse/project.py
@@ -144,13 +144,14 @@ def spdx_info_of(self, path: PathLike) -> SpdxInfo:
         """Return SPDX info of *path*.
 
         This function will return any SPDX information that it can find, both
-        from within the file and from the .reuse/dep5 file.
+        from within the file, the .license file and from the .reuse/dep5 file.
         """
         path = _determine_license_path(path)
         _LOGGER.debug(f"searching '{path}' for SPDX information")
 
-        dep5_result = SpdxInfo(set(), set())
-        file_result = SpdxInfo(set(), set())
+        # NOTE This means that only one 'source' of licensing/copyright information is captured in SpdxInfo
+        dep5_result = SpdxInfo(set(), set(), "")
+        file_result = SpdxInfo(set(), set(), "")
 
         # Search the .reuse/dep5 file for SPDX information.
         if self._copyright:
@@ -188,6 +189,7 @@ def spdx_info_of(self, path: PathLike) -> SpdxInfo:
         return SpdxInfo(
             dep5_result.spdx_expressions.union(file_result.spdx_expressions),
             dep5_result.copyright_lines.union(file_result.copyright_lines),
+            str(path)
         )
 
     def relative_from_root(self, path: Path) -> Path:
diff --git a/tests/test_header.py b/tests/test_header.py
index c97d4144..5eb3e80b 100644
--- a/tests/test_header.py
+++ b/tests/test_header.py
@@ -24,7 +24,7 @@
 def test_create_header_simple():
     """Create a super simple header."""
     spdx_info = SpdxInfo(
-        {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"}
+        {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"}, ""
     )
     expected = cleandoc(
         """
@@ -57,7 +57,7 @@ def test_create_header_simple_with_contributor():
 def test_create_header_template_simple(template_simple):
     """Create a header with a simple template."""
     spdx_info = SpdxInfo(
-        {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"}
+        {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"}, ""
     )
     expected = cleandoc(
         """
@@ -77,7 +77,7 @@ def test_create_header_template_simple(template_simple):
 def test_create_header_template_no_spdx(template_no_spdx):
     """Create a header with a template that does not have all SPDX info."""
     spdx_info = SpdxInfo(
-        {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"}
+        {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"}, ""
     )
 
     with pytest.raises(MissingSpdxInfo):
@@ -87,7 +87,7 @@ def test_create_header_template_no_spdx(template_no_spdx):
 def test_create_header_template_commented(template_commented):
     """Create a header with an already-commented template."""
     spdx_info = SpdxInfo(
-        {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"}
+        {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"}, ""
     )
     expected = cleandoc(
         """
@@ -113,7 +113,7 @@ def test_create_header_template_commented(template_commented):
 def test_create_header_already_contains_spdx():
     """Create a new header from a header that already contains SPDX info."""
     spdx_info = SpdxInfo(
-        {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"}
+        {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"}, ""
     )
     existing = cleandoc(
         """
@@ -138,7 +138,7 @@ def test_create_header_already_contains_spdx():
 def test_create_header_existing_is_wrong():
     """If the existing header contains errors, raise a CommentCreateError."""
     spdx_info = SpdxInfo(
-        {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"}
+        {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"}, ""
     )
     existing = cleandoc(
         """
@@ -154,7 +154,7 @@ def test_create_header_existing_is_wrong():
 
 def test_create_header_old_syntax():
     """Old copyright syntax is preserved when creating a new header."""
-    spdx_info = SpdxInfo({"GPL-3.0-or-later"}, set())
+    spdx_info = SpdxInfo({"GPL-3.0-or-later"}, set(), "")
     existing = cleandoc(
         """
         # Copyright John Doe
@@ -173,7 +173,7 @@ def test_create_header_old_syntax():
 
 def test_create_header_remove_fluff():
     """Any stuff that isn't SPDX info is removed when using create_header."""
-    spdx_info = SpdxInfo({"GPL-3.0-or-later"}, set())
+    spdx_info = SpdxInfo({"GPL-3.0-or-later"}, set(), "")
     existing = cleandoc(
         """
         # SPDX-FileCopyrightText: John Doe
@@ -199,7 +199,7 @@ def test_add_new_header_simple():
     the old one.
     """
     spdx_info = SpdxInfo(
-        {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"}
+        {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"}, ""
     )
     text = cleandoc(
         """
@@ -229,7 +229,7 @@ def test_add_new_header_simple():
 def test_find_and_replace_no_header():
     """Given text without header, add a header."""
     spdx_info = SpdxInfo(
-        {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"}
+        {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"}, ""
     )
     text = "pass"
     expected = cleandoc(
@@ -251,7 +251,7 @@ def test_find_and_replace_no_header():
 
 def test_find_and_replace_verbatim():
     """Replace a header with itself."""
-    spdx_info = SpdxInfo(set(), set())
+    spdx_info = SpdxInfo(set(), set(), "")
     text = cleandoc(
         """
         # SPDX-FileCopyrightText: Jane Doe
@@ -270,7 +270,7 @@ def test_find_and_replace_newline_before_header():
     preceding whitespace.
     """
     spdx_info = SpdxInfo(
-        {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: John Doe"}
+        {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: John Doe"}, ""
     )
     text = cleandoc(
         """
@@ -297,7 +297,7 @@ def test_find_and_replace_newline_before_header():
 def test_find_and_replace_preserve_preceding():
     """When the SPDX header is in the middle of the file, keep it there."""
     spdx_info = SpdxInfo(
-        {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: John Doe"}
+        {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: John Doe"}, ""
     )
     text = cleandoc(
         """
@@ -335,7 +335,7 @@ def test_find_and_replace_keep_shebang():
     it.
     """
     spdx_info = SpdxInfo(
-        {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: John Doe"}
+        {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: John Doe"}, ""
     )
     text = cleandoc(
         """
@@ -366,7 +366,7 @@ def test_find_and_replace_separate_shebang():
     """When the shebang is part of the same comment as the SPDX comment,
     separate the two.
     """
-    spdx_info = SpdxInfo({"GPL-3.0-or-later"}, set())
+    spdx_info = SpdxInfo({"GPL-3.0-or-later"}, set(), "")
     text = cleandoc(
         """
         #!/usr/bin/env python3
@@ -394,7 +394,7 @@ def test_find_and_replace_separate_shebang():
 
 def test_find_and_replace_only_shebang():
     """When the file only contains a shebang, keep it at the top of the file."""
-    spdx_info = SpdxInfo({"GPL-3.0-or-later"}, set())
+    spdx_info = SpdxInfo({"GPL-3.0-or-later"}, set(), "")
     text = cleandoc(
         """
         #!/usr/bin/env python3
@@ -424,7 +424,7 @@ def test_find_and_replace_keep_old_comment():
     licensing information, preserve it below the REUSE header.
     """
     spdx_info = SpdxInfo(
-        {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"}
+        {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"}, ""
     )
     text = cleandoc(
         """
@@ -451,7 +451,7 @@ def test_find_and_replace_keep_old_comment():
 def test_find_and_replace_preserve_newline():
     """If the file content ends with a newline, don't remove it."""
 
-    spdx_info = SpdxInfo(set(), set())
+    spdx_info = SpdxInfo(set(), set(), "")
     text = (
         cleandoc(
             """
diff --git a/tests/test_util.py b/tests/test_util.py
index efa09b63..49011e65 100644
--- a/tests/test_util.py
+++ b/tests/test_util.py
@@ -78,7 +78,7 @@ def test_extract_no_info():
     object.
     """
     result = _util.extract_spdx_info("")
-    assert result == _util.SpdxInfo(set(), set())
+    assert result == _util.SpdxInfo(set(), set(), "")
 
 
 def test_extract_tab():

From 9c106f14f3b8335e78004326227796ca64c18b99 Mon Sep 17 00:00:00 2001
From: linozen <linus@sehn.tech>
Date: Wed, 5 Apr 2023 11:19:48 +0200
Subject: [PATCH 24/60] Remove useless line

---
 src/reuse/lint.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/reuse/lint.py b/src/reuse/lint.py
index 71ddbb73..9bac4e6a 100644
--- a/src/reuse/lint.py
+++ b/src/reuse/lint.py
@@ -219,8 +219,6 @@ def format_json(report: ProjectReport) -> str:
     :return: String (representing JSON) that can be output to sys.stdout
     """
 
-    output = report.to_dict()
-
     def custom_serializer(obj):
         """Custom serializer for the dictionary output of ProjectReport
 

From 9beac70726695768ae642ccd757ad8ea95374e61 Mon Sep 17 00:00:00 2001
From: linozen <linus@sehn.tech>
Date: Wed, 5 Apr 2023 11:56:41 +0200
Subject: [PATCH 25/60] Actually return license/copyright source path

---
 src/reuse/project.py | 7 +++++++
 src/reuse/report.py  | 6 ++----
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/src/reuse/project.py b/src/reuse/project.py
index b47175fa..f589b3cb 100644
--- a/src/reuse/project.py
+++ b/src/reuse/project.py
@@ -145,10 +145,14 @@ def spdx_info_of(self, path: PathLike) -> SpdxInfo:
 
         This function will return any SPDX information that it can find, both
         from within the file, the .license file and from the .reuse/dep5 file.
+
+        It also returns a single primary source path of the license/copyright
+        information, where 'primary' means '.license file' > 'header' > 'dep5'
         """
         path = _determine_license_path(path)
         _LOGGER.debug(f"searching '{path}' for SPDX information")
 
+        license_path = ""
         # NOTE This means that only one 'source' of licensing/copyright information is captured in SpdxInfo
         dep5_result = SpdxInfo(set(), set(), "")
         file_result = SpdxInfo(set(), set(), "")
@@ -162,6 +166,7 @@ def spdx_info_of(self, path: PathLike) -> SpdxInfo:
                 _LOGGER.info(
                     _("'{path}' covered by .reuse/dep5").format(path=path)
                 )
+                license_path = ".reuse/dep5"
 
         # Search the file for SPDX information.
         with path.open("rb") as fp:
@@ -178,6 +183,8 @@ def spdx_info_of(self, path: PathLike) -> SpdxInfo:
                 file_result = extract_spdx_info(
                     decoded_text_from_binary(fp, size=read_limit)
                 )
+                if any(file_result):
+                    license_path = str(path)
             except (ExpressionError, ParseError):
                 _LOGGER.error(
                     _(
diff --git a/src/reuse/report.py b/src/reuse/report.py
index b5dbbdd4..76a2385c 100644
--- a/src/reuse/report.py
+++ b/src/reuse/report.py
@@ -109,13 +109,11 @@ def to_dict(self):
             copyrights = file.spdxfile.copyright.split("\n")
             data["files"][str(file.path)] = {
                 "copyrights": [
-                    # TODO Find correct source file for copyrights info
-                    {"value": cop, "source": file.spdxfile.name}
+                    {"value": cop, "source": file.spdxfile.info.license_path}
                     for cop in copyrights
                 ],
                 "licenses": [
-                    # TODO Find correct source file for licensing info
-                    {"value": lic, "source": file.spdxfile.name}
+                    {"value": lic, "source": file.spdxfile.info.license_path}
                     for lic in file.spdxfile.licenses_in_file
                 ],
             }

From c43c91fbc5afacd0a745b663e9f1f2b97aea837b Mon Sep 17 00:00:00 2001
From: linozen <linus@sehn.tech>
Date: Wed, 5 Apr 2023 12:07:44 +0200
Subject: [PATCH 26/60] Determine license source correctly

---
 src/reuse/project.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/reuse/project.py b/src/reuse/project.py
index f589b3cb..fa85d87a 100644
--- a/src/reuse/project.py
+++ b/src/reuse/project.py
@@ -193,10 +193,12 @@ def spdx_info_of(self, path: PathLike) -> SpdxInfo:
                     ).format(path=path)
                 )
 
+        spdx_expressions = dep5_result.spdx_expressions.union(file_result.spdx_expressions)
+        copyright_lines = dep5_result.copyright_lines.union(file_result.copyright_lines)
         return SpdxInfo(
-            dep5_result.spdx_expressions.union(file_result.spdx_expressions),
-            dep5_result.copyright_lines.union(file_result.copyright_lines),
-            str(path)
+            spdx_expressions,
+            copyright_lines,
+            license_path
         )
 
     def relative_from_root(self, path: Path) -> Path:

From 6fc5ec118c05fd15845d0bcb53ad52bdd36600ee Mon Sep 17 00:00:00 2001
From: linozen <linus@sehn.tech>
Date: Wed, 5 Apr 2023 12:08:05 +0200
Subject: [PATCH 27/60] Allow returning empty arrays

---
 src/reuse/report.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/reuse/report.py b/src/reuse/report.py
index 76a2385c..2ecfbeac 100644
--- a/src/reuse/report.py
+++ b/src/reuse/report.py
@@ -110,11 +110,11 @@ def to_dict(self):
             data["files"][str(file.path)] = {
                 "copyrights": [
                     {"value": cop, "source": file.spdxfile.info.license_path}
-                    for cop in copyrights
+                    for cop in copyrights if cop
                 ],
                 "licenses": [
                     {"value": lic, "source": file.spdxfile.info.license_path}
-                    for lic in file.spdxfile.licenses_in_file
+                    for lic in file.spdxfile.licenses_in_file if lic
                 ],
             }
 
@@ -364,6 +364,7 @@ def __init__(self, name, spdx_id=None, chk_sum=None):
         self.licenses_in_file: List[str] = []
         self.license_concluded: str = None
         self.copyright: str = None
+        self.info: SpdxInfo = None
 
 
 class FileReport:
@@ -462,7 +463,7 @@ def generate(
 
         # Copyright text
         report.spdxfile.copyright = "\n".join(sorted(spdx_info.copyright_lines))
-
+        report.spdxfile.info = spdx_info
         return report
 
     def __hash__(self):

From 3ad1fe38fadc61469113f44fe1aa215d025f5c48 Mon Sep 17 00:00:00 2001
From: linozen <linus@sehn.tech>
Date: Thu, 6 Apr 2023 08:10:58 +0200
Subject: [PATCH 28/60] Satisfy black

---
 src/reuse/_util.py   |  2 +-
 src/reuse/lint.py    | 35 ++++++++++++++++++++-------------
 src/reuse/project.py | 16 ++++++++-------
 src/reuse/report.py  | 47 +++++++++++++++++++++++++-------------------
 4 files changed, 58 insertions(+), 42 deletions(-)

diff --git a/src/reuse/_util.py b/src/reuse/_util.py
index 19780a3b..bb60a924 100644
--- a/src/reuse/_util.py
+++ b/src/reuse/_util.py
@@ -213,7 +213,7 @@ def _copyright_from_dep5(path: PathLike, dep5_copyright: Copyright) -> SpdxInfo:
     return SpdxInfo(
         set(map(_LICENSING.parse, [result.license.synopsis])),
         set(map(str.strip, result.copyright.splitlines())),
-        str(path)
+        str(path),
     )
 
 
diff --git a/src/reuse/lint.py b/src/reuse/lint.py
index 9bac4e6a..b1296438 100644
--- a/src/reuse/lint.py
+++ b/src/reuse/lint.py
@@ -24,10 +24,22 @@ def add_arguments(parser):
         "-q", "--quiet", action="store_true", help=_("prevents output")
     )
     mutex_group = parser.add_mutually_exclusive_group()
-    mutex_group.add_argument("-j", "--json", action="store_true", help=_("formats output as JSON"))
-    mutex_group.add_argument("-p", "--plain", action="store_true", help=_("formats output as plain text"))
-    mutex_group.add_argument("--format", nargs="?", choices=("json", "plain"),
-                             help=_("formats output using the chosen formatter"))
+    mutex_group.add_argument(
+        "-j", "--json", action="store_true", help=_("formats output as JSON")
+    )
+    mutex_group.add_argument(
+        "-p",
+        "--plain",
+        action="store_true",
+        help=_("formats output as plain text"),
+    )
+    mutex_group.add_argument(
+        "--format",
+        nargs="?",
+        choices=("json", "plain"),
+        help=_("formats output using the chosen formatter"),
+    )
+
 
 # pylint: disable=too-many-locals, too-many-branches, too-many-statements
 def format_plain(report: ProjectReport) -> str:
@@ -149,14 +161,7 @@ def format_plain(report: ProjectReport) -> str:
         ),
         (
             _("Licenses without file extension:"),
-            ", ".join(
-                [
-                    file
-                    for file in data["non_compliant"][
-                        "licenses_without_extension"
-                    ]
-                ]
-            ),
+            ", ".join(data["non_compliant"]["licenses_without_extension"]),
         ),
         (
             _("Missing licenses:"),
@@ -228,13 +233,15 @@ def custom_serializer(obj):
             return str(obj)
         if isinstance(obj, set):
             return list(obj)
-        raise TypeError(f"Object of type {obj.__class__.__name__} is not JSON serializable")
+        raise TypeError(
+            f"Object of type {obj.__class__.__name__} is not JSON serializable"
+        )
 
     return json.dumps(
         report.to_dict(),
         indent=2,
         # Serialize sets to lists
-        default=custom_serializer
+        default=custom_serializer,
     )
 
 
diff --git a/src/reuse/project.py b/src/reuse/project.py
index fa85d87a..2f93556e 100644
--- a/src/reuse/project.py
+++ b/src/reuse/project.py
@@ -153,7 +153,9 @@ def spdx_info_of(self, path: PathLike) -> SpdxInfo:
         _LOGGER.debug(f"searching '{path}' for SPDX information")
 
         license_path = ""
-        # NOTE This means that only one 'source' of licensing/copyright information is captured in SpdxInfo
+
+        # This means that only one 'source' of licensing/copyright information
+        # is captured in SpdxInfo
         dep5_result = SpdxInfo(set(), set(), "")
         file_result = SpdxInfo(set(), set(), "")
 
@@ -193,13 +195,13 @@ def spdx_info_of(self, path: PathLike) -> SpdxInfo:
                     ).format(path=path)
                 )
 
-        spdx_expressions = dep5_result.spdx_expressions.union(file_result.spdx_expressions)
-        copyright_lines = dep5_result.copyright_lines.union(file_result.copyright_lines)
-        return SpdxInfo(
-            spdx_expressions,
-            copyright_lines,
-            license_path
+        spdx_expressions = dep5_result.spdx_expressions.union(
+            file_result.spdx_expressions
+        )
+        copyright_lines = dep5_result.copyright_lines.union(
+            file_result.copyright_lines
         )
+        return SpdxInfo(spdx_expressions, copyright_lines, license_path)
 
     def relative_from_root(self, path: Path) -> Path:
         """If the project root is /tmp/project, and *path* is
diff --git a/src/reuse/report.py b/src/reuse/report.py
index 2ecfbeac..d7e7b57d 100644
--- a/src/reuse/report.py
+++ b/src/reuse/report.py
@@ -18,9 +18,9 @@
 from typing import Iterable, List, NamedTuple, Optional, Set
 from uuid import uuid4
 
-from . import __version__, __REUSE_version__
+from . import __REUSE_version__, __version__
 from ._util import _LICENSING, _checksum
-from .project import Project
+from .project import Project, SpdxInfo
 
 _LOGGER = logging.getLogger(__name__)
 
@@ -82,7 +82,7 @@ def to_dict(self):
         """Collects and formats data from report and returns it as a dictionary
 
         :param report: ProjectReport object
-        :return: Formatted dictionary containing data from the ProjectReport object
+        :return: Dictionary containing data from the ProjectReport object
         """
         # Setup report data container
         data = {
@@ -90,13 +90,19 @@ def to_dict(self):
             "reuse_version": __REUSE_version__,
             "non_compliant": {
                 "missing_licenses": self.missing_licenses,
-                "unused_licenses": [str(f) for f in self.unused_licenses],
-                "deprecated_licenses": [str(f) for f in self.deprecated_licenses],
+                "unused_licenses": [str(file) for file in self.unused_licenses],
+                "deprecated_licenses": [
+                    str(file) for file in self.deprecated_licenses
+                ],
                 "bad_licenses": self.bad_licenses,
                 "licenses_without_extension": self.licenses_without_extension,
-                "missing_copyright_info": [str(f) for f in self.files_without_copyright],
-                "missing_licensing_info": [str(f) for f in self.files_without_licenses],
-                "read_errors": [str(f) for f in self.read_errors],
+                "missing_copyright_info": [
+                    str(file) for file in self.files_without_copyright
+                ],
+                "missing_licensing_info": [
+                    str(file) for file in self.files_without_licenses
+                ],
+                "read_errors": [str(file) for file in self.read_errors],
             },
             "files": {},
             "summary": {
@@ -110,11 +116,13 @@ def to_dict(self):
             data["files"][str(file.path)] = {
                 "copyrights": [
                     {"value": cop, "source": file.spdxfile.info.license_path}
-                    for cop in copyrights if cop
+                    for cop in copyrights
+                    if cop
                 ],
                 "licenses": [
                     {"value": lic, "source": file.spdxfile.info.license_path}
-                    for lic in file.spdxfile.licenses_in_file if lic
+                    for lic in file.spdxfile.licenses_in_file
+                    if lic
                 ],
             }
 
@@ -137,9 +145,9 @@ def to_dict(self):
             "used_licenses": list(self.used_licenses),
             "files_total": number_of_files,
             "files_with_copyright_info": number_of_files
-                                         - len(self.files_without_copyright),
+            - len(self.files_without_copyright),
             "files_with_licensing_info": number_of_files
-                                         - len(self.files_without_licenses),
+            - len(self.files_without_licenses),
             "compliant": is_compliant,
         }
         return data
@@ -163,8 +171,7 @@ def bill_of_materials(
         # TODO: Generate UUID from git revision maybe
         # TODO: Fix the URL
         out.write(
-            f"DocumentNamespace:"
-            f" http://spdx.org/spdxdocs/spdx-v2.1-{uuid4()}\n"
+            f"DocumentNamespace: http://spdx.org/spdxdocs/spdx-v2.1-{uuid4()}\n"
         )
 
         # Author
@@ -187,7 +194,7 @@ def bill_of_materials(
 
         for report in reports:
             out.write(
-                f"Relationship: SPDXRef-DOCUMENT describes"
+                "Relationship: SPDXRef-DOCUMENT describes"
                 f" {report.spdxfile.spdx_id}\n"
             )
 
@@ -204,7 +211,7 @@ def bill_of_materials(
                 out.write(f"LicenseInfoInFile: {lic}\n")
             if report.spdxfile.copyright:
                 out.write(
-                    f"FileCopyrightText:"
+                    "FileCopyrightText:"
                     f" <text>{report.spdxfile.copyright}</text>\n"
                 )
             else:
@@ -273,9 +280,9 @@ def generate(
 
             # Missing licenses.
             for missing_license in file_report.missing_licenses:
-                project_report.missing_licenses.setdefault(missing_license, set()).add(
-                    file_report.path
-                )
+                project_report.missing_licenses.setdefault(
+                    missing_license, set()
+                ).add(file_report.path)
 
             # Bad licenses
             for bad_license in file_report.bad_licenses:
@@ -423,7 +430,6 @@ def generate(
         report.spdxfile.spdx_id = f"SPDXRef-{spdx_id.hexdigest()}"
 
         spdx_info = project.spdx_info_of(path)
-        # TODO Return source of licensing and copyright info together with SPDX info. Depends on #669
         for expression in spdx_info.spdx_expressions:
             for identifier in _LICENSING.license_keys(expression):
                 # A license expression akin to Apache-1.0+ should register
@@ -463,6 +469,7 @@ def generate(
 
         # Copyright text
         report.spdxfile.copyright = "\n".join(sorted(spdx_info.copyright_lines))
+        # Source of licensing and copyright info
         report.spdxfile.info = spdx_info
         return report
 

From 2fcc8ebfd28f189aefaf628f4d0c120c797bf4a1 Mon Sep 17 00:00:00 2001
From: linozen <linus@sehn.tech>
Date: Thu, 6 Apr 2023 08:21:23 +0200
Subject: [PATCH 29/60] Ensure consistency of the JSON output

---
 src/reuse/lint.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/reuse/lint.py b/src/reuse/lint.py
index b1296438..cf9544d8 100644
--- a/src/reuse/lint.py
+++ b/src/reuse/lint.py
@@ -242,6 +242,7 @@ def custom_serializer(obj):
         indent=2,
         # Serialize sets to lists
         default=custom_serializer,
+        sort_keys=True,
     )
 
 

From d50d15e9bd9e92e77fa218d551d2777b9cf35087 Mon Sep 17 00:00:00 2001
From: linozen <linus@sehn.tech>
Date: Thu, 6 Apr 2023 08:25:52 +0200
Subject: [PATCH 30/60] Ensure consistency of the plaintext output

---
 src/reuse/lint.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/reuse/lint.py b/src/reuse/lint.py
index cf9544d8..356784ed 100644
--- a/src/reuse/lint.py
+++ b/src/reuse/lint.py
@@ -86,9 +86,9 @@ def format_plain(report: ProjectReport) -> str:
         missing_licenses = data["non_compliant"]["missing_licenses"]
         if missing_licenses:
             output += "# " + _("MISSING LICENSES") + "\n\n"
-            for lic in zip(missing_licenses.keys()):
-                output += _("'{}' found in:").format(lic[0]) + "\n"
-                for file in missing_licenses[lic[0]]:
+            for lic in sorted(missing_licenses.keys()):
+                output += _("'{}' found in:").format(lic) + "\n"
+                for file in sorted(missing_licenses[lic]):
                     output += f"* {file}\n"
             output += "\n\n"
 

From fbf6e534ef96bf2ed45129b289678e3517f4d965 Mon Sep 17 00:00:00 2001
From: Carmen Bianca BAKKER <carmenbianca@fsfe.org>
Date: Mon, 10 Apr 2023 18:21:40 +0200
Subject: [PATCH 31/60] Move logic to FileReport.to_dict and rename method to
 to_dict_lint

Signed-off-by: Carmen Bianca BAKKER <carmenbianca@fsfe.org>
---
 src/reuse/lint.py    |  6 +++---
 src/reuse/report.py  | 47 +++++++++++++++++++++-----------------------
 tests/test_report.py |  4 ++--
 3 files changed, 27 insertions(+), 30 deletions(-)

diff --git a/src/reuse/lint.py b/src/reuse/lint.py
index 356784ed..a0d568b1 100644
--- a/src/reuse/lint.py
+++ b/src/reuse/lint.py
@@ -49,7 +49,7 @@ def format_plain(report: ProjectReport) -> str:
     :return: String (in plaintext) that can be output to sys.stdout
     """
     output = ""
-    data = report.to_dict()
+    data = report.to_dict_lint()
 
     # If the project is not compliant:
     if not data["summary"]["compliant"]:
@@ -238,7 +238,7 @@ def custom_serializer(obj):
         )
 
     return json.dumps(
-        report.to_dict(),
+        report.to_dict_lint(),
         indent=2,
         # Serialize sets to lists
         default=custom_serializer,
@@ -256,7 +256,7 @@ def lint(report: ProjectReport, formatter=format_plain, out=sys.stdout) -> bool:
 
     out.write(formatter(report))
 
-    data = report.to_dict()
+    data = report.to_dict_lint()
     result = data["summary"]["compliant"]
     return result
 
diff --git a/src/reuse/report.py b/src/reuse/report.py
index d7e7b57d..982afaa0 100644
--- a/src/reuse/report.py
+++ b/src/reuse/report.py
@@ -78,10 +78,10 @@ def __init__(self, do_checksum: bool = True):
         self._files_without_licenses = None
         self._files_without_copyright = None
 
-    def to_dict(self):
-        """Collects and formats data from report and returns it as a dictionary
+    def to_dict_lint(self):
+        """Collects and formats data relevant to linting from report and returns
+        it as a dictionary.
 
-        :param report: ProjectReport object
         :return: Dictionary containing data from the ProjectReport object
         """
         # Setup report data container
@@ -104,27 +104,15 @@ def to_dict(self):
                 ],
                 "read_errors": [str(file) for file in self.read_errors],
             },
-            "files": {},
+            "files": [],
             "summary": {
                 "used_licenses": [],
             },
         }
 
         # Populate 'files'
-        for file in self.file_reports.copy():
-            copyrights = file.spdxfile.copyright.split("\n")
-            data["files"][str(file.path)] = {
-                "copyrights": [
-                    {"value": cop, "source": file.spdxfile.info.license_path}
-                    for cop in copyrights
-                    if cop
-                ],
-                "licenses": [
-                    {"value": lic, "source": file.spdxfile.info.license_path}
-                    for lic in file.spdxfile.licenses_in_file
-                    if lic
-                ],
-            }
+        for file_report in self.file_reports:
+            data["files"].append(file_report.to_dict_lint())
 
         # Populate 'summary'
         number_of_files = len(self.file_reports)
@@ -389,15 +377,24 @@ def __init__(
         self.bad_licenses = set()
         self.missing_licenses = set()
 
-    def to_dict(self):
-        """Turn the report into a json-like dictionary."""
+    def to_dict_lint(self):
+        """Turn the report into a json-like dictionary with exclusively
+        information relevant for linting.
+        """
         return {
             "path": str(Path(self.path).resolve()),
-            "name": self.spdxfile.name,
-            "spdx_id": self.spdxfile.spdx_id,
-            "chk_sum": self.spdxfile.chk_sum,
-            "licenses_in_file": sorted(self.spdxfile.licenses_in_file),
-            "copyright": self.spdxfile.copyright,
+            # TODO: Why does every copyright line have the same source?
+            "copyrights": [
+                {"value": copyright_, "source": self.spdxfile.info.license_path}
+                for copyright_ in self.spdxfile.copyright.split("\n")
+                if copyright_
+            ],
+            # TODO: Why does every license expression have the same source?
+            "licenses": [
+                {"value": license_, "source": self.spdxfile.info.license_path}
+                for license_ in self.spdxfile.licenses_in_file
+                if license_
+            ],
         }
 
     @classmethod
diff --git a/tests/test_report.py b/tests/test_report.py
index 1ff0da58..5a5c77c4 100644
--- a/tests/test_report.py
+++ b/tests/test_report.py
@@ -378,12 +378,12 @@ def test_generate_project_report_read_error(fake_repository, multiprocessing):
     assert (fake_repository / "bad") in result.read_errors
 
 
-def test_generate_project_report_to_dict(fake_repository, multiprocessing):
+def test_generate_project_report_to_dict_lint(fake_repository, multiprocessing):
     """Extremely simple test for ProjectReport.to_dict."""
     project = Project(fake_repository)
     report = ProjectReport.generate(project, multiprocessing=multiprocessing)
     # TODO: Actually do something
-    report.to_dict()
+    report.to_dict_lint()
 
 
 def test_bill_of_materials(fake_repository, multiprocessing):

From bd5946573a3a309eb0d92d77e54247266e2a3c65 Mon Sep 17 00:00:00 2001
From: Carmen Bianca BAKKER <carmenbianca@fsfe.org>
Date: Mon, 10 Apr 2023 18:32:23 +0200
Subject: [PATCH 32/60] Create ProjectReport.is_compliant

Signed-off-by: Carmen Bianca BAKKER <carmenbianca@fsfe.org>
---
 src/reuse/report.py | 37 +++++++++++++++++++++++--------------
 1 file changed, 23 insertions(+), 14 deletions(-)

diff --git a/src/reuse/report.py b/src/reuse/report.py
index 982afaa0..63c12512 100644
--- a/src/reuse/report.py
+++ b/src/reuse/report.py
@@ -77,6 +77,7 @@ def __init__(self, do_checksum: bool = True):
         self._used_licenses = None
         self._files_without_licenses = None
         self._files_without_copyright = None
+        self._is_compliant = None
 
     def to_dict_lint(self):
         """Collects and formats data relevant to linting from report and returns
@@ -116,19 +117,6 @@ def to_dict_lint(self):
 
         # Populate 'summary'
         number_of_files = len(self.file_reports)
-        is_compliant = not any(
-            any(result)
-            for result in (
-                data["non_compliant"]["missing_licenses"],
-                data["non_compliant"]["unused_licenses"],
-                data["non_compliant"]["bad_licenses"],
-                data["non_compliant"]["deprecated_licenses"],
-                data["non_compliant"]["licenses_without_extension"],
-                data["non_compliant"]["missing_copyright_info"],
-                data["non_compliant"]["missing_licensing_info"],
-                data["non_compliant"]["read_errors"],
-            )
-        )
         data["summary"] = {
             "used_licenses": list(self.used_licenses),
             "files_total": number_of_files,
@@ -136,7 +124,7 @@ def to_dict_lint(self):
             - len(self.files_without_copyright),
             "files_with_licensing_info": number_of_files
             - len(self.files_without_licenses),
-            "compliant": is_compliant,
+            "compliant": self.is_compliant,
         }
         return data
 
@@ -346,6 +334,27 @@ def files_without_copyright(self) -> Iterable[PathLike]:
 
         return self._files_without_copyright
 
+    @property
+    def is_compliant(self) -> bool:
+        """Whether the report is compliant with the REUSE Spec."""
+        if self._is_compliant is not None:
+            return self._is_compliant
+
+        self._is_compliant = not any(
+            (
+                self.missing_licenses,
+                self.unused_licenses,
+                self.bad_licenses,
+                self.deprecated_licenses,
+                self.licenses_without_extension,
+                self.files_without_copyright,
+                self.files_without_licenses,
+                self.read_errors,
+            )
+        )
+
+        return self._is_compliant
+
 
 class _File:  # pylint: disable=too-few-public-methods
     """Represent an SPDX file. Sufficiently enough for our purposes, in any

From 8f3b5d0d33feda9ff19baee0f9e18c5c4db1c2ad Mon Sep 17 00:00:00 2001
From: Carmen Bianca BAKKER <carmenbianca@fsfe.org>
Date: Mon, 10 Apr 2023 18:33:36 +0200
Subject: [PATCH 33/60] Rename json_version to lint_version

Signed-off-by: Carmen Bianca BAKKER <carmenbianca@fsfe.org>
---
 src/reuse/report.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/reuse/report.py b/src/reuse/report.py
index 63c12512..9704c2a3 100644
--- a/src/reuse/report.py
+++ b/src/reuse/report.py
@@ -87,7 +87,7 @@ def to_dict_lint(self):
         """
         # Setup report data container
         data = {
-            "json_version": "1.0",
+            "lint_version": "1.0",
             "reuse_version": __REUSE_version__,
             "non_compliant": {
                 "missing_licenses": self.missing_licenses,

From 8feed9e53dd2f669603dc253e79c9306d50ae386 Mon Sep 17 00:00:00 2001
From: Carmen Bianca BAKKER <carmenbianca@fsfe.org>
Date: Mon, 10 Apr 2023 19:07:17 +0200
Subject: [PATCH 34/60] Remove superfluous function

Signed-off-by: Carmen Bianca BAKKER <carmenbianca@fsfe.org>
---
 src/reuse/lint.py | 19 ++-----------------
 1 file changed, 2 insertions(+), 17 deletions(-)

diff --git a/src/reuse/lint.py b/src/reuse/lint.py
index a0d568b1..9a5751d0 100644
--- a/src/reuse/lint.py
+++ b/src/reuse/lint.py
@@ -246,21 +246,6 @@ def custom_serializer(obj):
     )
 
 
-def lint(report: ProjectReport, formatter=format_plain, out=sys.stdout) -> bool:
-    """Lints the entire project
-
-    :param report: Dictionary holding formatted ProjectReport data
-    :param formatter: Callable that formats the data dictionary
-    :param out: Where to output
-    """
-
-    out.write(formatter(report))
-
-    data = report.to_dict_lint()
-    result = data["summary"]["compliant"]
-    return result
-
-
 def run(args, project: Project, out=sys.stdout, formatter=format_plain):
     """List all non-compliant files."""
     report = ProjectReport.generate(
@@ -278,6 +263,6 @@ def run(args, project: Project, out=sys.stdout, formatter=format_plain):
         else:
             formatter = format_plain
 
-        result = lint(report, formatter=formatter, out=out)
+        out.write(formatter(report))
 
-    return 0 if result else 1
+    return 0 if report.is_compliant else 1

From 7423f622480def5693229ff4449af3f71f6848ed Mon Sep 17 00:00:00 2001
From: Carmen Bianca BAKKER <carmenbianca@fsfe.org>
Date: Thu, 11 May 2023 11:57:54 +0200
Subject: [PATCH 35/60] Make tests run by importing an existing function

Signed-off-by: Carmen Bianca BAKKER <carmenbianca@fsfe.org>
---
 tests/test_lint.py | 25 +++++++++++++------------
 1 file changed, 13 insertions(+), 12 deletions(-)

diff --git a/tests/test_lint.py b/tests/test_lint.py
index 807ef000..0fb42713 100644
--- a/tests/test_lint.py
+++ b/tests/test_lint.py
@@ -10,7 +10,8 @@
 
 import pytest
 
-from reuse.lint import lint
+# FIXME: Verify whether format_plain is the thing being tested.
+from reuse.lint import format_plain
 from reuse.project import Project
 from reuse.report import ProjectReport
 
@@ -32,7 +33,7 @@ def test_lint_simple(fake_repository):
     """Extremely simple test for lint."""
     project = Project(fake_repository)
     report = ProjectReport.generate(project)
-    result = lint(report)
+    result = format_plain(report)
     assert result
 
 
@@ -40,7 +41,7 @@ def test_lint_git(git_repository):
     """Extremely simple test for lint with a git repository."""
     project = Project(git_repository)
     report = ProjectReport.generate(project)
-    result = lint(report)
+    result = format_plain(report)
     assert result
 
 
@@ -49,7 +50,7 @@ def test_lint_submodule(submodule_repository):
     project = Project(submodule_repository)
     (submodule_repository / "submodule/foo.c").write_text("foo")
     report = ProjectReport.generate(project)
-    result = lint(report)
+    result = format_plain(report)
     assert result
 
 
@@ -58,7 +59,7 @@ def test_lint_submodule_included(submodule_repository):
     project = Project(submodule_repository, include_submodules=True)
     (submodule_repository / "submodule/foo.c").write_text("foo")
     report = ProjectReport.generate(project)
-    result = lint(report)
+    result = format_plain(report)
     assert not result
 
 
@@ -66,7 +67,7 @@ def test_lint_empty_directory(empty_directory):
     """An empty directory is compliant."""
     project = Project(empty_directory)
     report = ProjectReport.generate(project)
-    result = lint(report)
+    result = format_plain(report)
     assert result
 
 
@@ -82,7 +83,7 @@ def test_lint_deprecated(fake_repository, stringio):
 
     project = Project(fake_repository)
     report = ProjectReport.generate(project)
-    result = lint(report, out=stringio)
+    result = format_plain(report, out=stringio)
 
     assert not result
     assert "GPL-3.0" in stringio.getvalue()
@@ -95,7 +96,7 @@ def test_lint_bad_license(fake_repository, stringio):
     )
     project = Project(fake_repository)
     report = ProjectReport.generate(project)
-    result = lint(report, out=stringio)
+    result = format_plain(report, out=stringio)
 
     assert not result
     assert "foo.py" in stringio.getvalue()
@@ -107,7 +108,7 @@ def test_lint_missing_licenses(fake_repository, stringio):
     (fake_repository / "foo.py").write_text("SPDX-License-Identifier: MIT")
     project = Project(fake_repository)
     report = ProjectReport.generate(project)
-    result = lint(report, out=stringio)
+    result = format_plain(report, out=stringio)
 
     assert not result
     assert "foo.py" in stringio.getvalue()
@@ -119,7 +120,7 @@ def test_lint_unused_licenses(fake_repository, stringio):
     (fake_repository / "LICENSES/MIT.txt").write_text("foo")
     project = Project(fake_repository)
     report = ProjectReport.generate(project)
-    result = lint(report, out=stringio)
+    result = format_plain(report, out=stringio)
 
     assert not result
     assert "Unused licenses: MIT" in stringio.getvalue()
@@ -133,7 +134,7 @@ def test_lint_read_errors(fake_repository, stringio):
     (fake_repository / "foo.py").chmod(0o000)
     project = Project(fake_repository)
     report = ProjectReport.generate(project)
-    result = lint(report, out=stringio)
+    result = format_plain(report, out=stringio)
 
     assert not result
     assert "Could not read:" in stringio.getvalue()
@@ -145,7 +146,7 @@ def test_lint_files_without_copyright_and_licensing(fake_repository, stringio):
     (fake_repository / "foo.py").write_text("foo")
     project = Project(fake_repository)
     report = ProjectReport.generate(project)
-    result = lint(report, out=stringio)
+    result = format_plain(report, out=stringio)
 
     assert not result
     assert (

From f64835684ead47cee7314e97e8398ef80388341d Mon Sep 17 00:00:00 2001
From: Carmen Bianca BAKKER <carmenbianca@fsfe.org>
Date: Sun, 21 May 2023 15:29:14 +0200
Subject: [PATCH 36/60] Repair boolean check of SpdxInfo; object not iterable

Signed-off-by: Carmen Bianca BAKKER <carmenbianca@fsfe.org>
---
 src/reuse/project.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/reuse/project.py b/src/reuse/project.py
index 2f93556e..abbbcadf 100644
--- a/src/reuse/project.py
+++ b/src/reuse/project.py
@@ -185,7 +185,7 @@ def spdx_info_of(self, path: PathLike) -> SpdxInfo:
                 file_result = extract_spdx_info(
                     decoded_text_from_binary(fp, size=read_limit)
                 )
-                if any(file_result):
+                if file_result:
                     license_path = str(path)
             except (ExpressionError, ParseError):
                 _LOGGER.error(

From c75a3e36d4b51722f8afd71c82dc69d26eddcdea Mon Sep 17 00:00:00 2001
From: Carmen Bianca BAKKER <carmenbianca@fsfe.org>
Date: Sun, 21 May 2023 15:36:15 +0200
Subject: [PATCH 37/60] Correctly use format_plain

Signed-off-by: Carmen Bianca BAKKER <carmenbianca@fsfe.org>
---
 tests/test_lint.py | 59 +++++++++++++++++++++++-----------------------
 1 file changed, 29 insertions(+), 30 deletions(-)

diff --git a/tests/test_lint.py b/tests/test_lint.py
index 0fb42713..050dbf5d 100644
--- a/tests/test_lint.py
+++ b/tests/test_lint.py
@@ -10,7 +10,6 @@
 
 import pytest
 
-# FIXME: Verify whether format_plain is the thing being tested.
 from reuse.lint import format_plain
 from reuse.project import Project
 from reuse.report import ProjectReport
@@ -60,7 +59,7 @@ def test_lint_submodule_included(submodule_repository):
     (submodule_repository / "submodule/foo.c").write_text("foo")
     report = ProjectReport.generate(project)
     result = format_plain(report)
-    assert not result
+    assert ":-(" in result
 
 
 def test_lint_empty_directory(empty_directory):
@@ -71,7 +70,7 @@ def test_lint_empty_directory(empty_directory):
     assert result
 
 
-def test_lint_deprecated(fake_repository, stringio):
+def test_lint_deprecated(fake_repository):
     """If a repo has a deprecated license, detect it."""
     shutil.copy(
         fake_repository / "LICENSES/GPL-3.0-or-later.txt",
@@ -83,77 +82,77 @@ def test_lint_deprecated(fake_repository, stringio):
 
     project = Project(fake_repository)
     report = ProjectReport.generate(project)
-    result = format_plain(report, out=stringio)
+    result = format_plain(report)
 
-    assert not result
-    assert "GPL-3.0" in stringio.getvalue()
+    assert ":-(" in result
+    assert "GPL-3.0" in result
 
 
-def test_lint_bad_license(fake_repository, stringio):
+def test_lint_bad_license(fake_repository):
     """A bad license is detected."""
     (fake_repository / "foo.py").write_text(
         "SPDX-License-Identifier: bad-license"
     )
     project = Project(fake_repository)
     report = ProjectReport.generate(project)
-    result = format_plain(report, out=stringio)
+    result = format_plain(report)
 
-    assert not result
-    assert "foo.py" in stringio.getvalue()
-    assert "bad-license" in stringio.getvalue()
+    assert ":-(" in result
+    assert "foo.py" in result
+    assert "bad-license" in result
 
 
-def test_lint_missing_licenses(fake_repository, stringio):
+def test_lint_missing_licenses(fake_repository):
     """A missing license is detected."""
     (fake_repository / "foo.py").write_text("SPDX-License-Identifier: MIT")
     project = Project(fake_repository)
     report = ProjectReport.generate(project)
-    result = format_plain(report, out=stringio)
+    result = format_plain(report)
 
-    assert not result
-    assert "foo.py" in stringio.getvalue()
-    assert "MIT" in stringio.getvalue()
+    assert ":-(" in result
+    assert "foo.py" in result
+    assert "MIT" in result
 
 
-def test_lint_unused_licenses(fake_repository, stringio):
+def test_lint_unused_licenses(fake_repository):
     """An unused license is detected."""
     (fake_repository / "LICENSES/MIT.txt").write_text("foo")
     project = Project(fake_repository)
     report = ProjectReport.generate(project)
-    result = format_plain(report, out=stringio)
+    result = format_plain(report)
 
-    assert not result
-    assert "Unused licenses: MIT" in stringio.getvalue()
+    assert ":-(" in result
+    assert "Unused licenses: MIT" in result
 
 
 @cpython
 @posix
-def test_lint_read_errors(fake_repository, stringio):
+def test_lint_read_errors(fake_repository):
     """A read error is detected."""
     (fake_repository / "foo.py").write_text("foo")
     (fake_repository / "foo.py").chmod(0o000)
     project = Project(fake_repository)
     report = ProjectReport.generate(project)
-    result = format_plain(report, out=stringio)
+    result = format_plain(report)
 
-    assert not result
-    assert "Could not read:" in stringio.getvalue()
-    assert "foo.py" in stringio.getvalue()
+    assert ":-(" in result
+    assert "Could not read:" in result
+    assert "foo.py" in result
 
 
-def test_lint_files_without_copyright_and_licensing(fake_repository, stringio):
+def test_lint_files_without_copyright_and_licensing(fake_repository):
     """A file without copyright and licensing is detected."""
     (fake_repository / "foo.py").write_text("foo")
     project = Project(fake_repository)
     report = ProjectReport.generate(project)
-    result = format_plain(report, out=stringio)
+    result = format_plain(report)
 
-    assert not result
+    assert ":-(" in result
     assert (
         "The following files have no copyright and licensing information:"
-        in stringio.getvalue()
+        in result
     )
-    assert "foo.py" in stringio.getvalue()
+    assert "foo.py" in result
 
 
 # REUSE-IgnoreEnd

From 5bc2c58bc426efb363ba8fe4280fc75efaa7e009 Mon Sep 17 00:00:00 2001
From: Carmen Bianca BAKKER <carmenbianca@fsfe.org>
Date: Sun, 21 May 2023 15:41:28 +0200
Subject: [PATCH 38/60] fixup! Rename json_version to lint_version

---
 tests/test_main.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/test_main.py b/tests/test_main.py
index c2c6d148..a0c081a5 100644
--- a/tests/test_main.py
+++ b/tests/test_main.py
@@ -222,7 +222,7 @@ def test_lint_json(fake_repository, stringio):
     output = json.loads(stringio.getvalue())
 
     assert result == 0
-    assert output["json_version"] == "1.0"
+    assert output["lint_version"] == "1.0"
     assert len(output["files"]) == 7
 
 
@@ -233,7 +233,7 @@ def test_lint_json_fail(fake_repository, stringio):
     output = json.loads(stringio.getvalue())
 
     assert result > 0
-    assert output["json_version"] == "1.0"
+    assert output["lint_version"] == "1.0"
     assert len(output["non_compliant"]["missing_licensing_info"]) == 1
     assert len(output["non_compliant"]["missing_copyright_info"]) == 1
     assert len(output["files"]) == 8

From 54a8a66bad4063464909617982d1052866921479 Mon Sep 17 00:00:00 2001
From: Carmen Bianca BAKKER <carmenbianca@fsfe.org>
Date: Sun, 21 May 2023 15:44:30 +0200
Subject: [PATCH 39/60] fixup! fixup! Rename json_version to lint_version

---
 tests/test_main.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/test_main.py b/tests/test_main.py
index a0c081a5..c4bf45d7 100644
--- a/tests/test_main.py
+++ b/tests/test_main.py
@@ -223,7 +223,7 @@ def test_lint_json(fake_repository, stringio):
 
     assert result == 0
     assert output["lint_version"] == "1.0"
-    assert len(output["files"]) == 7
+    assert len(output["files"]) == 8
 
 
 def test_lint_json_fail(fake_repository, stringio):
@@ -236,7 +236,7 @@ def test_lint_json_fail(fake_repository, stringio):
     assert output["lint_version"] == "1.0"
     assert len(output["non_compliant"]["missing_licensing_info"]) == 1
     assert len(output["non_compliant"]["missing_copyright_info"]) == 1
-    assert len(output["files"]) == 8
+    assert len(output["files"]) == 9
 
 
 def test_lint_no_file_extension(fake_repository, stringio):

From d3208db186469dc7090681e6f4a061aaccd0f686 Mon Sep 17 00:00:00 2001
From: Carmen Bianca BAKKER <carmenbianca@fsfe.org>
Date: Sun, 21 May 2023 15:53:22 +0200
Subject: [PATCH 40/60] Don't pass string where (optional) set is expected

Signed-off-by: Carmen Bianca BAKKER <carmenbianca@fsfe.org>
---
 tests/test_header.py | 36 ++++++++++++++++++------------------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/tests/test_header.py b/tests/test_header.py
index 5eb3e80b..c97d4144 100644
--- a/tests/test_header.py
+++ b/tests/test_header.py
@@ -24,7 +24,7 @@
 def test_create_header_simple():
     """Create a super simple header."""
     spdx_info = SpdxInfo(
-        {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"}, ""
+        {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"}
     )
     expected = cleandoc(
         """
@@ -57,7 +57,7 @@ def test_create_header_simple_with_contributor():
 def test_create_header_template_simple(template_simple):
     """Create a header with a simple template."""
     spdx_info = SpdxInfo(
-        {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"}, ""
+        {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"}
     )
     expected = cleandoc(
         """
@@ -77,7 +77,7 @@ def test_create_header_template_simple(template_simple):
 def test_create_header_template_no_spdx(template_no_spdx):
     """Create a header with a template that does not have all SPDX info."""
     spdx_info = SpdxInfo(
-        {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"}, ""
+        {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"}
     )
 
     with pytest.raises(MissingSpdxInfo):
@@ -87,7 +87,7 @@ def test_create_header_template_no_spdx(template_no_spdx):
 def test_create_header_template_commented(template_commented):
     """Create a header with an already-commented template."""
     spdx_info = SpdxInfo(
-        {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"}, ""
+        {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"}
     )
     expected = cleandoc(
         """
@@ -113,7 +113,7 @@ def test_create_header_template_commented(template_commented):
 def test_create_header_already_contains_spdx():
     """Create a new header from a header that already contains SPDX info."""
     spdx_info = SpdxInfo(
-        {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"}, ""
+        {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"}
     )
     existing = cleandoc(
         """
@@ -138,7 +138,7 @@ def test_create_header_already_contains_spdx():
 def test_create_header_existing_is_wrong():
     """If the existing header contains errors, raise a CommentCreateError."""
     spdx_info = SpdxInfo(
-        {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"}, ""
+        {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"}
     )
     existing = cleandoc(
         """
@@ -154,7 +154,7 @@ def test_create_header_existing_is_wrong():
 
 def test_create_header_old_syntax():
     """Old copyright syntax is preserved when creating a new header."""
-    spdx_info = SpdxInfo({"GPL-3.0-or-later"}, set(), "")
+    spdx_info = SpdxInfo({"GPL-3.0-or-later"}, set())
     existing = cleandoc(
         """
         # Copyright John Doe
@@ -173,7 +173,7 @@ def test_create_header_old_syntax():
 
 def test_create_header_remove_fluff():
     """Any stuff that isn't SPDX info is removed when using create_header."""
-    spdx_info = SpdxInfo({"GPL-3.0-or-later"}, set(), "")
+    spdx_info = SpdxInfo({"GPL-3.0-or-later"}, set())
     existing = cleandoc(
         """
         # SPDX-FileCopyrightText: John Doe
@@ -199,7 +199,7 @@ def test_add_new_header_simple():
     the old one.
     """
     spdx_info = SpdxInfo(
-        {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"}, ""
+        {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"}
     )
     text = cleandoc(
         """
@@ -229,7 +229,7 @@ def test_add_new_header_simple():
 def test_find_and_replace_no_header():
     """Given text without header, add a header."""
     spdx_info = SpdxInfo(
-        {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"}, ""
+        {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"}
     )
     text = "pass"
     expected = cleandoc(
@@ -251,7 +251,7 @@ def test_find_and_replace_no_header():
 
 def test_find_and_replace_verbatim():
     """Replace a header with itself."""
-    spdx_info = SpdxInfo(set(), set(), "")
+    spdx_info = SpdxInfo(set(), set())
     text = cleandoc(
         """
         # SPDX-FileCopyrightText: Jane Doe
@@ -270,7 +270,7 @@ def test_find_and_replace_newline_before_header():
     preceding whitespace.
     """
     spdx_info = SpdxInfo(
-        {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: John Doe"}, ""
+        {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: John Doe"}
     )
     text = cleandoc(
         """
@@ -297,7 +297,7 @@ def test_find_and_replace_newline_before_header():
 def test_find_and_replace_preserve_preceding():
     """When the SPDX header is in the middle of the file, keep it there."""
     spdx_info = SpdxInfo(
-        {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: John Doe"}, ""
+        {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: John Doe"}
     )
     text = cleandoc(
         """
@@ -335,7 +335,7 @@ def test_find_and_replace_keep_shebang():
     it.
     """
     spdx_info = SpdxInfo(
-        {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: John Doe"}, ""
+        {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: John Doe"}
     )
     text = cleandoc(
         """
@@ -366,7 +366,7 @@ def test_find_and_replace_separate_shebang():
     """When the shebang is part of the same comment as the SPDX comment,
     separate the two.
     """
-    spdx_info = SpdxInfo({"GPL-3.0-or-later"}, set(), "")
+    spdx_info = SpdxInfo({"GPL-3.0-or-later"}, set())
     text = cleandoc(
         """
         #!/usr/bin/env python3
@@ -394,7 +394,7 @@ def test_find_and_replace_separate_shebang():
 
 def test_find_and_replace_only_shebang():
     """When the file only contains a shebang, keep it at the top of the file."""
-    spdx_info = SpdxInfo({"GPL-3.0-or-later"}, set(), "")
+    spdx_info = SpdxInfo({"GPL-3.0-or-later"}, set())
     text = cleandoc(
         """
         #!/usr/bin/env python3
@@ -424,7 +424,7 @@ def test_find_and_replace_keep_old_comment():
     licensing information, preserve it below the REUSE header.
     """
     spdx_info = SpdxInfo(
-        {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"}, ""
+        {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"}
     )
     text = cleandoc(
         """
@@ -451,7 +451,7 @@ def test_find_and_replace_keep_old_comment():
 def test_find_and_replace_preserve_newline():
     """If the file content ends with a newline, don't remove it."""
 
-    spdx_info = SpdxInfo(set(), set(), "")
+    spdx_info = SpdxInfo(set(), set())
     text = (
         cleandoc(
             """

From 8a40fb681091221fa43f969da96bf67129dec85e Mon Sep 17 00:00:00 2001
From: Carmen Bianca BAKKER <carmenbianca@fsfe.org>
Date: Sun, 21 May 2023 15:55:29 +0200
Subject: [PATCH 41/60] fixup! Don't pass string where (optional) set is
 expected

---
 src/reuse/project.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/reuse/project.py b/src/reuse/project.py
index abbbcadf..f73d52f3 100644
--- a/src/reuse/project.py
+++ b/src/reuse/project.py
@@ -156,8 +156,8 @@ def spdx_info_of(self, path: PathLike) -> SpdxInfo:
 
         # This means that only one 'source' of licensing/copyright information
         # is captured in SpdxInfo
-        dep5_result = SpdxInfo(set(), set(), "")
-        file_result = SpdxInfo(set(), set(), "")
+        dep5_result = SpdxInfo(set(), set())
+        file_result = SpdxInfo(set(), set())
 
         # Search the .reuse/dep5 file for SPDX information.
         if self._copyright:

From a769306ba3598583984b8beafe2331856f63c62b Mon Sep 17 00:00:00 2001
From: Carmen Bianca BAKKER <carmenbianca@fsfe.org>
Date: Sun, 21 May 2023 16:03:59 +0200
Subject: [PATCH 42/60] fixup! fixup! Don't pass string where (optional) set is
 expected

---
 src/reuse/_util.py   | 4 ++--
 src/reuse/project.py | 4 +++-
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/reuse/_util.py b/src/reuse/_util.py
index bb60a924..693d6615 100644
--- a/src/reuse/_util.py
+++ b/src/reuse/_util.py
@@ -208,12 +208,12 @@ def _copyright_from_dep5(path: PathLike, dep5_copyright: Copyright) -> SpdxInfo:
     result = dep5_copyright.find_files_paragraph(Path(path).as_posix())
 
     if result is None:
-        return SpdxInfo(set(), set(), str(path))
+        return SpdxInfo(set(), set(), license_path=str(path))
 
     return SpdxInfo(
         set(map(_LICENSING.parse, [result.license.synopsis])),
         set(map(str.strip, result.copyright.splitlines())),
-        str(path),
+        license_path=str(path),
     )
 
 
diff --git a/src/reuse/project.py b/src/reuse/project.py
index f73d52f3..08b79e64 100644
--- a/src/reuse/project.py
+++ b/src/reuse/project.py
@@ -201,7 +201,9 @@ def spdx_info_of(self, path: PathLike) -> SpdxInfo:
         copyright_lines = dep5_result.copyright_lines.union(
             file_result.copyright_lines
         )
-        return SpdxInfo(spdx_expressions, copyright_lines, license_path)
+        return SpdxInfo(
+            spdx_expressions, copyright_lines, license_path=license_path
+        )
 
     def relative_from_root(self, path: Path) -> Path:
         """If the project root is /tmp/project, and *path* is

From cc5a383b9ec77219867fb3b471ece0033998a9fa Mon Sep 17 00:00:00 2001
From: Carmen Bianca BAKKER <carmenbianca@fsfe.org>
Date: Sun, 21 May 2023 16:13:37 +0200
Subject: [PATCH 43/60] Create SpdxInfo.contains_copyright_or_licensing

Signed-off-by: Carmen Bianca BAKKER <carmenbianca@fsfe.org>
---
 src/reuse/__init__.py |  4 ++++
 src/reuse/project.py  |  4 ++--
 tests/test_core.py    | 36 ++++++++++++++++++++++++++++++++++++
 3 files changed, 42 insertions(+), 2 deletions(-)
 create mode 100644 tests/test_core.py

diff --git a/src/reuse/__init__.py b/src/reuse/__init__.py
index dd770bae..46cd1fd2 100644
--- a/src/reuse/__init__.py
+++ b/src/reuse/__init__.py
@@ -97,6 +97,10 @@ class SpdxInfo:
     contributor_lines: Set[str] = field(default_factory=set)
     license_path: Optional[str] = None
 
+    def contains_copyright_or_licensing(self) -> bool:
+        """Either *spdx_expressions* or *copyright_lines* is non-empty."""
+        return bool(self.spdx_expressions or self.copyright_lines)
+
     def __bool__(self):
         return any(self.__dict__.values())
 
diff --git a/src/reuse/project.py b/src/reuse/project.py
index 08b79e64..6bf6cae1 100644
--- a/src/reuse/project.py
+++ b/src/reuse/project.py
@@ -164,11 +164,11 @@ def spdx_info_of(self, path: PathLike) -> SpdxInfo:
             dep5_result = _copyright_from_dep5(
                 self.relative_from_root(path), self._copyright
             )
-            if bool(dep5_result):
+            if dep5_result.contains_copyright_or_licensing():
                 _LOGGER.info(
                     _("'{path}' covered by .reuse/dep5").format(path=path)
                 )
-                license_path = ".reuse/dep5"
+                license_path = dep5_result.license_path
 
         # Search the file for SPDX information.
         with path.open("rb") as fp:
diff --git a/tests/test_core.py b/tests/test_core.py
new file mode 100644
index 00000000..2b926eb4
--- /dev/null
+++ b/tests/test_core.py
@@ -0,0 +1,36 @@
+# SPDX-FileCopyrightText: 2023 Carmen Bianca BAKKER <carmenbianca@fsfe.org>
+#
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+"""Tests for some core components."""
+
+from reuse import SpdxInfo
+
+# REUSE-IgnoreStart
+
+
+def test_spdx_info_contains_copyright_or_licensing():
+    """If either spdx_expressions or copyright_lines is truthy, expect True."""
+    arguments = [
+        ({"GPL-3.0-or-later"}, set()),
+        (set(), "SPDX-FileCopyrightText: 2017 Jane Doe"),
+        ({"GPL-3.0-or-later"}, "SPDX-FileCopyrightText: 2017 Jane Doe"),
+    ]
+    for args in arguments:
+        info = SpdxInfo(*args)
+        assert info.contains_copyright_or_licensing()
+
+
+def test_spdx_info_contains_copyright_or_licensing_empty():
+    """If the SpdxInfo object is completely empty, expect False."""
+    info = SpdxInfo()
+    assert not info.contains_copyright_or_licensing()
+
+
+def test_spdx_info_contains_copyright_or_licensing_other_truthy():
+    """If another attribute is truthy, still expect False."""
+    info = SpdxInfo(contributor_lines={"SPDX-FileContributor: 2017 Jane Doe"})
+    assert not info.contains_copyright_or_licensing()
+
+
+# REUSE-IgnoreEnd

From 23fa4ccad6e38a42a08e90ac7bd909544ff58222 Mon Sep 17 00:00:00 2001
From: Carmen Bianca BAKKER <carmenbianca@fsfe.org>
Date: Sun, 21 May 2023 16:24:01 +0200
Subject: [PATCH 44/60] Make lint version a global constant

Signed-off-by: Carmen Bianca BAKKER <carmenbianca@fsfe.org>
---
 src/reuse/report.py | 4 +++-
 tests/test_main.py  | 5 +++--
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/src/reuse/report.py b/src/reuse/report.py
index 9704c2a3..357c0bc3 100644
--- a/src/reuse/report.py
+++ b/src/reuse/report.py
@@ -24,6 +24,8 @@
 
 _LOGGER = logging.getLogger(__name__)
 
+LINT_VERSION = "1.0"
+
 
 class _MultiprocessingContainer:
     """Container that remembers some data in order to generate a FileReport."""
@@ -87,7 +89,7 @@ def to_dict_lint(self):
         """
         # Setup report data container
         data = {
-            "lint_version": "1.0",
+            "lint_version": LINT_VERSION,
             "reuse_version": __REUSE_version__,
             "non_compliant": {
                 "missing_licenses": self.missing_licenses,
diff --git a/tests/test_main.py b/tests/test_main.py
index c4bf45d7..0faed6df 100644
--- a/tests/test_main.py
+++ b/tests/test_main.py
@@ -25,6 +25,7 @@
 from reuse import download
 from reuse._main import main
 from reuse._util import GIT_EXE, HG_EXE
+from reuse.report import LINT_VERSION
 
 # REUSE-IgnoreStart
 
@@ -222,7 +223,7 @@ def test_lint_json(fake_repository, stringio):
     output = json.loads(stringio.getvalue())
 
     assert result == 0
-    assert output["lint_version"] == "1.0"
+    assert output["lint_version"] == LINT_VERSION
     assert len(output["files"]) == 8
 
 
@@ -233,7 +234,7 @@ def test_lint_json_fail(fake_repository, stringio):
     output = json.loads(stringio.getvalue())
 
     assert result > 0
-    assert output["lint_version"] == "1.0"
+    assert output["lint_version"] == LINT_VERSION
     assert len(output["non_compliant"]["missing_licensing_info"]) == 1
     assert len(output["non_compliant"]["missing_copyright_info"]) == 1
     assert len(output["files"]) == 9

From 8fa808a401d711adb273207fb800f16ff4df4f3f Mon Sep 17 00:00:00 2001
From: Carmen Bianca BAKKER <carmenbianca@fsfe.org>
Date: Sun, 21 May 2023 16:27:16 +0200
Subject: [PATCH 45/60] Add tool version to json output

Signed-off-by: Carmen Bianca BAKKER <carmenbianca@fsfe.org>
---
 src/reuse/lint.py   | 4 ++--
 src/reuse/report.py | 3 ++-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/reuse/lint.py b/src/reuse/lint.py
index 9a5751d0..d1a0a665 100644
--- a/src/reuse/lint.py
+++ b/src/reuse/lint.py
@@ -206,12 +206,12 @@ def format_plain(report: ProjectReport) -> str:
         output += _(
             "Congratulations! Your project is compliant with version"
             " {} of the REUSE Specification :-)"
-        ).format(data["reuse_version"])
+        ).format(data["reuse_spec_version"])
     else:
         output += _(
             "Unfortunately, your project is not compliant with version "
             "{} of the REUSE Specification :-("
-        ).format(data["reuse_version"])
+        ).format(data["reuse_spec_version"])
     output += "\n"
 
     return output
diff --git a/src/reuse/report.py b/src/reuse/report.py
index 357c0bc3..6ef8a683 100644
--- a/src/reuse/report.py
+++ b/src/reuse/report.py
@@ -90,7 +90,8 @@ def to_dict_lint(self):
         # Setup report data container
         data = {
             "lint_version": LINT_VERSION,
-            "reuse_version": __REUSE_version__,
+            "reuse_spec_version": __REUSE_version__,
+            "reuse_tool_version": __version__,
             "non_compliant": {
                 "missing_licenses": self.missing_licenses,
                 "unused_licenses": [str(file) for file in self.unused_licenses],

From c94b5a0eb0adb94f71343cf37f7e8736cc3b791d Mon Sep 17 00:00:00 2001
From: linozen <linus@sehn.tech>
Date: Thu, 25 May 2023 14:44:04 +0200
Subject: [PATCH 46/60] Refactoring precedence for license path

---
 src/reuse/project.py | 27 +++++++++++++++++++--------
 1 file changed, 19 insertions(+), 8 deletions(-)

diff --git a/src/reuse/project.py b/src/reuse/project.py
index 6bf6cae1..c1a86eef 100644
--- a/src/reuse/project.py
+++ b/src/reuse/project.py
@@ -168,7 +168,7 @@ def spdx_info_of(self, path: PathLike) -> SpdxInfo:
                 _LOGGER.info(
                     _("'{path}' covered by .reuse/dep5").format(path=path)
                 )
-                license_path = dep5_result.license_path
+                license_path = str(self.root / ".reuse/dep5")
 
         # Search the file for SPDX information.
         with path.open("rb") as fp:
@@ -195,14 +195,25 @@ def spdx_info_of(self, path: PathLike) -> SpdxInfo:
                     ).format(path=path)
                 )
 
-        spdx_expressions = dep5_result.spdx_expressions.union(
-            file_result.spdx_expressions
-        )
-        copyright_lines = dep5_result.copyright_lines.union(
-            file_result.copyright_lines
-        )
+        # There is only a .dep5 file
+        if (
+            dep5_result.contains_copyright_or_licensing()
+            and not file_result.contains_copyright_or_licensing()
+        ):
+            # Information in the file header takes precendence over .dep5 file
+            return SpdxInfo(
+                spdx_expressions=dep5_result.spdx_expressions,
+                copyright_lines=dep5_result.copyright_lines,
+                license_path=license_path,
+            )
+            # TODO Emit warning that information in .dep5 file was ommitted
+
+        # There is both information in a .dep5 file and in the file header
+        # or there is only a file header
         return SpdxInfo(
-            spdx_expressions, copyright_lines, license_path=license_path
+            spdx_expressions=file_result.spdx_expressions,
+            copyright_lines=file_result.copyright_lines,
+            license_path=license_path,
         )
 
     def relative_from_root(self, path: Path) -> Path:

From 7493ed1bb1edd7e52608cfd72b92ae36f7c686f9 Mon Sep 17 00:00:00 2001
From: linozen <linus@sehn.tech>
Date: Thu, 25 May 2023 14:48:00 +0200
Subject: [PATCH 47/60] Add two `--json` specific tests to `tests/test_lint.py`

---
 tests/test_lint.py | 64 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 64 insertions(+)

diff --git a/tests/test_lint.py b/tests/test_lint.py
index 050dbf5d..6439006b 100644
--- a/tests/test_lint.py
+++ b/tests/test_lint.py
@@ -155,4 +155,68 @@ def test_lint_files_without_copyright_and_licensing(fake_repository):
     assert "foo.py" in result
 
 
+def test_lint_json_output(fake_repository):
+    """Test for lint with JSON output."""
+    (fake_repository / "foo.py").write_text("SPDX-License-Identifier: MIT")
+    project = Project(fake_repository)
+    report = ProjectReport.generate(project)
+
+    json_result = report.to_dict_lint()
+
+    assert json_result
+    # Test if all the keys are present
+    assert "lint_version" in json_result
+    assert "reuse_spec_version" in json_result
+    assert "reuse_tool_version" in json_result
+    assert "non_compliant" in json_result
+    assert "files" in json_result
+    assert "summary" in json_result
+    # Test length of resulting list values
+    assert len(json_result["files"]) == 9
+    assert len(json_result["summary"]) == 5
+    # Test result
+    assert json_result["summary"]["compliant"] is False
+    # Test license path
+    for test_file in json_result["files"]:
+        if test_file["path"] == str(fake_repository / "foo.py"):
+            assert test_file["licenses"][0]["value"] == "MIT"
+            assert test_file["licenses"][0]["source"] == str(
+                fake_repository / "foo.py"
+            )
+        if test_file["path"].startswith(str(fake_repository / "doc")):
+            assert test_file["licenses"][0]["value"] == "CC0-1.0"
+            assert test_file["licenses"][0]["source"] == str(
+                fake_repository / ".reuse/dep5"
+            )
+
+
+def test_lint_json_output_precedence(fake_repository):
+    """Test for lint with JSON output with focus on precedence."""
+    (fake_repository / "doc/differently_licensed_docs.rst").write_text(
+        "SPDX-License-Identifier: MIT"
+    )
+    project = Project(fake_repository)
+    report = ProjectReport.generate(project)
+
+    json_result = report.to_dict_lint()
+
+    assert json_result
+    # Test result
+    assert json_result["summary"]["compliant"] is False
+    # Test license path precedence
+    for test_file in json_result["files"]:
+        if test_file["path"].startswith(
+            str(fake_repository / "doc/differently_licensed_docs.rst")
+        ):
+            assert test_file["licenses"][0]["value"] == "MIT"
+            assert test_file["licenses"][0]["source"] == str(
+                fake_repository / "doc/differently_licensed_docs.rst"
+            )
+        if test_file["path"].startswith(str(fake_repository / "doc/index.rst")):
+            assert test_file["licenses"][0]["value"] == "CC0-1.0"
+            assert test_file["licenses"][0]["source"] == str(
+                fake_repository / ".reuse/dep5"
+            )
+
+
 # REUSE-IgnoreEnd

From bb3217a68cd15aeb09c1c17564a14e0adc2ffac9 Mon Sep 17 00:00:00 2001
From: linozen <linus@sehn.tech>
Date: Thu, 25 May 2023 15:14:56 +0200
Subject: [PATCH 48/60] Adapting two tests to new precedence behaviour

---
 tests/test_project.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/tests/test_project.py b/tests/test_project.py
index 487c8a03..15cd65ee 100644
--- a/tests/test_project.py
+++ b/tests/test_project.py
@@ -247,22 +247,22 @@ def test_spdx_info_of_only_copyright(fake_repository):
 
 def test_spdx_info_of_only_copyright_also_covered_by_debian(fake_repository):
     """A file contains only a copyright line, but debian/copyright also has
-    information on this file. Use both.
+    information on this file. Use only the information from file header.
     """
     (fake_repository / "doc/foo.py").write_text(
         "SPDX-FileCopyrightText: in file"
     )
     project = Project(fake_repository)
     spdx_info = project.spdx_info_of("doc/foo.py")
-    assert any(spdx_info.spdx_expressions)
-    assert len(spdx_info.copyright_lines) == 2
+
+    assert len(spdx_info.copyright_lines) == 1
     assert "SPDX-FileCopyrightText: in file" in spdx_info.copyright_lines
-    assert "2017 Jane Doe" in spdx_info.copyright_lines
 
 
 def test_spdx_info_of_also_covered_by_dep5(fake_repository):
     """A file contains all SPDX information, but .reuse/dep5 also
-    provides information on this file. Use both.
+    provides information on this file. Use only the information
+    from the file header.
     """
     (fake_repository / "doc/foo.py").write_text(
         dedent(
@@ -274,9 +274,9 @@ def test_spdx_info_of_also_covered_by_dep5(fake_repository):
     project = Project(fake_repository)
     spdx_info = project.spdx_info_of("doc/foo.py")
     assert LicenseSymbol("MIT") in spdx_info.spdx_expressions
-    assert LicenseSymbol("CC0-1.0") in spdx_info.spdx_expressions
+    assert LicenseSymbol("CC0-1.0") not in spdx_info.spdx_expressions
     assert "SPDX-FileCopyrightText: in file" in spdx_info.copyright_lines
-    assert "2017 Jane Doe" in spdx_info.copyright_lines
+    assert "2017 Jane Doe" not in spdx_info.copyright_lines
 
 
 def test_spdx_info_of_no_duplicates(empty_directory):

From f2f1ef067a9fc7144eeaccaa6abd31adaff2f90e Mon Sep 17 00:00:00 2001
From: linozen <linus@sehn.tech>
Date: Thu, 25 May 2023 15:28:15 +0200
Subject: [PATCH 49/60] Adding a warning when information is overridden

---
 src/reuse/project.py | 22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)

diff --git a/src/reuse/project.py b/src/reuse/project.py
index c1a86eef..37eee586 100644
--- a/src/reuse/project.py
+++ b/src/reuse/project.py
@@ -195,21 +195,31 @@ def spdx_info_of(self, path: PathLike) -> SpdxInfo:
                     ).format(path=path)
                 )
 
-        # There is only a .dep5 file
+        # There is both information in a .dep5 file and in the file header
         if (
+            dep5_result.contains_copyright_or_licensing()
+            and file_result.contains_copyright_or_licensing()
+        ):
+            _LOGGER.warning(
+                _(
+                    "Copyright and licensing information for '{path}' have been"
+                    " found in both the file header or .license file and the"
+                    " DEP5 file located at '{dep5_path}'. The information in"
+                    " the DEP5 file has been overriden. Please ensure that this"
+                    " is correct."
+                ).format(path=path, dep5_path=".reuse/dep5")
+            )
+        # There is only a .dep5 file
+        elif (
             dep5_result.contains_copyright_or_licensing()
             and not file_result.contains_copyright_or_licensing()
         ):
-            # Information in the file header takes precendence over .dep5 file
             return SpdxInfo(
                 spdx_expressions=dep5_result.spdx_expressions,
                 copyright_lines=dep5_result.copyright_lines,
                 license_path=license_path,
             )
-            # TODO Emit warning that information in .dep5 file was ommitted
-
-        # There is both information in a .dep5 file and in the file header
-        # or there is only a file header
+        # There is only a file header
         return SpdxInfo(
             spdx_expressions=file_result.spdx_expressions,
             copyright_lines=file_result.copyright_lines,

From cd820e16848646070e542e67a006f47cc294c68f Mon Sep 17 00:00:00 2001
From: linozen <linus@sehn.tech>
Date: Thu, 25 May 2023 15:55:41 +0200
Subject: [PATCH 50/60] Import `Path` instead of `PosixPath`

---
 src/reuse/lint.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/reuse/lint.py b/src/reuse/lint.py
index d1a0a665..9af42938 100644
--- a/src/reuse/lint.py
+++ b/src/reuse/lint.py
@@ -12,7 +12,7 @@
 import os
 import sys
 from gettext import gettext as _
-from pathlib import PosixPath
+from pathlib import Path
 
 from .project import Project
 from .report import ProjectReport
@@ -229,7 +229,7 @@ def custom_serializer(obj):
 
         :param obj: Object to be serialized
         """
-        if isinstance(obj, PosixPath):
+        if isinstance(obj, Path):
             return str(obj)
         if isinstance(obj, set):
             return list(obj)

From 6c5154515ea2a414606a92d7c1adddcc7ee383b2 Mon Sep 17 00:00:00 2001
From: linozen <linus@sehn.tech>
Date: Thu, 25 May 2023 16:29:38 +0200
Subject: [PATCH 51/60] Use `StringIO` object instead of normal string object

---
 src/reuse/lint.py | 125 ++++++++++++++++++++++++++--------------------
 1 file changed, 70 insertions(+), 55 deletions(-)

diff --git a/src/reuse/lint.py b/src/reuse/lint.py
index 9af42938..f9681d64 100644
--- a/src/reuse/lint.py
+++ b/src/reuse/lint.py
@@ -12,6 +12,7 @@
 import os
 import sys
 from gettext import gettext as _
+from io import StringIO
 from pathlib import Path
 
 from .project import Project
@@ -48,7 +49,7 @@ def format_plain(report: ProjectReport) -> str:
     :param report: ProjectReport data
     :return: String (in plaintext) that can be output to sys.stdout
     """
-    output = ""
+    output = StringIO()
     data = report.to_dict_lint()
 
     # If the project is not compliant:
@@ -56,59 +57,63 @@ def format_plain(report: ProjectReport) -> str:
         # Bad licenses
         bad_licenses = data["non_compliant"]["bad_licenses"]
         if bad_licenses:
-            output += "# " + _("BAD LICENSES") + "\n\n"
+            output.write("# " + _("BAD LICENSES") + "\n\n")
             for lic in sorted(bad_licenses.keys()):
-                output += _("'{}' found in:").format(lic) + "\n"
-                output += f"* {list(bad_licenses[lic])[0]}" + "\n\n"
-            output += "\n"
+                output.write(_("'{}' found in:").format(lic) + "\n")
+                output.write(f"* {list(bad_licenses[lic])[0]}" + "\n\n")
+            output.write("\n")
 
         # Deprecated licenses
         deprecated_licenses = data["non_compliant"]["deprecated_licenses"]
         if deprecated_licenses:
-            output += "# " + _("DEPRECATED LICENSES") + "\n\n"
-            output += _("The following licenses are deprecated by SPDX:") + "\n"
+            output.write("# " + _("DEPRECATED LICENSES") + "\n\n")
+            output.write(
+                _("The following licenses are deprecated by SPDX:") + "\n"
+            )
             for lic in sorted(deprecated_licenses):
-                output += f"* {lic}\n"
-            output += "\n\n"
+                output.write(f"* {lic}\n")
+            output.write("\n\n")
 
         # Licenses without extension
         licenses_without_extension = data["non_compliant"][
             "licenses_without_extension"
         ]
         if licenses_without_extension:
-            output += "# " + _("LICENSES WITHOUT FILE EXTENSION") + "\n\n"
-            output += _("The following licenses have no file extension:") + "\n"
+            output.write("# " + _("LICENSES WITHOUT FILE EXTENSION") + "\n\n")
+            output.write(
+                _("The following licenses have no file extension:") + "\n"
+            )
             for lic in sorted(licenses_without_extension):
-                output += f"* {str(licenses_without_extension[lic])}" + "\n"
-            output += "\n\n"
+                output.write(f"* {str(licenses_without_extension[lic])}" + "\n")
+            output.write("\n\n")
 
         # Missing licenses
         missing_licenses = data["non_compliant"]["missing_licenses"]
         if missing_licenses:
-            output += "# " + _("MISSING LICENSES") + "\n\n"
+            output.write("# " + _("MISSING LICENSES") + "\n\n")
             for lic in sorted(missing_licenses.keys()):
-                output += _("'{}' found in:").format(lic) + "\n"
+                output.write(_("'{}' found in:").format(lic) + "\n")
                 for file in sorted(missing_licenses[lic]):
-                    output += f"* {file}\n"
-            output += "\n\n"
+                    output.write(f"* {file}\n")
+            output.write("\n\n")
 
         # Unused licenses
         unused_licenses = data["non_compliant"]["unused_licenses"]
         if unused_licenses:
-            output += "# " + _("UNUSED LICENSES") + "\n\n"
-            output += _("The following licenses are not used:") + "\n"
+            output.write("# " + _("UNUSED LICENSES") + "\n\n")
+            output.write(_("The following licenses are not used:") + "\n")
             for lic in sorted(unused_licenses):
-                output += f"* {lic}\n"
-            output += "\n\n"
+                output.write(f"* {lic}\n")
+            output.write("\n\n")
 
         # Read errors
         read_errors = data["non_compliant"]["read_errors"]
         if read_errors:
-            output += "# " + _("READ ERRORS") + "\n\n"
-            output += _("Could not read:") + "\n"
+            output.write("# " + _("READ ERRORS") + "\n\n")
+            output.write(_("Could not read:") + "\n")
             for path in sorted(read_errors):
-                output += f"* {str(path)}" + "\n"
-            output += "\n\n"
+                output.write(f"* {str(path)}" + "\n")
+            output.write("\n\n")
 
         # Missing copyright and licensing information
         files_without_copyright = set(
@@ -124,34 +129,40 @@ def format_plain(report: ProjectReport) -> str:
         header = (
             "# " + _("MISSING COPYRIGHT AND LICENSING INFORMATION") + "\n\n"
         )
-        output += header
+        output.write(header)
         if files_without_both:
-            output += _(
-                "The following files have no copyright and licensing "
-                "information:"
+            output.write(
+                _(
+                    "The following files have no copyright and licensing "
+                    "information:"
+                )
             )
-            output += "\n"
+            output.write("\n")
             for file in sorted(files_without_both):
-                output += f"* {file}\n"
-            output += "\n"
+                output.write(f"* {file}\n")
+            output.write("\n")
 
         if files_without_copyright - files_without_both:
-            output += _("The following files have no copyright information:")
-            output += "\n"
+            output.write(
+                _("The following files have no copyright information:")
+            )
+            output.write("\n")
             for file in sorted(files_without_copyright - files_without_both):
-                output += f"* {file}\n"
-            output += "\n"
+                output.write(f"* {file}\n")
+            output.write("\n")
 
         if files_without_license - files_without_both:
-            output += _("The following files have no licensing information:")
-            output += "\n"
+            output.write(
+                _("The following files have no licensing information:")
+            )
+            output.write("\n")
             for file in sorted(files_without_license - files_without_both):
-                output += f"* {file}\n"
-            output += "\n"
+                output.write(f"* {file}\n")
+            output.write("\n")
 
-    output += "\n"
-    output += "# " + _("SUMMARY")
-    output += "\n\n"
+    output.write("\n")
+    output.write("# " + _("SUMMARY"))
+    output.write("\n\n")
 
     summary_contents = [
         (_("Bad licenses:"), ", ".join(data["non_compliant"]["bad_licenses"])),
@@ -199,22 +210,26 @@ def format_plain(report: ProjectReport) -> str:
             value = "0"
         if value == "empty":
             value = ""
-        output += "* " + key + " " + value + "\n"
+        output.write("* " + key + " " + value + "\n")
 
-    output += "\n"
+    output.write("\n")
     if data["summary"]["compliant"]:
-        output += _(
-            "Congratulations! Your project is compliant with version"
-            " {} of the REUSE Specification :-)"
-        ).format(data["reuse_spec_version"])
+        output.write(
+            _(
+                "Congratulations! Your project is compliant with version"
+                " {} of the REUSE Specification :-)"
+            ).format(data["reuse_spec_version"])
+        )
     else:
-        output += _(
-            "Unfortunately, your project is not compliant with version "
-            "{} of the REUSE Specification :-("
-        ).format(data["reuse_spec_version"])
-    output += "\n"
+        output.write(
+            _(
+                "Unfortunately, your project is not compliant with version "
+                "{} of the REUSE Specification :-("
+            ).format(data["reuse_spec_version"])
+        )
+    output.write("\n")
 
-    return output
+    return output.getvalue()
 
 
 def format_json(report: ProjectReport) -> str:

From d1c21469b570ccc54339990ca31f621c791ee046 Mon Sep 17 00:00:00 2001
From: linozen <linus@sehn.tech>
Date: Thu, 25 May 2023 16:51:28 +0200
Subject: [PATCH 52/60] Make sure that some keys are on top of sorted output

---
 src/reuse/lint.py | 16 ++++++++++++++--
 test              |  1 +
 2 files changed, 15 insertions(+), 2 deletions(-)
 create mode 160000 test

diff --git a/src/reuse/lint.py b/src/reuse/lint.py
index f9681d64..09b4a5b3 100644
--- a/src/reuse/lint.py
+++ b/src/reuse/lint.py
@@ -252,12 +252,24 @@ def custom_serializer(obj):
             f"Object of type {obj.__class__.__name__} is not JSON serializable"
         )
 
+    def custom_sort_key(key):
+        order = {
+            "lint_version": 0,
+            "reuse_spec_version": 1,
+            "reuse_tool_version": 2,
+        }
+        return (order.get(key, 3), key)
+
+    report_dict = report.to_dict_lint()
+    sorted_report_dict = dict(
+        sorted(report_dict.items(), key=lambda item: custom_sort_key(item[0]))
+    )
+
     return json.dumps(
-        report.to_dict_lint(),
+        sorted_report_dict,
         indent=2,
         # Serialize sets to lists
         default=custom_serializer,
-        sort_keys=True,
     )
 
 
diff --git a/test b/test
new file mode 160000
index 00000000..2703fff2
--- /dev/null
+++ b/test
@@ -0,0 +1 @@
+Subproject commit 2703fff268f581fc01293e275d16c8a210a5965c

From c8baa6ec630fa97c963fada0cc4c6e2a7f626aba Mon Sep 17 00:00:00 2001
From: linozen <linus@sehn.tech>
Date: Thu, 25 May 2023 16:57:42 +0200
Subject: [PATCH 53/60] Make `summary_contents` a dictionary

---
 src/reuse/lint.py | 74 ++++++++++++++++++++---------------------------
 1 file changed, 31 insertions(+), 43 deletions(-)

diff --git a/src/reuse/lint.py b/src/reuse/lint.py
index 09b4a5b3..ff0b7011 100644
--- a/src/reuse/lint.py
+++ b/src/reuse/lint.py
@@ -164,53 +164,41 @@ def format_plain(report: ProjectReport) -> str:
     output.write("# " + _("SUMMARY"))
     output.write("\n\n")
 
-    summary_contents = [
-        (_("Bad licenses:"), ", ".join(data["non_compliant"]["bad_licenses"])),
-        (
-            _("Deprecated licenses:"),
-            ", ".join(data["non_compliant"]["deprecated_licenses"]),
+    summary_contents = {
+        _("Bad licenses:"): ", ".join(data["non_compliant"]["bad_licenses"]),
+        _("Deprecated licenses:"): ", ".join(
+            data["non_compliant"]["deprecated_licenses"]
         ),
-        (
-            _("Licenses without file extension:"),
-            ", ".join(data["non_compliant"]["licenses_without_extension"]),
+        _("Licenses without file extension:"): ", ".join(
+            data["non_compliant"]["licenses_without_extension"]
         ),
-        (
-            _("Missing licenses:"),
-            ", ".join(data["non_compliant"]["missing_licenses"]),
+        _("Missing licenses:"): ", ".join(
+            data["non_compliant"]["missing_licenses"]
         ),
-        (
-            _("Unused licenses:"),
-            ", ".join(data["non_compliant"]["unused_licenses"]),
+        _("Unused licenses:"): ", ".join(
+            data["non_compliant"]["unused_licenses"]
         ),
-        (_("Used licenses:"), ", ".join(data["summary"]["used_licenses"])),
-        (
-            _("Read errors: {count}").format(
-                count=len(data["non_compliant"]["read_errors"])
-            ),
-            "empty",
-        ),
-        (
-            _("files with copyright information: {count} / {total}").format(
-                count=data["summary"]["files_with_copyright_info"],
-                total=data["summary"]["files_total"],
-            ),
-            "empty",
-        ),
-        (
-            _("files with license information: {count} / {total}").format(
-                count=data["summary"]["files_with_licensing_info"],
-                total=data["summary"]["files_total"],
-            ),
-            "empty",
-        ),
-    ]
-
-    for key, value in summary_contents:
-        if not value:
-            value = "0"
-        if value == "empty":
-            value = ""
-        output.write("* " + key + " " + value + "\n")
+        _("Used licenses:"): ", ".join(data["summary"]["used_licenses"]),
+        _("Read errors: {count}").format(
+            count=len(data["non_compliant"]["read_errors"])
+        ): "empty",
+        _("files with copyright information: {count} / {total}").format(
+            count=data["summary"]["files_with_copyright_info"],
+            total=data["summary"]["files_total"],
+        ): "empty",
+        _("files with license information: {count} / {total}").format(
+            count=data["summary"]["files_with_licensing_info"],
+            total=data["summary"]["files_total"],
+        ): "empty",
+    }
+
+    filtered_summary_contents = {
+        key: (value if value not in ("", "empty") else "0" if not value else "")
+        for key, value in summary_contents.items()
+    }
+
+    for key, value in filtered_summary_contents.items():
+        output.write(f"* {key} {value}\n")
 
     output.write("\n")
     if data["summary"]["compliant"]:

From 983b0c4158b4185c9d25f1a0af09aed9c3326f10 Mon Sep 17 00:00:00 2001
From: linozen <linus@sehn.tech>
Date: Fri, 26 May 2023 17:02:51 +0200
Subject: [PATCH 54/60] Simplify `lint --quiet`

---
 src/reuse/lint.py | 28 ++++++++++------------------
 1 file changed, 10 insertions(+), 18 deletions(-)

diff --git a/src/reuse/lint.py b/src/reuse/lint.py
index ff0b7011..29a7268c 100644
--- a/src/reuse/lint.py
+++ b/src/reuse/lint.py
@@ -7,9 +7,7 @@
 the reports and printing some conclusions.
 """
 
-import contextlib
 import json
-import os
 import sys
 from gettext import gettext as _
 from io import StringIO
@@ -21,10 +19,10 @@
 
 def add_arguments(parser):
     """Add arguments to parser."""
-    parser.add_argument(
+    mutex_group = parser.add_mutually_exclusive_group()
+    mutex_group.add_argument(
         "-q", "--quiet", action="store_true", help=_("prevents output")
     )
-    mutex_group = parser.add_mutually_exclusive_group()
     mutex_group.add_argument(
         "-j", "--json", action="store_true", help=_("formats output as JSON")
     )
@@ -37,7 +35,7 @@ def add_arguments(parser):
     mutex_group.add_argument(
         "--format",
         nargs="?",
-        choices=("json", "plain"),
+        choices=("json", "plain", "quiet"),
         help=_("formats output using the chosen formatter"),
     )
 
@@ -261,23 +259,17 @@ def custom_sort_key(key):
     )
 
 
-def run(args, project: Project, out=sys.stdout, formatter=format_plain):
+def run(args, project: Project, out=sys.stdout):
     """List all non-compliant files."""
     report = ProjectReport.generate(
         project, do_checksum=False, multiprocessing=not args.no_multiprocessing
     )
 
-    with contextlib.ExitStack() as stack:
-        if args.quiet:
-            out = stack.enter_context(open(os.devnull, "w", encoding="utf-8"))
-
-        if args.json or args.format == "json":
-            formatter = format_json
-        elif args.plain or args.format == "plain":
-            formatter = format_plain
-        else:
-            formatter = format_plain
-
-        out.write(formatter(report))
+    if args.quiet or args.format == "quiet":
+        pass
+    elif args.json or args.format == "json":
+        out.write(format_json(report))
+    else:
+        out.write(format_plain(report))
 
     return 0 if report.is_compliant else 1

From d7af2e282dfaea9a361a0e19cb1b3d78d2296ea2 Mon Sep 17 00:00:00 2001
From: linozen <linus@sehn.tech>
Date: Fri, 26 May 2023 17:20:44 +0200
Subject: [PATCH 55/60] Move dictionary sorting and test it

---
 src/reuse/lint.py    | 15 +--------------
 src/reuse/report.py  | 27 +++++++++++++++++++++++----
 tests/test_report.py | 15 ++++++++++++---
 3 files changed, 36 insertions(+), 21 deletions(-)

diff --git a/src/reuse/lint.py b/src/reuse/lint.py
index 29a7268c..25eb9022 100644
--- a/src/reuse/lint.py
+++ b/src/reuse/lint.py
@@ -238,21 +238,8 @@ def custom_serializer(obj):
             f"Object of type {obj.__class__.__name__} is not JSON serializable"
         )
 
-    def custom_sort_key(key):
-        order = {
-            "lint_version": 0,
-            "reuse_spec_version": 1,
-            "reuse_tool_version": 2,
-        }
-        return (order.get(key, 3), key)
-
-    report_dict = report.to_dict_lint()
-    sorted_report_dict = dict(
-        sorted(report_dict.items(), key=lambda item: custom_sort_key(item[0]))
-    )
-
     return json.dumps(
-        sorted_report_dict,
+        report.to_dict_lint(),
         indent=2,
         # Serialize sets to lists
         default=custom_serializer,
diff --git a/src/reuse/report.py b/src/reuse/report.py
index 6ef8a683..d74c2df9 100644
--- a/src/reuse/report.py
+++ b/src/reuse/report.py
@@ -89,9 +89,6 @@ def to_dict_lint(self):
         """
         # Setup report data container
         data = {
-            "lint_version": LINT_VERSION,
-            "reuse_spec_version": __REUSE_version__,
-            "reuse_tool_version": __version__,
             "non_compliant": {
                 "missing_licenses": self.missing_licenses,
                 "unused_licenses": [str(file) for file in self.unused_licenses],
@@ -129,7 +126,29 @@ def to_dict_lint(self):
             - len(self.files_without_licenses),
             "compliant": self.is_compliant,
         }
-        return data
+
+        # Add the top three keys
+        unsorted_data = {
+            "lint_version": LINT_VERSION,
+            "reuse_spec_version": __REUSE_version__,
+            "reuse_tool_version": __version__,
+            **data,
+        }
+
+        # Sort dictionary keys while keeping the top three keys at the beginning
+        sorted_keys = sorted(list(unsorted_data.keys()))
+        sorted_keys.remove("lint_version")
+        sorted_keys.remove("reuse_spec_version")
+        sorted_keys.remove("reuse_tool_version")
+        sorted_keys = [
+            "lint_version",
+            "reuse_spec_version",
+            "reuse_tool_version",
+        ] + sorted_keys
+
+        sorted_data = {key: unsorted_data[key] for key in sorted_keys}
+
+        return sorted_data
 
     def bill_of_materials(
         self,
diff --git a/tests/test_report.py b/tests/test_report.py
index 5a5c77c4..4bc2a23f 100644
--- a/tests/test_report.py
+++ b/tests/test_report.py
@@ -379,11 +379,20 @@ def test_generate_project_report_read_error(fake_repository, multiprocessing):
 
 
 def test_generate_project_report_to_dict_lint(fake_repository, multiprocessing):
-    """Extremely simple test for ProjectReport.to_dict."""
+    """Generate dictionary output and verify correct ordering."""
     project = Project(fake_repository)
     report = ProjectReport.generate(project, multiprocessing=multiprocessing)
-    # TODO: Actually do something
-    report.to_dict_lint()
+    result = report.to_dict_lint()
+
+    # Check if the top three keys are at the beginning of the dictionary
+    assert list(result.keys())[:3] == [
+        "lint_version",
+        "reuse_spec_version",
+        "reuse_tool_version",
+    ]
+
+    # Check if the rest of the keys are sorted alphabetically
+    assert list(result.keys())[3:] == sorted(list(result.keys())[3:])
 
 
 def test_bill_of_materials(fake_repository, multiprocessing):

From 33456cbeb1aeb2daf5f4b85f6fc349cf2486ff80 Mon Sep 17 00:00:00 2001
From: linozen <linus@sehn.tech>
Date: Fri, 26 May 2023 17:25:42 +0200
Subject: [PATCH 56/60] Rename `SpdxInfo` to `ReuseInfo`

---
 src/reuse/__init__.py |   4 +-
 src/reuse/_util.py    |  14 +++---
 src/reuse/header.py   |  16 +++----
 src/reuse/project.py  |  12 ++---
 src/reuse/report.py   |   4 +-
 tests/test_core.py    |   8 ++--
 tests/test_header.py  | 106 ++++++++++++++++--------------------------
 tests/test_util.py    |   2 +-
 8 files changed, 71 insertions(+), 95 deletions(-)

diff --git a/src/reuse/__init__.py b/src/reuse/__init__.py
index 46cd1fd2..fe8e70f7 100644
--- a/src/reuse/__init__.py
+++ b/src/reuse/__init__.py
@@ -89,8 +89,8 @@
 
 
 @dataclass(frozen=True)
-class SpdxInfo:
-    """Simple class holding SPDX information"""
+class ReuseInfo:
+    """Simple dataclass holding licensing and copyright information"""
 
     spdx_expressions: Set[Expression] = field(default_factory=set)
     copyright_lines: Set[str] = field(default_factory=set)
diff --git a/src/reuse/_util.py b/src/reuse/_util.py
index 693d6615..22bf92c6 100644
--- a/src/reuse/_util.py
+++ b/src/reuse/_util.py
@@ -31,7 +31,7 @@
 from debian.copyright import Copyright
 from license_expression import ExpressionError, Licensing
 
-from . import SpdxInfo
+from . import ReuseInfo
 from ._licenses import ALL_NON_DEPRECATED_MAP
 from .comment import _all_style_classes
 
@@ -203,14 +203,16 @@ def _determine_license_suffix_path(path: PathLike) -> Path:
     return Path(f"{path}.license")
 
 
-def _copyright_from_dep5(path: PathLike, dep5_copyright: Copyright) -> SpdxInfo:
+def _copyright_from_dep5(
+    path: PathLike, dep5_copyright: Copyright
+) -> ReuseInfo:
     """Find the reuse information of *path* in the dep5 Copyright object."""
     result = dep5_copyright.find_files_paragraph(Path(path).as_posix())
 
     if result is None:
-        return SpdxInfo(set(), set(), license_path=str(path))
+        return ReuseInfo(set(), set(), license_path=str(path))
 
-    return SpdxInfo(
+    return ReuseInfo(
         set(map(_LICENSING.parse, [result.license.synopsis])),
         set(map(str.strip, result.copyright.splitlines())),
         license_path=str(path),
@@ -290,7 +292,7 @@ def merge_copyright_lines(copyright_lines: Set[str]) -> Set[str]:
     return copyright_out
 
 
-def extract_spdx_info(text: str) -> SpdxInfo:
+def extract_spdx_info(text: str) -> ReuseInfo:
     """Extract SPDX information from comments in a string.
 
     :raises ExpressionError: if an SPDX expression could not be parsed
@@ -317,7 +319,7 @@ def extract_spdx_info(text: str) -> SpdxInfo:
                 copyright_matches.add(match.groupdict()["copyright"].strip())
                 break
 
-    return SpdxInfo(expressions, copyright_matches, "")
+    return ReuseInfo(expressions, copyright_matches, "")
 
 
 def find_license_identifiers(text: str) -> Iterator[str]:
diff --git a/src/reuse/header.py b/src/reuse/header.py
index 8454c283..bef038d6 100644
--- a/src/reuse/header.py
+++ b/src/reuse/header.py
@@ -33,7 +33,7 @@
 from jinja2.exceptions import TemplateNotFound
 from license_expression import ExpressionError
 
-from . import SpdxInfo
+from . import ReuseInfo
 from ._util import (
     _COPYRIGHT_STYLES,
     PathType,
@@ -81,7 +81,7 @@ class MissingSpdxInfo(Exception):
 
 # TODO: Add a template here maybe.
 def _create_new_header(
-    spdx_info: SpdxInfo,
+    spdx_info: ReuseInfo,
     template: Template = None,
     template_is_commented: bool = False,
     style: CommentStyle = None,
@@ -131,7 +131,7 @@ def _create_new_header(
 
 # pylint: disable=too-many-arguments
 def create_header(
-    spdx_info: SpdxInfo,
+    spdx_info: ReuseInfo,
     header: str = None,
     template: Template = None,
     template_is_commented: bool = False,
@@ -174,7 +174,7 @@ def create_header(
             )
 
         # TODO: This behaviour does not match the docstring.
-        spdx_info = SpdxInfo(
+        spdx_info = ReuseInfo(
             spdx_info.spdx_expressions.union(existing_spdx.spdx_expressions),
             spdx_copyrights,
             spdx_info.contributor_lines.union(existing_spdx.contributor_lines),
@@ -250,7 +250,7 @@ def _extract_shebang(prefix: str, text: str) -> Tuple[str, str]:
 # pylint: disable=too-many-arguments
 def find_and_replace_header(
     text: str,
-    spdx_info: SpdxInfo,
+    spdx_info: ReuseInfo,
     template: Template = None,
     template_is_commented: bool = False,
     style: CommentStyle = None,
@@ -327,7 +327,7 @@ def find_and_replace_header(
 # pylint: disable=too-many-arguments
 def add_new_header(
     text: str,
-    spdx_info: SpdxInfo,
+    spdx_info: ReuseInfo,
     template: Template = None,
     template_is_commented: bool = False,
     style: CommentStyle = None,
@@ -464,7 +464,7 @@ def _find_template(project: Project, name: str) -> Template:
 
 def _add_header_to_file(
     path: PathLike,
-    spdx_info: SpdxInfo,
+    spdx_info: ReuseInfo,
     template: Template,
     template_is_commented: bool,
     style: Optional[str],
@@ -785,7 +785,7 @@ def run(args, project: Project, out=sys.stdout) -> int:
         set(args.contributor) if args.contributor is not None else set()
     )
 
-    spdx_info = SpdxInfo(expressions, copyright_lines, contributors, "")
+    spdx_info = ReuseInfo(expressions, copyright_lines, contributors, "")
 
     result = 0
     for path in paths:
diff --git a/src/reuse/project.py b/src/reuse/project.py
index 37eee586..5fabcda1 100644
--- a/src/reuse/project.py
+++ b/src/reuse/project.py
@@ -24,7 +24,7 @@
     _IGNORE_FILE_PATTERNS,
     _IGNORE_MESON_PARENT_DIR_PATTERNS,
     IdentifierNotFound,
-    SpdxInfo,
+    ReuseInfo,
 )
 from ._licenses import EXCEPTION_MAP, LICENSE_MAP
 from ._util import (
@@ -140,7 +140,7 @@ def all_files(self, directory: PathLike = None) -> Iterator[Path]:
                 _LOGGER.debug("yielding '%s'", the_file)
                 yield the_file
 
-    def spdx_info_of(self, path: PathLike) -> SpdxInfo:
+    def spdx_info_of(self, path: PathLike) -> ReuseInfo:
         """Return SPDX info of *path*.
 
         This function will return any SPDX information that it can find, both
@@ -156,8 +156,8 @@ def spdx_info_of(self, path: PathLike) -> SpdxInfo:
 
         # This means that only one 'source' of licensing/copyright information
         # is captured in SpdxInfo
-        dep5_result = SpdxInfo(set(), set())
-        file_result = SpdxInfo(set(), set())
+        dep5_result = ReuseInfo(set(), set())
+        file_result = ReuseInfo(set(), set())
 
         # Search the .reuse/dep5 file for SPDX information.
         if self._copyright:
@@ -214,13 +214,13 @@ def spdx_info_of(self, path: PathLike) -> SpdxInfo:
             dep5_result.contains_copyright_or_licensing()
             and not file_result.contains_copyright_or_licensing()
         ):
-            return SpdxInfo(
+            return ReuseInfo(
                 spdx_expressions=dep5_result.spdx_expressions,
                 copyright_lines=dep5_result.copyright_lines,
                 license_path=license_path,
             )
         # There is only a file header
-        return SpdxInfo(
+        return ReuseInfo(
             spdx_expressions=file_result.spdx_expressions,
             copyright_lines=file_result.copyright_lines,
             license_path=license_path,
diff --git a/src/reuse/report.py b/src/reuse/report.py
index d74c2df9..6defb2e7 100644
--- a/src/reuse/report.py
+++ b/src/reuse/report.py
@@ -20,7 +20,7 @@
 
 from . import __REUSE_version__, __version__
 from ._util import _LICENSING, _checksum
-from .project import Project, SpdxInfo
+from .project import Project, ReuseInfo
 
 _LOGGER = logging.getLogger(__name__)
 
@@ -390,7 +390,7 @@ def __init__(self, name, spdx_id=None, chk_sum=None):
         self.licenses_in_file: List[str] = []
         self.license_concluded: str = None
         self.copyright: str = None
-        self.info: SpdxInfo = None
+        self.info: ReuseInfo = None
 
 
 class FileReport:
diff --git a/tests/test_core.py b/tests/test_core.py
index 2b926eb4..85c80bf6 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -4,7 +4,7 @@
 
 """Tests for some core components."""
 
-from reuse import SpdxInfo
+from reuse import ReuseInfo
 
 # REUSE-IgnoreStart
 
@@ -17,19 +17,19 @@ def test_spdx_info_contains_copyright_or_licensing():
         ({"GPL-3.0-or-later"}, "SPDX-FileCopyrightText: 2017 Jane Doe"),
     ]
     for args in arguments:
-        info = SpdxInfo(*args)
+        info = ReuseInfo(*args)
         assert info.contains_copyright_or_licensing()
 
 
 def test_spdx_info_contains_copyright_or_licensing_empty():
     """If the SpdxInfo object is completely empty, expect False."""
-    info = SpdxInfo()
+    info = ReuseInfo()
     assert not info.contains_copyright_or_licensing()
 
 
 def test_spdx_info_contains_copyright_or_licensing_other_truthy():
     """If another attribute is truthy, still expect False."""
-    info = SpdxInfo(contributor_lines={"SPDX-FileContributor: 2017 Jane Doe"})
+    info = ReuseInfo(contributor_lines={"SPDX-FileContributor: 2017 Jane Doe"})
     assert not info.contains_copyright_or_licensing()
 
 
diff --git a/tests/test_header.py b/tests/test_header.py
index c97d4144..0a1a492a 100644
--- a/tests/test_header.py
+++ b/tests/test_header.py
@@ -9,7 +9,7 @@
 
 import pytest
 
-from reuse import SpdxInfo
+from reuse import ReuseInfo
 from reuse.comment import CCommentStyle, CommentCreateError
 from reuse.header import (
     MissingSpdxInfo,
@@ -23,9 +23,7 @@
 
 def test_create_header_simple():
     """Create a super simple header."""
-    spdx_info = SpdxInfo(
-        {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"}
-    )
+    info = ReuseInfo({"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"})
     expected = cleandoc(
         """
         # SPDX-FileCopyrightText: Jane Doe
@@ -34,12 +32,12 @@ def test_create_header_simple():
         """
     )
 
-    assert create_header(spdx_info).strip() == expected
+    assert create_header(info).strip() == expected
 
 
 def test_create_header_simple_with_contributor():
     """Create a super simple header."""
-    spdx_info = SpdxInfo(
+    info = ReuseInfo(
         {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"}, {"John Doe"}
     )
     expected = cleandoc(
@@ -51,14 +49,12 @@ def test_create_header_simple_with_contributor():
         """
     )
 
-    assert create_header(spdx_info).strip() == expected
+    assert create_header(info).strip() == expected
 
 
 def test_create_header_template_simple(template_simple):
     """Create a header with a simple template."""
-    spdx_info = SpdxInfo(
-        {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"}
-    )
+    info = ReuseInfo({"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"})
     expected = cleandoc(
         """
         # Hello, world!
@@ -69,26 +65,20 @@ def test_create_header_template_simple(template_simple):
         """
     )
 
-    assert (
-        create_header(spdx_info, template=template_simple).strip() == expected
-    )
+    assert create_header(info, template=template_simple).strip() == expected
 
 
 def test_create_header_template_no_spdx(template_no_spdx):
     """Create a header with a template that does not have all SPDX info."""
-    spdx_info = SpdxInfo(
-        {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"}
-    )
+    info = ReuseInfo({"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"})
 
     with pytest.raises(MissingSpdxInfo):
-        create_header(spdx_info, template=template_no_spdx)
+        create_header(info, template=template_no_spdx)
 
 
 def test_create_header_template_commented(template_commented):
     """Create a header with an already-commented template."""
-    spdx_info = SpdxInfo(
-        {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"}
-    )
+    info = ReuseInfo({"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"})
     expected = cleandoc(
         """
         # Hello, world!
@@ -101,7 +91,7 @@ def test_create_header_template_commented(template_commented):
 
     assert (
         create_header(
-            spdx_info,
+            info,
             template=template_commented,
             template_is_commented=True,
             style=CCommentStyle,
@@ -112,9 +102,7 @@ def test_create_header_template_commented(template_commented):
 
 def test_create_header_already_contains_spdx():
     """Create a new header from a header that already contains SPDX info."""
-    spdx_info = SpdxInfo(
-        {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"}
-    )
+    info = ReuseInfo({"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"})
     existing = cleandoc(
         """
         # SPDX-FileCopyrightText: John Doe
@@ -132,14 +120,12 @@ def test_create_header_already_contains_spdx():
         """
     )
 
-    assert create_header(spdx_info, header=existing).strip() == expected
+    assert create_header(info, header=existing).strip() == expected
 
 
 def test_create_header_existing_is_wrong():
     """If the existing header contains errors, raise a CommentCreateError."""
-    spdx_info = SpdxInfo(
-        {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"}
-    )
+    info = ReuseInfo({"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"})
     existing = cleandoc(
         """
         # SPDX-FileCopyrightText: John Doe
@@ -149,12 +135,12 @@ def test_create_header_existing_is_wrong():
     )
 
     with pytest.raises(CommentCreateError):
-        create_header(spdx_info, header=existing)
+        create_header(info, header=existing)
 
 
 def test_create_header_old_syntax():
     """Old copyright syntax is preserved when creating a new header."""
-    spdx_info = SpdxInfo({"GPL-3.0-or-later"}, set())
+    info = ReuseInfo({"GPL-3.0-or-later"}, set())
     existing = cleandoc(
         """
         # Copyright John Doe
@@ -168,12 +154,12 @@ def test_create_header_old_syntax():
         """
     )
 
-    assert create_header(spdx_info, header=existing).strip() == expected
+    assert create_header(info, header=existing).strip() == expected
 
 
 def test_create_header_remove_fluff():
     """Any stuff that isn't SPDX info is removed when using create_header."""
-    spdx_info = SpdxInfo({"GPL-3.0-or-later"}, set())
+    info = ReuseInfo({"GPL-3.0-or-later"}, set())
     existing = cleandoc(
         """
         # SPDX-FileCopyrightText: John Doe
@@ -191,16 +177,14 @@ def test_create_header_remove_fluff():
         """
     )
 
-    assert create_header(spdx_info, header=existing).strip() == expected
+    assert create_header(info, header=existing).strip() == expected
 
 
 def test_add_new_header_simple():
     """Given text that already contains a header, create a new one, and preserve
     the old one.
     """
-    spdx_info = SpdxInfo(
-        {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"}
-    )
+    info = ReuseInfo({"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"})
     text = cleandoc(
         """
         # SPDX-FileCopyrightText: John Doe
@@ -223,14 +207,12 @@ def test_add_new_header_simple():
         pass
         """
     )
-    assert add_new_header(text, spdx_info) == expected
+    assert add_new_header(text, info) == expected
 
 
 def test_find_and_replace_no_header():
     """Given text without header, add a header."""
-    spdx_info = SpdxInfo(
-        {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"}
-    )
+    info = ReuseInfo({"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"})
     text = "pass"
     expected = cleandoc(
         """
@@ -243,15 +225,15 @@ def test_find_and_replace_no_header():
     )
 
     assert (
-        find_and_replace_header(text, spdx_info)
-        == add_new_header(text, spdx_info)
+        find_and_replace_header(text, info)
+        == add_new_header(text, info)
         == expected
     )
 
 
 def test_find_and_replace_verbatim():
     """Replace a header with itself."""
-    spdx_info = SpdxInfo(set(), set())
+    info = ReuseInfo(set(), set())
     text = cleandoc(
         """
         # SPDX-FileCopyrightText: Jane Doe
@@ -262,16 +244,14 @@ def test_find_and_replace_verbatim():
         """
     )
 
-    assert find_and_replace_header(text, spdx_info) == text
+    assert find_and_replace_header(text, info) == text
 
 
 def test_find_and_replace_newline_before_header():
     """In a scenario where the header is preceded by whitespace, remove the
     preceding whitespace.
     """
-    spdx_info = SpdxInfo(
-        {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: John Doe"}
-    )
+    info = ReuseInfo({"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: John Doe"})
     text = cleandoc(
         """
         # SPDX-FileCopyrightText: Jane Doe
@@ -291,14 +271,12 @@ def test_find_and_replace_newline_before_header():
         """
     )
 
-    assert find_and_replace_header(text, spdx_info) == expected
+    assert find_and_replace_header(text, info) == expected
 
 
 def test_find_and_replace_preserve_preceding():
     """When the SPDX header is in the middle of the file, keep it there."""
-    spdx_info = SpdxInfo(
-        {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: John Doe"}
-    )
+    info = ReuseInfo({"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: John Doe"})
     text = cleandoc(
         """
         # Hello, world!
@@ -327,16 +305,14 @@ def foo(bar):
         """
     )
 
-    assert find_and_replace_header(text, spdx_info) == expected
+    assert find_and_replace_header(text, info) == expected
 
 
 def test_find_and_replace_keep_shebang():
     """When encountering a shebang, keep it and put the REUSE header beneath
     it.
     """
-    spdx_info = SpdxInfo(
-        {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: John Doe"}
-    )
+    info = ReuseInfo({"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: John Doe"})
     text = cleandoc(
         """
         #!/usr/bin/env python3
@@ -359,14 +335,14 @@ def test_find_and_replace_keep_shebang():
         """
     )
 
-    assert find_and_replace_header(text, spdx_info) == expected
+    assert find_and_replace_header(text, info) == expected
 
 
 def test_find_and_replace_separate_shebang():
     """When the shebang is part of the same comment as the SPDX comment,
     separate the two.
     """
-    spdx_info = SpdxInfo({"GPL-3.0-or-later"}, set())
+    info = ReuseInfo({"GPL-3.0-or-later"}, set())
     text = cleandoc(
         """
         #!/usr/bin/env python3
@@ -389,12 +365,12 @@ def test_find_and_replace_separate_shebang():
         """
     )
 
-    assert find_and_replace_header(text, spdx_info) == expected
+    assert find_and_replace_header(text, info) == expected
 
 
 def test_find_and_replace_only_shebang():
     """When the file only contains a shebang, keep it at the top of the file."""
-    spdx_info = SpdxInfo({"GPL-3.0-or-later"}, set())
+    info = ReuseInfo({"GPL-3.0-or-later"}, set())
     text = cleandoc(
         """
         #!/usr/bin/env python3
@@ -416,16 +392,14 @@ def test_find_and_replace_only_shebang():
         """
     )
 
-    assert find_and_replace_header(text, spdx_info) == expected
+    assert find_and_replace_header(text, info) == expected
 
 
 def test_find_and_replace_keep_old_comment():
     """When encountering a comment that does not contain copyright and
     licensing information, preserve it below the REUSE header.
     """
-    spdx_info = SpdxInfo(
-        {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"}
-    )
+    info = ReuseInfo({"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"})
     text = cleandoc(
         """
         # Hello, world!
@@ -445,13 +419,13 @@ def test_find_and_replace_keep_old_comment():
         """
     )
 
-    assert find_and_replace_header(text, spdx_info) == expected
+    assert find_and_replace_header(text, info) == expected
 
 
 def test_find_and_replace_preserve_newline():
     """If the file content ends with a newline, don't remove it."""
 
-    spdx_info = SpdxInfo(set(), set())
+    info = ReuseInfo(set(), set())
     text = (
         cleandoc(
             """
@@ -465,7 +439,7 @@ def test_find_and_replace_preserve_newline():
         + "\n"
     )
 
-    assert find_and_replace_header(text, spdx_info) == text
+    assert find_and_replace_header(text, info) == text
 
 
 # REUSE-IgnoreEnd
diff --git a/tests/test_util.py b/tests/test_util.py
index 49011e65..45ba6e37 100644
--- a/tests/test_util.py
+++ b/tests/test_util.py
@@ -78,7 +78,7 @@ def test_extract_no_info():
     object.
     """
     result = _util.extract_spdx_info("")
-    assert result == _util.SpdxInfo(set(), set(), "")
+    assert result == _util.ReuseInfo(set(), set(), "")
 
 
 def test_extract_tab():

From 962521f3e1542a3f3e38fb0ae1756bafe2b5b88f Mon Sep 17 00:00:00 2001
From: linozen <linus@sehn.tech>
Date: Fri, 26 May 2023 17:31:47 +0200
Subject: [PATCH 57/60] Rename `ReuseInfo.license_path` to
 `ReuseInfo.source_path`

---
 src/reuse/__init__.py |  2 +-
 src/reuse/_util.py    |  4 ++--
 src/reuse/project.py  | 10 +++++-----
 src/reuse/report.py   |  4 ++--
 4 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/src/reuse/__init__.py b/src/reuse/__init__.py
index fe8e70f7..350ff332 100644
--- a/src/reuse/__init__.py
+++ b/src/reuse/__init__.py
@@ -95,7 +95,7 @@ class ReuseInfo:
     spdx_expressions: Set[Expression] = field(default_factory=set)
     copyright_lines: Set[str] = field(default_factory=set)
     contributor_lines: Set[str] = field(default_factory=set)
-    license_path: Optional[str] = None
+    source_path: Optional[str] = None
 
     def contains_copyright_or_licensing(self) -> bool:
         """Either *spdx_expressions* or *copyright_lines* is non-empty."""
diff --git a/src/reuse/_util.py b/src/reuse/_util.py
index 22bf92c6..2137f7ab 100644
--- a/src/reuse/_util.py
+++ b/src/reuse/_util.py
@@ -210,12 +210,12 @@ def _copyright_from_dep5(
     result = dep5_copyright.find_files_paragraph(Path(path).as_posix())
 
     if result is None:
-        return ReuseInfo(set(), set(), license_path=str(path))
+        return ReuseInfo(set(), set(), source_path=str(path))
 
     return ReuseInfo(
         set(map(_LICENSING.parse, [result.license.synopsis])),
         set(map(str.strip, result.copyright.splitlines())),
-        license_path=str(path),
+        source_path=str(path),
     )
 
 
diff --git a/src/reuse/project.py b/src/reuse/project.py
index 5fabcda1..65098c2f 100644
--- a/src/reuse/project.py
+++ b/src/reuse/project.py
@@ -152,7 +152,7 @@ def spdx_info_of(self, path: PathLike) -> ReuseInfo:
         path = _determine_license_path(path)
         _LOGGER.debug(f"searching '{path}' for SPDX information")
 
-        license_path = ""
+        source_path = ""
 
         # This means that only one 'source' of licensing/copyright information
         # is captured in SpdxInfo
@@ -168,7 +168,7 @@ def spdx_info_of(self, path: PathLike) -> ReuseInfo:
                 _LOGGER.info(
                     _("'{path}' covered by .reuse/dep5").format(path=path)
                 )
-                license_path = str(self.root / ".reuse/dep5")
+                source_path = str(self.root / ".reuse/dep5")
 
         # Search the file for SPDX information.
         with path.open("rb") as fp:
@@ -186,7 +186,7 @@ def spdx_info_of(self, path: PathLike) -> ReuseInfo:
                     decoded_text_from_binary(fp, size=read_limit)
                 )
                 if file_result:
-                    license_path = str(path)
+                    source_path = str(path)
             except (ExpressionError, ParseError):
                 _LOGGER.error(
                     _(
@@ -217,13 +217,13 @@ def spdx_info_of(self, path: PathLike) -> ReuseInfo:
             return ReuseInfo(
                 spdx_expressions=dep5_result.spdx_expressions,
                 copyright_lines=dep5_result.copyright_lines,
-                license_path=license_path,
+                source_path=source_path,
             )
         # There is only a file header
         return ReuseInfo(
             spdx_expressions=file_result.spdx_expressions,
             copyright_lines=file_result.copyright_lines,
-            license_path=license_path,
+            source_path=source_path,
         )
 
     def relative_from_root(self, path: Path) -> Path:
diff --git a/src/reuse/report.py b/src/reuse/report.py
index 6defb2e7..f4d23671 100644
--- a/src/reuse/report.py
+++ b/src/reuse/report.py
@@ -416,13 +416,13 @@ def to_dict_lint(self):
             "path": str(Path(self.path).resolve()),
             # TODO: Why does every copyright line have the same source?
             "copyrights": [
-                {"value": copyright_, "source": self.spdxfile.info.license_path}
+                {"value": copyright_, "source": self.spdxfile.info.source_path}
                 for copyright_ in self.spdxfile.copyright.split("\n")
                 if copyright_
             ],
             # TODO: Why does every license expression have the same source?
             "licenses": [
-                {"value": license_, "source": self.spdxfile.info.license_path}
+                {"value": license_, "source": self.spdxfile.info.source_path}
                 for license_ in self.spdxfile.licenses_in_file
                 if license_
             ],

From 852b3fbc0e4090518fe912ae119e0d21bff958d0 Mon Sep 17 00:00:00 2001
From: linozen <linus@sehn.tech>
Date: Fri, 26 May 2023 18:27:55 +0200
Subject: [PATCH 58/60] Also encode `source_type` in `ReuseInfo`

---
 src/reuse/__init__.py |  1 +
 src/reuse/project.py  | 16 ++++++++++++----
 2 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/src/reuse/__init__.py b/src/reuse/__init__.py
index 350ff332..2da98817 100644
--- a/src/reuse/__init__.py
+++ b/src/reuse/__init__.py
@@ -96,6 +96,7 @@ class ReuseInfo:
     copyright_lines: Set[str] = field(default_factory=set)
     contributor_lines: Set[str] = field(default_factory=set)
     source_path: Optional[str] = None
+    source_type: Optional[str] = None
 
     def contains_copyright_or_licensing(self) -> bool:
         """Either *spdx_expressions* or *copyright_lines* is non-empty."""
diff --git a/src/reuse/project.py b/src/reuse/project.py
index 65098c2f..00b68a34 100644
--- a/src/reuse/project.py
+++ b/src/reuse/project.py
@@ -150,9 +150,10 @@ def spdx_info_of(self, path: PathLike) -> ReuseInfo:
         information, where 'primary' means '.license file' > 'header' > 'dep5'
         """
         path = _determine_license_path(path)
-        _LOGGER.debug(f"searching '{path}' for SPDX information")
-
         source_path = ""
+        source_type = ""
+
+        _LOGGER.debug(f"searching '{path}' for SPDX information")
 
         # This means that only one 'source' of licensing/copyright information
         # is captured in SpdxInfo
@@ -187,6 +188,11 @@ def spdx_info_of(self, path: PathLike) -> ReuseInfo:
                 )
                 if file_result:
                     source_path = str(path)
+                    if path.suffix == ".license":
+                        source_type = ".license file"
+                    else:
+                        source_type = "file header"
+
             except (ExpressionError, ParseError):
                 _LOGGER.error(
                     _(
@@ -209,7 +215,7 @@ def spdx_info_of(self, path: PathLike) -> ReuseInfo:
                     " is correct."
                 ).format(path=path, dep5_path=".reuse/dep5")
             )
-        # There is only a .dep5 file
+        # Information is only found in a DEP5 file
         elif (
             dep5_result.contains_copyright_or_licensing()
             and not file_result.contains_copyright_or_licensing()
@@ -218,12 +224,14 @@ def spdx_info_of(self, path: PathLike) -> ReuseInfo:
                 spdx_expressions=dep5_result.spdx_expressions,
                 copyright_lines=dep5_result.copyright_lines,
                 source_path=source_path,
+                source_type="DEP5 file",
             )
-        # There is only a file header
+        # There is a file header or a .license file
         return ReuseInfo(
             spdx_expressions=file_result.spdx_expressions,
             copyright_lines=file_result.copyright_lines,
             source_path=source_path,
+            source_type=source_type,
         )
 
     def relative_from_root(self, path: Path) -> Path:

From 322dd8a1cc3167690993d7b6a7f89c8a7314a1cf Mon Sep 17 00:00:00 2001
From: linozen <linus@sehn.tech>
Date: Fri, 26 May 2023 18:32:53 +0200
Subject: [PATCH 59/60] Rename `spdx_info_of` to `reuse_info_of`

---
 src/reuse/project.py  |  2 +-
 src/reuse/report.py   | 14 +++++----
 tests/test_project.py | 70 +++++++++++++++++++++----------------------
 3 files changed, 44 insertions(+), 42 deletions(-)

diff --git a/src/reuse/project.py b/src/reuse/project.py
index 00b68a34..7f650686 100644
--- a/src/reuse/project.py
+++ b/src/reuse/project.py
@@ -140,7 +140,7 @@ def all_files(self, directory: PathLike = None) -> Iterator[Path]:
                 _LOGGER.debug("yielding '%s'", the_file)
                 yield the_file
 
-    def spdx_info_of(self, path: PathLike) -> ReuseInfo:
+    def reuse_info_of(self, path: PathLike) -> ReuseInfo:
         """Return SPDX info of *path*.
 
         This function will return any SPDX information that it can find, both
diff --git a/src/reuse/report.py b/src/reuse/report.py
index f4d23671..1303f974 100644
--- a/src/reuse/report.py
+++ b/src/reuse/report.py
@@ -457,8 +457,8 @@ def generate(
         spdx_id.update(report.spdxfile.chk_sum.encode("utf-8"))
         report.spdxfile.spdx_id = f"SPDXRef-{spdx_id.hexdigest()}"
 
-        spdx_info = project.spdx_info_of(path)
-        for expression in spdx_info.spdx_expressions:
+        reuse_info = project.reuse_info_of(path)
+        for expression in reuse_info.spdx_expressions:
             for identifier in _LICENSING.license_keys(expression):
                 # A license expression akin to Apache-1.0+ should register
                 # correctly if LICENSES/Apache-1.0.txt exists.
@@ -477,7 +477,7 @@ def generate(
 
         if not add_license_concluded:
             report.spdxfile.license_concluded = "NOASSERTION"
-        elif not spdx_info.spdx_expressions:
+        elif not reuse_info.spdx_expressions:
             report.spdxfile.license_concluded = "NONE"
         else:
             # Merge all the license expressions together, wrapping them in
@@ -488,7 +488,7 @@ def generate(
                 _LICENSING.parse(
                     " AND ".join(
                         f"({expression})"
-                        for expression in spdx_info.spdx_expressions
+                        for expression in reuse_info.spdx_expressions
                     ),
                 )
                 .simplify()
@@ -496,9 +496,11 @@ def generate(
             )
 
         # Copyright text
-        report.spdxfile.copyright = "\n".join(sorted(spdx_info.copyright_lines))
+        report.spdxfile.copyright = "\n".join(
+            sorted(reuse_info.copyright_lines)
+        )
         # Source of licensing and copyright info
-        report.spdxfile.info = spdx_info
+        report.spdxfile.info = reuse_info
         return report
 
     def __hash__(self):
diff --git a/tests/test_project.py b/tests/test_project.py
index 15cd65ee..20aa80cb 100644
--- a/tests/test_project.py
+++ b/tests/test_project.py
@@ -201,34 +201,34 @@ def test_all_files_hg_ignored_contains_newline(hg_repository):
     assert Path("hello\nworld.pyc").absolute() not in project.all_files()
 
 
-def test_spdx_info_of_file_does_not_exist(fake_repository):
+def test_reuse_info_of_file_does_not_exist(fake_repository):
     """Raise FileNotFoundError when asking for the SPDX info of a file that
     does not exist.
     """
     project = Project(fake_repository)
     with pytest.raises(FileNotFoundError):
-        project.spdx_info_of(fake_repository / "does_not_exist")
+        project.reuse_info_of(fake_repository / "does_not_exist")
 
 
-def test_spdx_info_of_directory(empty_directory):
-    """Raise IsADirectoryError when calling spdx_info_of on a directory."""
+def test_reuse_info_of_directory(empty_directory):
+    """Raise IsADirectoryError when calling reuse_info_of on a directory."""
     (empty_directory / "src").mkdir()
 
     project = Project(empty_directory)
     with pytest.raises((IsADirectoryError, PermissionError)):
-        project.spdx_info_of(empty_directory / "src")
+        project.reuse_info_of(empty_directory / "src")
 
 
-def test_spdx_info_of_unlicensed_file(fake_repository):
+def test_reuse_info_of_unlicensed_file(fake_repository):
     """Return an empty SpdxInfo object when asking for the SPDX information
     of a file that has no SPDX information.
     """
     (fake_repository / "foo.py").write_text("foo")
     project = Project(fake_repository)
-    assert not bool(project.spdx_info_of("foo.py"))
+    assert not bool(project.reuse_info_of("foo.py"))
 
 
-def test_spdx_info_of_only_copyright(fake_repository):
+def test_reuse_info_of_only_copyright(fake_repository):
     """A file contains only a copyright line. Test whether it correctly picks
     up on that.
     """
@@ -236,16 +236,16 @@ def test_spdx_info_of_only_copyright(fake_repository):
         "SPDX-FileCopyrightText: 2017 Jane Doe"
     )
     project = Project(fake_repository)
-    spdx_info = project.spdx_info_of("foo.py")
-    assert not any(spdx_info.spdx_expressions)
-    assert len(spdx_info.copyright_lines) == 1
+    reuse_info = project.reuse_info_of("foo.py")
+    assert not any(reuse_info.spdx_expressions)
+    assert len(reuse_info.copyright_lines) == 1
     assert (
-        spdx_info.copyright_lines.pop()
+        reuse_info.copyright_lines.pop()
         == "SPDX-FileCopyrightText: 2017 Jane Doe"
     )
 
 
-def test_spdx_info_of_only_copyright_also_covered_by_debian(fake_repository):
+def test_reuse_info_of_only_copyright_also_covered_by_debian(fake_repository):
     """A file contains only a copyright line, but debian/copyright also has
     information on this file. Use only the information from file header.
     """
@@ -253,13 +253,13 @@ def test_spdx_info_of_only_copyright_also_covered_by_debian(fake_repository):
         "SPDX-FileCopyrightText: in file"
     )
     project = Project(fake_repository)
-    spdx_info = project.spdx_info_of("doc/foo.py")
+    reuse_info = project.reuse_info_of("doc/foo.py")
 
-    assert len(spdx_info.copyright_lines) == 1
-    assert "SPDX-FileCopyrightText: in file" in spdx_info.copyright_lines
+    assert len(reuse_info.copyright_lines) == 1
+    assert "SPDX-FileCopyrightText: in file" in reuse_info.copyright_lines
 
 
-def test_spdx_info_of_also_covered_by_dep5(fake_repository):
+def test_reuse_info_of_also_covered_by_dep5(fake_repository):
     """A file contains all SPDX information, but .reuse/dep5 also
     provides information on this file. Use only the information
     from the file header.
@@ -272,14 +272,14 @@ def test_spdx_info_of_also_covered_by_dep5(fake_repository):
         )
     )
     project = Project(fake_repository)
-    spdx_info = project.spdx_info_of("doc/foo.py")
-    assert LicenseSymbol("MIT") in spdx_info.spdx_expressions
-    assert LicenseSymbol("CC0-1.0") not in spdx_info.spdx_expressions
-    assert "SPDX-FileCopyrightText: in file" in spdx_info.copyright_lines
-    assert "2017 Jane Doe" not in spdx_info.copyright_lines
+    reuse_info = project.reuse_info_of("doc/foo.py")
+    assert LicenseSymbol("MIT") in reuse_info.spdx_expressions
+    assert LicenseSymbol("CC0-1.0") not in reuse_info.spdx_expressions
+    assert "SPDX-FileCopyrightText: in file" in reuse_info.copyright_lines
+    assert "2017 Jane Doe" not in reuse_info.copyright_lines
 
 
-def test_spdx_info_of_no_duplicates(empty_directory):
+def test_reuse_info_of_no_duplicates(empty_directory):
     """A file contains the same lines twice. The SpdxInfo only contains those
     lines once.
     """
@@ -291,25 +291,25 @@ def test_spdx_info_of_no_duplicates(empty_directory):
 
     (empty_directory / "foo.py").write_text(text * 2)
     project = Project(empty_directory)
-    spdx_info = project.spdx_info_of("foo.py")
-    assert len(spdx_info.spdx_expressions) == 1
-    assert LicenseSymbol("GPL-3.0-or-later") in spdx_info.spdx_expressions
-    assert len(spdx_info.copyright_lines) == 1
+    reuse_info = project.reuse_info_of("foo.py")
+    assert len(reuse_info.spdx_expressions) == 1
+    assert LicenseSymbol("GPL-3.0-or-later") in reuse_info.spdx_expressions
+    assert len(reuse_info.copyright_lines) == 1
     assert (
         "SPDX-FileCopyrightText: 2017 Free Software Foundation Europe"
-        in spdx_info.copyright_lines
+        in reuse_info.copyright_lines
     )
 
 
-def test_spdx_info_of_binary_succeeds(fake_repository):
-    """spdx_info_of succeeds when the target is covered by dep5."""
+def test_reuse_info_of_binary_succeeds(fake_repository):
+    """reuse_info_of succeeds when the target is covered by dep5."""
     shutil.copy(
         RESOURCES_DIRECTORY / "fsfe.png", fake_repository / "doc/fsfe.png"
     )
 
     project = Project(fake_repository)
-    spdx_info = project.spdx_info_of("doc/fsfe.png")
-    assert LicenseSymbol("CC0-1.0") in spdx_info.spdx_expressions
+    reuse_info = project.reuse_info_of("doc/fsfe.png")
+    assert LicenseSymbol("CC0-1.0") in reuse_info.spdx_expressions
 
 
 def test_license_file_detected(empty_directory):
@@ -322,10 +322,10 @@ def test_license_file_detected(empty_directory):
     )
 
     project = Project(empty_directory)
-    spdx_info = project.spdx_info_of("foo.py")
+    reuse_info = project.reuse_info_of("foo.py")
 
-    assert "SPDX-FileCopyrightText: 2017 Jane Doe" in spdx_info.copyright_lines
-    assert LicenseSymbol("MIT") in spdx_info.spdx_expressions
+    assert "SPDX-FileCopyrightText: 2017 Jane Doe" in reuse_info.copyright_lines
+    assert LicenseSymbol("MIT") in reuse_info.spdx_expressions
 
 
 def test_licenses_filename(empty_directory):

From 7346234169b30705f75b2d69a0a34d69d81e2bc8 Mon Sep 17 00:00:00 2001
From: linozen <linus@sehn.tech>
Date: Thu, 1 Jun 2023 10:52:03 +0200
Subject: [PATCH 60/60] Add SourceType enum and use it for source_type in
 ReuseInfo

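This commit adds a new enum `SourceType` with three members that record where a file's licensing and copyright information was found: a `.license` file (`DOT_LICENSE_FILE`), the file's own header (`FILE_HEADER`), or the `.reuse/dep5` file (`DEP5_FILE`). The `source_type` field of `ReuseInfo` is now typed as `Optional[SourceType]` instead of `Optional[str]`, and `Project.reuse_info_of` assigns enum members instead of string literals (defaulting to `None` rather than an empty string). Note that the value reported for DEP5 changes from "DEP5 file" to ".reuse/dep5 file". This improves readability and makes the code more maintainable.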
---
 src/reuse/__init__.py | 18 +++++++++++++++++-
 src/reuse/project.py  |  9 +++++----
 2 files changed, 22 insertions(+), 5 deletions(-)

diff --git a/src/reuse/__init__.py b/src/reuse/__init__.py
index 2da98817..a20317b5 100644
--- a/src/reuse/__init__.py
+++ b/src/reuse/__init__.py
@@ -20,6 +20,7 @@
 import os
 import re
 from dataclasses import dataclass, field
+from enum import Enum, auto
 from typing import NamedTuple, Optional, Set
 
 try:
@@ -88,6 +89,21 @@
 _IGNORE_FILE_PATTERNS.extend(_IGNORE_SPDX_PATTERNS)
 
 
+class SourceType(Enum):
+    """
+    An enumeration representing the types of sources for license information.
+
+    Potential values:
+        DOT_LICENSE_FILE: A .license file containing license information.
+        FILE_HEADER: A file header containing license information.
+        DEP5_FILE: A .reuse/dep5 file containing license information.
+    """
+
+    DOT_LICENSE_FILE = ".license file"
+    FILE_HEADER = "file header"
+    DEP5_FILE = ".reuse/dep5 file"
+
+
 @dataclass(frozen=True)
 class ReuseInfo:
     """Simple dataclass holding licensing and copyright information"""
@@ -96,7 +112,7 @@ class ReuseInfo:
     copyright_lines: Set[str] = field(default_factory=set)
     contributor_lines: Set[str] = field(default_factory=set)
     source_path: Optional[str] = None
-    source_type: Optional[str] = None
+    source_type: Optional[SourceType] = None
 
     def contains_copyright_or_licensing(self) -> bool:
         """Either *spdx_expressions* or *copyright_lines* is non-empty."""
diff --git a/src/reuse/project.py b/src/reuse/project.py
index 7f650686..8fb2c1b9 100644
--- a/src/reuse/project.py
+++ b/src/reuse/project.py
@@ -25,6 +25,7 @@
     _IGNORE_MESON_PARENT_DIR_PATTERNS,
     IdentifierNotFound,
     ReuseInfo,
+    SourceType,
 )
 from ._licenses import EXCEPTION_MAP, LICENSE_MAP
 from ._util import (
@@ -151,7 +152,7 @@ def reuse_info_of(self, path: PathLike) -> ReuseInfo:
         """
         path = _determine_license_path(path)
         source_path = ""
-        source_type = ""
+        source_type = None
 
         _LOGGER.debug(f"searching '{path}' for SPDX information")
 
@@ -189,9 +190,9 @@ def reuse_info_of(self, path: PathLike) -> ReuseInfo:
                 if file_result:
                     source_path = str(path)
                     if path.suffix == ".license":
-                        source_type = ".license file"
+                        source_type = SourceType.DOT_LICENSE_FILE
                     else:
-                        source_type = "file header"
+                        source_type = SourceType.FILE_HEADER
 
             except (ExpressionError, ParseError):
                 _LOGGER.error(
@@ -224,7 +225,7 @@ def reuse_info_of(self, path: PathLike) -> ReuseInfo:
                 spdx_expressions=dep5_result.spdx_expressions,
                 copyright_lines=dep5_result.copyright_lines,
                 source_path=source_path,
-                source_type="DEP5 file",
+                source_type=SourceType.DEP5_FILE,
             )
         # There is a file header or a .license file
         return ReuseInfo(