Skip to content

Commit

Permalink
Merge pull request #30 from fsfe/only-licenses-dir
Browse files Browse the repository at this point in the history
Remove Valid-License-Identifier
  • Loading branch information
carmenbianca authored Jun 21, 2019
2 parents 266991c + c3f002e commit 6fb180d
Show file tree
Hide file tree
Showing 7 changed files with 58 additions and 215 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,9 @@ Changed

- The list of SPDX licenses has been updated.

- The ``Valid-License-Identifier`` tag is no longer used; licenses and
  exceptions can now only live inside the ``LICENSES/`` directory.

Removed
~~~~~~~

Expand Down
8 changes: 0 additions & 8 deletions src/reuse/_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,6 @@
re.compile(r"(Copyright .*?)" + _END_PATTERN),
re.compile(r"(© .*?)" + _END_PATTERN),
]
_VALID_LICENSE_PATTERN = re.compile(
r"Valid" "-License-Identifier: (.*?)" + _END_PATTERN, re.MULTILINE
)

# Amount of bytes that we assume will be big enough to contain the entire
# comment header (including SPDX tags), so that we don't need to read the
Expand Down Expand Up @@ -217,11 +214,6 @@ def extract_spdx_info(text: str) -> None:
return SpdxInfo(expressions, copyright_matches)


def extract_valid_license(text: str) -> Set[str]:
    """Collect the identifiers that ``_VALID_LICENSE_PATTERN`` finds in *text*.

    Every match is stripped of surrounding whitespace. The result is a set,
    so an identifier that appears more than once is reported only once.
    """
    return {
        str.strip(found) for found in _VALID_LICENSE_PATTERN.findall(text)
    }


def _checksum(path: PathLike) -> str:
path = Path(path)

Expand Down
9 changes: 0 additions & 9 deletions src/reuse/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,15 +59,6 @@ def put_license_in_file(
licenses_path = find_licenses_directory(root=root)
licenses_path.mkdir(exist_ok=True)
destination = licenses_path / "".join((spdx_identifier, ".txt"))
else:
is_exception = spdx_identifier in EXCEPTION_MAP
header = (
"Valid-{licexc}-Identifier: {identifier}\n"
"{licexc}-Text:\n\n".format(
identifier=spdx_identifier,
licexc="Exception" if is_exception else "License",
)
)

destination = Path(destination)
if destination.exists():
Expand Down
156 changes: 52 additions & 104 deletions src/reuse/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import os
from gettext import gettext as _
from pathlib import Path
from typing import Dict, Iterator, List, Optional
from typing import Dict, Iterator, Optional

from debian.copyright import Copyright, NotMachineReadableError
from license_expression import ExpressionError
Expand All @@ -30,7 +30,6 @@
_determine_license_path,
decoded_text_from_binary,
extract_spdx_info,
extract_valid_license,
find_root,
in_git_repo,
)
Expand Down Expand Up @@ -191,51 +190,14 @@ def _is_path_ignored(self, path: PathLike) -> bool:

return False

def _identifiers_of_license(self, path: PathLike) -> List[str]:
"""Figure out the SPDX identifier(s) of a license given its path.
The order of precedence is:
- A .license file containing the `Valid-License-Identifier` tag.
- A `Valid-License-Identifier` tag within the license file itself.
- The name of the file (minus extension) if:
- The name is an SPDX license.
- The name starts with 'LicenseRef-'.
def _identifier_of_license(self, path: PathLike) -> str:
"""Figure out the SPDX identifier of a license given its path. The name
of the path (minus its extension) should be a valid SPDX identifier.
"""
path = _determine_license_path(path)
file_name_identifier = None

# Identifier inside of file name?
if path.stem in self.license_map:
file_name_identifier = path.stem
elif path.name in self.license_map:
file_name_identifier = path.name
elif path.stem.startswith("LicenseRef-"):
file_name_identifier = path.stem

with (self.root / path).open("rb") as fp:
result = extract_valid_license(
decoded_text_from_binary(fp, size=_HEADER_BYTES)
)
for identifier in result:
# Mismatch with file_name_identifier
if (
file_name_identifier is not None
and identifier != file_name_identifier
):
raise RuntimeError(
"{path}: Valid-License-Identifier {valid} conflicts "
"with path name".format(path=path, valid=identifier)
)
if result:
return result

if file_name_identifier:
return [file_name_identifier]
return path.stem
if path.stem.startswith("LicenseRef-"):
return path.stem

raise IdentifierNotFound(
"Could not find SPDX identifier for {}".format(path)
Expand Down Expand Up @@ -275,67 +237,53 @@ def _licenses(self) -> Dict[str, Path]:
unknown_counter = 0
license_files = dict()

patterns = [
"LICENSE*",
"LICENCE*",
"COPYING*",
"COPYRIGHT*",
"LICENCES/**",
"LICENSES/**",
]
for pattern in patterns:
pattern = str(self.root.resolve() / pattern)
for path in glob.iglob(pattern, recursive=True):
# For some reason, LICENSES/** is resolved even though it
# doesn't exist. I have no idea why. Deal with that here.
if not Path(path).exists() or Path(path).is_dir():
continue
if Path(path).suffix == ".license":
continue
if Path(path).suffix == ".spdx":
continue
directory = str(self.root.resolve() / "LICENSES/**")
for path in glob.iglob(directory, recursive=True):
# For some reason, LICENSES/** is resolved even though it
# doesn't exist. I have no idea why. Deal with that here.
if not Path(path).exists() or Path(path).is_dir():
continue
if Path(path).suffix == ".license":
continue
if Path(path).suffix == ".spdx":
continue

path = _determine_license_path(path)
path = self._relative_from_root(path)
_LOGGER.debug("searching %s for license tags", path)

try:
identifiers = self._identifiers_of_license(path)
except IdentifierNotFound:
identifier = "LicenseRef-Unknown{}".format(unknown_counter)
identifiers = [identifier]
unknown_counter += 1
_LOGGER.warning(
_(
"Could not resolve SPDX identifier of {path}, "
"resolving to {identifier}"
).format(path=path, identifier=identifier)
)
path = self._relative_from_root(path)
_LOGGER.debug("searching %s for license tags", path)

for identifier in identifiers:
if identifier in license_files:
_LOGGER.critical(
_(
"{identifier} is the SPDX identifier of both "
"{path} and {other_path}"
).format(
identifier=identifier,
path=path,
other_path=license_files[identifier],
)
)
raise RuntimeError(
"Multiple licenses resolve to {}".format(
identifier
)
)
# Add the identifiers
license_files[identifier] = path
if (
identifier.startswith("LicenseRef-")
and "Unknown" not in identifier
):
self.license_map[identifier] = path
try:
identifier = self._identifier_of_license(path)
except IdentifierNotFound:
identifier = "LicenseRef-Unknown{}".format(unknown_counter)
unknown_counter += 1
_LOGGER.warning(
_(
"Could not resolve SPDX identifier of {path}, "
"resolving to {identifier}"
).format(path=path, identifier=identifier)
)

if identifier in license_files:
_LOGGER.critical(
_(
"{identifier} is the SPDX identifier of both "
"{path} and {other_path}"
).format(
identifier=identifier,
path=path,
other_path=license_files[identifier],
)
)
raise RuntimeError(
"Multiple licenses resolve to {}".format(identifier)
)
# Add the identifiers
license_files[identifier] = path
if (
identifier.startswith("LicenseRef-")
and "Unknown" not in identifier
):
self.license_map[identifier] = path

return license_files

Expand Down
24 changes: 1 addition & 23 deletions tests/test_download.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,26 +126,4 @@ def test_put_custom_output(empty_directory, monkeypatch):
)
put_license_in_file("0BSD", destination="foo")

assert (
(empty_directory / "foo").read_text()
== "Valid-License-Identifier: 0BSD\n"
"License-Text:\n"
"\n"
"hello\n"
)


def test_put_custom_exception(empty_directory, monkeypatch):
    """An exception written to a custom destination gets the exception
    header in front of the downloaded text.
    """

    def fake_get(_):
        # Stand-in for requests.get: always answers 200 with "hello\n".
        return MockResponse("hello\n", 200)

    monkeypatch.setattr(requests, "get", fake_get)
    put_license_in_file("Autoconf-exception-3.0", destination="foo")

    expected = (
        "Valid-Exception-Identifier: Autoconf-exception-3.0\n"
        "Exception-Text:\n"
        "\n"
        "hello\n"
    )
    written = (empty_directory / "foo").read_text()
    assert written == expected
assert (empty_directory / "foo").read_text() == "hello\n"
66 changes: 2 additions & 64 deletions tests/test_project.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,80 +224,18 @@ def test_license_file_detected(empty_directory):
assert LicenseSymbol("MIT") in spdx_info.spdx_expressions


def test_detect_all_licenses(empty_directory):
    """Licenses identified by a tag in their text and licenses identified by
    their file name are all detected.
    """
    tagged_files = ["COPYING", "LICENSE", "COPYRIGHT", "LICENSES/foo"]
    (empty_directory / "LICENSES").mkdir()

    # Tag each file with its own LicenseRef number, starting at 1.
    for number, name in enumerate(tagged_files, start=1):
        (empty_directory / name).write_text(
            "Valid-License-Identifier: LicenseRef-{}".format(number)
        )

    # The first unused number names a file that carries no tag at all.
    counter = len(tagged_files) + 1
    (empty_directory / "LICENSES/MIT.txt").write_text("nothing")
    untagged_ref = empty_directory / "LICENSES/LicenseRef-{}.txt".format(
        counter
    )
    untagged_ref.write_text("nothing")

    project = Project(empty_directory)

    assert len(project.licenses) == 6
    tagged_found = [
        Path(name) in project.licenses.values() for name in tagged_files
    ]
    assert all(tagged_found)
    assert Path("LICENSES/MIT.txt") in project.licenses.values()
    assert (
        Path("LICENSES/LicenseRef-{}.txt".format(counter))
        in project.licenses.values()
    )


@pytest.mark.parametrize("license_file", ["LICENSE", "LICENSES/foo.txt"])
def test_licenses_empty(empty_directory, license_file):
def test_licenses_empty(empty_directory):
"""If the identifier of a license could not be identified, silently carry
on."""
(empty_directory / "LICENSES").mkdir()
(empty_directory / license_file).touch()
(empty_directory / "LICENSES/foo.txt").touch()
project = Project(empty_directory)
assert "LicenseRef-Unknown0" in project.licenses


def test_licenses_mismatch_license(empty_directory):
    """A Valid-License-Identifier tag that disagrees with the file name makes
    constructing the Project raise RuntimeError.
    """
    (empty_directory / "LICENSES").mkdir()
    # File is named after MIT but its tag claims GPL-3.0-or-later.
    mislabeled = empty_directory / "LICENSES/MIT.txt"
    mislabeled.write_text("Valid-License-Identifier: GPL-3.0-or-later")

    with pytest.raises(RuntimeError):
        Project(empty_directory)


def test_licenses_duplicate(empty_directory):
    """Two files tagged with the same identifier make constructing the
    Project raise RuntimeError.
    """
    for name in ("COPYING", "LICENSE"):
        (empty_directory / name).write_text("Valid-License-Identifier: MIT")

    with pytest.raises(RuntimeError):
        Project(empty_directory)


def test_licenses_subdirectory(empty_directory):
    """A license file nested below LICENSES/ is still picked up."""
    nested_dir = empty_directory / "LICENSES/sub"
    nested_dir.mkdir(parents=True)
    (empty_directory / "LICENSES/sub/MIT.txt").touch()

    assert "MIT" in Project(empty_directory).licenses


def test_licenses_no_file_extension(empty_directory):
    """A license file whose name has no extension is found as well."""
    (empty_directory / "LICENSES").mkdir()
    bare_name = empty_directory / "LICENSES/GPL-3.0-or-later"
    bare_name.touch()

    assert "GPL-3.0-or-later" in Project(empty_directory).licenses
7 changes: 0 additions & 7 deletions tests/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,13 +89,6 @@ def test_extract_copyright_variations():
assert len(lines) == len(result.copyright_lines)


def test_extract_valid_license():
    """A single Valid-License-Identifier line yields a one-element set."""
    found = _util.extract_valid_license("Valid-License-Identifier: MIT")
    assert found == {"MIT"}


def test_copyright_from_dep5(copyright):
"""Verify that the glob in the dep5 file is matched."""
result = _util._copyright_from_dep5("doc/foo.rst", copyright)
Expand Down

0 comments on commit 6fb180d

Please sign in to comment.