Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/handling false positives #53

Merged
merged 9 commits into from
Mar 18, 2024
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,29 @@ options:
-q, --quiet disable most output
```

Additionally, there is an option to ignore single files, folders and types of files.
If a `.scanignore` file exists in the **top level directory** of a package,
every file or folder matching one of its entries is ignored.
The file entries work similarly to those of a `.gitignore` file, including comments starting with `#`.
An example of a custom `.scanignore` file:

```
.git/* # folder
README.txt # file
README.* # file pattern
```

By default, ros_license_toolkit ignores the following:

```
.scanignore
package.xml
setup.py
setup.cfg
CMakeLists.txt
.git/*
```

### Using it as a GitHub action

You can use `ros_license_toolkit` inside your GitHub workflow in order to check licenses in your
Expand Down
8 changes: 8 additions & 0 deletions how_to_update.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
# Note for me

## Requirements

```bash
pip install bumpver build twine
```

## Steps

How to update the project:

1. Increment version
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "ros_license_toolkit"
version = "1.2.2"
version = "1.3.0"
description = "Checks ROS packages for correct license declaration."
readme = "README.md"
authors = [
Expand Down
4 changes: 2 additions & 2 deletions setup.cfg
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
[flake8]
ignore = Q000, I100, I201, W503, W504
ignore = Q000, I100, I201, W503


[bumpver]
current_version = "1.2.2"
current_version = "1.3.0"
version_pattern = "MAJOR.MINOR.PATCH"
commit = True
tag = True
Expand Down
2 changes: 1 addition & 1 deletion src/ros_license_toolkit/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
"""ROS License Toolkit."""
__version__ = "1.2.2"
__version__ = "1.3.0"
30 changes: 12 additions & 18 deletions src/ros_license_toolkit/checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,10 @@
from pprint import pformat
from typing import Any, Dict, List, Optional

from ros_license_toolkit.common import get_spdx_license_name
from ros_license_toolkit.license_tag import (LicenseTag,
is_license_name_in_spdx_list)
from ros_license_toolkit.package import (Package, PackageException,
get_spdx_license_name,
is_license_text_file)
from ros_license_toolkit.package import Package, PackageException
from ros_license_toolkit.ui_elements import NO_REASON_STR, green, red, yellow


Expand Down Expand Up @@ -160,11 +159,11 @@ def _check(self, package: Package):
return

self._check_licenses(package)
self._evaluate_results(package)
self._evaluate_results()

def _check_licenses(self, package: Package) -> None:
'''checks each license tag for the corresponding license text. Also
detects inofficial licenses when tag is other than SPDX license file'''
detects inofficial licenses when tag is not in the SPDX license list'''
self.found_license_texts = package.found_license_texts
for license_tag in package.license_tags.values():
if not license_tag.has_license_text_file():
Expand All @@ -185,7 +184,7 @@ def _check_licenses(self, package: Package) -> None:
" in scan results."
self.missing_license_texts_status[license_tag] = Status.FAILURE
continue
if not is_license_text_file(
if not get_spdx_license_name(
self.found_license_texts[license_text_file]):
self.license_tags_without_license_text[license_tag] =\
f"License text file '{license_text_file}' is not " +\
Expand All @@ -207,12 +206,9 @@ def _check_licenses(self, package: Package) -> None:
f"of license {actual_license} but tag is " +\
f"{license_tag.get_license_id()}."
self.missing_license_texts_status[license_tag] = Status.WARNING
self.files_with_wrong_tags[license_tag] = \
{'actual_license': actual_license,
'license_tag': license_tag.get_license_id()}
continue

def _evaluate_results(self, package: Package):
def _evaluate_results(self):
if len(self.license_tags_without_license_text) > 0:
if max(self.missing_license_texts_status.values()) \
== Status.WARNING:
Expand All @@ -222,12 +218,6 @@ def _evaluate_results(self, package: Package):
"license text:\n" + "\n".join(
[f" '{x[0]}': {x[1]}" for x in
self.license_tags_without_license_text.items()]))
for entry in self.files_with_wrong_tags.items():
# if exactly one license text is found,
# treat wrong license tag internally as this license
# optional check for similarity between tag and file
package.inofficial_license_tag[entry[1]['actual_license']]\
= entry[1]['license_tag']
else:
self._failed(
"The following license tags do not "
Expand Down Expand Up @@ -271,13 +261,17 @@ def _check_license_files(self, package: Package) -> None:
for license_str in licenses:
if license_str not in self.declared_licenses:
# this license has an inofficial tag
if license_str in package.inofficial_license_tag.keys():
inofficial_licenses = {
lic_tag.id_from_license_text: key
for key, lic_tag in package.license_tags.items()
if lic_tag.id_from_license_text != ''}
if license_str in inofficial_licenses.keys():
if fname not in self.files_with_inofficial_tag:
self.files_with_inofficial_tag[fname] = []
self.files_with_inofficial_tag[fname].append(
license_str)
self.files_with_inofficial_tag[fname].append(
package.inofficial_license_tag[license_str])
inofficial_licenses[license_str])
continue
# this license is not declared by any license tag
if fname not in self.files_with_uncovered_licenses:
Expand Down
45 changes: 38 additions & 7 deletions src/ros_license_toolkit/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,44 @@

"""Common utility functions."""

from typing import Any, Dict
import os
from typing import Any, Dict, List, Optional

REQUIRED_PERCENTAGE_OF_LICENSE_TEXT = 95.0


def is_license_text_file(scan_results: Dict[str, Any]) -> bool:
"""Check if a file is a license text file."""
return (
scan_results["percentage_of_license_text"] >=
REQUIRED_PERCENTAGE_OF_LICENSE_TEXT)
# Patterns that are always excluded from the scan results, in addition to
# the entries of a package's `.scanignore` file (see `get_ignored_content`).
# Each entry is a file name or an fnmatch-style pattern such as ".git/*".
IGNORED = [
".scanignore",
"package.xml",
"setup.py",
"setup.cfg",
"CMakeLists.txt",
".git/*"
]


def get_spdx_license_name(scan_results: Dict[str, Any]) -> Optional[str]:
    """Return the SPDX license expression detected for a scanned file.

    The expression is only reported when the file's share of license text
    reaches ``REQUIRED_PERCENTAGE_OF_LICENSE_TEXT``; below that threshold
    ``None`` is returned.
    """
    is_license_text = (scan_results['percentage_of_license_text']
                       >= REQUIRED_PERCENTAGE_OF_LICENSE_TEXT)
    return (scan_results['detected_license_expression_spdx']
            if is_license_text else None)


def get_ignored_content(pkg_abspath: str) -> List[str]:
    """Return all ignore patterns that apply to a package.

    The result holds the patterns read from the `.scanignore` file in the
    package's top level directory (if there is one), followed by every
    entry of the local `IGNORED` definition that is not already listed.

    Within `.scanignore`, everything after a `#` is treated as a comment,
    surrounding whitespace is removed and empty lines are skipped.
    """
    ignored_content: List[str] = []
    scanignore_path = os.path.join(pkg_abspath, ".scanignore")
    # Only read a regular file; a directory with that name is not opened.
    if os.path.isfile(scanignore_path):
        # The `with` block closes the file, no explicit close() is needed.
        with open(scanignore_path, 'r', encoding="utf-8") as f:
            for line in f:
                # Strip both ends so that indented entries still match.
                ignore_pattern = line.split('#', 1)[0].strip()
                if ignore_pattern:
                    ignored_content.append(ignore_pattern)
    for pattern in IGNORED:
        if pattern not in ignored_content:
            ignored_content.append(pattern)
    return ignored_content
24 changes: 21 additions & 3 deletions src/ros_license_toolkit/license_tag.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
import os
import xml.etree.ElementTree as ET
from glob import glob
from typing import List, Optional, Set
from typing import Any, Dict, List, Optional, Set

from spdx.config import LICENSE_MAP

Expand Down Expand Up @@ -60,20 +60,31 @@ def _eval_glob(glob_str: str, pkg_path: str) -> Set[str]:
class LicenseTag:
"""A license tag found in a package.xml file."""

def __init__(self, element: ET.Element, pkg_path: str):
def __init__(self, element: ET.Element,
pkg_path: str,
license_file_scan_results: Optional[Dict[str, Any]] = None):
"""Initialize a license tag from an XML element."""
self.element = element
assert self.element.text is not None, "License tag must have text."

raw_license_name: str = str(self.element.text)
# Name of the license (in SPDX tag format for comparability)
raw_license_name: str = str(self.element.text)

# If the tag is wrong (like BSD) but the actual license can
# be found out through declaration, this field contains the tag
self.id_from_license_text: Optional[str] = None

try:
self.id = to_spdx_license_tag(raw_license_name)
except ValueError:
# If the license name is not in the SPDX list,
# we assume it is a custom license and use the name as-is.
# This will be detected in `LicenseTagIsInSpdxListCheck`.
self.id = raw_license_name
# If a file is linked to the tag, set its id for internal checks
if license_file_scan_results:
self.id_from_license_text = \
get_id_from_license_text(license_file_scan_results)

# Path to the file containing the license text
# (relative to package root)
Expand Down Expand Up @@ -139,3 +150,10 @@ def make_this_the_main_license(self, other_licenses: List["LicenseTag"]):
continue
source_files -= other_license.source_files
self._source_files = source_files


def get_id_from_license_text(license_file_scan_result: Dict[str, Any]) -> str:
    """Look up the license id detected in a license text scan result.

    Returns the value stored under 'detected_license_expression_spdx',
    or an empty string when the scan result has no such entry.
    """
    return license_file_scan_result.get(
        'detected_license_expression_spdx', '')
53 changes: 31 additions & 22 deletions src/ros_license_toolkit/package.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,29 +26,12 @@
from rospkg.common import PACKAGE_FILE
from scancode.api import get_licenses

from ros_license_toolkit.common import (REQUIRED_PERCENTAGE_OF_LICENSE_TEXT,
is_license_text_file)
from ros_license_toolkit.common import (get_ignored_content,
get_spdx_license_name)
from ros_license_toolkit.copyright import get_copyright_strings_per_pkg
from ros_license_toolkit.license_tag import LicenseTag
from ros_license_toolkit.repo import NotARepoError, Repo

# files we ignore in scan results
IGNORED = [
"package.xml",
"setup.py",
"setup.cfg",
"CMakeLists.txt",
".git/*",
]


def get_spdx_license_name(scan_results: Dict[str, Any]) -> Optional[str]:
"""Get the SPDX license name from scan results."""
if scan_results['percentage_of_license_text'] >=\
REQUIRED_PERCENTAGE_OF_LICENSE_TEXT:
return scan_results['detected_license_expression_spdx']
return None


class PackageException(Exception):
"""Exception raised when a package is invalid."""
Expand Down Expand Up @@ -101,6 +84,9 @@ def __init__(self, path: str, repo: Optional[Repo] = None):
# be found out through declaration, this field contains the tag
self.inofficial_license_tag: Dict[str, str] = {}

# All ignored files and folders
self._ignored_content: List[str] = get_ignored_content(self.abspath)

def _get_path_relative_to_pkg(self, path: str) -> str:
"""Get path relative to pkg root"""
return os.path.relpath(path, self.abspath)
Expand Down Expand Up @@ -134,7 +120,7 @@ def _run_scan_and_save_results(self):
for (root, _, files) in os.walk(self.abspath):
files_rel_to_pkg = [self._get_path_relative_to_pkg(
os.path.join(root, f)) for f in files]
for pattern in IGNORED:
ant-u marked this conversation as resolved.
Show resolved Hide resolved
for pattern in self._ignored_content:
matched = fnmatch.filter(files_rel_to_pkg, pattern)
ant-u marked this conversation as resolved.
Show resolved Hide resolved
for m in matched:
files_rel_to_pkg.remove(m)
Expand All @@ -143,7 +129,7 @@ def _run_scan_and_save_results(self):
fpath = os.path.join(self.abspath, fname)
# Path relative to package root
scan_results = get_licenses(fpath)
if is_license_text_file(scan_results):
if get_spdx_license_name(scan_results):
self._found_license_texts[fname
] = scan_results
else:
Expand Down Expand Up @@ -210,18 +196,41 @@ def _check_single_license_tag_without_file_attribute(self):
tag.license_text_file = potential_license_files[0]
break

def _check_for_single_tag_without_file(self):
    """Derive the license id of a lone tag from a lone license text.

    Applies only when the package has exactly one license tag and exactly
    one found license text. If that tag has no `id_from_license_text` yet,
    it receives the SPDX expression detected in the single license text.
    """
    if len(self._license_tags) != 1 or len(self.found_license_texts) != 1:
        return
    (only_tag,) = self._license_tags.values()
    if only_tag.id_from_license_text is not None:
        # An id was already derived from the file linked to the tag.
        return
    (only_scan_result,) = self.found_license_texts.values()
    only_tag.id_from_license_text = \
        only_scan_result['detected_license_expression_spdx']

@property
def license_tags(self) -> Dict[str, LicenseTag]:
    """Get all license tags in the package.xml file.

    The tags are built on first access and cached in `self._license_tags`;
    later accesses return the cached dictionary, keyed by license id.
    """
    if self._license_tags is not None:
        return self._license_tags
    self._license_tags = {}
    for license_tag in self.package_xml.iterfind('license'):
        # Scan result of the file named by the tag's `file` attribute,
        # if that file is among the found license texts.
        license_file_scan_result = None
        if 'file' in license_tag.attrib:
            license_file = license_tag.attrib['file']
            if license_file in self.found_license_texts:
                license_file_scan_result = \
                    self.found_license_texts[license_file]
                license_file_scan_result['filename'] = license_file
        # Build each tag exactly once, with the scan result attached.
        tag = LicenseTag(license_tag,
                         self.abspath, license_file_scan_result)
        self._license_tags[tag.get_license_id()] = tag

    self._check_single_license_tag_without_source_files()
    self._check_single_license_tag_without_file_attribute()
    self._check_for_single_tag_without_file()

    return self._license_tags

Expand Down
11 changes: 7 additions & 4 deletions src/ros_license_toolkit/repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
import git
from scancode.api import get_licenses

from ros_license_toolkit.common import is_license_text_file
from ros_license_toolkit.common import get_spdx_license_name

# how many folders up to search for a repo
REPO_SEARCH_DEPTH = 5
Expand Down Expand Up @@ -80,13 +80,16 @@ def __init__(self, package_path: str):
if not os.path.isfile(fpath):
continue
scan_results = get_licenses(fpath)
if is_license_text_file(scan_results):
self.license_text_files[fpath] = scan_results
if get_spdx_license_name(scan_results):
if 'ros_license_toolkit/LICENSE' not in fpath:
self.license_text_files[fpath] = scan_results

# get the remote url
self.remote_url: Optional[str] = None
if len(repo.remotes) > 0:
self.remote_url = repo.remotes[0].url
# Ignore package
if 'ros_license_toolkit' not in repo.remotes[0].url:
self.remote_url = repo.remotes[0].url

def __eq__(self, __o) -> bool:
"""Check if two repos are the same."""
Expand Down
Loading
Loading