Skip to content

Commit

Permalink
Fix unknown license clues bug
Browse files Browse the repository at this point in the history
Signed-off-by: Ayan Sinha Mahapatra <[email protected]>
  • Loading branch information
AyanSinhaMahapatra committed Jul 20, 2023
1 parent 0988c72 commit edd7974
Show file tree
Hide file tree
Showing 10 changed files with 396 additions and 13 deletions.
24 changes: 14 additions & 10 deletions etc/scripts/licenses/buildrules-template.txt
Original file line number Diff line number Diff line change
@@ -1,18 +1,22 @@
----------------------------------------
license_expression:
relevance: 100
minimum_coverage: 90
license_expression: apache-2.0
is_license_notice: yes
is_license_text: yes
is_license_reference: yes
is_license_tag: yes
is_false_positive: yes
is_license_intro: yes
is_license_clue: yes
referenced_filenames:
notes:
- ASL2.0
notes: seen in woodstox
---
This copy of is licensed under the
{{Apache (Software) License, version 2.0 ("the License")}}.
See the License for details about distribution rights, and the
specific rights regarding derivate works.

You may obtain a copy of the License at:

http://www.apache.org/licenses/

A copy is also included in the downloadable source code package
containing , in file {{"ASL2.0"}}, under the same directory
as this file.
----------------------------------------
license_expression:
relevance: 100
Expand Down
7 changes: 7 additions & 0 deletions src/licensedcode/data/rules/license-clue_1.RULE
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
---
license_expression: unknown-license-reference
is_license_clue: yes
notes: Seen in woodstox
---

This product currently only contains code developed by authors of specific components, as identified by the source code files.
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
---
license_expression: unknown-license-reference
is_license_reference: yes
is_deprecated: yes
notes: Seen in woodstox
---

Expand Down
28 changes: 25 additions & 3 deletions src/licensedcode/detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -927,10 +927,21 @@ def is_undetected_license_matches(license_matches):
return True


def is_correct_detection_non_unknown(license_matches):
    """
    Return True if the ``license_matches`` list of LicenseMatch passes
    ``is_correct_detection`` and ``has_unknown_matches`` does not report
    any unknown match among them.
    """
    detection_is_correct = is_correct_detection(license_matches)
    # The unknown check only runs once the detection is known to be correct.
    return detection_is_correct and not has_unknown_matches(license_matches)


def is_correct_detection(license_matches):
"""
Return True if all the matches in ``license_matches`` List of LicenseMatch
are correct license detections.
are perfect license detections.
"""
matchers = (license_match.matcher for license_match in license_matches)
is_match_coverage_perfect = [
Expand All @@ -940,7 +951,7 @@ def is_correct_detection(license_matches):

return (
all(matcher in ("1-hash", "1-spdx-id", "2-aho") for matcher in matchers)
and all(is_match_coverage_perfect) and not has_unknown_matches(license_matches)
and all(is_match_coverage_perfect)
)


Expand Down Expand Up @@ -1088,6 +1099,14 @@ def is_unknown_intro(license_match):
)


def has_correct_license_clue_matches(license_matches):
    """
    Return True if the ``license_matches`` list of LicenseMatch passes
    ``is_correct_detection`` and every match comes from a rule whose
    ``is_license_clue`` attribute is true.
    """
    return is_correct_detection(license_matches) and all(
        license_match.rule.is_license_clue
        for license_match in license_matches
    )


def is_license_clues(license_matches):
"""
Return True if the license_matches are not part of a correct
Expand Down Expand Up @@ -1480,9 +1499,12 @@ def analyze_detection(license_matches, package_license=False):
return DetectionCategory.FALSE_POSITVE.value

# Case where all matches have `matcher` as `1-hash`, `1-spdx-id` or `2-aho`
# with perfect match coverage
elif is_correct_detection(license_matches=license_matches):
elif is_correct_detection_non_unknown(license_matches=license_matches):
return DetectionCategory.PERFECT_DETECTION.value

elif has_correct_license_clue_matches(license_matches=license_matches):
return DetectionCategory.LICENSE_CLUES.value

# Case where even though the matches have perfect coverage, they have
# matches with `unknown` rule identifiers
elif has_unknown_matches(license_matches=license_matches):
Expand Down
293 changes: 293 additions & 0 deletions tests/licensedcode/data/plugin_license/clues/woodstox.expected.json

Large diffs are not rendered by default.

6 changes: 6 additions & 0 deletions tests/licensedcode/data/plugin_license/clues/woodstox/ASL2.0
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@

Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/


12 changes: 12 additions & 0 deletions tests/licensedcode/data/plugin_license/clues/woodstox/LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
This copy of Woodstox XML processor is licensed under the
Apache (Software) License, version 2.0 ("the License").
See the License for details about distribution rights, and the
specific rights regarding derivate works.

You may obtain a copy of the License at:

http://www.apache.org/licenses/

A copy is also included in the downloadable source code package
containing Woodstox, in file "ASL2.0", under the same directory
as this file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
Manifest-Version: 1.0
Ant-Version: Apache Ant 1.6.5
Created-By: 1.5.0_14-b03 (Sun Microsystems Inc.)
Built-By: tatu
Specification-Title: StAX 1.0 API
Specification-Version: 1.0
Specification-Vendor: http://jcp.org/en/jsr/detail?id=173
Implementation-Title: WoodSToX XML-processor
Implementation-Version: 3.2.8
Implementation-Vendor: woodstox.codehaus.org

8 changes: 8 additions & 0 deletions tests/licensedcode/data/plugin_license/clues/woodstox/NOTICE
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
This product currently only contains code developed by authors
of specific components, as identified by the source code files.

Since product implements StAX API, it has dependencies to StAX API
classes.

For additional credits (generally to people who reported problems)
see CREDITS file.
19 changes: 19 additions & 0 deletions tests/licensedcode/test_plugin_license_detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,25 @@ def test_license_match_unknown_license_intro_with_dual_license():
check_json_scan(test_loc, result_file, regen=REGEN_TEST_FIXTURES)


def test_license_match_unknown_clues_is_not_in_expression():
    # Scan the woodstox fixture directory (requested with copy=True) and
    # compare the JSON scan output against the stored expected results.
    scan_dir = test_env.get_test_loc('plugin_license/clues/woodstox/', copy=True)
    scan_output = test_env.get_temp_file('json')
    license_options = [
        '--license',
        '--license-text',
        '--license-text-diagnostics',
        '--license-diagnostics',
        '--license-references',
        '--strip-root',
        '--verbose',
    ]
    run_scan_click(license_options + ['--json', scan_output, scan_dir])
    expected_loc = test_env.get_test_loc('plugin_license/clues/woodstox.expected.json')
    check_json_scan(expected_loc, scan_output, regen=REGEN_TEST_FIXTURES)


def test_license_match_unknown_license_intro_eclipse_foundation():
test_dir = test_env.get_test_loc('plugin_license/unknown_intro/scan-unknown-intro-eclipse-foundation/', copy=True)
result_file = test_env.get_temp_file('json')
Expand Down

0 comments on commit edd7974

Please sign in to comment.