From 84831f3dcb2058fb4989b3b5a23283dfbc537bd9 Mon Sep 17 00:00:00 2001 From: Ayan Sinha Mahapatra Date: Thu, 20 Jul 2023 19:24:27 +0530 Subject: [PATCH] Add new license rule model attributes To the license Rule class: - Adds is_license_clue attribute - Adds is_deprecated attribute Also implements related processing. Signed-off-by: Ayan Sinha Mahapatra --- etc/scripts/licenses/buildrules-template.txt | 126 ++++++++++++------ etc/scripts/licenses/buildrules.py | 2 + etc/scripts/licenses/report_license_rules.py | 2 + src/licensedcode/models.py | 48 ++++++- src/packagedcode/jar_manifest.py | 2 +- src/summarycode/score.py | 2 + ...e-reference-works-with-clues.expected.json | 19 +++ ...-matched-text-with-reference.expected.json | 4 + .../scan-with-reference.expected.json | 4 + 9 files changed, 164 insertions(+), 45 deletions(-) diff --git a/etc/scripts/licenses/buildrules-template.txt b/etc/scripts/licenses/buildrules-template.txt index f6edea21012..f4f6565e088 100644 --- a/etc/scripts/licenses/buildrules-template.txt +++ b/etc/scripts/licenses/buildrules-template.txt @@ -7,7 +7,8 @@ is_license_text: yes is_license_reference: yes is_license_tag: yes is_false_positive: yes -is_license_inro: yes +is_license_intro: yes +is_license_clue: yes referenced_filenames: notes: --- @@ -21,7 +22,8 @@ is_license_text: yes is_license_reference: yes is_license_tag: yes is_false_positive: yes -is_license_inro: yes +is_license_intro: yes +is_license_clue: yes referenced_filenames: notes: --- @@ -35,7 +37,8 @@ is_license_text: yes is_license_reference: yes is_license_tag: yes is_false_positive: yes -is_license_inro: yes +is_license_intro: yes +is_license_clue: yes referenced_filenames: notes: --- @@ -49,7 +52,8 @@ is_license_text: yes is_license_reference: yes is_license_tag: yes is_false_positive: yes -is_license_inro: yes +is_license_intro: yes +is_license_clue: yes referenced_filenames: notes: --- @@ -63,7 +67,8 @@ is_license_text: yes is_license_reference: yes is_license_tag: yes 
is_false_positive: yes -is_license_inro: yes +is_license_intro: yes +is_license_clue: yes referenced_filenames: notes: --- @@ -77,7 +82,8 @@ is_license_text: yes is_license_reference: yes is_license_tag: yes is_false_positive: yes -is_license_inro: yes +is_license_intro: yes +is_license_clue: yes referenced_filenames: notes: --- @@ -91,7 +97,8 @@ is_license_text: yes is_license_reference: yes is_license_tag: yes is_false_positive: yes -is_license_inro: yes +is_license_intro: yes +is_license_clue: yes referenced_filenames: notes: --- @@ -105,7 +112,8 @@ is_license_text: yes is_license_reference: yes is_license_tag: yes is_false_positive: yes -is_license_inro: yes +is_license_intro: yes +is_license_clue: yes referenced_filenames: notes: --- @@ -119,7 +127,8 @@ is_license_text: yes is_license_reference: yes is_license_tag: yes is_false_positive: yes -is_license_inro: yes +is_license_intro: yes +is_license_clue: yes referenced_filenames: notes: --- @@ -133,7 +142,8 @@ is_license_text: yes is_license_reference: yes is_license_tag: yes is_false_positive: yes -is_license_inro: yes +is_license_intro: yes +is_license_clue: yes referenced_filenames: notes: --- @@ -147,7 +157,8 @@ is_license_text: yes is_license_reference: yes is_license_tag: yes is_false_positive: yes -is_license_inro: yes +is_license_intro: yes +is_license_clue: yes referenced_filenames: notes: --- @@ -161,7 +172,8 @@ is_license_text: yes is_license_reference: yes is_license_tag: yes is_false_positive: yes -is_license_inro: yes +is_license_intro: yes +is_license_clue: yes referenced_filenames: notes: --- @@ -175,7 +187,8 @@ is_license_text: yes is_license_reference: yes is_license_tag: yes is_false_positive: yes -is_license_inro: yes +is_license_intro: yes +is_license_clue: yes referenced_filenames: notes: --- @@ -189,7 +202,8 @@ is_license_text: yes is_license_reference: yes is_license_tag: yes is_false_positive: yes -is_license_inro: yes +is_license_intro: yes +is_license_clue: yes referenced_filenames: 
notes: --- @@ -203,7 +217,8 @@ is_license_text: yes is_license_reference: yes is_license_tag: yes is_false_positive: yes -is_license_inro: yes +is_license_intro: yes +is_license_clue: yes referenced_filenames: notes: --- @@ -217,7 +232,8 @@ is_license_text: yes is_license_reference: yes is_license_tag: yes is_false_positive: yes -is_license_inro: yes +is_license_intro: yes +is_license_clue: yes referenced_filenames: notes: --- @@ -231,7 +247,8 @@ is_license_text: yes is_license_reference: yes is_license_tag: yes is_false_positive: yes -is_license_inro: yes +is_license_intro: yes +is_license_clue: yes referenced_filenames: notes: --- @@ -245,7 +262,8 @@ is_license_text: yes is_license_reference: yes is_license_tag: yes is_false_positive: yes -is_license_inro: yes +is_license_intro: yes +is_license_clue: yes referenced_filenames: notes: --- @@ -259,7 +277,8 @@ is_license_text: yes is_license_reference: yes is_license_tag: yes is_false_positive: yes -is_license_inro: yes +is_license_intro: yes +is_license_clue: yes referenced_filenames: notes: --- @@ -273,7 +292,8 @@ is_license_text: yes is_license_reference: yes is_license_tag: yes is_false_positive: yes -is_license_inro: yes +is_license_intro: yes +is_license_clue: yes referenced_filenames: notes: --- @@ -287,7 +307,8 @@ is_license_text: yes is_license_reference: yes is_license_tag: yes is_false_positive: yes -is_license_inro: yes +is_license_intro: yes +is_license_clue: yes referenced_filenames: notes: --- @@ -301,7 +322,8 @@ is_license_text: yes is_license_reference: yes is_license_tag: yes is_false_positive: yes -is_license_inro: yes +is_license_intro: yes +is_license_clue: yes referenced_filenames: notes: --- @@ -315,7 +337,8 @@ is_license_text: yes is_license_reference: yes is_license_tag: yes is_false_positive: yes -is_license_inro: yes +is_license_intro: yes +is_license_clue: yes referenced_filenames: notes: --- @@ -329,7 +352,8 @@ is_license_text: yes is_license_reference: yes is_license_tag: yes 
is_false_positive: yes -is_license_inro: yes +is_license_intro: yes +is_license_clue: yes referenced_filenames: notes: --- @@ -343,7 +367,8 @@ is_license_text: yes is_license_reference: yes is_license_tag: yes is_false_positive: yes -is_license_inro: yes +is_license_intro: yes +is_license_clue: yes referenced_filenames: notes: --- @@ -357,7 +382,8 @@ is_license_text: yes is_license_reference: yes is_license_tag: yes is_false_positive: yes -is_license_inro: yes +is_license_intro: yes +is_license_clue: yes referenced_filenames: notes: --- @@ -371,7 +397,8 @@ is_license_text: yes is_license_reference: yes is_license_tag: yes is_false_positive: yes -is_license_inro: yes +is_license_intro: yes +is_license_clue: yes referenced_filenames: notes: --- @@ -385,7 +412,8 @@ is_license_text: yes is_license_reference: yes is_license_tag: yes is_false_positive: yes -is_license_inro: yes +is_license_intro: yes +is_license_clue: yes referenced_filenames: notes: --- @@ -399,7 +427,8 @@ is_license_text: yes is_license_reference: yes is_license_tag: yes is_false_positive: yes -is_license_inro: yes +is_license_intro: yes +is_license_clue: yes referenced_filenames: notes: --- @@ -413,7 +442,8 @@ is_license_text: yes is_license_reference: yes is_license_tag: yes is_false_positive: yes -is_license_inro: yes +is_license_intro: yes +is_license_clue: yes referenced_filenames: notes: --- @@ -427,7 +457,8 @@ is_license_text: yes is_license_reference: yes is_license_tag: yes is_false_positive: yes -is_license_inro: yes +is_license_intro: yes +is_license_clue: yes referenced_filenames: notes: --- @@ -441,7 +472,8 @@ is_license_text: yes is_license_reference: yes is_license_tag: yes is_false_positive: yes -is_license_inro: yes +is_license_intro: yes +is_license_clue: yes referenced_filenames: notes: --- @@ -455,7 +487,8 @@ is_license_text: yes is_license_reference: yes is_license_tag: yes is_false_positive: yes -is_license_inro: yes +is_license_intro: yes +is_license_clue: yes 
referenced_filenames: notes: --- @@ -469,7 +502,8 @@ is_license_text: yes is_license_reference: yes is_license_tag: yes is_false_positive: yes -is_license_inro: yes +is_license_intro: yes +is_license_clue: yes referenced_filenames: notes: --- @@ -483,7 +517,8 @@ is_license_text: yes is_license_reference: yes is_license_tag: yes is_false_positive: yes -is_license_inro: yes +is_license_intro: yes +is_license_clue: yes referenced_filenames: notes: --- @@ -497,7 +532,8 @@ is_license_text: yes is_license_reference: yes is_license_tag: yes is_false_positive: yes -is_license_inro: yes +is_license_intro: yes +is_license_clue: yes referenced_filenames: notes: --- @@ -511,7 +547,8 @@ is_license_text: yes is_license_reference: yes is_license_tag: yes is_false_positive: yes -is_license_inro: yes +is_license_intro: yes +is_license_clue: yes referenced_filenames: notes: --- @@ -525,7 +562,8 @@ is_license_text: yes is_license_reference: yes is_license_tag: yes is_false_positive: yes -is_license_inro: yes +is_license_intro: yes +is_license_clue: yes referenced_filenames: notes: --- @@ -539,7 +577,8 @@ is_license_text: yes is_license_reference: yes is_license_tag: yes is_false_positive: yes -is_license_inro: yes +is_license_intro: yes +is_license_clue: yes referenced_filenames: notes: --- @@ -553,7 +592,8 @@ is_license_text: yes is_license_reference: yes is_license_tag: yes is_false_positive: yes -is_license_inro: yes +is_license_intro: yes +is_license_clue: yes referenced_filenames: notes: --- @@ -567,7 +607,8 @@ is_license_text: yes is_license_reference: yes is_license_tag: yes is_false_positive: yes -is_license_inro: yes +is_license_intro: yes +is_license_clue: yes referenced_filenames: notes: --- @@ -581,7 +622,8 @@ is_license_text: yes is_license_reference: yes is_license_tag: yes is_false_positive: yes -is_license_inro: yes +is_license_intro: yes +is_license_clue: yes referenced_filenames: notes: --- diff --git a/etc/scripts/licenses/buildrules.py 
b/etc/scripts/licenses/buildrules.py index 999fb770f12..3f2277a3964 100644 --- a/etc/scripts/licenses/buildrules.py +++ b/etc/scripts/licenses/buildrules.py @@ -222,6 +222,8 @@ def cli(licenses_file): base_name = "false-positive" elif rule.is_license_intro: base_name = "license-intro" + elif rule.is_license_clue: + base_name = "license-clue" else: base_name = rule.license_expression diff --git a/etc/scripts/licenses/report_license_rules.py b/etc/scripts/licenses/report_license_rules.py index a15d5d367f5..8e8ff04abfb 100644 --- a/etc/scripts/licenses/report_license_rules.py +++ b/etc/scripts/licenses/report_license_rules.py @@ -61,6 +61,8 @@ "is_license_tag", "is_license_reference", "is_license_intro", + "is_license_clue", + "is_deprecated", "has_unknown", "only_known_words", "notes", diff --git a/src/licensedcode/models.py b/src/licensedcode/models.py index 92a32a36287..5c522c65a35 100644 --- a/src/licensedcode/models.py +++ b/src/licensedcode/models.py @@ -1189,7 +1189,12 @@ def get_license_tokens(): yield 'licensed' -def load_rules(rules_data_dir=rules_data_dir, with_checks=True, is_builtin=True): +def load_rules( + rules_data_dir=rules_data_dir, + with_checks=True, + is_builtin=True, + ignore_deprecated=True, +): """ Return an iterable of rules loaded from rule files in ``rules_data_dir``. Optionally check for consistency if ``with_checks`` is True. @@ -1211,7 +1216,12 @@ def load_rules(rules_data_dir=rules_data_dir, with_checks=True, is_builtin=True) space_problems.append(rule_file) try: - yield Rule.from_file(rule_file=rule_file) + rule = Rule.from_file(rule_file=rule_file) + if rule.is_deprecated and ignore_deprecated: + continue + else: + yield rule + except Exception as re: if with_checks: model_errors.append(str(re)) @@ -1387,6 +1397,22 @@ class BasicRule: 'after. 
Mutually exclusive from any other is_license_* flag') ) + is_license_clue = attr.ib( + default=False, + repr=False, + metadata=dict( + help='True if this rule text is a clue to a license ' + 'but cannot be considered in a proper license detection ' + 'as a license text/notice/reference/tag/intro as it is ' + 'merely a clue and does not actually point to or refer to ' + 'the actual license directly. This is still valuable information ' + 'useful in determining the license/origin of a file, but this ' + 'should not be summarized/present in the license expression for ' + 'a package or a file, nor its list of license detections. ' + 'considered in the context of the detection that it precedes. ' + 'Mutually exclusive from any other is_license_* flag') + ) + is_false_positive = attr.ib( default=False, repr=False, @@ -1505,6 +1531,19 @@ class BasicRule: 'built at runtime, such as an SPDX license rule.') ) + is_deprecated = attr.ib( + default=False, + repr=False, + metadata=dict( + help='Flag set to True if this rule is deleted, ' + 'and not to be used anymore in license detection. ' + 'This happens usually when a rule is renamed/assigned ' + 'to a separate license-expression, promoted to being a ' + 'license text or just plain retired. 
This is used to ' + 'preserve the link to the rule, and therefore make links ' + 'to rules as permanent.') + ) + ########################################################################### # lists of clues that can be ignored when detected in this license as they # are part of the license or rule text itself @@ -1769,6 +1808,7 @@ def validate(self, licensing=None, thorough=False): self.is_license_reference, self.is_license_tag, self.is_license_intro, + self.is_license_clue, ) has_license_flags = any(license_flags) @@ -1924,6 +1964,7 @@ def to_reference(self): data['is_license_reference'] = self.is_license_reference data['is_license_tag'] = self.is_license_tag data['is_license_intro'] = self.is_license_intro + data['is_license_clue'] = self.is_license_clue data['is_continuous'] = self.is_continuous data['is_builtin'] = self.is_builtin data['is_from_license'] = self.is_from_license @@ -1961,6 +2002,7 @@ def to_dict(self, include_text=False): 'is_license_reference', 'is_license_tag', 'is_license_intro', + 'is_license_clue', 'is_continuous', ) @@ -2253,7 +2295,9 @@ def load(self, rule_file, with_checks=True): self.is_license_tag = data.get('is_license_tag', False) self.is_license_reference = data.get('is_license_reference', False) self.is_license_intro = data.get('is_license_intro', False) + self.is_license_clue = data.get('is_license_clue', False) self.is_continuous = data.get('is_continuous', False) + self.is_deprecated = data.get('is_deprecated', False) self.referenced_filenames = data.get('referenced_filenames', []) or [] diff --git a/src/packagedcode/jar_manifest.py b/src/packagedcode/jar_manifest.py index adc2a6882f7..48df5373eb1 100644 --- a/src/packagedcode/jar_manifest.py +++ b/src/packagedcode/jar_manifest.py @@ -216,7 +216,7 @@ def dget(s): name = i_title or am_nm or ext_nm or nm descriptions = [s_title, i_title, nm] - datasource_id =get_datasource_id(package_type=package_type) + datasource_id = get_datasource_id(package_type=package_type) descriptions = 
unique(descriptions) descriptions = [d for d in descriptions if d and d.strip() and d != name] diff --git a/src/summarycode/score.py b/src/summarycode/score.py index 26b271e7dfd..b0fe826fa85 100644 --- a/src/summarycode/score.py +++ b/src/summarycode/score.py @@ -250,6 +250,7 @@ class LicenseFilter(object): is_license_tag=LicenseFilter(min_coverage=100), is_license_reference=LicenseFilter(min_score=50, min_coverage=100), is_license_intro=LicenseFilter(min_score=100, min_coverage=100), + is_license_clue=LicenseFilter(min_score=100, min_coverage=100), ) @@ -268,6 +269,7 @@ def is_good_license(license_match_object): ('is_license_reference', license_match_object.rule.is_license_reference), ('is_license_tag', license_match_object.rule.is_license_tag), ('is_license_intro', license_match_object.rule.is_license_intro), + ('is_license_clue', license_match_object.rule.is_license_clue), ] ) matched = False diff --git a/tests/licensedcode/data/licenses_reference_reporting/license-reference-works-with-clues.expected.json b/tests/licensedcode/data/licenses_reference_reporting/license-reference-works-with-clues.expected.json index 559d7d3d47e..d2c8fd87484 100644 --- a/tests/licensedcode/data/licenses_reference_reporting/license-reference-works-with-clues.expected.json +++ b/tests/licensedcode/data/licenses_reference_reporting/license-reference-works-with-clues.expected.json @@ -643,6 +643,7 @@ "is_license_reference": false, "is_license_tag": false, "is_license_intro": false, + "is_license_clue": false, "is_continuous": false, "is_builtin": true, "is_from_license": false, @@ -669,6 +670,7 @@ "is_license_reference": false, "is_license_tag": false, "is_license_intro": false, + "is_license_clue": false, "is_continuous": false, "is_builtin": true, "is_from_license": false, @@ -695,6 +697,7 @@ "is_license_reference": false, "is_license_tag": false, "is_license_intro": false, + "is_license_clue": false, "is_continuous": false, "is_builtin": true, "is_from_license": false, @@ -721,6 
+724,7 @@ "is_license_reference": false, "is_license_tag": false, "is_license_intro": false, + "is_license_clue": false, "is_continuous": false, "is_builtin": true, "is_from_license": true, @@ -747,6 +751,7 @@ "is_license_reference": false, "is_license_tag": false, "is_license_intro": false, + "is_license_clue": false, "is_continuous": false, "is_builtin": true, "is_from_license": false, @@ -773,6 +778,7 @@ "is_license_reference": true, "is_license_tag": false, "is_license_intro": false, + "is_license_clue": false, "is_continuous": false, "is_builtin": true, "is_from_license": false, @@ -799,6 +805,7 @@ "is_license_reference": true, "is_license_tag": false, "is_license_intro": false, + "is_license_clue": false, "is_continuous": false, "is_builtin": true, "is_from_license": false, @@ -825,6 +832,7 @@ "is_license_reference": false, "is_license_tag": false, "is_license_intro": true, + "is_license_clue": false, "is_continuous": false, "is_builtin": true, "is_from_license": false, @@ -851,6 +859,7 @@ "is_license_reference": true, "is_license_tag": false, "is_license_intro": false, + "is_license_clue": false, "is_continuous": false, "is_builtin": true, "is_from_license": false, @@ -877,6 +886,7 @@ "is_license_reference": false, "is_license_tag": false, "is_license_intro": false, + "is_license_clue": false, "is_continuous": false, "is_builtin": true, "is_from_license": false, @@ -905,6 +915,7 @@ "is_license_reference": false, "is_license_tag": false, "is_license_intro": false, + "is_license_clue": false, "is_continuous": false, "is_builtin": true, "is_from_license": false, @@ -941,6 +952,7 @@ "is_license_reference": true, "is_license_tag": false, "is_license_intro": false, + "is_license_clue": false, "is_continuous": false, "is_builtin": true, "is_from_license": false, @@ -967,6 +979,7 @@ "is_license_reference": false, "is_license_tag": false, "is_license_intro": false, + "is_license_clue": false, "is_continuous": false, "is_builtin": true, "is_from_license": true, @@ 
-993,6 +1006,7 @@ "is_license_reference": false, "is_license_tag": false, "is_license_intro": false, + "is_license_clue": false, "is_continuous": false, "is_builtin": true, "is_from_license": false, @@ -1030,6 +1044,7 @@ "is_license_reference": true, "is_license_tag": false, "is_license_intro": false, + "is_license_clue": false, "is_continuous": false, "is_builtin": true, "is_from_license": false, @@ -1058,6 +1073,7 @@ "is_license_reference": false, "is_license_tag": false, "is_license_intro": false, + "is_license_clue": false, "is_continuous": false, "is_builtin": true, "is_from_license": false, @@ -1084,6 +1100,7 @@ "is_license_reference": false, "is_license_tag": false, "is_license_intro": false, + "is_license_clue": false, "is_continuous": false, "is_builtin": true, "is_from_license": true, @@ -1120,6 +1137,7 @@ "is_license_reference": false, "is_license_tag": false, "is_license_intro": false, + "is_license_clue": false, "is_continuous": false, "is_builtin": true, "is_from_license": true, @@ -1150,6 +1168,7 @@ "is_license_reference": false, "is_license_tag": false, "is_license_intro": false, + "is_license_clue": false, "is_continuous": false, "is_builtin": true, "is_from_license": false, diff --git a/tests/licensedcode/data/licenses_reference_reporting/scan-matched-text-with-reference.expected.json b/tests/licensedcode/data/licenses_reference_reporting/scan-matched-text-with-reference.expected.json index 71be31b2270..8b5ef3c5d2b 100644 --- a/tests/licensedcode/data/licenses_reference_reporting/scan-matched-text-with-reference.expected.json +++ b/tests/licensedcode/data/licenses_reference_reporting/scan-matched-text-with-reference.expected.json @@ -287,6 +287,7 @@ "is_license_reference": false, "is_license_tag": false, "is_license_intro": false, + "is_license_clue": false, "is_continuous": false, "is_builtin": true, "is_from_license": false, @@ -317,6 +318,7 @@ "is_license_reference": false, "is_license_tag": true, "is_license_intro": false, + "is_license_clue": 
false, "is_continuous": true, "is_builtin": true, "is_from_license": false, @@ -343,6 +345,7 @@ "is_license_reference": false, "is_license_tag": true, "is_license_intro": false, + "is_license_clue": false, "is_continuous": false, "is_builtin": true, "is_from_license": false, @@ -369,6 +372,7 @@ "is_license_reference": false, "is_license_tag": true, "is_license_intro": false, + "is_license_clue": false, "is_continuous": false, "is_builtin": true, "is_from_license": false, diff --git a/tests/licensedcode/data/licenses_reference_reporting/scan-with-reference.expected.json b/tests/licensedcode/data/licenses_reference_reporting/scan-with-reference.expected.json index d503931d0a8..b4947f047b8 100644 --- a/tests/licensedcode/data/licenses_reference_reporting/scan-with-reference.expected.json +++ b/tests/licensedcode/data/licenses_reference_reporting/scan-with-reference.expected.json @@ -284,6 +284,7 @@ "is_license_reference": false, "is_license_tag": false, "is_license_intro": false, + "is_license_clue": false, "is_continuous": false, "is_builtin": true, "is_from_license": false, @@ -314,6 +315,7 @@ "is_license_reference": false, "is_license_tag": true, "is_license_intro": false, + "is_license_clue": false, "is_continuous": true, "is_builtin": true, "is_from_license": false, @@ -340,6 +342,7 @@ "is_license_reference": false, "is_license_tag": true, "is_license_intro": false, + "is_license_clue": false, "is_continuous": false, "is_builtin": true, "is_from_license": false, @@ -366,6 +369,7 @@ "is_license_reference": false, "is_license_tag": true, "is_license_intro": false, + "is_license_clue": false, "is_continuous": false, "is_builtin": true, "is_from_license": false,