Skip to content

Commit

Permalink
Adjust package license detections from sibling files
Browse files Browse the repository at this point in the history
Signed-off-by: Ayan Sinha Mahapatra <[email protected]>
  • Loading branch information
AyanSinhaMahapatra committed Jul 31, 2024
1 parent d9bf0bd commit a798ccc
Show file tree
Hide file tree
Showing 11 changed files with 198 additions and 2,687 deletions.
35 changes: 22 additions & 13 deletions src/packagedcode/licensing.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
from licensedcode import query

from packagedcode.utils import combine_expressions
from packagedcode.models import PackageData
from summarycode.classify import check_resource_name_start_and_end
from summarycode.classify import LEGAL_STARTS_ENDS
from summarycode.classify import README_STARTS_ENDS
Expand Down Expand Up @@ -351,32 +352,40 @@ def add_license_from_sibling_file(resource, codebase):
if not resource.is_file:
return

package_data = resource.package_data
if not package_data:
package_data_mappings = resource.package_data
if not package_data_mappings:
return

for pkg in package_data:
for pkg in package_data_mappings:
pkg_license_detections = pkg["license_detections"]
if pkg_license_detections:
return

license_detections, license_expression = get_license_detections_from_sibling_file(
resource=resource,
codebase=codebase,
)
package_data_mapping = resource.package_data[0]
package_data = PackageData.from_data(package_data_mapping)
license_detections = None

# We do not want license detections populated from sibling files for
# package manifests that are not the primary package manifests and have no
# purl (for example: dependency lockfiles, requirements, other build manifests).
if package_data.purl and package_data.can_assemble:
license_detections, license_expression = get_license_detections_from_sibling_file(
resource=resource,
codebase=codebase,
)

if not license_detections:
return

package = resource.package_data[0]
package["license_detections"] = license_detections
package["declared_license_expression"] = license_expression
package["declared_license_expression_spdx"] = str(build_spdx_license_expression(
license_expression=package["declared_license_expression"],
package_data_mapping["license_detections"] = license_detections
package_data_mapping["declared_license_expression"] = license_expression
package_data_mapping["declared_license_expression_spdx"] = str(build_spdx_license_expression(
license_expression=package_data_mapping["declared_license_expression"],
licensing=get_cache().licensing,
))

codebase.save_resource(resource)
return package
return package_data_mapping


def is_legal_or_readme(resource):
Expand Down
49 changes: 35 additions & 14 deletions src/packagedcode/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@
except ImportError:
licensing = None

from packagedcode.licensing import get_declared_license_expression_spdx

"""
This module contains data models for packages and dependencies, abstracting and
Expand Down Expand Up @@ -755,27 +754,23 @@ def from_data(cls, package_data, package_only=False):
Skip the license/copyright detection step if `package_only` is True.
"""
if "purl" in package_data:
package_data.pop("purl")
package_mapping = package_data.copy()
if "purl" in package_mapping:
package_mapping.pop("purl")

package_data = cls(**package_data)
package_data_obj = cls(**package_mapping)

if not package_only:
package_data.populate_license_fields()
package_data.populate_holder_field()
package_data_obj.populate_license_fields()
package_data_obj.populate_holder_field()
else:
package_data.normalize_extracted_license_statement()
package_data_obj.normalize_extracted_license_statement()

return package_data
return package_data_obj

@property
def can_assemble(self):
from packagedcode import HANDLER_BY_DATASOURCE_ID
handler = HANDLER_BY_DATASOURCE_ID.get(self.datasource_id)
if issubclass(handler, NonAssemblableDatafileHandler):
return False

return True
return is_from_assemblable_handler(self.datasource_id)

def normalize_extracted_license_statement(self):
"""
Expand Down Expand Up @@ -836,6 +831,8 @@ def populate_license_fields(self):
object, and add the declared_license_expression (and the spdx expression)
and corresponding LicenseDetection data.
"""
from packagedcode.licensing import get_declared_license_expression_spdx

if not self.declared_license_expression and self.extracted_license_statement:

self.license_detections, self.declared_license_expression = \
Expand Down Expand Up @@ -976,6 +973,22 @@ def get_license_detections_and_expression(self):
)


def is_from_assemblable_handler(datasource_id):
    """
    Return True if the corresponding datafile handler for a
    `datasource_id` can be assembled in a package instance.

    Return False when `datasource_id` is empty, when no handler is
    registered for it, or when its handler is a subclass of
    NonAssemblableDatafileHandler.
    """
    if not datasource_id:
        return False

    # Function-scope import, as done by the other handler lookups in this
    # module (presumably to avoid an import cycle with `packagedcode`).
    from packagedcode import HANDLER_BY_DATASOURCE_ID

    handler = HANDLER_BY_DATASOURCE_ID.get(datasource_id)
    if handler is None:
        # issubclass() raises TypeError when its first argument is not a
        # class, so an unregistered datasource_id is reported as not
        # assemblable instead of crashing the caller.
        return False

    return not issubclass(handler, NonAssemblableDatafileHandler)


def get_default_relation_license(datasource_id):
from packagedcode import HANDLER_BY_DATASOURCE_ID
handler = HANDLER_BY_DATASOURCE_ID.get(datasource_id, None)
Expand Down Expand Up @@ -1471,6 +1484,8 @@ def populate_license_fields(cls, package_data):
object, and add the declared_license_expression (and the spdx expression)
and corresponding LicenseDetection data.
"""
from packagedcode.licensing import get_declared_license_expression_spdx

if not package_data.declared_license_expression and package_data.extracted_license_statement:

package_data.license_detections, package_data.declared_license_expression = \
Expand Down Expand Up @@ -1762,6 +1777,12 @@ def update(
return True

def refresh_license_expressions(self, default_relation='AND'):
"""
Re-populate the declared and other license expressions from the
license detections and other license detections for a package.
"""
from packagedcode.licensing import get_declared_license_expression_spdx

if self.license_detections:
self.declared_license_expression = str(combine_expressions(
expressions=[
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1042,7 +1042,7 @@
"identifier": "apache_2_0-08479bef-4de5-8be8-0987-1bec0c232b20",
"license_expression": "apache-2.0",
"license_expression_spdx": "Apache-2.0",
"detection_count": 3,
"detection_count": 1,
"reference_matches": [
{
"license_expression": "apache-2.0",
Expand Down Expand Up @@ -1774,31 +1774,9 @@
"vcs_url": "",
"copyright": null,
"holder": null,
"declared_license_expression": "apache-2.0",
"declared_license_expression_spdx": "Apache-2.0",
"license_detections": [
{
"license_expression": "apache-2.0",
"license_expression_spdx": "Apache-2.0",
"matches": [
{
"license_expression": "apache-2.0",
"spdx_license_expression": "Apache-2.0",
"from_file": "many-podspecs/amplify-ios.LICENSE",
"start_line": 2,
"end_line": 175,
"matcher": "1-hash",
"score": 100.0,
"matched_length": 1405,
"match_coverage": 100.0,
"rule_relevance": 100,
"rule_identifier": "apache-2.0_70.RULE",
"rule_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/apache-2.0_70.RULE"
}
],
"identifier": "apache_2_0-08479bef-4de5-8be8-0987-1bec0c232b20"
}
],
"declared_license_expression": null,
"declared_license_expression_spdx": null,
"license_detections": [],
"other_license_expression": null,
"other_license_expression_spdx": null,
"other_license_detections": [],
Expand Down Expand Up @@ -1855,31 +1833,9 @@
"vcs_url": null,
"copyright": null,
"holder": null,
"declared_license_expression": "apache-2.0",
"declared_license_expression_spdx": "Apache-2.0",
"license_detections": [
{
"license_expression": "apache-2.0",
"license_expression_spdx": "Apache-2.0",
"matches": [
{
"license_expression": "apache-2.0",
"spdx_license_expression": "Apache-2.0",
"from_file": "many-podspecs/amplify-ios.LICENSE",
"start_line": 2,
"end_line": 175,
"matcher": "1-hash",
"score": 100.0,
"matched_length": 1405,
"match_coverage": 100.0,
"rule_relevance": 100,
"rule_identifier": "apache-2.0_70.RULE",
"rule_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/apache-2.0_70.RULE"
}
],
"identifier": "apache_2_0-08479bef-4de5-8be8-0987-1bec0c232b20"
}
],
"declared_license_expression": null,
"declared_license_expression_spdx": null,
"license_detections": [],
"other_license_expression": null,
"other_license_expression_spdx": null,
"other_license_detections": [],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -117,11 +117,25 @@
]
},
{
"identifier": "mit-cacd5c0c-204a-85c2-affc-e4c125b2492a",
"identifier": "mit-56f9dd7c-a466-cdf0-4fe0-6e57d31bc32a",
"license_expression": "mit",
"license_expression_spdx": "MIT",
"detection_count": 2,
"detection_count": 1,
"reference_matches": [
{
"license_expression": "unknown-license-reference",
"license_expression_spdx": "LicenseRef-scancode-unknown-license-reference",
"from_file": "pypi-with-test-manifests/PKG-INFO",
"start_line": 26,
"end_line": 26,
"matcher": "2-aho",
"score": 100.0,
"matched_length": 3,
"match_coverage": 100.0,
"rule_relevance": 100,
"rule_identifier": "unknown-license-reference_see_license_at_manifest_1.RULE",
"rule_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/unknown-license-reference_see_license_at_manifest_1.RULE"
},
{
"license_expression": "mit",
"license_expression_spdx": "MIT",
Expand All @@ -139,25 +153,11 @@
]
},
{
"identifier": "mit-56f9dd7c-a466-cdf0-4fe0-6e57d31bc32a",
"identifier": "mit-cacd5c0c-204a-85c2-affc-e4c125b2492a",
"license_expression": "mit",
"license_expression_spdx": "MIT",
"detection_count": 1,
"reference_matches": [
{
"license_expression": "unknown-license-reference",
"license_expression_spdx": "LicenseRef-scancode-unknown-license-reference",
"from_file": "pypi-with-test-manifests/PKG-INFO",
"start_line": 26,
"end_line": 26,
"matcher": "2-aho",
"score": 100.0,
"matched_length": 3,
"match_coverage": 100.0,
"rule_relevance": 100,
"rule_identifier": "unknown-license-reference_see_license_at_manifest_1.RULE",
"rule_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/unknown-license-reference_see_license_at_manifest_1.RULE"
},
{
"license_expression": "mit",
"license_expression_spdx": "MIT",
Expand Down Expand Up @@ -904,31 +904,9 @@
"vcs_url": null,
"copyright": null,
"holder": null,
"declared_license_expression": "mit",
"declared_license_expression_spdx": "MIT",
"license_detections": [
{
"license_expression": "mit",
"license_expression_spdx": "MIT",
"matches": [
{
"license_expression": "mit",
"spdx_license_expression": "MIT",
"from_file": "pypi-with-test-manifests/LICENSE",
"start_line": 3,
"end_line": 19,
"matcher": "2-aho",
"score": 100.0,
"matched_length": 161,
"match_coverage": 100.0,
"rule_relevance": 100,
"rule_identifier": "mit.LICENSE",
"rule_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/licenses/mit.LICENSE"
}
],
"identifier": "mit-cacd5c0c-204a-85c2-affc-e4c125b2492a"
}
],
"declared_license_expression": null,
"declared_license_expression_spdx": null,
"license_detections": [],
"other_license_expression": null,
"other_license_expression_spdx": null,
"other_license_detections": [],
Expand Down
Loading

0 comments on commit a798ccc

Please sign in to comment.