Skip to content

Commit

Permalink
Support scanning .dsc and copyright files
Browse files Browse the repository at this point in the history
Supports scanning .dsc and _copyright files
from Debian package and metadata archives for
package metadata.

Signed-off-by: Ayan Sinha Mahapatra <[email protected]>
  • Loading branch information
AyanSinhaMahapatra committed Jan 30, 2024
1 parent 4f3cca0 commit c5dd035
Show file tree
Hide file tree
Showing 9 changed files with 13,005 additions and 688 deletions.
10 changes: 9 additions & 1 deletion src/packagedcode/debian.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,11 +197,19 @@ def parse(cls, location):
location=location,
remove_pgp_signature=True,
)
yield build_package_data(

package_data_from_file = build_package_data_from_package_filename(
filename=os.path.basename(location),
datasource_id=cls.datasource_id,
package_type=cls.default_package_type,
)
package_data = build_package_data(
debian_data=debian_data,
datasource_id=cls.datasource_id,
package_type=cls.default_package_type,
)
package_data.update_purl_fields(package_data=package_data_from_file)
yield package_data

@classmethod
def assign_package_to_resources(cls, package, resource, codebase, package_adder):
Expand Down
56 changes: 56 additions & 0 deletions src/packagedcode/debian_copyright.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
from debian_inspector.copyright import CopyrightLicenseParagraph
from debian_inspector.copyright import CopyrightHeaderParagraph
from debian_inspector.copyright import DebianCopyright
from debian_inspector.package import CodeMetadata
from debian_inspector.version import Version as DebVersion
from license_expression import ExpressionError
from license_expression import LicenseSymbolLike
from license_expression import Licensing
Expand Down Expand Up @@ -263,11 +265,65 @@ class StandaloneDebianCopyrightFileHandler(BaseDebianCopyrightFileHandler):
'*_copyright',
)

@classmethod
def is_datafile(cls, location, filetypes=tuple()):
    """
    Return a true value when ``location`` is a datafile for this handler.

    The parent class check must pass first; its result is returned as-is
    when it is falsy. The file is then rejected if either
    DebianCopyrightFileInPackageHandler or DebianCopyrightFileInSourceHandler
    recognizes it as one of their datafiles.
    """
    recognized_by_parent = super().is_datafile(location, filetypes=filetypes)
    if not recognized_by_parent:
        return recognized_by_parent

    # Copyright files found inside a package take precedence over the
    # standalone handler.
    if DebianCopyrightFileInPackageHandler.is_datafile(location):
        return False

    # Likewise for copyright files found inside a source tree.
    return not DebianCopyrightFileInSourceHandler.is_datafile(location)

@classmethod
def assemble(cls, package_data, resource, codebase, package_adder):
    """
    Yield everything the parent class ``assemble()`` yields for these
    arguments. No handler-specific assembly is added here.
    """
    # Delegate entirely to the default assembly of the parent class.
    default_assembly = super().assemble(package_data, resource, codebase, package_adder)
    yield from default_assembly

@classmethod
def parse(cls, location):
    """
    Yield one package data for the copyright file at ``location``.

    License and copyright information comes from the parent class
    ``parse()``, as for other copyright files. The purl fields are
    additionally filled in from the copyright filename when that filename
    carries them, as is the case for files obtained from an upstream
    metadata archive.
    """
    # Only the last item yielded by the parent parse() is kept; list.pop()
    # raises IndexError if the parent yields nothing.
    parsed_items = list(super().parse(location))
    package_data = parsed_items.pop()

    filename = os.path.basename(location)
    filename_package_data = build_package_data_from_metadata_filename(
        filename=filename,
        datasource_id=cls.datasource_id,
        package_type=cls.default_package_type,
    )

    # Nothing is returned when the filename is not a metadata filename.
    if filename_package_data:
        package_data.update_purl_fields(package_data=filename_package_data)

    yield package_data


def build_package_data_from_metadata_filename(filename, datasource_id, package_type):
    """
    Return a PackageData built from the ``filename`` of a Debian package
    metadata file, using ``datasource_id`` and ``package_type`` as its
    datasource id and type.

    Return None when ``CodeMetadata.from_filename`` raises a ValueError
    for this filename.
    """
    try:
        metadata = CodeMetadata.from_filename(filename=filename)
    except ValueError:
        return None

    # Version objects are stored as their string form; anything else is
    # passed through unchanged.
    raw_version = metadata.version
    version = str(raw_version) if isinstance(raw_version, DebVersion) else raw_version

    # TODO: the distro cannot be known from the filename alone. A default
    # is used so that the resulting purl always has a namespace.
    default_namespace = 'debian'

    purl_fields = dict(
        datasource_id=datasource_id,
        type=package_type,
        name=metadata.name,
        namespace=default_namespace,
        version=version,
    )
    return models.PackageData(**purl_fields)


class NotReallyStructuredCopyrightFile(Exception):
"""
Expand Down
20 changes: 20 additions & 0 deletions src/packagedcode/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -784,6 +784,26 @@ def populate_license_fields(self):
if self.extracted_license_statement and not isinstance(self.extracted_license_statement, str):
self.extracted_license_statement = saneyaml.dump(self.extracted_license_statement)

def update_purl_fields(self, package_data, replace=False):
    """
    Copy the purl fields (name, namespace, version and qualifiers) from
    ``package_data`` onto this object.

    Nothing is changed when the two objects do not have the same ``type``.
    By default only the fields that are empty on this object are filled
    in. With ``replace`` set to True every purl field is overwritten with
    the value from ``package_data``, whatever that value is.
    """
    same_type = self.type == package_data.type
    if not same_type:
        return

    for field_name in ("name", "namespace", "version", "qualifiers"):
        current_value = getattr(self, field_name)
        # An existing non-empty value is only overwritten on request.
        if replace or not current_value:
            setattr(self, field_name, getattr(package_data, field_name))

def to_dict(self, with_details=True, **kwargs):
mapping = super().to_dict(with_details=with_details, **kwargs)
if not with_details:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
This is the Debian prepackaged version of the GNU diffutils package.
GNU `diff' was written by Mike Haertel, David Hayes, Richard Stallman,
Len Tower, and Paul Eggert. Wayne Davison designed and implemented
the unified output format. GNU `diff3' was written by Randy Smith.
GNU `sdiff' was written by Thomas Lord. GNU `cmp' was written by
Torbjorn Granlund and David MacKenzie.

The source for this release was obtained from

https://ftp.gnu.org/gnu/diffutils/diffutils-3.7.tar.xz

Program copyright and license:
=============================

Copyright (C) 1988-1996, 1998, 2001-2002, 2004, 2006-2007, 2009-2013, 2015-2018
Free Software Foundation, Inc.

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

On Debian systems, the complete text of the GNU General Public License
may be found in `/usr/share/common-licenses/GPL'.


Manual copyright and license:
============================

Copyright (C) 1992-1994, 1998, 2001-2002, 2004, 2006, 2009-2018 Free
Software Foundation, Inc.

Permission is granted to copy, distribute and/or modify this document
under the terms of the GNU Free Documentation License, Version 1.3 or
any later version published by the Free Software Foundation; with no
Invariant Sections, no Front-Cover Texts, and no Back-Cover Texts.

On Debian systems, the complete text of the GNU Free Documentation
License may be found in `/usr/share/common-licenses/GFDL'.
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
[
{
"type": "deb",
"namespace": "debian",
"name": "diffutils",
"version": "3.7-5",
"qualifiers": {},
"subpath": null,
"primary_language": null,
"description": null,
"release_date": null,
"parties": [],
"keywords": [],
"homepage_url": null,
"download_url": null,
"size": null,
"sha1": null,
"md5": null,
"sha256": null,
"sha512": null,
"bug_tracking_url": null,
"code_view_url": null,
"vcs_url": null,
"copyright": "Copyright (c) 1988-1996, 1998, 2001-2002, 2004, 2006-2007, 2009-2013, 2015-2018 Free Software Foundation, Inc.\nCopyright (c) 1992-1994, 1998, 2001-2002, 2004, 2006, 2009-2018 Free Software Foundation, Inc.",
"holder": "Free Software Foundation, Inc.\nFree Software Foundation, Inc.",
"declared_license_expression": "gpl-3.0-plus AND gfdl-1.3-plus",
"declared_license_expression_spdx": "GPL-3.0-or-later AND GFDL-1.3-or-later",
"license_detections": [
{
"license_expression": "gpl-3.0-plus",
"license_expression_spdx": "GPL-3.0-or-later",
"matches": [
{
"license_expression": "gpl-3.0-plus",
"spdx_license_expression": "GPL-3.0-or-later",
"from_file": null,
"start_line": 18,
"end_line": 29,
"matcher": "2-aho",
"score": 100.0,
"matched_length": 100,
"match_coverage": 100.0,
"rule_relevance": 100,
"rule_identifier": "gpl-3.0-plus_234.RULE",
"rule_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/gpl-3.0-plus_234.RULE",
"matched_text": "This program is free software: you can redistribute it and/or modify\n it under the terms of the GNU General Public License as published by\n the Free Software Foundation, either version 3 of the License, or\n (at your option) any later version.\n\n This program is distributed in the hope that it will be useful,\n but WITHOUT ANY WARRANTY; without even the implied warranty of\n MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n GNU General Public License for more details.\n\nOn Debian systems, the complete text of the GNU General Public License\nmay be found in `/usr/share/common-licenses/GPL'."
}
],
"identifier": "gpl_3_0_plus-5534b6bc-4eef-713f-94c7-caa583171b85"
},
{
"license_expression": "gfdl-1.3-plus",
"license_expression_spdx": "GFDL-1.3-or-later",
"matches": [
{
"license_expression": "gfdl-1.3-plus",
"spdx_license_expression": "GFDL-1.3-or-later",
"from_file": null,
"start_line": 38,
"end_line": 44,
"matcher": "2-aho",
"score": 100.0,
"matched_length": 67,
"match_coverage": 100.0,
"rule_relevance": 100,
"rule_identifier": "gfdl-1.3-plus_3.RULE",
"rule_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/gfdl-1.3-plus_3.RULE",
"matched_text": "Permission is granted to copy, distribute and/or modify this document\n under the terms of the GNU Free Documentation License, Version 1.3 or\n any later version published by the Free Software Foundation; with no\n Invariant Sections, no Front-Cover Texts, and no Back-Cover Texts.\n\nOn Debian systems, the complete text of the GNU Free Documentation\nLicense may be found in `/usr/share/common-licenses/GFDL'."
}
],
"identifier": "gfdl_1_3_plus-42b93f1a-aad4-0749-d2f0-4655bd194e40"
}
],
"other_license_expression": null,
"other_license_expression_spdx": null,
"other_license_detections": [],
"extracted_license_statement": null,
"notice_text": null,
"source_packages": [],
"file_references": [],
"extra_data": {},
"dependencies": [],
"repository_homepage_url": null,
"repository_download_url": null,
"api_data_url": null,
"datasource_id": "debian_copyright_standalone",
"purl": "pkg:deb/debian/[email protected]"
}
]
Original file line number Diff line number Diff line change
@@ -1,5 +1,64 @@
{
"packages": [],
"packages": [
{
"type": "deb",
"namespace": "debian",
"name": "adduser",
"version": "3.118+deb11u1",
"qualifiers": {
"architecture": "all"
},
"subpath": null,
"primary_language": null,
"description": null,
"release_date": null,
"parties": [
{
"type": null,
"role": "maintainer",
"name": "Debian Adduser Developers <[email protected]>",
"email": null,
"url": null
}
],
"keywords": [],
"homepage_url": null,
"download_url": null,
"size": null,
"sha1": null,
"md5": null,
"sha256": null,
"sha512": null,
"bug_tracking_url": null,
"code_view_url": null,
"vcs_url": null,
"copyright": null,
"holder": null,
"declared_license_expression": null,
"declared_license_expression_spdx": null,
"license_detections": [],
"other_license_expression": null,
"other_license_expression_spdx": null,
"other_license_detections": [],
"extracted_license_statement": null,
"notice_text": null,
"source_packages": [
"pkg:deb/debian/adduser"
],
"extra_data": {},
"repository_homepage_url": null,
"repository_download_url": null,
"api_data_url": null,
"package_uid": "pkg:deb/debian/[email protected]%2Bdeb11u1?architecture=all&uuid=fixed-uid-done-for-testing-5642512d1758",
"datafile_paths": [
"adduser_3.118+deb11u1.dsc"
],
"datasource_ids": [
"debian_source_control_dsc"
],
"purl": "pkg:deb/debian/[email protected]%2Bdeb11u1?architecture=all"
}
],
"dependencies": [],
"files": [
{
Expand All @@ -9,7 +68,7 @@
{
"type": "deb",
"namespace": "debian",
"name": null,
"name": "adduser",
"version": "3.118+deb11u1",
"qualifiers": {
"architecture": "all"
Expand Down Expand Up @@ -58,10 +117,12 @@
"repository_download_url": null,
"api_data_url": null,
"datasource_id": "debian_source_control_dsc",
"purl": null
"purl": "pkg:deb/debian/[email protected]%2Bdeb11u1?architecture=all"
}
],
"for_packages": [],
"for_packages": [
"pkg:deb/debian/[email protected]%2Bdeb11u1?architecture=all&uuid=fixed-uid-done-for-testing-5642512d1758"
],
"scan_errors": []
}
]
Expand Down
Loading

0 comments on commit c5dd035

Please sign in to comment.