Skip to content

Commit

Permalink
Add new --purl option to only get purls
Browse files Browse the repository at this point in the history
Adds a new --purl option that, during a package scan, parses and returns
only the purl fields in the package data.

Signed-off-by: Ayan Sinha Mahapatra <[email protected]>
  • Loading branch information
AyanSinhaMahapatra committed Sep 27, 2023
1 parent a916a6b commit 1ee41b6
Show file tree
Hide file tree
Showing 37 changed files with 1,287 additions and 805 deletions.
11 changes: 10 additions & 1 deletion src/packagedcode/about.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ class AboutFileHandler(models.DatafileHandler):
documentation_url = 'https://aboutcode-toolkit.readthedocs.io/en/latest/specification.html'

@classmethod
def parse(cls, location):
def parse(cls, location, purl_only=False):
"""
Yield one or more Package manifest objects given a file ``location`` pointing to a
package archive, manifest or similar.
Expand All @@ -71,6 +71,15 @@ def parse(cls, location):

name = package_data.get('name')
version = package_data.get('version')
if purl_only:
yield models.PackageData(
datasource_id=cls.datasource_id,
type=package_type,
namespace=package_ns,
name=name,
version=version,
)
return

homepage_url = package_data.get('home_url') or package_data.get('homepage_url')
download_url = package_data.get('download_url')
Expand Down
82 changes: 59 additions & 23 deletions src/packagedcode/alpine.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,11 +63,12 @@ class AlpineInstalledDatabaseHandler(models.DatafileHandler):
description = 'Alpine Linux installed package database'

@classmethod
def parse(cls, location):
def parse(cls, location, purl_only=False):
yield from parse_alpine_installed_db(
location=location,
datasource_id=cls.datasource_id,
package_type=cls.default_package_type,
purl_only=purl_only,
)

@classmethod
Expand Down Expand Up @@ -134,9 +135,10 @@ class AlpineApkbuildHandler(models.DatafileHandler):
documentation_url = 'https://wiki.alpinelinux.org/wiki/APKBUILD_Reference'

@classmethod
def parse(cls, location):
package_data = parse_apkbuild(location, strict=True)
cls.populate_license_fields(package_data)
def parse(cls, location, purl_only=False):
package_data = parse_apkbuild(location, strict=True, purl_only=purl_only)
if not purl_only:
cls.populate_license_fields(package_data)
if package_data:
yield package_data

Expand Down Expand Up @@ -165,7 +167,12 @@ def assign_package_to_resources(cls, package, resource, codebase, package_adder)
)


def parse_alpine_installed_db(location, datasource_id, package_type):
def parse_alpine_installed_db(
location,
datasource_id,
package_type,
purl_only=False,
):
"""
Yield PackageData objects from an installed database file at `location`
or None. Typically found at '/lib/apk/db/installed' in an Alpine
Expand All @@ -179,6 +186,7 @@ def parse_alpine_installed_db(location, datasource_id, package_type):
package_fields=package_fields,
datasource_id=datasource_id,
package_type=package_type,
purl_only=purl_only,
)


Expand Down Expand Up @@ -241,7 +249,7 @@ def get_alpine_installed_db_fields(location):
])


def parse_apkbuild(location, strict=False):
def parse_apkbuild(location, strict=False, purl_only=False):
"""
Return a PackageData object from an APKBUILD file at ``location`` or None.
Expand All @@ -256,6 +264,7 @@ def parse_apkbuild(location, strict=False):
datasource_id=AlpineApkbuildHandler.datasource_id,
package_type=AlpineApkbuildHandler.default_package_type,
strict=strict,
purl_only=purl_only,
)


Expand Down Expand Up @@ -732,7 +741,13 @@ def fix_apkbuild(text):
return text


def parse_apkbuild_text(text, datasource_id, package_type, strict=False):
def parse_apkbuild_text(
text,
datasource_id,
package_type,
strict=False,
purl_only=False
):
"""
Return a PackageData object from an APKBUILD text context or None. Only
consider variables with a name listed in the ``names`` set.
Expand Down Expand Up @@ -761,7 +776,8 @@ def parse_apkbuild_text(text, datasource_id, package_type, strict=False):
package = build_package_data(
variables,
datasource_id=datasource_id,
package_type=package_type
package_type=package_type,
purl_only=purl_only,
)

if package and unresolved:
Expand Down Expand Up @@ -800,7 +816,12 @@ def parse_pkginfo(location):
raise NotImplementedError


def build_package_data(package_fields, datasource_id, package_type):
def build_package_data(
package_fields,
datasource_id,
package_type,
purl_only=False
):
"""
Return a PackageData object from a ``package_fields`` iterable of (name,
value) tuples.
Expand Down Expand Up @@ -832,10 +853,17 @@ def build_package_data(package_fields, datasource_id, package_type):
'type': package_type,
}
for name, value in package_fields:
handler = package_handlers_by_field_name.get(name)
handler = package_handlers_by_field_name_purl_only.get(name)
if not purl_only and not handler:
handler = package_handlers_by_field_name_others.get(name)

if handler:
try:
converted = handler(value, all_fields=all_fields, **converted_fields)
converted = handler(
value,
all_fields=all_fields,
**converted_fields
)
except:
raise Exception(*list(package_fields))

Expand Down Expand Up @@ -1199,11 +1227,11 @@ def source_handler(value, **kwargs):
# mapping of:
# - the package field one letter name in the installed db,
# - a handler for that field
package_handlers_by_field_name = {
package_handlers_by_field_name_purl_only = {

############################################################################
# per-package fields
############################################################################
###########################################################################
# per-package fields (only purl fields)
###########################################################################

# name of the package
# For example: P:busybox
Expand All @@ -1218,6 +1246,22 @@ def source_handler(value, **kwargs):
'V': build_name_value_str_handler('version'),
'pkgver': apkbuild_version_handler,

# For example: D:scanelf so:libc.musl-x86_64.so.1
# For example: D:so:libc.musl-x86_64.so.1 so:libcrypto.so.1.1 so:libssl.so.1.1 so:libz.so.1
# Can occur more than once
# 'depend' in .PKGINFO and APKBUILD
# TODO: add other dependencies (e.g. makedepends)
'D': D_dependencies_handler,
'depend': D_dependencies_handler,
}


package_handlers_by_field_name_others = {

###########################################################################
# per-package fields (other than purls)
###########################################################################

# For example: T:Size optimized toolbox of many common UNIX utilities
# 'pkgdesc' in .PKGINFO and APKBUILD
'T': build_name_value_str_handler('description'),
Expand Down Expand Up @@ -1272,14 +1316,6 @@ def source_handler(value, **kwargs):
'c': c_git_commit_handler,
'commit': c_git_commit_handler,

# For example: D:scanelf so:libc.musl-x86_64.so.1
# For example: D:so:libc.musl-x86_64.so.1 so:libcrypto.so.1.1 so:libssl.so.1.1 so:libz.so.1
# Can occur more than once
# 'depend' in .PKGINFO and APKBUILD
# TODO: add other dependencies (e.g. makedepends)
'D': D_dependencies_handler,
'depend': D_dependencies_handler,

# For example: source="http://liba52.sourceforge.net/files/$pkgname-$pkgver.tar.gz
# automake.patch
# fix-globals-test-x86-pie.patch"
Expand Down
84 changes: 45 additions & 39 deletions src/packagedcode/bower.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,42 +25,13 @@ class BowerJsonHandler(models.DatafileHandler):
documentation_url = 'https://bower.io'

@classmethod
def parse(cls, location):
def parse(cls, location, purl_only=False):
with io.open(location, encoding='utf-8') as loc:
package_data = json.load(loc)

# note: having no name is not a problem for private packages. See #1514
name = package_data.get('name')

description = package_data.get('description')
version = package_data.get('version')
extracted_license_statement = package_data.get('license')
keywords = package_data.get('keywords') or []

parties = []

authors = package_data.get('authors') or []
for author in authors:
if isinstance(author, dict):
name = author.get('name')
email = author.get('email')
url = author.get('homepage')
party = models.Party(name=name, role='author', email=email, url=url)
parties.append(party)
elif isinstance(author, str):
parties.append(models.Party(name=author, role='author'))
else:
parties.append(models.Party(name=repr(author), role='author'))

homepage_url = package_data.get('homepage')

repository = package_data.get('repository') or {}
repo_type = repository.get('type')
repo_url = repository.get('url')

vcs_url = None
if repo_type and repo_url:
vcs_url = f'{repo_type}+{repo_url}'

deps = package_data.get('dependencies') or {}
dependencies = []
Expand All @@ -86,17 +57,52 @@ def parse(cls, location):
is_optional=True,
)
)

yield models.PackageData(
pkg = models.PackageData(
datasource_id=cls.datasource_id,
type=cls.default_package_type,
name=name,
description=description,
version=version,
extracted_license_statement=extracted_license_statement,
keywords=keywords,
parties=parties,
homepage_url=homepage_url,
vcs_url=vcs_url,
dependencies=dependencies
dependencies=dependencies,
)
if purl_only:
yield pkg
return

description = package_data.get('description')
extracted_license_statement = package_data.get('license')
keywords = package_data.get('keywords') or []

parties = []

authors = package_data.get('authors') or []
for author in authors:
if isinstance(author, dict):
name = author.get('name')
email = author.get('email')
url = author.get('homepage')
party = models.Party(name=name, role='author', email=email, url=url)
parties.append(party)
elif isinstance(author, str):
parties.append(models.Party(name=author, role='author'))
else:
parties.append(models.Party(name=repr(author), role='author'))

homepage_url = package_data.get('homepage')

repository = package_data.get('repository') or {}
repo_type = repository.get('type')
repo_url = repository.get('url')

vcs_url = None
if repo_type and repo_url:
vcs_url = f'{repo_type}+{repo_url}'

pkg.description = description
pkg.primary_language = BowerJsonHandler.default_primary_language
pkg.extracted_license_statement = extracted_license_statement
pkg.keywords = keywords
pkg.parties = parties
pkg.homepage_url = homepage_url
pkg.vcs_url = vcs_url
yield pkg
Loading

0 comments on commit 1ee41b6

Please sign in to comment.