Skip to content

Commit

Permalink
Initial implementation of --package-only
Browse files Browse the repository at this point in the history
Signed-off-by: Ayan Sinha Mahapatra <[email protected]>
  • Loading branch information
AyanSinhaMahapatra committed Mar 5, 2024
1 parent 66d7166 commit 44b2734
Show file tree
Hide file tree
Showing 7 changed files with 180 additions and 20 deletions.
11 changes: 8 additions & 3 deletions src/packagedcode/bower.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ class BowerJsonHandler(models.DatafileHandler):
documentation_url = 'https://bower.io'

@classmethod
def parse(cls, location):
def parse(cls, location, package_only=False):
with io.open(location, encoding='utf-8') as loc:
package_data = json.load(loc)

Expand Down Expand Up @@ -87,7 +87,12 @@ def parse(cls, location):
)
)

yield models.PackageData(
if package_only:
package_klass = models.PackageDataOnly
else:
package_klass = models.PackageData

yield package_klass(
datasource_id=cls.datasource_id,
type=cls.default_package_type,
name=name,
Expand All @@ -98,5 +103,5 @@ def parse(cls, location):
parties=parties,
homepage_url=homepage_url,
vcs_url=vcs_url,
dependencies=dependencies
dependencies=dependencies,
)
11 changes: 11 additions & 0 deletions src/packagedcode/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -905,6 +905,17 @@ def get_license_detections_and_expression(self):
)


class PackageDataOnly(PackageData):
    """
    A PackageData variant that does not run license/copyright detection
    when an instance is created.
    """

    def __attrs_post_init__(self):
        """
        Only normalize ``extracted_license_statement``: a non-empty value
        that is not already a string is serialized with saneyaml. No
        detection step is run here.
        """
        statement = self.extracted_license_statement
        # Nothing to normalize for empty values or plain strings.
        if not statement or isinstance(statement, str):
            return

        self.extracted_license_statement = saneyaml.dump(statement)


def get_default_relation_license(datasource_id):
from packagedcode import HANDLER_BY_DATASOURCE_ID
handler = HANDLER_BY_DATASOURCE_ID[datasource_id]
Expand Down
29 changes: 23 additions & 6 deletions src/packagedcode/plugin_package.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,6 @@ def get_available_package_parsers(docs=False):
return all_data_packages



@scan_impl
class PackageScanner(ScanPlugin):
"""
Expand Down Expand Up @@ -161,7 +160,19 @@ class PackageScanner(ScanPlugin):
help_group=SCAN_GROUP,
sort_order=21,
),

PluggableCommandLineOption(
(
'--package-only',
),
is_flag=True,
default=False,
help=(
'Only detect package information and skip license/copyright detection steps, '
'in application package and dependency manifests, lockfiles and related data.'
),
help_group=SCAN_GROUP,
sort_order=22,
),
PluggableCommandLineOption(
('--list-packages',),
is_flag=True,
Expand All @@ -172,10 +183,10 @@ class PackageScanner(ScanPlugin):
),
]

def is_enabled(self, package, system_package, **kwargs):
return package or system_package
def is_enabled(self, package, system_package, package_only, **kwargs):
return package or system_package or package_only

def get_scanner(self, package=True, system_package=False, **kwargs):
def get_scanner(self, package=True, system_package=False, package_only=False, **kwargs):
"""
Return a scanner callable to scan a file for package data.
"""
Expand All @@ -185,9 +196,10 @@ def get_scanner(self, package=True, system_package=False, **kwargs):
get_package_data,
application=package,
system=system_package,
package_only=package_only,
)

def process_codebase(self, codebase, strip_root=False, **kwargs):
def process_codebase(self, codebase, strip_root=False, package_only=False, **kwargs):
"""
Populate the ``codebase`` top level ``packages`` and ``dependencies``
with package and dependency instances, assembling parsed package data
Expand All @@ -196,6 +208,11 @@ def process_codebase(self, codebase, strip_root=False, **kwargs):
Also perform additional package license detection that depends on either
file license detection or the package detections.
"""
# With --package-only we only want the package data parsed from each
# datafile, so we skip both the package assembly and the extra package
# license detection steps.
if package_only:
return

has_licenses = hasattr(codebase.root, 'license_detections')

# These steps add proper license detections to package_data and hence
Expand Down
14 changes: 10 additions & 4 deletions src/packagedcode/recognize.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ def recognize_package_data(
location,
application=True,
system=False,
package_only=False,
):
"""
Return a list of Package objects if any package_data were recognized for
Expand All @@ -55,19 +56,24 @@ def recognize_package_data(
if not filetype.is_file(location):
return []

assert application or system
if application and system:
assert application or system or package_only
if package_only or (application and system):
datafile_handlers = ALL_DATAFILE_HANDLERS
elif application:
datafile_handlers = APPLICATION_PACKAGE_DATAFILE_HANDLERS
elif system:
datafile_handlers = SYSTEM_PACKAGE_DATAFILE_HANDLERS

return list(_parse(location, datafile_handlers=datafile_handlers))
return list(_parse(
location=location,
package_only=package_only,
datafile_handlers=datafile_handlers,
))


def _parse(
location,
package_only=False,
datafile_handlers=APPLICATION_PACKAGE_DATAFILE_HANDLERS,
):
"""
Expand All @@ -85,7 +91,7 @@ def _parse(
logger_debug(f'_parse:.is_datafile: {location}')

try:
for parsed in handler.parse(location):
for parsed in handler.parse(location=location, package_only=package_only):
if TRACE:
logger_debug(f' _parse: parsed: {parsed!r}')
yield parsed
Expand Down
10 changes: 6 additions & 4 deletions src/scancode/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,20 +248,21 @@ def get_licenses(
SCANCODE_DEBUG_PACKAGE_API = os.environ.get('SCANCODE_DEBUG_PACKAGE_API', False)


def _get_package_data(location, application=True, system=False, **kwargs):
def _get_package_data(location, application=True, system=False, package_only=False, **kwargs):
"""
Return a mapping of package manifest information detected in the file at ``location``.
Include ``application`` packages (such as pypi) and/or ``system`` packages.
Note that all exceptions are caught if there are any errors while parsing a
package manifest.
"""
assert application or system
assert application or system or package_only

This comment has been minimized.

Copy link
@pombredanne

pombredanne Mar 5, 2024

Contributor

These are not related at all IMHO, are they?

This comment has been minimized.

Copy link
@AyanSinhaMahapatra

AyanSinhaMahapatra Mar 5, 2024

Author Contributor

Yeah, we can run application and system package scan together, if required, but the --package-only can't be run with the other scan options. See the code in recognize.py for choosing datafile handlers.

from packagedcode.recognize import recognize_package_data
try:
return recognize_package_data(
location=location,
application=application,
system=system
system=system,
package_only=package_only,
) or []

except Exception as e:
Expand Down Expand Up @@ -291,7 +292,7 @@ def get_package_info(location, **kwargs):
return dict(packages=[p.to_dict() for p in packages])


def get_package_data(location, application=True, system=False, **kwargs):
def get_package_data(location, application=True, system=False, package_only=False, **kwargs):
"""
Return a mapping of package manifest information detected in the file at
`location`.
Expand All @@ -304,6 +305,7 @@ def get_package_data(location, application=True, system=False, **kwargs):
location=location,
application=application,
system=system,
package_only=package_only,
**kwargs,
) or []

Expand Down
113 changes: 113 additions & 0 deletions tests/packagedcode/data/bower/scan-package-only-expected.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
{
"packages": [],
"dependencies": [],
"files": [
{
"path": "scan",
"type": "directory",
"package_data": [],
"for_packages": [],
"scan_errors": []
},
{
"path": "scan/bower.json",
"type": "file",
"package_data": [
{
"type": "bower",
"namespace": null,
"name": "John Doe",
"version": null,
"qualifiers": {},
"subpath": null,
"primary_language": null,
"description": "Physics-like animations for pretty particles",
"release_date": null,
"parties": [
{
"type": null,
"role": "author",
"name": "Betty Beta <[email protected]>",
"email": null,
"url": null
},
{
"type": null,
"role": "author",
"name": "John Doe",
"email": "[email protected]",
"url": "http://johndoe.com"
}
],
"keywords": [
"motion",
"physics",
"particles"
],
"homepage_url": null,
"download_url": null,
"size": null,
"sha1": null,
"md5": null,
"sha256": null,
"sha512": null,
"bug_tracking_url": null,
"code_view_url": null,
"vcs_url": null,
"copyright": null,
"holder": null,
"declared_license_expression": null,
"declared_license_expression_spdx": null,
"license_detections": [],
"other_license_expression": null,
"other_license_expression_spdx": null,
"other_license_detections": [],
"extracted_license_statement": "- MIT\n- Apache 2.0\n- BSD-3-Clause\n",
"notice_text": null,
"source_packages": [],
"file_references": [],
"extra_data": {},
"dependencies": [
{
"purl": "pkg:bower/get-size",
"extracted_requirement": "~1.2.2",
"scope": "dependencies",
"is_runtime": true,
"is_optional": false,
"is_resolved": false,
"resolved_package": {},
"extra_data": {}
},
{
"purl": "pkg:bower/eventEmitter",
"extracted_requirement": "~4.2.11",
"scope": "dependencies",
"is_runtime": true,
"is_optional": false,
"is_resolved": false,
"resolved_package": {},
"extra_data": {}
},
{
"purl": "pkg:bower/qunit",
"extracted_requirement": "~1.16.0",
"scope": "devDependencies",
"is_runtime": false,
"is_optional": true,
"is_resolved": false,
"resolved_package": {},
"extra_data": {}
}
],
"repository_homepage_url": null,
"repository_download_url": null,
"api_data_url": null,
"datasource_id": "bower_json",
"purl": "pkg:bower/John%20Doe"
}
],
"for_packages": [],
"scan_errors": []
}
]
}
12 changes: 9 additions & 3 deletions tests/packagedcode/test_bower.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
from packagedcode import bower
from packages_test_utils import PackageTester
from scancode_config import REGEN_TEST_FIXTURES
from scancode.cli_test_utils import check_json_scan
from scancode.cli_test_utils import run_scan_click


class TestBower(PackageTester):
Expand Down Expand Up @@ -40,11 +42,15 @@ def test_parse_bower_json_author_objects(self):
self.check_packages_data(package, expected_loc, regen=REGEN_TEST_FIXTURES)

def test_end2end_bower_scan_is_moved_to_parent(self):
from scancode.cli_test_utils import check_json_scan
from scancode.cli_test_utils import run_scan_click

test_file = self.get_test_loc('bower/scan')
expected_file = self.get_test_loc('bower/scan-expected.json')
result_file = self.get_temp_file('results.json')
run_scan_click(['--package', test_file, '--json-pp', result_file])
check_json_scan(expected_file, result_file, regen=REGEN_TEST_FIXTURES)

def test_end2end_bower_scan_is_moved_to_parent_package_only(self):
    """
    Run an end-to-end ``--package-only`` scan on the bower test directory
    and check the JSON results against the stored expected file.
    """
    scan_dir = self.get_test_loc('bower/scan')
    expected_json = self.get_test_loc('bower/scan-package-only-expected.json')
    actual_json = self.get_temp_file('results.json')

    cli_args = ['--package-only', scan_dir, '--json-pp', actual_json]
    run_scan_click(cli_args)
    check_json_scan(expected_json, actual_json, regen=REGEN_TEST_FIXTURES)

0 comments on commit 44b2734

Please sign in to comment.