diff --git a/src/packagedcode/bower.py b/src/packagedcode/bower.py index fe8c197cc87..69b7ae352c2 100644 --- a/src/packagedcode/bower.py +++ b/src/packagedcode/bower.py @@ -25,7 +25,7 @@ class BowerJsonHandler(models.DatafileHandler): documentation_url = 'https://bower.io' @classmethod - def parse(cls, location): + def parse(cls, location, package_only=False): with io.open(location, encoding='utf-8') as loc: package_data = json.load(loc) @@ -87,7 +87,12 @@ def parse(cls, location): ) ) - yield models.PackageData( + if package_only: + package_klass = models.PackageDataOnly + else: + package_klass = models.PackageData + + yield package_klass( datasource_id=cls.datasource_id, type=cls.default_package_type, name=name, @@ -98,5 +103,5 @@ def parse(cls, location): parties=parties, homepage_url=homepage_url, vcs_url=vcs_url, - dependencies=dependencies + dependencies=dependencies, ) diff --git a/src/packagedcode/models.py b/src/packagedcode/models.py index 8311e570e42..9d0ad4a8077 100644 --- a/src/packagedcode/models.py +++ b/src/packagedcode/models.py @@ -905,6 +905,17 @@ def get_license_detections_and_expression(self): ) +class PackageDataOnly(PackageData): + """ + PackageData class which skips the license/copyright detection during instance + creation. + """ + + def __attrs_post_init__(self): + if self.extracted_license_statement and not isinstance(self.extracted_license_statement, str): + self.extracted_license_statement = saneyaml.dump(self.extracted_license_statement) + + def get_default_relation_license(datasource_id): from packagedcode import HANDLER_BY_DATASOURCE_ID handler = HANDLER_BY_DATASOURCE_ID[datasource_id] diff --git a/src/packagedcode/plugin_package.py b/src/packagedcode/plugin_package.py index 78c5464c48d..95655f74eaa 100644 --- a/src/packagedcode/plugin_package.py +++ b/src/packagedcode/plugin_package.py @@ -112,7 +112,6 @@ def get_available_package_parsers(docs=False): return all_data_packages - @scan_impl class PackageScanner(ScanPlugin): """ @@ -161,7 +160,19 @@ class PackageScanner(ScanPlugin): help_group=SCAN_GROUP, sort_order=21, ), - + PluggableCommandLineOption( + ( + '--package-only', + ), + is_flag=True, + default=False, + help=( + 'Only detect package information and skip license/copyright detection steps, ' + 'in application package and dependency manifests, lockfiles and related data.' + ), + help_group=SCAN_GROUP, + sort_order=22, + ), PluggableCommandLineOption( ('--list-packages',), is_flag=True, @@ -172,10 +183,10 @@ class PackageScanner(ScanPlugin): ), ] - def is_enabled(self, package, system_package, **kwargs): - return package or system_package + def is_enabled(self, package, system_package, package_only, **kwargs): + return package or system_package or package_only - def get_scanner(self, package=True, system_package=False, **kwargs): + def get_scanner(self, package=True, system_package=False, package_only=False, **kwargs): """ Return a scanner callable to scan a file for package data. """ @@ -185,9 +196,10 @@ def get_scanner(self, package=True, system_package=False, **kwargs): get_package_data, application=package, system=system_package, + package_only=package_only, ) - def process_codebase(self, codebase, strip_root=False, **kwargs): + def process_codebase(self, codebase, strip_root=False, package_only=False, **kwargs): """ Populate the ``codebase`` top level ``packages`` and ``dependencies`` with package and dependency instances, assembling parsed package data @@ -196,6 +208,11 @@ def process_codebase(self, codebase, strip_root=False, **kwargs): Also perform additional package license detection that depends on either file license detection or the package detections. """ + # If we only want purls, we want to skip both the package + # assembly and the extra package license detection steps + if package_only: + return + has_licenses = hasattr(codebase.root, 'license_detections') # These steps add proper license detections to package_data and hence diff --git a/src/packagedcode/recognize.py b/src/packagedcode/recognize.py index c7e794ecaf6..1162cc1b054 100644 --- a/src/packagedcode/recognize.py +++ b/src/packagedcode/recognize.py @@ -44,6 +44,7 @@ def recognize_package_data( location, application=True, system=False, + package_only=False, ): """ Return a list of Package objects if any package_data were recognized for @@ -55,19 +56,24 @@ def recognize_package_data( if not filetype.is_file(location): return [] - assert application or system - if application and system: + assert application or system or package_only + if package_only or (application and system): datafile_handlers = ALL_DATAFILE_HANDLERS elif application: datafile_handlers = APPLICATION_PACKAGE_DATAFILE_HANDLERS elif system: datafile_handlers = SYSTEM_PACKAGE_DATAFILE_HANDLERS - return list(_parse(location, datafile_handlers=datafile_handlers)) + return list(_parse( + location=location, + package_only=package_only, + datafile_handlers=datafile_handlers, + )) def _parse( location, + package_only=False, datafile_handlers=APPLICATION_PACKAGE_DATAFILE_HANDLERS, ): """ @@ -85,7 +91,7 @@ def _parse( logger_debug(f'_parse:.is_datafile: {location}') try: - for parsed in handler.parse(location): + for parsed in handler.parse(location=location, package_only=package_only): if TRACE: logger_debug(f' _parse: parsed: {parsed!r}') yield parsed diff --git a/src/scancode/api.py b/src/scancode/api.py index 7d3edbf1516..36cc4bc2ed8 100644 --- a/src/scancode/api.py +++ b/src/scancode/api.py @@ -248,20 +248,21 @@ def get_licenses( SCANCODE_DEBUG_PACKAGE_API = os.environ.get('SCANCODE_DEBUG_PACKAGE_API', False) -def _get_package_data(location, application=True, system=False, **kwargs): +def _get_package_data(location, application=True, system=False, package_only=False, **kwargs): """ Return a mapping of package manifest information detected in the file at ``location``. Include ``application`` packages (such as pypi) and/or ``system`` packages. Note that all exceptions are caught if there are any errors while parsing a package manifest. """ - assert application or system + assert application or system or package_only from packagedcode.recognize import recognize_package_data try: return recognize_package_data( location=location, application=application, - system=system + system=system, + package_only=package_only, ) or [] except Exception as e: @@ -291,7 +292,7 @@ def get_package_info(location, **kwargs): return dict(packages=[p.to_dict() for p in packages]) -def get_package_data(location, application=True, system=False, **kwargs): +def get_package_data(location, application=True, system=False, package_only=False, **kwargs): """ Return a mapping of package manifest information detected in the file at `location`. @@ -304,6 +305,7 @@ def get_package_data(location, application=True, system=False, **kwargs): location=location, application=application, system=system, + package_only=package_only, **kwargs, ) or [] diff --git a/tests/packagedcode/data/bower/scan-package-only-expected.json b/tests/packagedcode/data/bower/scan-package-only-expected.json new file mode 100644 index 00000000000..e739c38fa0c --- /dev/null +++ b/tests/packagedcode/data/bower/scan-package-only-expected.json @@ -0,0 +1,113 @@ +{ + "packages": [], + "dependencies": [], + "files": [ + { + "path": "scan", + "type": "directory", + "package_data": [], + "for_packages": [], + "scan_errors": [] + }, + { + "path": "scan/bower.json", + "type": "file", + "package_data": [ + { + "type": "bower", + "namespace": null, + "name": "John Doe", + "version": null, + "qualifiers": {}, + "subpath": null, + "primary_language": null, + "description": "Physics-like animations for pretty particles", + "release_date": null, + "parties": [ + { + "type": null, + "role": "author", + "name": "Betty Beta ", + "email": null, + "url": null + }, + { + "type": null, + "role": "author", + "name": "John Doe", + "email": "john@doe.com", + "url": "http://johndoe.com" + } + ], + "keywords": [ + "motion", + "physics", + "particles" + ], + "homepage_url": null, + "download_url": null, + "size": null, + "sha1": null, + "md5": null, + "sha256": null, + "sha512": null, + "bug_tracking_url": null, + "code_view_url": null, + "vcs_url": null, + "copyright": null, + "holder": null, + "declared_license_expression": null, + "declared_license_expression_spdx": null, + "license_detections": [], + "other_license_expression": null, + "other_license_expression_spdx": null, + "other_license_detections": [], + "extracted_license_statement": "- MIT\n- Apache 2.0\n- BSD-3-Clause\n", + "notice_text": null, + "source_packages": [], + "file_references": [], + "extra_data": {}, + "dependencies": [ + { + "purl": "pkg:bower/get-size", + "extracted_requirement": "~1.2.2", + "scope": "dependencies", + "is_runtime": true, + "is_optional": false, + "is_resolved": false, + "resolved_package": {}, + "extra_data": {} + }, + { + "purl": "pkg:bower/eventEmitter", + "extracted_requirement": "~4.2.11", + "scope": "dependencies", + "is_runtime": true, + "is_optional": false, + "is_resolved": false, + "resolved_package": {}, + "extra_data": {} + }, + { + "purl": "pkg:bower/qunit", + "extracted_requirement": "~1.16.0", + "scope": "devDependencies", + "is_runtime": false, + "is_optional": true, + "is_resolved": false, + "resolved_package": {}, + "extra_data": {} + } + ], + "repository_homepage_url": null, + "repository_download_url": null, + "api_data_url": null, + "datasource_id": "bower_json", + "purl": "pkg:bower/John%20Doe" + } + ], + "for_packages": [], + "scan_errors": [] + } + ] +} \ No newline at end of file diff --git a/tests/packagedcode/test_bower.py b/tests/packagedcode/test_bower.py index e2185dfa1cb..cd3337af2a2 100644 --- a/tests/packagedcode/test_bower.py +++ b/tests/packagedcode/test_bower.py @@ -12,6 +12,8 @@ from packagedcode import bower from packages_test_utils import PackageTester from scancode_config import REGEN_TEST_FIXTURES +from scancode.cli_test_utils import check_json_scan +from scancode.cli_test_utils import run_scan_click class TestBower(PackageTester): @@ -40,11 +42,15 @@ def test_parse_bower_json_author_objects(self): self.check_packages_data(package, expected_loc, regen=REGEN_TEST_FIXTURES) def test_end2end_bower_scan_is_moved_to_parent(self): - from scancode.cli_test_utils import check_json_scan - from scancode.cli_test_utils import run_scan_click - test_file = self.get_test_loc('bower/scan') expected_file = self.get_test_loc('bower/scan-expected.json') result_file = self.get_temp_file('results.json') run_scan_click(['--package', test_file, '--json-pp', result_file]) check_json_scan(expected_file, result_file, regen=REGEN_TEST_FIXTURES) + + def test_end2end_bower_scan_is_moved_to_parent_package_only(self): + test_file = self.get_test_loc('bower/scan') + expected_file = self.get_test_loc('bower/scan-package-only-expected.json') + result_file = self.get_temp_file('results.json') + run_scan_click(['--package-only', test_file, '--json-pp', result_file]) + check_json_scan(expected_file, result_file, regen=REGEN_TEST_FIXTURES)