Move the npm priority-queue collector out of minecode/visitors/npm.py.

get_package_json, map_npm_package and process_request now live in the new
module minecode/collectors/npm.py, and minecode/tests/test_npm.py calls them
through the `npm_collector` alias.

NOTE(review): line breaks and indentation were restored from a
whitespace-collapsed copy of this patch; confirm the context lines against the
target files. The blob ids on the `index` lines were not recomputed.

diff --git a/minecode/collectors/npm.py b/minecode/collectors/npm.py
new file mode 100644
index 00000000..818e645f
--- /dev/null
+++ b/minecode/collectors/npm.py
@@ -0,0 +1,95 @@
+import logging
+
+import requests
+from packagedcode.npm import NpmPackageJsonHandler, npm_api_url
+from packageurl import PackageURL
+
+from minecode import priority_router
+from packagedb.models import PackageContentType
+
+logger = logging.getLogger(__name__)
+handler = logging.StreamHandler()
+logger.addHandler(handler)
+logger.setLevel(logging.INFO)
+
+# Seconds to wait for the npm registry before giving up; without a timeout
+# requests.get() can block forever on an unresponsive server.
+REQUEST_TIMEOUT = 30
+
+
+def get_package_json(namespace, name, version):
+    """
+    Return the parsed JSON metadata of the npm package described by the purl
+    fields `namespace`, `name` and `version`.
+
+    Return None, after logging the error, if the registry answers with an HTTP
+    error status.
+    """
+    # Create URLs using purl fields
+    url = npm_api_url(
+        namespace=namespace,
+        name=name,
+        version=version,
+    )
+
+    try:
+        response = requests.get(url, timeout=REQUEST_TIMEOUT)
+        response.raise_for_status()
+        return response.json()
+    except requests.exceptions.HTTPError as err:
+        logger.error(f"HTTP error occurred: {err}")
+        return None
+
+
+def map_npm_package(package_url):
+    """
+    Add an npm `package_url` to the PackageDB.
+
+    Return an error string if any errors are encountered during the process
+    """
+    from minecode.model_utils import add_package_to_scan_queue, merge_or_create_package
+
+    package_json = get_package_json(
+        namespace=package_url.namespace,
+        name=package_url.name,
+        version=package_url.version,
+    )
+
+    if not package_json:
+        error = f"Package does not exist on npmjs: {package_url}"
+        logger.error(error)
+        return error
+
+    package = NpmPackageJsonHandler._parse(json_data=package_json)
+    package.extra_data["package_content"] = PackageContentType.SOURCE_ARCHIVE
+
+    db_package, _, _, error = merge_or_create_package(package, visit_level=0)
+
+    # Submit package for scanning
+    if db_package:
+        add_package_to_scan_queue(db_package)
+
+    return error
+
+
+@priority_router.route("pkg:npm/.*")
+def process_request(purl_str):
+    """
+    Process `priority_resource_uri` containing an npm Package URL (PURL) as a
+    URI.
+
+    This involves obtaining Package information for the PURL from npm and
+    using it to create a new PackageDB entry. The package is then added to the
+    scan queue afterwards.
+
+    Return None if the PURL has no version or if the package was mapped without
+    error; otherwise return the error string from `map_npm_package`.
+    """
+    package_url = PackageURL.from_string(purl_str)
+    if not package_url.version:
+        return
+
+    error_msg = map_npm_package(package_url)
+
+    if error_msg:
+        return error_msg
diff --git a/minecode/tests/test_npm.py b/minecode/tests/test_npm.py
index 745aabe2..eb36ef1c 100644
--- a/minecode/tests/test_npm.py
+++ b/minecode/tests/test_npm.py
@@ -22,6 +22,7 @@
 import packagedb
 from minecode import mappers
 from minecode import route
+from minecode.collectors import npm as npm_collector
 from minecode.models import ResourceURI
 from minecode.utils_test import JsonBasedTesting
 from minecode.utils_test import mocked_requests_get
@@ -184,7 +185,7 @@ def setUp(self):
         )
 
     def test_get_package_json(self, regen=False):
-        json_contents = npm.get_package_json(
+        json_contents = npm_collector.get_package_json(
             namespace=self.scan_package.namespace,
             name=self.scan_package.name,
             version=self.scan_package.version
@@ -198,7 +199,7 @@ def test_map_npm_package(self):
         package_count = packagedb.models.Package.objects.all().count()
         self.assertEqual(0, package_count)
         package_url = PackageURL.from_string(self.scan_package.purl)
-        npm.map_npm_package(package_url)
+        npm_collector.map_npm_package(package_url)
         package_count = packagedb.models.Package.objects.all().count()
         self.assertEqual(1, package_count)
         package = packagedb.models.Package.objects.all().first()
diff --git a/minecode/visitors/npm.py b/minecode/visitors/npm.py
index 26fca883..afcc342b 100644
--- a/minecode/visitors/npm.py
+++ b/minecode/visitors/npm.py
@@ -10,20 +10,16 @@
 import logging
 import json
 
-import requests
 from packageurl import PackageURL
 
 from packagedcode.npm import npm_api_url
 from packagedcode.npm import split_scoped_package_name
-from packagedcode.npm import NpmPackageJsonHandler
 
 from minecode import seed
 from minecode import visit_router
-from minecode import priority_router
 from minecode.visitors import NonPersistentHttpVisitor
 from minecode.visitors import URI
 
-from packagedb.models import PackageContentType
 
 
 """
@@ -107,77 +103,3 @@ def get_uris(self, content):
                 data=json.dumps(doc, separators=(',', ':'), ensure_ascii=False),
                 # note: visited is True since there nothing more to visit
                 visited=True)
-
-
-def get_package_json(namespace, name, version):
-    """
-    Return the contents of the package.json file of the package described by the purl
-    field arguments in a string.
-    """
-    # Create URLs using purl fields
-    url = npm_api_url(
-        namespace=namespace,
-        name=name,
-        version=version,
-    )
-
-    try:
-        response = requests.get(url)
-        response.raise_for_status()
-        return response.json()
-    except requests.exceptions.HTTPError as err:
-        logger.error(f"HTTP error occurred: {err}")
-
-
-def map_npm_package(package_url):
-    """
-    Add a npm `package_url` to the PackageDB.
-
-    Return an error string if any errors are encountered during the process
-    """
-    from minecode.model_utils import add_package_to_scan_queue
-    from minecode.model_utils import merge_or_create_package
-
-    package_json = get_package_json(
-        namespace = package_url.namespace,
-        name=package_url.name,
-        version=package_url.version,
-    )
-
-    if not package_json:
-        error = f'Package does not exist on npmjs: {package_url}'
-        logger.error(error)
-        return error
-
-    package = NpmPackageJsonHandler._parse(
-        json_data=package_json
-    )
-    package.extra_data['package_content'] = PackageContentType.SOURCE_ARCHIVE
-
-    db_package, _, _, error = merge_or_create_package(package, visit_level=0)
-
-    # Submit package for scanning
-    if db_package:
-        add_package_to_scan_queue(db_package)
-
-    return error
-
-
-@priority_router.route('pkg:npm/.*')
-def process_request(purl_str):
-    """
-    Process `priority_resource_uri` containing a npm Package URL (PURL) as a
-    URI.
-
-    This involves obtaining Package information for the PURL from npm and
-    using it to create a new PackageDB entry. The package is then added to the
-    scan queue afterwards.
-    """
-    package_url = PackageURL.from_string(purl_str)
-    if not package_url.version:
-        return
-
-    error_msg = map_npm_package(package_url)
-
-    if error_msg:
-        return error_msg