diff --git a/src/packagedcode/models.py b/src/packagedcode/models.py
index 42795bb3f30..26c6b6d0c8c 100644
--- a/src/packagedcode/models.py
+++ b/src/packagedcode/models.py
@@ -374,10 +374,11 @@ class DependentPackage(ModelMixin):
     is_direct = Boolean(
         default=True,
         label='is direct flag',
-        help='True if this dependency version requirement is '
-             'a direct dependency relation between two packages '
-             'as opposed to a transitive dependency relation, '
-             'which are present in lockfiles/dependency list.')
+        help='True if this is a direct, first-level dependency, '
+             'defined in the manifest of a package. False if this '
+             'is an indirect, transitive dependency resolved from '
+             'first-level dependencies.'
+    )
 
     resolved_package = Mapping(
         label='resolved package data',
@@ -692,18 +693,20 @@ class PackageData(IdentifiablePackageData):
 
     is_private = Boolean(
         default=False,
-        label='is resolved flag',
-        help='True if the associated package for this package manifest '
-             'is never meant to be published to the corresponding package '
-             'repository, and is a private package.'
+        label='is private flag',
+        help='True if this is a private package: either a package not meant '
+             'to be published on a repository, or a local package without a '
+             'name and version, used primarily to track dependencies and '
+             'other information and to build this package, as is common '
+             'for JavaScript and PHP applications.'
     )
 
     is_virtual = Boolean(
         default=False,
         label='is virtual flag',
-        help='True if this package or any of its files are not present in '
-             'the codebase, but this package was created from a resolved '
-             'package, typically present in a lockfile.'
+        help='True if this package is created only from a manifest or lockfile, '
+             'and not from its actual packaged code. The files of this package '
+             'are not present in the codebase.'
     )
 
     extra_data = Mapping(
diff --git a/src/packagedcode/npm.py b/src/packagedcode/npm.py
index 6085c67fb55..864ee39f0c7 100644
--- a/src/packagedcode/npm.py
+++ b/src/packagedcode/npm.py
@@ -25,6 +25,8 @@
 from packagedcode.utils import yield_dependencies_from_package_data
 from packagedcode.utils import yield_dependencies_from_package_resource
 from packagedcode.utils import update_dependencies_as_resolved
+from packagedcode.utils import is_simple_path
+from packagedcode.utils import is_simple_path_pattern
 import saneyaml
 
 """
@@ -299,8 +301,7 @@ def get_workspace_members(cls, workspaces, codebase, workspace_root_path):
         for workspace_path in workspaces:
 
             # Case 1: A definite path, instead of a pattern (only one package.json)
-            if '*' not in workspace_path:
-
+            if is_simple_path(workspace_path):
                 workspace_dir_path = os.path.join(workspace_root_path, workspace_path)
                 workspace_member_path = os.path.join(workspace_dir_path, 'package.json')
                 workspace_member = codebase.get_resource(path=workspace_member_path)
@@ -310,8 +311,8 @@ def get_workspace_members(cls, workspaces, codebase, workspace_root_path):
             # Case 2: we have glob path which is a directory, relative to the workspace root
             # Here we have only one * at the last (This is an optimization, this is a very
             # commonly encountered subcase of case 3)
-            elif '*' == workspace_path[-1] and '*' not in workspace_path.replace('*', ''):
-                workspace_pattern_prefix = workspace_path.replace('*', '')
+            elif is_simple_path_pattern(workspace_path):
+                workspace_pattern_prefix = workspace_path.rstrip('*')
                 workspace_dir_path = os.path.join(workspace_root_path, workspace_pattern_prefix)
                 workspace_search_dir = codebase.get_resource(path=workspace_dir_path)
                 if not workspace_search_dir:
@@ -785,7 +786,8 @@ def parse(cls, location, package_only=False):
                 version=version,
             )
 
-            # TODO: what type of checksum is this?
+            # TODO: what type of checksum is this? This is a complex one.
+            # See https://github.com/yarnpkg/berry/blob/f1edfae49d1bab7679ce3061e2749113dc3b80e8/packages/yarnpkg-core/sources/tgzUtils.ts
             checksum = details.get('checksum')
             dependencies = details.get('dependencies') or {}
             peer_dependencies = details.get('peerDependencies') or {}
@@ -826,12 +828,16 @@ def parse(cls, location, package_only=False):
                     is_virtual=True,
                 )
                 resolved_package = models.PackageData.from_data(resolved_package_mapping)
+
+                # These are top-level dependencies that do not have a scope
+                # defined here, so we assign the default scope. This will be
+                # merged with the dependency that carries the correct scope
+                # value once it is resolved.
                 dependency = models.DependentPackage(
                     purl=str(purl),
                     extracted_requirement=version,
                     is_resolved=True,
                     resolved_package=resolved_package.to_dict(),
-                    # FIXME: these are NOT correct
                     scope='dependencies',
                     is_optional=False,
                     is_runtime=True,
@@ -1008,7 +1014,8 @@ def parse(cls, location, package_only=False):
                 if not dep_purl in dependencies_by_purl:
                     dependencies_by_purl[dep_purl] = dep.to_dict()
                 else:
-                    # We have duplicate dependencies because of aliases
+                    # FIXME: We have duplicate dependencies because of aliases.
+                    # Should we do something about them?
                     pass
 
         dependencies = list(dependencies_by_purl.values())
diff --git a/src/packagedcode/utils.py b/src/packagedcode/utils.py
index bcc813a202a..f851e35a96f 100644
--- a/src/packagedcode/utils.py
+++ b/src/packagedcode/utils.py
@@ -231,30 +231,22 @@ def update_dependencies_as_resolved(dependencies):
 
     # These are only type, namespace and name (without version and qualifiers)
    base_resolved_purls = []
-    base_purl_fields = ["type", "namespace", "name"]
-    try:
-        resolved_packages = [
-            dep.get("resolved_package")
-            for dep in dependencies
-            if dep.get("resolved_package")
-        ]
-    except AttributeError:
-        raise Exception(dependencies)
+    resolved_packages = [
+        dep.get("resolved_package")
+        for dep in dependencies
+        if dep.get("resolved_package")
+    ]
 
     # No resolved packages are present for dependencies
     if not resolved_packages:
         return
 
     for pkg in resolved_packages:
-        purl_mapping = PackageURL.from_string(purl=pkg.get("purl")).to_dict()
-        base_purl_mapping = {
-            purl_field: purl_value
-            for purl_field, purl_value in purl_mapping.items()
-            if purl_field in base_purl_fields
-        }
-        base_resolved_purls.append(
-            PackageURL(**base_purl_mapping).to_string()
-        )
+        purl = pkg.get("purl")
+        if purl:
+            base_resolved_purls.append(
+                get_base_purl(purl=purl)
+            )
 
     for dependency in dependencies:
         resolved_package = dependency.get("resolved_package")
@@ -271,3 +263,31 @@
                     dep["is_resolved"] = True
 
 
+def get_base_purl(purl):
+    """
+    Get a base purl with only the type, namespace and name from
+    a given purl.
+    """
+    base_purl_fields = ["type", "namespace", "name"]
+    purl_mapping = PackageURL.from_string(purl=purl).to_dict()
+    base_purl_mapping = {
+        purl_field: purl_value
+        for purl_field, purl_value in purl_mapping.items()
+        if purl_field in base_purl_fields
+    }
+    return PackageURL(**base_purl_mapping).to_string()
+
+
+def is_simple_path(path):
+    """
+    Return True if ``path`` is a definite path with no glob pattern in it.
+    """
+    return '*' not in path
+
+
+def is_simple_path_pattern(path):
+    """
+    Return True if ``path`` is a simple glob pattern with a single
+    trailing asterisk, like "packages/*".
+    """
+    return path.endswith('*') and path.count('*') == 1
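
For reference, the snippet below is a small standalone sketch, not part of the patch, showing how the helpers introduced in this diff behave. The functions are copied from the changed src/packagedcode/utils.py (using the is_simple_path name) rather than imported, so the example runs on its own with only the packageurl-python library installed; the sample purl and workspace paths are made up for illustration.

from packageurl import PackageURL


def get_base_purl(purl):
    # Keep only the type, namespace and name fields of a purl, dropping
    # version, qualifiers and subpath.
    base_purl_fields = ["type", "namespace", "name"]
    purl_mapping = PackageURL.from_string(purl=purl).to_dict()
    base_purl_mapping = {
        purl_field: purl_value
        for purl_field, purl_value in purl_mapping.items()
        if purl_field in base_purl_fields
    }
    return PackageURL(**base_purl_mapping).to_string()


def is_simple_path(path):
    # A definite path with no glob pattern at all.
    return '*' not in path


def is_simple_path_pattern(path):
    # A simple glob pattern with a single trailing asterisk.
    return path.endswith('*') and path.count('*') == 1


# update_dependencies_as_resolved() compares dependencies to resolved
# packages by base purl, so version and qualifiers are dropped:
print(get_base_purl("pkg:npm/%40babel/core@7.22.5"))
# pkg:npm/%40babel/core

# npm workspace entries from a package.json "workspaces" field map to the
# three cases handled by get_workspace_members() in npm.py:
print(is_simple_path("packages/app"))            # True  -> Case 1: definite path
print(is_simple_path_pattern("packages/*"))      # True  -> Case 2: simple trailing glob
print(is_simple_path_pattern("packages/*/docs")) # False -> Case 3: generic glob pattern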