Skip to content

Commit

Permalink
Add support for pip-inspect.deplock files
Browse files Browse the repository at this point in the history
Add a parser for pip-inspect.deplock files generated by deplock,
which contain all the package metadata, i.e. the resolved versions
and the dependency relationships.

Reference: aboutcode-org/scancode.io#1262
Signed-off-by: Ayan Sinha Mahapatra <[email protected]>
  • Loading branch information
AyanSinhaMahapatra committed Jun 28, 2024
1 parent c8046f1 commit fc4ada9
Show file tree
Hide file tree
Showing 6 changed files with 2,291 additions and 1 deletion.
3 changes: 3 additions & 0 deletions src/packagedcode/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,9 @@
windows.MicrosoftUpdateManifestHandler,

win_pe.WindowsExecutableHandler,

# These are handlers for deplock generated files
pypi.PipInspectDeplockHandler,
]

if on_linux:
Expand Down
121 changes: 120 additions & 1 deletion src/packagedcode/pypi.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
from packagedcode.utils import parse_maintainer_name_email
from packagedcode.utils import yield_dependencies_from_package_data
from packagedcode.utils import yield_dependencies_from_package_resource
from packagedcode.utils import get_base_purl

try:
from zipfile import Path as ZipPath
Expand Down Expand Up @@ -563,6 +564,123 @@ def parse(cls, location, package_only=False):
yield models.PackageData.from_data(package_data, package_only)


class PipInspectDeplockHandler(models.DatafileHandler):
    """
    Handle ``pip-inspect.deplock`` files.

    The file is parsed as a JSON document with an ``installed`` list of
    entries (each carrying a ``metadata`` mapping and optional ``requested``
    and ``direct_url`` keys) plus top-level ``pip_version`` and ``version``
    values.
    """
    datasource_id = 'pypi_inspect_deplock'
    path_patterns = ('*pip-inspect.deplock',)
    default_package_type = 'pypi'
    default_primary_language = 'Python'
    # NOTE(review): this description looks copy-pasted from the poetry
    # pyproject.toml handler. It is left unchanged here because the expected
    # plugin help output (tests/packagedcode/data/plugin/help.txt) pins this
    # exact string; fix both together.
    description = 'Python poetry pyproject.toml'
    # These are files generated by deplock, see https://github.com/nexB/dependency-inspector
    documentation_url = 'https://pip.pypa.io/en/stable/cli/pip_inspect/'

    @classmethod
    def get_resolved_package_from_metadata(cls, metadata, package_only=False):
        """
        Return a virtual PackageData built from a ``metadata`` mapping of one
        ``installed`` entry.

        The ``requires_dist`` requirement strings of ``metadata`` are converted
        to dependencies and stored as plain mappings on the returned package.
        ``package_only`` is passed through to ``PackageData.from_data``.
        """
        dependencies_for_resolved = get_requires_dependencies(
            requires=metadata.get('requires_dist'),
        )
        package_data = dict(
            datasource_id=cls.datasource_id,
            type=cls.default_package_type,
            # use the class attribute rather than repeating the literal
            primary_language=cls.default_primary_language,
            name=metadata.get('name'),
            version=metadata.get('version'),
            extracted_license_statement=metadata.get('license'),
            description=metadata.get('description'),
            keywords=metadata.get('keywords'),
            is_virtual=True,
            dependencies=[
                dep.to_dict()
                for dep in dependencies_for_resolved
            ],
        )
        return models.PackageData.from_data(package_data, package_only)

    @classmethod
    def parse(cls, location, package_only=False):
        """
        Yield one PackageData for the main (root) package described by the
        file at ``location``, or nothing if the file lists no installed
        packages.

        Every other installed package is attached to the main package as a
        resolved dependency mapping. Requirements of the main package that do
        not match any installed package are kept as unresolved dependencies.
        """
        # Read bytes and let the json module detect the encoding instead of
        # depending on the locale default text encoding.
        with open(location, 'rb') as f:
            data = json.load(f)

        installed_packages = data.get('installed')
        if not installed_packages:
            return

        main_package_metadata = {}
        dependencies = []

        # base purls of the requirements declared by the main package;
        # a set because it is only used for membership tests
        direct_deps_of_main_package = set()

        for package_metadata in installed_packages:
            package_metadata_dep = package_metadata.get('metadata')
            if not package_metadata_dep:
                # An entry without metadata has no name/version to report and
                # would otherwise fail on ``None.get``.
                continue

            # `direct_url` is only present for root package
            # `requested` is true for root package and direct dependencies only
            if package_metadata.get('requested') and 'direct_url' in package_metadata:
                main_package_metadata = package_metadata_dep
                dependencies_for_main = get_requires_dependencies(
                    requires=main_package_metadata.get('requires_dist'),
                )
                direct_deps_of_main_package.update(
                    get_base_purl(dep.purl)
                    for dep in dependencies_for_main
                )
                continue

            package_data_dep = cls.get_resolved_package_from_metadata(
                metadata=package_metadata_dep,
                package_only=package_only,
            )
            dependencies.append(
                models.DependentPackage(
                    purl=package_data_dep.purl,
                    extracted_requirement=None,
                    scope=None,
                    is_runtime=True,
                    is_optional=False,
                    # corrected below once all main requirements are known
                    is_direct=False,
                    is_resolved=True,
                    resolved_package=package_data_dep.to_dict(),
                )
            )

        dependency_mappings = []
        resolved_main_dependencies = set()

        # Update is_direct for direct dependencies. This runs after the loop
        # above because the root package may appear anywhere in `installed`.
        for dep in dependencies:
            base_purl = get_base_purl(dep.purl)
            if base_purl in direct_deps_of_main_package:
                dep.is_direct = True
                resolved_main_dependencies.add(base_purl)

            dependency_mappings.append(dep.to_dict())

        extra_data = {
            "pip_version": data.get('pip_version'),
            "inspect_version": data.get('version'),
        }

        package_data_main = cls.get_resolved_package_from_metadata(
            metadata=main_package_metadata,
            package_only=package_only,
        )

        # Requirements of the main package with no matching installed package
        # are kept as-is (unresolved) so they are not lost.
        unresolved_main_dependencies = [
            dep
            for dep in package_data_main.dependencies
            if get_base_purl(purl=dep.get('purl')) not in resolved_main_dependencies
        ]

        # Use the mappings for the resolved dependencies so the final list is
        # homogeneous: the unresolved entries are mappings too.
        package_data_main.dependencies = dependency_mappings + unresolved_main_dependencies
        package_data_main.extra_data = extra_data
        yield package_data_main


META_DIR_SUFFIXES = '.dist-info', '.egg-info', 'EGG-INFO',
Expand Down Expand Up @@ -1494,7 +1612,7 @@ def get_dist_dependencies(dist):
return get_requires_dependencies(requires=dist.requires)


def get_requires_dependencies(requires, default_scope='install'):
def get_requires_dependencies(requires, default_scope='install', is_direct=True):
"""
Return a list of DependentPackage found in a ``requires`` list of
requirement strings or an empty list.
Expand Down Expand Up @@ -1539,6 +1657,7 @@ def get_requires_dependencies(requires, default_scope='install'):
is_runtime=True,
is_optional=True if bool(extra) else False,
is_resolved=is_resolved,
is_direct=is_direct,
extracted_requirement=str(req),
))

Expand Down
7 changes: 7 additions & 0 deletions tests/packagedcode/data/plugin/help.txt
Original file line number Diff line number Diff line change
Expand Up @@ -713,6 +713,13 @@ Package type: pypi
description: PyPI extracted egg PKG-INFO
path_patterns: '*/EGG-INFO/PKG-INFO'
--------------------------------------------
Package type: pypi
datasource_id: pypi_inspect_deplock
documentation URL: https://pip.pypa.io/en/stable/cli/pip_inspect/
primary language: Python
description: Python poetry pyproject.toml
path_patterns: '*pip-inspect.deplock'
--------------------------------------------
Package type: pypi
datasource_id: pypi_poetry_pyproject_toml
documentation URL: https://packaging.python.org/en/latest/specifications/pyproject-toml/
Expand Down
519 changes: 519 additions & 0 deletions tests/packagedcode/data/pypi/deplock/univers/pip-inspect.deplock

Large diffs are not rendered by default.

Loading

0 comments on commit fc4ada9

Please sign in to comment.