From 0cba5ad5a851562fadc6841f6cc2a983d5869cf0 Mon Sep 17 00:00:00 2001 From: William Woodruff Date: Tue, 31 Jan 2023 20:58:12 -0500 Subject: [PATCH] Better errors when an invalid requirement is encountered (#507) * pypi_provider: better invalid requirement handling Signed-off-by: William Woodruff * exception plumbing, bring cov back up Signed-off-by: William Woodruff * requirement: better message on invalid requirement Signed-off-by: William Woodruff * requirement: lintage Signed-off-by: William Woodruff * CHANGELOG: record changes Signed-off-by: William Woodruff --------- Signed-off-by: William Woodruff Co-authored-by: Alex Cameron --- CHANGELOG.md | 6 ++ pip_audit/_dependency_source/__init__.py | 2 + pip_audit/_dependency_source/interface.py | 7 ++ pip_audit/_dependency_source/requirement.py | 10 +- .../resolvelib/pypi_provider.py | 99 +++++++++++-------- .../resolvelib/test_pypi_provider.py | 28 +++++- 6 files changed, 105 insertions(+), 47 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8263f5df..4f326d81 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,12 @@ All versions prior to 0.0.9 are untracked. ## [Unreleased] +### Changed + +* Improved error messaging when a requirements input or indirect dependency + has an invalid (non-PEP 440) requirements specifier + ([#507](https://github.com/pypa/pip-audit/pull/507)) + ## [2.4.15] ### Fixed diff --git a/pip_audit/_dependency_source/__init__.py b/pip_audit/_dependency_source/__init__.py index 97f5db2c..32fa4818 100644 --- a/pip_audit/_dependency_source/__init__.py +++ b/pip_audit/_dependency_source/__init__.py @@ -10,6 +10,7 @@ DependencySourceError, HashMismatchError, HashMissingError, + InvalidRequirementSpecifier, RequirementHashes, UnsupportedHashAlgorithm, ) @@ -27,6 +28,7 @@ "DependencySourceError", "HashMismatchError", "HashMissingError", + "InvalidRequirementSpecifier", "PipSource", "PipSourceError", "PyProjectSource", diff --git a/pip_audit/_dependency_source/interface.py b/pip_audit/_dependency_source/interface.py index 337f83e5..b4efa550 100644 --- a/pip_audit/_dependency_source/interface.py +++ b/pip_audit/_dependency_source/interface.py @@ -86,6 +86,13 @@ class UnsupportedHashAlgorithm(Exception): pass +class InvalidRequirementSpecifier(DependencySourceError): + """ + A `DependencySourceError` specialized for the case of a non-PEP 440 requirements + specifier. + """ + + class RequirementHashes: """ Represents the hashes contained within a requirements file. diff --git a/pip_audit/_dependency_source/requirement.py b/pip_audit/_dependency_source/requirement.py index 97cf60c8..a0b19149 100644 --- a/pip_audit/_dependency_source/requirement.py +++ b/pip_audit/_dependency_source/requirement.py @@ -25,6 +25,7 @@ DependencyResolverError, DependencySource, DependencySourceError, + InvalidRequirementSpecifier, RequirementHashes, ) from pip_audit._fix import ResolvedFixVersion @@ -89,10 +90,11 @@ def collect(self) -> Iterator[Dependency]: for filename in self._filenames: try: rf = RequirementsFile.from_file(filename) - if rf.invalid_lines: - raise RequirementSourceError( - f"requirement file {filename} contains invalid lines: " - f"{str(rf.invalid_lines)}" + if len(rf.invalid_lines) > 0: + invalid = rf.invalid_lines[0] + raise InvalidRequirementSpecifier( + f"requirement file {filename} contains invalid specifier at " + f"line {invalid.line_number}: {invalid.error_message}" ) reqs: list[InstallRequirement] = [] diff --git a/pip_audit/_dependency_source/resolvelib/pypi_provider.py b/pip_audit/_dependency_source/resolvelib/pypi_provider.py index 683d861a..f5bc2639 100644 --- a/pip_audit/_dependency_source/resolvelib/pypi_provider.py +++ b/pip_audit/_dependency_source/resolvelib/pypi_provider.py @@ -26,14 +26,18 @@ import html5lib import requests from cachecontrol import CacheControl -from packaging.requirements import Requirement +from packaging.requirements import InvalidRequirement, Requirement from packaging.specifiers import InvalidSpecifier, SpecifierSet from packaging.utils import canonicalize_name, parse_sdist_filename, parse_wheel_filename from packaging.version import Version from resolvelib.providers import AbstractProvider from resolvelib.resolvers import RequirementInformation -from pip_audit._dependency_source import RequirementHashes, UnsupportedHashAlgorithm +from pip_audit._dependency_source import ( + InvalidRequirementSpecifier, + RequirementHashes, + UnsupportedHashAlgorithm, +) from pip_audit._service import SkippedDependency from pip_audit._state import AuditState from pip_audit._util import python_version @@ -134,7 +138,16 @@ def _get_dependencies(self) -> Iterator[Requirement]: extras = self.extras if self.extras else [""] for d in deps: - r = Requirement(d) + # NOTE: packaging 22 and later are more strict about PEP 440 requirements, + # meaning that pre-existing packages might have invalid requirements + # specifiers in their dependency sets (e.g. `pytz (>dev)`). + try: + r = Requirement(d) + except InvalidRequirement as exc: + raise InvalidRequirementSpecifier( + f"{self.name} has invalid requirement '{d}': {exc}" + ) + if r.marker is None: yield r else: @@ -440,49 +453,51 @@ def find_matches( for r in requirements: extras |= r.extras - # Need to pass the extras to the search, so they - # are added to the candidate at creation - we - # treat candidates as immutable once created. - all_candidates = get_project_from_indexes( - self.index_urls, - self.session, - identifier, - requirements, - extras, - self.req_hashes, - self.timeout, - self._state, - ) - - candidates: list[ResolvedCandidate] try: - candidates = sorted( - [ - candidate - for candidate in all_candidates - if candidate.version not in bad_versions - # NOTE(ww): We use `filter(...)` instead of checking - # `candidate.version in r.specifier` because the former has subtle (and PEP 440 - # mandated) behavior around prereleases. Specifically, `filter(...)` - # returns prereleases even if not explicitly configured, but only if - # there are no non-prereleases. - # See: https://github.com/pypa/pip-audit/issues/472 - and all([any(r.specifier.filter((candidate.version,))) for r in requirements]) - # HACK(ww): Additionally check that each candidate's name matches the - # expected project name (identifier). - # This technically shouldn't be required, but parsing distribution names - # from package indices is imprecise/unreliable when distribution filenames - # are PEP 440 compliant but not normalized. - # See: https://github.com/pypa/packaging/issues/527 - and candidate.name == identifier - ], - key=attrgetter("version", "is_wheel"), - reverse=True, + # Need to pass the extras to the search, so that they are added to the candidate at + # creation, since we treat candidates as immutable once created. + # We also eagerly collect the candidate list here, to pull any "not found" + # exceptions out before doing candidate filtering below. + all_candidates = list( + get_project_from_indexes( + self.index_urls, + self.session, + identifier, + requirements, + extras, + self.req_hashes, + self.timeout, + self._state, + ) ) - except PyPINotFoundError as e: - yield SkippedCandidate(name=identifier, skip_reason=str(e)) + except PyPINotFoundError as exc: + yield SkippedCandidate(name=identifier, skip_reason=str(exc)) return + candidates: list[ResolvedCandidate] = sorted( + [ + candidate + for candidate in all_candidates + if candidate.version not in bad_versions + # NOTE(ww): We use `filter(...)` instead of checking + # `candidate.version in r.specifier` because the former has subtle (and PEP 440 + # mandated) behavior around prereleases. Specifically, `filter(...)` + # returns prereleases even if not explicitly configured, but only if + # there are no non-prereleases. + # See: https://github.com/pypa/pip-audit/issues/472 + and all([any(r.specifier.filter((candidate.version,))) for r in requirements]) + # HACK(ww): Additionally check that each candidate's name matches the + # expected project name (identifier). + # This technically shouldn't be required, but parsing distribution names + # from package indices is imprecise/unreliable when distribution filenames + # are PEP 440 compliant but not normalized. + # See: https://github.com/pypa/packaging/issues/527 + and candidate.name == identifier + ], + key=attrgetter("version", "is_wheel"), + reverse=True, + ) + logger.debug(f"{identifier} has candidates: {candidates}") # If we have multiple candidates for a single version and some are wheels, diff --git a/test/dependency_source/resolvelib/test_pypi_provider.py b/test/dependency_source/resolvelib/test_pypi_provider.py index 171e7e9a..608de45e 100644 --- a/test/dependency_source/resolvelib/test_pypi_provider.py +++ b/test/dependency_source/resolvelib/test_pypi_provider.py @@ -1,3 +1,4 @@ +from email.message import Message from pathlib import Path from subprocess import CalledProcessError @@ -6,12 +7,13 @@ from packaging.version import Version from pip_audit._dependency_source import RequirementHashes +from pip_audit._dependency_source.interface import InvalidRequirementSpecifier from pip_audit._dependency_source.resolvelib import pypi_provider from pip_audit._dependency_source.resolvelib.pypi_provider import ResolvedCandidate from pip_audit._virtual_env import VirtualEnvError -class TestCandidate: +class TestResolvedCandidate: def test_get_metadata_for_sdist_venv_create_fails(self, monkeypatch): virtualenv_obj = pretend.stub( create=pretend.call_recorder( @@ -53,6 +55,30 @@ def test_get_metadata_for_sdist_venv_create_fails(self, monkeypatch): "Installing source distribution in isolated environment for fakepkg (1.0.0)" ) + @pytest.mark.parametrize("invalid", ["pytz (>dev)", "fakedep>=3.*"]) + def test_get_dependencies_invalid_req_specifer(self, invalid, monkeypatch): + candidate = ResolvedCandidate( + "fakepkg", + "fakepkg", + Path("fakepath"), + Version("1.0.0"), + url="hxxps://fake.url", + extras=set(), + is_wheel=False, + reqs=[], + session=pretend.stub(), + timeout=None, + state=pretend.stub(), + req_hashes=RequirementHashes(), + ) + + metadata = Message() + metadata["Requires-Dist"] = invalid + monkeypatch.setattr(candidate, "_metadata", metadata) + + with pytest.raises(InvalidRequirementSpecifier): + list(candidate._get_dependencies()) + def test_get_project_from_index_relative_url(): data = """