diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 97148a510..a0ad0e77d 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -4,7 +4,10 @@ Changelog *unreleased* ~~~~~~~~~~~~ -No unreleased changes. +* Fix parsing of ``Version`` and ``Specifier``, to prevent certain + non-ASCII letters from being accepted as a part of the local version + segment (:issue:`469`); also, fix the docs of ``VERSION_PATTERN``, to + mention the necessity of the ``re.ASCII`` flag 21.0 - 2021-07-03 ~~~~~~~~~~~~~~~~~ diff --git a/docs/version.rst b/docs/version.rst index a43cf7868..33e146474 100644 --- a/docs/version.rst +++ b/docs/version.rst @@ -284,7 +284,7 @@ Reference The pattern is not anchored at either end, and is intended for embedding in larger expressions (for example, matching a version number as part of a file name). The regular expression should be compiled with the - ``re.VERBOSE`` and ``re.IGNORECASE`` flags set. + ``re.VERBOSE``, ``re.IGNORECASE`` and ``re.ASCII`` flags set. .. _PEP 440: https://www.python.org/dev/peps/pep-0440/ diff --git a/packaging/specifiers.py b/packaging/specifiers.py index ce66bd4ad..e31a72604 100644 --- a/packaging/specifiers.py +++ b/packaging/specifiers.py @@ -411,6 +411,20 @@ class Specifier(_IndividualSpecifier): _regex = re.compile(r"^\s*" + _regex_str + r"\s*$", re.VERBOSE | re.IGNORECASE) + # Note: an additional check, based on the following regular + # expression, is necessary because without it the 'a-z' + # character ranges in the above regular expression, in + # conjunction with re.IGNORECASE, would cause erroneous + # acceptance of non-ASCII letters in the local version segment + # (see: https://docs.python.org/library/re.html#re.IGNORECASE). + _supplementary_restriction_regex = re.compile(r""" + \s*===.* # No restriction in the identity operator case. + | + [\s\0-\177]* # In all other cases only whitespace characters + # and ASCII-only non-whitespace characters are + # allowed. 
+ """, re.VERBOSE) + _operators = { "~=": "compatible", "==": "equal", @@ -422,6 +436,13 @@ class Specifier(_IndividualSpecifier): "===": "arbitrary", } + def __init__(self, spec: str = "", prereleases: Optional[bool] = None) -> None: + super().__init__(spec, prereleases) + + match = self._supplementary_restriction_regex.fullmatch(spec) + if not match: + raise InvalidSpecifier(f"Invalid specifier: '{spec}'") + @_require_version_compare def _compare_compatible(self, prospective: ParsedVersion, spec: str) -> bool: diff --git a/packaging/version.py b/packaging/version.py index de9a09a4e..0a8d39a00 100644 --- a/packaging/version.py +++ b/packaging/version.py @@ -256,12 +256,21 @@ def _legacy_cmpkey(version: str) -> LegacyCmpKey: class Version(_BaseVersion): - _regex = re.compile(r"^\s*" + VERSION_PATTERN + r"\s*$", re.VERBOSE | re.IGNORECASE) + _regex = re.compile( + VERSION_PATTERN, + + # Note: the re.ASCII flag is necessary because without it the + # 'a-z' character ranges in VERSION_PATTERN, in conjunction + # with re.IGNORECASE, would cause erroneous acceptance of + # non-ASCII letters in the local version segment (see: + # https://docs.python.org/library/re.html#re.IGNORECASE). 
+ re.VERBOSE | re.IGNORECASE | re.ASCII, + ) def __init__(self, version: str) -> None: # Validate the version and parse it into pieces - match = self._regex.search(version) + match = self._regex.fullmatch(version.strip()) if not match: raise InvalidVersion(f"Invalid version: '{version}'") diff --git a/tests/test_specifiers.py b/tests/test_specifiers.py index 0b8e742c1..43282d345 100644 --- a/tests/test_specifiers.py +++ b/tests/test_specifiers.py @@ -81,6 +81,10 @@ def test_specifiers_valid(self, specifier): # Cannot use a prefix matching after a .devN version "==1.0.dev1.*", "!=1.0.dev1.*", + # Local version which includes a non-ASCII letter that + # matches regex '[a-z]' when re.IGNORECASE is in force in + # conjunction with implicit re.UNICODE (i.e., without re.ASCII) + "==1.0+\u0130", ], ) def test_specifiers_invalid(self, specifier): diff --git a/tests/test_version.py b/tests/test_version.py index 8fe563290..d062f848d 100644 --- a/tests/test_version.py +++ b/tests/test_version.py @@ -96,6 +96,10 @@ def test_valid_versions(self, version): "1.0+_foobar", "1.0+foo&asd", "1.0+1+1", + # Local version which includes a non-ASCII letter that + # matches regex '[a-z]' when re.IGNORECASE is in force in + # conjunction with implicit re.UNICODE (i.e., without re.ASCII) + "1.0+\u0130", ], ) def test_invalid_versions(self, version):