diff --git a/.gitignore b/.gitignore index 0c80aee..5b5671b 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,7 @@ micropip/_version.py dist/ build/ .venv* +.hypothesis/ .vscode pyodide/ diff --git a/micropip/_compat.py b/micropip/_compat.py index b445f56..eed40bd 100644 --- a/micropip/_compat.py +++ b/micropip/_compat.py @@ -13,7 +13,6 @@ loadedPackages, loadPackage, to_js, - wheel_dist_info_dir, ) else: from ._compat_not_in_pyodide import ( @@ -26,7 +25,6 @@ loadedPackages, loadPackage, to_js, - wheel_dist_info_dir, ) __all__ = [ @@ -38,6 +36,5 @@ "loadDynlib", "loadPackage", "get_dynlibs", - "wheel_dist_info_dir", "to_js", ] diff --git a/micropip/_compat_in_pyodide.py b/micropip/_compat_in_pyodide.py index 7c5b03a..02a5068 100644 --- a/micropip/_compat_in_pyodide.py +++ b/micropip/_compat_in_pyodide.py @@ -2,7 +2,7 @@ from typing import IO from urllib.parse import urlparse -from pyodide._package_loader import get_dynlibs, wheel_dist_info_dir +from pyodide._package_loader import get_dynlibs from pyodide.ffi import IN_BROWSER, to_js from pyodide.http import pyfetch @@ -54,6 +54,5 @@ async def fetch_string_and_headers( "loadDynlib", "loadPackage", "get_dynlibs", - "wheel_dist_info_dir", "to_js", ] diff --git a/micropip/_compat_not_in_pyodide.py b/micropip/_compat_not_in_pyodide.py index 1da81f4..c229aa9 100644 --- a/micropip/_compat_not_in_pyodide.py +++ b/micropip/_compat_not_in_pyodide.py @@ -2,7 +2,6 @@ from io import BytesIO from pathlib import Path from typing import IO, Any -from zipfile import ZipFile REPODATA_PACKAGES: dict[str, dict[str, Any]] = {} @@ -64,43 +63,6 @@ def canonicalize_name(name: str) -> str: return _canonicalize_regex.sub("-", name).lower() -# Vendored from pip -class UnsupportedWheel(Exception): - """Unsupported wheel.""" - - -def wheel_dist_info_dir(source: ZipFile, name: str) -> str: - """Returns the name of the contained .dist-info directory. - Raises UnsupportedWheel if not found, >1 found, or it doesn't match the - provided name. 
- """ - # Zip file path separators must be / - subdirs = {p.split("/", 1)[0] for p in source.namelist()} - - info_dirs = [s for s in subdirs if s.endswith(".dist-info")] - - if not info_dirs: - raise UnsupportedWheel(f".dist-info directory not found in wheel {name!r}") - - if len(info_dirs) > 1: - raise UnsupportedWheel( - "multiple .dist-info directories found in wheel {!r}: {}".format( - name, ", ".join(info_dirs) - ) - ) - - info_dir = info_dirs[0] - - info_dir_name = canonicalize_name(info_dir) - canonical_name = canonicalize_name(name) - if not info_dir_name.startswith(canonical_name): - raise UnsupportedWheel( - f".dist-info directory {info_dir!r} does not start with {canonical_name!r}" - ) - - return info_dir - - class pyodide_js_: def __get__(self, attr): raise RuntimeError(f"Attempted to access property '{attr}' on pyodide_js dummy") @@ -122,6 +84,5 @@ def loadPackage(packages: str | list[str]) -> None: "loadedPackages", "loadPackage", "get_dynlibs", - "wheel_dist_info_dir", "to_js", ] diff --git a/micropip/externals/pip/__init__.py b/micropip/externals/pip/__init__.py deleted file mode 100644 index d7801cb..0000000 --- a/micropip/externals/pip/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -""" -Based on https://github.com/pypa/pip/commit/57be6a77c57ab5d512371b5c48d508a7620c3217 -""" diff --git a/micropip/externals/pip/_internal/__init__.py b/micropip/externals/pip/_internal/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/micropip/externals/pip/_internal/utils/__init__.py b/micropip/externals/pip/_internal/utils/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/micropip/externals/pip/_internal/utils/pkg_resources.py b/micropip/externals/pip/_internal/utils/pkg_resources.py deleted file mode 100644 index f3c7c08..0000000 --- a/micropip/externals/pip/_internal/utils/pkg_resources.py +++ /dev/null @@ -1,40 +0,0 @@ -from typing import Dict, Iterable, List - -from ..._vendor.pkg_resources import yield_lines - - -class DictMetadata: - """IMetadataProvider that reads metadata files from a dictionary.""" - - def __init__(self, metadata): - # type: (Dict[str, bytes]) -> None - self._metadata = metadata - - def has_metadata(self, name): - # type: (str) -> bool - return name in self._metadata - - def get_metadata(self, name): - # type: (str) -> str - try: - return self._metadata[name].decode() - except UnicodeDecodeError as e: - # Mirrors handling done in pkg_resources.NullProvider. - e.reason += f" in {name} file" - raise - - def get_metadata_lines(self, name): - # type: (str) -> Iterable[str] - return yield_lines(self.get_metadata(name)) - - def metadata_isdir(self, name): - # type: (str) -> bool - return False - - def metadata_listdir(self, name): - # type: (str) -> List[str] - return [] - - def run_script(self, script_name, namespace): - # type: (str, str) -> None - pass diff --git a/micropip/externals/pip/_internal/utils/wheel.py b/micropip/externals/pip/_internal/utils/wheel.py deleted file mode 100644 index 288b33d..0000000 --- a/micropip/externals/pip/_internal/utils/wheel.py +++ /dev/null @@ -1,194 +0,0 @@ -"""Support functions for working with wheel files. 
-""" - -import logging -from email.message import Message -from email.parser import Parser -from typing import Dict, Tuple -from zipfile import BadZipFile, ZipFile - -from packaging.utils import canonicalize_name -from ..._vendor.pkg_resources import DistInfoDistribution, Distribution -from .pkg_resources import DictMetadata - -# from pip._vendor.pkg_resources import DistInfoDistribution, Distribution -# from pip._internal.utils.pkg_resources import DictMetadata - -VERSION_COMPATIBLE = (1, 0) - - -class UnsupportedWheel(Exception): - """Unsupported wheel.""" - - -logger = logging.getLogger(__name__) - - -class WheelMetadata(DictMetadata): - """Metadata provider that maps metadata decoding exceptions to our - internal exception type. - """ - - def __init__(self, metadata, wheel_name): - # type: (Dict[str, bytes], str) -> None - super().__init__(metadata) - self._wheel_name = wheel_name - - def get_metadata(self, name): - # type: (str) -> str - try: - return super().get_metadata(name) - except UnicodeDecodeError as e: - # Augment the default error with the origin of the file. - raise UnsupportedWheel( - f"Error decoding metadata for {self._wheel_name}: {e}" - ) - - -def pkg_resources_distribution_for_wheel(wheel_zip, name, location): - # type: (ZipFile, str, str) -> Distribution - """Get a pkg_resources distribution given a wheel. - - :raises UnsupportedWheel: on any errors - """ - info_dir, _ = parse_wheel(wheel_zip, name) - - metadata_files = [p for p in wheel_zip.namelist() if p.startswith(f"{info_dir}/")] - - metadata_text = {} # type: Dict[str, bytes] - for path in metadata_files: - _, metadata_name = path.split("/", 1) - - try: - metadata_text[metadata_name] = read_wheel_metadata_file(wheel_zip, path) - except UnsupportedWheel as e: - raise UnsupportedWheel("{} has an invalid wheel, {}".format(name, str(e))) - - metadata = WheelMetadata(metadata_text, location) - - return DistInfoDistribution(location=location, metadata=metadata, project_name=name) - - -def parse_wheel(wheel_zip, name): - # type: (ZipFile, str) -> Tuple[str, Message] - """Extract information from the provided wheel, ensuring it meets basic - standards. - - Returns the name of the .dist-info directory and the parsed WHEEL metadata. - """ - try: - info_dir = wheel_dist_info_dir(wheel_zip, name) - metadata = wheel_metadata(wheel_zip, info_dir) - version = wheel_version(metadata) - except UnsupportedWheel as e: - raise UnsupportedWheel("{} has an invalid wheel, {}".format(name, str(e))) - - check_compatibility(version, name) - - return info_dir, metadata - - -def wheel_dist_info_dir(source, name): - # type: (ZipFile, str) -> str - """Returns the name of the contained .dist-info directory. - - Raises AssertionError or UnsupportedWheel if not found, >1 found, or - it doesn't match the provided name. 
- """ - # Zip file path separators must be / - subdirs = {p.split("/", 1)[0] for p in source.namelist()} - - info_dirs = [s for s in subdirs if s.endswith(".dist-info")] - - if not info_dirs: - raise UnsupportedWheel(".dist-info directory not found") - - if len(info_dirs) > 1: - raise UnsupportedWheel( - "multiple .dist-info directories found: {}".format(", ".join(info_dirs)) - ) - - info_dir = info_dirs[0] - - info_dir_name = canonicalize_name(info_dir) - canonical_name = canonicalize_name(name) - if not info_dir_name.startswith(canonical_name): - raise UnsupportedWheel( - ".dist-info directory {!r} does not start with {!r}".format( - info_dir, canonical_name - ) - ) - - return info_dir - - -def read_wheel_metadata_file(source, path): - # type: (ZipFile, str) -> bytes - try: - return source.read(path) - # BadZipFile for general corruption, KeyError for missing entry, - # and RuntimeError for password-protected files - except (BadZipFile, KeyError, RuntimeError) as e: - raise UnsupportedWheel(f"could not read {path!r} file: {e!r}") - - -def wheel_metadata(source, dist_info_dir): - # type: (ZipFile, str) -> Message - """Return the WHEEL metadata of an extracted wheel, if possible. - Otherwise, raise UnsupportedWheel. - """ - path = f"{dist_info_dir}/WHEEL" - # Zip file path separators must be / - wheel_contents = read_wheel_metadata_file(source, path) - - try: - wheel_text = wheel_contents.decode() - except UnicodeDecodeError as e: - raise UnsupportedWheel(f"error decoding {path!r}: {e!r}") - - # FeedParser (used by Parser) does not raise any exceptions. The returned - # message may have .defects populated, but for backwards-compatibility we - # currently ignore them. - return Parser().parsestr(wheel_text) - - -def wheel_version(wheel_data): - # type: (Message) -> Tuple[int, ...] - """Given WHEEL metadata, return the parsed Wheel-Version. - Otherwise, raise UnsupportedWheel. - """ - version_text = wheel_data["Wheel-Version"] - if version_text is None: - raise UnsupportedWheel("WHEEL is missing Wheel-Version") - - version = version_text.strip() - - try: - return tuple(map(int, version.split("."))) - except ValueError: - raise UnsupportedWheel(f"invalid Wheel-Version: {version!r}") - - -def check_compatibility(version, name): - # type: (Tuple[int, ...], str) -> None - """Raises errors or warns if called with an incompatible Wheel-Version. - - pip should refuse to install a Wheel-Version that's a major series - ahead of what it's compatible with (e.g 2.0 > 1.1); and warn when - installing a version only minor version ahead (e.g 1.2 > 1.1). 
- - version: a 2-tuple representing a Wheel-Version (Major, Minor) - name: name of wheel or package to raise exception about - - :raises UnsupportedWheel: when an incompatible Wheel-Version is given - """ - if version[0] > VERSION_COMPATIBLE[0]: - raise UnsupportedWheel( - "{}'s Wheel-Version ({}) is not compatible with this version " - "of pip".format(name, ".".join(map(str, version))) - ) - elif version > VERSION_COMPATIBLE: - logger.warning( - "Installing from a newer Wheel-Version (%s)", - ".".join(map(str, version)), - ) diff --git a/micropip/externals/pip/_vendor/__init__.py b/micropip/externals/pip/_vendor/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/micropip/externals/pip/_vendor/pkg_resources.py b/micropip/externals/pip/_vendor/pkg_resources.py deleted file mode 100644 index 3d03a4f..0000000 --- a/micropip/externals/pip/_vendor/pkg_resources.py +++ /dev/null @@ -1,537 +0,0 @@ -# coding: utf-8 -""" -Package resource API --------------------- - -A resource is a logical file contained within a package, or a logical -subdirectory thereof. The package resource API expects resource names -to have their path parts separated with ``/``, *not* whatever the local -path separator is. Do not use os.path operations to manipulate resource -names being passed into the API. - -The package resource API is designed to work with normal filesystem packages, -.egg files, and unpacked .egg files. It can also work in a limited way with -.zip files and with custom PEP 302 loaders that support the ``get_data()`` -method. -""" - -from __future__ import absolute_import - -import sys -import re -import warnings -import email.parser -import urllib - -try: - FileExistsError -except NameError: - FileExistsError = OSError - -import packaging.version -import packaging.specifiers -import packaging.requirements -import packaging.markers - - -__metaclass__ = type - - -class PEP440Warning(RuntimeWarning): - """ - Used when there is an issue with a version or specifier not complying with - PEP 440. - """ - - -def parse_version(v): - try: - return packaging.version.Version(v) - except packaging.version.InvalidVersion: - return packaging.version.LegacyVersion(v) - - -__all__ = [ - "DistInfoDistribution", - "Distribution", - "DictMetadata", -] - - -class ResolutionError(Exception): - """Abstract base for dependency resolution errors""" - - def __repr__(self): - return self.__class__.__name__ + repr(self.args) - - -class UnknownExtra(ResolutionError): - """Distribution doesn't have an "extra feature" of the given name""" - - -PY_MAJOR = "{}.{}".format(*sys.version_info) -EGG_DIST = 3 - - -def safe_name(name): - """Convert an arbitrary string to a standard distribution name - - Any runs of non-alphanumeric/. characters are replaced with a single '-'. - """ - return re.sub("[^A-Za-z0-9.]+", "-", name) - - -def safe_version(version): - """ - Convert an arbitrary string to a standard version string - """ - try: - # normalize the version - return str(packaging.version.Version(version)) - except packaging.version.InvalidVersion: - version = version.replace(" ", ".") - return re.sub("[^A-Za-z0-9.]+", "-", version) - - -def safe_extra(extra): - """Convert an arbitrary string to a standard 'extra' name - - Any runs of non-alphanumeric characters are replaced with a single '_', - and the result is always lowercased. 
- """ - return re.sub("[^A-Za-z0-9.-]+", "_", extra).lower() - - -def invalid_marker(text): - """ - Validate text as a PEP 508 environment marker; return an exception - if invalid or False otherwise. - """ - try: - evaluate_marker(text) - except SyntaxError as e: - e.filename = None - e.lineno = None - return e - return False - - -def evaluate_marker(text, extra=None): - """ - Evaluate a PEP 508 environment marker. - Return a boolean indicating the marker result in this environment. - Raise SyntaxError if marker is invalid. - - This implementation uses the 'pyparsing' module. - """ - try: - marker = packaging.markers.Marker(text) - return marker.evaluate() - except packaging.markers.InvalidMarker as e: - raise SyntaxError(e) - - -def yield_lines(strs): - """Yield non-empty/non-comment lines of a string or sequence""" - if isinstance(strs, str): - for s in strs.splitlines(): - s = s.strip() - # skip blank lines/comments - if s and not s.startswith("#"): - yield s - else: - for ss in strs: - for s in yield_lines(ss): - yield s - - -def _remove_md5_fragment(location): - if not location: - return "" - parsed = urllib.parse.urlparse(location) - if parsed[-1].startswith("md5="): - return urllib.parse.urlunparse(parsed[:-1] + ("",)) - return location - - -def _version_from_file(lines): - """ - Given an iterable of lines from a Metadata file, return - the value of the Version field, if present, or None otherwise. - """ - - def is_version_line(line): - return line.lower().startswith("version:") - - version_lines = filter(is_version_line, lines) - line = next(iter(version_lines), "") - _, _, value = line.partition(":") - return safe_version(value.strip()) or None - - -class Distribution: - """Wrap an actual or potential sys.path entry w/metadata""" - - PKG_INFO = "PKG-INFO" - - def __init__( - self, - location=None, - metadata=None, - project_name=None, - version=None, - py_version=PY_MAJOR, - platform=None, - precedence=EGG_DIST, - ): - self.project_name = safe_name(project_name or "Unknown") - if version is not None: - self._version = safe_version(version) - self.py_version = py_version - self.platform = platform - self.location = location - self.precedence = precedence - self._provider = metadata - - @property - def hashcmp(self): - return ( - self.parsed_version, - self.precedence, - self.key, - _remove_md5_fragment(self.location), - self.py_version or "", - self.platform or "", - ) - - def __hash__(self): - return hash(self.hashcmp) - - def __lt__(self, other): - return self.hashcmp < other.hashcmp - - def __le__(self, other): - return self.hashcmp <= other.hashcmp - - def __gt__(self, other): - return self.hashcmp > other.hashcmp - - def __ge__(self, other): - return self.hashcmp >= other.hashcmp - - def __eq__(self, other): - if not isinstance(other, self.__class__): - # It's not a Distribution, so they are not equal - return False - return self.hashcmp == other.hashcmp - - def __ne__(self, other): - return not self == other - - # These properties have to be lazy so that we don't have to load any - # metadata until/unless it's actually needed. 
(i.e., some distributions - # may not know their name or version without loading PKG-INFO) - - @property - def key(self): - try: - return self._key - except AttributeError: - self._key = key = self.project_name.lower() - return key - - @property - def parsed_version(self): - if not hasattr(self, "_parsed_version"): - self._parsed_version = parse_version(self.version) - - return self._parsed_version - - @property - def version(self): - try: - return self._version - except AttributeError: - version = self._get_version() - if version is None: - path = self._get_metadata_path_for_display(self.PKG_INFO) - msg = ("Missing 'Version:' header and/or {} file at path: {}").format( - self.PKG_INFO, path - ) - raise ValueError(msg, self) - - return version - - @property - def _dep_map(self): - """ - A map of extra to its list of (direct) requirements - for this distribution, including the null extra. - """ - try: - return self.__dep_map - except AttributeError: - self.__dep_map = self._filter_extras(self._build_dep_map()) - return self.__dep_map - - @staticmethod - def _filter_extras(dm): - """ - Given a mapping of extras to dependencies, strip off - environment markers and filter out any dependencies - not matching the markers. - """ - for extra in list(filter(None, dm)): - new_extra = extra - reqs = dm.pop(extra) - new_extra, _, marker = extra.partition(":") - fails_marker = marker and ( - invalid_marker(marker) or not evaluate_marker(marker) - ) - if fails_marker: - reqs = [] - new_extra = safe_extra(new_extra) or None - - dm.setdefault(new_extra, []).extend(reqs) - return dm - - def _build_dep_map(self): - dm = {} - for name in "requires.txt", "depends.txt": - for extra, reqs in split_sections(self._get_metadata(name)): - dm.setdefault(extra, []).extend(parse_requirements(reqs)) - return dm - - def requires(self, extras=()): - """List of Requirements needed for this distro if `extras` are used""" - dm = self._dep_map - deps = [] - deps.extend(dm.get(None, ())) - for ext in extras: - try: - deps.extend(dm[safe_extra(ext)]) - except KeyError: - raise UnknownExtra("%s has no such extra feature %r" % (self, ext)) - return deps - - def _get_metadata_path_for_display(self, name): - """ - Return the path to the given metadata file, if available. - """ - try: - # We need to access _get_metadata_path() on the provider object - # directly rather than through this class's __getattr__() - # since _get_metadata_path() is marked private. - path = self._provider._get_metadata_path(name) - - # Handle exceptions e.g. in case the distribution's metadata - # provider doesn't support _get_metadata_path(). 
- except Exception: - return "[could not detect]" - - return path - - def _get_metadata(self, name): - if self.has_metadata(name): - for line in self.get_metadata_lines(name): - yield line - - def _get_version(self): - lines = self._get_metadata(self.PKG_INFO) - version = _version_from_file(lines) - - return version - - def __repr__(self): - if self.location: - return "%s (%s)" % (self, self.location) - else: - return str(self) - - def __str__(self): - try: - version = getattr(self, "version", None) - except ValueError: - version = None - version = version or "[unknown version]" - return "%s %s" % (self.project_name, version) - - def __getattr__(self, attr): - """Delegate all unrecognized public attributes to .metadata provider""" - if attr.startswith("_"): - raise AttributeError(attr) - return getattr(self._provider, attr) - - def __dir__(self): - return list( - set(super(Distribution, self).__dir__()) - | set(attr for attr in self._provider.__dir__() if not attr.startswith("_")) - ) - - if not hasattr(object, "__dir__"): - # python 2.7 not supported - del __dir__ - - @property - def extras(self): - return [dep for dep in self._dep_map if dep] - - -class DistInfoDistribution(Distribution): - """ - Wrap an actual or potential sys.path entry - w/metadata, .dist-info style. - """ - - PKG_INFO = "METADATA" - - @property - def _parsed_pkg_info(self): - """Parse and cache metadata""" - try: - return self._pkg_info - except AttributeError: - metadata = self.get_metadata(self.PKG_INFO) - self._pkg_info = email.parser.Parser().parsestr(metadata) - return self._pkg_info - - @property - def _dep_map(self): - try: - return self.__dep_map - except AttributeError: - self.__dep_map = self._compute_dependencies() - return self.__dep_map - - def _compute_dependencies(self): - """Recompute this distribution's dependencies.""" - dm = self.__dep_map = {None: []} - - reqs = [] - # Including any condition expressions - for req in self._parsed_pkg_info.get_all("Requires-Dist") or []: - reqs.extend(parse_requirements(req)) - - def reqs_for_extra(extra): - for req in reqs: - if not req.marker or req.marker.evaluate({"extra": extra}): - yield req - - common = frozenset(reqs_for_extra(None)) - dm[None].extend(common) - - for extra in self._parsed_pkg_info.get_all("Provides-Extra") or []: - s_extra = safe_extra(extra.strip()) - dm[s_extra] = list(frozenset(reqs_for_extra(extra)) - common) - - return dm - - -class RequirementParseError(ValueError): - def __str__(self): - return " ".join(self.args) - - -def parse_requirements(strs): - """Yield ``Requirement`` objects for each specification in `strs` - - `strs` must be a string, or a (possibly-nested) iterable thereof. - """ - # create a steppable iterator, so we can handle \-continuations - lines = iter(yield_lines(strs)) - - for line in lines: - # Drop comments -- a hash without a space may be in a URL. - if " #" in line: - line = line[: line.find(" #")] - # If there is a line continuation, drop it, and append the next line. 
- if line.endswith("\\"): - line = line[:-2].strip() - try: - line += next(lines) - except StopIteration: - return - yield Requirement(line) - - -class Requirement(packaging.requirements.Requirement): - def __init__(self, requirement_string): - """DO NOT CALL THIS UNDOCUMENTED METHOD; use Requirement.parse()!""" - try: - super(Requirement, self).__init__(requirement_string) - except packaging.requirements.InvalidRequirement as e: - raise RequirementParseError(str(e)) - self.unsafe_name = self.name - project_name = safe_name(self.name) - self.project_name, self.key = project_name, project_name.lower() - self.extras = tuple(map(safe_extra, self.extras)) - self.hashCmp = ( - self.key, - self.url, - self.specifier, - frozenset(self.extras), - str(self.marker) if self.marker else None, - ) - self.__hash = hash(self.hashCmp) - - def __eq__(self, other): - return isinstance(other, Requirement) and self.hashCmp == other.hashCmp - - def __ne__(self, other): - return not self == other - - def __contains__(self, item): - if isinstance(item, Distribution): - if item.key != self.key: - return False - - item = item.version - - # Allow prereleases always in order to match the previous behavior of - # this method. In the future this should be smarter and follow PEP 440 - # more accurately. - return self.specifier.contains(item, prereleases=True) - - def __hash__(self): - return self.__hash - - def __repr__(self): - return "Requirement.parse(%r)" % str(self) - - @staticmethod - def parse(s): - (req,) = parse_requirements(s) - return req - - -def split_sections(s): - """Split a string or iterable thereof into (section, content) pairs - - Each ``section`` is a stripped version of the section header ("[section]") - and each ``content`` is a list of stripped lines excluding blank lines and - comment-only lines. If there are any such lines before the first section - header, they're returned in a first ``section`` of ``None``. - """ - section = None - content = [] - for line in yield_lines(s): - if line.startswith("["): - if line.endswith("]"): - if section or content: - yield section, content - section = line[1:-1].strip() - content = [] - else: - raise ValueError("Invalid section heading", line) - else: - content.append(line) - - # wrap up last segment - yield section, content - - -# Silence the PEP440Warning by default, so that end users don't get hit by it -# randomly just because they use pkg_resources. We want to append the rule -# because we want earlier uses of filterwarnings to take precedence over this -# one. -warnings.filterwarnings("ignore", category=PEP440Warning, append=True) diff --git a/micropip/metadata.py b/micropip/metadata.py new file mode 100644 index 0000000..2241f0e --- /dev/null +++ b/micropip/metadata.py @@ -0,0 +1,129 @@ +""" +This is a stripped down version of pip._vendor.pkg_resources.DistInfoDistribution +""" +import re +import zipfile +from collections.abc import Iterable +from pathlib import Path + +from packaging.requirements import Requirement +from packaging.utils import canonicalize_name + + +def safe_name(name): + """Convert an arbitrary string to a standard distribution name + + Any runs of non-alphanumeric/. characters are replaced with a single '-'. + """ + return re.sub("[^A-Za-z0-9.]+", "-", name) + + +def safe_extra(extra): + """Convert an arbitrary string to a standard 'extra' name + + Any runs of non-alphanumeric characters are replaced with a single '_', + and the result is always lowercased. 
+ """ + return re.sub("[^A-Za-z0-9.-]+", "_", extra).lower() + + +# Vendored from pip +class UnsupportedWheel(Exception): + """Unsupported wheel.""" + + +def wheel_dist_info_dir(source: zipfile.ZipFile, name: str) -> str: + """Returns the name of the contained .dist-info directory. + Raises UnsupportedWheel if not found, >1 found, or it doesn't match the + provided name. + """ + # Zip file path separators must be / + subdirs = {p.split("/", 1)[0] for p in source.namelist()} + + info_dirs = [s for s in subdirs if s.endswith(".dist-info")] + + if not info_dirs: + raise UnsupportedWheel(f".dist-info directory not found in wheel {name!r}") + + if len(info_dirs) > 1: + raise UnsupportedWheel( + "multiple .dist-info directories found in wheel {!r}: {}".format( + name, ", ".join(info_dirs) + ) + ) + + info_dir = info_dirs[0] + + info_dir_name = canonicalize_name(info_dir) + canonical_name = canonicalize_name(name) + if not info_dir_name.startswith(canonical_name): + raise UnsupportedWheel( + f".dist-info directory {info_dir!r} does not start with {canonical_name!r}" + ) + + return info_dir + + +class Metadata: + """ + Represents a metadata file in a wheel + """ + + PKG_INFO = "METADATA" + REQUIRES_DIST = "Requires-Dist:" + PROVIDES_EXTRA = "Provides-Extra:" + + def __init__(self, metadata: Path | zipfile.Path | bytes): + self.metadata: list[str] = [] + + if isinstance(metadata, Path | zipfile.Path): + self.metadata = metadata.read_text(encoding="utf-8").splitlines() + elif isinstance(metadata, bytes): + self.metadata = metadata.decode("utf-8").splitlines() + + self.deps = self._compute_dependencies() + + def _parse_requirement(self, line: str) -> Requirement: + line = line[len(self.REQUIRES_DIST) :] + if " #" in line: + line = line[: line.find(" #")] + + return Requirement(line.strip()) + + def _compute_dependencies(self) -> dict[str | None, frozenset[Requirement]]: + """ + Compute the dependencies of the metadata file + """ + deps: dict[str | None, frozenset[Requirement]] = {} + reqs: list[Requirement] = [] + extras: list[str] = [] + + def reqs_for_extra(extra: str | None) -> Iterable[Requirement]: + environment = {"extra": extra} if extra else None + for req in reqs: + if not req.marker or req.marker.evaluate(environment): + yield req + + for line in self.metadata: + if line.startswith(self.REQUIRES_DIST): + reqs.append(self._parse_requirement(line)) + elif line.startswith(self.PROVIDES_EXTRA): + extras.append(line[len(self.PROVIDES_EXTRA) :].strip()) + + deps[None] = frozenset(reqs_for_extra(None)) + for extra in extras: + deps[safe_extra(extra)] = frozenset(reqs_for_extra(extra)) - deps[None] + + return deps + + def requires(self, extras: Iterable[str] = ()) -> list[Requirement]: + """List of Requirements needed for this distro if `extras` are used""" + deps: list[Requirement] = [] + + deps.extend(self.deps.get(None, ())) + for ext in extras: + try: + deps.extend(self.deps[safe_extra(ext)]) + except KeyError: + raise KeyError(f"Unknown extra {ext!r}") from None + return deps diff --git a/micropip/transaction.py b/micropip/transaction.py index ee95a3a..bbc91e2 100644 --- a/micropip/transaction.py +++ b/micropip/transaction.py @@ -46,7 +46,7 @@ def __post_init__(self): async def gather_requirements( self, - requirements: list[str], + requirements: list[str] | list[Requirement], ) -> None: requirement_promises = [] for requirement in requirements: diff --git a/micropip/wheelinfo.py b/micropip/wheelinfo.py index 8d39e55..e831fdd 100644 --- a/micropip/wheelinfo.py +++ b/micropip/wheelinfo.py @@ 
-1,11 +1,11 @@ import asyncio import hashlib import json +import zipfile from dataclasses import dataclass from pathlib import Path from typing import IO, Any from urllib.parse import ParseResult, urlparse -from zipfile import ZipFile from packaging.requirements import Requirement from packaging.tags import Tag @@ -16,11 +16,9 @@ get_dynlibs, loadDynlib, loadedPackages, - wheel_dist_info_dir, ) from ._utils import parse_wheel_filename -from .externals.pip._internal.utils.wheel import pkg_resources_distribution_for_wheel -from .externals.pip._vendor.pkg_resources import Distribution +from .metadata import Metadata, safe_name, wheel_dist_info_dir @dataclass @@ -42,16 +40,15 @@ class WheelInfo: # Fields below are only available after downloading the wheel, i.e. after calling `download()`. _data: IO[bytes] | None = None # Wheel file contents. - _dist: Distribution | None = None # pkg_resources.Distribution object. + _metadata: Metadata | None = None # Wheel metadata. _requires: list[Requirement] | None = None # List of requirements. - # Note: `_project_name`` is taken from the wheel metadata, while `name` is taken from the wheel filename or metadata of the package index. - # They are mostly the same, but can be different in some weird cases (e.g. a user manually renaming the wheel file), so just to be safe we store both. - _project_name: str | None = None # Project name. - # Path to the .dist-info directory. This is only available after extracting the wheel, i.e. after calling `extract()`. _dist_info: Path | None = None + def __post_init__(self): + self._project_name = safe_name(self.name) + @classmethod def from_url(cls, url: str) -> "WheelInfo": """Parse wheels URL and extract available metadata @@ -122,24 +119,20 @@ async def download(self, fetch_kwargs: dict[str, Any]): return self._data = await self._fetch_bytes(fetch_kwargs) - with ZipFile(self._data) as zip_file: - self._dist = pkg_resources_distribution_for_wheel( - zip_file, self.name, "???" - ) + with zipfile.ZipFile(self._data) as zf: + metadata_path = wheel_dist_info_dir(zf, self.name) + "/" + Metadata.PKG_INFO + self._metadata = Metadata(zipfile.Path(zf, metadata_path)) - self._project_name = self._dist.project_name - if self._project_name == "UNKNOWN": - self._project_name = self.name - - def requires(self, extras: set[str]) -> list[str]: + def requires(self, extras: set[str]) -> list[Requirement]: """ Get a list of requirements for the wheel. """ - if not self._dist: + if self._metadata is None: raise RuntimeError( - "Micropip internal error: attempted to access wheel 'requires' before downloading it?" + "Micropip internal error: attempted to get requirements before downloading the wheel?" 
) - requires = self._dist.requires(extras) + + requires = self._metadata.requires(extras) self._requires = requires return requires @@ -173,7 +166,7 @@ def _validate(self): def _extract(self, target: Path) -> None: assert self._data - with ZipFile(self._data) as zf: + with zipfile.ZipFile(self._data) as zf: zf.extractall(target) self._dist_info = target / wheel_dist_info_dir(zf, self.name) @@ -193,8 +186,7 @@ def _set_installer(self) -> None: "PYODIDE_REQUIRES", json.dumps(sorted(x.name for x in self._requires)) ) - name = self._project_name or self.name - setattr(loadedPackages, name, wheel_source) + setattr(loadedPackages, self._project_name, wheel_source) def _write_dist_info(self, file: str, content: str) -> None: assert self._dist_info diff --git a/tests/conftest.py b/tests/conftest.py index c456b12..cca0c4d 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -30,7 +30,7 @@ def pytest_addoption(parser): PYTEST_WHEEL = "pytest-7.2.2-py3-none-any.whl" -def _read_pypi_response(file: Path) -> bytes: +def _read_gzipped_testfile(file: Path) -> bytes: return gzip.decompress(file.read_bytes()) @@ -283,7 +283,7 @@ def _mock_package_index_gen( base = secrets.token_hex(16) for pkg in pkgs: - data = _read_pypi_response(TEST_PYPI_RESPONSE_DIR / f"{pkg}{suffix}") + data = _read_gzipped_testfile(TEST_PYPI_RESPONSE_DIR / f"{pkg}{suffix}") httpserver.expect_request(f"/{base}/{pkg}/").respond_with_data( data, content_type=content_type, diff --git a/tests/test_data/metadata/boto3-1.28.51-py3-none-any.whl.metadata.gz b/tests/test_data/metadata/boto3-1.28.51-py3-none-any.whl.metadata.gz new file mode 100644 index 0000000..2dc5023 Binary files /dev/null and b/tests/test_data/metadata/boto3-1.28.51-py3-none-any.whl.metadata.gz differ diff --git a/tests/test_data/metadata/requests-2.31.0-py3-none-any.whl.metadata.gz b/tests/test_data/metadata/requests-2.31.0-py3-none-any.whl.metadata.gz new file mode 100644 index 0000000..3df6c1f Binary files /dev/null and b/tests/test_data/metadata/requests-2.31.0-py3-none-any.whl.metadata.gz differ diff --git a/tests/test_data/metadata/urllib3-2.0.5-py3-none-any.whl.metadata.gz b/tests/test_data/metadata/urllib3-2.0.5-py3-none-any.whl.metadata.gz new file mode 100644 index 0000000..6467564 Binary files /dev/null and b/tests/test_data/metadata/urllib3-2.0.5-py3-none-any.whl.metadata.gz differ diff --git a/tests/test_metadata.py b/tests/test_metadata.py new file mode 100644 index 0000000..f3679ee --- /dev/null +++ b/tests/test_metadata.py @@ -0,0 +1,91 @@ +from pathlib import Path + +import pytest +from conftest import _read_gzipped_testfile + +TEST_METADATA_DIR = Path(__file__).parent / "test_data" / "metadata" + + +@pytest.mark.parametrize( + "metadata_path, extras, expected", + [ + ( + "boto3-1.28.51-py3-none-any.whl.metadata.gz", + (), + ["botocore", "jmespath", "s3transfer"], + ), + ( + "requests-2.31.0-py3-none-any.whl.metadata.gz", + (), + ["certifi", "urllib3", "charset-normalizer", "idna"], + ), + ( + "requests-2.31.0-py3-none-any.whl.metadata.gz", + ( + "socks", + "use_chardet_on_py3", + ), + ["certifi", "urllib3", "charset-normalizer", "idna", "PySocks", "chardet"], + ), + ], +) +def test_Metadata_requires(metadata_path, extras, expected): + from micropip.metadata import Metadata + + metadata = _read_gzipped_testfile(TEST_METADATA_DIR / metadata_path) + m = Metadata(metadata) + + reqs = m.requires(extras) + reqs_set = set([r.name for r in reqs]) + assert reqs_set == set(expected) + + +def test_Metadata_extra_invalid(): + from micropip.metadata import 
Metadata + + metadata = _read_gzipped_testfile( + TEST_METADATA_DIR / "boto3-1.28.51-py3-none-any.whl.metadata.gz" + ) + m = Metadata(metadata) + extras = ("invalid",) + + with pytest.raises(KeyError, match="Unknown extra"): + m.requires(extras) + + +def test_Metadata_marker(): + from micropip.metadata import Metadata + + metadata = _read_gzipped_testfile( + TEST_METADATA_DIR / "urllib3-2.0.5-py3-none-any.whl.metadata.gz" + ) + m = Metadata(metadata) + extras = ("brotli", "zstd") + + reqs = m.requires(extras) + markers = {r.name: str(r.marker) for r in reqs} + + assert "brotli" in markers + assert ( + markers["brotli"] + == 'platform_python_implementation == "CPython" and extra == "brotli"' + ) + + assert "zstandard" in markers + assert markers["zstandard"] == 'extra == "zstd"' + + +def test_Metadata_extra_of_requires(): + from micropip.metadata import Metadata + + metadata = _read_gzipped_testfile( + TEST_METADATA_DIR / "boto3-1.28.51-py3-none-any.whl.metadata.gz" + ) + m = Metadata(metadata) + extras = ("crt",) + + reqs = m.requires(extras) + reqs_set = {r.name: r.extras for r in reqs} + + assert "botocore" in reqs_set + assert reqs_set["botocore"] == {"crt"} diff --git a/tests/test_package_index.py b/tests/test_package_index.py index a0e6b0d..507a7c0 100644 --- a/tests/test_package_index.py +++ b/tests/test_package_index.py @@ -1,5 +1,5 @@ import pytest -from conftest import TEST_PYPI_RESPONSE_DIR, _read_pypi_response +from conftest import TEST_PYPI_RESPONSE_DIR, _read_gzipped_testfile import micropip._commands.index_urls as index_urls import micropip.package_index as package_index @@ -25,7 +25,7 @@ def _check_project_info(project_info: package_index.ProjectInfo): ) def test_project_info_from_json(name): test_file = TEST_PYPI_RESPONSE_DIR / f"{name}_json.json.gz" - test_data = _read_pypi_response(test_file) + test_data = _read_gzipped_testfile(test_file) info = package_index.ProjectInfo.from_json_api(test_data) _check_project_info(info) @@ -36,7 +36,7 @@ def test_project_info_from_json(name): ) def test_project_info_from_simple_json(name): test_file = TEST_PYPI_RESPONSE_DIR / f"{name}_simple.json.gz" - test_data = _read_pypi_response(test_file) + test_data = _read_gzipped_testfile(test_file) info = package_index.ProjectInfo.from_simple_json_api(test_data) _check_project_info(info) @@ -47,7 +47,7 @@ def test_project_info_from_simple_json(name): ) def test_project_info_from_simple_html(name): test_file = TEST_PYPI_RESPONSE_DIR / f"{name}_simple.html.gz" - test_data = _read_pypi_response(test_file) + test_data = _read_gzipped_testfile(test_file) info = package_index.ProjectInfo.from_simple_html_api( test_data.decode("utf-8"), name @@ -64,8 +64,8 @@ def test_project_info_equal(name): test_file_json = TEST_PYPI_RESPONSE_DIR / f"{name}_json.json.gz" test_file_simple_json = TEST_PYPI_RESPONSE_DIR / f"{name}_simple.json.gz" - test_data_json = _read_pypi_response(test_file_json) - test_data_simple_json = _read_pypi_response(test_file_simple_json) + test_data_json = _read_gzipped_testfile(test_file_json) + test_data_simple_json = _read_gzipped_testfile(test_file_simple_json) index_json = package_index.ProjectInfo.from_json_api(test_data_json) index_simple_json = package_index.ProjectInfo.from_simple_json_api( diff --git a/tests/test_wheelinfo.py b/tests/test_wheelinfo.py index 0688fce..a5af701 100644 --- a/tests/test_wheelinfo.py +++ b/tests/test_wheelinfo.py @@ -103,13 +103,11 @@ def test_install(): async def test_download(dummy_wheel_url): wheel = WheelInfo.from_url(dummy_wheel_url) - assert 
wheel._project_name is None - assert wheel._dist is None + assert wheel._metadata is None await wheel.download({}) - assert wheel._project_name == "pytest" - assert wheel._dist is not None + assert wheel._metadata is not None @pytest.mark.asyncio
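
For reference, a minimal sketch of how the new micropip.metadata.Metadata helper introduced in this patch can be exercised on its own, mirroring what WheelInfo.download() now does. Metadata, Metadata.PKG_INFO, wheel_dist_info_dir, and requires() are the names added in micropip/metadata.py above; the wheel filename, package name, and the "socks" extra below are placeholders, not part of the change.

    # Sketch under the assumptions stated above: read Requires-Dist/Provides-Extra
    # straight from a wheel's METADATA file without pkg_resources.
    import zipfile

    from micropip.metadata import Metadata, wheel_dist_info_dir

    # "example_pkg-1.0-py3-none-any.whl" is a placeholder wheel on local disk.
    with zipfile.ZipFile("example_pkg-1.0-py3-none-any.whl") as zf:
        # Locate the .dist-info directory (raises UnsupportedWheel if absent/ambiguous).
        info_dir = wheel_dist_info_dir(zf, "example-pkg")
        # Parse the METADATA file inside the wheel.
        meta = Metadata(zipfile.Path(zf, info_dir + "/" + Metadata.PKG_INFO))

    # Base requirements (no extras), then with a hypothetical extra enabled;
    # an unknown extra raises KeyError, as covered by test_Metadata_extra_invalid.
    print([str(r) for r in meta.requires()])
    print([str(r) for r in meta.requires(extras=("socks",))])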