MAINT Store data in bytes not io.BytesIO (#91)
* Store data in bytes instead of io

* Fix lint

* Fix typo

* Address reviews

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Fix test package config

* Use normalized name

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
ryanking13 and pre-commit-ci[bot] committed Dec 28, 2023
1 parent f975747 commit 48ccefc
Showing 8 changed files with 81 additions and 88 deletions.
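
In short: the fetch helpers now return raw bytes, and call sites wrap the data in io.BytesIO only at the points that actually need a file-like object (zip extraction, metadata reading, dynamic-library scanning). A minimal sketch of that pattern; the list_wheel_contents helper is illustrative, not part of micropip:

import io
import zipfile

def list_wheel_contents(data: bytes) -> list[str]:
    # Wrap the raw bytes only at the point that needs a file-like object.
    with zipfile.ZipFile(io.BytesIO(data)) as zf:
        return zf.namelist()

# Build a tiny in-memory archive to exercise the helper.
buf = io.BytesIO()
with zipfile.ZipFile(buf, "w") as zf:
    zf.writestr("example/__init__.py", "")
print(list_wheel_contents(buf.getvalue()))  # ['example/__init__.py']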
14 changes: 6 additions & 8 deletions micropip/_compat_in_pyodide.py
@@ -1,5 +1,4 @@
-from io import BytesIO
-from typing import IO
+from pathlib import Path
 from urllib.parse import urlparse

 from pyodide._package_loader import get_dynlibs
@@ -20,15 +19,14 @@
     # Otherwise, this is pytest test collection so let it go.


-async def fetch_bytes(url: str, kwargs: dict[str, str]) -> IO[bytes]:
+async def fetch_bytes(url: str, kwargs: dict[str, str]) -> bytes:
     parsed_url = urlparse(url)
     if parsed_url.scheme == "emfs":
-        return open(parsed_url.path, "rb")
+        return Path(parsed_url.path).read_bytes()
     if parsed_url.scheme == "file":
-        result_bytes = (await loadBinaryFile(parsed_url.path)).to_bytes()
-    else:
-        result_bytes = await (await pyfetch(url, **kwargs)).bytes()
-    return BytesIO(result_bytes)
+        return (await loadBinaryFile(parsed_url.path)).to_bytes()
+
+    return await (await pyfetch(url, **kwargs)).bytes()


 async def fetch_string_and_headers(
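
A side note on the emfs branch: the old code returned an open file object from open(parsed_url.path, "rb") and left closing it to the caller, while Path.read_bytes() opens, reads, and closes the file in one call. A small sketch of the equivalence, using a hypothetical local path:

from pathlib import Path

wheel_path = "/tmp/example-0.1-py3-none-any.whl"  # hypothetical path, for illustration only

# Old style: the caller gets an open handle and is responsible for closing it.
with open(wheel_path, "rb") as f:
    old_data = f.read()

# New style: read_bytes() opens, reads, and closes in a single call.
new_data = Path(wheel_path).read_bytes()

assert old_data == new_data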
6 changes: 2 additions & 4 deletions micropip/_compat_not_in_pyodide.py
@@ -1,5 +1,4 @@
 import re
-from io import BytesIO
 from pathlib import Path
 from typing import IO, Any

@@ -20,9 +19,8 @@ def _fetch(url: str, kwargs: dict[str, Any]) -> addinfourl:
     return urlopen(Request(url, **kwargs))


-async def fetch_bytes(url: str, kwargs: dict[str, Any]) -> IO[bytes]:
-    response = _fetch(url, kwargs=kwargs)
-    return BytesIO(response.read())
+async def fetch_bytes(url: str, kwargs: dict[str, Any]) -> bytes:
+    return _fetch(url, kwargs=kwargs).read()


 async def fetch_string_and_headers(
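
Outside Pyodide the fetch is plain urllib under the hood; fetch_bytes stays a coroutine only so callers can await either implementation the same way. A self-contained sketch of the same pattern, not micropip's own code, assuming any reachable URL:

import asyncio
from urllib.request import Request, urlopen

async def fetch_bytes(url: str, kwargs: dict) -> bytes:
    # Synchronous I/O wrapped in a coroutine, mirroring the compat shim above.
    return urlopen(Request(url, **kwargs)).read()

data = asyncio.run(fetch_bytes("https://example.com/", {}))
print(type(data), len(data))  # <class 'bytes'> ...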
46 changes: 20 additions & 26 deletions micropip/wheelinfo.py
@@ -1,10 +1,11 @@
 import asyncio
 import hashlib
+import io
 import json
 import zipfile
 from dataclasses import dataclass
 from pathlib import Path
-from typing import IO, Any
+from typing import Any
 from urllib.parse import ParseResult, urlparse

 from packaging.requirements import Requirement
@@ -39,7 +40,7 @@ class WheelInfo:

     # Fields below are only available after downloading the wheel, i.e. after calling `download()`.

-    _data: IO[bytes] | None = None  # Wheel file contents.
+    _data: bytes | None = None  # Wheel file contents.
     _metadata: Metadata | None = None  # Wheel metadata.
     _requires: list[Requirement] | None = None  # List of requirements.

@@ -109,7 +110,7 @@ async def install(self, target: Path) -> None:
             raise RuntimeError(
                 "Micropip internal error: attempted to install wheel before downloading it?"
             )
-        self._validate()
+        _validate_sha256_checksum(self._data, self.sha256)
         self._extract(target)
         await self._load_libraries(target)
         self._set_installer()
@@ -119,7 +120,7 @@ async def download(self, fetch_kwargs: dict[str, Any]):
             return

         self._data = await self._fetch_bytes(fetch_kwargs)
-        with zipfile.ZipFile(self._data) as zf:
+        with zipfile.ZipFile(io.BytesIO(self._data)) as zf:
             metadata_path = wheel_dist_info_dir(zf, self.name) + "/" + Metadata.PKG_INFO
             self._metadata = Metadata(zipfile.Path(zf, metadata_path))

@@ -153,20 +154,9 @@ async def _fetch_bytes(self, fetch_kwargs: dict[str, Any]):
                 "Check if the server is sending the correct 'Access-Control-Allow-Origin' header."
             ) from e

-    def _validate(self):
-        if self.sha256 is None:
-            # No checksums available, e.g. because installing
-            # from a different location than PyPI.
-            return
-
-        assert self._data
-        sha256_actual = _generate_package_hash(self._data)
-        if sha256_actual != self.sha256:
-            raise ValueError("Contents don't match hash")
-
     def _extract(self, target: Path) -> None:
         assert self._data
-        with zipfile.ZipFile(self._data) as zf:
+        with zipfile.ZipFile(io.BytesIO(self._data)) as zf:
             zf.extractall(target)
             self._dist_info = target / wheel_dist_info_dir(zf, self.name)

@@ -198,16 +188,20 @@ async def _load_libraries(self, target: Path) -> None:
         TODO: integrate with pyodide's dynamic library loading mechanism.
         """
         assert self._data
-        dynlibs = get_dynlibs(self._data, ".whl", target)
+        dynlibs = get_dynlibs(io.BytesIO(self._data), ".whl", target)
         await asyncio.gather(*map(lambda dynlib: loadDynlib(dynlib, False), dynlibs))


-def _generate_package_hash(data: IO[bytes]) -> str:
-    """
-    Generate a SHA256 hash of the package data.
-    """
-    sha256_hash = hashlib.sha256()
-    data.seek(0)
-    while chunk := data.read(4096):
-        sha256_hash.update(chunk)
-    return sha256_hash.hexdigest()
+def _validate_sha256_checksum(data: bytes, expected: str | None = None) -> None:
+    if expected is None:
+        # No checksums available, e.g. because installing
+        # from a different location than PyPI.
+        return
+
+    actual = _generate_package_hash(data)
+    if actual != expected:
+        raise RuntimeError(f"Invalid checksum: expected {expected}, got {actual}")
+
+
+def _generate_package_hash(data: bytes) -> str:
+    return hashlib.sha256(data).hexdigest()
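
Hashing the raw bytes in one call produces the same digest as the old chunked loop over a file object; a quick check with arbitrary test data:

import hashlib
import io

data = b"example wheel payload" * 1000

# Old approach: stream a file-like object in 4 KiB chunks.
buf = io.BytesIO(data)
chunked = hashlib.sha256()
while chunk := buf.read(4096):
    chunked.update(chunk)

# New approach: hash the bytes directly.
assert chunked.hexdigest() == hashlib.sha256(data).hexdigest()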
2 changes: 1 addition & 1 deletion tests/conftest.py
@@ -257,7 +257,7 @@ def write_file(filename, contents):

    tmp.seek(0)

-   return tmp
+   return tmp.read()


@pytest.fixture
4 changes: 2 additions & 2 deletions tests/test_data/test_wheel_uninstall/pyproject.toml
@@ -1,11 +1,11 @@
 [project]
-name = "test_wheel_uninstall"
+name = "test-wheel-uninstall"
 description = "Test wheel uninstall"
 requires-python = ">=3.10"
 version = "1.0.0"

 [tool.setuptools]
-packages = ["deep", "deep.deep", "shallow", "test_wheel_uninstall"]
+packages = ["deep", "deep.deep", "shallow", "test_wheel_uninstall", "deep.data"]
 py-modules = ["top_level"]

 [tool.setuptools.package-data]
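
The project name is switched to its normalized (PEP 503) form: lowercase, with runs of "-", "_", and "." collapsed to a single "-". That is exactly what packaging.utils.canonicalize_name computes, which the updated tests below rely on:

from packaging.utils import canonicalize_name

assert canonicalize_name("test_wheel_uninstall") == "test-wheel-uninstall"
assert canonicalize_name("Test.Wheel__Uninstall") == "test-wheel-uninstall"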
4 changes: 1 addition & 3 deletions tests/test_install.py
@@ -370,8 +370,6 @@ async def run_test(selenium, url, name, version):

 @pytest.mark.asyncio
 async def test_custom_index_urls(mock_package_index_json_api, monkeypatch):
-    from io import BytesIO
-
     mock_server_fake_package = mock_package_index_json_api(
         pkgs=["fake-pkg-micropip-test"]
     )
@@ -381,7 +379,7 @@ async def test_custom_index_urls(mock_package_index_json_api, monkeypatch):
     async def _mock_fetch_bytes(url, *args):
         nonlocal _wheel_url
         _wheel_url = url
-        return BytesIO(b"fake wheel")
+        return b"fake wheel"

     from micropip import wheelinfo

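
The lines that wire the mock into micropip are collapsed in this view; the visible change is only that the stub now returns raw bytes instead of a BytesIO wrapper, matching the new fetch_bytes return type. A self-contained sketch of the idea, with illustrative names rather than the test's own:

import asyncio

async def _mock_fetch_bytes(url, *args):
    # A stubbed fetch must now hand back bytes, not a file-like object.
    return b"fake wheel"

data = asyncio.run(_mock_fetch_bytes("https://example.invalid/fake_pkg-1.0-py3-none-any.whl"))
assert isinstance(data, bytes)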
70 changes: 48 additions & 22 deletions tests/test_uninstall.py
@@ -3,9 +3,10 @@
 import pytest
 from pytest_pyodide import run_in_pyodide, spawn_web_server
 from conftest import SNOWBALL_WHEEL, TEST_WHEEL_DIR
-from packaging.utils import parse_wheel_filename
+from packaging.utils import parse_wheel_filename, canonicalize_name

 TEST_PACKAGE_NAME = "test_wheel_uninstall"
+TEST_PACKAGE_NAME_NORMALIZED = canonicalize_name(TEST_PACKAGE_NAME)


 @pytest.fixture(scope="module")
@@ -19,15 +20,15 @@ def test_wheel_url(test_wheel_path):

 def test_basic(selenium_standalone_micropip, test_wheel_url):
     @run_in_pyodide()
-    async def run(selenium, pkg_name, wheel_url):
+    async def run(selenium, pkg_name, pkg_name_normalized, wheel_url):
         import importlib.metadata
         import sys

         import micropip

         await micropip.install(wheel_url)

-        assert pkg_name in micropip.list()
+        assert pkg_name_normalized in micropip.list()
         assert pkg_name not in sys.modules

         __import__(pkg_name)
@@ -52,7 +53,12 @@ async def run(selenium, pkg_name, wheel_url):
         # 3. Check that the module is not available with micropip.list()
         assert pkg_name not in micropip.list()

-    run(selenium_standalone_micropip, TEST_PACKAGE_NAME, test_wheel_url)
+    run(
+        selenium_standalone_micropip,
+        TEST_PACKAGE_NAME,
+        TEST_PACKAGE_NAME_NORMALIZED,
+        test_wheel_url,
+    )


 def test_files(selenium_standalone_micropip, test_wheel_url):
@@ -61,13 +67,13 @@ def test_files(selenium_standalone_micropip, test_wheel_url):
     """

     @run_in_pyodide()
-    async def run(selenium, pkg_name, wheel_url):
+    async def run(selenium, pkg_name, pkg_name_normalized, wheel_url):
         import importlib.metadata

         import micropip

         await micropip.install(wheel_url)
-        assert pkg_name in micropip.list()
+        assert pkg_name_normalized in micropip.list()

         dist = importlib.metadata.distribution(pkg_name)
         files = dist.files
@@ -86,7 +92,12 @@ async def run(selenium, pkg_name, wheel_url):

         assert not dist._path.is_dir(), f"{dist._path} still exists after removal"

-    run(selenium_standalone_micropip, TEST_PACKAGE_NAME, test_wheel_url)
+    run(
+        selenium_standalone_micropip,
+        TEST_PACKAGE_NAME,
+        TEST_PACKAGE_NAME_NORMALIZED,
+        test_wheel_url,
+    )


 def test_install_again(selenium_standalone_micropip, test_wheel_url):
@@ -95,20 +106,20 @@ def test_install_again(selenium_standalone_micropip, test_wheel_url):
     """

     @run_in_pyodide()
-    async def run(selenium, pkg_name, wheel_url):
+    async def run(selenium, pkg_name, pkg_name_normalized, wheel_url):
         import sys

         import micropip

         await micropip.install(wheel_url)

-        assert pkg_name in micropip.list()
+        assert pkg_name_normalized in micropip.list()

         __import__(pkg_name)

         micropip.uninstall(pkg_name)

-        assert pkg_name not in micropip.list()
+        assert pkg_name_normalized not in micropip.list()

         del sys.modules[pkg_name]

@@ -121,10 +132,15 @@ async def run(selenium, pkg_name, wheel_url):

         await micropip.install(wheel_url)

-        assert pkg_name in micropip.list()
+        assert pkg_name_normalized in micropip.list()
         __import__(pkg_name)

-    run(selenium_standalone_micropip, TEST_PACKAGE_NAME, test_wheel_url)
+    run(
+        selenium_standalone_micropip,
+        TEST_PACKAGE_NAME,
+        TEST_PACKAGE_NAME_NORMALIZED,
+        test_wheel_url,
+    )


 def test_warning_not_installed(selenium_standalone_micropip):
@@ -156,7 +172,7 @@ def test_warning_file_removed(selenium_standalone_micropip, test_wheel_url):
     """

     @run_in_pyodide()
-    async def run(selenium, pkg_name, wheel_url):
+    async def run(selenium, pkg_name, pkg_name_normalized, wheel_url):
         from importlib.metadata import distribution
         import micropip
         import contextlib
@@ -165,17 +181,17 @@ async def run(selenium, pkg_name, wheel_url):
         with io.StringIO() as buf, contextlib.redirect_stdout(buf):
             await micropip.install(wheel_url)

-            assert pkg_name in micropip.list()
+            assert pkg_name_normalized in micropip.list()

-            dist = distribution(pkg_name)
+            dist = distribution(pkg_name_normalized)
             files = dist.files
             file1 = files[0]
             file2 = files[1]

             file1.locate().unlink()
             file2.locate().unlink()

-            micropip.uninstall(pkg_name)
+            micropip.uninstall(pkg_name_normalized)

             captured = buf.getvalue()
             logs = captured.strip().split("\n")
@@ -184,7 +200,12 @@ async def run(selenium, pkg_name, wheel_url):
             assert "does not exist" in logs[-1]
             assert "does not exist" in logs[-2]

-    run(selenium_standalone_micropip, TEST_PACKAGE_NAME, test_wheel_url)
+    run(
+        selenium_standalone_micropip,
+        TEST_PACKAGE_NAME,
+        TEST_PACKAGE_NAME_NORMALIZED,
+        test_wheel_url,
+    )


 def test_warning_remaining_file(selenium_standalone_micropip, test_wheel_url):
@@ -193,28 +214,33 @@ def test_warning_remaining_file(selenium_standalone_micropip, test_wheel_url):
     """

     @run_in_pyodide()
-    async def run(selenium, pkg_name, wheel_url):
+    async def run(selenium, pkg_name, pkg_name_normalized, wheel_url):
         from importlib.metadata import distribution
         import micropip
         import contextlib
         import io

         with io.StringIO() as buf, contextlib.redirect_stdout(buf):
             await micropip.install(wheel_url)
-            assert pkg_name in micropip.list()
+            assert pkg_name_normalized in micropip.list()

-            pkg_dir = distribution(pkg_name)._path.parent / "deep"
+            pkg_dir = distribution(pkg_name_normalized)._path.parent / "deep"
             (pkg_dir / "extra-file.txt").touch()

-            micropip.uninstall(pkg_name)
+            micropip.uninstall(pkg_name_normalized)

             captured = buf.getvalue()
             logs = captured.strip().split("\n")

             assert len(logs) == 1
             assert "is not empty after uninstallation" in logs[0]

-    run(selenium_standalone_micropip, TEST_PACKAGE_NAME, test_wheel_url)
+    run(
+        selenium_standalone_micropip,
+        TEST_PACKAGE_NAME,
+        TEST_PACKAGE_NAME_NORMALIZED,
+        test_wheel_url,
+    )


 def test_pyodide_repodata(selenium_standalone_micropip):