Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Downloader support resume from connection reset #9422

Merged
merged 11 commits into from
Aug 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions docs/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,19 @@ values, usage instructions and warnings.

Use parallel execution when using the new (`>=1.1.0`) installer.

### `requests.max-retries`

**Type**: `int`

**Default**: `0`

**Environment Variable**: `POETRY_REQUESTS_MAX_RETRIES`

*Introduced in 1.9.0*
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Reminder to myself: The next version might be 2.0.0.


Set the maximum number of retries in an unstable network.
This setting has no effect if the server does not support HTTP range requests.

### `solver.lazy-wheel`

**Type**: `boolean`
Expand Down
8 changes: 7 additions & 1 deletion src/poetry/config/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,9 @@ class Config:
"experimental": {
"system-git-client": False,
},
"requests": {
"max-retries": 0,
},
"installer": {
"parallel": True,
"max-workers": None,
Expand Down Expand Up @@ -316,7 +319,10 @@ def _get_normalizer(name: str) -> Callable[[str], Any]:
if name == "virtualenvs.path":
return lambda val: str(Path(val))

if name == "installer.max-workers":
if name in {
"installer.max-workers",
"requests.max-retries",
}:
return int_normalizer

if name in ["installer.no-binary", "installer.only-binary"]:
Expand Down
1 change: 1 addition & 0 deletions src/poetry/console/commands/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ def unique_config_values(self) -> dict[str, tuple[Any, Any]]:
"virtualenvs.prefer-active-python": (boolean_validator, boolean_normalizer),
"virtualenvs.prompt": (str, str),
"experimental.system-git-client": (boolean_validator, boolean_normalizer),
"requests.max-retries": (lambda val: int(val) >= 0, int_normalizer),
"installer.parallel": (boolean_validator, boolean_normalizer),
"installer.max-workers": (lambda val: int(val) > 0, int_normalizer),
"installer.no-binary": (
Expand Down
5 changes: 4 additions & 1 deletion src/poetry/installation/executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ def __init__(
# Cache whether decorated output is supported.
# https://github.com/python-poetry/cleo/issues/423
self._decorated_output: bool = self._io.output.is_decorated()
self._max_retries = config.get("requests.max-retries", 0)

@property
def installations_count(self) -> int:
Expand Down Expand Up @@ -719,7 +720,9 @@ def _download_archive(
url: str,
dest: Path,
) -> None:
downloader = Downloader(url, dest, self._authenticator)
downloader = Downloader(
url, dest, self._authenticator, max_retries=self._max_retries
)
wheel_size = downloader.total_size

operation_message = self.get_operation_message(operation)
Expand Down
7 changes: 6 additions & 1 deletion src/poetry/packages/direct_origin.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

from poetry.core.packages.utils.link import Link

from poetry.config.config import Config
from poetry.inspection.info import PackageInfo
from poetry.inspection.info import PackageInfoError
from poetry.utils.authenticator import get_default_authenticator
Expand Down Expand Up @@ -57,6 +58,8 @@ def _get_package_from_git(
class DirectOrigin:
def __init__(self, artifact_cache: ArtifactCache) -> None:
self._artifact_cache = artifact_cache
config = Config.create()
self._max_retries = config.get("requests.max-retries", 0)
self._authenticator = get_default_authenticator()

@classmethod
Expand All @@ -77,7 +80,9 @@ def get_package_from_directory(cls, directory: Path) -> Package:
return PackageInfo.from_directory(path=directory).to_package(root_dir=directory)

def _download_file(self, url: str, dest: Path) -> None:
download_file(url, dest, session=self._authenticator)
download_file(
url, dest, session=self._authenticator, max_retries=self._max_retries
)

def get_package_from_url(self, url: str) -> Package:
link = Link(url)
Expand Down
7 changes: 6 additions & 1 deletion src/poetry/repositories/http_repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ def __init__(
self.get_page = functools.lru_cache(maxsize=None)(self._get_page)

self._lazy_wheel = config.get("solver.lazy-wheel", True)
self._max_retries = config.get("requests.max-retries", 0)
# We are tracking if a domain supports range requests or not to avoid
# unnecessary requests.
# ATTENTION: A domain might support range requests only for some files, so the
Expand Down Expand Up @@ -95,7 +96,11 @@ def _download(
self, url: str, dest: Path, *, raise_accepts_ranges: bool = False
) -> None:
return download_file(
url, dest, session=self.session, raise_accepts_ranges=raise_accepts_ranges
url,
dest,
session=self.session,
raise_accepts_ranges=raise_accepts_ranges,
max_retries=self._max_retries,
)

@contextmanager
Expand Down
55 changes: 45 additions & 10 deletions src/poetry/utils/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
from typing import Any
from typing import overload

from requests.exceptions import ChunkedEncodingError
from requests.exceptions import ConnectionError
from requests.utils import atomic_open

from poetry.utils.authenticator import get_default_authenticator
Expand All @@ -33,6 +35,7 @@
from types import TracebackType

from poetry.core.packages.package import Package
from requests import Response
from requests import Session

from poetry.utils.authenticator import Authenticator
Expand Down Expand Up @@ -133,10 +136,11 @@ def download_file(
session: Authenticator | Session | None = None,
chunk_size: int = 1024,
raise_accepts_ranges: bool = False,
max_retries: int = 0,
) -> None:
from poetry.puzzle.provider import Indicator

downloader = Downloader(url, dest, session)
downloader = Downloader(url, dest, session, max_retries=max_retries)

if raise_accepts_ranges and downloader.accepts_ranges:
raise HTTPRangeRequestSupported(f"URL {url} supports range requests.")
Expand Down Expand Up @@ -168,16 +172,13 @@ def __init__(
url: str,
dest: Path,
session: Authenticator | Session | None = None,
max_retries: int = 0,
):
self._dest = dest

session = session or get_default_authenticator()
headers = {"Accept-Encoding": "Identity"}

self._response = session.get(
url, stream=True, headers=headers, timeout=REQUESTS_TIMEOUT
)
self._response.raise_for_status()
self._max_retries = max_retries
self._session = session or get_default_authenticator()
self._url = url
self._response = self._get()

@cached_property
def accepts_ranges(self) -> bool:
Expand All @@ -191,10 +192,44 @@ def total_size(self) -> int:
total_size = int(self._response.headers["Content-Length"])
return total_size

def _get(self, start: int = 0) -> Response:
headers = {"Accept-Encoding": "Identity"}
if start > 0:
headers["Range"] = f"bytes={start}-"
response = self._session.get(
self._url, stream=True, headers=headers, timeout=REQUESTS_TIMEOUT
)
response.raise_for_status()
return response

def _iter_content_with_resume(self, chunk_size: int) -> Iterator[bytes]:
fetched_size = 0
retries = 0
while True:
try:
for chunk in self._response.iter_content(chunk_size=chunk_size):
yield chunk
fetched_size += len(chunk)
except (ChunkedEncodingError, ConnectionError):
if (
retries < self._max_retries
and self.accepts_ranges
and fetched_size > 0
):
# only retry if server supports byte ranges
# and we have fetched at least one chunk
# otherwise, we should just fail
retries += 1
self._response = self._get(fetched_size)
continue
raise
else:
break

def download_with_progress(self, chunk_size: int = 1024) -> Iterator[int]:
fetched_size = 0
with atomic_open(self._dest) as f:
for chunk in self._response.iter_content(chunk_size=chunk_size):
for chunk in self._iter_content_with_resume(chunk_size=chunk_size):
if chunk:
f.write(chunk)
fetched_size += len(chunk)
Expand Down
7 changes: 6 additions & 1 deletion tests/config/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,12 @@ def get_options_based_on_normalizer(normalizer: Normalizer) -> Iterator[str]:


@pytest.mark.parametrize(
("name", "value"), [("installer.parallel", True), ("virtualenvs.create", True)]
("name", "value"),
[
("installer.parallel", True),
("virtualenvs.create", True),
("requests.max-retries", 0),
],
)
def test_config_get_default_value(config: Config, name: str, value: bool) -> None:
assert config.get(name) is value
Expand Down
6 changes: 6 additions & 0 deletions tests/console/commands/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ def test_list_displays_default_value_if_not_set(
installer.only-binary = null
installer.parallel = true
keyring.enabled = true
requests.max-retries = 0
solver.lazy-wheel = true
virtualenvs.create = true
virtualenvs.in-project = null
Expand Down Expand Up @@ -92,6 +93,7 @@ def test_list_displays_set_get_setting(
installer.only-binary = null
installer.parallel = true
keyring.enabled = true
requests.max-retries = 0
solver.lazy-wheel = true
virtualenvs.create = false
virtualenvs.in-project = null
Expand Down Expand Up @@ -145,6 +147,7 @@ def test_unset_setting(
installer.only-binary = null
installer.parallel = true
keyring.enabled = true
requests.max-retries = 0
solver.lazy-wheel = true
virtualenvs.create = true
virtualenvs.in-project = null
Expand Down Expand Up @@ -176,6 +179,7 @@ def test_unset_repo_setting(
installer.only-binary = null
installer.parallel = true
keyring.enabled = true
requests.max-retries = 0
solver.lazy-wheel = true
virtualenvs.create = true
virtualenvs.in-project = null
Expand Down Expand Up @@ -305,6 +309,7 @@ def test_list_displays_set_get_local_setting(
installer.only-binary = null
installer.parallel = true
keyring.enabled = true
requests.max-retries = 0
solver.lazy-wheel = true
virtualenvs.create = false
virtualenvs.in-project = null
Expand Down Expand Up @@ -345,6 +350,7 @@ def test_list_must_not_display_sources_from_pyproject_toml(
installer.parallel = true
keyring.enabled = true
repositories.foo.url = "https://foo.bar/simple/"
requests.max-retries = 0
solver.lazy-wheel = true
virtualenvs.create = true
virtualenvs.in-project = null
Expand Down
6 changes: 5 additions & 1 deletion tests/repositories/test_http_repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,11 @@ def test_get_info_from_wheel(
else:
mock_metadata_from_wheel_url.assert_not_called()
mock_download.assert_called_once_with(
url, mocker.ANY, session=repo.session, raise_accepts_ranges=lazy_wheel
url,
mocker.ANY,
session=repo.session,
raise_accepts_ranges=lazy_wheel,
max_retries=0,
)
if lazy_wheel:
assert repo._supports_range_requests[domain] is False
Expand Down
74 changes: 74 additions & 0 deletions tests/utils/test_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import pytest

from poetry.core.utils.helpers import parse_requires
from requests.exceptions import ChunkedEncodingError

from poetry.utils.helpers import Downloader
from poetry.utils.helpers import HTTPRangeRequestSupported
Expand Down Expand Up @@ -150,6 +151,79 @@ def test_download_file(
assert http.last_request().headers["Accept-Encoding"] == "Identity"


def test_download_file_recover_from_error(
http: type[httpretty], fixture_dir: FixtureDirGetter, tmp_path: Path
) -> None:
file_path = fixture_dir("distributions") / "demo-0.1.0.tar.gz"
file_body = file_path.read_bytes()
file_length = len(file_body)
url = "https://foo.com/demo-0.1.0.tar.gz"

def handle_request(
request: HTTPrettyRequest, uri: str, response_headers: dict[str, Any]
) -> tuple[int, dict[str, Any], bytes]:
if request.headers.get("Range") is None:
response_headers["Content-Length"] = str(file_length)
response_headers["Accept-Ranges"] = "bytes"
return 200, response_headers, file_body[: file_length // 2]
else:
start = int(
request.headers.get("Range", "bytes=0-").split("=")[1].split("-")[0]
)
return 206, response_headers, file_body[start:]

http.register_uri(http.GET, url, body=handle_request)
dest = tmp_path / "demo-0.1.0.tar.gz"

download_file(url, dest, chunk_size=file_length // 2, max_retries=1)

expect_sha_256 = "9fa123ad707a5c6c944743bf3e11a0e80d86cb518d3cf25320866ca3ef43e2ad"
assert get_file_hash(dest) == expect_sha_256
assert http.last_request().headers["Accept-Encoding"] == "Identity"
assert http.last_request().headers["Range"] == f"bytes={file_length // 2}-"


def test_download_file_fail_when_no_range(
http: type[httpretty], fixture_dir: FixtureDirGetter, tmp_path: Path
) -> None:
file_path = fixture_dir("distributions") / "demo-0.1.0.tar.gz"
file_body = file_path.read_bytes()
file_length = len(file_body)
url = "https://foo.com/demo-0.1.0.tar.gz"

def handle_request(
request: HTTPrettyRequest, uri: str, response_headers: dict[str, Any]
) -> tuple[int, dict[str, Any], bytes]:
response_headers["Content-Length"] = str(file_length)
return 200, response_headers, file_body[: file_length // 2]

http.register_uri(http.GET, url, body=handle_request)
dest = tmp_path / "demo-0.1.0.tar.gz"
with pytest.raises(ChunkedEncodingError):
download_file(url, dest, chunk_size=file_length // 2, max_retries=1)


def test_download_file_fail_when_first_chunk_failed(
http: type[httpretty], fixture_dir: FixtureDirGetter, tmp_path: Path
) -> None:
file_path = fixture_dir("distributions") / "demo-0.1.0.tar.gz"
file_body = file_path.read_bytes()
file_length = len(file_body)
url = "https://foo.com/demo-0.1.0.tar.gz"

def handle_request(
request: HTTPrettyRequest, uri: str, response_headers: dict[str, Any]
) -> tuple[int, dict[str, Any], bytes]:
response_headers["Content-Length"] = str(file_length)
response_headers["Accept-Ranges"] = "bytes"
return 200, response_headers, file_body[: file_length // 2]

http.register_uri(http.GET, url, body=handle_request)
dest = tmp_path / "demo-0.1.0.tar.gz"
with pytest.raises(ChunkedEncodingError):
download_file(url, dest, chunk_size=file_length, max_retries=1)


@pytest.mark.parametrize(
"hash_types,expected",
[
Expand Down
Loading