Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Range requests #762

Merged
merged 6 commits into from
Jun 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,10 @@ Write the date in place of the "Unreleased" in the case a new version is release

## Unreleased

### Added
- Support partial download of an asset using the
[HTTP `Range` Header](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Range).

### Fixed
- When authenticated as a Service Principal, display the SP's uuid in
the client Context repr.
Expand Down
45 changes: 44 additions & 1 deletion tiled/_tests/test_asset_access.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,13 @@
import hashlib
from pathlib import Path

import pandas
import pytest
from starlette.status import HTTP_403_FORBIDDEN
from starlette.status import (
HTTP_400_BAD_REQUEST,
HTTP_403_FORBIDDEN,
HTTP_416_REQUESTED_RANGE_NOT_SATISFIABLE,
)

from ..catalog import in_memory
from ..client import Context, from_context
Expand Down Expand Up @@ -66,6 +71,44 @@ def test_raw_export(client, tmpdir):
assert orig_hashes == exported_hashes


def test_asset_range_request(client, tmpdir):
    """Fetch slices of a written asset via the HTTP ``Range`` header.

    Three satisfiable ranges are checked against the expected leading bytes
    of the asset, then an out-of-bounds range and a malformed header are
    checked for their respective error status codes.
    """
    frame = pandas.DataFrame({"A": [1, 2, 3], "B": [4.0, 5.0, 6.0]})
    client.write_dataframe(frame, key="x")

    def get_with_range(range_value):
        # Every request targets the same asset; only the Range header varies.
        return client.context.http_client.get(
            "/api/v1/asset/bytes/x?id=1",
            headers={"Range": range_value},
        )

    # First byte, first two bytes, then the following two bytes.
    expected_slices = (
        ("bytes=0-0", b"P"),
        ("bytes=0-1", b"PA"),
        ("bytes=2-3", b"R1"),
    )
    for range_value, expected_bytes in expected_slices:
        assert get_with_range(range_value).content == expected_bytes

    # A range that starts far beyond the end of the asset.
    beyond_end = get_with_range("bytes=1000000-100000000")
    with fail_with_status_code(HTTP_416_REQUESTED_RANGE_NOT_SATISFIABLE):
        beyond_end.raise_for_status()

    # A header that does not follow the 'bytes=start-end' form.
    unparsable = get_with_range("bytes=abc")
    with fail_with_status_code(HTTP_400_BAD_REQUEST):
        unparsable.raise_for_status()


def test_get_asset_filepaths(client):
"Smoke test get_asset_filepaths."
client.write_array([1, 2, 3], key="x")
Expand Down
105 changes: 105 additions & 0 deletions tiled/server/file_response_with_range.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
# This is a variation on starlette's FileResponse that adds support for the
# 'Range' HTTP header.

# It is adapted from a closed PR in starlette which was reviewed by a core
# starlette maintainer but put aside for now in favor of other priorities in
# starlette development. Thus, we implement it here in tiled. If in the future
# starlette adds support upstream, we should consider refactoring to use that.

# Ref: https://github.com/encode/starlette/pull/1999
import os
import stat
import typing

import anyio
from starlette.responses import (
FileResponse,
Receive,
Scope,
Send,
formatdate,
md5_hexdigest,
)
from starlette.status import HTTP_200_OK, HTTP_206_PARTIAL_CONTENT


class FileResponseWithRange(FileResponse):
    """A ``FileResponse`` that can serve a single inclusive byte range.

    Parameters
    ----------
    path :
        Path of the file to send.
    status_code :
        HTTP status code. Must be 206 (Partial Content) when ``range`` is
        given.
    range :
        Optional ``(start, end)`` tuple of inclusive byte offsets. When
        ``None`` the whole file is sent. An ``end`` past the last byte of the
        file is clamped to the last byte, so the advertised headers always
        match the bytes actually sent.
    *args, **kwargs :
        Forwarded unchanged to ``FileResponse``.

    Raises
    ------
    RuntimeError
        If ``range`` is given with a status code other than 206, or (when the
        response is sent) if the file is missing or is not a regular file.
    """

    def __init__(
        self,
        path: typing.Union[str, "os.PathLike[str]"],
        status_code: int = HTTP_200_OK,
        *args,
        range: typing.Optional[typing.Tuple[int, int]] = None,
        **kwargs,
    ):
        if (range is not None) and (status_code != HTTP_206_PARTIAL_CONTENT):
            raise RuntimeError(
                f"Range requests must have a {HTTP_206_PARTIAL_CONTENT} status code."
            )
        # Assigned before super().__init__() because set_stat_headers() reads
        # it. NOTE(review): starlette's FileResponse is expected to call
        # set_stat_headers() from its constructor when stat_result is
        # supplied -- confirm against the pinned starlette version.
        self.range = range
        super().__init__(path, status_code, *args, **kwargs)

    def _byte_span(self, size: int) -> typing.Tuple[int, int]:
        """Return the inclusive ``(start, end)`` offsets to serve for a file of ``size`` bytes."""
        if self.range is None:
            return 0, size - 1
        start, end = self.range
        # Never promise bytes past the end of the file.
        return start, min(end, size - 1)

    def set_stat_headers(self, stat_result: os.stat_result) -> None:
        """Set content-length, last-modified, etag and, for ranges, content-range.

        Headers already present are left untouched (``setdefault``).
        """
        size = stat_result.st_size
        last_modified = formatdate(stat_result.st_mtime, usegmt=True)
        etag_base = str(stat_result.st_mtime) + "-" + str(size)
        start, end = self._byte_span(size)
        if self.range is not None:
            # Distinct ranges of the same file get distinct etags.
            etag_base += f"-{start}/{end}"
            self.headers.setdefault("accept-ranges", "bytes")
            self.headers.setdefault("content-range", f"bytes {start}-{end}/{size}")
        # max() guards the empty-file case (start=0, end=-1) and any span
        # whose start lies at or beyond the end of the file.
        content_length = str(max(end - start + 1, 0))
        etag = md5_hexdigest(etag_base.encode(), usedforsecurity=False)

        self.headers.setdefault("content-length", content_length)
        self.headers.setdefault("last-modified", last_modified)
        self.headers.setdefault("etag", etag)

    async def __call__(self, scope: Scope, receive: Receive, send: Send) -> None:
        """Send the response start message and then the (partial) file body."""
        # Always bind a local stat_result; it is needed below to size the
        # body even when the caller supplied stat_result up front.
        stat_result = self.stat_result
        if stat_result is None:
            try:
                stat_result = await anyio.to_thread.run_sync(os.stat, self.path)
                self.set_stat_headers(stat_result)
            except FileNotFoundError:
                raise RuntimeError(f"File at path {self.path} does not exist.")
            else:
                mode = stat_result.st_mode
                if not stat.S_ISREG(mode):
                    raise RuntimeError(f"File at path {self.path} is not a file.")
        await send(
            {
                "type": "http.response.start",
                "status": self.status_code,
                "headers": self.raw_headers,
            }
        )
        if scope["method"].upper() == "HEAD":
            await send({"type": "http.response.body", "body": b"", "more_body": False})
        elif (
            self.range is None
            and "extensions" in scope
            and "http.response.pathsend" in scope["extensions"]
        ):
            # The pathsend message carries only a path, so it cannot express
            # a sub-range; it is used for whole-file responses only.
            await send({"type": "http.response.pathsend", "path": str(self.path)})
        else:
            start, end = self._byte_span(stat_result.st_size)
            remaining_bytes = max(end - start + 1, 0)
            async with await anyio.open_file(self.path, mode="rb") as file:
                if self.range is not None:
                    await file.seek(start)
                more_body = True
                while more_body:
                    chunk_size = min(remaining_bytes, self.chunk_size)
                    chunk = await file.read(chunk_size)
                    remaining_bytes -= len(chunk)
                    # Stop when the span is exhausted or the file ended early.
                    more_body = remaining_bytes > 0 and len(chunk) == chunk_size
                    await send(
                        {
                            "type": "http.response.body",
                            "body": chunk,
                            "more_body": more_body,
                        }
                    )
        if self.background is not None:
            await self.background()
37 changes: 34 additions & 3 deletions tiled/server/router.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import dataclasses
import inspect
import os
import re
import warnings
from datetime import datetime, timedelta
from functools import partial
Expand All @@ -13,14 +14,15 @@
from json_merge_patch import merge as apply_merge_patch
from jsonpatch import apply_patch as apply_json_patch
from pydantic_settings import BaseSettings
from starlette.responses import FileResponse
from starlette.status import (
HTTP_200_OK,
HTTP_206_PARTIAL_CONTENT,
HTTP_400_BAD_REQUEST,
HTTP_403_FORBIDDEN,
HTTP_404_NOT_FOUND,
HTTP_405_METHOD_NOT_ALLOWED,
HTTP_406_NOT_ACCEPTABLE,
HTTP_416_REQUESTED_RANGE_NOT_SATISFIABLE,
HTTP_422_UNPROCESSABLE_ENTITY,
)

Expand Down Expand Up @@ -55,6 +57,7 @@
get_validation_registry,
slice_,
)
from .file_response_with_range import FileResponseWithRange
from .links import links_for_node
from .settings import get_settings
from .utils import filter_for_access, get_base_url, record_timing
Expand Down Expand Up @@ -1519,6 +1522,12 @@ async def delete_revision(
return json_or_msgpack(request, None)


# For simplicity of implementation, we support a restricted subset of the full
# Range spec. This could be extended if the need arises.
# https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Range
#
# The pattern accepts exactly one range of the form "bytes=<start>-<end>",
# where both positions are runs of decimal digits, captured as groups 1 and 2.
# Open-ended ("bytes=5-"), suffix ("bytes=-5") and multi-range
# ("bytes=0-1, 4-5") forms do not match.
RANGE_HEADER_PATTERN = re.compile(r"^bytes=(\d+)-(\d+)$")


@router.get("/asset/bytes/{path:path}")
async def get_asset(
request: Request,
Expand Down Expand Up @@ -1587,12 +1596,34 @@ async def get_asset(
full_path = path
stat_result = await anyio.to_thread.run_sync(os.stat, full_path)
filename = full_path.name
return FileResponse(
if "range" in request.headers:
range_header = request.headers["range"]
match = RANGE_HEADER_PATTERN.match(range_header)
if match is None:
raise HTTPException(
status_code=HTTP_400_BAD_REQUEST,
detail=(
"Only a Range headers of the form 'bytes=start-end' are supported. "
f"Could not parse Range header: {range_header}",
),
)
range = start, _ = (int(match.group(1)), int(match.group(2)))
if start > stat_result.st_size:
raise HTTPException(
status_code=HTTP_416_REQUESTED_RANGE_NOT_SATISFIABLE,
headers={"content-range": f"bytes */{stat_result.st_size}"},
)
status_code = HTTP_206_PARTIAL_CONTENT
else:
range = None
status_code = HTTP_200_OK
return FileResponseWithRange(
full_path,
stat_result=stat_result,
method="GET",
status_code=HTTP_200_OK,
status_code=status_code,
headers={"Content-Disposition": f'attachment; filename="{filename}"'},
range=range,
)


Expand Down
Loading