Skip to content

Commit

Permalink
Add tests for jpeg adapter
Browse files Browse the repository at this point in the history
  • Loading branch information
jwlodek committed Oct 16, 2024
1 parent 7399929 commit 1c4ac97
Show file tree
Hide file tree
Showing 5 changed files with 235 additions and 41 deletions.
10 changes: 5 additions & 5 deletions tiled/_tests/test_directory_walker.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,10 @@
from ..client import Context, from_context
from ..client.register import (
Settings,
group_tiff_sequences,
group_image_sequences,
identity,
register,
register_tiff_sequence,
register_image_sequence,
skip_all,
strip_suffixes,
)
Expand Down Expand Up @@ -155,14 +155,14 @@ async def test_skip_all_in_combination(tmpdir):
# With skip_all, directories and tiff sequence are registered, but individual files are not
with Context.from_app(build_app(catalog)) as context:
client = from_context(context)
await register(client, tmpdir, walkers=[group_tiff_sequences, skip_all])
await register(client, tmpdir, walkers=[group_image_sequences, skip_all])
assert list(client) == ["one"]
assert "image" in client["one"]


@pytest.mark.asyncio
async def test_tiff_seq_custom_sorting(tmpdir):
"Register TIFFs that are not in alphanumeric order."
"Register images that are not in alphanumeric order."
N = 10
ordering = list(range(N))
random.Random(0).shuffle(ordering)
Expand All @@ -177,7 +177,7 @@ async def test_tiff_seq_custom_sorting(tmpdir):
catalog = in_memory(writable_storage=tmpdir)
with Context.from_app(build_app(catalog)) as context:
client = from_context(context)
await register_tiff_sequence(
await register_image_sequence(
client,
"image",
files,
Expand Down
189 changes: 189 additions & 0 deletions tiled/_tests/test_jpeg.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,189 @@
from pathlib import Path

import numpy
import pytest
from PIL import Image

from ..adapters.jpeg import JPEGAdapter, JPEGSequenceAdapter
from ..adapters.mapping import MapAdapter
from ..catalog import in_memory
from ..client import Context, from_context
from ..client.register import IMG_SEQUENCE_EMPTY_NAME_ROOT, register
from ..server.app import build_app
from ..utils import ensure_uri

# Array shape of the RGB test image written by the `client` fixture; the
# trailing axis of length 3 holds the colour channels.
COLOR_SHAPE = (11, 17, 3)


@pytest.fixture(scope="module")
def client(tmpdir_module):
    """Yield a client serving one RGB JPEG and a three-frame JPEG sequence.

    The images are written beneath ``tmpdir_module`` and exposed through a
    ``MapAdapter`` tree under the keys "color" and "sequence". The client
    stays connected to the in-process app for as long as the fixture lives.
    """
    frame_dir = Path(tmpdir_module, "sequence")
    frame_dir.mkdir()
    frame_paths = []
    for index in range(3):
        # JPEGs can only be 8 bit ints
        frame = numpy.random.randint(0, 255, (5, 7), dtype="uint8")
        frame_path = frame_dir / f"temp{index:05}.jpeg"
        Image.fromarray(frame).convert("L").save(frame_path)
        frame_paths.append(frame_path)

    color_pixels = numpy.random.randint(0, 255, COLOR_SHAPE, dtype="uint8")
    color_path = Path(tmpdir_module, "color.jpeg")
    Image.fromarray(color_pixels).convert("RGB").save(color_path)

    tree = MapAdapter(
        {
            "color": JPEGAdapter(ensure_uri(color_path)),
            "sequence": JPEGSequenceAdapter.from_uris(
                [ensure_uri(frame_path) for frame_path in frame_paths]
            ),
        }
    )
    with Context.from_app(build_app(tree)) as context:
        tiled_client = from_context(context)
        yield tiled_client


@pytest.mark.parametrize(
    ("slice_input", "correct_shape"),
    [
        (None, (3, 5, 7)),
        (0, (5, 7)),
        (slice(0, 3, 2), (2, 5, 7)),
        ((1, slice(0, 3), slice(0, 3)), (3, 3)),
        ((slice(0, 3), slice(0, 3), slice(0, 3)), (3, 3, 3)),
        ((..., 0, 0), (3,)),
        ((0, slice(0, 1), slice(0, 2), ...), (1, 2)),
        ((0, ..., slice(0, 2)), (5, 2)),
        ((..., slice(0, 1)), (3, 5, 1)),
    ],
)
def test_jpeg_sequence(client, slice_input, correct_shape):
    """Reading the sequence with ``slice_input`` gives ``correct_shape``."""
    assert client["sequence"].read(slice=slice_input).shape == correct_shape


@pytest.mark.parametrize(
    ("block_input", "correct_shape"),
    [((0, 0, 0), (1, 5, 7))],
)
def test_jpeg_sequence_block(client, block_input, correct_shape):
    """Reading a single block of the sequence gives ``correct_shape``."""
    assert client["sequence"].read_block(block_input).shape == correct_shape


@pytest.mark.asyncio
async def test_jpeg_sequence_order(tmpdir):
    """
    Frames of a registered JPEG sequence are read back in filename order.

    directory/
      image00000.jpeg
      image00001.jpeg
      ...
      image00009.jpeg
    """
    num_files = 10
    base = numpy.ones((4, 5))
    # Frame i is filled with the constant value i, so a frame's position in
    # the sequence can be recognised from its pixel values.
    expected_frames = [base * i for i in range(num_files)]
    for i, frame in enumerate(expected_frames):
        Image.fromarray(frame).convert("L").save(Path(tmpdir / f"image{i:05}.jpeg"))

    adapter = in_memory(readable_storage=[tmpdir])
    with Context.from_app(build_app(adapter)) as context:
        client = from_context(context)
        await register(client, tmpdir)
        for i, frame in enumerate(expected_frames):
            numpy.testing.assert_equal(client["image"][i], frame)


@pytest.mark.asyncio
async def test_jpeg_sequence_with_directory_walker(tmpdir):
    """
    Register a directory mixing JPEG sequences, a lone JPEG and CSV files.

    directory/
      00000.jpeg ... 00009.jpeg
      single_image.jpeg
      image00000.jpeg ... image00009.jpeg
      other_image00000.jpeg ... other_image00009.jpeg
      other_image2_00000.jpeg ... other_image2_00009.jpeg
      other_file1.csv
      other_file2.csv
      stuff.csv
    """
    pixels = numpy.random.randint(0, 255, (3, 5), dtype="uint8")
    # Filename prefixes of the four sequences; "" gives the digit-only names.
    sequence_prefixes = ["image", "other_image", "", "other_image2_"]
    for i in range(10):
        for prefix in sequence_prefixes:
            Image.fromarray(pixels).convert("L").save(
                Path(tmpdir / f"{prefix}{i:05}.jpeg")
            )
    Image.fromarray(pixels).save(Path(tmpdir / "single_image.jpeg"))

    csv_text = "\na,b,c\n1,2,3\n"
    for target in ["stuff.csv", "other_file1.csv", "other_file2.csv"]:
        with open(Path(tmpdir / target), "w") as file:
            file.write(csv_text)

    adapter = in_memory(readable_storage=[tmpdir])
    with Context.from_app(build_app(adapter)) as context:
        client = from_context(context)
        await register(client, tmpdir)

        # Single image is its own node.
        assert client["single_image"].shape == (3, 5)

        # Each sequence is grouped into a node.
        sequence_keys = (
            IMG_SEQUENCE_EMPTY_NAME_ROOT,
            "image",
            "other_image",
            "other_image2_",
        )
        for key in sequence_keys:
            assert client[key].shape == (10, 3, 5)

        # Exactly one key falls outside the explicitly named ones: the node
        # that groups the digit-only files.
        named_keys = {
            "single_image",
            "image",
            "other_image",
            "other_image2_",
            "other_file1",
            "other_file2",
            "stuff",
        }
        no_name_keys = [key for key in client.keys() if key not in named_keys]
        assert len(no_name_keys) == 1
        assert client[no_name_keys[0]].shape == (10, 3, 5)

        # Other files are single nodes.
        for key in ("stuff", "other_file1", "other_file2"):
            assert client[key].columns == ["a", "b", "c"]


def test_rgb(client):
    """An RGB JPEG is read back with the shape it was written with."""
    assert client["color"].read().shape == COLOR_SHAPE


def test_jpeg_sequence_cache(client):
    """Indexing and ``read`` agree on a frame, and frames do not leak into each other."""
    # Fetch frame 0 twice: once by indexing, once through read().
    first_by_index = client["sequence"][0]
    first_by_read = client["sequence"].read(0)

    # A different index confirms that the earlier requests do not affect
    # what is returned for another frame.
    second_by_read = client["sequence"].read(1)

    numpy.testing.assert_equal(first_by_index, first_by_read)
    # The two frames must differ, so the equality check is expected to fail.
    with numpy.testing.assert_raises(AssertionError):
        numpy.testing.assert_equal(first_by_read, second_by_read)
4 changes: 2 additions & 2 deletions tiled/_tests/test_tiff.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from ..adapters.tiff import TiffAdapter, TiffSequenceAdapter
from ..catalog import in_memory
from ..client import Context, from_context
from ..client.register import TIFF_SEQUENCE_EMPTY_NAME_ROOT, register
from ..client.register import IMG_SEQUENCE_EMPTY_NAME_ROOT, register
from ..server.app import build_app
from ..utils import ensure_uri

Expand Down Expand Up @@ -137,7 +137,7 @@ async def test_tiff_sequence_with_directory_walker(tmpdir):
# Single image is its own node.
assert client["single_image"].shape == (3, 5)
# Each sequence is grouped into a node.
assert client[TIFF_SEQUENCE_EMPTY_NAME_ROOT].shape == (10, 3, 5)
assert client[IMG_SEQUENCE_EMPTY_NAME_ROOT].shape == (10, 3, 5)
assert client["image"].shape == (10, 3, 5)
assert client["other_image"].shape == (10, 3, 5)
assert client["other_image2_"].shape == (10, 3, 5)
Expand Down
23 changes: 6 additions & 17 deletions tiled/adapters/jpeg.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import builtins
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple, cast
from typing import Any, List, Optional, Tuple

import numpy as np
from numpy._typing import NDArray
Expand Down Expand Up @@ -54,18 +54,11 @@ def __init__(
self._provided_metadata = metadata or {}
self.access_policy = access_policy
if structure is None:
if self._file.is_shaped:
from_file: Tuple[Dict[str, Any], ...] = cast(
Tuple[Dict[str, Any], ...], self._file.shaped_metadata
)
shape = tuple(from_file[0]["shape"])
else:
arr = np.asarray(self._file)
shape = arr.shape
arr = np.asarray(self._file)
structure = ArrayStructure(
shape=shape,
chunks=tuple((dim,) for dim in shape),
data_type=BuiltinDtype.from_numpy_dtype(self._file.series[0].dtype),
shape=arr.shape,
chunks=tuple((dim,) for dim in arr.shape),
data_type=BuiltinDtype.from_numpy_dtype(arr.dtype),
)
self._structure = structure

Expand All @@ -76,11 +69,7 @@ def metadata(self) -> JSON:
-------
"""
# This contains some enums, but Python's built-in JSON serializer
# handles them fine (converting to str or int as appropriate).
d = {tag.name: tag.value for tag in self._file.pages[0].tags.values()}
d.update(self._provided_metadata)
return d
return {}

def read(self, slice: Optional[NDSlice] = None) -> NDArray[Any]:
"""
Expand Down
50 changes: 33 additions & 17 deletions tiled/client/register.py
Original file line number Diff line number Diff line change
Expand Up @@ -340,11 +340,23 @@ async def register_single_item(

# Each pattern matches a filename with (optional) prefix characters followed by
# digits \d and then the file extension: .tif or .tiff for TIFF images,
# .jpg or .jpeg for JPEG images.
TIFF_SEQUENCE_STEM_PATTERN = re.compile(r"^(.*?)(\d+)\.(?:tif|tiff)$")
TIFF_SEQUENCE_EMPTY_NAME_ROOT = "_unnamed"


async def group_tiff_sequences(
# One compiled pattern per image format; each captures the (possibly empty)
# name prefix and the trailing run of digits before the extension.
_TIFF_STEM_PATTERN = re.compile(r"^(.*?)(\d+)\.(?:tif|tiff)$")
_JPEG_STEM_PATTERN = re.compile(r"^(.*?)(\d+)\.(?:jpg|jpeg)$")

# Filename pattern to apply, keyed by file extension.
IMG_SEQUENCE_STEM_PATTERNS = {
    ".tif": _TIFF_STEM_PATTERN,
    ".tiff": _TIFF_STEM_PATTERN,
    ".jpg": _JPEG_STEM_PATTERN,
    ".jpeg": _JPEG_STEM_PATTERN,
}
IMG_SEQUENCE_EMPTY_NAME_ROOT = "_unnamed"

_TIFF_SEQUENCE_MIMETYPE = "multipart/related;type=image/tiff"
_JPEG_SEQUENCE_MIMETYPE = "multipart/related;type=image/jpeg"

# Sequence mimetype, keyed by file extension.
IMG_SEQUENCE_MIMETYPES = {
    ".tif": _TIFF_SEQUENCE_MIMETYPE,
    ".tiff": _TIFF_SEQUENCE_MIMETYPE,
    ".jpg": _JPEG_SEQUENCE_MIMETYPE,
    ".jpeg": _JPEG_SEQUENCE_MIMETYPE,
}


async def group_image_sequences(
node,
path,
files,
Expand All @@ -365,26 +377,30 @@ async def group_tiff_sequences(
unhandled_files = []
sequences = collections.defaultdict(list)
for file in files:
if file.is_file():
match = TIFF_SEQUENCE_STEM_PATTERN.match(file.name)
file_ext = Path(file).suffixes[-1] if len(Path(file).suffixes) > 0 else None
if file.is_file() and file_ext and file_ext in IMG_SEQUENCE_STEM_PATTERNS:
match = IMG_SEQUENCE_STEM_PATTERNS[file_ext].match(file.name)
if match:
sequence_name, _sequence_number = match.groups()
if sequence_name == "":
sequence_name = TIFF_SEQUENCE_EMPTY_NAME_ROOT
sequence_name = IMG_SEQUENCE_EMPTY_NAME_ROOT
sequences[sequence_name].append(file)
continue
unhandled_files.append(file)
for name, sequence in sequences.items():
await register_tiff_sequence(node, name, sorted(sequence), settings)
await register_image_sequence(node, name, sorted(sequence), settings)
return unhandled_files, unhandled_directories


TIFF_SEQ_MIMETYPE = "multipart/related;type=image/tiff"


async def register_tiff_sequence(node, name, sequence, settings):
logger.info(" Grouped %d TIFFs into a sequence '%s'", len(sequence), name)
adapter_class = settings.adapters_by_mimetype[TIFF_SEQ_MIMETYPE]
async def register_image_sequence(node, name, sequence, settings):
mimetype = IMG_SEQUENCE_MIMETYPES[Path(sequence[0]).suffixes[0]]
logger.info(
" Grouped %d %s images into a sequence '%s'",
len(sequence),
mimetype.split("/")[-1],
name,
)
adapter_class = settings.adapters_by_mimetype[mimetype]
key = settings.key_from_filename(name)
try:
adapter = adapter_class([ensure_uri(filepath) for filepath in sequence])
Expand All @@ -400,7 +416,7 @@ async def register_tiff_sequence(node, name, sequence, settings):
data_sources=[
DataSource(
structure_family=adapter.structure_family,
mimetype=TIFF_SEQ_MIMETYPE,
mimetype=mimetype,
structure=dict_or_none(adapter.structure()),
parameters={},
management=Management.external,
Expand Down Expand Up @@ -435,7 +451,7 @@ async def skip_all(
return [], directories


DEFAULT_WALKERS = [group_tiff_sequences, one_node_per_item]
DEFAULT_WALKERS = [group_image_sequences, one_node_per_item]


async def watch(
Expand Down

0 comments on commit 1c4ac97

Please sign in to comment.