Skip to content

Commit

Permalink
Insert images to fix bug with pyvips.arrayjoin & remove cmyk2rgb conv…
Browse files Browse the repository at this point in the history
…ersion
  • Loading branch information
BPdeRooij committed Sep 6, 2024
1 parent 0628a56 commit 94925fb
Show file tree
Hide file tree
Showing 3 changed files with 72 additions and 46 deletions.
65 changes: 36 additions & 29 deletions dlup/backends/deepzoom_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,15 @@
from pathlib import Path
from typing import Any, Union

# TODO: Fix cmyk case in read_region so we can remove PIL and numpy
# import PIL
import numpy as np
import pyvips

from dlup._types import PathLike
from dlup.backends.common import AbstractSlideBackend
from dlup.utils.backends import dict_to_snake_case, parse_xml_to_dict

# TODO: Fix cmyk case in read_region so we can remove PIL and numpy
# import PIL
# import numpy as np

METADATA_CACHE = 128
RELEVANT_VIPS_PROPERTIES = {
"openslide.vendor": str,
Expand All @@ -27,7 +26,9 @@
"openslide.bounds-width": int,
"openslide.bounds-x": int,
"openslide.bounds-y": int,
"openslide.quickhash-1": str,
"vips-loader": str,
"bands": int,
}

TileResponseTypes = Union[str, io.BytesIO]
Expand Down Expand Up @@ -100,16 +101,18 @@ def _fetch_properties(self) -> dict[str, Any]:
if not vips_properties_file.exists():
return {}
# Don't convert to snake case for now to keep original vips-property names
vips_properties_dict = parse_xml_to_dict(vips_properties_file, _to_snake_case=False)
properties = {
relevant_key.split("openslide.")[-1]: cast_fn(vips_properties_dict["properties"][relevant_key])
vips_properties = parse_xml_to_dict(vips_properties_file, _to_snake_case=False)["image"]["properties"]
relevant_properties = {
relevant_key.split("openslide.")[-1]: cast_fn(vips_properties[relevant_key])
for relevant_key, cast_fn in RELEVANT_VIPS_PROPERTIES.items()
if relevant_key in vips_properties_dict["properties"]
if relevant_key in vips_properties
}
if properties.get("vips-loader") is None or properties.get("vips-loader") != "openslideload":
raise NotImplementedError(f"Properties not implemented for vips-loader {properties.get('vips-loader')}.")
if relevant_properties.get("vips-loader", "") != "openslideload":
raise NotImplementedError(
f"Properties not implemented for vips-loader {relevant_properties.get('vips-loader')}."
)
# Convert to snake case naming convention in the end
return dict_to_snake_case(properties)
return dict_to_snake_case(relevant_properties)

@property
def dz_properties(self) -> dict[str, Any]:
Expand Down Expand Up @@ -218,6 +221,7 @@ def read_region(self, coordinates: tuple[Any, ...], level: int, size: tuple[int,

x, y = (coordinates[0] // level_downsample, coordinates[1] // level_downsample)
w, h = size
_overlap = self._overlap

# Calculate the range of rows and columns for tiles covering the specified region
start_row = y // tile_h
Expand All @@ -228,7 +232,12 @@ def read_region(self, coordinates: tuple[Any, ...], level: int, size: tuple[int,
indices = list(itertools.product(range(start_row, end_row), range(start_col, end_col)))
level_dz = self._level_count - level - 1
tile_files = self.retrieve_deepzoom_tiles(level_dz, indices)
_region_tiles = []

# The number of bands can be in the vips-properties.xml, but otherwise we can interpret from image mode
num_bands = self.properties.get("bands", 3 if self.mode != "cmyk" else 4)
# We create an image from an array with zeros so we can give the correct interpretation. Arrayjoin would be
# faster, but does not work for unregular grids of images.
_region = pyvips.Image.new_from_array(np.zeros((h, w, num_bands), dtype=np.uint8), interpretation=self.mode)
for (row, col), tile_file in zip(indices, tile_files):
_region_tile: pyvips.Image = (
pyvips.Image.new_from_buffer(tile_file.getvalue(), "")
Expand All @@ -251,30 +260,28 @@ def read_region(self, coordinates: tuple[Any, ...], level: int, size: tuple[int,

# All but edge tiles have overlap pixels outside of tile
if col > 0:
crop_start_x += self._overlap
crop_end_x += self._overlap
crop_start_x += _overlap
crop_end_x += _overlap
if col == level_end_col - 1:
crop_end_x -= self._overlap
crop_end_x -= _overlap

if row > 0:
crop_start_y += self._overlap
crop_end_y += self._overlap
crop_start_y += _overlap
crop_end_y += _overlap
if row == level_end_row - 1:
crop_end_y -= self._overlap

crop_end_y -= _overlap
_cropped_region_tile = _region_tile.crop(
crop_start_x, crop_start_y, crop_end_x - crop_start_x, crop_end_y - crop_start_y
)
_region_tiles.append(_cropped_region_tile)

_region = pyvips.Image.arrayjoin(_region_tiles, across=end_col - start_col)
# Convert to RGB if mode is cmyk
if self.mode == "cmyk":
# FIXME: This looks off when using pyvips but not when using PIL
_region = _region.colourspace("srgb") # _region = _region.icc_transform("srgb") <- Both look off
# _region = pyvips.Image.new_from_array(
# np.asarray(PIL.Image.fromarray(_region.numpy(), mode="CMYK").convert("RGB")), interpretation="srgb"
# ) # <- This looks good
_region = _region.insert(_cropped_region_tile, img_start_x, img_start_y)

# # Should convert from cmyk to rgb or should a user do this afterwards theirself?
# if self.mode == "cmyk":
# # FIXME: This looks off when using pyvips (colourspace and icc_transform) but not when using PIL
# _region = _region.colourspace("srgb", source_space="cmyk") # _region = _region.icc_transform("srgb")
# # _region = pyvips.Image.new_from_array(
# # np.asarray(PIL.Image.fromarray(_region.numpy(), mode="CMYK").convert("RGB")), interpretation="srgb"
# # ) # <- This looks good, but seems like a roundabout way
return _region

def close(self) -> None:
Expand Down
7 changes: 4 additions & 3 deletions dlup/backends/slidescore_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@

METADATA_CACHE = 128
DEFAULT_ASYNC_REQUESTS = 6 # This is the number of requests to make asynchronously
API_TOKEN_OS_VARIABLE_NAME = "SLIDESCORE_API_TOKEN"


def open_slide(filename: PathLike) -> "SlideScoreSlide":
Expand All @@ -44,7 +45,7 @@ def __init__(self, filename: PathLike):
raise ValueError("Filename should be SlideScore URL for SlideScoreSlide.")

if not AIOHTTP_AVAILABLE:
raise RuntimeError("`aiohtpp` is not available. Install dlup with `slidescore_remote` dependencies.")
raise RuntimeError("`aiohttp` is not available. Install dlup with `slidescore_remote` dependencies.")

# Parse URL with regex
parsed_url = re.search(r"(https?://[^/?]+)(?=.*\bstudyId=(\d+))(?=.*\bimageId=(\d+)).*$", filename)
Expand Down Expand Up @@ -107,7 +108,7 @@ def _set_metadata(self) -> None:
RuntimeError
If serverside studyID is not the same as slide study_id
"""
api_token = os.getenv("SLIDESCORE_API_TOKEN")
api_token = os.getenv(API_TOKEN_OS_VARIABLE_NAME)
if api_token is None:
raise RuntimeError("SlideScore API token not found. Please set SLIDESCORE_API_TOKEN in os environment")
self.headers = {"Accept": "application/json", "Authorization": f"Bearer {api_token}"}
Expand Down Expand Up @@ -230,7 +231,7 @@ def close(self) -> None:
return


def export_api_key(file_path: PathLike, os_variable_name: str = "SLIDESCORE_API_TOKEN") -> None:
def export_api_key(file_path: PathLike, os_variable_name: str = API_TOKEN_OS_VARIABLE_NAME) -> None:
"""Reads SlideScore API key from path and exports it into operating system environment.
Parameters
Expand Down
46 changes: 32 additions & 14 deletions dlup/utils/backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@


def parse_xml_to_dict(file_path: PathLike | io.BytesIO, _to_snake_case: bool = True) -> dict[str, Any]:
"""Parse XML file with name space. vips-properties.xml files will extract every property name-value pair in
`properties`.
"""Parse XML file (DeepZoom DZI or vips-properties.xml) into a dictionary. `vips-properties.xml` files will extract
every property name-value pair in `properties` key.
Parameters
----------
Expand All @@ -29,25 +29,43 @@ def parse_xml_to_dict(file_path: PathLike | io.BytesIO, _to_snake_case: bool = T
Parsed XML file as a dictionary. Name space will be replaced with an empty string.
"""
root = ET.parse(file_path).getroot()
namespace = root.tag.split("}")[0] + "}" if len(root.tag.split("}")) > 1 else ""
namespace = "".join(root.tag.partition("}")[:2]) if "}" in root.tag else ""
root_tag = root.tag.replace(namespace, "")
parsed_dict: dict[str, dict[str, Any]] = {root_tag: dict(root.attrib)}
for elem in root:
tag = elem.tag.replace(namespace, "")
if tag == "properties":
properties = {}
for prop in elem.findall(f".//{namespace}property"):
name = prop.find(f"{namespace}name")
if name is None:
continue
value = prop.find(f"{namespace}value")
properties[str(name.text)] = value.text if value is not None else value
parsed_dict["properties"] = properties
else:
parsed_dict[root_tag][tag] = dict(elem.attrib)
attributes = extract_vips_properties(elem, namespace=namespace) if tag == "properties" else dict(elem.attrib)
parsed_dict[root_tag][tag] = attributes
return dict_to_snake_case(parsed_dict) if _to_snake_case else parsed_dict


def extract_vips_properties(properties_elem: ET.Element, namespace: str) -> dict[str, Any]:
"""
Extract 'properties' section from vips-properties.xml, with name-value pairs.
Parameters
----------
properties_elem : xml.etree.ElementTree.Element
The 'properties' XML element.
namespace : str
The namespace for the XML document.
Returns
-------
dict[str, Any]
Dictionary of properties with name-value pairs.
"""
properties = {}
for prop in properties_elem.findall(f".//{namespace}property"):
name_elem = prop.find(f"{namespace}name")
if name_elem is None:
continue
value_elem = prop.find(f"{namespace}value")
properties[str(name_elem.text)] = value_elem.text if value_elem is not None else None

return properties


def dict_to_snake_case(dictionary: dict[str, Any]) -> dict[str, Any]:
"""Recursively convert all keys in a dictionary to snake case naming convention. String values will be
converted to floats and integers if appropriate.
Expand Down

0 comments on commit 94925fb

Please sign in to comment.