From 94925fbf60c7966fb6c8e4466ae5a9a9dc0ada9d Mon Sep 17 00:00:00 2001 From: Bart de Rooij Date: Fri, 6 Sep 2024 12:24:54 +0200 Subject: [PATCH] Insert images to fix bug with pyvips.arrayjoin & remove cmyk2rgb conversion --- dlup/backends/deepzoom_backend.py | 65 ++++++++++++++++------------- dlup/backends/slidescore_backend.py | 7 ++-- dlup/utils/backends.py | 46 +++++++++++++------- 3 files changed, 72 insertions(+), 46 deletions(-) diff --git a/dlup/backends/deepzoom_backend.py b/dlup/backends/deepzoom_backend.py index 02e5e9b3..8c93bd1b 100644 --- a/dlup/backends/deepzoom_backend.py +++ b/dlup/backends/deepzoom_backend.py @@ -7,16 +7,15 @@ from pathlib import Path from typing import Any, Union +# TODO: Fix cmyk case in read_region so we can remove PIL and numpy +# import PIL +import numpy as np import pyvips from dlup._types import PathLike from dlup.backends.common import AbstractSlideBackend from dlup.utils.backends import dict_to_snake_case, parse_xml_to_dict -# TODO: Fix cmyk case in read_region so we can remove PIL and numpy -# import PIL -# import numpy as np - METADATA_CACHE = 128 RELEVANT_VIPS_PROPERTIES = { "openslide.vendor": str, @@ -27,7 +26,9 @@ "openslide.bounds-width": int, "openslide.bounds-x": int, "openslide.bounds-y": int, + "openslide.quickhash-1": str, "vips-loader": str, + "bands": int, } TileResponseTypes = Union[str, io.BytesIO] @@ -100,16 +101,18 @@ def _fetch_properties(self) -> dict[str, Any]: if not vips_properties_file.exists(): return {} # Don't convert to snake case for now to keep original vips-property names - vips_properties_dict = parse_xml_to_dict(vips_properties_file, _to_snake_case=False) - properties = { - relevant_key.split("openslide.")[-1]: cast_fn(vips_properties_dict["properties"][relevant_key]) + vips_properties = parse_xml_to_dict(vips_properties_file, _to_snake_case=False)["image"]["properties"] + relevant_properties = { + relevant_key.split("openslide.")[-1]: cast_fn(vips_properties[relevant_key]) for relevant_key, cast_fn in RELEVANT_VIPS_PROPERTIES.items() - if relevant_key in vips_properties_dict["properties"] + if relevant_key in vips_properties } - if properties.get("vips-loader") is None or properties.get("vips-loader") != "openslideload": - raise NotImplementedError(f"Properties not implemented for vips-loader {properties.get('vips-loader')}.") + if relevant_properties.get("vips-loader", "") != "openslideload": + raise NotImplementedError( + f"Properties not implemented for vips-loader {relevant_properties.get('vips-loader')}." + ) # Convert to snake case naming convention in the end - return dict_to_snake_case(properties) + return dict_to_snake_case(relevant_properties) @property def dz_properties(self) -> dict[str, Any]: @@ -218,6 +221,7 @@ def read_region(self, coordinates: tuple[Any, ...], level: int, size: tuple[int, x, y = (coordinates[0] // level_downsample, coordinates[1] // level_downsample) w, h = size + _overlap = self._overlap # Calculate the range of rows and columns for tiles covering the specified region start_row = y // tile_h @@ -228,7 +232,12 @@ def read_region(self, coordinates: tuple[Any, ...], level: int, size: tuple[int, indices = list(itertools.product(range(start_row, end_row), range(start_col, end_col))) level_dz = self._level_count - level - 1 tile_files = self.retrieve_deepzoom_tiles(level_dz, indices) - _region_tiles = [] + + # The number of bands can be in the vips-properties.xml, but otherwise we can interpret from image mode + num_bands = self.properties.get("bands", 3 if self.mode != "cmyk" else 4) + # We create an image from an array with zeros so we can give the correct interpretation. Arrayjoin would be + # faster, but does not work for unregular grids of images. + _region = pyvips.Image.new_from_array(np.zeros((h, w, num_bands), dtype=np.uint8), interpretation=self.mode) for (row, col), tile_file in zip(indices, tile_files): _region_tile: pyvips.Image = ( pyvips.Image.new_from_buffer(tile_file.getvalue(), "") @@ -251,30 +260,28 @@ def read_region(self, coordinates: tuple[Any, ...], level: int, size: tuple[int, # All but edge tiles have overlap pixels outside of tile if col > 0: - crop_start_x += self._overlap - crop_end_x += self._overlap + crop_start_x += _overlap + crop_end_x += _overlap if col == level_end_col - 1: - crop_end_x -= self._overlap + crop_end_x -= _overlap if row > 0: - crop_start_y += self._overlap - crop_end_y += self._overlap + crop_start_y += _overlap + crop_end_y += _overlap if row == level_end_row - 1: - crop_end_y -= self._overlap - + crop_end_y -= _overlap _cropped_region_tile = _region_tile.crop( crop_start_x, crop_start_y, crop_end_x - crop_start_x, crop_end_y - crop_start_y ) - _region_tiles.append(_cropped_region_tile) - - _region = pyvips.Image.arrayjoin(_region_tiles, across=end_col - start_col) - # Convert to RGB if mode is cmyk - if self.mode == "cmyk": - # FIXME: This looks off when using pyvips but not when using PIL - _region = _region.colourspace("srgb") # _region = _region.icc_transform("srgb") <- Both look off - # _region = pyvips.Image.new_from_array( - # np.asarray(PIL.Image.fromarray(_region.numpy(), mode="CMYK").convert("RGB")), interpretation="srgb" - # ) # <- This looks good + _region = _region.insert(_cropped_region_tile, img_start_x, img_start_y) + + # # Should convert from cmyk to rgb or should a user do this afterwards theirself? + # if self.mode == "cmyk": + # # FIXME: This looks off when using pyvips (colourspace and icc_transform) but not when using PIL + # _region = _region.colourspace("srgb", source_space="cmyk") # _region = _region.icc_transform("srgb") + # # _region = pyvips.Image.new_from_array( + # # np.asarray(PIL.Image.fromarray(_region.numpy(), mode="CMYK").convert("RGB")), interpretation="srgb" + # # ) # <- This looks good, but seems like a roundabout way return _region def close(self) -> None: diff --git a/dlup/backends/slidescore_backend.py b/dlup/backends/slidescore_backend.py index b7bfe4b4..cd92b54f 100644 --- a/dlup/backends/slidescore_backend.py +++ b/dlup/backends/slidescore_backend.py @@ -22,6 +22,7 @@ METADATA_CACHE = 128 DEFAULT_ASYNC_REQUESTS = 6 # This is the number of requests to make asynchronously +API_TOKEN_OS_VARIABLE_NAME = "SLIDESCORE_API_TOKEN" def open_slide(filename: PathLike) -> "SlideScoreSlide": @@ -44,7 +45,7 @@ def __init__(self, filename: PathLike): raise ValueError("Filename should be SlideScore URL for SlideScoreSlide.") if not AIOHTTP_AVAILABLE: - raise RuntimeError("`aiohtpp` is not available. Install dlup with `slidescore_remote` dependencies.") + raise RuntimeError("`aiohttp` is not available. Install dlup with `slidescore_remote` dependencies.") # Parse URL with regex parsed_url = re.search(r"(https?://[^/?]+)(?=.*\bstudyId=(\d+))(?=.*\bimageId=(\d+)).*$", filename) @@ -107,7 +108,7 @@ def _set_metadata(self) -> None: RuntimeError If serverside studyID is not the same as slide study_id """ - api_token = os.getenv("SLIDESCORE_API_TOKEN") + api_token = os.getenv(API_TOKEN_OS_VARIABLE_NAME) if api_token is None: raise RuntimeError("SlideScore API token not found. Please set SLIDESCORE_API_TOKEN in os environment") self.headers = {"Accept": "application/json", "Authorization": f"Bearer {api_token}"} @@ -230,7 +231,7 @@ def close(self) -> None: return -def export_api_key(file_path: PathLike, os_variable_name: str = "SLIDESCORE_API_TOKEN") -> None: +def export_api_key(file_path: PathLike, os_variable_name: str = API_TOKEN_OS_VARIABLE_NAME) -> None: """Reads SlideScore API key from path and exports it into operating system environment. Parameters diff --git a/dlup/utils/backends.py b/dlup/utils/backends.py index f9d570db..1af23a32 100644 --- a/dlup/utils/backends.py +++ b/dlup/utils/backends.py @@ -13,8 +13,8 @@ def parse_xml_to_dict(file_path: PathLike | io.BytesIO, _to_snake_case: bool = True) -> dict[str, Any]: - """Parse XML file with name space. vips-properties.xml files will extract every property name-value pair in - `properties`. + """Parse XML file (DeepZoom DZI or vips-properties.xml) into a dictionary. `vips-properties.xml` files will extract + every property name-value pair in `properties` key. Parameters ---------- @@ -29,25 +29,43 @@ def parse_xml_to_dict(file_path: PathLike | io.BytesIO, _to_snake_case: bool = T Parsed XML file as a dictionary. Name space will be replaced with an empty string. """ root = ET.parse(file_path).getroot() - namespace = root.tag.split("}")[0] + "}" if len(root.tag.split("}")) > 1 else "" + namespace = "".join(root.tag.partition("}")[:2]) if "}" in root.tag else "" root_tag = root.tag.replace(namespace, "") parsed_dict: dict[str, dict[str, Any]] = {root_tag: dict(root.attrib)} for elem in root: tag = elem.tag.replace(namespace, "") - if tag == "properties": - properties = {} - for prop in elem.findall(f".//{namespace}property"): - name = prop.find(f"{namespace}name") - if name is None: - continue - value = prop.find(f"{namespace}value") - properties[str(name.text)] = value.text if value is not None else value - parsed_dict["properties"] = properties - else: - parsed_dict[root_tag][tag] = dict(elem.attrib) + attributes = extract_vips_properties(elem, namespace=namespace) if tag == "properties" else dict(elem.attrib) + parsed_dict[root_tag][tag] = attributes return dict_to_snake_case(parsed_dict) if _to_snake_case else parsed_dict +def extract_vips_properties(properties_elem: ET.Element, namespace: str) -> dict[str, Any]: + """ + Extract 'properties' section from vips-properties.xml, with name-value pairs. + + Parameters + ---------- + properties_elem : xml.etree.ElementTree.Element + The 'properties' XML element. + namespace : str + The namespace for the XML document. + + Returns + ------- + dict[str, Any] + Dictionary of properties with name-value pairs. + """ + properties = {} + for prop in properties_elem.findall(f".//{namespace}property"): + name_elem = prop.find(f"{namespace}name") + if name_elem is None: + continue + value_elem = prop.find(f"{namespace}value") + properties[str(name_elem.text)] = value_elem.text if value_elem is not None else None + + return properties + + def dict_to_snake_case(dictionary: dict[str, Any]) -> dict[str, Any]: """Recursively convert all keys in a dictionary to snake case naming convention. String values will be converted to floats and integers if appropriate.