Skip to content

Commit

Permalink
Move helper functions to utils/backends
Browse files Browse the repository at this point in the history
  • Loading branch information
BPdeRooij committed Sep 2, 2024
1 parent 65222b5 commit 0628a56
Show file tree
Hide file tree
Showing 7 changed files with 85 additions and 81 deletions.
3 changes: 3 additions & 0 deletions .mypy.ini
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,6 @@ ignore_missing_imports = True

[mypy-darwin.*]
ignore_missing_imports = True

[mypy-aiohttp.*]
ignore_missing_imports = True
1 change: 1 addition & 0 deletions dlup/backends/common.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# Copyright (c) dlup contributors
from __future__ import annotations

import abc
Expand Down
73 changes: 3 additions & 70 deletions dlup/backends/deepzoom_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,21 +4,19 @@
import io
import itertools
import math
import re
import xml.etree.ElementTree as ET
from pathlib import Path
from typing import Any, Union

import pyvips

from dlup._types import PathLike
from dlup.backends.common import AbstractSlideBackend
from dlup.utils.backends import dict_to_snake_case, parse_xml_to_dict

# TODO: Fix cmyk case in read_region so we can remove PIL and numpy
# import PIL
# import numpy as np


METADATA_CACHE = 128
RELEVANT_VIPS_PROPERTIES = {
"openslide.vendor": str,
Expand All @@ -35,73 +33,6 @@
TileResponseTypes = Union[str, io.BytesIO]


def parse_xml_to_dict(file_path: PathLike | io.BytesIO, _to_snake_case: bool = True) -> dict[str, Any]:
    """Read a (possibly namespaced) XML file into a nested dictionary.

    The attributes of the root element are stored under the root tag, and the attributes of every direct
    child are stored beneath it under the child's tag. A direct child called ``properties`` (as found in
    vips-properties.xml files) is treated differently: each name-value pair below it is collected in a
    top-level ``properties`` entry.

    Parameters
    ----------
    file_path : Pathlike or BytesIO
        Path to the XML file, or a BytesIO object holding its contents.
    _to_snake_case : bool, optional
        Pass the result through `dict_to_snake_case` before returning, by default True

    Returns
    -------
    dict[str, Any]
        The XML file as a dictionary, with the name space stripped from every tag.
    """
    xml_root = ET.parse(file_path).getroot()

    # Namespaced tags look like "{uri}Tag"; keep the "{uri}" prefix so it can be stripped from tags and
    # reused when searching for child elements.
    tag_pieces = xml_root.tag.split("}")
    if len(tag_pieces) > 1:
        namespace = tag_pieces[0] + "}"
    else:
        namespace = ""

    root_name = xml_root.tag.replace(namespace, "")
    result: dict[str, dict[str, Any]] = {root_name: dict(xml_root.attrib)}

    for child in xml_root:
        child_name = child.tag.replace(namespace, "")
        if child_name != "properties":
            # Ordinary child: only its attributes are kept, nested under the root entry.
            result[root_name][child_name] = dict(child.attrib)
            continue

        collected = {}
        for entry in child.findall(f".//{namespace}property"):
            name_node = entry.find(f"{namespace}name")
            if name_node is None:
                # A property without a name cannot be keyed, so it is dropped.
                continue
            value_node = entry.find(f"{namespace}value")
            collected[str(name_node.text)] = None if value_node is None else value_node.text
        result["properties"] = collected

    if _to_snake_case:
        return dict_to_snake_case(result)
    return result


def dict_to_snake_case(dictionary: dict[str, Any]) -> dict[str, Any]:
    """Return a copy of a dictionary with snake case keys and numeric strings cast to numbers.

    Nested dictionaries are converted recursively. String values made up solely of digits, optionally
    followed by a dot and more digits, become integers or floats; every other value is kept as is.

    Parameters
    ----------
    dictionary : dict[str, Any]
        Dictionary with keys using Camel/Pascal naming convention.

    Returns
    -------
    dict[str, Any]
        New dictionary with keys using Snake case naming convention and values as strings, floats and
        integers.
    """
    numeric_pattern = re.compile(r"^\d+(\.\d+)?$")
    converted = {}
    for key, value in dictionary.items():
        if isinstance(value, dict):
            # Nested mapping: convert its keys and values as well.
            value = dict_to_snake_case(value)
        elif isinstance(value, str) and numeric_pattern.match(value):
            # Plain unsigned number: a dot means float, otherwise int.
            value = float(value) if "." in value else int(value)

        if isinstance(key, str):
            # Put an underscore in front of capitalised words, then between a lowercase letter or digit
            # and a following capital, and finally lowercase everything and turn dashes into underscores.
            key = re.sub("(.)([A-Z][a-z]+)", r"\1_\2", key)
            key = re.sub("([a-z0-9])([A-Z])", r"\1_\2", key)
            key = key.lower().replace("-", "_")
        converted[key] = value
    return converted


def open_slide(filename: PathLike) -> "DeepZoomSlide":
"""
Read slide with DeepZoomSlide backend. The input file should be a <slide_name>.dzi file with the deep zoom tiles
Expand All @@ -116,6 +47,8 @@ def open_slide(filename: PathLike) -> "DeepZoomSlide":


class DeepZoomSlide(AbstractSlideBackend):
_properties: dict[str, Any]
_dz_properties: dict[str, Any]

def __init__(self, filename: PathLike):
super().__init__(filename)
Expand Down
12 changes: 8 additions & 4 deletions dlup/backends/slidescore_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,13 @@
from io import BytesIO
from typing import Any, Optional

import dlup.utils.imports
from dlup._types import PathLike
from dlup.backends.deepzoom_backend import DeepZoomSlide, TileResponseTypes, dict_to_snake_case, parse_xml_to_dict
from dlup.backends.deepzoom_backend import DeepZoomSlide, TileResponseTypes
from dlup.backends.remote_backends import RemoteSlideBackend
from dlup.utils.backends import dict_to_snake_case, parse_xml_to_dict
from dlup.utils.imports import AIOHTTP_AVAILABLE

if dlup.utils.imports.AIOHTTP_AVAILABLE:
if AIOHTTP_AVAILABLE:
import asyncio

import aiohttp
Expand Down Expand Up @@ -42,6 +43,9 @@ def __init__(self, filename: PathLike):
if isinstance(filename, pathlib.Path):
raise ValueError("Filename should be SlideScore URL for SlideScoreSlide.")

if not AIOHTTP_AVAILABLE:
raise RuntimeError("`aiohttp` is not available. Install dlup with `slidescore_remote` dependencies.")

# Parse URL with regex
parsed_url = re.search(r"(https?://[^/?]+)(?=.*\bstudyId=(\d+))(?=.*\bimageId=(\d+)).*$", filename)
if parsed_url is None:
Expand Down Expand Up @@ -174,7 +178,7 @@ async def fetch_requests(
connector = aiohttp.TCPConnector(limit=self._max_async_request)
async with aiohttp.ClientSession(cookies=self.cookies, headers=self.headers, connector=connector) as session:
tasks = [self.fetch_request(session=session, url=url, data=data) for url, data in zip(urls, data_dicts)]
return await asyncio.gather(*tasks)
return await asyncio.gather(*tasks) # pylint: disable=possibly-used-before-assignment

def run_fetch_requests(
self,
Expand Down
70 changes: 70 additions & 0 deletions dlup/utils/backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,83 @@
"""Utilities to handle backends."""
from __future__ import annotations

import io
import re
import xml.etree.ElementTree as ET
from enum import Enum
from typing import Any, Callable

from dlup._types import PathLike
from dlup.utils.imports import AIOHTTP_AVAILABLE


def parse_xml_to_dict(file_path: PathLike | io.BytesIO, _to_snake_case: bool = True) -> dict[str, Any]:
    """Read a (possibly namespaced) XML file into a nested dictionary.

    The attributes of the root element are stored under the root tag, and the attributes of every direct
    child are stored beneath it under the child's tag. A direct child called ``properties`` (as found in
    vips-properties.xml files) is treated differently: each name-value pair below it is collected in a
    top-level ``properties`` entry.

    Parameters
    ----------
    file_path : Pathlike or BytesIO
        Path to the XML file, or a BytesIO object holding its contents.
    _to_snake_case : bool, optional
        Pass the result through `dict_to_snake_case` before returning, by default True

    Returns
    -------
    dict[str, Any]
        The XML file as a dictionary, with the name space stripped from every tag.
    """
    xml_root = ET.parse(file_path).getroot()

    # Namespaced tags look like "{uri}Tag"; keep the "{uri}" prefix so it can be stripped from tags and
    # reused when searching for child elements.
    tag_pieces = xml_root.tag.split("}")
    if len(tag_pieces) > 1:
        namespace = tag_pieces[0] + "}"
    else:
        namespace = ""

    root_name = xml_root.tag.replace(namespace, "")
    result: dict[str, dict[str, Any]] = {root_name: dict(xml_root.attrib)}

    for child in xml_root:
        child_name = child.tag.replace(namespace, "")
        if child_name != "properties":
            # Ordinary child: only its attributes are kept, nested under the root entry.
            result[root_name][child_name] = dict(child.attrib)
            continue

        collected = {}
        for entry in child.findall(f".//{namespace}property"):
            name_node = entry.find(f"{namespace}name")
            if name_node is None:
                # A property without a name cannot be keyed, so it is dropped.
                continue
            value_node = entry.find(f"{namespace}value")
            collected[str(name_node.text)] = None if value_node is None else value_node.text
        result["properties"] = collected

    if _to_snake_case:
        return dict_to_snake_case(result)
    return result


def dict_to_snake_case(dictionary: dict[str, Any]) -> dict[str, Any]:
    """Return a copy of a dictionary with snake case keys and numeric strings cast to numbers.

    Nested dictionaries are converted recursively. String values made up solely of digits, optionally
    followed by a dot and more digits, become integers or floats; every other value is kept as is.

    Parameters
    ----------
    dictionary : dict[str, Any]
        Dictionary with keys using Camel/Pascal naming convention.

    Returns
    -------
    dict[str, Any]
        New dictionary with keys using Snake case naming convention and values as strings, floats and
        integers.
    """
    numeric_pattern = re.compile(r"^\d+(\.\d+)?$")
    converted = {}
    for key, value in dictionary.items():
        if isinstance(value, dict):
            # Nested mapping: convert its keys and values as well.
            value = dict_to_snake_case(value)
        elif isinstance(value, str) and numeric_pattern.match(value):
            # Plain unsigned number: a dot means float, otherwise int.
            value = float(value) if "." in value else int(value)

        if isinstance(key, str):
            # Put an underscore in front of capitalised words, then between a lowercase letter or digit
            # and a following capital, and finally lowercase everything and turn dashes into underscores.
            key = re.sub("(.)([A-Z][a-z]+)", r"\1_\2", key)
            key = re.sub("([a-z0-9])([A-Z])", r"\1_\2", key)
            key = key.lower().replace("-", "_")
        converted[key] = value
    return converted


class ImageBackend(Enum):
"""Available image experimental_backends."""

Expand Down
1 change: 0 additions & 1 deletion dlup/utils/imports.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,5 +23,4 @@ def _module_available(module_path: str) -> bool:
# Availability flags for optional dependencies, each set once at import time from `_module_available`.
# NOTE(review): PyTorch is imported as `torch`, so probing "pytorch" presumably never succeeds - confirm
# against `_module_available` and the users of this flag before changing it.
PYTORCH_AVAILABLE = _module_available("pytorch")
PYHALOXML_AVAILABLE = _module_available("pyhaloxml")
DARWIN_SDK_AVAILABLE = _module_available("darwin")
BOTO3_AVAILABLE = _module_available("boto3")
AIOHTTP_AVAILABLE = _module_available("aiohttp")
6 changes: 0 additions & 6 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -61,12 +61,6 @@ dev = [
"darwin-py>=0.8.62",
]
darwin = ["darwin-py>=0.8.59"]
s3_remote = [
"boto3",
"botocore",
"boto3-stubs",
"botocore-stubs",
]
slidescore_remote = ["asyncio", "aiohttp"]

[project.urls]
Expand Down

0 comments on commit 0628a56

Please sign in to comment.