diff --git a/tiled/adapters/awkward.py b/tiled/adapters/awkward.py
index 85f49ee49..70bade5a6 100644
--- a/tiled/adapters/awkward.py
+++ b/tiled/adapters/awkward.py
@@ -3,6 +3,7 @@
 import awkward
 import awkward.forms
 from numpy.typing import NDArray
+from .type_alliases import JSON
 
 from ..access_policies import DummyAccessPolicy, SimpleAccessPolicy
 from ..adapters.awkward_buffers import DirectoryContainer
@@ -68,7 +69,7 @@ def from_array(
             access_policy=access_policy,
         )
 
-    def metadata(self) -> dict[str, str]:
+    def metadata(self) -> JSON:
         """
 
         Returns
@@ -100,7 +101,7 @@ def read_buffers(self, form_keys: Optional[list[str]] = None) -> dict[Any, bytes
             buffers[key] = self.container[key]
         return buffers
 
-    def read(self) -> dict[str, bytes]:
+    def read(self) -> JSON:
         return dict(self.container)
 
     def write(self, container: DirectoryContainer) -> None:
diff --git a/tiled/adapters/csv.py b/tiled/adapters/csv.py
index 0f9638b13..d44bfa985 100644
--- a/tiled/adapters/csv.py
+++ b/tiled/adapters/csv.py
@@ -29,7 +29,7 @@ def read_csv(
     data_uri: str,
     structure: Optional[TableStructure] = None,
     metadata: Optional[JSON] = None,
-    specs: Optional[List[str]] = None,
+    specs: Optional[List[Spec]] = None,
     access_policy: Optional[Union[DummyAccessPolicy, SimpleAccessPolicy]] = None,
     **kwargs: Any,
 ) -> TableAdapter:
@@ -121,15 +121,19 @@ def init_storage(cls, data_uri: str, structure: TableStructure) -> Any:
         ]
         return assets
 
-    def append_partition(self, data: Any, partition: int) -> None:
+    def append_partition(
+        self, data: Union[dask.dataframe.DataFrame, pandas.DataFrame], partition: int
+    ) -> None:
         uri = self._partition_paths[partition]
         data.to_csv(uri, index=False, mode="a", header=False)
 
-    def write_partition(self, data: Any, partition: int) -> None:
+    def write_partition(
+        self, data: Union[dask.dataframe.DataFrame, pandas.DataFrame], partition: int
+    ) -> None:
         uri = self._partition_paths[partition]
         data.to_csv(uri, index=False)
 
-    def write(self, data: Any) -> None:
+    def write(self, data: Union[dask.dataframe.DataFrame, pandas.DataFrame]) -> None:
         if self.structure().npartitions != 1:
             raise NotImplementedError
         uri = self._partition_paths[0]
diff --git a/tiled/adapters/excel.py b/tiled/adapters/excel.py
index c60fee471..21c039d69 100644
--- a/tiled/adapters/excel.py
+++ b/tiled/adapters/excel.py
@@ -1,3 +1,5 @@
+from typing import Any
+
 import dask.dataframe
 import pandas
 
@@ -8,7 +10,7 @@ class ExcelAdapter(MapAdapter):
     @classmethod
-    def from_file(cls, file, **kwargs):
+    def from_file(cls, file: Any, **kwargs: Any) -> "ExcelAdapter":
         """
         Read the sheets in an Excel file.
 
@@ -52,7 +54,7 @@ def from_file(cls, file, **kwargs):
         return cls(mapping, **kwargs)
 
     @classmethod
-    def from_uri(cls, data_uri, **kwargs):
+    def from_uri(cls, data_uri: str, **kwargs: Any) -> "ExcelAdapter":
         """
         Read the sheets in an Excel file.
diff --git a/tiled/adapters/hdf5.py b/tiled/adapters/hdf5.py
index 882e5a93e..4244f72ca 100644
--- a/tiled/adapters/hdf5.py
+++ b/tiled/adapters/hdf5.py
@@ -7,7 +7,7 @@
 import h5py
 import numpy
 from numpy._typing import NDArray
-from type_alliases import HDF5, Spec
+from .type_alliases import JSON, Spec
 
 from ..access_policies import DummyAccessPolicy, SimpleAccessPolicy
 from ..adapters.utils import IndexersMixin
@@ -62,7 +62,7 @@ def __init__(
         node: Any,
         *,
         structure: Optional[TableStructure] = None,
-        metadata: Optional[HDF5] = None,
+        metadata: Optional[JSON] = None,
         specs: Optional[list[Spec]] = None,
         access_policy: Optional[Union[SimpleAccessPolicy, DummyAccessPolicy]] = None,
     ) -> None:
@@ -78,7 +78,7 @@ def from_file(
         file: Any,
         *,
         structure: Optional[TableStructure] = None,
-        metadata: HDF5 = None,
+        metadata: JSON = None,
         swmr: bool = SWMR_DEFAULT,
         libver: str = "latest",
         specs: Optional[List[Spec]] = None,
@@ -92,7 +92,7 @@ def from_uri(
         data_uri: Union[str, list[str]],
         *,
         structure: Optional[TableStructure] = None,
-        metadata: Optional[HDF5] = None,
+        metadata: Optional[JSON] = None,
         swmr: bool = SWMR_DEFAULT,
         libver: str = "latest",
         specs: Optional[list[Spec]] = None,
@@ -112,7 +112,7 @@ def access_policy(self) -> Optional[Union[SimpleAccessPolicy, DummyAccessPolicy]
     def structure(self) -> None:
         return None
 
-    def metadata(self) -> HDF5:
+    def metadata(self) -> JSON:
         d = dict(self._node.attrs)
         for k, v in list(d.items()):
             # Convert any bytes to str.
@@ -197,7 +197,7 @@ def hdf5_lookup(
     data_uri: Union[str, list[str]],
     *,
     structure: Optional[TableStructure] = None,
-    metadata: Optional[HDF5] = None,
+    metadata: Optional[JSON] = None,
     swmr: bool = SWMR_DEFAULT,
     libver: str = "latest",
     specs: Optional[List[Spec]] = None,
diff --git a/tiled/adapters/netcdf.py b/tiled/adapters/netcdf.py
index 60f7f4d29..9111e7738 100644
--- a/tiled/adapters/netcdf.py
+++ b/tiled/adapters/netcdf.py
@@ -1,8 +1,11 @@
+from pathlib import Path
+from typing import Union
+
 import xarray
 
 from .xarray import DatasetAdapter
 
 
-def read_netcdf(filepath):
+def read_netcdf(filepath: Union[str, list[str], Path]) -> DatasetAdapter:
     ds = xarray.open_dataset(filepath, decode_times=False)
     return DatasetAdapter.from_dataset(ds)
diff --git a/tiled/adapters/parquet.py b/tiled/adapters/parquet.py
index 100814218..09291f0f8 100644
--- a/tiled/adapters/parquet.py
+++ b/tiled/adapters/parquet.py
@@ -63,11 +63,13 @@ def init_storage(
         ]
         return assets
 
-    def write_partition(self, data: Any, partition: int) -> None:
+    def write_partition(
+        self, data: Union[dask.dataframe.DataFrame, pandas.DataFrame], partition: int
+    ) -> None:
         uri = self._partition_paths[partition]
         data.to_parquet(uri)
 
-    def write(self, data: Any) -> None:
+    def write(self, data: Union[dask.dataframe.DataFrame, pandas.DataFrame]) -> None:
         if self.structure().npartitions != 1:
             raise NotImplementedError
         uri = self._partition_paths[0]
diff --git a/tiled/adapters/sparse.py b/tiled/adapters/sparse.py
index 7e3c5afcf..bca72143c 100644
--- a/tiled/adapters/sparse.py
+++ b/tiled/adapters/sparse.py
@@ -1,6 +1,13 @@
+from typing import Any, Optional, Tuple, Union
+
+import dask.dataframe
 import numpy
+import pandas
 import sparse
+from numpy.typing import NDArray
+from .type_alliases import JSON, Spec
 
+from ..access_policies import DummyAccessPolicy, SimpleAccessPolicy
 from ..structures.core import StructureFamily
 from ..structures.sparse import COOStructure
 from .array import slice_and_shape_from_block_and_chunks
@@ -13,14 +20,14 @@ class COOAdapter:
     @classmethod
     def from_arrays(
         cls,
-        coords,
-        data,
-        shape,
-        dims=None,
-        metadata=None,
-        specs=None,
-        access_policy=None,
-    ):
+        coords: NDArray[Any],
+        data: Union[dask.dataframe.DataFrame, pandas.DataFrame],
+        shape: Tuple[int, ...],
+        dims: Optional[Tuple[str, ...]] = None,
+        metadata: Optional[JSON] = None,
+        specs: Optional[list[Spec]] = None,
+        access_policy: Optional[Union[SimpleAccessPolicy, DummyAccessPolicy]] = None,
+    ) -> "COOAdapter":
         """
         Simplest constructor. Single chunk from coords, data arrays.
         """
@@ -39,7 +46,15 @@ def from_arrays(
         )
 
     @classmethod
-    def from_coo(cls, coo, *, dims=None, metadata=None, specs=None, access_policy=None):
+    def from_coo(
+        cls,
+        coo: sparse.COO,
+        *,
+        dims: Optional[Tuple[str, ...]] = None,
+        metadata: Optional[JSON] = None,
+        specs: Optional[list[Spec]] = None,
+        access_policy: Optional[Union[SimpleAccessPolicy, DummyAccessPolicy]] = None,
+    ) -> "COOAdapter":
         "Construct from sparse.COO object."
         return cls.from_arrays(
             coords=coo.coords,
@@ -54,15 +69,15 @@ def from_coo(cls, coo, *, dims=None, metadata=None, specs=None, access_policy=No
     @classmethod
     def from_global_ref(
         cls,
-        blocks,
-        shape,
-        chunks,
+        blocks: dict[Tuple[int, ...], Tuple[NDArray[Any], Any]],
+        shape: Tuple[int, ...],
+        chunks: Tuple[Tuple[int, ...], ...],
         *,
-        dims=None,
-        metadata=None,
-        specs=None,
-        access_policy=None,
-    ):
+        dims: Optional[Tuple[str, ...]] = None,
+        metadata: Optional[JSON] = None,
+        specs: Optional[list[Spec]] = None,
+        access_policy: Optional[Union[SimpleAccessPolicy, DummyAccessPolicy]] = None,
+    ) -> "COOAdapter":
         """
         Construct from blocks with coords given in global reference frame.
         """
@@ -90,13 +105,13 @@ def from_global_ref(
 
     def __init__(
         self,
-        blocks,
-        structure,
+        blocks: dict[Tuple[int, ...], Tuple[NDArray[Any], Any]],
+        structure: COOStructure,
         *,
-        metadata=None,
-        specs=None,
-        access_policy=None,
-    ):
+        metadata: Optional[JSON] = None,
+        specs: Optional[list[Spec]] = None,
+        access_policy: Optional[Union[SimpleAccessPolicy, DummyAccessPolicy]] = None,
+    ) -> None:
         """
         Construct from blocks with coords given in block-local reference frame.
""" @@ -106,13 +121,15 @@ def __init__( self.specs = specs or [] self.access_policy = access_policy - def metadata(self): + def metadata(self) -> JSON: return self._metadata - def structure(self): + def structure(self) -> COOStructure: return self._structure - def read_block(self, block, slice=None): + def read_block( + self, block: Tuple[int, ...], slice: Optional[Union[int, slice]] = None + ) -> NDArray[Any]: coords, data = self.blocks[block] _, shape = slice_and_shape_from_block_and_chunks(block, self._structure.chunks) arr = sparse.COO(data=data[:], coords=coords[:], shape=shape) @@ -120,7 +137,7 @@ def read_block(self, block, slice=None): arr = arr[slice] return arr - def read(self, slice=None): + def read(self, slice: Optional[Union[int, slice]] = None) -> NDArray[Any]: all_coords = [] all_data = [] for block, (coords, data) in self.blocks.items(): diff --git a/tiled/adapters/sparse_blocks_parquet.py b/tiled/adapters/sparse_blocks_parquet.py index da479ce73..eb106d42c 100644 --- a/tiled/adapters/sparse_blocks_parquet.py +++ b/tiled/adapters/sparse_blocks_parquet.py @@ -1,15 +1,22 @@ import itertools +from typing import Any, Optional, Tuple, Union +import dask.base +import dask.dataframe import numpy import pandas import sparse +from numpy._typing import NDArray +from ..access_policies import DummyAccessPolicy, SimpleAccessPolicy from ..adapters.array import slice_and_shape_from_block_and_chunks from ..structures.core import StructureFamily +from ..structures.sparse import COOStructure from ..utils import path_from_uri +from .type_alliases import JSON, Spec -def load_block(uri): +def load_block(uri: str) -> Tuple[list[int], Tuple[NDArray[Any], Any]]: # TODO This can be done without pandas. # Better to use a plain I/O library. df = pandas.read_parquet(path_from_uri(uri)) @@ -23,12 +30,12 @@ class SparseBlocksParquetAdapter: def __init__( self, - data_uris, - structure, - metadata=None, - specs=None, - access_policy=None, - ): + data_uris: Union[str, list[str]], + structure: COOStructure, + metadata: Optional[JSON] = None, + specs: Optional[list[Spec]] = None, + access_policy: Optional[Union[SimpleAccessPolicy, DummyAccessPolicy]] = None, + ) -> None: num_blocks = (range(len(n)) for n in structure.chunks) self.blocks = {} for block, uri in zip(itertools.product(*num_blocks), data_uris): @@ -41,9 +48,9 @@ def __init__( @classmethod def init_storage( cls, - data_uri, - structure, - ): + data_uri: Union[str, list[str]], + structure: COOStructure, + ) -> Any: from ..server.schemas import Asset directory = path_from_uri(data_uri) @@ -61,20 +68,24 @@ def init_storage( ] return assets - def metadata(self): + def metadata(self) -> JSON: return self._metadata - def write_block(self, data, block): + def write_block( + self, + data: Union[dask.dataframe.DataFrame, pandas.DataFrame], + block: Tuple[int, ...], + ) -> None: uri = self.blocks[block] data.to_parquet(path_from_uri(uri)) - def write(self, data): + def write(self, data: Union[dask.dataframe.DataFrame, pandas.DataFrame]) -> None: if len(self.blocks) > 1: raise NotImplementedError uri = self.blocks[(0,) * len(self._structure.shape)] data.to_parquet(path_from_uri(uri)) - def read(self, slice=...): + def read(self, slice: Optional[Union[int, slice]]) -> NDArray[Any]: all_coords = [] all_data = [] for block, uri in self.blocks.items(): @@ -93,11 +104,13 @@ def read(self, slice=...): ) return arr[slice] - def read_block(self, block, slice=...): + def read_block( + self, block: Tuple[int, ...], slice: Optional[Union[int, slice]] + ) -> 
         coords, data = load_block(self.blocks[block])
         _, shape = slice_and_shape_from_block_and_chunks(block, self._structure.chunks)
         arr = sparse.COO(data=data[:], coords=coords[:], shape=shape)
         return arr[slice]
 
-    def structure(self):
+    def structure(self) -> COOStructure:
         return self._structure
diff --git a/tiled/adapters/table.py b/tiled/adapters/table.py
index e242dc244..7f053165e 100644
--- a/tiled/adapters/table.py
+++ b/tiled/adapters/table.py
@@ -1,9 +1,11 @@
-from typing import Optional, Self
+from typing import Any, Iterator, Optional, Union
 
 import dask.base
 import dask.dataframe
 import pandas
+from .type_alliases import JSON
 
+from ..access_policies import DummyAccessPolicy, SimpleAccessPolicy
 from ..server.object_cache import get_object_cache
 from ..structures.core import Spec, StructureFamily
 from ..structures.table import TableStructure
@@ -27,13 +29,13 @@ class TableAdapter:
     @classmethod
     def from_pandas(
         cls,
-        *args,
-        metadata=None,
-        specs=None,
-        access_policy=None,
-        npartitions=1,
-        **kwargs,
-    ):
+        *args: Any,
+        metadata: Optional[JSON] = None,
+        specs: Optional[list[Spec]] = None,
+        access_policy: Optional[Union[SimpleAccessPolicy, DummyAccessPolicy]] = None,
+        npartitions: int = 1,
+        **kwargs: Any,
+    ) -> "TableAdapter":
         ddf = dask.dataframe.from_pandas(*args, npartitions=npartitions, **kwargs)
         if specs is None:
             specs = [Spec("dataframe")]
@@ -44,11 +46,11 @@ def from_pandas(
     @classmethod
     def from_dask_dataframe(
         cls,
-        ddf,
-        metadata=None,
-        specs=None,
-        access_policy=None,
-    ) -> Self:
+        ddf: dask.dataframe.DataFrame,
+        metadata: Optional[JSON] = None,
+        specs: Optional[list[Spec]] = None,
+        access_policy: Optional[Union[DummyAccessPolicy, SimpleAccessPolicy]] = None,
+    ) -> "TableAdapter":
         structure = TableStructure.from_dask_dataframe(ddf)
         if specs is None:
             specs = [Spec("dataframe")]
@@ -62,12 +64,12 @@ def from_dask_dataframe(
 
     def __init__(
         self,
-        partitions,
+        partitions: list[Any],
         structure: TableStructure,
         *,
-        metadata=None,
-        specs=None,
-        access_policy=None,
+        metadata: Optional[JSON] = None,
+        specs: Optional[list[Spec]] = None,
+        access_policy: Optional[Union[SimpleAccessPolicy, DummyAccessPolicy]] = None,
     ) -> None:
         self._metadata = metadata or {}
         self._partitions = list(partitions)
@@ -75,26 +77,26 @@ def __init__(
         self.specs = specs or []
         self.access_policy = access_policy
 
-    def __repr__(self):
+    def __repr__(self) -> str:
         return f"{type(self).__name__}({self._structure.columns!r})"
 
-    def __getitem__(self, key):
+    def __getitem__(self, key: str) -> ArrayAdapter:
         # Must compute to determine shape.
         return ArrayAdapter.from_array(self.read([key])[key].values)
 
-    def items(self):
+    def items(self) -> Iterator[tuple[str, ArrayAdapter]]:
         yield from (
             (key, ArrayAdapter.from_array(self.read([key])[key].values))
             for key in self._structure.columns
         )
 
-    def metadata(self):
+    def metadata(self) -> JSON:
         return self._metadata
 
     def structure(self) -> TableStructure:
         return self._structure
 
-    def read(self, fields: Optional[list[str]] = None) -> pandas.DataFrame:
+    def read(self, fields: Optional[Union[str, list[str]]] = None) -> pandas.DataFrame:
         if any(p is None for p in self._partitions):
             raise ValueError("Not all partitions have been stored.")
         if isinstance(self._partitions[0], dask.dataframe.DataFrame):
@@ -113,7 +115,9 @@ def read(self, fields: Optional[list[str]] = None) -> pandas.DataFrame:
         return df
 
     def read_partition(
-        self, partition: int, fields: Optional[list[str]] = None
+        self,
+        partition: int,
+        fields: Optional[list[str]] = None,
     ) -> pandas.DataFrame:
         partition = self._partitions[partition]
         if partition is None:
diff --git a/tiled/adapters/tiff.py b/tiled/adapters/tiff.py
index 29e093fc7..12e64d235 100644
--- a/tiled/adapters/tiff.py
+++ b/tiled/adapters/tiff.py
@@ -58,10 +58,10 @@ def __init__(
         )
         self._structure = structure
 
-    def metadata(self) -> dict[Any, Any]:
+    def metadata(self) -> JSON:
         # This contains some enums, but Python's built-in JSON serializer
         # handles them fine (converting to str or int as appropriate).
-        d = {tag.name: tag.value for tag in self._file.pages[0].tags.values()}  # type: ignore
+        d = {tag.name: tag.value for tag in self._file.pages[0].tags.values()}
         d.update(self._provided_metadata)
         return d
@@ -116,7 +116,7 @@ def from_uris(
 
     def __init__(
         self,
-        seq: Any,
+        seq: tifffile.TiffSequence,
         *,
         structure: Optional[ArrayStructure] = None,
         metadata: Optional[JSON] = None,
diff --git a/tiled/adapters/type_alliases.py b/tiled/adapters/type_alliases.py
index ea59b6fe1..0e3760f32 100644
--- a/tiled/adapters/type_alliases.py
+++ b/tiled/adapters/type_alliases.py
@@ -2,4 +2,3 @@
 
 JSON = dict[str, "JSON"] | list["JSON"] | str | int | float | bool | None
 Spec = TypedDict({"name": str, "version": str})
-HDF5 = dict[str, "JSON"] | list["JSON"] | str | int | float | bool | None
diff --git a/tiled/adapters/utils.py b/tiled/adapters/utils.py
index b5382bf9e..73173bbb8 100644
--- a/tiled/adapters/utils.py
+++ b/tiled/adapters/utils.py
@@ -1,4 +1,5 @@
 import warnings
+from typing import Any
 
 # for back-compat
 from ..utils import node_repr as tree_repr  # noqa: F401
@@ -16,18 +17,23 @@ class IndexersMixin:
     This is just for back-ward compatiblity.
""" + keys: Any + values: Any + items: Any + fn: Any + @property - def keys_indexer(self): + def keys_indexer(self) -> Any: warnings.warn(_MESSAGE.format(name="keys"), DeprecationWarning) return self.keys() @property - def values_indexer(self): + def values_indexer(self) -> Any: warnings.warn(_MESSAGE.format(name="values"), DeprecationWarning) return self.values() @property - def items_indexer(self): + def items_indexer(self) -> Any: warnings.warn(_MESSAGE.format(name="items"), DeprecationWarning) return self.items() @@ -50,8 +56,8 @@ class IndexCallable: __slots__ = ("fn",) - def __init__(self, fn): + def __init__(self, fn: Any) -> None: self.fn = fn - def __getitem__(self, key): + def __getitem__(self, key: str) -> Any: return self.fn(key) diff --git a/tiled/adapters/xarray.py b/tiled/adapters/xarray.py index cc44e5040..c7e2c2013 100644 --- a/tiled/adapters/xarray.py +++ b/tiled/adapters/xarray.py @@ -1,6 +1,6 @@ import collections.abc import itertools -from typing import Any, Iterable, Iterator, Optional, Union +from typing import Any, Iterator, Optional, Union import xarray diff --git a/tiled/adapters/zarr.py b/tiled/adapters/zarr.py index fb4b18b54..a00d32bcb 100644 --- a/tiled/adapters/zarr.py +++ b/tiled/adapters/zarr.py @@ -4,6 +4,8 @@ from types import EllipsisType from typing import Any, Iterator, Optional, Tuple, Union +import dask +import pandas import zarr.core import zarr.hierarchy import zarr.storage @@ -82,7 +84,9 @@ def read_block( return self._array[self._stencil()][block_slice][slice] def write( - self, data: NDArray[Any], slice: Optional[Union[slice, EllipsisType]] + self, + data: Union[dask.dataframe.DataFrame, pandas.DataFrame], + slice: Optional[Union[slice, EllipsisType]], ) -> None: if slice is not ...: raise NotImplementedError @@ -90,7 +94,7 @@ def write( async def write_block( self, - data: NDArray[Any], + data: Union[dask.dataframe.DataFrame, pandas.DataFrame], block: Tuple[int, ...], slice: Optional[Union[slice, EllipsisType]], ) -> None: