From b948088607956f9e8afb02f61ec25cbbe58f5c38 Mon Sep 17 00:00:00 2001
From: Seher Karakuzu
Date: Fri, 5 Apr 2024 11:04:22 -0400
Subject: [PATCH] Add more type annotations to the adapters

---
 tiled/adapters/awkward.py               | 71 ++++++++++++++++++++-----
 tiled/adapters/csv.py                   | 12 +++--
 tiled/adapters/excel.py                 |  6 ++-
 tiled/adapters/hdf5.py                  | 12 ++---
 tiled/adapters/netcdf.py                |  5 +-
 tiled/adapters/parquet.py               |  6 ++-
 tiled/adapters/sparse.py                | 71 +++++++++++++++----------
 tiled/adapters/sparse_blocks_parquet.py | 45 ++++++++++------
 tiled/adapters/table.py                 | 51 ++++++++++--------
 tiled/adapters/tiff.py                  |  6 +--
 tiled/adapters/type_alliases.py         |  1 -
 tiled/adapters/utils.py                 | 16 ++++--
 tiled/adapters/xarray.py                |  2 +-
 tiled/adapters/zarr.py                  |  8 ++-
 14 files changed, 208 insertions(+), 104 deletions(-)

diff --git a/tiled/adapters/awkward.py b/tiled/adapters/awkward.py
index bc1d491d4..eea558f99 100644
--- a/tiled/adapters/awkward.py
+++ b/tiled/adapters/awkward.py
@@ -1,5 +1,7 @@
+from typing import Any, List, Optional, Self, Union
+
 import awkward
 import awkward.forms
+from numpy.typing import NDArray
 
+from ..access_policies import DummyAccessPolicy, SimpleAccessPolicy
 from ..structures.awkward import AwkwardStructure
 from ..structures.core import StructureFamily
+from .awkward_directory_container import DirectoryContainer
+from .type_alliases import JSON, Spec
@@ -10,12 +12,22 @@ class AwkwardAdapter:
     def __init__(
         self,
-        container,
-        structure,
-        metadata=None,
-        specs=None,
-        access_policy=None,
-    ):
+        container: DirectoryContainer,
+        structure: AwkwardStructure,
+        metadata: Optional[JSON] = None,
+        specs: Optional[List[Spec]] = None,
+        access_policy: Optional[Union[DummyAccessPolicy, SimpleAccessPolicy]] = None,
+    ) -> None:
+        """
+        Parameters
+        ----------
+        container :
+            Maps awkward form keys to their backing buffers.
+        structure :
+            The length and form of the awkward array.
+        metadata :
+            Optional user metadata to attach to this adapter.
+        specs :
+            Optional list of Spec objects describing the layout.
+        access_policy :
+            Optional policy governing who may read this node.
+        """
         self.container = container
         self._metadata = metadata or {}
         self._structure = structure
@@ -23,7 +35,26 @@ def __init__(
         self.access_policy = access_policy
 
     @classmethod
-    def from_array(cls, array, metadata=None, specs=None, access_policy=None):
+    def from_array(
+        cls,
+        array: NDArray[Any],
+        metadata: Optional[JSON] = None,
+        specs: Optional[List[Spec]] = None,
+        access_policy: Optional[Union[DummyAccessPolicy, SimpleAccessPolicy]] = None,
+    ) -> Self:
+        """
+        Parameters
+        ----------
+        array :
+            The array to decompose via awkward.to_buffers.
+        metadata :
+            Optional user metadata to attach to this adapter.
+        specs :
+            Optional list of Spec objects describing the layout.
+        access_policy :
+            Optional policy governing who may read this node.
+
+        Returns
+        -------
+        An AwkwardAdapter backed by the array's buffers.
+        """
         form, length, container = awkward.to_buffers(array)
         structure = AwkwardStructure(length=length, form=form.to_dict())
         return cls(
@@ -34,10 +65,26 @@ def from_array(cls, array, metadata=None, specs=None, access_policy=None):
             access_policy=access_policy,
         )
 
-    def metadata(self):
+    def metadata(self) -> JSON:
+        """
+        Returns
+        -------
+        The metadata dictionary attached to this adapter.
+        """
         return self._metadata
 
-    def read_buffers(self, form_keys=None):
+    def read_buffers(self, form_keys: Optional[list[str]] = None) -> dict[str, bytes]:
+        """
+        Parameters
+        ----------
+        form_keys :
+            Restrict the result to these form keys; by default all are read.
+
+        Returns
+        -------
+        A mapping of form key to raw buffer.
+        """
         form = awkward.forms.from_dict(self._structure.form)
         keys = [
             key
@@ -50,12 +97,12 @@ def read_buffers(self, form_keys=None):
             buffers[key] = self.container[key]
         return buffers
 
-    def read(self):
+    def read(self) -> dict[str, bytes]:
         return dict(self.container)
 
-    def write(self, container):
+    def write(self, container: DirectoryContainer) -> None:
         for form_key, value in container.items():
             self.container[form_key] = value
 
-    def structure(self):
+    def structure(self) -> AwkwardStructure:
         return self._structure
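For reference, the annotated from_array() path can be smoke-tested with a small ragged array. This is a minimal sketch, assuming an environment where tiled and awkward import cleanly; the names and metadata values are illustrative:

    import awkward

    from tiled.adapters.awkward import AwkwardAdapter

    # Build a small ragged array and hand it to the typed classmethod.
    # from_array() stores the output of awkward.to_buffers() and records
    # an AwkwardStructure carrying the length and form.
    array = awkward.Array([[1, 2, 3], [], [4, 5]])
    adapter = AwkwardAdapter.from_array(array, metadata={"sample": "demo"})
    print(adapter.structure().length)  # 3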
diff --git a/tiled/adapters/csv.py b/tiled/adapters/csv.py
index 92cff5ea0..a8e6b7252 100644
--- a/tiled/adapters/csv.py
+++ b/tiled/adapters/csv.py
@@ -28,7 +28,7 @@ def read_csv(
     data_uri: str,
     structure: Optional[TableStructure] = None,
     metadata: Optional[JSON] = None,
-    specs: Optional[List[str]] = None,
+    specs: Optional[List[Spec]] = None,
     access_policy: Optional[Union[DummyAccessPolicy, SimpleAccessPolicy]] = None,
     **kwargs: Any,
 ) -> TableAdapter:
@@ -114,15 +114,19 @@ def init_storage(cls, data_uri: str, structure: TableStructure) -> Any:
         ]
         return assets
 
-    def append_partition(self, data: Any, partition: int) -> None:
+    def append_partition(
+        self, data: Union[dask.dataframe.DataFrame, pandas.DataFrame], partition: int
+    ) -> None:
         uri = self._partition_paths[partition]
         data.to_csv(uri, index=False, mode="a", header=False)
 
-    def write_partition(self, data: Any, partition: int) -> None:
+    def write_partition(
+        self, data: Union[dask.dataframe.DataFrame, pandas.DataFrame], partition: int
+    ) -> None:
         uri = self._partition_paths[partition]
         data.to_csv(uri, index=False)
 
-    def write(self, data: Any) -> None:
+    def write(self, data: Union[dask.dataframe.DataFrame, pandas.DataFrame]) -> None:
         if self.structure().npartitions != 1:
             raise NotImplementedError
         uri = self._partition_paths[0]
diff --git a/tiled/adapters/excel.py b/tiled/adapters/excel.py
index ed9418f52..eb1318aa4 100644
--- a/tiled/adapters/excel.py
+++ b/tiled/adapters/excel.py
@@ -1,3 +1,5 @@
+from typing import Any
+
 import dask.dataframe
 import pandas
 
@@ -7,7 +9,7 @@ class ExcelAdapter(MapAdapter):
     @classmethod
-    def from_file(cls, file, **kwargs):
+    def from_file(cls, file: Any, **kwargs: Any) -> "ExcelAdapter":
         """
         Read the sheets in an Excel file.
@@ -43,7 +45,7 @@ def from_file(cls, file, **kwargs):
         return cls(mapping, **kwargs)
 
     @classmethod
-    def from_uri(cls, data_uri, **kwargs):
+    def from_uri(cls, data_uri: str, **kwargs: Any) -> "ExcelAdapter":
         """
         Read the sheets in an Excel file.
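A similar sketch for ExcelAdapter, assuming from_file() accepts a pandas.ExcelFile (its file: Any annotation suggests several input types are handled) and that an Excel engine such as openpyxl is installed; the file name is illustrative:

    import pandas

    from tiled.adapters.excel import ExcelAdapter

    # Write a throwaway workbook, then wrap it; each sheet becomes one
    # entry in the resulting mapping.
    pandas.DataFrame({"a": [1, 2], "b": [3, 4]}).to_excel("demo.xlsx", index=False)
    adapter = ExcelAdapter.from_file(pandas.ExcelFile("demo.xlsx"))
    print(list(adapter))  # sheet names, e.g. ['Sheet1']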
diff --git a/tiled/adapters/hdf5.py b/tiled/adapters/hdf5.py
index e93eb9aed..81a99310f 100644
--- a/tiled/adapters/hdf5.py
+++ b/tiled/adapters/hdf5.py
@@ -7,7 +7,7 @@
 import h5py
 import numpy
 from numpy._typing import NDArray
-from type_alliases import HDF5, Spec
+from .type_alliases import JSON, Spec
 
 from ..access_policies import DummyAccessPolicy, SimpleAccessPolicy
 from ..adapters.utils import IndexersMixin
@@ -63,7 +63,7 @@ def __init__(
         node: Any,
         *,
         structure: Optional[TableStructure] = None,
-        metadata: Optional[HDF5] = None,
+        metadata: Optional[JSON] = None,
         specs: Optional[list[Spec]] = None,
         access_policy: Optional[Union[SimpleAccessPolicy, DummyAccessPolicy]] = None,
     ) -> None:
@@ -79,7 +79,7 @@ def from_file(
         file: Any,
         *,
         structure: Optional[TableStructure] = None,
-        metadata: HDF5 = None,
+        metadata: JSON = None,
         swmr: bool = SWMR_DEFAULT,
         libver: str = "latest",
         specs: Optional[List[Spec]] = None,
@@ -93,7 +93,7 @@ def from_uri(
         data_uri: Union[str, list[str]],
         *,
         structure: Optional[TableStructure] = None,
-        metadata: Optional[HDF5] = None,
+        metadata: Optional[JSON] = None,
         swmr: bool = SWMR_DEFAULT,
         libver: str = "latest",
         specs: Optional[list[Spec]] = None,
@@ -116,7 +116,7 @@ def access_policy(self) -> Optional[Union[SimpleAccessPolicy, DummyAccessPolicy]
     def structure(self) -> None:
         return None
 
-    def metadata(self) -> HDF5:
+    def metadata(self) -> JSON:
         d = dict(self._node.attrs)
         for k, v in list(d.items()):
             # Convert any bytes to str.
@@ -201,7 +201,7 @@ def hdf5_lookup(
     data_uri: Union[str, list[str]],
     *,
     structure: Optional[TableStructure] = None,
-    metadata: Optional[HDF5] = None,
+    metadata: Optional[JSON] = None,
     swmr: bool = SWMR_DEFAULT,
     libver: str = "latest",
     specs: Optional[List[Spec]] = None,
diff --git a/tiled/adapters/netcdf.py b/tiled/adapters/netcdf.py
index 60f7f4d29..9111e7738 100644
--- a/tiled/adapters/netcdf.py
+++ b/tiled/adapters/netcdf.py
@@ -1,8 +1,11 @@
+from pathlib import Path
+from typing import Union
+
 import xarray
 
 from .xarray import DatasetAdapter
 
 
-def read_netcdf(filepath):
+def read_netcdf(filepath: Union[str, Path]) -> DatasetAdapter:
     ds = xarray.open_dataset(filepath, decode_times=False)
     return DatasetAdapter.from_dataset(ds)
diff --git a/tiled/adapters/parquet.py b/tiled/adapters/parquet.py
index 100814218..09291f0f8 100644
--- a/tiled/adapters/parquet.py
+++ b/tiled/adapters/parquet.py
@@ -63,11 +63,13 @@ def init_storage(
         ]
         return assets
 
-    def write_partition(self, data: Any, partition: int) -> None:
+    def write_partition(
+        self, data: Union[dask.dataframe.DataFrame, pandas.DataFrame], partition: int
+    ) -> None:
         uri = self._partition_paths[partition]
         data.to_parquet(uri)
 
-    def write(self, data: Any) -> None:
+    def write(self, data: Union[dask.dataframe.DataFrame, pandas.DataFrame]) -> None:
         if self.structure().npartitions != 1:
             raise NotImplementedError
         uri = self._partition_paths[0]
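A round-trip sketch for the annotated read_netcdf(), assuming a NetCDF-capable backend (scipy, netCDF4, or h5netcdf) is available; the file name is illustrative:

    import numpy
    import xarray

    from tiled.adapters.netcdf import read_netcdf

    # Create a one-variable dataset on disk, then wrap it in a
    # DatasetAdapter via the typed convenience function.
    xarray.Dataset({"temperature": ("time", numpy.arange(3.0))}).to_netcdf("demo.nc")
    adapter = read_netcdf("demo.nc")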
diff --git a/tiled/adapters/sparse.py b/tiled/adapters/sparse.py
index 7e3c5afcf..bca72143c 100644
--- a/tiled/adapters/sparse.py
+++ b/tiled/adapters/sparse.py
@@ -1,6 +1,13 @@
+from typing import Any, Optional, Tuple, Union
+
 import numpy
 import sparse
+from numpy.typing import NDArray
 
+from ..access_policies import DummyAccessPolicy, SimpleAccessPolicy
 from ..structures.core import StructureFamily
 from ..structures.sparse import COOStructure
 from .array import slice_and_shape_from_block_and_chunks
+from .type_alliases import JSON, Spec
@@ -13,14 +20,14 @@ class COOAdapter:
     @classmethod
     def from_arrays(
         cls,
-        coords,
-        data,
-        shape,
-        dims=None,
-        metadata=None,
-        specs=None,
-        access_policy=None,
-    ):
+        coords: NDArray[Any],
+        data: NDArray[Any],
+        shape: Tuple[int, ...],
+        dims: Optional[Tuple[str, ...]] = None,
+        metadata: Optional[JSON] = None,
+        specs: Optional[list[Spec]] = None,
+        access_policy: Optional[Union[SimpleAccessPolicy, DummyAccessPolicy]] = None,
+    ) -> "COOAdapter":
         """
         Simplest constructor. Single chunk from coords, data arrays.
         """
@@ -39,7 +46,15 @@ def from_arrays(
         )
 
     @classmethod
-    def from_coo(cls, coo, *, dims=None, metadata=None, specs=None, access_policy=None):
+    def from_coo(
+        cls,
+        coo: sparse.COO,
+        *,
+        dims: Optional[Tuple[str, ...]] = None,
+        metadata: Optional[JSON] = None,
+        specs: Optional[list[Spec]] = None,
+        access_policy: Optional[Union[SimpleAccessPolicy, DummyAccessPolicy]] = None,
+    ) -> "COOAdapter":
         "Construct from sparse.COO object."
         return cls.from_arrays(
             coords=coo.coords,
@@ -54,15 +69,15 @@ def from_coo(cls, coo, *, dims=None, metadata=None, specs=None, access_policy=No
     @classmethod
     def from_global_ref(
         cls,
-        blocks,
-        shape,
-        chunks,
+        blocks: dict[Tuple[int, ...], Tuple[NDArray[Any], Any]],
+        shape: Tuple[int, ...],
+        chunks: Tuple[Tuple[int, ...], ...],
         *,
-        dims=None,
-        metadata=None,
-        specs=None,
-        access_policy=None,
-    ):
+        dims: Optional[Tuple[str, ...]] = None,
+        metadata: Optional[JSON] = None,
+        specs: Optional[list[Spec]] = None,
+        access_policy: Optional[Union[SimpleAccessPolicy, DummyAccessPolicy]] = None,
+    ) -> "COOAdapter":
         """
         Construct from blocks with coords given in global reference frame.
         """
@@ -90,13 +105,13 @@ def from_global_ref(
 
     def __init__(
         self,
-        blocks,
-        structure,
+        blocks: dict[Tuple[int, ...], Tuple[NDArray[Any], Any]],
+        structure: COOStructure,
         *,
-        metadata=None,
-        specs=None,
-        access_policy=None,
-    ):
+        metadata: Optional[JSON] = None,
+        specs: Optional[list[Spec]] = None,
+        access_policy: Optional[Union[SimpleAccessPolicy, DummyAccessPolicy]] = None,
+    ) -> None:
         """
         Construct from blocks with coords given in block-local reference frame.
         """
@@ -106,13 +121,15 @@ def __init__(
         self.specs = specs or []
         self.access_policy = access_policy
 
-    def metadata(self):
+    def metadata(self) -> JSON:
         return self._metadata
 
-    def structure(self):
+    def structure(self) -> COOStructure:
         return self._structure
 
-    def read_block(self, block, slice=None):
+    def read_block(
+        self, block: Tuple[int, ...], slice: Optional[Union[int, slice]] = None
+    ) -> sparse.COO:
         coords, data = self.blocks[block]
         _, shape = slice_and_shape_from_block_and_chunks(block, self._structure.chunks)
         arr = sparse.COO(data=data[:], coords=coords[:], shape=shape)
@@ -120,7 +137,7 @@ def read_block(self, block, slice=None):
             arr = arr[slice]
         return arr
 
-    def read(self, slice=None):
+    def read(self, slice: Optional[Union[int, slice]] = None) -> sparse.COO:
         all_coords = []
         all_data = []
         for block, (coords, data) in self.blocks.items():
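The COOAdapter annotations can be checked against a small in-memory example using only numpy and sparse:

    import numpy
    import sparse

    from tiled.adapters.sparse import COOAdapter

    # A 2x2 sparse matrix with two nonzero entries; from_coo() unpacks
    # the coords/data/shape that from_arrays() expects, and read()
    # reassembles the same array from the single block.
    coo = sparse.COO(
        coords=numpy.array([[0, 1], [1, 0]]),
        data=numpy.array([9.0, 7.0]),
        shape=(2, 2),
    )
    adapter = COOAdapter.from_coo(coo)
    assert (adapter.read().todense() == coo.todense()).all()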
diff --git a/tiled/adapters/sparse_blocks_parquet.py b/tiled/adapters/sparse_blocks_parquet.py
index da479ce73..eb106d42c 100644
--- a/tiled/adapters/sparse_blocks_parquet.py
+++ b/tiled/adapters/sparse_blocks_parquet.py
@@ -1,15 +1,22 @@
 import itertools
+from types import EllipsisType
+from typing import Any, Optional, Tuple, Union
 
+import dask.base
+import dask.dataframe
 import numpy
 import pandas
 import sparse
+from numpy.typing import NDArray
 
+from ..access_policies import DummyAccessPolicy, SimpleAccessPolicy
 from ..adapters.array import slice_and_shape_from_block_and_chunks
 from ..structures.core import StructureFamily
+from ..structures.sparse import COOStructure
 from ..utils import path_from_uri
+from .type_alliases import JSON, Spec
 
 
-def load_block(uri):
+def load_block(uri: str) -> Tuple[NDArray[Any], NDArray[Any]]:
     # TODO This can be done without pandas.
     # Better to use a plain I/O library.
     df = pandas.read_parquet(path_from_uri(uri))
@@ -23,12 +30,12 @@ class SparseBlocksParquetAdapter:
 
     def __init__(
         self,
-        data_uris,
-        structure,
-        metadata=None,
-        specs=None,
-        access_policy=None,
-    ):
+        data_uris: list[str],
+        structure: COOStructure,
+        metadata: Optional[JSON] = None,
+        specs: Optional[list[Spec]] = None,
+        access_policy: Optional[Union[SimpleAccessPolicy, DummyAccessPolicy]] = None,
+    ) -> None:
         num_blocks = (range(len(n)) for n in structure.chunks)
         self.blocks = {}
         for block, uri in zip(itertools.product(*num_blocks), data_uris):
@@ -41,9 +48,9 @@ def __init__(
     @classmethod
     def init_storage(
         cls,
-        data_uri,
-        structure,
-    ):
+        data_uri: str,
+        structure: COOStructure,
+    ) -> Any:
         from ..server.schemas import Asset
 
         directory = path_from_uri(data_uri)
@@ -61,20 +68,24 @@ def init_storage(
         ]
         return assets
 
-    def metadata(self):
+    def metadata(self) -> JSON:
         return self._metadata
 
-    def write_block(self, data, block):
+    def write_block(
+        self,
+        data: Union[dask.dataframe.DataFrame, pandas.DataFrame],
+        block: Tuple[int, ...],
+    ) -> None:
         uri = self.blocks[block]
         data.to_parquet(path_from_uri(uri))
 
-    def write(self, data):
+    def write(self, data: Union[dask.dataframe.DataFrame, pandas.DataFrame]) -> None:
         if len(self.blocks) > 1:
             raise NotImplementedError
         uri = self.blocks[(0,) * len(self._structure.shape)]
         data.to_parquet(path_from_uri(uri))
 
-    def read(self, slice=...):
+    def read(self, slice: Union[int, slice, EllipsisType] = ...) -> sparse.COO:
         all_coords = []
         all_data = []
         for block, uri in self.blocks.items():
@@ -93,11 +104,13 @@ def read(self, slice=...):
         )
         return arr[slice]
 
-    def read_block(self, block, slice=...):
+    def read_block(
+        self, block: Tuple[int, ...], slice: Union[int, slice, EllipsisType] = ...
+    ) -> sparse.COO:
         coords, data = load_block(self.blocks[block])
         _, shape = slice_and_shape_from_block_and_chunks(block, self._structure.chunks)
         arr = sparse.COO(data=data[:], coords=coords[:], shape=shape)
         return arr[slice]
 
-    def structure(self):
+    def structure(self) -> COOStructure:
         return self._structure
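The slice=... defaults restored above are also why the annotations need types.EllipsisType (Python 3.10+) rather than Optional; a self-contained illustration of the pattern, independent of tiled:

    from types import EllipsisType
    from typing import Union

    def read(slice: Union[int, slice, EllipsisType] = ...) -> str:
        # arr[...] selects everything; an int or slice selects a subset.
        return "whole array" if slice is ... else f"subset {slice!r}"

    print(read())   # whole array
    print(read(0))  # subset 0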
"TableAdapter": structure = TableStructure.from_dask_dataframe(ddf) if specs is None: specs = [Spec("dataframe")] @@ -61,12 +66,12 @@ def from_dask_dataframe( def __init__( self, - partitions, + partitions: list[Any], structure: TableStructure, *, - metadata=None, - specs=None, - access_policy=None, + metadata: Optional[JSON] = None, + specs: Optional[list[Spec]] = None, + access_policy: Optional[Union[SimpleAccessPolicy, DummyAccessPolicy]] = None, ) -> None: self._metadata = metadata or {} self._partitions = list(partitions) @@ -74,26 +79,26 @@ def __init__( self.specs = specs or [] self.access_policy = access_policy - def __repr__(self): + def __repr__(self) -> str: return f"{type(self).__name__}({self._structure.columns!r})" - def __getitem__(self, key): + def __getitem__(self, key: str) -> ArrayAdapter: # Must compute to determine shape. return ArrayAdapter.from_array(self.read([key])[key].values) - def items(self): + def items(self) -> Iterator[tuple[str, ArrayAdapter]]: yield from ( (key, ArrayAdapter.from_array(self.read([key])[key].values)) for key in self._structure.columns ) - def metadata(self): + def metadata(self) -> JSON: return self._metadata def structure(self) -> TableStructure: return self._structure - def read(self, fields: Optional[list[str]] = None) -> pandas.DataFrame: + def read(self, fields: Optional[Union[str, list[str]]] = None) -> pandas.DataFrame: if any(p is None for p in self._partitions): raise ValueError("Not all partitions have been stored.") if isinstance(self._partitions[0], dask.dataframe.DataFrame): @@ -110,7 +115,9 @@ def read(self, fields: Optional[list[str]] = None) -> pandas.DataFrame: return df def read_partition( - self, partition: int, fields: Optional[list[str]] = None + self, + partition: Union[dask.dataframe.DataFrame, pandas.DataFrame], + fields: Optional[str] = None, ) -> pandas.DataFrame: partition = self._partitions[partition] if partition is None: diff --git a/tiled/adapters/tiff.py b/tiled/adapters/tiff.py index bd5b16b04..51f53a16c 100644 --- a/tiled/adapters/tiff.py +++ b/tiled/adapters/tiff.py @@ -59,10 +59,10 @@ def __init__( ) self._structure = structure - def metadata(self) -> dict[Any, Any]: + def metadata(self) -> JSON: # This contains some enums, but Python's built-in JSON serializer # handles them fine (converting to str or int as appropriate). - d = {tag.name: tag.value for tag in self._file.pages[0].tags.values()} # type: ignore + d = {tag.name: tag.value for tag in self._file.pages[0].tags.values()} d.update(self._provided_metadata) return d @@ -117,7 +117,7 @@ def from_uris( def __init__( self, - seq: Any, + seq: tifffile.TiffSequence, *, structure: Optional[ArrayStructure] = None, metadata: Optional[JSON] = None, diff --git a/tiled/adapters/type_alliases.py b/tiled/adapters/type_alliases.py index ea59b6fe1..0e3760f32 100644 --- a/tiled/adapters/type_alliases.py +++ b/tiled/adapters/type_alliases.py @@ -2,4 +2,3 @@ JSON = dict[str, "JSON"] | list["JSON"] | str | int | float | bool | None Spec = TypedDict({"name": str, "version": str}) -HDF5 = dict[str, "JSON"] | list["JSON"] | str | int | float | bool | None diff --git a/tiled/adapters/utils.py b/tiled/adapters/utils.py index b5382bf9e..73173bbb8 100644 --- a/tiled/adapters/utils.py +++ b/tiled/adapters/utils.py @@ -1,4 +1,5 @@ import warnings +from typing import Any # for back-compat from ..utils import node_repr as tree_repr # noqa: F401 @@ -16,18 +17,23 @@ class IndexersMixin: This is just for back-ward compatiblity. 
""" + keys: Any + values: Any + items: Any + fn: Any + @property - def keys_indexer(self): + def keys_indexer(self) -> Any: warnings.warn(_MESSAGE.format(name="keys"), DeprecationWarning) return self.keys() @property - def values_indexer(self): + def values_indexer(self) -> Any: warnings.warn(_MESSAGE.format(name="values"), DeprecationWarning) return self.values() @property - def items_indexer(self): + def items_indexer(self) -> Any: warnings.warn(_MESSAGE.format(name="items"), DeprecationWarning) return self.items() @@ -50,8 +56,8 @@ class IndexCallable: __slots__ = ("fn",) - def __init__(self, fn): + def __init__(self, fn: Any) -> None: self.fn = fn - def __getitem__(self, key): + def __getitem__(self, key: str) -> Any: return self.fn(key) diff --git a/tiled/adapters/xarray.py b/tiled/adapters/xarray.py index cc44e5040..c7e2c2013 100644 --- a/tiled/adapters/xarray.py +++ b/tiled/adapters/xarray.py @@ -1,6 +1,6 @@ import collections.abc import itertools -from typing import Any, Iterable, Iterator, Optional, Union +from typing import Any, Iterator, Optional, Union import xarray diff --git a/tiled/adapters/zarr.py b/tiled/adapters/zarr.py index fb4b18b54..a00d32bcb 100644 --- a/tiled/adapters/zarr.py +++ b/tiled/adapters/zarr.py @@ -4,6 +4,8 @@ from types import EllipsisType from typing import Any, Iterator, Optional, Tuple, Union +import dask +import pandas import zarr.core import zarr.hierarchy import zarr.storage @@ -82,7 +84,9 @@ def read_block( return self._array[self._stencil()][block_slice][slice] def write( - self, data: NDArray[Any], slice: Optional[Union[slice, EllipsisType]] + self, + data: Union[dask.dataframe.DataFrame, pandas.DataFrame], + slice: Optional[Union[slice, EllipsisType]], ) -> None: if slice is not ...: raise NotImplementedError @@ -90,7 +94,7 @@ def write( async def write_block( self, - data: NDArray[Any], + data: Union[dask.dataframe.DataFrame, pandas.DataFrame], block: Tuple[int, ...], slice: Optional[Union[slice, EllipsisType]], ) -> None: