diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md new file mode 100644 index 000000000..f20c8ea05 --- /dev/null +++ b/.github/pull_request_template.md @@ -0,0 +1,3 @@ +### Checklist +- [ ] Add a Changelog entry +- [ ] Add the ticket number that this PR closes to the comment section diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 1743ab7cf..3c21b69d0 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -8,7 +8,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [3.8] + python-version: ["3.12"] fail-fast: false steps: diff --git a/.github/workflows/publish-docs.yml b/.github/workflows/publish-docs.yml index 394f01f9b..cbc9a5be9 100644 --- a/.github/workflows/publish-docs.yml +++ b/.github/workflows/publish-docs.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [3.8] + python-version: ["3.12"] fail-fast: false steps: diff --git a/.mypy.ini b/.mypy.ini new file mode 100644 index 000000000..fd2f013a1 --- /dev/null +++ b/.mypy.ini @@ -0,0 +1,26 @@ +[mypy] +ignore_errors = True +ignore_missing_imports = True +disallow_untyped_defs = False +disallow_untyped_calls = False +check_untyped_defs = False +disallow_incomplete_defs = False + + +[mypy-tiled.adapters.*] +ignore_errors = False +ignore_missing_imports = False +check_untyped_defs = True +disallow_untyped_defs = True +disallow_incomplete_defs = True +disallow_untyped_calls = True +disallow_untyped_decorators = True + +[mypy-tiled._tests.test_protocols] +ignore_errors = False +ignore_missing_imports = False +check_untyped_defs = True +disallow_untyped_defs = True +disallow_incomplete_defs = True +disallow_untyped_calls = True +disallow_untyped_decorators = True diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 8b46f9f86..503573beb 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -27,3 +27,9 @@ repos: rev: 23.10.1 hooks: - id: black + +- repo: https://github.com/pre-commit/mirrors-mypy + rev: v1.9.0 + hooks: + - id: mypy + args: [--strict] diff --git a/CHANGELOG.md b/CHANGELOG.md index ed707989f..e9aaa8b73 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,8 @@ Write the date in place of the "Unreleased" in the case a new version is release - The `tiled serve ...` CLI commands now accept a `--log-config` option, pointing to a custom uvicorn logging configuration file. An example file was added to the repository root, `example_log_config.yml`. +- Added `tiled.adapters.protocols`, which makes it possible for users to + implement their own custom adapters in a way that satisfies mypy. ### Changed diff --git a/pyproject.toml b/pyproject.toml index 463e9153c..125e0f4e8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -153,6 +153,7 @@ dev = [ "pytest <8", # TMP pin while plugins catch up "pytest-asyncio", "pytest-rerunfailures", + "pytest-mock", "sphinx !=4.1.0, !=4.1.1, !=4.1.2, !=4.2.0", # These are dependencies of various sphinx extensions for documentation. 
"ipython", diff --git a/tiled/_tests/test_catalog.py b/tiled/_tests/test_catalog.py index 22daa33da..51c8df69b 100644 --- a/tiled/_tests/test_catalog.py +++ b/tiled/_tests/test_catalog.py @@ -283,7 +283,8 @@ async def test_write_array_internal_direct(a, tmpdir): ) x = await a.lookup_adapter(["x"]) await x.write(arr) - assert numpy.array_equal(await x.read(), arr) + val = await x.read() + assert numpy.array_equal(val, arr) def test_write_array_internal_via_client(client): diff --git a/tiled/_tests/test_protocols.py b/tiled/_tests/test_protocols.py new file mode 100644 index 000000000..e54921cd5 --- /dev/null +++ b/tiled/_tests/test_protocols.py @@ -0,0 +1,420 @@ +from pathlib import Path +from typing import Any, Dict, List, Literal, Optional, Tuple, Union + +import dask.dataframe +import numpy +import pandas +import sparse +from numpy.typing import NDArray +from pytest_mock import MockFixture + +from tiled.access_policies import ALL_ACCESS, ALL_SCOPES +from tiled.adapters.awkward_directory_container import DirectoryContainer +from tiled.adapters.protocols import ( + AccessPolicy, + ArrayAdapter, + AwkwardAdapter, + BaseAdapter, + SparseAdapter, + TableAdapter, +) +from tiled.adapters.type_alliases import JSON, Filters, NDSlice, Scopes +from tiled.server.schemas import Principal, PrincipalType +from tiled.structures.array import ArrayStructure, BuiltinDtype +from tiled.structures.awkward import AwkwardStructure +from tiled.structures.core import Spec, StructureFamily +from tiled.structures.sparse import COOStructure +from tiled.structures.table import TableStructure + + +class CustomArrayAdapter: + structure_family: Literal[StructureFamily.array] = StructureFamily.array + + def __init__( + self, + array: NDArray[Any], + structure: ArrayStructure, + metadata: Optional[JSON] = None, + specs: Optional[List[Spec]] = None, + access_policy: Optional[AccessPolicy] = None, + ) -> None: + self._array = array + self._structure = structure + self._metadata = metadata or {} + self._specs = specs or [] + + def structure(self) -> ArrayStructure: + return self._structure + + def read(self, slice: NDSlice) -> NDArray[Any]: + return self._array + + def read_block(self, block: Tuple[int, ...]) -> NDArray[Any]: + return self._array + + def specs(self) -> List[Spec]: + return self._specs + + def metadata(self) -> JSON: + return self._metadata + + +def arrayadapter_protocol_functions( + adapter: ArrayAdapter, slice: NDSlice, block: Tuple[int, ...] 
+) -> None: + adapter.structure() + adapter.read(slice) + adapter.read_block(block) + adapter.specs() + adapter.metadata() + + +def test_arrayadapter_protocol(mocker: MockFixture) -> None: + mock_call = mocker.patch.object(CustomArrayAdapter, "structure") + mock_call2 = mocker.patch.object(CustomArrayAdapter, "read") + mock_call3 = mocker.patch.object(CustomArrayAdapter, "read_block") + mock_call4 = mocker.patch.object(CustomArrayAdapter, "specs") + mock_call5 = mocker.patch.object(CustomArrayAdapter, "metadata") + + structure = ArrayStructure( + data_type=BuiltinDtype.from_numpy_dtype(numpy.dtype("int32")), + shape=(2, 512, 512), + chunks=((1, 1), (512,), (512,)), + dims=("time", "x", "y"), # optional + ) + + array = numpy.random.rand(2, 512, 512) + metadata: JSON = {"foo": "bar"} + anyslice = (1, 1, 1) + anyblock = (1, 1, 1) + + anyarrayadapter = CustomArrayAdapter(array, structure, metadata=metadata) + assert anyarrayadapter.structure_family == StructureFamily.array + + arrayadapter_protocol_functions(anyarrayadapter, anyslice, anyblock) + mock_call.assert_called_once() + mock_call2.assert_called_once_with(anyslice) + mock_call3.assert_called_once_with(anyblock) + mock_call4.assert_called_once() + mock_call5.assert_called_once() + + +class CustomAwkwardAdapter: + structure_family: Literal[StructureFamily.awkward] = StructureFamily.awkward + + def __init__( + self, + container: DirectoryContainer, + structure: AwkwardStructure, + metadata: Optional[JSON] = None, + specs: Optional[List[Spec]] = None, + access_policy: Optional[AccessPolicy] = None, + ) -> None: + self.container = container + self._metadata = metadata or {} + self._structure = structure + self._specs = list(specs or []) + self.access_policy = access_policy + + def structure(self) -> AwkwardStructure: + return self._structure + + def read(self) -> NDArray[Any]: + return numpy.random.rand(4) + + def read_buffers(self, form_keys: Optional[List[str]] = None) -> Dict[str, Any]: + return {"a": 123} + + def write(self, container: DirectoryContainer) -> None: + return None + + def specs(self) -> List[Spec]: + return self._specs + + def metadata(self) -> JSON: + return self._metadata + + +def awkwardadapter_protocol_functions( + adapter: AwkwardAdapter, + slice: NDSlice, + form_keys: Optional[List[str]], + container: DirectoryContainer, +) -> None: + adapter.structure() + adapter.read() + adapter.read_buffers(form_keys) + adapter.write(container) + adapter.specs() + adapter.metadata() + + +def test_awkwardadapter_protocol(mocker: MockFixture) -> None: + mock_call = mocker.patch.object(CustomAwkwardAdapter, "structure") + mock_call2 = mocker.patch.object(CustomAwkwardAdapter, "read") + mock_call3 = mocker.patch.object(CustomAwkwardAdapter, "read_buffers") + mock_call4 = mocker.patch.object(CustomAwkwardAdapter, "write") + mock_call5 = mocker.patch.object(CustomAwkwardAdapter, "specs") + mock_call6 = mocker.patch.object(CustomAwkwardAdapter, "metadata") + + structure = AwkwardStructure(length=2, form={"a": "b"}) + + metadata: JSON = {"foo": "bar"} + anyslice = (1, 1, 1) + container = DirectoryContainer(directory=Path("somedirectory"), form={}) + form_keys = ["a", "b", "c"] + + anyawkwardadapter = CustomAwkwardAdapter(container, structure, metadata=metadata) + + assert anyawkwardadapter.structure_family == StructureFamily.awkward + + awkwardadapter_protocol_functions(anyawkwardadapter, anyslice, form_keys, container) + mock_call.assert_called_once() + mock_call2.assert_called_once() + mock_call3.assert_called_once_with(form_keys) + 
mock_call4.assert_called_once_with(container) + mock_call5.assert_called_once() + mock_call6.assert_called_once() + + +class CustomSparseAdapter: + structure_family: Literal[StructureFamily.sparse] = StructureFamily.sparse + + def __init__( + self, + blocks: Dict[Tuple[int, ...], Tuple[NDArray[Any], Any]], + structure: COOStructure, + *, + metadata: Optional[JSON] = None, + specs: Optional[List[Spec]] = None, + access_policy: Optional[AccessPolicy] = None, + ) -> None: + """ + Construct from blocks with coords given in block-local reference frame. + Parameters + ---------- + blocks : + structure : + metadata : + specs : + access_policy : + """ + self.blocks = blocks + self._metadata = metadata or {} + self._structure = structure + self._specs = specs or [] + self.access_policy = access_policy + + all_coords = [[1], [1]] + all_data = [[1], [1]] + + self.arr = sparse.COO( + data=numpy.concatenate(all_data), + coords=numpy.concatenate(all_coords, axis=-1), + shape=self._structure.shape, + ) + + def structure(self) -> COOStructure: + return self._structure + + def read(self, slice: NDSlice) -> sparse.COO: + return self.arr + + def read_block(self, block: Tuple[int, ...]) -> sparse.COO: + return self.arr + + def specs(self) -> List[Spec]: + return self._specs + + def metadata(self) -> JSON: + return self._metadata + + +def sparseadapter_protocol_functions( + adapter: SparseAdapter, slice: NDSlice, block: Tuple[int, ...] +) -> None: + adapter.structure() + adapter.read(slice) + adapter.read_block(block) + adapter.specs() + adapter.metadata() + + +def test_sparseadapter_protocol(mocker: MockFixture) -> None: + mock_call = mocker.patch.object(CustomSparseAdapter, "structure") + mock_call2 = mocker.patch.object(CustomSparseAdapter, "read") + mock_call3 = mocker.patch.object(CustomSparseAdapter, "read_block") + mock_call4 = mocker.patch.object(CustomSparseAdapter, "specs") + mock_call5 = mocker.patch.object(CustomSparseAdapter, "metadata") + + structure = COOStructure(shape=(2 * 5,), chunks=((5, 5),)) + + array = numpy.random.rand(2, 512, 512) + blocks: Dict[Tuple[int, ...], Tuple[NDArray[Any], Any]] = {(1,): (array, (1,))} + metadata: JSON = {"foo": "bar"} + anyslice = (1, 1, 1) + anyblock = (1, 1, 1) + + anysparseadapter = CustomSparseAdapter(blocks, structure, metadata=metadata) + assert anysparseadapter.structure_family == StructureFamily.sparse + + sparseadapter_protocol_functions(anysparseadapter, anyslice, anyblock) + mock_call.assert_called_once() + mock_call2.assert_called_once_with(anyslice) + mock_call3.assert_called_once_with(anyblock) + mock_call4.assert_called_once() + mock_call5.assert_called_once() + + +class CustomTableAdapter: + structure_family: Literal[StructureFamily.table] = StructureFamily.table + + def __init__( + self, + partitions: List[Any], + structure: TableStructure, + *, + metadata: Optional[JSON] = None, + specs: Optional[List[Spec]] = None, + access_policy: Optional[AccessPolicy] = None, + ) -> None: + """ + + Parameters + ---------- + partitions : + structure : + metadata : + specs : + access_policy : + """ + self._metadata = metadata or {} + self._partitions = list(partitions) + self._structure = structure + self._specs = specs or [] + self.access_policy = access_policy + + def structure(self) -> TableStructure: + return self._structure + + def read( + self, fields: Optional[List[str]] = None + ) -> Union[dask.dataframe.DataFrame, pandas.DataFrame]: + return self._partitions + + def read_partition( + self, + partition: int, + fields: Optional[str] = None, + ) -> 
Union[dask.dataframe.DataFrame, pandas.DataFrame]: + return self._partitions[partition] + + def specs(self) -> List[Spec]: + return self._specs + + def metadata(self) -> JSON: + return self._metadata + + def __getitem__(self, key: str) -> ArrayAdapter: + array = numpy.random.rand(2, 512, 512) + metadata: JSON = {"foo": "bar"} + structure = ArrayStructure( + data_type=BuiltinDtype.from_numpy_dtype(numpy.dtype("int32")), + shape=(2, 512, 512), + chunks=((1, 1), (512,), (512,)), + dims=("time", "x", "y"), # optional + ) + return CustomArrayAdapter(array, structure, metadata=metadata) + + +def tableadapter_protocol_functions( + adapter: TableAdapter, partition: int, fields: List[str] +) -> None: + adapter.structure() + adapter.read(fields) + adapter.read_partition(partition) + adapter.specs() + adapter.metadata() + adapter["abc"] + + +def test_tableadapter_protocol(mocker: MockFixture) -> None: + mock_call = mocker.patch.object(CustomTableAdapter, "structure") + mock_call2 = mocker.patch.object(CustomTableAdapter, "read") + mock_call3 = mocker.patch.object(CustomTableAdapter, "read_partition") + mock_call4 = mocker.patch.object(CustomTableAdapter, "specs") + mock_call5 = mocker.patch.object(CustomTableAdapter, "metadata") + mock_call6 = mocker.patch.object(CustomTableAdapter, "__getitem__") + + structure = TableStructure( + arrow_schema="a", npartitions=1, columns=["A"], resizable=False + ) + + partitions = pandas.DataFrame([1]) + metadata: JSON = {"foo": "bar"} + fields = ["a", "b", "c"] + partition = 1 + + anytableadapter = CustomTableAdapter(partitions, structure, metadata=metadata) + assert anytableadapter.structure_family == StructureFamily.table + + tableadapter_protocol_functions(anytableadapter, partition, fields) + mock_call.assert_called_once() + mock_call2.assert_called_once_with(fields) + mock_call3.assert_called_once_with(partition) + mock_call4.assert_called_once() + mock_call5.assert_called_once() + mock_call6.assert_called_once_with("abc") + + +class CustomAccessPolicy: + ALL = ALL_ACCESS + + def __init__(self, scopes: Optional[Scopes] = None) -> None: + self.scopes = scopes if (scopes is not None) else ALL_SCOPES + + def _get_id(self, principal: Principal) -> None: + return None + + def allowed_scopes(self, node: BaseAdapter, principal: Principal) -> Scopes: + allowed = self.scopes + somemetadata = node.metadata() # noqa: F841 + return allowed + + def filters( + self, node: BaseAdapter, principal: Principal, scopes: Scopes + ) -> Filters: + queries: Filters = [] + somespecs = node.specs() # noqa: F841 + return queries + + +def accesspolicy_protocol_functions( + policy: AccessPolicy, node: BaseAdapter, principal: Principal, scopes: Scopes +) -> None: + policy.allowed_scopes(node, principal) + policy.filters(node, principal, scopes) + + +def test_accesspolicy_protocol(mocker: MockFixture) -> None: + mock_call = mocker.patch.object(CustomAwkwardAdapter, "metadata") + mock_call2 = mocker.patch.object(CustomAwkwardAdapter, "specs") + + anyaccesspolicy = CustomAccessPolicy(scopes={"a12mdjnk4"}) + + structure = AwkwardStructure(length=2, form={"a": "b"}) + + metadata: JSON = {"foo": "bar"} + container = DirectoryContainer(directory=Path("somedirectory"), form={}) + principal = Principal( + uuid="12345678124123412345678123456781", type=PrincipalType.user + ) + scopes = {"abc"} + + anyawkwardadapter = CustomAwkwardAdapter(container, structure, metadata=metadata) + + accesspolicy_protocol_functions( + anyaccesspolicy, anyawkwardadapter, principal, scopes + ) + 
mock_call.assert_called_once() + mock_call2.assert_called_once() diff --git a/tiled/adapters/array.py b/tiled/adapters/array.py index 3996f63b9..c849bc799 100644 --- a/tiled/adapters/array.py +++ b/tiled/adapters/array.py @@ -1,7 +1,12 @@ +from typing import Any, List, Optional, Tuple + import dask.array +from numpy.typing import NDArray from ..structures.array import ArrayStructure -from ..structures.core import StructureFamily +from ..structures.core import Spec, StructureFamily +from .protocols import AccessPolicy +from .type_alliases import JSON, NDSlice class ArrayAdapter: @@ -23,13 +28,23 @@ class ArrayAdapter: def __init__( self, - array, - structure, + array: NDArray[Any], + structure: ArrayStructure, *, - metadata=None, - specs=None, - access_policy=None, - ): + metadata: Optional[JSON] = None, + specs: Optional[List[Spec]] = None, + access_policy: Optional[AccessPolicy] = None, + ) -> None: + """ + + Parameters + ---------- + array : + structure : + metadata : + specs : + access_policy : + """ self._array = array self._structure = structure self._metadata = metadata or {} @@ -38,15 +53,31 @@ def __init__( @classmethod def from_array( cls, - array, + array: NDArray[Any], *, - shape=None, - chunks=None, - dims=None, - metadata=None, - specs=None, - access_policy=None, - ): + shape: Optional[Tuple[int, ...]] = None, + chunks: Optional[Tuple[Tuple[int, ...], ...]] = None, + dims: Optional[Tuple[str, ...]] = None, + metadata: Optional[JSON] = None, + specs: Optional[List[Spec]] = None, + access_policy: Optional[AccessPolicy] = None, + ) -> "ArrayAdapter": + """ + + Parameters + ---------- + array : + shape : + chunks : + dims : + metadata : + specs : + access_policy : + + Returns + ------- + + """ structure = ArrayStructure.from_array( array, shape=shape, chunks=chunks, dims=dims ) @@ -58,28 +89,78 @@ def from_array( access_policy=access_policy, ) - def __repr__(self): + def __repr__(self) -> str: + """ + + Returns + ------- + + """ return f"{type(self).__name__}({self._array!r})" @property - def dims(self): + def dims(self) -> Optional[Tuple[str, ...]]: + """ + + Returns + ------- + + """ return self._structure.dims - def metadata(self): + def metadata(self) -> JSON: + """ + + Returns + ------- + + """ return self._metadata - def structure(self): + def structure(self) -> ArrayStructure: + """ + + Returns + ------- + + """ return self._structure - def read(self, slice=None): - array = self._array - if slice is not None: - array = array[slice] + def read( + self, + slice: NDSlice = ..., + ) -> NDArray[Any]: + """ + + Parameters + ---------- + slice : + + Returns + ------- + + """ + array = self._array[slice] if isinstance(self._array, dask.array.Array): return array.compute() return array - def read_block(self, block, slice=None): + def read_block( + self, + block: Tuple[int, ...], + slice: NDSlice = ..., + ) -> NDArray[Any]: + """ + + Parameters + ---------- + block : + slice : + + Returns + ------- + + """ # Slice the whole array to get this block. slice_, _ = slice_and_shape_from_block_and_chunks(block, self._structure.chunks) array = self._array[slice_] @@ -91,9 +172,19 @@ def read_block(self, block, slice=None): return array -def slice_and_shape_from_block_and_chunks(block, chunks): +def slice_and_shape_from_block_and_chunks( + block: Tuple[int, ...], chunks: Tuple[Tuple[int, ...], ...] +) -> Tuple[NDSlice, Tuple[int, ...]]: """ Given dask-like chunks and block id, return slice and shape of the block. 
+ Parameters + ---------- + block : + chunks : + + Returns + ------- + """ slice_ = [] shape = [] diff --git a/tiled/adapters/awkward.py b/tiled/adapters/awkward.py index bc1d491d4..c5e76d1dc 100644 --- a/tiled/adapters/awkward.py +++ b/tiled/adapters/awkward.py @@ -1,8 +1,14 @@ +from typing import Any, Dict, List, Optional + import awkward import awkward.forms +from numpy.typing import NDArray from ..structures.awkward import AwkwardStructure -from ..structures.core import StructureFamily +from ..structures.core import Spec, StructureFamily +from .awkward_directory_container import DirectoryContainer +from .protocols import AccessPolicy +from .type_alliases import JSON class AwkwardAdapter: @@ -10,12 +16,22 @@ class AwkwardAdapter: def __init__( self, - container, - structure, - metadata=None, - specs=None, - access_policy=None, - ): + container: DirectoryContainer, + structure: AwkwardStructure, + metadata: Optional[JSON] = None, + specs: Optional[List[Spec]] = None, + access_policy: Optional[AccessPolicy] = None, + ) -> None: + """ + + Parameters + ---------- + container : + structure : + metadata : + specs : + access_policy : + """ self.container = container self._metadata = metadata or {} self._structure = structure @@ -23,7 +39,26 @@ def __init__( self.access_policy = access_policy @classmethod - def from_array(cls, array, metadata=None, specs=None, access_policy=None): + def from_array( + cls, + array: NDArray[Any], + metadata: Optional[JSON] = None, + specs: Optional[List[Spec]] = None, + access_policy: Optional[AccessPolicy] = None, + ) -> "AwkwardAdapter": + """ + + Parameters + ---------- + array : + metadata : + specs : + access_policy : + + Returns + ------- + + """ form, length, container = awkward.to_buffers(array) structure = AwkwardStructure(length=length, form=form.to_dict()) return cls( @@ -34,10 +69,26 @@ def from_array(cls, array, metadata=None, specs=None, access_policy=None): access_policy=access_policy, ) - def metadata(self): + def metadata(self) -> JSON: + """ + + Returns + ------- + + """ return self._metadata - def read_buffers(self, form_keys=None): + def read_buffers(self, form_keys: Optional[List[str]] = None) -> Dict[str, bytes]: + """ + + Parameters + ---------- + form_keys : + + Returns + ------- + + """ form = awkward.forms.from_dict(self._structure.form) keys = [ key @@ -50,12 +101,34 @@ def read_buffers(self, form_keys=None): buffers[key] = self.container[key] return buffers - def read(self): + def read(self) -> Dict[str, bytes]: + """ + + Returns + ------- + + """ return dict(self.container) - def write(self, container): + def write(self, container: DirectoryContainer) -> None: + """ + + Parameters + ---------- + container : + + Returns + ------- + + """ for form_key, value in container.items(): self.container[form_key] = value - def structure(self): + def structure(self) -> AwkwardStructure: + """ + + Returns + ------- + + """ return self._structure diff --git a/tiled/adapters/awkward_buffers.py b/tiled/adapters/awkward_buffers.py index a12b92234..8a595e6d8 100644 --- a/tiled/adapters/awkward_buffers.py +++ b/tiled/adapters/awkward_buffers.py @@ -1,60 +1,68 @@ """ A directory containing awkward buffers, one file per form key. 
""" -import collections.abc +from pathlib import Path +from typing import List, Optional import awkward.forms -from ..structures.core import StructureFamily +from ..server.schemas import Asset +from ..structures.awkward import AwkwardStructure +from ..structures.core import Spec, StructureFamily from ..utils import path_from_uri from .awkward import AwkwardAdapter - - -class DirectoryContainer(collections.abc.MutableMapping): - def __init__(self, directory, form): - self.directory = directory - self.form = form - - def __getitem__(self, form_key): - with open(self.directory / form_key, "rb") as file: - return file.read() - - def __setitem__(self, form_key, value): - with open(self.directory / form_key, "wb") as file: - file.write(value) - - def __delitem__(self, form_key): - (self.directory / form_key).unlink(missing_ok=True) - - def __iter__(self): - yield from self.form.expected_from_buffers() - - def __len__(self): - return len(self.form.expected_from_buffers()) +from .awkward_directory_container import DirectoryContainer +from .protocols import AccessPolicy +from .type_alliases import JSON class AwkwardBuffersAdapter(AwkwardAdapter): + """ """ + structure_family = StructureFamily.awkward @classmethod - def init_storage(cls, data_uri, structure): - from ..server.schemas import Asset + def init_storage(cls, data_uri: str, structure: AwkwardStructure) -> List[Asset]: + """ + + Parameters + ---------- + data_uri : + structure : + + Returns + ------- - directory = path_from_uri(data_uri) + """ + directory: Path = path_from_uri(data_uri) directory.mkdir(parents=True, exist_ok=True) return [Asset(data_uri=data_uri, is_directory=True, parameter="data_uri")] @classmethod def from_directory( cls, - data_uri, - structure, - metadata=None, - specs=None, - access_policy=None, - ): + data_uri: str, + structure: AwkwardStructure, + metadata: Optional[JSON] = None, + specs: Optional[List[Spec]] = None, + access_policy: Optional[AccessPolicy] = None, + ) -> "AwkwardBuffersAdapter": + """ + + Parameters + ---------- + data_uri : + structure : + metadata : + specs : + access_policy : + + Returns + ------- + + """ form = awkward.forms.from_dict(structure.form) - directory = path_from_uri(data_uri) + directory: Path = path_from_uri(data_uri) if not directory.is_dir(): raise ValueError(f"Not a directory: {directory}") container = DirectoryContainer(directory, form) diff --git a/tiled/adapters/awkward_directory_container.py b/tiled/adapters/awkward_directory_container.py new file mode 100644 index 000000000..1c7eab34c --- /dev/null +++ b/tiled/adapters/awkward_directory_container.py @@ -0,0 +1,87 @@ +import sys +from pathlib import Path +from typing import Any, Iterator + +if sys.version_info < (3, 9): + from typing_extensions import MutableMapping + + MappingType = MutableMapping +else: + import collections + + MappingType = collections.abc.MutableMapping + + +class DirectoryContainer(MappingType[str, bytes]): + """ """ + + def __init__(self, directory: Path, form: Any): + """ + + Parameters + ---------- + directory : + form : + """ + self.directory = directory + self.form = form + + def __getitem__(self, form_key: str) -> bytes: + """ + + Parameters + ---------- + form_key : + + Returns + ------- + + """ + with open(self.directory / form_key, "rb") as file: + return file.read() + + def __setitem__(self, form_key: str, value: bytes) -> None: + """ + + Parameters + ---------- + form_key : + value : + + Returns + ------- + + """ + with open(self.directory / form_key, "wb") as file: + file.write(value) + + def 
__delitem__(self, form_key: str) -> None: + """ + + Parameters + ---------- + form_key : + + Returns + ------- + + """ + (self.directory / form_key).unlink(missing_ok=True) + + def __iter__(self) -> Iterator[str]: + """ + + Returns + ------- + + """ + yield from self.form.expected_from_buffers() + + def __len__(self) -> int: + """ + + Returns + ------- + + """ + return len(self.form.expected_from_buffers()) diff --git a/tiled/adapters/csv.py b/tiled/adapters/csv.py index 03418c1b7..08eed0554 100644 --- a/tiled/adapters/csv.py +++ b/tiled/adapters/csv.py @@ -1,23 +1,28 @@ from pathlib import Path +from typing import Any, Callable, Dict, Iterator, List, Optional, Tuple, Union import dask.dataframe +import pandas -from ..structures.core import StructureFamily +from ..structures.core import Spec, StructureFamily from ..structures.data_source import Asset, DataSource, Management from ..structures.table import TableStructure from ..utils import ensure_uri, path_from_uri from .array import ArrayAdapter from .dataframe import DataFrameAdapter +from .protocols import AccessPolicy +from .table import TableAdapter +from .type_alliases import JSON def read_csv( - data_uri, - structure=None, - metadata=None, - specs=None, - access_policy=None, - **kwargs, -): + data_uri: str, + structure: Optional[TableStructure] = None, + metadata: Optional[JSON] = None, + specs: Optional[List[Spec]] = None, + access_policy: Optional[AccessPolicy] = None, + **kwargs: Any, +) -> TableAdapter: """ Read a CSV. @@ -30,6 +35,18 @@ def read_csv( >>> read_csv("myfiles.*.csv") >>> read_csv("s3://bucket/myfiles.*.csv") + + Parameters + ---------- + data_uri : + structure : + metadata : + specs : + access_policy : + kwargs : + + Returns + ------- """ filepath = path_from_uri(data_uri) ddf = dask.dataframe.read_csv(filepath, **kwargs) @@ -49,16 +66,28 @@ def read_csv( class CSVAdapter: + """ """ + structure_family = StructureFamily.table def __init__( self, - data_uris, - structure=None, - metadata=None, - specs=None, - access_policy=None, - ): + data_uris: List[str], + structure: Optional[TableStructure] = None, + metadata: Optional[JSON] = None, + specs: Optional[List[Spec]] = None, + access_policy: Optional[AccessPolicy] = None, + ) -> None: + """ + + Parameters + ---------- + data_uris : + structure : + metadata : + specs : + access_policy : + """ # TODO Store data_uris instead and generalize to non-file schemes. 
self._partition_paths = [path_from_uri(uri) for uri in data_uris] self._metadata = metadata or {} @@ -69,11 +98,23 @@ def __init__( self.specs = list(specs or []) self.access_policy = access_policy - def metadata(self): + def metadata(self) -> JSON: + """ + + Returns + ------- + + """ return self._metadata @property - def dataframe_adapter(self): + def dataframe_adapter(self) -> TableAdapter: + """ + + Returns + ------- + + """ partitions = [] for path in self._partition_paths: if not Path(path).exists(): @@ -84,9 +125,18 @@ def dataframe_adapter(self): return DataFrameAdapter(partitions, self._structure) @classmethod - def init_storage(cls, data_uri, structure): - from ..server.schemas import Asset + def init_storage(cls, data_uri: str, structure: TableStructure) -> List[Asset]: + """ + + Parameters + ---------- + data_uri : + structure : + Returns + ------- + + """ directory = path_from_uri(data_uri) directory.mkdir(parents=True, exist_ok=True) assets = [ @@ -100,35 +150,130 @@ def init_storage(cls, data_uri, structure): ] return assets - def append_partition(self, data, partition): + def append_partition( + self, data: Union[dask.dataframe.DataFrame, pandas.DataFrame], partition: int + ) -> None: + """ + + Parameters + ---------- + data : + partition : + + Returns + ------- + + """ uri = self._partition_paths[partition] data.to_csv(uri, index=False, mode="a", header=False) - def write_partition(self, data, partition): + def write_partition( + self, data: Union[dask.dataframe.DataFrame, pandas.DataFrame], partition: int + ) -> None: + """ + + Parameters + ---------- + data : + partition : + + Returns + ------- + + """ uri = self._partition_paths[partition] data.to_csv(uri, index=False) - def write(self, data): + def write(self, data: Union[dask.dataframe.DataFrame, pandas.DataFrame]) -> None: + """ + + Parameters + ---------- + data : + + Returns + ------- + + """ if self.structure().npartitions != 1: raise NotImplementedError uri = self._partition_paths[0] data.to_csv(uri, index=False) - def read(self, *args, **kwargs): + def read( + self, *args: Any, **kwargs: Any + ) -> Union[pandas.DataFrame, dask.dataframe.DataFrame]: + """ + + Parameters + ---------- + args : + kwargs : + + Returns + ------- + + """ return self.dataframe_adapter.read(*args, **kwargs) - def read_partition(self, *args, **kwargs): + def read_partition(self, *args: Any, **kwargs: Any) -> pandas.DataFrame: + """ + + Parameters + ---------- + args : + kwargs : + + Returns + ------- + + """ return self.dataframe_adapter.read_partition(*args, **kwargs) - def structure(self): + def structure(self) -> TableStructure: + """ + + Returns + ------- + + """ return self._structure - def get(self, key): + def get(self, key: str) -> Union[ArrayAdapter, None]: + """ + + Parameters + ---------- + key : + + Returns + ------- + + """ if key not in self.structure().columns: return None return ArrayAdapter.from_array(self.read([key])[key].values) - def generate_data_sources(self, mimetype, dict_or_none, item, is_directory): + def generate_data_sources( + self, + mimetype: str, + dict_or_none: Callable[[TableStructure], Dict[str, str]], + item: Union[str, Path], + is_directory: bool, + ) -> List[DataSource]: + """ + + Parameters + ---------- + mimetype : + dict_or_none : + item : + is_directory : + + Returns + ------- + + """ return [ DataSource( structure_family=self.dataframe_adapter.structure_family, @@ -149,8 +294,27 @@ def generate_data_sources(self, mimetype, dict_or_none, item, is_directory): @classmethod def from_single_file( - 
cls, data_uri, structure=None, metadata=None, specs=None, access_policy=None - ): + cls, + data_uri: str, + structure: Optional[TableStructure] = None, + metadata: Optional[JSON] = None, + specs: Optional[List[Spec]] = None, + access_policy: Optional[AccessPolicy] = None, + ) -> "CSVAdapter": + """ + + Parameters + ---------- + data_uri : + structure : + metadata : + specs : + access_policy : + + Returns + ------- + + """ return cls( [data_uri], structure=structure, @@ -159,11 +323,27 @@ def from_single_file( access_policy=access_policy, ) - def __getitem__(self, key): + def __getitem__(self, key: str) -> ArrayAdapter: + """ + + Parameters + ---------- + key : + + Returns + ------- + + """ # Must compute to determine shape. return ArrayAdapter.from_array(self.read([key])[key].values) - def items(self): + def items(self) -> Iterator[Tuple[str, ArrayAdapter]]: + """ + + Returns + ------- + + """ yield from ( (key, ArrayAdapter.from_array(self.read([key])[key].values)) for key in self._structure.columns diff --git a/tiled/adapters/excel.py b/tiled/adapters/excel.py index ed9418f52..384aac86c 100644 --- a/tiled/adapters/excel.py +++ b/tiled/adapters/excel.py @@ -1,3 +1,5 @@ +from typing import Any + import dask.dataframe import pandas @@ -6,8 +8,10 @@ class ExcelAdapter(MapAdapter): + """ """ + @classmethod - def from_file(cls, file, **kwargs): + def from_file(cls, file: Any, **kwargs: Any) -> "ExcelAdapter": """ Read the sheets in an Excel file. @@ -28,6 +32,15 @@ def from_file(cls, file, **kwargs): >>> filepath = "path/to/excel_file.xlsx" >>> ef = pandas.ExcelFile(filepath) >>> ExcelAdapter.from_file(ef) + + Parameters + ---------- + file : + kwargs : + + Returns + ------- + """ if isinstance(file, pandas.ExcelFile): excel_file = file @@ -43,7 +56,7 @@ def from_file(cls, file, **kwargs): return cls(mapping, **kwargs) @classmethod - def from_uri(cls, data_uri, **kwargs): + def from_uri(cls, data_uri: str, **kwargs: Any) -> "ExcelAdapter": """ Read the sheets in an Excel file. 
@@ -56,6 +69,15 @@ def from_uri(cls, data_uri, **kwargs): Given a file path >>> ExcelAdapter.from_file("path/to/excel_file.xlsx") + + Parameters + ---------- + data_uri : + kwargs : + + Returns + ------- + """ file = pandas.ExcelFile(data_uri) return cls.from_file(file) diff --git a/tiled/adapters/hdf5.py b/tiled/adapters/hdf5.py index cd5489594..3483cec2a 100644 --- a/tiled/adapters/hdf5.py +++ b/tiled/adapters/hdf5.py @@ -1,55 +1,54 @@ import collections.abc import os +import sys import warnings +from pathlib import Path +from typing import Any, Iterator, List, Optional, Tuple, Union import h5py import numpy +from numpy.typing import NDArray from ..adapters.utils import IndexersMixin from ..iterviews import ItemsView, KeysView, ValuesView -from ..structures.core import StructureFamily +from ..structures.array import ArrayStructure +from ..structures.core import Spec, StructureFamily +from ..structures.table import TableStructure from ..utils import node_repr, path_from_uri from .array import ArrayAdapter +from .protocols import AccessPolicy from .resource_cache import with_resource_cache +from .type_alliases import JSON SWMR_DEFAULT = bool(int(os.getenv("TILED_HDF5_SWMR_DEFAULT", "0"))) INLINED_DEPTH = int(os.getenv("TILED_HDF5_INLINED_CONTENTS_MAX_DEPTH", "7")) -def hdf5_lookup( - data_uri, - *, - structure=None, - metadata=None, - swmr=SWMR_DEFAULT, - libver="latest", - specs=None, - access_policy=None, - path=None, -): - path = path or [] - adapter = HDF5Adapter.from_uri( - data_uri, - structure=structure, - metadata=metadata, - swmr=swmr, - libver=libver, - specs=specs, - access_policy=access_policy, - ) - for segment in path: - adapter = adapter.get(segment) - if adapter is None: - raise KeyError(segment) - # TODO What to do with metadata, specs? - return adapter +def from_dataset(dataset: NDArray[Any]) -> ArrayAdapter: + """ + Parameters + ---------- + dataset : -def from_dataset(dataset): + Returns + ------- + + """ return ArrayAdapter.from_array(dataset, metadata=getattr(dataset, "attrs", {})) -class HDF5Adapter(collections.abc.Mapping, IndexersMixin): +if sys.version_info < (3, 9): + from typing_extensions import Mapping + + MappingType = Mapping +else: + import collections + + MappingType = collections.abc.Mapping + + +class HDF5Adapter(MappingType[str, Union["HDF5Adapter", ArrayAdapter]], IndexersMixin): """ Read an HDF5 file or a group within one. 
@@ -80,8 +79,24 @@ class HDF5Adapter(collections.abc.Mapping, IndexersMixin): structure_family = StructureFamily.container def __init__( - self, node, *, structure=None, metadata=None, specs=None, access_policy=None - ): + self, + node: Any, + *, + structure: Optional[ArrayStructure] = None, + metadata: Optional[JSON] = None, + specs: Optional[List[Spec]] = None, + access_policy: Optional[AccessPolicy] = None, + ) -> None: + """ + + Parameters + ---------- + node : + structure : + metadata : + specs : + access_policy : + """ self._node = node self._access_policy = access_policy self.specs = specs or [] @@ -91,29 +106,61 @@ def __init__( @classmethod def from_file( cls, - file, + file: Any, *, - structure=None, - metadata=None, - swmr=SWMR_DEFAULT, - libver="latest", - specs=None, - access_policy=None, - ): + structure: Optional[TableStructure] = None, + metadata: Optional[JSON] = None, + swmr: bool = SWMR_DEFAULT, + libver: str = "latest", + specs: Optional[List[Spec]] = None, + access_policy: Optional[AccessPolicy] = None, + ) -> "HDF5Adapter": + """ + + Parameters + ---------- + file : + structure : + metadata : + swmr : + libver : + specs : + access_policy : + + Returns + ------- + + """ return cls(file, metadata=metadata, specs=specs, access_policy=access_policy) @classmethod def from_uri( cls, - data_uri, + data_uri: str, *, - structure=None, - metadata=None, - swmr=SWMR_DEFAULT, - libver="latest", - specs=None, - access_policy=None, - ): + structure: Optional[ArrayStructure] = None, + metadata: Optional[JSON] = None, + swmr: bool = SWMR_DEFAULT, + libver: str = "latest", + specs: Optional[List[Spec]] = None, + access_policy: Optional[AccessPolicy] = None, + ) -> "HDF5Adapter": + """ + + Parameters + ---------- + data_uri : + structure : + metadata : + swmr : + libver : + specs : + access_policy : + + Returns + ------- + + """ filepath = path_from_uri(data_uri) cache_key = (h5py.File, filepath, "r", swmr, libver) file = with_resource_cache( @@ -121,17 +168,41 @@ def from_uri( ) return cls.from_file(file) - def __repr__(self): + def __repr__(self) -> str: + """ + + Returns + ------- + + """ return node_repr(self, list(self)) @property - def access_policy(self): + def access_policy(self) -> Optional[AccessPolicy]: + """ + + Returns + ------- + + """ return self._access_policy - def structure(self): + def structure(self) -> None: + """ + + Returns + ------- + + """ return None - def metadata(self): + def metadata(self) -> JSON: + """ + + Returns + ------- + + """ d = dict(self._node.attrs) for k, v in list(d.items()): # Convert any bytes to str. 
@@ -140,10 +211,26 @@ def metadata(self): d.update(self._provided_metadata) return d - def __iter__(self): + def __iter__(self) -> Iterator[Any]: + """ + + Returns + ------- + + """ yield from self._node - def __getitem__(self, key): + def __getitem__(self, key: str) -> Union["HDF5Adapter", ArrayAdapter]: + """ + + Parameters + ---------- + key : + + Returns + ------- + + """ value = self._node[key] if isinstance(value, h5py.Group): return HDF5Adapter(value) @@ -169,42 +256,166 @@ def __getitem__(self, key): return from_dataset(numpy.array([])) return from_dataset(value) - def __len__(self): + def __len__(self) -> int: + """ + + Returns + ------- + + """ return len(self._node) - def keys(self): + def keys(self) -> KeysView: # type: ignore + """ + + Returns + ------- + + """ return KeysView(lambda: len(self), self._keys_slice) - def values(self): + def values(self) -> ValuesView: # type: ignore + """ + + Returns + ------- + + """ return ValuesView(lambda: len(self), self._items_slice) - def items(self): + def items(self) -> ItemsView: # type: ignore + """ + + Returns + ------- + + """ return ItemsView(lambda: len(self), self._items_slice) - def search(self, query): + def search(self, query: Any) -> None: """ - Return a Tree with a subset of the mapping. + + Parameters + ---------- + query : + + Returns + ------- + Return a Tree with a subset of the mapping. + """ raise NotImplementedError - def read(self, fields=None): + def read(self, fields: Optional[str] = None) -> "HDF5Adapter": + """ + + Parameters + ---------- + fields : + + Returns + ------- + + """ if fields is not None: raise NotImplementedError return self # The following two methods are used by keys(), values(), items(). - def _keys_slice(self, start, stop, direction): + def _keys_slice(self, start: int, stop: int, direction: int) -> List[Any]: + """ + + Parameters + ---------- + start : + stop : + direction : + + Returns + ------- + + """ keys = list(self._node) if direction < 0: - keys = reversed(keys) + keys = list(reversed(keys)) return keys[start:stop] - def _items_slice(self, start, stop, direction): + def _items_slice( + self, start: int, stop: int, direction: int + ) -> List[Tuple[Any, Any]]: + """ + + Parameters + ---------- + start : + stop : + direction : + + Returns + ------- + + """ items = [(key, self[key]) for key in list(self)] if direction < 0: - items = reversed(items) + items = list(reversed(items)) return items[start:stop] - def inlined_contents_enabled(self, depth): + def inlined_contents_enabled(self, depth: int) -> bool: + """ + + Parameters + ---------- + depth : + + Returns + ------- + + """ return depth <= INLINED_DEPTH + + +def hdf5_lookup( + data_uri: str, + *, + structure: Optional[ArrayStructure] = None, + metadata: Optional[JSON] = None, + swmr: bool = SWMR_DEFAULT, + libver: str = "latest", + specs: Optional[List[Spec]] = None, + access_policy: Optional[AccessPolicy] = None, + path: Optional[Union[List[Path], List[str]]] = None, +) -> Union[HDF5Adapter, ArrayAdapter]: + """ + + Parameters + ---------- + data_uri : + structure : + metadata : + swmr : + libver : + specs : + access_policy : + path : + + Returns + ------- + + """ + path = path or [] + adapter = HDF5Adapter.from_uri( + data_uri, + structure=structure, + metadata=metadata, + swmr=swmr, + libver=libver, + specs=specs, + access_policy=access_policy, + ) + for segment in path: + adapter = adapter.get(segment) # type: ignore + if adapter is None: + raise KeyError(segment) + # TODO What to do with metadata, specs? 
+ return adapter diff --git a/tiled/adapters/mapping.py b/tiled/adapters/mapping.py index 6fe2ab7fa..367d82c72 100644 --- a/tiled/adapters/mapping.py +++ b/tiled/adapters/mapping.py @@ -2,8 +2,12 @@ import copy import itertools import operator +import sys from collections import Counter -from datetime import datetime +from datetime import datetime, timedelta +from typing import Any, Dict, Iterator, List, Optional, Tuple, Union, cast + +from fastapi import APIRouter from ..iterviews import ItemsView, KeysView, ValuesView from ..queries import ( @@ -20,12 +24,25 @@ StructureFamilyQuery, ) from ..query_registration import QueryTranslationRegistry -from ..structures.core import StructureFamily -from ..utils import UNCHANGED +from ..server.schemas import SortingItem +from ..structures.core import Spec, StructureFamily +from ..structures.table import TableStructure +from ..utils import UNCHANGED, Sentinel +from .protocols import AccessPolicy, AnyAdapter +from .type_alliases import JSON from .utils import IndexersMixin +if sys.version_info < (3, 9): + from typing_extensions import Mapping + + MappingType = Mapping +else: + import collections + + MappingType = collections.abc.Mapping + -class MapAdapter(collections.abc.Mapping, IndexersMixin): +class MapAdapter(MappingType[str, AnyAdapter], IndexersMixin): """ Adapt any mapping (dictionary-like object) to Tiled. """ @@ -52,17 +69,17 @@ class MapAdapter(collections.abc.Mapping, IndexersMixin): def __init__( self, - mapping, + mapping: Dict[str, Any], *, - structure=None, - metadata=None, - sorting=None, - specs=None, - access_policy=None, - entries_stale_after=None, - metadata_stale_after=None, - must_revalidate=True, - ): + structure: Optional[TableStructure] = None, + metadata: Optional[JSON] = None, + sorting: Optional[List[SortingItem]] = None, + specs: Optional[List[Spec]] = None, + access_policy: Optional[AccessPolicy] = None, + entries_stale_after: Optional[timedelta] = None, + metadata_stale_after: Optional[timedelta] = None, + must_revalidate: bool = True, + ) -> None: """ Create a simple Adapter from any mapping (e.g. dict, OneShotCachedMap). @@ -92,36 +109,68 @@ def __init__( # This is a special case that means, "the given ordering". # By giving that a name ("_") we enable requests to asking for the # last N by requesting the sorting ("_", -1). 
- sorting = [("_", 1)] + sorting = [SortingItem(key="_", direction=1)] self._sorting = sorting self._metadata = metadata or {} self.specs = specs or [] self._access_policy = access_policy self._must_revalidate = must_revalidate - self.include_routers = [] - self.background_tasks = [] + self.include_routers: List[APIRouter] = [] + self.background_tasks: List[Any] = [] self.entries_stale_after = entries_stale_after self.metadata_stale_after = metadata_stale_after self.specs = specs or [] super().__init__() @property - def must_revalidate(self): + def must_revalidate(self) -> bool: + """ + + Returns + ------- + + """ return self._must_revalidate @must_revalidate.setter - def must_revalidate(self, value): + def must_revalidate(self, value: bool) -> None: + """ + + Parameters + ---------- + value : + + Returns + ------- + + """ self._must_revalidate = value @property - def access_policy(self): + def access_policy(self) -> Optional[AccessPolicy]: + """ + + Returns + ------- + + """ return self._access_policy @access_policy.setter - def access_policy(self, value): + def access_policy(self, value: AccessPolicy) -> None: + """ + + Parameters + ---------- + value : + + Returns + ------- + + """ self._access_policy = value - def metadata(self): + def metadata(self) -> JSON: "Metadata about this Adapter." # Ensure this is immutable (at the top level) to help the user avoid # getting the wrong impression that editing this would update anything @@ -129,56 +178,141 @@ def metadata(self): return self._metadata @property - def sorting(self): + def sorting(self) -> List[SortingItem]: + """ + + Returns + ------- + + """ return list(self._sorting) - def __repr__(self): + def __repr__(self) -> str: + """ + + Returns + ------- + + """ return ( f"<{type(self).__name__}({{{', '.join(repr(k) for k in self._mapping)}}})>" ) - def __getitem__(self, key): + def __getitem__(self, key: str) -> Any: + """ + + Parameters + ---------- + key : + + Returns + ------- + + """ return self._mapping[key] - def __iter__(self): + def __iter__(self) -> Iterator[str]: + """ + + Returns + ------- + + """ yield from self._mapping - def __len__(self): + def __len__(self) -> int: + """ + + Returns + ------- + + """ return len(self._mapping) - def keys(self): + def keys(self) -> KeysView: # type: ignore + """ + + Returns + ------- + + """ return KeysView(lambda: len(self), self._keys_slice) - def values(self): + def values(self) -> ValuesView: # type: ignore + """ + + Returns + ------- + + """ return ValuesView(lambda: len(self), self._items_slice) - def items(self): + def items(self) -> ItemsView: # type: ignore + """ + + Returns + ------- + + """ return ItemsView(lambda: len(self), self._items_slice) - def structure(self): + def structure(self) -> None: + """ + + Returns + ------- + + """ return None @property - def metadata_stale_at(self): + def metadata_stale_at(self) -> Optional[datetime]: + """ + + Returns + ------- + + """ if self.metadata_stale_after is None: - return + return None return self.metadata_stale_after + datetime.utcnow() @property - def entries_stale_at(self): + def entries_stale_at(self) -> Optional[datetime]: + """ + + Returns + ------- + + """ if self.entries_stale_after is None: - return + return None return self.entries_stale_after + datetime.utcnow() def new_variation( self, - *args, - mapping=UNCHANGED, - metadata=UNCHANGED, - sorting=UNCHANGED, - must_revalidate=UNCHANGED, - **kwargs, - ): + *args: Any, + mapping: Union[Sentinel, Dict[str, Any]] = UNCHANGED, + metadata: Union[Sentinel, JSON] = UNCHANGED, + 
sorting: Union[Sentinel, List[SortingItem]] = UNCHANGED, + must_revalidate: Union[Sentinel, bool] = UNCHANGED, + **kwargs: Any, + ) -> "MapAdapter": + """ + + Parameters + ---------- + args : + mapping : + metadata : + sorting : + must_revalidate : + kwargs : + + Returns + ------- + + """ if mapping is UNCHANGED: mapping = self._mapping if metadata is UNCHANGED: @@ -188,19 +322,29 @@ def new_variation( if must_revalidate is UNCHANGED: must_revalidate = self.must_revalidate return type(self)( - *args, - mapping=mapping, - sorting=sorting, - metadata=self._metadata, + # *args, + mapping=cast(Dict[str, Any], mapping), + sorting=cast(List[SortingItem], sorting), + metadata=cast(JSON, self._metadata), specs=self.specs, access_policy=self.access_policy, entries_stale_after=self.entries_stale_after, metadata_stale_after=self.entries_stale_after, - must_revalidate=must_revalidate, + must_revalidate=cast(bool, must_revalidate), **kwargs, ) - def read(self, fields=None): + def read(self, fields: Optional[str] = None) -> "MapAdapter": + """ + + Parameters + ---------- + fields : + + Returns + ------- + + """ if fields is not None: new_mapping = {} for field in fields: @@ -208,14 +352,41 @@ def read(self, fields=None): return self.new_variation(mapping=new_mapping) return self - def search(self, query): + def search(self, query: Any) -> Any: """ - Return a Adapter with a subset of the mapping. + + Parameters + ---------- + query : + + Returns + ------- + Return an Adapter with a subset of the mapping. """ return self.query_registry(query, self) - def get_distinct(self, metadata, structure_families, specs, counts): - data = {} + def get_distinct( + self, + metadata: JSON, + structure_families: StructureFamily, + specs: List[Spec], + counts: int, + ) -> Dict[str, Any]: + """ + + Parameters + ---------- + metadata : + structure_families : + specs : + counts : + + Returns + ------- + + """ + data: Dict[str, Any] = {} + # data: dict[str, list[dict[str, Any]]] = {} if metadata: data["metadata"] = {} @@ -236,7 +407,17 @@ def get_distinct(self, metadata, structure_families, specs, counts): return data - def sort(self, sorting): + def sort(self, sorting: SortingItem) -> "MapAdapter": + """ + + Parameters + ---------- + sorting : + + Returns + ------- + + """ mapping = copy.copy(self._mapping) for key, direction in reversed(sorting): if key == "_": @@ -247,9 +428,10 @@ def sort(self, sorting): mapping = dict( sorted( mapping.items(), - key=lambda item: item[1].metadata().get(key, _HIGH_SORTER), + key=lambda item: item[1].metadata().get(key, _HIGH_SORTER), # type: ignore ) ) + if direction < 0: # TODO In Python 3.8 dict items should be reservible # but I have seen errors in the wild that I could not @@ -261,21 +443,51 @@ def sort(self, sorting): # The following two methods are used by keys(), values(), items(). 
- def _keys_slice(self, start, stop, direction): + def _keys_slice( + self, start: int, stop: int, direction: int + ) -> Union[Iterator[str], List[str]]: + """ + + Parameters + ---------- + start : + stop : + direction : + + Returns + ------- + + """ if direction > 0: yield from itertools.islice(self._mapping.keys(), start, stop) else: - keys_to_slice = reversed( - list( - itertools.islice( - self._mapping.keys(), 0, len(self._mapping) - start + keys_to_slice = list( + reversed( + list( + itertools.islice( + self._mapping.keys(), 0, len(self._mapping) - start + ) ) ) ) keys = keys_to_slice[start:stop] return keys - def _items_slice(self, start, stop, direction): + def _items_slice( + self, start: int, stop: int, direction: int + ) -> Iterator[Tuple[str, Any]]: + """ + + Parameters + ---------- + start : + stop : + direction : + + Returns + ------- + + """ # A goal of this implementation is to avoid iterating over # self._mapping.values() because self._mapping may be a OneShotCachedMap which # only constructs its values at access time. With this in mind, we @@ -286,7 +498,7 @@ def _items_slice(self, start, stop, direction): ) -def walk_string_values(tree, node=None): +def walk_string_values(tree: MapAdapter, node: Optional[Any] = None) -> Iterator[str]: """ >>> list( ... walk_string_values( @@ -295,6 +507,14 @@ def walk_string_values(tree, node=None): ... ) ... ) ['apple', 'banana', 'cat', 'dog', 'elephant'] + + Parameters + ---------- + tree : + node : + + Returns + ------- """ if node is None: for node in tree: @@ -312,7 +532,18 @@ def walk_string_values(tree, node=None): yield item -def counter_to_dict(counter, counts): +def counter_to_dict(counter: Dict[str, Any], counts: Any) -> List[Dict[str, Any]]: + """ + + Parameters + ---------- + counter : + counts : + + Returns + ------- + + """ if counts: data = [{"value": k, "count": v} for k, v in counter.items() if k is not None] else: @@ -321,7 +552,20 @@ def counter_to_dict(counter, counts): return data -def iter_child_metadata(query_key, tree): +def iter_child_metadata( + query_key: Any, tree: MapAdapter +) -> Iterator[Tuple[str, Any, Any]]: + """ + + Parameters + ---------- + query_key : + tree : + + Returns + ------- + + """ for key, value in tree.items(): term = value.metadata() for subkey in query_key.split("."): @@ -333,7 +577,18 @@ def iter_child_metadata(query_key, tree): yield key, value, term -def full_text_search(query, tree): +def full_text_search(query: Any, tree: MapAdapter) -> MapAdapter: + """ + + Parameters + ---------- + query : + tree : + + Returns + ------- + + """ matches = {} text = query.text query_words = set(text.split()) @@ -354,7 +609,18 @@ def full_text_search(query, tree): MapAdapter.register_query(FullText, full_text_search) -def regex(query, tree): +def regex(query: Any, tree: MapAdapter) -> MapAdapter: + """ + + Parameters + ---------- + query : + tree : + + Returns + ------- + + """ import re matches = {} @@ -370,7 +636,18 @@ def regex(query, tree): MapAdapter.register_query(Regex, regex) -def eq(query, tree): +def eq(query: Any, tree: MapAdapter) -> MapAdapter: + """ + + Parameters + ---------- + query : + tree : + + Returns + ------- + + """ matches = {} for key, value, term in iter_child_metadata(query.key, tree): if term == query.value: @@ -381,7 +658,18 @@ def eq(query, tree): MapAdapter.register_query(Eq, eq) -def noteq(query, tree): +def noteq(query: Any, tree: MapAdapter) -> MapAdapter: + """ + + Parameters + ---------- + query : + tree : + + Returns + ------- + + """ matches = {} for key, value, 
term in iter_child_metadata(query.key, tree): if term != query.value: @@ -392,7 +680,18 @@ def noteq(query, tree): MapAdapter.register_query(NotEq, noteq) -def contains(query, tree): +def contains(query: Any, tree: MapAdapter) -> MapAdapter: + """ + + Parameters + ---------- + query : + tree : + + Returns + ------- + + """ matches = {} for key, value, term in iter_child_metadata(query.key, tree): if ( @@ -407,7 +706,18 @@ def contains(query, tree): MapAdapter.register_query(Contains, contains) -def comparison(query, tree): +def comparison(query: Any, tree: MapAdapter) -> MapAdapter: + """ + + Parameters + ---------- + query : + tree : + + Returns + ------- + + """ matches = {} for key, value, term in iter_child_metadata(query.key, tree): if query.operator not in {"le", "lt", "ge", "gt"}: @@ -421,7 +731,18 @@ def comparison(query, tree): MapAdapter.register_query(Comparison, comparison) -def _in(query, tree): +def _in(query: Any, tree: MapAdapter) -> MapAdapter: + """ + + Parameters + ---------- + query : + tree : + + Returns + ------- + + """ matches = {} for key, value, term in iter_child_metadata(query.key, tree): if term in query.value: @@ -432,7 +753,18 @@ def _in(query, tree): MapAdapter.register_query(In, _in) -def notin(query, tree): +def notin(query: Any, tree: MapAdapter) -> MapAdapter: + """ + + Parameters + ---------- + query : + tree : + + Returns + ------- + + """ matches = {} for key, value, term in iter_child_metadata(query.key, tree): if term not in query.value: @@ -443,7 +775,18 @@ def notin(query, tree): MapAdapter.register_query(NotIn, notin) -def specs(query, tree): +def specs(query: Any, tree: MapAdapter) -> MapAdapter: + """ + + Parameters + ---------- + query : + tree : + + Returns + ------- + + """ matches = {} include = set(query.include) exclude = set(query.exclude) @@ -459,7 +802,18 @@ def specs(query, tree): MapAdapter.register_query(SpecsQuery, specs) -def structure_family(query, tree): +def structure_family(query: Any, tree: MapAdapter) -> MapAdapter: + """ + + Parameters + ---------- + query : + tree : + + Returns + ------- + + """ matches = {} for key, value in tree.items(): if value.structure_family == query.value: @@ -471,7 +825,18 @@ def structure_family(query, tree): MapAdapter.register_query(StructureFamilyQuery, structure_family) -def keys_filter(query, tree): +def keys_filter(query: Any, tree: MapAdapter) -> MapAdapter: + """ + + Parameters + ---------- + query : + tree : + + Returns + ------- + + """ matches = {} for key, value in tree.items(): if key in query.keys: @@ -487,10 +852,30 @@ class _HIGH_SORTER_CLASS: Enables sort to work when metadata is sparse """ - def __lt__(self, other): + def __lt__(self, other: "_HIGH_SORTER_CLASS") -> bool: + """ + + Parameters + ---------- + other : + + Returns + ------- + + """ return False - def __gt__(self, other): + def __gt__(self, other: "_HIGH_SORTER_CLASS") -> bool: + """ + + Parameters + ---------- + other : + + Returns + ------- + + """ return True diff --git a/tiled/adapters/netcdf.py b/tiled/adapters/netcdf.py index 60f7f4d29..3b9d588fb 100644 --- a/tiled/adapters/netcdf.py +++ b/tiled/adapters/netcdf.py @@ -1,8 +1,21 @@ +from pathlib import Path +from typing import List, Union + import xarray from .xarray import DatasetAdapter -def read_netcdf(filepath): +def read_netcdf(filepath: Union[str, List[str], Path]) -> DatasetAdapter: + """ + + Parameters + ---------- + filepath : + + Returns + ------- + + """ ds = xarray.open_dataset(filepath, decode_times=False) return DatasetAdapter.from_dataset(ds) diff 
--git a/tiled/adapters/parquet.py b/tiled/adapters/parquet.py index 9c6903bff..cc7138c00 100644 --- a/tiled/adapters/parquet.py +++ b/tiled/adapters/parquet.py @@ -1,23 +1,41 @@ from pathlib import Path +from typing import Any, List, Optional, Union import dask.dataframe +import pandas -from ..structures.core import StructureFamily +from ..server.schemas import Asset +from ..structures.core import Spec, StructureFamily +from ..structures.table import TableStructure from ..utils import path_from_uri from .dataframe import DataFrameAdapter +from .protocols import AccessPolicy +from .type_alliases import JSON class ParquetDatasetAdapter: + """ """ + structure_family = StructureFamily.table def __init__( self, - data_uris, - structure, - metadata=None, - specs=None, - access_policy=None, - ): + data_uris: List[str], + structure: TableStructure, + metadata: Optional[JSON] = None, + specs: Optional[List[Spec]] = None, + access_policy: Optional[AccessPolicy] = None, + ) -> None: + """ + + Parameters + ---------- + data_uris : + structure : + metadata : + specs : + access_policy : + """ # TODO Store data_uris instead and generalize to non-file schemes. self._partition_paths = [path_from_uri(uri) for uri in data_uris] self._metadata = metadata or {} @@ -25,11 +43,23 @@ def __init__( self.specs = list(specs or []) self.access_policy = access_policy - def metadata(self): + def metadata(self) -> JSON: + """ + + Returns + ------- + + """ return self._metadata @property - def dataframe_adapter(self): + def dataframe_adapter(self) -> DataFrameAdapter: + """ + + Returns + ------- + + """ partitions = [] for path in self._partition_paths: if not Path(path).exists(): @@ -40,9 +70,18 @@ def dataframe_adapter(self): return DataFrameAdapter(partitions, self._structure) @classmethod - def init_storage(cls, data_uri, structure): - from ..server.schemas import Asset + def init_storage(cls, data_uri: str, structure: TableStructure) -> List[Asset]: + """ + Parameters + ---------- + data_uri : + structure : + + Returns + ------- + + """ directory = path_from_uri(data_uri) directory.mkdir(parents=True, exist_ok=True) assets = [ @@ -56,21 +95,72 @@ def init_storage(cls, data_uri, structure): ] return assets - def write_partition(self, data, partition): + def write_partition( + self, data: Union[dask.dataframe.DataFrame, pandas.DataFrame], partition: int + ) -> None: + """ + + Parameters + ---------- + data : + partition : + + Returns + ------- + + """ uri = self._partition_paths[partition] data.to_parquet(uri) - def write(self, data): + def write(self, data: Union[dask.dataframe.DataFrame, pandas.DataFrame]) -> None: + """ + + Parameters + ---------- + data : + + Returns + ------- + + """ if self.structure().npartitions != 1: raise NotImplementedError uri = self._partition_paths[0] data.to_parquet(uri) - def read(self, *args, **kwargs): + def read(self, *args: Any, **kwargs: Any) -> pandas.DataFrame: + """ + + Parameters + ---------- + args : + kwargs : + + Returns + ------- + + """ return self.dataframe_adapter.read(*args, **kwargs) - def read_partition(self, *args, **kwargs): + def read_partition(self, *args: Any, **kwargs: Any) -> pandas.DataFrame: + """ + + Parameters + ---------- + args : + kwargs : + + Returns + ------- + + """ return self.dataframe_adapter.read_partition(*args, **kwargs) - def structure(self): + def structure(self) -> TableStructure: + """ + + Returns + ------- + + """ return self._structure diff --git a/tiled/adapters/protocols.py b/tiled/adapters/protocols.py new file mode 100644 index 
000000000..01d5cdacc --- /dev/null +++ b/tiled/adapters/protocols.py @@ -0,0 +1,144 @@ +import collections.abc +import sys +from abc import abstractmethod +from typing import Any, Dict, List, Literal, Optional, Protocol, Tuple, Union + +import dask.dataframe +import pandas +import sparse +from numpy.typing import NDArray + +from ..server.schemas import Principal +from ..structures.array import ArrayStructure +from ..structures.awkward import AwkwardStructure +from ..structures.core import Spec, StructureFamily +from ..structures.sparse import SparseStructure +from ..structures.table import TableStructure +from .awkward_directory_container import DirectoryContainer +from .type_alliases import JSON, Filters, NDSlice, Scopes + + +class BaseAdapter(Protocol): + @abstractmethod + def metadata(self) -> JSON: + pass + + @abstractmethod + def specs(self) -> List[Spec]: + pass + + +if sys.version_info < (3, 9): + from typing_extensions import Mapping + + MappingType = Mapping +else: + import collections + + MappingType = collections.abc.Mapping + + +class ContainerAdapter(MappingType[str, "AnyAdapter"], BaseAdapter): + structure_family: Literal[StructureFamily.container] + + @abstractmethod + def structure(self) -> None: + pass + + +class ArrayAdapter(BaseAdapter, Protocol): + structure_family: Literal[StructureFamily.array] + + @abstractmethod + def structure(self) -> ArrayStructure: + pass + + @abstractmethod + def read(self, slice: NDSlice) -> NDArray[Any]: + pass + + # TODO Fix slice + @abstractmethod + def read_block(self, block: Tuple[int, ...]) -> NDArray[Any]: + pass + + +class AwkwardAdapter(BaseAdapter, Protocol): + structure_family: Literal[StructureFamily.awkward] + + @abstractmethod + def structure(self) -> AwkwardStructure: + pass + + @abstractmethod + def read(self) -> NDArray[Any]: # Are Slice and Array defined by numpy somewhere? + pass + + @abstractmethod + def read_buffers(self, form_keys: Optional[List[str]] = None) -> Dict[str, Any]: + pass + + @abstractmethod + def write(self, container: DirectoryContainer) -> None: + pass + + +class SparseAdapter(BaseAdapter, Protocol): + structure_family: Literal[StructureFamily.sparse] = StructureFamily.sparse + + @abstractmethod + def structure(self) -> SparseStructure: + pass + + # TODO Fix slice (just like array) + def read( + self, slice: NDSlice + ) -> sparse.COO: # Are Slice and Array defined by numpy somewhere? 
+ pass + + # TODO Fix slice (just like array) + def read_block(self, block: Tuple[int, ...]) -> sparse.COO: + pass + + +class TableAdapter(BaseAdapter, Protocol): + structure_family: Literal[StructureFamily.table] = StructureFamily.table + + @abstractmethod + def structure(self) -> TableStructure: + pass + + @abstractmethod + def read( + self, fields: List[str] + ) -> Union[dask.dataframe.DataFrame, pandas.DataFrame]: + pass + + @abstractmethod + def read_partition( + self, + partition: int, + fields: Optional[str] = None, + ) -> Union[dask.dataframe.DataFrame, pandas.DataFrame]: + pass + + @abstractmethod + def __getitem__(self, key: str) -> ArrayAdapter: + pass + + +AnyAdapter = Union[ + ArrayAdapter, AwkwardAdapter, ContainerAdapter, SparseAdapter, TableAdapter +] + + +class AccessPolicy(Protocol): + @abstractmethod + def allowed_scopes(self, node: BaseAdapter, principal: Principal) -> Scopes: + pass + + @abstractmethod + def filters( + self, node: BaseAdapter, principal: Principal, scopes: Scopes + ) -> Filters: + pass diff --git a/tiled/adapters/resource_cache.py b/tiled/adapters/resource_cache.py index 918ba9ede..2b1ca7e06 100644 --- a/tiled/adapters/resource_cache.py +++ b/tiled/adapters/resource_cache.py @@ -1,7 +1,7 @@ import os from typing import Any, Callable, Optional -import cachetools +import cachetools # type: ignore # When items are evicted from the cache, a hard reference is dropped, freeing # the resource to be closed by the garbage collector if there are no other @@ -23,35 +23,66 @@ def get_resource_cache() -> cachetools.Cache: def set_resource_cache(cache: cachetools.Cache) -> None: - "Set the resource cache, a process-global Cache." + """ + Set the resource cache, a process-global Cache. + Parameters + ---------- + cache : + + Returns + ------- + + """ global _cache _cache = cache -def default_ttu(_key: str, value: Any, now: float): +def default_ttu(_key: str, value: Any, now: float) -> float: """ Retain cached items for at most `DEFAULT_TIME_TO_USE_SECONDS` seconds (60s, by default). + + Parameters + ---------- + _key : + value : + now : + + Returns + ------- + """ return DEFAULT_TIME_TO_USE_SECONDS + now -def default_resource_cache(): +def default_resource_cache() -> cachetools.TLRUCache: "Create a new instance of the default resource cache." return cachetools.TLRUCache(DEFAULT_MAX_SIZE, default_ttu) def with_resource_cache( - cache_key: str, - factory: Callable, - *args, + cache_key: Any, + factory: Callable[..., Any], + *args: Any, _resource_cache: Optional[cachetools.Cache] = None, - **kwargs, -): + **kwargs: Any, +) -> Any: """ Use value from cache or, if not present, call `factory(*args, **kwargs)` and cache result. This uses a globally configured resource cache by default. For testing and debugging, a cache may be passed to the parameter _resource_cache. 
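The summary above is the whole contract: look up `cache_key`, and on a miss call `factory(*args, **kwargs)` and store the result. A minimal usage sketch, where `open_file` is an invented stand-in for an expensive factory such as `tifffile.TiffFile`:

```python
from tiled.adapters.resource_cache import default_resource_cache, with_resource_cache


def open_file(path: str) -> object:
    # Pretend this is expensive (opening a file handle, parsing headers, ...).
    return object()


cache = default_resource_cache()  # TLRU cache; 60 s time-to-use by default
a = with_resource_cache("file:///data/image.tif", open_file, "/data/image.tif", _resource_cache=cache)
b = with_resource_cache("file:///data/image.tif", open_file, "/data/image.tif", _resource_cache=cache)
assert a is b  # the second call should be served from the cache, not the factory
```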
+ + Parameters + ---------- + cache_key : + factory : + args : + _resource_cache : + kwargs : + + Returns + ------- + """ if _resource_cache is None: cache = get_resource_cache() diff --git a/tiled/adapters/sparse.py b/tiled/adapters/sparse.py index 7e3c5afcf..60b122d5c 100644 --- a/tiled/adapters/sparse.py +++ b/tiled/adapters/sparse.py @@ -1,9 +1,16 @@ +from typing import Any, Dict, List, Optional, Tuple, Union + +import dask import numpy +import pandas import sparse +from numpy._typing import NDArray -from ..structures.core import StructureFamily +from ..structures.core import Spec, StructureFamily from ..structures.sparse import COOStructure from .array import slice_and_shape_from_block_and_chunks +from .protocols import AccessPolicy +from .type_alliases import JSON, NDSlice class COOAdapter: @@ -13,16 +20,30 @@ class COOAdapter: @classmethod def from_arrays( cls, - coords, - data, - shape, - dims=None, - metadata=None, - specs=None, - access_policy=None, - ): + coords: NDArray[Any], + data: Union[dask.dataframe.DataFrame, pandas.DataFrame], + shape: Tuple[int, ...], + dims: Optional[Tuple[str, ...]] = None, + metadata: Optional[JSON] = None, + specs: Optional[List[Spec]] = None, + access_policy: Optional[AccessPolicy] = None, + ) -> "COOAdapter": """ Simplest constructor. Single chunk from coords, data arrays. + + Parameters + ---------- + coords : + data : + shape : + dims : + metadata : + specs : + access_policy : + + Returns + ------- + """ structure = COOStructure( dims=dims, @@ -39,8 +60,29 @@ def from_arrays( ) @classmethod - def from_coo(cls, coo, *, dims=None, metadata=None, specs=None, access_policy=None): - "Construct from sparse.COO object." + def from_coo( + cls, + coo: sparse.COO, + *, + dims: Optional[Tuple[str, ...]] = None, + metadata: Optional[JSON] = None, + specs: Optional[List[Spec]] = None, + access_policy: Optional[AccessPolicy] = None, + ) -> "COOAdapter": + """ + Construct from sparse.COO object. + Parameters + ---------- + coo : + dims : + metadata : + specs : + access_policy : + + Returns + ------- + + """ return cls.from_arrays( coords=coo.coords, data=coo.data, @@ -54,17 +96,30 @@ def from_coo(cls, coo, *, dims=None, metadata=None, specs=None, access_policy=No @classmethod def from_global_ref( cls, - blocks, - shape, - chunks, + blocks: Dict[Tuple[int, ...], Tuple[NDArray[Any], Any]], + shape: Tuple[int, ...], + chunks: Tuple[Tuple[int, ...], ...], *, - dims=None, - metadata=None, - specs=None, - access_policy=None, - ): + dims: Optional[Tuple[str, ...]] = None, + metadata: Optional[JSON] = None, + specs: Optional[List[Spec]] = None, + access_policy: Optional[AccessPolicy] = None, + ) -> "COOAdapter": """ Construct from blocks with coords given in global reference frame. + Parameters + ---------- + blocks : + shape : + chunks : + dims : + metadata : + specs : + access_policy : + + Returns + ------- + """ local_blocks = {} for block, (coords, data) in blocks.items(): @@ -90,15 +145,22 @@ def from_global_ref( def __init__( self, - blocks, - structure, + blocks: Dict[Tuple[int, ...], Tuple[NDArray[Any], Any]], + structure: COOStructure, *, - metadata=None, - specs=None, - access_policy=None, - ): + metadata: Optional[JSON] = None, + specs: Optional[List[Spec]] = None, + access_policy: Optional[AccessPolicy] = None, + ) -> None: """ Construct from blocks with coords given in block-local reference frame. 
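The classmethods above all funnel into this constructor. A minimal sketch of the simplest path, with invented values:

```python
import numpy
import sparse

from tiled.adapters.sparse import COOAdapter

# Two nonzero entries in a 2x2 array, at positions (0, 0) and (1, 1).
coo = sparse.COO(
    coords=numpy.array([[0, 1], [0, 1]]),
    data=numpy.array([1.0, 2.0]),
    shape=(2, 2),
)
adapter = COOAdapter.from_coo(coo)  # single chunk, block-local coords
assert (adapter.read().todense() == coo.todense()).all()
```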
+ Parameters + ---------- + blocks : + structure : + metadata : + specs : + access_policy : """ self.blocks = blocks self._metadata = metadata or {} @@ -106,13 +168,38 @@ def __init__( self.specs = specs or [] self.access_policy = access_policy - def metadata(self): + def metadata(self) -> JSON: + """ + + Returns + ------- + + """ return self._metadata - def structure(self): + def structure(self) -> COOStructure: + """ + + Returns + ------- + + """ return self._structure - def read_block(self, block, slice=None): + def read_block( + self, block: Tuple[int, ...], slice: Optional[NDSlice] = None + ) -> sparse.COO: + """ + + Parameters + ---------- + block : + slice : + + Returns + ------- + + """ coords, data = self.blocks[block] _, shape = slice_and_shape_from_block_and_chunks(block, self._structure.chunks) arr = sparse.COO(data=data[:], coords=coords[:], shape=shape) @@ -120,7 +207,17 @@ def read_block(self, block, slice=None): arr = arr[slice] return arr - def read(self, slice=None): + def read(self, slice: Optional[NDSlice] = None) -> sparse.COO: + """ + + Parameters + ---------- + slice : + + Returns + ------- + + """ all_coords = [] all_data = [] for block, (coords, data) in self.blocks.items(): diff --git a/tiled/adapters/sparse_blocks_parquet.py b/tiled/adapters/sparse_blocks_parquet.py index da479ce73..1a5ed7dbb 100644 --- a/tiled/adapters/sparse_blocks_parquet.py +++ b/tiled/adapters/sparse_blocks_parquet.py @@ -1,15 +1,33 @@ import itertools +from typing import Any, List, Optional, Tuple, Union +import dask.base +import dask.dataframe import numpy import pandas import sparse +from numpy._typing import NDArray from ..adapters.array import slice_and_shape_from_block_and_chunks -from ..structures.core import StructureFamily +from ..server.schemas import Asset +from ..structures.core import Spec, StructureFamily +from ..structures.sparse import COOStructure from ..utils import path_from_uri +from .protocols import AccessPolicy +from .type_alliases import JSON, NDSlice -def load_block(uri): +def load_block(uri: str) -> Tuple[List[int], Tuple[NDArray[Any], Any]]: + """ + + Parameters + ---------- + uri : + + Returns + ------- + + """ # TODO This can be done without pandas. # Better to use a plain I/O library. 
df = pandas.read_parquet(path_from_uri(uri)) @@ -19,16 +37,28 @@ def load_block(uri): class SparseBlocksParquetAdapter: + """ """ + structure_family = StructureFamily.sparse def __init__( self, - data_uris, - structure, - metadata=None, - specs=None, - access_policy=None, - ): + data_uris: List[str], + structure: COOStructure, + metadata: Optional[JSON] = None, + specs: Optional[List[Spec]] = None, + access_policy: Optional[AccessPolicy] = None, + ) -> None: + """ + + Parameters + ---------- + data_uris : + structure : + metadata : + specs : + access_policy : + """ num_blocks = (range(len(n)) for n in structure.chunks) self.blocks = {} for block, uri in zip(itertools.product(*num_blocks), data_uris): @@ -41,11 +71,20 @@ def __init__( @classmethod def init_storage( cls, - data_uri, - structure, - ): - from ..server.schemas import Asset + data_uri: str, + structure: COOStructure, + ) -> List[Asset]: + """ + + Parameters + ---------- + data_uri : + structure : + + Returns + ------- + """ directory = path_from_uri(data_uri) directory.mkdir(parents=True, exist_ok=True) @@ -61,20 +100,61 @@ def init_storage( ] return assets - def metadata(self): + def metadata(self) -> JSON: + """ + + Returns + ------- + + """ return self._metadata - def write_block(self, data, block): + def write_block( + self, + data: Union[dask.dataframe.DataFrame, pandas.DataFrame], + block: Tuple[int, ...], + ) -> None: + """ + + Parameters + ---------- + data : + block : + + Returns + ------- + + """ uri = self.blocks[block] data.to_parquet(path_from_uri(uri)) - def write(self, data): + def write(self, data: Union[dask.dataframe.DataFrame, pandas.DataFrame]) -> None: + """ + + Parameters + ---------- + data : + + Returns + ------- + + """ if len(self.blocks) > 1: raise NotImplementedError uri = self.blocks[(0,) * len(self._structure.shape)] data.to_parquet(path_from_uri(uri)) - def read(self, slice=...): + def read(self, slice: NDSlice) -> sparse.COO: + """ + + Parameters + ---------- + slice : + + Returns + ------- + + """ all_coords = [] all_data = [] for block, uri in self.blocks.items(): @@ -93,11 +173,30 @@ def read(self, slice=...): ) return arr[slice] - def read_block(self, block, slice=...): + def read_block( + self, block: Tuple[int, ...], slice: Optional[NDSlice] + ) -> sparse.COO: + """ + + Parameters + ---------- + block : + slice : + + Returns + ------- + + """ coords, data = load_block(self.blocks[block]) _, shape = slice_and_shape_from_block_and_chunks(block, self._structure.chunks) arr = sparse.COO(data=data[:], coords=coords[:], shape=shape) return arr[slice] - def structure(self): + def structure(self) -> COOStructure: + """ + + Returns + ------- + + """ return self._structure diff --git a/tiled/adapters/table.py b/tiled/adapters/table.py index da63b8b39..b169ca05d 100644 --- a/tiled/adapters/table.py +++ b/tiled/adapters/table.py @@ -1,3 +1,5 @@ +from typing import Any, Iterator, List, Optional, Tuple, Union + import dask.base import dask.dataframe import pandas @@ -5,6 +7,8 @@ from ..structures.core import Spec, StructureFamily from ..structures.table import TableStructure from .array import ArrayAdapter +from .protocols import AccessPolicy +from .type_alliases import JSON class TableAdapter: @@ -24,13 +28,28 @@ class TableAdapter: @classmethod def from_pandas( cls, - *args, - metadata=None, - specs=None, - access_policy=None, - npartitions=1, - **kwargs, - ): + *args: Any, + metadata: Optional[JSON] = None, + specs: Optional[List[Spec]] = None, + access_policy: Optional[AccessPolicy] = None, + 
npartitions: int = 1, + **kwargs: Any, + ) -> "TableAdapter": + """ + + Parameters + ---------- + args : + metadata : + specs : + access_policy : + npartitions : + kwargs : + + Returns + ------- + + """ ddf = dask.dataframe.from_pandas(*args, npartitions=npartitions, **kwargs) if specs is None: specs = [Spec("dataframe")] @@ -41,11 +60,24 @@ def from_pandas( @classmethod def from_dask_dataframe( cls, - ddf, - metadata=None, - specs=None, - access_policy=None, - ): + ddf: dask.dataframe.DataFrame, + metadata: Optional[JSON] = None, + specs: Optional[List[Spec]] = None, + access_policy: Optional[AccessPolicy] = None, + ) -> "TableAdapter": + """ + + Parameters + ---------- + ddf : + metadata : + specs : + access_policy : + + Returns + ------- + + """ structure = TableStructure.from_dask_dataframe(ddf) if specs is None: specs = [Spec("dataframe")] @@ -59,39 +91,95 @@ def from_dask_dataframe( def __init__( self, - partitions, - structure, + partitions: Union[dask.dataframe.DataFrame, pandas.DataFrame], + structure: TableStructure, *, - metadata=None, - specs=None, - access_policy=None, - ): + metadata: Optional[JSON] = None, + specs: Optional[List[Spec]] = None, + access_policy: Optional[AccessPolicy] = None, + ) -> None: + """ + + Parameters + ---------- + partitions : + structure : + metadata : + specs : + access_policy : + """ self._metadata = metadata or {} self._partitions = list(partitions) self._structure = structure self.specs = specs or [] self.access_policy = access_policy - def __repr__(self): + def __repr__(self) -> str: + """ + + Returns + ------- + + """ return f"{type(self).__name__}({self._structure.columns!r})" - def __getitem__(self, key): + def __getitem__(self, key: str) -> ArrayAdapter: + """ + + Parameters + ---------- + key : + + Returns + ------- + + """ # Must compute to determine shape. 
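A quick sketch of the `from_pandas` path above, using a made-up two-column frame (an illustration, not the test suite):

```python
import pandas

from tiled.adapters.table import TableAdapter

df = pandas.DataFrame({"x": [1, 2, 3], "y": [4.0, 5.0, 6.0]})
adapter = TableAdapter.from_pandas(df, npartitions=1)

adapter.structure().columns  # ["x", "y"]
adapter.read(["x"])          # just the "x" column, as a DataFrame
adapter["y"]                 # an ArrayAdapter over the "y" values
```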
         return ArrayAdapter.from_array(self.read([key])[key].values)
 
-    def items(self):
+    def items(self) -> Iterator[Tuple[str, ArrayAdapter]]:
+        """
+
+        Returns
+        -------
+
+        """
         yield from (
             (key, ArrayAdapter.from_array(self.read([key])[key].values))
             for key in self._structure.columns
         )
 
-    def metadata(self):
+    def metadata(self) -> JSON:
+        """
+
+        Returns
+        -------
+
+        """
         return self._metadata
 
-    def structure(self):
+    def structure(self) -> TableStructure:
+        """
+
+        Returns
+        -------
+
+        """
         return self._structure
 
-    def read(self, fields=None):
+    def read(
+        self, fields: Optional[List[str]] = None
+    ) -> Union[dask.dataframe.DataFrame, pandas.DataFrame]:
+        """
+
+        Parameters
+        ----------
+        fields :
+
+        Returns
+        -------
+
+        """
         if any(p is None for p in self._partitions):
             raise ValueError("Not all partitions have been stored.")
         if isinstance(self._partitions[0], dask.dataframe.DataFrame):
@@ -107,14 +195,29 @@ def read(self, fields=None):
             df = df[fields]
         return df
 
-    def read_partition(self, partition, fields=None):
-        partition = self._partitions[partition]
-        if partition is None:
+    def read_partition(
+        self,
+        partition: int,
+        fields: Optional[str] = None,
+    ) -> Union[pandas.DataFrame, dask.dataframe.DataFrame]:
+        """
+
+        Parameters
+        ----------
+        partition :
+        fields :
+
+        Returns
+        -------
+
+        """
+        df = self._partitions[partition]
+        if df is None:
             raise RuntimeError(f"partition {partition} has not been stored yet")
         if fields is not None:
-            partition = partition[fields]
-        if isinstance(partition, dask.dataframe.DataFrame):
-            return partition.compute()
+            df = df[fields]
+        if isinstance(df, dask.dataframe.DataFrame):
+            return df.compute()
-        return partition
+        return df
diff --git a/tiled/adapters/tiff.py b/tiled/adapters/tiff.py
index 1dee5aa5a..94a7f7e0c 100644
--- a/tiled/adapters/tiff.py
+++ b/tiled/adapters/tiff.py
@@ -1,12 +1,16 @@
 import builtins
+from typing import Any, Dict, List, Optional, Tuple, cast
 
 import numpy as np
 import tifffile
+from numpy._typing import NDArray
 
 from ..structures.array import ArrayStructure, BuiltinDtype
-from ..structures.core import StructureFamily
+from ..structures.core import Spec, StructureFamily
 from ..utils import path_from_uri
+from .protocols import AccessPolicy
 from .resource_cache import with_resource_cache
+from .type_alliases import JSON, NDSlice
 
 
 class TiffAdapter:
@@ -23,13 +27,23 @@ class TiffAdapter:
     def __init__(
         self,
-        data_uri,
+        data_uri: str,
         *,
-        structure=None,
-        metadata=None,
-        specs=None,
-        access_policy=None,
-    ):
+        structure: Optional[ArrayStructure] = None,
+        metadata: Optional[JSON] = None,
+        specs: Optional[List[Spec]] = None,
+        access_policy: Optional[AccessPolicy] = None,
+    ) -> None:
+        """
+
+        Parameters
+        ----------
+        data_uri :
+        structure :
+        metadata :
+        specs :
+        access_policy :
+        """
         if not isinstance(data_uri, str):
             raise Exception
         filepath = path_from_uri(data_uri)
@@ -40,7 +54,10 @@ def __init__(
         self.access_policy = access_policy
         if structure is None:
             if self._file.is_shaped:
-                shape = tuple(self._file.shaped_metadata[0]["shape"])
+                from_file: Tuple[Dict[str, Any], ...] = cast(
+                    Tuple[Dict[str, Any], ...], self._file.shaped_metadata
+                )
+                shape = tuple(from_file[0]["shape"])
             else:
                 arr = self._file.asarray()
                 shape = arr.shape
@@ -51,14 +68,30 @@ def __init__(
         )
         self._structure = structure
 
-    def metadata(self):
+    def metadata(self) -> JSON:
+        """
+
+        Returns
+        -------
+
+        """
         # This contains some enums, but Python's built-in JSON serializer
         # handles them fine (converting to str or int as appropriate).
d = {tag.name: tag.value for tag in self._file.pages[0].tags.values()} d.update(self._provided_metadata) return d - def read(self, slice=None): + def read(self, slice: Optional[NDSlice] = None) -> NDArray[Any]: + """ + + Parameters + ---------- + slice : + + Returns + ------- + + """ # TODO Is there support for reading less than the whole array # if we only want a slice? I do not think that is possible with a # single-page TIFF but I'm not sure. Certainly it *is* possible for @@ -68,7 +101,20 @@ def read(self, slice=None): arr = arr[slice] return arr - def read_block(self, block, slice=None): + def read_block( + self, block: Tuple[int, ...], slice: Optional[slice] = None + ) -> NDArray[Any]: + """ + + Parameters + ---------- + block : + slice : + + Returns + ------- + + """ # For simplicity, this adapter always treat a single TIFF file as one # chunk. This could be relaxed in the future. if sum(block) != 0: @@ -79,22 +125,44 @@ def read_block(self, block, slice=None): arr = arr[slice] return arr - def structure(self): + def structure(self) -> ArrayStructure: + """ + + Returns + ------- + + """ return self._structure class TiffSequenceAdapter: + """ """ + structure_family = "array" @classmethod def from_uris( cls, - data_uris, - structure=None, - metadata=None, - specs=None, - access_policy=None, - ): + data_uris: List[str], + structure: Optional[ArrayStructure] = None, + metadata: Optional[JSON] = None, + specs: Optional[List[Spec]] = None, + access_policy: Optional[AccessPolicy] = None, + ) -> "TiffSequenceAdapter": + """ + + Parameters + ---------- + data_uris : + structure : + metadata : + specs : + access_policy : + + Returns + ------- + + """ filepaths = [path_from_uri(data_uri) for data_uri in data_uris] seq = tifffile.TiffSequence(filepaths) return cls( @@ -107,13 +175,23 @@ def from_uris( def __init__( self, - seq, + seq: tifffile.TiffSequence, *, - structure=None, - metadata=None, - specs=None, - access_policy=None, - ): + structure: Optional[ArrayStructure] = None, + metadata: Optional[JSON] = None, + specs: Optional[List[Spec]] = None, + access_policy: Optional[AccessPolicy] = None, + ) -> None: + """ + + Parameters + ---------- + seq : + structure : + metadata : + specs : + access_policy : + """ self._seq = seq # TODO Check shape, chunks against reality. self.specs = specs or [] @@ -130,17 +208,34 @@ def __init__( ) self._structure = structure - def metadata(self): + def metadata(self) -> JSON: + """ + + Returns + ------- + + """ # TODO How to deal with the many headers? return self._provided_metadata - def read(self, slice=Ellipsis): + def read(self, slice: Optional[NDSlice] = ...) -> NDArray[Any]: """Return a numpy array - Receives a sequence of values to select from a collection of tiff files that were saved in a folder - The input order is defined as: files --> vertical slice --> horizontal slice --> color slice --> ... - read() can receive one value or one slice to select all the data from one file or a sequence of files; - or it can receive a tuple (int or slice) to select a more specific sequence of pixels of a group of images. + Receives a sequence of values to select from a collection of tiff files + that were saved in a folder The input order is defined as: files --> + vertical slice --> horizontal slice --> color slice --> ... read() can + receive one value or one slice to select all the data from one file or + a sequence of files; or it can receive a tuple (int or slice) to select + a more specific sequence of pixels of a group of images. 
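In other words, the leading index selects among files and any remaining indices slice within a frame. For a folder of 2-D grayscale TIFFs (file names invented, H and W standing for the frame dimensions):

```python
from tiled.adapters.tiff import TiffSequenceAdapter

adapter = TiffSequenceAdapter.from_uris(
    ["file:///data/frame0.tif", "file:///data/frame1.tif", "file:///data/frame2.tif"]
)

adapter.read()                    # all frames stacked: shape (3, H, W)
adapter.read(slice(0, 2))         # the first two files: shape (2, H, W)
adapter.read((0, slice(10, 20)))  # rows 10-19 of the first frame
```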
+ + Parameters + ---------- + slice : + + Returns + ------- + Return a numpy array + """ if slice is Ellipsis: @@ -170,14 +265,30 @@ def read(self, slice=Ellipsis): arr = np.atleast_1d(arr[tuple(the_rest)]) return arr - def read_block(self, block, slice=None): + def read_block( + self, block: Tuple[int, ...], slice: Optional[NDSlice] = ... + ) -> NDArray[Any]: + """ + + Parameters + ---------- + block : + slice : + + Returns + ------- + + """ if any(block[1:]): - # e.g. block[1:] != [0,0, ..., 0] raise IndexError(block) arr = self.read(builtins.slice(block[0], block[0] + 1)) - if slice is not None: - arr = arr[slice] - return arr + return arr[slice] + + def structure(self) -> ArrayStructure: + """ + + Returns + ------- - def structure(self): + """ return self._structure diff --git a/tiled/adapters/type_alliases.py b/tiled/adapters/type_alliases.py new file mode 100644 index 000000000..7564b32aa --- /dev/null +++ b/tiled/adapters/type_alliases.py @@ -0,0 +1,17 @@ +import sys + +if sys.version_info < (3, 10): + EllipsisType = type(Ellipsis) +else: + from types import EllipsisType + +from typing import Any, Dict, List, Set, Tuple, Union + +JSON = Dict[str, Union[str, int, float, bool, Dict[str, "JSON"], List["JSON"]]] +NDSlice = Union[ + int, slice, Tuple[Union[int, slice, EllipsisType], ...], EllipsisType +] # TODO Replace this with our Union for a slice/tuple/.../etc. + +Scopes = Set[str] +Query = Any # for now... +Filters = List[Query] diff --git a/tiled/adapters/utils.py b/tiled/adapters/utils.py index b5382bf9e..151551e44 100644 --- a/tiled/adapters/utils.py +++ b/tiled/adapters/utils.py @@ -1,4 +1,5 @@ import warnings +from typing import Any # for back-compat from ..utils import node_repr as tree_repr # noqa: F401 @@ -16,18 +17,41 @@ class IndexersMixin: This is just for back-ward compatiblity. 
""" + keys: Any + values: Any + items: Any + fn: Any + @property - def keys_indexer(self): + def keys_indexer(self) -> Any: + """ + + Returns + ------- + + """ warnings.warn(_MESSAGE.format(name="keys"), DeprecationWarning) return self.keys() @property - def values_indexer(self): + def values_indexer(self) -> Any: + """ + + Returns + ------- + + """ warnings.warn(_MESSAGE.format(name="values"), DeprecationWarning) return self.values() @property - def items_indexer(self): + def items_indexer(self) -> Any: + """ + + Returns + ------- + + """ warnings.warn(_MESSAGE.format(name="items"), DeprecationWarning) return self.items() @@ -50,8 +74,24 @@ class IndexCallable: __slots__ = ("fn",) - def __init__(self, fn): + def __init__(self, fn: Any) -> None: + """ + + Parameters + ---------- + fn : + """ self.fn = fn - def __getitem__(self, key): + def __getitem__(self, key: str) -> Any: + """ + + Parameters + ---------- + key : + + Returns + ------- + + """ return self.fn(key) diff --git a/tiled/adapters/xarray.py b/tiled/adapters/xarray.py index f8e9088c1..6f23a2a45 100644 --- a/tiled/adapters/xarray.py +++ b/tiled/adapters/xarray.py @@ -1,11 +1,14 @@ import collections.abc import itertools +import sys +from typing import Any, Iterator, List, Optional import xarray from ..structures.core import Spec from .array import ArrayAdapter from .mapping import MapAdapter +from .protocols import AccessPolicy class DatasetAdapter(MapAdapter): @@ -14,7 +17,25 @@ class DatasetAdapter(MapAdapter): """ @classmethod - def from_dataset(cls, dataset, *, specs=None, access_policy=None): + def from_dataset( + cls, + dataset: Any, + *, + specs: Optional[List[Spec]] = None, + access_policy: Optional[AccessPolicy] = None, + ) -> "DatasetAdapter": + """ + + Parameters + ---------- + dataset : + specs : + access_policy : + + Returns + ------- + + """ mapping = _DatasetMap(dataset) specs = specs or [] if "xarray_dataset" not in [spec.name for spec in specs]: @@ -26,7 +47,24 @@ def from_dataset(cls, dataset, *, specs=None, access_policy=None): access_policy=access_policy, ) - def __init__(self, mapping, *args, specs=None, access_policy=None, **kwargs): + def __init__( + self, + mapping: Any, + *args: Any, + specs: Optional[List[Spec]] = None, + access_policy: Optional[AccessPolicy] = None, + **kwargs: Any, + ) -> None: + """ + + Parameters + ---------- + mapping : + args : + specs : + access_policy : + kwargs : + """ if isinstance(mapping, xarray.Dataset): raise TypeError( "Use DatasetAdapter.from_dataset(...), not DatasetAdapter(...)." @@ -35,24 +73,72 @@ def __init__(self, mapping, *args, specs=None, access_policy=None, **kwargs): mapping, *args, specs=specs, access_policy=access_policy, **kwargs ) - def inlined_contents_enabled(self, depth): + def inlined_contents_enabled(self, depth: int) -> bool: + """ + + Parameters + ---------- + depth : + + Returns + ------- + + """ # Tell the server to in-line the description of each array # (i.e. data_vars and coords) to avoid latency of a second # request. 
return True -class _DatasetMap(collections.abc.Mapping): - def __init__(self, dataset): +if sys.version_info < (3, 9): + from typing_extensions import Mapping + + MappingType = Mapping +else: + import collections + + MappingType = collections.abc.Mapping + + +class _DatasetMap(MappingType[str, Any]): + def __init__(self, dataset: Any) -> None: + """ + + Parameters + ---------- + dataset : + """ self._dataset = dataset - def __len__(self): + def __len__(self) -> int: + """ + + Returns + ------- + + """ return len(self._dataset.data_vars) + len(self._dataset.coords) - def __iter__(self): + def __iter__(self) -> Iterator[Any]: + """ + + Returns + ------- + + """ yield from itertools.chain(self._dataset.data_vars, self._dataset.coords) - def __getitem__(self, key): + def __getitem__(self, key: str) -> ArrayAdapter: + """ + + Parameters + ---------- + key : + + Returns + ------- + + """ data_array = self._dataset[key] if key in self._dataset.coords: spec = Spec("xarray_coord") diff --git a/tiled/adapters/zarr.py b/tiled/adapters/zarr.py index 40834a67d..7a914965c 100644 --- a/tiled/adapters/zarr.py +++ b/tiled/adapters/zarr.py @@ -1,23 +1,47 @@ import builtins import collections.abc import os +import sys +from typing import Any, Iterator, List, Optional, Tuple, Union import zarr.core import zarr.hierarchy import zarr.storage +from numpy._typing import NDArray from ..adapters.utils import IndexersMixin from ..iterviews import ItemsView, KeysView, ValuesView -from ..structures.core import StructureFamily +from ..server.schemas import Asset +from ..structures.array import ArrayStructure +from ..structures.core import Spec, StructureFamily from ..utils import node_repr, path_from_uri from .array import ArrayAdapter, slice_and_shape_from_block_and_chunks +from .protocols import AccessPolicy +from .type_alliases import JSON, NDSlice INLINED_DEPTH = int(os.getenv("TILED_HDF5_INLINED_CONTENTS_MAX_DEPTH", "7")) -def read_zarr(data_uri, structure=None, **kwargs): +def read_zarr( + data_uri: str, + structure: Optional[ArrayStructure] = None, + **kwargs: Any, +) -> Union["ZarrGroupAdapter", ArrayAdapter]: + """ + + Parameters + ---------- + data_uri : + structure : + kwargs : + + Returns + ------- + + """ filepath = path_from_uri(data_uri) zarr_obj = zarr.open(filepath) # Group or Array + adapter: Union[ZarrGroupAdapter, ArrayAdapter] if isinstance(zarr_obj, zarr.hierarchy.Group): adapter = ZarrGroupAdapter(zarr_obj, **kwargs) else: @@ -29,10 +53,21 @@ def read_zarr(data_uri, structure=None, **kwargs): class ZarrArrayAdapter(ArrayAdapter): + """ """ + @classmethod - def init_storage(cls, data_uri, structure): - from ..server.schemas import Asset + def init_storage(cls, data_uri: str, structure: ArrayStructure) -> List[Asset]: + """ + + Parameters + ---------- + data_uri : + structure : + + Returns + ------- + """ # Zarr requires evenly-sized chunks within each dimension. # Use the first chunk along each dimension. zarr_chunks = tuple(dim[0] for dim in structure.chunks) @@ -55,14 +90,47 @@ def init_storage(cls, data_uri, structure): ) ] - def _stencil(self): - "Trims overflow because Zarr always has equal-sized chunks." + def _stencil(self) -> Tuple[slice, ...]: + """ + Trims overflow because Zarr always has equal-sized chunks. 
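The padding problem is easiest to see with numbers. A toy illustration of the idea (not the adapter's actual storage code): a logical length-10 array chunked as (4, 4, 2) has to be allocated as 12 elements in Zarr, and the stencil slices the overflow back off:

```python
import zarr

# Storage rounded up to a multiple of the uniform chunk size.
z = zarr.open("scratch.zarr", mode="w", shape=(12,), chunks=(4,), dtype="f8")
logical_shape = (10,)
stencil = tuple(slice(0, dim) for dim in logical_shape)
assert z[stencil].shape == logical_shape
```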
+ Returns + ------- + + """ return tuple(builtins.slice(0, dim) for dim in self.structure().shape) - def read(self, slice=...): + def read( + self, + slice: NDSlice = ..., + ) -> NDArray[Any]: + """ + + Parameters + ---------- + slice : + + Returns + ------- + + """ return self._array[self._stencil()][slice] - def read_block(self, block, slice=...): + def read_block( + self, + block: Tuple[int, ...], + slice: NDSlice = ..., + ) -> NDArray[Any]: + """ + + Parameters + ---------- + block : + slice : + + Returns + ------- + + """ block_slice, _ = slice_and_shape_from_block_and_chunks( block, self.structure().chunks ) @@ -70,12 +138,44 @@ def read_block(self, block, slice=...): # and optionally a sub-slice therein. return self._array[self._stencil()][block_slice][slice] - def write(self, data, slice=...): + def write( + self, + data: NDArray[Any], + slice: NDSlice = ..., + ) -> None: + """ + + Parameters + ---------- + data : + slice : + + Returns + ------- + + """ if slice is not ...: raise NotImplementedError self._array[self._stencil()] = data - async def write_block(self, data, block, slice=...): + async def write_block( + self, + data: NDArray[Any], + block: Tuple[int, ...], + slice: Optional[NDSlice] = ..., + ) -> None: + """ + + Parameters + ---------- + data : + block : + slice : + + Returns + ------- + + """ if slice is not ...: raise NotImplementedError block_slice, shape = slice_and_shape_from_block_and_chunks( @@ -84,12 +184,43 @@ async def write_block(self, data, block, slice=...): self._array[block_slice] = data -class ZarrGroupAdapter(collections.abc.Mapping, IndexersMixin): +if sys.version_info < (3, 9): + from typing_extensions import Mapping + + MappingType = Mapping +else: + import collections + + MappingType = collections.abc.Mapping + + +class ZarrGroupAdapter( + MappingType[str, Union["ArrayAdapter", "ZarrGroupAdapter"]], + IndexersMixin, +): + """ """ + structure_family = StructureFamily.container def __init__( - self, node, *, structure=None, metadata=None, specs=None, access_policy=None - ): + self, + node: Any, + *, + structure: Optional[ArrayStructure] = None, + metadata: Optional[JSON] = None, + specs: Optional[List[Spec]] = None, + access_policy: Optional[AccessPolicy] = None, + ) -> None: + """ + + Parameters + ---------- + node : + structure : + metadata : + specs : + access_policy : + """ if structure is not None: raise ValueError( f"structure is expected to be None for containers, not {structure}" @@ -100,65 +231,181 @@ def __init__( self._provided_metadata = metadata or {} super().__init__() - def __repr__(self): + def __repr__(self) -> str: + """ + + Returns + ------- + + """ return node_repr(self, list(self)) @property - def access_policy(self): + def access_policy(self) -> Optional[AccessPolicy]: + """ + + Returns + ------- + + """ return self._access_policy - def metadata(self): + def metadata(self) -> Any: + """ + + Returns + ------- + + """ return self._node.attrs - def structure(self): + def structure(self) -> None: + """ + + Returns + ------- + + """ return None - def __iter__(self): + def __iter__(self) -> Iterator[Any]: + """ + + Returns + ------- + + """ yield from self._node - def __getitem__(self, key): + def __getitem__(self, key: str) -> Union[ArrayAdapter, "ZarrGroupAdapter"]: + """ + + Parameters + ---------- + key : + + Returns + ------- + + """ value = self._node[key] if isinstance(value, zarr.hierarchy.Group): return ZarrGroupAdapter(value) else: return ZarrArrayAdapter.from_array(value) - def __len__(self): + def __len__(self) -> int: + 
""" + + Returns + ------- + + """ return len(self._node) - def keys(self): + def keys(self) -> KeysView: # type: ignore + """ + + Returns + ------- + + """ return KeysView(lambda: len(self), self._keys_slice) - def values(self): + def values(self) -> ValuesView: # type: ignore + """ + + Returns + ------- + + """ return ValuesView(lambda: len(self), self._items_slice) - def items(self): + def items(self) -> ItemsView: # type: ignore + """ + + Returns + ------- + + """ return ItemsView(lambda: len(self), self._items_slice) - def search(self, query): + def search(self, query: Any) -> None: """ - Return a Tree with a subset of the mapping. + + Parameters + ---------- + query : + + Returns + ------- + Return a Tree with a subset of the mapping. + """ raise NotImplementedError - def read(self, fields=None): + def read(self, fields: Optional[str]) -> "ZarrGroupAdapter": + """ + + Parameters + ---------- + fields : + + Returns + ------- + + """ if fields is not None: raise NotImplementedError return self # The following two methods are used by keys(), values(), items(). - def _keys_slice(self, start, stop, direction): + def _keys_slice(self, start: int, stop: int, direction: int) -> List[Any]: + """ + + Parameters + ---------- + start : + stop : + direction : + + Returns + ------- + + """ keys = list(self._node) if direction < 0: - keys = reversed(keys) + keys = list(reversed(keys)) return keys[start:stop] - def _items_slice(self, start, stop, direction): + def _items_slice(self, start: int, stop: int, direction: int) -> List[Any]: + """ + + Parameters + ---------- + start : + stop : + direction : + + Returns + ------- + + """ items = [(key, self[key]) for key in list(self)] if direction < 0: - items = reversed(items) + items = list(reversed(items)) return items[start:stop] - def inlined_contents_enabled(self, depth): + def inlined_contents_enabled(self, depth: int) -> bool: + """ + + Parameters + ---------- + depth : + + Returns + ------- + + """ return depth <= INLINED_DEPTH diff --git a/tiled/iterviews.py b/tiled/iterviews.py index 9f79d18fe..946375855 100644 --- a/tiled/iterviews.py +++ b/tiled/iterviews.py @@ -6,7 +6,7 @@ class IterViewBase: __slots__ = ("_get_length",) - def __init__(self, get_length): + def __init__(self, get_length) -> None: self._get_length = get_length def __repr__(self): @@ -44,7 +44,7 @@ class KeysView(IterViewBase): __slots__ = ("_keys_slice",) _name = "key" - def __init__(self, get_length, keys_slice): + def __init__(self, get_length, keys_slice) -> None: self._keys_slice = keys_slice super().__init__(get_length) @@ -81,7 +81,7 @@ class ItemsView(IterViewBase): __slots__ = ("_items_slice",) _name = "item" - def __init__(self, get_length, items_slice): + def __init__(self, get_length, items_slice) -> None: self._items_slice = items_slice super().__init__(get_length) @@ -120,7 +120,7 @@ class ValuesView(IterViewBase): __slots__ = ("_items_slice",) _name = "value" - def __init__(self, get_length, items_slice): + def __init__(self, get_length, items_slice) -> None: self._items_slice = items_slice super().__init__(get_length) diff --git a/tiled/query_registration.py b/tiled/query_registration.py index 7181ebb69..ec4bca9b7 100644 --- a/tiled/query_registration.py +++ b/tiled/query_registration.py @@ -6,6 +6,7 @@ """ import inspect from dataclasses import fields +from typing import Any from .utils import DictView, UnsupportedQueryType @@ -84,11 +85,11 @@ def inner(cls): class QueryTranslationRegistry: - def __init__(self): + def __init__(self) -> None: self._lookup = {} 
self._lazy = {} - def register(self, class_, translator): + def register(self, class_, translator) -> Any: self._lookup[class_] = translator return translator diff --git a/tiled/server/pydantic_array.py b/tiled/server/pydantic_array.py index 257d80851..6bc2090e8 100644 --- a/tiled/server/pydantic_array.py +++ b/tiled/server/pydantic_array.py @@ -38,7 +38,7 @@ class BuiltinDtype(BaseModel): __endianness_reverse_map = {"big": ">", "little": "<", "not_applicable": "|"} @classmethod - def from_numpy_dtype(cls, dtype): + def from_numpy_dtype(cls, dtype) -> "BuiltinDtype": return cls( endianness=cls.__endianness_map[dtype.byteorder], kind=Kind(dtype.kind), diff --git a/tiled/structures/array.py b/tiled/structures/array.py index 8ee3210ef..9581763b1 100644 --- a/tiled/structures/array.py +++ b/tiled/structures/array.py @@ -4,6 +4,8 @@ from dataclasses import dataclass from typing import List, Optional, Tuple, Union +import numpy + class Endianness(str, enum.Enum): """ @@ -85,14 +87,14 @@ class BuiltinDtype: __endianness_reverse_map = {"big": ">", "little": "<", "not_applicable": "|"} @classmethod - def from_numpy_dtype(cls, dtype): + def from_numpy_dtype(cls, dtype) -> "BuiltinDtype": return cls( endianness=cls.__endianness_map[dtype.byteorder], kind=Kind(dtype.kind), itemsize=dtype.itemsize, ) - def to_numpy_dtype(self): + def to_numpy_dtype(self) -> numpy.dtype: import numpy return numpy.dtype(self.to_numpy_str()) @@ -234,7 +236,7 @@ def from_json(cls, structure): ) @classmethod - def from_array(cls, array, shape=None, chunks=None, dims=None): + def from_array(cls, array, shape=None, chunks=None, dims=None) -> "ArrayStructure": from dask.array.core import normalize_chunks if not hasattr(array, "__array__"): diff --git a/tiled/structures/core.py b/tiled/structures/core.py index 2891814ab..a4708b35e 100644 --- a/tiled/structures/core.py +++ b/tiled/structures/core.py @@ -22,7 +22,7 @@ class Spec: name: str version: Optional[str] = None - def __init__(self, name, version=None): + def __init__(self, name, version=None) -> None: # Enable the name to be passed as a position argument. # The setattr stuff is necessary to make this work with a frozen dataclass. object.__setattr__(self, "name", name) diff --git a/tiled/structures/table.py b/tiled/structures/table.py index 6a1d41a9c..81a35d5c4 100644 --- a/tiled/structures/table.py +++ b/tiled/structures/table.py @@ -22,7 +22,7 @@ class TableStructure: resizable: Union[bool, Tuple[bool, ...]] = False @classmethod - def from_dask_dataframe(cls, ddf): + def from_dask_dataframe(cls, ddf) -> "TableStructure": import dask.dataframe.utils import pyarrow diff --git a/tiled/utils.py b/tiled/utils.py index c7cfd917d..8a429e3d3 100644 --- a/tiled/utils.py +++ b/tiled/utils.py @@ -398,17 +398,17 @@ def tree(tree, max_lines=20): class Sentinel: - def __init__(self, name): + def __init__(self, name: str) -> None: self.name = name - def __repr__(self): + def __repr__(self) -> str: return f"<{self.name}>" - def __copy__(self): + def __copy__(self) -> "Sentinel": # The goal here is to make copy.copy(sentinel) == sentinel return self - def __deepcopy__(self, memo): + def __deepcopy__(self, memo: "Sentinel") -> "Sentinel": # The goal here is to make copy.deepcopy(sentinel) == sentinel return self @@ -614,7 +614,7 @@ def get_share_tiled_path(): SHARE_TILED_PATH = get_share_tiled_path() -def node_repr(tree, sample): +def node_repr(tree, sample) -> str: sample_reprs = list(map(repr, sample)) out = f"<{type(tree).__name__} {{" # Always show at least one. 
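Returning `self` from both copy hooks above means sentinels keep working with identity (`is`) checks even after passing through `copy`:

```python
import copy

from tiled.utils import Sentinel

UNSET = Sentinel("UNSET")
assert copy.copy(UNSET) is UNSET
assert copy.deepcopy(UNSET) is UNSET
```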
@@ -666,7 +666,7 @@ async def ensure_awaitable(func, *args, **kwargs):
     return await anyio.to_thread.run_sync(functools.partial(func, **kwargs), *args)
 
 
-def path_from_uri(uri):
+def path_from_uri(uri) -> Path:
     """
     Given a URI, return a Path.
 
@@ -691,7 +691,7 @@ def path_from_uri(uri):
 SCHEME_PATTERN = re.compile(r"^[a-z0-9+]+:\/\/.*$")
 
 
-def ensure_uri(uri_or_path):
+def ensure_uri(uri_or_path) -> str:
     "Accept a URI or file path (Windows- or POSIX-style) and return a URI."
     if not SCHEME_PATTERN.match(str(uri_or_path)):
         # Interpret this as a filepath.
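These two path helpers are inverses in the common case. On a POSIX system, with an invented path:

```python
from pathlib import Path

from tiled.utils import ensure_uri, path_from_uri

uri = ensure_uri("/data/run1/image.tif")  # expected: "file:///data/run1/image.tif"
assert path_from_uri(uri) == Path("/data/run1/image.tif")
```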