From 9cc1e1bc1bf46afe2b33f7cde0736d01af3f0add Mon Sep 17 00:00:00 2001 From: JSKenyon Date: Wed, 27 Mar 2024 10:20:44 +0200 Subject: [PATCH 1/3] Drop Python3.7 support - heavily deprecated upstream. --- .github/workflows/python-package.yml | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 30e91f03..e6b7ed50 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -16,7 +16,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.7", "3.8", "3.9", "3.10"] + python-version: ["3.8", "3.9", "3.10"] steps: - uses: actions/checkout@v3 diff --git a/pyproject.toml b/pyproject.toml index 7febb0e1..306a7a52 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,7 +14,7 @@ packages = [ ] [tool.poetry.dependencies] -python = "^3.7" +python = "^3.8" munch = "^2.5.0" omegaconf = "^2.1" importlib_metadata = { version = "4.13.0", python = "3.7" } From afe569efffd67c6431efc0825ba170a61f6f5ffb Mon Sep 17 00:00:00 2001 From: JSKenyon Date: Tue, 26 Mar 2024 15:01:11 +0200 Subject: [PATCH 2/3] Checkpoint work so far. --- pyproject.toml | 1 + scabha/basetypes.py | 72 ++++++++++++++++++++++++++++++++++++--- stimela/backends/utils.py | 10 +++--- 3 files changed, 73 insertions(+), 10 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 306a7a52..2da49782 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,6 +24,7 @@ pydantic = "^1.10.2" psutil = "^5.9.3" rich = "^13.7.0" dill = "^0.3.6" +typeguard = "^4.2.1" [tool.poetry.scripts] stimela = "stimela.main:cli" diff --git a/scabha/basetypes.py b/scabha/basetypes.py index fa8c438c..23eb16bc 100644 --- a/scabha/basetypes.py +++ b/scabha/basetypes.py @@ -1,9 +1,12 @@ from dataclasses import field, dataclass from collections import OrderedDict -from typing import List +from typing import List, Union, get_args, get_origin import os.path import re from .exceptions import UnsetError +from itertools import zip_longest +from typeguard import check_type, TypeCheckError + def EmptyDictDefault(): return field(default_factory=lambda:OrderedDict()) @@ -55,7 +58,7 @@ def __init__(self, value): def parse(value: str, expand_user=True): """ Parses URI. If URI does not start with "protocol://", assumes "file://" - + Returns tuple of (protocol, path, is_remote) If expand_user is True, ~ in (file-protocol) paths will be expanded. @@ -75,7 +78,7 @@ class File(URI): @property def NAME(self): return File(os.path.basename(self)) - + @property def PATH(self): return File(os.path.abspath(self)) @@ -95,7 +98,7 @@ def BASENAME(self): @property def EXT(self): return os.path.splitext(self)[1] - + @property def EXISTS(self): return os.path.exists(self) @@ -114,3 +117,64 @@ def is_file_type(dtype): def is_file_list_type(dtype): return any(dtype == List[t] for t in FILE_TYPES) + +class Skip(object): + def iterate_samples(self, collection): + return () + + +def get_filelikes(dtype, value, filelikes=None): + """Recursively recover all filelike elements from a composite dtype.""" + + filelikes = set() if filelikes is None else filelikes + + origin = get_origin(dtype) + args = get_args(dtype) + + if origin: # Implies composition. + + if origin is dict: + + # No further work required for empty collections. + if len(value) == 0: + return filelikes + + k_dtype, v_dtype = args + + for k, v in value.items(): + filelikes = get_filelikes(k_dtype, k, filelikes) + filelikes = get_filelikes(v_dtype, v, filelikes) + + elif origin in (tuple, list, set): + + # No further work required for empty collections. + if len(value) == 0: + return filelikes + + # This is a special case for tuples of arbitrary + # length i.e. list-like behaviour. + if ... in args: + args = tuple([a for a in args if a != ...]) + + for dt, v in zip_longest(args, value, fillvalue=args[0]): + filelikes = get_filelikes(dt, v, filelikes) + + elif origin is Union: + + for dt in args: + + try: + # Do not check collection member types. + check_type(value, dt, collection_check_strategy=Skip()) + except TypeCheckError: + continue + filelikes = get_filelikes(dt, value, filelikes) + + else: + raise ValueError(f"Failed to traverse {dtype} dtype when looking for files.") + + else: + if is_file_type(dtype): + filelikes.add(value) + + return filelikes diff --git a/stimela/backends/utils.py b/stimela/backends/utils.py index 2dbe5124..accf5aee 100644 --- a/stimela/backends/utils.py +++ b/stimela/backends/utils.py @@ -4,7 +4,7 @@ from stimela.kitchen.cab import Cab, Parameter from scabha.exceptions import SchemaError from stimela.exceptions import BackendError -from scabha.basetypes import File, Directory, MS, URI +from scabha.basetypes import File, Directory, MS, URI, get_filelikes ## commenting out for now -- will need to fix when we reactive the kube backend (and have tests for it) @@ -34,11 +34,9 @@ def add_target(param_name, path, must_exist, readwrite): if schema is None: raise SchemaError(f"parameter {name} not in defined inputs or outputs for this cab. This should have been caught by validation earlier!") - if schema.is_file_type: - files = [value] - elif schema.is_file_list_type: - files = value - else: + files = get_filelikes(schema._dtype, value) + + if not files: continue must_exist = schema.must_exist and name in inputs From 9e574eac8e53f90b13914774f8eeda5f925f2ce9 Mon Sep 17 00:00:00 2001 From: JSKenyon Date: Tue, 26 Mar 2024 16:37:20 +0200 Subject: [PATCH 3/3] Add tests and refine approach. --- scabha/basetypes.py | 36 ++++++++++++++++-------- tests/scabha_tests/test_filelikes.py | 42 ++++++++++++++++++++++++++++ 2 files changed, 66 insertions(+), 12 deletions(-) create mode 100644 tests/scabha_tests/test_filelikes.py diff --git a/scabha/basetypes.py b/scabha/basetypes.py index 23eb16bc..f541dd4b 100644 --- a/scabha/basetypes.py +++ b/scabha/basetypes.py @@ -1,11 +1,15 @@ +from __future__ import annotations from dataclasses import field, dataclass from collections import OrderedDict -from typing import List, Union, get_args, get_origin +from typing import List, Union, get_args, get_origin, Any import os.path import re from .exceptions import UnsetError from itertools import zip_longest -from typeguard import check_type, TypeCheckError +from typeguard import ( + check_type, TypeCheckError, TypeCheckerCallable, TypeCheckMemo, checker_lookup_functions +) +from inspect import isclass def EmptyDictDefault(): @@ -118,11 +122,21 @@ def is_file_list_type(dtype): return any(dtype == List[t] for t in FILE_TYPES) -class Skip(object): - def iterate_samples(self, collection): - return () +def check_filelike(value: Any, origin_type: Any, args: tuple[Any, ...], memo: TypeCheckMemo) -> None: + """Custom checker for filelike objects. Currently checks for strings.""" + if not isinstance(value, str): + raise TypeCheckError(f'{value} is not compatible with URI or its subclasses.') +def filelike_lookup(origin_type: Any, args: tuple[Any, ...], extras: tuple[Any, ...]) -> TypeCheckerCallable | None: + """Lookup the custom checker for filelike objects.""" + if isclass(origin_type) and issubclass(origin_type, URI): + return check_filelike + + return None + +checker_lookup_functions.append(filelike_lookup) # Register custom type checker. + def get_filelikes(dtype, value, filelikes=None): """Recursively recover all filelike elements from a composite dtype.""" @@ -152,9 +166,9 @@ def get_filelikes(dtype, value, filelikes=None): return filelikes # This is a special case for tuples of arbitrary - # length i.e. list-like behaviour. - if ... in args: - args = tuple([a for a in args if a != ...]) + # length i.e. list-like behaviour. We can simply + # strip out the Ellipsis. + args = tuple([arg for arg in args if arg != ...]) for dt, v in zip_longest(args, value, fillvalue=args[0]): filelikes = get_filelikes(dt, v, filelikes) @@ -162,11 +176,9 @@ def get_filelikes(dtype, value, filelikes=None): elif origin is Union: for dt in args: - try: - # Do not check collection member types. - check_type(value, dt, collection_check_strategy=Skip()) - except TypeCheckError: + check_type(value, dt) + except TypeCheckError: # Value doesn't match dtype - incorrect branch. continue filelikes = get_filelikes(dt, value, filelikes) diff --git a/tests/scabha_tests/test_filelikes.py b/tests/scabha_tests/test_filelikes.py new file mode 100644 index 00000000..b3697d92 --- /dev/null +++ b/tests/scabha_tests/test_filelikes.py @@ -0,0 +1,42 @@ +from scabha.basetypes import get_filelikes, File, URI, Directory, MS +from typing import Dict, List, Set, Tuple, Union, Optional +import pytest + + +@pytest.fixture(scope="module", params=[File, URI, Directory, MS]) +def templates(request): + + ft = request.param + + TEMPLATES = ( + (Tuple, (), set()), + (Tuple[int, ...], [1, 2], set()), + (Tuple[ft, ...], ("foo", "bar"), {"foo", "bar"}), + (Tuple[ft, str], ("foo", "bar"), {"foo"}), + (Dict[str, int], {"a": 1, "b": 2}, set()), + (Dict[str, ft], {"a": "foo", "b": "bar"}, {"foo", "bar"}), + (Dict[ft, str], {"foo": "a", "bar": "b"}, {"foo", "bar"}), + (List[ft], [], set()), + (List[int], [1, 2], set()), + (List[ft], ["foo", "bar"], {"foo", "bar"}), + (Set[ft], set(), set()), + (Set[int], {1, 2}, set()), + (Set[ft], {"foo", "bar"}, {"foo", "bar"}), + (Union[str, List[ft]], "foo", set()), + (Union[str, List[ft]], ["foo"], {"foo"}), + (Union[str, Tuple[ft]], "foo", set()), + (Union[str, Tuple[ft]], ("foo",), {"foo"}), + (Optional[ft], None, set()), + (Optional[ft], "foo", {"foo"}), + (Optional[Union[ft, int]], 1, set()), + (Optional[Union[ft, int]], "foo", {"foo"}), + (Dict[str, Tuple[ft, str]], {"a": ("foo", "bar")}, {"foo"}) + ) + + return TEMPLATES + + +def test_get_filelikes(templates): + + for dt, v, res in templates: + assert get_filelikes(dt, v) == res, f"Failed for dtype {dt} and value {v}."