diff --git a/.github/workflows/CI-test.yaml b/.github/workflows/CI-test.yaml index 2f394d2..a97c76c 100644 --- a/.github/workflows/CI-test.yaml +++ b/.github/workflows/CI-test.yaml @@ -18,7 +18,7 @@ jobs: with: submodules: recursive - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v3 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - name: Install dependencies @@ -38,4 +38,6 @@ jobs: - name: Test with pytest run: | pytest -v --cov - pytest -v --doctest-modules src/ + - name: Test with doctest + run: | + PYTHONPATH=src pytest -v --doctest-modules src/ diff --git a/conftest.py b/conftest.py new file mode 100644 index 0000000..669b359 --- /dev/null +++ b/conftest.py @@ -0,0 +1,43 @@ +import os +import re +from pathlib import Path + +import pytest + +from tests.utils.constants import TEST_ROOT + +pytest_plugins = [ + "tests.fixtures.configs", + "tests.fixtures.environment", + "tests.fixtures.fake_filesystem", + "tests.fixtures.sample_rules", + "tests.fixtures.config_files", + "tests.fixtures.CV_Dir", + "tests.fixtures.CMIP_Tables_Dir", +] + + +@pytest.hookimpl(tryfirst=True) +def pytest_collection_modifyitems(config, items): + for item in items: + if item.fspath and item.fspath.ext == ".py": + item.add_marker(pytest.mark.doctest) + + +@pytest.fixture(autouse=True) +def pathlib_doctest_directive(doctest_namespace): + """Replace PosixPath/WindowsPath with Path in doc-test output.""" + doctest_namespace["Path"] = Path + + def path_replace(output): + """Replace platform-specific Path output with generic Path in doc-tests.""" + return re.sub(r"(PosixPath|WindowsPath)\((.*?)\)", r"Path(\2)", output) + + doctest_namespace["path_replace"] = path_replace + + +def pytest_unconfigure(config): + """Remove all JSON files containing 'pipeline' in their name.""" + for file in os.listdir(): + if "pipeline" in file and file.endswith(".json"): + os.remove(file) diff --git a/pytest.ini b/pytest.ini new file mode 100644 
index 0000000..3b8e6a6 --- /dev/null +++ b/pytest.ini @@ -0,0 +1,2 @@ +[pytest] +doctest_optionflags = NORMALIZE_WHITESPACE IGNORE_EXCEPTION_DETAIL ELLIPSIS diff --git a/setup.py b/setup.py index 94d8305..9e44e14 100644 --- a/setup.py +++ b/setup.py @@ -41,6 +41,7 @@ def read(filename): "deprecation", "distributed", "dpath", + "flexparser < 0.4", # NOTE(PG): See https://tinyurl.com/ypf99xnh "flox", "imohash", "numbagg", diff --git a/src/pymorize/calendar.py b/src/pymorize/calendar.py index 2540c54..c026a01 100644 --- a/src/pymorize/calendar.py +++ b/src/pymorize/calendar.py @@ -15,14 +15,6 @@ >>> year_bounds = year_bounds_major_digits(2000, 2010, 2, 2) >>> print(year_bounds) [[2000, 2001], [2002, 2003], [2004, 2005], [2006, 2007], [2008, 2009], [2010, 2010]] - ->>> date_range = date_ranges_from_bounds(year_bounds, freq="Y") ->>> print(date_range) -([Timestamp('2000-12-31 00:00:00', freq='A-DEC'), Timestamp('2001-12-31 00:00:00', freq='A-DEC')], [Timestamp('2002-12-31 00:00:00', freq='A-DEC'), Timestamp('2003-12-31 00:00:00', freq='A-DEC')], [Timestamp('2004-12-31 00:00:00', freq='A-DEC'), Timestamp('2005-12-31 00:00:00', freq='A-DEC')], [Timestamp('2006-12-31 00:00:00', freq='A-DEC'), Timestamp('2007-12-31 00:00:00', freq='A-DEC')], [Timestamp('2008-12-31 00:00:00', freq='A-DEC'), Timestamp('2009-12-31 00:00:00', freq='A-DEC')], [Timestamp('2010-12-31 00:00:00', freq='A-DEC')]) - ->>> date_range = date_ranges_from_year_bounds(year_bounds, freq="Y") ->>> print(date_range) -([Timestamp('2000-12-31 00:00:00', freq='A-DEC'), Timestamp('2001-12-31 00:00:00', freq='A-DEC')], [Timestamp('2002-12-31 00:00:00', freq='A-DEC'), Timestamp('2003-12-31 00:00:00', freq='A-DEC')], [Timestamp('2004-12-31 00:00:00', freq='A-DEC'), Timestamp('2005-12-31 00:00:00', freq='A-DEC')], [Timestamp('2006-12-31 00:00:00', freq='A-DEC'), Timestamp('2007-12-31 00:00:00', freq='A-DEC')], [Timestamp('2008-12-31 00:00:00', freq='A-DEC'), Timestamp('2009-12-31 00:00:00', freq='A-DEC')], 
[Timestamp('2010-12-31 00:00:00', freq='A-DEC')]) """ import pendulum @@ -157,16 +149,9 @@ def date_ranges_from_bounds(bounds, freq: str = "M", **kwargs): Examples -------- - >>> bounds = [("2020-01-01", "2020-01-31"), ("2020-02-01", "2020-02-29")] - >>> date_ranges = date_ranges_from_bounds(bounds) - >>> print(date_ranges) - (DatetimeIndex(['2020-01-01', '2020-01-02', ..., '2020-01-31'], dtype='datetime64[ns]', freq='D'), - DatetimeIndex(['2020-02-01', '2020-02-02', ..., '2020-02-29'], dtype='datetime64[ns]', freq='D')) - >>> bounds = [("2020-01-01", "2020-12-31")] - >>> date_ranges = date_ranges_from_bounds(bounds, freq="M") - >>> print(date_ranges) - (DatetimeIndex(['2020-01-31', '2020-02-29', ..., '2020-12-31'], dtype='datetime64[ns]', freq='M'),) + >>> date_ranges_from_bounds(bounds, freq="M") + DatetimeIndex(['2020-01-31', '2020-02-29', ..., '2020-12-31'], dtype='datetime64[ns]', freq='ME') """ objs = [] for start, end in bounds: diff --git a/src/pymorize/filecache.py b/src/pymorize/filecache.py index 34e753f..56e84f2 100644 --- a/src/pymorize/filecache.py +++ b/src/pymorize/filecache.py @@ -39,12 +39,31 @@ .. code-block:: python - >>> filepath = "/pool/data/CO2f_fesom_1850-01-01_1900-01-01.nc" + >>> filepath = "tests/data/test_experiments/my_expid/outdata/fesom/volo.nc" >>> cache.add_file(filepath) >>> # adding multiple files at once - >>> cache.add_files(["/path/to/file1.nc", "/path/to/file2.nc"]) + >>> cache.add_files(["tests/data/dummy_data/random1.nc", "tests/data/dummy_data/random2.nc"]) + +You can access the metadata of a file in the cache using the `get` method: + +.. code-block:: python + + >>> filepath = "tests/data/test_experiments/my_expid/outdata/fesom/volo.nc" - >>> # alternative way of adding file to cache and getting the metadata is by usuig the `get` method + >>> # alternative way of adding file to cache and getting the metadata is by using the `get` method - >>> cache.get("filepath") + >>> cache.get(filepath) + filepath tests/data/test_experiments/my_expid/outdata/f... 
+ filename volo.nc + checksum imohash:c8047bbd7e292dbe54a6387611f500c4 + filesize 584 + mtime ... + start 1951-01-02 00:00:00 + end 1951-01-13 00:00:00 + timespan 11 days, 0:00:00 + freq D + steps 12 + variable volo + units m3 + Name: 0, dtype: object For an overview of the cached data, use `summary` method: This method returns a @@ -55,6 +74,14 @@ .. code-block:: python >>> cache.summary() + variable seq volo + freq D D + start 0001-01-01 00:00:00 1951-01-02 00:00:00 + end 0001-01-11 00:00:00 1951-01-13 00:00:00 + timespan 10 days 00:00:00 11 days 00:00:00 + nfiles 2 1 + steps 11 12 + size 2120 584 To use a subset of the collection for a given variable, use `select_range` method. This will limit the files in the cache to those that are within the @@ -537,7 +564,3 @@ def register_cache(ds): filename = ds.encoding["source"] fc.add_file(filename) return ds - - -datapath = "/work/ba1103/a270073/out/awicm-1.0-recom/awi-esm-1-1-lr_kh800/piControl/outdata/fesom" -# filepat = "CO2f_fesom_*nc" diff --git a/src/pymorize/frequency.py b/src/pymorize/frequency.py index 55c6314..23c3e09 100644 --- a/src/pymorize/frequency.py +++ b/src/pymorize/frequency.py @@ -11,7 +11,7 @@ >>> freq = Frequency("day", 1.0) >>> print(freq.name) -'day' +day >>> print(freq.approx_interval) 1.0 >>> print(freq.time_method) @@ -28,7 +28,7 @@ >>> freq = Frequency.for_name("day") >>> print(freq.name) -'day' +day """ from enum import Enum diff --git a/src/pymorize/gather_inputs.py b/src/pymorize/gather_inputs.py index f8806fc..a8f7667 100644 --- a/src/pymorize/gather_inputs.py +++ b/src/pymorize/gather_inputs.py @@ -84,12 +84,13 @@ def _input_pattern_from_env(config: dict) -> re.Pattern: re.compile('.*') >>> bool(pattern.match('test')) True - >>> pattern = _input_pattern_from_env(config_only_env_name) >>> os.environ["CMOR_PATTERN"] = "test*nc" + >>> pattern = _input_pattern_from_env(config_only_env_name) >>> pattern re.compile('test*nc') >>> bool(pattern.match('test')) - True + False + >>> del 
os.environ["CMOR_PATTERN"] >>> pattern = _input_pattern_from_env(config_only_env_value) >>> pattern re.compile('.*') @@ -154,8 +155,9 @@ def _resolve_symlinks(files: List[pathlib.Path]) -> List[pathlib.Path]: -------- >>> from pathlib import Path >>> files = [Path('/path/to/file1'), Path('/path/to/file2')] - >>> _resolve_symlinks(files) - [Path('/path/to/file1'), Path('/path/to/file2')] + >>> paths = _resolve_symlinks(files) + >>> [str(p) for p in paths] # Convert to strings for doctest + ['/path/to/file1', '/path/to/file2'] """ if not all(isinstance(f, pathlib.Path) for f in files): logger.error("All files must be pathlib.Path objects. Got the following:") @@ -296,62 +298,8 @@ def gather_inputs(config: dict) -> dict: config: The configuration dictionary with the input files added. - - Examples - -------- - Assuming a filesystem with:: - - /path/to/input/files/test2000.nc - /path/to/input/files/test2001.nc - /path/to/input/files/test2002.nc - /path/to/input/files/test2003.nc - /path/to/input/files/test2004.nc - /path/to/input/files/test2005.nc - /path/to/input/files/test2006.nc - /path/to/input/files/test2007.nc - /path/to/input/files/test2008.nc - /path/to/input/files/test2009.nc - /path/to/input/files/test2010.nc - - >>> config = { - ... "rules": [ - ... { - ... "input_patterns": [ - ... "/path/to/input/files/test*nc" - ... ], - ... "year_start": 2000, - ... "year_end": 2010 - ... } - ... ] - ... 
} - >>> gather_inputs(config) - { - "rules": [ - { - "input_patterns": [ - "/path/to/input/files/test*nc" - ], - "year_start": 2000, - "year_end": 2010, - "input_files": { - "/path/to/input/files/test*nc": [ - "/path/to/input/files/test2000.nc", - "/path/to/input/files/test2001.nc", - "/path/to/input/files/test2002.nc", - "/path/to/input/files/test2003.nc", - "/path/to/input/files/test2004.nc", - "/path/to/input/files/test2005.nc", - "/path/to/input/files/test2006.nc", - "/path/to/input/files/test2007.nc", - "/path/to/input/files/test2008.nc", - "/path/to/input/files/test2009.nc", - "/path/to/input/files/test2010.nc" - ], - } - } - ] - } """ + # NOTE(PG): Example removed from docstring as it is scheduled for deprecation. rules = config.get("rules", []) for rule in rules: input_patterns = rule.get("input_patterns", []) diff --git a/tests/conftest.py b/tests/conftest.py deleted file mode 100644 index 5d19748..0000000 --- a/tests/conftest.py +++ /dev/null @@ -1,21 +0,0 @@ -import os - -from tests.utils.constants import TEST_ROOT - -pytest_plugins = [ - "tests.fixtures.configs", - "tests.fixtures.environment", - "tests.fixtures.fake_filesystem", - "tests.fixtures.sample_rules", - "tests.fixtures.config_files", - "tests.fixtures.CV_Dir", - "tests.fixtures.CMIP_Tables_Dir", -] - - -def pytest_unconfigure(config): - - # Remove all json files - for file in os.listdir(): - if "pipeline" in file and file.endswith(".json"): - os.remove(file) diff --git a/tests/data/dummy_data/random1.nc b/tests/data/dummy_data/random1.nc new file mode 100644 index 0000000..0c993d3 Binary files /dev/null and b/tests/data/dummy_data/random1.nc differ diff --git a/tests/data/dummy_data/random2.nc b/tests/data/dummy_data/random2.nc new file mode 100644 index 0000000..6181376 Binary files /dev/null and b/tests/data/dummy_data/random2.nc differ