Skip to content

Commit

Permalink
Merge pull request #53 from esm-tools/fix/doctests
Browse files Browse the repository at this point in the history
 Doctest Optimizations
  • Loading branch information
pgierz authored Nov 7, 2024
2 parents e937423 + 76b003f commit 994bb61
Show file tree
Hide file tree
Showing 11 changed files with 91 additions and 108 deletions.
6 changes: 4 additions & 2 deletions .github/workflows/CI-test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ jobs:
with:
submodules: recursive
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v3
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
Expand All @@ -38,4 +38,6 @@ jobs:
- name: Test with pytest
run: |
pytest -v --cov
pytest -v --doctest-modules src/
- name: Test with doctest
run: |
PYTHONPATH=src pytest -v --doctest-modules src/
43 changes: 43 additions & 0 deletions conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
import os
import re
from pathlib import Path

import pytest

from tests.utils.constants import TEST_ROOT

# Modules that pytest imports as plugins at start-up. Judging by their dotted
# paths they hold the shared test fixtures -- confirm against tests/fixtures/.
pytest_plugins = [
"tests.fixtures.configs",
"tests.fixtures.environment",
"tests.fixtures.fake_filesystem",
"tests.fixtures.sample_rules",
"tests.fixtures.config_files",
"tests.fixtures.CV_Dir",
"tests.fixtures.CMIP_Tables_Dir",
]


@pytest.hookimpl(tryfirst=True)
def pytest_collection_modifyitems(config, items):
    """Tag every item collected from a ``.py`` file with the ``doctest`` marker.

    Parameters
    ----------
    config
        The pytest configuration object. Unused here.
    items
        The collected test items; matching items are marked in place.
    """
    for collected in items:
        source_file = collected.fspath
        # Items without a backing file, or backed by a non-Python file, are
        # left untouched.
        if not source_file or source_file.ext != ".py":
            continue
        collected.add_marker(pytest.mark.doctest)


@pytest.fixture(autouse=True)
def pathlib_doctest_directive(doctest_namespace):
    """Make ``Path`` and a ``path_replace`` helper available in doctests.

    Two names are injected into ``doctest_namespace``:

    ``Path``
        :class:`pathlib.Path`.
    ``path_replace``
        A function that rewrites ``PosixPath(...)`` / ``WindowsPath(...)``
        occurrences in a string to the platform-neutral ``Path(...)``.
    """
    platform_path_repr = r"(PosixPath|WindowsPath)\((.*?)\)"
    generic_path_repr = r"Path(\2)"

    def path_replace(output):
        """Return *output* with platform-specific path reprs turned into ``Path(...)``."""
        return re.sub(platform_path_repr, generic_path_repr, output)

    doctest_namespace.update(Path=Path, path_replace=path_replace)


def pytest_unconfigure(config):
    """Remove all JSON files containing 'pipeline' in their name.

    pytest hook invoked while pytest is shutting down. Every entry in the
    current working directory whose name contains ``"pipeline"`` and ends
    with ``".json"`` is deleted. The clean-up is best effort: real
    directories that happen to match are skipped, and files that have
    already vanished are ignored, so the hook never aborts pytest's
    shutdown because of them.

    Parameters
    ----------
    config
        The pytest configuration object. Unused here.
    """
    for entry in os.listdir():
        if "pipeline" not in entry or not entry.endswith(".json"):
            continue
        # os.remove() cannot delete a directory; symlinks (even to a
        # directory) are still removable, so only real directories are skipped.
        if os.path.isdir(entry) and not os.path.islink(entry):
            continue
        try:
            os.remove(entry)
        except FileNotFoundError:
            # The file disappeared between listdir() and remove(), e.g.
            # another test process cleaned the same directory first.
            continue
2 changes: 2 additions & 0 deletions pytest.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[pytest]
doctest_optionflags = NORMALIZE_WHITESPACE IGNORE_EXCEPTION_DETAIL ELLIPSIS
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ def read(filename):
"deprecation",
"distributed",
"dpath",
"flexparser < 0.4", # NOTE(PG): See https://tinyurl.com/ypf99xnh
"flox",
"imohash",
"numbagg",
Expand Down
19 changes: 2 additions & 17 deletions src/pymorize/calendar.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,6 @@
>>> year_bounds = year_bounds_major_digits(2000, 2010, 2, 2)
>>> print(year_bounds)
[[2000, 2001], [2002, 2003], [2004, 2005], [2006, 2007], [2008, 2009], [2010, 2010]]
>>> date_range = date_ranges_from_bounds(year_bounds, freq="Y")
>>> print(date_range)
([Timestamp('2000-12-31 00:00:00', freq='A-DEC'), Timestamp('2001-12-31 00:00:00', freq='A-DEC')], [Timestamp('2002-12-31 00:00:00', freq='A-DEC'), Timestamp('2003-12-31 00:00:00', freq='A-DEC')], [Timestamp('2004-12-31 00:00:00', freq='A-DEC'), Timestamp('2005-12-31 00:00:00', freq='A-DEC')], [Timestamp('2006-12-31 00:00:00', freq='A-DEC'), Timestamp('2007-12-31 00:00:00', freq='A-DEC')], [Timestamp('2008-12-31 00:00:00', freq='A-DEC'), Timestamp('2009-12-31 00:00:00', freq='A-DEC')], [Timestamp('2010-12-31 00:00:00', freq='A-DEC')])
>>> date_range = date_ranges_from_year_bounds(year_bounds, freq="Y")
>>> print(date_range)
([Timestamp('2000-12-31 00:00:00', freq='A-DEC'), Timestamp('2001-12-31 00:00:00', freq='A-DEC')], [Timestamp('2002-12-31 00:00:00', freq='A-DEC'), Timestamp('2003-12-31 00:00:00', freq='A-DEC')], [Timestamp('2004-12-31 00:00:00', freq='A-DEC'), Timestamp('2005-12-31 00:00:00', freq='A-DEC')], [Timestamp('2006-12-31 00:00:00', freq='A-DEC'), Timestamp('2007-12-31 00:00:00', freq='A-DEC')], [Timestamp('2008-12-31 00:00:00', freq='A-DEC'), Timestamp('2009-12-31 00:00:00', freq='A-DEC')], [Timestamp('2010-12-31 00:00:00', freq='A-DEC')])
"""

import pendulum
Expand Down Expand Up @@ -157,16 +149,9 @@ def date_ranges_from_bounds(bounds, freq: str = "M", **kwargs):
Examples
--------
>>> bounds = [("2020-01-01", "2020-01-31"), ("2020-02-01", "2020-02-29")]
>>> date_ranges = date_ranges_from_bounds(bounds)
>>> print(date_ranges)
(DatetimeIndex(['2020-01-01', '2020-01-02', ..., '2020-01-31'], dtype='datetime64[ns]', freq='D'),
DatetimeIndex(['2020-02-01', '2020-02-02', ..., '2020-02-29'], dtype='datetime64[ns]', freq='D'))
>>> bounds = [("2020-01-01", "2020-12-31")]
>>> date_ranges = date_ranges_from_bounds(bounds, freq="M")
>>> print(date_ranges)
(DatetimeIndex(['2020-01-31', '2020-02-29', ..., '2020-12-31'], dtype='datetime64[ns]', freq='M'),)
>>> date_ranges_from_bounds(bounds, freq="M")
DatetimeIndex(['2020-01-31', '2020-02-29', ..., '2020-12-31'], dtype='datetime64[ns]', freq='ME')
"""
objs = []
for start, end in bounds:
Expand Down
37 changes: 30 additions & 7 deletions src/pymorize/filecache.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,12 +39,31 @@
.. code-block:: python
>>> filepath = "/pool/data/CO2f_fesom_1850-01-01_1900-01-01.nc"
>>> filepath = "tests/data/test_experiments/my_expid/outdata/fesom/volo.nc"
>>> cache.add_file(filepath)
>>> # adding multiple files at once
>>> cache.add_files(["/path/to/file1.nc", "/path/to/file2.nc"])
>>> cache.add_files(["tests/data/dummy_data/random1.nc", "tests/data/dummy_data/random2.nc"])
You can access the metadata of a file in the cache using the `get` method:
.. code-block:: python
>>> filepath = "tests/data/test_experiments/my_expid/outdata/fesom/volo.nc"
>>> # an alternative way of adding a file to the cache and getting its metadata is by using the `get` method
>>> cache.get("filepath")
>>> cache.get(filepath)
filepath tests/data/test_experiments/my_expid/outdata/f...
filename volo.nc
checksum imohash:c8047bbd7e292dbe54a6387611f500c4
filesize 584
mtime ...
start 1951-01-02 00:00:00
end 1951-01-13 00:00:00
timespan 11 days, 0:00:00
freq D
steps 12
variable volo
units m3
Name: 0, dtype: object
For an overview of the cached data, use the `summary` method. This method returns a
Expand All @@ -55,6 +74,14 @@
.. code-block:: python
>>> cache.summary()
variable seq volo
freq D D
start 0001-01-01 00:00:00 1951-01-02 00:00:00
end 0001-01-11 00:00:00 1951-01-13 00:00:00
timespan 10 days 00:00:00 11 days 00:00:00
nfiles 2 1
steps 11 12
size 2120 584
To use a subset of the collection for a given variable, use `select_range`
method. This will limit the files in the cache to those that are within the
Expand Down Expand Up @@ -537,7 +564,3 @@ def register_cache(ds):
filename = ds.encoding["source"]
fc.add_file(filename)
return ds


datapath = "/work/ba1103/a270073/out/awicm-1.0-recom/awi-esm-1-1-lr_kh800/piControl/outdata/fesom"
# filepat = "CO2f_fesom_*nc"
4 changes: 2 additions & 2 deletions src/pymorize/frequency.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
>>> freq = Frequency("day", 1.0)
>>> print(freq.name)
'day'
day
>>> print(freq.approx_interval)
1.0
>>> print(freq.time_method)
Expand All @@ -28,7 +28,7 @@
>>> freq = Frequency.for_name("day")
>>> print(freq.name)
'day'
day
"""

from enum import Enum
Expand Down
66 changes: 7 additions & 59 deletions src/pymorize/gather_inputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,12 +84,13 @@ def _input_pattern_from_env(config: dict) -> re.Pattern:
re.compile('.*')
>>> bool(pattern.match('test'))
True
>>> pattern = _input_pattern_from_env(config_only_env_name)
>>> os.environ["CMOR_PATTERN"] = "test*nc"
>>> pattern = _input_pattern_from_env(config_only_env_name)
>>> pattern
re.compile('test*nc')
>>> bool(pattern.match('test'))
True
False
>>> del os.environ["CMOR_PATTERN"]
>>> pattern = _input_pattern_from_env(config_only_env_value)
>>> pattern
re.compile('.*')
Expand Down Expand Up @@ -154,8 +155,9 @@ def _resolve_symlinks(files: List[pathlib.Path]) -> List[pathlib.Path]:
--------
>>> from pathlib import Path
>>> files = [Path('/path/to/file1'), Path('/path/to/file2')]
>>> _resolve_symlinks(files)
[Path('/path/to/file1'), Path('/path/to/file2')]
>>> paths = _resolve_symlinks(files)
>>> [str(p) for p in paths] # Convert to strings for doctest
['/path/to/file1', '/path/to/file2']
"""
if not all(isinstance(f, pathlib.Path) for f in files):
logger.error("All files must be pathlib.Path objects. Got the following:")
Expand Down Expand Up @@ -296,62 +298,8 @@ def gather_inputs(config: dict) -> dict:
config:
The configuration dictionary with the input files added.
Examples
--------
Assuming a filesystem with::
/path/to/input/files/test2000.nc
/path/to/input/files/test2001.nc
/path/to/input/files/test2002.nc
/path/to/input/files/test2003.nc
/path/to/input/files/test2004.nc
/path/to/input/files/test2005.nc
/path/to/input/files/test2006.nc
/path/to/input/files/test2007.nc
/path/to/input/files/test2008.nc
/path/to/input/files/test2009.nc
/path/to/input/files/test2010.nc
>>> config = {
... "rules": [
... {
... "input_patterns": [
... "/path/to/input/files/test*nc"
... ],
... "year_start": 2000,
... "year_end": 2010
... }
... ]
... }
>>> gather_inputs(config)
{
"rules": [
{
"input_patterns": [
"/path/to/input/files/test*nc"
],
"year_start": 2000,
"year_end": 2010,
"input_files": {
"/path/to/input/files/test*nc": [
"/path/to/input/files/test2000.nc",
"/path/to/input/files/test2001.nc",
"/path/to/input/files/test2002.nc",
"/path/to/input/files/test2003.nc",
"/path/to/input/files/test2004.nc",
"/path/to/input/files/test2005.nc",
"/path/to/input/files/test2006.nc",
"/path/to/input/files/test2007.nc",
"/path/to/input/files/test2008.nc",
"/path/to/input/files/test2009.nc",
"/path/to/input/files/test2010.nc"
],
}
}
]
}
"""
# NOTE(PG): Example removed from docstring as it is scheduled for deprecation.
rules = config.get("rules", [])
for rule in rules:
input_patterns = rule.get("input_patterns", [])
Expand Down
21 changes: 0 additions & 21 deletions tests/conftest.py

This file was deleted.

Binary file added tests/data/dummy_data/random1.nc
Binary file not shown.
Binary file added tests/data/dummy_data/random2.nc
Binary file not shown.

0 comments on commit 994bb61

Please sign in to comment.