Skip to content

Commit

Permalink
use xr directly
Browse files (browse the repository at this point in the history)
  • Loading branch information
malmans2 committed May 28, 2024
1 parent de1aae1 commit 2208ba8
Showing 1 changed file with 28 additions and 15 deletions.
43 changes: 28 additions & 15 deletions c3s_eqc_automatic_quality_control/download.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -432,23 +432,36 @@ def _download_and_transform_requests(
preprocess = functools.partial(
_preprocess,
collection_id=collection_id,
preprocess=open_mfdataset_kwargs.get("preprocess", None),
preprocess=open_mfdataset_kwargs.pop("preprocess", None),
)
ek_ds = earthkit.data.from_source("file", sources)
if isinstance(ek_ds, GribFieldList):
# https://github.com/ecmwf/earthkit-data/issues/374
# squeeze=True is cfgrib default
open_dataset_kwargs = {"chunks": {}, "squeeze": True} | open_mfdataset_kwargs
ds = ek_ds.to_xarray(xarray_open_dataset_kwargs=open_dataset_kwargs)
ds = preprocess(ds)
elif isinstance(ek_ds, File) and isinstance(ek_ds._reader, CSVReader):
assert not open_mfdataset_kwargs
ds = preprocess(ek_ds.to_xarray())
else:

if all(
isinstance(source, str) and (".grib", ".grb", ".grb1", ".grb2")
for source in sources
):
open_mfdataset_kwargs["preprocess"] = preprocess
ds = ek_ds.to_xarray(xarray_open_mfdataset_kwargs=open_mfdataset_kwargs)
if not isinstance(ds, xr.Dataset):
raise TypeError(f"`earthkit.data` returned {type(ds)} instead of a xr.Dataset")
ds = xr.open_mfdataset(sources, **open_mfdataset_kwargs)
else:
ek_ds = earthkit.data.from_source("file", sources)
if isinstance(ek_ds, GribFieldList):
# https://github.com/ecmwf/earthkit-data/issues/374
# squeeze=True is cfgrib default
open_dataset_kwargs = {
"chunks": {},
"squeeze": True,
} | open_mfdataset_kwargs
ds = ek_ds.to_xarray(xarray_open_dataset_kwargs=open_dataset_kwargs)
ds = preprocess(ds)
elif isinstance(ek_ds, File) and isinstance(ek_ds._reader, CSVReader):
assert not open_mfdataset_kwargs
ds = preprocess(ek_ds.to_xarray())
else:
open_mfdataset_kwargs["preprocess"] = preprocess
ds = ek_ds.to_xarray(xarray_open_mfdataset_kwargs=open_mfdataset_kwargs)
if not isinstance(ds, xr.Dataset):
raise TypeError(
f"`earthkit.data` returned {type(ds)} instead of a xr.Dataset"
)

if transform_func is not None:
with cacholote.config.set(return_cache_entry=False):
Expand Down

0 comments on commit 2208ba8

Please sign in to comment.