Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Issue 99 #120

Merged
merged 5 commits into from
Jul 24, 2023
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,11 @@
grid_data_c_2d,
grid_data_d,
grid_data_d_1d,
grid_data_dask_expected,
grid_data_dask_expected_layered,
grid_data_dask_source,
grid_data_dask_source_layered,
grid_data_dask_target,
grid_data_e,
grid_data_e_1d,
quads_0_25,
Expand Down
72 changes: 72 additions & 0 deletions tests/fixtures/fixture_regridder.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import dask.array as da
import numpy as np
import pytest
import xarray as xr
Expand Down Expand Up @@ -337,3 +338,74 @@ def expected_results_linear():
"dy": -50.0,
},
)


@pytest.fixture(scope="function")
def grid_data_dask_source():
data = np.arange(10000).reshape((100, 100))
data = da.from_array(data, chunks=(10, (10, 30, 60)))
return xr.DataArray(
data=data,
dims=["y", "x"],
coords={
"y": -np.arange(100),
"x": np.arange(100),
"dx": 1.0,
"dy": -1.0,
},
)


@pytest.fixture(scope="function")
def grid_data_dask_source_layered():
data = np.arange(30000).reshape((3, 100, 100))
data = da.from_array(data, chunks=(3, 10, (10, 30, 60)))
return xr.DataArray(
data=data,
dims=["layer", "y", "x"],
coords={
"layer": np.arange(3),
"y": -np.arange(100),
"x": np.arange(100),
"dx": 1.0,
"dy": -1.0,
},
)


@pytest.fixture(scope="function")
def grid_data_dask_target():
data = np.zeros(100).reshape((10, 10))
data = da.from_array(data, chunks=(10, (5, 5)))
return xr.DataArray(
data=data,
dims=["y", "x"],
coords={
"y": -np.arange(10) * 10,
"x": np.arange(10) * 10,
"dx": 10.0,
"dy": -10.0,
},
)


@pytest.fixture(scope="function")
def grid_data_dask_expected():
data1 = np.tile(np.arange(0.0, 100.0, 10.0), reps=10).reshape((10, 10))
data2 = np.repeat(np.arange(0.0, 10000.0, 1000.0), repeats=10).reshape((10, 10))
data = data1 + data2
return xr.DataArray(
data=data,
dims=["y", "x"],
coords={
"y": -np.arange(10) * 10,
"x": np.arange(10) * 10,
"dx": 10.0,
"dy": -10.0,
},
)


@pytest.fixture(scope="function")
def grid_data_dask_expected_layered(grid_data_dask_expected):
return grid_data_dask_expected.expand_dims(dim={"layer": np.arange(3)})
21 changes: 21 additions & 0 deletions tests/test_regrid/test_regridder.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,3 +192,24 @@ def test_regridder_from_dataset(cls, disk, quads_1):
new_regridder = cls.from_dataset(dataset)
new_result = new_regridder.regrid(disk)
assert np.array_equal(new_result.values, result.values, equal_nan=True)


def test_regridder_daks_arrays(
grid_data_dask_source,
grid_data_dask_source_layered,
grid_data_dask_target,
grid_data_dask_expected,
grid_data_dask_expected_layered,
):
regridder = CentroidLocatorRegridder(
source=grid_data_dask_source, target=grid_data_dask_target
)
result = regridder.regrid(grid_data_dask_source)
assert result.equals(grid_data_dask_expected)

# with broadcasting
regridder = CentroidLocatorRegridder(
source=grid_data_dask_source_layered, target=grid_data_dask_target
)
result = regridder.regrid(grid_data_dask_source_layered)
assert result.isel(layer=0).equals(grid_data_dask_expected_layered.isel(layer=0))
26 changes: 15 additions & 11 deletions xugrid/regrid/regridder.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,12 @@
import dask.array

DaskArray = dask.array.Array
DaskRechunk = dask.array.rechunk
DaskReshape = dask.array.reshape
except ImportError:
DaskArray = ()
DaskRechunk = ()
DaskReshape = ()

import xugrid
from xugrid.constants import FloatArray
Expand Down Expand Up @@ -144,8 +148,14 @@ def _regrid_array(self, source):
source = source.reshape((-1, source_grid.size))

size = self._target.size
# E.g.: sizes of ("time", "layer") + ("y", "x")
out_shape = first_dims_shape + self._target.shape

JoerivanEngelen marked this conversation as resolved.
Show resolved Hide resolved
if isinstance(source, DaskArray):
chunks = source.chunks[: -source_grid.ndim] + (self._target.shape,)
# for DaskArray's from multiple partitions, rechunk first to single size per dimension
# for now always rechunk, could be optional only when explicit chunks in single dimension
source = DaskRechunk(source, source.shape)
chunks = source.chunks[: -source_grid.ndim] + (self._target.shape)
JoerivanEngelen marked this conversation as resolved.
Show resolved Hide resolved
out = dask.array.map_blocks(
self._regrid, # func
source, # *args
Expand All @@ -155,17 +165,16 @@ def _regrid_array(self, source):
chunks=chunks,
meta=np.array((), dtype=source.dtype),
)
out = out.compute().reshape(out_shape)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Mind that the call to compute results that the results are loaded into memory. Not sure if that fits each use case. For now it is fine, we can optimize this later. Perhaps add a #TODO comment to later investigate this?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes that is possible suboptimal. I tried to use the dask.array.reshape() first, but that does not work for splitting up dimensions unevenly (https://docs.dask.org/en/stable/_modules/dask/array/reshape.html). So I used there suggestion to compute first. But definitely a TODO.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah thanks for the info, a proper struggle with reshaping! We can resolve this conversation when the TODO is added, and I think we can make an issue for this!

elif isinstance(source, np.ndarray):
out = self._regrid(source, self._weights, size)
out = out.reshape(out_shape)
else:
raise TypeError(
"Expected dask.array.Array or numpy.ndarray. Received: "
f"{type(source).__name__}"
)

# E.g.: sizes of ("time", "layer") + ("y", "x")
out_shape = first_dims_shape + self._target.shape
return out.reshape(out_shape)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why are you calling the reshape in the if else tree above, instead of at the end like it was? The original version appears to be cleaner to me, as reshape only has to be mentioned once, instead of twice after your alterations.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because the reshape does not work for a dask-array. For the dask-array I first used the dask.array.reshape(), which gives the result that we want. For the current implementation (see reply above) I could move it back indeed.

Copy link
Contributor

@JoerivanEngelen JoerivanEngelen Jul 13, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah an understandable struggle with dask.array.reshape :-) I would prefer if you moved these lines back down because we can leave the code as it was now that the call to dask.array.compute was introduced.

return out

def regrid_dataarray(self, source: xr.DataArray, source_dims: Tuple[str]):
# Do not set vectorize=True: numba will run the for loop more
Expand Down Expand Up @@ -203,12 +212,7 @@ def regrid(self, object) -> UgridDataArray:
if type(self._target) is StructuredGrid2d:
source_dims = ("y", "x")
regridded = self.regrid_dataarray(object, source_dims)
regridded = regridded.assign_coords(
coords={
"y": np.flip(self._target.ybounds.midpoints),
"x": self._target.xbounds.midpoints,
}
)
regridded = regridded.assign_coords(coords=self._target.coords)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are the y coordinates still flipped in this implementation?

It is important to have y coordinates in descending order

I see you defined a new property coords, but I don't see it referring to midpoints, which I think is what we want?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added the coords property to the StructuredGrid- class. There it uses the actual grid properties directly and not the possible flipped midpoints. This implementation was needed to get the right dx/dy from the target-grid.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sounds good, just to double check: This doesn't result in grids which are flipped from descending y to ascending y coordinates?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No it does not indeed. Only the midpoints arrays are fliped for computing the proper weights. The original coords are untouched and now used. I double checked myself + it is also tested in the testbench

return regridded
else:
source_dims = (object.ugrid.grid.face_dimension,)
Expand Down
15 changes: 14 additions & 1 deletion xugrid/regrid/structured.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,16 @@ def __init__(self, obj: Union[xr.DataArray, xr.Dataset], name: str):
self.bounds = bounds
self.flipped = flipped
self.side = side
self.grid = obj
self.dname = size_name
self.dvalue = obj[size_name].values
self.index = obj.indexes[name].values

@property
def coords(self) -> dict:
return {
self.name: self.index,
self.dname: self.dvalue,
}

@property
def ndim(self) -> int:
Expand Down Expand Up @@ -420,6 +429,10 @@ def __init__(
self.xbounds = StructuredGrid1d(obj, name_x)
self.ybounds = StructuredGrid1d(obj, name_y)

@property
def coords(self) -> dict:
return self.ybounds.coords | self.xbounds.coords

@property
def ndim(self) -> int:
return 2
Expand Down
Loading