Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Issue 99 #120

Merged
merged 5 commits into from
Jul 24, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,11 @@
grid_data_c_2d,
grid_data_d,
grid_data_d_1d,
grid_data_dask_expected,
grid_data_dask_expected_layered,
grid_data_dask_source,
grid_data_dask_source_layered,
grid_data_dask_target,
grid_data_e,
grid_data_e_1d,
quads_0_25,
Expand Down
72 changes: 72 additions & 0 deletions tests/fixtures/fixture_regridder.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import dask.array as da
import numpy as np
import pytest
import xarray as xr
Expand Down Expand Up @@ -337,3 +338,74 @@ def expected_results_linear():
"dy": -50.0,
},
)


@pytest.fixture(scope="function")
def grid_data_dask_source():
data = np.arange(10000).reshape((100, 100))
data = da.from_array(data, chunks=(10, (10, 30, 60)))
return xr.DataArray(
data=data,
dims=["y", "x"],
coords={
"y": -np.arange(100),
"x": np.arange(100),
"dx": 1.0,
"dy": -1.0,
},
)


@pytest.fixture(scope="function")
def grid_data_dask_source_layered():
data = np.arange(30000).reshape((3, 100, 100))
data = da.from_array(data, chunks=(3, 10, (10, 30, 60)))
return xr.DataArray(
data=data,
dims=["layer", "y", "x"],
coords={
"layer": np.arange(3),
"y": -np.arange(100),
"x": np.arange(100),
"dx": 1.0,
"dy": -1.0,
},
)


@pytest.fixture(scope="function")
def grid_data_dask_target():
data = np.zeros(100).reshape((10, 10))
data = da.from_array(data, chunks=(10, (5, 5)))
return xr.DataArray(
data=data,
dims=["y", "x"],
coords={
"y": -np.arange(10) * 10,
"x": np.arange(10) * 10,
"dx": 10.0,
"dy": -10.0,
},
)


@pytest.fixture(scope="function")
def grid_data_dask_expected():
data1 = np.tile(np.arange(0.0, 100.0, 10.0), reps=10).reshape((10, 10))
data2 = np.repeat(np.arange(0.0, 10000.0, 1000.0), repeats=10).reshape((10, 10))
data = data1 + data2
return xr.DataArray(
data=data,
dims=["y", "x"],
coords={
"y": -np.arange(10) * 10,
"x": np.arange(10) * 10,
"dx": 10.0,
"dy": -10.0,
},
)


@pytest.fixture(scope="function")
def grid_data_dask_expected_layered(grid_data_dask_expected):
return grid_data_dask_expected.expand_dims(dim={"layer": np.arange(3)})
21 changes: 21 additions & 0 deletions tests/test_regrid/test_regridder.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,3 +192,24 @@ def test_regridder_from_dataset(cls, disk, quads_1):
new_regridder = cls.from_dataset(dataset)
new_result = new_regridder.regrid(disk)
assert np.array_equal(new_result.values, result.values, equal_nan=True)


def test_regridder_daks_arrays(
grid_data_dask_source,
grid_data_dask_source_layered,
grid_data_dask_target,
grid_data_dask_expected,
grid_data_dask_expected_layered,
):
regridder = CentroidLocatorRegridder(
source=grid_data_dask_source, target=grid_data_dask_target
)
result = regridder.regrid(grid_data_dask_source)
assert result.equals(grid_data_dask_expected)

# with broadcasting
regridder = CentroidLocatorRegridder(
source=grid_data_dask_source_layered, target=grid_data_dask_target
)
result = regridder.regrid(grid_data_dask_source_layered)
assert result.isel(layer=0).equals(grid_data_dask_expected_layered.isel(layer=0))
19 changes: 11 additions & 8 deletions xugrid/regrid/regridder.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,8 +144,17 @@ def _regrid_array(self, source):
source = source.reshape((-1, source_grid.size))

size = self._target.size

if isinstance(source, DaskArray):
chunks = source.chunks[: -source_grid.ndim] + (self._target.shape,)
# It's possible that the topology dimensions are chunked (e.g. from
# reading multiple partitions). The regrid operation does not
# support this, since we might need multiple source chunks for a
# single target chunk, which destroys the 1:1 relation between
# chunks. Here we ensure that the topology dimensions are contained
# in a single contiguous chunk.
contiguous_chunks = (source.chunks[0], (source.shape[-1],))
source = source.rechunk(contiguous_chunks)
chunks = source.chunks[:-1] + (self._target.size,)
out = dask.array.map_blocks(
self._regrid, # func
source, # *args
Expand All @@ -162,7 +171,6 @@ def _regrid_array(self, source):
"Expected dask.array.Array or numpy.ndarray. Received: "
f"{type(source).__name__}"
)

# E.g.: sizes of ("time", "layer") + ("y", "x")
out_shape = first_dims_shape + self._target.shape
return out.reshape(out_shape)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why are you calling the reshape in the if else tree above, instead of at the end like it was? The original version appears to be cleaner to me, as reshape only has to be mentioned once, instead of twice after your alterations.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because the reshape does not work for a dask-array. For the dask-array I first used the dask.array.reshape(), which gives the result that we want. For the current implementation (see reply above) I could move it back indeed.

Copy link
Contributor

@JoerivanEngelen JoerivanEngelen Jul 13, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah an understandable struggle with dask.array.reshape :-) I would prefer if you moved these lines back down because we can leave the code as it was now that the call to dask.array.compute was introduced.

Expand Down Expand Up @@ -203,12 +211,7 @@ def regrid(self, object) -> UgridDataArray:
if type(self._target) is StructuredGrid2d:
source_dims = ("y", "x")
regridded = self.regrid_dataarray(object, source_dims)
regridded = regridded.assign_coords(
coords={
"y": np.flip(self._target.ybounds.midpoints),
"x": self._target.xbounds.midpoints,
}
)
regridded = regridded.assign_coords(coords=self._target.coords)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are the y coordinates still flipped in this implementation?

It is important to have y coordinates in descending order

I see you defined a new property coords, but I don't see it referring to midpoints, which I think is what we want?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added the coords property to the StructuredGrid- class. There it uses the actual grid properties directly and not the possible flipped midpoints. This implementation was needed to get the right dx/dy from the target-grid.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sounds good, just to double check: This doesn't result in grids which are flipped from descending y to ascending y coordinates?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No it does not indeed. Only the midpoints arrays are fliped for computing the proper weights. The original coords are untouched and now used. I double checked myself + it is also tested in the testbench

return regridded
else:
source_dims = (object.ugrid.grid.face_dimension,)
Expand Down
15 changes: 14 additions & 1 deletion xugrid/regrid/structured.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,16 @@ def __init__(self, obj: Union[xr.DataArray, xr.Dataset], name: str):
self.bounds = bounds
self.flipped = flipped
self.side = side
self.grid = obj
self.dname = size_name
self.dvalue = obj[size_name].values
self.index = obj.indexes[name].values

@property
def coords(self) -> dict:
return {
self.name: self.index,
self.dname: self.dvalue,
}

@property
def ndim(self) -> int:
Expand Down Expand Up @@ -420,6 +429,10 @@ def __init__(
self.xbounds = StructuredGrid1d(obj, name_x)
self.ybounds = StructuredGrid1d(obj, name_y)

@property
def coords(self) -> dict:
return self.ybounds.coords | self.xbounds.coords

@property
def ndim(self) -> int:
return 2
Expand Down