Merge pull request #279 from Deltares/278-prevent-inconsistent-chunks-error-in-xumerge_partitions

avoid inconsistent chunks error in `merge_partitions()`
Huite authored Aug 14, 2024
2 parents dc5f113 + 34454b4 commit 63a5802
Showing 3 changed files with 88 additions and 16 deletions.
23 changes: 15 additions & 8 deletions docs/changelog.rst
@@ -32,15 +32,22 @@ Fixed
  all values are NaN or when all weights (overlaps) are zero, and all methods
  give the same answer irrespective of the order in which the values are
  encountered.

- :meth:`xugrid.merge_partitions` will now raise a ValueError if zero
  partitions are provided.
- :meth:`xugrid.merge_partitions` will no longer error when chunks are
  inconsistent across variables in a dataset; it now returns a merged dataset
  while keeping the chunking per variable. (Note that if chunks are
  inconsistent for a variable **across partitions**, they are still unified
  for that variable.)
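
A minimal sketch of the behavior described in this entry, mirroring the new tests in this PR. It assumes the bundled ``xu.data.disk()`` sample dataset (with ``node_z``, ``edge_z``, and ``face_z`` variables); the chunk sizes are purely illustrative:

    import numpy as np
    import xarray as xr
    import xugrid as xu

    # Assumed sample data: a UgridDataset with node_z, edge_z, and face_z.
    uds = xu.data.disk() * xr.DataArray(np.ones(3), dims=("time",))

    # Chunk each variable differently along "time", i.e. chunks are
    # inconsistent across variables.
    uds["node_z"] = uds["node_z"].chunk({"time": (3,)})
    uds["edge_z"] = uds["edge_z"].chunk({"time": (2, 1)})
    uds["face_z"] = uds["face_z"].chunk({"time": (1, 2)})

    # Partition and merge again: previously this tripped xarray's
    # "inconsistent chunks" ValueError; now it returns a merged dataset
    # and keeps the chunking per variable, while each UGRID dimension
    # becomes a single chunk.
    part1, part2 = uds.ugrid.partition(n_part=2)
    merged = xu.merge_partitions([part1, part2])
    print(merged["face_z"].chunks)  # the (1, 2) time chunks are preserved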

Added
~~~~~

- Percentiles (5, 10, 25, 50, 75, 90, 95) have been added to the
  :class:`xugrid.OverlapRegridder` as standard available reduction methods
  (available as ``"p5", "p10"``, etc.). Custom percentile values (e.g. 2.5, 42)
  can be set up using :meth:`xugrid.OverlapRegridder.create_percentile_method`.
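
The percentile methods above can be sketched briefly as well; this is an illustration rather than documented usage: it reuses the assumed ``xu.data.disk()`` sample data, regrids onto its own grid just to have a valid target, and assumes ``create_percentile_method`` takes a percentile value and returns an object that can be passed as ``method``:

    import xugrid as xu

    uds = xu.data.disk()     # assumed sample dataset with a "face_z" variable
    source = uds["face_z"]
    target = uds.ugrid.grid  # same grid as target, purely for illustration

    # Standard percentile reductions are available by name.
    median_regridder = xu.OverlapRegridder(source, target, method="p50")
    face_z_median = median_regridder.regrid(source)

    # Custom percentile value (assumed signature).
    p42 = xu.OverlapRegridder.create_percentile_method(42)
    p42_regridder = xu.OverlapRegridder(source, target, method=p42)
    face_z_p42 = p42_regridder.regrid(source)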

Changed
~~~~~~~

@@ -77,7 +84,7 @@ Fixed
Added
~~~~~

- Included ``edge_node_connectivity`` in :meth:`xugrid.Ugrid2d.from_meshkernel`,
  so the ordering of edges is consistent with ``meshkernel``.
- Added :meth:`xugrid.Ugrid1d.create_data_array`,
  :meth:`xugrid.Ugrid2d.create_data_array`, and
@@ -86,7 +93,7 @@ Added
- Added :func:`xugrid.create_snap_to_grid_dataframe` to provide
  more versatile snapping, e.g. with custom reductions to assign
  aggregated properties to grid edges.

Changed
~~~~~~~

@@ -104,10 +111,10 @@ Fixed
  :class:`xugrid.CentroidLocatorRegridder`, :class:`xugrid.OverlapRegridder`,
  :class:`xugrid.RelativeOverlapRegridder`, which gave the method the tendency
  to repeat the first value in the source grid across the target grid.

Added
~~~~~

- :func:`xugrid.earcut_triangulate_polygons` and
  :meth:`xugrid.Ugrid2d.earcut_triangulate_polygons` have been added to break
  down polygon geodataframes into a triangular mesh for further processing.
@@ -245,7 +252,7 @@ Fixed
  original grid being returned, rather than a new grid with the faces or edges
  shuffled. This breaks the link between the topology and data when using
  ``.isel`` on a UgridDataset or UgridDataArray. This has been fixed: both data
  and the topology are now shuffled accordingly.

[0.6.5] 2023-09-30
------------------
@@ -270,7 +277,7 @@ Changed
- Xugrid now contains a partial copy of the xarray plot utils module, and its
  tests. The latest xarray release broke xugrid (on import), since (private)
  parts of xarray were used which no longer existed.

Fixed
~~~~~

46 changes: 46 additions & 0 deletions tests/test_partitioning.py
@@ -39,6 +39,27 @@ def test_labels_to_indices():
    assert np.array_equal(indices[2], [3, 4])


def test_single_ugrid_chunk():
    grid = generate_mesh_2d(3, 3)
    ugrid_dims = set(grid.dimensions)
    da = xr.DataArray(np.ones(grid.n_face), dims=(grid.face_dimension,))
    assert pt.single_ugrid_chunk(da, ugrid_dims) is da

    da = da.chunk({grid.face_dimension: (3, 3, 3)})
    single = pt.single_ugrid_chunk(da, ugrid_dims)
    assert single.chunks == ((9,),)

    # Don't touch other dims
    da_time = (
        xr.DataArray(data=np.ones(3), dims=("time",)).chunk({"time": (1, 1, 1)}) * da
    )
    single = pt.single_ugrid_chunk(da_time, ugrid_dims)
    assert single.chunks == (
        (1, 1, 1),
        (9,),
    )


class TestGridPartitioning:
    @pytest.fixture(autouse=True)
    def setup(self):
@@ -178,6 +199,31 @@ def test_merge_partitions_no_duplicates(self):
        merged = pt.merge_partitions([part1, part2])
        assert np.bincount(merged["face_z"] == 1).all()

    def test_merge_inconsistent_chunks_across_partitions(self):
        part1, part2 = self.uds.ugrid.partition(n_part=2)
        time = xr.DataArray(data=np.ones(3), dims=("time",))
        part1 = (part1 * time).chunk({"time": (1, 1, 1)})
        part2 = (part2 * time).chunk({"time": (1, 2)})
        merged = pt.merge_partitions([part1, part2])
        assert isinstance(merged, xu.UgridDataset)
        assert merged.chunks["time"] == (1, 1, 1)

    def test_merge_inconsistent_chunks_across_variables(self):
        uds = self.uds * xr.DataArray(data=np.ones(3), dims=("time",))
        # Make them inconsistent across the variables
        uds["node_z"] = uds["node_z"].chunk({"time": (3,)})
        uds["edge_z"] = uds["edge_z"].chunk({"time": (2, 1)})
        uds["face_z"] = uds["face_z"].chunk({"time": (1, 2)})
        part1, part2 = uds.ugrid.partition(n_part=2)
        merged = pt.merge_partitions([part1, part2])
        # Test that it runs without encountering the xarray "inconsistent
        # chunks" ValueError.
        assert isinstance(merged, xu.UgridDataset)
        # Make sure they remain inconsistent after merging.
        assert merged["node_z"].chunks == ((self.grid.n_node,), (3,))
        assert merged["edge_z"].chunks == ((self.grid.n_edge,), (2, 1))
        assert merged["face_z"].chunks == ((self.grid.n_face,), (1, 2))


class TestMultiTopology2DMergePartitions:
    @pytest.fixture(autouse=True)
35 changes: 27 additions & 8 deletions xugrid/ugrid/partitioning.py
@@ -306,6 +306,26 @@ def merge_data_along_dim(
    return xr.concat(to_merge, dim=merge_dim)


def single_ugrid_chunk(da: xr.DataArray, ugrid_dims: set[str]) -> xr.DataArray:
    """
    Ensure that along a UGRID dimension only a single chunk is defined.
    Preserving chunks on the merged result may generate a very sub-optimal,
    complex dask graph.
    """
    if not da.chunks:
        return da

    chunks = {}
    for dim, sizes in zip(da.dims, da.chunks):
        # Define a single chunk for each UGRID dimension.
        if dim in ugrid_dims:
            chunks[dim] = (da.sizes[dim],)
        else:
            chunks[dim] = sizes
    return da.chunk(chunks)


def merge_partitions(partitions, merge_ugrid_chunks: bool = True):
"""
Merge topology and data, partitioned along UGRID dimensions, into a single
@@ -382,13 +402,12 @@ def merge_partitions(partitions, merge_ugrid_chunks: bool = True):
        )
        merged.update(merged_selection)

-    # Merge chunks along the UGRID dimensions.
-    if merged.chunks and merge_ugrid_chunks:
-        chunks = dict(merged.chunks)
-        for dim in chunks:
-            # Define a single chunk for each UGRID dimension.
-            if dim in ugrid_dims:
-                chunks[dim] = (merged.sizes[dim],)
-        merged = merged.chunk(chunks)
+    # Merge chunks along the UGRID dimensions
+    if merge_ugrid_chunks:
+        # Running `merged[varname] = da` will cause xarray to auto-align. This
+        # is quite expensive, especially when there are many variables. Setting
+        # it via `._variables` sidesteps the alignment.
+        for varname, da in merged._variables.items():
+            merged._variables[varname] = single_ugrid_chunk(da, ugrid_dims)

    return UgridDataset(merged, merged_grids)
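
For reference, a small self-contained sketch, using plain xarray plus dask and an illustrative UGRID dimension name, of what the chunk unification in ``single_ugrid_chunk`` amounts to: UGRID dimensions are collapsed to a single chunk, all other dimensions keep their chunk sizes:

    import numpy as np
    import xarray as xr

    ugrid_dims = {"mesh2d_nFaces"}  # illustrative UGRID dimension name
    da = xr.DataArray(
        np.ones((9, 3)),
        dims=("mesh2d_nFaces", "time"),
    ).chunk({"mesh2d_nFaces": (3, 3, 3), "time": (1, 1, 1)})

    # Collapse UGRID dimensions to one chunk, keep the rest as-is.
    chunks = {
        dim: (da.sizes[dim],) if dim in ugrid_dims else sizes
        for dim, sizes in zip(da.dims, da.chunks)
    }
    print(da.chunk(chunks).chunks)  # ((9,), (1, 1, 1))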
