Add extra edge cases to snap to grid (#263)

* Add extra edge cases to snap to grid * Fix snapping index (#267) * Node index != line index; every N segments has N + 1 nodes! * Add create_snap_to_grid_dataframe function * Put snap_to_grid cases in pytest cases * Fix code in docstring example * Add case single line at edge * Update test for numba_celltree 0.1.8 fix --------- Co-authored-by: Huite <[email protected]>
Deltares · Aug 5, 2024 · f21165f · f21165f
1 parent 84b2e80
commit f21165f
Show file tree

Hide file tree

Showing 7 changed files with 3,585 additions and 3,389 deletions.
diff --git a/docs/api.rst b/docs/api.rst
@@ -143,6 +143,7 @@ nodes/edges/faces if within a certain range.
       :toctree: api/
 
        snap_to_grid
+       create_snap_to_grid_dataframe
 
 
 Regridding

diff --git a/docs/changelog.rst b/docs/changelog.rst
@@ -21,6 +21,9 @@ Fixed
   setting ``merge_ugrid_chunks=False``. This keyword will likely be deprecated
   in the future as merging the UGRID dimension chunks should be superior for
   (almost all?) subsquent operations.
+- :func:`xugrid.snap_to_grid` now returns proper line indexes when multiple
+  linestrings are snapped. Snapping previously could result in correct
+  linestring locations, but wrong line indexes.
 
 Added
 ~~~~~
@@ -31,7 +34,10 @@ Added
   :meth:`xugrid.Ugrid2d.create_data_array`, and
   :meth:`xugrid.UgridDataArray.from_data` to more easily instantiate a
   UgridDataArray from a grid topology and an array of values.
-
+- Added :func:`xugrid.create_snap_to_grid_dataframe` to provide
+  more versatile snapping, e.g. with custom reductions to assign_edge_coords
+  aggregated properties to grid edges.
+
 Changed
 ~~~~~~~
 

diff --git a/pixi.lock b/pixi.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -81,7 +81,7 @@ geopandas = "*"
 mapbox_earcut = "*"
 matplotlib-base = "*"
 netcdf4 = "*"
-numba_celltree = "*"
+numba_celltree = ">=0.1.8"
 numpy = "<2.0"
 pip = "*"
 pooch = "*"
@@ -90,6 +90,7 @@ pydata-sphinx-theme = "*"
 pymetis = "*"
 pyproj = "*"
 pytest = "*"
+pytest-cases = "*"
 pytest-cov = "*"
 python = ">=3.9"
 ruff = "*"

diff --git a/tests/test_snap.py b/tests/test_snap.py
@@ -4,9 +4,34 @@
 import shapely
 import shapely.geometry as sg
 import xarray as xr
+from pytest_cases import parametrize_with_cases
 
 import xugrid as xu
-from xugrid.ugrid.snapping import snap_nodes, snap_to_grid, snap_to_nodes
+from xugrid.ugrid.snapping import (
+    create_snap_to_grid_dataframe,
+    snap_nodes,
+    snap_to_grid,
+    snap_to_nodes,
+)
+
+
+@pytest.fixture(scope="function")
+def structured():
+    shape = nlay, nrow, ncol = 3, 9, 9
+    dx = 10.0
+    dy = -10.0
+    xmin = 0.0
+    xmax = dx * ncol
+    ymin = 0.0
+    ymax = abs(dy) * nrow
+    dims = ("layer", "y", "x")
+
+    layer = np.arange(1, nlay + 1)
+    y = np.arange(ymax, ymin, dy) + 0.5 * dy
+    x = np.arange(xmin, xmax, dx) + 0.5 * dx
+    coords = {"layer": layer, "y": y, "x": x}
+
+    return xr.DataArray(np.ones(shape, dtype=np.int32), coords=coords, dims=dims)
 
 
 def test_snap__three_points():
@@ -125,35 +150,138 @@ def test_snap_to_grid():
     # TODO test for returned values...
 
 
-def test_snap_to_grid_with_data():
-    # This caused a failure in 0.6.3
+class LineCases:
+    def case_single_line(self):
+        line_x = [40.2, 40.2, 40.2]
+        line_y = [82.0, 40.0, 0.0]
+        geometry = gpd.GeoDataFrame(
+            geometry=[shapely.linestrings(line_x, line_y)], data={"a": [1.0]}
+        )
+        unique_values = np.array([0.0, np.nan])
+        line_counts = np.array([8, 172])
+        return geometry, unique_values, line_counts
 
-    shape = nlay, nrow, ncol = 3, 9, 9
-    dx = 10.0
-    dy = -10.0
-    xmin = 0.0
-    xmax = dx * ncol
-    ymin = 0.0
-    ymax = abs(dy) * nrow
-    dims = ("layer", "y", "x")
+    def case_single_line_at_edge(self):
+        line_x = [40.0, 40.0, 40.0]
+        line_y = [82.0, 40.0, 0.0]
+        geometry = gpd.GeoDataFrame(
+            geometry=[shapely.linestrings(line_x, line_y)], data={"a": [1.0]}
+        )
+        unique_values = np.array([0.0, np.nan])
+        line_counts = np.array([8, 172])
+        return geometry, unique_values, line_counts
 
-    layer = np.arange(1, nlay + 1)
-    y = np.arange(ymax, ymin, dy) + 0.5 * dy
-    x = np.arange(xmin, xmax, dx) + 0.5 * dx
-    coords = {"layer": layer, "y": y, "x": x}
+    def case_parallel_lines(self):
+        line_x1 = [10.2, 10.2, 10.2]
+        line_x2 = [30.2, 30.2, 30.2]
+        line_y = [82.0, 40.0, 0.0]
+        line1 = shapely.linestrings(line_x1, line_y)
+        line2 = shapely.linestrings(line_x2, line_y)
+        geometry = gpd.GeoDataFrame(geometry=[line1, line2], data={"a": [1.0, 1.0]})
 
-    structured = xr.DataArray(np.ones(shape, dtype=np.int32), coords=coords, dims=dims)
+        unique_values = np.array([0.0, 1.0, np.nan])
+        line_counts = np.array([8, 8, 164])
+        return geometry, unique_values, line_counts
 
-    line_x = [2.2, 2.2, 2.2]
-    line_y = [82.0, 40.0, 0.0]
-    geometry = gpd.GeoDataFrame(
-        geometry=[shapely.linestrings(line_x, line_y)], data={"a": [1.0]}
-    )
+    def case_series_lines(self):
+        # This caused a failure up to 0.10.0
+        line_x = [40.2, 40.2]
+        line_y1 = [82.0, 60.0]
+        line_y2 = [60.0, 40.0]
+        line_y3 = [40.0, 20.0]
+        line_y4 = [20.0, 0.0]
+        line1 = shapely.linestrings(line_x, line_y1)
+        line2 = shapely.linestrings(line_x, line_y2)
+        line3 = shapely.linestrings(line_x, line_y3)
+        line4 = shapely.linestrings(line_x, line_y4)
+        geometry = gpd.GeoDataFrame(
+            geometry=[line1, line2, line3, line4], data={"a": [1.0, 1.0, 1.0, 1.0]}
+        )
+        unique_values = np.array([0.0, 1.0, 2.0, 3.0, np.nan])
+        line_counts = np.array([2, 2, 2, 2, 172])
+        return geometry, unique_values, line_counts
 
+    def case_crossing_lines(self):
+        # This caused a failure up to 0.10.0
+        line_x = [40.2, 40.2, 40.2]
+        line_y = [82.0, 40.0, 0.0]
+        line1 = shapely.linestrings(line_x, line_y)
+        line2 = shapely.linestrings(line_y, line_x)
+        geometry = gpd.GeoDataFrame(geometry=[line1, line2], data={"a": [1.0, 2.0]})
+
+        unique_values = np.array([0.0, 1.0, np.nan])
+        line_counts = np.array([8, 8, 164])
+        return geometry, unique_values, line_counts
+
+    def case_closely_parallel(self):
+        """
+        Snap closely parallel lines. These are snapped to same edge, the first
+        one should be taken. We can use this test to monitor if this behaviour
+        changes.
+        """
+        line_x1 = [19.0, 19.0, 19.0]
+        line_x2 = [21.0, 21.0, 21.0]
+        line_y = [82.0, 40.0, 0.0]
+        line1 = shapely.linestrings(line_x1, line_y)
+        line2 = shapely.linestrings(line_x2, line_y)
+        geometry = gpd.GeoDataFrame(geometry=[line1, line2], data={"a": [1.0, 1.0]})
+
+        unique_values = np.array([0.0, np.nan])
+        line_counts = np.array([8, 172])
+        return geometry, unique_values, line_counts
+
+
+@parametrize_with_cases(["geometry", "unique_values", "line_counts"], cases=LineCases)
+def test_snap_to_grid_with_data(structured, geometry, unique_values, line_counts):
     uds, gdf = snap_to_grid(geometry, structured, max_snap_distance=0.5)
     assert isinstance(uds, xu.UgridDataset)
     assert isinstance(gdf, gpd.GeoDataFrame)
     assert uds["a"].dims == (uds.ugrid.grid.edge_dimension,)
-    assert uds["a"].notnull().sum() == 8
-    assert uds["line_index"].notnull().sum() == 8
-    assert uds["line_index"].sum() == 0  # all values should be 0
+
+    actual_unique_values, actual_line_counts = np.unique(
+        uds["line_index"], return_counts=True
+    )
+    np.testing.assert_array_equal(unique_values, actual_unique_values)
+    np.testing.assert_array_equal(line_counts, actual_line_counts)
+
+
+def test_create_snap_to_grid_dataframe(structured):
+    """
+    Test if create_snap_to_grid_dataframe and its code example in the docstring
+    still work
+    """
+    # Arrange test
+    line_x1 = [19.0, 19.0, 19.0]
+    line_x2 = [21.0, 21.0, 21.0]
+    line_y = [82.0, 40.0, 0.0]
+
+    line1 = shapely.linestrings(line_x1, line_y)
+    line2 = shapely.linestrings(line_x2, line_y)
+    lines = gpd.GeoDataFrame(geometry=[line1, line2], data={"my_variable": [1.0, 2.0]})
+    grid2d = xu.Ugrid2d.from_structured(structured)
+    edge_arr_data = np.ones(grid2d.sizes["mesh2d_nEdges"])
+    edge_data = xu.UgridDataArray.from_data(edge_arr_data, grid2d, facet="edge")
+
+    # Act
+    snapping_df = create_snap_to_grid_dataframe(lines, grid2d, max_snap_distance=0.5)
+
+    # Assert
+    edge_index0 = snapping_df.loc[
+        snapping_df["line_index"] == 0, "edge_index"
+    ].to_numpy()
+    edge_index1 = snapping_df.loc[
+        snapping_df["line_index"] == 1, "edge_index"
+    ].to_numpy()
+    assert np.all(edge_index0 == edge_index1)
+
+    # Now test if docstring part works
+    snapping_df["my_variable"] = (
+        lines["my_variable"].iloc[snapping_df["line_index"]].to_numpy()
+    )
+    aggregated = snapping_df.groupby("edge_index").sum()
+
+    new = xu.full_like(edge_data, np.nan)
+    new.data[aggregated.index] = aggregated["my_variable"]
+
+    # A lot of values are NaN, but all not-nan values should be three.
+    np.testing.assert_array_equal(np.unique(new), [3, np.nan])
diff --git a/xugrid/__init__.py b/xugrid/__init__.py
@@ -24,7 +24,7 @@
 from xugrid.ugrid.conventions import UgridRolesAccessor
 from xugrid.ugrid.partitioning import merge_partitions
 from xugrid.ugrid.polygonize import polygonize
-from xugrid.ugrid.snapping import snap_to_grid
+from xugrid.ugrid.snapping import create_snap_to_grid_dataframe, snap_to_grid
 from xugrid.ugrid.ugrid1d import Ugrid1d
 from xugrid.ugrid.ugrid2d import Ugrid2d
 
@@ -56,6 +56,7 @@
     "merge_partitions",
     "polygonize",
     "snap_to_grid",
+    "create_snap_to_grid_dataframe",
     "Ugrid1d",
     "Ugrid2d",
 )