diff --git a/.github/workflows/requirements/environment.yml b/.github/workflows/requirements/environment.yml index 1e1c7c7c4..18b35af56 100644 --- a/.github/workflows/requirements/environment.yml +++ b/.github/workflows/requirements/environment.yml @@ -10,6 +10,7 @@ dependencies: - geopandas - flake8 - isort + - mapbox_earcut - netcdf4 - numba_celltree - pip diff --git a/docs/api.rst b/docs/api.rst index 941d8b26a..ce4c7c73a 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -31,6 +31,8 @@ Top-level functions concat merge merge_partitions + burn_vector_geometry + polygonize UgridDataArray -------------- diff --git a/docs/changelog.rst b/docs/changelog.rst index 74d7543fe..5492b600b 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -9,6 +9,15 @@ The format is based on `Keep a Changelog`_, and this project adheres to Unreleased ---------- +Added +~~~~~ + +- Added :func:`xugrid.burn_vector_geometries` to burn vector geometries in the + form of geopandas GeoDataFrames into a Ugrid2d topology. +- Added :func:`xugrid.polygonize` to create vector polygons for all connected + regions of a Ugrid2d topology sharing a common value. The result is a + geopandas GeoDataFrame. + [0.6.2] 2023-07-26 ------------------ diff --git a/docs/user_guide.rst b/docs/user_guide.rst index d6fef0ba8..974023ca0 100644 --- a/docs/user_guide.rst +++ b/docs/user_guide.rst @@ -16,6 +16,7 @@ Information on specific methods and classes can be found in the API Reference. examples/plotting.rst examples/regridder_overview.rst examples/overlap_regridder.rst + examples/vector_conversion.rst examples/connectivity.rst examples/partitioning.rst sample_data/index.rst diff --git a/examples/vector_conversion.py b/examples/vector_conversion.py new file mode 100644 index 000000000..7a8cc6d50 --- /dev/null +++ b/examples/vector_conversion.py @@ -0,0 +1,209 @@ +""" +Vector geometry conversion +========================== + +A great deal of geospatial data is available not in gridded form, but in +vectorized form: as points, lines, and polygons. In the Python data ecosystem, +these geometries and their associated data are generally represented by a +geopandas GeoDataFrame. + +Xugrid provides a number of utilities to use such data in combination with +unstructured grids. These are demonstrated below. +""" +# %% + +import geopandas as gpd +import matplotlib.pyplot as plt +import pandas as pd + +import xugrid as xu + +# %% +# We'll once again use the surface elevation data example. + +uda = xu.data.elevation_nl() +uda.ugrid.plot(vmin=-20, vmax=90, cmap="terrain") + +# %% +# Conversion to GeoDataFrame +# -------------------------- +# +# A UgridDataArray or UgridDataset can be directly converted to a GeoDataFrame, +# provided it only contains a spatial dimension (and not a dimension such as +# time). When calling +# ``.to_geodataframe``, a shapely Polygon is created for every face (cell). + +gdf = uda.ugrid.to_geodataframe() +print(gdf) + +# %% +# We see that a GeoDataFrame with 5248 rows is created: one row for each face. +# +# Conversion from GeoDataFrame +# ---------------------------- +# +# We can also make the opposite conversion: we can create a UgridDataSet from a +# GeoDataFrame. +# +back = xu.UgridDataset.from_geodataframe(gdf) +back + +# %% +# .. note:: +# Not every GeoDataFrame can be converted to a ``xugrid`` representation! 
+#    While an unstructured grid topology is generally always a valid collection
+#    of polygon geometries, not every collection of polygon geometries is a
+#    valid grid: polygons should be convex and non-overlapping to create a
+#    valid unstructured grid.
+#
+#    Secondly, each polygon fully owns its vertices (nodes), while the face of
+#    a UGRID topology shares its nodes with its neighbors. All the vertices of
+#    the polygons must therefore be exactly snapped together to form a
+#    connected mesh.
+#
+# Hence, the ``.from_geodataframe()`` method is primarily meant to create
+# ``xugrid`` objects from data that were originally created as a triangulation
+# or unstructured grid, but were converted to vector geometry form.
+#
+# "Rasterizing", or "burning" vector geometries
+# ---------------------------------------------
+#
+# Rasterizing is a common operation when working with raster and vector data.
+# While we cannot really call the operation "rasterizing" when we're dealing
+# with unstructured grids, there is a clearly equivalent operation where we
+# mark cells that are covered or touched by a polygon.
+#
+# In this example, we mark the faces that are covered by a certain province.
+#
+# We start by re-projecting the provinces dataset to the coordinate reference
+# system (CRS) of the grid: from WGS84 (EPSG:4326) to the Dutch National
+# coordinate system (RD New, EPSG:28992). Then, we give each province a unique
+# id, which we burn into the grid.
+
+provinces = xu.data.provinces_nl().to_crs(28992)
+provinces["id"] = range(len(provinces))
+burned = xu.burn_vector_geometry(provinces, uda, column="id")
+burned.ugrid.plot()
+
+# %%
+# This makes it very easy to classify and group data. Let's say
+# we want to compute the average surface elevation per province:
+
+burned = xu.burn_vector_geometry(provinces, uda, column="id")
+uda.groupby(burned).mean()
+
+
+# %%
+# This is a convenient way to create masks for specific regions:
+
+utrecht = provinces[provinces["name"] == "Utrecht"]
+burned = xu.burn_vector_geometry(utrecht, uda)
+xmin, ymin, xmax, ymax = utrecht.buffer(10_000).total_bounds
+
+fig, ax = plt.subplots()
+burned.ugrid.plot(ax=ax)
+burned.ugrid.plot.line(ax=ax, edgecolor="black", linewidth=0.5)
+utrecht.plot(ax=ax, edgecolor="red", facecolor="none", linewidth=1.5)
+ax.set_xlim(xmin, xmax)
+ax.set_ylim(ymin, ymax)
+
+# %%
+# By default, ``burn_vector_geometry`` will only include grid faces whose
+# centroid is located in a polygon. We can also mark all intersected faces
+# by setting ``all_touched=True``:
+
+burned = xu.burn_vector_geometry(utrecht, uda, all_touched=True)
+
+fig, ax = plt.subplots()
+burned.ugrid.plot(ax=ax)
+burned.ugrid.plot.line(ax=ax, edgecolor="black", linewidth=0.5)
+utrecht.plot(ax=ax, edgecolor="red", facecolor="none", linewidth=1.5)
+ax.set_xlim(xmin, xmax)
+ax.set_ylim(ymin, ymax)
+
+# %%
+# We can also use such "masks" to e.g. modify specific parts of the grid data:
+
+modified = (uda + 50.0).where(burned == 1, other=uda)
+modified.ugrid.plot(vmin=-20, vmax=90, cmap="terrain")
+
+# %%
+# Note that ``all_touched=True`` is less suitable when differently valued
+# polygons are present that share borders. While the centroid of a face is
+# contained by only a single polygon, the area of a face may fall within more
+# than one polygon. In that case, the polygons burned later will overwrite the
+# values of the polygons burned earlier.
+ +by_centroid = xu.burn_vector_geometry(provinces, uda, column="id") +by_touch = xu.burn_vector_geometry(provinces, uda, column="id", all_touched=True) + +fig, axes = plt.subplots(ncols=2, figsize=(10, 5)) +by_centroid.ugrid.plot(ax=axes[0], add_colorbar=False) +by_touch.ugrid.plot(ax=axes[1], add_colorbar=False) + +for ax, title in zip(axes, ("centroid", "all touched")): + burned.ugrid.plot.line(ax=ax, edgecolor="black", linewidth=0.5) + utrecht.plot(ax=ax, edgecolor="red", facecolor="none", linewidth=1.5) + ax.set_xlim(xmin, xmax) + ax.set_ylim(ymin, ymax) + ax.set_title(title) + +# %% +# This function can also be used to burn points or lines into the faces of an +# unstructured grid. +# +# The exterior boundaries of the province polygons will provide +# a collection of linestrings that we can burn into the grid: + +lines = gpd.GeoDataFrame(geometry=provinces.exterior) +burned = xu.burn_vector_geometry(lines, uda) + +fig, ax = plt.subplots() +burned.ugrid.plot(ax=ax) +burned.ugrid.plot.line(ax=ax, edgecolor="black", linewidth=0.5) +provinces.plot(ax=ax, edgecolor="red", facecolor="none", linewidth=1.5) +ax.set_xlim(xmin, xmax) +ax.set_ylim(ymin, ymax) + +# %% +# We can also burn points. + +province_centroids = gpd.GeoDataFrame(geometry=provinces.centroid) +burned = xu.burn_vector_geometry(province_centroids, uda) + +fig, ax = plt.subplots() +burned.ugrid.plot(ax=ax) +provinces.plot(ax=ax, edgecolor="red", facecolor="none") + +# %% +# Finally, it's also possible to combine multiple geometry types in a single +# burn operation. + +combined = pd.concat([lines, province_centroids]) +burned = xu.burn_vector_geometry(combined, uda) +burned.ugrid.plot() + +# %% +# Polygonizing +# ------------ +# +# We can also do the opposite operation: turn collections of same-valued grid +# faces into vector polygons. Let's classify the elevation data into below and +# above the boundary of 5 m above mean sea level: + +classified = uda > 5 +polygonized = xu.polygonize(classified) +polygonized.plot(facecolor="none") + +# %% +# We see that the results consists of two large polygons, in which the +# triangles of the triangular grid have been merged to form a single polygon, +# and many smaller polygons, some of which correspond one to one to the +# triangles of the grid. +# +# .. note:: +# The produced polygon edges will follow exactly the face boundaries. When +# the data consists of many unique values (e.g. unbinned elevation data), the +# result will essentially be one polygon per face. In such cases, it is more +# efficient to use ``xugrid.UgridDataArray.to_geodataframe``, which directly +# converts every face to a polygon. 
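+
+# %%
+# The polygonized result is an ordinary geopandas GeoDataFrame with a
+# ``"values"`` column, so the usual geopandas tooling applies to it. As a
+# minimal sketch (not part of the classification workflow above), we can rank
+# the regions by area, or write them to a file:
+
+polygonized["area"] = polygonized.area
+print(polygonized.sort_values("area", ascending=False).head())
+# polygonized.to_file("classified_regions.gpkg")  # hypothetical output path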
diff --git a/pyproject.toml b/pyproject.toml index b6a24eadf..cfa73165b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -43,6 +43,7 @@ Issues = "https://github.com/deltares/xugrid/issues" [project.optional-dependencies] all = [ 'geopandas', + 'mapbox_earcut', 'matplotlib', 'meshkernel', 'netcdf4', diff --git a/tests/test_burn.py b/tests/test_burn.py new file mode 100644 index 000000000..66f5e3275 --- /dev/null +++ b/tests/test_burn.py @@ -0,0 +1,246 @@ +import geopandas as gpd +import numpy as np +import pytest +import shapely +from numba_celltree.constants import Point, Triangle +from shapely.geometry import Polygon + +import xugrid as xu +from xugrid.ugrid import burn + + +@pytest.fixture(scope="function") +def grid(): + """Three by three squares""" + x = np.arange(0.0, 4.0) + y = np.arange(0.0, 4.0) + node_y, node_x = [a.ravel() for a in np.meshgrid(y, x, indexing="ij")] + nx = ny = 3 + # Define the first vertex of every face, v. + v = (np.add.outer(np.arange(nx), nx * np.arange(ny)) + np.arange(ny)).T.ravel() + faces = np.column_stack((v, v + 1, v + nx + 2, v + nx + 1)) + return xu.Ugrid2d(node_x, node_y, -1, faces) + + +@pytest.fixture(scope="function") +def points_and_values(): + xy = np.array( + [ + [0.5, 0.5], + [1.5, 0.5], + [2.5, 2.5], + ] + ) + points = gpd.points_from_xy(*xy.T) + values = np.array([0.0, 1.0, 3.0]) + return points, values + + +@pytest.fixture(scope="function") +def lines_and_values(): + xy = np.array( + [ + [0.5, 0.5], + [2.5, 0.5], + [1.2, 1.5], + [1.8, 1.5], + [0.2, 2.2], + [0.8, 2.8], + [1.2, 2.2], + [1.8, 2.8], + ] + ) + indices = np.array([0, 0, 1, 1, 2, 2, 2, 2]) + values = np.array([0, 1, 2]) + lines = gpd.GeoSeries(shapely.linestrings(xy, indices=indices)) + return lines, values + + +@pytest.fixture(scope="function") +def polygons_and_values(): + values = [0, 1] + polygons = gpd.GeoSeries( + [ + shapely.Polygon(shell=[(0.0, 0.0), (2.0, 0.0), (2.0, 2.0), (0.0, 2.0)]), + shapely.Polygon( + shell=[ + (0.0, 2.0), + (2.0, 2.0), + (2.0, 0.0), + (3.0, 0.0), + (3.0, 3.0), + (0.0, 3.0), + ] + ), + ] + ) + return polygons, values + + +def test_point_in_triangle(): + a = Point(0.1, 0.1) + b = Point(0.7, 0.5) + c = Point(0.4, 0.7) + # Should work for clockwise and ccw orientation. 
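+    # Triangle(a, b, c) is wound counter-clockwise for these coordinates;
+    # reversing the order to (c, b, a) gives the clockwise variant.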
+ triangle = Triangle(a, b, c) + rtriangle = Triangle(c, b, a) + p = Point(0.5, 0.5) + assert burn.point_in_triangle(p, triangle) + assert burn.point_in_triangle(p, rtriangle) + + p = Point(0.0, 0.0) + assert not burn.point_in_triangle(p, triangle) + assert not burn.point_in_triangle(p, rtriangle) + + +def test_points_in_triangle(): + vertices = np.array( + [ + [0.0, 0.0], + [1.0, 0.0], + [1.0, 1.0], + [2.0, 0.0], + ] + ) + faces = np.array( + [ + [0, 1, 2], + [1, 3, 2], + ] + ) + points = np.array( + [ + [-0.5, 0.25], + [0.0, 0.0], # on vertex + [0.5, 0.5], # on edge + [0.5, 0.25], + [1.5, 0.25], + [2.5, 0.25], + ] + ) + face_indices = np.array([0, 0, 0, 0, 1, 1]) + expected = [False, True, True, True, True, False] + actual = burn.points_in_triangles( + points=points, + face_indices=face_indices, + faces=faces, + vertices=vertices, + ) + assert np.array_equal(expected, actual) + + +def test_locate_polygon(grid): + polygon = shapely.Polygon(shell=[(0.5, 0.5), (2.5, 0.5), (0.5, 2.5)]) + exterior = shapely.get_coordinates(polygon.exterior) + interiors = [shapely.get_coordinates(interior) for interior in polygon.interiors] + + actual = burn._locate_polygon(grid, exterior, interiors, all_touched=False) + assert np.array_equal(np.sort(actual), [0, 1, 2, 3, 4, 6]) + actual = burn._locate_polygon(grid, exterior, interiors, all_touched=True) + assert np.array_equal(np.sort(actual), [0, 1, 2, 3, 4, 6]) + + polygon = shapely.Polygon(shell=[(0.75, 0.5), (2.5, 0.5), (0.75, 2.5)]) + exterior = shapely.get_coordinates(polygon.exterior) + interiors = [shapely.get_coordinates(interior) for interior in polygon.interiors] + + actual = burn._locate_polygon(grid, exterior, interiors, all_touched=False) + assert np.array_equal(np.sort(actual), [1, 2, 4]) + actual = burn._locate_polygon(grid, exterior, interiors, all_touched=True) + assert np.array_equal(np.sort(actual), [0, 1, 2, 3, 4, 5, 6, 7]) + + +def test_locate_polygon_with_hole(grid): + # The hole omits the centroid at (1.5, 1.5) + polygon = shapely.Polygon( + shell=[(0.7, 0.7), (2.3, 0.7), (1.5, 2.3)], + holes=[[(1.4, 1.6), (1.5, 1.4), (1.6, 1.6)]], + ) + exterior = shapely.get_coordinates(polygon.exterior) + interiors = [shapely.get_coordinates(interior) for interior in polygon.interiors] + + actual = burn._locate_polygon(grid, exterior, interiors, all_touched=False) + assert np.array_equal(actual, []) + actual = burn._locate_polygon(grid, exterior, interiors, all_touched=True) + assert np.array_equal(np.unique(actual), [0, 1, 2, 3, 4, 5, 7]) + + +def test_burn_polygons(grid, polygons_and_values): + polygons, values = polygons_and_values + output = np.full(grid.n_face, np.nan) + burn._burn_polygons(polygons, grid, values, all_touched=False, output=output) + expected = np.array([0, 0, 1, 0, 0, 1, 1, 1, 1]) + assert np.allclose(output, expected) + + +def test_burn_points(grid, points_and_values): + points, values = points_and_values + output = np.full(grid.n_face, -1.0) + + burn._burn_points(points, grid, values, output=output) + expected = np.array([0, 1, -1, -1, -1, -1, -1, -1, 3]) + assert np.allclose(output, expected) + + +def test_burn_lines(grid, lines_and_values): + lines, values = lines_and_values + output = np.full(grid.n_face, -1.0) + + burn._burn_lines(lines, grid, values, output=output) + expected = np.array([0, 0, 0, -1, 1, -1, 2, 2, -1]) + assert np.allclose(output, expected) + + +def test_burn_vector_geometry__errors(grid, points_and_values): + with pytest.raises(TypeError, match="gdf must be GeoDataFrame"): + xu.burn_vector_geometry(0, 
grid) + + points, values = points_and_values + gdf = gpd.GeoDataFrame({"values": values}, geometry=points) + with pytest.raises(TypeError, match="Like must be Ugrid2d, UgridDataArray"): + xu.burn_vector_geometry(gdf, gdf) + + # This seems like the easiest way to generate a multi-polygon inside a + # GeoDataFrame, since it won't initialize with a multi-polygon. + p1 = Polygon([(0, 0), (1, 0), (1, 1)]) + p2 = Polygon([(0, 0), (1, 0), (1, 1), (0, 1)]) + p3 = Polygon([(2, 0), (3, 0), (3, 1), (2, 1)]) + gdf = gpd.GeoDataFrame({"values": [0, 0, 0]}, geometry=[p1, p2, p3]).dissolve( + by="values" + ) + with pytest.raises( + TypeError, match="GeoDataFrame contains unsupported geometry types" + ): + xu.burn_vector_geometry(gdf, grid) + + +def test_burn_vector_geometry( + grid, points_and_values, lines_and_values, polygons_and_values +): + polygons, poly_values = polygons_and_values + gdf = gpd.GeoDataFrame({"values": poly_values}, geometry=polygons) + actual = xu.burn_vector_geometry(gdf, grid) + assert isinstance(actual, xu.UgridDataArray) + assert np.allclose(actual.to_numpy(), 1) + actual = xu.burn_vector_geometry(gdf, grid, all_touched=True) + assert np.allclose(actual.to_numpy(), 1) + + expected = np.array([0, 0, 1, 0, 0, 1, 1, 1, 1]) + actual = xu.burn_vector_geometry(gdf, grid, column="values") + assert np.allclose(actual.to_numpy(), expected) + + points, point_values = points_and_values + lines, line_values = lines_and_values + line_values += 10 + point_values += 20 + values = np.concatenate([poly_values, line_values, point_values]) + geometry = np.concatenate( + [polygons.to_numpy(), lines.to_numpy(), points.to_numpy()] + ) + gdf = gpd.GeoDataFrame({"values": values}, geometry=geometry) + actual = xu.burn_vector_geometry(gdf, grid, column="values") + expected = np.array([20.0, 21.0, 10.0, 0.0, 11.0, 1.0, 12.0, 12.0, 23.0]) + assert np.allclose(actual.to_numpy(), expected) + + # All touched should give the same answer for this specific example. + actual = xu.burn_vector_geometry(gdf, grid, column="values", all_touched=True) + assert np.allclose(actual.to_numpy(), expected) diff --git a/tests/test_polygonize.py b/tests/test_polygonize.py new file mode 100644 index 000000000..b9edd109f --- /dev/null +++ b/tests/test_polygonize.py @@ -0,0 +1,49 @@ +import geopandas as gpd +import numpy as np +import pytest +import xarray as xr + +import xugrid as xu + + +@pytest.fixture(scope="function") +def grid(): + """Three by three squares""" + x = np.arange(0.0, 4.0) + y = np.arange(0.0, 4.0) + node_y, node_x = [a.ravel() for a in np.meshgrid(y, x, indexing="ij")] + nx = ny = 3 + # Define the first vertex of every face, v. 
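+    # The 4x4 nodes are numbered row by row (x fastest), so the lower-left
+    # node of face (row r, column c) is r * 4 + c; v becomes
+    # [0, 1, 2, 4, 5, 6, 8, 9, 10], and each face is then
+    # (v, v + 1, v + nx + 2, v + nx + 1) in counter-clockwise order.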
+ v = (np.add.outer(np.arange(nx), nx * np.arange(ny)) + np.arange(ny)).T.ravel() + faces = np.column_stack((v, v + 1, v + nx + 2, v + nx + 1)) + return xu.Ugrid2d(node_x, node_y, -1, faces) + + +def test_polygonize__errors(grid): + uda = xu.UgridDataArray( + xr.DataArray(np.ones(grid.n_edge), dims=[grid.edge_dimension]), grid=grid + ) + with pytest.raises(ValueError, match="Cannot polygonize non-face dimension"): + xu.polygonize(uda) + + uda = xu.UgridDataArray( + xr.DataArray(np.ones((3, grid.n_face)), dims=["layer", grid.face_dimension]), + grid=grid, + ) + with pytest.raises(ValueError, match="Cannot polygonize non-face dimension"): + xu.polygonize(uda) + + +def test_polygonize(grid): + a = np.array([0, 0, 0, 1, 1, 1, 0, 0, 0]) + uda = xu.UgridDataArray(xr.DataArray(a, dims=grid.face_dimension), grid) + actual = xu.polygonize(uda) + assert isinstance(actual, gpd.GeoDataFrame) + assert len(actual) == 3 + + # With a hole in the 1-valued polygon. + a = np.array([1, 1, 1, 1, 0, 1, 1, 1, 1]) + uda = xu.UgridDataArray(xr.DataArray(a, dims=grid.face_dimension), grid) + actual = xu.polygonize(uda) + assert isinstance(actual, gpd.GeoDataFrame) + assert len(actual) == 2 diff --git a/xugrid/__init__.py b/xugrid/__init__.py index 2a33f4a77..3048e1c60 100644 --- a/xugrid/__init__.py +++ b/xugrid/__init__.py @@ -22,8 +22,10 @@ OverlapRegridder, RelativeOverlapRegridder, ) +from xugrid.ugrid.burn import burn_vector_geometry from xugrid.ugrid.conventions import UgridRolesAccessor from xugrid.ugrid.partitioning import merge_partitions +from xugrid.ugrid.polygonize import polygonize from xugrid.ugrid.snapping import snap_to_grid from xugrid.ugrid.ugrid1d import Ugrid1d from xugrid.ugrid.ugrid2d import Ugrid2d diff --git a/xugrid/ugrid/burn.py b/xugrid/ugrid/burn.py new file mode 100644 index 000000000..2c1911b10 --- /dev/null +++ b/xugrid/ugrid/burn.py @@ -0,0 +1,305 @@ +from typing import List, Union + +import numba as nb +import numpy as np +import xarray as xr +from numba_celltree.constants import TOLERANCE_ON_EDGE, Point, Triangle +from numba_celltree.geometry_utils import ( + as_point, + as_triangle, + cross_product, + to_vector, +) + +import xugrid +from xugrid.constants import FloatArray, IntArray, MissingOptionalModule + +try: + import shapely +except ImportError: + shapely = MissingOptionalModule("shapely") + + +POINT = shapely.GeometryType.POINT +LINESTRING = shapely.GeometryType.LINESTRING +LINEARRING = shapely.GeometryType.LINEARRING +POLYGON = shapely.GeometryType.POLYGON +GEOM_NAMES = {v: k for k, v in shapely.GeometryType.__members__.items()} + + +@nb.njit(inline="always") +def in_bounds(p: Point, a: Point, b: Point) -> bool: + """ + Check whether point p falls within the bounding box created by a and b + (after we've checked the size of the cross product). + + However, we must take into account that a line may be either vertical + (dx=0) or horizontal (dy=0) and only evaluate the non-zero value. + + If the area created by p, a, b is tiny AND p is within the bounds of a and + b, the point lies very close to the edge. + """ + # Already in numba_celltree, unreleased. + dx = b.x - a.x + dy = b.y - a.y + if abs(dx) >= abs(dy): + if dx > 0: + return a.x <= p.x and p.x <= b.x + return b.x <= p.x and p.x <= a.x + else: + if dy > 0: + return a.y <= p.y and p.y <= b.y + return b.y <= p.y and p.y <= a.y + + +@nb.njit(inline="always") +def point_in_triangle(p: Point, t: Triangle) -> bool: + # TODO: move this into numba_celltree instead? 
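+    # p lies inside the triangle when the cross products of each edge vector
+    # with the vector from the edge start to p all share the same sign
+    # (half-plane test); a near-zero cross product means p lies (almost) on
+    # that edge, which is handled separately below.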
+    ap = to_vector(t.a, p)
+    bp = to_vector(t.b, p)
+    cp = to_vector(t.c, p)
+    ab = to_vector(t.a, t.b)
+    bc = to_vector(t.b, t.c)
+    ca = to_vector(t.c, t.a)
+    # Do a half plane check.
+    A = cross_product(ab, ap)
+    B = cross_product(bc, bp)
+    C = cross_product(ca, cp)
+    signA = A > 0
+    signB = B > 0
+    signC = C > 0
+    if (signA == signB) and (signB == signC):
+        return True
+
+    # Test whether p is located on/very close to edges.
+    if (
+        (abs(A) < TOLERANCE_ON_EDGE)
+        and in_bounds(p, t.a, t.b)
+        or (abs(B) < TOLERANCE_ON_EDGE)
+        and in_bounds(p, t.b, t.c)
+        or (abs(C) < TOLERANCE_ON_EDGE)
+        and in_bounds(p, t.c, t.a)
+    ):
+        return True
+
+    return False
+
+
+@nb.njit(inline="always", parallel=True, cache=True)
+def points_in_triangles(
+    points: FloatArray,
+    face_indices: IntArray,
+    faces: IntArray,
+    vertices: FloatArray,
+):
+    # TODO: move this into numba_celltree instead?
+    n_points = len(points)
+    inside = np.empty(n_points, dtype=np.bool_)
+    for i in nb.prange(n_points):
+        face_index = face_indices[i]
+        face = faces[face_index]
+        triangle = as_triangle(vertices, face)
+        point = as_point(points[i])
+        inside[i] = point_in_triangle(point, triangle)
+    return inside
+
+
+def _locate_polygon(
+    grid: "xu.Ugrid2d",  # type: ignore # noqa
+    exterior: FloatArray,
+    interiors: List[FloatArray],
+    all_touched: bool,
+) -> IntArray:
+    """
+    This algorithm locates a polygon vector geometry in a 2D topology by:
+
+    * Extracting the exterior and interiors (holes) coordinates from the
+      polygons.
+    * Breaking every polygon down into triangles using an "earcut" algorithm.
+    * Searching the grid for these triangles.
+
+    Due to the use of the separating axes theorem, _locate_faces finds all
+    intersecting triangles, including those that only touch the edge.
+    To enable all_touched=False, we have to search the centroids of candidates
+    in the intersecting triangles.
+
+    Parameters
+    ----------
+    grid: Ugrid2d
+    exterior: FloatArray
+        Exterior of the polygon.
+    interiors: List[FloatArray]
+        Interior holes of the polygon.
+    all_touched: bool
+        If False, include only cells whose centroid falls inside the polygon;
+        if True, include every cell that is touched.
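+
+    Returns
+    -------
+    IntArray
+        Indices of the grid faces whose centroid falls inside the polygon, or,
+        with ``all_touched=True``, of all faces touched by it.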
+    """
+
+    import mapbox_earcut
+
+    rings = np.cumsum([len(exterior)] + [len(interior) for interior in interiors])
+    vertices = np.vstack([exterior] + interiors).astype(np.float64)
+    triangles = mapbox_earcut.triangulate_float64(vertices, rings).reshape((-1, 3))
+    triangle_indices, grid_indices = grid.celltree._locate_faces(vertices, triangles)
+    if all_touched:
+        return grid_indices
+    else:
+        centroids = grid.centroids[grid_indices]
+        inside = points_in_triangles(
+            points=centroids,
+            face_indices=triangle_indices,
+            faces=triangles,
+            vertices=vertices,
+        )
+        return grid_indices[inside]
+
+
+def _burn_polygons(
+    polygons: "geopandas.GeoSeries",  # type: ignore # noqa
+    like: "xugrid.Ugrid2d",
+    values: np.ndarray,
+    all_touched: bool,
+    output: FloatArray,
+) -> None:
+    exterior_coordinates = [
+        shapely.get_coordinates(exterior) for exterior in polygons.exterior
+    ]
+    interior_coordinates = [
+        [shapely.get_coordinates(p_interior) for p_interior in p_interiors]
+        for p_interiors in polygons.interiors
+    ]
+    to_burn = np.empty(like.n_face, dtype=bool)
+
+    for exterior, interiors, value in zip(
+        exterior_coordinates, interior_coordinates, values
+    ):
+        to_burn = _locate_polygon(like, exterior, interiors, all_touched)
+        output[to_burn] = value
+
+    return
+
+
+def _burn_points(
+    points: "geopandas.GeoSeries",  # type: ignore # noqa
+    like: "xugrid.Ugrid2d",
+    values: np.ndarray,
+    output: FloatArray,
+) -> None:
+    """
+    Simply searches the points in the ``like`` 2D topology.
+    """
+    xy = shapely.get_coordinates(points)
+    to_burn = like.locate_points(xy)
+    output[to_burn] = values
+    return
+
+
+def _burn_lines(
+    lines: "geopandas.GeoSeries",  # type: ignore # noqa
+    like: "xugrid.Ugrid2d",
+    values: np.ndarray,
+    output: FloatArray,
+) -> None:
+    """
+    This algorithm breaks every linestring down into edges (pairs of x, y
+    points). Every edge is intersected with the ``like`` grid; the intersection
+    locations themselves are discarded, only the intersected face indices are
+    used.
+    """
+    xy, index = shapely.get_coordinates(lines, return_index=True)
+    # From the coordinates and the index, create the (n_edge, 2, 2) shape array
+    # containing the edge_coordinates.
+    linear_index = np.arange(index.size)
+    segments = np.column_stack([linear_index[:-1], linear_index[1:]])
+    # Only segments whose two vertices belong to the same linestring are valid.
+    valid = np.diff(index) == 0
+    segments = segments[valid]
+    edges = xy[segments]
+    # Now query the grid for these edges.
+    edge_index, face_index, _ = like.intersect_edges(edges)
+    # Find the associated values.
+    line_index = index[1:][valid]
+    value_index = line_index[edge_index]
+    output[face_index] = values[value_index]
+    return
+
+
+def burn_vector_geometry(
+    gdf: "geopandas.GeoDataFrame",  # type: ignore # noqa
+    like: Union["xugrid.Ugrid2d", "xugrid.UgridDataArray", "xugrid.UgridDataset"],
+    column: str = None,
+    fill: Union[int, float] = np.nan,
+    all_touched: bool = False,
+) -> "xugrid.UgridDataArray":
+    """
+    Burn vector geometries (points, lines, polygons) into a Ugrid2d mesh.
+
+    If no ``column`` argument is provided, a value of 1.0 will be burned into
+    the mesh.
+
+    Parameters
+    ----------
+    gdf: geopandas.GeoDataFrame
+        Polygons, points, and/or lines to be burned into the grid.
+    like: UgridDataArray, UgridDataset, or Ugrid2d
+        Grid to burn the vector data into.
+    column: str
+        Name of the GeoDataFrame column whose values to burn into the grid.
+    fill: int, float, optional, default value ``np.nan``.
+        Fill value for nodata areas.
+    all_touched: bool, optional, default value ``False``.
+        All mesh faces (cells) touched by polygons will be updated, not just
+        those whose center point is within the polygon.
+
+    Returns
+    -------
+    burned: UgridDataArray
+    """
+    import geopandas as gpd
+
+    if not isinstance(gdf, gpd.GeoDataFrame):
+        raise TypeError(f"gdf must be GeoDataFrame, received: {type(gdf).__name__}")
+    if isinstance(like, (xugrid.UgridDataArray, xugrid.UgridDataset)):
+        like = like.ugrid.grid
+    if not isinstance(like, xugrid.Ugrid2d):
+        raise TypeError(
+            "Like must be Ugrid2d, UgridDataArray, or UgridDataset; "
+            f"received: {type(like).__name__}"
+        )
+    geometry_id = shapely.get_type_id(gdf.geometry)
+    allowed_types = (POINT, LINESTRING, LINEARRING, POLYGON)
+    if not np.isin(geometry_id, allowed_types).all():
+        received = ", ".join(
+            [GEOM_NAMES[geom_id] for geom_id in np.unique(geometry_id)]
+        )
+        raise TypeError(
+            "GeoDataFrame contains unsupported geometry types. Can only burn "
+            "Point, LineString, LinearRing, and Polygon geometries. Received: "
+            f"{received}"
+        )
+
+    points = gdf.loc[geometry_id == POINT]
+    lines = gdf.loc[(geometry_id == LINESTRING) | (geometry_id == LINEARRING)]
+    polygons = gdf.loc[geometry_id == POLYGON]
+
+    if column is None:
+        point_values = np.ones(len(points), dtype=float)
+        line_values = np.ones(len(lines), dtype=float)
+        poly_values = np.ones(len(polygons), dtype=float)
+    else:
+        point_values = points[column].to_numpy()
+        line_values = lines[column].to_numpy()
+        poly_values = polygons[column].to_numpy()
+
+    output = np.full(like.n_face, fill)
+    if len(polygons) > 0:
+        _burn_polygons(polygons.geometry, like, poly_values, all_touched, output)
+    if len(lines) > 0:
+        _burn_lines(lines.geometry, like, line_values, output)
+    if len(points) > 0:
+        _burn_points(points.geometry, like, point_values, output)
+
+    return xugrid.UgridDataArray(
+        obj=xr.DataArray(output, dims=[like.face_dimension], name=column),
+        grid=like,
+    )
diff --git a/xugrid/ugrid/polygonize.py b/xugrid/ugrid/polygonize.py
new file mode 100644
index 000000000..6fb4d3b08
--- /dev/null
+++ b/xugrid/ugrid/polygonize.py
@@ -0,0 +1,131 @@
+"""
+The functions in this module serve to polygonize groups of connected,
+same-valued faces of a Ugrid2d topology into vector geometries.
+"""
+from typing import Tuple
+
+import numpy as np
+from scipy import sparse
+
+from xugrid.constants import IntArray
+
+
+def _bbox_area(bounds):
+    return (bounds[2] - bounds[0]) * (bounds[3] - bounds[1])
+
+
+def _classify(
+    fill_value: int, i: IntArray, j: IntArray, face_values: np.ndarray
+) -> Tuple[int, IntArray]:
+    """
+    Find out how many discrete polygons are created. Identify the connectivity,
+    such that we can select a single polygon afterwards.
+
+    Parameters
+    ----------
+    fill_value: int
+        Fill value in j: marks exterior edges.
+    i: np.ndarray of int
+        First face of the edge.
+    j: np.ndarray of int
+        Second face of the edge.
+    face_values: np.ndarray
+
+    Returns
+    -------
+    n_polygon: int
+    polygon_id: np.ndarray of int
+    """
+    # Every edge connects up to two faces. vi holds the value of the first
+    # face, vj holds the value of the second face.
+    vi = face_values[i]
+    vj = face_values[j]
+    n = face_values.size
+    # For labelling, only those parts of the mesh that have the same value
+    # should be connected with each other.
+    # Since we dropped NaN values before, we needn't worry about those.
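+    # An edge therefore only forms a connection when both of its neighboring
+    # faces exist (no fill_value) and carry the same value; running
+    # connected_components on this reduced graph labels every same-valued
+    # region with its own polygon id.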
+    is_connection = (i != fill_value) & (j != fill_value) & (vi == vj)
+    i = i[is_connection]
+    j = j[is_connection]
+    ij = np.concatenate([i, j])
+    ji = np.concatenate([j, i])
+    coo_content = (ji, (ij, ji))
+    # Make sure to explicitly set the matrix shape: otherwise, isolated
+    # elements without any connection might disappear, and connected_components
+    # will not return a value for every face.
+    coo_matrix = sparse.coo_matrix(coo_content, shape=(n, n))
+    # We can classify the grid faces using this (reduced) connectivity.
+    return sparse.csgraph.connected_components(coo_matrix)
+
+
+def polygonize(uda: "UgridDataArray") -> "gpd.GeoDataFrame":  # type: ignore # noqa
+    """
+    This function creates vector polygons for all connected regions of cells
+    (faces) in the Ugrid2d topology sharing a common value.
+
+    The produced polygon edges will follow exactly the cell boundaries. When
+    the data consists of many unique values (e.g. unbinned elevation data), the
+    result will essentially be one polygon per face. In such cases, it is much
+    more efficient to use ``xugrid.UgridDataArray.to_geodataframe``, which
+    directly converts every cell to a polygon. This function is meant for data
+    with relatively few unique values such as classification results.
+
+    Parameters
+    ----------
+    uda: UgridDataArray
+        The DataArray should only contain the face dimension. Additional
+        dimensions, such as time, are not allowed.
+
+    Returns
+    -------
+    polygonized: GeoDataFrame
+    """
+
+    import geopandas as gpd
+    import shapely
+
+    facedim = uda.ugrid.grid.face_dimension
+    if uda.dims != (facedim,):
+        raise ValueError(
+            "Cannot polygonize non-face dimensions. Expected only "
+            f"({facedim},), but received {uda.dims}."
+        )
+
+    # First remove the NaN values. These will not be polygonized anyway.
+    dropped = uda.dropna(dim=uda.ugrid.grid.face_dimension)
+    face_values = dropped.to_numpy()
+    grid = dropped.ugrid.grid
+    i, j = grid.edge_face_connectivity.T
+    fill_value = grid.fill_value
+    n_polygon, polygon_id = _classify(fill_value, i, j, face_values)
+
+    # Now we identify for each label the subset of edges. These are the
+    # "exterior" edges: either the exterior edge of the mesh identified by a
+    # fill value, or by being connected to a cell with a different value.
+    coordinates = grid.node_coordinates
+    data_i = face_values[i]
+    vi = polygon_id[i]
+    vj = polygon_id[j]
+    # Ensure that no result that has been created by indexing with the
+    # fill_value remains. Since polygon_id starts counting at 0, we may use -1.
+    vi[i == fill_value] = -1
+    vj[j == fill_value] = -1
+    boundary = vi != vj
+
+    polygons = []
+    values = []
+    for label in range(n_polygon):
+        keep = ((vi == label) | (vj == label)) & boundary
+        # The result of shapely polygonize is always a GeometryCollection.
+        # Holes are included twice: once as holes in the largest body, and once
+        # more as polygons on their own. We are interested in the largest
+        # polygon, which we identify through its bounding box.
+        edges = grid.edge_node_connectivity[keep]
+        collection = shapely.polygonize(shapely.linestrings(coordinates[edges]))
+        polygon = max(collection.geoms, key=lambda x: _bbox_area(x.bounds))
+        # Find the first True value in keep, use that to fetch the polygon
+        # value.
+        value = data_i[keep.argmax()]
+        polygons.append(polygon)
+        values.append(value)
+
+    return gpd.GeoDataFrame({"values": values}, geometry=polygons)
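
Taken together, the two new top-level functions allow round-tripping between
grid data and vector data. A minimal usage sketch, assuming the sample
datasets used in the new example (``elevation_nl`` and ``provinces_nl``):

    import xugrid as xu

    uda = xu.data.elevation_nl()
    provinces = xu.data.provinces_nl().to_crs(28992)
    provinces["id"] = range(len(provinces))

    # Burn province ids into the grid faces, then aggregate per province.
    burned = xu.burn_vector_geometry(provinces, uda, column="id")
    mean_elevation = uda.groupby(burned).mean()

    # Classify and polygonize: connected regions above 5 m become polygons.
    polygons = xu.polygonize(uda > 5.0)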