From 9194de46b8f0c93cc79ce8423f84c215506eee84 Mon Sep 17 00:00:00 2001 From: Huite Bootsma Date: Mon, 24 Jul 2023 09:53:31 +0200 Subject: [PATCH 01/10] Add burn_vector_geometry --- xugrid/__init__.py | 1 + xugrid/ugrid/burn.py | 206 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 207 insertions(+) create mode 100644 xugrid/ugrid/burn.py diff --git a/xugrid/__init__.py b/xugrid/__init__.py index 2a33f4a77..2ce0d433b 100644 --- a/xugrid/__init__.py +++ b/xugrid/__init__.py @@ -22,6 +22,7 @@ OverlapRegridder, RelativeOverlapRegridder, ) +from xugrid.ugrid.burn import burn_vector_geometry from xugrid.ugrid.conventions import UgridRolesAccessor from xugrid.ugrid.partitioning import merge_partitions from xugrid.ugrid.snapping import snap_to_grid diff --git a/xugrid/ugrid/burn.py b/xugrid/ugrid/burn.py new file mode 100644 index 000000000..76139e4ad --- /dev/null +++ b/xugrid/ugrid/burn.py @@ -0,0 +1,206 @@ +from typing import List, Union + +import numpy as np +import xarray as xr + +import xugrid +from xugrid.constants import BoolArray, FloatArray, MissingOptionalModule + +try: + import shapely +except ImportError: + shapely = MissingOptionalModule("shapely") + +ShapelyArray = np.ndarray + + +def _locate_polygon( + xy: FloatArray, + exterior: FloatArray, + interiors: List[FloatArray], +) -> BoolArray: + import mapbox_earcut + + rings = np.cumsum([len(exterior)] + [len(interior) for interior in interiors]) + vertices = np.vstack([exterior] + interiors).astype(np.float64) + triangles = mapbox_earcut.triangulate_float64(vertices, rings).reshape((-1, 3)) + grid = xugrid.Ugrid2d(*vertices.T, -1, triangles) + return grid.locate_points(xy) != -1 + + +def _burn_polygons( + polygons: ShapelyArray, + like: "xugrid.Ugrid2d", + values: np.ndarray, + all_touched: bool, + output: FloatArray, +) -> None: + """ + This algorithm burns polygon vector geometries in a 2d topology by: + + * Extracting the exterior and interiors (holes) coordinates from the + polygons. + * Breaking every polygon down into a triangles using an "earcut" algorithm. + * Building a Ugrid2d from the triangles. + + When all_touched=False: + + * The grid created from the polygon is searched for every centroid of the + ``like`` grid. + + When all_touched=True: + + * The grid created from the polygon is searched for every node of the + ``like`` grid. + * If any of the associated nodes of a face is found in the polygon, the + value is burned into the face. + + """ + exterior_coordinates = [ + shapely.get_coordinates(exterior) for exterior in polygons.geometry.exterior + ] + interior_coordinates = [ + [shapely.get_coordinates(p_interior) for p_interior in p_interiors] + for p_interiors in polygons.geometry.interiors + ] + + if all_touched: + # Pre-allocate work arrays so we don't have re-allocate for every + # polygon. + to_burn2d = np.empty_like(like.face_node_connectivity, dtype=bool) + to_burn = np.empty(like.n_face, dtype=bool) + # These data are static: + mask = like.face_node_connectivity == like.fill_value + xy = like.node_coordinates + for exterior, interiors, value in zip( + exterior_coordinates, interior_coordinates, values + ): + location = _locate_polygon(xy, exterior, interiors) + # Equal to: to_burn2d = location[like.face_node_connectivity] + location.take(like.face_node_connectivity, out=to_burn2d) + to_burn2d[mask] = False + # Equal to: to_burn = to_burn2d.any(axis=1) + np.bitwise_or.reduce(to_burn2d, axis=1, out=to_burn) + output[to_burn] = value + + else: + xy = like.centroids + for exterior, interiors, value in zip( + exterior_coordinates, interior_coordinates, values + ): + to_burn = _locate_polygon(xy, exterior, interiors) + output[to_burn] = value + + return + + +def _burn_points( + points: ShapelyArray, like: "xugrid.Ugrid2d", values: np.ndarray, output: FloatArray +) -> None: + """ + Simply searches the points in the ``like`` 2D topology. + """ + xy = shapely.get_coordinates(points) + to_burn = like.locate_points(xy) + output[to_burn] = values + return + + +def _burn_lines( + lines: ShapelyArray, like: "xugrid.Ugrid2d", values: np.ndarray, output: FloatArray +) -> None: + """ + This algorithm breaks any linestring down into edges (two x, y points). We + search and intersect every edge in the ``like`` grid, the intersections are + discarded. + """ + xy, index = shapely.get_coordinates(lines, return_index=True) + # From the coordinates and the index, create the (n_edge, 2, 2) shape array + # containing the edge_coordinates. + linear_index = np.arange(index.size) + segments = np.column_stack([linear_index[:-1], linear_index[1:]]) + # Only connections with vertices with the same index are valid. + valid = np.diff(index) == 0 + segments = segments[valid] + edges = xy[segments] + # Now query the grid for these edges. + edge_index, face_index, _ = like.intersect_edges(edges) + # Find the associated values. + line_index = index[1:][valid] + value_index = line_index[edge_index] + output[face_index] = values[value_index] + return + + +def burn_vector_geometry( + gdf: "geopandas.GeoDataframe", # type: ignore # noqa + like: "xugrid.Ugrid2d", + column: str = None, + fill: Union[int, float] = np.nan, + all_touched: bool = False, +) -> None: + """ + Burn vector geometries (points, lines, polygons) into a Ugrid2d mesh. + + If no ``column`` argument is provided, a value of 1.0 will be burned in to + the mesh. + + Parameters + ---------- + gdf: geopandas.GeoDataFrame + like: UgridDataArray, UgridDataset, or Ugrid2d + column: str + Column name of geodataframe to burn into mesh + fill: int, float, optional, default value ``np.nan``. + Fill value for nodata areas. + all_touched: bool, optional, default value ``False``. + All mesh faces (cells) touched by polygons will be updated, not just + those whose center point is within the polygon. + + Returns + ------- + burned: UgridDataArray + """ + import geopandas as gpd + + if not isinstance(gdf, gpd.GeoDataFrame): + raise TypeError(f"gdf must be GeoDataFrame, received: {type(like).__name__}") + if isinstance(like, (xugrid.UgridDataArray, xugrid.UgridDataset)): + like = like.ugrid.grid + if not isinstance(like, xugrid.Ugrid2d): + raise TypeError( + "Like must be Ugrid2d, UgridDataArray, or UgridDataset;" + f"received: {type(like).__name__}" + ) + geometry_id = shapely.get_type_id(gdf.geometry) + allowed_types = ( + shapely.GeometryType.POINT, + shapely.GeometryType.LINESTRING, + shapely.GeometryType.POLYGON, + ) + if not np.isin(geometry_id, allowed_types).all(): + raise TypeError( + "GeoDataFrame contains unsupported geometry types. Can only burn " + "Point, LineString, and Polygon geometries." + ) + points = gdf.loc[geometry_id == shapely.GeometryType.POINT] + lines = gdf.loc[geometry_id == shapely.GeometryType.LINESTRING] + polygons = gdf.loc[geometry_id == shapely.GeometryType.POLYGON] + + if column is not None: + values = gdf["column"].to_numpy() + else: + values = np.ones(len(gdf), dtype=float) + + output = np.full(like.n_face, fill) + if len(points) > 0: + _burn_points(points.geometry.to_numpy(), like, values, output) + if len(lines) > 0: + _burn_lines(lines.geometry.to_numpy(), like, values, output) + if len(polygons) > 0: + _burn_polygons(polygons.geometry.to_numpy(), like, values, all_touched, output) + + return xugrid.UgridDataArray( + obj=xr.DataArray(output, dims=[like.face_dimension], name=column), + grid=like, + ) From 7c9d4fa55022ad8b177bbc3f39eda8f5936f1b24 Mon Sep 17 00:00:00 2001 From: Huite Bootsma Date: Mon, 24 Jul 2023 09:57:42 +0200 Subject: [PATCH 02/10] Add burn_vector_geometry --- xugrid/ugrid/burn.py | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/xugrid/ugrid/burn.py b/xugrid/ugrid/burn.py index 76139e4ad..cec9c52f8 100644 --- a/xugrid/ugrid/burn.py +++ b/xugrid/ugrid/burn.py @@ -11,8 +11,6 @@ except ImportError: shapely = MissingOptionalModule("shapely") -ShapelyArray = np.ndarray - def _locate_polygon( xy: FloatArray, @@ -29,7 +27,7 @@ def _locate_polygon( def _burn_polygons( - polygons: ShapelyArray, + polygons: "geopandas.GeoSeries", # type: ignore # noqa like: "xugrid.Ugrid2d", values: np.ndarray, all_touched: bool, @@ -57,11 +55,11 @@ def _burn_polygons( """ exterior_coordinates = [ - shapely.get_coordinates(exterior) for exterior in polygons.geometry.exterior + shapely.get_coordinates(exterior) for exterior in polygons.exterior ] interior_coordinates = [ [shapely.get_coordinates(p_interior) for p_interior in p_interiors] - for p_interiors in polygons.geometry.interiors + for p_interiors in polygons.interiors ] if all_touched: @@ -95,7 +93,10 @@ def _burn_polygons( def _burn_points( - points: ShapelyArray, like: "xugrid.Ugrid2d", values: np.ndarray, output: FloatArray + points: "geopandas.GeoSeries", # type: ignore # noqa + like: "xugrid.Ugrid2d", + values: np.ndarray, + output: FloatArray, ) -> None: """ Simply searches the points in the ``like`` 2D topology. @@ -107,7 +108,10 @@ def _burn_points( def _burn_lines( - lines: ShapelyArray, like: "xugrid.Ugrid2d", values: np.ndarray, output: FloatArray + lines: "geopandas.GeoSeries", # type: ignore # noqa + like: "xugrid.Ugrid2d", + values: np.ndarray, + output: FloatArray, ) -> None: """ This algorithm breaks any linestring down into edges (two x, y points). We @@ -194,11 +198,11 @@ def burn_vector_geometry( output = np.full(like.n_face, fill) if len(points) > 0: - _burn_points(points.geometry.to_numpy(), like, values, output) + _burn_points(points.geometry, like, values, output) if len(lines) > 0: - _burn_lines(lines.geometry.to_numpy(), like, values, output) + _burn_lines(lines.geometry, like, values, output) if len(polygons) > 0: - _burn_polygons(polygons.geometry.to_numpy(), like, values, all_touched, output) + _burn_polygons(polygons.geometry, like, values, all_touched, output) return xugrid.UgridDataArray( obj=xr.DataArray(output, dims=[like.face_dimension], name=column), From 5d1da0bb5e78031ff46eef54738d907c3c2af627 Mon Sep 17 00:00:00 2001 From: Huite Bootsma Date: Wed, 26 Jul 2023 15:01:37 +0200 Subject: [PATCH 03/10] Add polygonize, make start on User Guide: vector conversion --- examples/vector_conversion.py | 92 +++++++++++++++++++++++++ xugrid/ugrid/polygonize.py | 123 ++++++++++++++++++++++++++++++++++ 2 files changed, 215 insertions(+) create mode 100644 examples/vector_conversion.py create mode 100644 xugrid/ugrid/polygonize.py diff --git a/examples/vector_conversion.py b/examples/vector_conversion.py new file mode 100644 index 000000000..9b66205c2 --- /dev/null +++ b/examples/vector_conversion.py @@ -0,0 +1,92 @@ +""" +Vector geometry conversion +========================== + +A great deal of geospatial data is available not in gridded form, but in +vectorized form: as points, lines, and polygons. In the Python data ecosystem, +these geometries and their associated data are generally represented by a +geopandas GeoDataFrame. + +Xugrid provides a number of utilities to use such data in combination with +unstructured grids. + +# +""" +# %% + +import geopandas as gpd +import matplotlib.pyplot as plt + +import xugrid as xu + +# %% +# We'll once again use the surface elevation data example. + +uda = xu.data.elevation_nl() +uda.ugrid.plot(vmin=-20, vmax=90, cmap="terrain") + +# %% +# Conversion to GeoDataFrame +# -------------------------- +# +# A UgridDataArray or UgridDataset can be directly converted to a GeoDataFrame, +# provided it only contains a spatial dimension (and not a dimension such as +# time). When calling +# ``.to_geodataframe``, a shapely Polygon is created for every face (cell). + +gdf = uda.ugrid.to_geodataframe() +gdf + +# %% +# Conversion from GeoDataFrame +# ---------------------------- +# +# We can also make the opposite conversion: we can create a UgridDataSet from a +# GeoDataFrame. +# +# .. note:: +# Not every GeoDataFrame can be converted to a ``xugrid`` representation! +# While an unstructured grid topology is generally always a valid collection +# of polygon geometries, not every collection of polygon geometries is valid +# grid: polygons should be convex and non-overlapping to create a valid +# unstructured grid. +# +# Hence, the ``.from_geodataframe()`` is primarily meant to create ``xugrid`` +# objects from data that were originally created as triangulation or +# unstructured grid, but that were converted to vector geometry form. + +back = xu.UgridDataset.from_geodataframe(gdf) +back + +# %% +# "Rasterizing", or "burning" vector geometries +# --------------------------------------------- +# +# Rasterizing is a common operation when working with raster and vector data. +# While we cannot name the operation "rasterizing" when we're dealing with +# unstructured grids, there is a clearly equivalent operation where we mark +# cells that are covered or touched by a polygon. +# +# In this example, we mark the cells that are covered by a certain province. + + +# %% + +provinces = gpd.read_file(r"c:\src\pandamesh\data\provinces-nl.geojson").to_crs(28992) +provinces["value"] = range(len(provinces)) +burned = xu.burn_vector_geometry(provinces, uda, column="value") +burned.ugrid.plot() + +# %% +# Polygonizing +# ------------ +# +# We can also do the opposite operation: turn collections of same-valued grid +# cells into vector polygons. Let's classify the elevation data into below and +# above the boundary of 5 m above mean sea level: + +classified = uda > 5 +polygonized = xu.polygonize(classified) +polygonized.plot(facecolor="none") + +# %% diff --git a/xugrid/ugrid/polygonize.py b/xugrid/ugrid/polygonize.py new file mode 100644 index 000000000..37a6fc4b3 --- /dev/null +++ b/xugrid/ugrid/polygonize.py @@ -0,0 +1,123 @@ +""" +The functions in this module serve to polygonize +""" +from typing import Tuple + +import numpy as np +from scipy import sparse + +from xugrid.constants import IntArray + + +def _bbox_area(bounds): + return (bounds[2] - bounds[0]) * (bounds[3] - bounds[1]) + + +def _classify( + fill_value: int, i: IntArray, j: IntArray, face_values: np.ndarray +) -> Tuple[int, IntArray]: + """ + Find out how many discrete polygons are created. Identify the connectivity, + such that we can select a single polygon afterwards. + + Parameters + ---------- + fill_value: int + Fill value in j: marks exterior edges. + i: np.ndarray of int + First face of the edge. + j: np.ndarray of int + Second face of the edge. + face_values: np.ndarray + + Returns + ------- + n_polygon: int + polygon_id: np.ndarray of int + """ + # Every face connects up to two faces. vi holds the values of the first face, + # vj holds the value of the second face. + vi = face_values[i] + vj = face_values[j] + # For labelling, only those parts of the mesh that have the same value + # should be connected with each other. + # Since we dropped NaN values before, we needn't worry about those. + is_connection = (j != fill_value) & (vi == vj) + i = i[is_connection] + j = j[is_connection] + ij = np.concatenate([i, j]) + ji = np.concatenate([j, i]) + coo_content = (ji, (ij, ji)) + coo_matrix = sparse.coo_matrix(coo_content) + # We can classify the grid faces using this (reduced) connectivity + return sparse.csgraph.connected_components(coo_matrix) + + +def polygonize(uda: "UgridDataArray") -> "gpd.GeoDataFrame": # type: ignore # noqa + """ + This function creates vector polygons for all connected regions of cells + (faces) in the Ugrid2d topology sharing a common value. + + The produced polygon edges will follow exactly the cell boundaries. When + the data consists of many unique values (e.g. elevation data), the result + will essentially be one polygon per face. In such cases, it is much more + efficient to use ``xugrid.UgridDataArray.to_geodataframe``, which directly + converts every cell to a polygon. This function is meant for data with + relatively few unique values such as classification results. + + Parameters + ---------- + uda: UgridDataArray + The DataArray should only contain the face dimension. Additional + dimensions, such as time, are not allowed. + + Returns + ------- + polygonized: GeoDataFrame + """ + + import geopandas as gpd + import shapely + + facedim = uda.ugrid.grid.face_dimension + if uda.dims != (facedim,): + raise ValueError( + "Cannot polygonize non-xy spatial dimensions. Expected only" + f"({facedim},), but received {uda.dims}." + ) + + # First remove the NaN values. These will not be polygonized anyway. + dropped = uda.dropna(dim=uda.ugrid.grid.face_dimension) + face_values = dropped.to_numpy() + grid = dropped.ugrid.grid + i, j = grid.edge_face_connectivity.T + fill_value = grid.fill_value + n_polygon, polygon_id = _classify(fill_value, i, j, face_values) + + # Now we identify for each label the subset of edges. These are the + # "exterior" edges: either the exterior edge of the mesh identified by a + # fill value, or by being connected to a cell with a different value. + coordinates = grid.node_coordinates + data_i = face_values[i] + vi = polygon_id[i] + vj = polygon_id[j] + boundary = (vi != vj) | (j == fill_value) + + polygons = [] + values = [] + for label in range(n_polygon): + keep = ((vi == label) | (vj == label)) & boundary + # The result of shapely polygonize is always a GeometryCollection. + # Holes are included twice: once as holes in the largest body, and once + # more as polygons. We are interested in the largest polygon, which we + # identify through its bounding box. + edges = grid.edge_node_connectivity[keep] + collection = shapely.polygonize(shapely.linestrings(coordinates[edges])) + polygon = max(collection.geoms, key=lambda x: _bbox_area(x.bounds)) + # Find the first True value in keep, use that to fetch the polygon + # value. + value = data_i[keep.argmax()] + polygons.append(polygon) + values.append(value) + + return gpd.GeoDataFrame({"values": values}, geometry=polygons) From cc0144dddb4c76e0c9f98528ff2bd2c43e744371 Mon Sep 17 00:00:00 2001 From: Huite Bootsma Date: Wed, 26 Jul 2023 17:18:52 +0200 Subject: [PATCH 04/10] Create a User Guide section on how to go from and to vector data --- docs/api.rst | 2 ++ docs/user_guide.rst | 1 + examples/vector_conversion.py | 62 ++++++++++++++++++++++++++--------- xugrid/__init__.py | 1 + xugrid/ugrid/burn.py | 2 +- xugrid/ugrid/polygonize.py | 30 ++++++++++------- 6 files changed, 70 insertions(+), 28 deletions(-) diff --git a/docs/api.rst b/docs/api.rst index 941d8b26a..ce4c7c73a 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -31,6 +31,8 @@ Top-level functions concat merge merge_partitions + burn_vector_geometry + polygonize UgridDataArray -------------- diff --git a/docs/user_guide.rst b/docs/user_guide.rst index d6fef0ba8..974023ca0 100644 --- a/docs/user_guide.rst +++ b/docs/user_guide.rst @@ -16,6 +16,7 @@ Information on specific methods and classes can be found in the API Reference. examples/plotting.rst examples/regridder_overview.rst examples/overlap_regridder.rst + examples/vector_conversion.rst examples/connectivity.rst examples/partitioning.rst sample_data/index.rst diff --git a/examples/vector_conversion.py b/examples/vector_conversion.py index 9b66205c2..62a5703e7 100644 --- a/examples/vector_conversion.py +++ b/examples/vector_conversion.py @@ -8,14 +8,11 @@ geopandas GeoDataFrame. Xugrid provides a number of utilities to use such data in combination with -unstructured grids. - -# +unstructured grids. These are demonstrated below. """ # %% import geopandas as gpd -import matplotlib.pyplot as plt import xugrid as xu @@ -38,27 +35,34 @@ gdf # %% +# We see that a GeoDataFrame with 5248 rows is created: one row for each face. +# # Conversion from GeoDataFrame # ---------------------------- # # We can also make the opposite conversion: we can create a UgridDataSet from a # GeoDataFrame. # +back = xu.UgridDataset.from_geodataframe(gdf) +back + +# %% # .. note:: # Not every GeoDataFrame can be converted to a ``xugrid`` representation! # While an unstructured grid topology is generally always a valid collection -# of polygon geometries, not every collection of polygon geometries is valid -# grid: polygons should be convex and non-overlapping to create a valid +# of polygon geometries, not every collection of polygon geometries is a +# valid grid: polygons should be convex and non-overlapping to create a valid # unstructured grid. # +# Secondly, each polygon fully owns its vertices (nodes), while the face of a +# UGRID topology shares its nodes with its neighbors. All the vertices of the +# polygons must therefore be exactly snapped together to form a connected +# mesh. +# # Hence, the ``.from_geodataframe()`` is primarily meant to create ``xugrid`` # objects from data that were originally created as triangulation or # unstructured grid, but that were converted to vector geometry form. - -back = xu.UgridDataset.from_geodataframe(gdf) -back - -# %% +# # "Rasterizing", or "burning" vector geometries # --------------------------------------------- # @@ -67,14 +71,29 @@ # unstructured grids, there is a clearly equivalent operation where we mark # cells that are covered or touched by a polygon. # -# In this example, we mark the cells that are covered by a certain province. +# In this example, we mark the faces that are covered by a certain province. +provinces = xu.data.provinces_nl.to_crs(28992) +provinces["value"] = range(len(provinces)) +burned = xu.burn_vector_geometry(provinces, uda, column="value") +burned.ugrid.plot() # %% +# This is a convenient way to create masks and such: -provinces = gpd.read_file(r"c:\src\pandamesh\data\provinces-nl.geojson").to_crs(28992) -provinces["value"] = range(len(provinces)) -burned = xu.burn_vector_geometry(provinces, uda, column="value") +utrecht = provinces[provinces["name"] == "Utrecht"] +burned = xu.burn_vector_geometry(utrecht, uda) +burned.ugrid.plot() + +# %% +# This function can also be used to burn points or lines into the faces of an +# unstructured grid. +# +# The exterior boundaries of the province polygons will provide +# a collection of linestrings that we can burn into the grid: + +lines = gpd.GeoDataFrame(geometry=provinces.exterior) +burned = xu.burn_vector_geometry(lines, uda) burned.ugrid.plot() # %% @@ -82,7 +101,7 @@ # ------------ # # We can also do the opposite operation: turn collections of same-valued grid -# cells into vector polygons. Let's classify the elevation data into below and +# faces into vector polygons. Let's classify the elevation data into below and # above the boundary of 5 m above mean sea level: classified = uda > 5 @@ -90,3 +109,14 @@ polygonized.plot(facecolor="none") # %% +# We see that the results consists of two large polygons, in which the +# triangles of the triangular grid have been merged to form a single polygon, +# and many smaller polygons, some of which correspond one-to-one to the +# triangles of the grid. +# +# .. note:: +# The produced polygon edges will follow exactly the face boundaries. When +# the data consists of many unique values (e.g. unbinned elevation data), the +# result will essentially be one polygon per face. In such cases, it is much +# more efficient to use ``xugrid.UgridDataArray.to_geodataframe``, which +# directly converts every face to a polygon. diff --git a/xugrid/__init__.py b/xugrid/__init__.py index 2ce0d433b..3048e1c60 100644 --- a/xugrid/__init__.py +++ b/xugrid/__init__.py @@ -25,6 +25,7 @@ from xugrid.ugrid.burn import burn_vector_geometry from xugrid.ugrid.conventions import UgridRolesAccessor from xugrid.ugrid.partitioning import merge_partitions +from xugrid.ugrid.polygonize import polygonize from xugrid.ugrid.snapping import snap_to_grid from xugrid.ugrid.ugrid1d import Ugrid1d from xugrid.ugrid.ugrid2d import Ugrid2d diff --git a/xugrid/ugrid/burn.py b/xugrid/ugrid/burn.py index cec9c52f8..e4b425542 100644 --- a/xugrid/ugrid/burn.py +++ b/xugrid/ugrid/burn.py @@ -192,7 +192,7 @@ def burn_vector_geometry( polygons = gdf.loc[geometry_id == shapely.GeometryType.POLYGON] if column is not None: - values = gdf["column"].to_numpy() + values = gdf[column].to_numpy() else: values = np.ones(len(gdf), dtype=float) diff --git a/xugrid/ugrid/polygonize.py b/xugrid/ugrid/polygonize.py index 37a6fc4b3..829ae5cf5 100644 --- a/xugrid/ugrid/polygonize.py +++ b/xugrid/ugrid/polygonize.py @@ -1,5 +1,5 @@ """ -The functions in this module serve to polygonize +The functions in this module serve to polygonize """ from typing import Tuple @@ -39,16 +39,20 @@ def _classify( # vj holds the value of the second face. vi = face_values[i] vj = face_values[j] + n = face_values.size # For labelling, only those parts of the mesh that have the same value # should be connected with each other. # Since we dropped NaN values before, we needn't worry about those. - is_connection = (j != fill_value) & (vi == vj) + is_connection = (i != fill_value) & (j != fill_value) & (vi == vj) i = i[is_connection] j = j[is_connection] ij = np.concatenate([i, j]) ji = np.concatenate([j, i]) coo_content = (ji, (ij, ji)) - coo_matrix = sparse.coo_matrix(coo_content) + # Make sure to explicitly set the matrix shape: otherwise, isolated + # elements witout any connection might disappear, and connected_components + # will not return a value for every face. + coo_matrix = sparse.coo_matrix(coo_content, shape=(n, n)) # We can classify the grid faces using this (reduced) connectivity return sparse.csgraph.connected_components(coo_matrix) @@ -59,11 +63,11 @@ def polygonize(uda: "UgridDataArray") -> "gpd.GeoDataFrame": # type: ignore # n (faces) in the Ugrid2d topology sharing a common value. The produced polygon edges will follow exactly the cell boundaries. When - the data consists of many unique values (e.g. elevation data), the result - will essentially be one polygon per face. In such cases, it is much more - efficient to use ``xugrid.UgridDataArray.to_geodataframe``, which directly - converts every cell to a polygon. This function is meant for data with - relatively few unique values such as classification results. + the data consists of many unique values (e.g. unbinned elevation data), the + result will essentially be one polygon per face. In such cases, it is much + more efficient to use ``xugrid.UgridDataArray.to_geodataframe``, which + directly converts every cell to a polygon. This function is meant for data + with relatively few unique values such as classification results. Parameters ---------- @@ -101,7 +105,11 @@ def polygonize(uda: "UgridDataArray") -> "gpd.GeoDataFrame": # type: ignore # n data_i = face_values[i] vi = polygon_id[i] vj = polygon_id[j] - boundary = (vi != vj) | (j == fill_value) + # Ensure that no result thas has been created by indexing with the + # fill_value remains. Since polygon_id starts counting a 0, we may use -1. + vi[i == fill_value] = -1 + vj[j == fill_value] = -1 + boundary = vi != vj polygons = [] values = [] @@ -109,8 +117,8 @@ def polygonize(uda: "UgridDataArray") -> "gpd.GeoDataFrame": # type: ignore # n keep = ((vi == label) | (vj == label)) & boundary # The result of shapely polygonize is always a GeometryCollection. # Holes are included twice: once as holes in the largest body, and once - # more as polygons. We are interested in the largest polygon, which we - # identify through its bounding box. + # more as polygons on their own. We are interested in the largest + # polygon, which we identify through its bounding box. edges = grid.edge_node_connectivity[keep] collection = shapely.polygonize(shapely.linestrings(coordinates[edges])) polygon = max(collection.geoms, key=lambda x: _bbox_area(x.bounds)) From 3b93de95ba2728fc9df3cb4361cf692c894ec65b Mon Sep 17 00:00:00 2001 From: Huite Bootsma Date: Thu, 27 Jul 2023 18:09:42 +0200 Subject: [PATCH 05/10] Add mapbox_earcut to dependencies --- .github/workflows/requirements/environment.yml | 1 + pyproject.toml | 1 + 2 files changed, 2 insertions(+) diff --git a/.github/workflows/requirements/environment.yml b/.github/workflows/requirements/environment.yml index 1e1c7c7c4..18b35af56 100644 --- a/.github/workflows/requirements/environment.yml +++ b/.github/workflows/requirements/environment.yml @@ -10,6 +10,7 @@ dependencies: - geopandas - flake8 - isort + - mapbox_earcut - netcdf4 - numba_celltree - pip diff --git a/pyproject.toml b/pyproject.toml index b6a24eadf..cfa73165b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -43,6 +43,7 @@ Issues = "https://github.com/deltares/xugrid/issues" [project.optional-dependencies] all = [ 'geopandas', + 'mapbox_earcut', 'matplotlib', 'meshkernel', 'netcdf4', From c0ccae4ceb9be384768b027227cd1350417ceac3 Mon Sep 17 00:00:00 2001 From: Huite Bootsma Date: Thu, 27 Jul 2023 18:11:43 +0200 Subject: [PATCH 06/10] More efficient burn_polygons. Expand user guide, especially on all_touched option. --- docs/changelog.rst | 9 +++ examples/vector_conversion.py | 65 ++++++++++++++-- xugrid/ugrid/burn.py | 136 +++++++++++++++++++++------------- 3 files changed, 150 insertions(+), 60 deletions(-) diff --git a/docs/changelog.rst b/docs/changelog.rst index 74d7543fe..5492b600b 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -9,6 +9,15 @@ The format is based on `Keep a Changelog`_, and this project adheres to Unreleased ---------- +Added +~~~~~ + +- Added :func:`xugrid.burn_vector_geometries` to burn vector geometries in the + form of geopandas GeoDataFrames into a Ugrid2d topology. +- Added :func:`xugrid.polygonize` to create vector polygons for all connected + regions of a Ugrid2d topology sharing a common value. The result is a + geopandas GeoDataFrame. + [0.6.2] 2023-07-26 ------------------ diff --git a/examples/vector_conversion.py b/examples/vector_conversion.py index 62a5703e7..2c2300780 100644 --- a/examples/vector_conversion.py +++ b/examples/vector_conversion.py @@ -13,6 +13,7 @@ # %% import geopandas as gpd +import matplotlib.pyplot as plt import xugrid as xu @@ -32,7 +33,7 @@ # ``.to_geodataframe``, a shapely Polygon is created for every face (cell). gdf = uda.ugrid.to_geodataframe() -gdf +print(gdf) # %% # We see that a GeoDataFrame with 5248 rows is created: one row for each face. @@ -73,7 +74,7 @@ # # In this example, we mark the faces that are covered by a certain province. -provinces = xu.data.provinces_nl.to_crs(28992) +provinces = xu.data.provinces_nl().to_crs(28992) provinces["value"] = range(len(provinces)) burned = xu.burn_vector_geometry(provinces, uda, column="value") burned.ugrid.plot() @@ -83,7 +84,49 @@ utrecht = provinces[provinces["name"] == "Utrecht"] burned = xu.burn_vector_geometry(utrecht, uda) -burned.ugrid.plot() +xmin, ymin, xmax, ymax = utrecht.buffer(10_000).total_bounds + +fig, ax = plt.subplots() +burned.ugrid.plot(ax=ax) +burned.ugrid.plot.line(ax=ax, edgecolor="black", linewidth=0.5) +utrecht.plot(ax=ax, edgecolor="red", facecolor="none", linewidth=1.5) +ax.set_xlim(xmin, xmax) +ax.set_ylim(ymin, ymax) + +# %% +# By default, ``burn_vector_geometry`` will only include grid faces whose +# centroid are located in a polygon. We can also mark all intersected faces +# by setting ``all_touched=True``: + +burned = xu.burn_vector_geometry(utrecht, uda, all_touched=True) + +fig, ax = plt.subplots() +burned.ugrid.plot(ax=ax) +burned.ugrid.plot.line(ax=ax, edgecolor="black", linewidth=0.5) +utrecht.plot(ax=ax, edgecolor="red", facecolor="none", linewidth=1.5) +ax.set_xlim(xmin, xmax) +ax.set_ylim(ymin, ymax) + +# %% +# Note that ``all_touched=True`` is less suitable when differently valued +# polygons are present that share borders. While the centroid of a face is +# contained by only a single polygon, the area of the polygon may be located +# in more than one polygon. In this case, the results of each polygon will +# overwrite each other (in arbitrary order). + +by_centroid = xu.burn_vector_geometry(provinces, uda, column="value") +by_touch = xu.burn_vector_geometry(provinces, uda, column="value", all_touched=True) + +fig, axes = plt.subplots(ncols=2, figsize=(10, 5)) +by_centroid.ugrid.plot(ax=axes[0], add_colorbar=False) +by_touch.ugrid.plot(ax=axes[1], add_colorbar=False) + +for ax, title in zip(axes, ("centroid", "all touched")): + burned.ugrid.plot.line(ax=ax, edgecolor="black", linewidth=0.5) + utrecht.plot(ax=ax, edgecolor="red", facecolor="none", linewidth=1.5) + ax.set_xlim(xmin, xmax) + ax.set_ylim(ymin, ymax) + ax.set_title(title) # %% # This function can also be used to burn points or lines into the faces of an @@ -94,7 +137,13 @@ lines = gpd.GeoDataFrame(geometry=provinces.exterior) burned = xu.burn_vector_geometry(lines, uda) -burned.ugrid.plot() + +fig, ax = plt.subplots() +burned.ugrid.plot(ax=ax) +burned.ugrid.plot.line(ax=ax, edgecolor="black", linewidth=0.5) +provinces.plot(ax=ax, edgecolor="red", facecolor="none", linewidth=1.5) +ax.set_xlim(xmin, xmax) +ax.set_ylim(ymin, ymax) # %% # Polygonizing @@ -111,12 +160,12 @@ # %% # We see that the results consists of two large polygons, in which the # triangles of the triangular grid have been merged to form a single polygon, -# and many smaller polygons, some of which correspond one-to-one to the +# and many smaller polygons, some of which correspond one to one to the # triangles of the grid. # # .. note:: # The produced polygon edges will follow exactly the face boundaries. When # the data consists of many unique values (e.g. unbinned elevation data), the -# result will essentially be one polygon per face. In such cases, it is much -# more efficient to use ``xugrid.UgridDataArray.to_geodataframe``, which -# directly converts every face to a polygon. +# result will essentially be one polygon per face. In such cases, it is more +# efficient to use ``xugrid.UgridDataArray.to_geodataframe``, which directly +# converts every face to a polygon. diff --git a/xugrid/ugrid/burn.py b/xugrid/ugrid/burn.py index e4b425542..62b74695e 100644 --- a/xugrid/ugrid/burn.py +++ b/xugrid/ugrid/burn.py @@ -1,10 +1,18 @@ from typing import List, Union +import numba as nb import numpy as np import xarray as xr +from numba_celltree.constants import Point, Triangle +from numba_celltree.geometry_utils import ( + as_point, + as_triangle, + cross_product, + to_vector, +) import xugrid -from xugrid.constants import BoolArray, FloatArray, MissingOptionalModule +from xugrid.constants import FloatArray, IntArray, MissingOptionalModule try: import shapely @@ -12,18 +20,83 @@ shapely = MissingOptionalModule("shapely") +@nb.njit(inline="always") +def point_in_triangle(p: Point, triangle: Triangle) -> bool: + """Unrolled half-plane check.""" + # TODO: move this in numba_celltree instead? + a = cross_product(to_vector(triangle.a, triangle.b), to_vector(triangle.a, p)) > 0 + b = cross_product(to_vector(triangle.b, triangle.c), to_vector(triangle.b, p)) > 0 + c = cross_product(to_vector(triangle.c, triangle.a), to_vector(triangle.c, p)) > 0 + return (a == b) and (b == c) + + +@nb.njit(inline="always", parallel=True, cache=True) +def points_in_triangles( + points: FloatArray, + face_indices: IntArray, + faces: IntArray, + vertices: FloatArray, +): + # TODO: move this in numba_celltree instead? + n_points = len(points) + inside = np.empty(n_points, dtype=np.bool_) + for i in nb.prange(n_points): + face_index = face_indices[i] + face = faces[face_index] + triangle = as_triangle(vertices, face) + point = as_point(points[i]) + inside[i] = point_in_triangle(point, triangle) + return inside + + def _locate_polygon( - xy: FloatArray, + grid: "xu.Ugrid2d", # type: ignore # noqa exterior: FloatArray, interiors: List[FloatArray], -) -> BoolArray: + all_touched: bool, +) -> IntArray: + """ + This algorithm burns polygon vector geometries in a 2d topology by: + + * Extracting the exterior and interiors (holes) coordinates from the + polygons. + * Breaking every polygon down into a triangles using an "earcut" algorithm. + * Searching the grid for these triangles. + + Due to the use of the separating axes theorem, _locate_faces finds all + intersecting triangles, including those who only touch the edge. + To enable all_touched=False, we have to search the centroids of candidates + in the intersecting triangles. + + Parameters + ---------- + grid: Ugrid2d + exterior: FloatArray + Exterior of the polygon. + interiors: List[FloatArray] + Interior holes of the polygon. + all_touched: bool + Whether to include include cells whose centroid falls inside, of every + cell that is touched. + """ + import mapbox_earcut rings = np.cumsum([len(exterior)] + [len(interior) for interior in interiors]) vertices = np.vstack([exterior] + interiors).astype(np.float64) triangles = mapbox_earcut.triangulate_float64(vertices, rings).reshape((-1, 3)) - grid = xugrid.Ugrid2d(*vertices.T, -1, triangles) - return grid.locate_points(xy) != -1 + triangle_indices, grid_indices = grid.celltree._locate_faces(vertices, triangles) + if all_touched: + return grid_indices + else: + centroids = grid.centroids[grid_indices] + inside = points_in_triangles( + points=centroids, + face_indices=triangle_indices, + faces=triangles, + vertices=vertices, + ) + return grid_indices[inside] def _burn_polygons( @@ -33,27 +106,6 @@ def _burn_polygons( all_touched: bool, output: FloatArray, ) -> None: - """ - This algorithm burns polygon vector geometries in a 2d topology by: - - * Extracting the exterior and interiors (holes) coordinates from the - polygons. - * Breaking every polygon down into a triangles using an "earcut" algorithm. - * Building a Ugrid2d from the triangles. - - When all_touched=False: - - * The grid created from the polygon is searched for every centroid of the - ``like`` grid. - - When all_touched=True: - - * The grid created from the polygon is searched for every node of the - ``like`` grid. - * If any of the associated nodes of a face is found in the polygon, the - value is burned into the face. - - """ exterior_coordinates = [ shapely.get_coordinates(exterior) for exterior in polygons.exterior ] @@ -61,33 +113,13 @@ def _burn_polygons( [shapely.get_coordinates(p_interior) for p_interior in p_interiors] for p_interiors in polygons.interiors ] + to_burn = np.empty(like.n_face, dtype=bool) - if all_touched: - # Pre-allocate work arrays so we don't have re-allocate for every - # polygon. - to_burn2d = np.empty_like(like.face_node_connectivity, dtype=bool) - to_burn = np.empty(like.n_face, dtype=bool) - # These data are static: - mask = like.face_node_connectivity == like.fill_value - xy = like.node_coordinates - for exterior, interiors, value in zip( - exterior_coordinates, interior_coordinates, values - ): - location = _locate_polygon(xy, exterior, interiors) - # Equal to: to_burn2d = location[like.face_node_connectivity] - location.take(like.face_node_connectivity, out=to_burn2d) - to_burn2d[mask] = False - # Equal to: to_burn = to_burn2d.any(axis=1) - np.bitwise_or.reduce(to_burn2d, axis=1, out=to_burn) - output[to_burn] = value - - else: - xy = like.centroids - for exterior, interiors, value in zip( - exterior_coordinates, interior_coordinates, values - ): - to_burn = _locate_polygon(xy, exterior, interiors) - output[to_burn] = value + for exterior, interiors, value in zip( + exterior_coordinates, interior_coordinates, values + ): + to_burn = _locate_polygon(like, exterior, interiors, all_touched) + output[to_burn] = value return From 9cdecbb9ed9377fd7b2b031e4c3e68a8bfac28fc Mon Sep 17 00:00:00 2001 From: Huite Bootsma Date: Fri, 28 Jul 2023 19:18:28 +0200 Subject: [PATCH 07/10] Add tests for burn_vector_geometry --- examples/vector_conversion.py | 2 +- tests/test_burn.py | 134 ++++++++++++++++++++++++++++++++++ xugrid/ugrid/burn.py | 62 ++++++++++++++-- 3 files changed, 189 insertions(+), 9 deletions(-) create mode 100644 tests/test_burn.py diff --git a/examples/vector_conversion.py b/examples/vector_conversion.py index 2c2300780..40779f9af 100644 --- a/examples/vector_conversion.py +++ b/examples/vector_conversion.py @@ -112,7 +112,7 @@ # polygons are present that share borders. While the centroid of a face is # contained by only a single polygon, the area of the polygon may be located # in more than one polygon. In this case, the results of each polygon will -# overwrite each other (in arbitrary order). +# overwrite each other. by_centroid = xu.burn_vector_geometry(provinces, uda, column="value") by_touch = xu.burn_vector_geometry(provinces, uda, column="value", all_touched=True) diff --git a/tests/test_burn.py b/tests/test_burn.py new file mode 100644 index 000000000..8d0c8ab79 --- /dev/null +++ b/tests/test_burn.py @@ -0,0 +1,134 @@ +import os + +os.environ["NUMBA_DISABLE_JIT"] = "1" +import geopandas as gpd +import numpy as np +import pytest +import shapely +from numba_celltree.constants import Point, Triangle + +import xugrid as xu +from xugrid.ugrid import burn + + +@pytest.fixture(scope="function") +def grid(): + """Three by three squares""" + x = np.arange(0.0, 4.0) + y = np.arange(0.0, 4.0) + node_y, node_x = [a.ravel() for a in np.meshgrid(y, x, indexing="ij")] + nx = ny = 3 + # Define the first vertex of every face, v. + v = (np.add.outer(np.arange(nx), nx * np.arange(ny)) + np.arange(ny)).T.ravel() + faces = np.column_stack((v, v + 1, v + nx + 2, v + nx + 1)) + return xu.Ugrid2d(node_x, node_y, -1, faces) + + +def test_point_in_triangle(): + a = Point(0.1, 0.1) + b = Point(0.7, 0.5) + c = Point(0.4, 0.7) + # Should work for clockwise and ccw orientation. + triangle = Triangle(a, b, c) + rtriangle = Triangle(c, b, a) + p = Point(0.5, 0.5) + assert burn.point_in_triangle(p, triangle) + assert burn.point_in_triangle(p, rtriangle) + + p = Point(0.0, 0.0) + assert not burn.point_in_triangle(p, triangle) + assert not burn.point_in_triangle(p, rtriangle) + + +def test_points_in_triangle(): + vertices = np.array( + [ + [0.0, 0.0], + [1.0, 0.0], + [1.0, 1.0], + [2.0, 0.0], + ] + ) + faces = np.array( + [ + [0, 1, 2], + [1, 3, 2], + ] + ) + points = np.array( + [ + [-0.5, 0.25], + [0.0, 0.0], # on vertex + [0.5, 0.5], # on edge + [0.5, 0.25], + [1.5, 0.25], + [2.5, 0.25], + ] + ) + face_indices = np.array([0, 0, 0, 0, 1, 1]) + expected = [False, True, True, True, True, False] + actual = burn.points_in_triangles( + points=points, + face_indices=face_indices, + faces=faces, + vertices=vertices, + ) + assert np.array_equal(expected, actual) + + +def test_locate_polygon(grid): + polygon = shapely.Polygon(shell=[(0.5, 0.5), (2.5, 0.5), (0.5, 2.5)]) + exterior = shapely.get_coordinates(polygon.exterior) + interiors = [shapely.get_coordinates(interior) for interior in polygon.interiors] + + actual = burn._locate_polygon(grid, exterior, interiors, all_touched=False) + assert np.array_equal(np.sort(actual), [0, 1, 2, 3, 4, 6]) + actual = burn._locate_polygon(grid, exterior, interiors, all_touched=True) + assert np.array_equal(np.sort(actual), [0, 1, 2, 3, 4, 6]) + + polygon = shapely.Polygon(shell=[(0.75, 0.5), (2.5, 0.5), (0.75, 2.5)]) + exterior = shapely.get_coordinates(polygon.exterior) + interiors = [shapely.get_coordinates(interior) for interior in polygon.interiors] + + actual = burn._locate_polygon(grid, exterior, interiors, all_touched=False) + assert np.array_equal(np.sort(actual), [1, 2, 4]) + actual = burn._locate_polygon(grid, exterior, interiors, all_touched=True) + assert np.array_equal(np.sort(actual), [0, 1, 2, 3, 4, 5, 6, 7]) + + +def test_locate_polygon_with_hole(grid): + # The hole omits the centroid at (1.5, 1.5) + polygon = shapely.Polygon( + shell=[(0.7, 0.7), (2.3, 0.7), (1.5, 2.3)], + holes=[[(1.4, 1.6), (1.5, 1.4), (1.6, 1.6)]], + ) + exterior = shapely.get_coordinates(polygon.exterior) + interiors = [shapely.get_coordinates(interior) for interior in polygon.interiors] + + actual = burn._locate_polygon(grid, exterior, interiors, all_touched=False) + assert np.array_equal(actual, []) + actual = burn._locate_polygon(grid, exterior, interiors, all_touched=True) + assert np.array_equal(np.unique(actual), [0, 1, 2, 3, 4, 5, 7]) + + +def test_burn_polygons(grid): + output = np.full(grid.n_face, np.nan) + values = [0, 1] + polygons = gpd.GeoSeries( + [ + shapely.Polygon(shell=[(0.0, 0.0), (2.0, 0.0), (2.0, 2.0), (0.0, 2.0)]), + shapely.Polygon( + shell=[ + (0.0, 2.0), + (2.0, 2.0), + (2.0, 0.0), + (3.0, 0.0), + (3.0, 3.0), + (0.0, 3.0), + ] + ), + ] + ) + burn._burn_polygons(polygons, grid, values, all_touched=False, output=output) + expected = np.array([0, 0, 1, 0, 0, 1, 1, 1, 1]) + assert np.allclose(output, expected) diff --git a/xugrid/ugrid/burn.py b/xugrid/ugrid/burn.py index 62b74695e..ff1cf800f 100644 --- a/xugrid/ugrid/burn.py +++ b/xugrid/ugrid/burn.py @@ -3,7 +3,7 @@ import numba as nb import numpy as np import xarray as xr -from numba_celltree.constants import Point, Triangle +from numba_celltree.constants import TOLERANCE_ON_EDGE, Point, Triangle, Vector from numba_celltree.geometry_utils import ( as_point, as_triangle, @@ -21,13 +21,59 @@ @nb.njit(inline="always") -def point_in_triangle(p: Point, triangle: Triangle) -> bool: - """Unrolled half-plane check.""" - # TODO: move this in numba_celltree instead? - a = cross_product(to_vector(triangle.a, triangle.b), to_vector(triangle.a, p)) > 0 - b = cross_product(to_vector(triangle.b, triangle.c), to_vector(triangle.b, p)) > 0 - c = cross_product(to_vector(triangle.c, triangle.a), to_vector(triangle.c, p)) > 0 - return (a == b) and (b == c) +def on_edge(p: Point, a: Point, ab: Vector, twice_area: float): + if abs(twice_area) < TOLERANCE_ON_EDGE: + if ab.x != 0.0: + t = (p.x - a.x) / ab.x + elif ab.y != 0.0: + t = (p.y - a.y) / ab.y + else: + return False + if 0 <= t <= 1: + # It's on the edge. + return True + return False + + +@nb.njit(inline="always") +def on_edge(p: Point, a: Point, b: Point, ab: Vector, twice_area: float): + if abs(twice_area) < TOLERANCE_ON_EDGE: + if abs(ab.x) >= abs(ab.y): + if ab.x > 0: + return a.x <= p.x and p.x <= b.x + return b.x <= p.x and p.x <= a.x + else: + if ab.y > 0: + return a.y <= p.y and p.y <= b.y + return b.y <= p.y and p.y <= a.y + + return False + + +@nb.njit(inline="always") +def point_in_triangle(p: Point, t: Triangle) -> bool: + ap = to_vector(t.a, p) + bp = to_vector(t.b, p) + cp = to_vector(t.c, p) + ab = to_vector(t.a, t.b) + bc = to_vector(t.b, t.c) + ca = to_vector(t.c, t.a) + # Do a half plane check. + A = cross_product(ab, ap) + B = cross_product(bc, bp) + C = cross_product(ca, cp) + signA = A > 0 + signB = B > 0 + signC = C > 0 + if (signA == signB) and (signB == signC): + return True + if ( + on_edge(p, t.a, t.b, ab, A) + or on_edge(p, t.b, t.c, bc, B) + or on_edge(p, t.c, t.a, ca, C) + ): + return True + return False @nb.njit(inline="always", parallel=True, cache=True) From 086457931cb14108f20c8789dcfe8878aa5c936c Mon Sep 17 00:00:00 2001 From: Huite Bootsma Date: Mon, 31 Jul 2023 15:04:51 +0200 Subject: [PATCH 08/10] Finish test_burn, test_polygonize, some fixes --- tests/test_burn.py | 146 ++++++++++++++++++++++++++++++++----- tests/test_polygonize.py | 49 +++++++++++++ xugrid/ugrid/burn.py | 81 ++++++++++---------- xugrid/ugrid/polygonize.py | 2 +- 4 files changed, 219 insertions(+), 59 deletions(-) create mode 100644 tests/test_polygonize.py diff --git a/tests/test_burn.py b/tests/test_burn.py index 8d0c8ab79..74dd87401 100644 --- a/tests/test_burn.py +++ b/tests/test_burn.py @@ -1,11 +1,9 @@ -import os - -os.environ["NUMBA_DISABLE_JIT"] = "1" import geopandas as gpd import numpy as np import pytest import shapely from numba_celltree.constants import Point, Triangle +from shapely.geometry import Polygon import xugrid as xu from xugrid.ugrid import burn @@ -24,6 +22,61 @@ def grid(): return xu.Ugrid2d(node_x, node_y, -1, faces) +@pytest.fixture(scope="function") +def points_and_values(): + xy = np.array( + [ + [0.5, 0.5], + [1.5, 0.5], + [2.5, 2.5], + ] + ) + points = gpd.points_from_xy(*xy.T) + values = np.array([0.0, 1.0, 3.0]) + return points, values + + +@pytest.fixture(scope="function") +def lines_and_values(): + xy = np.array( + [ + [0.5, 0.5], + [2.5, 0.5], + [1.2, 1.5], + [1.8, 1.5], + [0.2, 2.2], + [0.8, 2.8], + [1.2, 2.2], + [1.8, 2.8], + ] + ) + indices = np.array([0, 0, 1, 1, 2, 2, 2, 2]) + values = np.array([0, 1, 2]) + lines = gpd.GeoSeries(shapely.linestrings(xy, indices=indices)) + return lines, values + + +@pytest.fixture(scope="function") +def polygons_and_values(): + values = [0, 1] + polygons = gpd.GeoSeries( + [ + shapely.Polygon(shell=[(0.0, 0.0), (2.0, 0.0), (2.0, 2.0), (0.0, 2.0)]), + shapely.Polygon( + shell=[ + (0.0, 2.0), + (2.0, 2.0), + (2.0, 0.0), + (3.0, 0.0), + (3.0, 3.0), + (0.0, 3.0), + ] + ), + ] + ) + return polygons, values + + def test_point_in_triangle(): a = Point(0.1, 0.1) b = Point(0.7, 0.5) @@ -111,24 +164,77 @@ def test_locate_polygon_with_hole(grid): assert np.array_equal(np.unique(actual), [0, 1, 2, 3, 4, 5, 7]) -def test_burn_polygons(grid): +def test_burn_polygons(grid, polygons_and_values): + polygons, values = polygons_and_values output = np.full(grid.n_face, np.nan) - values = [0, 1] - polygons = gpd.GeoSeries( - [ - shapely.Polygon(shell=[(0.0, 0.0), (2.0, 0.0), (2.0, 2.0), (0.0, 2.0)]), - shapely.Polygon( - shell=[ - (0.0, 2.0), - (2.0, 2.0), - (2.0, 0.0), - (3.0, 0.0), - (3.0, 3.0), - (0.0, 3.0), - ] - ), - ] - ) burn._burn_polygons(polygons, grid, values, all_touched=False, output=output) expected = np.array([0, 0, 1, 0, 0, 1, 1, 1, 1]) assert np.allclose(output, expected) + + +def test_burn_points(grid, points_and_values): + points, values = points_and_values + output = np.full(grid.n_face, -1.0) + + burn._burn_points(points, grid, values, output=output) + expected = np.array([0, 1, -1, -1, -1, -1, -1, -1, 3]) + assert np.allclose(output, expected) + + +def test_burn_lines(grid, lines_and_values): + lines, values = lines_and_values + output = np.full(grid.n_face, -1.0) + + burn._burn_lines(lines, grid, values, output=output) + expected = np.array([0, 0, 0, -1, 1, -1, 2, 2, -1]) + assert np.allclose(output, expected) + + +def test_burn_vector_geometry__errors(grid, points_and_values): + with pytest.raises(TypeError, match="gdf must be GeoDataFrame"): + xu.burn_vector_geometry(0, grid) + + points, values = points_and_values + gdf = gpd.GeoDataFrame({"values": values}, geometry=points) + with pytest.raises(TypeError, match="Like must be Ugrid2d, UgridDataArray"): + xu.burn_vector_geometry(gdf, gdf) + + # This seems like the easiest way to generate a multi-polygon inside a + # GeoDataFrame, since it won't initialize with a multi-polygon. + p1 = Polygon([(0, 0), (1, 0), (1, 1)]) + p2 = Polygon([(0, 0), (1, 0), (1, 1), (0, 1)]) + p3 = Polygon([(2, 0), (3, 0), (3, 1), (2, 1)]) + gdf = gpd.GeoDataFrame({"values": [0, 0, 0]}, geometry=[p1, p2, p3]).dissolve( + by="values" + ) + with pytest.raises( + TypeError, match="GeoDataFrame contains unsupported geometry types" + ): + xu.burn_vector_geometry(gdf, grid) + + +def test_burn_vector_geometry( + grid, points_and_values, lines_and_values, polygons_and_values +): + polygons, poly_values = polygons_and_values + gdf = gpd.GeoDataFrame({"values": poly_values}, geometry=polygons) + actual = xu.burn_vector_geometry(gdf, grid) + assert isinstance(actual, xu.UgridDataArray) + assert np.allclose(actual.to_numpy(), 1) + + expected = np.array([0, 0, 1, 0, 0, 1, 1, 1, 1]) + actual = xu.burn_vector_geometry(gdf, grid, column="values") + assert np.allclose(actual.to_numpy(), expected) + + points, point_values = points_and_values + lines, line_values = lines_and_values + line_values += 10 + point_values += 20 + values = np.concatenate([poly_values, line_values, point_values]) + geometry = np.concatenate( + [polygons.to_numpy(), lines.to_numpy(), points.to_numpy()] + ) + gdf = gpd.GeoDataFrame({"values": values}, geometry=geometry) + actual = xu.burn_vector_geometry(gdf, grid, column="values") + expected = np.array([20.0, 21.0, 10.0, 0.0, 11.0, 1.0, 12.0, 12.0, 23.0]) + assert np.allclose(actual.to_numpy(), expected) diff --git a/tests/test_polygonize.py b/tests/test_polygonize.py new file mode 100644 index 000000000..b9edd109f --- /dev/null +++ b/tests/test_polygonize.py @@ -0,0 +1,49 @@ +import geopandas as gpd +import numpy as np +import pytest +import xarray as xr + +import xugrid as xu + + +@pytest.fixture(scope="function") +def grid(): + """Three by three squares""" + x = np.arange(0.0, 4.0) + y = np.arange(0.0, 4.0) + node_y, node_x = [a.ravel() for a in np.meshgrid(y, x, indexing="ij")] + nx = ny = 3 + # Define the first vertex of every face, v. + v = (np.add.outer(np.arange(nx), nx * np.arange(ny)) + np.arange(ny)).T.ravel() + faces = np.column_stack((v, v + 1, v + nx + 2, v + nx + 1)) + return xu.Ugrid2d(node_x, node_y, -1, faces) + + +def test_polygonize__errors(grid): + uda = xu.UgridDataArray( + xr.DataArray(np.ones(grid.n_edge), dims=[grid.edge_dimension]), grid=grid + ) + with pytest.raises(ValueError, match="Cannot polygonize non-face dimension"): + xu.polygonize(uda) + + uda = xu.UgridDataArray( + xr.DataArray(np.ones((3, grid.n_face)), dims=["layer", grid.face_dimension]), + grid=grid, + ) + with pytest.raises(ValueError, match="Cannot polygonize non-face dimension"): + xu.polygonize(uda) + + +def test_polygonize(grid): + a = np.array([0, 0, 0, 1, 1, 1, 0, 0, 0]) + uda = xu.UgridDataArray(xr.DataArray(a, dims=grid.face_dimension), grid) + actual = xu.polygonize(uda) + assert isinstance(actual, gpd.GeoDataFrame) + assert len(actual) == 3 + + # With a hole in the 1-valued polygon. + a = np.array([1, 1, 1, 1, 0, 1, 1, 1, 1]) + uda = xu.UgridDataArray(xr.DataArray(a, dims=grid.face_dimension), grid) + actual = xu.polygonize(uda) + assert isinstance(actual, gpd.GeoDataFrame) + assert len(actual) == 2 diff --git a/xugrid/ugrid/burn.py b/xugrid/ugrid/burn.py index ff1cf800f..76f8f2ba8 100644 --- a/xugrid/ugrid/burn.py +++ b/xugrid/ugrid/burn.py @@ -3,7 +3,7 @@ import numba as nb import numpy as np import xarray as xr -from numba_celltree.constants import TOLERANCE_ON_EDGE, Point, Triangle, Vector +from numba_celltree.constants import TOLERANCE_ON_EDGE, Point, Triangle from numba_celltree.geometry_utils import ( as_point, as_triangle, @@ -21,33 +21,27 @@ @nb.njit(inline="always") -def on_edge(p: Point, a: Point, ab: Vector, twice_area: float): - if abs(twice_area) < TOLERANCE_ON_EDGE: - if ab.x != 0.0: - t = (p.x - a.x) / ab.x - elif ab.y != 0.0: - t = (p.y - a.y) / ab.y - else: - return False - if 0 <= t <= 1: - # It's on the edge. - return True - return False - +def in_bounds(p: Point, a: Point, b: Point) -> bool: + """ + Check whether point p falls within the bounding box created by a and b + (after we've checked the size of the cross product). -@nb.njit(inline="always") -def on_edge(p: Point, a: Point, b: Point, ab: Vector, twice_area: float): - if abs(twice_area) < TOLERANCE_ON_EDGE: - if abs(ab.x) >= abs(ab.y): - if ab.x > 0: - return a.x <= p.x and p.x <= b.x - return b.x <= p.x and p.x <= a.x - else: - if ab.y > 0: - return a.y <= p.y and p.y <= b.y - return b.y <= p.y and p.y <= a.y + However, we must take into account that a line may be either vertical + (dx=0) or horizontal (dy=0) and only evaluate the non-zero value. - return False + If the area created by p, a, b is tiny AND p is within the bounds of a and + b, the point lies very close to the edge. + """ + dx = b.x - a.x + dy = b.y - a.y + if abs(dx) >= abs(dy): + if dx > 0: + return a.x <= p.x and p.x <= b.x + return b.x <= p.x and p.x <= a.x + else: + if dy > 0: + return a.y <= p.y and p.y <= b.y + return b.y <= p.y and p.y <= a.y @nb.njit(inline="always") @@ -67,12 +61,18 @@ def point_in_triangle(p: Point, t: Triangle) -> bool: signC = C > 0 if (signA == signB) and (signB == signC): return True + + # Test whether p is located on/very close to edges. if ( - on_edge(p, t.a, t.b, ab, A) - or on_edge(p, t.b, t.c, bc, B) - or on_edge(p, t.c, t.a, ca, C) + (abs(A) < TOLERANCE_ON_EDGE) + and in_bounds(p, t.a, t.b) + or (abs(B) < TOLERANCE_ON_EDGE) + and in_bounds(p, t.b, t.c) + or (abs(C) < TOLERANCE_ON_EDGE) + and in_bounds(p, t.c, t.a) ): return True + return False @@ -83,7 +83,7 @@ def points_in_triangles( faces: IntArray, vertices: FloatArray, ): - # TODO: move this in numba_celltree instead? + # TODO: move this into numba_celltree instead? n_points = len(points) inside = np.empty(n_points, dtype=np.bool_) for i in nb.prange(n_points): @@ -265,22 +265,27 @@ def burn_vector_geometry( "GeoDataFrame contains unsupported geometry types. Can only burn " "Point, LineString, and Polygon geometries." ) + points = gdf.loc[geometry_id == shapely.GeometryType.POINT] lines = gdf.loc[geometry_id == shapely.GeometryType.LINESTRING] polygons = gdf.loc[geometry_id == shapely.GeometryType.POLYGON] - if column is not None: - values = gdf[column].to_numpy() + if column is None: + point_values = np.ones(len(points), dtype=float) + line_values = np.ones(len(lines), dtype=float) + poly_values = np.ones(len(polygons), dtype=float) else: - values = np.ones(len(gdf), dtype=float) + point_values = points[column].to_numpy() + line_values = lines[column].to_numpy() + poly_values = polygons[column].to_numpy() output = np.full(like.n_face, fill) - if len(points) > 0: - _burn_points(points.geometry, like, values, output) - if len(lines) > 0: - _burn_lines(lines.geometry, like, values, output) if len(polygons) > 0: - _burn_polygons(polygons.geometry, like, values, all_touched, output) + _burn_polygons(polygons.geometry, like, poly_values, all_touched, output) + if len(lines) > 0: + _burn_lines(lines.geometry, like, line_values, output) + if len(points) > 0: + _burn_points(points.geometry, like, point_values, output) return xugrid.UgridDataArray( obj=xr.DataArray(output, dims=[like.face_dimension], name=column), diff --git a/xugrid/ugrid/polygonize.py b/xugrid/ugrid/polygonize.py index 829ae5cf5..6fb4d3b08 100644 --- a/xugrid/ugrid/polygonize.py +++ b/xugrid/ugrid/polygonize.py @@ -86,7 +86,7 @@ def polygonize(uda: "UgridDataArray") -> "gpd.GeoDataFrame": # type: ignore # n facedim = uda.ugrid.grid.face_dimension if uda.dims != (facedim,): raise ValueError( - "Cannot polygonize non-xy spatial dimensions. Expected only" + "Cannot polygonize non-face dimensions. Expected only" f"({facedim},), but received {uda.dims}." ) From 5475383382885c6dc5bb34fd526486f521004f4b Mon Sep 17 00:00:00 2001 From: Huite Bootsma Date: Mon, 31 Jul 2023 15:36:24 +0200 Subject: [PATCH 09/10] Also allow LinearRing, better error --- xugrid/ugrid/burn.py | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/xugrid/ugrid/burn.py b/xugrid/ugrid/burn.py index 76f8f2ba8..fcd92c615 100644 --- a/xugrid/ugrid/burn.py +++ b/xugrid/ugrid/burn.py @@ -20,6 +20,13 @@ shapely = MissingOptionalModule("shapely") +POINT = shapely.GeometryType.POINT +LINESTRING = shapely.GeometryType.LINESTRING +LINEARRING = shapely.GeometryType.LINEARRING +POLYGON = shapely.GeometryType.POLYGON +GEOM_NAMES = {v: k for k, v in shapely.GeometryType.__members__.items()} + + @nb.njit(inline="always") def in_bounds(p: Point, a: Point, b: Point) -> bool: """ @@ -255,20 +262,20 @@ def burn_vector_geometry( f"received: {type(like).__name__}" ) geometry_id = shapely.get_type_id(gdf.geometry) - allowed_types = ( - shapely.GeometryType.POINT, - shapely.GeometryType.LINESTRING, - shapely.GeometryType.POLYGON, - ) + allowed_types = (POINT, LINESTRING, LINEARRING, POLYGON) if not np.isin(geometry_id, allowed_types).all(): + received = ", ".join( + [GEOM_NAMES[geom_id] for geom_id in np.unique(geometry_id)] + ) raise TypeError( "GeoDataFrame contains unsupported geometry types. Can only burn " - "Point, LineString, and Polygon geometries." + "Point, LineString, LinearRing, and Polygon geometries. Received: " + f"{received}" ) - points = gdf.loc[geometry_id == shapely.GeometryType.POINT] - lines = gdf.loc[geometry_id == shapely.GeometryType.LINESTRING] - polygons = gdf.loc[geometry_id == shapely.GeometryType.POLYGON] + points = gdf.loc[geometry_id == POINT] + lines = gdf.loc[(geometry_id == LINESTRING) | (geometry_id == LINEARRING)] + polygons = gdf.loc[geometry_id == POLYGON] if column is None: point_values = np.ones(len(points), dtype=float) From 7423009399b8ba7162afb191785e2920694610af Mon Sep 17 00:00:00 2001 From: Huite Bootsma Date: Mon, 31 Jul 2023 18:28:14 +0200 Subject: [PATCH 10/10] Process comments --- examples/vector_conversion.py | 48 +++++++++++++++++++++++++++++++---- tests/test_burn.py | 6 +++++ xugrid/ugrid/burn.py | 9 +++++-- 3 files changed, 56 insertions(+), 7 deletions(-) diff --git a/examples/vector_conversion.py b/examples/vector_conversion.py index 40779f9af..7a8cc6d50 100644 --- a/examples/vector_conversion.py +++ b/examples/vector_conversion.py @@ -14,6 +14,7 @@ import geopandas as gpd import matplotlib.pyplot as plt +import pandas as pd import xugrid as xu @@ -73,14 +74,27 @@ # cells that are covered or touched by a polygon. # # In this example, we mark the faces that are covered by a certain province. +# +# We start by re-projecting the provinces dataset to the coordinate reference +# system (CRS), from WGS84 (EPSG:4326) to the Dutch National coordinate system +# (RD New, EPSG: 28992). Then, we give each province a unique id, which we +# burn into the grid. provinces = xu.data.provinces_nl().to_crs(28992) -provinces["value"] = range(len(provinces)) -burned = xu.burn_vector_geometry(provinces, uda, column="value") +provinces["id"] = range(len(provinces)) +burned = xu.burn_vector_geometry(provinces, uda, column="id") burned.ugrid.plot() # %% -# This is a convenient way to create masks and such: +# This makes it very easy to classify and group data. Let's say +# we want to compute the average surface elevation per province: + +burned = xu.burn_vector_geometry(provinces, uda, column="id") +uda.groupby(burned).mean() + + +# %% +# This is a convenient way to create masks for specific regions: utrecht = provinces[provinces["name"] == "Utrecht"] burned = xu.burn_vector_geometry(utrecht, uda) @@ -107,6 +121,12 @@ ax.set_xlim(xmin, xmax) ax.set_ylim(ymin, ymax) +# %% +# We can also use such "masks" to e.g. modify specific parts of the grid data: + +modified = (uda + 50.0).where(burned == 1, other=uda) +modified.ugrid.plot(vmin=-20, vmax=90, cmap="terrain") + # %% # Note that ``all_touched=True`` is less suitable when differently valued # polygons are present that share borders. While the centroid of a face is @@ -114,8 +134,8 @@ # in more than one polygon. In this case, the results of each polygon will # overwrite each other. -by_centroid = xu.burn_vector_geometry(provinces, uda, column="value") -by_touch = xu.burn_vector_geometry(provinces, uda, column="value", all_touched=True) +by_centroid = xu.burn_vector_geometry(provinces, uda, column="id") +by_touch = xu.burn_vector_geometry(provinces, uda, column="id", all_touched=True) fig, axes = plt.subplots(ncols=2, figsize=(10, 5)) by_centroid.ugrid.plot(ax=axes[0], add_colorbar=False) @@ -145,6 +165,24 @@ ax.set_xlim(xmin, xmax) ax.set_ylim(ymin, ymax) +# %% +# We can also burn points. + +province_centroids = gpd.GeoDataFrame(geometry=provinces.centroid) +burned = xu.burn_vector_geometry(province_centroids, uda) + +fig, ax = plt.subplots() +burned.ugrid.plot(ax=ax) +provinces.plot(ax=ax, edgecolor="red", facecolor="none") + +# %% +# Finally, it's also possible to combine multiple geometry types in a single +# burn operation. + +combined = pd.concat([lines, province_centroids]) +burned = xu.burn_vector_geometry(combined, uda) +burned.ugrid.plot() + # %% # Polygonizing # ------------ diff --git a/tests/test_burn.py b/tests/test_burn.py index 74dd87401..66f5e3275 100644 --- a/tests/test_burn.py +++ b/tests/test_burn.py @@ -221,6 +221,8 @@ def test_burn_vector_geometry( actual = xu.burn_vector_geometry(gdf, grid) assert isinstance(actual, xu.UgridDataArray) assert np.allclose(actual.to_numpy(), 1) + actual = xu.burn_vector_geometry(gdf, grid, all_touched=True) + assert np.allclose(actual.to_numpy(), 1) expected = np.array([0, 0, 1, 0, 0, 1, 1, 1, 1]) actual = xu.burn_vector_geometry(gdf, grid, column="values") @@ -238,3 +240,7 @@ def test_burn_vector_geometry( actual = xu.burn_vector_geometry(gdf, grid, column="values") expected = np.array([20.0, 21.0, 10.0, 0.0, 11.0, 1.0, 12.0, 12.0, 23.0]) assert np.allclose(actual.to_numpy(), expected) + + # All touched should give the same answer for this specific example. + actual = xu.burn_vector_geometry(gdf, grid, column="values", all_touched=True) + assert np.allclose(actual.to_numpy(), expected) diff --git a/xugrid/ugrid/burn.py b/xugrid/ugrid/burn.py index fcd92c615..2c1911b10 100644 --- a/xugrid/ugrid/burn.py +++ b/xugrid/ugrid/burn.py @@ -39,6 +39,7 @@ def in_bounds(p: Point, a: Point, b: Point) -> bool: If the area created by p, a, b is tiny AND p is within the bounds of a and b, the point lies very close to the edge. """ + # Already in numba_celltree, unreleased. dx = b.x - a.x dy = b.y - a.y if abs(dx) >= abs(dy): @@ -53,6 +54,7 @@ def in_bounds(p: Point, a: Point, b: Point) -> bool: @nb.njit(inline="always") def point_in_triangle(p: Point, t: Triangle) -> bool: + # TODO: move this into numba_celltree instead? ap = to_vector(t.a, p) bp = to_vector(t.b, p) cp = to_vector(t.c, p) @@ -223,7 +225,7 @@ def _burn_lines( def burn_vector_geometry( gdf: "geopandas.GeoDataframe", # type: ignore # noqa - like: "xugrid.Ugrid2d", + like: Union["xugrid.Ugrid2d", "xugrid.UgridDataArray", "xugrid.UgridDataset"], column: str = None, fill: Union[int, float] = np.nan, all_touched: bool = False, @@ -237,9 +239,12 @@ def burn_vector_geometry( Parameters ---------- gdf: geopandas.GeoDataFrame + Polygons, points, and/or lines to be burned into the grid. like: UgridDataArray, UgridDataset, or Ugrid2d + Grid to burn the vector data into. column: str - Column name of geodataframe to burn into mesh + Name of the geodataframe column of which to the values to burn into + grid. fill: int, float, optional, default value ``np.nan``. Fill value for nodata areas. all_touched: bool, optional, default value ``False``.