From 815af95e9633635214eac005c94f4d90aa1659ec Mon Sep 17 00:00:00 2001 From: Andrew <15331990+ahuang11@users.noreply.github.com> Date: Thu, 10 Oct 2024 10:46:48 -0700 Subject: [PATCH] Fix dask tiles regression (#1432) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Simon Høxbro Hansen --- doc/user_guide/Customization.ipynb | 2 +- doc/user_guide/Geographic_Data.ipynb | 4 ++-- hvplot/converter.py | 5 ++++- hvplot/tests/testgeowithoutgv.py | 17 +++++++++++++++++ hvplot/util.py | 15 +++++++++++++++ 5 files changed, 39 insertions(+), 4 deletions(-) diff --git a/doc/user_guide/Customization.ipynb b/doc/user_guide/Customization.ipynb index dcef025db..bb970fe74 100644 --- a/doc/user_guide/Customization.ipynb +++ b/doc/user_guide/Customization.ipynb @@ -249,7 +249,7 @@ " projection (default=None):\n", " Coordinate reference system of the plot (output projection) specified as a string or integer EPSG code, a CRS or Proj pyproj object, a Cartopy CRS object or class name, a WKT string, or a proj.4 string. Defaults to PlateCarree.\n", " tiles (default=False):\n", - " Whether to overlay the plot on a tile source. If coordinate values fall within lat/lon bounds, auto-projects to EPSG:3857 unless `projection=False`. Tiles sources\n", + " Whether to overlay the plot on a tile source. If coordinate values fall within lat/lon bounds, auto-projects to EPSG:3857 unless `projection=False` or if the data is lazily loaded (dask / ibis). Tiles sources\n", " can be selected by name or a tiles object or class can be passed,\n", " the default is 'Wikipedia'.\n", " tiles_opts (default=None): dict\n", diff --git a/doc/user_guide/Geographic_Data.ipynb b/doc/user_guide/Geographic_Data.ipynb index d34ebe5b9..55df431a2 100644 --- a/doc/user_guide/Geographic_Data.ipynb +++ b/doc/user_guide/Geographic_Data.ipynb @@ -80,7 +80,7 @@ "source": [ "We'll first start by displaying the airports **without GeoViews** with tiles by setting `tiles=True`. 
\n", "\n", - "Under the hood, hvPlot projects lat/lon to easting/northing ([EPSG:4326](https://epsg.io/4326) to [EPSG:3857](https://epsg.io/3857)) coordinates without additional package dependencies if it detects that the values falls within expected lat/lon ranges.\n", + "Under the hood, hvPlot projects lat/lon to easting/northing ([EPSG:4326](https://epsg.io/4326) to [EPSG:3857](https://epsg.io/3857)) coordinates without additional package dependencies if it detects that the values fall within expected lat/lon ranges, **unless the data is lazily loaded (dask / ibis).**\n", "\n", "Note, **this feature is only available after `hvplot>=0.11.0`**; older versions, `hvplot<0.11.0`, require manual projection (see below)." ] @@ -417,7 +417,7 @@ "- `global_extent` (default=False): Whether to expand the plot extent to span the whole globe\n", "- `project` (default=False): Whether to project the data before plotting (adds initial overhead but avoids projecting data when plot is dynamically updated)\n", "- `projection` (default=None): Coordinate reference system of the plot (output projection) specified as a string or integer EPSG code, a CRS or Proj pyproj object, a Cartopy CRS object or class name, a WKT string, or a proj.4 string. Defaults to PlateCarree.\n", - "- `tiles` (default=False): Whether to overlay the plot on a tile source. If coordinate values fall within lat/lon bounds, auto-projects to EPSG:3857 unless `projection=False`. Accepts the following values:\n", + "- `tiles` (default=False): Whether to overlay the plot on a tile source. If coordinate values fall within lat/lon bounds, auto-projects to EPSG:3857 unless `projection=False` or if the data is lazily loaded (dask / ibis). 
Accepts the following values:\n", " - `True`: OpenStreetMap layer\n", " - `xyzservices.TileProvider` instance (requires [`xyzservices`](https://xyzservices.readthedocs.io/) to be installed)\n", " - a map string name based on one of the default layers made available by [HoloViews](https://holoviews.org/reference/elements/bokeh/Tiles.html) ('CartoDark', 'CartoLight', 'EsriImagery', 'EsriNatGeo', 'EsriUSATopo', 'EsriTerrain', 'EsriStreet', 'EsriReference', 'OSM', 'OpenTopoMap') or [GeoViews](https://geoviews.org/user_guide/Working_with_Bokeh.html) ('CartoDark', 'CartoEco', 'CartoLight', 'CartoMidnight', 'EsriImagery', 'EsriNatGeo', 'EsriUSATopo', 'EsriTerrain', 'EsriReference', 'EsriOceanBase', 'EsriOceanReference', 'EsriWorldPhysical', 'EsriWorldShadedRelief', 'EsriWorldTopo', 'EsriWorldDarkGrayBase', 'EsriWorldDarkGrayReference', 'EsriWorldLightGrayBase', 'EsriWorldLightGrayReference', 'EsriWorldHillshadeDark', 'EsriWorldHillshade', 'EsriAntarcticImagery', 'EsriArcticImagery', 'EsriArcticOceanBase', 'EsriArcticOceanReference', 'EsriWorldBoundariesAndPlaces', 'EsriWorldBoundariesAndPlacesAlternate', 'EsriWorldTransportation', 'EsriDelormeWorldBaseMap', 'EsriWorldNavigationCharts', 'EsriWorldStreetMap', 'OSM', 'OpenTopoMap'). 
Note that Stamen tile sources require a Stadia account when not running locally; see [stadiamaps.com](https://stadiamaps.com/).\n", diff --git a/hvplot/converter.py b/hvplot/converter.py index 0f90fc800..8d5f3f035 100644 --- a/hvplot/converter.py +++ b/hvplot/converter.py @@ -60,6 +60,7 @@ is_cudf, is_streamz, is_ibis, + is_lazy_data, is_xarray, is_xarray_dataarray, process_crs, @@ -2173,11 +2174,13 @@ def _process_tiles_without_geo(self, data, x, y): if data.crs is not None: data = data.to_crs(epsg=3857) return data, x, y - else: + elif not is_lazy_data(data): + # To prevent eager evaluation: https://github.com/holoviz/hvplot/pull/1432 min_x = np.min(data[x]) max_x = np.max(data[x]) min_y = np.min(data[y]) max_y = np.max(data[y]) + x_within_bounds = -180 <= min_x <= 360 and -180 <= max_x <= 360 y_within_bounds = -90 <= min_y <= 90 and -90 <= max_y <= 90 if x_within_bounds and y_within_bounds: diff --git a/hvplot/tests/testgeowithoutgv.py b/hvplot/tests/testgeowithoutgv.py index de1d90159..7ad0bf3ee 100644 --- a/hvplot/tests/testgeowithoutgv.py +++ b/hvplot/tests/testgeowithoutgv.py @@ -8,6 +8,12 @@ import pandas as pd import pytest +try: + import dask.dataframe as dd + import hvplot.dask # noqa +except ImportError: + dd = None + bk_renderer = hv.Store.renderers['bokeh'] @@ -66,3 +72,14 @@ def test_plot_with_xyzservices_tileprovider(self, simple_df): assert isinstance(plot.get(0).data, xyzservices.TileProvider) bk_plot = bk_renderer.get_plot(plot) assert bk_plot.projection == 'mercator' + + @pytest.mark.skipif(dd is None, reason='dask not installed') + def test_plot_with_dask(self, simple_df): + ddf = dd.from_pandas(simple_df, npartitions=2) + plot = ddf.hvplot.points('x', 'y', tiles=True) + assert 'x_' not in plot.get(1).data + assert 'y_' not in plot.get(1).data + assert len(plot) == 2 + assert isinstance(plot.get(0), hv.Tiles) + bk_plot = bk_renderer.get_plot(plot) + assert bk_plot.projection == 'mercator' diff --git a/hvplot/util.py b/hvplot/util.py index 
8a8c1079c..e0392e300 100644 --- a/hvplot/util.py +++ b/hvplot/util.py @@ -446,6 +446,21 @@ def is_xarray(data): return isinstance(data, (DataArray, Dataset)) +def is_lazy_data(data): + """Check if data is lazy + + This checks if the datatype is Dask, Ibis, or Polars' LazyFrame. + It is useful to avoid eager evaluation of the data. + """ + if is_dask(data) or is_ibis(data): + return True + elif is_polars(data): + import polars as pl + + return isinstance(data, pl.LazyFrame) + return False + + def is_xarray_dataarray(data): if not check_library(data, 'xarray'): return False