Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add simple polars support #1129

Merged
merged 14 commits into from
Oct 2, 2023
14 changes: 14 additions & 0 deletions doc/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,20 @@ alt: Works with GeoPandas
align: center
---
:::
:::{tab-item} Polars
```python
import polars
import hvplot.polars

df_polars = polars.from_pandas(df)
df_polars.hvplot.scatter(x='bill_length_mm', y='bill_depth_mm', by='species')
```
```{image} ./_static/home/dask.gif
---
alt: Works with Polars
align: center
---
:::

:::{tab-item} Intake
```python
Expand Down
1 change: 1 addition & 0 deletions examples/user_guide/Introduction.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
"\n",
"* [Pandas](https://pandas.pydata.org): DataFrame, Series (columnar/tabular data)\n",
"* [Rapids cuDF](https://docs.rapids.ai/api/cudf/stable/): GPU DataFrame, Series (columnar/tabular data)\n",
"* [Polars](https://www.pola.rs/): Polars is a fast DataFrame library/in-memory query engine (columnar/tabular data)\n",
"* [Dask](https://www.dask.org): DataFrame, Series (distributed/out of core arrays and columnar data)\n",
"* [XArray](https://xarray.pydata.org): Dataset, DataArray (labelled multidimensional arrays)\n",
"* [Streamz](https://streamz.readthedocs.io): DataFrame(s), Series(s) (streaming columnar data)\n",
Expand Down
64 changes: 64 additions & 0 deletions hvplot/polars.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
"""Adds the `.hvplot` method to pl.DataFrame, pl.LazyFrame and pl.Series"""
import itertools
hoxbro marked this conversation as resolved.
Show resolved Hide resolved

from hvplot import hvPlotTabular, post_patch
from hvplot.converter import HoloViewsConverter
from hvplot.util import is_list_like


class hvPlotTabularPolars(hvPlotTabular):
    """``.hvplot`` accessor for Polars ``DataFrame``, ``LazyFrame`` and ``Series``.

    The wrapped Polars object is converted to pandas before it is handed to
    ``HoloViewsConverter``. For frames, only the columns referenced by the
    plot call are converted (all of them when ``hover_cols="all"``).
    """

    def _get_converter(self, x=None, y=None, kind=None, **kwds):
        """Build the ``HoloViewsConverter`` for the wrapped Polars data.

        Parameters
        ----------
        x, y : str or list-like, optional
            Column name(s) to plot. A falsy value falls back to the value
            stored in the accessor metadata / ``kwds``.
        kind : str, optional
            Plot kind, with the same fallback as ``x`` and ``y``.
        **kwds
            Extra plot options; they override the accessor metadata.

        Returns
        -------
        HoloViewsConverter
            Converter built on the pandas version of the data.

        Raises
        ------
        ValueError
            If the wrapped object is not a Polars DataFrame, Series or
            LazyFrame.
        """
        import polars as pl

        params = dict(self._metadata, **kwds)
        # Always pop: a stored "x"/"y"/"kind" left in `params` would be passed
        # to HoloViewsConverter a second time through **params.
        stored_x = params.pop("x", None)
        stored_y = params.pop("y", None)
        stored_kind = params.pop("kind", None)
        x = x or stored_x
        y = y or stored_y
        kind = kind or stored_kind

        if isinstance(self._data, pl.DataFrame):
            columns = self._columns_to_convert(x, y, params)
            data = self._data.select(columns).to_pandas()
        elif isinstance(self._data, pl.Series):
            data = self._data.to_pandas()
        elif isinstance(self._data, pl.LazyFrame):
            columns = self._columns_to_convert(x, y, params)
            data = self._data.select(columns).collect().to_pandas()
        else:
            raise ValueError(
                "Only Polars DataFrame, Series, and LazyFrame are supported"
            )

        return HoloViewsConverter(data, x, y, kind=kind, **params)

    def _columns_to_convert(self, x, y, params):
        """Return the column names of a DataFrame/LazyFrame to convert, as a list."""
        available = list(self._data.columns)
        if params.get("hover_cols") == "all":
            return available

        # Every string, or list of strings, among the options may name a column.
        possible_columns = [
            [v] if isinstance(v, str) else v
            for v in params.values()
            if isinstance(v, (str, list))
        ]
        requested = (
            set(available) & set(itertools.chain.from_iterable(possible_columns))
        ) or {available[0]}  # fall back to the first column when nothing matches
        xs = x if is_list_like(x) else (x,)
        ys = y if is_list_like(y) else (y,)
        requested |= {*xs, *ys}
        requested.discard(None)

        # A set has no stable iteration order for strings, so emit the columns
        # in the order they appear in the frame. Names that are not in the
        # frame are still passed on (sorted) and left for `select` to handle.
        ordered = [name for name in available if name in requested]
        ordered += sorted(requested.difference(available), key=str)
        return ordered


def patch(name="hvplot", extension="bokeh", logo=False):
    """Register the hvPlot accessor on Polars DataFrame, Series and LazyFrame.

    Parameters
    ----------
    name : str
        Namespace under which the accessor is registered on Polars objects.
    extension : str
        Forwarded to ``post_patch``.
    logo : bool
        Forwarded to ``post_patch``.

    Raises
    ------
    ImportError
        If Polars cannot be imported.
    """
    try:
        import polars as pl
    except ImportError as err:
        # Only a failed import is translated; anything else (for example
        # KeyboardInterrupt) propagates unchanged.
        raise ImportError(
            "Could not patch plotting API onto Polars. Polars could not be imported."
        ) from err
    pl.api.register_dataframe_namespace(name)(hvPlotTabularPolars)
    pl.api.register_series_namespace(name)(hvPlotTabularPolars)
    pl.api.register_lazyframe_namespace(name)(hvPlotTabularPolars)

    post_patch(extension, logo)
hoxbro marked this conversation as resolved.
Show resolved Hide resolved


patch()
61 changes: 53 additions & 8 deletions hvplot/tests/plotting/testcore.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,66 @@
import numpy as np
import pandas as pd
import hvplot.pandas # noqa

import pytest

@pytest.mark.parametrize("y", (
from hvplot import hvPlotTabular

# Polars is optional for this test module. When it (or the hvplot polars
# integration) cannot be imported, a stub `pl` keeps module-level references
# such as `pl.DataFrame` resolvable and `skip_polar` marks the tests to skip.
try:
    import polars as pl
    import hvplot.polars  # noqa
except ImportError:
    skip_polar = True

    class pl:
        DataFrame = None
        LazyFrame = None
        Series = None
else:
    skip_polar = False


# Non-underscore attribute names of hvPlotTabular, used as the plot kinds.
TYPES = {t for t in dir(hvPlotTabular) if not t.startswith("_")}
# Kinds left out of the frame tests, and further kinds left out for series.
FRAME_TYPES = TYPES - {"bivariate", "heatmap", "hexbin", "labels", "vectorfield"}
SERIES_TYPES = FRAME_TYPES - {"points", "polygons", "ohlc", "paths"}
# Sorted so the parametrization order is the same in every process; the
# iteration order of a set of strings changes with hash randomization.
frame_kinds = pytest.mark.parametrize("kind", sorted(FRAME_TYPES))
series_kinds = pytest.mark.parametrize("kind", sorted(SERIES_TYPES))

y_combinations = pytest.mark.parametrize("y", (
["A", "B", "C", "D"],
("A", "B", "C", "D"),
{"A", "B", "C", "D"},
np.array(["A", "B", "C", "D"]),
pd.Index(["A", "B", "C", "D"]),
pd.Series(["A", "B", "C", "D"]),
))
def test_diffent_input_types(y):
),
ids=lambda x: type(x).__name__
)


@frame_kinds
@y_combinations
def test_dataframe_pandas(kind, y):
df = pd._testing.makeDataFrame()
types = {t for t in dir(df.hvplot) if not t.startswith("_")}
ignore_types = {'bivariate', 'heatmap', 'hexbin', 'labels', 'vectorfield'}
df.hvplot(y=y, kind=kind)


@series_kinds
def test_series_pandas(kind):
    """Plot a named pandas Series of ten random values with the given kind."""
    random_values = np.random.rand(10)
    pd.Series(random_values, name="A").hvplot(kind=kind)


@pytest.mark.skipif(skip_polar, reason="polars not installed")
maximlt marked this conversation as resolved.
Show resolved Hide resolved
@pytest.mark.parametrize("cast", (pl.DataFrame, pl.LazyFrame))
@frame_kinds
@y_combinations
def test_dataframe_polars(kind, y, cast):
    """Plot columns A-D of a Polars DataFrame/LazyFrame for every kind and y type."""
    # Built with public pandas/numpy API rather than the private
    # `pd._testing` helpers, which are not part of pandas' supported interface.
    source = pd.DataFrame(np.random.randn(30, 4), columns=list("ABCD"))
    df = cast(source)
    assert isinstance(df, cast)
    df.hvplot(y=y, kind=kind)


for t in types - ignore_types:
df.hvplot(y=y, kind=t)
@pytest.mark.skipif(skip_polar, reason="polars not installed")
@series_kinds
def test_series_polars(kind):
    """Plot a named Polars Series of ten random values with the given kind."""
    random_values = np.random.rand(10)
    series = pl.Series(name="A", values=random_values)
    assert isinstance(series, pl.Series)
    series.hvplot(kind=kind)
25 changes: 25 additions & 0 deletions hvplot/tests/testpatch.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,3 +100,28 @@ def test_streamz_seriess_patched(self):
from streamz.dataframe import Random
random_df = Random()
self.assertIsInstance(random_df.groupby('x').sum().y.hvplot, hvPlotTabular)


class TestPatchPolars(TestCase):
    """Check that importing ``hvplot.polars`` adds ``.hvplot`` to Polars objects."""

    def setUp(self):
        # Skip only when Polars is missing; any other failure should surface
        # as an error instead of being reported as a skip.
        try:
            import polars as pl  # noqa
        except ImportError:
            raise SkipTest('Polars not available')
        import hvplot.polars  # noqa

    def test_polars_series_patched(self):
        import polars as pl
        pseries = pl.Series([0, 1, 2])
        self.assertIsInstance(pseries.hvplot, hvPlotTabular)

    def test_polars_dataframe_patched(self):
        import polars as pl
        pdf = pl.DataFrame({'x': [1, 3, 5], 'y': [2, 4, 6]})
        self.assertIsInstance(pdf.hvplot, hvPlotTabular)

    def test_polars_lazyframe_patched(self):
        import polars as pl
        pldf = pl.LazyFrame({'x': [1, 3, 5], 'y': [2, 4, 6]})
        self.assertIsInstance(pldf.hvplot, hvPlotTabular)
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ def get_setup_version(reponame):
'pooch',
'scipy',
'ipywidgets',
'polars',
maximlt marked this conversation as resolved.
Show resolved Hide resolved
]

# Dependencies required to run the notebooks
Expand Down
Loading