Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for polars dataframes and series #7463

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions conda-envs/environment-dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ dependencies:
- numpy>=1.15.0
- pandas>=0.24.0
- pip
- polars>=1.5.0
- pytensor>=2.25.1,<2.26
- python-graphviz
- networkx
Expand Down
1 change: 1 addition & 0 deletions conda-envs/environment-docs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ dependencies:
- numpy>=1.15.0
- pandas>=0.24.0
- pip
- polars>=1.5.0
- pytensor>=2.25.1,<2.26
- python-graphviz
- rich>=13.7.1
Expand Down
1 change: 1 addition & 0 deletions conda-envs/environment-jax.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ dependencies:
- numpyro>=0.8.0
- pandas>=0.24.0
- pip
- polars>=1.5.0
- pytensor>=2.25.1,<2.26
- python-graphviz
- networkx
Expand Down
1 change: 1 addition & 0 deletions conda-envs/windows-environment-dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ dependencies:
- numpy>=1.15.0
- pandas>=0.24.0
- pip
- polars>=1.5.0
- pytensor>=2.25.1,<2.26
- python-graphviz
- networkx
Expand Down
1 change: 1 addition & 0 deletions conda-envs/windows-environment-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ dependencies:
- numpy>=1.15.0
- pandas>=0.24.0
- pip
- polars>=1.5.0
- pytensor>=2.25.1,<2.26
- python-graphviz
- networkx
Expand Down
5 changes: 3 additions & 2 deletions pymc/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@

import numpy as np
import pandas as pd
import polars as pl
fonnesbeck marked this conversation as resolved.
Show resolved Hide resolved
import pytensor
import pytensor.tensor as pt
import xarray as xr
Expand Down Expand Up @@ -204,7 +205,7 @@ def Minibatch(variable: TensorVariable, *variables: TensorVariable, batch_size:

def determine_coords(
model,
value: pd.DataFrame | pd.Series | xr.DataArray,
value: pd.DataFrame | pd.Series | pl.DataFrame | pl.Series | xr.DataArray,
dims: Sequence[str | None] | None = None,
coords: dict[str, Sequence | np.ndarray] | None = None,
) -> tuple[dict[str, Sequence | np.ndarray], Sequence[str | None]]:
Expand Down Expand Up @@ -348,7 +349,7 @@ def Data(
----------
name : str
The name for this variable.
value : array_like or pandas.Series, pandas.Dataframe
value : array_like or pandas.Series, pandas.DataFrame, polars.DataFrame, polars.Series, xarray.DataArray
A value to associate with this variable.
dims : str or tuple of str, optional
Dimension names of the random variables (as opposed to the shapes of these
Expand Down
15 changes: 11 additions & 4 deletions pymc/pytensorf.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

import numpy as np
import pandas as pd
import polars as pl
import pytensor
import pytensor.tensor as pt
import scipy.sparse as sps
Expand Down Expand Up @@ -96,12 +97,16 @@ def convert_generator_data(data) -> TensorVariable:

def convert_data(data) -> np.ndarray | Variable:
ret: np.ndarray | Variable
if hasattr(data, "to_numpy") and hasattr(data, "isnull"):
fonnesbeck marked this conversation as resolved.
Show resolved Hide resolved
if hasattr(data, "to_numpy"):
# typically, but not limited to pandas objects
vals = data.to_numpy()
null_data = data.isnull()
if hasattr(data, "is_null"):
# polars DataFrame or Series
null_data = data.is_null()
else:
null_data = data.isnull()
if hasattr(null_data, "to_numpy"):
# pandas Series
# pandas or polars Series
mask = null_data.to_numpy()
else:
# pandas Index
Expand Down Expand Up @@ -144,7 +149,9 @@ def convert_data(data) -> np.ndarray | Variable:

@_as_tensor_variable.register(pd.Series)
@_as_tensor_variable.register(pd.DataFrame)
@_as_tensor_variable.register(pl.DataFrame)
@_as_tensor_variable.register(pl.Series)
def dataframe_to_tensor_variable(
    df: pd.DataFrame | pd.Series | pl.DataFrame | pl.Series, *args, **kwargs
) -> TensorVariable:
    """Convert a pandas or polars DataFrame/Series into a tensor variable.

    Registered with ``_as_tensor_variable`` for all four container types, so
    the annotation lists every one of them rather than only the DataFrames.

    Parameters
    ----------
    df : pandas.DataFrame, pandas.Series, polars.DataFrame or polars.Series
        Container whose ``to_numpy()`` result is wrapped.
    *args, **kwargs
        Forwarded unchanged to ``pt.as_tensor_variable``.

    Returns
    -------
    TensorVariable
        Result of ``pt.as_tensor_variable`` applied to ``df.to_numpy()``.
    """
    # Every registered type exposes `to_numpy()`, so one code path serves all.
    return pt.as_tensor_variable(df.to_numpy(), *args, **kwargs)


Expand Down
1 change: 1 addition & 0 deletions requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ numdifftools>=0.9.40
numpy>=1.15.0
numpydoc
pandas>=0.24.0
polars>=1.5.0
polyagamma
pre-commit>=2.8.0
pytensor>=2.25.1,<2.26
Expand Down
15 changes: 15 additions & 0 deletions tests/test_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -404,6 +404,21 @@ def test_implicit_coords_dataframe(self, seeded_test):
assert "columns" in pmodel.coords
assert pmodel.named_vars_to_dims == {"observations": ("rows", "columns")}

def test_implicit_coords_polars_dataframe(self, seeded_test):
    """Check that `pm.Data` registers dims and coords for a polars DataFrame."""
    pl = pytest.importorskip("polars")
    N_rows = 5
    N_cols = 7
    df_data = pl.DataFrame(
        {f"Column {c + 1}": np.random.normal(size=(N_rows,)) for c in range(N_cols)}
    )
    # `with_row_count` is deprecated in polars >= 0.20.4; `with_row_index` is
    # its replacement and likewise prepends an integer column named "rows".
    df_data = df_data.with_row_index("rows")

    # infer coordinates from the rows and columns of the DataFrame
    with pm.Model() as pmodel:
        pm.Data("observations", df_data, dims=("rows", "columns"), infer_dims_and_coords=True)

    assert "rows" in pmodel.coords
    assert "columns" in pmodel.coords
    assert pmodel.named_vars_to_dims == {"observations": ("rows", "columns")}

def test_implicit_coords_xarray(self):
xr = pytest.importorskip("xarray")
data = xr.DataArray([[1, 2, 3], [4, 5, 6]], dims=("y", "x"))
Expand Down
14 changes: 14 additions & 0 deletions tests/test_pytensorf.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import numpy.ma as ma
import numpy.testing as npt
import pandas as pd
import polars as pl
import pytensor
import pytensor.tensor as pt
import pytest
Expand Down Expand Up @@ -66,6 +67,19 @@ def test_pd_dataframe_as_tensor_variable(np_array: np.ndarray) -> None:
df = pd.DataFrame(np_array)
np.testing.assert_array_equal(x=pt.as_tensor_variable(x=df).eval(), y=np_array)

@pytest.mark.parametrize(
    argnames="np_array",
    argvalues=[
        np.array([[1.0], [2.0], [-1.0]]),
        np.array([[1.0, 1.0, 1.0], [0.0, 0.0, 0.0]]),
        np.ones(shape=(10, 1)),
    ],
)
def test_polars_dataframe_as_tensor_variable(np_array: np.ndarray) -> None:
    """Round-trip a 2-D array through a polars DataFrame and `pt.as_tensor_variable`."""
    # Skip instead of failing when polars is not installed.
    polars = pytest.importorskip("polars")
    frame = polars.DataFrame(np_array)
    evaluated = pt.as_tensor_variable(x=frame).eval()
    # The evaluated tensor must reproduce the source array exactly.
    np.testing.assert_array_equal(x=evaluated, y=np_array)


@pytest.mark.parametrize(
argnames="np_array",
Expand Down
Loading