Skip to content

Commit

Permalink
feat(python): Enable Null datatype and null values by default in parametric testing
Browse files Browse the repository at this point in the history
  • Loading branch information
stinodego committed May 13, 2024
1 parent 8dfaf5b commit b6b500c
Show file tree
Hide file tree
Showing 5 changed files with 47 additions and 14 deletions.
17 changes: 15 additions & 2 deletions py-polars/tests/unit/interchange/test_roundtrip.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,12 @@
]


@given(dataframes(allowed_dtypes=protocol_dtypes))
@given(
dataframes(
allowed_dtypes=protocol_dtypes,
allow_null=False, # Bug: https://github.com/pola-rs/polars/issues/16190
)
)
def test_to_dataframe_pyarrow_parametric(df: pl.DataFrame) -> None:
dfi = df.__dataframe__()
df_pa = pa.interchange.from_dataframe(dfi)
Expand Down Expand Up @@ -71,7 +76,12 @@ def test_to_dataframe_pyarrow_zero_copy_parametric(df: pl.DataFrame) -> None:
@pytest.mark.filterwarnings(
"ignore:.*PEP3118 format string that does not match its itemsize:RuntimeWarning"
)
@given(dataframes(allowed_dtypes=protocol_dtypes))
@given(
dataframes(
allowed_dtypes=protocol_dtypes,
allow_null=False, # Bug: https://github.com/pola-rs/polars/issues/16190
)
)
def test_to_dataframe_pandas_parametric(df: pl.DataFrame) -> None:
dfi = df.__dataframe__()
df_pd = pd.api.interchange.from_dataframe(dfi)
Expand All @@ -94,6 +104,7 @@ def test_to_dataframe_pandas_parametric(df: pl.DataFrame) -> None:
pl.Categorical,
],
chunked=False,
allow_null=False, # Bug: https://github.com/pola-rs/polars/issues/16190
)
)
def test_to_dataframe_pandas_zero_copy_parametric(df: pl.DataFrame) -> None:
Expand Down Expand Up @@ -193,6 +204,7 @@ def test_from_dataframe_pandas_zero_copy_parametric(df: pl.DataFrame) -> None:
# Empty string columns cause an error due to a bug in pandas.
# https://github.com/pandas-dev/pandas/issues/56703
min_size=1,
allow_null=False, # Bug: https://github.com/pola-rs/polars/issues/16190
)
)
def test_from_dataframe_pandas_native_parametric(df: pl.DataFrame) -> None:
Expand All @@ -217,6 +229,7 @@ def test_from_dataframe_pandas_native_parametric(df: pl.DataFrame) -> None:
# https://github.com/pandas-dev/pandas/issues/56700
min_size=1,
chunked=False,
allow_null=False, # Bug: https://github.com/pola-rs/polars/issues/16190
)
)
def test_from_dataframe_pandas_native_zero_copy_parametric(df: pl.DataFrame) -> None:
Expand Down
15 changes: 12 additions & 3 deletions py-polars/tests/unit/interop/numpy/test_to_numpy_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -326,11 +326,19 @@ def test_series_to_numpy_temporal() -> None:

@given(
s=series(
min_size=1, max_size=10, excluded_dtypes=[pl.Categorical, pl.List, pl.Struct]
min_size=1,
max_size=10,
excluded_dtypes=[
pl.Categorical,
pl.List,
pl.Struct,
pl.Datetime("ms"),
pl.Duration("ms"),
],
allow_null=False,
).filter(
lambda s: (
getattr(s.dtype, "time_unit", None) != "ms"
and not (s.dtype == pl.String and s.str.contains("\x00").any())
not (s.dtype == pl.String and s.str.contains("\x00").any())
and not (s.dtype == pl.Binary and s.bin.contains(b"\x00").any())
)
),
Expand All @@ -345,6 +353,7 @@ def test_series_to_numpy(s: pl.Series) -> None:
pl.Datetime("us"): "datetime64[us]",
pl.Duration("ns"): "timedelta64[ns]",
pl.Duration("us"): "timedelta64[us]",
pl.Null(): np.float32,
}
np_dtype = dtype_map.get(s.dtype) # type: ignore[call-overload]
expected = np.array(values, dtype=np_dtype)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1350,7 +1350,7 @@ def test_series_duration_timeunits(


@given(
s=series(min_size=1, max_size=10, dtype=pl.Datetime),
s=series(min_size=1, max_size=10, dtype=pl.Datetime, allow_null=False),
)
def test_series_datetime_timeunits(
s: pl.Series,
Expand Down
1 change: 1 addition & 0 deletions py-polars/tests/unit/series/buffers/test_from_buffer.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
s=series(
allowed_dtypes=(pl.INTEGER_DTYPES | pl.FLOAT_DTYPES | {pl.Boolean}),
chunked=False,
allow_null=False,
)
)
def test_series_from_buffer(s: pl.Series) -> None:
Expand Down
26 changes: 18 additions & 8 deletions py-polars/tests/unit/testing/parametric/strategies/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
def test_series_defaults(s: pl.Series) -> None:
assert isinstance(s, pl.Series)
assert s.name == ""
assert s.null_count() == 0


@given(s=series(name="hello"))
Expand All @@ -39,7 +38,7 @@ def test_series_dtype(data: st.DataObject) -> None:
assert s.dtype == dtype


@given(s=series(dtype=pl.Enum))
@given(s=series(dtype=pl.Enum, allow_null=False))
@settings(max_examples=5)
def test_series_dtype_enum(s: pl.Series) -> None:
assert isinstance(s.dtype, pl.Enum)
Expand All @@ -58,9 +57,15 @@ def test_series_size_range(s: pl.Series) -> None:
assert 3 <= s.len() <= 8


@given(s=series(allow_null=True))
def test_series_allow_null(s: pl.Series) -> None:
assert 0 <= s.null_count() <= s.len()
@given(s=series(allow_null=False))
def test_series_allow_null_false(s: pl.Series) -> None:
assert s.null_count() == 0
assert s.dtype != pl.Null


@given(s=series(allowed_dtypes=[pl.Null], allow_null=False))
def test_series_allow_null_allowed_dtypes(s: pl.Series) -> None:
assert s.dtype == pl.Null


@given(df=dataframes())
Expand Down Expand Up @@ -121,6 +126,7 @@ def test_dataframes_allow_null_override(df: pl.DataFrame) -> None:
# generate lazyframes with at least one row
lazy=True,
min_size=1,
allow_null=False,
# test mix & match of bulk-assigned cols with custom cols
cols=[column(n, dtype=pl.UInt8, unique=True) for n in ["a", "b"]],
include_cols=[
Expand Down Expand Up @@ -190,7 +196,8 @@ def test_allow_infinities_deprecated(data: st.DataObject) -> None:
min_len=1,
),
),
]
],
allow_null=False,
),
)
def test_dataframes_nested_strategies(df: pl.DataFrame) -> None:
Expand Down Expand Up @@ -255,9 +262,12 @@ def test_chunking(

@given(
df=dataframes(
allowed_dtypes=[pl.Float32, pl.Float64], max_cols=4, allow_infinity=False
allowed_dtypes=[pl.Float32, pl.Float64],
max_cols=4,
allow_null=False,
allow_infinity=False,
),
s=series(dtype=pl.Float64, allow_infinity=False),
s=series(dtype=pl.Float64, allow_null=False, allow_infinity=False),
)
def test_infinities(
df: pl.DataFrame,
Expand Down

0 comments on commit b6b500c

Please sign in to comment.