Skip to content

Commit

Permalink
refactor(python): Remove re-export of data type groups (pola-rs#17073)
Browse files Browse the repository at this point in the history
  • Loading branch information
stinodego authored Jun 20, 2024
1 parent 69b8440 commit 8a6bf4b
Show file tree
Hide file tree
Showing 33 changed files with 348 additions and 339 deletions.
34 changes: 16 additions & 18 deletions py-polars/polars/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
from __future__ import annotations

import contextlib
import os

Expand Down Expand Up @@ -40,13 +38,6 @@
)
from polars.dataframe import DataFrame
from polars.datatypes import (
DATETIME_DTYPES,
DURATION_DTYPES,
FLOAT_DTYPES,
INTEGER_DTYPES,
NESTED_DTYPES,
NUMERIC_DTYPES,
TEMPORAL_DTYPES,
Array,
Binary,
Boolean,
Expand Down Expand Up @@ -250,14 +241,6 @@
"UInt64",
"Unknown",
"Utf8",
# polars.datatypes: dtype groups
"DATETIME_DTYPES",
"DURATION_DTYPES",
"FLOAT_DTYPES",
"INTEGER_DTYPES",
"NESTED_DTYPES",
"NUMERIC_DTYPES",
"TEMPORAL_DTYPES",
# polars.io
"read_avro",
"read_clipboard",
Expand Down Expand Up @@ -401,7 +384,7 @@
os.environ["POLARS_ALLOW_EXTENSION"] = "true"


def __getattr__(name: str) -> type[Exception]:
def __getattr__(name: str): # type: ignore[no-untyped-def]
# Deprecate re-export of exceptions at top-level
if name in dir(exceptions):
from polars._utils.deprecation import issue_deprecation_warning
Expand All @@ -416,5 +399,20 @@ def __getattr__(name: str) -> type[Exception]:
)
return getattr(exceptions, name)

# Deprecate data type groups at top-level
import polars.datatypes.group as dtgroup

if name in dir(dtgroup):
from polars._utils.deprecation import issue_deprecation_warning

issue_deprecation_warning(
message=(
f"`{name}` is deprecated. Define your own data type groups or use the"
" `polars.selectors` module for selecting columns of a certain data type."
),
version="1.0.0",
)
return getattr(dtgroup, name)

msg = f"module {__name__!r} has no attribute {name!r}"
raise AttributeError(msg)
3 changes: 1 addition & 2 deletions py-polars/polars/_utils/various.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,6 @@
import polars as pl
from polars import functions as F
from polars.datatypes import (
FLOAT_DTYPES,
INTEGER_DTYPES,
Boolean,
Date,
Datetime,
Expand All @@ -34,6 +32,7 @@
String,
Time,
)
from polars.datatypes.group import FLOAT_DTYPES, INTEGER_DTYPES
from polars.dependencies import _check_for_numpy
from polars.dependencies import numpy as np

Expand Down
28 changes: 13 additions & 15 deletions py-polars/polars/dataframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,6 @@
from polars.dataframe._html import NotebookFormatter
from polars.dataframe.group_by import DynamicGroupBy, GroupBy, RollingGroupBy
from polars.datatypes import (
INTEGER_DTYPES,
N_INFER_DEFAULT,
Boolean,
Float32,
Expand All @@ -75,6 +74,7 @@
UInt32,
UInt64,
)
from polars.datatypes.group import INTEGER_DTYPES
from polars.dependencies import (
_GREAT_TABLES_AVAILABLE,
_HVPLOT_AVAILABLE,
Expand Down Expand Up @@ -2784,9 +2784,7 @@ def write_excel(
dtype_formats : dict
A `{dtype:str,}` dictionary that sets the default Excel format for the
given dtype. (This can be overridden on a per-column basis by the
`column_formats` param). It is also valid to use dtype groups such as
`pl.FLOAT_DTYPES` as the dtype/format key, to simplify setting uniform
integer and float formats.
`column_formats` param).
conditional_formats : dict
A dictionary of colname (or selector) keys to a format str, dict, or list
that defines conditional formatting options for the specified columns.
Expand Down Expand Up @@ -3022,7 +3020,7 @@ def write_excel(
>>> df.write_excel( # doctest: +SKIP
... table_style="Table Style Light 2",
... # apply accounting format to all flavours of integer
... dtype_formats={pl.INTEGER_DTYPES: "#,##0_);(#,##0)"},
... dtype_formats={dt: "#,##0_);(#,##0)" for dt in [pl.Int32, pl.Int64]},
... sparklines={
... # default options; just provide source cols
... "trend": ["q1", "q2", "q3", "q4"],
Expand Down Expand Up @@ -8459,18 +8457,18 @@ def select(
>>> with pl.Config(auto_structify=True):
... df.select(
... is_odd=(pl.col(pl.INTEGER_DTYPES) % 2).name.suffix("_is_odd"),
... is_odd=(pl.col(pl.Int64) % 2 == 1).name.suffix("_is_odd"),
... )
shape: (3, 1)
┌───────────┐
│ is_odd │
│ --- │
│ struct[2] │
╞═══════════╡
│ {1,0}
│ {0,1}
│ {1,0}
└───────────┘
┌──────────────
│ is_odd
│ ---
│ struct[2]
╞══════════════
│ {true,false}
│ {false,true}
│ {true,false}
└──────────────
"""
return self.lazy().select(*exprs, **named_exprs).collect(_eager=True)

Expand Down
22 changes: 1 addition & 21 deletions py-polars/polars/datatypes/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
Categorical,
DataType,
DataTypeClass,
DataTypeGroup,
Date,
Datetime,
Decimal,
Expand Down Expand Up @@ -34,17 +33,8 @@
Utf8,
)
from polars.datatypes.constants import (
DATETIME_DTYPES,
DTYPE_TEMPORAL_UNITS,
DURATION_DTYPES,
FLOAT_DTYPES,
INTEGER_DTYPES,
N_INFER_DEFAULT,
NESTED_DTYPES,
NUMERIC_DTYPES,
SIGNED_INTEGER_DTYPES,
TEMPORAL_DTYPES,
UNSIGNED_INTEGER_DTYPES,
)
from polars.datatypes.constructor import (
numpy_type_to_constructor,
Expand Down Expand Up @@ -72,7 +62,6 @@
"Categorical",
"DataType",
"DataTypeClass",
"DataTypeGroup",
"Date",
"Datetime",
"Decimal",
Expand Down Expand Up @@ -100,17 +89,8 @@
"Unknown",
"Utf8",
# constants
"DATETIME_DTYPES",
"DTYPE_TEMPORAL_UNITS",
"DURATION_DTYPES",
"FLOAT_DTYPES",
"INTEGER_DTYPES",
"NESTED_DTYPES",
"NUMERIC_DTYPES",
"N_INFER_DEFAULT",
"SIGNED_INTEGER_DTYPES",
"TEMPORAL_DTYPES",
"UNSIGNED_INTEGER_DTYPES",
"DTYPE_TEMPORAL_UNITS",
# constructor
"numpy_type_to_constructor",
"numpy_values_and_dtype",
Expand Down
32 changes: 0 additions & 32 deletions py-polars/polars/datatypes/classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,38 +181,6 @@ def is_nested(cls) -> bool:
return issubclass(cls, NestedType)


class DataTypeGroup(frozenset):  # type: ignore[type-arg]
    """
    Immutable group of data types.

    Behaves like a `frozenset` of dtypes, except that membership tests may
    first reduce the tested dtype via its `base_type()` method (controlled by
    `match_base_type` at construction time).
    """

    # Whether `__contains__` calls `base_type()` on dtype items before lookup.
    _match_base_type: bool

    def __new__(
        cls, items: Iterable[DataType | DataTypeClass], *, match_base_type: bool = True
    ) -> DataTypeGroup:
        """
        Construct a DataTypeGroup.

        Parameters
        ----------
        items :
            iterable of data types
        match_base_type:
            match the base type

        Raises
        ------
        TypeError
            If any element of `items` is neither a `DataType` instance nor a
            `DataTypeClass`.
        """
        # `items` is walked twice (validation below, then set construction).
        # Materialize it once so that a single-pass iterable such as a
        # generator is not exhausted by validation, which would otherwise
        # silently yield an empty group.
        items = tuple(items)
        for it in items:
            if not isinstance(it, (DataType, DataTypeClass)):
                msg = f"DataTypeGroup items must be dtypes; found {type(it).__name__!r}"
                raise TypeError(msg)
        dtype_group = super().__new__(cls, items)
        dtype_group._match_base_type = match_base_type
        return dtype_group

    def __contains__(self, item: Any) -> bool:
        # With base-type matching enabled, dtype items are replaced by the
        # result of `base_type()` before the regular frozenset lookup;
        # non-dtype items are looked up unchanged.
        if self._match_base_type and isinstance(item, (DataType, DataTypeClass)):
            item = item.base_type()
        return super().__contains__(item)


class NumericType(DataType):
"""Base class for numeric data types."""

Expand Down
81 changes: 3 additions & 78 deletions py-polars/polars/datatypes/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,85 +2,10 @@

from typing import TYPE_CHECKING

from polars.datatypes import (
Array,
DataTypeGroup,
Date,
Datetime,
Decimal,
Duration,
Float32,
Float64,
Int8,
Int16,
Int32,
Int64,
List,
Struct,
Time,
UInt8,
UInt16,
UInt32,
UInt64,
)

if TYPE_CHECKING:
from polars.type_aliases import (
PolarsDataType,
PolarsIntegerType,
PolarsTemporalType,
TimeUnit,
)
from polars.type_aliases import TimeUnit

# Number of rows to scan by default when inferring datatypes
N_INFER_DEFAULT = 100

DTYPE_TEMPORAL_UNITS: frozenset[TimeUnit] = frozenset(["ns", "us", "ms"])
DATETIME_DTYPES: frozenset[PolarsDataType] = DataTypeGroup(
[
Datetime,
Datetime("ms"),
Datetime("us"),
Datetime("ns"),
Datetime("ms", "*"),
Datetime("us", "*"),
Datetime("ns", "*"),
]
)
DURATION_DTYPES: frozenset[PolarsDataType] = DataTypeGroup(
[
Duration,
Duration("ms"),
Duration("us"),
Duration("ns"),
]
)
TEMPORAL_DTYPES: frozenset[PolarsTemporalType] = DataTypeGroup(
frozenset([Date, Time]) | DATETIME_DTYPES | DURATION_DTYPES
)
SIGNED_INTEGER_DTYPES: frozenset[PolarsIntegerType] = DataTypeGroup(
[
Int8,
Int16,
Int32,
Int64,
]
)
UNSIGNED_INTEGER_DTYPES: frozenset[PolarsIntegerType] = DataTypeGroup(
[
UInt8,
UInt16,
UInt32,
UInt64,
]
)
INTEGER_DTYPES: frozenset[PolarsIntegerType] = (
SIGNED_INTEGER_DTYPES | UNSIGNED_INTEGER_DTYPES
)
FLOAT_DTYPES: frozenset[PolarsDataType] = DataTypeGroup([Float32, Float64])
NUMERIC_DTYPES: frozenset[PolarsDataType] = DataTypeGroup(
FLOAT_DTYPES | INTEGER_DTYPES | frozenset([Decimal])
)

NESTED_DTYPES: frozenset[PolarsDataType] = DataTypeGroup([List, Struct, Array])

# number of rows to scan by default when inferring datatypes
N_INFER_DEFAULT = 100
Loading

0 comments on commit 8a6bf4b

Please sign in to comment.