Skip to content

Commit

Permalink
refactor(python): Refactor Series/DataFrame.__getitem__ logic (#16482)
Browse files Browse the repository at this point in the history
  • Loading branch information
stinodego authored May 26, 2024
1 parent d265cd1 commit 5b25fb8
Show file tree
Hide file tree
Showing 12 changed files with 964 additions and 570 deletions.
2 changes: 0 additions & 2 deletions py-polars/polars/_utils/construction/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
)
from polars._utils.construction.other import (
coerce_arrow,
numpy_to_idxs,
pandas_series_to_arrow,
)
from polars._utils.construction.series import (
Expand Down Expand Up @@ -43,6 +42,5 @@
"series_to_pyseries",
# other
"coerce_arrow",
"numpy_to_idxs",
"pandas_series_to_arrow",
]
53 changes: 0 additions & 53 deletions py-polars/polars/_utils/construction/other.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,66 +2,13 @@

from typing import TYPE_CHECKING, Any

import polars._reexport as pl
from polars._utils.construction.utils import get_first_non_none
from polars.datatypes import UInt32
from polars.dependencies import numpy as np
from polars.dependencies import pyarrow as pa
from polars.meta import get_index_type

if TYPE_CHECKING:
from polars import Series
from polars.dependencies import pandas as pd


def numpy_to_idxs(idxs: np.ndarray[Any, Any], size: int) -> Series:
    """
    Convert a 1D NumPy integer array of row positions into an index Series.

    Handled cases, ordered from fastest to slowest:

    - ``np.uint32`` (polars) or ``np.uint64`` (polars_u64_idx) arrays.
    - Other unsigned arrays, which are converted to ``pl.UInt32`` (polars) or
      ``pl.UInt64`` (polars_u64_idx).
    - Signed arrays, which are converted to ``pl.UInt32`` (polars) or
      ``pl.UInt64`` (polars_u64_idx) after negative indexes are converted to
      absolute indexes.

    Parameters
    ----------
    idxs
        One-dimensional NumPy array with a signed or unsigned integer dtype.
    size
        Length of the object being indexed; added to negative positions to
        turn them into absolute positions.

    Returns
    -------
    Series
        Unnamed Series whose dtype is the one returned by ``get_index_type()``.

    Raises
    ------
    ValueError
        If `idxs` is not one-dimensional, or if the index type is ``UInt32``
        and a 64-bit position falls outside the supported range.
    NotImplementedError
        If the dtype of `idxs` is not a signed or unsigned integer type.

    Notes
    -----
    Positions are not bounds-checked against `size` here; a negative position
    that is still negative after adding `size` is not rejected by this
    function.
    """
    if idxs.ndim != 1:
        msg = "only 1D numpy array is supported as index"
        raise ValueError(msg)

    idx_type = get_index_type()

    if len(idxs) == 0:
        return pl.Series("", [], dtype=idx_type)

    # Numpy array with signed or unsigned integers.
    if idxs.dtype.kind not in ("i", "u"):
        msg = "unsupported idxs datatype"
        raise NotImplementedError(msg)

    if idx_type == UInt32:
        # Membership is tested against a tuple (compared with `==`), not a set:
        # a `np.dtype` instance compares equal to the scalar type but hashes
        # differently, so a set lookup would never match and this range check
        # would be skipped, letting large values wrap in the uint32 cast below.
        if idxs.dtype in (np.int64, np.uint64) and idxs.max() >= 2**32:
            msg = "index positions should be smaller than 2^32"
            raise ValueError(msg)
        if idxs.dtype == np.int64 and idxs.min() < -(2**32):
            msg = "index positions should be bigger than -2^32 + 1"
            raise ValueError(msg)

    if idxs.dtype.kind == "i" and idxs.min() < 0:
        # Widen narrow signed dtypes first so that `size + idxs` cannot
        # overflow the element type.
        if idx_type == UInt32:
            if idxs.dtype in (np.int8, np.int16):
                idxs = idxs.astype(np.int32)
        else:
            if idxs.dtype in (np.int8, np.int16, np.int32):
                idxs = idxs.astype(np.int64)

        # Update negative indexes to absolute indexes.
        idxs = np.where(idxs < 0, size + idxs, idxs)

    # numpy conversion is much faster
    idxs = idxs.astype(np.uint32) if idx_type == UInt32 else idxs.astype(np.uint64)

    return pl.Series("", idxs, dtype=idx_type)


def pandas_series_to_arrow(
values: pd.Series[Any] | pd.Index[Any],
*,
Expand Down
Loading

0 comments on commit 5b25fb8

Please sign in to comment.