From 5b10c4e70fde8fa533ecea3d5c2bf6d5d1b13e40 Mon Sep 17 00:00:00 2001
From: Ritchie Vink
Date: Sat, 22 Jun 2024 09:08:15 +0200
Subject: [PATCH] feat(python): Improve ipython autocomplete for LazyFrame and
 DataFrame (#17091)

---
 py-polars/polars/dataframe/frame.py | 103 +++++++++++------------
 py-polars/polars/lazyframe/frame.py | 122 ++++++++++++++--------------
 2 files changed, 110 insertions(+), 115 deletions(-)

diff --git a/py-polars/polars/dataframe/frame.py b/py-polars/polars/dataframe/frame.py
index 4c4f5f0ae8854..60fcba0cc04b7 100644
--- a/py-polars/polars/dataframe/frame.py
+++ b/py-polars/polars/dataframe/frame.py
@@ -169,11 +169,6 @@
     else:
         from typing_extensions import Concatenate, ParamSpec
 
-    if sys.version_info >= (3, 11):
-        from typing import Self
-    else:
-        from typing_extensions import Self
-
     T = TypeVar("T")
     P = ParamSpec("P")
 
@@ -418,7 +413,7 @@ def __init__(
             raise TypeError(msg)
 
     @classmethod
-    def deserialize(cls, source: str | Path | IOBase) -> Self:
+    def deserialize(cls, source: str | Path | IOBase) -> DataFrame:
         """
         Read a serialized DataFrame from a file.
 
@@ -460,7 +455,7 @@ def deserialize(cls, source: str | Path | IOBase) -> Self:
         return cls._from_pydf(PyDataFrame.deserialize(source))
 
     @classmethod
-    def _from_pydf(cls, py_df: PyDataFrame) -> Self:
+    def _from_pydf(cls, py_df: PyDataFrame) -> DataFrame:
         """Construct Polars DataFrame from FFI PyDataFrame object."""
         df = cls.__new__(cls)
         df._df = py_df
@@ -474,7 +469,7 @@ def _from_arrow(
         *,
         schema_overrides: SchemaDict | None = None,
         rechunk: bool = True,
-    ) -> Self:
+    ) -> DataFrame:
         """
         Construct a DataFrame from an Arrow table.
 
@@ -520,7 +515,7 @@ def _from_pandas(
         rechunk: bool = True,
         nan_to_null: bool = True,
         include_index: bool = False,
-    ) -> Self:
+    ) -> DataFrame:
         """
         Construct a Polars DataFrame from a pandas DataFrame.
 
@@ -559,7 +554,7 @@ def _from_pandas(
             )
         )
 
-    def _replace(self, column: str, new_column: Series) -> Self:
+    def _replace(self, column: str, new_column: Series) -> DataFrame:
         """Replace a column by a new Series (in place)."""
         self._df.replace(column, new_column._s)
         return self
@@ -1064,14 +1059,14 @@ def __getstate__(self) -> list[Series]:
     def __setstate__(self, state: list[Series]) -> None:
         self._df = DataFrame(state)._df
 
-    def __mul__(self, other: DataFrame | Series | int | float) -> Self:
+    def __mul__(self, other: DataFrame | Series | int | float) -> DataFrame:
         if isinstance(other, DataFrame):
             return self._from_pydf(self._df.mul_df(other._df))
 
         other = _prepare_other_arg(other)
         return self._from_pydf(self._df.mul(other._s))
 
-    def __rmul__(self, other: DataFrame | Series | int | float) -> Self:
+    def __rmul__(self, other: int | float) -> DataFrame:
         return self * other
 
     def __add__(
@@ -1089,13 +1084,13 @@ def __radd__(  # type: ignore[misc]
             return self.select((lit(other) + F.col("*")).name.keep())
         return self + other
 
-    def __sub__(self, other: DataFrame | Series | int | float) -> Self:
+    def __sub__(self, other: DataFrame | Series | int | float) -> DataFrame:
         if isinstance(other, DataFrame):
             return self._from_pydf(self._df.sub_df(other._df))
         other = _prepare_other_arg(other)
         return self._from_pydf(self._df.sub(other._s))
 
-    def __mod__(self, other: DataFrame | Series | int | float) -> Self:
+    def __mod__(self, other: DataFrame | Series | int | float) -> DataFrame:
         if isinstance(other, DataFrame):
             return self._from_pydf(self._df.rem_df(other._df))
         other = _prepare_other_arg(other)
@@ -1229,10 +1224,10 @@ def __setitem__(
     def __len__(self) -> int:
         return self.height
 
-    def __copy__(self) -> Self:
+    def __copy__(self) -> DataFrame:
         return self.clone()
 
-    def __deepcopy__(self, memo: None = None) -> Self:
+    def __deepcopy__(self, memo: None = None) -> DataFrame:
         return self.clone()
 
     def _ipython_key_completions_(self) -> list[str]:
@@ -4042,7 +4037,7 @@ def transpose(
         include_header: bool = False,
         header_name: str = "column",
         column_names: str | Iterable[str] | None = None,
-    ) -> Self:
+    ) -> DataFrame:
         """
         Transpose a DataFrame over the diagonal.
 
@@ -4221,7 +4216,7 @@ def rename(self, mapping: dict[str, str] | Callable[[str], str]) -> DataFrame:
         """
         return self.lazy().rename(mapping).collect(_eager=True)
 
-    def insert_column(self, index: int, column: Series) -> Self:
+    def insert_column(self, index: int, column: Series) -> DataFrame:
         """
         Insert a Series at a certain column index.
 
@@ -4614,7 +4609,7 @@ def get_column_index(self, name: str) -> int:
         """
         return self._df.get_column_index(name)
 
-    def replace_column(self, index: int, column: Series) -> Self:
+    def replace_column(self, index: int, column: Series) -> DataFrame:
         """
         Replace a column at an index location.
 
@@ -4762,7 +4757,7 @@ def sort(
             .collect(_eager=True)
         )
 
-    def sql(self, query: str, *, table_name: str = "self") -> Self:
+    def sql(self, query: str, *, table_name: str = "self") -> DataFrame:
         """
         Execute a SQL query against the DataFrame.
 
@@ -4852,7 +4847,7 @@ def sql(self, query: str, *, table_name: str = "self") -> Self:
         with SQLContext(register_globals=False, eager=True) as ctx:
             name = table_name if table_name else "self"
             ctx.register(name=name, frame=self)
-            return ctx.execute(query)  # type: ignore[return-value]
+            return ctx.execute(query)
 
     @deprecate_renamed_parameter("descending", "reverse", version="1.0.0")
     def top_k(
@@ -5056,7 +5051,7 @@ def equals(self, other: DataFrame, *, null_equal: bool = True) -> bool:
         """
         return self._df.equals(other._df, null_equal=null_equal)
 
-    def slice(self, offset: int, length: int | None = None) -> Self:
+    def slice(self, offset: int, length: int | None = None) -> DataFrame:
         """
         Get a slice of this DataFrame.
 
@@ -5092,7 +5087,7 @@ def slice(self, offset: int, length: int | None = None) -> Self:
             length = self.height - offset + length
         return self._from_pydf(self._df.slice(offset, length))
 
-    def head(self, n: int = 5) -> Self:
+    def head(self, n: int = 5) -> DataFrame:
         """
         Get the first `n` rows.
 
@@ -5144,7 +5139,7 @@ def head(self, n: int = 5) -> Self:
             n = max(0, self.height + n)
         return self._from_pydf(self._df.head(n))
 
-    def tail(self, n: int = 5) -> Self:
+    def tail(self, n: int = 5) -> DataFrame:
         """
         Get the last `n` rows.
 
@@ -5196,7 +5191,7 @@ def tail(self, n: int = 5) -> Self:
             n = max(0, self.height + n)
         return self._from_pydf(self._df.tail(n))
 
-    def limit(self, n: int = 5) -> Self:
+    def limit(self, n: int = 5) -> DataFrame:
         """
         Get the first `n` rows.
 
@@ -5412,7 +5407,7 @@ def pipe(
         """
         return function(self, *args, **kwargs)
 
-    def with_row_index(self, name: str = "index", offset: int = 0) -> Self:
+    def with_row_index(self, name: str = "index", offset: int = 0) -> DataFrame:
         """
         Add a row index as the first column in the DataFrame.
 
@@ -5489,7 +5484,7 @@ def with_row_index(self, name: str = "index", offset: int = 0) -> Self:
         " Note that the default column name has changed from 'row_nr' to 'index'.",
         version="0.20.4",
     )
-    def with_row_count(self, name: str = "row_nr", offset: int = 0) -> Self:
+    def with_row_count(self, name: str = "row_nr", offset: int = 0) -> DataFrame:
         """
         Add a column at index 0 that counts the rows.
 
@@ -6144,7 +6139,7 @@ def upsample(
         every: str | timedelta,
         group_by: str | Sequence[str] | None = None,
         maintain_order: bool = False,
-    ) -> Self:
+    ) -> DataFrame:
         """
         Upsample a DataFrame at a regular frequency.
 
@@ -6841,7 +6836,7 @@ def map_rows(
 
     def hstack(
         self, columns: list[Series] | DataFrame, *, in_place: bool = False
-    ) -> Self:
+    ) -> DataFrame:
         """
         Return a new DataFrame grown horizontally by stacking multiple Series to it.
 
@@ -6882,7 +6877,7 @@ def hstack(
         else:
             return self._from_pydf(self._df.hstack([s._s for s in columns]))
 
-    def vstack(self, other: DataFrame, *, in_place: bool = False) -> Self:
+    def vstack(self, other: DataFrame, *, in_place: bool = False) -> DataFrame:
         """
         Grow this DataFrame vertically by stacking a DataFrame to it.
 
@@ -6940,7 +6935,7 @@ def vstack(self, other: DataFrame, *, in_place: bool = False) -> Self:
 
         return self._from_pydf(self._df.vstack(other._df))
 
-    def extend(self, other: DataFrame) -> Self:
+    def extend(self, other: DataFrame) -> DataFrame:
         """
         Extend the memory backed by this `DataFrame` with the values from `other`.
 
@@ -7205,7 +7200,7 @@ def cast(
         """
         return self.lazy().cast(dtypes, strict=strict).collect(_eager=True)
 
-    def clear(self, n: int = 0) -> Self:
+    def clear(self, n: int = 0) -> DataFrame:
         """
         Create an empty (n=0) or `n`-row null-filled (n>0) copy of the DataFrame.
 
@@ -7263,7 +7258,7 @@ def clear(self, n: int = 0) -> Self:
             }
         )
 
-    def clone(self) -> Self:
+    def clone(self) -> DataFrame:
         """
         Create a copy of this DataFrame.
 
@@ -7597,7 +7592,7 @@ def pivot(
         maintain_order: bool = True,
         sort_columns: bool = False,
         separator: str = "_",
-    ) -> Self:
+    ) -> DataFrame:
         """
         Create a spreadsheet-style pivot table as a DataFrame.
 
@@ -7810,7 +7805,7 @@ def unpivot(
         index: ColumnNameOrSelector | Sequence[ColumnNameOrSelector] | None = None,
         variable_name: str | None = None,
         value_name: str | None = None,
-    ) -> Self:
+    ) -> DataFrame:
         """
         Unpivot a DataFrame from wide to long format.
 
@@ -8017,7 +8012,7 @@ def partition_by(
         maintain_order: bool = ...,
         include_key: bool = ...,
         as_dict: Literal[False] = ...,
-    ) -> list[Self]: ...
+    ) -> list[DataFrame]: ...
 
     @overload
     def partition_by(
@@ -8027,7 +8022,7 @@ def partition_by(
         maintain_order: bool = ...,
         include_key: bool = ...,
         as_dict: Literal[True],
-    ) -> dict[tuple[object, ...], Self]: ...
+    ) -> dict[tuple[object, ...], DataFrame]: ...
 
     @overload
     def partition_by(
@@ -8037,7 +8032,7 @@ def partition_by(
         maintain_order: bool = ...,
         include_key: bool = ...,
         as_dict: bool,
-    ) -> list[Self] | dict[tuple[object, ...], Self]: ...
+    ) -> list[DataFrame] | dict[tuple[object, ...], DataFrame]: ...
 
     def partition_by(
         self,
@@ -8046,7 +8041,7 @@ def partition_by(
         maintain_order: bool = True,
         include_key: bool = True,
         as_dict: bool = False,
-    ) -> list[Self] | dict[tuple[object, ...], Self]:
+    ) -> list[DataFrame] | dict[tuple[object, ...], DataFrame]:
         """
         Group by the given columns and return the groups as separate dataframes.
 
@@ -8956,7 +8951,7 @@ def mean_horizontal(self, *, ignore_nulls: bool = True) -> Series:
         """
         return wrap_s(self._df.mean_horizontal(ignore_nulls)).alias("mean")
 
-    def std(self, ddof: int = 1) -> Self:
+    def std(self, ddof: int = 1) -> DataFrame:
         """
         Aggregate the columns of this DataFrame to their standard deviation value.
 
@@ -8995,9 +8990,9 @@ def std(self, ddof: int = 1) -> Self:
         │ 0.816497 ┆ 0.816497 ┆ null │
         └──────────┴──────────┴──────┘
         """
-        return self.lazy().std(ddof).collect(_eager=True)  # type: ignore[return-value]
+        return self.lazy().std(ddof).collect(_eager=True)
 
-    def var(self, ddof: int = 1) -> Self:
+    def var(self, ddof: int = 1) -> DataFrame:
         """
         Aggregate the columns of this DataFrame to their variance value.
 
@@ -9036,9 +9031,9 @@ def var(self, ddof: int = 1) -> Self:
         │ 0.666667 ┆ 0.666667 ┆ null │
         └──────────┴──────────┴──────┘
         """
-        return self.lazy().var(ddof).collect(_eager=True)  # type: ignore[return-value]
+        return self.lazy().var(ddof).collect(_eager=True)
 
-    def median(self) -> Self:
+    def median(self) -> DataFrame:
         """
         Aggregate the columns of this DataFrame to their median value.
 
@@ -9061,7 +9056,7 @@ def median(self) -> Self:
         │ 2.0 ┆ 7.0 ┆ null │
         └─────┴─────┴──────┘
         """
-        return self.lazy().median().collect(_eager=True)  # type: ignore[return-value]
+        return self.lazy().median().collect(_eager=True)
 
     def product(self) -> DataFrame:
         """
@@ -9098,7 +9093,7 @@ def product(self) -> DataFrame:
 
     def quantile(
         self, quantile: float, interpolation: RollingInterpolationMethod = "nearest"
-    ) -> Self:
+    ) -> DataFrame:
         """
         Aggregate the columns of this DataFrame to their quantile value.
 
@@ -9128,7 +9123,7 @@ def quantile(
         │ 2.0 ┆ 7.0 ┆ null │
         └─────┴─────┴──────┘
         """
-        return self.lazy().quantile(quantile, interpolation).collect(_eager=True)  # type: ignore[return-value]
+        return self.lazy().quantile(quantile, interpolation).collect(_eager=True)
 
     def to_dummies(
         self,
@@ -9136,7 +9131,7 @@ def quantile(
         *,
         separator: str = "_",
         drop_first: bool = False,
-    ) -> Self:
+    ) -> DataFrame:
         """
         Convert categorical variables into dummy/indicator variables.
 
@@ -9396,7 +9391,7 @@ def approx_n_unique(self) -> DataFrame:
         """
         return self.lazy().approx_n_unique().collect(_eager=True)
 
-    def rechunk(self) -> Self:
+    def rechunk(self) -> DataFrame:
         """
         Rechunk the data in this DataFrame to a contiguous allocation.
 
@@ -9405,7 +9400,7 @@ def rechunk(self) -> Self:
         """
         return self._from_pydf(self._df.rechunk())
 
-    def null_count(self) -> Self:
+    def null_count(self) -> DataFrame:
         """
         Create a new DataFrame that shows the null counts per column.
 
@@ -9438,7 +9433,7 @@ def sample(
         with_replacement: bool = False,
         shuffle: bool = False,
         seed: int | None = None,
-    ) -> Self:
+    ) -> DataFrame:
         """
         Sample from this DataFrame.
 
@@ -10145,7 +10140,7 @@ def iter_slices(self, n_rows: int = 10_000) -> Iterator[DataFrame]:
         for offset in range(0, self.height, n_rows):
             yield self.slice(offset, n_rows)
 
-    def shrink_to_fit(self, *, in_place: bool = False) -> Self:
+    def shrink_to_fit(self, *, in_place: bool = False) -> DataFrame:
         """
         Shrink DataFrame memory usage.
 
@@ -10326,7 +10321,7 @@ def unnest(
         self,
         columns: ColumnNameOrSelector | Collection[ColumnNameOrSelector],
         *more_columns: ColumnNameOrSelector,
-    ) -> Self:
+    ) -> DataFrame:
         """
         Decompose struct columns into separate columns for each of their fields.
 
@@ -10687,7 +10682,7 @@ def melt(
         value_vars: ColumnNameOrSelector | Sequence[ColumnNameOrSelector] | None = None,
         variable_name: str | None = None,
         value_name: str | None = None,
-    ) -> Self:
+    ) -> DataFrame:
         """
         Unpivot a DataFrame from wide to long format.
 
diff --git a/py-polars/polars/lazyframe/frame.py b/py-polars/polars/lazyframe/frame.py
index 9c084b1d6affe..1d898536b36c8 100644
--- a/py-polars/polars/lazyframe/frame.py
+++ b/py-polars/polars/lazyframe/frame.py
@@ -314,7 +314,7 @@ def __init__(
         )
 
     @classmethod
-    def _from_pyldf(cls, ldf: PyLazyFrame) -> Self:
+    def _from_pyldf(cls, ldf: PyLazyFrame) -> LazyFrame:
         self = cls.__new__(cls)
         self._ldf = ldf
         return self
@@ -333,7 +333,7 @@ def _scan_python_function(
         scan_fn: Any,
         *,
         pyarrow: bool = False,
-    ) -> Self:
+    ) -> LazyFrame:
         self = cls.__new__(cls)
         if isinstance(schema, Mapping):
             self._ldf = PyLazyFrame.scan_from_python_function_pl_schema(
@@ -346,7 +346,7 @@ def _scan_python_function(
         return self
 
     @classmethod
-    def deserialize(cls, source: str | Path | IOBase) -> Self:
+    def deserialize(cls, source: str | Path | IOBase) -> LazyFrame:
         """
         Read a logical plan from a file to construct a LazyFrame.
 
@@ -580,10 +580,10 @@ def __le__(self, other: Any) -> NoReturn:
     def __contains__(self, key: str) -> bool:
         return key in self.collect_schema()
 
-    def __copy__(self) -> Self:
+    def __copy__(self) -> LazyFrame:
         return self.clone()
 
-    def __deepcopy__(self, memo: None = None) -> Self:
+    def __deepcopy__(self, memo: None = None) -> LazyFrame:
         return self.clone()
 
     def __getitem__(self, item: int | range | slice) -> LazyFrame:
@@ -1191,7 +1191,7 @@ def show_graph(
             plt.show()
         return None
 
-    def inspect(self, fmt: str = "{}") -> Self:
+    def inspect(self, fmt: str = "{}") -> LazyFrame:
         """
         Inspect a node in the computation graph.
 
@@ -1225,7 +1225,7 @@ def sort(
         nulls_last: bool | Sequence[bool] = False,
         maintain_order: bool = False,
         multithreaded: bool = True,
-    ) -> Self:
+    ) -> LazyFrame:
         """
         Sort the LazyFrame by the given columns.
 
@@ -1331,7 +1331,7 @@ def sort(
             )
         )
 
-    def sql(self, query: str, *, table_name: str = "self") -> Self:
+    def sql(self, query: str, *, table_name: str = "self") -> LazyFrame:
         """
         Execute a SQL query against the LazyFrame.
 
@@ -1413,7 +1413,7 @@ def sql(self, query: str, *, table_name: str = "self") -> Self:
         with SQLContext(register_globals=False, eager=False) as ctx:
             name = table_name if table_name else "self"
             ctx.register(name=name, frame=self)
-            return ctx.execute(query)  # type: ignore[return-value]
+            return ctx.execute(query)
 
     @deprecate_renamed_parameter("descending", "reverse", version="1.0.0")
     def top_k(
@@ -1422,7 +1422,7 @@ def top_k(
         *,
         by: IntoExpr | Iterable[IntoExpr],
         reverse: bool | Sequence[bool] = False,
-    ) -> Self:
+    ) -> LazyFrame:
         """
         Return the `k` largest rows.
 
@@ -1497,7 +1497,7 @@ def bottom_k(
         *,
         by: IntoExpr | Iterable[IntoExpr],
         reverse: bool | Sequence[bool] = False,
-    ) -> Self:
+    ) -> LazyFrame:
         """
         Return the `k` smallest rows.
 
@@ -2643,7 +2643,7 @@ def fetch(
             )
         return wrap_df(lf.fetch(n_rows))
 
-    def lazy(self) -> Self:
+    def lazy(self) -> LazyFrame:
         """
         Return lazy representation, i.e. itself.
 
@@ -2668,7 +2668,7 @@ def lazy(self) -> Self:
         """
         return self
 
-    def cache(self) -> Self:
+    def cache(self) -> LazyFrame:
         """
         Cache the result once the execution of the physical plan hits this node.
 
@@ -2684,7 +2684,7 @@ def cast(
         ),
         *,
         strict: bool = True,
-    ) -> Self:
+    ) -> LazyFrame:
         """
         Cast LazyFrame column(s) to the specified dtype(s).
 
@@ -2823,7 +2823,7 @@ def clear(self, n: int = 0) -> LazyFrame:
         """
         return pl.DataFrame(schema=self.collect_schema()).clear(n).lazy()
 
-    def clone(self) -> Self:
+    def clone(self) -> LazyFrame:
         """
         Create a copy of this LazyFrame.
 
@@ -2858,7 +2858,7 @@ def filter(
             | np.ndarray[Any, Any]
         ),
         **constraints: Any,
-    ) -> Self:
+    ) -> LazyFrame:
         """
         Filter the rows in the LazyFrame based on a predicate expression.
 
@@ -2960,7 +2960,7 @@ def filter(
         # note: identify masks separately from predicates
         for p in predicates:
             if p is False:  # immediately disallows all rows
-                return self.clear()  # type: ignore[return-value]
+                return self.clear()
             elif p is True:
                 continue  # no-op; matches all rows
             if _is_generator(p):
@@ -3018,7 +3018,7 @@ def filter(
 
     def select(
         self, *exprs: IntoExpr | Iterable[IntoExpr], **named_exprs: IntoExpr
-    ) -> Self:
+    ) -> LazyFrame:
         """
         Select columns from this LazyFrame.
 
@@ -3127,7 +3127,7 @@ def select(
 
     def select_seq(
         self, *exprs: IntoExpr | Iterable[IntoExpr], **named_exprs: IntoExpr
-    ) -> Self:
+    ) -> LazyFrame:
         """
         Select columns from this LazyFrame.
 
@@ -3740,7 +3740,7 @@ def join_asof(
         allow_parallel: bool = True,
         force_parallel: bool = False,
         coalesce: bool | None = None,
-    ) -> Self:
+    ) -> LazyFrame:
         """
         Perform an asof join.
 
@@ -3933,7 +3933,7 @@ def join(
         coalesce: bool | None = None,
         allow_parallel: bool = True,
         force_parallel: bool = False,
-    ) -> Self:
+    ) -> LazyFrame:
         """
         Add a join operation to the Logical Plan.
 
@@ -4137,7 +4137,7 @@ def with_columns(
         self,
         *exprs: IntoExpr | Iterable[IntoExpr],
         **named_exprs: IntoExpr,
-    ) -> Self:
+    ) -> LazyFrame:
         """
         Add columns to this LazyFrame.
 
@@ -4290,7 +4290,7 @@ def with_columns_seq(
         self,
         *exprs: IntoExpr | Iterable[IntoExpr],
         **named_exprs: IntoExpr,
-    ) -> Self:
+    ) -> LazyFrame:
         """
         Add columns to this LazyFrame.
 
@@ -4328,7 +4328,7 @@ def with_columns_seq(
     @deprecate_function(
         "Use `pl.concat(..., how='horizontal')` instead.", version="1.0.0"
     )
-    def with_context(self, other: Self | list[Self]) -> Self:
+    def with_context(self, other: Self | list[Self]) -> LazyFrame:
         """
         Add an external context to the computation graph.
 
@@ -4394,7 +4394,7 @@ def drop(
         self,
         *columns: ColumnNameOrSelector | Iterable[ColumnNameOrSelector],
         strict: bool = True,
-    ) -> Self:
+    ) -> LazyFrame:
         """
         Remove columns from the DataFrame.
 
@@ -4462,7 +4462,7 @@ def drop(
         drop_cols = _expand_selectors(self, *columns)
         return self._from_pyldf(self._ldf.drop(drop_cols, strict=strict))
 
-    def rename(self, mapping: dict[str, str] | Callable[[str], str]) -> Self:
+    def rename(self, mapping: dict[str, str] | Callable[[str], str]) -> LazyFrame:
         """
         Rename column names.
 
@@ -4516,7 +4516,7 @@ def rename(self, mapping: dict[str, str] | Callable[[str], str]) -> Self:
             new = list(mapping.values())
             return self._from_pyldf(self._ldf.rename(existing, new))
 
-    def reverse(self) -> Self:
+    def reverse(self) -> LazyFrame:
         """
         Reverse the DataFrame.
 
@@ -4544,7 +4544,7 @@ def reverse(self) -> Self:
 
     def shift(
         self, n: int | IntoExprColumn = 1, *, fill_value: IntoExpr | None = None
-    ) -> Self:
+    ) -> LazyFrame:
        """
        Shift values by the given number of indices.
 
@@ -4620,7 +4620,7 @@ def shift(
         n = parse_into_expression(n)
         return self._from_pyldf(self._ldf.shift(n, fill_value))
 
-    def slice(self, offset: int, length: int | None = None) -> Self:
+    def slice(self, offset: int, length: int | None = None) -> LazyFrame:
         """
         Get a slice of this DataFrame.
 
@@ -4657,7 +4657,7 @@ def slice(self, offset: int, length: int | None = None) -> Self:
             raise ValueError(msg)
         return self._from_pyldf(self._ldf.slice(offset, length))
 
-    def limit(self, n: int = 5) -> Self:
+    def limit(self, n: int = 5) -> LazyFrame:
         """
         Get the first `n` rows.
 
@@ -4708,7 +4708,7 @@ def limit(self, n: int = 5) -> Self:
         """
         return self.head(n)
 
-    def head(self, n: int = 5) -> Self:
+    def head(self, n: int = 5) -> LazyFrame:
         """
         Get the first `n` rows.
 
@@ -4757,7 +4757,7 @@ def head(self, n: int = 5) -> Self:
         """
         return self.slice(0, n)
 
-    def tail(self, n: int = 5) -> Self:
+    def tail(self, n: int = 5) -> LazyFrame:
         """
         Get the last `n` rows.
 
@@ -4800,7 +4800,7 @@ def tail(self, n: int = 5) -> Self:
         """
         return self._from_pyldf(self._ldf.tail(n))
 
-    def last(self) -> Self:
+    def last(self) -> LazyFrame:
         """
         Get the last row of the DataFrame.
 
@@ -4824,7 +4824,7 @@ def last(self) -> Self:
         """
         return self.tail(1)
 
-    def first(self) -> Self:
+    def first(self) -> LazyFrame:
         """
         Get the first row of the DataFrame.
 
@@ -4851,7 +4851,7 @@ def first(self) -> Self:
     @deprecate_function(
         "Use `select(pl.all().approx_n_unique())` instead.", version="0.20.11"
     )
-    def approx_n_unique(self) -> Self:
+    def approx_n_unique(self) -> LazyFrame:
         """
         Approximate count of unique values.
 
@@ -4880,7 +4880,7 @@ def approx_n_unique(self) -> Self:
         """
         return self.select(F.all().approx_n_unique())
 
-    def with_row_index(self, name: str = "index", offset: int = 0) -> Self:
+    def with_row_index(self, name: str = "index", offset: int = 0) -> LazyFrame:
         """
         Add a row index as the first column in the LazyFrame.
 
@@ -4962,7 +4962,7 @@ def with_row_index(self, name: str = "index", offset: int = 0) -> Self:
         " Note that the default column name has changed from 'row_nr' to 'index'.",
         version="0.20.4",
     )
-    def with_row_count(self, name: str = "row_nr", offset: int = 0) -> Self:
+    def with_row_count(self, name: str = "row_nr", offset: int = 0) -> LazyFrame:
         """
         Add a column at index 0 that counts the rows.
 
@@ -5004,7 +5004,7 @@ def with_row_count(self, name: str = "row_nr", offset: int = 0) -> Self:
         """
         return self.with_row_index(name, offset)
 
-    def gather_every(self, n: int, offset: int = 0) -> Self:
+    def gather_every(self, n: int, offset: int = 0) -> LazyFrame:
         """
         Take every nth row in the LazyFrame and return as a new LazyFrame.
 
@@ -5053,7 +5053,7 @@ def fill_null(
         limit: int | None = None,
         *,
         matches_supertype: bool = True,
-    ) -> Self:
+    ) -> LazyFrame:
         """
         Fill null values using the specified value or strategy.
 
@@ -5178,7 +5178,7 @@ def infer_dtype(value: Any) -> PolarsDataType:
 
         return self.select(F.all().fill_null(value, strategy, limit))
 
-    def fill_nan(self, value: int | float | Expr | None) -> Self:
+    def fill_nan(self, value: int | float | Expr | None) -> LazyFrame:
         """
         Fill floating point NaN values.
 
@@ -5221,7 +5221,7 @@ def fill_nan(self, value: int | float | Expr | None) -> Self:
             value = F.lit(value)
         return self._from_pyldf(self._ldf.fill_nan(value._pyexpr))
 
-    def std(self, ddof: int = 1) -> Self:
+    def std(self, ddof: int = 1) -> LazyFrame:
         """
         Aggregate the columns in the LazyFrame to their standard deviation value.
 
@@ -5261,7 +5261,7 @@ def std(self, ddof: int = 1) -> Self:
         """
         return self._from_pyldf(self._ldf.std(ddof))
 
-    def var(self, ddof: int = 1) -> Self:
+    def var(self, ddof: int = 1) -> LazyFrame:
         """
         Aggregate the columns in the LazyFrame to their variance value.
 
@@ -5301,7 +5301,7 @@ def var(self, ddof: int = 1) -> Self:
         """
         return self._from_pyldf(self._ldf.var(ddof))
 
-    def max(self) -> Self:
+    def max(self) -> LazyFrame:
         """
         Aggregate the columns in the LazyFrame to their maximum value.
 
@@ -5325,7 +5325,7 @@ def max(self) -> Self:
         """
         return self._from_pyldf(self._ldf.max())
 
-    def min(self) -> Self:
+    def min(self) -> LazyFrame:
         """
         Aggregate the columns in the LazyFrame to their minimum value.
 
@@ -5349,7 +5349,7 @@ def min(self) -> Self:
         """
         return self._from_pyldf(self._ldf.min())
 
-    def sum(self) -> Self:
+    def sum(self) -> LazyFrame:
         """
         Aggregate the columns in the LazyFrame to their sum value.
 
@@ -5373,7 +5373,7 @@ def sum(self) -> Self:
         """
         return self._from_pyldf(self._ldf.sum())
 
-    def mean(self) -> Self:
+    def mean(self) -> LazyFrame:
         """
         Aggregate the columns in the LazyFrame to their mean value.
 
@@ -5397,7 +5397,7 @@ def mean(self) -> Self:
         """
         return self._from_pyldf(self._ldf.mean())
 
-    def median(self) -> Self:
+    def median(self) -> LazyFrame:
         """
         Aggregate the columns in the LazyFrame to their median value.
 
@@ -5421,7 +5421,7 @@ def median(self) -> Self:
         """
         return self._from_pyldf(self._ldf.median())
 
-    def null_count(self) -> Self:
+    def null_count(self) -> LazyFrame:
         """
         Aggregate the columns in the LazyFrame as the sum of their null value count.
 
@@ -5450,7 +5450,7 @@ def quantile(
         self,
         quantile: float | Expr,
         interpolation: RollingInterpolationMethod = "nearest",
-    ) -> Self:
+    ) -> LazyFrame:
         """
         Aggregate the columns in the LazyFrame to their quantile value.
 
@@ -5486,7 +5486,7 @@ def explode(
         self,
         columns: str | Expr | Sequence[str | Expr],
         *more_columns: str | Expr,
-    ) -> Self:
+    ) -> LazyFrame:
         """
         Explode the DataFrame to long format by exploding the given columns.
 
@@ -5534,7 +5534,7 @@ def unique(
         *,
         keep: UniqueKeepStrategy = "any",
         maintain_order: bool = False,
-    ) -> Self:
+    ) -> LazyFrame:
         """
         Drop duplicate rows from this DataFrame.
 
@@ -5615,7 +5615,7 @@ def unique(
     def drop_nulls(
         self,
         subset: ColumnNameOrSelector | Collection[ColumnNameOrSelector] | None = None,
-    ) -> Self:
+    ) -> LazyFrame:
         """
         Drop all rows that contain null values.
 
@@ -5717,7 +5717,7 @@ def unpivot(
         variable_name: str | None = None,
         value_name: str | None = None,
         streamable: bool = True,
-    ) -> Self:
+    ) -> LazyFrame:
         """
         Unpivot a DataFrame from wide to long format.
 
@@ -5793,7 +5793,7 @@ def map_batches(
         schema: None | SchemaDict = None,
         validate_output_schema: bool = True,
         streamable: bool = False,
-    ) -> Self:
+    ) -> LazyFrame:
         """
         Apply a custom function.
 
@@ -5880,7 +5880,7 @@ def map_batches(
             )
         )
 
-    def interpolate(self) -> Self:
+    def interpolate(self) -> LazyFrame:
         """
         Interpolate intermediate values. The interpolation method is linear.
 
@@ -5912,7 +5912,7 @@ def unnest(
         self,
         columns: ColumnNameOrSelector | Collection[ColumnNameOrSelector],
         *more_columns: ColumnNameOrSelector,
-    ) -> Self:
+    ) -> LazyFrame:
         """
         Decompose struct columns into separate columns for each of their fields.
 
@@ -5962,7 +5962,7 @@ def unnest(
         columns = _expand_selectors(self, columns, *more_columns)
         return self._from_pyldf(self._ldf.unnest(columns))
 
-    def merge_sorted(self, other: LazyFrame, key: str) -> Self:
+    def merge_sorted(self, other: LazyFrame, key: str) -> LazyFrame:
         """
         Take two sorted DataFrames and merge them by the sorted key.
 
@@ -6033,7 +6033,7 @@ def set_sorted(
         column: str,
         *,
         descending: bool = False,
-    ) -> Self:
+    ) -> LazyFrame:
         """
         Indicate that one or multiple columns are sorted.
 
@@ -6069,7 +6069,7 @@ def update(
         left_on: str | Sequence[str] | None = None,
         right_on: str | Sequence[str] | None = None,
         include_nulls: bool = False,
-    ) -> Self:
+    ) -> LazyFrame:
         """
         Update the values in this `LazyFrame` with the values in `other`.
 
@@ -6292,7 +6292,7 @@ def update(
 
         return self._from_pyldf(result._ldf)
 
-    def count(self) -> Self:
+    def count(self) -> LazyFrame:
         """
         Return the number of non-null elements for each column.
 
@@ -6325,7 +6325,7 @@ def melt(
         value_name: str | None = None,
         *,
         streamable: bool = True,
-    ) -> Self:
+    ) -> LazyFrame:
         """
         Unpivot a DataFrame from wide to long format.
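
Why this helps completion: IPython completes attribute access by asking jedi to
infer the type of the receiving expression, and jedi reads return annotations to
do that. A concrete annotation such as `-> DataFrame` resolves with a plain name
lookup, while `-> Self` (PEP 673) needs dedicated inference support that jedi has
historically lacked, so a chained call like `pl.DataFrame(...).head().<TAB>` could
yield no completions. The sketch below is illustrative only and is not part of the
patch; the `WithSelf`/`WithConcrete` classes are hypothetical, and the jedi
behavior described is the presumed motivation rather than something stated in the
diff itself.

# Hypothetical minimal reproduction; none of these names come from Polars.
from __future__ import annotations

import sys

if sys.version_info >= (3, 11):
    from typing import Self
else:
    from typing_extensions import Self  # same fallback the removed block used


class WithSelf:
    # Old style: a completer must first bind `Self` to `WithSelf`.
    def head(self, n: int = 5) -> Self:
        return self


class WithConcrete:
    # New style, as applied to DataFrame/LazyFrame above: the annotation
    # names the class directly, so type inference is a plain lookup.
    def head(self, n: int = 5) -> WithConcrete:
        return self


if __name__ == "__main__":
    # Both behave identically at runtime; only static inference differs.
    # In IPython, `WithConcrete().head().<TAB>` completes reliably, while
    # `WithSelf().head().<TAB>` may complete to nothing, depending on the
    # installed jedi version's PEP 673 support.
    print(type(WithConcrete().head()).__name__)

The usual cost of naming the class outright is that methods inherited by a
subclass are annotated as returning the base class; presumably that imprecision
was acceptable here in exchange for reliable interactive completion.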