From e5c8153efffe136c9ec47138416c25462dfc9c8f Mon Sep 17 00:00:00 2001 From: Alexander Beedie Date: Sat, 12 Oct 2024 02:19:47 +0400 Subject: [PATCH] feat(python): Add 'drop_empty_rows' parameter for `read_ods` --- py-polars/polars/io/spreadsheet/functions.py | 29 +++++++++++++------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/py-polars/polars/io/spreadsheet/functions.py b/py-polars/polars/io/spreadsheet/functions.py index a723aca88ce9..8b3df004f4b3 100644 --- a/py-polars/polars/io/spreadsheet/functions.py +++ b/py-polars/polars/io/spreadsheet/functions.py @@ -55,8 +55,8 @@ def read_excel( columns: Sequence[int] | Sequence[str] | None = ..., schema_overrides: SchemaDict | None = ..., infer_schema_length: int | None = ..., - raise_if_empty: bool = ..., drop_empty_rows: bool = ..., + raise_if_empty: bool = ..., ) -> pl.DataFrame: ... @@ -73,8 +73,8 @@ def read_excel( columns: Sequence[int] | Sequence[str] | None = ..., schema_overrides: SchemaDict | None = ..., infer_schema_length: int | None = ..., - raise_if_empty: bool = ..., drop_empty_rows: bool = ..., + raise_if_empty: bool = ..., ) -> pl.DataFrame: ... @@ -91,8 +91,8 @@ def read_excel( columns: Sequence[int] | Sequence[str] | None = ..., schema_overrides: SchemaDict | None = ..., infer_schema_length: int | None = ..., - raise_if_empty: bool = ..., drop_empty_rows: bool = ..., + raise_if_empty: bool = ..., ) -> NoReturn: ... @@ -111,8 +111,8 @@ def read_excel( columns: Sequence[int] | Sequence[str] | None = ..., schema_overrides: SchemaDict | None = ..., infer_schema_length: int | None = ..., - raise_if_empty: bool = ..., drop_empty_rows: bool = ..., + raise_if_empty: bool = ..., ) -> dict[str, pl.DataFrame]: ... @@ -129,8 +129,8 @@ def read_excel( columns: Sequence[int] | Sequence[str] | None = ..., schema_overrides: SchemaDict | None = ..., infer_schema_length: int | None = ..., - raise_if_empty: bool = ..., drop_empty_rows: bool = ..., + raise_if_empty: bool = ..., ) -> pl.DataFrame: ... @@ -147,8 +147,8 @@ def read_excel( columns: Sequence[int] | Sequence[str] | None = ..., schema_overrides: SchemaDict | None = ..., infer_schema_length: int | None = ..., - raise_if_empty: bool = ..., drop_empty_rows: bool = ..., + raise_if_empty: bool = ..., ) -> dict[str, pl.DataFrame]: ... @@ -166,8 +166,8 @@ def read_excel( columns: Sequence[int] | Sequence[str] | None = None, schema_overrides: SchemaDict | None = None, infer_schema_length: int | None = N_INFER_DEFAULT, - raise_if_empty: bool = True, drop_empty_rows: bool = True, + raise_if_empty: bool = True, ) -> pl.DataFrame | dict[str, pl.DataFrame]: """ Read Excel spreadsheet data into a DataFrame. @@ -236,12 +236,11 @@ def read_excel( entire dataset is scanned to determine the dtypes, which can slow parsing for large workbooks. Note that only the "calamine" and "xlsx2csv" engines support this parameter. + drop_empty_rows + Indicate whether to omit empty rows when reading data into the DataFrame. raise_if_empty When there is no data in the sheet,`NoDataError` is raised. If this parameter is set to False, an empty DataFrame (with no columns) is returned instead. - drop_empty_rows - A boolean flag whether to drop empty rows or not from the dataframe. Default - is True. Returns ------- @@ -323,6 +322,7 @@ def read_ods( columns: Sequence[int] | Sequence[str] | None = ..., schema_overrides: SchemaDict | None = ..., infer_schema_length: int | None = ..., + drop_empty_rows: bool = ..., raise_if_empty: bool = ..., ) -> pl.DataFrame: ... @@ -337,6 +337,7 @@ def read_ods( columns: Sequence[int] | Sequence[str] | None = ..., schema_overrides: SchemaDict | None = ..., infer_schema_length: int | None = ..., + drop_empty_rows: bool = ..., raise_if_empty: bool = ..., ) -> pl.DataFrame: ... @@ -351,6 +352,7 @@ def read_ods( columns: Sequence[int] | Sequence[str] | None = ..., schema_overrides: SchemaDict | None = ..., infer_schema_length: int | None = ..., + drop_empty_rows: bool = ..., raise_if_empty: bool = ..., ) -> NoReturn: ... @@ -365,6 +367,7 @@ def read_ods( columns: Sequence[int] | Sequence[str] | None = ..., schema_overrides: SchemaDict | None = ..., infer_schema_length: int | None = ..., + drop_empty_rows: bool = ..., raise_if_empty: bool = ..., ) -> dict[str, pl.DataFrame]: ... @@ -379,6 +382,7 @@ def read_ods( columns: Sequence[int] | Sequence[str] | None = ..., schema_overrides: SchemaDict | None = ..., infer_schema_length: int | None = ..., + drop_empty_rows: bool = ..., raise_if_empty: bool = ..., ) -> pl.DataFrame: ... @@ -393,6 +397,7 @@ def read_ods( columns: Sequence[int] | Sequence[str] | None = ..., schema_overrides: SchemaDict | None = ..., infer_schema_length: int | None = ..., + drop_empty_rows: bool = ..., raise_if_empty: bool = ..., ) -> dict[str, pl.DataFrame]: ... @@ -406,6 +411,7 @@ def read_ods( columns: Sequence[int] | Sequence[str] | None = None, schema_overrides: SchemaDict | None = None, infer_schema_length: int | None = N_INFER_DEFAULT, + drop_empty_rows: bool = True, raise_if_empty: bool = True, ) -> pl.DataFrame | dict[str, pl.DataFrame]: """ @@ -440,6 +446,8 @@ def read_ods( The maximum number of rows to scan for schema inference. If set to `None`, the entire dataset is scanned to determine the dtypes, which can slow parsing for large workbooks. + drop_empty_rows + Indicate whether to omit empty rows when reading data into the DataFrame. raise_if_empty When there is no data in the sheet,`NoDataError` is raised. If this parameter is set to False, an empty DataFrame (with no columns) is returned instead. @@ -481,6 +489,7 @@ def read_ods( schema_overrides=schema_overrides, infer_schema_length=infer_schema_length, raise_if_empty=raise_if_empty, + drop_empty_rows=drop_empty_rows, has_header=has_header, columns=columns, )