diff --git a/py-polars/tests/unit/io/files/test_empty_rows.xlsx b/py-polars/tests/unit/io/files/test_empty_rows.xlsx
new file mode 100644
index 0000000000000..1fc27ee4c3e48
Binary files /dev/null and b/py-polars/tests/unit/io/files/test_empty_rows.xlsx differ
diff --git a/py-polars/tests/unit/io/test_spreadsheet.py b/py-polars/tests/unit/io/test_spreadsheet.py
index 8869affa1e1bd..6dc9a56ca9ab0 100644
--- a/py-polars/tests/unit/io/test_spreadsheet.py
+++ b/py-polars/tests/unit/io/test_spreadsheet.py
@@ -80,6 +80,11 @@ def path_ods_empty(io_files_path: Path) -> Path:
 def path_ods_mixed(io_files_path: Path) -> Path:
     return io_files_path / "mixed.ods"
 
+
+@pytest.fixture()
+def path_empty_rows_excel(io_files_path: Path) -> Path:
+    return io_files_path / "test_empty_rows.xlsx"
+
 
 @pytest.mark.parametrize(
     ("read_spreadsheet", "source", "engine_params"),
@@ -1058,3 +1063,40 @@ def test_identify_workbook(
         bytesio_data = BytesIO(f.read())
         assert _identify_workbook(bytesio_data) == file_type
         assert isinstance(pl.read_excel(bytesio_data, engine="calamine"), pl.DataFrame)
+
+
+def test_drop_empty_rows(path_empty_rows_excel: Path) -> None:
+    # For every engine, the default and `drop_empty_rows=True` are expected to
+    # give 8 rows, while `drop_empty_rows=False` is expected to give 10 rows.
+    df1 = pl.read_excel(source=path_empty_rows_excel, engine="xlsx2csv")
+    assert df1.shape == (8, 4)
+    df2 = pl.read_excel(
+        source=path_empty_rows_excel, engine="xlsx2csv", drop_empty_rows=True
+    )
+    assert df2.shape == (8, 4)
+    df3 = pl.read_excel(
+        source=path_empty_rows_excel, engine="xlsx2csv", drop_empty_rows=False
+    )
+    assert df3.shape == (10, 4)
+
+    df4 = pl.read_excel(source=path_empty_rows_excel, engine="openpyxl")
+    assert df4.shape == (8, 4)
+    df5 = pl.read_excel(
+        source=path_empty_rows_excel, engine="openpyxl", drop_empty_rows=True
+    )
+    assert df5.shape == (8, 4)
+    df6 = pl.read_excel(
+        source=path_empty_rows_excel, engine="openpyxl", drop_empty_rows=False
+    )
+    assert df6.shape == (10, 4)
+
+    df7 = pl.read_excel(source=path_empty_rows_excel, engine="calamine")
+    assert df7.shape == (8, 4)
+    df8 = pl.read_excel(
+        source=path_empty_rows_excel, engine="calamine", drop_empty_rows=True
+    )
+    assert df8.shape == (8, 4)
+    df9 = pl.read_excel(
+        source=path_empty_rows_excel, engine="calamine", drop_empty_rows=False
+    )
+    assert df9.shape == (10, 4)