Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

perf!: Default to writing binview data to IPC, mark compression argument as keyword-only #17084

Merged
merged 2 commits into from
Jun 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion crates/polars-io/src/ipc/write.rs
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ where
IpcWriter {
writer,
compression: None,
pl_flavor: false,
pl_flavor: true,
}
}

Expand Down
36 changes: 29 additions & 7 deletions py-polars/polars/dataframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -3240,26 +3240,26 @@ def write_excel(
def write_ipc(
self,
file: None,
compression: IpcCompression = "uncompressed",
*,
future: bool = False,
compression: IpcCompression = "uncompressed",
future: bool | None = None,
) -> BytesIO: ...

@overload
def write_ipc(
self,
file: str | Path | IO[bytes],
compression: IpcCompression = "uncompressed",
*,
future: bool = False,
compression: IpcCompression = "uncompressed",
future: bool | None = None,
) -> None: ...

def write_ipc(
self,
file: str | Path | IO[bytes] | None,
compression: IpcCompression = "uncompressed",
*,
future: bool = False,
compression: IpcCompression = "uncompressed",
future: bool | None = None,
) -> BytesIO | None:
"""
Write to Arrow IPC binary stream or Feather file.
Expand Down Expand Up @@ -3308,6 +3308,8 @@ def write_ipc(
issue_unstable_warning(
"The `future` parameter of `DataFrame.write_ipc` is considered unstable."
)
if future is None:
future = True

self._df.write_ipc(file, compression, future)
return file if return_bytes else None # type: ignore[return-value]
Expand All @@ -3316,20 +3318,26 @@ def write_ipc(
def write_ipc_stream(
self,
file: None,
*,
compression: IpcCompression = "uncompressed",
future: bool | None = None,
) -> BytesIO: ...

@overload
def write_ipc_stream(
self,
file: str | Path | IO[bytes],
*,
compression: IpcCompression = "uncompressed",
future: bool | None = None,
) -> None: ...

def write_ipc_stream(
self,
file: str | Path | IO[bytes] | None,
*,
compression: IpcCompression = "uncompressed",
future: bool | None = None,
) -> BytesIO | None:
"""
Write to Arrow IPC record batch stream.
Expand All @@ -3343,6 +3351,13 @@ def write_ipc_stream(
be written. If set to `None`, the output is returned as a BytesIO object.
compression : {'uncompressed', 'lz4', 'zstd'}
Compression method. Defaults to "uncompressed".
future
Setting this to `True` will write Polars' internal data structures that
might not be available by other Arrow implementations.

.. warning::
This functionality is considered **unstable**. It may be changed
at any point without it being considered a breaking change.

Examples
--------
Expand All @@ -3367,7 +3382,14 @@ def write_ipc_stream(
if compression is None:
compression = "uncompressed"

self._df.write_ipc_stream(file, compression)
if future:
issue_unstable_warning(
"The `future` parameter of `DataFrame.write_ipc` is considered unstable."
)
if future is None:
future = True

self._df.write_ipc_stream(file, compression, future=future)
return file if return_bytes else None # type: ignore[return-value]

def write_parquet(
Expand Down
3 changes: 3 additions & 0 deletions py-polars/src/dataframe/io.rs
Original file line number Diff line number Diff line change
Expand Up @@ -525,12 +525,14 @@ impl PyDataFrame {
py: Python,
py_f: PyObject,
compression: Wrap<Option<IpcCompression>>,
future: bool,
) -> PyResult<()> {
if let Ok(s) = py_f.extract::<PyBackedStr>(py) {
let f = std::fs::File::create(&*s)?;
py.allow_threads(|| {
IpcStreamWriter::new(f)
.with_compression(compression.0)
.with_pl_flavor(future)
.finish(&mut self.df)
.map_err(PyPolarsErr::from)
})?;
Expand All @@ -539,6 +541,7 @@ impl PyDataFrame {

IpcStreamWriter::new(&mut buf)
.with_compression(compression.0)
.with_pl_flavor(future)
.finish(&mut self.df)
.map_err(PyPolarsErr::from)?;
}
Expand Down
2 changes: 1 addition & 1 deletion py-polars/tests/unit/io/test_ipc.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ def test_compressed_simple(compression: IpcCompression, stream: bool) -> None:
df = pl.DataFrame({"a": [1, 2, 3], "b": [True, False, True], "c": ["a", "b", "c"]})

f = io.BytesIO()
write_ipc(df, stream, f, compression)
write_ipc(df, stream, f, compression=compression)
f.seek(0)

df_read = read_ipc(stream, f, use_pyarrow=False)
Expand Down