Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(rust!): Change DataFrame.is_empty() to check height == 0 instead of width == 0 #16351

Merged
merged 5 commits into from
May 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion crates/polars-core/src/frame/explode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ impl DataFrame {
pub fn explode_impl(&self, mut columns: Vec<Series>) -> PolarsResult<DataFrame> {
polars_ensure!(!columns.is_empty(), InvalidOperation: "no columns provided in explode");
let mut df = self.clone();
if self.height() == 0 {
if self.is_empty() {
for s in &columns {
df.with_column(s.explode()?)?;
}
Expand Down
2 changes: 1 addition & 1 deletion crates/polars-core/src/frame/group_by/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ impl DataFrame {
.cloned()
.collect::<Vec<_>>();
if by.is_empty() {
let groups = if self.height() == 0 {
let groups = if self.is_empty() {
vec![]
} else {
vec![[0, self.height() as IdxSize]]
Expand Down
12 changes: 6 additions & 6 deletions crates/polars-core/src/frame/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -729,7 +729,7 @@ impl DataFrame {
self.shape().0
}

/// Check if the [`DataFrame`] is empty.
/// Returns `true` if the [`DataFrame`] contains no rows.
///
/// # Example
///
Expand All @@ -744,7 +744,7 @@ impl DataFrame {
/// # Ok::<(), PolarsError>(())
/// ```
pub fn is_empty(&self) -> bool {
self.columns.is_empty()
self.height() == 0
}

/// Add columns horizontally.
Expand Down Expand Up @@ -779,7 +779,7 @@ impl DataFrame {
// this DataFrame is already modified when an error occurs.
for col in columns {
polars_ensure!(
col.len() == self.height() || self.height() == 0,
col.len() == self.height() || self.is_empty(),
ShapeMismatch: "unable to hstack Series of length {} and DataFrame of height {}",
col.len(), self.height(),
);
Expand Down Expand Up @@ -1146,7 +1146,7 @@ impl DataFrame {
series = series.new_from_index(0, height);
}

if series.len() == height || df.is_empty() {
if series.len() == height || df.get_columns().is_empty() {
df.add_column_by_search(series)?;
Ok(df)
}
Expand Down Expand Up @@ -1219,7 +1219,7 @@ impl DataFrame {
series = series.new_from_index(0, height);
}

if series.len() == height || self.is_empty() {
if series.len() == height || self.columns.is_empty() {
self.add_column_by_schema(series, schema)?;
Ok(self)
}
Expand Down Expand Up @@ -1795,7 +1795,7 @@ impl DataFrame {
});
};

if self.height() == 0 {
if self.is_empty() {
let mut out = self.clone();
set_sorted(&mut out);

Expand Down
2 changes: 1 addition & 1 deletion crates/polars-core/src/utils/flatten.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ pub fn flatten_df_iter(df: &DataFrame) -> impl Iterator<Item = DataFrame> + '_ {
})
.collect();
let df = unsafe { DataFrame::new_no_checks(columns) };
if df.height() == 0 {
if df.is_empty() {
None
} else {
Some(df)
Expand Down
2 changes: 1 addition & 1 deletion crates/polars-core/src/utils/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,7 @@ pub fn split_df_as_ref(df: &DataFrame, target: usize, strict: bool) -> Vec<DataF
/// Split a [`DataFrame`] into `target` parts. We take a `&mut` to be able to repartition/align chunks.
/// This split is strict: it respects `target` even if the resulting chunks are suboptimal.
pub fn split_df(df: &mut DataFrame, target: usize) -> Vec<DataFrame> {
if target == 0 || df.height() == 0 {
if target == 0 || df.is_empty() {
return vec![df.clone()];
}
// make sure that chunks are aligned.
Expand Down
2 changes: 1 addition & 1 deletion crates/polars-expr/src/expressions/window.rs
Original file line number Diff line number Diff line change
Expand Up @@ -399,7 +399,7 @@ impl PhysicalExpr for WindowExpr {

// 4. select the final column and return

if df.height() == 0 {
if df.is_empty() {
let field = self.phys_function.to_field(&df.schema())?;
return Ok(Series::full_null(field.name(), 0, field.data_type()));
}
Expand Down
3 changes: 1 addition & 2 deletions crates/polars-io/src/csv/write/write_impl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -143,8 +143,7 @@ pub(crate) fn write<W: Write>(
let cols = unsafe { std::mem::transmute::<&[Series], &[Series]>(cols) };
let mut write_buffer = write_buffer_pool.get();

// don't use df.empty, won't work if there are columns.
if df.height() == 0 {
if df.is_empty() {
return Ok(write_buffer);
}

Expand Down
2 changes: 1 addition & 1 deletion crates/polars-io/src/parquet/read/read_impl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -727,7 +727,7 @@ impl BatchedParquetReader {
skipped_all_rgs |= dfs.is_empty();
for mut df in dfs {
// make sure that the chunks are not too large
let n = df.shape().0 / self.chunk_size;
let n = df.height() / self.chunk_size;
if n > 1 {
for df in split_df(&mut df, n) {
self.chunks_fifo.push_back(df)
Expand Down
2 changes: 1 addition & 1 deletion crates/polars-io/src/predicates.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ pub fn apply_predicate(
predicate: Option<&dyn PhysicalIoExpr>,
parallel: bool,
) -> PolarsResult<()> {
if let (Some(predicate), false) = (&predicate, df.is_empty()) {
if let (Some(predicate), false) = (&predicate, df.get_columns().is_empty()) {
let s = predicate.evaluate_io(df)?;
let mask = s.bool().expect("filter predicates was not of type boolean");

Expand Down
12 changes: 2 additions & 10 deletions crates/polars-lazy/src/physical_plan/executors/projection.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,7 @@ impl ProjectionExec {
self.has_windows,
self.options.run_parallel,
)?;
check_expand_literals(
selected_cols,
df.height() == 0,
self.options.duplicate_check,
)
check_expand_literals(selected_cols, df.is_empty(), self.options.duplicate_check)
});

let df = POOL.install(|| iter.collect::<PolarsResult<Vec<_>>>())?;
Expand All @@ -60,11 +56,7 @@ impl ProjectionExec {
self.has_windows,
self.options.run_parallel,
)?;
check_expand_literals(
selected_cols,
df.height() == 0,
self.options.duplicate_check,
)?
check_expand_literals(selected_cols, df.is_empty(), self.options.duplicate_check)?
};

// this only runs during testing and checks if the runtime type matches the predicted schema
Expand Down
2 changes: 1 addition & 1 deletion crates/polars-lazy/src/physical_plan/executors/unique.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ impl Executor for UniqueExec {

state.record(
|| {
if df.height() == 0 {
if df.is_empty() {
return Ok(df);
}

Expand Down
2 changes: 1 addition & 1 deletion crates/polars-pipe/src/operators/chunks.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ impl DataChunk {
Self::new(self.chunk_index, data)
}
pub(crate) fn is_empty(&self) -> bool {
self.data.height() == 0
self.data.is_empty()
}
}

Expand Down
2 changes: 1 addition & 1 deletion crates/polars/tests/it/lazy/predicate_queries.rs
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ fn test_binaryexpr_pushdown_left_join_9506() -> PolarsResult<()> {
}?;
let df = df1.lazy().left_join(df2.lazy(), col("b"), col("b"));
let out = df.filter(col("c").eq(lit("c2"))).collect()?;
assert!(out.height() == 0);
assert!(out.is_empty());
Ok(())
}

Expand Down
4 changes: 2 additions & 2 deletions py-polars/polars/dataframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -10379,7 +10379,7 @@ def interpolate(self) -> DataFrame:

def is_empty(self) -> bool:
"""
Check if the dataframe is empty.
Returns `True` if the DataFrame contains no rows.

Examples
--------
Expand All @@ -10389,7 +10389,7 @@ def is_empty(self) -> bool:
>>> df.filter(pl.col("foo") > 99).is_empty()
True
"""
return self.height == 0
return self._df.is_empty()

def to_struct(self, name: str = "") -> Series:
"""
Expand Down
4 changes: 4 additions & 0 deletions py-polars/src/dataframe/general.rs
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,10 @@ impl PyDataFrame {
self.df.width()
}

/// Report whether the wrapped `DataFrame` is empty.
///
/// Delegates directly to `is_empty()` on the inner `df` and returns its result unchanged.
pub fn is_empty(&self) -> bool {
self.df.is_empty()
}

pub fn hstack(&self, columns: Vec<PySeries>) -> PyResult<Self> {
let columns = columns.to_series();
let df = self.df.hstack(&columns).map_err(PyPolarsErr::from)?;
Expand Down
9 changes: 2 additions & 7 deletions py-polars/src/interop/numpy/to_numpy_df.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,7 @@ fn df_to_numpy(
writable: bool,
allow_copy: bool,
) -> PyResult<PyObject> {
// TODO: Use `is_empty` when fixed:
// https://github.com/pola-rs/polars/pull/16351
if df.height() == 0 {
if df.is_empty() {
// Take this path to ensure a writable array.
// This does not actually copy data for an empty DataFrame.
return df_to_numpy_with_copy(py, df, order, true);
Expand Down Expand Up @@ -76,10 +74,7 @@ fn df_to_numpy(
}

fn try_df_to_numpy_view(py: Python, df: &DataFrame) -> Option<PyObject> {
if df.is_empty() {
return None;
}
let first = df.get_columns().first().unwrap().dtype();
let first = df.get_columns().first()?.dtype();
// TODO: Support Datetime/Duration/Array types
if !first.is_numeric() {
return None;
Expand Down