From 69b8440c7dc4b165faca1462f928d008ace989b9 Mon Sep 17 00:00:00 2001 From: Gijs Burghoorn Date: Thu, 20 Jun 2024 13:49:27 +0200 Subject: [PATCH] feat: print row index in explain + dot (#17074) --- crates/polars-plan/src/plans/ir/dot.rs | 33 ++++++++++++++--------- crates/polars-plan/src/plans/ir/format.rs | 29 +++++++++++--------- 2 files changed, 36 insertions(+), 26 deletions(-) diff --git a/crates/polars-plan/src/plans/ir/dot.rs b/crates/polars-plan/src/plans/ir/dot.rs index f38c7d3ebfda..43d7b158a013 100644 --- a/crates/polars-plan/src/plans/ir/dot.rs +++ b/crates/polars-plan/src/plans/ir/dot.rs @@ -255,15 +255,21 @@ impl<'a> IRDotDisplay<'a> { let path = PathsDisplay(paths.as_ref()); let with_columns = options.with_columns.as_ref().map(|cols| cols.as_ref()); let with_columns = NumColumns(with_columns); - let total_columns = file_info.schema.len(); - let predicate = predicate.as_ref().map(|e| self.display_expr(e)); - let predicate = OptionExprIRDisplay(predicate); + let total_columns = + file_info.schema.len() - usize::from(options.row_index.is_some()); write_label(f, id, |f| { - write!( - f, - "{name} SCAN {path}\nπ {with_columns}/{total_columns};\nσ {predicate}", - ) + write!(f, "{name} SCAN {path}\nπ {with_columns}/{total_columns};",)?; + + if let Some(predicate) = predicate.as_ref() { + write!(f, "\nσ {}", self.display_expr(predicate))?; + } + + if let Some(row_index) = options.row_index.as_ref() { + write!(f, "\nrow index: {} (+{})", row_index.name, row_index.offset)?; + } + + Ok(()) })?; }, Join { @@ -332,7 +338,7 @@ impl<'a> IRDotDisplay<'a> { } // A few utility structures for formatting -struct PathsDisplay<'a>(&'a [PathBuf]); +pub(crate) struct PathsDisplay<'a>(pub &'a [PathBuf]); struct NumColumns<'a>(Option<&'a [String]>); struct NumColumnsSchema<'a>(Option<&'a Schema>); struct OptionExprIRDisplay<'a>(Option<ExprIRDisplay<'a>>); @@ -340,13 +346,14 @@ struct OptionExprIRDisplay<'a>(Option<ExprIRDisplay<'a>>); impl fmt::Display for PathsDisplay<'_> { fn fmt(&self, f: &mut
fmt::Formatter<'_>) -> fmt::Result { match self.0.len() { - 0 => Ok(()), - 1 => self.0[0].display().fmt(f), + 0 => write!(f, "[]"), + 1 => write!(f, "[{}]", self.0[0].display()), + 2 => write!(f, "[{}, {}]", self.0[0].display(), self.0[1].display()), _ => write!( f, - "{} files: first file: {}", - self.0.len(), - self.0[0].display() + "[{}, ... {} other files]", + self.0[0].to_string_lossy(), + self.0.len() - 1, ), } } diff --git a/crates/polars-plan/src/plans/ir/format.rs b/crates/polars-plan/src/plans/ir/format.rs index 78ecbb034ad6..dc5a6072f4c4 100644 --- a/crates/polars-plan/src/plans/ir/format.rs +++ b/crates/polars-plan/src/plans/ir/format.rs @@ -5,8 +5,10 @@ use std::path::PathBuf; use polars_core::datatypes::AnyValue; use polars_core::schema::Schema; +use polars_io::RowIndex; use recursive::recursive; +use super::ir::dot::PathsDisplay; use crate::prelude::*; pub struct IRDisplay<'a> { @@ -60,18 +62,11 @@ fn write_scan( total_columns: usize, predicate: &Option<ExprIRDisplay<'_>>, n_rows: Option<usize>, + row_index: Option<&RowIndex>, ) -> fmt::Result { - let path_fmt = match path.len() { - 1 => path[0].to_string_lossy(), - 0 => "".into(), - _ => Cow::Owned(format!( - "{} files: first file: {}", - path.len(), - path[0].to_string_lossy() - )), - }; - - write!(f, "{:indent$}{name} SCAN {path_fmt}", "")?; + write!(f, "{:indent$}{name} SCAN {}", "", PathsDisplay(path))?; + + let total_columns = total_columns - usize::from(row_index.is_some()); if n_columns > 0 { write!( f, @@ -87,6 +82,12 @@ fn write_scan( if let Some(n_rows) = n_rows { write!(f, "\n{:indent$}N_ROWS: {n_rows}", "")?; } + if let Some(row_index) = row_index { + write!(f, "\n{:indent$}ROW_INDEX: {}", "", row_index.name)?; + if row_index.offset != 0 { + write!(f, " (offset: {})", row_index.offset)?; + } + } Ok(()) } @@ -167,11 +168,12 @@ impl<'a> IRDisplay<'a> { f, "PYTHON", &[], - sub_indent, + indent, n_columns, total_columns, &predicate, options.n_rows, + None, ) }, Union { inputs, options } => { @@ -234,11 +236,12 @@
impl<'a> IRDisplay<'a> { f, scan_type.into(), paths, - sub_indent, + indent, n_columns, file_info.schema.len(), &predicate, file_options.n_rows, + file_options.row_index.as_ref(), ) }, Filter { predicate, input } => {