From f61594a7b55ddf351afe81cc8119c89cf46df60e Mon Sep 17 00:00:00 2001 From: Weijie Guo Date: Fri, 29 Mar 2024 15:49:05 +0800 Subject: [PATCH] fix: `sort` for series with unsupported dtype should raise instead of panic (#15385) --- .../src/chunked_array/array/iterator.rs | 23 +++++++++++++++++++ crates/polars-core/src/frame/mod.rs | 2 +- .../src/series/implementations/binary.rs | 4 ++-- .../series/implementations/binary_offset.rs | 4 ++-- .../src/series/implementations/boolean.rs | 4 ++-- .../src/series/implementations/categorical.rs | 4 ++-- .../src/series/implementations/dates_time.rs | 4 ++-- .../src/series/implementations/datetime.rs | 7 +++--- .../src/series/implementations/decimal.rs | 7 +++--- .../src/series/implementations/duration.rs | 7 +++--- .../src/series/implementations/floats.rs | 4 ++-- .../src/series/implementations/mod.rs | 4 ++-- .../src/series/implementations/string.rs | 4 ++-- .../src/series/implementations/struct_.rs | 22 ++++++++---------- crates/polars-core/src/series/mod.rs | 2 +- crates/polars-core/src/series/series_trait.rs | 4 ++-- .../src/physical_plan/expressions/sort.rs | 4 ++-- .../src/chunked_array/array/namespace.rs | 4 ++-- .../src/chunked_array/list/namespace.rs | 7 +++--- crates/polars-ops/src/series/ops/cut.rs | 2 +- .../src/executors/sinks/sort/sink.rs | 2 +- .../src/dsl/function_expr/array.rs | 2 +- .../polars-plan/src/dsl/function_expr/list.rs | 2 +- crates/polars-sql/tests/iss_8395.rs | 2 +- py-polars/src/series/mod.rs | 8 +++++-- 25 files changed, 83 insertions(+), 56 deletions(-) diff --git a/crates/polars-core/src/chunked_array/array/iterator.rs b/crates/polars-core/src/chunked_array/array/iterator.rs index 589b8996dc62..cbb38f954e5e 100644 --- a/crates/polars-core/src/chunked_array/array/iterator.rs +++ b/crates/polars-core/src/chunked_array/array/iterator.rs @@ -124,6 +124,29 @@ impl ArrayChunked { .collect_ca_with_dtype(self.name(), self.dtype().clone()) } + /// Try apply a closure `F` to each array. + /// + /// # Safety + /// Return series of `F` must has the same dtype and number of elements as input if it is Ok. + pub unsafe fn try_apply_amortized_same_type<'a, F>(&'a self, mut f: F) -> PolarsResult + where + F: FnMut(UnstableSeries<'a>) -> PolarsResult, + { + if self.is_empty() { + return Ok(self.clone()); + } + self.amortized_iter() + .map(|opt_v| { + opt_v + .map(|v| { + let out = f(v)?; + Ok(to_arr(&out)) + }) + .transpose() + }) + .try_collect_ca_with_dtype(self.name(), self.dtype().clone()) + } + /// Zip with a `ChunkedArray` then apply a binary function `F` elementwise. /// /// # Safety diff --git a/crates/polars-core/src/frame/mod.rs b/crates/polars-core/src/frame/mod.rs index 019fb6c342d8..0d8a696d190a 100644 --- a/crates/polars-core/src/frame/mod.rs +++ b/crates/polars-core/src/frame/mod.rs @@ -1843,7 +1843,7 @@ impl DataFrame { // no need to compute the sort indices and then take by these indices // simply sort and return as frame if df.width() == 1 && df.check_name_to_idx(s.name()).is_ok() { - let mut out = s.sort_with(options); + let mut out = s.sort_with(options)?; if let Some((offset, len)) = slice { out = out.slice(offset, len); } diff --git a/crates/polars-core/src/series/implementations/binary.rs b/crates/polars-core/src/series/implementations/binary.rs index 86705e8f9af3..b0da78d5141a 100644 --- a/crates/polars-core/src/series/implementations/binary.rs +++ b/crates/polars-core/src/series/implementations/binary.rs @@ -173,8 +173,8 @@ impl SeriesTrait for SeriesWrap { self.0.get_any_value_unchecked(index) } - fn sort_with(&self, options: SortOptions) -> Series { - ChunkSort::sort_with(&self.0, options).into_series() + fn sort_with(&self, options: SortOptions) -> PolarsResult { + Ok(ChunkSort::sort_with(&self.0, options).into_series()) } fn arg_sort(&self, options: SortOptions) -> IdxCa { diff --git a/crates/polars-core/src/series/implementations/binary_offset.rs b/crates/polars-core/src/series/implementations/binary_offset.rs index d0a5523c7d8c..8cea50d76212 100644 --- a/crates/polars-core/src/series/implementations/binary_offset.rs +++ b/crates/polars-core/src/series/implementations/binary_offset.rs @@ -136,8 +136,8 @@ impl SeriesTrait for SeriesWrap { self.0.get_any_value_unchecked(index) } - fn sort_with(&self, options: SortOptions) -> Series { - ChunkSort::sort_with(&self.0, options).into_series() + fn sort_with(&self, options: SortOptions) -> PolarsResult { + Ok(ChunkSort::sort_with(&self.0, options).into_series()) } fn arg_sort(&self, options: SortOptions) -> IdxCa { diff --git a/crates/polars-core/src/series/implementations/boolean.rs b/crates/polars-core/src/series/implementations/boolean.rs index 1aa17d298d3f..06f9f0920243 100644 --- a/crates/polars-core/src/series/implementations/boolean.rs +++ b/crates/polars-core/src/series/implementations/boolean.rs @@ -198,8 +198,8 @@ impl SeriesTrait for SeriesWrap { self.0.get_any_value_unchecked(index) } - fn sort_with(&self, options: SortOptions) -> Series { - ChunkSort::sort_with(&self.0, options).into_series() + fn sort_with(&self, options: SortOptions) -> PolarsResult { + Ok(ChunkSort::sort_with(&self.0, options).into_series()) } fn arg_sort(&self, options: SortOptions) -> IdxCa { diff --git a/crates/polars-core/src/series/implementations/categorical.rs b/crates/polars-core/src/series/implementations/categorical.rs index f9a23c261417..1efd20432d94 100644 --- a/crates/polars-core/src/series/implementations/categorical.rs +++ b/crates/polars-core/src/series/implementations/categorical.rs @@ -226,8 +226,8 @@ impl SeriesTrait for SeriesWrap { self.0.get_any_value_unchecked(index) } - fn sort_with(&self, options: SortOptions) -> Series { - self.0.sort_with(options).into_series() + fn sort_with(&self, options: SortOptions) -> PolarsResult { + Ok(self.0.sort_with(options).into_series()) } fn arg_sort(&self, options: SortOptions) -> IdxCa { diff --git a/crates/polars-core/src/series/implementations/dates_time.rs b/crates/polars-core/src/series/implementations/dates_time.rs index 5f5e993dcbbc..c0516e92ce28 100644 --- a/crates/polars-core/src/series/implementations/dates_time.rs +++ b/crates/polars-core/src/series/implementations/dates_time.rs @@ -266,8 +266,8 @@ macro_rules! impl_dyn_series { self.0.get_any_value_unchecked(index) } - fn sort_with(&self, options: SortOptions) -> Series { - self.0.sort_with(options).$into_logical().into_series() + fn sort_with(&self, options: SortOptions) -> PolarsResult { + Ok(self.0.sort_with(options).$into_logical().into_series()) } fn arg_sort(&self, options: SortOptions) -> IdxCa { diff --git a/crates/polars-core/src/series/implementations/datetime.rs b/crates/polars-core/src/series/implementations/datetime.rs index c4c8bfe1b47b..29accf0ac33e 100644 --- a/crates/polars-core/src/series/implementations/datetime.rs +++ b/crates/polars-core/src/series/implementations/datetime.rs @@ -266,11 +266,12 @@ impl SeriesTrait for SeriesWrap { self.0.get_any_value_unchecked(index) } - fn sort_with(&self, options: SortOptions) -> Series { - self.0 + fn sort_with(&self, options: SortOptions) -> PolarsResult { + Ok(self + .0 .sort_with(options) .into_datetime(self.0.time_unit(), self.0.time_zone().clone()) - .into_series() + .into_series()) } fn arg_sort(&self, options: SortOptions) -> IdxCa { diff --git a/crates/polars-core/src/series/implementations/decimal.rs b/crates/polars-core/src/series/implementations/decimal.rs index d50befa1059c..1c3e6f61566c 100644 --- a/crates/polars-core/src/series/implementations/decimal.rs +++ b/crates/polars-core/src/series/implementations/decimal.rs @@ -261,11 +261,12 @@ impl SeriesTrait for SeriesWrap { self.0.get_any_value_unchecked(index) } - fn sort_with(&self, options: SortOptions) -> Series { - self.0 + fn sort_with(&self, options: SortOptions) -> PolarsResult { + Ok(self + .0 .sort_with(options) .into_decimal_unchecked(self.0.precision(), self.0.scale()) - .into_series() + .into_series()) } fn arg_sort(&self, options: SortOptions) -> IdxCa { diff --git a/crates/polars-core/src/series/implementations/duration.rs b/crates/polars-core/src/series/implementations/duration.rs index 30e3f30857e0..0dfecd55de9c 100644 --- a/crates/polars-core/src/series/implementations/duration.rs +++ b/crates/polars-core/src/series/implementations/duration.rs @@ -326,11 +326,12 @@ impl SeriesTrait for SeriesWrap { self.0.get_any_value_unchecked(index) } - fn sort_with(&self, options: SortOptions) -> Series { - self.0 + fn sort_with(&self, options: SortOptions) -> PolarsResult { + Ok(self + .0 .sort_with(options) .into_duration(self.0.time_unit()) - .into_series() + .into_series()) } fn arg_sort(&self, options: SortOptions) -> IdxCa { diff --git a/crates/polars-core/src/series/implementations/floats.rs b/crates/polars-core/src/series/implementations/floats.rs index 3332649da16b..3f566463acfd 100644 --- a/crates/polars-core/src/series/implementations/floats.rs +++ b/crates/polars-core/src/series/implementations/floats.rs @@ -229,8 +229,8 @@ macro_rules! impl_dyn_series { self.0.get_any_value_unchecked(index) } - fn sort_with(&self, options: SortOptions) -> Series { - ChunkSort::sort_with(&self.0, options).into_series() + fn sort_with(&self, options: SortOptions) -> PolarsResult { + Ok(ChunkSort::sort_with(&self.0, options).into_series()) } fn arg_sort(&self, options: SortOptions) -> IdxCa { diff --git a/crates/polars-core/src/series/implementations/mod.rs b/crates/polars-core/src/series/implementations/mod.rs index ab51d751436a..6f589f1b61a1 100644 --- a/crates/polars-core/src/series/implementations/mod.rs +++ b/crates/polars-core/src/series/implementations/mod.rs @@ -332,8 +332,8 @@ macro_rules! impl_dyn_series { self.0.get_any_value_unchecked(index) } - fn sort_with(&self, options: SortOptions) -> Series { - ChunkSort::sort_with(&self.0, options).into_series() + fn sort_with(&self, options: SortOptions) -> PolarsResult { + Ok(ChunkSort::sort_with(&self.0, options).into_series()) } fn arg_sort(&self, options: SortOptions) -> IdxCa { diff --git a/crates/polars-core/src/series/implementations/string.rs b/crates/polars-core/src/series/implementations/string.rs index 9a8c1b1f6aa4..38a60a1be615 100644 --- a/crates/polars-core/src/series/implementations/string.rs +++ b/crates/polars-core/src/series/implementations/string.rs @@ -180,8 +180,8 @@ impl SeriesTrait for SeriesWrap { self.0.get_any_value_unchecked(index) } - fn sort_with(&self, options: SortOptions) -> Series { - ChunkSort::sort_with(&self.0, options).into_series() + fn sort_with(&self, options: SortOptions) -> PolarsResult { + Ok(ChunkSort::sort_with(&self.0, options).into_series()) } fn arg_sort(&self, options: SortOptions) -> IdxCa { diff --git a/crates/polars-core/src/series/implementations/struct_.rs b/crates/polars-core/src/series/implementations/struct_.rs index fcf85754aac7..219b88f36f7d 100644 --- a/crates/polars-core/src/series/implementations/struct_.rs +++ b/crates/polars-core/src/series/implementations/struct_.rs @@ -313,7 +313,7 @@ impl SeriesTrait for SeriesWrap { &self.0 } - fn sort_with(&self, options: SortOptions) -> Series { + fn sort_with(&self, options: SortOptions) -> PolarsResult { let df = self.0.clone().unnest(); let desc = if options.descending { @@ -321,17 +321,15 @@ impl SeriesTrait for SeriesWrap { } else { vec![false; df.width()] }; - let out = df - .sort_impl( - df.columns.clone(), - desc, - options.nulls_last, - options.maintain_order, - None, - options.multithreaded, - ) - .unwrap(); - StructChunked::new_unchecked(self.name(), &out.columns).into_series() + let out = df.sort_impl( + df.columns.clone(), + desc, + options.nulls_last, + options.maintain_order, + None, + options.multithreaded, + )?; + Ok(StructChunked::new_unchecked(self.name(), &out.columns).into_series()) } fn arg_sort(&self, options: SortOptions) -> IdxCa { diff --git a/crates/polars-core/src/series/mod.rs b/crates/polars-core/src/series/mod.rs index b72a83b4a3c9..2eb60f08788b 100644 --- a/crates/polars-core/src/series/mod.rs +++ b/crates/polars-core/src/series/mod.rs @@ -285,7 +285,7 @@ impl Series { Ok(self) } - pub fn sort(&self, descending: bool, nulls_last: bool) -> Self { + pub fn sort(&self, descending: bool, nulls_last: bool) -> PolarsResult { self.sort_with(SortOptions { descending, nulls_last, diff --git a/crates/polars-core/src/series/series_trait.rs b/crates/polars-core/src/series/series_trait.rs index b97ffc864257..ff4a7eca65a6 100644 --- a/crates/polars-core/src/series/series_trait.rs +++ b/crates/polars-core/src/series/series_trait.rs @@ -333,8 +333,8 @@ pub trait SeriesTrait: invalid_operation_panic!(get_unchecked, self) } - fn sort_with(&self, _options: SortOptions) -> Series { - invalid_operation_panic!(sort_with, self) + fn sort_with(&self, _options: SortOptions) -> PolarsResult { + polars_bail!(opq = sort_with, self._dtype()); } /// Retrieve the indexes needed for a sort. diff --git a/crates/polars-lazy/src/physical_plan/expressions/sort.rs b/crates/polars-lazy/src/physical_plan/expressions/sort.rs index 0df7d4b94ab9..207ad3c82915 100644 --- a/crates/polars-lazy/src/physical_plan/expressions/sort.rs +++ b/crates/polars-lazy/src/physical_plan/expressions/sort.rs @@ -51,7 +51,7 @@ impl PhysicalExpr for SortExpr { } fn evaluate(&self, df: &DataFrame, state: &ExecutionState) -> PolarsResult { let series = self.physical_expr.evaluate(df, state)?; - Ok(series.sort_with(self.options)) + series.sort_with(self.options) } #[allow(clippy::ptr_arg)] @@ -65,7 +65,7 @@ impl PhysicalExpr for SortExpr { match ac.agg_state() { AggState::AggregatedList(s) => { let ca = s.list().unwrap(); - let out = ca.lst_sort(self.options); + let out = ca.lst_sort(self.options)?; ac.with_series(out.into_series(), true, Some(&self.expr))?; }, _ => { diff --git a/crates/polars-ops/src/chunked_array/array/namespace.rs b/crates/polars-ops/src/chunked_array/array/namespace.rs index 42e402f25066..42555ef52080 100644 --- a/crates/polars-ops/src/chunked_array/array/namespace.rs +++ b/crates/polars-ops/src/chunked_array/array/namespace.rs @@ -96,10 +96,10 @@ pub trait ArrayNameSpace: AsArray { array_all(ca) } - fn array_sort(&self, options: SortOptions) -> ArrayChunked { + fn array_sort(&self, options: SortOptions) -> PolarsResult { let ca = self.as_array(); // SAFETY: Sort only changes the order of the elements in each subarray. - unsafe { ca.apply_amortized_same_type(|s| s.as_ref().sort_with(options)) } + unsafe { ca.try_apply_amortized_same_type(|s| s.as_ref().sort_with(options)) } } fn array_reverse(&self) -> ArrayChunked { diff --git a/crates/polars-ops/src/chunked_array/list/namespace.rs b/crates/polars-ops/src/chunked_array/list/namespace.rs index 38ca7732c40c..a4f7e78e2c6d 100644 --- a/crates/polars-ops/src/chunked_array/list/namespace.rs +++ b/crates/polars-ops/src/chunked_array/list/namespace.rs @@ -242,11 +242,10 @@ pub trait ListNameSpaceImpl: AsList { } } - #[must_use] - fn lst_sort(&self, options: SortOptions) -> ListChunked { + fn lst_sort(&self, options: SortOptions) -> PolarsResult { let ca = self.as_list(); - let out = ca.apply_amortized(|s| s.as_ref().sort_with(options)); - self.same_type(out) + let out = ca.try_apply_amortized(|s| s.as_ref().sort_with(options))?; + Ok(self.same_type(out)) } #[must_use] diff --git a/crates/polars-ops/src/series/ops/cut.rs b/crates/polars-ops/src/series/ops/cut.rs index b7e87a23d8a8..3b7d10dcb0a4 100644 --- a/crates/polars-ops/src/series/ops/cut.rs +++ b/crates/polars-ops/src/series/ops/cut.rs @@ -103,7 +103,7 @@ pub fn qcut( include_breaks: bool, ) -> PolarsResult { let s = s.cast(&DataType::Float64)?; - let s2 = s.sort(false, false); + let s2 = s.sort(false, false)?; let ca = s2.f64()?; if ca.null_count() == ca.len() { diff --git a/crates/polars-pipe/src/executors/sinks/sort/sink.rs b/crates/polars-pipe/src/executors/sinks/sort/sink.rs index 1411a76c872d..01138240b6ce 100644 --- a/crates/polars-pipe/src/executors/sinks/sort/sink.rs +++ b/crates/polars-pipe/src/executors/sinks/sort/sink.rs @@ -188,7 +188,7 @@ impl Sink for SortSink { nulls_last: self.sort_args.nulls_last, multithreaded: true, maintain_order: self.sort_args.maintain_order, - }); + })?; let instant = self.ooc_start.unwrap(); if context.verbose { diff --git a/crates/polars-plan/src/dsl/function_expr/array.rs b/crates/polars-plan/src/dsl/function_expr/array.rs index a731a8e0c70a..9dc04bd477c7 100644 --- a/crates/polars-plan/src/dsl/function_expr/array.rs +++ b/crates/polars-plan/src/dsl/function_expr/array.rs @@ -186,7 +186,7 @@ pub(super) fn all(s: &Series) -> PolarsResult { } pub(super) fn sort(s: &Series, options: SortOptions) -> PolarsResult { - Ok(s.array()?.array_sort(options).into_series()) + Ok(s.array()?.array_sort(options)?.into_series()) } pub(super) fn reverse(s: &Series) -> PolarsResult { diff --git a/crates/polars-plan/src/dsl/function_expr/list.rs b/crates/polars-plan/src/dsl/function_expr/list.rs index b9dafcf9e305..3fdbf6a18134 100644 --- a/crates/polars-plan/src/dsl/function_expr/list.rs +++ b/crates/polars-plan/src/dsl/function_expr/list.rs @@ -551,7 +551,7 @@ pub(super) fn diff(s: &Series, n: i64, null_behavior: NullBehavior) -> PolarsRes } pub(super) fn sort(s: &Series, options: SortOptions) -> PolarsResult { - Ok(s.list()?.lst_sort(options).into_series()) + Ok(s.list()?.lst_sort(options)?.into_series()) } pub(super) fn reverse(s: &Series) -> PolarsResult { diff --git a/crates/polars-sql/tests/iss_8395.rs b/crates/polars-sql/tests/iss_8395.rs index bc20f1e448f6..06d74affb33e 100644 --- a/crates/polars-sql/tests/iss_8395.rs +++ b/crates/polars-sql/tests/iss_8395.rs @@ -19,7 +19,7 @@ fn iss_8395() -> PolarsResult<()> { let df = res.collect()?; // assert that the df only contains [vegetables, seafood] - let s = df.column("category")?.unique()?.sort(false, false); + let s = df.column("category")?.unique()?.sort(false, false)?; let expected = Series::new("category", &["seafood", "vegetables"]); assert!(s.equals(&expected)); Ok(()) diff --git a/py-polars/src/series/mod.rs b/py-polars/src/series/mod.rs index 9322eaa9d93a..b3175bc9e731 100644 --- a/py-polars/src/series/mod.rs +++ b/py-polars/src/series/mod.rs @@ -283,8 +283,12 @@ impl PySeries { } } - fn sort(&mut self, descending: bool, nulls_last: bool) -> Self { - self.series.sort(descending, nulls_last).into() + fn sort(&mut self, descending: bool, nulls_last: bool) -> PyResult { + Ok(self + .series + .sort(descending, nulls_last) + .map_err(PyPolarsErr::from)? + .into()) } fn take_with_series(&self, indices: &PySeries) -> PyResult {