Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/main' into 16158-native-pyo3-datetime-support
Browse files Browse the repository at this point in the history
  • Loading branch information
pythonspeed committed May 13, 2024
2 parents b9cc590 + 81cc802 commit 84a2141
Show file tree
Hide file tree
Showing 170 changed files with 5,425 additions and 3,632 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/test-rust.yml
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ jobs:
-p polars-io
-p polars-lazy
-p polars-ops
-p polars-parquet
-p polars-plan
-p polars-row
-p polars-sql
Expand All @@ -68,6 +69,7 @@ jobs:
-p polars-io
-p polars-lazy
-p polars-ops
-p polars-parquet
-p polars-plan
-p polars-row
-p polars-sql
Expand Down
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 2 additions & 4 deletions _typos.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,17 @@ extend-ignore-identifiers-re = [
]

[default.extend-identifiers]
arange = "arange"
bck = "bck"
Fo = "Fo"
ND = "ND"
ba = "ba"
nd = "nd"
opt_nd = "opt_nd"
ser = "ser"
strat = "strat"
width_strat = "width_strat"

[default.extend-words]
iif = "iif"
arange = "arange"
strat = "strat"
'"r0ot"' = "r0ot"
wee = "wee"

Expand Down
1 change: 1 addition & 0 deletions crates/polars-arrow/src/compute/cast/binary_to.rs
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,7 @@ pub fn binary_to_dictionary<O: Offset, K: DictionaryKey>(
from: &BinaryArray<O>,
) -> PolarsResult<DictionaryArray<K>> {
let mut array = MutableDictionaryArray::<K, MutableBinaryArray<O>>::new();
array.reserve(from.len());
array.try_extend(from.iter())?;

Ok(array.into())
Expand Down
2 changes: 2 additions & 0 deletions crates/polars-arrow/src/compute/cast/binview_to.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ pub(super) fn binview_to_dictionary<K: DictionaryKey>(
from: &BinaryViewArray,
) -> PolarsResult<DictionaryArray<K>> {
let mut array = MutableDictionaryArray::<K, MutableBinaryViewArray<[u8]>>::new();
array.reserve(from.len());
array.try_extend(from.iter())?;

Ok(array.into())
Expand All @@ -30,6 +31,7 @@ pub(super) fn utf8view_to_dictionary<K: DictionaryKey>(
from: &Utf8ViewArray,
) -> PolarsResult<DictionaryArray<K>> {
let mut array = MutableDictionaryArray::<K, MutableBinaryViewArray<str>>::new();
array.reserve(from.len());
array.try_extend(from.iter())?;

Ok(array.into())
Expand Down
1 change: 1 addition & 0 deletions crates/polars-arrow/src/compute/cast/primitive_to.rs
Original file line number Diff line number Diff line change
Expand Up @@ -318,6 +318,7 @@ pub fn primitive_to_dictionary<T: NativeType + Eq + Hash, K: DictionaryKey>(
let mut array = MutableDictionaryArray::<K, _>::try_empty(MutablePrimitiveArray::<T>::from(
from.data_type().clone(),
))?;
array.reserve(from.len());
array.try_extend(iter)?;

Ok(array.into())
Expand Down
1 change: 1 addition & 0 deletions crates/polars-arrow/src/compute/cast/utf8_to.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ pub fn utf8_to_dictionary<O: Offset, K: DictionaryKey>(
from: &Utf8Array<O>,
) -> PolarsResult<DictionaryArray<K>> {
let mut array = MutableDictionaryArray::<K, MutableUtf8Array<O>>::new();
array.reserve(from.len());
array.try_extend(from.iter())?;

Ok(array.into())
Expand Down
2 changes: 2 additions & 0 deletions crates/polars-core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ take_opt_iter = []
group_by_list = []
# rolling window functions
rolling_window = []
rolling_window_by = []
diagonal_concat = []
dataframe_arithmetic = []
product = []
Expand Down Expand Up @@ -135,6 +136,7 @@ docs-selection = [
"dot_product",
"row_hash",
"rolling_window",
"rolling_window_by",
"dtype-categorical",
"dtype-decimal",
"diagonal_concat",
Expand Down
18 changes: 17 additions & 1 deletion crates/polars-core/src/chunked_array/ops/rolling_window.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
use arrow::legacy::prelude::DynArgs;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};

#[derive(Clone)]
#[derive(Clone, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct RollingOptionsFixedWindow {
/// The length of the window.
pub window_size: usize,
Expand All @@ -11,9 +14,22 @@ pub struct RollingOptionsFixedWindow {
pub weights: Option<Vec<f64>>,
/// Set the labels at the center of the window.
pub center: bool,
#[cfg_attr(feature = "serde", serde(skip))]
pub fn_params: DynArgs,
}

#[cfg(feature = "rolling_window")]
impl PartialEq for RollingOptionsFixedWindow {
    /// Two option sets compare equal only when every plain field matches
    /// and *neither* side carries custom `fn_params` — the params are an
    /// opaque `DynArgs` and cannot be compared, so any non-`None` value
    /// makes the options unequal by construction.
    fn eq(&self, other: &Self) -> bool {
        // Opaque params present on either side -> never equal.
        if self.fn_params.is_some() || other.fn_params.is_some() {
            return false;
        }
        self.window_size == other.window_size
            && self.min_periods == other.min_periods
            && self.weights == other.weights
            && self.center == other.center
    }
}

impl Default for RollingOptionsFixedWindow {
fn default() -> Self {
RollingOptionsFixedWindow {
Expand Down
3 changes: 3 additions & 0 deletions crates/polars-core/src/frame/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2249,6 +2249,9 @@ impl DataFrame {
if offset == 0 && length == self.height() {
return self.clone();
}
if length == 0 {
return self.clear();
}
let col = self
.columns
.iter()
Expand Down
6 changes: 5 additions & 1 deletion crates/polars-core/src/series/implementations/null.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,11 @@ impl NullChunked {
}
}
}
impl PrivateSeriesNumeric for NullChunked {}
impl PrivateSeriesNumeric for NullChunked {
    // A null-typed series has no concrete numeric values; expose its
    // small bit representation as a fully-null UInt32 array that keeps
    // the chunked array's name and length.
    fn bit_repr_small(&self) -> UInt32Chunked {
        UInt32Chunked::full_null(self.name.as_ref(), self.len())
    }
}

impl PrivateSeries for NullChunked {
fn compute_len(&mut self) {
Expand Down
22 changes: 12 additions & 10 deletions crates/polars-io/src/csv/read/read_impl/batched_mmap.rs
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ impl<'a> CoreReader<'a> {
to_cast: self.to_cast,
ignore_errors: self.ignore_errors,
truncate_ragged_lines: self.truncate_ragged_lines,
n_rows: self.n_rows,
remaining: self.n_rows.unwrap_or(usize::MAX),
encoding: self.encoding,
separator: self.separator,
schema: self.schema,
Expand All @@ -197,7 +197,7 @@ pub struct BatchedCsvReaderMmap<'a> {
truncate_ragged_lines: bool,
to_cast: Vec<Field>,
ignore_errors: bool,
n_rows: Option<usize>,
remaining: usize,
encoding: CsvEncoding,
separator: u8,
schema: SchemaRef,
Expand All @@ -211,14 +211,9 @@ pub struct BatchedCsvReaderMmap<'a> {

impl<'a> BatchedCsvReaderMmap<'a> {
pub fn next_batches(&mut self, n: usize) -> PolarsResult<Option<Vec<DataFrame>>> {
if n == 0 {
if n == 0 || self.remaining == 0 {
return Ok(None);
}
if let Some(n_rows) = self.n_rows {
if self.rows_read >= n_rows as IdxSize {
return Ok(None);
}
}

// get next `n` offset positions.
let file_chunks_iter = (&mut self.file_chunks_iter).take(n);
Expand Down Expand Up @@ -274,8 +269,15 @@ impl<'a> BatchedCsvReaderMmap<'a> {
if self.row_index.is_some() {
update_row_counts2(&mut chunks, self.rows_read)
}
for df in &chunks {
self.rows_read += df.height() as IdxSize;
for df in &mut chunks {
let h = df.height();

if self.remaining < h {
*df = df.slice(0, self.remaining)
};
self.remaining = self.remaining.saturating_sub(h);

self.rows_read += h as IdxSize;
}
Ok(Some(chunks))
}
Expand Down
28 changes: 15 additions & 13 deletions crates/polars-io/src/csv/read/read_impl/batched_read.rs
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,6 @@ impl<'a> CoreReader<'a> {

Ok(BatchedCsvReaderRead {
chunk_size: self.chunk_size,
finished: false,
file_chunk_reader: chunk_iter,
file_chunks: vec![],
projection,
Expand All @@ -260,20 +259,20 @@ impl<'a> CoreReader<'a> {
to_cast: self.to_cast,
ignore_errors: self.ignore_errors,
truncate_ragged_lines: self.truncate_ragged_lines,
n_rows: self.n_rows,
remaining: self.n_rows.unwrap_or(usize::MAX),
encoding: self.encoding,
separator: self.separator,
schema: self.schema,
rows_read: 0,
_cat_lock,
decimal_comma: self.decimal_comma,
finished: false,
})
}
}

pub struct BatchedCsvReaderRead<'a> {
chunk_size: usize,
finished: bool,
file_chunk_reader: ChunkReader<'a>,
file_chunks: Vec<(SyncPtr<u8>, usize)>,
projection: Vec<usize>,
Expand All @@ -287,7 +286,7 @@ pub struct BatchedCsvReaderRead<'a> {
to_cast: Vec<Field>,
ignore_errors: bool,
truncate_ragged_lines: bool,
n_rows: Option<usize>,
remaining: usize,
encoding: CsvEncoding,
separator: u8,
schema: SchemaRef,
Expand All @@ -297,19 +296,15 @@ pub struct BatchedCsvReaderRead<'a> {
#[cfg(not(feature = "dtype-categorical"))]
_cat_lock: Option<u8>,
decimal_comma: bool,
finished: bool,
}
//
impl<'a> BatchedCsvReaderRead<'a> {
/// `n` number of batches.
pub fn next_batches(&mut self, n: usize) -> PolarsResult<Option<Vec<DataFrame>>> {
if n == 0 || self.finished {
if n == 0 || self.remaining == 0 || self.finished {
return Ok(None);
}
if let Some(n_rows) = self.n_rows {
if self.rows_read >= n_rows as IdxSize {
return Ok(None);
}
}

// get next `n` offset positions.

Expand All @@ -331,7 +326,7 @@ impl<'a> BatchedCsvReaderRead<'a> {
// get the final slice
self.file_chunks
.push(self.file_chunk_reader.get_buf_remaining());
self.finished = true
self.finished = true;
}

// depleted the offsets iterator, we are done as well.
Expand Down Expand Up @@ -380,8 +375,15 @@ impl<'a> BatchedCsvReaderRead<'a> {
if self.row_index.is_some() {
update_row_counts2(&mut chunks, self.rows_read)
}
for df in &chunks {
self.rows_read += df.height() as IdxSize;
for df in &mut chunks {
let h = df.height();

if self.remaining < h {
*df = df.slice(0, self.remaining)
};
self.remaining = self.remaining.saturating_sub(h);

self.rows_read += h as IdxSize;
}
Ok(Some(chunks))
}
Expand Down
Loading

0 comments on commit 84a2141

Please sign in to comment.