Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/main' into 16078-pyo3-0.21-bound…
Browse files Browse the repository at this point in the history
…-apis
  • Loading branch information
pythonspeed committed May 8, 2024
2 parents ccf582a + 12b40b9 commit 3c7de9e
Show file tree
Hide file tree
Showing 45 changed files with 496 additions and 395 deletions.
1 change: 0 additions & 1 deletion codecov.yml → .github/codecov.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ coverage:
patch: off
comment:
require_changes: true
after_n_builds: 3
ignore:
- crates/polars-arrow/src/io/flight/*.rs
- crates/polars-arrow/src/io/ipc/append/*.rs
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/benchmark.yml
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ jobs:
save-if: ${{ github.ref_name == 'main' }}

- name: Run sccache-cache
uses: mozilla-actions/sccache-action@v0.0.3
uses: mozilla-actions/sccache-action@v0.0.4

- name: Install Polars release build
env:
Expand Down
59 changes: 37 additions & 22 deletions .github/workflows/test-coverage.yml
Original file line number Diff line number Diff line change
Expand Up @@ -76,13 +76,10 @@ jobs:
run: cargo llvm-cov report --lcov --output-path coverage-rust.lcov

- name: Upload coverage report
uses: codecov/codecov-action@v4
uses: actions/upload-artifact@v4
with:
token: ${{ secrets.CODECOV_TOKEN }}
files: coverage-rust.lcov
root_dir: ${{ github.workspace }}
flags: rust
fail_ci_if_error: true
name: coverage-rust
path: coverage-rust.lcov

coverage-python:
# Running under ubuntu doesn't seem to work:
Expand All @@ -94,7 +91,7 @@ jobs:
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.11'
python-version: '3.12'

- name: Create virtual environment
run: |
Expand Down Expand Up @@ -126,33 +123,51 @@ jobs:

- name: Run Python tests
working-directory: py-polars
run: pytest --cov -n auto --dist loadgroup -m "not release and not benchmark and not docs" --cov-report xml:main.xml
continue-on-error: true
run: >
pytest
-n auto --dist loadgroup
-m "not release and not benchmark and not docs"
-k 'not test_polars_import'
--cov --cov-report xml:main.xml
- name: Run Python tests - async reader
working-directory: py-polars
env:
POLARS_FORCE_ASYNC: 1
run: pytest --cov -n auto --dist loadgroup -m "not release and not benchmark and not docs" tests/unit/io/ --cov-report xml:async.xml
continue-on-error: true
run: >
pytest tests/unit/io/
-n auto --dist loadgroup
-m "not release and not benchmark and not docs"
--cov --cov-report xml:async.xml --cov-fail-under=0
- name: Report Rust coverage
run: cargo llvm-cov report --lcov --output-path coverage-python.lcov

- name: Upload coverage reports - Python
uses: codecov/codecov-action@v4
- name: Upload coverage reports
uses: actions/upload-artifact@v4
with:
token: ${{ secrets.CODECOV_TOKEN }}
files: py-polars/main.xml,py-polars/async.xml
root_dir: ${{ github.workspace }}
flags: python
fail_ci_if_error: true
name: coverage-python
path: |
coverage-python.lcov
py-polars/main.xml
py-polars/async.xml
upload-coverage:
needs: [coverage-rust, coverage-python]
runs-on: ubuntu-latest

steps:
# Needed to fetch the Codecov config file
- uses: actions/checkout@v4

- name: Download coverage reports
uses: actions/download-artifact@v4
with:
merge-multiple: true

- name: Upload coverage report - Rust
- name: Upload coverage reports
uses: codecov/codecov-action@v4
with:
token: ${{ secrets.CODECOV_TOKEN }}
files: coverage-python.lcov
files: coverage-rust.lcov,coverage-python.lcov,py-polars/main.xml,py-polars/async.xml
root_dir: ${{ github.workspace }}
flags: rust
fail_ci_if_error: true
9 changes: 2 additions & 7 deletions .github/workflows/test-python.yml
Original file line number Diff line number Diff line change
Expand Up @@ -85,14 +85,9 @@ jobs:
python tests/docs/run_doctest.py
pytest tests/docs/test_user_guide.py -m docs
- name: Run tests and report coverage
- name: Run tests
if: github.ref_name != 'main'
env:
# TODO: Re-enable coverage for Ubuntu + Python 3.12 tests
# Currently skipped due to performance issues in coverage:
# https://github.com/nedbat/coveragepy/issues/1665
COV: ${{ !(matrix.os == 'ubuntu-latest' && matrix.python-version == '3.12') && '--cov' || '--no-cov' }}
run: pytest $COV -n auto --dist loadgroup -m "not release and not benchmark and not docs"
run: pytest -n auto --dist loadgroup -m "not release and not benchmark and not docs"

- name: Run tests async reader tests
if: github.ref_name != 'main' && matrix.os != 'windows-latest'
Expand Down
6 changes: 5 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ chrono-tz = "0.8.1"
ciborium = "0.2"
crossbeam-channel = "0.5.8"
crossbeam-queue = "0.3"
either = "1.9"
either = "1.11"
ethnum = "1.3.2"
fallible-streaming-iterator = "0.1.9"
futures = "0.3.25"
Expand Down
1 change: 0 additions & 1 deletion crates/polars-arrow/src/compute/cast/binary_to.rs
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,6 @@ pub fn binary_to_dictionary<O: Offset, K: DictionaryKey>(
from: &BinaryArray<O>,
) -> PolarsResult<DictionaryArray<K>> {
let mut array = MutableDictionaryArray::<K, MutableBinaryArray<O>>::new();
array.reserve(from.len());
array.try_extend(from.iter())?;

Ok(array.into())
Expand Down
2 changes: 0 additions & 2 deletions crates/polars-arrow/src/compute/cast/binview_to.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ pub(super) fn binview_to_dictionary<K: DictionaryKey>(
from: &BinaryViewArray,
) -> PolarsResult<DictionaryArray<K>> {
let mut array = MutableDictionaryArray::<K, MutableBinaryViewArray<[u8]>>::new();
array.reserve(from.len());
array.try_extend(from.iter())?;

Ok(array.into())
Expand All @@ -31,7 +30,6 @@ pub(super) fn utf8view_to_dictionary<K: DictionaryKey>(
from: &Utf8ViewArray,
) -> PolarsResult<DictionaryArray<K>> {
let mut array = MutableDictionaryArray::<K, MutableBinaryViewArray<str>>::new();
array.reserve(from.len());
array.try_extend(from.iter())?;

Ok(array.into())
Expand Down
1 change: 0 additions & 1 deletion crates/polars-arrow/src/compute/cast/primitive_to.rs
Original file line number Diff line number Diff line change
Expand Up @@ -318,7 +318,6 @@ pub fn primitive_to_dictionary<T: NativeType + Eq + Hash, K: DictionaryKey>(
let mut array = MutableDictionaryArray::<K, _>::try_empty(MutablePrimitiveArray::<T>::from(
from.data_type().clone(),
))?;
array.reserve(from.len());
array.try_extend(iter)?;

Ok(array.into())
Expand Down
1 change: 0 additions & 1 deletion crates/polars-arrow/src/compute/cast/utf8_to.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ pub fn utf8_to_dictionary<O: Offset, K: DictionaryKey>(
from: &Utf8Array<O>,
) -> PolarsResult<DictionaryArray<K>> {
let mut array = MutableDictionaryArray::<K, MutableUtf8Array<O>>::new();
array.reserve(from.len());
array.try_extend(from.iter())?;

Ok(array.into())
Expand Down
11 changes: 0 additions & 11 deletions crates/polars-io/src/csv/read/options.rs
Original file line number Diff line number Diff line change
Expand Up @@ -133,17 +133,6 @@ pub(super) enum NullValuesCompiled {
}

impl NullValuesCompiled {
pub(super) fn apply_projection(&mut self, projections: &[usize]) {
if let Self::Columns(nv) = self {
let nv = projections
.iter()
.map(|i| std::mem::take(&mut nv[*i]))
.collect::<Vec<_>>();

*self = NullValuesCompiled::Columns(nv);
}
}

/// # Safety
///
/// The caller must ensure that `index` is in bounds
Expand Down
2 changes: 1 addition & 1 deletion crates/polars-io/src/csv/read/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -511,7 +511,7 @@ pub(super) fn parse_lines(

// SAFETY:
// process fields is in bounds
add_null = unsafe { null_values.is_null(field, processed_fields) }
add_null = unsafe { null_values.is_null(field, idx as usize) }
}
if add_null {
buf.add_null(!missing_is_null && field.is_empty())
Expand Down
10 changes: 2 additions & 8 deletions crates/polars-io/src/csv/read/read_impl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -224,21 +224,15 @@ impl<'a> CoreReader<'a> {
}
}

// create a null value for every column
let mut null_values = null_values.map(|nv| nv.compile(&schema)).transpose()?;
// Create a null value for every column
let null_values = null_values.map(|nv| nv.compile(&schema)).transpose()?;

if let Some(cols) = columns {
let mut prj = Vec::with_capacity(cols.len());
for col in cols {
let i = schema.try_index_of(&col)?;
prj.push(i);
}

// update null values with projection
if let Some(nv) = null_values.as_mut() {
nv.apply_projection(&prj);
}

projection = Some(prj);
}

Expand Down
2 changes: 1 addition & 1 deletion crates/polars-io/src/parquet/write/writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ where
WriteOptions {
write_statistics: self.statistics,
compression: self.compression,
version: Version::V1,
version: Version::V2,
data_pagesize_limit: self.data_page_size,
}
}
Expand Down
6 changes: 4 additions & 2 deletions crates/polars-lazy/src/physical_plan/executors/scan/csv.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use super::*;

pub struct CsvExec {
pub path: PathBuf,
pub schema: SchemaRef,
pub file_info: FileInfo,
pub options: CsvReaderOptions,
pub file_options: FileScanOptions,
pub predicate: Option<Arc<dyn PhysicalExpr>>,
Expand All @@ -26,7 +26,9 @@ impl CsvExec {
CsvReader::from_path(&self.path)
.unwrap()
.has_header(self.options.has_header)
.with_dtypes(Some(self.schema.clone()))
.with_schema(Some(
self.file_info.reader_schema.clone().unwrap().unwrap_right(),
))
.with_separator(self.options.separator)
.with_ignore_errors(self.options.ignore_errors)
.with_skip_rows(self.options.skip_rows)
Expand Down
18 changes: 15 additions & 3 deletions crates/polars-lazy/src/physical_plan/executors/scan/parquet.rs
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,12 @@ impl ParquetExec {
);

let mut reader = ParquetReader::new(file)
.with_schema(self.file_info.reader_schema.clone())
.with_schema(
self.file_info
.reader_schema
.clone()
.map(|either| either.unwrap_left()),
)
.read_parallel(parallel)
.set_low_memory(self.options.low_memory)
.use_statistics(self.options.use_statistics)
Expand Down Expand Up @@ -163,7 +168,9 @@ impl ParquetExec {
.file_info
.reader_schema
.as_ref()
.expect("should be set");
.expect("should be set")
.as_ref()
.unwrap_left();
let first_metadata = &self.metadata;
let cloud_options = self.cloud_options.as_ref();
let with_columns = self
Expand Down Expand Up @@ -343,7 +350,12 @@ impl ParquetExec {
);
return Ok(materialize_empty_df(
projection.as_deref(),
self.file_info.reader_schema.as_ref().unwrap(),
self.file_info
.reader_schema
.as_ref()
.unwrap()
.as_ref()
.unwrap_left(),
hive_partitions.as_deref(),
self.file_options.row_index.as_ref(),
));
Expand Down
2 changes: 1 addition & 1 deletion crates/polars-lazy/src/physical_plan/planner/lp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,7 @@ pub fn create_physical_plan(
let path = paths[0].clone();
Ok(Box::new(executors::CsvExec {
path,
schema: file_info.schema,
file_info,
options: csv_options,
predicate,
file_options,
Expand Down
Loading

0 comments on commit 3c7de9e

Please sign in to comment.