From 3564a77893b19cb72448bcf5846a1b68d821ab2d Mon Sep 17 00:00:00 2001
From: Stijn de Gooijer <stijndegooijer@gmail.com>
Date: Sat, 27 Apr 2024 10:06:06 +0200
Subject: [PATCH] feat(rust!): Rename to `CsvParserOptions` to
 `CsvReaderOptions`, use in `CsvReader` (#15919)

---
 crates/polars-io/src/csv/read/mod.rs          |   2 +-
 crates/polars-io/src/csv/read/options.rs      |  29 +-
 crates/polars-io/src/csv/read/reader.rs       | 380 ++++++++----------
 .../src/physical_plan/executors/scan/csv.rs   |   2 +-
 .../polars-pipe/src/executors/sources/csv.rs  |   6 +-
 .../src/logical_plan/builder_dsl.rs           |   4 +-
 .../src/logical_plan/conversion/scans.rs      |   2 +-
 .../polars-plan/src/logical_plan/file_scan.rs |   4 +-
 py-polars/polars/io/csv/functions.py          |  12 +-
 9 files changed, 207 insertions(+), 234 deletions(-)
diff --git a/crates/polars-io/src/csv/read/mod.rs b/crates/polars-io/src/csv/read/mod.rs
index 3fad37ce049a..5f5b93948f02 100644
--- a/crates/polars-io/src/csv/read/mod.rs
+++ b/crates/polars-io/src/csv/read/mod.rs
@@ -26,7 +26,7 @@ mod reader;
 mod splitfields;
 mod utils;
 
-pub use options::{CommentPrefix, CsvEncoding, CsvParserOptions, NullValues};
+pub use options::{CommentPrefix, CsvEncoding, CsvReaderOptions, NullValues};
 pub use parser::count_rows;
 pub use read_impl::batched_mmap::{BatchedCsvReaderMmap, OwnedBatchedCsvReaderMmap};
 pub use read_impl::batched_read::{BatchedCsvReaderRead, OwnedBatchedCsvReader};
diff --git a/crates/polars-io/src/csv/read/options.rs b/crates/polars-io/src/csv/read/options.rs
index f3b4f26ef1cc..3741bd6d9e47 100644
--- a/crates/polars-io/src/csv/read/options.rs
+++ b/crates/polars-io/src/csv/read/options.rs
@@ -5,11 +5,11 @@ use serde::{Deserialize, Serialize};
 
 #[derive(Clone, Debug, PartialEq, Eq, Hash)]
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
-pub struct CsvParserOptions {
+pub struct CsvReaderOptions {
     pub has_header: bool,
     pub separator: u8,
-    pub comment_prefix: Option<CommentPrefix>,
     pub quote_char: Option<u8>,
+    pub comment_prefix: Option<CommentPrefix>,
     pub eol_char: u8,
     pub encoding: CsvEncoding,
     pub skip_rows: usize,
@@ -27,13 +27,13 @@ pub struct CsvParserOptions {
     pub low_memory: bool,
 }
 
-impl Default for CsvParserOptions {
+impl Default for CsvReaderOptions {
     fn default() -> Self {
         Self {
             has_header: true,
             separator: b',',
-            comment_prefix: None,
             quote_char: Some(b'"'),
+            comment_prefix: None,
             eol_char: b'\n',
             encoding: CsvEncoding::default(),
             skip_rows: 0,
@@ -75,17 +75,22 @@ pub enum CommentPrefix {
 
 impl CommentPrefix {
     /// Creates a new `CommentPrefix` for the `Single` variant.
-    pub fn new_single(c: u8) -> Self {
-        CommentPrefix::Single(c)
+    pub fn new_single(prefix: u8) -> Self {
+        CommentPrefix::Single(prefix)
+    }
+
+    /// Creates a new `CommentPrefix` for the `Multi` variant.
+    pub fn new_multi(prefix: String) -> Self {
+        CommentPrefix::Multi(prefix)
     }
 
-    /// Creates a new `CommentPrefix`. If `Multi` variant is used and the string is longer
-    /// than 5 characters, it will return `None`.
-    pub fn new_multi(s: String) -> Option<Self> {
-        if s.len() <= 5 {
-            Some(CommentPrefix::Multi(s))
+    /// Creates a new `CommentPrefix` from a `&str`.
+    pub fn new_from_str(prefix: &str) -> Self {
+        if prefix.len() == 1 && prefix.chars().next().unwrap().is_ascii() {
+            let c = prefix.as_bytes()[0];
+            CommentPrefix::Single(c)
         } else {
-            None
+            CommentPrefix::Multi(prefix.to_string())
         }
     }
 }
diff --git a/crates/polars-io/src/csv/read/reader.rs b/crates/polars-io/src/csv/read/reader.rs
index ffe5c1f01353..65fcff4b3c47 100644
--- a/crates/polars-io/src/csv/read/reader.rs
+++ b/crates/polars-io/src/csv/read/reader.rs
@@ -8,7 +8,7 @@ use polars_time::prelude::*;
 use rayon::prelude::*;
 
 use super::infer_file_schema;
-use super::options::{CommentPrefix, CsvEncoding, NullValues};
+use super::options::{CommentPrefix, CsvEncoding, CsvReaderOptions, NullValues};
 use super::read_impl::batched_mmap::{
     to_batched_owned_mmap, BatchedCsvReaderMmap, OwnedBatchedCsvReaderMmap,
 };
@@ -42,43 +42,25 @@ pub struct CsvReader<'a, R>
 where
     R: MmapBytesReader,
 {
-    /// File or Stream object
+    /// File or Stream object.
     reader: R,
+    /// Options for the CSV reader.
+    options: CsvReaderOptions,
     /// Stop reading from the csv after this number of rows is reached
     n_rows: Option<usize>,
-    // used by error ignore logic
-    max_records: Option<usize>,
-    skip_rows_before_header: usize,
     /// Optional indexes of the columns to project
     projection: Option<Vec<usize>>,
     /// Optional column names to project/ select.
     columns: Option<Vec<String>>,
-    separator: Option<u8>,
-    pub(crate) schema: Option<SchemaRef>,
-    encoding: CsvEncoding,
-    n_threads: Option<usize>,
     path: Option<PathBuf>,
-    schema_overwrite: Option<SchemaRef>,
     dtype_overwrite: Option<&'a [DataType]>,
     sample_size: usize,
     chunk_size: usize,
-    comment_prefix: Option<CommentPrefix>,
-    null_values: Option<NullValues>,
     predicate: Option<Arc<dyn PhysicalIoExpr>>,
-    quote_char: Option<u8>,
-    skip_rows_after_header: usize,
-    try_parse_dates: bool,
     row_index: Option<RowIndex>,
     /// Aggregates chunk afterwards to a single chunk.
     rechunk: bool,
-    raise_if_empty: bool,
-    truncate_ragged_lines: bool,
     missing_is_null: bool,
-    low_memory: bool,
-    has_header: bool,
-    ignore_errors: bool,
-    eol_char: u8,
-    decimal_comma: bool,
 }
 
 impl<'a, R> CsvReader<'a, R>
@@ -86,39 +68,62 @@ where
     R: 'a + MmapBytesReader,
 {
     /// Skip these rows after the header
-    pub fn with_skip_rows_after_header(mut self, offset: usize) -> Self {
-        self.skip_rows_after_header = offset;
+    pub fn with_options(mut self, options: CsvReaderOptions) -> Self {
+        self.options = options;
         self
     }
 
-    /// Add a row index column.
-    pub fn with_row_index(mut self, row_index: Option<RowIndex>) -> Self {
-        self.row_index = row_index;
+    /// Sets whether the CSV file has headers
+    pub fn has_header(mut self, has_header: bool) -> Self {
+        self.options.has_header = has_header;
         self
     }
 
-    /// Sets the chunk size used by the parser. This influences performance
-    pub fn with_chunk_size(mut self, chunk_size: usize) -> Self {
-        self.chunk_size = chunk_size;
+    /// Sets the CSV file's column separator as a byte character
+    pub fn with_separator(mut self, separator: u8) -> Self {
+        self.options.separator = separator;
         self
     }
 
-    /// Set  [`CsvEncoding`]
-    pub fn with_encoding(mut self, enc: CsvEncoding) -> Self {
-        self.encoding = enc;
+    /// Sets the `char` used as quote char. The default is `b'"'`. If set to [`None`], quoting is disabled.
+    pub fn with_quote_char(mut self, quote_char: Option<u8>) -> Self {
+        self.options.quote_char = quote_char;
         self
     }
 
-    /// Try to stop parsing when `n` rows are parsed. During multithreaded parsing the upper bound `n` cannot
-    /// be guaranteed.
-    pub fn with_n_rows(mut self, num_rows: Option<usize>) -> Self {
-        self.n_rows = num_rows;
+    /// Sets the comment prefix for this instance. Lines starting with this prefix will be ignored.
+    pub fn with_comment_prefix(mut self, comment_prefix: Option<&str>) -> Self {
+        self.options.comment_prefix = comment_prefix.map(CommentPrefix::new_from_str);
         self
     }
 
-    /// Continue with next batch when a ParserError is encountered.
-    pub fn with_ignore_errors(mut self, ignore: bool) -> Self {
-        self.ignore_errors = ignore;
+    /// Sets the comment prefix from `CsvParserOptions` for internal initialization.
+    pub fn _with_comment_prefix(mut self, comment_prefix: Option<CommentPrefix>) -> Self {
+        self.options.comment_prefix = comment_prefix;
+        self
+    }
+
+    /// Set the `char` used as end-of-line char. The default is `b'\n'`.
+    pub fn with_end_of_line_char(mut self, eol_char: u8) -> Self {
+        self.options.eol_char = eol_char;
+        self
+    }
+
+    /// Set [`CsvEncoding`].
+    pub fn with_encoding(mut self, encoding: CsvEncoding) -> Self {
+        self.options.encoding = encoding;
+        self
+    }
+
+    /// Skip the first `n` rows during parsing. The header will be parsed at `n` lines.
+    pub fn with_skip_rows(mut self, n: usize) -> Self {
+        self.options.skip_rows = n;
+        self
+    }
+
+    /// Skip these rows after the header
+    pub fn with_skip_rows_after_header(mut self, n: usize) -> Self {
+        self.options.skip_rows_after_header = n;
         self
     }
 
@@ -127,74 +132,111 @@ where
     ///
     /// It is recommended to use [with_dtypes](Self::with_dtypes) instead.
     pub fn with_schema(mut self, schema: Option<SchemaRef>) -> Self {
-        self.schema = schema;
+        self.options.schema = schema;
         self
     }
 
-    /// Skip the first `n` rows during parsing. The header will be parsed at `n` lines.
-    pub fn with_skip_rows(mut self, skip_rows: usize) -> Self {
-        self.skip_rows_before_header = skip_rows;
+    /// Overwrite the schema with the dtypes in this given Schema. The given schema may be a subset
+    /// of the total schema.
+    pub fn with_dtypes(mut self, schema: Option<SchemaRef>) -> Self {
+        self.options.schema_overwrite = schema;
         self
     }
 
-    /// Rechunk the DataFrame to contiguous memory after the CSV is parsed.
-    pub fn with_rechunk(mut self, rechunk: bool) -> Self {
-        self.rechunk = rechunk;
+    /// Set the CSV reader to infer the schema of the file
+    ///
+    /// # Arguments
+    /// * `n` - Maximum number of rows read for schema inference.
+    ///         Setting this to `None` will do a full table scan (slow).
+    pub fn infer_schema(mut self, n: Option<usize>) -> Self {
+        // used by error ignore logic
+        self.options.infer_schema_length = n;
         self
     }
 
-    /// Set whether the CSV file has headers
-    pub fn has_header(mut self, has_header: bool) -> Self {
-        self.has_header = has_header;
+    /// Automatically try to parse dates/ datetimes and time. If parsing fails, columns remain of dtype `[DataType::String]`.
+    pub fn with_try_parse_dates(mut self, toggle: bool) -> Self {
+        self.options.try_parse_dates = toggle;
         self
     }
 
-    /// Set the CSV file's column separator as a byte character
-    pub fn with_separator(mut self, separator: u8) -> Self {
-        self.separator = Some(separator);
+    /// Set values that will be interpreted as missing/null.
+    ///
+    /// Note: any value you set as null value will not be escaped, so if quotation marks
+    /// are part of the null value you should include them.
+    pub fn with_null_values(mut self, null_values: Option<NullValues>) -> Self {
+        self.options.null_values = null_values;
         self
     }
 
-    /// Set the comment prefix for this instance. Lines starting with this prefix will be ignored.
-    pub fn with_comment_prefix(mut self, comment_prefix: Option<&str>) -> Self {
-        self.comment_prefix = comment_prefix.map(|s| {
-            if s.len() == 1 && s.chars().next().unwrap().is_ascii() {
-                CommentPrefix::Single(s.as_bytes()[0])
-            } else {
-                CommentPrefix::Multi(s.to_string())
-            }
-        });
+    /// Continue with next batch when a ParserError is encountered.
+    pub fn with_ignore_errors(mut self, toggle: bool) -> Self {
+        self.options.ignore_errors = toggle;
         self
     }
 
-    /// Sets the comment prefix from `CsvParserOptions` for internal initialization.
-    pub fn _with_comment_prefix(mut self, comment_prefix: Option<CommentPrefix>) -> Self {
-        self.comment_prefix = comment_prefix;
+    /// Raise an error if CSV is empty (otherwise return an empty frame)
+    pub fn raise_if_empty(mut self, toggle: bool) -> Self {
+        self.options.raise_if_empty = toggle;
         self
     }
 
-    pub fn with_end_of_line_char(mut self, eol_char: u8) -> Self {
-        self.eol_char = eol_char;
+    /// Truncate lines that are longer than the schema.
+    pub fn truncate_ragged_lines(mut self, toggle: bool) -> Self {
+        self.options.truncate_ragged_lines = toggle;
         self
     }
 
-    /// Set values that will be interpreted as missing/ null. Note that any value you set as null value
-    /// will not be escaped, so if quotation marks are part of the null value you should include them.
-    pub fn with_null_values(mut self, null_values: Option<NullValues>) -> Self {
-        self.null_values = null_values;
+    /// Parse floats with a comma as decimal separator.
+    pub fn with_decimal_comma(mut self, toggle: bool) -> Self {
+        self.options.decimal_comma = toggle;
         self
     }
 
-    /// Treat missing fields as null.
-    pub fn with_missing_is_null(mut self, missing_is_null: bool) -> Self {
-        self.missing_is_null = missing_is_null;
+    /// Set the number of threads used in CSV reading. The default uses the number of cores of
+    /// your cpu.
+    ///
+    /// Note that this only works if this is initialized with `CsvReader::from_path`.
+    /// Note that the number of cores is the maximum allowed number of threads.
+    pub fn with_n_threads(mut self, n: Option<usize>) -> Self {
+        self.options.n_threads = n;
         self
     }
 
-    /// Overwrite the schema with the dtypes in this given Schema. The given schema may be a subset
-    /// of the total schema.
-    pub fn with_dtypes(mut self, schema: Option<SchemaRef>) -> Self {
-        self.schema_overwrite = schema;
+    /// Reduce memory consumption at the expense of performance
+    pub fn low_memory(mut self, toggle: bool) -> Self {
+        self.options.low_memory = toggle;
+        self
+    }
+
+    /// Add a row index column.
+    pub fn with_row_index(mut self, row_index: Option<RowIndex>) -> Self {
+        self.row_index = row_index;
+        self
+    }
+
+    /// Sets the chunk size used by the parser. This influences performance
+    pub fn with_chunk_size(mut self, chunk_size: usize) -> Self {
+        self.chunk_size = chunk_size;
+        self
+    }
+
+    /// Try to stop parsing when `n` rows are parsed. During multithreaded parsing the upper bound `n` cannot
+    /// be guaranteed.
+    pub fn with_n_rows(mut self, num_rows: Option<usize>) -> Self {
+        self.n_rows = num_rows;
+        self
+    }
+
+    /// Rechunk the DataFrame to contiguous memory after the CSV is parsed.
+    pub fn with_rechunk(mut self, rechunk: bool) -> Self {
+        self.rechunk = rechunk;
+        self
+    }
+
+    /// Treat missing fields as null.
+    pub fn with_missing_is_null(mut self, missing_is_null: bool) -> Self {
+        self.missing_is_null = missing_is_null;
         self
     }
 
@@ -205,17 +247,6 @@ where
         self
     }
 
-    /// Set the CSV reader to infer the schema of the file
-    ///
-    /// # Arguments
-    /// * `max_records` - Maximum number of rows read for schema inference.
-    ///                   Setting this to `None` will do a full table scan (slow).
-    pub fn infer_schema(mut self, max_records: Option<usize>) -> Self {
-        // used by error ignore logic
-        self.max_records = max_records;
-        self
-    }
-
     /// Set the reader's column projection. This counts from 0, meaning that
     /// `vec![0, 4]` would select the 1st and 5th column.
     pub fn with_projection(mut self, projection: Option<Vec<usize>>) -> Self {
@@ -229,16 +260,6 @@ where
         self
     }
 
-    /// Set the number of threads used in CSV reading. The default uses the number of cores of
-    /// your cpu.
-    ///
-    /// Note that this only works if this is initialized with `CsvReader::from_path`.
-    /// Note that the number of cores is the maximum allowed number of threads.
-    pub fn with_n_threads(mut self, n: Option<usize>) -> Self {
-        self.n_threads = n;
-        self
-    }
-
     /// The preferred way to initialize this builder. This allows the CSV file to be memory mapped
     /// and thereby greatly increases parsing performance.
     pub fn with_path<P: Into<PathBuf>>(mut self, path: Option<P>) -> Self {
@@ -254,46 +275,10 @@ where
         self
     }
 
-    /// Raise an error if CSV is empty (otherwise return an empty frame)
-    pub fn raise_if_empty(mut self, toggle: bool) -> Self {
-        self.raise_if_empty = toggle;
-        self
-    }
-
-    /// Reduce memory consumption at the expense of performance
-    pub fn low_memory(mut self, toggle: bool) -> Self {
-        self.low_memory = toggle;
-        self
-    }
-
-    /// Set the `char` used as quote char. The default is `b'"'`. If set to `[None]` quoting is disabled.
-    pub fn with_quote_char(mut self, quote_char: Option<u8>) -> Self {
-        self.quote_char = quote_char;
-        self
-    }
-
-    /// Automatically try to parse dates/ datetimes and time. If parsing fails, columns remain of dtype `[DataType::String]`.
-    pub fn with_try_parse_dates(mut self, toggle: bool) -> Self {
-        self.try_parse_dates = toggle;
-        self
-    }
-
     pub fn with_predicate(mut self, predicate: Option<Arc<dyn PhysicalIoExpr>>) -> Self {
         self.predicate = predicate;
         self
     }
-
-    /// Truncate lines that are longer than the schema.
-    pub fn truncate_ragged_lines(mut self, toggle: bool) -> Self {
-        self.truncate_ragged_lines = toggle;
-        self
-    }
-
-    /// Parse floats with decimals.
-    pub fn with_decimal_comma(mut self, toggle: bool) -> Self {
-        self.decimal_comma = toggle;
-        self
-    }
 }
 
 impl<'a> CsvReader<'a, File> {
@@ -318,34 +303,34 @@ impl<'a, R: MmapBytesReader + 'a> CsvReader<'a, R> {
         CoreReader::new(
             reader_bytes,
             self.n_rows,
-            self.skip_rows_before_header,
+            self.options.skip_rows,
             std::mem::take(&mut self.projection),
-            self.max_records,
-            self.separator,
-            self.has_header,
-            self.ignore_errors,
-            self.schema.clone(),
+            self.options.infer_schema_length,
+            Some(self.options.separator),
+            self.options.has_header,
+            self.options.ignore_errors,
+            self.options.schema.clone(),
             std::mem::take(&mut self.columns),
-            self.encoding,
-            self.n_threads,
+            self.options.encoding,
+            self.options.n_threads,
             schema,
             self.dtype_overwrite,
             self.sample_size,
             self.chunk_size,
-            self.low_memory,
-            std::mem::take(&mut self.comment_prefix),
-            self.quote_char,
-            self.eol_char,
-            std::mem::take(&mut self.null_values),
+            self.options.low_memory,
+            std::mem::take(&mut self.options.comment_prefix),
+            self.options.quote_char,
+            self.options.eol_char,
+            std::mem::take(&mut self.options.null_values),
             self.missing_is_null,
             std::mem::take(&mut self.predicate),
             to_cast,
-            self.skip_rows_after_header,
+            self.options.skip_rows_after_header,
             std::mem::take(&mut self.row_index),
-            self.try_parse_dates,
-            self.raise_if_empty,
-            self.truncate_ragged_lines,
-            self.decimal_comma,
+            self.options.try_parse_dates,
+            self.options.raise_if_empty,
+            self.options.truncate_ragged_lines,
+            self.options.decimal_comma,
         )
     }
 
@@ -403,26 +388,26 @@ impl<'a, R: MmapBytesReader + 'a> CsvReader<'a, R> {
     }
 
     pub fn batched_borrowed_mmap(&'a mut self) -> PolarsResult<BatchedCsvReaderMmap<'a>> {
-        if let Some(schema) = self.schema_overwrite.as_deref() {
+        if let Some(schema) = self.options.schema_overwrite.as_deref() {
             let (schema, to_cast, has_cat) = self.prepare_schema_overwrite(schema)?;
             let schema = Arc::new(schema);
 
             let csv_reader = self.core_reader(Some(schema), to_cast)?;
             csv_reader.batched_mmap(has_cat)
         } else {
-            let csv_reader = self.core_reader(self.schema.clone(), vec![])?;
+            let csv_reader = self.core_reader(self.options.schema.clone(), vec![])?;
             csv_reader.batched_mmap(false)
         }
     }
     pub fn batched_borrowed_read(&'a mut self) -> PolarsResult<BatchedCsvReaderRead<'a>> {
-        if let Some(schema) = self.schema_overwrite.as_deref() {
+        if let Some(schema) = self.options.schema_overwrite.as_deref() {
             let (schema, to_cast, has_cat) = self.prepare_schema_overwrite(schema)?;
             let schema = Arc::new(schema);
 
             let csv_reader = self.core_reader(Some(schema), to_cast)?;
             csv_reader.batched_read(has_cat)
         } else {
-            let csv_reader = self.core_reader(self.schema.clone(), vec![])?;
+            let csv_reader = self.core_reader(self.options.schema.clone(), vec![])?;
             csv_reader.batched_read(false)
         }
     }
@@ -440,20 +425,20 @@ impl<'a> CsvReader<'a, Box<dyn MmapBytesReader>> {
 
                 let (inferred_schema, _, _) = infer_file_schema(
                     &reader_bytes,
-                    self.separator.unwrap_or(b','),
-                    self.max_records,
-                    self.has_header,
+                    self.options.separator,
+                    self.options.infer_schema_length,
+                    self.options.has_header,
                     None,
-                    &mut self.skip_rows_before_header,
-                    self.skip_rows_after_header,
-                    self.comment_prefix.as_ref(),
-                    self.quote_char,
-                    self.eol_char,
-                    self.null_values.as_ref(),
-                    self.try_parse_dates,
-                    self.raise_if_empty,
-                    &mut self.n_threads,
-                    self.decimal_comma,
+                    &mut self.options.skip_rows,
+                    self.options.skip_rows_after_header,
+                    self.options.comment_prefix.as_ref(),
+                    self.options.quote_char,
+                    self.options.eol_char,
+                    self.options.null_values.as_ref(),
+                    self.options.try_parse_dates,
+                    self.options.raise_if_empty,
+                    &mut self.options.n_threads,
+                    self.options.decimal_comma,
                 )?;
                 let schema = Arc::new(inferred_schema);
                 Ok(to_batched_owned_mmap(self, schema))
@@ -471,20 +456,20 @@ impl<'a> CsvReader<'a, Box<dyn MmapBytesReader>> {
 
                 let (inferred_schema, _, _) = infer_file_schema(
                     &reader_bytes,
-                    self.separator.unwrap_or(b','),
-                    self.max_records,
-                    self.has_header,
+                    self.options.separator,
+                    self.options.infer_schema_length,
+                    self.options.has_header,
                     None,
-                    &mut self.skip_rows_before_header,
-                    self.skip_rows_after_header,
-                    self.comment_prefix.as_ref(),
-                    self.quote_char,
-                    self.eol_char,
-                    self.null_values.as_ref(),
-                    self.try_parse_dates,
-                    self.raise_if_empty,
-                    &mut self.n_threads,
-                    self.decimal_comma,
+                    &mut self.options.skip_rows,
+                    self.options.skip_rows_after_header,
+                    self.options.comment_prefix.as_ref(),
+                    self.options.quote_char,
+                    self.options.eol_char,
+                    self.options.null_values.as_ref(),
+                    self.options.try_parse_dates,
+                    self.options.raise_if_empty,
+                    &mut self.options.n_threads,
+                    self.options.decimal_comma,
                 )?;
                 let schema = Arc::new(inferred_schema);
                 Ok(to_batched_owned_read(self, schema))
@@ -501,44 +486,26 @@ where
     fn new(reader: R) -> Self {
         CsvReader {
             reader,
+            options: CsvReaderOptions::default(),
             rechunk: true,
             n_rows: None,
-            max_records: Some(128),
-            skip_rows_before_header: 0,
             projection: None,
-            separator: None,
-            has_header: true,
-            ignore_errors: false,
-            schema: None,
             columns: None,
-            encoding: CsvEncoding::Utf8,
-            n_threads: None,
             path: None,
-            schema_overwrite: None,
             dtype_overwrite: None,
             sample_size: 1024,
             chunk_size: 1 << 18,
-            low_memory: false,
-            comment_prefix: None,
-            eol_char: b'\n',
-            null_values: None,
             missing_is_null: true,
             predicate: None,
-            quote_char: Some(b'"'),
-            skip_rows_after_header: 0,
-            try_parse_dates: false,
             row_index: None,
-            raise_if_empty: true,
-            truncate_ragged_lines: false,
-            decimal_comma: false,
         }
     }
 
     /// Read the file and create the DataFrame.
     fn finish(mut self) -> PolarsResult<DataFrame> {
         let rechunk = self.rechunk;
-        let schema_overwrite = self.schema_overwrite.clone();
-        let low_memory = self.low_memory;
+        let schema_overwrite = self.options.schema_overwrite.clone();
+        let low_memory = self.options.low_memory;
 
         #[cfg(feature = "dtype-categorical")]
         let mut _cat_lock = None;
@@ -557,6 +524,7 @@ where
             #[cfg(feature = "dtype-categorical")]
             {
                 let has_cat = self
+                    .options
                     .schema
                     .clone()
                     .map(|schema| {
@@ -569,7 +537,7 @@ where
                     _cat_lock = Some(polars_core::StringCacheHolder::hold())
                 }
             }
-            let mut csv_reader = self.core_reader(self.schema.clone(), vec![])?;
+            let mut csv_reader = self.core_reader(self.options.schema.clone(), vec![])?;
             csv_reader.as_df()?
         };
 
@@ -585,7 +553,7 @@ where
 
         #[cfg(feature = "temporal")]
         // only needed until we also can parse time columns in place
-        if self.try_parse_dates {
+        if self.options.try_parse_dates {
             // determine the schema that's given by the user. That should not be changed
             let fixed_schema = match (schema_overwrite, self.dtype_overwrite) {
                 (Some(schema), _) => schema,
diff --git a/crates/polars-lazy/src/physical_plan/executors/scan/csv.rs b/crates/polars-lazy/src/physical_plan/executors/scan/csv.rs
index 63ad38bcfa12..06277d5b054e 100644
--- a/crates/polars-lazy/src/physical_plan/executors/scan/csv.rs
+++ b/crates/polars-lazy/src/physical_plan/executors/scan/csv.rs
@@ -5,7 +5,7 @@ use super::*;
 pub struct CsvExec {
     pub path: PathBuf,
     pub schema: SchemaRef,
-    pub options: CsvParserOptions,
+    pub options: CsvReaderOptions,
     pub file_options: FileScanOptions,
     pub predicate: Option<Arc<dyn PhysicalExpr>>,
 }
diff --git a/crates/polars-pipe/src/executors/sources/csv.rs b/crates/polars-pipe/src/executors/sources/csv.rs
index b2423c832ed8..8d49bcb9ea41 100644
--- a/crates/polars-pipe/src/executors/sources/csv.rs
+++ b/crates/polars-pipe/src/executors/sources/csv.rs
@@ -4,7 +4,7 @@ use std::path::PathBuf;
 use polars_core::export::arrow::Either;
 use polars_core::POOL;
 use polars_io::csv::read::{
-    BatchedCsvReaderMmap, BatchedCsvReaderRead, CsvEncoding, CsvParserOptions, CsvReader,
+    BatchedCsvReaderMmap, BatchedCsvReaderRead, CsvEncoding, CsvReader, CsvReaderOptions,
 };
 use polars_plan::global::_set_n_rows_for_scan;
 use polars_plan::prelude::FileScanOptions;
@@ -22,7 +22,7 @@ pub(crate) struct CsvSource {
         Option<Either<*mut BatchedCsvReaderMmap<'static>, *mut BatchedCsvReaderRead<'static>>>,
     n_threads: usize,
     path: Option<PathBuf>,
-    options: Option<CsvParserOptions>,
+    options: Option<CsvReaderOptions>,
     file_options: Option<FileScanOptions>,
     verbose: bool,
 }
@@ -106,7 +106,7 @@ impl CsvSource {
     pub(crate) fn new(
         path: PathBuf,
         schema: SchemaRef,
-        options: CsvParserOptions,
+        options: CsvReaderOptions,
         file_options: FileScanOptions,
         verbose: bool,
     ) -> PolarsResult<Self> {
diff --git a/crates/polars-plan/src/logical_plan/builder_dsl.rs b/crates/polars-plan/src/logical_plan/builder_dsl.rs
index ec31dd2e0514..fafcdfc4286f 100644
--- a/crates/polars-plan/src/logical_plan/builder_dsl.rs
+++ b/crates/polars-plan/src/logical_plan/builder_dsl.rs
@@ -2,7 +2,7 @@ use polars_core::prelude::*;
 #[cfg(feature = "parquet")]
 use polars_io::cloud::CloudOptions;
 #[cfg(feature = "csv")]
-use polars_io::csv::read::{CommentPrefix, CsvEncoding, CsvParserOptions, NullValues};
+use polars_io::csv::read::{CommentPrefix, CsvEncoding, CsvReaderOptions, NullValues};
 #[cfg(feature = "ipc")]
 use polars_io::ipc::IpcScanOptions;
 #[cfg(feature = "parquet")]
@@ -216,7 +216,7 @@ impl DslBuilder {
             file_options: options,
             predicate: None,
             scan_type: FileScan::Csv {
-                options: CsvParserOptions {
+                options: CsvReaderOptions {
                     has_header,
                     separator,
                     ignore_errors,
diff --git a/crates/polars-plan/src/logical_plan/conversion/scans.rs b/crates/polars-plan/src/logical_plan/conversion/scans.rs
index f0523d68748d..84139ff5e713 100644
--- a/crates/polars-plan/src/logical_plan/conversion/scans.rs
+++ b/crates/polars-plan/src/logical_plan/conversion/scans.rs
@@ -121,7 +121,7 @@ pub(super) fn ipc_file_info(
 pub(super) fn csv_file_info(
     paths: &[PathBuf],
     file_options: &FileScanOptions,
-    csv_options: &mut CsvParserOptions,
+    csv_options: &mut CsvReaderOptions,
 ) -> PolarsResult<FileInfo> {
     use std::io::Seek;
 
diff --git a/crates/polars-plan/src/logical_plan/file_scan.rs b/crates/polars-plan/src/logical_plan/file_scan.rs
index 43c3cb7da091..2777ad8a5e1b 100644
--- a/crates/polars-plan/src/logical_plan/file_scan.rs
+++ b/crates/polars-plan/src/logical_plan/file_scan.rs
@@ -1,7 +1,7 @@
 use std::hash::{Hash, Hasher};
 
 #[cfg(feature = "csv")]
-use polars_io::csv::read::CsvParserOptions;
+use polars_io::csv::read::CsvReaderOptions;
 #[cfg(feature = "ipc")]
 use polars_io::ipc::IpcScanOptions;
 #[cfg(feature = "parquet")]
@@ -15,7 +15,7 @@ use super::*;
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 pub enum FileScan {
     #[cfg(feature = "csv")]
-    Csv { options: CsvParserOptions },
+    Csv { options: CsvReaderOptions },
     #[cfg(feature = "parquet")]
     Parquet {
         options: ParquetOptions,
diff --git a/py-polars/polars/io/csv/functions.py b/py-polars/polars/io/csv/functions.py
index 10c558123a11..f6b8e5214e01 100644
--- a/py-polars/polars/io/csv/functions.py
+++ b/py-polars/polars/io/csv/functions.py
@@ -95,8 +95,8 @@ def read_csv(
     separator
         Single byte character to use as separator in the file.
     comment_prefix
-        A string, which can be up to 5 symbols in length, used to indicate
-        the start of a comment line. For instance, it can be set to `#` or `//`.
+        A string used to indicate the start of a comment line. Comment lines are skipped
+        during parsing. Common examples of comment prefixes are `#` and `//`.
     quote_char
         Single byte character used for csv quoting, default = `"`.
         Set to None to turn off special handling and escaping of quotes.
@@ -654,8 +654,8 @@ def read_csv_batched(
     separator
         Single byte character to use as separator in the file.
     comment_prefix
-        A string, which can be up to 5 symbols in length, used to indicate
-        the start of a comment line. For instance, it can be set to `#` or `//`.
+        A string used to indicate the start of a comment line. Comment lines are skipped
+        during parsing. Common examples of comment prefixes are `#` and `//`.
     quote_char
         Single byte character used for csv quoting, default = `"`.
         Set to None to turn off special handling and escaping of quotes.
@@ -944,8 +944,8 @@ def scan_csv(
     separator
         Single byte character to use as separator in the file.
     comment_prefix
-        A string, which can be up to 5 symbols in length, used to indicate
-        the start of a comment line. For instance, it can be set to `#` or `//`.
+        A string used to indicate the start of a comment line. Comment lines are skipped
+        during parsing. Common examples of comment prefixes are `#` and `//`.
     quote_char
         Single byte character used for csv quoting, default = `"`.
         Set to None to turn off special handling and escaping of quotes.