Address review feedback

pola-rs · Oct 7, 2024 · 5e2e040 · 5e2e040
1 parent a2ac41f
commit 5e2e040
Show file tree

Hide file tree

Showing 13 changed files with 160 additions and 53 deletions.
diff --git a/crates/polars-lazy/src/frame/mod.rs b/crates/polars-lazy/src/frame/mod.rs
@@ -252,7 +252,7 @@ impl LazyFrame {
 
     /// Return a String describing the logical plan.
     ///
-    /// If `optimized` is `true`, explains the optimized plan. If `optimized` is `false,
+    /// If `optimized` is `true`, explains the optimized plan. If `optimized` is `false`,
     /// explains the naive, un-optimized plan.
     pub fn explain(&self, optimized: bool) -> PolarsResult<String> {
         if optimized {

diff --git a/docs/source/_build/API_REFERENCE_LINKS.yml b/docs/source/_build/API_REFERENCE_LINKS.yml
@@ -26,6 +26,7 @@ python:
   is_duplicated: https://docs.pola.rs/api/python/stable/reference/expressions/api/polars.Expr.is_duplicated.html
   sample: https://docs.pola.rs/api/python/stable/reference/dataframe/api/polars.DataFrame.sample.html
   head: https://docs.pola.rs/api/python/stable/reference/dataframe/api/polars.DataFrame.head.html
+  glimpse: https://docs.pola.rs/api/python/stable/reference/dataframe/api/polars.DataFrame.glimpse.html
   tail: https://docs.pola.rs/api/python/stable/reference/dataframe/api/polars.DataFrame.tail.html
   describe: https://docs.pola.rs/api/python/stable/reference/dataframe/api/polars.DataFrame.describe.html
   col: https://docs.pola.rs/api/python/stable/reference/expressions/col.html

diff --git a/docs/source/src/python/user-guide/concepts/data-types-and-structures.py b/docs/source/src/python/user-guide/concepts/data-types-and-structures.py
@@ -12,16 +12,16 @@
 # --8<-- [end:series-dtype]
 
 # --8<-- [start:df]
-import datetime as dt
+from datetime import date
 
 df = pl.DataFrame(
     {
         "name": ["Alice Archer", "Ben Brown", "Chloe Cooper", "Daniel Donovan"],
         "birthdate": [
-            dt.date(1997, 1, 10),
-            dt.date(1985, 2, 15),
-            dt.date(1983, 3, 22),
-            dt.date(1981, 4, 30),
+            date(1997, 1, 10),
+            date(1985, 2, 15),
+            date(1983, 3, 22),
+            date(1981, 4, 30),
         ],
         "weight": [57.9, 72.5, 53.6, 83.1],  # (kg)
         "height": [1.56, 1.77, 1.65, 1.75],  # (m)
@@ -39,6 +39,10 @@
 print(df.head(3))
 # --8<-- [end:head]
 
+# --8<-- [start:glimpse]
+print(df.glimpse(return_as_string=True))
+# --8<-- [end:glimpse]
+
 # --8<-- [start:tail]
 print(df.tail(3))
 # --8<-- [end:tail]

diff --git a/docs/source/src/python/user-guide/concepts/expressions.py b/docs/source/src/python/user-guide/concepts/expressions.py
@@ -10,16 +10,16 @@
 # --8<-- [end:print-expr]
 
 # --8<-- [start:df]
-import datetime as dt
+from datetime import date
 
 df = pl.DataFrame(
     {
         "name": ["Alice Archer", "Ben Brown", "Chloe Cooper", "Daniel Donovan"],
         "birthdate": [
-            dt.date(1997, 1, 10),
-            dt.date(1985, 2, 15),
-            dt.date(1983, 3, 22),
-            dt.date(1981, 4, 30),
+            date(1997, 1, 10),
+            date(1985, 2, 15),
+            date(1983, 3, 22),
+            date(1981, 4, 30),
         ],
         "weight": [57.9, 72.5, 53.6, 83.1],  # (kg)
         "height": [1.56, 1.77, 1.65, 1.75],  # (m)
@@ -54,16 +54,16 @@
 
 # --8<-- [start:filter-1]
 result = df.filter(
-    pl.col("birthdate").is_between(dt.date(1982, 12, 31), dt.date(1996, 1, 1)),
+    pl.col("birthdate").is_between(date(1982, 12, 31), date(1996, 1, 1)),
     pl.col("height") > 1.7,
 )
 print(result)
 # --8<-- [end:filter-1]
 
 # --8<-- [start:group_by-1]
-result = df.group_by((pl.col("birthdate").dt.year() // 10 * 10).alias("decade")).agg(
-    pl.col("name")
-)
+result = df.group_by(
+    (pl.col("birthdate").dt.year() // 10 * 10).alias("decade"),
+).agg(pl.col("name"))
 print(result)
 # --8<-- [end:group_by-1]
 

diff --git a/docs/source/src/python/user-guide/concepts/lazy-vs-eager.py b/docs/source/src/python/user-guide/concepts/lazy-vs-eager.py
@@ -1,5 +1,8 @@
+# --8<-- [start:import]
 import polars as pl
 
+# --8<-- [end:import]
+
 # --8<-- [start:eager]
 
 df = pl.read_csv("docs/assets/data/iris.csv")
@@ -18,3 +21,25 @@
 
 df = q.collect()
 # --8<-- [end:lazy]
+
+# --8<-- [start:explain]
+print(q.explain())
+# --8<-- [end:explain]
+
+# --8<-- [start:explain-expression-expansion]
+schema = pl.Schema(
+    {
+        "int_1": pl.Int16,
+        "int_2": pl.Int32,
+        "float_1": pl.Float64,
+        "float_2": pl.Float64,
+        "float_3": pl.Float64,
+    }
+)
+
+print(
+    pl.LazyFrame(schema=schema)
+    .select((pl.col(pl.Float64) * 1.1).name.suffix("*1.1"))
+    .explain()
+)
+# --8<-- [end:explain-expression-expansion]
diff --git a/docs/source/src/rust/user-guide/concepts/lazy-vs-eager.rs b/docs/source/src/rust/user-guide/concepts/lazy-vs-eager.rs
@@ -28,5 +28,15 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
     println!("{}", df);
     // --8<-- [end:lazy]
 
+    // --8<-- [start:explain]
+    let q = LazyCsvReader::new("docs/assets/data/iris.csv")
+        .with_has_header(true)
+        .finish()?
+        .filter(col("sepal_length").gt(lit(5)))
+        .group_by(vec![col("species")])
+        .agg([col("sepal_width").mean()]);
+    println!("{:?}", q.explain(true));
+    // --8<-- [end:explain]
+
     Ok(())
 }
diff --git a/docs/source/user-guide/concepts/streaming.md → .../source/user-guide/concepts/_streaming.md b/docs/source/user-guide/concepts/streaming.md → .../source/user-guide/concepts/_streaming.md
@@ -1,5 +1,7 @@
 # Streaming
 
+<!-- Not included in the docs “until we have something we are proud of”. https://github.com/pola-rs/polars/pull/19087/files/92bffabe48c6c33a9ec5bc003d8683e59c97158c#r1788988580 -->
+
 One additional benefit of the lazy API is that it allows queries to be executed in a streaming manner. Instead of processing all the data at once, Polars can execute the query in batches, allowing you to process datasets that do not fit in memory.
 
 To tell Polars we want to execute a query in streaming mode, we pass the `streaming=True` argument to `collect`