Skip to content

Commit

Permalink
Address review feedback
Browse files Browse the repository at this point in the history
  • Loading branch information
rodrigogiraoserrao committed Oct 7, 2024
1 parent a2ac41f commit 5e2e040
Show file tree
Hide file tree
Showing 13 changed files with 160 additions and 53 deletions.
2 changes: 1 addition & 1 deletion crates/polars-lazy/src/frame/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -252,7 +252,7 @@ impl LazyFrame {

/// Return a String describing the logical plan.
///
/// If `optimized` is `true`, explains the optimized plan. If `optimized` is `false,
/// If `optimized` is `true`, explains the optimized plan. If `optimized` is `false`,
/// explains the naive, un-optimized plan.
pub fn explain(&self, optimized: bool) -> PolarsResult<String> {
if optimized {
Expand Down
1 change: 1 addition & 0 deletions docs/source/_build/API_REFERENCE_LINKS.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ python:
is_duplicated: https://docs.pola.rs/api/python/stable/reference/expressions/api/polars.Expr.is_duplicated.html
sample: https://docs.pola.rs/api/python/stable/reference/dataframe/api/polars.DataFrame.sample.html
head: https://docs.pola.rs/api/python/stable/reference/dataframe/api/polars.DataFrame.head.html
glimpse: https://docs.pola.rs/api/python/stable/reference/dataframe/api/polars.DataFrame.glimpse.html
tail: https://docs.pola.rs/api/python/stable/reference/dataframe/api/polars.DataFrame.tail.html
describe: https://docs.pola.rs/api/python/stable/reference/dataframe/api/polars.DataFrame.describe.html
col: https://docs.pola.rs/api/python/stable/reference/expressions/col.html
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,16 +12,16 @@
# --8<-- [end:series-dtype]

# --8<-- [start:df]
import datetime as dt
from datetime import date

df = pl.DataFrame(
{
"name": ["Alice Archer", "Ben Brown", "Chloe Cooper", "Daniel Donovan"],
"birthdate": [
dt.date(1997, 1, 10),
dt.date(1985, 2, 15),
dt.date(1983, 3, 22),
dt.date(1981, 4, 30),
date(1997, 1, 10),
date(1985, 2, 15),
date(1983, 3, 22),
date(1981, 4, 30),
],
"weight": [57.9, 72.5, 53.6, 83.1], # (kg)
"height": [1.56, 1.77, 1.65, 1.75], # (m)
Expand All @@ -39,6 +39,10 @@
print(df.head(3))
# --8<-- [end:head]

# --8<-- [start:glimpse]
print(df.glimpse(return_as_string=True))
# --8<-- [end:glimpse]

# --8<-- [start:tail]
print(df.tail(3))
# --8<-- [end:tail]
Expand Down
18 changes: 9 additions & 9 deletions docs/source/src/python/user-guide/concepts/expressions.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,16 +10,16 @@
# --8<-- [end:print-expr]

# --8<-- [start:df]
import datetime as dt
from datetime import date

df = pl.DataFrame(
{
"name": ["Alice Archer", "Ben Brown", "Chloe Cooper", "Daniel Donovan"],
"birthdate": [
dt.date(1997, 1, 10),
dt.date(1985, 2, 15),
dt.date(1983, 3, 22),
dt.date(1981, 4, 30),
date(1997, 1, 10),
date(1985, 2, 15),
date(1983, 3, 22),
date(1981, 4, 30),
],
"weight": [57.9, 72.5, 53.6, 83.1], # (kg)
"height": [1.56, 1.77, 1.65, 1.75], # (m)
Expand Down Expand Up @@ -54,16 +54,16 @@

# --8<-- [start:filter-1]
result = df.filter(
pl.col("birthdate").is_between(dt.date(1982, 12, 31), dt.date(1996, 1, 1)),
pl.col("birthdate").is_between(date(1982, 12, 31), date(1996, 1, 1)),
pl.col("height") > 1.7,
)
print(result)
# --8<-- [end:filter-1]

# --8<-- [start:group_by-1]
result = df.group_by((pl.col("birthdate").dt.year() // 10 * 10).alias("decade")).agg(
pl.col("name")
)
result = df.group_by(
(pl.col("birthdate").dt.year() // 10 * 10).alias("decade"),
).agg(pl.col("name"))
print(result)
# --8<-- [end:group_by-1]

Expand Down
25 changes: 25 additions & 0 deletions docs/source/src/python/user-guide/concepts/lazy-vs-eager.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
# --8<-- [start:import]
import polars as pl

# --8<-- [end:import]

# --8<-- [start:eager]

df = pl.read_csv("docs/assets/data/iris.csv")
Expand All @@ -18,3 +21,25 @@

df = q.collect()
# --8<-- [end:lazy]

# --8<-- [start:explain]
print(q.explain())
# --8<-- [end:explain]

# --8<-- [start:explain-expression-expansion]
schema = pl.Schema(
{
"int_1": pl.Int16,
"int_2": pl.Int32,
"float_1": pl.Float64,
"float_2": pl.Float64,
"float_3": pl.Float64,
}
)

print(
pl.LazyFrame(schema=schema)
.select((pl.col(pl.Float64) * 1.1).name.suffix("*1.1"))
.explain()
)
# --8<-- [end:explain-expression-expansion]
10 changes: 10 additions & 0 deletions docs/source/src/rust/user-guide/concepts/lazy-vs-eager.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,5 +28,15 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
println!("{}", df);
// --8<-- [end:lazy]

// --8<-- [start:explain]
let q = LazyCsvReader::new("docs/assets/data/iris.csv")
.with_has_header(true)
.finish()?
.filter(col("sepal_length").gt(lit(5)))
.group_by(vec![col("species")])
.agg([col("sepal_width").mean()]);
// `explain` returns a `PolarsResult<String>`; unwrap it with `?` and print with
// `{}` so the plan keeps its line breaks instead of showing as `Ok("...\n...")`.
println!("{}", q.explain(true)?);
// --8<-- [end:explain]

Ok(())
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# Streaming

<!-- Not included in the docs “until we have something we are proud of”. https://github.com/pola-rs/polars/pull/19087/files/92bffabe48c6c33a9ec5bc003d8683e59c97158c#r1788988580 -->

One additional benefit of the lazy API is that it allows queries to be executed in a streaming manner. Instead of processing all the data at once, Polars can execute the query in batches, allowing you to process datasets that do not fit in memory.

To tell Polars we want to execute a query in streaming mode, we pass the `streaming=True` argument to `collect`:
Expand Down
Loading

0 comments on commit 5e2e040

Please sign in to comment.