Skip to content
This repository has been archived by the owner on Sep 26, 2023. It is now read-only.

Commit

Permalink
Update ergonomics across the user-guide (#365)
Browse files: browse the repository at this point in the history
  • Loading branch information
avimallu authored Jul 6, 2023
1 parent f2611f4 commit d5decdd
Show file tree
Hide file tree
Showing 13 changed files with 119 additions and 199 deletions.
28 changes: 11 additions & 17 deletions docs/src/python/user-guide/concepts/contexts.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,10 @@
# --8<-- [start:select]

out = df.select(
[
pl.sum("nrs"),
pl.col("names").sort(),
pl.col("names").first().alias("first name"),
(pl.mean("nrs") * 10).alias("10xnrs"),
]
pl.sum("nrs"),
pl.col("names").sort(),
pl.col("names").first().alias("first name"),
(pl.mean("nrs") * 10).alias("10xnrs"),
)
print(out)
# --8<-- [end:select]
Expand All @@ -38,24 +36,20 @@
# --8<-- [start:with_columns]

df = df.with_columns(
[
pl.sum("nrs").alias("nrs_sum"),
pl.col("random").count().alias("count"),
]
pl.sum("nrs").alias("nrs_sum"),
pl.col("random").count().alias("count"),
)
print(df)
# --8<-- [end:with_columns]


# --8<-- [start:groupby]
out = df.groupby("groups").agg(
[
pl.sum("nrs"), # sum nrs by groups
pl.col("random").count().alias("count"), # count group members
# sum random where name != null
pl.col("random").filter(pl.col("names").is_not_null()).sum().suffix("_sum"),
pl.col("names").reverse().alias(("reversed names")),
]
pl.sum("nrs"), # sum nrs by groups
pl.col("random").count().alias("count"), # count group members
# sum random where name != null
pl.col("random").filter(pl.col("names").is_not_null()).sum().suffix("_sum"),
pl.col("names").reverse().alias(("reversed names")),
)
print(out)
# --8<-- [end:groupby]
7 changes: 1 addition & 6 deletions docs/src/python/user-guide/concepts/expressions.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,5 @@
# --8<-- [end:example1]

# --8<-- [start:example2]
df.select(
[
pl.col("foo").sort().head(2),
pl.col("bar").filter(pl.col("foo") == 1).sum(),
]
)
df.select(pl.col("foo").sort().head(2), pl.col("bar").filter(pl.col("foo") == 1).sum())
# --8<-- [end:example2]
60 changes: 24 additions & 36 deletions docs/src/python/user-guide/expressions/aggregation.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,9 @@
dataset.lazy()
.groupby("first_name")
.agg(
[
pl.count(),
pl.col("gender"),
pl.first("last_name"),
]
pl.count(),
pl.col("gender"),
pl.first("last_name"),
)
.sort("count", descending=True)
.limit(5)
Expand All @@ -44,10 +42,8 @@
dataset.lazy()
.groupby("state")
.agg(
[
(pl.col("party") == "Anti-Administration").sum().alias("anti"),
(pl.col("party") == "Pro-Administration").sum().alias("pro"),
]
(pl.col("party") == "Anti-Administration").sum().alias("anti"),
(pl.col("party") == "Pro-Administration").sum().alias("pro"),
)
.sort("pro", descending=True)
.limit(5)
Expand All @@ -60,8 +56,8 @@
# --8<-- [start:nested]
q = (
dataset.lazy()
.groupby(["state", "party"])
.agg([pl.count("party").alias("count")])
.groupby("state", "party")
.agg(pl.count("party").alias("count"))
.filter(
(pl.col("party") == "Anti-Administration")
| (pl.col("party") == "Pro-Administration")
Expand Down Expand Up @@ -91,14 +87,12 @@ def avg_birthday(gender: str) -> pl.Expr:

q = (
dataset.lazy()
.groupby(["state"])
.groupby("state")
.agg(
[
avg_birthday("M"),
avg_birthday("F"),
(pl.col("gender") == "M").sum().alias("# male"),
(pl.col("gender") == "F").sum().alias("# female"),
]
avg_birthday("M"),
avg_birthday("F"),
(pl.col("gender") == "M").sum().alias("# male"),
(pl.col("gender") == "F").sum().alias("# female"),
)
.limit(5)
)
Expand All @@ -116,12 +110,10 @@ def get_person() -> pl.Expr:
q = (
dataset.lazy()
.sort("birthday", descending=True)
.groupby(["state"])
.groupby("state")
.agg(
[
get_person().first().alias("youngest"),
get_person().last().alias("oldest"),
]
get_person().first().alias("youngest"),
get_person().last().alias("oldest"),
)
.limit(5)
)
Expand All @@ -139,13 +131,11 @@ def get_person() -> pl.Expr:
q = (
dataset.lazy()
.sort("birthday", descending=True)
.groupby(["state"])
.groupby("state")
.agg(
[
get_person().first().alias("youngest"),
get_person().last().alias("oldest"),
get_person().sort().first().alias("alphabetical_first"),
]
get_person().first().alias("youngest"),
get_person().last().alias("oldest"),
get_person().sort().first().alias("alphabetical_first"),
)
.limit(5)
)
Expand All @@ -163,14 +153,12 @@ def get_person() -> pl.Expr:
q = (
dataset.lazy()
.sort("birthday", descending=True)
.groupby(["state"])
.groupby("state")
.agg(
[
get_person().first().alias("youngest"),
get_person().last().alias("oldest"),
get_person().sort().first().alias("alphabetical_first"),
pl.col("gender").sort_by("first_name").first().alias("gender"),
]
get_person().first().alias("youngest"),
get_person().last().alias("oldest"),
get_person().sort().first().alias("alphabetical_first"),
pl.col("gender").sort_by("first_name").first().alias("gender"),
)
.sort("state")
.limit(5)
Expand Down
53 changes: 18 additions & 35 deletions docs/src/python/user-guide/expressions/casting.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,38 +19,34 @@

# --8<-- [start:castnum]
out = df.select(
[
pl.col("integers").cast(pl.Float32).alias("integers_as_floats"),
pl.col("floats").cast(pl.Int32).alias("floats_as_integers"),
pl.col("floats_with_decimal")
.cast(pl.Int32)
.alias("floats_with_decimal_as_integers"),
]
pl.col("integers").cast(pl.Float32).alias("integers_as_floats"),
pl.col("floats").cast(pl.Int32).alias("floats_as_integers"),
pl.col("floats_with_decimal")
.cast(pl.Int32)
.alias("floats_with_decimal_as_integers"),
)
print(out)
# --8<-- [end:castnum]


# --8<-- [start:downcast]
out = df.select(
[
pl.col("integers").cast(pl.Int16).alias("integers_smallfootprint"),
pl.col("floats").cast(pl.Float32).alias("floats_smallfootprint"),
]
pl.col("integers").cast(pl.Int16).alias("integers_smallfootprint"),
pl.col("floats").cast(pl.Float32).alias("floats_smallfootprint"),
)
print(out)
# --8<-- [end:downcast]

# --8<-- [start:overflow]
try:
out = df.select([pl.col("big_integers").cast(pl.Int8)])
out = df.select(pl.col("big_integers").cast(pl.Int8))
print(out)
except Exception as e:
print(e)
# --8<-- [end:overflow]

# --8<-- [start:overflow2]
out = df.select([pl.col("big_integers").cast(pl.Int8, strict=False)])
out = df.select(pl.col("big_integers").cast(pl.Int8, strict=False))
print(out)
# --8<-- [end:overflow2]

Expand All @@ -65,24 +61,18 @@
)

out = df.select(
[
pl.col("integers").cast(pl.Utf8),
pl.col("float").cast(pl.Utf8),
pl.col("floats_as_string").cast(pl.Float64),
]
pl.col("integers").cast(pl.Utf8),
pl.col("float").cast(pl.Utf8),
pl.col("floats_as_string").cast(pl.Float64),
)
print(out)
# --8<-- [end:strings]


# --8<-- [start:strings2]
df = pl.DataFrame(
{
"strings_not_float": ["4.0", "not_a_number", "6.0", "7.0", "8.0"],
}
)
df = pl.DataFrame({"strings_not_float": ["4.0", "not_a_number", "6.0", "7.0", "8.0"]})
try:
out = df.select([pl.col("strings_not_float").cast(pl.Float64)])
out = df.select(pl.col("strings_not_float").cast(pl.Float64))
print(out)
except Exception as e:
print(e)
Expand All @@ -97,12 +87,7 @@
}
)

out = df.select(
[
pl.col("integers").cast(pl.Boolean),
pl.col("floats").cast(pl.Boolean),
]
)
out = df.select(pl.col("integers").cast(pl.Boolean), pl.col("floats").cast(pl.Boolean))
print(out)
# --8<-- [end:bool]

Expand All @@ -118,7 +103,7 @@
}
)

out = df.select([pl.col("date").cast(pl.Int64), pl.col("datetime").cast(pl.Int64)])
out = df.select(pl.col("date").cast(pl.Int64), pl.col("datetime").cast(pl.Int64))
print(out)
# --8<-- [end:dates]

Expand All @@ -137,10 +122,8 @@
)

out = df.select(
[
pl.col("date").dt.strftime("%Y-%m-%d"),
pl.col("string").str.strptime(pl.Datetime, "%Y-%m-%d"),
]
pl.col("date").dt.strftime("%Y-%m-%d"),
pl.col("string").str.strptime(pl.Datetime, "%Y-%m-%d"),
)
print(out)
# --8<-- [end:dates2]
6 changes: 1 addition & 5 deletions docs/src/python/user-guide/expressions/folds.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,10 +45,6 @@
}
)

out = df.select(
[
pl.concat_str(["a", "b"]),
]
)
out = df.select(pl.concat_str(["a", "b"]))
print(out)
# --8<-- [end:string]
28 changes: 11 additions & 17 deletions docs/src/python/user-guide/expressions/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,48 +19,42 @@
# --8<-- [end:dataframe]

# --8<-- [start:samename]
df_samename = df.select([pl.col("nrs") + 5])
df_samename = df.select(pl.col("nrs") + 5)
print(df_samename)
# --8<-- [end:samename]


# --8<-- [start:samenametwice]
try:
df_samename2 = df.select([pl.col("nrs") + 5, pl.col("nrs") - 5])
df_samename2 = df.select(pl.col("nrs") + 5, pl.col("nrs") - 5)
print(df_samename2)
except Exception as e:
print(e)
# --8<-- [end:samenametwice]

# --8<-- [start:samenamealias]
df_alias = df.select(
[
(pl.col("nrs") + 5).alias("nrs + 5"),
(pl.col("nrs") - 5).alias("nrs - 5"),
]
(pl.col("nrs") + 5).alias("nrs + 5"),
(pl.col("nrs") - 5).alias("nrs - 5"),
)
print(df_alias)
# --8<-- [end:samenamealias]

# --8<-- [start:countunique]
df_alias = df.select(
[
pl.col("names").n_unique().alias("unique"),
pl.approx_unique("names").alias("unique_approx"),
]
pl.col("names").n_unique().alias("unique"),
pl.approx_unique("names").alias("unique_approx"),
)
print(df_alias)
# --8<-- [end:countunique]

# --8<-- [start:conditional]
df_conditional = df.select(
[
pl.col("nrs"),
pl.when(pl.col("nrs") > 2)
.then(pl.lit(True))
.otherwise(pl.lit(False))
.alias("conditional"),
]
pl.col("nrs"),
pl.when(pl.col("nrs") > 2)
.then(pl.lit(True))
.otherwise(pl.lit(False))
.alias("conditional"),
)
print(df_conditional)
# --8<-- [end:conditional]
6 changes: 1 addition & 5 deletions docs/src/python/user-guide/expressions/numpy-example.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,5 @@

df = pl.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})

out = df.select(
[
np.log(pl.all()).suffix("_log"),
]
)
out = df.select(np.log(pl.all()).suffix("_log"))
print(out)
Loading

0 comments on commit d5decdd

Please sign in to comment.