Skip to content

Commit

Permalink
[CHORE] Set Expression.str.normalize() options to False by default (#2647)
Browse files Browse the repository at this point in the history

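This is a **breaking change**. Every `normalize()` option (`remove_punct`,
`lowercase`, `nfd_unicode`, `white_space`) now defaults to `False` and must be
explicitly toggled on, instead of defaulting to `True` and being toggled off,
which I feel is slightly more usable.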
  • Loading branch information
Vince7778 authored Aug 14, 2024
1 parent 800d675 commit eeb4191
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 10 deletions.
12 changes: 6 additions & 6 deletions daft/expressions/expressions.py
Original file line number Diff line number Diff line change
Expand Up @@ -2568,20 +2568,20 @@ def to_datetime(self, format: str, timezone: str | None = None) -> Expression:
def normalize(
self,
*,
remove_punct: bool = True,
lowercase: bool = True,
nfd_unicode: bool = True,
white_space: bool = True,
remove_punct: bool = False,
lowercase: bool = False,
nfd_unicode: bool = False,
white_space: bool = False,
):
"""Normalizes a string for more useful deduplication.
.. NOTE::
All processing options are on by default.
All processing options are off by default.
Example:
>>> import daft
>>> df = daft.from_pydict({"x": ["hello world", "Hello, world!", "HELLO, \\nWORLD!!!!"]})
>>> df = df.with_column("normalized", df["x"].str.normalize())
>>> df = df.with_column("normalized", df["x"].str.normalize(remove_punct=True, lowercase=True, white_space=True))
>>> df.show()
╭───────────────┬─────────────╮
│ x ┆ normalized │
Expand Down
8 changes: 4 additions & 4 deletions daft/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -891,10 +891,10 @@ def substr(self, start: Series, length: Series | None = None) -> Series:
def normalize(
self,
*,
remove_punct: bool = True,
lowercase: bool = True,
nfd_unicode: bool = True,
white_space: bool = True,
remove_punct: bool = False,
lowercase: bool = False,
nfd_unicode: bool = False,
white_space: bool = False,
) -> Series:
if not isinstance(remove_punct, bool):
raise ValueError(f"expected bool for remove_punct but got {type(remove_punct)}")
Expand Down

0 comments on commit eeb4191

Please sign in to comment.