Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(python): Validate time_zone in pl.Datetime #16165

Draft
wants to merge 8 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 18 additions & 4 deletions py-polars/polars/datatypes/classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -458,11 +458,8 @@ def __init__(
)
raise ValueError(msg)

if isinstance(time_zone, timezone):
time_zone = str(time_zone)

self.time_unit = time_unit
self.time_zone = time_zone
self.time_zone = self._validate_timezone(time_zone)

def __eq__(self, other: PolarsDataType) -> bool: # type: ignore[override]
# allow comparing object instances to class
Expand All @@ -484,6 +481,23 @@ def __repr__(self) -> str:
f"{class_name}(time_unit={self.time_unit!r}, time_zone={self.time_zone!r})"
)

def _validate_timezone(self, time_zone: str | timezone | None) -> str | None:
if isinstance(time_zone, timezone):
return str(time_zone)
elif time_zone is None or time_zone == "*":
return time_zone
else:
from polars.dependencies import _ZONEINFO_AVAILABLE, zoneinfo

if _ZONEINFO_AVAILABLE:
if time_zone in zoneinfo.available_timezones():
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this match with the list of timezones we use on the Rust side? What are the differences if there are any?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the part that concerns me a little, I'm pretty sure I encountered some differences

return str(time_zone)
else:
msg = f"invalid time zone: {time_zone!r}, to see valid strings run `import zoneinfo; zoneinfo.available_timezones()`"
raise ValueError(msg)
else:
return time_zone


class Duration(TemporalType):
"""
Expand Down
9 changes: 6 additions & 3 deletions py-polars/tests/unit/datatypes/test_temporal.py
Original file line number Diff line number Diff line change
Expand Up @@ -1666,11 +1666,14 @@ def test_strptime_empty(time_unit: TimeUnit, time_zone: str | None) -> None:


def test_strptime_with_invalid_tz() -> None:
with pytest.raises(ComputeError, match="unable to parse time zone: 'foo'"):
with pytest.raises(
ValueError,
match="invalid time zone: 'foo'",
):
pl.Series(["2020-01-01 03:00:00"]).str.strptime(pl.Datetime("us", "foo"))
with pytest.raises(
ComputeError,
match="Please either drop the time zone from the function call, or set it to UTC",
ValueError,
match="invalid time zone: 'foo'",
):
pl.Series(["2020-01-01 03:00:00+01:00"]).str.strptime(
pl.Datetime("us", "foo"), "%Y-%m-%d %H:%M:%S%z"
Expand Down
4 changes: 2 additions & 2 deletions py-polars/tests/unit/interchange/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,8 @@
(pl.Datetime, (DtypeKind.DATETIME, 64, "tsu:", NE)),
(pl.Datetime(time_unit="ms"), (DtypeKind.DATETIME, 64, "tsm:", NE)),
(
pl.Datetime(time_zone="Amsterdam/Europe"),
(DtypeKind.DATETIME, 64, "tsu:Amsterdam/Europe", NE),
pl.Datetime(time_zone="Europe/Amsterdam"),
(DtypeKind.DATETIME, 64, "tsu:Europe/Amsterdam", NE),
),
(
pl.Datetime(time_unit="ns", time_zone="Asia/Seoul"),
Expand Down
2 changes: 1 addition & 1 deletion py-polars/tests/unit/test_errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -505,7 +505,7 @@ def test_err_on_time_datetime_cast() -> None:

def test_err_on_invalid_time_zone_cast() -> None:
s = pl.Series([datetime(2021, 1, 1)])
with pytest.raises(pl.ComputeError, match=r"unable to parse time zone: 'qwerty'"):
with pytest.raises(ValueError, match=r"invalid time zone: 'qwerty'"):
s.cast(pl.Datetime("us", "qwerty"))


Expand Down