Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(rust): Don't silently produce null values from invalid input to pl.datetime and pl.date #21013

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions crates/polars-time/src/chunkedarray/date.rs
Original file line number Diff line number Diff line change
Expand Up @@ -90,8 +90,10 @@ pub trait DateMethods: AsDate {
.zip(day)
.map(|((y, m), d)| {
if let (Some(y), Some(m), Some(d)) = (y, m, d) {
NaiveDate::from_ymd_opt(y, m as u32, d as u32)
.map(|t| t.num_days_from_ce() - EPOCH_DAYS_FROM_CE)
let Some(ns) = NaiveDate::from_ymd_opt(y, m as u32, d as u32) else {
panic!("Invalid date components ({}, {}, {}) supplied", y, m, d)
};
Some(ns.num_days_from_ce() - EPOCH_DAYS_FROM_CE)
} else {
None
}
Expand Down
25 changes: 16 additions & 9 deletions crates/polars-time/src/chunkedarray/datetime.rs
Original file line number Diff line number Diff line change
Expand Up @@ -179,21 +179,28 @@ pub trait DatetimeMethods: AsDatetime {
if let (Some(y), Some(m), Some(d), Some(h), Some(mnt), Some(s), Some(ns)) =
(y, m, d, h, mnt, s, ns)
{
NaiveDate::from_ymd_opt(y, m as u32, d as u32)
.and_then(|nd| {
nd.and_hms_nano_opt(h as u32, mnt as u32, s as u32, ns as u32)
})
.map(|ndt| match time_unit {
TimeUnit::Milliseconds => ndt.and_utc().timestamp_millis(),
TimeUnit::Microseconds => ndt.and_utc().timestamp_micros(),
TimeUnit::Nanoseconds => ndt.and_utc().timestamp_nanos_opt().unwrap(),
})
let Some(t) = NaiveDate::from_ymd_opt(y, m as u32, d as u32) else {
panic!("Invalid date components ({}, {}, {}) supplied", y, m, d)
};
let Some(ndt) = t.and_hms_nano_opt(h as u32, mnt as u32, s as u32, ns as u32)
else {
panic!(
"Invalid time components ({}, {}, {}, {}) supplied",
h, mnt, s, ns
)
};
Some(match time_unit {
TimeUnit::Milliseconds => ndt.and_utc().timestamp_millis(),
TimeUnit::Microseconds => ndt.and_utc().timestamp_micros(),
TimeUnit::Nanoseconds => ndt.and_utc().timestamp_nanos_opt().unwrap(),
})
} else {
None
}
})
.collect_trusted();

println!("here");
let mut ca = match time_zone {
#[cfg(feature = "timezones")]
Some(_) => {
Expand Down
49 changes: 48 additions & 1 deletion py-polars/tests/unit/functions/as_datatype/test_datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import pytest

import polars as pl
from polars.exceptions import ComputeError
from polars.exceptions import ComputeError, PanicException
from polars.testing import assert_series_equal

if TYPE_CHECKING:
Expand All @@ -32,6 +32,53 @@ def test_date_datetime() -> None:
assert_series_equal(out["h2"], df["hour"].rename("h2"))


@pytest.mark.parametrize(
    "components",
    [[2025, 13, 1], [2025, 1, 32], [2025, 2, 29]],
)
def test_date_invalid_component(components: list[int]) -> None:
    """
    `pl.date` must fail loudly on a (year, month, day) triple that is not a date.

    Each parametrized case holds exactly one impossible component: month 13,
    day 32, and 29 February in the non-leap year 2025.
    """
    y, m, d = components
    # The parentheses are escaped because `match` treats the text as a regex.
    expected_pattern = rf"Invalid date components \({y}, {m}, {d}\) supplied"
    with pytest.raises(PanicException, match=expected_pattern):
        invalid_date = pl.date(*components)
        pl.select(invalid_date)


@pytest.mark.parametrize(
    "components",
    [
        [2025, 13, 1, 0, 0, 0, 0],
        [2025, 1, 32, 0, 0, 0, 0],
        [2025, 2, 29, 0, 0, 0, 0],
    ],
)
def test_datetime_invalid_date_component(components: list[int]) -> None:
    """
    `pl.datetime` must fail loudly when its date part is not a real date.

    The time fields are all zero in every case, so only the leading
    (year, month, day) triple can trigger the failure, and only that triple is
    expected in the message.
    """
    y, m, d = components[:3]
    # The parentheses are escaped because `match` treats the text as a regex.
    expected_pattern = rf"Invalid date components \({y}, {m}, {d}\) supplied"
    with pytest.raises(PanicException, match=expected_pattern):
        invalid_datetime = pl.datetime(*components)
        pl.select(invalid_datetime)


@pytest.mark.parametrize(
    "components",
    [
        [2025, 1, 1, 25, 0, 0, 0],
        [2025, 1, 1, 0, 60, 0, 0],
        [2025, 1, 1, 0, 0, 60, 0],
        [2025, 1, 1, 0, 0, 0, 2_000_000],
    ],
)
def test_datetime_invalid_time_component(components: list[int]) -> None:
    """
    `pl.datetime` must fail loudly when a time field is out of range.

    The date part (2025-01-01) is valid in every case; exactly one of hour,
    minute, second or the sub-second field is too large.
    """
    h, mnt, s, us = components[3:]
    # The last argument is given in microseconds, whereas the expected message
    # quotes the sub-second part in nanoseconds.
    ns = us * 1_000
    expected_pattern = rf"Invalid time components \({h}, {mnt}, {s}, {ns}\) supplied"
    with pytest.raises(PanicException, match=expected_pattern):
        invalid_datetime = pl.datetime(*components)
        pl.select(invalid_datetime)


@pytest.mark.parametrize("time_unit", ["ms", "us", "ns"])
def test_datetime_time_unit(time_unit: TimeUnit) -> None:
result = pl.datetime(2022, 1, 2, time_unit=time_unit)
Expand Down
5 changes: 0 additions & 5 deletions py-polars/tests/unit/functions/range/test_date_range.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,6 @@ def test_date_range_invalid_time_unit() -> None:
)


def test_date_range_invalid_time() -> None:
    """A `date_range` whose end is built from 2024-02-30 must raise `ComputeError`."""
    with pytest.raises(ComputeError, match="end is an out-of-range time"):
        range_start = pl.date(2024, 1, 1)
        # 30 February does not exist, so this is the out-of-range end bound.
        range_end = pl.date(2024, 2, 30)
        pl.date_range(range_start, range_end, eager=True)


def test_date_range_lazy_with_literals() -> None:
df = pl.DataFrame({"misc": ["x"]}).with_columns(
pl.date_ranges(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import pytest

import polars as pl
from polars.exceptions import ComputeError
from polars.exceptions import ComputeError, PanicException
from polars.testing import assert_frame_equal, assert_series_equal

if TYPE_CHECKING:
Expand Down Expand Up @@ -298,3 +298,58 @@ def test_replace_preserve_tu_and_tz(tu: TimeUnit, tzinfo: str) -> None:
result = s.dt.replace(year=2000)
assert result.dtype.time_unit == tu # type: ignore[attr-defined]
assert result.dtype.time_zone == tzinfo # type: ignore[attr-defined]


def test_replace_date_invalid_components() -> None:
    """
    `dt.replace` on a Date column must fail loudly for an impossible month or day.

    The column holds the single date 2025-01-01, so the expected message shows
    that date with only the replaced component changed.
    """
    frame = pl.DataFrame({"a": [date(2025, 1, 1)]})

    # Month 13 does not exist.
    bad_month_pattern = r"Invalid date components \(2025, 13, 1\) supplied"
    with pytest.raises(PanicException, match=bad_month_pattern):
        frame.select(pl.col("a").dt.replace(month=13))

    # January has only 31 days.
    bad_day_pattern = r"Invalid date components \(2025, 1, 32\) supplied"
    with pytest.raises(PanicException, match=bad_day_pattern):
        frame.select(pl.col("a").dt.replace(day=32))


def test_replace_datetime_invalid_date_components() -> None:
    """
    `dt.replace` on a Datetime column must fail loudly for an impossible month or day.

    The column holds the single value 2025-01-01 00:00:00. Only the date triple
    is expected in the message because the time part stays valid.
    """
    frame = pl.DataFrame({"a": [datetime(2025, 1, 1)]})

    # Month 13 does not exist.
    bad_month_pattern = r"Invalid date components \(2025, 13, 1\) supplied"
    with pytest.raises(PanicException, match=bad_month_pattern):
        frame.select(pl.col("a").dt.replace(month=13))

    # January has only 31 days.
    bad_day_pattern = r"Invalid date components \(2025, 1, 32\) supplied"
    with pytest.raises(PanicException, match=bad_day_pattern):
        frame.select(pl.col("a").dt.replace(day=32))


def test_replace_datetime_invalid_time_components() -> None:
    """
    `dt.replace` on a Datetime column must fail loudly for an out-of-range time field.

    The column holds the single value 2025-01-01 00:00:00, so each expected
    message shows zeros everywhere except the one replaced field.
    """
    frame = pl.DataFrame({"a": [datetime(2025, 1, 1)]})

    # Hour: 25 is past the end of the day.
    bad_hour_pattern = r"Invalid time components \(25, 0, 0, 0\) supplied"
    with pytest.raises(PanicException, match=bad_hour_pattern):
        frame.select(pl.col("a").dt.replace(hour=25))

    # Minute: 61 is out of range.
    bad_minute_pattern = r"Invalid time components \(0, 61, 0, 0\) supplied"
    with pytest.raises(PanicException, match=bad_minute_pattern):
        frame.select(pl.col("a").dt.replace(minute=61))

    # Second: 61 is out of range.
    bad_second_pattern = r"Invalid time components \(0, 0, 61, 0\) supplied"
    with pytest.raises(PanicException, match=bad_second_pattern):
        frame.select(pl.col("a").dt.replace(second=61))

    # Microsecond: 2_000_000 microseconds is quoted in the message as
    # 2_000_000_000 (nanoseconds).
    bad_subsecond_pattern = r"Invalid time components \(0, 0, 0, 2000000000\) supplied"
    with pytest.raises(PanicException, match=bad_subsecond_pattern):
        frame.select(pl.col("a").dt.replace(microsecond=2_000_000))
Loading