Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions doc/source/user_guide/10min.rst
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,7 @@ For a :class:`DataFrame`, passing a slice ``:`` selects matching rows:
.. ipython:: python

df[0:3]
df["20130102":"20130104"]
df["2013-01-02":"2013-01-04"]

Selection by label
~~~~~~~~~~~~~~~~~~
Expand All @@ -226,7 +226,7 @@ For label slicing, both endpoints are *included*:

.. ipython:: python

df.loc["20130102":"20130104", ["A", "B"]]
df.loc["2013-01-02":"2013-01-04", ["A", "B"]]

Selecting a single row and column label returns a scalar:

Expand Down
2 changes: 1 addition & 1 deletion doc/source/user_guide/indexing.rst
Original file line number Diff line number Diff line change
Expand Up @@ -314,7 +314,7 @@ Selection by label

.. ipython:: python
dfl.loc['20130102':'20130104']
dfl.loc['2013-01-02':'2013-01-04']
pandas provides a suite of methods in order to have **purely label based indexing**. This is a strict inclusion based protocol.
Every label asked for must be in the index, or a ``KeyError`` will be raised.
Expand Down
4 changes: 2 additions & 2 deletions doc/source/user_guide/timeseries.rst
Original file line number Diff line number Diff line change
Expand Up @@ -566,11 +566,11 @@ Dates and strings that parse to timestamps can be passed as indexing parameters:

.. ipython:: python

ts["1/31/2011"]
ts["2011-01-31"]

ts[datetime.datetime(2011, 12, 25):]

ts["10/31/2011":"12/31/2011"]
ts["2011-10-31":"2011-12-31"]

To provide convenience for accessing longer time series, you can also pass in
the year or year and month as strings:
Expand Down
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -805,6 +805,7 @@ Other Deprecations
- Deprecated allowing ``fill_value`` that cannot be held in the original dtype (excepting NA values for integer and bool dtypes) in :meth:`Series.shift` and :meth:`DataFrame.shift` (:issue:`53802`)
- Deprecated allowing strings representing full dates in :meth:`DataFrame.at_time` and :meth:`Series.at_time` (:issue:`50839`)
- Deprecated backward-compatibility behavior for :meth:`DataFrame.select_dtypes` matching "str" dtype when ``np.object_`` is specified (:issue:`61916`)
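- Deprecated non-ISO 8601 date strings (e.g. ``"1/10/2024"``) as labels when indexing or slicing a :class:`DatetimeIndex` via :meth:`DatetimeIndex.get_loc` and :meth:`DatetimeIndex.slice_indexer` (and therefore ``.loc``). Use ISO 8601 strings such as ``"2024-01-10"`` instead (:issue:`58302`)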
- Deprecated option "future.no_silent_downcasting", as it is no longer used. In a future version accessing this option will raise (:issue:`59502`)
- Deprecated passing non-Index types to :meth:`Index.join`; explicitly convert to Index first (:issue:`62897`)
- Deprecated silent casting of non-datetime 'other' to datetime in :meth:`Series.combine_first` (:issue:`62931`)
Expand Down
62 changes: 62 additions & 0 deletions pandas/core/indexes/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import datetime as dt
import operator
import re
from typing import (
TYPE_CHECKING,
Self,
Expand Down Expand Up @@ -110,6 +111,42 @@ def _new_DatetimeIndex(cls, d):
return result


def _is_iso_format_string(date_str: str) -> bool:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Credit to @stefmolin for the original idea, but I don't think we should try and roll our own regex here if we can avoid it.

I see the standard library provides date.fromisostring, although that is documented to not work with "Reduced Precision" dates:

https://docs.python.org/3/library/datetime.html

Even still I wonder if we can't use that first and only fallback when it fails

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks @stefmolin @WillAyd!. Switched to using date.fromisoformat() like you suggested. Added a regex fallback to handle the reduced precision dates (YYYY and YYYY-MM) that fromisoformat doesn't support .

"""
Check if a date string follows ISO8601 format.

Uses date.fromisoformat() to validate full ISO dates, with fallback to regex
for reduced precision dates (YYYY or YYYY-MM) which are not supported by
fromisoformat().

Examples of ISO format (True):
- 2024 (reduced precision)
- 2024-01 (reduced precision)
- 2024-01-10
- 2024-01-10T00:00:00

Examples of non-ISO format (False):
- 2024/01/10 (/ separator)
- 2024 01 10 (space separator)
- 01/10/2024 (MM/DD/YYYY)
- 10/01/2024 (DD/MM/YYYY)
- 01-10-2024 (MM-DD-YYYY)
"""
try:
# Standard library validates full ISO dates (YYYY-MM-DD format)
dt.date.fromisoformat(date_str)
return True
except (ValueError, TypeError):
# Fallback regex for reduced precision dates not supported by fromisoformat()
# Checks if string starts with ISO pattern (YYYY, YYYY-MM, YYYY-MM-DD, etc.)
# Pattern: ^\d{4}(?:-|T|$)
# - Requires exactly 4 digits at start (year)
# - Followed by: hyphen (YYYY-), T (YYYY-T...), or end (YYYY)
# Examples that match: "2024", "2024-01", "2024-01-10", "2024-01-10T00:00:00"
# Examples that don't: "01/10/2024", "2024 01 10", "1/1/2024"
return re.match(r"^\d{4}(?:-|T|$)", date_str) is not None


@inherit_names(
DatetimeArray._field_ops
+ [
Expand Down Expand Up @@ -613,6 +650,14 @@ def get_loc(self, key):
parsed, reso = self._parse_with_reso(key)
except ValueError as err:
raise KeyError(key) from err
# GH#58302 - Deprecate non-ISO string formats in .loc indexing
if not _is_iso_format_string(key):
msg = (
"Parsing non-ISO datetime strings in .loc is deprecated "
"and will be removed in a future version. Use ISO format "
f"(YYYY-MM-DD) instead. Got '{key}'."
)
warnings.warn(msg, Pandas4Warning, stacklevel=find_stack_level())
self._disallow_mismatched_indexing(parsed)

if self._can_partial_date_slice(reso):
Expand Down Expand Up @@ -688,6 +733,23 @@ def slice_indexer(self, start=None, end=None, step=None):
def check_str_or_none(point) -> bool:
return point is not None and not isinstance(point, str)

# GH#58302 - Deprecate non-ISO string formats in .loc indexing
if isinstance(start, str) and not _is_iso_format_string(start):
msg = (
"Parsing non-ISO datetime strings in .loc is deprecated "
"and will be removed in a future version. Use ISO format "
f"(YYYY-MM-DD) instead. Got '{start}'."
)
warnings.warn(msg, Pandas4Warning, stacklevel=find_stack_level())

if isinstance(end, str) and not _is_iso_format_string(end):
msg = (
"Parsing non-ISO datetime strings in .loc is deprecated "
"and will be removed in a future version. Use ISO format "
f"(YYYY-MM-DD) instead. Got '{end}'."
)
warnings.warn(msg, Pandas4Warning, stacklevel=find_stack_level())

# GH#33146 if start and end are combinations of str and None and Index is not
# monotonic, we can not use Index.slice_indexer because it does not honor the
# actual elements, is only searching for start and end
Expand Down
3 changes: 3 additions & 0 deletions pandas/tests/groupby/methods/test_value_counts.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,9 @@ def seed_df(seed_nans, n, m):
@pytest.mark.parametrize("bins", [None, [0, 5]], ids=repr)
@pytest.mark.parametrize("isort", [True, False])
@pytest.mark.parametrize("normalize, name", [(True, "proportion"), (False, "count")])
@pytest.mark.filterwarnings(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why does this test need to filter warnings? It seems unrelated to the change?

Copy link
Author

@raghavendranath raghavendranath Nov 11, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When the parametrized test groups by keys='2nd' (the date column), the groupby operation triggers the new deprecation warning internally. Without the filter, those test cases fail in CI, so I added the filterwarnings mark.

"ignore:Parsing non-ISO datetime strings:pandas.errors.Pandas4Warning"
)
def test_series_groupby_value_counts(
seed_nans,
num_rows,
Expand Down
3 changes: 3 additions & 0 deletions pandas/tests/groupby/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -2851,6 +2851,9 @@ def test_groupby_with_Time_Grouper(unit):
tm.assert_frame_equal(result, expected_output)


@pytest.mark.filterwarnings(
"ignore:Parsing non-ISO datetime strings:pandas.errors.Pandas4Warning"
)
def test_groupby_series_with_datetimeindex_month_name():
# GH 48509
s = Series([0, 1, 0], index=date_range("2022-01-01", periods=3), name="jan")
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/indexes/datetimes/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -491,9 +491,9 @@ def test_get_loc_timedelta_invalid_key(self, key):

def test_get_loc_reasonable_key_error(self):
# GH#1062
index = DatetimeIndex(["1/3/2000"])
index = DatetimeIndex(["2000-01-03"])
with pytest.raises(KeyError, match="2000"):
index.get_loc("1/1/2000")
index.get_loc("2000-01-01")

def test_get_loc_year_str(self):
rng = date_range("1/1/2000", "1/1/2010")
Expand Down
97 changes: 97 additions & 0 deletions pandas/tests/indexes/datetimes/test_partial_slicing.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
import numpy as np
import pytest

from pandas.errors import Pandas4Warning

from pandas import (
DataFrame,
DatetimeIndex,
Expand All @@ -19,6 +21,10 @@


class TestSlicing:
pytestmark = pytest.mark.filterwarnings(
"ignore:Parsing non-ISO datetime strings:pandas.errors.Pandas4Warning"
)

def test_string_index_series_name_converted(self):
# GH#1644
df = DataFrame(
Expand Down Expand Up @@ -464,3 +470,94 @@ def test_slice_reduce_to_series(self):
)
result = df.loc["2000", "A"]
tm.assert_series_equal(result, expected)


class TestDatetimeIndexNonISODeprecation:
    """Deprecation of non-ISO date strings as ``.loc`` labels. GH#58302"""

    # Leading part of the deprecation message, used as the ``match`` pattern
    # in every test below so the text is spelled out only once.
    _msg = "Parsing non-ISO datetime strings in .loc is deprecated"

    @pytest.fixture
    def ser_daily(self):
        """Series holding 0..14 on a daily index, 2024-01-01 to 2024-01-15."""
        # date_range already returns a DatetimeIndex, so no extra wrapping.
        return Series(
            range(15),
            index=date_range(start="2024-01-01", freq="D", periods=15),
        )

    @pytest.mark.parametrize(
        "date_string",
        [
            "1/10/2024",  # M/DD/YYYY, read month-first -> 2024-01-10
            "01/10/2024",  # MM/DD/YYYY, read month-first -> 2024-01-10
        ],
    )
    def test_loc_indexing_non_iso_single_key_deprecation(self, ser_daily, date_string):
        # GH#58302 - a non-ISO scalar key still resolves, but warns
        with tm.assert_produces_warning(Pandas4Warning, match=self._msg):
            result = ser_daily.loc[date_string]
        assert result == 9

    @pytest.mark.parametrize(
        "date_string,expected",
        [
            ("2024-01-10", 9),  # YYYY-MM-DD (ISO format)
        ],
    )
    def test_loc_indexing_iso_format_no_warning(self, ser_daily, date_string, expected):
        # GH#58302 - ISO format (YYYY-MM-DD) should NOT warn
        with tm.assert_produces_warning(None):
            result = ser_daily.loc[date_string]
        assert result == expected

    @pytest.mark.parametrize(
        "start_string",
        [
            "1/10/2024",  # M/DD/YYYY, read month-first -> 2024-01-10
            "01/10/2024",  # MM/DD/YYYY, read month-first -> 2024-01-10
        ],
    )
    def test_loc_slicing_non_iso_start_deprecation(self, ser_daily, start_string):
        # GH#58302 - Non-ISO start in slice should warn
        with tm.assert_produces_warning(Pandas4Warning, match=self._msg):
            result = ser_daily.loc[start_string:"2024-01-15"]
        # Jan 10 through Jan 15, both endpoints included.
        assert result.tolist() == list(range(9, 15))

    @pytest.mark.parametrize(
        "end_string",
        [
            # Hyphen-delimited dates are read month-first as well, so these
            # are Jan 5. The end bound therefore falls inside the index and
            # the slice is genuinely truncated.
            "1-05-2024",  # M-DD-YYYY -> 2024-01-05
            "01-05-2024",  # MM-DD-YYYY -> 2024-01-05
        ],
    )
    def test_loc_slicing_non_iso_end_deprecation(self, ser_daily, end_string):
        # GH#58302 - Non-ISO end in slice should warn
        with tm.assert_produces_warning(Pandas4Warning, match=self._msg):
            result = ser_daily.loc["2024-01-01":end_string]
        # Jan 1 through Jan 5, both endpoints included.
        assert result.tolist() == list(range(5))

    def test_loc_slicing_both_non_iso_deprecation(self, ser_daily):
        # GH#58302 - Both bounds are non-ISO. slice_indexer warns separately
        # for start and end; only the presence of the warning is asserted
        # here, not the count.
        with tm.assert_produces_warning(
            Pandas4Warning, match=self._msg, check_stacklevel=False
        ):
            result = ser_daily.loc["1/05/2024":"1-10-2024"]
        # Same window as the ISO slice test: Jan 5 through Jan 10.
        assert result.tolist() == list(range(4, 10))

    def test_loc_slicing_iso_formats_no_warning(self, ser_daily):
        # GH#58302 - ISO slice formats should NOT warn
        with tm.assert_produces_warning(None):
            result = ser_daily.loc["2024-01-05":"2024-01-10"]
        assert len(result) == 6

    def test_loc_non_string_keys_no_warning(self, ser_daily):
        # GH#58302 - Non-string keys should not warn
        with tm.assert_produces_warning(None):
            result = ser_daily.loc[Timestamp("2024-01-10")]
        assert result == 9
21 changes: 12 additions & 9 deletions pandas/tests/indexes/period/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,17 +174,17 @@ def test_getitem_list_periods(self):
@pytest.mark.arm_slow
def test_getitem_seconds(self):
# GH#6716
didx = date_range(start="2013/01/01 09:00:00", freq="s", periods=4000)
pidx = period_range(start="2013/01/01 09:00:00", freq="s", periods=4000)
didx = date_range(start="2013-01-01 09:00:00", freq="s", periods=4000)
pidx = period_range(start="2013-01-01 09:00:00", freq="s", periods=4000)

for idx in [didx, pidx]:
# getitem against index should raise ValueError
values = [
"2014",
"2013/02",
"2013/01/02",
"2013/02/01 9h",
"2013/02/01 09:00",
"2013-02",
"2013-01-02",
"2013-02-01 9h",
"2013-02-01 09:00",
]
for val in values:
# GH7116
Expand All @@ -194,9 +194,9 @@ def test_getitem_seconds(self):
idx[val]

ser = Series(np.random.default_rng(2).random(len(idx)), index=idx)
tm.assert_series_equal(ser["2013/01/01 10:00"], ser[3600:3660])
tm.assert_series_equal(ser["2013/01/01 9h"], ser[:3600])
for d in ["2013/01/01", "2013/01", "2013"]:
tm.assert_series_equal(ser["2013-01-01 10:00"], ser[3600:3660])
tm.assert_series_equal(ser["2013-01-01 9h"], ser[:3600])
for d in ["2013-01-01", "2013-01", "2013"]:
tm.assert_series_equal(ser[d], ser)

@pytest.mark.parametrize(
Expand All @@ -206,6 +206,9 @@ def test_getitem_seconds(self):
period_range,
],
)
@pytest.mark.filterwarnings(
"ignore:Parsing non-ISO datetime strings:pandas.errors.Pandas4Warning"
)
def test_getitem_day(self, idx_range):
# GH#6716
# Confirm DatetimeIndex and PeriodIndex works identically
Expand Down
4 changes: 4 additions & 0 deletions pandas/tests/indexes/period/test_partial_slicing.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@


class TestPeriodIndex:
pytestmark = pytest.mark.filterwarnings(
"ignore:Parsing non-ISO datetime strings:pandas.errors.Pandas4Warning"
)

def test_getitem_periodindex_duplicates_string_slice(self):
# monotonic
idx = PeriodIndex([2000, 2007, 2007, 2009, 2009], freq="Y-JUN")
Expand Down
3 changes: 3 additions & 0 deletions pandas/tests/indexing/multiindex/test_slice.py
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,9 @@ def test_multiindex_slicers_datetimelike(self):
]
tm.assert_frame_equal(result, expected)

@pytest.mark.filterwarnings(
"ignore:Parsing non-ISO datetime strings:pandas.errors.Pandas4Warning"
)
def test_multiindex_slicers_edges(self):
# GH 8132
# various edge cases
Expand Down
9 changes: 9 additions & 0 deletions pandas/tests/indexing/test_loc.py
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,9 @@ def test_loc_empty_slice_assignment_with_datetime(self, data):

class TestLocBaseIndependent:
# Tests for loc that do not depend on subclassing Base
@pytest.mark.filterwarnings(
"ignore:Parsing non-ISO datetime strings:pandas.errors.Pandas4Warning"
)
def test_loc_npstr(self):
# GH#45580
df = DataFrame(index=date_range("2021", "2022"))
Expand Down Expand Up @@ -1281,6 +1284,9 @@ def test_loc_setitem_str_to_small_float_conversion_type(self, using_infer_string
expected = DataFrame(col_data, columns=["A"], dtype=float)
tm.assert_frame_equal(result, expected)

@pytest.mark.filterwarnings(
"ignore:Parsing non-ISO datetime strings:pandas.errors.Pandas4Warning"
)
def test_loc_getitem_time_object(self, frame_or_series):
rng = date_range("1/1/2000", "1/5/2000", freq="5min")
mask = (rng.hour == 9) & (rng.minute == 30)
Expand Down Expand Up @@ -2434,6 +2440,9 @@ def test_loc_getitem_partial_slice_non_monotonicity(


class TestLabelSlicing:
@pytest.mark.filterwarnings(
"ignore:Parsing non-ISO datetime strings:pandas.errors.Pandas4Warning"
)
def test_loc_getitem_slicing_datetimes_frame(self):
# GH#7523

Expand Down
Loading
Loading