Skip to content

Commit 9b82f96

Browse files
authored
DEPR: verify_integrity in DataFrame.set_index (#63033)
1 parent f1904ae commit 9b82f96

File tree

3 files changed

+25
-5
lines changed

3 files changed

+25
-5
lines changed

doc/source/whatsnew/v3.0.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -798,6 +798,7 @@ Other Deprecations
798798
- Deprecated lowercase strings ``w``, ``w-mon``, ``w-tue``, etc. denoting frequencies in :class:`Week` in favour of ``W``, ``W-MON``, ``W-TUE``, etc. (:issue:`58998`)
799799
- Deprecated parameter ``method`` in :meth:`DataFrame.reindex_like` / :meth:`Series.reindex_like` (:issue:`58667`)
800800
- Deprecated strings ``w``, ``d``, ``MIN``, ``MS``, ``US`` and ``NS`` denoting units in :class:`Timedelta` in favour of ``W``, ``D``, ``min``, ``ms``, ``us`` and ``ns`` (:issue:`59051`)
801+
- Deprecated the ``verify_integrity`` keyword in :meth:`DataFrame.set_index`; check ``obj.index.is_unique`` on the result instead (:issue:`62919`)
801802
- Deprecated the ``arg`` parameter of ``Series.map``; pass the added ``func`` argument instead. (:issue:`61260`)
802803
- Deprecated using ``epoch`` date format in :meth:`DataFrame.to_json` and :meth:`Series.to_json`, use ``iso`` instead. (:issue:`57063`)
803804
- Deprecated allowing ``fill_value`` that cannot be held in the original dtype (excepting NA values for integer and bool dtypes) in :meth:`Series.unstack` and :meth:`DataFrame.unstack` (:issue:`12189`, :issue:`53868`)

pandas/core/frame.py

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6225,7 +6225,7 @@ def set_index(
62256225
drop: bool = ...,
62266226
append: bool = ...,
62276227
inplace: Literal[False] = ...,
6228-
verify_integrity: bool = ...,
6228+
verify_integrity: bool | lib.NoDefault = ...,
62296229
) -> DataFrame: ...
62306230

62316231
@overload
@@ -6236,7 +6236,7 @@ def set_index(
62366236
drop: bool = ...,
62376237
append: bool = ...,
62386238
inplace: Literal[True],
6239-
verify_integrity: bool = ...,
6239+
verify_integrity: bool | lib.NoDefault = ...,
62406240
) -> None: ...
62416241

62426242
def set_index(
@@ -6246,7 +6246,7 @@ def set_index(
62466246
drop: bool = True,
62476247
append: bool = False,
62486248
inplace: bool = False,
6249-
verify_integrity: bool = False,
6249+
verify_integrity: bool | lib.NoDefault = lib.no_default,
62506250
) -> DataFrame | None:
62516251
"""
62526252
Set the DataFrame index using existing columns.
@@ -6276,6 +6276,8 @@ def set_index(
62766276
necessary. Setting to False will improve the performance of this
62776277
method.
62786278
6279+
.. deprecated:: 3.0.0
6280+
62796281
Returns
62806282
-------
62816283
DataFrame or None
@@ -6362,6 +6364,18 @@ def set_index(
63626364
2013 84
63636365
2014 31
63646366
"""
6367+
if verify_integrity is not lib.no_default:
6368+
# GH#62919
6369+
warnings.warn(
6370+
"The 'verify_integrity' keyword in DataFrame.set_index is "
6371+
"deprecated and will be removed in a future version. "
6372+
"Directly check the result.index.is_unique instead.",
6373+
Pandas4Warning,
6374+
stacklevel=find_stack_level(),
6375+
)
6376+
else:
6377+
verify_integrity = False
6378+
63656379
inplace = validate_bool_kwarg(inplace, "inplace")
63666380
self._check_inplace_and_allows_duplicate_labels(inplace)
63676381
if not isinstance(keys, list):

pandas/tests/frame/methods/test_set_index.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
import numpy as np
1111
import pytest
1212

13+
from pandas.errors import Pandas4Warning
14+
1315
from pandas import (
1416
Categorical,
1517
CategoricalIndex,
@@ -547,11 +549,14 @@ class TestSetIndexInvalid:
547549
def test_set_index_verify_integrity(self, frame_of_index_cols):
548550
df = frame_of_index_cols
549551

552+
msg = "The 'verify_integrity' keyword in DataFrame.set_index"
550553
with pytest.raises(ValueError, match="Index has duplicate keys"):
551-
df.set_index("A", verify_integrity=True)
554+
with tm.assert_produces_warning(Pandas4Warning, match=msg):
555+
df.set_index("A", verify_integrity=True)
552556
# with MultiIndex
553557
with pytest.raises(ValueError, match="Index has duplicate keys"):
554-
df.set_index([df["A"], df["A"]], verify_integrity=True)
558+
with tm.assert_produces_warning(Pandas4Warning, match=msg):
559+
df.set_index([df["A"], df["A"]], verify_integrity=True)
555560

556561
@pytest.mark.parametrize("append", [True, False])
557562
@pytest.mark.parametrize("drop", [True, False])

0 commit comments

Comments
 (0)