Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/main' into sty/ruff/docstrings
Browse files Browse the repository at this point in the history
  • Loading branch information
mroeschke committed Dec 21, 2023
2 parents 91fb539 + 4ac340e commit 534f4f0
Show file tree
Hide file tree
Showing 17 changed files with 201 additions and 49 deletions.
1 change: 1 addition & 0 deletions .github/workflows/broken-linkcheck.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ on:
- "doc/make.py"
jobs:
linkcheck:
if: false
runs-on: ubuntu-latest
defaults:
run:
Expand Down
3 changes: 3 additions & 0 deletions doc/source/user_guide/io.rst
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,9 @@ delim_whitespace : boolean, default False
If this option is set to ``True``, nothing should be passed in for the
``delimiter`` parameter.

.. deprecated:: 2.2.0
Use ``sep="\\s+"`` instead.
Column and index locations and names
++++++++++++++++++++++++++++++++++++

Expand Down
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v2.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -484,6 +484,7 @@ Other Deprecations
- Deprecated support for combining parsed datetime columns in :func:`read_csv` along with the ``keep_date_col`` keyword (:issue:`55569`)
- Deprecated the :attr:`.DataFrameGroupBy.grouper` and :attr:`SeriesGroupBy.grouper`; these attributes will be removed in a future version of pandas (:issue:`56521`)
- Deprecated the :class:`.Grouping` attributes ``group_index``, ``result_index``, and ``group_arraylike``; these will be removed in a future version of pandas (:issue:`56148`)
- Deprecated the ``delim_whitespace`` keyword in :func:`read_csv` and :func:`read_table`, use ``sep="\\s+"`` instead (:issue:`55569`)
- Deprecated the ``errors="ignore"`` option in :func:`to_datetime`, :func:`to_timedelta`, and :func:`to_numeric`; explicitly catch exceptions instead (:issue:`54467`)
- Deprecated the ``fastpath`` keyword in the :class:`Series` constructor (:issue:`20110`)
- Deprecated the ``kind`` keyword in :meth:`Series.resample` and :meth:`DataFrame.resample`, explicitly cast the object's ``index`` instead (:issue:`55895`)
Expand Down Expand Up @@ -618,6 +619,7 @@ Indexing
- Bug in :meth:`DataFrame.loc` when setting :class:`Series` with extension dtype into NumPy dtype (:issue:`55604`)
- Bug in :meth:`Index.difference` not returning a unique set of values when ``other`` is empty or ``other`` is considered non-comparable (:issue:`55113`)
- Bug in setting :class:`Categorical` values into a :class:`DataFrame` with numpy dtypes raising ``RecursionError`` (:issue:`52927`)
- Fixed bug when creating new column with missing values when setting a single string value (:issue:`56204`)

Missing
^^^^^^^
Expand Down
9 changes: 8 additions & 1 deletion pandas/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@
from pandas.core.construction import (
array as pd_array,
extract_array,
sanitize_array,
)
from pandas.core.indexers import (
check_array_indexer,
Expand Down Expand Up @@ -1904,7 +1905,13 @@ def _setitem_with_indexer(self, indexer, value, name: str = "iloc"):
return

self.obj[key] = empty_value

elif not is_list_like(value):
# Find our empty_value dtype by constructing an array
# from our value and doing a .take on it
arr = sanitize_array(value, Index(range(1)), copy=False)
taker = -1 * np.ones(len(self.obj), dtype=np.intp)
empty_value = algos.take_nd(arr, taker)
self.obj[key] = empty_value
else:
# FIXME: GH#42099#issuecomment-864326014
self.obj[key] = infer_fill_value(value)
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -746,7 +746,7 @@ def astype(
Block
"""
values = self.values
if squeeze and values.ndim == 2:
if squeeze and values.ndim == 2 and is_1d_only_ea_dtype(dtype):
if values.shape[0] != 1:
raise ValueError("Can not squeeze with more than one column.")
values = values[0, :] # type: ignore[call-overload]
Expand Down
40 changes: 34 additions & 6 deletions pandas/io/parsers/readers.py
Original file line number Diff line number Diff line change
Expand Up @@ -403,6 +403,9 @@
used as the ``sep`` delimiter. Equivalent to setting ``sep='\\s+'``. If this option
is set to ``True``, nothing should be passed in for the ``delimiter``
parameter.
.. deprecated:: 2.2.0
Use ``sep="\\s+"`` instead.
low_memory : bool, default True
Internally process the file in chunks, resulting in lower memory use
while parsing, but possibly mixed type inference. To ensure no mixed
Expand Down Expand Up @@ -670,7 +673,7 @@ def read_csv(
encoding_errors: str | None = ...,
dialect: str | csv.Dialect | None = ...,
on_bad_lines=...,
delim_whitespace: bool = ...,
delim_whitespace: bool | lib.NoDefault = ...,
low_memory: bool = ...,
memory_map: bool = ...,
float_precision: Literal["high", "legacy"] | None = ...,
Expand Down Expand Up @@ -730,7 +733,7 @@ def read_csv(
encoding_errors: str | None = ...,
dialect: str | csv.Dialect | None = ...,
on_bad_lines=...,
delim_whitespace: bool = ...,
delim_whitespace: bool | lib.NoDefault = ...,
low_memory: bool = ...,
memory_map: bool = ...,
float_precision: Literal["high", "legacy"] | None = ...,
Expand Down Expand Up @@ -790,7 +793,7 @@ def read_csv(
encoding_errors: str | None = ...,
dialect: str | csv.Dialect | None = ...,
on_bad_lines=...,
delim_whitespace: bool = ...,
delim_whitespace: bool | lib.NoDefault = ...,
low_memory: bool = ...,
memory_map: bool = ...,
float_precision: Literal["high", "legacy"] | None = ...,
Expand Down Expand Up @@ -850,7 +853,7 @@ def read_csv(
encoding_errors: str | None = ...,
dialect: str | csv.Dialect | None = ...,
on_bad_lines=...,
delim_whitespace: bool = ...,
delim_whitespace: bool | lib.NoDefault = ...,
low_memory: bool = ...,
memory_map: bool = ...,
float_precision: Literal["high", "legacy"] | None = ...,
Expand Down Expand Up @@ -928,7 +931,7 @@ def read_csv(
# Error Handling
on_bad_lines: str = "error",
# Internal
delim_whitespace: bool = False,
delim_whitespace: bool | lib.NoDefault = lib.no_default,
low_memory: bool = _c_parser_defaults["low_memory"],
memory_map: bool = False,
float_precision: Literal["high", "legacy"] | None = None,
Expand Down Expand Up @@ -978,6 +981,17 @@ def read_csv(
stacklevel=find_stack_level(),
)

if delim_whitespace is not lib.no_default:
# GH#55569
warnings.warn(
"The 'delim_whitespace' keyword in pd.read_csv is deprecated and "
"will be removed in a future version. Use ``sep='\\s+'`` instead",
FutureWarning,
stacklevel=find_stack_level(),
)
else:
delim_whitespace = False

if verbose is not lib.no_default:
# GH#55569
warnings.warn(
Expand Down Expand Up @@ -1305,7 +1319,7 @@ def read_table(
# Error Handling
on_bad_lines: str = "error",
# Internal
delim_whitespace: bool = False,
delim_whitespace: bool | lib.NoDefault = lib.no_default,
low_memory: bool = _c_parser_defaults["low_memory"],
memory_map: bool = False,
float_precision: str | None = None,
Expand Down Expand Up @@ -1346,6 +1360,17 @@ def read_table(
stacklevel=find_stack_level(),
)

if delim_whitespace is not lib.no_default:
# GH#55569
warnings.warn(
"The 'delim_whitespace' keyword in pd.read_table is deprecated and "
"will be removed in a future version. Use ``sep='\\s+'`` instead",
FutureWarning,
stacklevel=find_stack_level(),
)
else:
delim_whitespace = False

if verbose is not lib.no_default:
# GH#55569
warnings.warn(
Expand Down Expand Up @@ -2131,6 +2156,9 @@ def _refine_defaults_read(
used as the sep. Equivalent to setting ``sep='\\s+'``. If this option
is set to True, nothing should be passed in for the ``delimiter``
parameter.
.. deprecated:: 2.2.0
Use ``sep="\\s+"`` instead.
engine : {{'c', 'python'}}
Parser engine to use. The C engine is faster while the python engine is
currently more feature-complete.
Expand Down
20 changes: 20 additions & 0 deletions pandas/tests/frame/indexing/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1935,6 +1935,26 @@ def test_adding_new_conditional_column() -> None:
tm.assert_frame_equal(df, expected)


@pytest.mark.parametrize(
("dtype", "infer_string"),
[
(object, False),
("string[pyarrow_numpy]", True),
],
)
def test_adding_new_conditional_column_with_string(dtype, infer_string) -> None:
# https://github.com/pandas-dev/pandas/issues/56204
pytest.importorskip("pyarrow")

df = DataFrame({"a": [1, 2], "b": [3, 4]})
with pd.option_context("future.infer_string", infer_string):
df.loc[df["a"] == 1, "c"] = "1"
expected = DataFrame({"a": [1, 2], "b": [3, 4], "c": ["1", float("nan")]}).astype(
{"a": "int64", "b": "int64", "c": dtype}
)
tm.assert_frame_equal(df, expected)


def test_add_new_column_infer_string():
# GH#55366
pytest.importorskip("pyarrow")
Expand Down
5 changes: 1 addition & 4 deletions pandas/tests/frame/indexing/test_set_value.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,7 @@ def test_set_value_resize(self, float_frame, using_infer_string):
else:
assert res["baz"].dtype == np.object_
res = float_frame.copy()
with tm.assert_produces_warning(
FutureWarning, match="Setting an item of incompatible dtype"
):
res._set_value("foobar", "baz", True)
res._set_value("foobar", "baz", True)
assert res["baz"].dtype == np.object_

res = float_frame.copy()
Expand Down
7 changes: 7 additions & 0 deletions pandas/tests/frame/methods/test_convert_dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,3 +193,10 @@ def test_convert_dtypes_avoid_block_splitting(self):
)
tm.assert_frame_equal(result, expected)
assert result._mgr.nblocks == 2

def test_convert_dtypes_from_arrow(self):
# GH#56581
df = pd.DataFrame([["a", datetime.time(18, 12)]], columns=["a", "b"])
result = df.convert_dtypes()
expected = df.astype({"a": "string[python]"})
tm.assert_frame_equal(result, expected)
75 changes: 56 additions & 19 deletions pandas/tests/io/parser/common/test_common_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -500,13 +500,21 @@ def test_trailing_spaces(all_parsers, kwargs, expected):
data = "A B C \nrandom line with trailing spaces \nskip\n1,2,3\n1,2.,4.\nrandom line with trailing tabs\t\t\t\n \n5.1,NaN,10.0\n" # noqa: E501
parser = all_parsers

depr_msg = "The 'delim_whitespace' keyword in pd.read_csv is deprecated"

if parser.engine == "pyarrow":
msg = "The 'delim_whitespace' option is not supported with the 'pyarrow' engine"
with pytest.raises(ValueError, match=msg):
parser.read_csv(StringIO(data.replace(",", " ")), **kwargs)
with tm.assert_produces_warning(
FutureWarning, match=depr_msg, check_stacklevel=False
):
parser.read_csv(StringIO(data.replace(",", " ")), **kwargs)
return

result = parser.read_csv(StringIO(data.replace(",", " ")), **kwargs)
with tm.assert_produces_warning(
FutureWarning, match=depr_msg, check_stacklevel=False
):
result = parser.read_csv(StringIO(data.replace(",", " ")), **kwargs)
tm.assert_frame_equal(result, expected)


Expand All @@ -515,8 +523,12 @@ def test_raise_on_sep_with_delim_whitespace(all_parsers):
data = "a b c\n1 2 3"
parser = all_parsers

depr_msg = "The 'delim_whitespace' keyword in pd.read_csv is deprecated"
with pytest.raises(ValueError, match="you can only specify one"):
parser.read_csv(StringIO(data), sep=r"\s", delim_whitespace=True)
with tm.assert_produces_warning(
FutureWarning, match=depr_msg, check_stacklevel=False
):
parser.read_csv(StringIO(data), sep=r"\s", delim_whitespace=True)


def test_read_filepath_or_buffer(all_parsers):
Expand All @@ -539,18 +551,27 @@ def test_single_char_leading_whitespace(all_parsers, delim_whitespace):
b\n"""

expected = DataFrame({"MyColumn": list("abab")})
depr_msg = "The 'delim_whitespace' keyword in pd.read_csv is deprecated"

if parser.engine == "pyarrow":
msg = "The 'skipinitialspace' option is not supported with the 'pyarrow' engine"
with pytest.raises(ValueError, match=msg):
parser.read_csv(
StringIO(data), skipinitialspace=True, delim_whitespace=delim_whitespace
)
with tm.assert_produces_warning(
FutureWarning, match=depr_msg, check_stacklevel=False
):
parser.read_csv(
StringIO(data),
skipinitialspace=True,
delim_whitespace=delim_whitespace,
)
return

result = parser.read_csv(
StringIO(data), skipinitialspace=True, delim_whitespace=delim_whitespace
)
with tm.assert_produces_warning(
FutureWarning, match=depr_msg, check_stacklevel=False
):
result = parser.read_csv(
StringIO(data), skipinitialspace=True, delim_whitespace=delim_whitespace
)
tm.assert_frame_equal(result, expected)


Expand Down Expand Up @@ -798,12 +819,20 @@ def test_read_table_delim_whitespace_default_sep(all_parsers):
f = StringIO("a b c\n1 -2 -3\n4 5 6")
parser = all_parsers

depr_msg = "The 'delim_whitespace' keyword in pd.read_table is deprecated"

if parser.engine == "pyarrow":
msg = "The 'delim_whitespace' option is not supported with the 'pyarrow' engine"
with pytest.raises(ValueError, match=msg):
parser.read_table(f, delim_whitespace=True)
with tm.assert_produces_warning(
FutureWarning, match=depr_msg, check_stacklevel=False
):
parser.read_table(f, delim_whitespace=True)
return
result = parser.read_table(f, delim_whitespace=True)
with tm.assert_produces_warning(
FutureWarning, match=depr_msg, check_stacklevel=False
):
result = parser.read_table(f, delim_whitespace=True)
expected = DataFrame({"a": [1, 4], "b": [-2, 5], "c": [-3, 6]})
tm.assert_frame_equal(result, expected)

Expand All @@ -817,11 +846,15 @@ def test_read_csv_delim_whitespace_non_default_sep(all_parsers, delimiter):
"Specified a delimiter with both sep and "
"delim_whitespace=True; you can only specify one."
)
with pytest.raises(ValueError, match=msg):
parser.read_csv(f, delim_whitespace=True, sep=delimiter)
depr_msg = "The 'delim_whitespace' keyword in pd.read_csv is deprecated"
with tm.assert_produces_warning(
FutureWarning, match=depr_msg, check_stacklevel=False
):
with pytest.raises(ValueError, match=msg):
parser.read_csv(f, delim_whitespace=True, sep=delimiter)

with pytest.raises(ValueError, match=msg):
parser.read_csv(f, delim_whitespace=True, delimiter=delimiter)
with pytest.raises(ValueError, match=msg):
parser.read_csv(f, delim_whitespace=True, delimiter=delimiter)


def test_read_csv_delimiter_and_sep_no_default(all_parsers):
Expand Down Expand Up @@ -858,11 +891,15 @@ def test_read_table_delim_whitespace_non_default_sep(all_parsers, delimiter):
"Specified a delimiter with both sep and "
"delim_whitespace=True; you can only specify one."
)
with pytest.raises(ValueError, match=msg):
parser.read_table(f, delim_whitespace=True, sep=delimiter)
depr_msg = "The 'delim_whitespace' keyword in pd.read_table is deprecated"
with tm.assert_produces_warning(
FutureWarning, match=depr_msg, check_stacklevel=False
):
with pytest.raises(ValueError, match=msg):
parser.read_table(f, delim_whitespace=True, sep=delimiter)

with pytest.raises(ValueError, match=msg):
parser.read_table(f, delim_whitespace=True, delimiter=delimiter)
with pytest.raises(ValueError, match=msg):
parser.read_table(f, delim_whitespace=True, delimiter=delimiter)


@skip_pyarrow
Expand Down
6 changes: 5 additions & 1 deletion pandas/tests/io/parser/test_c_parser_only.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,11 @@ def test_delim_whitespace_custom_terminator(c_parser_only):
data = "a b c~1 2 3~4 5 6~7 8 9"
parser = c_parser_only

df = parser.read_csv(StringIO(data), lineterminator="~", delim_whitespace=True)
depr_msg = "The 'delim_whitespace' keyword in pd.read_csv is deprecated"
with tm.assert_produces_warning(
FutureWarning, match=depr_msg, check_stacklevel=False
):
df = parser.read_csv(StringIO(data), lineterminator="~", delim_whitespace=True)
expected = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], columns=["a", "b", "c"])
tm.assert_frame_equal(df, expected)

Expand Down
Loading

0 comments on commit 534f4f0

Please sign in to comment.