Skip to content

Commit f1904ae

Browse files
CoW: add readonly flag to ExtensionArrays, return read-only EA/ndarray in .array/EA.to_numpy() (#61925)
1 parent ee3ade7 commit f1904ae

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

43 files changed

+514
-56
lines changed

doc/source/whatsnew/v3.0.0.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1359,6 +1359,10 @@ Other
13591359
- Fixed bug in :meth:`Series.rank` with object dtype and extremely small float values (:issue:`62036`)
13601360
- Fixed bug where the :class:`DataFrame` constructor misclassified array-like objects with a ``.name`` attribute as :class:`Series` or :class:`Index` (:issue:`61443`)
13611361
- Fixed regression in :meth:`DataFrame.from_records` not initializing subclasses properly (:issue:`57008`)
1362+
- Accessing the underlying NumPy array of a DataFrame or Series will return a read-only
1363+
array if the array shares data with the original DataFrame or Series (:ref:`copy_on_write_read_only_na`).
1364+
This behavior is extended to accessing the underlying pandas ExtensionArray
1365+
through ``.array`` (or ``.values`` depending on the dtype) as well (:issue:`61925`).
13621366

13631367
.. ***DO NOT USE THIS SECTION***
13641368

pandas/_libs/ops.pyx

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -177,7 +177,7 @@ def vec_compare(ndarray[object] left, ndarray[object] right, object op) -> ndarr
177177

178178
@cython.wraparound(False)
179179
@cython.boundscheck(False)
180-
def scalar_binop(object[:] values, object val, object op) -> ndarray:
180+
def scalar_binop(ndarray[object] values, object val, object op) -> ndarray:
181181
"""
182182
Apply the given binary operator `op` between each element of the array
183183
`values` and the scalar `val`.
@@ -214,7 +214,7 @@ def scalar_binop(object[:] values, object val, object op) -> ndarray:
214214

215215
@cython.wraparound(False)
216216
@cython.boundscheck(False)
217-
def vec_binop(object[:] left, object[:] right, object op) -> ndarray:
217+
def vec_binop(ndarray[object] left, ndarray[object] right, object op) -> ndarray:
218218
"""
219219
Apply the given binary operator `op` pointwise to the elements of
220220
arrays `left` and `right`.

pandas/core/arrays/_mixins.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,10 @@
5454
from pandas.core.array_algos.transforms import shift
5555
from pandas.core.arrays.base import ExtensionArray
5656
from pandas.core.construction import extract_array
57-
from pandas.core.indexers import check_array_indexer
57+
from pandas.core.indexers import (
58+
check_array_indexer,
59+
getitem_returns_view,
60+
)
5861
from pandas.core.sorting import nargminmax
5962

6063
if TYPE_CHECKING:
@@ -258,6 +261,9 @@ def shift(self, periods: int = 1, fill_value=None) -> Self:
258261
return self._from_backing_data(new_values)
259262

260263
def __setitem__(self, key, value) -> None:
264+
if self._readonly:
265+
raise ValueError("Cannot modify read-only array")
266+
261267
key = check_array_indexer(self, key)
262268
value = self._validate_setitem_value(value)
263269
self._ndarray[key] = value
@@ -283,7 +289,10 @@ def __getitem__(
283289
result = self._ndarray[key]
284290
if self.ndim == 1:
285291
return self._box_func(result)
286-
return self._from_backing_data(result)
292+
result = self._from_backing_data(result)
293+
if getitem_returns_view(self, key):
294+
result._readonly = self._readonly
295+
return result
287296

288297
# error: Incompatible types in assignment (expression has type "ExtensionArray",
289298
# variable has type "Union[int, slice, ndarray]")
@@ -294,6 +303,8 @@ def __getitem__(
294303
return self._box_func(result)
295304

296305
result = self._from_backing_data(result)
306+
if getitem_returns_view(self, key):
307+
result._readonly = self._readonly
297308
return result
298309

299310
def _pad_or_backfill(

pandas/core/arrays/arrow/array.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@
7878
from pandas.core.construction import extract_array
7979
from pandas.core.indexers import (
8080
check_array_indexer,
81+
getitem_returns_view,
8182
unpack_tuple_and_ellipses,
8283
validate_indices,
8384
)
@@ -790,7 +791,10 @@ def __getitem__(self, item: PositionalIndexer):
790791

791792
value = self._pa_array[item]
792793
if isinstance(value, pa.ChunkedArray):
793-
return self._from_pyarrow_array(value)
794+
result = self._from_pyarrow_array(value)
795+
if getitem_returns_view(self, item):
796+
result._readonly = self._readonly
797+
return result
794798
else:
795799
pa_type = self._pa_array.type
796800
scalar = value.as_py()
@@ -2196,6 +2200,9 @@ def __setitem__(self, key, value) -> None:
21962200
-------
21972201
None
21982202
"""
2203+
if self._readonly:
2204+
raise ValueError("Cannot modify read-only array")
2205+
21992206
# GH50085: unwrap 1D indexers
22002207
if isinstance(key, tuple) and len(key) == 1:
22012208
key = key[0]

pandas/core/arrays/base.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
validate_insert_loc,
3838
)
3939

40+
from pandas.core.dtypes.astype import astype_is_view
4041
from pandas.core.dtypes.common import (
4142
is_list_like,
4243
is_scalar,
@@ -268,6 +269,8 @@ class ExtensionArray:
268269
# strictly less than 2000 to be below Index.__pandas_priority__.
269270
__pandas_priority__ = 1000
270271

272+
_readonly = False
273+
271274
# ------------------------------------------------------------------------
272275
# Constructors
273276
# ------------------------------------------------------------------------
@@ -454,6 +457,11 @@ def __setitem__(self, key, value) -> None:
454457
Returns
455458
-------
456459
None
460+
461+
Raises
462+
------
463+
ValueError
464+
If the array is read-only and modification is attempted.
457465
"""
458466
# Some notes to the ExtensionArray implementer who may have ended up
459467
# here. While this method is not required for the interface, if you
@@ -473,6 +481,10 @@ def __setitem__(self, key, value) -> None:
473481
# __init__ method coerces that value, then so should __setitem__
474482
# Note, also, that Series/DataFrame.where internally use __setitem__
475483
# on a copy of the data.
484+
# Check if the array is readonly
485+
if self._readonly:
486+
raise ValueError("Cannot modify read-only array")
487+
476488
raise NotImplementedError(f"{type(self)} does not implement __setitem__.")
477489

478490
def __len__(self) -> int:
@@ -567,8 +579,14 @@ def to_numpy(
567579
result = np.asarray(self, dtype=dtype)
568580
if copy or na_value is not lib.no_default:
569581
result = result.copy()
582+
elif self._readonly and astype_is_view(self.dtype, result.dtype):
583+
# If the ExtensionArray is readonly, make the numpy array readonly too
584+
result = result.view()
585+
result.flags.writeable = False
586+
570587
if na_value is not lib.no_default:
571588
result[self.isna()] = na_value # type: ignore[index]
589+
572590
return result
573591

574592
# ------------------------------------------------------------------------

pandas/core/arrays/datetimelike.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -368,7 +368,12 @@ def __array__(
368368

369369
if copy is True:
370370
return np.array(self._ndarray, dtype=dtype)
371-
return self._ndarray
371+
372+
result = self._ndarray
373+
if self._readonly:
374+
result = result.view()
375+
result.flags.writeable = False
376+
return result
372377

373378
@overload
374379
def __getitem__(self, key: ScalarIndexer) -> DTScalarOrNaT: ...

pandas/core/arrays/interval.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,10 @@
8787
ensure_wrapped_if_datetimelike,
8888
extract_array,
8989
)
90-
from pandas.core.indexers import check_array_indexer
90+
from pandas.core.indexers import (
91+
check_array_indexer,
92+
getitem_returns_view,
93+
)
9194
from pandas.core.ops import (
9295
invalid_comparison,
9396
unpack_zerodim_and_defer,
@@ -842,9 +845,15 @@ def __getitem__(self, key: PositionalIndexer) -> Self | IntervalOrNA:
842845
# "Union[Period, Timestamp, Timedelta, NaTType, DatetimeArray, TimedeltaArray,
843846
# ndarray[Any, Any]]"; expected "Union[Union[DatetimeArray, TimedeltaArray],
844847
# ndarray[Any, Any]]"
845-
return self._simple_new(left, right, dtype=self.dtype) # type: ignore[arg-type]
848+
result = self._simple_new(left, right, dtype=self.dtype) # type: ignore[arg-type]
849+
if getitem_returns_view(self, key):
850+
result._readonly = self._readonly
851+
return result
846852

847853
def __setitem__(self, key, value) -> None:
854+
if self._readonly:
855+
raise ValueError("Cannot modify read-only array")
856+
848857
value_left, value_right = self._validate_setitem_value(value)
849858
key = check_array_indexer(self, key)
850859

pandas/core/arrays/masked.py

Lines changed: 23 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
)
2727
from pandas.errors import AbstractMethodError
2828

29+
from pandas.core.dtypes.astype import astype_is_view
2930
from pandas.core.dtypes.base import ExtensionDtype
3031
from pandas.core.dtypes.cast import maybe_downcast_to_dtype
3132
from pandas.core.dtypes.common import (
@@ -75,7 +76,10 @@
7576
ensure_wrapped_if_datetimelike,
7677
extract_array,
7778
)
78-
from pandas.core.indexers import check_array_indexer
79+
from pandas.core.indexers import (
80+
check_array_indexer,
81+
getitem_returns_view,
82+
)
7983
from pandas.core.ops import invalid_comparison
8084
from pandas.core.util.hashing import hash_array
8185

@@ -212,7 +216,10 @@ def __getitem__(self, item: PositionalIndexer) -> Self | Any:
212216
return self.dtype.na_value
213217
return self._data[item]
214218

215-
return self._simple_new(self._data[item], newmask)
219+
result = self._simple_new(self._data[item], newmask)
220+
if getitem_returns_view(self, item):
221+
result._readonly = self._readonly
222+
return result
216223

217224
def _pad_or_backfill(
218225
self,
@@ -354,6 +361,9 @@ def _validate_setitem_value(self, value):
354361
raise TypeError(f"Invalid value '{value!s}' for dtype '{self.dtype}'")
355362

356363
def __setitem__(self, key, value) -> None:
364+
if self._readonly:
365+
raise ValueError("Cannot modify read-only array")
366+
357367
key = check_array_indexer(self, key)
358368

359369
if is_scalar(value):
@@ -566,11 +576,11 @@ def to_numpy(
566576
hasna = self._hasna
567577
dtype, na_value = to_numpy_dtype_inference(self, dtype, na_value, hasna)
568578
if dtype is None:
569-
dtype = object
579+
dtype = np.dtype(object)
570580

571581
if hasna:
572582
if (
573-
dtype != object
583+
dtype != np.dtype(object)
574584
and not is_string_dtype(dtype)
575585
and na_value is libmissing.NA
576586
):
@@ -588,6 +598,9 @@ def to_numpy(
588598
with warnings.catch_warnings():
589599
warnings.filterwarnings("ignore", category=RuntimeWarning)
590600
data = self._data.astype(dtype, copy=copy)
601+
if self._readonly and not copy and astype_is_view(self.dtype, dtype):
602+
data = data.view()
603+
data.flags.writeable = False
591604
return data
592605

593606
def tolist(self) -> list:
@@ -686,7 +699,12 @@ def __array__(
686699
if copy is False:
687700
if not self._hasna:
688701
# special case, here we can simply return the underlying data
689-
return np.array(self._data, dtype=dtype, copy=copy)
702+
result = np.array(self._data, dtype=dtype, copy=copy)
703+
# If the ExtensionArray is readonly, make the numpy array readonly too
704+
if self._readonly:
705+
result = result.view()
706+
result.flags.writeable = False
707+
return result
690708
raise ValueError(
691709
"Unable to avoid copy while creating an array as requested."
692710
)

pandas/core/arrays/numpy_.py

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,10 @@
1414
from pandas._libs.tslibs import is_supported_dtype
1515
from pandas.compat.numpy import function as nv
1616

17-
from pandas.core.dtypes.astype import astype_array
17+
from pandas.core.dtypes.astype import (
18+
astype_array,
19+
astype_is_view,
20+
)
1821
from pandas.core.dtypes.cast import (
1922
construct_1d_object_array_from_listlike,
2023
maybe_downcast_to_dtype,
@@ -179,12 +182,23 @@ def dtype(self) -> NumpyEADtype:
179182
# NumPy Array Interface
180183

181184
def __array__(
182-
self, dtype: NpDtype | None = None, copy: bool | None = None
185+
self, dtype: np.dtype | None = None, copy: bool | None = None
183186
) -> np.ndarray:
184187
if copy is not None:
185188
# Note: branch avoids `copy=None` for NumPy 1.x support
186-
return np.array(self._ndarray, dtype=dtype, copy=copy)
187-
return np.asarray(self._ndarray, dtype=dtype)
189+
result = np.array(self._ndarray, dtype=dtype, copy=copy)
190+
else:
191+
result = np.asarray(self._ndarray, dtype=dtype)
192+
193+
if (
194+
self._readonly
195+
and not copy
196+
and (dtype is None or astype_is_view(self.dtype, dtype))
197+
):
198+
result = result.view()
199+
result.flags.writeable = False
200+
201+
return result
188202

189203
def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
190204
# Lightly modified version of
@@ -545,6 +559,9 @@ def to_numpy(
545559
result[mask] = na_value
546560
else:
547561
result = self._ndarray
562+
if not copy and self._readonly:
563+
result = result.view()
564+
result.flags.writeable = False
548565

549566
result = np.asarray(result, dtype=dtype)
550567

pandas/core/arrays/period.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -397,7 +397,11 @@ def __array__(
397397
# For NumPy 1.x compatibility we cannot use copy=None. And
398398
# `copy=False` has the meaning of `copy=None` here:
399399
if not copy:
400-
return np.asarray(self.asi8, dtype=dtype)
400+
result = np.asarray(self.asi8, dtype=dtype)
401+
if self._readonly:
402+
result = result.view()
403+
result.flags.writeable = False
404+
return result
401405
else:
402406
return np.array(self.asi8, dtype=dtype)
403407

0 commit comments

Comments
 (0)