Skip to content

Commit 89ba38e

Browse files
committed
NestedExtensionArray._pa_table and ._struct_array
1 parent 0777547 commit 89ba38e

File tree

2 files changed

+54
-1
lines changed

2 files changed

+54
-1
lines changed

src/nested_pandas/series/ext_array.py

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -225,7 +225,7 @@ def _from_sequence(cls, scalars, *, dtype=None, copy: bool = False) -> Self: #
225225
pa_array = cls._box_pa_array(scalars, pa_type=pa_type)
226226
return cls(pa_array)
227227

228-
# Tricky to implement, but required by things like pd.read_csv
228+
# Tricky to implement but required by things like pd.read_csv
229229
@classmethod
230230
def _from_sequence_of_strings(cls, strings, *, dtype=None, copy: bool = False) -> Self: # type: ignore[name-defined] # noqa: F821
231231
return super()._from_sequence_of_strings(strings, dtype=dtype, copy=copy)
@@ -680,6 +680,29 @@ def _list_array(self) -> pa.ChunkedArray:
680680
list_chunks.append(transpose_struct_list_array(struct_chunk, validate=False))
681681
return pa.chunked_array(list_chunks)
682682

683+
@property
684+
def _struct_array(self) -> pa.ChunkedArray:
685+
"""Pyarrow chunked struct-list array representation
686+
687+
Returns
688+
-------
689+
pa.ChunkedArray
690+
Pyarrow chunked-array of struct-list arrays.
691+
"""
692+
return self._chunked_array
693+
694+
@property
695+
def _pa_table(self) -> pa.Table:
696+
"""Pyarrow table representation of the extension array.
697+
698+
Returns
699+
-------
700+
pa.Table
701+
Pyarrow table where each column is a list array corresponding
702+
to a field of the struct array.
703+
"""
704+
return pa.Table.from_struct_array(self._struct_array)
705+
683706
@classmethod
684707
def from_sequence(cls, scalars, *, dtype: NestedDtype | pd.ArrowDtype | pa.DataType = None) -> Self: # type: ignore[name-defined] # noqa: F821
685708
"""Construct a NestedExtensionArray from a sequence of items

tests/nested_pandas/series/test_ext_array.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1873,6 +1873,36 @@ def test___init___with_list_struct_array():
18731873
assert pa.array(ext_array) == struct_array
18741874

18751875

1876+
def test__struct_array():
1877+
"""Test ._struct_array property"""
1878+
struct_array = pa.StructArray.from_arrays(
1879+
arrays=[
1880+
pa.array([np.array([1.0, 2.0, 3.0]), np.array([1.0, 2.0, 1.0, 2.0])]),
1881+
pa.array([-np.array([4.0, 5.0, 6.0]), -np.array([3.0, 4.0, 5.0, 6.0])]),
1882+
],
1883+
names=["a", "b"],
1884+
)
1885+
ext_array = NestedExtensionArray(struct_array)
1886+
1887+
assert ext_array._struct_array.combine_chunks() == struct_array
1888+
1889+
1890+
def test__pa_table():
1891+
"""Test ._pa_table property"""
1892+
struct_array = pa.StructArray.from_arrays(
1893+
arrays=[
1894+
pa.array([np.array([1.0, 2.0, 3.0]), np.array([1.0, 2.0, 1.0, 2.0])]),
1895+
pa.array([-np.array([4.0, 5.0, 6.0]), -np.array([3.0, 4.0, 5.0, 6.0])]),
1896+
],
1897+
names=["a", "b"],
1898+
)
1899+
ext_array = NestedExtensionArray(struct_array)
1900+
1901+
assert ext_array._pa_table == pa.Table.from_arrays(
1902+
arrays=[struct_array.field("a"), struct_array.field("b")], names=["a", "b"]
1903+
)
1904+
1905+
18761906
def test__from_sequence_of_strings():
18771907
"""We do not support from_sequence_of_strings() which would apply things like pd.read_csv()"""
18781908
with pytest.raises(NotImplementedError):

0 commit comments

Comments
 (0)