From 9e6faeeaa18d54a09cf115bcaab002f2e5267a4f Mon Sep 17 00:00:00 2001 From: Konstantin Malanchev Date: Wed, 5 Mar 2025 19:50:18 -0500 Subject: [PATCH 1/6] Allow NestedDtype initialization from pd.ArrowDtype --- docs/tutorials/low_level.ipynb | 11 +++----- src/nested_pandas/series/dtype.py | 34 ++++++++---------------- tests/nested_pandas/series/test_dtype.py | 7 +++-- 3 files changed, 19 insertions(+), 33 deletions(-) diff --git a/docs/tutorials/low_level.ipynb b/docs/tutorials/low_level.ipynb index e0004a56..3ba68762 100644 --- a/docs/tutorials/low_level.ipynb +++ b/docs/tutorials/low_level.ipynb @@ -340,16 +340,11 @@ { "cell_type": "code", "execution_count": null, - "id": "422e719861ae40f6", - "metadata": { - "ExecuteTime": { - "end_time": "2025-03-05T20:34:52.352751Z", - "start_time": "2025-03-05T20:34:52.350143Z" - } - }, + "id": "da7788cc04b78a2a", + "metadata": {}, "outputs": [], "source": [ - "nested_series.equals(pd.Series(struct_series, dtype=NestedDtype.from_pandas_arrow_dtype(struct_series.dtype)))" + "nested_series.equals(pd.Series(struct_series, dtype=NestedDtype(struct_series.dtype)))" ] }, { diff --git a/src/nested_pandas/series/dtype.py b/src/nested_pandas/series/dtype.py index a798235e..2d1fea4e 100644 --- a/src/nested_pandas/series/dtype.py +++ b/src/nested_pandas/series/dtype.py @@ -20,7 +20,14 @@ @register_extension_dtype class NestedDtype(ExtensionDtype): - """Data type to handle packed time series data""" + """Data type to handle packed time series data + + Parameters + ---------- + pyarrow_dtype : pyarrow.StructType or pd.ArrowDtype + The pyarrow data type to use for the nested type. It must be a struct + type where all fields are list types. + """ # ExtensionDtype overrides # @@ -135,7 +142,9 @@ def __from_arrow__(self, array: pa.Array | pa.ChunkedArray) -> ExtensionArray: pyarrow_dtype: pa.StructType - def __init__(self, pyarrow_dtype: pa.DataType) -> None: + def __init__(self, pyarrow_dtype: pa.DataType | pd.ArrowDtype) -> None: + if isinstance(pyarrow_dtype, pd.ArrowDtype): + pyarrow_dtype = pyarrow_dtype.pyarrow_dtype self.pyarrow_dtype = self._validate_dtype(pyarrow_dtype) @classmethod @@ -193,27 +202,6 @@ def field_names(self) -> list[str]: """The list of field names of the nested type""" return [field.name for field in self.pyarrow_dtype] - @classmethod - def from_pandas_arrow_dtype(cls, pandas_arrow_dtype: ArrowDtype): - """Construct NestedDtype from a pandas.ArrowDtype. - - Parameters - ---------- - pandas_arrow_dtype : ArrowDtype - The pandas.ArrowDtype to construct NestedDtype from. - - Returns - ------- - NestedDtype - The constructed NestedDtype. - - Raises - ------ - ValueError - If the given dtype is not a valid nested type. - """ - return cls(pyarrow_dtype=pandas_arrow_dtype.pyarrow_dtype) - def to_pandas_arrow_dtype(self) -> ArrowDtype: """Convert NestedDtype to a pandas.ArrowDtype. 
diff --git a/tests/nested_pandas/series/test_dtype.py b/tests/nested_pandas/series/test_dtype.py index 2b6b3b2b..07369534 100644 --- a/tests/nested_pandas/series/test_dtype.py +++ b/tests/nested_pandas/series/test_dtype.py @@ -20,8 +20,9 @@ ) def test_from_pyarrow_dtype(pyarrow_dtype): """Test that we can construct NestedDtype from pyarrow struct type.""" - dtype = NestedDtype(pyarrow_dtype) - assert dtype.pyarrow_dtype == pyarrow_dtype + dtype1 = NestedDtype(pyarrow_dtype) + dtype2 = NestedDtype(pd.ArrowDtype(pyarrow_dtype)) + assert dtype1.pyarrow_dtype == dtype2.pyarrow_dtype == pyarrow_dtype @pytest.mark.parametrize( @@ -39,6 +40,8 @@ def test_from_pyarrow_dtype_raises(pyarrow_dtype): """Test that we raise an error when constructing NestedDtype from invalid pyarrow type.""" with pytest.raises(ValueError): NestedDtype(pyarrow_dtype) + with pytest.raises(ValueError): + NestedDtype(pd.ArrowDtype(pyarrow_dtype)) def test_to_pandas_arrow_dtype(): From 105af32aa9cc4ffd13b00479796a7188111db503 Mon Sep 17 00:00:00 2001 From: Konstantin Malanchev Date: Wed, 5 Mar 2025 20:55:16 -0500 Subject: [PATCH 2/6] NestedDtype.inner_dtypes --- src/nested_pandas/series/dtype.py | 97 ++++++++++++++++++++---- tests/nested_pandas/series/test_dtype.py | 43 +++++++++-- 2 files changed, 122 insertions(+), 18 deletions(-) diff --git a/src/nested_pandas/series/dtype.py b/src/nested_pandas/series/dtype.py index 2d1fea4e..f8590085 100644 --- a/src/nested_pandas/series/dtype.py +++ b/src/nested_pandas/series/dtype.py @@ -27,11 +27,27 @@ class NestedDtype(ExtensionDtype): pyarrow_dtype : pyarrow.StructType or pd.ArrowDtype The pyarrow data type to use for the nested type. It must be a struct type where all fields are list types. + inner_dtypes : Mapping[str, object] or None, default None + A mapping of field names and their inner types. This will be used to: + 1. Cast to the correct types when getting flat representations + of the nested fields. + 2. To handle information of the double-nested fields, you should use + this NestedDtype for the inner types in this case. + Dtypes must be pandas-recognisable types, such as Python native types, + numpy dtypes or extension array dtypes. Please wrap pyarrow types with + pd.ArrowDtype. + We trust these dtypes and make no attempt to validate them when + casting. + If None, all inner types are assumed to be the same as the + corresponding list element types. 
""" # ExtensionDtype overrides # - _metadata = ("pyarrow_dtype",) + _metadata = ( + "pyarrow_dtype", + "inner_dtypes", + ) """Attributes to use as metadata for __eq__ and __hash__""" @property @@ -45,7 +61,12 @@ def na_value(self) -> Type[pd.NA]: @property def name(self) -> str: """The string representation of the nested type""" - fields = ", ".join([f"{field.name}: [{field.type.value_type!s}]" for field in self.pyarrow_dtype]) + # Replace pd.ArrowDtype with pa.DataType, because it has nicer __str__ + nice_dtypes = { + field: dtype.pyarrow_dtype if isinstance(dtype, pd.ArrowDtype) else dtype + for field, dtype in self.fields.items() + } + fields = ", ".join([f"{field}: [{dtype!s}]" for field, dtype in nice_dtypes.items()]) return f"nested<{fields}>" @classmethod @@ -141,21 +162,26 @@ def __from_arrow__(self, array: pa.Array | pa.ChunkedArray) -> ExtensionArray: # Additional methods and attributes # pyarrow_dtype: pa.StructType + inner_dtypes: dict[str, object] - def __init__(self, pyarrow_dtype: pa.DataType | pd.ArrowDtype) -> None: + def __init__( + self, pyarrow_dtype: pa.DataType | pd.ArrowDtype, inner_dtypes: Mapping[str, object] | None = None + ) -> None: if isinstance(pyarrow_dtype, pd.ArrowDtype): pyarrow_dtype = pyarrow_dtype.pyarrow_dtype self.pyarrow_dtype = self._validate_dtype(pyarrow_dtype) + self.inner_dtypes = self._validate_inner_dtypes(self.pyarrow_dtype, inner_dtypes) @classmethod - def from_fields(cls, fields: Mapping[str, pa.DataType]) -> Self: # type: ignore[name-defined] # noqa: F821 + def from_fields(cls, fields: Mapping[str, pa.DataType | Self]) -> Self: # type: ignore[name-defined] # noqa: F821 """Make NestedDtype from a mapping of field names and list item types. Parameters ---------- - fields : Mapping[str, pa.DataType] - A mapping of field names and their item types. Since all fields are lists, the item types are - inner types of the lists, not the list types themselves. + fields : Mapping[str, pa.DataType | NestedDtype] + A mapping of field names and their item types. Since all fields are + lists, the item types are inner types of the lists, not the list + types themselves. Returns ------- @@ -172,9 +198,15 @@ def from_fields(cls, fields: Mapping[str, pa.DataType]) -> Self: # type: ignore ... == pa.struct({"a": pa.list_(pa.float64()), "b": pa.list_(pa.int64())}) ... 
) """ - pyarrow_dtype = pa.struct({field: pa.list_(pa_type) for field, pa_type in fields.items()}) - pyarrow_dtype = cast(pa.StructType, pyarrow_dtype) - return cls(pyarrow_dtype=pyarrow_dtype) + pa_fields = {} + inner_dtypes = {} + for field, dtype in fields.items(): + if isinstance(dtype, NestedDtype): + inner_dtypes[field] = dtype + dtype = dtype.pyarrow_dtype + pa_fields[field] = dtype + pyarrow_dtype = pa.struct({field: pa.list_(pa_type) for field, pa_type in pa_fields.items()}) + return cls(pyarrow_dtype=pyarrow_dtype, inner_dtypes=inner_dtypes or None) @staticmethod def _validate_dtype(pyarrow_dtype: pa.DataType) -> pa.StructType: @@ -192,10 +224,49 @@ def _validate_dtype(pyarrow_dtype: pa.DataType) -> pa.StructType: ) return pyarrow_dtype + @staticmethod + def _validate_inner_dtypes( + pyarrow_dtype: pa.StructType, inner_dtypes: Mapping[str, object] | None + ) -> dict[str, object]: + # Short circuit if there are no inner dtypes + if inner_dtypes is None: + return {} + + inner_dtypes = dict(inner_dtypes) + + for field_name, inner_dtype in inner_dtypes.items(): + if field_name not in pyarrow_dtype.names: + raise ValueError(f"Field '{field_name}' not found in the pyarrow struct type.") + element_type = pyarrow_dtype[field_name].type.value_type + test_series = pd.Series([], dtype=pd.ArrowDtype(element_type)) + try: + _ = test_series.astype(inner_dtype) + except TypeError as e: + raise TypeError( + f"Could not cast the inner dtype '{inner_dtype}' for field '{field_name}' to the" + f" corresponding element type '{element_type}'. {e}" + ) from e + return inner_dtypes + + def inner_dtype(self, field: str) -> object: + """Get the inner dtype for a field. + + Parameters + ---------- + field : str + The field name. + + Returns + ------- + object + The inner dtype for the field. 
+ """ + return self.inner_dtypes.get(field, pd.ArrowDtype(self.pyarrow_dtype[field].type.value_type)) + @property - def fields(self) -> dict[str, pa.DataType]: - """The mapping of field names and their item types.""" - return {field.name: field.type.value_type for field in self.pyarrow_dtype} + def fields(self) -> dict[str, object]: + """The mapping of field names and pandas dtypes of their items""" + return {field.name: self.inner_dtype(field.name) for field in self.pyarrow_dtype} @property def field_names(self) -> list[str]: diff --git a/tests/nested_pandas/series/test_dtype.py b/tests/nested_pandas/series/test_dtype.py index 07369534..a463d0e2 100644 --- a/tests/nested_pandas/series/test_dtype.py +++ b/tests/nested_pandas/series/test_dtype.py @@ -54,12 +54,25 @@ def test_to_pandas_arrow_dtype(): def test_from_fields(): """Test NestedDtype.from_fields().""" - fields = {"a": pa.int64(), "b": pa.float64()} - dtype = NestedDtype.from_fields(fields) - assert dtype.pyarrow_dtype == pa.struct( + fields1 = {"a": pa.int64(), "b": pa.float64()} + dtype1 = NestedDtype.from_fields(fields1) + assert dtype1.pyarrow_dtype == pa.struct( [pa.field("a", pa.list_(pa.int64())), pa.field("b", pa.list_(pa.float64()))] ) + fields2 = {"x": pa.string(), "y": pa.bool_(), "nested": dtype1} + dtype2 = NestedDtype.from_fields(fields2) + assert dtype2 == NestedDtype( + pa.struct( + [ + pa.field("x", pa.list_(pa.string())), + pa.field("y", pa.list_(pa.bool_())), + pa.field("nested", pa.list_(dtype1.pyarrow_dtype)), + ] + ), + inner_dtypes={"nested": dtype1}, + ) + def test_na_value(): """Test that NestedDtype.na_value is a singleton instance of NAType.""" @@ -69,10 +82,30 @@ def test_na_value(): def test_fields(): """Test NestedDtype.fields property""" - dtype = NestedDtype( + dtype1 = NestedDtype( pa.struct([pa.field("a", pa.list_(pa.int64())), pa.field("b", pa.list_(pa.float64()))]) ) - assert dtype.fields == {"a": pa.int64(), "b": pa.float64()} + assert dtype1.fields == {"a": pd.ArrowDtype(pa.int64()), "b": pd.ArrowDtype(pa.float64())} + + dtype2 = NestedDtype( + pa.struct( + [ + pa.field("x", pa.list_(pa.float64())), + pa.field("y", pa.list_(pa.string())), + pa.field("nested", pa.list_(dtype1.pyarrow_dtype)), + ] + ), + inner_dtypes={"x": pd.Float64Dtype(), "nested": dtype1}, + ) + assert dtype2.fields == {"x": pd.Float64Dtype(), "y": pd.ArrowDtype(pa.string()), "nested": dtype1} + + # field name missmatch + with pytest.raises(ValueError): + NestedDtype(pa.struct([pa.field("a", pa.list_(pa.int64()))]), inner_dtypes={"xyz": pa.int64()}) + + # element type is not compatible with inner dtype + with pytest.raises(TypeError): + NestedDtype(pa.struct([pa.field("a", pa.list_(pa.int64()))]), inner_dtypes={"a": pa.bool_()}) def test_field_names(): From c7cd6d9a40ad22b73d1d7b640c44bf1255fe2082 Mon Sep 17 00:00:00 2001 From: Konstantin Malanchev Date: Wed, 5 Mar 2025 21:41:59 -0500 Subject: [PATCH 3/6] .nest.to_flat to respect inner_dtypes --- src/nested_pandas/series/accessor.py | 4 ++-- src/nested_pandas/series/ext_array.py | 17 +++++++++----- tests/nested_pandas/series/test_accessor.py | 25 +++++++++------------ 3 files changed, 25 insertions(+), 21 deletions(-) diff --git a/src/nested_pandas/series/accessor.py b/src/nested_pandas/series/accessor.py index 2ae9afc8..75c464c1 100644 --- a/src/nested_pandas/series/accessor.py +++ b/src/nested_pandas/series/accessor.py @@ -109,7 +109,7 @@ def to_flat(self, fields: list[str] | None = None) -> pd.DataFrame: index=pd.Series(index, name=self._series.index.name), name=field, 
copy=False, - dtype=pd.ArrowDtype(chunked_array.type), + dtype=self._series.dtype.inner_dtype(field), ) return pd.DataFrame(flat_series) @@ -292,7 +292,7 @@ def get_flat_series(self, field: str) -> pd.Series: return pd.Series( flat_chunked_array, - dtype=pd.ArrowDtype(flat_chunked_array.type), + dtype=self._series.dtype.inner_dtype(field), index=self.get_flat_index(), name=field, copy=False, diff --git a/src/nested_pandas/series/ext_array.py b/src/nested_pandas/series/ext_array.py index 43735dbb..ba9c109f 100644 --- a/src/nested_pandas/series/ext_array.py +++ b/src/nested_pandas/series/ext_array.py @@ -35,7 +35,7 @@ # typing.Self and "|" union syntax don't exist in Python 3.9 from __future__ import annotations -from collections.abc import Generator, Iterable, Iterator, Sequence +from collections.abc import Generator, Iterable, Iterator, Mapping, Sequence from typing import Any, Callable, cast import numpy as np @@ -212,7 +212,7 @@ def _from_sequence(cls, scalars, *, dtype=None, copy: bool = False) -> Self: # Parameters ---------- scalars : Sequence - The sequence of scalars: disctionaries, DataFrames, None, pd.NA, pa.Array or anything convertible + The sequence of scalars: dictionaries, DataFrames, None, pd.NA, pa.Array or anything convertible to PyArrow scalars. dtype : dtype or None dtype of the resulting array @@ -223,7 +223,8 @@ def _from_sequence(cls, scalars, *, dtype=None, copy: bool = False) -> Self: # pa_type = to_pyarrow_dtype(dtype) pa_array = cls._box_pa_array(scalars, pa_type=pa_type) - return cls(pa_array) + inner_dtypes = dtype.inner_dtypes if isinstance(dtype, NestedDtype) else None + return cls(pa_array, inner_dtypes=inner_dtypes) # Tricky to implement, but required by things like pd.read_csv @classmethod @@ -655,7 +656,13 @@ def _convert_struct_scalar_to_df(cls, value: pa.StructScalar, *, copy: bool, na_ _chunked_array: pa.ChunkedArray _dtype: NestedDtype - def __init__(self, values: pa.Array | pa.ChunkedArray, *, validate: bool = True) -> None: + def __init__( + self, + values: pa.Array | pa.ChunkedArray, + *, + inner_dtypes: Mapping[str, object] | None = None, + validate: bool = True, + ) -> None: if isinstance(values, pa.Array): values = pa.chunked_array([values]) @@ -670,7 +677,7 @@ def __init__(self, values: pa.Array | pa.ChunkedArray, *, validate: bool = True) self._validate(values) self._chunked_array = values - self._dtype = NestedDtype(values.type) + self._dtype = NestedDtype(values.type, inner_dtypes=inner_dtypes) @property def _list_array(self) -> pa.ChunkedArray: diff --git a/tests/nested_pandas/series/test_accessor.py b/tests/nested_pandas/series/test_accessor.py index 37b385a8..17cdcb10 100644 --- a/tests/nested_pandas/series/test_accessor.py +++ b/tests/nested_pandas/series/test_accessor.py @@ -148,14 +148,16 @@ def test_to_flat(): """Test that the .nest.to_flat() method works.""" struct_array = pa.StructArray.from_arrays( arrays=[ - pa.array([np.array([1.0, 2.0, 3.0]), np.array([1.0, 2.0, 1.0])]), - pa.array([-np.array([4.0, 5.0, 6.0]), -np.array([3.0, 4.0, 5.0])]), + pa.array([np.array([1.0, 2.0, 3.0]), np.array([1.0, 2.0]), np.array([1.0])]), + pa.array([-np.array([4.0, 5.0, 6.0]), -np.array([3.0, 4.0]), [None]]), ], names=["a", "b"], ) series = pd.Series( - struct_array, dtype=NestedDtype(struct_array.type), index=pd.Series([0, 1], name="idx") + struct_array, + dtype=NestedDtype(struct_array.type, inner_dtypes={"b": pd.Float64Dtype()}), + index=pd.Series([0, 1, 2], name="idx"), ) flat = series.nest.to_flat() @@ -164,28 +166,23 @@ def test_to_flat(): 
data={ "a": pd.Series( data=[1.0, 2.0, 3.0, 1.0, 2.0, 1.0], - index=[0, 0, 0, 1, 1, 1], + index=[0, 0, 0, 1, 1, 2], name="a", copy=False, dtype=pd.ArrowDtype(pa.float64()), ), "b": pd.Series( - data=[-4.0, -5.0, -6.0, -3.0, -4.0, -5.0], - index=[0, 0, 0, 1, 1, 1], + data=[-4.0, -5.0, -6.0, -3.0, -4.0, None], + index=[0, 0, 0, 1, 1, 2], name="b", copy=False, - dtype=pd.ArrowDtype(pa.float64()), + dtype=pd.Float64Dtype(), ), }, - index=pd.Index([0, 0, 0, 1, 1, 1], name="idx"), + index=pd.Index([0, 0, 0, 1, 1, 2], name="idx"), ) - assert_array_equal(flat.dtypes, desired.dtypes) - assert_array_equal(flat.index, desired.index) - assert flat.index.name == desired.index.name - - for column in flat.columns: - assert_array_equal(flat[column], desired[column]) + assert_frame_equal(flat, desired) def test_to_flat_for_chunked_array(): From 7b3d22ae3d6355c97799cb617ce8db6a6b9cad0b Mon Sep 17 00:00:00 2001 From: Konstantin Malanchev Date: Wed, 12 Mar 2025 16:49:30 -0400 Subject: [PATCH 4/6] Allow ArrowDtype in NestedDtype.from_fields --- src/nested_pandas/series/dtype.py | 6 ++++-- tests/nested_pandas/series/test_dtype.py | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/nested_pandas/series/dtype.py b/src/nested_pandas/series/dtype.py index f8590085..5e95f118 100644 --- a/src/nested_pandas/series/dtype.py +++ b/src/nested_pandas/series/dtype.py @@ -173,7 +173,7 @@ def __init__( self.inner_dtypes = self._validate_inner_dtypes(self.pyarrow_dtype, inner_dtypes) @classmethod - def from_fields(cls, fields: Mapping[str, pa.DataType | Self]) -> Self: # type: ignore[name-defined] # noqa: F821 + def from_fields(cls, fields: Mapping[str, pa.DataType | pa.ArrowDtype | Self]) -> Self: # type: ignore[name-defined] # noqa: F821 """Make NestedDtype from a mapping of field names and list item types. 
Parameters @@ -204,6 +204,8 @@ def from_fields(cls, fields: Mapping[str, pa.DataType | Self]) -> Self: # type: if isinstance(dtype, NestedDtype): inner_dtypes[field] = dtype dtype = dtype.pyarrow_dtype + elif isinstance(dtype, pd.ArrowDtype): + dtype = dtype.pyarrow_dtype pa_fields[field] = dtype pyarrow_dtype = pa.struct({field: pa.list_(pa_type) for field, pa_type in pa_fields.items()}) return cls(pyarrow_dtype=pyarrow_dtype, inner_dtypes=inner_dtypes or None) @@ -229,7 +231,7 @@ def _validate_inner_dtypes( pyarrow_dtype: pa.StructType, inner_dtypes: Mapping[str, object] | None ) -> dict[str, object]: # Short circuit if there are no inner dtypes - if inner_dtypes is None: + if inner_dtypes is None or len(inner_dtypes) == 0: return {} inner_dtypes = dict(inner_dtypes) diff --git a/tests/nested_pandas/series/test_dtype.py b/tests/nested_pandas/series/test_dtype.py index a463d0e2..1fde0bc2 100644 --- a/tests/nested_pandas/series/test_dtype.py +++ b/tests/nested_pandas/series/test_dtype.py @@ -54,7 +54,7 @@ def test_to_pandas_arrow_dtype(): def test_from_fields(): """Test NestedDtype.from_fields().""" - fields1 = {"a": pa.int64(), "b": pa.float64()} + fields1 = {"a": pa.int64(), "b": pd.ArrowDtype(pa.float64())} dtype1 = NestedDtype.from_fields(fields1) assert dtype1.pyarrow_dtype == pa.struct( [pa.field("a", pa.list_(pa.int64())), pa.field("b", pa.list_(pa.float64()))] From c9f0c29bc75b839dc57d5ead022aa4a4d1c995de Mon Sep 17 00:00:00 2001 From: Konstantin Malanchev Date: Sat, 5 Apr 2025 16:15:47 -0400 Subject: [PATCH 5/6] Handle and derive inner_dtypes --- src/nested_pandas/series/ext_array.py | 131 ++++++++++++++++--- src/nested_pandas/series/packer.py | 27 +++- tests/nested_pandas/series/test_ext_array.py | 58 ++++++-- tests/nested_pandas/series/test_packer.py | 14 ++ 4 files changed, 195 insertions(+), 35 deletions(-) diff --git a/src/nested_pandas/series/ext_array.py b/src/nested_pandas/series/ext_array.py index ba9c109f..e5318abb 100644 --- a/src/nested_pandas/series/ext_array.py +++ b/src/nested_pandas/series/ext_array.py @@ -179,9 +179,32 @@ def replace_with_mask(array: pa.ChunkedArray, mask: pa.BooleanArray, value: pa.A return pa.compute.if_else(mask, broadcast_value, array) -def convert_df_to_pa_scalar(df: pd.DataFrame, *, pa_type: pa.DataType | None) -> pa.Scalar: - d = {column: series.values for column, series in df.to_dict("series").items()} - return pa.scalar(d, type=pa_type, from_pandas=True) +def convert_df_to_pa_scalar( + df: pd.DataFrame, *, pa_type: pa.DataType | None +) -> tuple[pa.StructScalar, dict[str, NestedDtype]]: + """Convert a pandas DataFrame to a PyArrow StructScalar + + Parameters + ---------- + df : pd.DataFrame + The DataFrame to be converted + pa_type : pa.DataType | None + The PyArrow data type to be used for the scalar. + If None, the data type will be inferred from the DataFrame. + + Returns + ------- + pa.StructScalar + The PyArrow StructScalar representing the DataFrame + dict[str, object] + Pandas dtypes of the DataFrame columns which we'd like to cast the result to. 
+ """ + d = { + column: series.values.to_pyarrow_scalar() if isinstance(series.dtype, NestedDtype) else series.values + for column, series in df.to_dict("series").items() + } + inner_dtypes = {column: dtype for column, dtype in df.dtypes.items() if isinstance(dtype, NestedDtype)} + return pa.scalar(d, type=pa_type, from_pandas=True), inner_dtypes class NestedExtensionArray(ExtensionArray): @@ -222,8 +245,13 @@ def _from_sequence(cls, scalars, *, dtype=None, copy: bool = False) -> Self: # del copy pa_type = to_pyarrow_dtype(dtype) - pa_array = cls._box_pa_array(scalars, pa_type=pa_type) - inner_dtypes = dtype.inner_dtypes if isinstance(dtype, NestedDtype) else None + pa_array, infered_inner_dtypes = cls._box_pa_array(scalars, pa_type=pa_type) + if isinstance(dtype, NestedDtype): + inner_dtypes = dtype.inner_dtypes + elif len(infered_inner_dtypes) > 1: + inner_dtypes = infered_inner_dtypes + else: + inner_dtypes = None return cls(pa_array, inner_dtypes=inner_dtypes) # Tricky to implement, but required by things like pd.read_csv @@ -298,10 +326,10 @@ def __setitem__(self, key, value) -> None: # Try to convert to struct_scalar first, if it fails, convert to array try: - scalar = self._box_pa_scalar(value, pa_type=self._pyarrow_dtype) + scalar, _ = self._box_pa_scalar(value, pa_type=self._pyarrow_dtype) except (ValueError, TypeError): # Copy will happen later in replace_with_mask() anyway - value = self._box_pa_array(value, pa_type=self._pyarrow_dtype) + value, _ = self._box_pa_array(value, pa_type=self._pyarrow_dtype) else: # Our replace_with_mask implementation doesn't work with scalars value = pa.array([scalar] * pa.compute.sum(pa_mask).as_py()) @@ -460,7 +488,7 @@ def take( raise IndexError("out of bounds value in 'indices'.") if allow_fill: - fill_value = self._box_pa_scalar(fill_value, pa_type=self._pyarrow_dtype) + fill_value, _inner_dtypes = self._box_pa_scalar(fill_value, pa_type=self._pyarrow_dtype) fill_mask = indices_array < 0 if not fill_mask.any(): @@ -595,23 +623,60 @@ def __setstate__(self, state): # End of Additional magic methods # @classmethod - def _box_pa_scalar(cls, value, *, pa_type: pa.DataType | None) -> pa.Scalar: - """Convert a value to a PyArrow scalar with the specified type.""" - if isinstance(value, pa.Scalar): + def _box_pa_scalar( + cls, value, *, pa_type: pa.DataType | None + ) -> tuple[pa.StructScalar, dict[str, NestedDtype]]: + """Convert a value to a PyArrow scalar with the specified type. + + Parameters + ---------- + value: convertible to a PyArrow scalar + The value to be converted. + pa_type: PyArrow data type or None (default: None) + The type to which the value should be converted. If None, + the type is inferred from the value. + + Returns + ------- + pa.StructScalar + The converted PyArrow scalar. + dict[str, object] + Pandas datatypes of the scalar struct-fields. 
+ """ + empty_inner_dtypes = cast(dict[str, NestedDtype], {}) + if isinstance(value, (pa.StructScalar, pa.NullScalar)): if pa_type is None: - return value - return value.cast(pa_type) + return value, empty_inner_dtypes + return value.cast(pa_type), empty_inner_dtypes if value is pd.NA or value is None: - return pa.scalar(None, type=pa_type, from_pandas=True) + return pa.scalar(None, type=pa_type, from_pandas=True), empty_inner_dtypes if isinstance(value, pd.DataFrame): return convert_df_to_pa_scalar(value, pa_type=pa_type) - return pa.scalar(value, type=pa_type, from_pandas=True) + return pa.scalar(value, type=pa_type, from_pandas=True), empty_inner_dtypes @classmethod - def _box_pa_array(cls, value, *, pa_type: pa.DataType | None) -> pa.Array | pa.ChunkedArray: - """Convert a value to a PyArrow array with the specified type.""" + def _box_pa_array( + cls, value, *, pa_type: pa.DataType | None + ) -> tuple[pa.Array | pa.ChunkedArray, dict[str, object]]: + """Convert a value to a PyArrow array with the specified type. + + Parameters + ---------- + value + Value to convert + pa_type : pyarrow.DataType or None + Pyarrow type to cast to. If None it will be derived + + Returns + ------- + pyarrow.Array or pyarrow.ChunkedArray + The result array + dict of inferred inner dtypes + """ + inner_dtypes: dict[str, object] = {} if isinstance(value, cls): pa_array = value._chunked_array + inner_dtypes = value.dtype.inner_dtypes.copy() elif isinstance(value, (pa.Array, pa.ChunkedArray)): pa_array = value else: @@ -619,11 +684,14 @@ def _box_pa_array(cls, value, *, pa_type: pa.DataType | None) -> pa.Array | pa.C pa_array = pa.array(value, type=pa_type) except (ValueError, TypeError, KeyError): scalars: list[pa.Scalar] = [] + # Pandas dtypes to cast the result Series to. Currently NestedDtype only. for v in value: # If pa_type is not specified, then cast to the first non-null type if pa_type is None and len(scalars) > 0 and not isinstance(scalars[-1], pa.NullScalar): pa_type = scalars[-1].type - scalars.append(cls._box_pa_scalar(v, pa_type=pa_type)) + scalar, dtypes = cls._box_pa_scalar(v, pa_type=pa_type) + scalars.append(scalar) + inner_dtypes.update(dtypes) # We recast the scalars to the specified type. # Logically, we should 1) have `pa_type is not None` here, # 2) only "head" null-scalars to be not cast to the specified type. @@ -633,13 +701,15 @@ def _box_pa_array(cls, value, *, pa_type: pa.DataType | None) -> pa.Array | pa.C scalars = [s.cast(pa_type) for s in scalars] pa_array = pa.array(scalars) # We already copied the data into scalars + else: + inner_dtypes = {} # We always cast - even if the type is the same, it does not hurt - # If the type is different the result may still be a view, so we do not set copy=False + # If the type is different, the result array may still be a view, so we do not set copy=False if pa_type is not None: pa_array = pa_array.cast(pa_type) - return pa_array + return pa_array, inner_dtypes @classmethod def _convert_struct_scalar_to_df(cls, value: pa.StructScalar, *, copy: bool, na_value: Any = None) -> Any: @@ -757,11 +827,32 @@ def to_arrow_ext_array(self, list_struct: bool = False) -> ArrowExtensionArray: list_struct : bool, optional If False (default), return struct-list array, otherwise return list-struct array. 
+ + Returns + ------- + pandas.ArrowExtensionArray """ if list_struct: return ArrowExtensionArray(self._list_array) return ArrowExtensionArray(self._chunked_array) + def to_pyarrow_scalar(self, list_struct: bool = False) -> pa.ListScalar: + """Convert to a pyarrow scalar of a list type + + Parameters + ---------- + list_struct : bool, optional + If False (default), return list-struct-list scalar, + otherwise list-list-struct scalar. + + Returns + ------- + pyarrow.ListScalar + """ + pa_array = self._list_array if list_struct else self._chunked_array + pa_type = pa.list_(pa_array.type) + return cast(pa.ListScalar, pa.scalar(pa_array, type=pa_type)) + def _replace_chunked_array(self, pa_array: pa.ChunkedArray, *, validate: bool) -> None: if validate: self._validate(pa_array) diff --git a/src/nested_pandas/series/packer.py b/src/nested_pandas/series/packer.py index 73dc58ce..c9a092ae 100644 --- a/src/nested_pandas/series/packer.py +++ b/src/nested_pandas/series/packer.py @@ -7,7 +7,7 @@ # "|" for python 3.9 from __future__ import annotations -from collections.abc import Sequence +from collections.abc import Mapping, Sequence import numpy as np import pandas as pd @@ -158,12 +158,22 @@ def pack_sorted_df_into_struct(df: pd.DataFrame, name: str | None = None) -> pd. raise ValueError("The index of the input dataframe must be sorted") packed_df = view_sorted_df_as_list_arrays(df) + + # Handle columns which are already nested in the input dataframe + inner_dtypes = {str(col): dtype for col, dtype in df.dtypes.items() if isinstance(dtype, NestedDtype)} + # No need to validate the dataframe, the length of the nested arrays is forced to be the same by # the view_sorted_df_as_list_arrays function. - return pack_lists(packed_df, name=name, validate=False) + return pack_lists(packed_df, name=name, validate=False, inner_dtypes=inner_dtypes) -def pack_lists(df: pd.DataFrame, name: str | None = None, *, validate: bool = True) -> pd.Series: +def pack_lists( + df: pd.DataFrame, + name: str | None = None, + *, + validate: bool = True, + inner_dtypes: Mapping[str, object] | None = None, +) -> pd.Series: """Make a series of arrow structures from a dataframe with nested arrays. For the input dataframe with repeated indexes, make a pandas.Series, @@ -184,6 +194,9 @@ def pack_lists(df: pd.DataFrame, name: str | None = None, *, validate: bool = Tr Name of the pd.Series. validate : bool, default True Whether to validate the input dataframe. + inner_dtypes : mapping of field names to pandas dtypes to cast to, optional + The dtypes to cast the inner arrays to. If not provided, the dtypes + may be inferred from the input arrays. 
Returns ------- @@ -225,11 +238,19 @@ def pack_lists(df: pd.DataFrame, name: str | None = None, *, validate: bool = Tr ) ext_array = NestedExtensionArray(struct_array, validate=validate) + + # Put nested dtypes of the input dataframe into the output series dtype + # Prefer inferred dtypes over what we previously detected in pack_sorted_df_into_struct + inferred_dtype = ext_array.dtype + inner_dtypes = dict(inner_dtypes or {}) + dtype = NestedDtype(inferred_dtype.pyarrow_dtype, inner_dtypes=inner_dtypes | inferred_dtype.inner_dtypes) + return pd.Series( ext_array, index=df.index, copy=False, name=name, + dtype=dtype, ) diff --git a/tests/nested_pandas/series/test_ext_array.py b/tests/nested_pandas/series/test_ext_array.py index 391e06cc..c0dc7aef 100644 --- a/tests/nested_pandas/series/test_ext_array.py +++ b/tests/nested_pandas/series/test_ext_array.py @@ -5,8 +5,9 @@ import pyarrow as pa import pyarrow.compute as pc import pytest -from nested_pandas import NestedDtype +from nested_pandas import NestedDtype, NestedFrame from nested_pandas.series.ext_array import NestedExtensionArray, convert_df_to_pa_scalar, replace_with_mask +from nested_pandas.series.packer import pack_flat from numpy.testing import assert_array_equal from pandas.core.arrays import ArrowExtensionArray from pandas.testing import assert_frame_equal, assert_series_equal @@ -278,24 +279,28 @@ def test_series_built_from_dict(): def test_convert_df_to_pa_scalar(): """Test that we can convert a DataFrame to a pyarrow struct_scalar.""" df = pd.DataFrame({"a": [1, 2, 3], "b": [-4.0, -5.0, -6.0]}) - pa_scalar = convert_df_to_pa_scalar(df, pa_type=None) - - assert pa_scalar == pa.scalar( + actual, inner_dtypes = convert_df_to_pa_scalar(df, pa_type=None) + expected = pa.scalar( {"a": [1, 2, 3], "b": [-4.0, -5.0, -6.0]}, type=pa.struct([pa.field("a", pa.list_(pa.int64())), pa.field("b", pa.list_(pa.float64()))]), ) + assert actual == expected + assert inner_dtypes == {} + def test_convert_df_to_pa_from_scalar(): """Test that we can convert a DataFrame to a pyarrow struct_scalar.""" df = pd.DataFrame({"a": [1, 2, 3], "b": [-4.0, -5.0, -6.0]}) - pa_scalar = convert_df_to_pa_scalar(df, pa_type=None) - - assert pa_scalar == pa.scalar( + actual, inner_dtypes = convert_df_to_pa_scalar(df, pa_type=None) + expected = pa.scalar( {"a": [1, 2, 3], "b": [-4.0, -5.0, -6.0]}, type=pa.struct([pa.field("a", pa.list_(pa.int64())), pa.field("b", pa.list_(pa.float64()))]), ) + assert actual == expected + assert inner_dtypes == {} + def test__box_pa_array_from_series_of_df(): """Test that we can convert a DataFrame to a pyarrow scalar.""" @@ -305,7 +310,9 @@ def test__box_pa_array_from_series_of_df(): pd.DataFrame({"a": [1, 2, 1], "b": [-3.0, -4.0, -5.0]}), ] ) - list_of_dicts = list(NestedExtensionArray._box_pa_array(series, pa_type=None)) + pa_array, inner_dtypes = NestedExtensionArray._box_pa_array(series, pa_type=None) + assert inner_dtypes == {} + list_of_dicts = list(pa_array) desired_type = pa.struct([pa.field("a", pa.list_(pa.int64())), pa.field("b", pa.list_(pa.float64()))]) @@ -321,7 +328,9 @@ def test__box_pa_array_from_list_of_df(): pd.DataFrame({"a": [1, 2, 3], "b": [-4.0, -5.0, -6.0]}), pd.DataFrame({"a": [1, 2, 1], "b": [-3.0, -4.0, -5.0]}), ] - list_of_dicts = list(NestedExtensionArray._box_pa_array(list_of_dfs, pa_type=None)) + pa_array, inner_dtypes = NestedExtensionArray._box_pa_array(list_of_dfs, pa_type=None) + assert inner_dtypes == {} + list_of_dicts = list(pa_array) desired_type = pa.struct([pa.field("a", pa.list_(pa.int64())), 
pa.field("b", pa.list_(pa.float64()))]) @@ -1155,8 +1164,9 @@ def test___array__(): ) def test__box_pa_scalar(value, pa_type, desired): """Tests _box_pa_scalar()""" - actual = NestedExtensionArray._box_pa_scalar(value, pa_type=pa_type) + actual, inner_dtypes = NestedExtensionArray._box_pa_scalar(value, pa_type=pa_type) assert actual == desired + assert inner_dtypes == {} @pytest.mark.parametrize( @@ -1203,8 +1213,32 @@ def test__box_pa_scalar(value, pa_type, desired): ) def test__box_pa_array(value, pa_type, desired): """Tests _box_pa_array""" - actual = NestedExtensionArray._box_pa_array(value, pa_type=pa_type) - assert actual == desired + pa_array, inner_dtypes = NestedExtensionArray._box_pa_array(value, pa_type=pa_type) + assert pa_array == desired + assert inner_dtypes == {} + + +def test__box_pa_array_from_nested_frames(): + """Tests _box_pa_array for a collection of nested-frames""" + + nf1 = NestedFrame( + { + "base": pd.Series([1, 2, 3]), + "nested": pack_flat( + pd.DataFrame({"a": [1, 2, 3, 4, 5], "b": [6, 7, 8, 9, 10]}, index=[0, 0, 1, 1, 2]) + ), + } + ) + nf2 = NestedFrame( + { + "base": pd.Series([-1, -2, -3, -4]), + "nested": pack_flat( + pd.DataFrame({"a": [1, 2, 3, 4, 5], "b": [6, 7, 8, 9, 10]}, index=[0, 0, 1, 2, 3]) + ), + } + ) + + pa_array, dtypes = NestedExtensionArray._box_pa_array([nf1, nf2], pa_type=None) def test_series_apply_udf_argument(): diff --git a/tests/nested_pandas/series/test_packer.py b/tests/nested_pandas/series/test_packer.py index df59c0f8..8e27db91 100644 --- a/tests/nested_pandas/series/test_packer.py +++ b/tests/nested_pandas/series/test_packer.py @@ -3,6 +3,7 @@ import pyarrow as pa import pytest from nested_pandas import NestedDtype +from nested_pandas.datasets import generate_data from nested_pandas.series import packer from numpy.testing import assert_array_equal from pandas.testing import assert_frame_equal, assert_series_equal @@ -221,6 +222,19 @@ def test_pack_flat_with_on(): assert_series_equal(actual, desired) +def test_pack_flat_with_nested(): + """Test pack_flat when input already has nested columns.""" + df = generate_data(10, 3) + index = [0, 0, 1, 1, 2, 2, 2, 2, 0, 0] + df.index = index + actual = packer.pack_flat(df) + + desired_dtype = NestedDtype.from_fields( + {col: t if isinstance(t, NestedDtype) else pa.from_numpy_dtype(t) for col, t in df.dtypes.items()} + ) + assert actual.dtype == desired_dtype, f"{actual.dtype.name} != {desired_dtype.name}" + + def test_pack_sorted_df_into_struct(): """Test pack_sorted_df_into_struct().""" df = pd.DataFrame( From 85cdd7e12c64068159a4ce8ad7bdb688439432e0 Mon Sep 17 00:00:00 2001 From: Konstantin Malanchev Date: Mon, 7 Apr 2025 10:51:47 -0400 Subject: [PATCH 6/6] Fix a typo in variable name --- src/nested_pandas/series/ext_array.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/nested_pandas/series/ext_array.py b/src/nested_pandas/series/ext_array.py index e5318abb..e4ab6616 100644 --- a/src/nested_pandas/series/ext_array.py +++ b/src/nested_pandas/series/ext_array.py @@ -245,11 +245,11 @@ def _from_sequence(cls, scalars, *, dtype=None, copy: bool = False) -> Self: # del copy pa_type = to_pyarrow_dtype(dtype) - pa_array, infered_inner_dtypes = cls._box_pa_array(scalars, pa_type=pa_type) + pa_array, inferred_inner_dtypes = cls._box_pa_array(scalars, pa_type=pa_type) if isinstance(dtype, NestedDtype): inner_dtypes = dtype.inner_dtypes - elif len(infered_inner_dtypes) > 1: - inner_dtypes = infered_inner_dtypes + elif len(inferred_inner_dtypes) > 1: + inner_dtypes = 
inferred_inner_dtypes else: inner_dtypes = None return cls(pa_array, inner_dtypes=inner_dtypes)
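
Below is a rough usage sketch of the API these patches introduce, assembled from the tests in the series above. It is illustrative only and not part of the patches; the variable names (`inner`, `outer`) are placeholders.

    import pandas as pd
    import pyarrow as pa
    from nested_pandas import NestedDtype

    # Item types may be given as pyarrow types or pd.ArrowDtype (patch 4/6).
    inner = NestedDtype.from_fields({"a": pa.int64(), "b": pd.ArrowDtype(pa.float64())})

    # inner_dtypes overrides the pandas dtype used for a field's flat
    # representation and carries double-nested fields (patch 2/6).
    outer = NestedDtype(
        pa.struct(
            [
                pa.field("x", pa.list_(pa.float64())),
                pa.field("nested", pa.list_(inner.pyarrow_dtype)),
            ]
        ),
        inner_dtypes={"x": pd.Float64Dtype(), "nested": inner},
    )

    # .fields now maps field names to pandas dtypes; fields without an
    # explicit inner dtype fall back to pd.ArrowDtype of the list element type.
    assert outer.fields == {"x": pd.Float64Dtype(), "nested": inner}
    assert outer.inner_dtype("x") == pd.Float64Dtype()
    # .nest.to_flat() and .nest.get_flat_series() cast to these dtypes (patch 3/6).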