Skip to content

Commit b6412b5

Browse files
authored
Merge pull request #248 from lincc-frameworks/drop_nested
implement NestedFrame.drop for nested columns
2 parents 6d89bb9 + 377f5fe commit b6412b5

File tree

3 files changed

+147
-0
lines changed

3 files changed

+147
-0
lines changed

docs/reference/nestedframe.rst

+1
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ Extended Pandas.DataFrame Interface
3838
NestedFrame.dropna
3939
NestedFrame.sort_values
4040
NestedFrame.reduce
41+
NestedFrame.drop
4142

4243
I/O
4344
~~~~~~~~~

src/nested_pandas/nestedframe/core.py

+98
Original file line numberDiff line numberDiff line change
@@ -482,6 +482,104 @@ def from_lists(cls, df, base_columns=None, list_columns=None, name="nested"):
482482
else:
483483
return NestedFrame(packed_df.to_frame())
484484

485+
def drop(
    self, labels=None, *, axis=0, index=None, columns=None, level=None, inplace=False, errors="raise"
):
    """Drop specified labels from rows or columns.

    Remove rows or columns by specifying label names and corresponding
    axis, or by directly specifying index or column names. When using a
    multi-index, labels on different levels can be removed by
    specifying the level. See the user guide for more information about
    the now unused levels.

    Column labels that ``self._is_known_hierarchical_column`` recognises
    (dot notation, e.g. "nested.col1") remove that field from the nested
    column instead of removing a whole column. Every other label is
    forwarded to the parent class ``drop`` unchanged.

    Parameters
    ----------
    labels: single label or list-like
        Index or column labels to drop. Nested sub-columns are
        accessed using dot notation (e.g. "nested.col1").
    axis: {0 or 'index', 1 or 'columns'}, default 0
        Whether to drop labels from the index (0 or 'index') or
        columns (1 or 'columns').
    index: single label or list-like
        Alternative to specifying axis (labels, axis=0 is equivalent to
        index=labels).
    columns: single label or list-like
        Alternative to specifying axis (labels, axis=1 is equivalent to
        columns=labels). Dot-notation labels for nested sub-columns are
        accepted here as well.
    level: int or level name, optional
        For MultiIndex, level from which the labels will be removed.
    inplace: bool, default False
        If False, return a copy. Otherwise, do operation in place and
        return None.
    errors: {'ignore', 'raise'}, default 'raise'
        If 'ignore', suppress error and only existing labels are dropped.

    Returns
    -------
    DataFrame or None
        DataFrame with the specified index or column labels removed,
        or None if inplace=True.

    Examples
    --------

    >>> from nested_pandas.datasets.generation import generate_data
    >>> nf = generate_data(5,5, seed=1)

    >>> # drop the "t" column from "nested"
    >>> nf = nf.drop(["nested.t"], axis=1)
    >>> nf
              a         b                                      nested
    0  0.417022  0.184677  [{flux: 31.551563, band: 'r'}; …] (5 rows)
    1  0.720324  0.372520  [{flux: 68.650093, band: 'g'}; …] (5 rows)
    2  0.000114  0.691121  [{flux: 83.462567, band: 'g'}; …] (5 rows)
    3  0.302333  0.793535   [{flux: 1.828828, band: 'g'}; …] (5 rows)
    4  0.146756  1.077633  [{flux: 75.014431, band: 'g'}; …] (5 rows)
    """

    # Column labels can arrive either as ``labels`` with a column axis or
    # through the ``columns`` keyword; only those can name nested fields.
    if labels is not None and axis in (1, "columns"):
        column_labels, via_columns_kw = labels, False
    elif labels is None and columns is not None:
        column_labels, via_columns_kw = columns, True
    else:
        column_labels, via_columns_kw = None, False

    # Split the column labels into nested fields (grouped by their nested
    # column) and ordinary labels that pandas should handle.
    nested_fields = {}
    base_labels = []
    if column_labels is not None:
        if isinstance(column_labels, str) or not hasattr(column_labels, "__iter__"):
            column_labels = [column_labels]
        for label in column_labels:
            # Only strings can be dot-notation references to nested fields
            if isinstance(label, str) and self._is_known_hierarchical_column(label):
                parts = label.split(".")
                nested_fields.setdefault(parts[0], []).append(parts[1])
            else:
                base_labels.append(label)

    # Nothing nested is involved: behave exactly like pandas
    if not nested_fields:
        return super().drop(
            labels=labels,
            axis=axis,
            index=index,
            columns=columns,
            level=level,
            inplace=inplace,
            errors=errors,
        )

    # Work out what, if anything, is left for pandas to drop
    if via_columns_kw:
        rest_labels, rest_columns = None, (base_labels or None)
        pandas_needed = bool(base_labels) or index is not None
    else:
        rest_labels, rest_columns = base_labels, columns
        pandas_needed = bool(base_labels)

    # Let pandas go first: it raises on unknown labels before anything is
    # modified, so a failing in-place call leaves the frame untouched.
    target = self
    if pandas_needed:
        dropped = super().drop(
            labels=rest_labels,
            axis=axis,
            index=index,
            columns=rest_columns,
            level=level,
            inplace=inplace,
            errors=errors,
        )
        if not inplace:
            target = dropped

    # Strip the requested fields from each nested column that still exists
    # (a nested column may itself have been dropped wholesale just above).
    updates = {
        col: target[col].nest.without_field(fields)
        for col, fields in nested_fields.items()
        if col in target.columns
    }

    if inplace:
        # Mutate the caller's frame and follow the pandas convention of
        # returning None for in-place operations.
        for col, stripped in updates.items():
            self[col] = stripped
        return None
    return target.assign(**updates)
582+
485583
def eval(self, expr: str, *, inplace: bool = False, **kwargs) -> Any | None:
486584
"""
487585

tests/nested_pandas/nestedframe/test_nestedframe.py

+48
Original file line numberDiff line numberDiff line change
@@ -1126,6 +1126,54 @@ def test_scientific_notation():
11261126
assert list(selected.index) == [0, 2]
11271127

11281128

1129+
def test_drop():
    """Check NestedFrame.drop for rows, base columns, and nested sub-columns"""

    # Two nested layers share the same repeated index into the base frame
    repeated_index = [0, 0, 0, 1, 1, 1, 2, 2, 2]
    first_layer = pd.DataFrame(
        data={"c": [0, 2, 4, 1, 4, 3, 1, 4, 1], "d": [5, 4, 7, 5, 3, 1, 9, 3, 4]},
        index=repeated_index,
    )
    second_layer = pd.DataFrame(
        data={"e": [0, 2, 4, 1, 4, 3, 1, 4, 1], "f": [5, 4, 7, 5, 3, 1, 9, 3, 4]},
        index=repeated_index,
    )

    frame = NestedFrame(data={"a": [1, 2, 3], "b": [2, 4, 6]}, index=[0, 1, 2])
    frame = frame.add_nested(first_layer, "nested")
    frame = frame.add_nested(second_layer, "nested2")
    n_columns = len(frame.columns)

    # Row drop (axis=0) removes exactly one row
    without_row = frame.drop(0, axis=0)
    assert len(without_row) == len(frame) - 1

    # A base column disappears from the frame
    without_a = frame.drop("a", axis=1)
    assert len(without_a.columns) == n_columns - 1
    assert "a" not in without_a.columns

    # A nested sub-column disappears from its nested structure only
    without_c = frame.drop("nested.c", axis=1)
    assert len(without_c.columns) == n_columns
    assert "c" not in without_c.nested.nest.fields

    # Unknown columns raise
    with pytest.raises(KeyError):
        frame.drop("not_a_column", axis=1)

    # Base and nested labels mixed in one call
    mixed = frame.drop(["a", "nested.c"], axis=1)
    assert len(mixed.columns) == n_columns - 1
    assert "a" not in mixed.columns
    assert "c" not in mixed.nested.nest.fields

    # Sub-columns from two different nested structures in one call
    both_layers = frame.drop(["nested.c", "nested2.f"], axis=1)
    assert len(both_layers.columns) == n_columns
    assert "c" not in both_layers.nested.nest.fields
    assert "f" not in both_layers.nested2.nest.fields
1175+
1176+
11291177
def test_eval():
11301178
"""
11311179
Test basic behavior of NestedFrame.eval, and that it can handle nested references

0 commit comments

Comments
 (0)