diff --git a/narwhals/__init__.py b/narwhals/__init__.py index 52e0eb0506..c6d72f1320 100644 --- a/narwhals/__init__.py +++ b/narwhals/__init__.py @@ -52,6 +52,7 @@ coalesce, col, concat, + concat_list, concat_str, exclude, format, @@ -132,6 +133,7 @@ "coalesce", "col", "concat", + "concat_list", "concat_str", "dependencies", "dtypes", diff --git a/narwhals/_arrow/namespace.py b/narwhals/_arrow/namespace.py index 98282a575e..23c0d5fbce 100644 --- a/narwhals/_arrow/namespace.py +++ b/narwhals/_arrow/namespace.py @@ -227,6 +227,23 @@ def func(df: ArrowDataFrame) -> list[ArrowSeries]: context=self, ) + def concat_list(self, *exprs: ArrowExpr) -> ArrowExpr: + def func(df: ArrowDataFrame) -> list[ArrowSeries]: + msg = "TODO: ARROW" + raise NotImplementedError(msg) + # series = list(chain.from_iterable(expr(df) for expr in exprs)) + # arrays = [s._native_series.combine_chunks() for s in series] + # name = series[0].name + # struct_array = pc.make_struct(*arrays, field_names=[s.name for s in series]) + # return [self._series(struct_array, name=name, version=self._version)] + + return self._expr._from_callable( + func=func, + evaluate_output_names=combine_evaluate_output_names(*exprs), + alias_output_names=combine_alias_output_names(*exprs), + context=self, + ) + def coalesce(self, *exprs: ArrowExpr) -> ArrowExpr: def func(df: ArrowDataFrame) -> list[ArrowSeries]: align = self._series._align_full_broadcast diff --git a/narwhals/_pandas_like/namespace.py b/narwhals/_pandas_like/namespace.py index 96b7a22290..c3bd36a73d 100644 --- a/narwhals/_pandas_like/namespace.py +++ b/narwhals/_pandas_like/namespace.py @@ -335,6 +335,18 @@ def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]: context=self, ) + def concat_list(self, *exprs: PandasLikeExpr) -> PandasLikeExpr: + def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]: + msg = "TODO: PANDAS" + raise NotImplementedError(msg) + + return self._expr._from_callable( + func=func, + 
evaluate_output_names=combine_evaluate_output_names(*exprs), + alias_output_names=combine_alias_output_names(*exprs), + context=self, + ) + def _if_then_else( self, when: NativeSeriesT, diff --git a/narwhals/_polars/namespace.py b/narwhals/_polars/namespace.py index ac8da364be..463b18599b 100644 --- a/narwhals/_polars/namespace.py +++ b/narwhals/_polars/namespace.py @@ -198,6 +198,10 @@ def concat_str( version=self._version, ) + def concat_list(self, *exprs: PolarsExpr) -> PolarsExpr: + pl_exprs = [expr._native_expr for expr in exprs] + return self._expr(pl.concat_list(pl_exprs), version=self._version) + def when_then( self, when: PolarsExpr, then: PolarsExpr, otherwise: PolarsExpr | None = None ) -> PolarsExpr: diff --git a/narwhals/functions.py b/narwhals/functions.py index bff3f27c85..a46ee7a3ab 100644 --- a/narwhals/functions.py +++ b/narwhals/functions.py @@ -1587,6 +1587,46 @@ def concat_str( ) +def concat_list(exprs: IntoExpr | Iterable[IntoExpr], *more_exprs: IntoExpr) -> Expr: + r"""Horizontally combine multiple columns into a single column with a list of the elements. + + Arguments: + exprs: One or more expressions to combine into a list. Strings are treated as column names. + *more_exprs: Additional columns or expressions, passed as positional arguments. + + Returns: + An expression that produces a single column containing a list of the given fields. + + Example: + >>> import pandas as pd + >>> import narwhals as nw + >>> + >>> data = { + ... "a": [1, 2, 3], + ... "b": ["dogs", "cats", None], + ... "c": ["play", "swim", "walk"], + ... } + >>> df_native = pd.DataFrame(data) + >>> ( + ... nw.from_native(df_native).select( + ... nw.concat_list([nw.col("a"), nw.col("b"), nw.col("c")]).alias( + ... "my_list" + ... ) + ... ) + ... 
) + ┌──────────────────────────┐ + | Narwhals DataFrame | + |--------------------------| + | my_list | + | 0 [1, "dogs", "play"] | + | 1 [2, "cats", "swim"] | + | 2 [3, None, "walk"] | + └──────────────────────────┘ + """ + flat_exprs = flatten([*flatten([exprs]), *more_exprs]) + return _expr_with_horizontal_op("concat_list", *flat_exprs) + + def coalesce( exprs: IntoExpr | Iterable[IntoExpr], *more_exprs: IntoExpr | NonNestedLiteral ) -> Expr: diff --git a/narwhals/stable/v1/__init__.py b/narwhals/stable/v1/__init__.py index 8e5a78672d..d73b69b4ed 100644 --- a/narwhals/stable/v1/__init__.py +++ b/narwhals/stable/v1/__init__.py @@ -1163,6 +1163,10 @@ def concat_str( ) +def concat_list(exprs: IntoExpr | Iterable[IntoExpr], *more_exprs: IntoExpr) -> Expr: + return _stableify(nw.concat_list(exprs, *more_exprs)) + + def format(f_string: str, *args: IntoExpr) -> Expr: """Format expressions as a string.""" return _stableify(nw.format(f_string, *args)) @@ -1401,6 +1405,7 @@ def scan_parquet( "coalesce", "col", "concat", + "concat_list", "concat_str", "dependencies", "dtypes", diff --git a/narwhals/stable/v2/__init__.py b/narwhals/stable/v2/__init__.py index cbc5ff21d3..7ad19e3873 100644 --- a/narwhals/stable/v2/__init__.py +++ b/narwhals/stable/v2/__init__.py @@ -916,6 +916,19 @@ def concat_str( ) +def concat_list(exprs: IntoExpr | Iterable[IntoExpr], *more_exprs: IntoExpr) -> Expr: + """Horizontally combine multiple columns into a single list column. + + Arguments: + exprs: One or more expressions to combine into a list. Strings are treated as column names. + *more_exprs: Additional columns or expressions, passed as positional arguments. + + Returns: + An expression that produces a single list column containing the given fields. + """ + return _stableify(nw.concat_list(exprs, *more_exprs)) + + def format(f_string: str, *args: IntoExpr) -> Expr: """Format expressions as a string. 
@@ -1240,6 +1253,7 @@ def scan_parquet( "coalesce", "col", "concat", + "concat_list", "concat_str", "dependencies", "dtypes", diff --git a/tests/expr_and_series/concat_list_test.py b/tests/expr_and_series/concat_list_test.py new file mode 100644 index 0000000000..7ef84f86f4 --- /dev/null +++ b/tests/expr_and_series/concat_list_test.py @@ -0,0 +1,26 @@ +from __future__ import annotations + +import pytest + +import narwhals as nw +from tests.utils import POLARS_VERSION, Constructor + +pytest.importorskip("pyarrow") + +data = {"a": [1, 2, 3], "b": ["dogs", "cats", None], "c": ["play", "swim", "walk"]} + + +def test_dryrun(constructor: Constructor, *, request: pytest.FixtureRequest) -> None: + if "polars" in str(constructor) and POLARS_VERSION < (1, 0, 0): + # nth only available after 1.0 + request.applymarker(pytest.mark.xfail) + + # FIXME: for now only polars>1.0 works. + if "polars" not in str(constructor): + request.applymarker(pytest.mark.xfail) + + df = nw.from_native(constructor(data)) + result = df.select(nw.concat_list([nw.col("a"), nw.col("b"), nw.col("c")]).alias("s")) + + # FIXME: assert instead of print + print(result)