Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions narwhals/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@
coalesce,
col,
concat,
concat_list,
concat_str,
exclude,
format,
Expand Down Expand Up @@ -132,6 +133,7 @@
"coalesce",
"col",
"concat",
"concat_list",
"concat_str",
"dependencies",
"dtypes",
Expand Down
17 changes: 17 additions & 0 deletions narwhals/_arrow/namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,23 @@ def func(df: ArrowDataFrame) -> list[ArrowSeries]:
context=self,
)

def concat_list(self, *exprs: ArrowExpr) -> ArrowExpr:
    """Build an expression meant to combine `exprs` row-wise into one list column.

    The PyArrow backend does not implement this operation yet. The expression
    object is still constructed (with output names derived from `exprs`), but
    the callable handed to `_from_callable` raises as soon as it is invoked.

    Arguments:
        *exprs: Expressions whose values should become the list elements.

    Returns:
        An expression wrapping a callable that always raises.

    Raises:
        NotImplementedError: Raised by the wrapped callable when it is called
            with a dataframe (presumably at evaluation time).
    """

    def func(df: ArrowDataFrame) -> list[ArrowSeries]:
        # TODO: implement for PyArrow. The result must be a list-typed array
        # (not a struct), so the `struct` implementation cannot be reused as-is.
        msg = "`concat_list` is not yet implemented for the PyArrow backend."
        raise NotImplementedError(msg)

    return self._expr._from_callable(
        func=func,
        evaluate_output_names=combine_evaluate_output_names(*exprs),
        alias_output_names=combine_alias_output_names(*exprs),
        context=self,
    )

def coalesce(self, *exprs: ArrowExpr) -> ArrowExpr:
def func(df: ArrowDataFrame) -> list[ArrowSeries]:
align = self._series._align_full_broadcast
Expand Down
12 changes: 12 additions & 0 deletions narwhals/_pandas_like/namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -335,6 +335,18 @@ def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]:
context=self,
)

def concat_list(self, *exprs: PandasLikeExpr) -> PandasLikeExpr:
    """Build an expression meant to combine `exprs` row-wise into one list column.

    Pandas-like backends do not implement this operation yet. The expression
    object is still constructed (with output names derived from `exprs`), but
    the callable handed to `_from_callable` raises as soon as it is invoked.

    Arguments:
        *exprs: Expressions whose values should become the list elements.

    Returns:
        An expression wrapping a callable that always raises.

    Raises:
        NotImplementedError: Raised by the wrapped callable when it is called
            with a dataframe (presumably at evaluation time).
    """

    def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]:
        # TODO: implement for pandas-like backends.
        msg = "`concat_list` is not yet implemented for pandas-like backends."
        raise NotImplementedError(msg)

    return self._expr._from_callable(
        func=func,
        evaluate_output_names=combine_evaluate_output_names(*exprs),
        alias_output_names=combine_alias_output_names(*exprs),
        context=self,
    )

def _if_then_else(
self,
when: NativeSeriesT,
Expand Down
4 changes: 4 additions & 0 deletions narwhals/_polars/namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,10 @@ def concat_str(
version=self._version,
)

def concat_list(self, *exprs: PolarsExpr) -> PolarsExpr:
    """Combine `exprs` horizontally into one list expression via `pl.concat_list`.

    Arguments:
        *exprs: Wrapped Polars expressions; each one's `_native_expr` is passed
            to Polars in the order given.

    Returns:
        A new wrapped expression carrying this namespace's version.
    """
    native = pl.concat_list([wrapped._native_expr for wrapped in exprs])
    return self._expr(native, version=self._version)

def when_then(
self, when: PolarsExpr, then: PolarsExpr, otherwise: PolarsExpr | None = None
) -> PolarsExpr:
Expand Down
40 changes: 40 additions & 0 deletions narwhals/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -1587,6 +1587,46 @@ def concat_str(
)


def concat_list(exprs: IntoExpr | Iterable[IntoExpr], *more_exprs: IntoExpr) -> Expr:
    r"""Horizontally combine multiple columns into a single column with a list of the elements.

    Arguments:
        exprs: One or more expressions to combine into a list. Strings are treated as column names.
        *more_exprs: Additional columns or expressions, passed as positional arguments.

    Returns:
        An expression that produces a single column containing a list of the given fields.

    Notes:
        At present only the Polars backend implements this operation; the
        pandas-like and PyArrow backends raise `NotImplementedError`. The
        example below is therefore skipped by doctest and its output is
        illustrative only.

    Example:
        >>> import pandas as pd
        >>> import narwhals as nw
        >>>
        >>> data = {
        ...     "a": [1, 2, 3],
        ...     "b": ["dogs", "cats", None],
        ...     "c": ["play", "swim", "walk"],
        ... }
        >>> df_native = pd.DataFrame(data)
        >>> (  # doctest: +SKIP
        ...     nw.from_native(df_native).select(
        ...         nw.concat_list([nw.col("a"), nw.col("b"), nw.col("c")]).alias(
        ...             "my_list"
        ...         )
        ...     )
        ... )
        ┌──────────────────────────┐
        |    Narwhals DataFrame    |
        |--------------------------|
        |         my_list          |
        |  0  [1, "dogs", "play"]  |
        |  1  [2, "cats", "swim"]  |
        |  2  [3, None, "walk"]    |
        └──────────────────────────┘
    """
    # Accept either a single iterable of expressions or several positional
    # expressions (or a mix) and normalise them into one flat sequence.
    flat_exprs = flatten([*flatten([exprs]), *more_exprs])
    return _expr_with_horizontal_op("concat_list", *flat_exprs)


def coalesce(
exprs: IntoExpr | Iterable[IntoExpr], *more_exprs: IntoExpr | NonNestedLiteral
) -> Expr:
Expand Down
5 changes: 5 additions & 0 deletions narwhals/stable/v1/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1163,6 +1163,10 @@ def concat_str(
)


def concat_list(exprs: IntoExpr | Iterable[IntoExpr], *more_exprs: IntoExpr) -> Expr:
    """Horizontally combine multiple columns into a single list column.

    Thin stable-API wrapper: delegates to `nw.concat_list` and passes the
    result through `_stableify`.

    Arguments:
        exprs: One or more expressions to combine into a list.
        *more_exprs: Additional columns or expressions, passed as positional arguments.

    Returns:
        The stable-API form of the expression returned by `nw.concat_list`.
    """
    combined = nw.concat_list(exprs, *more_exprs)
    return _stableify(combined)


def format(f_string: str, *args: IntoExpr) -> Expr:
"""Format expressions as a string."""
return _stableify(nw.format(f_string, *args))
Expand Down Expand Up @@ -1401,6 +1405,7 @@ def scan_parquet(
"coalesce",
"col",
"concat",
"concat_list",
"concat_str",
"dependencies",
"dtypes",
Expand Down
14 changes: 14 additions & 0 deletions narwhals/stable/v2/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -916,6 +916,19 @@ def concat_str(
)


def concat_list(exprs: IntoExpr | Iterable[IntoExpr], *more_exprs: IntoExpr) -> Expr:
    """Combine several columns horizontally into one list-typed column.

    Delegates to `nw.concat_list` and passes the result through `_stableify`.

    Arguments:
        exprs: One or more expressions to place in the list. Strings are treated as column names.
        *more_exprs: Further columns or expressions, given positionally.

    Returns:
        An expression yielding a single list column made of the given fields.
    """
    combined = nw.concat_list(exprs, *more_exprs)
    return _stableify(combined)


def format(f_string: str, *args: IntoExpr) -> Expr:
"""Format expressions as a string.

Expand Down Expand Up @@ -1240,6 +1253,7 @@ def scan_parquet(
"coalesce",
"col",
"concat",
"concat_list",
"concat_str",
"dependencies",
"dtypes",
Expand Down
26 changes: 26 additions & 0 deletions tests/expr_and_series/concat_list_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
from __future__ import annotations

import pytest

import narwhals as nw
from tests.utils import POLARS_VERSION, Constructor

pytest.importorskip("pyarrow")

data = {"a": [1, 2, 3], "b": ["dogs", "cats", None], "c": ["play", "swim", "walk"]}


def test_dryrun(constructor: Constructor, *, request: pytest.FixtureRequest) -> None:
    """Smoke-test `nw.concat_list`: one output column, named by the alias, of list dtype.

    Only the Polars backend is implemented so far, so every other constructor
    is marked as an expected failure, as is Polars older than 1.0.
    """
    if "polars" not in str(constructor):
        request.applymarker(
            pytest.mark.xfail(
                reason="concat_list is only implemented for the Polars backend"
            )
        )
    elif POLARS_VERSION < (1, 0, 0):
        request.applymarker(
            pytest.mark.xfail(reason="concat_list currently requires polars>=1.0")
        )

    df = nw.from_native(constructor(data))
    result = df.select(nw.concat_list([nw.col("a"), nw.col("b"), nw.col("c")]).alias("s"))

    # Check the schema rather than printing, so the test can actually fail.
    # `collect_schema` works for both eager and lazy frames.
    schema = result.collect_schema()
    assert schema.names() == ["s"]
    assert isinstance(schema["s"], nw.List)
    # TODO: also compare the resulting values once the expected element dtype
    # for mixed int/str inputs is settled across backends.