From d53ae187dc83b85d2e044c4549a58818fb4269f4 Mon Sep 17 00:00:00 2001 From: jrycw Date: Mon, 3 Mar 2025 01:49:47 +0800 Subject: [PATCH 01/15] Implement `cols_label_with()` --- great_tables/_boxhead.py | 71 +++++++++++++++++++++++++++++++++++++++- great_tables/gt.py | 3 +- 2 files changed, 72 insertions(+), 2 deletions(-) diff --git a/great_tables/_boxhead.py b/great_tables/_boxhead.py index 2a5f358af..da0abb7f2 100644 --- a/great_tables/_boxhead.py +++ b/great_tables/_boxhead.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import TYPE_CHECKING +from typing import Callable, TYPE_CHECKING from ._locations import resolve_cols_c from ._utils import _assert_list_is_subset @@ -156,6 +156,75 @@ def cols_label( return self._replace(_boxhead=boxhead) +def cols_label_with(self: GTSelf, fn: Callable[[str], str], columns: SelectExpr = None) -> GTSelf: + """ + Relabel one or more columns using a function. + + The `cols_label_with()` function allows for modification of column labels through a supplied + function. By default, the function will be invoked on all column labels but this can be limited + to a subset via the `columns` parameter. + + Parameters + ---------- + fn + A function that accepts a column label as input and returns a transformed label as output. + + columns + The columns to target. Can either be a single column name or a series of column names + provided in a list. + + Returns + ------- + GT + The GT object is returned. This is the same object that the method is called on so that we + can facilitate method chaining. + + Notes + ----- + GT always selects columns using their name in the underlying data. This means that a column's + label is purely for final presentation. + + Examples + -------- + Let's use a subset of the `sp500` dataset to create a gt table. + ```{python} + from great_tables import GT, md + from great_tables.data import sp500 + + gt = GT(sp500.head()) + gt + ``` + + We can pass `str.upper()` to the `columns` parameter to convert all column labels to uppercase. + ```{python} + gt.cols_label_with(str.upper) + ``` + + One useful use case is using `md()`, provided by **Great Tables**, to format column labels. + For example, the following code demonstrates how to make the `date` and `adj_close` column labels + bold using markdown syntax. + ```{python} + gt.cols_label_with(lambda x: md(f"**{x}**"), columns=["date", "adj_close"]) + ``` + + """ + # Get the full list of column names for the data + column_names = self._boxhead._get_columns() + + if isinstance(columns, str): + columns = [columns] + _assert_list_is_subset(columns, set_list=column_names) + elif columns is None: + columns = column_names + + sel_cols = resolve_cols_c(data=self, expr=columns) + + new_col_labels = {col: fn(col) for col in sel_cols} + boxhead = self._boxhead._set_column_labels(new_col_labels) + + return self._replace(_boxhead=boxhead) + + def cols_align(self: GTSelf, align: str = "left", columns: SelectExpr = None) -> GTSelf: """ Set the alignment of one or more columns. diff --git a/great_tables/gt.py b/great_tables/gt.py index ed1b84a19..670d3b1bb 100644 --- a/great_tables/gt.py +++ b/great_tables/gt.py @@ -6,7 +6,7 @@ # Main gt imports ---- from ._body import body_reassemble -from ._boxhead import cols_align, cols_label +from ._boxhead import cols_align, cols_label, cols_label_with from ._data_color import data_color from ._export import as_latex, as_raw_html, save, show, write_raw_html from ._formats import ( @@ -253,6 +253,7 @@ def __init__( cols_align = cols_align cols_width = cols_width cols_label = cols_label + cols_label_with = cols_label_with cols_move = cols_move cols_move_to_start = cols_move_to_start cols_move_to_end = cols_move_to_end From 43766ccfdcb4f8f7696ea1d9eaffc9cc2be5e8eb Mon Sep 17 00:00:00 2001 From: jrycw Date: Mon, 3 Mar 2025 01:50:53 +0800 Subject: [PATCH 02/15] Add tests for `cols_label_with()` --- tests/test__boxhead.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/tests/test__boxhead.py b/tests/test__boxhead.py index aecd950d3..b1a790d07 100644 --- a/tests/test__boxhead.py +++ b/tests/test__boxhead.py @@ -56,6 +56,33 @@ def test_cols_label_return_self_if_no_kwargs(): assert isinstance(unmodified_table, gt.GT) +def test_cols_label_with_relabel_columns(): + # Create a table with default column labels + df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) + table = gt.GT(df) + + # Relabel the columns + modified_table = table.cols_label_with(str.lower) + + # Check that the column labels have been updated + assert modified_table._boxhead._get_column_labels() == ["a", "b"] + + +def test_cols_label_with_relabel_columns_with_markdown(): + # Create a table with default column labels + df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) + table = gt.GT(df) + + # Relabel a column with a Markdown formatted label + modified_table = table.cols_label_with(lambda x: gt.md(f"**{x}**"), columns="A") + + # Check that the column label has been updated with Markdown formatting + modified_column_labels = modified_table._boxhead._get_column_labels() + + assert modified_column_labels[0].text == "**A**" + assert modified_column_labels[1] == "B" + + def test_cols_align_default(): df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) table = gt.GT(df) From f89e19b97bc8788c456d8f9c82f11af6c9ad2a06 Mon Sep 17 00:00:00 2001 From: jrycw Date: Mon, 3 Mar 2025 01:51:21 +0800 Subject: [PATCH 03/15] Include `cols_label_with()` in `_quarto.yml` --- docs/_quarto.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/_quarto.yml b/docs/_quarto.yml index 8e5f8d1fd..12428f267 100644 --- a/docs/_quarto.yml +++ b/docs/_quarto.yml @@ -153,6 +153,7 @@ quartodoc: - GT.cols_align - GT.cols_width - GT.cols_label + - GT.cols_label_with - GT.cols_move - GT.cols_move_to_start - GT.cols_move_to_end From 22d93b1ee95ae6f0f35eff3f5d948949be984526 Mon Sep 17 00:00:00 2001 From: jrycw Date: Tue, 4 Mar 2025 00:51:00 +0800 Subject: [PATCH 04/15] Extract unit handling logic for column labels into `_handle_units_syntax()` --- great_tables/_boxhead.py | 31 ++++++++----------------------- great_tables/_utils.py | 24 ++++++++++++++++++++++++ 2 files changed, 32 insertions(+), 23 deletions(-) diff --git a/great_tables/_boxhead.py b/great_tables/_boxhead.py index da0abb7f2..74dc912ee 100644 --- a/great_tables/_boxhead.py +++ b/great_tables/_boxhead.py @@ -3,7 +3,7 @@ from typing import Callable, TYPE_CHECKING from ._locations import resolve_cols_c -from ._utils import _assert_list_is_subset +from ._utils import _assert_list_is_subset, _handle_units_syntax from ._tbl_data import SelectExpr from ._text import BaseText @@ -114,8 +114,6 @@ def cols_label( ) ``` """ - from great_tables._helpers import UnitStr - cases = cases if cases is not None else {} new_cases = cases | kwargs @@ -132,24 +130,7 @@ def cols_label( _assert_list_is_subset(mod_columns, set_list=column_names) # Handle units syntax in labels (e.g., "Density ({{ppl / mi^2}})") - new_kwargs: dict[str, UnitStr | str | BaseText] = {} - - for k, v in new_cases.items(): - if isinstance(v, str): - unitstr_v = UnitStr.from_str(v) - - if len(unitstr_v.units_str) == 1 and isinstance(unitstr_v.units_str[0], str): - new_kwargs[k] = unitstr_v.units_str[0] - else: - new_kwargs[k] = unitstr_v - - elif isinstance(v, BaseText): - new_kwargs[k] = v - - else: - raise ValueError( - "Column labels must be strings or BaseText objects. Use `md()` or `html()` for formatting." - ) + new_kwargs = _handle_units_syntax(new_cases) boxhead = self._boxhead._set_column_labels(new_kwargs) @@ -219,8 +200,12 @@ def cols_label_with(self: GTSelf, fn: Callable[[str], str], columns: SelectExpr sel_cols = resolve_cols_c(data=self, expr=columns) - new_col_labels = {col: fn(col) for col in sel_cols} - boxhead = self._boxhead._set_column_labels(new_col_labels) + new_cases = {col: fn(col) for col in sel_cols} + + # Handle units syntax in labels (e.g., "Density ({{ppl / mi^2}})") + new_kwargs = _handle_units_syntax(new_cases) + + boxhead = self._boxhead._set_column_labels(new_kwargs) return self._replace(_boxhead=boxhead) diff --git a/great_tables/_utils.py b/great_tables/_utils.py index 28eb62b39..835cf6ecd 100644 --- a/great_tables/_utils.py +++ b/great_tables/_utils.py @@ -7,6 +7,7 @@ from types import ModuleType from typing import TYPE_CHECKING, Any, Iterable, Iterator +from ._helpers import UnitStr from ._tbl_data import _get_cell, _set_cell, get_column_names, n_rows from ._text import BaseText, _process_text @@ -285,3 +286,26 @@ def _get_visible_cells(data: TblData) -> list[tuple[str, int]]: def is_valid_http_schema(url: str) -> bool: return url.startswith("http://") or url.startswith("https://") + + +def _handle_units_syntax(cases: dict[str, str | BaseText]) -> dict[str, UnitStr | str | BaseText]: + # Handle units syntax in labels (e.g., "Density ({{ppl / mi^2}})") + kwargs: dict[str, UnitStr | str | BaseText] = {} + + for k, v in cases.items(): + if isinstance(v, str): + unitstr_v = UnitStr.from_str(v) + + if len(unitstr_v.units_str) == 1 and isinstance(unitstr_v.units_str[0], str): + kwargs[k] = unitstr_v.units_str[0] + else: + kwargs[k] = unitstr_v + + elif isinstance(v, BaseText): + kwargs[k] = v + + else: + raise ValueError( + "Column labels must be strings or BaseText objects. Use `md()` or `html()` for formatting." + ) + return kwargs From ee637b8dba0224163109f4d92abaa083a7029a90 Mon Sep 17 00:00:00 2001 From: jrycw Date: Tue, 4 Mar 2025 01:11:46 +0800 Subject: [PATCH 05/15] Add tests for `_handle_units_syntax()` --- tests/test_utils.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/tests/test_utils.py b/tests/test_utils.py index caa5eca34..2f568ad7a 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -10,6 +10,7 @@ _assert_str_list, _assert_str_scalar, _collapse_list_elements, + _handle_units_syntax, _insert_into_list, _match_arg, _migrate_unformatted_to_output, @@ -224,3 +225,21 @@ def test_migrate_unformatted_to_output_html(): ) def test_is_valid_http_schema(url: str): assert is_valid_http_schema(url) + + +def test_handle_units_syntax(): + from great_tables._text import BaseText, Text + + new_kwargs = _handle_units_syntax({"column_label_1": "abc", "column_label_2": Text(text="xyz")}) + + assert all(isinstance(v, (str, BaseText)) for v in new_kwargs.values()) + + +def test_handle_units_syntax_raises(): + with pytest.raises(ValueError) as exc_info: + _handle_units_syntax({"column_label": 123}) + + assert ( + "Column labels must be strings or BaseText objects. Use `md()` or `html()` for formatting." + in exc_info.value.args[0] + ) From 2852f268d4a835742e9481538a565ada661fe7a3 Mon Sep 17 00:00:00 2001 From: jrycw Date: Fri, 7 Mar 2025 10:04:33 +0800 Subject: [PATCH 06/15] Switch the positions of `columns=` and `fn=` parameters in `cols_label_with()` based on feedback --- great_tables/_boxhead.py | 17 +++++++++++------ tests/test__boxhead.py | 15 +++++++++++++-- 2 files changed, 24 insertions(+), 8 deletions(-) diff --git a/great_tables/_boxhead.py b/great_tables/_boxhead.py index 74dc912ee..926e78a9a 100644 --- a/great_tables/_boxhead.py +++ b/great_tables/_boxhead.py @@ -137,7 +137,9 @@ def cols_label( return self._replace(_boxhead=boxhead) -def cols_label_with(self: GTSelf, fn: Callable[[str], str], columns: SelectExpr = None) -> GTSelf: +def cols_label_with( + self: GTSelf, columns: SelectExpr = None, fn: Callable[[str], str] | None = None +) -> GTSelf: """ Relabel one or more columns using a function. @@ -147,12 +149,12 @@ def cols_label_with(self: GTSelf, fn: Callable[[str], str], columns: SelectExpr Parameters ---------- - fn - A function that accepts a column label as input and returns a transformed label as output. - columns The columns to target. Can either be a single column name or a series of column names provided in a list. + fn + A function that accepts a column label as input and returns a transformed label as output. + Returns ------- @@ -178,17 +180,20 @@ def cols_label_with(self: GTSelf, fn: Callable[[str], str], columns: SelectExpr We can pass `str.upper()` to the `columns` parameter to convert all column labels to uppercase. ```{python} - gt.cols_label_with(str.upper) + gt.cols_label_with(fn=str.upper) ``` One useful use case is using `md()`, provided by **Great Tables**, to format column labels. For example, the following code demonstrates how to make the `date` and `adj_close` column labels bold using markdown syntax. ```{python} - gt.cols_label_with(lambda x: md(f"**{x}**"), columns=["date", "adj_close"]) + gt.cols_label_with(["date", "adj_close"], lambda x: md(f"**{x}**")) ``` """ + if fn is None: + raise ValueError("Must provide the `fn=` parameter to use `cols_label_with()`.") + # Get the full list of column names for the data column_names = self._boxhead._get_columns() diff --git a/tests/test__boxhead.py b/tests/test__boxhead.py index b1a790d07..505918516 100644 --- a/tests/test__boxhead.py +++ b/tests/test__boxhead.py @@ -62,7 +62,7 @@ def test_cols_label_with_relabel_columns(): table = gt.GT(df) # Relabel the columns - modified_table = table.cols_label_with(str.lower) + modified_table = table.cols_label_with(fn=str.lower) # Check that the column labels have been updated assert modified_table._boxhead._get_column_labels() == ["a", "b"] @@ -74,7 +74,7 @@ def test_cols_label_with_relabel_columns_with_markdown(): table = gt.GT(df) # Relabel a column with a Markdown formatted label - modified_table = table.cols_label_with(lambda x: gt.md(f"**{x}**"), columns="A") + modified_table = table.cols_label_with("A", lambda x: gt.md(f"**{x}**")) # Check that the column label has been updated with Markdown formatting modified_column_labels = modified_table._boxhead._get_column_labels() @@ -83,6 +83,17 @@ def test_cols_label_with_relabel_columns_with_markdown(): assert modified_column_labels[1] == "B" +def test_cols_label_with_raises(): + # Create a table with default column labels + df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) + table = gt.GT(df) + + with pytest.raises(ValueError) as exc_info: + table.cols_label_with() + + assert "Must provide the `fn=` parameter to use `cols_label_with()`." in exc_info.value.args[0] + + def test_cols_align_default(): df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) table = gt.GT(df) From 1ae6f8a9257e1092307d956fce06ece377c36b74 Mon Sep 17 00:00:00 2001 From: jrycw Date: Fri, 7 Mar 2025 15:29:44 +0800 Subject: [PATCH 07/15] Add `fn=` parameter to `cols_label()` --- great_tables/_boxhead.py | 34 +++++++++++++++++++++++++++++----- tests/test__boxhead.py | 14 ++++++++++++++ 2 files changed, 43 insertions(+), 5 deletions(-) diff --git a/great_tables/_boxhead.py b/great_tables/_boxhead.py index 926e78a9a..fa447c3f5 100644 --- a/great_tables/_boxhead.py +++ b/great_tables/_boxhead.py @@ -12,7 +12,10 @@ def cols_label( - self: GTSelf, cases: dict[str, str | BaseText] | None = None, **kwargs: str | BaseText + self: GTSelf, + cases: dict[str, str | BaseText] | None = None, + fn: Callable[[str], str] | None = None, + **kwargs: str | BaseText, ) -> GTSelf: """ Relabel one or more columns. @@ -31,7 +34,8 @@ def cols_label( cases A dictionary where the keys are column names and the values are the labels. Labels may use [`md()`](`great_tables.md`) or [`html()`](`great_tables.html`) helpers for formatting. - + fn + A function that accepts a column label as input and returns a transformed label as output. **kwargs Keyword arguments to specify column labels. Each keyword corresponds to a column name, with its value indicating the new label. @@ -76,8 +80,8 @@ def cols_label( `md("*Population*")` to make the label italicized. ```{python} - from great_tables import GT, md - from great_tables.data import countrypops + from great_tables import md + ( GT(countrypops_mini) @@ -89,11 +93,26 @@ def cols_label( ) ``` + Furthermore, we can provide a callable to the `fn=` parameter, which will be applied to all + specified labels. This is useful for making small adjustments to each label individually. + For example, in this case, we want the `country_name` label to be bold and the `year` label to + be italicized using markdown syntax. By passing `md()` to `fn=`, we avoid the need to wrap `md()` + around each label separately: + ```{python} + ( + GT(countrypops_mini) + .cols_label( + country_name="**Name**", + year="*Year*", + fn=md + ) + ) + ``` + We can also use unit notation to format the column labels. In this example, we'll use `{{cm^3 molecules^-1 s^-1}}` for part of the label for the `OH_k298` column. ```{python} - from great_tables import GT from great_tables.data import reactions import polars as pl @@ -129,6 +148,11 @@ def cols_label( # msg: "All column names provided must exist in the input `.data` table." _assert_list_is_subset(mod_columns, set_list=column_names) + if fn is not None: + new_cases = { + orig_colname: fn(given_colname) for orig_colname, given_colname in new_cases.items() + } + # Handle units syntax in labels (e.g., "Density ({{ppl / mi^2}})") new_kwargs = _handle_units_syntax(new_cases) diff --git a/tests/test__boxhead.py b/tests/test__boxhead.py index 505918516..cac9af5dd 100644 --- a/tests/test__boxhead.py +++ b/tests/test__boxhead.py @@ -17,6 +17,20 @@ def test_cols_label_relabel_columns(): assert modified_table._boxhead._get_column_labels() == ["Column 1", "Column 2"] +def test_cols_label_relabel_columns_fn(): + # Create a table with default column labels + df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) + table = gt.GT(df) + + # Relabel the columns + modified_table = table.cols_label( + {"A": "Column 1"}, B="Column 2", fn=lambda x: "".join(x.split()) + ) + + # Check that the column labels have been updated + assert modified_table._boxhead._get_column_labels() == ["Column1", "Column2"] + + def test_cols_label_relabel_columns_with_markdown(): # Create a table with default column labels df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) From 2ee205a726621a013c6fbed48dcda12dbca0317e Mon Sep 17 00:00:00 2001 From: jrycw Date: Fri, 7 Mar 2025 15:41:22 +0800 Subject: [PATCH 08/15] Update type hint for `fn=` in `cols_label()` --- great_tables/_boxhead.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/great_tables/_boxhead.py b/great_tables/_boxhead.py index fa447c3f5..2fd4c563e 100644 --- a/great_tables/_boxhead.py +++ b/great_tables/_boxhead.py @@ -14,7 +14,7 @@ def cols_label( self: GTSelf, cases: dict[str, str | BaseText] | None = None, - fn: Callable[[str], str] | None = None, + fn: Callable[[str | BaseText], str | BaseText] | None = None, **kwargs: str | BaseText, ) -> GTSelf: """ From a06084909670244b2235b9f108da0d80970105fa Mon Sep 17 00:00:00 2001 From: jrycw Date: Sat, 8 Mar 2025 10:03:16 +0800 Subject: [PATCH 09/15] Revert "Add `fn=` parameter to `cols_label()` --- great_tables/_boxhead.py | 34 +++++----------------------------- tests/test__boxhead.py | 14 -------------- 2 files changed, 5 insertions(+), 43 deletions(-) diff --git a/great_tables/_boxhead.py b/great_tables/_boxhead.py index 2fd4c563e..926e78a9a 100644 --- a/great_tables/_boxhead.py +++ b/great_tables/_boxhead.py @@ -12,10 +12,7 @@ def cols_label( - self: GTSelf, - cases: dict[str, str | BaseText] | None = None, - fn: Callable[[str | BaseText], str | BaseText] | None = None, - **kwargs: str | BaseText, + self: GTSelf, cases: dict[str, str | BaseText] | None = None, **kwargs: str | BaseText ) -> GTSelf: """ Relabel one or more columns. @@ -34,8 +31,7 @@ def cols_label( cases A dictionary where the keys are column names and the values are the labels. Labels may use [`md()`](`great_tables.md`) or [`html()`](`great_tables.html`) helpers for formatting. - fn - A function that accepts a column label as input and returns a transformed label as output. + **kwargs Keyword arguments to specify column labels. Each keyword corresponds to a column name, with its value indicating the new label. @@ -80,8 +76,8 @@ def cols_label( `md("*Population*")` to make the label italicized. ```{python} - from great_tables import md - + from great_tables import GT, md + from great_tables.data import countrypops ( GT(countrypops_mini) @@ -93,26 +89,11 @@ def cols_label( ) ``` - Furthermore, we can provide a callable to the `fn=` parameter, which will be applied to all - specified labels. This is useful for making small adjustments to each label individually. - For example, in this case, we want the `country_name` label to be bold and the `year` label to - be italicized using markdown syntax. By passing `md()` to `fn=`, we avoid the need to wrap `md()` - around each label separately: - ```{python} - ( - GT(countrypops_mini) - .cols_label( - country_name="**Name**", - year="*Year*", - fn=md - ) - ) - ``` - We can also use unit notation to format the column labels. In this example, we'll use `{{cm^3 molecules^-1 s^-1}}` for part of the label for the `OH_k298` column. ```{python} + from great_tables import GT from great_tables.data import reactions import polars as pl @@ -148,11 +129,6 @@ def cols_label( # msg: "All column names provided must exist in the input `.data` table." _assert_list_is_subset(mod_columns, set_list=column_names) - if fn is not None: - new_cases = { - orig_colname: fn(given_colname) for orig_colname, given_colname in new_cases.items() - } - # Handle units syntax in labels (e.g., "Density ({{ppl / mi^2}})") new_kwargs = _handle_units_syntax(new_cases) diff --git a/tests/test__boxhead.py b/tests/test__boxhead.py index cac9af5dd..505918516 100644 --- a/tests/test__boxhead.py +++ b/tests/test__boxhead.py @@ -17,20 +17,6 @@ def test_cols_label_relabel_columns(): assert modified_table._boxhead._get_column_labels() == ["Column 1", "Column 2"] -def test_cols_label_relabel_columns_fn(): - # Create a table with default column labels - df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) - table = gt.GT(df) - - # Relabel the columns - modified_table = table.cols_label( - {"A": "Column 1"}, B="Column 2", fn=lambda x: "".join(x.split()) - ) - - # Check that the column labels have been updated - assert modified_table._boxhead._get_column_labels() == ["Column1", "Column2"] - - def test_cols_label_relabel_columns_with_markdown(): # Create a table with default column labels df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) From 73c08f92898d1d05f0432c37d3f38cf0ff66ade6 Mon Sep 17 00:00:00 2001 From: jrycw Date: Sat, 8 Mar 2025 10:32:26 +0800 Subject: [PATCH 10/15] Avoid automatic unit syntax handling in `cols_label_with()` --- great_tables/_boxhead.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/great_tables/_boxhead.py b/great_tables/_boxhead.py index 926e78a9a..a92711ae7 100644 --- a/great_tables/_boxhead.py +++ b/great_tables/_boxhead.py @@ -207,10 +207,7 @@ def cols_label_with( new_cases = {col: fn(col) for col in sel_cols} - # Handle units syntax in labels (e.g., "Density ({{ppl / mi^2}})") - new_kwargs = _handle_units_syntax(new_cases) - - boxhead = self._boxhead._set_column_labels(new_kwargs) + boxhead = self._boxhead._set_column_labels(new_cases) return self._replace(_boxhead=boxhead) From b3564d275bd902cc93b8286862e7ea1d99b19da4 Mon Sep 17 00:00:00 2001 From: jrycw Date: Sun, 9 Mar 2025 18:42:14 +0800 Subject: [PATCH 11/15] Allow passing a Polars expression to `converter=` in `cols_label_with()` --- great_tables/_boxhead.py | 53 +++++++++++++++++++++++++++------------- tests/test__boxhead.py | 29 ++++++++++++++++++++-- 2 files changed, 63 insertions(+), 19 deletions(-) diff --git a/great_tables/_boxhead.py b/great_tables/_boxhead.py index a92711ae7..4a321892d 100644 --- a/great_tables/_boxhead.py +++ b/great_tables/_boxhead.py @@ -9,6 +9,7 @@ if TYPE_CHECKING: from ._types import GTSelf + from ._tbl_data import SelectExpr def cols_label( @@ -138,23 +139,33 @@ def cols_label( def cols_label_with( - self: GTSelf, columns: SelectExpr = None, fn: Callable[[str], str] | None = None + self: GTSelf, + columns: SelectExpr = None, + converter: Callable[[str], str] | SelectExpr | None = None, ) -> GTSelf: """ - Relabel one or more columns using a function. + Relabel one or more columns using a function or a Polars expression. The `cols_label_with()` function allows for modification of column labels through a supplied function. By default, the function will be invoked on all column labels but this can be limited - to a subset via the `columns` parameter. + to a subset via the `columns=` parameter. + + Alternatively, you can pass a Polars expression using its + [name](https://docs.pola.rs/api/python/stable/reference/expressions/name.html) attribute. + + :::{.callout-warning} + If a Polars expression is provided, the `columns=` parameter will be ignored, as **Great Tables** + can infer the original column labels from the expression. + ::: Parameters ---------- columns The columns to target. Can either be a single column name or a series of column names provided in a list. - fn - A function that accepts a column label as input and returns a transformed label as output. - + converter + A function that takes a column label as input and returns a transformed label. + Alternatively, you can use a Polars expression to describe the transformations. Returns ------- @@ -191,21 +202,29 @@ def cols_label_with( ``` """ - if fn is None: - raise ValueError("Must provide the `fn=` parameter to use `cols_label_with()`.") + if converter is None: + raise ValueError("Must provide the `converter=` parameter to use `cols_label_with()`.") - # Get the full list of column names for the data - column_names = self._boxhead._get_columns() + if isinstance(converter, Callable): + # Get the full list of column names for the data + column_names = self._boxhead._get_columns() - if isinstance(columns, str): - columns = [columns] - _assert_list_is_subset(columns, set_list=column_names) - elif columns is None: - columns = column_names + if isinstance(columns, str): + columns = [columns] + _assert_list_is_subset(columns, set_list=column_names) + elif columns is None: + columns = column_names - sel_cols = resolve_cols_c(data=self, expr=columns) + sel_cols = resolve_cols_c(data=self, expr=columns) + + new_cases = {col: converter(col) for col in sel_cols} - new_cases = {col: fn(col) for col in sel_cols} + else: # pl.col().expr.name.method() or selector.name.method() + expr = converter + frame = self._tbl_data + sel_cols = frame.select(expr.meta.undo_aliases()).columns + new_cols = frame.select(expr).columns + new_cases = dict(zip(sel_cols, new_cols)) boxhead = self._boxhead._set_column_labels(new_cases) diff --git a/tests/test__boxhead.py b/tests/test__boxhead.py index 505918516..1bf2c7fbf 100644 --- a/tests/test__boxhead.py +++ b/tests/test__boxhead.py @@ -62,12 +62,34 @@ def test_cols_label_with_relabel_columns(): table = gt.GT(df) # Relabel the columns - modified_table = table.cols_label_with(fn=str.lower) + modified_table = table.cols_label_with(converter=str.lower) # Check that the column labels have been updated assert modified_table._boxhead._get_column_labels() == ["a", "b"] +@pytest.mark.parametrize( + "converter", + [ + pl.col("my_col", "my_col2").name.to_uppercase(), + pl.col(["my_col", "my_col2"]).name.to_uppercase(), + cs.by_name("my_col", "my_col2").name.to_uppercase(), + cs.by_name(["my_col", "my_col2"]).name.to_uppercase(), + cs.starts_with("my").name.to_uppercase(), + ], +) +def test_cols_label_with_relabel_columns_polars(converter): + # Create a table with default column labels + df = pl.DataFrame({"my_col": [1, 2, 3], "my_col2": [4, 5, 6]}) + table = gt.GT(df) + + # Relabel the columns + modified_table = table.cols_label_with(converter=converter) + + # Check that the column labels have been updated + assert modified_table._boxhead._get_column_labels() == ["MY_COL", "MY_COL2"] + + def test_cols_label_with_relabel_columns_with_markdown(): # Create a table with default column labels df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) @@ -91,7 +113,10 @@ def test_cols_label_with_raises(): with pytest.raises(ValueError) as exc_info: table.cols_label_with() - assert "Must provide the `fn=` parameter to use `cols_label_with()`." in exc_info.value.args[0] + assert ( + "Must provide the `converter=` parameter to use `cols_label_with()`." + in exc_info.value.args[0] + ) def test_cols_align_default(): From 1d4b4e6fc415e8234d21d4f8f1781da68836b244 Mon Sep 17 00:00:00 2001 From: jrycw Date: Sun, 9 Mar 2025 18:47:08 +0800 Subject: [PATCH 12/15] Update the example for `cols_label_with()` --- great_tables/_boxhead.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/great_tables/_boxhead.py b/great_tables/_boxhead.py index 4a321892d..ff12344d8 100644 --- a/great_tables/_boxhead.py +++ b/great_tables/_boxhead.py @@ -191,7 +191,7 @@ def cols_label_with( We can pass `str.upper()` to the `columns` parameter to convert all column labels to uppercase. ```{python} - gt.cols_label_with(fn=str.upper) + gt.cols_label_with(converter=str.upper) ``` One useful use case is using `md()`, provided by **Great Tables**, to format column labels. From 25e661c515d53014d0a4f15ac93dcb90527a53ea Mon Sep 17 00:00:00 2001 From: jrycw Date: Sun, 9 Mar 2025 19:01:49 +0800 Subject: [PATCH 13/15] Add a test for `cols_label_with()` to ensure column order does not affect the result --- tests/test__boxhead.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test__boxhead.py b/tests/test__boxhead.py index 1bf2c7fbf..fe39f411c 100644 --- a/tests/test__boxhead.py +++ b/tests/test__boxhead.py @@ -76,6 +76,7 @@ def test_cols_label_with_relabel_columns(): cs.by_name("my_col", "my_col2").name.to_uppercase(), cs.by_name(["my_col", "my_col2"]).name.to_uppercase(), cs.starts_with("my").name.to_uppercase(), + pl.col("my_col2", "my_col").name.to_uppercase(), # test for column positions ], ) def test_cols_label_with_relabel_columns_polars(converter): From 50542f864fc27589c17eb854d814cec6455144fe Mon Sep 17 00:00:00 2001 From: jrycw Date: Sun, 9 Mar 2025 23:35:13 +0800 Subject: [PATCH 14/15] Support passing a list of Polars expressions to `converter=` in `cols_label_with()` --- great_tables/_boxhead.py | 83 +++++++++++++++++++++++++++++++--------- tests/test__boxhead.py | 3 ++ 2 files changed, 67 insertions(+), 19 deletions(-) diff --git a/great_tables/_boxhead.py b/great_tables/_boxhead.py index ff12344d8..0985da071 100644 --- a/great_tables/_boxhead.py +++ b/great_tables/_boxhead.py @@ -9,7 +9,9 @@ if TYPE_CHECKING: from ._types import GTSelf - from ._tbl_data import SelectExpr + from polars.selectors import _selector_proxy_ + + PlSelectExpr = _selector_proxy_ def cols_label( @@ -141,7 +143,7 @@ def cols_label( def cols_label_with( self: GTSelf, columns: SelectExpr = None, - converter: Callable[[str], str] | SelectExpr | None = None, + converter: Callable[[str], str] | PlSelectExpr | list[PlSelectExpr] | None = None, ) -> GTSelf: """ Relabel one or more columns using a function or a Polars expression. @@ -150,11 +152,12 @@ def cols_label_with( function. By default, the function will be invoked on all column labels but this can be limited to a subset via the `columns=` parameter. - Alternatively, you can pass a Polars expression using its - [name](https://docs.pola.rs/api/python/stable/reference/expressions/name.html) attribute. + Alternatively, you can utilize the + [name](https://docs.pola.rs/api/python/stable/reference/expressions/name.html) attribute of + Polars expressions. :::{.callout-warning} - If a Polars expression is provided, the `columns=` parameter will be ignored, as **Great Tables** + If Polars expressions are utilized, the `columns=` parameter will be ignored, as **Great Tables** can infer the original column labels from the expression. ::: @@ -165,7 +168,8 @@ def cols_label_with( provided in a list. converter A function that takes a column label as input and returns a transformed label. - Alternatively, you can use a Polars expression to describe the transformations. + Alternatively, you can use a Polars expression or a list of Polars expressions to describe + the transformations. Returns ------- @@ -180,25 +184,64 @@ def cols_label_with( Examples -------- - Let's use a subset of the `sp500` dataset to create a gt table. + Let's use a subset of the `sp500` dataset to demonstrate how to convert all column labels to + uppercase using `str.upper()`. + ```{python} + import polars as pl + from polars import selectors as cs + from great_tables import GT, md from great_tables.data import sp500 - gt = GT(sp500.head()) - gt - ``` + sp500_mini = sp500.head() - We can pass `str.upper()` to the `columns` parameter to convert all column labels to uppercase. - ```{python} - gt.cols_label_with(converter=str.upper) + GT(sp500_mini).cols_label_with(converter=str.upper) ``` One useful use case is using `md()`, provided by **Great Tables**, to format column labels. For example, the following code demonstrates how to make the `date` and `adj_close` column labels bold using markdown syntax. + ```{python} - gt.cols_label_with(["date", "adj_close"], lambda x: md(f"**{x}**")) + GT(sp500_mini).cols_label_with(["date", "adj_close"], lambda x: md(f"**{x}**")) + ``` + + Now, let's see how to use Polars expressions to relabel a table when the underlying dataframe + comes from Polars. For instance, you can convert all column labels to uppercase using + `pl.all().name.to_uppercase()`. + + ```{python} + sp500_mini_pl = pl.from_pandas(sp500_mini) + GT(sp500_mini_pl).cols_label_with(converter=pl.all().name.to_uppercase()) + ``` + + Polars selectors are also supported. The following example demonstrates how to add a "str_" + prefix to string columns using `cs.string().name.prefix("str_")`. + + ```{python} + GT(sp500_mini_pl).cols_label_with(converter=cs.string().name.prefix("str_")) + ``` + + Passing a list of Polars expressions is also supported. The following example shows how to + add a "str_" prefix to string columns using `cs.string().name.prefix("str_")` + and a "_num" suffix to numerical columns using `cs.numeric().name.suffix("_num")`. + + ```{python} + GT(sp500_mini_pl).cols_label_with( + converter=[cs.string().name.prefix("str_"), cs.numeric().name.suffix("_num")] + ) + ``` + + One final note: if a column is selected multiple times in different Polars expressions, + the last applied transformation takes precedence. For example, applying + `cs.all().name.to_uppercase()` followed by `cs.all().name.suffix("_all")` + will result in only the latter being used for relabeling. + + ```{python} + GT(sp500_mini_pl).cols_label_with( + converter=[cs.all().name.to_uppercase(), cs.all().name.suffix("_all")] + ) ``` """ @@ -219,12 +262,14 @@ def cols_label_with( new_cases = {col: converter(col) for col in sel_cols} - else: # pl.col().expr.name.method() or selector.name.method() - expr = converter + else: # pl.col().expr.name.method() or selector.name.method() or [...] frame = self._tbl_data - sel_cols = frame.select(expr.meta.undo_aliases()).columns - new_cols = frame.select(expr).columns - new_cases = dict(zip(sel_cols, new_cols)) + new_cases: dict[str, str] = {} + exprs = converter if isinstance(converter, list) else [converter] + for expr in exprs: + sel_cols: list[str] = frame.select(expr.meta.undo_aliases()).columns + new_cols: list[str] = frame.select(expr).columns + new_cases |= dict(zip(sel_cols, new_cols)) boxhead = self._boxhead._set_column_labels(new_cases) diff --git a/tests/test__boxhead.py b/tests/test__boxhead.py index fe39f411c..e681e856c 100644 --- a/tests/test__boxhead.py +++ b/tests/test__boxhead.py @@ -76,6 +76,9 @@ def test_cols_label_with_relabel_columns(): cs.by_name("my_col", "my_col2").name.to_uppercase(), cs.by_name(["my_col", "my_col2"]).name.to_uppercase(), cs.starts_with("my").name.to_uppercase(), + [cs.by_name("my_col").name.to_uppercase(), cs.by_name("my_col2").name.to_uppercase()], + [pl.col("my_col").name.to_uppercase(), cs.by_name("my_col2").name.to_uppercase()], + [cs.all().name.suffix("_suffix"), cs.all().name.to_uppercase()], # test for last one wins pl.col("my_col2", "my_col").name.to_uppercase(), # test for column positions ], ) From ce5d5f1f46987af09f0b22f053caac7e7d95a51a Mon Sep 17 00:00:00 2001 From: jrycw Date: Sun, 9 Mar 2025 23:40:27 +0800 Subject: [PATCH 15/15] Simplify `test_handle_units_syntax()` --- tests/test_utils.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_utils.py b/tests/test_utils.py index 2f568ad7a..8f3da674d 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -2,7 +2,7 @@ import pytest -from great_tables import GT, exibble +from great_tables import GT, exibble, md from great_tables._tbl_data import is_na from great_tables._utils import ( _assert_list_is_subset, @@ -228,9 +228,9 @@ def test_is_valid_http_schema(url: str): def test_handle_units_syntax(): - from great_tables._text import BaseText, Text + from great_tables._text import BaseText - new_kwargs = _handle_units_syntax({"column_label_1": "abc", "column_label_2": Text(text="xyz")}) + new_kwargs = _handle_units_syntax({"column_label_1": "abc", "column_label_2": md(text="xyz")}) assert all(isinstance(v, (str, BaseText)) for v in new_kwargs.values())