From 19838669b103f34a98061f251f05e27b83722f3c Mon Sep 17 00:00:00 2001
From: Pedro Diogo <pedro.f.diogo@tecnico.ulisboa.pt>
Date: Tue, 3 Jun 2025 19:54:49 +0100
Subject: [PATCH 1/5] feature #49580: support new-style float_format string in
 to_csv

feat(to_csv): support new-style float_format strings using str.format

Detect and process new-style format strings (e.g., "{:,.2f}") in the
float_format parameter of to_csv.

- Check if float_format is a string and matches new-style pattern
- Convert it to a callable (e.g., lambda x: float_format.format(x))
- Ensure compatibility with NaN values and mixed data types
- Improves formatting output for floats when exporting to CSV

Example:
    df = pd.DataFrame([1234.56789, 9876.54321])
    df.to_csv(float_format="{:,.2f}")  # values are written as "1,234.57"

Co-authored-by: Pedro Santos <pedro.filipe.santos@tecnico.ulisboa.pt>
---
 pandas/_libs/tslibs/offsets.pyx               |   8 +-
 pandas/io/formats/format.py                   |  30 +++-
 .../tests/io/formats/test_csv_benchmarks.py   |  29 ++++
 pandas/tests/io/formats/test_to_csv.py        | 141 ++++++++++++++++++
 4 files changed, 203 insertions(+), 5 deletions(-)
 create mode 100644 pandas/tests/io/formats/test_csv_benchmarks.py

diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx
index a16964435ef50..5ffa363ea3ea8 100644
--- a/pandas/_libs/tslibs/offsets.pyx
+++ b/pandas/_libs/tslibs/offsets.pyx
@@ -5108,8 +5108,8 @@ def _warn_about_deprecated_aliases(name: str, is_period: bool) -> str:
         warnings.warn(
             f"\'{name}\' is deprecated and will be removed "
             f"in a future version, please use "
-            f"\'{c_PERIOD_AND_OFFSET_DEPR_FREQSTR.get(name)}\'"
-            f" instead.",
+            f"\'{c_PERIOD_AND_OFFSET_DEPR_FREQSTR.get(name)}\' "
+            f"instead.",
             FutureWarning,
             stacklevel=find_stack_level(),
             )
@@ -5122,8 +5122,8 @@ def _warn_about_deprecated_aliases(name: str, is_period: bool) -> str:
             warnings.warn(
                 f"\'{name}\' is deprecated and will be removed "
                 f"in a future version, please use "
-                f"\'{_name}\'"
-                f" instead.",
+                f"\'{_name}\' "
+                f"instead.",
                 FutureWarning,
                 stacklevel=find_stack_level(),
                 )
diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py
index fb799361fea67..4046ffd21af3b 100644
--- a/pandas/io/formats/format.py
+++ b/pandas/io/formats/format.py
@@ -455,7 +455,7 @@ def __init__(
         self.na_rep = na_rep
         self.formatters = self._initialize_formatters(formatters)
         self.justify = self._initialize_justify(justify)
-        self.float_format = float_format
+        self.float_format = self._validate_float_format(float_format)
         self.sparsify = self._initialize_sparsify(sparsify)
         self.show_index_names = index_names
         self.decimal = decimal
@@ -850,6 +850,34 @@ def _get_column_name_list(self) -> list[Hashable]:
             names.append("" if columns.name is None else columns.name)
         return names
 
+    def _validate_float_format(
+        self, fmt: FloatFormatType | None
+    ) -> FloatFormatType | None:
+        """
+        Validate ``float_format``; convert new-style strings to callables.
+
+        ``None``, callables and old-style ``%`` strings are returned as-is.
+        Raises ValueError for a bad new-style string or an unsupported type.
+        """
+        if fmt is None or callable(fmt):
+            return fmt
+        if not isinstance(fmt, str):
+            raise ValueError("float_format must be a string or callable")
+        if "%" in fmt:
+            try:
+                _ = fmt % 1.0  # probe: does it work as an old-style string?
+                return fmt
+            except (TypeError, ValueError):
+                # "{:.1%}" and "{:.2f}%" are new-style despite the "%"; a
+                # brace-less string keeps the legacy pass-through.
+                if "{" not in fmt:
+                    return fmt
+        try:
+            _ = fmt.format(1.0)  # probe with an arbitrary float
+            return fmt.format
+        except (AttributeError, LookupError, TypeError, ValueError) as e:
+            raise ValueError(f"Invalid new-style format string {fmt!r}") from e
+
 
 class DataFrameRenderer:
     """Class for creating dataframe output in multiple formats.
diff --git a/pandas/tests/io/formats/test_csv_benchmarks.py b/pandas/tests/io/formats/test_csv_benchmarks.py
new file mode 100644
index 0000000000000..420faba96516e
--- /dev/null
+++ b/pandas/tests/io/formats/test_csv_benchmarks.py
@@ -0,0 +1,29 @@
+import numpy as np
+import pytest
+
+from pandas import DataFrame
+
+pytestmark = pytest.mark.usefixtures("benchmark")
+
+# Create a single generator instance for all tests
+rng = np.random.default_rng(seed=42)
+
+
+def test_benchmark_old_style_format(benchmark):
+    df = DataFrame(rng.random((1000, 1000)))
+    benchmark(lambda: df.to_csv(float_format="%.6f"))
+
+
+def test_benchmark_new_style_format(benchmark):
+    df = DataFrame(rng.random((1000, 1000)))
+    benchmark(lambda: df.to_csv(float_format="{:.6f}"))
+
+
+def test_benchmark_new_style_thousands(benchmark):
+    df = DataFrame(rng.random((1000, 1000)))
+    benchmark(lambda: df.to_csv(float_format="{:,.2f}"))
+
+
+def test_benchmark_callable_format(benchmark):
+    df = DataFrame(rng.random((1000, 1000)))
+    benchmark(lambda: df.to_csv(float_format=lambda x: f"{x:.6f}"))
diff --git a/pandas/tests/io/formats/test_to_csv.py b/pandas/tests/io/formats/test_to_csv.py
index 6d762fdeb8d79..afe8c23725392 100644
--- a/pandas/tests/io/formats/test_to_csv.py
+++ b/pandas/tests/io/formats/test_to_csv.py
@@ -1,6 +1,7 @@
 import io
 import os
 import sys
+import warnings
 from zipfile import ZipFile
 
 from _csv import Error
@@ -741,3 +742,143 @@ def test_to_csv_iterative_compression_buffer(compression):
             pd.read_csv(buffer, compression=compression, index_col=0), df
         )
         assert not buffer.closed
+
+
+def test_new_style_float_format_basic():
+    df = DataFrame({"A": [1234.56789, 9876.54321]})
+    result = df.to_csv(float_format="{:.2f}")
+    expected = ",A\n0,1234.57\n1,9876.54\n"
+    assert result == expected
+
+
+def test_new_style_float_format_thousands():
+    df = DataFrame({"A": [1234.56789, 9876.54321]})
+    result = df.to_csv(float_format="{:,.2f}")
+    expected = ',A\n0,"1,234.57"\n1,"9,876.54"\n'
+    assert result == expected
+
+
+def test_new_style_scientific_format():
+    df = DataFrame({"A": [0.000123, 0.000456]})
+    result = df.to_csv(float_format="{:.2e}")
+    expected = ",A\n0,1.23e-04\n1,4.56e-04\n"
+    assert result == expected
+
+
+def test_new_style_with_nan():
+    df = DataFrame({"A": [1.23, np.nan, 4.56]})
+    result = df.to_csv(float_format="{:.2f}", na_rep="NA")
+    expected = ",A\n0,1.23\n1,NA\n2,4.56\n"
+    assert result == expected
+
+
+def test_new_style_with_mixed_types():
+    df = DataFrame({"A": [1.23, 4.56], "B": ["x", "y"]})
+    result = df.to_csv(float_format="{:.2f}")
+    expected = ",A,B\n0,1.23,x\n1,4.56,y\n"
+    assert result == expected
+
+
+def test_new_style_with_mixed_types_in_column():
+    df = DataFrame({"A": [1.23, "text", 4.56]})
+    with warnings.catch_warnings(record=True):
+        warnings.simplefilter("always")
+        result = df.to_csv(float_format="{:.2f}")
+
+    expected = ",A\n0,1.23\n1,text\n2,4.56\n"
+    assert result == expected
+
+
+def test_invalid_new_style_format_missing_brace():
+    df = DataFrame({"A": [1.23]})
+    with pytest.raises(ValueError, match="Invalid new-style format string '{:.2f"):
+        df.to_csv(float_format="{:.2f")
+
+
+def test_invalid_new_style_format_specifier():
+    df = DataFrame({"A": [1.23]})
+    with pytest.raises(ValueError, match="Invalid new-style format string '{:.2z}'"):
+        df.to_csv(float_format="{:.2z}")
+
+
+def test_old_style_format_compatibility():
+    df = DataFrame({"A": [1234.56789, 9876.54321]})
+    result = df.to_csv(float_format="%.2f")
+    expected = ",A\n0,1234.57\n1,9876.54\n"
+    assert result == expected
+
+
+def test_callable_float_format_compatibility():
+    df = DataFrame({"A": [1234.56789, 9876.54321]})
+    result = df.to_csv(float_format=lambda x: f"{x:,.2f}")
+    expected = ',A\n0,"1,234.57"\n1,"9,876.54"\n'
+    assert result == expected
+
+
+def test_no_float_format():
+    df = DataFrame({"A": [1.23, 4.56]})
+    result = df.to_csv(float_format=None)
+    expected = ",A\n0,1.23\n1,4.56\n"
+    assert result == expected
+
+
+def test_large_numbers():
+    df = DataFrame({"A": [1e308, 2e308]})
+    result = df.to_csv(float_format="{:.2e}")
+    expected = ",A\n0,1.00e+308\n1,inf\n"
+    assert result == expected
+
+
+def test_zero_and_negative():
+    df = DataFrame({"A": [0.0, -1.23456]})
+    result = df.to_csv(float_format="{:+.2f}")
+    expected = ",A\n0,+0.00\n1,-1.23\n"
+    assert result == expected
+
+
+def test_unicode_format():
+    df = DataFrame({"A": [1.23, 4.56]})
+    result = df.to_csv(float_format="{:.2f}€", encoding="utf-8")
+    expected = ",A\n0,1.23€\n1,4.56€\n"
+    assert result == expected
+
+
+def test_empty_dataframe():
+    df = DataFrame({"A": []})
+    result = df.to_csv(float_format="{:.2f}")
+    expected = ",A\n"
+    assert result == expected
+
+
+def test_multi_column_float():
+    df = DataFrame({"A": [1.23, 4.56], "B": [7.89, 0.12]})
+    result = df.to_csv(float_format="{:.2f}")
+    expected = ",A,B\n0,1.23,7.89\n1,4.56,0.12\n"
+    assert result == expected
+
+
+def test_invalid_float_format_type():
+    df = DataFrame({"A": [1.23]})
+    with pytest.raises(ValueError, match="float_format must be a string or callable"):
+        df.to_csv(float_format=123)
+
+
+def test_new_style_with_inf():
+    df = DataFrame({"A": [1.23, np.inf, -np.inf]})
+    result = df.to_csv(float_format="{:.2f}", na_rep="NA")
+    expected = ",A\n0,1.23\n1,inf\n2,-inf\n"
+    assert result == expected
+
+
+def test_new_style_with_precision_edge():
+    df = DataFrame({"A": [1.23456789]})
+    result = df.to_csv(float_format="{:.10f}")
+    expected = ",A\n0,1.2345678900\n"
+    assert result == expected
+
+
+def test_new_style_with_template():
+    df = DataFrame({"A": [1234.56789]})
+    result = df.to_csv(float_format="Value: {:,.2f}")
+    expected = ',A\n0,"Value: 1,234.57"\n'
+    assert result == expected

From 7eccc899d8d9c34cbf6ec19fcaaa6486d0d6c57c Mon Sep 17 00:00:00 2001
From: Pedro Diogo <pedro.f.diogo@tecnico.ulisboa.pt>
Date: Fri, 20 Jun 2025 17:48:51 +0100
Subject: [PATCH 2/5] move float_format benchmarks from pytest to asv

---
 asv_bench/benchmarks/io/csv.py                | 17 +++++++++++
 .../tests/io/formats/test_csv_benchmarks.py   | 29 -------------------
 2 files changed, 17 insertions(+), 29 deletions(-)
 delete mode 100644 pandas/tests/io/formats/test_csv_benchmarks.py

diff --git a/asv_bench/benchmarks/io/csv.py b/asv_bench/benchmarks/io/csv.py
index 3a15f754ae523..8b68568783ad7 100644
--- a/asv_bench/benchmarks/io/csv.py
+++ b/asv_bench/benchmarks/io/csv.py
@@ -52,6 +52,23 @@ def setup(self, kind):
     def time_frame(self, kind):
         self.df.to_csv(self.fname)
 
+class ToCSVFloatFormatVariants(BaseIO):
+    fname = "__test__.csv"
+
+    def setup(self):
+        self.df = DataFrame(np.random.default_rng(seed=42).random((1000, 1000)))
+
+    def time_old_style_percent_format(self):
+        self.df.to_csv(self.fname, float_format="%.6f")
+
+    def time_new_style_brace_format(self):
+        self.df.to_csv(self.fname, float_format="{:.6f}")
+
+    def time_new_style_thousands_format(self):
+        self.df.to_csv(self.fname, float_format="{:,.2f}")
+
+    def time_callable_format(self):
+        self.df.to_csv(self.fname, float_format=lambda x: f"{x:.6f}")
 
 class ToCSVMultiIndexUnusedLevels(BaseIO):
     fname = "__test__.csv"
diff --git a/pandas/tests/io/formats/test_csv_benchmarks.py b/pandas/tests/io/formats/test_csv_benchmarks.py
deleted file mode 100644
index 420faba96516e..0000000000000
--- a/pandas/tests/io/formats/test_csv_benchmarks.py
+++ /dev/null
@@ -1,29 +0,0 @@
-import numpy as np
-import pytest
-
-from pandas import DataFrame
-
-pytestmark = pytest.mark.usefixtures("benchmark")
-
-# Create a single generator instance for all tests
-rng = np.random.default_rng(seed=42)
-
-
-def test_benchmark_old_style_format(benchmark):
-    df = DataFrame(rng.random((1000, 1000)))
-    benchmark(lambda: df.to_csv(float_format="%.6f"))
-
-
-def test_benchmark_new_style_format(benchmark):
-    df = DataFrame(rng.random((1000, 1000)))
-    benchmark(lambda: df.to_csv(float_format="{:.6f}"))
-
-
-def test_benchmark_new_style_thousands(benchmark):
-    df = DataFrame(rng.random((1000, 1000)))
-    benchmark(lambda: df.to_csv(float_format="{:,.2f}"))
-
-
-def test_benchmark_callable_format(benchmark):
-    df = DataFrame(rng.random((1000, 1000)))
-    benchmark(lambda: df.to_csv(float_format=lambda x: f"{x:.6f}"))

From c81352abb8b4f520f789b23b925a2bd6130a8e1e Mon Sep 17 00:00:00 2001
From: Pedro Diogo <pedro.f.diogo@tecnico.ulisboa.pt>
Date: Fri, 20 Jun 2025 17:58:03 +0100
Subject: [PATCH 3/5] fix pre-commit: add blank lines between benchmark classes

---
 asv_bench/benchmarks/io/csv.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/asv_bench/benchmarks/io/csv.py b/asv_bench/benchmarks/io/csv.py
index 8b68568783ad7..9ee867260aa39 100644
--- a/asv_bench/benchmarks/io/csv.py
+++ b/asv_bench/benchmarks/io/csv.py
@@ -52,6 +52,7 @@ def setup(self, kind):
     def time_frame(self, kind):
         self.df.to_csv(self.fname)
 
+
 class ToCSVFloatFormatVariants(BaseIO):
     fname = "__test__.csv"
 
@@ -70,6 +71,7 @@ def time_new_style_thousands_format(self):
     def time_callable_format(self):
         self.df.to_csv(self.fname, float_format=lambda x: f"{x:.6f}")
 
+
 class ToCSVMultiIndexUnusedLevels(BaseIO):
     fname = "__test__.csv"
 

From cbc096f4019bae9bc8236bc1051368a1d654e3ad Mon Sep 17 00:00:00 2001
From: Pedro Diogo <pedro.f.diogo@tecnico.ulisboa.pt>
Date: Fri, 20 Jun 2025 19:07:12 +0100
Subject: [PATCH 4/5] revert unrelated formatting change in offsets.pyx

---
 pandas/_libs/tslibs/offsets.pyx | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx
index 5ffa363ea3ea8..a16964435ef50 100644
--- a/pandas/_libs/tslibs/offsets.pyx
+++ b/pandas/_libs/tslibs/offsets.pyx
@@ -5108,8 +5108,8 @@ def _warn_about_deprecated_aliases(name: str, is_period: bool) -> str:
         warnings.warn(
             f"\'{name}\' is deprecated and will be removed "
             f"in a future version, please use "
-            f"\'{c_PERIOD_AND_OFFSET_DEPR_FREQSTR.get(name)}\' "
-            f"instead.",
+            f"\'{c_PERIOD_AND_OFFSET_DEPR_FREQSTR.get(name)}\'"
+            f" instead.",
             FutureWarning,
             stacklevel=find_stack_level(),
             )
@@ -5122,8 +5122,8 @@ def _warn_about_deprecated_aliases(name: str, is_period: bool) -> str:
             warnings.warn(
                 f"\'{name}\' is deprecated and will be removed "
                 f"in a future version, please use "
-                f"\'{_name}\' "
-                f"instead.",
+                f"\'{_name}\'"
+                f" instead.",
                 FutureWarning,
                 stacklevel=find_stack_level(),
                 )

From fec8e4a11adaa24a79e3db3db7dba9f7c86c49b0 Mon Sep 17 00:00:00 2001
From: Pedro Diogo <pedro.f.diogo@tecnico.ulisboa.pt>
Date: Sat, 21 Jun 2025 16:36:26 +0100
Subject: [PATCH 5/5] fix tests on Windows: pass lineterminator="\n"

---
 pandas/tests/io/formats/test_to_csv.py | 34 +++++++++++++-------------
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/pandas/tests/io/formats/test_to_csv.py b/pandas/tests/io/formats/test_to_csv.py
index afe8c23725392..958713c29a395 100644
--- a/pandas/tests/io/formats/test_to_csv.py
+++ b/pandas/tests/io/formats/test_to_csv.py
@@ -746,35 +746,35 @@ def test_to_csv_iterative_compression_buffer(compression):
 
 def test_new_style_float_format_basic():
     df = DataFrame({"A": [1234.56789, 9876.54321]})
-    result = df.to_csv(float_format="{:.2f}")
+    result = df.to_csv(float_format="{:.2f}", lineterminator="\n")
     expected = ",A\n0,1234.57\n1,9876.54\n"
     assert result == expected
 
 
 def test_new_style_float_format_thousands():
     df = DataFrame({"A": [1234.56789, 9876.54321]})
-    result = df.to_csv(float_format="{:,.2f}")
+    result = df.to_csv(float_format="{:,.2f}", lineterminator="\n")
     expected = ',A\n0,"1,234.57"\n1,"9,876.54"\n'
     assert result == expected
 
 
 def test_new_style_scientific_format():
     df = DataFrame({"A": [0.000123, 0.000456]})
-    result = df.to_csv(float_format="{:.2e}")
+    result = df.to_csv(float_format="{:.2e}", lineterminator="\n")
     expected = ",A\n0,1.23e-04\n1,4.56e-04\n"
     assert result == expected
 
 
 def test_new_style_with_nan():
     df = DataFrame({"A": [1.23, np.nan, 4.56]})
-    result = df.to_csv(float_format="{:.2f}", na_rep="NA")
+    result = df.to_csv(float_format="{:.2f}", na_rep="NA", lineterminator="\n")
     expected = ",A\n0,1.23\n1,NA\n2,4.56\n"
     assert result == expected
 
 
 def test_new_style_with_mixed_types():
     df = DataFrame({"A": [1.23, 4.56], "B": ["x", "y"]})
-    result = df.to_csv(float_format="{:.2f}")
+    result = df.to_csv(float_format="{:.2f}", lineterminator="\n")
     expected = ",A,B\n0,1.23,x\n1,4.56,y\n"
     assert result == expected
 
@@ -783,7 +783,7 @@ def test_new_style_with_mixed_types_in_column():
     df = DataFrame({"A": [1.23, "text", 4.56]})
     with warnings.catch_warnings(record=True):
         warnings.simplefilter("always")
-        result = df.to_csv(float_format="{:.2f}")
+        result = df.to_csv(float_format="{:.2f}", lineterminator="\n")
 
     expected = ",A\n0,1.23\n1,text\n2,4.56\n"
     assert result == expected
@@ -803,56 +803,56 @@ def test_invalid_new_style_format_specifier():
 
 def test_old_style_format_compatibility():
     df = DataFrame({"A": [1234.56789, 9876.54321]})
-    result = df.to_csv(float_format="%.2f")
+    result = df.to_csv(float_format="%.2f", lineterminator="\n")
     expected = ",A\n0,1234.57\n1,9876.54\n"
     assert result == expected
 
 
 def test_callable_float_format_compatibility():
     df = DataFrame({"A": [1234.56789, 9876.54321]})
-    result = df.to_csv(float_format=lambda x: f"{x:,.2f}")
+    result = df.to_csv(float_format=lambda x: f"{x:,.2f}", lineterminator="\n")
     expected = ',A\n0,"1,234.57"\n1,"9,876.54"\n'
     assert result == expected
 
 
 def test_no_float_format():
     df = DataFrame({"A": [1.23, 4.56]})
-    result = df.to_csv(float_format=None)
+    result = df.to_csv(float_format=None, lineterminator="\n")
     expected = ",A\n0,1.23\n1,4.56\n"
     assert result == expected
 
 
 def test_large_numbers():
     df = DataFrame({"A": [1e308, 2e308]})
-    result = df.to_csv(float_format="{:.2e}")
+    result = df.to_csv(float_format="{:.2e}", lineterminator="\n")
     expected = ",A\n0,1.00e+308\n1,inf\n"
     assert result == expected
 
 
 def test_zero_and_negative():
     df = DataFrame({"A": [0.0, -1.23456]})
-    result = df.to_csv(float_format="{:+.2f}")
+    result = df.to_csv(float_format="{:+.2f}", lineterminator="\n")
     expected = ",A\n0,+0.00\n1,-1.23\n"
     assert result == expected
 
 
 def test_unicode_format():
     df = DataFrame({"A": [1.23, 4.56]})
-    result = df.to_csv(float_format="{:.2f}€", encoding="utf-8")
+    result = df.to_csv(float_format="{:.2f}€", encoding="utf-8", lineterminator="\n")
     expected = ",A\n0,1.23€\n1,4.56€\n"
     assert result == expected
 
 
 def test_empty_dataframe():
     df = DataFrame({"A": []})
-    result = df.to_csv(float_format="{:.2f}")
+    result = df.to_csv(float_format="{:.2f}", lineterminator="\n")
     expected = ",A\n"
     assert result == expected
 
 
 def test_multi_column_float():
     df = DataFrame({"A": [1.23, 4.56], "B": [7.89, 0.12]})
-    result = df.to_csv(float_format="{:.2f}")
+    result = df.to_csv(float_format="{:.2f}", lineterminator="\n")
     expected = ",A,B\n0,1.23,7.89\n1,4.56,0.12\n"
     assert result == expected
 
@@ -865,20 +865,20 @@ def test_invalid_float_format_type():
 
 def test_new_style_with_inf():
     df = DataFrame({"A": [1.23, np.inf, -np.inf]})
-    result = df.to_csv(float_format="{:.2f}", na_rep="NA")
+    result = df.to_csv(float_format="{:.2f}", na_rep="NA", lineterminator="\n")
     expected = ",A\n0,1.23\n1,inf\n2,-inf\n"
     assert result == expected
 
 
 def test_new_style_with_precision_edge():
     df = DataFrame({"A": [1.23456789]})
-    result = df.to_csv(float_format="{:.10f}")
+    result = df.to_csv(float_format="{:.10f}", lineterminator="\n")
     expected = ",A\n0,1.2345678900\n"
     assert result == expected
 
 
 def test_new_style_with_template():
     df = DataFrame({"A": [1234.56789]})
-    result = df.to_csv(float_format="Value: {:,.2f}")
+    result = df.to_csv(float_format="Value: {:,.2f}", lineterminator="\n")
     expected = ',A\n0,"Value: 1,234.57"\n'
     assert result == expected