feature #49580: support new-style float_format string in to_csv (#61650)

pedromfdiogo · Pedro-Santos04 · mroeschke · web-flow · commit d5f97ed21a87 · 2025-07-08T08:48:22.000-07:00
* feature #49580: support new-style float_format string in to_csv

  feat(to_csv): support new-style float_format strings using str.format

  Detect and process new-style format strings (e.g., "{:,.2f}") in the float_format parameter of to_csv.

  - Check if float_format is a string and matches new-style pattern
  - Convert it to a callable (e.g., lambda x: float_format.format(x))
  - Ensure compatibility with NaN values and mixed data types
  - Improves formatting output for floats when exporting to CSV

  Example:

      df = pd.DataFrame([1234.56789, 9876.54321])
      df.to_csv(float_format="{:,.2f}")  # now outputs formatted values like 1,234.57

  Co-authored-by: Pedro Santos <pedro.filipe.santos@tecnico.ulisboa.pt>

* update benchmark test
* fixed pre commit
* fixed offsets.pyx
* fixed tests to windows
* Update pandas/io/formats/format.py

  Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>

* Update pandas/io/formats/format.py

  Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>

* Update pandas/io/formats/format.py

  Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>

* updated v3.0.0.rst and fixed tm.assert_produces_warning
* fixed test_new_style_with_mixed_types_in_column added match to assert_produces_warning
* Update doc/source/whatsnew/v3.0.0.rst (removed reference to this PR)

  Co-authored-by: Simon Hawkins <simonjayhawkins@gmail.com>

* fixed pre-commit
* removed tm.assert_produces_warning
* fixed space
* fixed pre-commit

---------

Co-authored-by: Pedro Santos <pedro.filipe.santos@tecnico.ulisboa.pt>
Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Co-authored-by: Simon Hawkins <simonjayhawkins@gmail.com>
diff --git a/asv_bench/benchmarks/io/csv.py b/asv_bench/benchmarks/io/csv.py
@@ -53,6 +53,25 @@ def time_frame(self, kind):
         self.df.to_csv(self.fname)
 
 
+class ToCSVFloatFormatVariants(BaseIO):  # times to_csv per float_format flavour
+    fname = "__test__.csv"  # path every benchmark below writes to
+
+    def setup(self):  # seeded 1000 x 1000 frame of random floats
+        self.df = DataFrame(np.random.default_rng(seed=42).random((1000, 1000)))
+
+    def time_old_style_percent_format(self):  # "%"-style template string
+        self.df.to_csv(self.fname, float_format="%.6f")
+
+    def time_new_style_brace_format(self):  # str.format-style template string
+        self.df.to_csv(self.fname, float_format="{:.6f}")
+
+    def time_new_style_thousands_format(self):  # template with "," grouping
+        self.df.to_csv(self.fname, float_format="{:,.2f}")
+
+    def time_callable_format(self):  # user-supplied callable (f-string lambda)
+        self.df.to_csv(self.fname, float_format=lambda x: f"{x:.6f}")
+
+
 class ToCSVMultiIndexUnusedLevels(BaseIO):
     fname = "__test__.csv"
 
diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
@@ -76,6 +76,7 @@ Other enhancements
 - :meth:`.DataFrameGroupBy.transform`, :meth:`.SeriesGroupBy.transform`, :meth:`.DataFrameGroupBy.agg`, :meth:`.SeriesGroupBy.agg`, :meth:`.SeriesGroupBy.apply`, :meth:`.DataFrameGroupBy.apply` now support ``kurt`` (:issue:`40139`)
 - :meth:`DataFrame.apply` supports using third-party execution engines like the Bodo.ai JIT compiler (:issue:`60668`)
 - :meth:`DataFrame.iloc` and :meth:`Series.iloc` now support boolean masks in ``__getitem__`` for more consistent indexing behavior (:issue:`60994`)
+- :meth:`DataFrame.to_csv` and :meth:`Series.to_csv` now support Python's new-style format strings (e.g., ``"{:.6f}"``) for the ``float_format`` parameter, in addition to old-style ``%`` format strings and callables. This allows for more flexible and modern formatting of floating-point numbers when exporting to CSV (:issue:`49580`)
 - :meth:`DataFrameGroupBy.transform`, :meth:`SeriesGroupBy.transform`, :meth:`DataFrameGroupBy.agg`, :meth:`SeriesGroupBy.agg`, :meth:`RollingGroupby.apply`, :meth:`ExpandingGroupby.apply`, :meth:`Rolling.apply`, :meth:`Expanding.apply`, :meth:`DataFrame.apply` with ``engine="numba"`` now supports positional arguments passed as kwargs (:issue:`58995`)
 - :meth:`Rolling.agg`, :meth:`Expanding.agg` and :meth:`ExponentialMovingWindow.agg` now accept :class:`NamedAgg` aggregations through ``**kwargs`` (:issue:`28333`)
 - :meth:`Series.map` can now accept kwargs to pass on to func (:issue:`59814`)
diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py
@@ -454,7 +454,7 @@ def __init__(
         self.na_rep = na_rep
         self.formatters = self._initialize_formatters(formatters)
         self.justify = self._initialize_justify(justify)
-        self.float_format = float_format
+        self.float_format = self._validate_float_format(float_format)
         self.sparsify = self._initialize_sparsify(sparsify)
         self.show_index_names = index_names
         self.decimal = decimal
@@ -849,6 +849,64 @@ def _get_column_name_list(self) -> list[Hashable]:
             names.append("" if columns.name is None else columns.name)
         return names
 
+    def _validate_float_format(
+        self, fmt: FloatFormatType | None
+    ) -> FloatFormatType | None:
+        """
+        Validate ``float_format`` and turn new-style strings into callables.
+
+        Parameters
+        ----------
+        fmt : str, callable or None
+            The ``float_format`` argument to check.
+
+        Returns
+        -------
+        str, callable or None
+            ``None``, callables and old-style ``%`` templates (``"%.2f"``) are
+            returned unchanged. A new-style template (``"{:.2f}"``, ``"{:.2%}"``)
+            is returned as its bound ``str.format`` method.
+
+        Raises
+        ------
+        ValueError
+            If ``fmt`` is not ``None``, a string or a callable, or if it is a
+            new-style template that cannot format a float.
+        """
+        from string import Formatter
+
+        if fmt is None or callable(fmt):
+            return fmt
+
+        if not isinstance(fmt, str):
+            raise ValueError("float_format must be a string or callable")
+
+        if "%" in fmt:
+            # A "%" does not prove the template is old-style: "{:.2%}" and
+            # "{:.1f}%" are new-style. Keep the string untouched only when it
+            # works with the % operator or has no "{}" replacement field.
+            try:
+                _ = fmt % 1.0
+            except (TypeError, ValueError):
+                percent_ok = False
+            else:
+                percent_ok = True
+            try:
+                # Formatter.parse yields (literal, field_name, spec, conversion);
+                # field_name is None for pure literal text.
+                has_field = any(part[1] is not None for part in Formatter().parse(fmt))
+            except ValueError:
+                # Unbalanced braces, so not a usable new-style template.
+                has_field = False
+            if percent_ok or not has_field:
+                return fmt
+
+        try:
+            _ = fmt.format(1.0)  # Probe with an arbitrary float
+        except (ValueError, KeyError, IndexError, TypeError, AttributeError) as err:
+            raise ValueError(f"Invalid new-style format string {fmt!r}") from err
+        return fmt.format
+
 
 class DataFrameRenderer:
     """Class for creating dataframe output in multiple formats.
diff --git a/pandas/tests/io/formats/test_to_csv.py b/pandas/tests/io/formats/test_to_csv.py
@@ -741,3 +741,140 @@ def test_to_csv_iterative_compression_buffer(compression):
             pd.read_csv(buffer, compression=compression, index_col=0), df
         )
         assert not buffer.closed
+
+
+def test_new_style_float_format_basic():
+    """A plain ``{:.2f}`` template rounds each float to two decimals."""
+    frame = DataFrame({"A": [1234.56789, 9876.54321]})
+    out = frame.to_csv(float_format="{:.2f}", lineterminator="\n")
+    assert out == ",A\n0,1234.57\n1,9876.54\n"
+
+
+def test_new_style_float_format_thousands():
+    """A ``,`` grouping template yields values that are quoted in the CSV."""
+    frame = DataFrame({"A": [1234.56789, 9876.54321]})
+    out = frame.to_csv(float_format="{:,.2f}", lineterminator="\n")
+    assert out == ',A\n0,"1,234.57"\n1,"9,876.54"\n'
+
+
+def test_new_style_scientific_format():
+    """``{:.2e}`` renders small floats in scientific notation."""
+    frame = DataFrame({"A": [0.000123, 0.000456]})
+    out = frame.to_csv(float_format="{:.2e}", lineterminator="\n")
+    assert out == ",A\n0,1.23e-04\n1,4.56e-04\n"
+
+
+def test_new_style_with_nan():
+    """Missing values are written as ``na_rep`` rather than being formatted."""
+    frame = DataFrame({"A": [1.23, np.nan, 4.56]})
+    out = frame.to_csv(float_format="{:.2f}", na_rep="NA", lineterminator="\n")
+    assert out == ",A\n0,1.23\n1,NA\n2,4.56\n"
+
+
+def test_new_style_with_mixed_types():
+    """A string column next to a float column is written untouched."""
+    frame = DataFrame({"A": [1.23, 4.56], "B": ["x", "y"]})
+    out = frame.to_csv(float_format="{:.2f}", lineterminator="\n")
+    assert out == ",A,B\n0,1.23,x\n1,4.56,y\n"
+
+
+def test_new_style_with_mixed_types_in_column():
+    """A column mixing floats and strings is written with its values as-is."""
+    frame = DataFrame({"A": [1.23, "text", 4.56]})
+    out = frame.to_csv(float_format="{:.2f}", lineterminator="\n")
+    assert out == ",A\n0,1.23\n1,text\n2,4.56\n"
+
+
+def test_invalid_new_style_format_missing_brace():
+    """An unterminated replacement field is rejected with ``ValueError``."""
+    with pytest.raises(ValueError, match="Invalid new-style format string '{:.2f"):
+        DataFrame({"A": [1.23]}).to_csv(float_format="{:.2f")
+
+
+def test_invalid_new_style_format_specifier():
+    """An unknown presentation type is rejected with ``ValueError``."""
+    with pytest.raises(ValueError, match="Invalid new-style format string '{:.2z}'"):
+        DataFrame({"A": [1.23]}).to_csv(float_format="{:.2z}")
+
+
+def test_old_style_format_compatibility():
+    """Old-style ``%`` templates keep working."""
+    frame = DataFrame({"A": [1234.56789, 9876.54321]})
+    out = frame.to_csv(float_format="%.2f", lineterminator="\n")
+    assert out == ",A\n0,1234.57\n1,9876.54\n"
+
+
+def test_callable_float_format_compatibility():
+    """A callable ``float_format`` keeps working."""
+    frame = DataFrame({"A": [1234.56789, 9876.54321]})
+    out = frame.to_csv(float_format=lambda x: f"{x:,.2f}", lineterminator="\n")
+    assert out == ',A\n0,"1,234.57"\n1,"9,876.54"\n'
+
+
+def test_no_float_format():
+    """``float_format=None`` leaves float output at its default."""
+    frame = DataFrame({"A": [1.23, 4.56]})
+    out = frame.to_csv(float_format=None, lineterminator="\n")
+    assert out == ",A\n0,1.23\n1,4.56\n"
+
+
+def test_large_numbers():
+    """Near-max floats format normally; the overflowing literal is ``inf``."""
+    frame = DataFrame({"A": [1e308, 2e308]})
+    out = frame.to_csv(float_format="{:.2e}", lineterminator="\n")
+    assert out == ",A\n0,1.00e+308\n1,inf\n"
+
+
+def test_zero_and_negative():
+    """The ``+`` sign flag is honoured for zero and negative values."""
+    frame = DataFrame({"A": [0.0, -1.23456]})
+    out = frame.to_csv(float_format="{:+.2f}", lineterminator="\n")
+    assert out == ",A\n0,+0.00\n1,-1.23\n"
+
+
+def test_unicode_format():
+    """Non-ASCII literal text in the template is kept in the output."""
+    frame = DataFrame({"A": [1.23, 4.56]})
+    out = frame.to_csv(float_format="{:.2f}€", encoding="utf-8", lineterminator="\n")
+    assert out == ",A\n0,1.23€\n1,4.56€\n"
+
+
+def test_empty_dataframe():
+    """An empty frame writes only the header row."""
+    frame = DataFrame({"A": []})
+    out = frame.to_csv(float_format="{:.2f}", lineterminator="\n")
+    assert out == ",A\n"
+
+
+def test_multi_column_float():
+    """The template is applied to every float column."""
+    frame = DataFrame({"A": [1.23, 4.56], "B": [7.89, 0.12]})
+    out = frame.to_csv(float_format="{:.2f}", lineterminator="\n")
+    assert out == ",A,B\n0,1.23,7.89\n1,4.56,0.12\n"
+
+
+def test_invalid_float_format_type():
+    """A ``float_format`` that is neither string nor callable is rejected."""
+    with pytest.raises(ValueError, match="float_format must be a string or callable"):
+        DataFrame({"A": [1.23]}).to_csv(float_format=123)
+
+
+def test_new_style_with_inf():
+    """Infinities are written as ``inf``/``-inf``, not as ``na_rep``."""
+    frame = DataFrame({"A": [1.23, np.inf, -np.inf]})
+    out = frame.to_csv(float_format="{:.2f}", na_rep="NA", lineterminator="\n")
+    assert out == ",A\n0,1.23\n1,inf\n2,-inf\n"
+
+
+def test_new_style_with_precision_edge():
+    """A precision beyond the value's digits pads the output with zeros."""
+    frame = DataFrame({"A": [1.23456789]})
+    out = frame.to_csv(float_format="{:.10f}", lineterminator="\n")
+    assert out == ",A\n0,1.2345678900\n"
+
+
+def test_new_style_with_template():
+    """Literal text around the replacement field is kept in each value."""
+    frame = DataFrame({"A": [1234.56789]})
+    out = frame.to_csv(float_format="Value: {:,.2f}", lineterminator="\n")
+    assert out == ',A\n0,"Value: 1,234.57"\n'