Skip to content

Commit d5f97ed

Browse files
pedromfdiogo, Pedro-Santos04, mroeschke, simonjayhawkins
authored
feature #49580: support new-style float_format string in to_csv (#61650)
* feature #49580: support new-style float_format string in to_csv

  feat(to_csv): support new-style float_format strings using str.format

  Detect and process new-style format strings (e.g., "{:,.2f}") in the float_format parameter of to_csv.

  - Check if float_format is a string and matches new-style pattern
  - Convert it to a callable (e.g., lambda x: float_format.format(x))
  - Ensure compatibility with NaN values and mixed data types
  - Improves formatting output for floats when exporting to CSV

  Example:

      df = pd.DataFrame([1234.56789, 9876.54321])
      df.to_csv(float_format="{:,.2f}")  # now outputs formatted values like 1,234.57

  Co-authored-by: Pedro Santos <[email protected]>

* update benchmark test
* fixed pre commit
* fixed offsets.pyx
* fixed tests to windows
* Update pandas/io/formats/format.py

  Co-authored-by: Matthew Roeschke <[email protected]>

* Update pandas/io/formats/format.py

  Co-authored-by: Matthew Roeschke <[email protected]>

* Update pandas/io/formats/format.py

  Co-authored-by: Matthew Roeschke <[email protected]>

* updated v3.0.0.rst and fixed tm.assert_produces_warning
* fixed test_new_style_with_mixed_types_in_column added match to assert_produces_warning
* Update doc/source/whatsnew/v3.0.0.rst (removed reference to this PR)

  Co-authored-by: Simon Hawkins <[email protected]>

* fixed pre-commit
* removed tm.assert_produces_warning
* fixed space
* fixed pre-commit

---------

Co-authored-by: Pedro Santos <[email protected]>
Co-authored-by: Matthew Roeschke <[email protected]>
Co-authored-by: Simon Hawkins <[email protected]>
1 parent d1a245c commit d5f97ed

File tree

4 files changed

+181
-1
lines changed

4 files changed

+181
-1
lines changed

asv_bench/benchmarks/io/csv.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,25 @@ def time_frame(self, kind):
5353
self.df.to_csv(self.fname)
5454

5555

56+
class ToCSVFloatFormatVariants(BaseIO):
    """Time ``DataFrame.to_csv`` once per supported kind of ``float_format``."""

    fname = "__test__.csv"

    def setup(self):
        # Seeded generator: every benchmark writes the same 1000 x 1000 frame.
        rng = np.random.default_rng(seed=42)
        values = rng.random((1000, 1000))
        self.df = DataFrame(values)

    def time_old_style_percent_format(self):
        """Old-style ``%`` format string."""
        target = self.fname
        self.df.to_csv(target, float_format="%.6f")

    def time_new_style_brace_format(self):
        """New-style ``str.format`` string."""
        target = self.fname
        self.df.to_csv(target, float_format="{:.6f}")

    def time_new_style_thousands_format(self):
        """New-style string with a thousands separator."""
        target = self.fname
        self.df.to_csv(target, float_format="{:,.2f}")

    def time_callable_format(self):
        """User-supplied callable formatter."""
        target = self.fname
        self.df.to_csv(target, float_format=lambda x: f"{x:.6f}")
73+
74+
5675
class ToCSVMultiIndexUnusedLevels(BaseIO):
5776
fname = "__test__.csv"
5877

doc/source/whatsnew/v3.0.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@ Other enhancements
7676
- :meth:`.DataFrameGroupBy.transform`, :meth:`.SeriesGroupBy.transform`, :meth:`.DataFrameGroupBy.agg`, :meth:`.SeriesGroupBy.agg`, :meth:`.SeriesGroupBy.apply`, :meth:`.DataFrameGroupBy.apply` now support ``kurt`` (:issue:`40139`)
7777
- :meth:`DataFrame.apply` supports using third-party execution engines like the Bodo.ai JIT compiler (:issue:`60668`)
7878
- :meth:`DataFrame.iloc` and :meth:`Series.iloc` now support boolean masks in ``__getitem__`` for more consistent indexing behavior (:issue:`60994`)
79+
- :meth:`DataFrame.to_csv` and :meth:`Series.to_csv` now support Python's new-style format strings (e.g., ``"{:.6f}"``) for the ``float_format`` parameter, in addition to old-style ``%`` format strings and callables. This allows for more flexible and modern formatting of floating point numbers when exporting to CSV. (:issue:`49580`)
7980
- :meth:`DataFrameGroupBy.transform`, :meth:`SeriesGroupBy.transform`, :meth:`DataFrameGroupBy.agg`, :meth:`SeriesGroupBy.agg`, :meth:`RollingGroupby.apply`, :meth:`ExpandingGroupby.apply`, :meth:`Rolling.apply`, :meth:`Expanding.apply`, :meth:`DataFrame.apply` with ``engine="numba"`` now supports positional arguments passed as kwargs (:issue:`58995`)
8081
- :meth:`Rolling.agg`, :meth:`Expanding.agg` and :meth:`ExponentialMovingWindow.agg` now accept :class:`NamedAgg` aggregations through ``**kwargs`` (:issue:`28333`)
8182
- :meth:`Series.map` can now accept kwargs to pass on to func (:issue:`59814`)

pandas/io/formats/format.py

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -454,7 +454,7 @@ def __init__(
454454
self.na_rep = na_rep
455455
self.formatters = self._initialize_formatters(formatters)
456456
self.justify = self._initialize_justify(justify)
457-
self.float_format = float_format
457+
self.float_format = self._validate_float_format(float_format)
458458
self.sparsify = self._initialize_sparsify(sparsify)
459459
self.show_index_names = index_names
460460
self.decimal = decimal
@@ -849,6 +849,29 @@ def _get_column_name_list(self) -> list[Hashable]:
849849
names.append("" if columns.name is None else columns.name)
850850
return names
851851

852+
def _validate_float_format(
853+
self, fmt: FloatFormatType | None
854+
) -> FloatFormatType | None:
855+
"""
856+
Validates and processes the float_format argument.
857+
Converts new-style format strings to callables.
858+
"""
859+
if fmt is None or callable(fmt):
860+
return fmt
861+
862+
if isinstance(fmt, str):
863+
if "%" in fmt:
864+
# Keeps old-style format strings as they are (C code handles them)
865+
return fmt
866+
else:
867+
try:
868+
_ = fmt.format(1.0) # Test with an arbitrary float
869+
return fmt.format
870+
except (ValueError, KeyError, IndexError) as e:
871+
raise ValueError(f"Invalid new-style format string {fmt!r}") from e
872+
873+
raise ValueError("float_format must be a string or callable")
874+
852875

853876
class DataFrameRenderer:
854877
"""Class for creating dataframe output in multiple formats.

pandas/tests/io/formats/test_to_csv.py

Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -741,3 +741,140 @@ def test_to_csv_iterative_compression_buffer(compression):
741741
pd.read_csv(buffer, compression=compression, index_col=0), df
742742
)
743743
assert not buffer.closed
744+
745+
746+
def test_new_style_float_format_basic():
    """A ``{:.2f}`` float_format writes each value with two decimals."""
    frame = DataFrame({"A": [1234.56789, 9876.54321]})
    csv_text = frame.to_csv(float_format="{:.2f}", lineterminator="\n")
    assert csv_text == ",A\n0,1234.57\n1,9876.54\n"
751+
752+
753+
def test_new_style_float_format_thousands():
    """Values formatted with a thousands separator are expected quoted."""
    frame = DataFrame({"A": [1234.56789, 9876.54321]})
    csv_text = frame.to_csv(float_format="{:,.2f}", lineterminator="\n")
    assert csv_text == ',A\n0,"1,234.57"\n1,"9,876.54"\n'
758+
759+
760+
def test_new_style_scientific_format():
    """A ``{:.2e}`` float_format writes values in scientific notation."""
    frame = DataFrame({"A": [0.000123, 0.000456]})
    csv_text = frame.to_csv(float_format="{:.2e}", lineterminator="\n")
    assert csv_text == ",A\n0,1.23e-04\n1,4.56e-04\n"
765+
766+
767+
def test_new_style_with_nan():
    """NaN is written as ``na_rep`` rather than through the format string."""
    frame = DataFrame({"A": [1.23, np.nan, 4.56]})
    csv_text = frame.to_csv(
        float_format="{:.2f}", na_rep="NA", lineterminator="\n"
    )
    assert csv_text == ",A\n0,1.23\n1,NA\n2,4.56\n"
772+
773+
774+
def test_new_style_with_mixed_types():
    """A string column next to a float column is written unchanged."""
    frame = DataFrame({"A": [1.23, 4.56], "B": ["x", "y"]})
    csv_text = frame.to_csv(float_format="{:.2f}", lineterminator="\n")
    assert csv_text == ",A,B\n0,1.23,x\n1,4.56,y\n"
779+
780+
781+
def test_new_style_with_mixed_types_in_column():
    """A single column mixing floats and text is still written out."""
    frame = DataFrame({"A": [1.23, "text", 4.56]})
    csv_text = frame.to_csv(float_format="{:.2f}", lineterminator="\n")
    assert csv_text == ",A\n0,1.23\n1,text\n2,4.56\n"
786+
787+
788+
def test_invalid_new_style_format_missing_brace():
    """An unterminated replacement field is rejected with ValueError."""
    frame = DataFrame({"A": [1.23]})
    msg = "Invalid new-style format string '{:.2f"
    with pytest.raises(ValueError, match=msg):
        frame.to_csv(float_format="{:.2f")
792+
793+
794+
def test_invalid_new_style_format_specifier():
    """An unknown presentation type is rejected with ValueError."""
    frame = DataFrame({"A": [1.23]})
    msg = "Invalid new-style format string '{:.2z}'"
    with pytest.raises(ValueError, match=msg):
        frame.to_csv(float_format="{:.2z}")
798+
799+
800+
def test_old_style_format_compatibility():
    """Old-style ``%`` format strings keep working."""
    frame = DataFrame({"A": [1234.56789, 9876.54321]})
    csv_text = frame.to_csv(float_format="%.2f", lineterminator="\n")
    assert csv_text == ",A\n0,1234.57\n1,9876.54\n"
805+
806+
807+
def test_callable_float_format_compatibility():
    """A callable float_format keeps working."""
    frame = DataFrame({"A": [1234.56789, 9876.54321]})
    csv_text = frame.to_csv(
        float_format=lambda x: f"{x:,.2f}", lineterminator="\n"
    )
    assert csv_text == ',A\n0,"1,234.57"\n1,"9,876.54"\n'
812+
813+
814+
def test_no_float_format():
    """``float_format=None`` leaves the default float rendering in place."""
    frame = DataFrame({"A": [1.23, 4.56]})
    csv_text = frame.to_csv(float_format=None, lineterminator="\n")
    assert csv_text == ",A\n0,1.23\n1,4.56\n"
819+
820+
821+
def test_large_numbers():
    """Very large inputs: the second value is expected to be written as inf."""
    frame = DataFrame({"A": [1e308, 2e308]})
    csv_text = frame.to_csv(float_format="{:.2e}", lineterminator="\n")
    assert csv_text == ",A\n0,1.00e+308\n1,inf\n"
826+
827+
828+
def test_zero_and_negative():
    """The ``+`` sign option is applied to zero and to negative values."""
    frame = DataFrame({"A": [0.0, -1.23456]})
    csv_text = frame.to_csv(float_format="{:+.2f}", lineterminator="\n")
    assert csv_text == ",A\n0,+0.00\n1,-1.23\n"
833+
834+
835+
def test_unicode_format():
    """Non-ASCII literal text in the format string is kept in the output."""
    frame = DataFrame({"A": [1.23, 4.56]})
    csv_text = frame.to_csv(
        float_format="{:.2f}€", encoding="utf-8", lineterminator="\n"
    )
    assert csv_text == ",A\n0,1.23€\n1,4.56€\n"
840+
841+
842+
def test_empty_dataframe():
    """An empty frame writes only the header line."""
    frame = DataFrame({"A": []})
    csv_text = frame.to_csv(float_format="{:.2f}", lineterminator="\n")
    assert csv_text == ",A\n"
847+
848+
849+
def test_multi_column_float():
    """The format string is applied to every float column."""
    frame = DataFrame({"A": [1.23, 4.56], "B": [7.89, 0.12]})
    csv_text = frame.to_csv(float_format="{:.2f}", lineterminator="\n")
    assert csv_text == ",A,B\n0,1.23,7.89\n1,4.56,0.12\n"
854+
855+
856+
def test_invalid_float_format_type():
    """A float_format that is neither str nor callable raises ValueError."""
    frame = DataFrame({"A": [1.23]})
    msg = "float_format must be a string or callable"
    with pytest.raises(ValueError, match=msg):
        frame.to_csv(float_format=123)
860+
861+
862+
def test_new_style_with_inf():
    """Positive and negative infinity are written as ``inf`` / ``-inf``."""
    frame = DataFrame({"A": [1.23, np.inf, -np.inf]})
    csv_text = frame.to_csv(
        float_format="{:.2f}", na_rep="NA", lineterminator="\n"
    )
    assert csv_text == ",A\n0,1.23\n1,inf\n2,-inf\n"
867+
868+
869+
def test_new_style_with_precision_edge():
    """A precision beyond the value's digits pads the output with zeros."""
    frame = DataFrame({"A": [1.23456789]})
    csv_text = frame.to_csv(float_format="{:.10f}", lineterminator="\n")
    assert csv_text == ",A\n0,1.2345678900\n"
874+
875+
876+
def test_new_style_with_template():
    """Literal text around the replacement field is kept in the output."""
    frame = DataFrame({"A": [1234.56789]})
    csv_text = frame.to_csv(float_format="Value: {:,.2f}", lineterminator="\n")
    assert csv_text == ',A\n0,"Value: 1,234.57"\n'

0 commit comments

Comments
 (0)