Skip to content

Commit a5671ef

Browse files
Copilot and TomeHirata
authored
fix(dspy): Example.toDict() fails to serialize dspy.History objects (stanfordnlp#9047)
* Initial plan
* Fix Example.toDict() to serialize dspy.History objects properly
  Co-authored-by: TomeHirata <[email protected]>
* Consolidate History tests into existing test_evaluate.py
  Co-authored-by: TomeHirata <[email protected]>
* Refactor: move toDict() call to merge_dicts for cleaner code
  Co-authored-by: TomeHirata <[email protected]>
* Use isinstance(BaseModel) instead of hasattr for Pydantic check
  Co-authored-by: TomeHirata <[email protected]>
* Simplify _prepare_results_output to always use toDict()
  Co-authored-by: TomeHirata <[email protected]>
* Remove unnecessary try/except for pydantic import since it's a core dependency
  Co-authored-by: TomeHirata <[email protected]>
---------
Co-authored-by: copilot-swe-agent[bot] <[email protected]>
Co-authored-by: TomeHirata <[email protected]>
1 parent 342a9d6 commit a5671ef

File tree

4 files changed

+179
-1
lines changed

4 files changed

+179
-1
lines changed

dspy/evaluate/evaluate.py

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -233,7 +233,7 @@ def _prepare_results_output(
233233
(
234234
merge_dicts(example, prediction) | {metric_name: score}
235235
if prediction_is_dictlike(prediction)
236-
else dict(example) | {"prediction": prediction, metric_name: score}
236+
else example.toDict() | {"prediction": prediction, metric_name: score}
237237
)
238238
for example, prediction, score in results
239239
]
@@ -305,6 +305,12 @@ def prediction_is_dictlike(prediction):
305305

306306

307307
def merge_dicts(d1, d2) -> dict:
308+
# Convert to dict if objects have toDict method (e.g., Example objects)
309+
if hasattr(d1, "toDict"):
310+
d1 = d1.toDict()
311+
if hasattr(d2, "toDict"):
312+
d2 = d2.toDict()
313+
308314
merged = {}
309315
for k, v in d1.items():
310316
if k in d2:

dspy/primitives/example.py

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,6 @@
1+
from pydantic import BaseModel
2+
3+
14
class Example:
25
"""A flexible data container for DSPy examples and training data.
36
@@ -193,6 +196,9 @@ def toDict(self): # noqa: N802
193196
def convert_to_serializable(value):
194197
if hasattr(value, "toDict"):
195198
return value.toDict()
199+
elif isinstance(value, BaseModel):
200+
# Handle Pydantic models (e.g., dspy.History)
201+
return value.model_dump()
196202
elif isinstance(value, list):
197203
return [convert_to_serializable(item) for item in value]
198204
elif isinstance(value, dict):

tests/evaluate/test_evaluate.py

Lines changed: 135 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,6 @@
1+
import json
12
import signal
3+
import tempfile
24
import threading
35
from unittest.mock import patch
46

@@ -261,3 +263,136 @@ def on_evaluate_end(
261263
def test_evaluation_result_repr():
262264
result = EvaluationResult(score=100.0, results=[(new_example("What is 1+1?", "2"), {"answer": "2"}, 100.0)])
263265
assert repr(result) == "EvaluationResult(score=100.0, results=<list of 1 results>)"
266+
267+
268+
def test_evaluate_save_as_json_with_history():
269+
"""Test that save_as_json works with Examples containing dspy.History objects."""
270+
# Setup
271+
dspy.settings.configure(
272+
lm=DummyLM(
273+
{
274+
"What is 1+1?": {"answer": "2"},
275+
"What is 2+2?": {"answer": "4"},
276+
}
277+
)
278+
)
279+
280+
# Create history objects
281+
history1 = dspy.History(
282+
messages=[
283+
{"question": "Previous Q1", "answer": "Previous A1"},
284+
]
285+
)
286+
history2 = dspy.History(
287+
messages=[
288+
{"question": "Previous Q2", "answer": "Previous A2"},
289+
{"question": "Previous Q3", "answer": "Previous A3"},
290+
]
291+
)
292+
293+
# Create examples with history
294+
devset = [
295+
dspy.Example(question="What is 1+1?", answer="2", history=history1).with_inputs("question"),
296+
dspy.Example(question="What is 2+2?", answer="4", history=history2).with_inputs("question"),
297+
]
298+
299+
program = Predict("question -> answer")
300+
301+
# Create evaluator with save_as_json
302+
with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f:
303+
temp_json = f.name
304+
305+
try:
306+
evaluator = Evaluate(
307+
devset=devset,
308+
metric=answer_exact_match,
309+
display_progress=False,
310+
save_as_json=temp_json,
311+
)
312+
313+
result = evaluator(program)
314+
assert result.score == 100.0
315+
316+
# Verify JSON file was created and is valid
317+
with open(temp_json) as f:
318+
data = json.load(f)
319+
320+
assert len(data) == 2
321+
322+
# Verify history was properly serialized in first record
323+
assert "history" in data[0]
324+
assert isinstance(data[0]["history"], dict)
325+
assert "messages" in data[0]["history"]
326+
assert len(data[0]["history"]["messages"]) == 1
327+
assert data[0]["history"]["messages"][0] == {"question": "Previous Q1", "answer": "Previous A1"}
328+
329+
# Verify history was properly serialized in second record
330+
assert "history" in data[1]
331+
assert isinstance(data[1]["history"], dict)
332+
assert "messages" in data[1]["history"]
333+
assert len(data[1]["history"]["messages"]) == 2
334+
assert data[1]["history"]["messages"][0] == {"question": "Previous Q2", "answer": "Previous A2"}
335+
assert data[1]["history"]["messages"][1] == {"question": "Previous Q3", "answer": "Previous A3"}
336+
337+
finally:
338+
import os
339+
if os.path.exists(temp_json):
340+
os.unlink(temp_json)
341+
342+
343+
def test_evaluate_save_as_csv_with_history():
344+
"""Test that save_as_csv works with Examples containing dspy.History objects."""
345+
# Setup
346+
dspy.settings.configure(
347+
lm=DummyLM(
348+
{
349+
"What is 1+1?": {"answer": "2"},
350+
}
351+
)
352+
)
353+
354+
# Create history object
355+
history = dspy.History(
356+
messages=[
357+
{"question": "Previous Q", "answer": "Previous A"},
358+
]
359+
)
360+
361+
# Create example with history
362+
devset = [
363+
dspy.Example(question="What is 1+1?", answer="2", history=history).with_inputs("question"),
364+
]
365+
366+
program = Predict("question -> answer")
367+
368+
# Create evaluator with save_as_csv
369+
with tempfile.NamedTemporaryFile(mode="w", suffix=".csv", delete=False) as f:
370+
temp_csv = f.name
371+
372+
try:
373+
evaluator = Evaluate(
374+
devset=devset,
375+
metric=answer_exact_match,
376+
display_progress=False,
377+
save_as_csv=temp_csv,
378+
)
379+
380+
result = evaluator(program)
381+
assert result.score == 100.0
382+
383+
# Verify CSV file was created
384+
import csv
385+
with open(temp_csv) as f:
386+
reader = csv.DictReader(f)
387+
rows = list(reader)
388+
389+
assert len(rows) == 1
390+
assert "history" in rows[0]
391+
# CSV will have string representation of the dict
392+
assert "messages" in rows[0]["history"]
393+
394+
finally:
395+
import os
396+
if os.path.exists(temp_csv):
397+
os.unlink(temp_csv)
398+

tests/primitives/test_example.py

Lines changed: 31 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -123,3 +123,34 @@ def test_example_copy_without():
123123
def test_example_to_dict():
124124
example = Example(a=1, b=2)
125125
assert example.toDict() == {"a": 1, "b": 2}
126+
127+
128+
def test_example_to_dict_with_history():
129+
"""Test that Example.toDict() properly serializes dspy.History objects."""
130+
history = dspy.History(
131+
messages=[
132+
{"question": "What is the capital of France?", "answer": "Paris"},
133+
{"question": "What is the capital of Germany?", "answer": "Berlin"},
134+
]
135+
)
136+
example = Example(question="Test question", history=history, answer="Test answer")
137+
138+
result = example.toDict()
139+
140+
# Verify the result is a dictionary
141+
assert isinstance(result, dict)
142+
assert "history" in result
143+
144+
# Verify history is serialized to a dict (not a History object)
145+
assert isinstance(result["history"], dict)
146+
assert "messages" in result["history"]
147+
assert result["history"]["messages"] == [
148+
{"question": "What is the capital of France?", "answer": "Paris"},
149+
{"question": "What is the capital of Germany?", "answer": "Berlin"},
150+
]
151+
152+
# Verify JSON serialization works
153+
import json
154+
json_str = json.dumps(result)
155+
restored = json.loads(json_str)
156+
assert restored["history"]["messages"] == result["history"]["messages"]

0 commit comments

Comments
 (0)