Skip to content

Commit d89028f

Browse files
Utility that converts async stream to sync stream (stanfordnlp#8162)
* init sync streaming * increment * fix tests * fix tests
1 parent bb5f0d1 commit d89028f

File tree

3 files changed

+109
-2
lines changed

3 files changed

+109
-2
lines changed

dspy/streaming/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
from dspy.streaming.messages import StatusMessage, StatusMessageProvider, StreamResponse
2-
from dspy.streaming.streamify import streamify, streaming_response
2+
from dspy.streaming.streamify import apply_sync_streaming, streamify, streaming_response
33
from dspy.streaming.streaming_listener import StreamListener
44

55
__all__ = [
@@ -9,4 +9,5 @@
99
"StreamListener",
1010
"StreamResponse",
1111
"streaming_response",
12+
"apply_sync_streaming",
1213
]

dspy/streaming/streamify.py

Lines changed: 45 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
1+
import asyncio
2+
import contextvars
13
import logging
4+
import threading
25
from asyncio import iscoroutinefunction
3-
from typing import TYPE_CHECKING, Any, AsyncGenerator, Awaitable, Callable, List, Optional
6+
from queue import Queue
7+
from typing import TYPE_CHECKING, Any, AsyncGenerator, Awaitable, Callable, Generator, List, Optional
48

59
import litellm
610
import ujson
@@ -200,6 +204,46 @@ async def streamer(*args, **kwargs):
200204
return streamer
201205

202206

207+
def apply_sync_streaming(async_generator: AsyncGenerator) -> Generator:
    """Convert the async streaming generator to a sync generator.

    The async generator is driven to completion on a background thread
    (via `asyncio.run`), and each item is relayed through a queue so the
    caller can iterate synchronously.

    Args:
        async_generator: The async generator to consume, e.g. the output
            of a `streamify`-wrapped program.

    Yields:
        Items produced by `async_generator`, in order.

    Raises:
        Any exception raised inside `async_generator` is re-raised in the
        consuming thread instead of dying silently in the background.
    """
    queue = Queue()  # Queue to hold items from the async generator
    stop_sentinel = object()  # Sentinel to signal the generator is complete
    error_sentinel = object()  # Sentinel tagging an exception relayed from the producer

    # To propagate prediction request ID context to the child thread
    context = contextvars.copy_context()
    from dspy.dsp.utils.settings import thread_local_overrides

    parent_overrides = thread_local_overrides.overrides.copy()

    def producer():
        """Runs in a background thread to fetch items asynchronously."""

        original_overrides = thread_local_overrides.overrides
        thread_local_overrides.overrides = parent_overrides.copy()

        async def runner():
            try:
                async for item in async_generator:
                    queue.put(item)
            except BaseException as exc:
                # Relay the failure to the consumer; swallowing it here would
                # make a crashed stream indistinguishable from a finished one.
                queue.put((error_sentinel, exc))
            finally:
                # Signal completion
                queue.put(stop_sentinel)

        try:
            context.run(asyncio.run, runner())
        finally:
            # Restore even if asyncio.run itself raises.
            thread_local_overrides.overrides = original_overrides

    # Start the producer in a background thread
    thread = threading.Thread(target=producer, daemon=True)
    thread.start()

    # Consume items from the queue
    while True:
        item = queue.get()  # Block until an item is available
        if item is stop_sentinel:
            break
        if isinstance(item, tuple) and len(item) == 2 and item[0] is error_sentinel:
            raise item[1]
        yield item

    # Make sure the producer has finished (and restored its overrides)
    # before the sync generator returns.
    thread.join()
203247
async def streaming_response(streamer: AsyncGenerator) -> AsyncGenerator:
204248
"""
205249
Convert a DSPy program output stream to an OpenAI-compatible output stream that can be

tests/streaming/test_streaming.py

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -225,3 +225,65 @@ def __call__(self, x: str, **kwargs):
225225

226226
assert all_chunks[-1].predict_name == "predict2"
227227
assert all_chunks[-1].signature_field_name == "judgement"
228+
229+
230+
@pytest.mark.skipif(not os.getenv("OPENAI_API_KEY"), reason="OpenAI API key not found in environment variables")
def test_sync_streaming():
    """End-to-end check that `apply_sync_streaming` relays chunks from both predictors in order."""

    class MyProgram(dspy.Module):
        def __init__(self):
            self.predict1 = dspy.Predict("question->answer")
            self.predict2 = dspy.Predict("question, answer->judgement")

        def __call__(self, x: str, **kwargs):
            answer = self.predict1(question=x, **kwargs)
            judgement = self.predict2(question=x, answer=answer, **kwargs)
            return judgement

    # Turn off the cache to ensure the stream is produced.
    dspy.settings.configure(lm=dspy.LM("openai/gpt-4o-mini", cache=False))
    program = dspy.streamify(
        MyProgram(),
        stream_listeners=[
            dspy.streaming.StreamListener(signature_field_name="answer"),
            dspy.streaming.StreamListener(signature_field_name="judgement"),
        ],
        include_final_prediction_in_output_stream=False,
    )

    sync_output = dspy.streaming.apply_sync_streaming(program(x="why did a chicken cross the kitchen?"))
    all_chunks = [value for value in sync_output if isinstance(value, dspy.streaming.StreamResponse)]

    # First chunk belongs to the first predictor's output field...
    assert all_chunks[0].predict_name == "predict1"
    assert all_chunks[0].signature_field_name == "answer"

    # ...and the last chunk to the second predictor's output field.
    assert all_chunks[-1].predict_name == "predict2"
    assert all_chunks[-1].signature_field_name == "judgement"
266+
267+
def test_sync_status_streaming():
    """Tool-call status messages should flow through the sync stream adapter unchanged."""

    class MyProgram(dspy.Module):
        def __init__(self):
            self.generate_question = dspy.Tool(lambda x: f"What color is the {x}?", name="generate_question")
            self.predict = dspy.Predict("question->answer")

        def __call__(self, x: str):
            question = self.generate_question(x=x)
            return self.predict(question=question)

    lm = dspy.utils.DummyLM([{"answer": "red"}, {"answer": "blue"}])
    with dspy.context(lm=lm):
        sync_output = dspy.streaming.apply_sync_streaming(dspy.streamify(MyProgram())("sky"))
        status_messages = [value for value in sync_output if isinstance(value, StatusMessage)]

        # One message when the tool starts, one when it finishes.
        assert len(status_messages) == 2
        assert status_messages[0].message == "Calling tool generate_question..."
        assert status_messages[1].message == "Tool calling finished! Querying the LLM with tool calling results..."

0 commit comments

Comments
 (0)