
Commit 7a877d1

Use exponential_backoff_retry for completion call (stanfordnlp#8023)
* use exponential_backoff_retry for completion call
* add test for exponential_backoff_retry
1 parent 2f2e2f3 commit 7a877d1

2 files changed: +22 -0 lines changed

dspy/clients/lm.py
Lines changed: 1 addition & 0 deletions

@@ -289,6 +289,7 @@ def cached_litellm_completion(request: Dict[str, Any], num_retries: int):
 def litellm_completion(request: Dict[str, Any], num_retries: int, cache={"no-cache": True, "no-store": True}):
     retry_kwargs = dict(
         retry_policy=_get_litellm_retry_policy(num_retries),
+        retry_strategy="exponential_backoff_retry",
         # In LiteLLM version 1.55.3 (the first version that supports retry_policy as an argument
         # to completion()), the default value of max_retries is non-zero for certain providers, and
         # max_retries is stacked on top of the retry_policy. To avoid this, we set max_retries=0
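For context, here is a minimal sketch of how these kwargs plausibly flow into litellm.completion(). The body of _get_litellm_retry_policy is an illustrative guess rather than the repository's exact code; litellm's RetryPolicy model and the retry_policy/retry_strategy arguments to completion() (LiteLLM >= 1.55.3) are taken from the diff comment above.

```python
import litellm
from litellm import RetryPolicy
from typing import Any, Dict

def _get_litellm_retry_policy(num_retries: int) -> RetryPolicy:
    # Illustrative guess: retry transient errors num_retries times, and
    # never retry errors that cannot succeed on a second attempt.
    return RetryPolicy(
        TimeoutErrorRetries=num_retries,
        RateLimitErrorRetries=num_retries,
        InternalServerErrorRetries=num_retries,
        BadRequestErrorRetries=0,
        AuthenticationErrorRetries=0,
    )

def litellm_completion(request: Dict[str, Any], num_retries: int,
                       cache={"no-cache": True, "no-store": True}):
    retry_kwargs = dict(
        retry_policy=_get_litellm_retry_policy(num_retries),
        retry_strategy="exponential_backoff_retry",  # double the wait between attempts
        # max_retries=0 keeps provider-level retries from stacking on the policy.
        max_retries=0,
    )
    return litellm.completion(cache=cache, **retry_kwargs, **request)
```

With this in place, a rate-limited request is retried up to num_retries times, with the wait doubling between attempts instead of staying constant.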

tests/clients/test_lm.py
Lines changed: 21 additions & 0 deletions

@@ -1,8 +1,10 @@
 from unittest import mock

+import time
 import litellm
 import pydantic
 import pytest
+from openai import RateLimitError

 import dspy
 from tests.test_utils.server import litellm_test_server, read_litellm_test_server_request_logs

@@ -250,3 +252,22 @@ def test_dump_state():
         "launch_kwargs": { "temperature": 1 },
         "train_kwargs": { "temperature": 5 },
     }
+
+
+def test_exponential_backoff_retry():
+    time_counter = []
+    def mock_create(*args, **kwargs):
+        time_counter.append(time.time())
+        # These fields are called during the error handling
+        mock_response = mock.Mock()
+        mock_response.headers = {}
+        mock_response.status_code = 429
+        raise RateLimitError(response=mock_response, message="message", body="error")
+    lm = dspy.LM(model='openai/gpt-3.5-turbo', max_tokens=250, num_retries=3)
+    with mock.patch.object(litellm.OpenAIChatCompletion, "completion", side_effect=mock_create):
+        with pytest.raises(RateLimitError):
+            lm("question")
+
+    # The first retry happens immediately regardless of the configuration
+    for i in range(1, len(time_counter)-1):
+        assert time_counter[i+1] - time_counter[i] >= 2**(i-1)
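The assertion encodes the timing contract of exponential backoff: the first retry fires immediately, and each subsequent gap is at least double the previous one (1s, 2s, 4s, ...). A minimal sketch of that schedule, purely illustrative and not LiteLLM's internal implementation:

```python
import time

def retry_with_exponential_backoff(fn, num_retries: int, base_delay: float = 1.0):
    """Illustrative sketch of the schedule the test asserts; not LiteLLM's code."""
    for attempt in range(num_retries + 1):
        try:
            return fn()
        except Exception:
            if attempt == num_retries:
                raise  # retries exhausted; surface the last error
            if attempt >= 1:
                # The first retry happens immediately; after that the
                # wait doubles each time: 1s, 2s, 4s, ...
                time.sleep(base_delay * 2 ** (attempt - 1))
```

With num_retries=3 and base_delay=1, the four attempts land at roughly t = 0, 0, 1, and 3 seconds, so consecutive gaps satisfy time_counter[i+1] - time_counter[i] >= 2**(i-1) for i >= 1, which is exactly what the loop in the test checks.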
