Skip to content

Commit 904b293

Browse files
authored
make retry_time configurable, add doc (microsoft#53)
* make retry_time configurable, add doc * in seconds * retry_wait_time * bump version to 0.1.4 * remove .json * rename * time
1 parent d802b7a commit 904b293

File tree

6 files changed

+40
-23
lines changed

6 files changed

+40
-23
lines changed

autogen/oai/completion.py

Lines changed: 22 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -105,9 +105,9 @@ class Completion(openai_Completion):
105105
seed = 41
106106
cache_path = f".cache/{seed}"
107107
# retry after this many seconds
108-
retry_time = 10
108+
retry_wait_time = 10
109109
# fail a request after hitting RateLimitError for this many seconds
110-
retry_timeout = 120
110+
max_retry_period = 120
111111
# time out for request to openai server
112112
request_timeout = 60
113113

@@ -181,7 +181,7 @@ def _book_keeping(cls, config: Dict, response):
181181
def _get_response(cls, config: Dict, raise_on_ratelimit_or_timeout=False, use_cache=True):
182182
"""Get the response from the openai api call.
183183
184-
Try cache first. If not found, call the openai api. If the api call fails, retry after retry_time.
184+
Try cache first. If not found, call the openai api. If the api call fails, retry after retry_wait_time.
185185
"""
186186
config = config.copy()
187187
openai.api_key_path = config.pop("api_key_path", openai.api_key_path)
@@ -199,7 +199,8 @@ def _get_response(cls, config: Dict, raise_on_ratelimit_or_timeout=False, use_ca
199199
)
200200
start_time = time.time()
201201
request_timeout = cls.request_timeout
202-
retry_timeout = config.pop("retry_timeout", cls.retry_timeout)
202+
max_retry_period = config.pop("max_retry_period", cls.max_retry_period)
203+
retry_wait_time = config.pop("retry_wait_time", cls.retry_wait_time)
203204
while True:
204205
try:
205206
if "request_timeout" in config:
@@ -211,18 +212,18 @@ def _get_response(cls, config: Dict, raise_on_ratelimit_or_timeout=False, use_ca
211212
APIConnectionError,
212213
):
213214
# transient error
214-
logger.info(f"retrying in {cls.retry_time} seconds...", exc_info=1)
215-
sleep(cls.retry_time)
215+
logger.info(f"retrying in {retry_wait_time} seconds...", exc_info=1)
216+
sleep(retry_wait_time)
216217
except APIError as err:
217218
error_code = err and err.json_body and isinstance(err.json_body, dict) and err.json_body.get("error")
218219
error_code = error_code and error_code.get("code")
219220
if error_code == "content_filter":
220221
raise
221222
# transient error
222-
logger.info(f"retrying in {cls.retry_time} seconds...", exc_info=1)
223-
sleep(cls.retry_time)
223+
logger.info(f"retrying in {retry_wait_time} seconds...", exc_info=1)
224+
sleep(retry_wait_time)
224225
except (RateLimitError, Timeout) as err:
225-
time_left = retry_timeout - (time.time() - start_time + cls.retry_time)
226+
time_left = max_retry_period - (time.time() - start_time + retry_wait_time)
226227
if (
227228
time_left > 0
228229
and isinstance(err, RateLimitError)
@@ -233,16 +234,16 @@ def _get_response(cls, config: Dict, raise_on_ratelimit_or_timeout=False, use_ca
233234
if isinstance(err, Timeout):
234235
request_timeout <<= 1
235236
request_timeout = min(request_timeout, time_left)
236-
logger.info(f"retrying in {cls.retry_time} seconds...", exc_info=1)
237-
sleep(cls.retry_time)
237+
logger.info(f"retrying in {retry_wait_time} seconds...", exc_info=1)
238+
sleep(retry_wait_time)
238239
elif raise_on_ratelimit_or_timeout:
239240
raise
240241
else:
241242
response = -1
242243
if use_cache and isinstance(err, Timeout):
243244
cls._cache.set(key, response)
244245
logger.warning(
245-
f"Failed to get response from openai api due to getting RateLimitError or Timeout for {retry_timeout} seconds."
246+
f"Failed to get response from openai api due to getting RateLimitError or Timeout for {max_retry_period} seconds."
246247
)
247248
return response
248249
except InvalidRequestError:
@@ -743,9 +744,11 @@ def yes_or_no_filter(context, config, response):
743744
When set to False, -1 will be returned when all configs fail.
744745
allow_format_str_template (bool, Optional): Whether to allow format string template in the config.
745746
**config: Configuration for the openai API call. This is used as parameters for calling openai API.
746-
Besides the parameters for the openai API call, it can also contain a seed (int) for the cache.
747-
This is useful when implementing "controlled randomness" for the completion.
748-
Also, the "prompt" or "messages" parameter can contain a template (str or Callable) which will be instantiated with the context.
747+
The "prompt" or "messages" parameter can contain a template (str or Callable) which will be instantiated with the context.
748+
Besides the parameters for the openai API call, it can also contain:
749+
- `max_retry_period` (int): the total time (in seconds) allowed for retrying failed requests.
750+
- `retry_wait_time` (int): the time interval to wait (in seconds) before retrying a failed request.
751+
- `seed` (int) for the cache. This is useful when implementing "controlled randomness" for the completion.
749752
750753
Returns:
751754
Responses from OpenAI API, with additional fields.
@@ -763,9 +766,9 @@ def yes_or_no_filter(context, config, response):
763766
base_config = config.copy()
764767
base_config["allow_format_str_template"] = allow_format_str_template
765768
base_config.update(each_config)
766-
if i < last and filter_func is None and "retry_timeout" not in base_config:
767-
# retry_timeout = 0 to avoid retrying when no filter is given
768-
base_config["retry_timeout"] = 0
769+
if i < last and filter_func is None and "max_retry_period" not in base_config:
770+
# max_retry_period = 0 to avoid retrying when no filter is given
771+
base_config["max_retry_period"] = 0
769772
try:
770773
response = cls.create(
771774
context,
@@ -1103,7 +1106,7 @@ def stop_logging(cls):
11031106

11041107

11051108
class ChatCompletion(Completion):
1106-
"""A class for OpenAI API ChatCompletion."""
1109+
"""A class for OpenAI API ChatCompletion. Share the same API as Completion."""
11071110

11081111
default_search_space = Completion.default_search_space.copy()
11091112
default_search_space["model"] = tune.choice(["gpt-3.5-turbo", "gpt-4"])

autogen/version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "0.1.3"
1+
__version__ = "0.1.4"

test/oai/test_completion.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -227,7 +227,7 @@ def test_humaneval(num_samples=1):
227227
config_list=autogen.config_list_from_models(KEY_LOC, model_list=["gpt-3.5-turbo"]),
228228
prompt="",
229229
max_tokens=1,
230-
retry_timeout=0,
230+
max_retry_period=0,
231231
raise_on_ratelimit_or_timeout=False,
232232
)
233233
# assert response == -1

test/twoagent.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
# Load LLM inference endpoints from an env variable or a file
44
# See https://microsoft.github.io/autogen/docs/FAQ#set-your-api-endpoints
5-
# and OAI_CONFIG_LIST_sample.json
5+
# and OAI_CONFIG_LIST_sample
66
config_list = config_list_from_json(env_or_file="OAI_CONFIG_LIST")
77
assistant = AssistantAgent("assistant", llm_config={"config_list": config_list})
88
user_proxy = UserProxyAgent("user_proxy", code_execution_config={"work_dir": "coding"})

website/docs/FAQ.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,3 +99,13 @@ You can also explicitly specify that by:
9999
```python
100100
assistant = autogen.AssistantAgent(name="assistant", llm_config={"api_key": ...})
101101
```
102+
103+
## Handle Rate Limit Error and Timeout Error
104+
105+
You can set `retry_wait_time` and `max_retry_period` to handle rate limit errors. And you can set `request_timeout` to handle timeout errors. They can all be specified in `llm_config` for an agent, which will be used in the [`create`](/docs/reference/oai/completion#create) function for LLM inference.
106+
107+
- `retry_wait_time` (int): the time interval to wait (in seconds) before retrying a failed request.
108+
- `max_retry_period` (int): the total time (in seconds) allowed for retrying failed requests.
109+
- `request_timeout` (int): the timeout (in seconds) sent with a single request.
110+
111+
Please refer to the [documentation](/docs/Use-Cases/enhanced_inference#runtime-error) for more info.

website/docs/Use-Cases/enhanced_inference.md

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,11 @@ API call results are cached locally and reused when the same request is issued.
123123

124124
### Runtime error
125125

126-
It is easy to hit error when calling OpenAI APIs, due to connection, rate limit, or timeout. Some of the errors are transient. `autogen.Completion.create` deals with the transient errors and retries automatically. Initial request timeout, retry timeout and retry time interval can be configured via `request_timeout`, `retry_timeout` and `autogen.Completion.retry_time`.
126+
It is easy to hit errors when calling OpenAI APIs, due to connection, rate limit, or timeout. Some of the errors are transient. `autogen.Completion.create` deals with the transient errors and retries automatically. Request timeout, max retry period and retry wait time can be configured via `request_timeout`, `max_retry_period` and `retry_wait_time`.
127+
128+
- `request_timeout` (int): the timeout (in seconds) sent with a single request.
129+
- `max_retry_period` (int): the total time (in seconds) allowed for retrying failed requests.
130+
- `retry_wait_time` (int): the time interval to wait (in seconds) before retrying a failed request.
127131

128132
Moreover, one can pass a list of configurations of different models/endpoints to mitigate the rate limits. For example,
129133

0 commit comments

Comments
 (0)