@@ -140,9 +140,25 @@ export interface ChatCompletion {
   object: 'chat.completion';

   /**
-   * The service tier used for processing the request.
+   * Specifies the latency tier to use for processing the request. This parameter is
+   * relevant for customers subscribed to the scale tier service:
+   *
+   * - If set to 'auto', and the Project is Scale tier enabled, the system will
+   *   utilize scale tier credits until they are exhausted.
+   * - If set to 'auto', and the Project is not Scale tier enabled, the request will
+   *   be processed using the default service tier with a lower uptime SLA and no
+   *   latency guarantee.
+   * - If set to 'default', the request will be processed using the default service
+   *   tier with a lower uptime SLA and no latency guarantee.
+   * - If set to 'flex', the request will be processed with the Flex Processing
+   *   service tier.
+   *   [Learn more](https://platform.openai.com/docs/guides/flex-processing).
+   * - When not set, the default behavior is 'auto'.
+   *
+   * When this parameter is set, the response body will include the `service_tier`
+   * utilized.
    */
-  service_tier?: 'scale' | 'default' | null;
+  service_tier?: 'auto' | 'default' | 'flex' | null;

   /**
    * This fingerprint represents the backend configuration that the model runs with.
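
For context on the widened response type, here is a minimal sketch (assuming the `openai` npm package with an API key in `OPENAI_API_KEY`; the model name `gpt-4o` is illustrative, not part of this diff) of how the new union surfaces when reading a completion:

```ts
import OpenAI from 'openai';

const client = new OpenAI(); // assumes OPENAI_API_KEY is set in the environment

async function main() {
  const completion = await client.chat.completions.create({
    model: 'gpt-4o', // illustrative model name
    messages: [{ role: 'user', content: 'Say hello.' }],
  });

  // After this change, `service_tier` is typed with the widened union below;
  // 'scale' is no longer a possible value.
  const tier: 'auto' | 'default' | 'flex' | null | undefined = completion.service_tier;
  console.log(`served on tier: ${tier ?? 'not reported'}`);
}

main();
```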
@@ -319,11 +335,11 @@ export interface ChatCompletionAudioParam {
    * Specifies the output audio format. Must be one of `wav`, `mp3`, `flac`, `opus`,
    * or `pcm16`.
    */
-  format: 'wav' | 'mp3' | 'flac' | 'opus' | 'pcm16';
+  format: 'wav' | 'aac' | 'mp3' | 'flac' | 'opus' | 'pcm16';

   /**
    * The voice the model uses to respond. Supported voices are `alloy`, `ash`,
-   * `ballad`, `coral`, `echo`, `sage`, and `shimmer`.
+   * `ballad`, `coral`, `echo`, `fable`, `nova`, `onyx`, `sage`, and `shimmer`.
    */
   voice:
     | (string & {})
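
A hedged usage sketch for the widened audio union (the model name `gpt-4o-audio-preview` and the exact set of audio-capable models are assumptions, not part of this diff):

```ts
import OpenAI from 'openai';

const client = new OpenAI();

async function main() {
  const completion = await client.chat.completions.create({
    model: 'gpt-4o-audio-preview', // assumed audio-capable model
    modalities: ['text', 'audio'],
    // 'aac' and the added voices ('fable', 'nova', 'onyx') now type-check:
    audio: { format: 'aac', voice: 'nova' },
    messages: [{ role: 'user', content: 'Greet the listener in one sentence.' }],
  });

  console.log(completion.choices[0]?.message.audio?.id);
}

main();
```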
@@ -375,9 +391,25 @@ export interface ChatCompletionChunk {
   object: 'chat.completion.chunk';

   /**
-   * The service tier used for processing the request.
+   * Specifies the latency tier to use for processing the request. This parameter is
+   * relevant for customers subscribed to the scale tier service:
+   *
+   * - If set to 'auto', and the Project is Scale tier enabled, the system will
+   *   utilize scale tier credits until they are exhausted.
+   * - If set to 'auto', and the Project is not Scale tier enabled, the request will
+   *   be processed using the default service tier with a lower uptime SLA and no
+   *   latency guarantee.
+   * - If set to 'default', the request will be processed using the default service
+   *   tier with a lower uptime SLA and no latency guarantee.
+   * - If set to 'flex', the request will be processed with the Flex Processing
+   *   service tier.
+   *   [Learn more](https://platform.openai.com/docs/guides/flex-processing).
+   * - When not set, the default behavior is 'auto'.
+   *
+   * When this parameter is set, the response body will include the `service_tier`
+   * utilized.
    */
-  service_tier?: 'scale' | 'default' | null;
+  service_tier?: 'auto' | 'default' | 'flex' | null;

   /**
    * This fingerprint represents the backend configuration that the model runs with.
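
The chunk-level field mirrors the response-level one; a minimal streaming sketch under the same assumptions as above:

```ts
import OpenAI from 'openai';

const client = new OpenAI();

async function main() {
  const stream = await client.chat.completions.create({
    model: 'gpt-4o', // illustrative model name
    messages: [{ role: 'user', content: 'Count to three.' }],
    stream: true,
  });

  for await (const chunk of stream) {
    process.stdout.write(chunk.choices[0]?.delta?.content ?? '');
    // Same widened union as on ChatCompletion; typically populated when the
    // request set `service_tier`.
    if (chunk.service_tier === 'flex') {
      // e.g. record that this stream ran on the Flex tier
    }
  }
}

main();
```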
@@ -1114,7 +1146,7 @@ export interface ChatCompletionCreateParamsBase {
   messages: Array<ChatCompletionMessageParam>;

   /**
-   * Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a
+   * Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a
    * wide range of models with different capabilities, performance characteristics,
    * and price points. Refer to the
    * [model guide](https://platform.openai.com/docs/models) to browse and compare
@@ -1194,7 +1226,7 @@ export interface ChatCompletionCreateParamsBase {
    *
    * This value is now deprecated in favor of `max_completion_tokens`, and is not
    * compatible with
-   * [o1 series models](https://platform.openai.com/docs/guides/reasoning).
+   * [o-series models](https://platform.openai.com/docs/guides/reasoning).
    */
   max_tokens?: number | null;

@@ -1296,14 +1328,19 @@ export interface ChatCompletionCreateParamsBase {
    * latency guarantee.
    * - If set to 'default', the request will be processed using the default service
    *   tier with a lower uptime SLA and no latency guarantee.
+   * - If set to 'flex', the request will be processed with the Flex Processing
+   *   service tier.
+   *   [Learn more](https://platform.openai.com/docs/guides/flex-processing).
    * - When not set, the default behavior is 'auto'.
    *
    * When this parameter is set, the response body will include the `service_tier`
    * utilized.
    */
-  service_tier?: 'auto' | 'default' | null;
+  service_tier?: 'auto' | 'default' | 'flex' | null;

   /**
+   * Not supported with latest reasoning models `o3` and `o4-mini`.
+   *
    * Up to 4 sequences where the API will stop generating further tokens. The
    * returned text will not contain the stop sequence.
    */
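
Putting the request-side changes together, a sketch of a Flex Processing call (assumptions not stated in this diff: Flex targets reasoning models such as `o3`/`o4-mini`, and a longer client timeout may be advisable for slower flex responses):

```ts
import OpenAI from 'openai';

// Assumption: a generous timeout, since flex requests may be queued longer.
const client = new OpenAI({ timeout: 15 * 60 * 1000 });

async function main() {
  const completion = await client.chat.completions.create({
    model: 'o3', // assumed flex-eligible reasoning model
    messages: [{ role: 'user', content: 'Summarize Hamlet in one sentence.' }],
    service_tier: 'flex',
    // `stop` is deliberately omitted: per the doc change above, it is not
    // supported with the latest reasoning models (`o3`, `o4-mini`).
  });

  console.log(completion.choices[0]?.message.content);
  console.log('service_tier:', completion.service_tier);
}

main();
```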