
Commit a0d0000

feat(api): add o3 and o4-mini model IDs
1 parent bd50165 commit a0d0000

6 files changed (+112 −19 lines)

.stats.yml (+3 −3)

@@ -1,4 +1,4 @@
 configured_endpoints: 97
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-a555f81249cb084f463dcefa4aba069f9341fdaf3dd6ac27d7f237fc90e8f488.yml
-openapi_spec_hash: 8e590296cd1a54b9508510b0c7a2c45a
-config_hash: 5ea32de61ff42fcf5e66cff8d9e247ea
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-5633633cc38734869cf7d993f7b549bb8e4d10e0ec45381ec2cd91507cd8eb8f.yml
+openapi_spec_hash: c855121b2b2324b99499c9244c21d24d
+config_hash: d20837393b73efdb19cd08e04c1cc9a1

src/resources/chat/completions/completions.ts (+46 −9)

@@ -140,9 +140,25 @@ export interface ChatCompletion {
   object: 'chat.completion';
 
   /**
-   * The service tier used for processing the request.
+   * Specifies the latency tier to use for processing the request. This parameter is
+   * relevant for customers subscribed to the scale tier service:
+   *
+   * - If set to 'auto', and the Project is Scale tier enabled, the system will
+   *   utilize scale tier credits until they are exhausted.
+   * - If set to 'auto', and the Project is not Scale tier enabled, the request will
+   *   be processed using the default service tier with a lower uptime SLA and no
+   *   latency guarantee.
+   * - If set to 'default', the request will be processed using the default service
+   *   tier with a lower uptime SLA and no latency guarantee.
+   * - If set to 'flex', the request will be processed with the Flex Processing
+   *   service tier.
+   *   [Learn more](https://platform.openai.com/docs/guides/flex-processing).
+   * - When not set, the default behavior is 'auto'.
+   *
+   * When this parameter is set, the response body will include the `service_tier`
+   * utilized.
    */
-  service_tier?: 'scale' | 'default' | null;
+  service_tier?: 'auto' | 'default' | 'flex' | null;
 
   /**
    * This fingerprint represents the backend configuration that the model runs with.

@@ -319,11 +335,11 @@ export interface ChatCompletionAudioParam {
    * Specifies the output audio format. Must be one of `wav`, `mp3`, `flac`, `opus`,
    * or `pcm16`.
    */
-  format: 'wav' | 'mp3' | 'flac' | 'opus' | 'pcm16';
+  format: 'wav' | 'aac' | 'mp3' | 'flac' | 'opus' | 'pcm16';
 
   /**
    * The voice the model uses to respond. Supported voices are `alloy`, `ash`,
-   * `ballad`, `coral`, `echo`, `sage`, and `shimmer`.
+   * `ballad`, `coral`, `echo`, `fable`, `nova`, `onyx`, `sage`, and `shimmer`.
    */
   voice:
     | (string & {})

@@ -375,9 +391,25 @@ export interface ChatCompletionChunk {
   object: 'chat.completion.chunk';
 
   /**
-   * The service tier used for processing the request.
+   * Specifies the latency tier to use for processing the request. This parameter is
+   * relevant for customers subscribed to the scale tier service:
+   *
+   * - If set to 'auto', and the Project is Scale tier enabled, the system will
+   *   utilize scale tier credits until they are exhausted.
+   * - If set to 'auto', and the Project is not Scale tier enabled, the request will
+   *   be processed using the default service tier with a lower uptime SLA and no
+   *   latency guarantee.
+   * - If set to 'default', the request will be processed using the default service
+   *   tier with a lower uptime SLA and no latency guarantee.
+   * - If set to 'flex', the request will be processed with the Flex Processing
+   *   service tier.
+   *   [Learn more](https://platform.openai.com/docs/guides/flex-processing).
+   * - When not set, the default behavior is 'auto'.
+   *
+   * When this parameter is set, the response body will include the `service_tier`
+   * utilized.
    */
-  service_tier?: 'scale' | 'default' | null;
+  service_tier?: 'auto' | 'default' | 'flex' | null;
 
   /**
    * This fingerprint represents the backend configuration that the model runs with.

@@ -1114,7 +1146,7 @@ export interface ChatCompletionCreateParamsBase {
   messages: Array<ChatCompletionMessageParam>;
 
   /**
-   * Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a
+   * Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a
    * wide range of models with different capabilities, performance characteristics,
    * and price points. Refer to the
    * [model guide](https://platform.openai.com/docs/models) to browse and compare

@@ -1194,7 +1226,7 @@ export interface ChatCompletionCreateParamsBase {
    *
    * This value is now deprecated in favor of `max_completion_tokens`, and is not
    * compatible with
-   * [o1 series models](https://platform.openai.com/docs/guides/reasoning).
+   * [o-series models](https://platform.openai.com/docs/guides/reasoning).
    */
   max_tokens?: number | null;
 

@@ -1296,14 +1328,19 @@ export interface ChatCompletionCreateParamsBase {
    *   latency guarantee.
    * - If set to 'default', the request will be processed using the default service
    *   tier with a lower uptime SLA and no latency guarantee.
+   * - If set to 'flex', the request will be processed with the Flex Processing
+   *   service tier.
+   *   [Learn more](https://platform.openai.com/docs/guides/flex-processing).
    * - When not set, the default behavior is 'auto'.
    *
    * When this parameter is set, the response body will include the `service_tier`
    * utilized.
    */
-  service_tier?: 'auto' | 'default' | null;
+  service_tier?: 'auto' | 'default' | 'flex' | null;
 
   /**
+   * Not supported with latest reasoning models `o3` and `o4-mini`.
+   *
    * Up to 4 sequences where the API will stop generating further tokens. The
    * returned text will not contain the stop sequence.
    */
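Taken together, the chat-completions changes are easiest to see from the caller's side: the new `o3` / `o4-mini` IDs are valid `model` values, and `'flex'` is now an accepted `service_tier`. A minimal sketch, assuming the published `openai` npm package, an `OPENAI_API_KEY` in the environment, and a project eligible for Flex Processing:

```ts
import OpenAI from 'openai';

const client = new OpenAI(); // reads OPENAI_API_KEY from the environment

async function main() {
  const completion = await client.chat.completions.create({
    model: 'o3', // one of the model IDs this commit adds to `ChatModel`
    messages: [{ role: 'user', content: 'Say hello in five words.' }],
    service_tier: 'flex', // newly accepted by the widened union
  });

  console.log(completion.choices[0].message.content);
  // The response reports the tier that actually served the request,
  // now typed as 'auto' | 'default' | 'flex' | null.
  console.log('service tier:', completion.service_tier);
}

main().catch(console.error);
```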

src/resources/completions.ts (+2)

@@ -293,6 +293,8 @@ export interface CompletionCreateParamsBase {
   seed?: number | null;
 
   /**
+   * Not supported with latest reasoning models `o3` and `o4-mini`.
+   *
    * Up to 4 sequences where the API will stop generating further tokens. The
    * returned text will not contain the stop sequence.
    */
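The added caveat restricts `stop` only for the newest reasoning models; for other models the parameter behaves as before. A hedged sketch against the legacy completions endpoint (the model choice here is an assumption, not part of the commit):

```ts
import OpenAI from 'openai';

const client = new OpenAI();

async function main() {
  const completion = await client.completions.create({
    model: 'gpt-3.5-turbo-instruct', // assumption: any non-reasoning completions model
    prompt: 'List three primes, comma-separated: 2,',
    max_tokens: 20,
    stop: ['\n'], // still honored here; per the new note, omit `stop` for `o3`/`o4-mini`
  });

  console.log(completion.choices[0].text); // generation halts before the stop sequence
}

main().catch(console.error);
```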

src/resources/responses/responses.ts (+44 −2)

@@ -321,7 +321,7 @@ export interface Response {
   metadata: Shared.Metadata | null;
 
   /**
-   * Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a
+   * Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a
    * wide range of models with different capabilities, performance characteristics,
    * and price points. Refer to the
    * [model guide](https://platform.openai.com/docs/models) to browse and compare

@@ -414,6 +414,27 @@ export interface Response {
    */
   reasoning?: Shared.Reasoning | null;
 
+  /**
+   * Specifies the latency tier to use for processing the request. This parameter is
+   * relevant for customers subscribed to the scale tier service:
+   *
+   * - If set to 'auto', and the Project is Scale tier enabled, the system will
+   *   utilize scale tier credits until they are exhausted.
+   * - If set to 'auto', and the Project is not Scale tier enabled, the request will
+   *   be processed using the default service tier with a lower uptime SLA and no
+   *   latency guarantee.
+   * - If set to 'default', the request will be processed using the default service
+   *   tier with a lower uptime SLA and no latency guarantee.
+   * - If set to 'flex', the request will be processed with the Flex Processing
+   *   service tier.
+   *   [Learn more](https://platform.openai.com/docs/guides/flex-processing).
+   * - When not set, the default behavior is 'auto'.
+   *
+   * When this parameter is set, the response body will include the `service_tier`
+   * utilized.
+   */
+  service_tier?: 'auto' | 'default' | 'flex' | null;
+
   /**
    * The status of the response generation. One of `completed`, `failed`,
    * `in_progress`, or `incomplete`.

@@ -2673,7 +2694,7 @@ export interface ResponseCreateParamsBase {
   input: string | ResponseInput;
 
   /**
-   * Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a
+   * Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a
    * wide range of models with different capabilities, performance characteristics,
    * and price points. Refer to the
    * [model guide](https://platform.openai.com/docs/models) to browse and compare

@@ -2740,6 +2761,27 @@ export interface ResponseCreateParamsBase {
    */
   reasoning?: Shared.Reasoning | null;
 
+  /**
+   * Specifies the latency tier to use for processing the request. This parameter is
+   * relevant for customers subscribed to the scale tier service:
+   *
+   * - If set to 'auto', and the Project is Scale tier enabled, the system will
+   *   utilize scale tier credits until they are exhausted.
+   * - If set to 'auto', and the Project is not Scale tier enabled, the request will
+   *   be processed using the default service tier with a lower uptime SLA and no
+   *   latency guarantee.
+   * - If set to 'default', the request will be processed using the default service
+   *   tier with a lower uptime SLA and no latency guarantee.
+   * - If set to 'flex', the request will be processed with the Flex Processing
+   *   service tier.
+   *   [Learn more](https://platform.openai.com/docs/guides/flex-processing).
+   * - When not set, the default behavior is 'auto'.
+   *
+   * When this parameter is set, the response body will include the `service_tier`
+   * utilized.
+   */
+  service_tier?: 'auto' | 'default' | 'flex' | null;
+
   /**
    * Whether to store the generated model response for later retrieval via API.
    */
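The same `service_tier` knob now appears on the Responses API, both as a create parameter and on the returned `Response` object. A minimal sketch under the same assumptions as above:

```ts
import OpenAI from 'openai';

const client = new OpenAI();

async function main() {
  const response = await client.responses.create({
    model: 'o4-mini', // also added to `ChatModel` in this commit
    input: 'Summarize Hamlet in one sentence.',
    service_tier: 'flex',
  });

  console.log(response.output_text);
  console.log('service tier:', response.service_tier); // tier actually used, per the new field
}

main().catch(console.error);
```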

src/resources/shared.ts (+15 −4)

@@ -15,6 +15,10 @@ export type ChatModel =
   | 'gpt-4.1-2025-04-14'
   | 'gpt-4.1-mini-2025-04-14'
   | 'gpt-4.1-nano-2025-04-14'
+  | 'o4-mini'
+  | 'o4-mini-2025-04-16'
+  | 'o3'
+  | 'o3-2025-04-16'
   | 'o3-mini'
   | 'o3-mini-2025-01-31'
   | 'o1'

@@ -187,13 +191,20 @@ export interface Reasoning {
   effort?: ReasoningEffort | null;
 
   /**
-   * **computer_use_preview only**
+   * @deprecated **Deprecated:** use `summary` instead.
    *
    * A summary of the reasoning performed by the model. This can be useful for
-   * debugging and understanding the model's reasoning process. One of `concise` or
-   * `detailed`.
+   * debugging and understanding the model's reasoning process. One of `auto`,
+   * `concise`, or `detailed`.
    */
-  generate_summary?: 'concise' | 'detailed' | null;
+  generate_summary?: 'auto' | 'concise' | 'detailed' | null;
+
+  /**
+   * A summary of the reasoning performed by the model. This can be useful for
+   * debugging and understanding the model's reasoning process. One of `auto`,
+   * `concise`, or `detailed`.
+   */
+  summary?: 'auto' | 'concise' | 'detailed' | null;
 }
 
 /**
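For callers, the `Reasoning` change is a small migration: `generate_summary` still type-checks (and now accepts `'auto'`) but is marked deprecated, with `summary` as its replacement. A sketch of the before/after on the Responses API:

```ts
import OpenAI from 'openai';

const client = new OpenAI();

async function main() {
  // Before this commit: reasoning: { effort: 'low', generate_summary: 'concise' }
  const response = await client.responses.create({
    model: 'o3',
    input: 'Why is the sky blue?',
    reasoning: { effort: 'low', summary: 'auto' }, // `summary` supersedes `generate_summary`
  });

  console.log(response.output_text);
}

main().catch(console.error);
```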

tests/api-resources/responses/responses.test.ts (+2 −1)

@@ -30,7 +30,8 @@ describe('resource responses', () => {
       metadata: { foo: 'string' },
       parallel_tool_calls: true,
       previous_response_id: 'previous_response_id',
-      reasoning: { effort: 'low', generate_summary: 'concise' },
+      reasoning: { effort: 'low', generate_summary: 'auto', summary: 'auto' },
+      service_tier: 'auto',
       store: true,
       stream: false,
       temperature: 1,
