Skip to content

Commit 1bdde0d

Browse files
authored
Merge 2dab7ea into 31261ca
2 parents 31261ca + 2dab7ea commit 1bdde0d

File tree

3 files changed

+51
-15
lines changed

3 files changed

+51
-15
lines changed

packages/vertexai/src/methods/chrome-adapter.test.ts

+6-2
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ async function toStringArray(
5353

5454
describe('ChromeAdapter', () => {
5555
describe('constructor', () => {
56-
it('sets image as expected input type by default', async () => {
56+
it('determines expected inputs by request inspection', async () => {
5757
const languageModelProvider = {
5858
availability: () => Promise.resolve(Availability.available)
5959
} as LanguageModel;
@@ -69,7 +69,11 @@ describe('ChromeAdapter', () => {
6969
contents: [
7070
{
7171
role: 'user',
72-
parts: [{ text: 'hi' }]
72+
parts: [
73+
{ text: 'hi' },
74+
// Triggers image as expected type.
75+
{ inlineData: { mimeType: 'image/asd', data: 'asd' } }
76+
]
7377
}
7478
]
7579
});

packages/vertexai/src/methods/chrome-adapter.ts

+43-11
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,9 @@ import {
2727
Availability,
2828
LanguageModel,
2929
LanguageModelCreateOptions,
30-
LanguageModelMessageContent
30+
LanguageModelExpectedInput,
31+
LanguageModelMessageContent,
32+
LanguageModelMessageType
3133
} from '../types/language-model';
3234

3335
/**
@@ -44,9 +46,7 @@ export class ChromeAdapter {
4446
private languageModelProvider?: LanguageModel,
4547
private mode?: InferenceMode,
4648
private onDeviceParams: LanguageModelCreateOptions = {}
47-
) {
48-
this.addImageTypeAsExpectedInput();
49-
}
49+
) {}
5050

5151
/**
5252
* Checks if a given request can be made on-device.
@@ -68,8 +68,10 @@ export class ChromeAdapter {
6868
return false;
6969
}
7070

71+
const expectedInputs = ChromeAdapter.extractExpectedInputs(request);
72+
7173
// Triggers out-of-band download so model will eventually become available.
72-
const availability = await this.downloadIfAvailable();
74+
const availability = await this.downloadIfAvailable(expectedInputs);
7375

7476
if (this.mode === 'only_on_device') {
7577
return true;
@@ -129,6 +131,33 @@ export class ChromeAdapter {
129131
);
130132
}
131133

134+
/**
 * Maps
 * <a href="https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/inference#blob">
 * Vertex's input mime types</a> to
 * <a href="https://github.com/webmachinelearning/prompt-api?tab=readme-ov-file#full-api-surface-in-web-idl">
 * Chrome's expected types</a>.
 *
 * <p>Chrome's API checks availability by type. It's tedious to specify the types in advance, so
 * this method infers them from the request's inline-data parts.</p>
 *
 * @param request - Request whose `contents[].parts[]` are scanned for `inlineData`.
 * @returns One entry per distinct top-level mime type (the text before the first `/`), in
 * first-seen order. Empty when no part carries inline data.
 */
private static extractExpectedInputs(
  request: GenerateContentRequest
): LanguageModelExpectedInput[] {
  // Collect the primitive type names rather than `{ type }` objects: a Set compares objects by
  // identity, so freshly created object literals would never be de-duplicated.
  const types = new Set<LanguageModelMessageType>();
  for (const content of request.contents) {
    for (const part of content.parts) {
      if (part.inlineData) {
        // E.g. 'image/png' -> 'image'.
        // NOTE(review): the cast is unchecked, so prefixes outside the declared message types
        // (e.g. 'application', 'video') are passed through as-is — confirm that is intended.
        const type = part.inlineData.mimeType.split(
          '/'
        )[0] as LanguageModelMessageType;
        types.add(type);
      }
    }
  }
  return Array.from(types, type => ({ type }));
}
160+
132161
/**
133162
* Asserts inference for the given request can be performed by an on-device model.
134163
*/
@@ -164,12 +193,20 @@ export class ChromeAdapter {
164193
/**
165194
* Encapsulates logic to get availability and download a model if one is downloadable.
166195
*/
167-
private async downloadIfAvailable(): Promise<Availability | undefined> {
196+
private async downloadIfAvailable(
197+
expectedInputs: LanguageModelExpectedInput[]
198+
): Promise<Availability | undefined> {
199+
// Side-effect: updates construction-time params with request-time params.
200+
// This is required because params are referenced through multiple flows.
201+
Object.assign(this.onDeviceParams, { expectedInputs });
202+
168203
const availability = await this.languageModelProvider?.availability(
169204
this.onDeviceParams
170205
);
171206

172207
if (availability === Availability.downloadable) {
208+
// Side-effect: triggers out-of-band model download.
209+
// This is required because Chrome manages the model download.
173210
this.download();
174211
}
175212

@@ -252,11 +289,6 @@ export class ChromeAdapter {
252289
return newSession;
253290
}
254291

255-
private addImageTypeAsExpectedInput(): void {
256-
// Defaults to support image inputs for convenience.
257-
this.onDeviceParams.expectedInputs ??= [{ type: 'image' }];
258-
}
259-
260292
/**
261293
* Formats string returned by Chrome as a {@link Response} returned by Vertex.
262294
*/

packages/vertexai/src/types/language-model.ts

+2-2
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ export interface LanguageModelCreateOptions
5252
interface LanguageModelPromptOptions {
5353
signal?: AbortSignal;
5454
}
55-
interface LanguageModelExpectedInput {
55+
/**
 * Describes one kind of input a caller expects to send to the language model.
 * Exported so code outside this module can build lists of expected inputs.
 */
export interface LanguageModelExpectedInput {
5656
/** Kind of message content expected. */
type: LanguageModelMessageType;
5757
/** Optional list of languages; presumably language tags for this input type — TODO confirm. */
languages?: string[];
5858
}
@@ -74,7 +74,7 @@ export interface LanguageModelMessageContent {
7474
content: LanguageModelMessageContentValue;
7575
}
7676
type LanguageModelMessageRole = 'system' | 'user' | 'assistant';
77-
type LanguageModelMessageType = 'text' | 'image' | 'audio';
77+
export type LanguageModelMessageType = 'text' | 'image' | 'audio';
7878
type LanguageModelMessageContentValue =
7979
| ImageBitmapSource
8080
| AudioBuffer

0 commit comments

Comments
 (0)