@@ -27,7 +27,9 @@ import {
27
27
Availability ,
28
28
LanguageModel ,
29
29
LanguageModelCreateOptions ,
30
- LanguageModelMessageContent
30
+ LanguageModelExpectedInput ,
31
+ LanguageModelMessageContent ,
32
+ LanguageModelMessageType
31
33
} from '../types/language-model' ;
32
34
33
35
/**
@@ -44,9 +46,7 @@ export class ChromeAdapter {
44
46
private languageModelProvider ?: LanguageModel ,
45
47
private mode ?: InferenceMode ,
46
48
private onDeviceParams : LanguageModelCreateOptions = { }
47
- ) {
48
- this . addImageTypeAsExpectedInput ( ) ;
49
- }
49
+ ) { }
50
50
51
51
/**
52
52
* Checks if a given request can be made on-device.
@@ -68,8 +68,10 @@ export class ChromeAdapter {
68
68
return false ;
69
69
}
70
70
71
+ const expectedInputs = ChromeAdapter . extractExpectedInputs ( request ) ;
72
+
71
73
// Triggers out-of-band download so model will eventually become available.
72
- const availability = await this . downloadIfAvailable ( ) ;
74
+ const availability = await this . downloadIfAvailable ( expectedInputs ) ;
73
75
74
76
if ( this . mode === 'only_on_device' ) {
75
77
return true ;
@@ -129,6 +131,33 @@ export class ChromeAdapter {
129
131
) ;
130
132
}
131
133
134
/**
 * Maps
 * <a href="https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/inference#blob">
 * Vertex's input mime types</a> to
 * <a href="https://github.com/webmachinelearning/prompt-api?tab=readme-ov-file#full-api-surface-in-web-idl">
 * Chrome's expected types</a>.
 *
 * <p>Chrome's API checks availability by type. It's tedious to specify the types in advance, so
 * this method infers the types from the inline-data parts of the request.</p>
 *
 * <p>Each distinct top-level mime type (the text before the first '/') yields exactly one entry,
 * in first-seen order.</p>
 *
 * @param request - Request whose `contents[].parts[].inlineData.mimeType` values are inspected.
 * @returns One expected-input descriptor per distinct top-level mime type; an empty array when
 * no part carries inline data.
 */
private static extractExpectedInputs(
  request: GenerateContentRequest
): LanguageModelExpectedInput[] {
  // Dedupe on the primitive type string. A Set of `{ type }` object literals compares members
  // by reference, so it would keep one entry per part instead of one entry per type.
  const types = new Set<LanguageModelMessageType>();
  for (const content of request.contents) {
    for (const part of content.parts) {
      if (part.inlineData) {
        // NOTE(review): unchecked cast — assumes the mime prefix is one of the types Chrome
        // accepts; confirm how unsupported prefixes should be handled.
        const type = part.inlineData.mimeType.split(
          '/'
        )[0] as LanguageModelMessageType;
        types.add(type);
      }
    }
  }
  // Set iteration follows insertion order, so the result is deterministic for a given request.
  return Array.from(types, type => ({ type }));
}
160
+
132
161
/**
133
162
* Asserts inference for the given request can be performed by an on-device model.
134
163
*/
@@ -164,12 +193,20 @@ export class ChromeAdapter {
164
193
/**
165
194
* Encapsulates logic to get availability and download a model if one is downloadable.
166
195
*/
167
- private async downloadIfAvailable ( ) : Promise < Availability | undefined > {
196
+ private async downloadIfAvailable (
197
+ expectedInputs : LanguageModelExpectedInput [ ]
198
+ ) : Promise < Availability | undefined > {
199
+ // Side-effect: updates construction-time params with request-time params.
200
+ // This is required because params are referenced through multiple flows.
201
+ Object . assign ( this . onDeviceParams , { expectedInputs } ) ;
202
+
168
203
const availability = await this . languageModelProvider ?. availability (
169
204
this . onDeviceParams
170
205
) ;
171
206
172
207
if ( availability === Availability . downloadable ) {
208
+ // Side-effect: triggers out-of-band model download.
209
+ // This is required because Chrome manages the model download.
173
210
this . download ( ) ;
174
211
}
175
212
@@ -252,11 +289,6 @@ export class ChromeAdapter {
252
289
return newSession ;
253
290
}
254
291
255
- private addImageTypeAsExpectedInput ( ) : void {
256
- // Defaults to support image inputs for convenience.
257
- this . onDeviceParams . expectedInputs ??= [ { type : 'image' } ] ;
258
- }
259
-
260
292
/**
261
293
* Formats string returned by Chrome as a {@link Response} returned by Vertex.
262
294
*/
0 commit comments