From 3d2702886a540d67faf35f4224f7df5c82358d57 Mon Sep 17 00:00:00 2001 From: Logan Ramos Date: Fri, 17 Oct 2025 13:23:07 -0400 Subject: [PATCH 1/3] Initial cache work --- .../endpoint/common/automodeService.ts | 173 +++++++++++++++--- 1 file changed, 147 insertions(+), 26 deletions(-) diff --git a/src/platform/endpoint/common/automodeService.ts b/src/platform/endpoint/common/automodeService.ts index e3f4682ca..f9bdecc17 100644 --- a/src/platform/endpoint/common/automodeService.ts +++ b/src/platform/endpoint/common/automodeService.ts @@ -23,6 +23,25 @@ interface AutoModeAPIResponse { session_token: string; } +/** + * Represents a cached auto mode token and the endpoint it maps to. + */ +interface CachedAutoToken { + readonly endpoint: IChatEndpoint; + readonly expiration: number; + readonly sessionToken: string; +} + +/** + * Holds the active and standby tokens for a conversation. + */ +interface ConversationCacheEntry { + active?: CachedAutoToken; + standby?: CachedAutoToken; +} + +const TOKEN_REFRESH_BUFFER_MS = 5 * 60 * 1000; + export const IAutomodeService = createServiceIdentifier('IAutomodeService'); export interface IAutomodeService { @@ -33,8 +52,9 @@ export interface IAutomodeService { export class AutomodeService extends Disposable implements IAutomodeService { readonly _serviceBrand: undefined; - private readonly _autoModelCache: Map = new Map(); - private readonly _taskSingler = new TaskSingler(); + private readonly _autoModelCache: Map = new Map(); + private _reserveToken: CachedAutoToken | undefined; + private readonly _taskSingler = new TaskSingler(); constructor( @@ -46,27 +66,114 @@ export class AutomodeService extends Disposable implements IAutomodeService { super(); this._register(this._authService.onDidAuthenticationChange(() => { this._autoModelCache.clear(); + this._reserveToken = undefined; })); this._serviceBrand = undefined; } - private async _updateAutoEndpointCache(chatRequest: ChatRequest | undefined, knownEndpoints: IChatEndpoint[]): Promise { - const startTime = Date.now(); + /** + * Resolve an auto mode endpoint using a double-buffer strategy and a global reserve token. + */ + async resolveAutoModeEndpoint(chatRequest: ChatRequest | undefined, knownEndpoints: IChatEndpoint[]): Promise { + if (!knownEndpoints.length) { + throw new Error('No auto mode endpoints provided.'); + } + const conversationId = getConversationId(chatRequest); - const cacheEntry = this._autoModelCache.get(conversationId); - const existingToken = cacheEntry?.autoModeToken; - const isExpired = cacheEntry && (cacheEntry.expiration <= Date.now()); + const entry = this._autoModelCache.get(conversationId) ?? {}; + if (!this._autoModelCache.has(conversationId)) { + this._autoModelCache.set(conversationId, entry); + } + + this._pruneExpiredTokens(entry); + if (!entry.active && entry.standby) { + entry.active = entry.standby; + entry.standby = undefined; + } + + if (!entry.active) { + entry.active = await this._acquireActiveToken(conversationId, entry, knownEndpoints); + } + + if (!entry.standby || !this._isTokenValid(entry.standby) || this._isExpiringSoon(entry.standby) || this._isExpiringSoon(entry.active)) { + this._refreshStandbyInBackground(conversationId, entry, knownEndpoints); + } + + this._ensureReserveRefill(knownEndpoints); + return entry.active.endpoint; + } + + /** + * Acquire or refresh the reserve token so that a future conversation can respond instantly. + */ + private _ensureReserveRefill(knownEndpoints: IChatEndpoint[]): void { + if (this._isTokenValid(this._reserveToken)) { + return; + } + + void this._taskSingler.getOrCreate('reserve', () => this._fetchToken(undefined, knownEndpoints)) + .then(token => { + this._reserveToken = token; + }) + .catch(err => { + this._logService.error(`Failed to refresh reserve auto mode token: ${err instanceof Error ? err.message : String(err)}`); + }); + } + + /** + * Acquire the active token for a conversation, promoting the reserve if available. + */ + private async _acquireActiveToken(conversationId: string, entry: ConversationCacheEntry, knownEndpoints: IChatEndpoint[]): Promise { + if (this._isTokenValid(this._reserveToken)) { + const token = this._reserveToken; + this._reserveToken = undefined; + return token; + } + + const sessionHint = entry.standby?.sessionToken ?? entry.active?.sessionToken; + return this._taskSingler.getOrCreate(`active:${conversationId}`, () => this._fetchToken(sessionHint, knownEndpoints)); + } + + /** + * Start a background refresh to populate or update the standby token. + */ + private _refreshStandbyInBackground(conversationId: string, entrySnapshot: ConversationCacheEntry, knownEndpoints: IChatEndpoint[]): void { + const sessionHint = entrySnapshot.standby?.sessionToken ?? entrySnapshot.active?.sessionToken; + void this._taskSingler.getOrCreate(`standby:${conversationId}`, () => this._fetchToken(sessionHint, knownEndpoints)) + .then(token => { + const entry = this._autoModelCache.get(conversationId); + if (!entry) { + return; + } + if (entry.active && entry.active.sessionToken === token.sessionToken) { + return; + } + entry.standby = token; + }) + .catch(err => { + this._logService.error(`Failed to refresh standby auto mode token for ${conversationId}: ${err instanceof Error ? err.message : String(err)}`); + }); + } + + /** + * Fetch a new token from the auto mode service. + */ + private async _fetchToken(sessionToken: string | undefined, knownEndpoints: IChatEndpoint[]): Promise { + const startTime = Date.now(); + // Add 3s delay to test slow latency + await new Promise(resolve => setTimeout(resolve, 3000)); const authToken = (await this._authService.getCopilotToken()).token; const headers: Record = { 'Content-Type': 'application/json', 'Authorization': `Bearer ${authToken}` }; - if (existingToken && !isExpired) { - headers['Copilot-Session-Token'] = existingToken; + if (sessionToken) { + headers['Copilot-Session-Token'] = sessionToken; } + const response = await this._capiClientService.makeRequest({ json: { - "auto_mode": { "model_hints": ["auto"] }, + 'auto_mode': { 'model_hints': ['auto'] } }, headers, method: 'POST' @@ -74,27 +181,41 @@ export class AutomodeService extends Disposable implements IAutomodeService { const data: AutoModeAPIResponse = await response.json() as AutoModeAPIResponse; const selectedModel = knownEndpoints.find(e => e.model === data.selected_model) || knownEndpoints[0]; const autoEndpoint = new AutoChatEndpoint(selectedModel, this._chatMLFetcher, data.session_token, data.discounted_costs?.[selectedModel.model] || 0); - this._autoModelCache.set(conversationId, { + this._logService.info(`Fetched auto model in ${Date.now() - startTime}ms.`); + return { endpoint: autoEndpoint, expiration: data.expires_at * 1000, - autoModeToken: data.session_token, - lastRequestId: chatRequest?.id - }); - this._logService.info(`Fetched auto model in ${Date.now() - startTime}ms.`); - return autoEndpoint; + sessionToken: data.session_token + }; } - async resolveAutoModeEndpoint(chatRequest: ChatRequest | undefined, knownEndpoints: IChatEndpoint[]): Promise { - const cacheEntry = this._autoModelCache.get(getConversationId(chatRequest)); - const expiringSoon = cacheEntry && (cacheEntry.expiration - Date.now() < 5 * 60 * 1000); - const isExpired = cacheEntry && (cacheEntry.expiration < Date.now()); - if (cacheEntry && !expiringSoon) { // Not expiring soon -> Return cached - return cacheEntry.endpoint; - } else if (cacheEntry && expiringSoon && !isExpired && chatRequest?.id === cacheEntry.lastRequestId) { // Expiring soon but the request is the same, so keep model sticky - return cacheEntry.endpoint; - } else { // Either no cache, it's expiring soon and a new request, or it has expired - return this._taskSingler.getOrCreate(getConversationId(chatRequest), () => this._updateAutoEndpointCache(chatRequest, knownEndpoints)); + /** + * Remove expired tokens so they are not considered during promotion. + */ + private _pruneExpiredTokens(entry: ConversationCacheEntry): void { + if (entry.active && !this._isTokenValid(entry.active)) { + entry.active = undefined; + } + if (entry.standby && !this._isTokenValid(entry.standby)) { + entry.standby = undefined; + } + } + + /** + * Determine whether a token is still valid. + */ + private _isTokenValid(token: CachedAutoToken | undefined): token is CachedAutoToken { + return !!token && token.expiration > Date.now(); + } + + /** + * Determine whether a token should be refreshed soon. + */ + private _isExpiringSoon(token: CachedAutoToken | undefined): boolean { + if (!token) { + return false; } + return token.expiration - Date.now() <= TOKEN_REFRESH_BUFFER_MS; } } From 9992e5139f8664f017b70ce2608c410e455704be Mon Sep 17 00:00:00 2001 From: Logan Ramos Date: Fri, 17 Oct 2025 13:42:00 -0400 Subject: [PATCH 2/3] Add some logging --- src/platform/endpoint/common/automodeService.ts | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/src/platform/endpoint/common/automodeService.ts b/src/platform/endpoint/common/automodeService.ts index f9bdecc17..6d85f7424 100644 --- a/src/platform/endpoint/common/automodeService.ts +++ b/src/platform/endpoint/common/automodeService.ts @@ -40,8 +40,6 @@ interface ConversationCacheEntry { standby?: CachedAutoToken; } -const TOKEN_REFRESH_BUFFER_MS = 5 * 60 * 1000; - export const IAutomodeService = createServiceIdentifier('IAutomodeService'); export interface IAutomodeService { @@ -111,7 +109,7 @@ export class AutomodeService extends Disposable implements IAutomodeService { return; } - void this._taskSingler.getOrCreate('reserve', () => this._fetchToken(undefined, knownEndpoints)) + void this._taskSingler.getOrCreate('reserve', () => this._fetchToken('reserve', undefined, knownEndpoints)) .then(token => { this._reserveToken = token; }) @@ -131,7 +129,7 @@ export class AutomodeService extends Disposable implements IAutomodeService { } const sessionHint = entry.standby?.sessionToken ?? entry.active?.sessionToken; - return this._taskSingler.getOrCreate(`active:${conversationId}`, () => this._fetchToken(sessionHint, knownEndpoints)); + return this._taskSingler.getOrCreate(`active:${conversationId}`, () => this._fetchToken('active', sessionHint, knownEndpoints)); } /** @@ -139,7 +137,7 @@ export class AutomodeService extends Disposable implements IAutomodeService { */ private _refreshStandbyInBackground(conversationId: string, entrySnapshot: ConversationCacheEntry, knownEndpoints: IChatEndpoint[]): void { const sessionHint = entrySnapshot.standby?.sessionToken ?? entrySnapshot.active?.sessionToken; - void this._taskSingler.getOrCreate(`standby:${conversationId}`, () => this._fetchToken(sessionHint, knownEndpoints)) + void this._taskSingler.getOrCreate(`standby:${conversationId}`, () => this._fetchToken('standby', sessionHint, knownEndpoints)) .then(token => { const entry = this._autoModelCache.get(conversationId); if (!entry) { @@ -158,7 +156,7 @@ export class AutomodeService extends Disposable implements IAutomodeService { /** * Fetch a new token from the auto mode service. */ - private async _fetchToken(sessionToken: string | undefined, knownEndpoints: IChatEndpoint[]): Promise { + private async _fetchToken(debugName: string, sessionToken: string | undefined, knownEndpoints: IChatEndpoint[]): Promise { const startTime = Date.now(); // Add 3s delay to test slow latency await new Promise(resolve => setTimeout(resolve, 3000)); @@ -181,7 +179,7 @@ export class AutomodeService extends Disposable implements IAutomodeService { const data: AutoModeAPIResponse = await response.json() as AutoModeAPIResponse; const selectedModel = knownEndpoints.find(e => e.model === data.selected_model) || knownEndpoints[0]; const autoEndpoint = new AutoChatEndpoint(selectedModel, this._chatMLFetcher, data.session_token, data.discounted_costs?.[selectedModel.model] || 0); - this._logService.info(`Fetched auto model in ${Date.now() - startTime}ms.`); + this._logService.trace(`Fetched auto model for ${debugName} in ${Date.now() - startTime}ms.`); return { endpoint: autoEndpoint, expiration: data.expires_at * 1000, @@ -215,7 +213,7 @@ export class AutomodeService extends Disposable implements IAutomodeService { if (!token) { return false; } - return token.expiration - Date.now() <= TOKEN_REFRESH_BUFFER_MS; + return token.expiration - Date.now() <= 5 * 60 * 1000; } } From a738f81d0ff8605eb960cb1a6082f493b4b906ee Mon Sep 17 00:00:00 2001 From: Logan Ramos Date: Fri, 17 Oct 2025 14:43:32 -0400 Subject: [PATCH 3/3] remove fake timeout --- src/platform/endpoint/common/automodeService.ts | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/platform/endpoint/common/automodeService.ts b/src/platform/endpoint/common/automodeService.ts index 6d85f7424..a48381397 100644 --- a/src/platform/endpoint/common/automodeService.ts +++ b/src/platform/endpoint/common/automodeService.ts @@ -158,8 +158,7 @@ export class AutomodeService extends Disposable implements IAutomodeService { */ private async _fetchToken(debugName: string, sessionToken: string | undefined, knownEndpoints: IChatEndpoint[]): Promise { const startTime = Date.now(); - // Add 3s delay to test slow latency - await new Promise(resolve => setTimeout(resolve, 3000)); + const authToken = (await this._authService.getCopilotToken()).token; const headers: Record = { 'Content-Type': 'application/json',