diff --git a/src/platform/configuration/common/configurationService.ts b/src/platform/configuration/common/configurationService.ts index bcb7c1aab..85a168fdf 100644 --- a/src/platform/configuration/common/configurationService.ts +++ b/src/platform/configuration/common/configurationService.ts @@ -828,6 +828,12 @@ export namespace ConfigKey { export const InstantApplyModelName = defineTeamInternalSetting('chat.advanced.instantApply.modelName', ConfigType.ExperimentBased, CHAT_MODEL.GPT4OPROXY); export const VerifyTextDocumentChanges = defineTeamInternalSetting('chat.advanced.inlineEdits.verifyTextDocumentChanges', ConfigType.ExperimentBased, false); export const UseAutoModeRouting = defineTeamInternalSetting('chat.advanced.useAutoModeRouter', ConfigType.ExperimentBased, false); + /** Controls which routing method the auto-intent-service uses per-request. + * '' (empty/default) = use server default. + * 'binary' = binary classifier v1 (path B). + * 'hydra' = HYDRA multi-head capability matching (path C). + * Used for A/B/C experiments: A=automod, B=binary, C=hydra. */ + export const AutoModeRoutingMethod = defineTeamInternalSetting('chat.advanced.autoModeRoutingMethod', ConfigType.ExperimentBased, '', undefined, undefined, { experimentName: 'copilotchat.autoModeRoutingMethod' }); /** Inline Completions */ export const InlineCompletionsDefaultDiagnosticsOptions = defineTeamInternalSetting('chat.advanced.inlineCompletions.defaultDiagnosticsOptionsString', ConfigType.ExperimentBased, undefined); diff --git a/src/platform/endpoint/node/automodeService.ts b/src/platform/endpoint/node/automodeService.ts index 830f91f3c..fb3810ff5 100644 --- a/src/platform/endpoint/node/automodeService.ts +++ b/src/platform/endpoint/node/automodeService.ts @@ -324,7 +324,13 @@ export class AutomodeService extends Disposable implements IAutomodeService { previous_model: entry?.endpoint?.model, turn_number: (entry?.turnCount ?? 
0) + 1, }; - const result = await this._routerDecisionFetcher.getRouterDecision(prompt, token.session_token, token.available_models, undefined, contextSignals, chatRequest?.sessionId, chatRequest?.id); + const routingMethod = this._configurationService.getExperimentBasedConfig(ConfigKey.TeamInternal.AutoModeRoutingMethod, this._expService) || undefined; + const result = await this._routerDecisionFetcher.getRouterDecision(prompt, token.session_token, token.available_models, undefined, contextSignals, chatRequest?.sessionId, chatRequest?.id, routingMethod); + + if (result.fallback) { + this._logService.info(`[AutomodeService] Router signaled fallback: ${result.fallback_reason ?? 'unknown'}, routing_method=${result.routing_method ?? 'n/a'}`); + return { lastRoutedPrompt: prompt, fallbackReason: `routerFallback:${result.fallback_reason ?? 'unknown'}` }; + } if (!result.candidate_models.length) { return { lastRoutedPrompt: prompt, fallbackReason: 'emptyCandidateList' }; diff --git a/src/platform/endpoint/node/routerDecisionFetcher.ts b/src/platform/endpoint/node/routerDecisionFetcher.ts index 948d68366..b98754ee6 100644 --- a/src/platform/endpoint/node/routerDecisionFetcher.ts +++ b/src/platform/endpoint/node/routerDecisionFetcher.ts @@ -13,7 +13,7 @@ import { ITelemetryService } from '../../telemetry/common/telemetry'; import { ICAPIClientService } from '../common/capiClient'; export interface RouterDecisionResponse { - predicted_label: 'needs_reasoning' | 'no_reasoning'; + predicted_label: 'needs_reasoning' | 'no_reasoning' | 'fallback'; confidence: number; latency_ms: number; candidate_models: string[]; @@ -22,6 +22,12 @@ export interface RouterDecisionResponse { no_reasoning: number; }; sticky_override?: boolean; + routing_method?: string; + fallback?: boolean; + fallback_reason?: string; + hydra_scores?: Record<string, number>; + chosen_model?: string; + chosen_shortfall?: number; } export interface RoutingContextSignals { @@ -48,7 +54,7 @@ export class RouterDecisionFetcher { ) { 
} - async getRouterDecision(query: string, autoModeToken: string, availableModels: string[], stickyThreshold?: number, contextSignals?: RoutingContextSignals, conversationId?: string, vscodeRequestId?: string): Promise<RouterDecisionResponse> { + async getRouterDecision(query: string, autoModeToken: string, availableModels: string[], stickyThreshold?: number, contextSignals?: RoutingContextSignals, conversationId?: string, vscodeRequestId?: string, routingMethod?: string): Promise<RouterDecisionResponse> { const startTime = Date.now(); - const requestBody: Record<string, unknown> = { prompt: query, available_models: availableModels, ...contextSignals }; + const requestBody: Record<string, unknown> = { prompt: query, available_models: availableModels, ...contextSignals, routing_method: routingMethod }; if (stickyThreshold !== undefined) { @@ -79,7 +85,7 @@ export class RouterDecisionFetcher { const text = await response.text(); const result: RouterDecisionResponse = JSON.parse(text); const e2eLatencyMs = Date.now() - startTime; - this._logService.trace(`[RouterDecisionFetcher] Prediction: ${result.predicted_label}, (confidence: ${(result.confidence * 100).toFixed(1)}%, scores: needs_reasoning=${(result.scores.needs_reasoning * 100).toFixed(1)}%, no_reasoning=${(result.scores.no_reasoning * 100).toFixed(1)}%) (latency_ms: ${result.latency_ms}, e2e_latency_ms: ${e2eLatencyMs}, candidate models: ${result.candidate_models.join(', ')}, sticky_override: ${result.sticky_override ?? false})`); + this._logService.trace(`[RouterDecisionFetcher] Prediction: ${result.predicted_label}, (confidence: ${(result.confidence * 100).toFixed(1)}%, scores: needs_reasoning=${(result.scores.needs_reasoning * 100).toFixed(1)}%, no_reasoning=${(result.scores.no_reasoning * 100).toFixed(1)}%) (latency_ms: ${result.latency_ms}, e2e_latency_ms: ${e2eLatencyMs}, candidate models: ${result.candidate_models.join(', ')}, sticky_override: ${result.sticky_override ?? false}, routing_method: ${result.routing_method ?? 'n/a'}, fallback: ${result.fallback ?? false})`); this._requestLogger.addEntry({ type: LoggedRequestKind.MarkdownContentRequest,