vscode/extensions/copilot/src/platform/endpoint/node/routerDecisionFetcher.ts at 008b8b6de782bfc839d6a7c75bc9791d6d4528ed · microsoft/vscode · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Microsoft Corporation. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/

import { RequestType } from '@vscode/copilot-api';
import { Codicon } from '../../../util/vs/base/common/codicons';
import { IAuthenticationService } from '../../authentication/common/authentication';
import { ILogService } from '../../log/common/logService';
import { Response } from '../../networking/common/fetcherService';
import { IRequestLogger, LoggedRequestKind } from '../../requestLogger/node/requestLogger';
import { ITelemetryService } from '../../telemetry/common/telemetry';
import { ICAPIClientService } from '../common/capiClient';

/**
 * Shape of the JSON body returned by the model router endpoint, as parsed by
 * `RouterDecisionFetcher.getRouterDecision`.
 */
export interface RouterDecisionResponse {
	/** Classification label predicted by the router; `'fallback'` is one of the possible labels. */
	predicted_label: 'needs_reasoning' | 'no_reasoning' | 'fallback';
	/** Confidence of the routing decision. Logged as `confidence * 100` percent, so presumably a 0-1 fraction — TODO confirm. */
	confidence: number;
	/** Latency in milliseconds as reported in the response (separate from the client-measured end-to-end latency). */
	latency_ms: number;
	/** Candidate model identifiers returned by the router. */
	candidate_models: string[];
	/** Per-label scores. Logged as `score * 100` percent, so presumably 0-1 fractions — TODO confirm. */
	scores: {
		needs_reasoning: number;
		no_reasoning: number;
	};
	/**
	 * Logged as "Sticky Override" and treated as `false` when absent.
	 * NOTE(review): presumably set when sticky routing overrode the prediction — confirm against the router API.
	 */
	sticky_override?: boolean;
	/** Routing method used for the request; reported to telemetry as an empty string when absent. */
	routing_method?: string;
	/** Whether the router signaled a fallback; reported to telemetry as `false` when absent. */
	fallback?: boolean;
	/** Reason supplied by the server when `fallback` is true; reported to telemetry as an empty string when absent. */
	fallback_reason?: string;
	/** NOTE(review): not read in this file; presumably per-model scores for the "hydra" routing method — confirm. */
	hydra_scores?: Record<string, number>;
	/** NOTE(review): not read in this file; presumably the model the router picked — confirm. */
	chosen_model?: string;
	/** NOTE(review): not read in this file; meaning of "shortfall" is not visible here — confirm against the router API. */
	chosen_shortfall?: number;
}

/**
 * Optional context sent alongside the prompt. `RouterDecisionFetcher.getRouterDecision`
 * spreads these fields verbatim into the top level of the router request body.
 */
export interface RoutingContextSignals {
	/** NOTE(review): presumably the index of the current turn within the conversation — confirm base (0 or 1) with the caller. */
	turn_number?: number;
	/** NOTE(review): presumably an identifier for the chat session — confirm with the caller. */
	session_id?: string;
	/** NOTE(review): presumably the model used for the preceding turn — confirm with the caller. */
	previous_model?: string;
	/** NOTE(review): presumably the number of references attached to the request — confirm with the caller. */
	reference_count?: number;
	/** NOTE(review): presumably the prompt length in characters — confirm with the caller. */
	prompt_char_count?: number;
}

/**
 * Fetches routing decisions from a classification API to determine which model should handle a query.
 *
 * This class sends queries along with available models to a router API endpoint, which uses reasoning
 * classification to select the most appropriate model based on the query's requirements.
 *
 * Each successful decision is also written to the trace log, the request logger and telemetry.
 */
export class RouterDecisionFetcher {
	/** Time budget, in milliseconds, for the router request (headers and body) before it is aborted. */
	private static readonly _requestTimeoutMs = 1000;

	constructor(
		private readonly _capiClientService: ICAPIClientService,
		private readonly _authService: IAuthenticationService,
		private readonly _logService: ILogService,
		private readonly _telemetryService: ITelemetryService,
		private readonly _requestLogger: IRequestLogger,
	) {
	}

	/**
	 * Asks the model router which model(s) should handle `query`.
	 *
	 * @param query The user prompt; sent as `prompt` in the request body.
	 * @param autoModeToken Sent as the `Copilot-Session-Token` request header.
	 * @param availableModels Sent as `available_models` in the request body.
	 * @param stickyThreshold Sent as `sticky_threshold` when defined (including `0`).
	 * @param contextSignals Optional fields spread verbatim into the request body.
	 * @param conversationId Only used to tag the telemetry event.
	 * @param vscodeRequestId Only used to tag the telemetry event.
	 * @param routingMethod Sent as `routing_method` when non-empty.
	 * @returns The parsed router response.
	 * @throws Error when the response status is not OK, when the body is not valid JSON, or when the
	 * body lacks the `scores` object or `candidate_models` array. If the time budget elapses the
	 * request is aborted and the rejection raised by the underlying fetcher propagates unchanged.
	 */
	async getRouterDecision(query: string, autoModeToken: string, availableModels: string[], stickyThreshold?: number, contextSignals?: RoutingContextSignals, conversationId?: string, vscodeRequestId?: string, routingMethod?: string): Promise<RouterDecisionResponse> {
		const startTime = Date.now();
		const requestBody: Record<string, unknown> = { prompt: query, available_models: availableModels, ...contextSignals };
		if (stickyThreshold !== undefined) {
			requestBody.sticky_threshold = stickyThreshold;
		}
		if (routingMethod) {
			requestBody.routing_method = routingMethod;
		}
		const copilotToken = (await this._authService.getCopilotToken()).token;
		const abortController = new AbortController();
		const timeout = setTimeout(() => abortController.abort(), RouterDecisionFetcher._requestTimeoutMs);
		let text: string;
		try {
			const response = await this._capiClientService.makeRequest<Response>({
				method: 'POST',
				headers: {
					'Authorization': `Bearer ${copilotToken}`,
					'Copilot-Session-Token': autoModeToken,
				},
				body: JSON.stringify(requestBody),
				signal: abortController.signal,
			}, { type: RequestType.ModelRouter });

			if (!response.ok) {
				throw new Error(`Router decision request failed with status ${response.status}: ${response.statusText}`);
			}

			// Read the body while the abort timer is still armed so that a stalled
			// body counts against the same budget as the request itself.
			text = await response.text();
		} finally {
			clearTimeout(timeout);
		}

		const result = RouterDecisionFetcher._parseDecision(text);
		const e2eLatencyMs = Date.now() - startTime;

		const confidencePct = RouterDecisionFetcher._toPercent(result.confidence);
		const needsReasoningPct = RouterDecisionFetcher._toPercent(result.scores.needs_reasoning);
		const noReasoningPct = RouterDecisionFetcher._toPercent(result.scores.no_reasoning);

		this._logService.trace(`[RouterDecisionFetcher] Prediction: ${result.predicted_label}, (confidence: ${confidencePct}, scores: needs_reasoning=${needsReasoningPct}, no_reasoning=${noReasoningPct}) (latency_ms: ${result.latency_ms}, e2e_latency_ms: ${e2eLatencyMs}, candidate models: ${result.candidate_models.join(', ')}, sticky_override: ${result.sticky_override ?? false}, routing_method: ${result.routing_method ?? 'n/a'}, fallback: ${result.fallback ?? false})`);

		this._requestLogger.addEntry({
			type: LoggedRequestKind.MarkdownContentRequest,
			debugName: `Auto Mode Router`,
			startTimeMs: startTime,
			icon: Codicon.lightbulbSparkle,
			markdownContent: [
				`# Auto Mode Router Decision`,
				`## Result`,
				`- **Predicted Label**: ${result.predicted_label}`,
				`- **Confidence**: ${confidencePct}`,
				`- **Sticky Override**: ${result.sticky_override ?? false}`,
				`## Scores`,
				`- **Needs Reasoning**: ${needsReasoningPct}`,
				`- **No Reasoning**: ${noReasoningPct}`,
				`## Latency`,
				`- **Router Latency**: ${result.latency_ms}ms`,
				`- **E2E Latency**: ${e2eLatencyMs}ms`,
				`## Candidate Models`,
				...result.candidate_models.map(m => `- ${m}`),
				`## Query`,
				query,
			].join('\n'),
		});

		/* __GDPR__
			"automode.routerDecision" : {
				"owner": "lramos15",
				"comment": "Reports the routing decision made by the auto mode router API",
				"conversationId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The conversation ID in which the routing decision was made." },
				"vscodeRequestId": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The VS Code chat request id in which the routing decision was made." },
				"predictedLabel": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The predicted classification label (needs_reasoning, no_reasoning, or fallback)" },
				"routingMethod": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The routing method used for this request (empty=server default, binary, hydra). Identifies the A/B/C experiment path." },
				"fallback": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "Whether the router signaled a fallback to default auto mode selection." },
				"fallbackReason": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "comment": "The reason provided by the server when fallback is true." },
				"confidence": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "comment": "The confidence score of the routing decision" },
				"latencyMs": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "isMeasurement": true, "comment": "The latency of the router API call in milliseconds" },
				"e2eLatencyMs": { "classification": "SystemMetaData", "purpose": "PerformanceAndHealth", "isMeasurement": true, "comment": "The end-to-end latency of the router request in milliseconds, including network overhead" }
			}
		*/
		this._telemetryService.sendMSFTTelemetryEvent('automode.routerDecision',
			{
				conversationId: conversationId ?? '',
				vscodeRequestId: vscodeRequestId ?? '',
				predictedLabel: result.predicted_label,
				routingMethod: result.routing_method ?? '',
				fallback: String(result.fallback ?? false),
				fallbackReason: result.fallback_reason ?? '',
			},
			{
				confidence: result.confidence,
				latencyMs: result.latency_ms,
				e2eLatencyMs: e2eLatencyMs,
			}
		);
		return result;
	}

	/**
	 * Parses the router response body and checks the fields that the logging code
	 * dereferences unconditionally, so that a malformed body fails with a descriptive
	 * error rather than a `TypeError` raised from inside a log template.
	 */
	private static _parseDecision(text: string): RouterDecisionResponse {
		let parsed: unknown;
		try {
			parsed = JSON.parse(text);
		} catch (err) {
			const reason = err instanceof Error ? err.message : String(err);
			throw new Error(`Router decision response was not valid JSON: ${reason}`);
		}
		if (typeof parsed !== 'object' || parsed === null) {
			throw new Error('Router decision response was not a JSON object');
		}
		const candidate = parsed as Partial<RouterDecisionResponse>;
		if (typeof candidate.scores !== 'object' || candidate.scores === null) {
			throw new Error(`Router decision response is missing the 'scores' object`);
		}
		if (!Array.isArray(candidate.candidate_models)) {
			throw new Error(`Router decision response is missing the 'candidate_models' array`);
		}
		return candidate as RouterDecisionResponse;
	}

	/** Formats `value` multiplied by 100 with one decimal place and a trailing `%` (e.g. `0.5` becomes `50.0%`). */
	private static _toPercent(value: number): string {
		return `${(value * 100).toFixed(1)}%`;
	}
}