Skip to content

Commit a0a9af3

Browse files
aashna and Aashna Garg authored
Add sticky_threshold and sticky_override to router decision API (#4359)
Support threshold-based sticky routing to prevent mid-conversation model switching on low-confidence predictions.

Changes:
- Add sticky_override bool to RouterDecisionResponse interface and validator
- Add optional sticky_threshold parameter to getRouterDecision()
- Send sticky_threshold in request body when provided
- Log sticky_override in trace output and request logger markdown
- AutomodeService logs sticky override events for observability

Co-authored-by: Aashna Garg <aashnagarg@microsoft.com>
1 parent 849a149 commit a0a9af3

File tree

3 files changed

+20
-8
lines changed

3 files changed

+20
-8
lines changed

src/platform/endpoint/node/automodeService.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -228,6 +228,9 @@ export class AutomodeService extends Disposable implements IAutomodeService {
228228
selectedModel ??= knownEndpoints.find(e => e.model === result.chosen_model);
229229
if (selectedModel) {
230230
lastRoutedPrompt = prompt;
231+
if (result.sticky_override) {
232+
this._logService.trace(`[AutomodeService] Sticky routing override: confidence=${(result.confidence * 100).toFixed(1)}%, label=${result.predicted_label}, router_model=${result.chosen_model}, actual_model=${selectedModel.model}`);
233+
}
231234
}
232235
} catch (e) {
233236
this._logService.error(`Failed to get routed model for conversation ${conversationId}:`, (e as Error).message);

src/platform/endpoint/node/routerDecisionFetcher.ts

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
import { RequestType } from '@vscode/copilot-api';
77
import { Codicon } from '../../../util/vs/base/common/codicons';
88
import { IAuthenticationService } from '../../authentication/common/authentication';
9-
import { IValidator, vArray, vEnum, vNumber, vObj, vRequired, vString } from '../../configuration/common/validator';
9+
import { IValidator, vArray, vBoolean, vEnum, vNumber, vObj, vRequired, vString } from '../../configuration/common/validator';
1010
import { ILogService } from '../../log/common/logService';
1111
import { Response } from '../../networking/common/fetcherService';
1212
import { IRequestLogger, LoggedRequestKind } from '../../requestLogger/node/requestLogger';
@@ -23,6 +23,7 @@ export interface RouterDecisionResponse {
2323
needs_reasoning: number;
2424
no_reasoning: number;
2525
};
26+
sticky_override?: boolean;
2627
}
2728

2829
const routerDecisionResponseValidator: IValidator<RouterDecisionResponse> = vObj({
@@ -34,7 +35,8 @@ const routerDecisionResponseValidator: IValidator<RouterDecisionResponse> = vObj
3435
scores: vRequired(vObj({
3536
needs_reasoning: vRequired(vNumber()),
3637
no_reasoning: vRequired(vNumber())
37-
}))
38+
})),
39+
sticky_override: vBoolean()
3840
});
3941

4042
/**
@@ -53,15 +55,19 @@ export class RouterDecisionFetcher {
5355
) {
5456
}
5557

56-
async getRouterDecision(query: string, autoModeToken: string, availableModels: string[]): Promise<RouterDecisionResponse> {
58+
async getRouterDecision(query: string, autoModeToken: string, availableModels: string[], stickyThreshold?: number): Promise<RouterDecisionResponse> {
5759
const startTime = Date.now();
60+
const requestBody: Record<string, unknown> = { prompt: query, available_models: availableModels };
61+
if (stickyThreshold !== undefined) {
62+
requestBody.sticky_threshold = stickyThreshold;
63+
}
5864
const response = await this._capiClientService.makeRequest<Response>({
5965
method: 'POST',
6066
headers: {
6167
'Authorization': `Bearer ${(await this._authService.getCopilotToken()).token}`,
6268
'Copilot-Session-Token': autoModeToken,
6369
},
64-
body: JSON.stringify({ prompt: query, available_models: availableModels })
70+
body: JSON.stringify(requestBody)
6571
}, { type: RequestType.ModelRouter });
6672

6773
if (!response.ok) {
@@ -74,7 +80,7 @@ export class RouterDecisionFetcher {
7480
throw new Error(`Invalid router decision response: ${validationError.message}`);
7581
}
7682
const e2eLatencyMs = Date.now() - startTime;
77-
this._logService.trace(`[RouterDecisionFetcher] Prediction: ${result.predicted_label}, model: ${result.chosen_model} (confidence: ${(result.confidence * 100).toFixed(1)}%, scores: needs_reasoning=${(result.scores.needs_reasoning * 100).toFixed(1)}%, no_reasoning=${(result.scores.no_reasoning * 100).toFixed(1)}%) (latency_ms: ${result.latency_ms}, e2e_latency_ms: ${e2eLatencyMs}, candidate models: ${result.candidate_models.join(', ')})`);
83+
this._logService.trace(`[RouterDecisionFetcher] Prediction: ${result.predicted_label}, model: ${result.chosen_model} (confidence: ${(result.confidence * 100).toFixed(1)}%, scores: needs_reasoning=${(result.scores.needs_reasoning * 100).toFixed(1)}%, no_reasoning=${(result.scores.no_reasoning * 100).toFixed(1)}%) (latency_ms: ${result.latency_ms}, e2e_latency_ms: ${e2eLatencyMs}, candidate models: ${result.candidate_models.join(', ')}, sticky_override: ${result.sticky_override ?? false})`);
7884

7985
this._requestLogger.addEntry({
8086
type: LoggedRequestKind.MarkdownContentRequest,
@@ -87,7 +93,8 @@ export class RouterDecisionFetcher {
8793
`- **Predicted Label**: ${result.predicted_label}`,
8894
`- **Chosen Model**: ${result.chosen_model}`,
8995
`- **Confidence**: ${(result.confidence * 100).toFixed(1)}%`,
90-
`## Scores`,
96+
`- **Sticky Override**: ${result.sticky_override ?? false}`,
97+
`## Scores`,
9198
`- **Needs Reasoning**: ${(result.scores.needs_reasoning * 100).toFixed(1)}%`,
9299
`- **No Reasoning**: ${(result.scores.no_reasoning * 100).toFixed(1)}%`,
93100
`## Latency`,

src/platform/endpoint/node/test/automodeService.spec.ts

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,8 @@ describe('AutomodeService', () => {
160160
latency_ms: 50,
161161
chosen_model: 'gpt-4o',
162162
candidate_models: ['gpt-4o', 'gpt-4o-mini'],
163-
scores: { needs_reasoning: 0.85, no_reasoning: 0.15 }
163+
scores: { needs_reasoning: 0.85, no_reasoning: 0.15 },
164+
sticky_override: false
164165
}))
165166
});
166167
}
@@ -367,7 +368,8 @@ describe('AutomodeService', () => {
367368
latency_ms: 30,
368369
chosen_model: routerResult.chosen_model,
369370
candidate_models: routerResult.candidate_models,
370-
scores: { needs_reasoning: 0.9, no_reasoning: 0.1 }
371+
scores: { needs_reasoning: 0.9, no_reasoning: 0.1 },
372+
sticky_override: false
371373
}))
372374
});
373375
}

0 commit comments

Comments
 (0)