Skip to content

Commit 7edc35f

Browse files
committed
[TASK] Evaluate grade scores in smart routing middleware
1 parent 96e2a1d commit 7edc35f

3 files changed

Lines changed: 127 additions & 3 deletions

File tree

Classes/Domain/Repository/RequestLogRepository.php

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -229,10 +229,11 @@ public function getStatisticsByExtension(): array
229229
* Performance profile per model for a given request type.
230230
* Used by smart routing middleware.
231231
*
232-
* @return list<array{model_used: string, request_count: int, avg_cost: float, avg_duration_ms: int, success_rate: float, avg_tokens: int}>
232+
* @return list<array{model_used: string, request_count: int, avg_cost: float, avg_duration_ms: int, success_rate: float, avg_tokens: int, graded_count: int, avg_grade_score: float}>
233233
*/
234234
public function getModelPerformanceProfile(string $requestType = ''): array
235235
{
236+
$done = GradeStatus::Done->value;
236237
$qb = $this->getQueryBuilder();
237238
$qb->addSelectLiteral(
238239
'model_used',
@@ -241,6 +242,8 @@ public function getModelPerformanceProfile(string $requestType = ''): array
241242
'AVG(duration_ms) AS avg_duration_ms',
242243
'SUM(success) AS successful_requests',
243244
'AVG(total_tokens) AS avg_tokens',
245+
sprintf("SUM(CASE WHEN grade_status = '%s' THEN grade_score ELSE 0 END) AS grade_score_sum", $done),
246+
sprintf("SUM(CASE WHEN grade_status = '%s' THEN 1 ELSE 0 END) AS graded_count", $done),
244247
);
245248
if ($requestType !== '') {
246249
$qb->where($qb->expr()->eq('request_type', $qb->createNamedParameter($requestType)));
@@ -257,13 +260,16 @@ public function getModelPerformanceProfile(string $requestType = ''): array
257260
return array_map(static function (array $row): array {
258261
$count = (int)$row['request_count'];
259262
$successful = (int)$row['successful_requests'];
263+
$gradedCount = (int)$row['graded_count'];
260264
return [
261265
'model_used' => $row['model_used'],
262266
'request_count' => $count,
263267
'avg_cost' => round((float)$row['avg_cost'], 6),
264268
'avg_duration_ms' => (int)$row['avg_duration_ms'],
265269
'success_rate' => $count > 0 ? round($successful / $count * 100, 1) : 0,
266270
'avg_tokens' => (int)$row['avg_tokens'],
271+
'graded_count' => $gradedCount,
272+
'avg_grade_score' => $gradedCount > 0 ? round((float)$row['grade_score_sum'] / $gradedCount, 4) : 0.0,
267273
];
268274
}, $rows);
269275
}

Classes/Middleware/SmartRoutingMiddleware.php

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,19 @@ final class SmartRoutingMiddleware implements AiMiddlewareInterface
6363
*/
6464
private const MIN_SUCCESS_RATE = 90.0;
6565

66+
/**
67+
* Minimum number of graded requests before the grade gate is trusted.
68+
* Below this, grading is treated as "no signal" and the candidate is
69+
* judged on cost and success rate alone.
70+
*/
71+
private const MIN_GRADED_REQUESTS = 10;
72+
73+
/**
74+
* Minimum average grade (0.0–1.0) for a cheaper model to remain a
75+
* candidate. 0.65 is the "good" label boundary.
76+
*/
77+
private const MIN_GRADE_SCORE = 0.65;
78+
6679
public function __construct(
6780
private readonly RequestLogRepository $logRepository,
6881
private readonly ProviderResolver $providerResolver,
@@ -101,12 +114,16 @@ public function process(
101114
if ($classification['label'] === 'simple') {
102115
$cheaperResult = $this->findCheaperModel($request, $configuration);
103116
if ($cheaperResult !== null) {
117+
$gradeNote = $cheaperResult['graded_count'] > 0
118+
? sprintf('avg grade: %.2f over %d graded', $cheaperResult['avg_grade_score'], $cheaperResult['graded_count'])
119+
: 'ungraded';
104120
$this->logger->info(sprintf(
105-
'Smart routing: downgrading from "%s" to cheaper model "%s" for simple prompt (score: %.2f, reason: %s)',
121+
'Smart routing: downgrading from "%s" to cheaper model "%s" for simple prompt (score: %.2f, reason: %s, %s)',
106122
$configuration->model,
107123
$cheaperResult['configuration']->model,
108124
$classification['score'],
109125
$classification['reason'],
126+
$gradeNote,
110127
));
111128

112129
return $next->handle(
@@ -257,7 +274,7 @@ private function classifyComplexity(string $prompt): array
257274
* Queries historical performance data from the request log to find
258275
* models with lower cost but high success rates for the same request type.
259276
*
260-
* @return array{provider: AiProviderInterface, configuration: ProviderConfiguration}|null
277+
* @return array{provider: AiProviderInterface, configuration: ProviderConfiguration, avg_grade_score: float, graded_count: int}|null
261278
*/
262279
private function findCheaperModel(AiRequestInterface $request, ProviderConfiguration $currentConfig): ?array
263280
{
@@ -298,6 +315,13 @@ private function findCheaperModel(AiRequestInterface $request, ProviderConfigura
298315
if ($profile['success_rate'] < self::MIN_SUCCESS_RATE) {
299316
continue;
300317
}
318+
// Quality gate: veto a cheap, reliable model only when we have enough
319+
// graded samples to trust the signal. Too few grades = no signal, fall through.
320+
if ($profile['graded_count'] >= self::MIN_GRADED_REQUESTS
321+
&& $profile['avg_grade_score'] < self::MIN_GRADE_SCORE
322+
) {
323+
continue;
324+
}
301325
if ($profile['avg_cost'] >= $currentCost) {
302326
continue;
303327
}
@@ -328,6 +352,8 @@ private function findCheaperModel(AiRequestInterface $request, ProviderConfigura
328352
return [
329353
'provider' => $resolved->manifest->getInstance(),
330354
'configuration' => $resolved->configuration,
355+
'avg_grade_score' => $bestCandidate['avg_grade_score'],
356+
'graded_count' => $bestCandidate['graded_count'],
331357
];
332358
}
333359
}
Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
/*
6+
* This file is part of TYPO3 CMS-based extension "aim" by b13.
7+
*
8+
* It is free software; you can redistribute it and/or modify it under
9+
* the terms of the GNU General Public License, either version 2
10+
* of the License, or any later version.
11+
*/
12+
13+
namespace B13\Aim\Tests\Functional\Domain\Repository;
14+
15+
use B13\Aim\Domain\Repository\RequestLogRepository;
16+
use B13\Aim\Grading\GradeLabel;
17+
use B13\Aim\Grading\GradeStatus;
18+
use PHPUnit\Framework\Attributes\Test;
19+
use TYPO3\TestingFramework\Core\Functional\FunctionalTestCase;
20+
21+
final class RequestLogRepositoryTest extends FunctionalTestCase
22+
{
23+
protected array $testExtensionsToLoad = [
24+
'b13/aim',
25+
];
26+
27+
#[Test]
28+
public function modelPerformanceProfileAggregatesGradesOverDoneRowsOnly(): void
29+
{
30+
$logRepo = $this->get(RequestLogRepository::class);
31+
32+
// Three graded "done" rows for cheap-model: scores 0.6, 0.8, 1.0 → avg 0.8
33+
foreach ([0.6, 0.8, 1.0] as $score) {
34+
$logRepo->log($this->row('cheap-model', 0.5, GradeStatus::Done, $score));
35+
}
36+
// One failed and one ungraded row — must be excluded from the grade average
37+
$logRepo->log($this->row('cheap-model', 0.4, GradeStatus::Failed, 0.0));
38+
$logRepo->log($this->row('cheap-model', 0.4, GradeStatus::None, 0.0));
39+
40+
$profiles = $logRepo->getModelPerformanceProfile('TextGenerationRequest');
41+
$cheap = $this->profileFor($profiles, 'cheap-model');
42+
43+
self::assertSame(5, $cheap['request_count']);
44+
self::assertSame(3, $cheap['graded_count']);
45+
self::assertEqualsWithDelta(0.8, $cheap['avg_grade_score'], 0.0001);
46+
}
47+
48+
#[Test]
49+
public function modelPerformanceProfileReportsZeroGradesForUngradedModel(): void
50+
{
51+
$logRepo = $this->get(RequestLogRepository::class);
52+
$logRepo->log($this->row('ungraded-model', 0.5, GradeStatus::None, 0.0));
53+
$logRepo->log($this->row('ungraded-model', 0.5, GradeStatus::None, 0.0));
54+
55+
$profiles = $logRepo->getModelPerformanceProfile('TextGenerationRequest');
56+
$model = $this->profileFor($profiles, 'ungraded-model');
57+
58+
self::assertSame(2, $model['request_count']);
59+
self::assertSame(0, $model['graded_count']);
60+
self::assertSame(0.0, $model['avg_grade_score']);
61+
}
62+
63+
private function row(string $model, float $cost, GradeStatus $status, float $gradeScore): array
64+
{
65+
return [
66+
'crdate' => time(),
67+
'request_type' => 'TextGenerationRequest',
68+
'provider_identifier' => 'test',
69+
'model_used' => $model,
70+
'success' => 1,
71+
'cost' => $cost,
72+
'total_tokens' => 100,
73+
'grade_status' => $status->value,
74+
'grade_score' => $gradeScore,
75+
'grade_label' => $status === GradeStatus::Done ? GradeLabel::fromScore($gradeScore)->value : '',
76+
];
77+
}
78+
79+
/**
80+
* @param list<array<string, mixed>> $profiles
81+
* @return array<string, mixed>
82+
*/
83+
private function profileFor(array $profiles, string $model): array
84+
{
85+
foreach ($profiles as $profile) {
86+
if ($profile['model_used'] === $model) {
87+
return $profile;
88+
}
89+
}
90+
self::fail('No performance profile for model "' . $model . '".');
91+
}
92+
}

0 commit comments

Comments
 (0)