|
1 | 1 | <script lang="ts"> |
2 | 2 | import type { NanoGPTModel } from '$lib/backend/models/nano-gpt'; |
| 3 | + import type { AALLMModel, AAImageModel, AABenchmarkData } from '$lib/types/artificial-analysis'; |
3 | 4 | import { |
4 | 5 | supportsVision, |
5 | 6 | supportsReasoning, |
6 | 7 | isImageOnlyModel, |
7 | 8 | supportsVideo, |
8 | 9 | } from '$lib/utils/model-capabilities'; |
9 | 10 | import { cn } from '$lib/utils/utils'; |
| 11 | + import { useCachedQuery, api } from '$lib/cache/cached-query.svelte'; |
10 | 12 | import EyeIcon from '~icons/lucide/eye'; |
11 | 13 | import BrainIcon from '~icons/lucide/brain'; |
12 | 14 | import ImageIcon from '~icons/lucide/image'; |
13 | 15 | import VideoIcon from '~icons/lucide/video'; |
14 | 16 | import XIcon from '~icons/lucide/x'; |
15 | 17 | import CheckIcon from '~icons/lucide/check'; |
| 18 | + import TrendingUpIcon from '~icons/lucide/trending-up'; |
| 19 | + import ZapIcon from '~icons/lucide/zap'; |
| 20 | + import ExternalLinkIcon from '~icons/lucide/external-link'; |
16 | 21 |
|
17 | 22 | type Props = { |
18 | 23 | model: NanoGPTModel; |
|
22 | 27 |
|
23 | 28 | let { model, iconUrl, onClose }: Props = $props(); |
24 | 29 |
|
// Fetch Artificial Analysis benchmark data via the shared query cache.
// The response carries an `available` flag in addition to the benchmark payload.
const benchmarks = useCachedQuery<AABenchmarkData & { available: boolean }>(
  api.artificial_analysis.benchmarks,
  {},
  { ttl: 60 * 60 * 1000 } // 1 hour cache — benchmark data changes infrequently
);
| 36 | +
|
| 37 | + // Normalize model name for matching (remove punctuation, lowercase) |
| 38 | + function normalizeForMatch(str: string): string { |
| 39 | + return str |
| 40 | + .toLowerCase() |
| 41 | + .replace(/[^a-z0-9]/g, '') // Remove all non-alphanumeric |
| 42 | + .trim(); |
| 43 | + } |
| 44 | +
|
| 45 | + // Strip common suffixes that don't affect model identity |
| 46 | + function stripSuffixes(str: string): string { |
| 47 | + return str |
| 48 | + .replace(/[-_]?original$/i, '') |
| 49 | + .replace(/[-_]?\d{8}$/i, '') // Remove date suffixes like -20250929 |
| 50 | + .trim(); |
| 51 | + } |
| 52 | +
|
| 53 | + // Extract model name from ID (e.g., "zai-org/glm-4.7-original" -> "glm-4.7") |
| 54 | + function extractModelName(id: string): string { |
| 55 | + const parts = id.split('/'); |
| 56 | + const name = (parts.length > 1 ? parts[parts.length - 1] : id) ?? id; |
| 57 | + return stripSuffixes(name); |
| 58 | + } |
| 59 | +
|
| 60 | + // Extract key tokens from a model name (keeping version numbers together) |
| 61 | + function extractKeyTokens(name: string): Set<string> { |
| 62 | + // First, normalize separators but keep decimal points intact in version numbers |
| 63 | + const normalized = name |
| 64 | + .toLowerCase() |
| 65 | + .replace(/(\d+)\.(\d+)/g, '$1$2') // "4.7" -> "47", "4.5" -> "45" |
| 66 | + .replace(/[^a-z0-9]+/g, ' '); |
| 67 | + const tokens = normalized.split(' ').filter((t) => t.length > 0); |
| 68 | + // Keep all tokens, but filter out very long date-like patterns (8+ digit numbers) |
| 69 | + return new Set(tokens.filter((t) => !/^\d{8,}$/.test(t))); |
| 70 | + } |
| 71 | +
|
| 72 | + // Check if two token sets have significant overlap |
| 73 | + function tokensMatch(set1: Set<string>, set2: Set<string>): boolean { |
| 74 | + if (set1.size === 0 || set2.size === 0) return false; |
| 75 | + let matches = 0; |
| 76 | + for (const token of set1) { |
| 77 | + if (set2.has(token)) matches++; |
| 78 | + } |
| 79 | + // Require at least 2 matching tokens to prevent false positives |
| 80 | + // This prevents "glm" alone from matching "glm-4.5-air" |
| 81 | + return matches >= 2; |
| 82 | + } |
| 83 | +
|
// Find the Artificial Analysis LLM entry for this model. Every candidate is
// scored and the BEST match wins (not the first), so an exact entry beats a
// looser token overlap. Returns null when benchmarks are unavailable or no
// candidate clears the minimum score.
// (Debug console.log statements removed: this derived re-runs on every
// dependency change and was spamming the console in production.)
const llmBenchmark = $derived.by(() => {
  if (!benchmarks.data?.available || !benchmarks.data.llms) return null;

  // Precompute every comparison form of this model's name/id once.
  const modelName = stripSuffixes(model.name).toLowerCase();
  const modelIdFull = model.id.toLowerCase();
  const modelIdShort = extractModelName(model.id).toLowerCase();
  const normalizedName = normalizeForMatch(stripSuffixes(model.name));
  const normalizedId = normalizeForMatch(modelIdShort);
  const modelNameTokens = extractKeyTokens(stripSuffixes(model.name));
  const modelIdTokens = extractKeyTokens(modelIdShort);

  let bestMatch: AALLMModel | null = null;
  let bestScore = 0;

  for (const llm of benchmarks.data.llms) {
    const aaName = llm.name.toLowerCase();
    const aaSlug = llm.slug.toLowerCase();
    const normalizedAaName = normalizeForMatch(llm.name);
    const normalizedAaSlug = normalizeForMatch(llm.slug);

    let score = 0;

    // Exact name/slug matches score highest (100).
    if (modelName === aaName) {
      score = 100;
    } else if (modelIdShort === aaSlug || modelIdFull === aaSlug) {
      score = 100;
    }
    // Punctuation-insensitive exact matches score 90.
    else if (normalizedName === normalizedAaName) {
      score = 90;
    } else if (normalizedId === normalizedAaSlug) {
      score = 90;
    }
    // Otherwise fall back to token-overlap scoring.
    else {
      const aaNameTokens = extractKeyTokens(llm.name);
      const aaSlugTokens = extractKeyTokens(llm.slug);

      let nameMatches = 0;
      for (const token of modelNameTokens) {
        if (aaNameTokens.has(token)) nameMatches++;
      }
      let slugMatches = 0;
      for (const token of modelIdTokens) {
        if (aaSlugTokens.has(token)) slugMatches++;
      }

      const maxTokenMatches = Math.max(nameMatches, slugMatches);
      const minTokensNeeded = Math.min(
        modelNameTokens.size,
        modelIdTokens.size,
        aaNameTokens.size,
        aaSlugTokens.size
      );

      // The overlap must be at least 2 tokens AND cover the smallest of the
      // four token sets — this blocks weak partial matches such as "glm"
      // alone hitting "glm-4.5-air".
      if (maxTokenMatches >= 2 && maxTokenMatches >= minTokensNeeded) {
        score = 50 + maxTokenMatches * 10;
      }
    }

    // Strictly greater: on ties the first-seen candidate is kept.
    if (score > bestScore) {
      bestScore = score;
      bestMatch = llm;
    }
  }

  // Token-based scores start at 70 (50 + 2*10), so any retained match
  // clears this floor; the guard documents the intended minimum.
  return bestMatch && bestScore >= 50 ? bestMatch : null;
});
| 187 | +
|
// Find the Artificial Analysis image-model entry for this model. Only applies
// to image-only models; returns the first candidate that passes any of the
// match tiers (exact, normalized, substring, token overlap), or undefined.
const imageBenchmark = $derived.by(() => {
  if (!benchmarks.data?.available || !benchmarks.data.imageModels || !isImageOnlyModel(model))
    return null;

  // Precompute every comparison form of this model's name/id once.
  const nameLower = model.name.toLowerCase();
  const idShort = extractModelName(model.id).toLowerCase();
  const normName = normalizeForMatch(model.name);
  const normId = normalizeForMatch(idShort);
  const nameTokens = extractKeyTokens(model.name);
  const idTokens = extractKeyTokens(idShort);

  // True when either string contains the other.
  const overlaps = (a: string, b: string) => a.includes(b) || b.includes(a);

  return benchmarks.data.imageModels.find((img: AAImageModel) => {
    const aaName = img.name.toLowerCase();
    const aaSlug = img.slug.toLowerCase();
    const normAaName = normalizeForMatch(img.name);
    const normAaSlug = normalizeForMatch(img.slug);

    // Tier 1: exact matches.
    if (nameLower === aaName || idShort === aaSlug) return true;

    // Tier 2: punctuation-insensitive exact matches.
    if (normName === normAaName || normId === normAaSlug) return true;

    // Tier 3: substring containment in either direction, raw then normalized.
    if (overlaps(nameLower, aaName)) return true;
    if (overlaps(idShort, aaSlug)) return true;
    if (overlaps(normName, normAaName)) return true;
    if (overlaps(normId, normAaSlug)) return true;

    // Tier 4: token overlap (handles reordered words), every pairing.
    const aaNameTokens = extractKeyTokens(img.name);
    const aaSlugTokens = extractKeyTokens(img.slug);
    return (
      tokensMatch(nameTokens, aaNameTokens) ||
      tokensMatch(idTokens, aaSlugTokens) ||
      tokensMatch(nameTokens, aaSlugTokens) ||
      tokensMatch(idTokens, aaNameTokens)
    );
  });
});
| 233 | +
|
// Whether any benchmark matched this model (drives the Benchmarks section).
// Coerced to an actual boolean: a "has*" name should not hold `object | null`.
// Truthiness is unchanged, so existing {#if hasBenchmarks} usage still works.
const hasBenchmarks = $derived(Boolean(llmBenchmark || imageBenchmark));
| 235 | +
|
25 | 236 | function formatNumber(num: number | undefined): string { |
26 | 237 | if (!num) return '-'; |
27 | 238 | if (num >= 1000000) return `${(num / 1000000).toFixed(1)}M`; |
|
55 | 266 | .map((word) => word.charAt(0).toUpperCase() + word.slice(1)) |
56 | 267 | .join(' '); |
57 | 268 | } |
| 269 | +
|
| 270 | + function formatBenchmarkScore(score: number | undefined): string { |
| 271 | + if (score === undefined || score === null) return '-'; |
| 272 | + return score.toFixed(1); |
| 273 | + } |
58 | 274 | </script> |
59 | 275 |
|
60 | 276 | <div class="bg-popover border-border flex h-full w-[320px] flex-col overflow-hidden border-l"> |
|
88 | 304 | </div> |
89 | 305 | {/if} |
90 | 306 |
|
<!-- Benchmarks Section: rendered only when an Artificial Analysis match was found -->
{#if hasBenchmarks}
  <div>
    <h4 class="text-muted-foreground mb-2 text-xs font-medium tracking-wide uppercase">
      <span class="inline-flex items-center gap-1.5">
        <TrendingUpIcon class="size-3" />
        Benchmarks
      </span>
    </h4>

    <!-- LLM scores: intelligence/coding/math index values plus median throughput. -->
    <!-- NOTE(review): these truthiness guards hide a row when its value is 0 —
         presumably index scores and tok/s are always positive; confirm. -->
    {#if llmBenchmark}
      <div class="bg-muted/50 space-y-2 rounded-lg p-3">
        {#if llmBenchmark.evaluations?.artificial_analysis_intelligence_index}
          <div class="flex justify-between text-sm">
            <span class="text-muted-foreground">Intelligence</span>
            <span class="font-medium text-blue-400"
              >{formatBenchmarkScore(
                llmBenchmark.evaluations.artificial_analysis_intelligence_index
              )}</span
            >
          </div>
        {/if}
        {#if llmBenchmark.evaluations?.artificial_analysis_coding_index}
          <div class="flex justify-between text-sm">
            <span class="text-muted-foreground">Coding</span>
            <span class="font-medium text-green-400"
              >{formatBenchmarkScore(
                llmBenchmark.evaluations.artificial_analysis_coding_index
              )}</span
            >
          </div>
        {/if}
        {#if llmBenchmark.evaluations?.artificial_analysis_math_index}
          <div class="flex justify-between text-sm">
            <span class="text-muted-foreground">Math</span>
            <span class="font-medium text-purple-400"
              >{formatBenchmarkScore(
                llmBenchmark.evaluations.artificial_analysis_math_index
              )}</span
            >
          </div>
        {/if}
        {#if llmBenchmark.median_output_tokens_per_second}
          <div class="border-border flex justify-between border-t pt-2 text-sm">
            <span class="text-muted-foreground inline-flex items-center gap-1">
              <ZapIcon class="size-3" />
              Speed
            </span>
            <span class="font-medium text-yellow-400"
              >{llmBenchmark.median_output_tokens_per_second.toFixed(0)} tok/s</span
            >
          </div>
        {/if}
      </div>
    {/if}

    <!-- Image-model scores: ELO rating and leaderboard rank. -->
    {#if imageBenchmark}
      <div class="bg-muted/50 space-y-2 rounded-lg p-3">
        {#if imageBenchmark.elo}
          <div class="flex justify-between text-sm">
            <span class="text-muted-foreground">ELO Rating</span>
            <span class="font-medium text-blue-400">{imageBenchmark.elo}</span>
          </div>
        {/if}
        {#if imageBenchmark.rank}
          <div class="flex justify-between text-sm">
            <span class="text-muted-foreground">Rank</span>
            <span class="font-medium text-amber-400">#{imageBenchmark.rank}</span>
          </div>
        {/if}
      </div>
    {/if}

    <!-- Attribution -->
    <a
      href="https://artificialanalysis.ai"
      target="_blank"
      rel="noopener noreferrer"
      class="text-muted-foreground hover:text-foreground mt-2 inline-flex items-center gap-1 text-xs transition-colors"
    >
      Data from Artificial Analysis
      <ExternalLinkIcon class="size-3" />
    </a>
  </div>
{/if}
| 392 | + |
91 | 393 | <!-- Features --> |
92 | 394 | <div> |
93 | 395 | <h4 class="text-muted-foreground mb-2 text-xs font-medium tracking-wide uppercase"> |
|
0 commit comments