Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
108 changes: 108 additions & 0 deletions frontend/src/__tests__/EngineCardSpecDecode.test.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
import { describe, it, expect } from 'vitest'
import { render, screen } from '@testing-library/react'
import { EngineCard } from '../components/engines/EngineCard'
import type { EngineMetrics, EngineSnapshot } from '../types/metrics'

/**
 * Test fixture: a fully-populated EngineMetrics whose speculative-decoding
 * fields are all null unless the caller supplies them. Any key present in
 * `overrides` replaces the corresponding default.
 */
function metrics(overrides: Partial<EngineMetrics> = {}): EngineMetrics {
  const defaults: EngineMetrics = {
    tokens_per_sec: 100,
    avg_tokens_per_sec: 80,
    per_request_tps: 50,
    ttft_ms: 120,
    active_requests: 1,
    queued_requests: 0,
    kv_cache_percent: 40,
    kv_cache_is_estimated: false,
    total_requests: 10,
    e2e_latency_ms: 500,
    prompt_tokens_per_sec: 200,
    avg_prompt_tokens_per_sec: 180,
    per_request_prompt_tps: 70,
    swapped_requests: 0,
    prefix_cache_hit_rate: 30,
    queue_time_ms: 50,
    inter_token_latency_ms: 25,
    preemptions_total: 0,
    total_prompt_tokens: 1000,
    total_generation_tokens: 2000,
    prefix_cache_queries_total: 5000,
    avg_batch_size: 4,
    ttft_percentiles: null,
    itl_percentiles: null,
    e2e_percentiles: null,
    ttft_goodput_pct: null,
    itl_goodput_pct: null,
    e2e_goodput_pct: null,
    ttft_buckets: null,
    itl_buckets: null,
    e2e_buckets: null,
    tpot_ms: 28,
    tpot_percentiles: null,
    tpot_goodput_pct: null,
    tpot_buckets: null,
    // Speculative decoding is "off" in the baseline fixture.
    spec_decode_draft_tokens_total: null,
    spec_decode_accepted_tokens_total: null,
    spec_decode_drafts_total: null,
    spec_decode_acceptance_rate: null,
    spec_decode_acceptance_rate_live: null,
    spec_decode_mean_acceptance_length: null,
  }
  // Caller-supplied fields take precedence over the baseline.
  return { ...defaults, ...overrides }
}

/**
 * Test fixture: wraps the given metrics (or null) in an EngineSnapshot for a
 * running vLLM engine deployed via Docker, with a model that only has a name.
 */
function snapshot(m: EngineMetrics | null): EngineSnapshot {
  // Only the name is populated; every other model descriptor is left null.
  const model: EngineSnapshot['model'] = {
    name: 'test/model',
    parameter_size: null,
    quantization: null,
    precision: null,
    tensor_type: null,
    model_type: null,
    pipeline_tag: null,
  }
  return {
    engine_type: 'Vllm',
    endpoint: 'http://localhost:8000',
    status: { type: 'Running' },
    model,
    metrics: m,
    recent_requests: [],
    deployment_mode: 'Docker',
  }
}

/**
 * Rendering contract of the speculative-decoding area of EngineCard:
 * the card header is always the renamed one, and the sub-section appears only
 * once the engine reports a positive draft-token count.
 */
describe('EngineCard speculative-decoding section', () => {
  it('always renders the renamed cache card header', () => {
    render(<EngineCard engine={snapshot(metrics())} />)
    expect(screen.getByText('Cache & Speculative Decoding')).toBeTruthy()
  })

  it('hides the spec-decode section when the model has no spec-decode metrics', () => {
    render(<EngineCard engine={snapshot(metrics())} />)
    expect(screen.queryByText('Speculative Decoding')).toBeNull()
    expect(screen.queryByText('Accept Len')).toBeNull()
  })

  it('hides the spec-decode section while the draft-token counter is still zero', () => {
    // Spec decoding is configured (counters are non-null) but the engine has
    // not drafted anything yet, so the section must stay hidden rather than
    // show a block of placeholders.
    render(
      <EngineCard
        engine={snapshot(
          metrics({
            spec_decode_draft_tokens_total: 0,
            spec_decode_accepted_tokens_total: 0,
            spec_decode_drafts_total: 0,
          }),
        )}
      />,
    )
    expect(screen.queryByText('Speculative Decoding')).toBeNull()
    expect(screen.queryByText('Accept Len')).toBeNull()
    // The card itself is still rendered with its header.
    expect(screen.getByText('Cache & Speculative Decoding')).toBeTruthy()
  })

  it('renders TAR, acceptance length, and accepted/draft counters when enabled', () => {
    render(
      <EngineCard
        engine={snapshot(
          metrics({
            spec_decode_draft_tokens_total: 100_000,
            spec_decode_accepted_tokens_total: 72_000,
            spec_decode_drafts_total: 24_000,
            spec_decode_acceptance_rate: 72,
            spec_decode_acceptance_rate_live: 68,
            spec_decode_mean_acceptance_length: 3,
          }),
        )}
      />,
    )
    expect(screen.getByText('Speculative Decoding')).toBeTruthy()
    expect(screen.getByText('72')).toBeTruthy()
    expect(screen.getByText(/68% live/)).toBeTruthy()
    expect(screen.getByText('Accept Len')).toBeTruthy()
    expect(screen.getByText('Accepted')).toBeTruthy()
    expect(screen.getByText('Draft')).toBeTruthy()
  })
})
48 changes: 48 additions & 0 deletions frontend/src/__tests__/HBar.test.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
import { describe, it, expect } from 'vitest'
import { render, screen } from '@testing-library/react'
import { HBar } from '../components/gauges/HBar'
import type { GaugeSegment } from '../components/gauges/ArcGauge'

/** Rendering tests for the HBar horizontal gauge. */
describe('HBar', () => {
  /** Inline width of the single-value fill element; throws if it is absent. */
  const fillWidth = (): string =>
    (screen.getByTestId('hbar-fill') as HTMLElement).style.width

  it('renders a single value with label and unit', () => {
    render(<HBar value={42} label="GPU Util" unit="%" />)
    for (const text of ['GPU Util', '42', '%']) {
      expect(screen.getByText(text)).toBeTruthy()
    }
  })

  it('fills the bar proportionally to value/max', () => {
    render(<HBar value={30} max={120} label="X" unit="W" />)
    // 30 out of 120 is a quarter of the track.
    expect(fillWidth()).toBe('25%')
  })

  it('clamps the fill width to [0, 100]%', () => {
    render(<HBar value={500} max={100} label="X" unit="%" />)
    expect(fillWidth()).toBe('100%')
  })

  it('prefers displayValue over value for the readout', () => {
    render(<HBar value={75} displayValue={150} label="GPU Power" unit="W" />)
    expect(screen.getByText('150')).toBeTruthy()
  })

  it('renders stacked segments with a legend and no single-value fill', () => {
    // Same call shape as the Memory card: an explicit `value` (used %)
    // alongside the segments that make up the stacked breakdown.
    const segments: GaugeSegment[] = [
      { value: 25, total: 100, color: '#76B900', label: 'GPU: 25' },
      { value: 25, total: 100, color: '#3B82F6', label: 'CPU: 25' },
      { value: 50, total: 100, color: '#27272A', label: 'Free: 50' },
    ]
    render(<HBar value={50} label="" unit="%" segments={segments} />)

    // Segment mode must not draw the single-value fill.
    expect(screen.queryByTestId('hbar-fill')).toBeNull()

    // Legend entries are shown.
    for (const legend of ['GPU: 25', 'Free: 50']) {
      expect(screen.getByText(legend)).toBeTruthy()
    }

    // The readout comes from the explicit value (used %), matching ArcGauge precedence.
    expect(screen.getByText('50')).toBeTruthy()
  })
})
17 changes: 16 additions & 1 deletion frontend/src/__tests__/format.test.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { describe, it, expect } from 'vitest'
import { formatBytes, formatGiB, formatCompactTokens } from '../lib/format'
import { formatBytes, formatGiB, formatCompactTokens, formatAcceptanceLength } from '../lib/format'

const GIB = 1_073_741_824
const MIB = 1_048_576
Expand Down Expand Up @@ -53,3 +53,18 @@ describe('formatCompactTokens', () => {
expect(formatCompactTokens(3.4e12)).toBe('3.4T')
})
})

/** Placeholder and two-decimal formatting behavior of formatAcceptanceLength. */
describe('formatAcceptanceLength', () => {
  it('renders -- for null, negative, or non-finite', () => {
    const unusable = [null, -1, Number.NaN, Number.POSITIVE_INFINITY]
    for (const input of unusable) {
      expect(formatAcceptanceLength(input)).toBe('--')
    }
  })

  it('formats accepted-tokens-per-draft to two decimals', () => {
    const cases: Array<[number, string]> = [
      [3, '3.00'],
      [3.4167, '3.42'],
      [0, '0.00'],
    ]
    for (const [input, expected] of cases) {
      expect(formatAcceptanceLength(input)).toBe(expected)
    }
  })
})
29 changes: 25 additions & 4 deletions frontend/src/components/engines/EngineCard.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import {
KvBar,
TrendArrow,
GoodputTile,
SpecDecodeSection,
computeTrend,
fmtVal,
fmtInt,
Expand Down Expand Up @@ -134,6 +135,17 @@ export function EngineCard({
const prefixCacheHit = v('prefix_cache_hit_rate')
const prefixCacheQueries = v('prefix_cache_queries_total')
const preemptions = v('preemptions_total')
// Speculative decoding — present only when the served model has it enabled.
const specDraftTokens = v('spec_decode_draft_tokens_total')
const specAcceptedTokens = v('spec_decode_accepted_tokens_total')
const specAcceptanceRate = v('spec_decode_acceptance_rate')
const specAcceptanceRateLive = v('spec_decode_acceptance_rate_live')
const specMeanAcceptanceLength = v('spec_decode_mean_acceptance_length')
// Show the section only once the engine has actually drafted tokens. The
// counter is present (non-null) whenever spec decoding is configured, but it
// sits at 0 on a freshly-started idle engine — gating on >0 keeps the card
// from showing an all-dashes section until the metrics carry real values.
const hasSpecDecode = specDraftTokens !== null && specDraftTokens > 0
const engineKey = `${engine.engine_type}-${engine.endpoint}`
const modelName = engine.model?.name ?? null
const { thresholds: slo, setThresholds: setSlo, reset: resetSlo, isCustomized: sloCustomized } =
Expand Down Expand Up @@ -218,7 +230,7 @@ export function EngineCard({
) : (
<>
{/* ── Grouped metrics with trend arrows — 6 categories ── */}
<div className="grid grid-cols-2 md:grid-cols-3 xl:grid-cols-6 gap-2 py-1">
<div className="grid grid-cols-2 sm:grid-cols-3 lg:grid-cols-6 gap-2 py-1">
{/* Prefill Throughput */}
<div className="bg-white/[0.02] rounded-md px-3 py-2.5 2xl:px-4 2xl:py-3 min-w-0">
<div className="text-[11px] 2xl:text-xs min-[1920px]:text-sm font-semibold text-zinc-300 tracking-tight mb-1.5 truncate">Prompt Processing / Prefill Throughput</div>
Expand Down Expand Up @@ -289,9 +301,9 @@ export function EngineCard({
</div>
</div>

{/* Cache */}
{/* Cache & Speculative Decoding */}
<div className="bg-white/[0.02] rounded-md px-3 py-2.5 2xl:px-4 2xl:py-3 min-w-0">
<div className="text-[11px] 2xl:text-xs min-[1920px]:text-sm font-semibold text-zinc-300 tracking-tight mb-1.5 truncate">Cache</div>
<div className="text-[11px] 2xl:text-xs min-[1920px]:text-sm font-semibold text-zinc-300 tracking-tight mb-1.5 truncate">Cache &amp; Speculative Decoding</div>
<div className="grid grid-cols-2 gap-1.5">
<div className="flex flex-col gap-0.5 min-w-0">
<span className="text-[10px] font-medium text-zinc-400 uppercase tracking-wider truncate">KV Cache</span>
Expand All @@ -316,6 +328,15 @@ export function EngineCard({
className="text-lg xl:text-xl 2xl:text-2xl min-[1920px]:text-3xl min-[2560px]:text-4xl font-bold text-zinc-100 font-mono tabular-nums leading-none"
/>
</div>
{hasSpecDecode && (
<SpecDecodeSection
acceptanceRate={specAcceptanceRate}
acceptanceRateLive={specAcceptanceRateLive}
meanAcceptanceLength={specMeanAcceptanceLength}
acceptedTokens={specAcceptedTokens}
draftTokens={specDraftTokens}
/>
)}
</div>
</div>

Expand All @@ -324,7 +345,7 @@ export function EngineCard({
* 1 Prefill · 2 Decode · 3 Latency · 4 SLO Goodput · 5 Requests · 6 Cache
* E2E sits under SLO Goodput (col 4); Requests under col 5; KV under Cache (col 6). */}
{showCharts && chartData && (
<div className="grid grid-cols-2 md:grid-cols-3 xl:grid-cols-6 gap-3 pt-1">
<div className="grid grid-cols-2 sm:grid-cols-3 lg:grid-cols-6 gap-3 pt-1">
<TimeSeriesChart
title="Prefill Throughput (tok/s)"
hideTooltipLabel
Expand Down
103 changes: 102 additions & 1 deletion frontend/src/components/engines/EngineCardPrimitives.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
*/

import type { LatencyPercentiles } from '@/types/metrics'
import { formatCompactTokens } from '@/lib/format'
import { formatCompactTokens, formatAcceptanceLength } from '@/lib/format'
import { AnimatedCounter } from './AnimatedCounter'

export interface ChartDataPoint {
Expand Down Expand Up @@ -166,6 +166,107 @@ export function fmtVal(v: number | null, fmt: (n: number) => string): string {
return v === null ? '--' : fmt(v)
}

/**
 * Props for SpecDecodeSection. Every metric is nullable; the component
 * renders a placeholder or omits the element for null values instead of
 * failing.
 */
interface SpecDecodeSectionProps {
/**
 * Lifetime token acceptance rate (TAR), 0-100. Rounded to a whole number for
 * the headline readout and used to pick its color (>=70 green, >=40 yellow,
 * otherwise red); null shows `--` in the neutral color.
 */
acceptanceRate: number | null
/**
 * Live (windowed) TAR, 0-100. Shown as a secondary line when present:
 * rounded and rendered as "N% live" next to the TAR label, omitted when null.
 */
acceptanceRateLive: number | null
/**
 * Mean accepted tokens per draft attempt. Formatted with
 * formatAcceptanceLength and labelled "Accept Len" (unit "tok/draft").
 */
meanAcceptanceLength: number | null
/** Cumulative accepted tokens (counts up). Rendered via AnimatedCounter with formatCompactTokens. */
acceptedTokens: number | null
/** Cumulative drafted tokens (counts up). Rendered via AnimatedCounter with formatCompactTokens. */
draftTokens: number | null
}

/**
 * Maps NaN / ±Infinity to null so that an unusable reading takes the same
 * `--` placeholder path as a missing one.
 */
function finiteOrNull(v: number | null): number | null {
  return v !== null && Number.isFinite(v) ? v : null
}

/**
 * Speculative-decoding sub-section of the Cache card.
 *
 * The component does no gating of its own: it renders whatever it is given,
 * so the caller decides when to mount it. It surfaces:
 *  - the lifetime token acceptance rate (TAR) as the headline number, with the
 *    live (windowed) rate as a small "N% live" suffix on the label;
 *  - the mean acceptance length ("Accept Len", tok/draft);
 *  - the cumulative accepted and drafted token counters, which animate upward.
 *
 * Higher acceptance is better, so TAR is colored green (≥70%), yellow (≥40%),
 * else red. A null or non-finite TAR shows `--` in the neutral color, and a
 * null or non-finite live rate is omitted. This keeps both rates consistent
 * with the acceptance length, whose formatter also shows `--` for non-finite
 * values, instead of printing "NaN".
 */
export function SpecDecodeSection({
  acceptanceRate,
  acceptanceRateLive,
  meanAcceptanceLength,
  acceptedTokens,
  draftTokens,
}: SpecDecodeSectionProps) {
  // Treat unusable numbers as "no reading" before any rounding or coloring.
  const tar = finiteOrNull(acceptanceRate)
  const tarLive = finiteOrNull(acceptanceRateLive)
  const tarColor =
    tar === null
      ? 'text-zinc-100'
      : tar >= 70
        ? 'text-[#76B900]'
        : tar >= 40
          ? 'text-yellow-400'
          : 'text-red-400'
  return (
    <div className="flex flex-col gap-0.5 mt-2 pt-2 border-t border-white/[0.04] min-w-0">
      <span className="text-[10px] 2xl:text-xs min-[1920px]:text-sm font-medium text-zinc-400 uppercase tracking-wider truncate">
        Speculative Decoding
      </span>
      <div className="grid grid-cols-2 gap-1.5 mt-0.5">
        <div className="flex flex-col gap-0.5 min-w-0">
          <span className="text-[10px] font-medium text-zinc-400 uppercase tracking-wider truncate">
            TAR
            {tarLive !== null && (
              <span className="ml-1 normal-case tracking-normal text-zinc-600">
                {Math.round(tarLive)}% live
              </span>
            )}
          </span>
          <div className="flex items-baseline">
            <span
              className={`text-lg xl:text-xl 2xl:text-2xl min-[1920px]:text-3xl min-[2560px]:text-4xl font-bold font-mono tabular-nums leading-none ${tarColor}`}
            >
              {tar === null ? '--' : Math.round(tar)}
            </span>
            <span className="text-xs text-zinc-500 ml-1">%</span>
          </div>
        </div>
        <MetricTile
          label="Accept Len"
          value={fmtVal(meanAcceptanceLength, formatAcceptanceLength)}
          unit="tok/draft"
        />
      </div>
      {/* Cumulative accepted/draft tokens. Each value gets its own line (label
          above) so the abbreviated counters never clip on narrow cards, while
          smaller fonts than the headline numbers keep the section compact. */}
      <div className="grid grid-cols-2 gap-1.5 mt-1.5 min-w-0">
        <div className="flex flex-col gap-0.5 min-w-0">
          <span className="text-[10px] font-medium text-zinc-400 uppercase tracking-wider truncate">
            Accepted
          </span>
          <div className="flex items-baseline min-w-0">
            <AnimatedCounter
              value={acceptedTokens}
              format={formatCompactTokens}
              className="text-sm xl:text-base 2xl:text-lg min-[1920px]:text-xl font-bold font-mono tabular-nums leading-none text-zinc-100 truncate"
            />
            <span className="text-[10px] ml-1 text-zinc-500">tok</span>
          </div>
        </div>
        <div className="flex flex-col gap-0.5 min-w-0">
          <span className="text-[10px] font-medium text-zinc-400 uppercase tracking-wider truncate">
            Draft
          </span>
          <div className="flex items-baseline min-w-0">
            <AnimatedCounter
              value={draftTokens}
              format={formatCompactTokens}
              className="text-sm xl:text-base 2xl:text-lg min-[1920px]:text-xl font-bold font-mono tabular-nums leading-none text-zinc-100 truncate"
            />
            <span className="text-[10px] ml-1 text-zinc-500">tok</span>
          </div>
        </div>
      </div>
    </div>
  )
}

/**
 * Formats a nullable metric as a whole number for display.
 *
 * @param v - The metric value, or null when the metric is unavailable.
 * @returns The value rounded with `Math.round` as a decimal string, or `--`
 *   when `v` is null or not a finite number (NaN / ±Infinity), so the UI never
 *   shows "NaN" or "Infinity".
 */
export function fmtInt(v: number | null): string {
  // Non-finite readings are as unusable as missing ones: show the placeholder.
  if (v === null || !Number.isFinite(v)) {
    return '--'
  }
  return String(Math.round(v))
}
Expand Down
Loading