Skip to content

Commit 73840dd

Browse files
fuziontech and claude committed
metrics: per-org worker-acquire latency by allocation source + axis fix
Two things (per request, same PR):

1. Worker-acquire latency by org and allocation phase. The acquire histograms (duckgres_worker_acquire_{total,phase,gate_wait}_seconds) already timed the wait but had no `org` label, and the end-to-end total wasn't tagged by HOW the worker was obtained. Now:
   - all three carry an `org` label (sliceable per tenant)
   - the total carries a `source` label — idle_reuse | hot_idle_claim | spawn | none — so "how long did org X wait, and did it need a cold spawn?" is a dashboard query.

   source is bound to the claim BEFORE completion so a failed spawn still attributes its wait to source=spawn (outcome=error). org is threaded from p.orgID / assignment.OrgID at every observe site. Two allow-listed admin panels expose it: acquire_p95 (p95 by source) and acquire_by_source (acquire rate by source), both org-scopable via $ORG.

2. Metrics chart Y-axis fix. The axis had no tickFormatter and a narrow fixed width, so large byte-rate values were clipped to a meaningless "00000". Add a unit-aware compact formatter (binary bytes for B/s, compact SI otherwise) for the tick + tooltip, and widen the axis.

Tests: acquire_metrics_test.go updated for the new labels (+ asserts a cold spawn records source=spawn end-to-end); metrics_proxy_test validates the new panels render cleanly; format.test.ts covers the compact/axis/value formatters; harness asserts the acquire panels are advertised (raw histogram emission is unit-tested — the :9090 port is NetworkPolicy-blocked in-Job).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
Claude-Session: https://claude.ai/code/session_01NUq2EVxvKQFq3YEDNLF5HP
1 parent 4eaa89c commit 73840dd

10 files changed

Lines changed: 179 additions & 48 deletions

File tree

controlplane/acquire_metrics.go

Lines changed: 30 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -34,56 +34,74 @@ const (
3434

3535
acquireGateOutcomeAcquired = "acquired"
3636
acquireGateOutcomeCanceled = "canceled"
37+
38+
// acquireSource* is how a completed AcquireWorker got its worker — the
39+
// "phase that was allocated" dimension on the end-to-end total histogram:
40+
// reusing this org's own idle Hot worker (near-instant), claiming a parked
41+
// hot-idle worker from the shared pool (fast), or cold-spawning a fresh pod
42+
// (slow — EC2 node boot). "none" when no worker was allocated (capacity
43+
// backpressure / ctx cancel).
44+
acquireSourceIdleReuse = "idle_reuse"
45+
acquireSourceHotIdleClaim = "hot_idle_claim"
46+
acquireSourceSpawn = "spawn"
47+
acquireSourceNone = "none"
3748
)
3849

3950
// errHotIdleImageMismatch marks a hot-idle claim that yielded a worker of a
4051
// stale image (retired instead of reused) so the attempt is observed as an
4152
// error rather than dropped from the phase histogram.
4253
var errHotIdleImageMismatch = errors.New("hot-idle worker image mismatch")
4354

55+
// All three histograms carry an `org` label so per-org acquire latency is
56+
// sliceable from a dashboard (which tenant is eating cold-spawn waits). Orgs are
57+
// bounded managed-warehouse tenants, so the added cardinality is acceptable.
58+
4459
var workerAcquireGateWaitHistogram = promauto.NewHistogramVec(prometheus.HistogramOpts{
4560
Name: "duckgres_worker_acquire_gate_wait_seconds",
46-
Help: "Time a connection spent blocked in the per-org FIFO acquire gate (orgAcquireGate) before owning the slow acquisition path, partitioned by outcome (acquired|canceled).",
61+
Help: "Time a connection spent blocked in the per-org FIFO acquire gate (orgAcquireGate) before owning the slow acquisition path, partitioned by org and outcome (acquired|canceled).",
4762
Buckets: acquirePhaseBuckets,
48-
}, []string{"outcome"})
63+
}, []string{"org", "outcome"})
4964

5065
var workerAcquirePhaseHistogram = promauto.NewHistogramVec(prometheus.HistogramOpts{
5166
Name: "duckgres_worker_acquire_phase_seconds",
52-
Help: "Duration of individual worker-acquire phases on the remote/k8s backend, partitioned by phase (hot_idle_claim|spawn|activate) and outcome (ok|error).",
67+
Help: "Duration of individual worker-acquire phases on the remote/k8s backend, partitioned by org, phase (hot_idle_claim|spawn|activate) and outcome (ok|error).",
5368
Buckets: acquirePhaseBuckets,
54-
}, []string{"phase", "outcome"})
69+
}, []string{"org", "phase", "outcome"})
5570

5671
var workerAcquireTotalHistogram = promauto.NewHistogramVec(prometheus.HistogramOpts{
5772
Name: "duckgres_worker_acquire_total_seconds",
58-
Help: "End-to-end OrgReservedPool.AcquireWorker duration, partitioned by outcome (ok|capacity|error|canceled).",
73+
Help: "End-to-end OrgReservedPool.AcquireWorker duration (the time a pending session waits for a worker), partitioned by org, the allocation source (idle_reuse|hot_idle_claim|spawn|none) and outcome (ok|capacity|error|canceled).",
5974
Buckets: acquirePhaseBuckets,
60-
}, []string{"outcome"})
75+
}, []string{"org", "source", "outcome"})
6176

62-
func observeAcquireGateWait(d time.Duration, outcome string) {
77+
func observeAcquireGateWait(d time.Duration, org, outcome string) {
6378
if d < 0 {
6479
d = 0
6580
}
66-
workerAcquireGateWaitHistogram.WithLabelValues(outcome).Observe(d.Seconds())
81+
workerAcquireGateWaitHistogram.WithLabelValues(org, outcome).Observe(d.Seconds())
6782
}
6883

6984
// observeAcquirePhase records one attempt of a single acquire phase. err==nil
7085
// observes outcome=ok, otherwise outcome=error.
71-
func observeAcquirePhase(phase string, d time.Duration, err error) {
86+
func observeAcquirePhase(phase, org string, d time.Duration, err error) {
7287
if d < 0 {
7388
d = 0
7489
}
7590
outcome := acquireOutcomeOK
7691
if err != nil {
7792
outcome = acquireOutcomeError
7893
}
79-
workerAcquirePhaseHistogram.WithLabelValues(phase, outcome).Observe(d.Seconds())
94+
workerAcquirePhaseHistogram.WithLabelValues(org, phase, outcome).Observe(d.Seconds())
8095
}
8196

82-
func observeAcquireTotal(d time.Duration, outcome string) {
97+
// observeAcquireTotal records the end-to-end acquire latency for one session,
98+
// tagged by the org, how the worker was ultimately obtained (source), and the
99+
// outcome.
100+
func observeAcquireTotal(d time.Duration, org, source, outcome string) {
83101
if d < 0 {
84102
d = 0
85103
}
86-
workerAcquireTotalHistogram.WithLabelValues(outcome).Observe(d.Seconds())
104+
workerAcquireTotalHistogram.WithLabelValues(org, source, outcome).Observe(d.Seconds())
87105
}
88106

89107
// acquireTotalOutcome classifies an AcquireWorker error for the end-to-end

controlplane/acquire_metrics_test.go

Lines changed: 31 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -14,31 +14,34 @@ import (
1414
// label combination they can emit) so a registration conflict or label-arity
1515
// mistake panics here instead of in production.
1616
func TestAcquireMetricsObserveHelpers(t *testing.T) {
17-
gateBefore := histogramVecLabelSampleCount(t, workerAcquireGateWaitHistogram, acquireGateOutcomeAcquired)
18-
observeAcquireGateWait(125*time.Millisecond, acquireGateOutcomeAcquired)
19-
observeAcquireGateWait(-1*time.Second, acquireGateOutcomeCanceled) // negative durations clamp to 0
20-
if got := histogramVecLabelSampleCount(t, workerAcquireGateWaitHistogram, acquireGateOutcomeAcquired); got != gateBefore+1 {
17+
const org = "metrics-test-org"
18+
gateBefore := histogramVecLabelSampleCount(t, workerAcquireGateWaitHistogram, org, acquireGateOutcomeAcquired)
19+
observeAcquireGateWait(125*time.Millisecond, org, acquireGateOutcomeAcquired)
20+
observeAcquireGateWait(-1*time.Second, org, acquireGateOutcomeCanceled) // negative durations clamp to 0
21+
if got := histogramVecLabelSampleCount(t, workerAcquireGateWaitHistogram, org, acquireGateOutcomeAcquired); got != gateBefore+1 {
2122
t.Fatalf("gate wait acquired samples = %d, want %d", got, gateBefore+1)
2223
}
2324

2425
for _, phase := range []string{acquirePhaseHotIdleClaim, acquirePhaseSpawn, acquirePhaseActivate} {
25-
okBefore := histogramVecLabelSampleCount(t, workerAcquirePhaseHistogram, phase, acquireOutcomeOK)
26-
errBefore := histogramVecLabelSampleCount(t, workerAcquirePhaseHistogram, phase, acquireOutcomeError)
27-
observeAcquirePhase(phase, 50*time.Millisecond, nil)
28-
observeAcquirePhase(phase, 50*time.Millisecond, errors.New("boom"))
29-
if got := histogramVecLabelSampleCount(t, workerAcquirePhaseHistogram, phase, acquireOutcomeOK); got != okBefore+1 {
26+
okBefore := histogramVecLabelSampleCount(t, workerAcquirePhaseHistogram, org, phase, acquireOutcomeOK)
27+
errBefore := histogramVecLabelSampleCount(t, workerAcquirePhaseHistogram, org, phase, acquireOutcomeError)
28+
observeAcquirePhase(phase, org, 50*time.Millisecond, nil)
29+
observeAcquirePhase(phase, org, 50*time.Millisecond, errors.New("boom"))
30+
if got := histogramVecLabelSampleCount(t, workerAcquirePhaseHistogram, org, phase, acquireOutcomeOK); got != okBefore+1 {
3031
t.Fatalf("phase %q ok samples = %d, want %d", phase, got, okBefore+1)
3132
}
32-
if got := histogramVecLabelSampleCount(t, workerAcquirePhaseHistogram, phase, acquireOutcomeError); got != errBefore+1 {
33+
if got := histogramVecLabelSampleCount(t, workerAcquirePhaseHistogram, org, phase, acquireOutcomeError); got != errBefore+1 {
3334
t.Fatalf("phase %q error samples = %d, want %d", phase, got, errBefore+1)
3435
}
3536
}
3637

37-
for _, outcome := range []string{acquireOutcomeOK, acquireOutcomeCapacity, acquireOutcomeError, acquireOutcomeCanceled} {
38-
before := histogramVecLabelSampleCount(t, workerAcquireTotalHistogram, outcome)
39-
observeAcquireTotal(time.Second, outcome)
40-
if got := histogramVecLabelSampleCount(t, workerAcquireTotalHistogram, outcome); got != before+1 {
41-
t.Fatalf("total outcome %q samples = %d, want %d", outcome, got, before+1)
38+
for _, source := range []string{acquireSourceIdleReuse, acquireSourceHotIdleClaim, acquireSourceSpawn, acquireSourceNone} {
39+
for _, outcome := range []string{acquireOutcomeOK, acquireOutcomeCapacity, acquireOutcomeError, acquireOutcomeCanceled} {
40+
before := histogramVecLabelSampleCount(t, workerAcquireTotalHistogram, org, source, outcome)
41+
observeAcquireTotal(time.Second, org, source, outcome)
42+
if got := histogramVecLabelSampleCount(t, workerAcquireTotalHistogram, org, source, outcome); got != before+1 {
43+
t.Fatalf("total source=%q outcome=%q samples = %d, want %d", source, outcome, got, before+1)
44+
}
4245
}
4346
}
4447
}
@@ -67,10 +70,13 @@ func TestAcquireTotalOutcomeClassification(t *testing.T) {
6770
// acquisition (no idle worker → gate → spawn → activate) records one sample in
6871
// each phase histogram plus the gate-wait and end-to-end histograms.
6972
func TestOrgReservedPoolAcquireObservesPhaseMetrics(t *testing.T) {
70-
gateBefore := histogramVecLabelSampleCount(t, workerAcquireGateWaitHistogram, acquireGateOutcomeAcquired)
71-
spawnBefore := histogramVecLabelSampleCount(t, workerAcquirePhaseHistogram, acquirePhaseSpawn, acquireOutcomeOK)
72-
activateBefore := histogramVecLabelSampleCount(t, workerAcquirePhaseHistogram, acquirePhaseActivate, acquireOutcomeOK)
73-
totalBefore := histogramVecLabelSampleCount(t, workerAcquireTotalHistogram, acquireOutcomeOK)
73+
const org = "analytics"
74+
gateBefore := histogramVecLabelSampleCount(t, workerAcquireGateWaitHistogram, org, acquireGateOutcomeAcquired)
75+
spawnBefore := histogramVecLabelSampleCount(t, workerAcquirePhaseHistogram, org, acquirePhaseSpawn, acquireOutcomeOK)
76+
activateBefore := histogramVecLabelSampleCount(t, workerAcquirePhaseHistogram, org, acquirePhaseActivate, acquireOutcomeOK)
77+
// A no-idle-worker acquire cold-spawns, so the end-to-end total lands under
78+
// source=spawn (proving the source label tracks the allocation path).
79+
totalBefore := histogramVecLabelSampleCount(t, workerAcquireTotalHistogram, org, acquireSourceSpawn, acquireOutcomeOK)
7480

7581
shared, _ := newTestK8sPool(t, 5)
7682
shared.healthCheckFunc = func(ctx context.Context, worker *ManagedWorker) error {
@@ -83,7 +89,7 @@ func TestOrgReservedPoolAcquireObservesPhaseMetrics(t *testing.T) {
8389
return nil
8490
}
8591

86-
pool := NewOrgReservedPool(shared, "analytics", 2, shared.workerImage, nil)
92+
pool := NewOrgReservedPool(shared, org, 2, shared.workerImage, nil)
8793
pool.activateReservedWorker = func(ctx context.Context, worker *ManagedWorker) error {
8894
return nil
8995
}
@@ -94,16 +100,16 @@ func TestOrgReservedPoolAcquireObservesPhaseMetrics(t *testing.T) {
94100
t.Fatalf("AcquireWorker: %v", err)
95101
}
96102

97-
if got := histogramVecLabelSampleCount(t, workerAcquireGateWaitHistogram, acquireGateOutcomeAcquired); got != gateBefore+1 {
103+
if got := histogramVecLabelSampleCount(t, workerAcquireGateWaitHistogram, org, acquireGateOutcomeAcquired); got != gateBefore+1 {
98104
t.Errorf("gate wait acquired samples = %d, want %d", got, gateBefore+1)
99105
}
100-
if got := histogramVecLabelSampleCount(t, workerAcquirePhaseHistogram, acquirePhaseSpawn, acquireOutcomeOK); got != spawnBefore+1 {
106+
if got := histogramVecLabelSampleCount(t, workerAcquirePhaseHistogram, org, acquirePhaseSpawn, acquireOutcomeOK); got != spawnBefore+1 {
101107
t.Errorf("spawn ok samples = %d, want %d", got, spawnBefore+1)
102108
}
103-
if got := histogramVecLabelSampleCount(t, workerAcquirePhaseHistogram, acquirePhaseActivate, acquireOutcomeOK); got != activateBefore+1 {
109+
if got := histogramVecLabelSampleCount(t, workerAcquirePhaseHistogram, org, acquirePhaseActivate, acquireOutcomeOK); got != activateBefore+1 {
104110
t.Errorf("activate ok samples = %d, want %d", got, activateBefore+1)
105111
}
106-
if got := histogramVecLabelSampleCount(t, workerAcquireTotalHistogram, acquireOutcomeOK); got != totalBefore+1 {
107-
t.Errorf("total ok samples = %d, want %d", got, totalBefore+1)
112+
if got := histogramVecLabelSampleCount(t, workerAcquireTotalHistogram, org, acquireSourceSpawn, acquireOutcomeOK); got != totalBefore+1 {
113+
t.Errorf("total spawn/ok samples = %d, want %d", got, totalBefore+1)
108114
}
109115
}

controlplane/admin/metrics_proxy.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,12 @@ var rangePanels = map[string]string{
4848
"s3_bytes_rate": `sum(rate(duckgres_s3_bytes_read_total$ORG[$WIN]))`,
4949
"worker_states": `sum by (state) (duckgres_worker_lifecycle_count)`,
5050
"queue_depth": `sum(duckgres_control_plane_worker_queue_depth)`,
51+
// Worker-acquire latency: how long a pending session waits for a worker,
52+
// split by the allocation source (idle_reuse|hot_idle_claim|spawn|none). p95 for
53+
// the tail, plus the rate of acquisitions per source so cold-spawn frequency
54+
// is visible. $ORG scopes both to a single org.
55+
"acquire_p95": `histogram_quantile(0.95, sum by (le, source) (rate(duckgres_worker_acquire_total_seconds_bucket$ORG[$WIN])))`,
56+
"acquire_by_source": `sum by (source) (rate(duckgres_worker_acquire_total_seconds_count$ORG[$WIN]))`,
5157
}
5258

5359
// renderPanel substitutes the named tokens into a panel template. $ORGERR is
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
import { describe, expect, it } from "vitest";
2+
import { fmtCompact, fmtMetricAxis, fmtMetricValue } from "./format";
3+
4+
describe("fmtCompact", () => {
5+
it("renders compact SI so large numbers don't overflow an axis", () => {
6+
expect(fmtCompact(20_000_000)).toBe("20M");
7+
expect(fmtCompact(1500)).toBe("1.5K");
8+
expect(fmtCompact(0)).toBe("0");
9+
expect(fmtCompact(0.42)).toBe("0.4");
10+
});
11+
it("handles nullish", () => {
12+
expect(fmtCompact(null)).toBe("—");
13+
expect(fmtCompact(undefined)).toBe("—");
14+
expect(fmtCompact(NaN)).toBe("—");
15+
});
16+
});
17+
18+
describe("fmtMetricAxis", () => {
19+
it("formats byte units with binary prefixes (the S3 bytes-rate axis)", () => {
20+
// 20,000,000 B ≈ 19.1 MiB — the bug was this rendering as a clipped '00000'.
21+
expect(fmtMetricAxis(20_000_000, "B/s")).toBe("19.1 MB");
22+
expect(fmtMetricAxis(1024, "B")).toBe("1.0 KB");
23+
});
24+
it("formats non-byte units compactly", () => {
25+
expect(fmtMetricAxis(20_000_000, "ops/s")).toBe("20M");
26+
expect(fmtMetricAxis(0.5, "")).toBe("0.5");
27+
});
28+
it("returns empty string for nullish (recharts skips the tick)", () => {
29+
expect(fmtMetricAxis(null, "B/s")).toBe("");
30+
expect(fmtMetricAxis(NaN)).toBe("");
31+
});
32+
});
33+
34+
describe("fmtMetricValue", () => {
35+
it("byte rate gets a /s suffix; plain bytes don't", () => {
36+
expect(fmtMetricValue(20_000_000, "B/s")).toBe("19.1 MB/s");
37+
expect(fmtMetricValue(1024, "B")).toBe("1.0 KB");
38+
});
39+
it("non-byte value keeps its unit label", () => {
40+
expect(fmtMetricValue(1500, "ops/s")).toBe("1.5K ops/s");
41+
expect(fmtMetricValue(0.42, "")).toBe("0.4");
42+
});
43+
it("nullish renders a dash", () => {
44+
expect(fmtMetricValue(null, "B/s")).toBe("—");
45+
});
46+
});

controlplane/admin/ui/src/lib/format.ts

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,33 @@ export function fmtBytes(n: number | null | undefined): string {
4848
return `${v.toFixed(v >= 100 || i === 0 ? 0 : 1)} ${units[i]}`;
4949
}
5050

51+
// fmtCompact renders a number in compact en-US notation (1.5K, 20M, 3.2B). Keeps a
52+
// wide-ranging metric axis readable and NARROW — a raw 20000000 would be clipped
53+
// to "00000" by a fixed-width axis.
54+
export function fmtCompact(n: number | null | undefined): string {
55+
if (n == null || Number.isNaN(n)) return "—";
56+
return new Intl.NumberFormat("en-US", { notation: "compact", maximumFractionDigits: 1 }).format(n);
57+
}
58+
59+
// fmtMetricAxis compacts a metric value for a chart Y-axis tick. Byte units use
60+
// binary prefixes (19.1 MB); everything else uses compact SI (1.5K, 20M). Returns
61+
// "" for nullish so recharts skips the tick rather than drawing a dash.
62+
export function fmtMetricAxis(v: number | null | undefined, unit?: string): string {
63+
if (v == null || Number.isNaN(v)) return "";
64+
if (unit === "B/s" || unit === "B") return fmtBytes(v);
65+
return fmtCompact(v);
66+
}
67+
68+
// fmtMetricValue is the full "value + unit" string for a metric tooltip: byte
69+
// units render as bytes (with a /s rate suffix when applicable), everything else
70+
// as a compact number followed by its unit label.
71+
export function fmtMetricValue(v: number | null | undefined, unit?: string): string {
72+
if (v == null || Number.isNaN(v)) return "—";
73+
if (unit === "B/s") return `${fmtBytes(v)}/s`;
74+
if (unit === "B") return fmtBytes(v);
75+
return `${fmtCompact(v)}${unit ? ` ${unit}` : ""}`;
76+
}
77+
5178
// fmtDurationMs renders a MILLISECOND duration as a compact human string
5279
// (250ms, 3.4s, 2m 5s, 1h 3m). <=0 / nullish → "—". Used for running-query
5380
// elapsed time in the Live view + detail dialog. (fmtDuration below takes

controlplane/admin/ui/src/pages/Metrics.tsx

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card";
1414
import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from "@/components/ui/select";
1515
import { LoadingState } from "@/components/states";
1616
import { useMetricRange, useMetricsPanels, useOrgs } from "@/hooks/useApi";
17-
import { promToSeries } from "@/lib/format";
17+
import { fmtMetricAxis, fmtMetricValue, promToSeries } from "@/lib/format";
1818

1919
// Window selector. The backend caps the step at ~250 points per window.
2020
const WINDOWS = ["15m", "1h", "6h", "24h"];
@@ -30,6 +30,8 @@ const PANELS: { key: string; title: string; unit: string }[] = [
3030
{ key: "s3_bytes_rate", title: "S3 read bytes rate", unit: "B/s" },
3131
{ key: "worker_states", title: "Workers by state", unit: "" },
3232
{ key: "queue_depth", title: "Connection queue depth", unit: "" },
33+
{ key: "acquire_p95", title: "Worker acquire p95 by source", unit: "s" },
34+
{ key: "acquire_by_source", title: "Worker acquire rate by source", unit: "ops/s" },
3335
];
3436

3537
export function Metrics() {
@@ -165,7 +167,12 @@ function MetricCard({
165167
stroke="hsl(var(--muted-foreground))"
166168
fontSize={10}
167169
/>
168-
<YAxis stroke="hsl(var(--muted-foreground))" fontSize={10} width={48} />
170+
<YAxis
171+
stroke="hsl(var(--muted-foreground))"
172+
fontSize={10}
173+
width={56}
174+
tickFormatter={(v: number) => fmtMetricAxis(v, unit)}
175+
/>
169176
<RTooltip
170177
contentStyle={{
171178
background: "hsl(var(--popover))",
@@ -174,7 +181,7 @@ function MetricCard({
174181
fontSize: 12,
175182
}}
176183
labelFormatter={(t) => new Date(t as number).toLocaleString()}
177-
formatter={(v: number, name) => [`${v}${unit ? ` ${unit}` : ""}`, name]}
184+
formatter={(v: number, name) => [fmtMetricValue(v, unit), name]}
178185
/>
179186
{keys.map((k, i) => (
180187
<Line

controlplane/k8s_pool_acquire.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -427,7 +427,7 @@ func (p *K8sWorkerPool) completeSharedWorkerReservation(ctx context.Context, cla
427427
// attempt.
428428
hotClaimStart := time.Now()
429429
worker, reserveErr := p.reserveClaimedWorker(ctx, claim.hotClaimed, assignment)
430-
observeAcquirePhase("hot_idle_claim", time.Since(hotClaimStart), reserveErr)
430+
observeAcquirePhase("hot_idle_claim", assignment.OrgID, time.Since(hotClaimStart), reserveErr)
431431
if reserveErr == nil {
432432
worker.hotIdleReclaimed = true
433433
return worker, false, nil

controlplane/k8s_pool_spawn.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -558,7 +558,7 @@ func (p *K8sWorkerPool) spawnReservedWorkerForSlot(ctx context.Context, id int,
558558
// connect → reserve. Named-return defer so every failure stage observes.
559559
spawnStart := time.Now()
560560
defer func() {
561-
observeAcquirePhase("spawn", time.Since(spawnStart), err)
561+
observeAcquirePhase("spawn", assignment.OrgID, time.Since(spawnStart), err)
562562
}()
563563
// A default (nil profile) request spawns a default-sized worker: the zero
564564
// profile makes workerResourcesForProfile fall back to the pool-global request.

0 commit comments

Comments
 (0)