marin-community · rjpower · Apr 3, 2026 · Apr 3, 2026 · Apr 3, 2026 · Apr 3, 2026
diff --git a/lib/iris/dashboard/src/components/controller/AutoscalerTab.vue b/lib/iris/dashboard/src/components/controller/AutoscalerTab.vue
@@ -29,6 +29,13 @@ onMounted(refresh)
 
 const expandedDemand = ref<Set<string>>(new Set())
 const expandedSlices = ref<Set<string>>(new Set())
+const collapsedPools = ref<Set<string>>(new Set())
+
+// Flip a pool between collapsed and expanded. A fresh Set is assigned to
+// `collapsedPools` instead of mutating the current one in place.
+function togglePool(pool: string) {
+  const updated = new Set(collapsedPools.value)
+  if (updated.has(pool)) {
+    updated.delete(pool)
+  } else {
+    updated.add(pool)
+  }
+  collapsedPools.value = updated
+}
 
 function toggleDemand(name: string) {
   const next = new Set(expandedDemand.value)
@@ -106,12 +113,20 @@ interface AvailabilityBadge {
   classes: string
 }
 
-function groupAvailabilityBadge(group: ScaleGroupStatus): AvailabilityBadge | null {
+function groupAvailabilityBadge(group: ScaleGroupStatus, section?: PoolSection): AvailabilityBadge | null {
   const status = group.availabilityStatus
   const blockedMs = timestampMs(group.blockedUntil)
   const cooldownMs = timestampMs(group.scaleUpCooldownUntil)
   const now = Date.now()
 
+  // Check for tier-blocked state (pool monotonicity)
+  if (section && section.blockedAtTier) {
+    const tier = group.config?.allocationTier ?? 0
+    if (tier > section.blockedAtTier) {
+      return { label: 'tier-blocked', classes: 'bg-status-danger-bg text-status-danger border-status-danger-border opacity-60' }
+    }
+  }
+
   if (status === 'requesting') {
     return { label: 'in-flight', classes: 'bg-status-purple-bg text-status-purple border-status-purple-border' }
   }
@@ -221,6 +236,74 @@ const sortedGroupStatuses = computed<GroupRoutingStatus[]>(() => {
   })
 })
 
+// Pool grouping for tier chain display: one section per quota pool, plus one trailing section for groups without a pool
+interface PoolSection {
+  pool: string  // quota pool name; empty string for the section holding groups that have no pool
+  groups: GroupRoutingStatus[]  // for pooled sections, ordered by allocationTier ascending
+  blockedAtTier: number | null  // lowest tier in quota_exceeded/backoff, or null
+}
+
+// Sections for the groups table: one per quota pool (in first-seen order),
+// followed by a single section with an empty pool name for groups without a pool.
+const poolSections = computed<PoolSection[]>(() => {
+  const index = groupIndex.value
+  const tierOf = (entry: GroupRoutingStatus): number =>
+    index[entry.group]?.config?.allocationTier ?? 0
+
+  // Bucket the already-sorted statuses by quota pool.
+  const buckets = new Map<string, GroupRoutingStatus[]>()
+  const loose: GroupRoutingStatus[] = []
+  for (const entry of sortedGroupStatuses.value) {
+    const poolName = index[entry.group]?.config?.quotaPool
+    if (!poolName) {
+      loose.push(entry)
+      continue
+    }
+    const bucket = buckets.get(poolName)
+    if (bucket) {
+      bucket.push(entry)
+    } else {
+      buckets.set(poolName, [entry])
+    }
+  }
+
+  const result: PoolSection[] = []
+  for (const [poolName, members] of buckets) {
+    // Ascending allocation tier within the pool.
+    members.sort((lhs, rhs) => tierOf(lhs) - tierOf(rhs))
+
+    // Lowest positive tier whose group is in quota_exceeded or backoff.
+    let lowestBlocked: number | null = null
+    for (const member of members) {
+      const state = index[member.group]?.availabilityStatus
+      if (state !== 'quota_exceeded' && state !== 'backoff') continue
+      const tier = tierOf(member)
+      if (tier > 0 && (lowestBlocked === null || tier < lowestBlocked)) {
+        lowestBlocked = tier
+      }
+    }
+
+    result.push({ pool: poolName, groups: members, blockedAtTier: lowestBlocked })
+  }
+
+  if (loose.length > 0) {
+    result.push({ pool: '', groups: loose, blockedAtTier: null })
+  }
+  return result
+})
+
+// True when the section has a blocked tier and this group's allocation tier
+// is strictly above it. Groups with no tier are treated as tier 0.
+function isTierBlocked(gs: GroupRoutingStatus, section: PoolSection): boolean {
+  const threshold = section.blockedAtTier
+  if (threshold) {
+    const ownTier = groupIndex.value[gs.group]?.config?.allocationTier ?? 0
+    return ownTier > threshold
+  }
+  return false
+}
+
+// Short chip text for a group's allocation tier ("T1", "T2", ...);
+// empty string when the group has no positive tier.
+function tierLabel(gs: GroupRoutingStatus): string {
+  const tier = groupIndex.value[gs.group]?.config?.allocationTier ?? 0
+  if (tier > 0) {
+    return `T${tier}`
+  }
+  return ''
+}
+
 function isInactiveRow(gs: GroupRoutingStatus): boolean {
   const group = groupIndex.value[gs.group]
   const counts = group?.sliceStateCounts ?? {}
@@ -463,12 +546,56 @@ function idleThresholdMs(groupName: string): number {
             </tr>
           </thead>
           <tbody>
-            <template v-for="gs in sortedGroupStatuses" :key="gs.group">
+            <template v-for="section in poolSections" :key="section.pool || '__unpooled'">
+              <!-- Pool header row -->
+              <tr v-if="section.pool" class="bg-surface border-b border-surface-border cursor-pointer hover:bg-surface-raised" @click="togglePool(section.pool)">
+                <td colspan="8" class="px-3 py-1.5">
+                  <div class="flex items-center gap-2">
+                    <span class="text-[10px] text-text-muted">
+                      {{ collapsedPools.has(section.pool) ? '▶' : '▼' }}
+                    </span>
+                    <span class="text-xs font-semibold uppercase tracking-wider text-text-secondary">
+                      Pool: {{ section.pool }}
+                    </span>
+                    <span
+                      v-if="section.blockedAtTier"
+                      class="inline-flex items-center px-1.5 py-0.5 rounded text-xs border
+                             bg-status-danger-bg text-status-danger border-status-danger-border"
+                    >
+                      blocked at tier {{ section.blockedAtTier }}+
+                    </span>
+                    <!-- Tier chain visualization -->
+                    <span class="flex items-center gap-0.5 text-xs text-text-muted ml-2">
+                      <template v-for="(gs, idx) in section.groups" :key="gs.group">
+                        <span v-if="idx > 0" class="text-text-muted mx-0.5">&rarr;</span>
+                        <span
+                          :class="[
+                            'px-1 py-0.5 rounded border text-[11px] font-mono',
+                            isTierBlocked(gs, section)
+                              ? 'bg-status-danger-bg text-status-danger border-status-danger-border line-through'
+                              : groupIndex[gs.group]?.availabilityStatus === 'quota_exceeded'
+                                ? 'bg-status-danger-bg text-status-danger border-status-danger-border'
+                                : groupIndex[gs.group]?.availabilityStatus === 'backoff'
+                                  ? 'bg-status-orange-bg text-status-orange border-status-orange-border'
+                                  : 'bg-surface border-surface-border text-text-secondary',
+                          ]"
+                        >
+                          {{ tierLabel(gs) }}
+                        </span>
+                      </template>
+                    </span>
+                  </div>
+                </td>
+              </tr>
+
+            <template v-for="gs in section.groups" :key="gs.group">
               <!-- Main row -->
               <tr
+                v-if="!section.pool || !collapsedPools.has(section.pool)"
                 :class="[
                   'border-b border-surface-border-subtle hover:bg-surface-raised transition-colors',
                   isInactiveRow(gs) ? 'opacity-50' : '',
+                  isTierBlocked(gs, section) ? 'opacity-40' : '',
                 ]"
               >
                 <!-- Priority -->
@@ -488,14 +615,14 @@ function idleThresholdMs(groupName: string): number {
                       &#x26a0; {{ groupFailures(gs.group) }} fail{{ groupFailures(gs.group) > 1 ? 's' : '' }}
                     </span>
                   </div>
-                  <div v-if="groupIndex[gs.group] && groupAvailabilityBadge(groupIndex[gs.group])" class="mt-0.5">
+                  <div v-if="groupIndex[gs.group] && groupAvailabilityBadge(groupIndex[gs.group], section)" class="mt-0.5">
                     <span
                       :class="[
                         'inline-flex items-center px-1.5 py-0.5 rounded text-xs border',
-                        groupAvailabilityBadge(groupIndex[gs.group])!.classes,
+                        groupAvailabilityBadge(groupIndex[gs.group], section)!.classes,
                       ]"
                     >
-                      {{ groupAvailabilityBadge(groupIndex[gs.group])!.label }}
+                      {{ groupAvailabilityBadge(groupIndex[gs.group], section)!.label }}
                     </span>
                   </div>
                 </td>
@@ -576,7 +703,7 @@ function idleThresholdMs(groupName: string): number {
               </tr>
 
               <!-- Slice detail (expanded) -->
-              <tr v-if="expandedSlices.has(gs.group) && groupHasSlices(gs.group)" class="bg-surface-sunken">
+              <tr v-if="expandedSlices.has(gs.group) && groupHasSlices(gs.group) && (!section.pool || !collapsedPools.has(section.pool))" class="bg-surface-sunken">
                 <td colspan="8" class="px-6 py-3">
                   <div class="space-y-1.5">
                     <div
@@ -615,7 +742,7 @@ function idleThresholdMs(groupName: string): number {
               </tr>
 
               <!-- Demand detail (expanded) -->
-              <tr v-if="expandedDemand.has(gs.group) && groupDemand(gs.group) > 0" class="bg-surface-sunken">
+              <tr v-if="expandedDemand.has(gs.group) && groupDemand(gs.group) > 0 && (!section.pool || !collapsedPools.has(section.pool))" class="bg-surface-sunken">
                 <td colspan="8" class="px-6 py-3">
                   <div class="space-y-1">
                     <div
@@ -634,6 +761,7 @@ function idleThresholdMs(groupName: string): number {
                 </td>
               </tr>
             </template>
+            </template>
           </tbody>
         </table>
       </div>

diff --git a/lib/iris/dashboard/src/types/rpc.ts b/lib/iris/dashboard/src/types/rpc.ts
@@ -255,9 +255,14 @@ export interface SliceInfo {
   idle?: boolean
 }
 
+export interface ScaleGroupConfig {
+  quotaPool?: string  // quota pool name; the autoscaler tab groups rows that share this value
+  allocationTier?: number  // tier within the pool; the dashboard treats a missing value as 0 (no tier label)
+}
+
 export interface ScaleGroupStatus {
   name: string
-  config?: Record<string, unknown>
+  config?: ScaleGroupConfig
   currentDemand?: number
   peakDemand?: number
   backoffUntil?: ProtoTimestamp

diff --git a/lib/iris/docs/tpu-pool-expansion.md b/lib/iris/docs/tpu-pool-expansion.md
@@ -0,0 +1,140 @@
+# TPU Pool Expansion & Allocation Tiers
+
+**Status:** implementation plan
+**Last updated:** 2026-04-03
+
+## Problem
+
+When the autoscaler can't allocate a TPU of size X in a zone, it falls through the priority waterfall and tries size 2X, 4X, etc. This is wasteful and dangerous:
+
+1. **Wasted API calls**: GCP TPU capacity is monotonic — if v5p-8 is unavailable, v5p-16 will also be unavailable. Each failed attempt burns rate limit tokens and adds latency.
+2. **Accidental over-allocation**: If a larger slice transiently succeeds, the job gets more resources than intended and is more likely to be preempted.
+3. **Config verbosity**: Each TPU size × zone is a separate scale group entry. The production config has ~35 nearly-identical entries that differ only in size-derived fields.
+
+## Design
+
+Two changes:
+
+### 1. TPU Pool Config Sugar (`tpu_pools`)
+
+A new top-level YAML key that expands into scale groups. Each pool defines shared properties for a TPU family; the `sizes` map lists per-size overrides.
+
+```yaml
+tpu_pools:
+  v5e-preemptible:
+    family: v5e
+    zones: [europe-west4-b, us-west4-a]
+    base_priority: 10
+    resources: { cpu: 112, ram: 192GB, disk: 100GB, preemptible: true }
+    slice_template:
+      gcp:
+        service_account: iris-worker@hai-gcp-models.iam.gserviceaccount.com
+        runtime_version: v2-alpha-tpuv5-lite
+    sizes:
+      4:   { min_slices: 3, max_slices: 1024 }
+      8:   { max_slices: 512 }
+      16:  { max_slices: 256 }
+```
+
+The pool name (`v5e-preemptible`) is an operator-chosen label, independent of the TPU family. This allows multiple pools for the same family (e.g., `v5e-preemptible` vs `v5e-reserved` with different zones, priorities, and preemptibility).
+
+**Expansion** (`_expand_tpu_pools`): For each pool × size × zone, emit a scale group:
+
+- **name**: `tpu_{pool}_{size}-{zone}` (e.g., `tpu_v5e-preemptible_16-europe-west4-b`)
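+- **device_variant**: `{prefix}-{size}`, where the prefix comes from `TPU_FAMILY_VARIANT_PREFIX[family]` (e.g., family `v5e`, size `16` → `v5litepod-16`); this variant name is what is looked up in `TpuTopologyInfo`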
+- **num_vms**: `TpuTopologyInfo.vm_count`
+- **device_count**: `TpuTopologyInfo.chips_per_vm`
+- **device_type**: `tpu` (injected)
+- **priority**: `base_priority + (tier_index × 10)` where tier_index is the 0-based position in sorted sizes
+- **quota_pool**: `{pool_name}/{zone}` (e.g., `v5e-preemptible/europe-west4-b`). Per-zone because GCP quota is per-zone — a failure in one zone should not block allocation in another.
+- **allocation_tier**: `tier_index + 1` (1-based)
+- **min_slices**: from size entry, default 0
+- **max_slices**: from size entry (required)
+- **zone, region**: set on `slice_template.gcp.zone` and `worker.attributes`
+
+The function runs before `_expand_multi_zone_groups` and handles zone expansion itself (TPU pools don't go through the generic zone expander).
+
+**Family → variant mapping**: A dict in `types.py`:
+
+```python
+TPU_FAMILY_VARIANT_PREFIX: dict[str, str] = {
+    "v4": "v4",
+    "v5e": "v5litepod",
+    "v5p": "v5p",
+    "v6e": "v6e",
+}
+```
+
+The variant name for a pool with `family: v5e` and size `16` is `v5litepod-16`. This is validated against `TPU_TOPOLOGIES` — unknown family/size combinations are rejected at config load time.
+
+### 2. Allocation Tiers (`quota_pool` + `allocation_tier`)
+
+Two new fields on `ScaleGroupConfig`:
+
+```protobuf
+message ScaleGroupConfig {
+  // Groups sharing a quota_pool propagate quota-exceeded state together.
+  // When tier N in a pool hits quota, tiers > N are blocked.
+  string quota_pool = 80;
+  int32 allocation_tier = 81;
+}
+```
+
+**Autoscaler behavior** change in `route_demand`:
+
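+When filtering matching groups for a demand entry, skip every group whose `allocation_tier` is at or above the lowest tier in the same `quota_pool` that is in `QUOTA_EXCEEDED` or `BACKOFF` state — that is, the blocked tier itself and every higher tier. Groups without a `quota_pool` are never filtered. This is a filter applied after hard constraint matching and before budget assignment.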
+
+```python
+def _pool_blocked_tiers(groups: list[ScalingGroup], ts: Timestamp) -> dict[str, int]:
+    """Return the minimum blocked tier per quota_pool.
+
+    If pool "v5e" has tier 1 in QUOTA_EXCEEDED, returns {"v5e": 1},
+    meaning tiers >= 1 should be skipped.
+    """
+    blocked: dict[str, int] = {}
+    for g in groups:
+        pool = g.config.quota_pool
+        tier = g.config.allocation_tier
+        if not pool or not tier:
+            continue
+        avail = g.availability(ts)
+        if avail.status in (GroupAvailability.QUOTA_EXCEEDED, GroupAvailability.BACKOFF):
+            if pool not in blocked or tier < blocked[pool]:
+                blocked[pool] = tier
+    return blocked
+```
+
+In `route_demand`, after `matching_groups` is computed:
+
+```python
+blocked = _pool_blocked_tiers(sorted_groups, ts)
+matching_groups = [
+    g for g in matching_groups
+    if not g.config.quota_pool
+    or g.config.allocation_tier < blocked.get(g.config.quota_pool, float('inf'))
+]
+```
+
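+**Dashboard**: The AutoscalerTab groups scale groups by `quota_pool` when present, showing a visual tier chain in each pool header (conceptually `[v5p-8 ✓] → [v5p-16 ⊘] → [v5p-32 ⊘]`). Each tier is rendered as a `T<n>` chip: a tier in quota-exceeded or backoff state is highlighted, and higher tiers blocked by it are struck through and badged `tier-blocked`.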
+
+## Implementation Plan
+
+### Stage 1: Proto + config expansion
+
+1. Add `TPU_FAMILY_VARIANT_PREFIX` dict to `types.py`
+2. Add `quota_pool` and `allocation_tier` fields to `ScaleGroupConfig` in `config.proto`, regenerate
+3. Implement `_expand_tpu_pools()` in `config.py`
+4. Wire into `load_config()` before `_expand_multi_zone_groups()`
+5. Tests: expansion correctness, topology derivation, validation errors
+6. Migrate `examples/marin.yaml` to `tpu_pools` format
+
+### Stage 2: Autoscaler tier blocking
+
+7. Implement `_pool_blocked_tiers()` in `autoscaler.py`
+8. Add tier filtering to `route_demand()`
+9. Tests: tier blocking on quota exceeded, independent pools, groups without pools
+
+### Stage 3: Dashboard
+
+10. Group autoscaler view by `quota_pool`
+11. Show tier chain with blocked/available visual state