Add core limiter interfaces and types

ev-shindin · ev-shindin · commit 263a75b60b7c · 2026-01-14T18:35:48.000+02:00
Introduce the foundational interfaces for resource limiting:
- Limiter: combines inventory with allocation algorithm
- AllocationAlgorithm: defines resource distribution strategy
- ResourceAllocator: handles granularity-specific reservation
- Inventory: provides resource availability at different levels

Data types include ScalingProposal, ScalingDecision, AllocationResult,
and ScalingAction. ScaleTargetRef reuses the existing K8s
CrossVersionObjectReference type for compatibility.
diff --git a/internal/limiter/interfaces.go b/internal/limiter/interfaces.go
@@ -0,0 +1,57 @@
+// Package limiter provides interfaces for resource limiting algorithms.
+package limiter
+
+import (
+	"context"
+)
+
+// Limiter constrains scaling decisions based on resource availability.
+// It is meant to combine an Inventory (granularity) with an AllocationAlgorithm (strategy).
+type Limiter interface {
+	// Name returns the limiter's identifier (expected to include the algorithm name).
+	Name() string
+
+	// Limit applies resource constraints to the proposals and returns the resulting decisions, or an error.
+	Limit(ctx context.Context, proposals []ScalingProposal) ([]ScalingDecision, error)
+}
+
+// AllocationAlgorithm defines how to distribute limited resources across proposals.
+// Algorithms are independent of resource granularity: they see resources only through the ResourceAllocator passed to Allocate.
+type AllocationAlgorithm interface {
+	// Name returns the algorithm's identifier for logging/metrics.
+	Name() string
+
+	// Allocate distributes the resources exposed by allocator across proposals and returns the allocation results, or an error.
+	Allocate(
+		ctx context.Context,
+		proposals []ScalingProposal,
+		allocator ResourceAllocator,
+	) ([]AllocationResult, error)
+}
+
+// ResourceAllocator abstracts resource reservation at different granularities.
+// It is created by Inventory.CreateAllocator to handle node-level vs cluster-level allocation logic.
+type ResourceAllocator interface {
+	// TryAllocate attempts to allocate gpusRequested GPUs for proposal.
+	// It returns the number of GPUs actually allocated (may be less than requested if constrained).
+	TryAllocate(proposal ScalingProposal, gpusRequested int) (gpusAllocated int, err error)
+
+	// Remaining returns the total number of GPUs that are still allocatable.
+	Remaining() int
+}
+
+// Inventory provides resource availability information and creates allocators.
+// Implementations define the granularity (cluster, type, or node).
+type Inventory interface {
+	// Name returns the inventory's identifier.
+	Name() string
+
+	// Refresh updates the inventory from the cluster and returns an error if that fails.
+	Refresh(ctx context.Context) error
+
+	// CreateAllocator returns a ResourceAllocator for this inventory; it has no error return.
+	CreateAllocator(ctx context.Context) ResourceAllocator
+
+	// TotalAvailable returns the total number of available GPUs (for metrics/logging).
+	TotalAvailable() int
+}
diff --git a/internal/limiter/types.go b/internal/limiter/types.go
@@ -0,0 +1,69 @@
+package limiter
+
+import (
+	autoscalingv1 "k8s.io/api/autoscaling/v1"
+)
+
+// ScalingAction is a string-typed scaling action; the defined values are ScaleUp, ScaleDown, and NoChange.
+type ScalingAction string
+
+const (
+	ScaleUp   ScalingAction = "ScaleUp"
+	ScaleDown ScalingAction = "ScaleDown"
+	NoChange  ScalingAction = "NoChange"
+)
+
+// ScalingProposal represents a scaling action proposed by the optimizer, before any limiting is applied.
+type ScalingProposal struct {
+	// Identification of the variant the proposal refers to.
+	ModelID     string
+	VariantName string
+	Namespace   string
+
+	// Resource requirements: accelerator type and GPU count per replica.
+	AcceleratorType string
+	GPUsPerReplica  int
+
+	// Current state: replica count at the time of the proposal.
+	CurrentReplicas int32
+
+	// Proposed action: desired replica count and the kind of change.
+	DesiredReplicas int32
+	Action          ScalingAction
+
+	// Priority information (for algorithm ordering). NOTE(review): ordering direction is not defined here — confirm.
+	SpareCapacity float64
+	Priority      int
+	Weight        float64
+
+	// Cost information. NOTE(review): units are not defined here — confirm with the producer.
+	Cost float64
+
+	// Reference to the scale target (reuses the existing K8s type); a pointer, so it may be nil.
+	ScaleTargetRef *autoscalingv1.CrossVersionObjectReference
+}
+
+// AllocationResult captures the outcome for a single proposal, as returned by AllocationAlgorithm.Allocate.
+type AllocationResult struct {
+	Proposal      ScalingProposal
+	GPUsAllocated int
+	ReplicasAdded int // NOTE(review): int here, while replica counts in ScalingProposal are int32.
+	Partial       bool
+	Reason        string
+}
+
+// ScalingDecision represents the final scaling decision after limiting; it embeds the original ScalingProposal.
+type ScalingDecision struct {
+	ScalingProposal
+
+	// Adjusted replica target (may differ from the embedded DesiredReplicas due to limits).
+	TargetReplicas int32
+
+	// Allocation details: number of GPUs allocated for this decision.
+	GPUsAllocated int
+
+	// Limiting metadata. NOTE(review): LimitedBy presumably holds a Limiter's Name() — confirm.
+	LimitedBy      string
+	LimitReason    string
+	WasConstrained bool
+}