Skip to content

Commit 263a75b

Browse files
committed
Add core limiter interfaces and types
Introduce the foundational interfaces for resource limiting:

- Limiter: combines an inventory with an allocation algorithm
- AllocationAlgorithm: defines the resource distribution strategy
- ResourceAllocator: handles granularity-specific reservation
- Inventory: provides resource availability at different levels

Data types include ScalingProposal, ScalingDecision, AllocationResult, and ScalingAction. ScaleTargetRef reuses the existing K8s CrossVersionObjectReference type for compatibility.
1 parent 634d637 commit 263a75b

2 files changed

Lines changed: 126 additions & 0 deletions

File tree

internal/limiter/interfaces.go

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
// Package limiter provides interfaces for resource limiting algorithms.
2+
package limiter
3+
4+
import (
5+
"context"
6+
)
7+
8+
// Limiter constrains scaling decisions based on resource availability.
9+
// It combines an Inventory (granularity) with an AllocationAlgorithm (strategy).
10+
type Limiter interface {
11+
// Name returns limiter identifier (includes algorithm name).
12+
Name() string
13+
14+
// Limit applies resource constraints to proposed scaling decisions.
15+
Limit(ctx context.Context, proposals []ScalingProposal) ([]ScalingDecision, error)
16+
}
17+
18+
// AllocationAlgorithm defines how to distribute limited resources across proposals.
19+
// Algorithms are independent of resource granularity - they work with any Inventory.
20+
type AllocationAlgorithm interface {
21+
// Name returns algorithm identifier for logging/metrics.
22+
Name() string
23+
24+
// Allocate distributes available resources across proposals.
25+
Allocate(
26+
ctx context.Context,
27+
proposals []ScalingProposal,
28+
allocator ResourceAllocator,
29+
) ([]AllocationResult, error)
30+
}
31+
32+
// ResourceAllocator abstracts resource reservation at different granularities.
33+
// Created by Inventory to handle node-level vs cluster-level allocation logic.
34+
type ResourceAllocator interface {
35+
// TryAllocate attempts to allocate GPUs for a proposal.
36+
// Returns actual GPUs allocated (may be less than requested if constrained).
37+
TryAllocate(proposal ScalingProposal, gpusRequested int) (gpusAllocated int, err error)
38+
39+
// Remaining returns total remaining allocatable GPUs.
40+
Remaining() int
41+
}
42+
43+
// Inventory provides resource availability information and creates allocators.
44+
// Implementations define the granularity (cluster, type, node).
45+
type Inventory interface {
46+
// Name returns inventory identifier.
47+
Name() string
48+
49+
// Refresh updates inventory from the cluster.
50+
Refresh(ctx context.Context) error
51+
52+
// CreateAllocator returns a ResourceAllocator for this inventory.
53+
CreateAllocator(ctx context.Context) ResourceAllocator
54+
55+
// TotalAvailable returns total available GPUs (for metrics/logging).
56+
TotalAvailable() int
57+
}

internal/limiter/types.go

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
package limiter
2+
3+
import (
4+
autoscalingv1 "k8s.io/api/autoscaling/v1"
5+
)
6+
7+
// ScalingAction represents the type of scaling action.
//
// It is a string type, so values print and compare as their literal names.
type ScalingAction string

// Recognized scaling actions. Each constant's value is identical to its name.
// The type itself does not validate an action against replica counts.
const (
	// ScaleUp is the "ScaleUp" action.
	ScaleUp ScalingAction = "ScaleUp"
	// ScaleDown is the "ScaleDown" action.
	ScaleDown ScalingAction = "ScaleDown"
	// NoChange is the "NoChange" action.
	NoChange ScalingAction = "NoChange"
)
15+
16+
// ScalingProposal represents a proposed scaling action from the optimizer.
17+
type ScalingProposal struct {
18+
// Variant identification
19+
ModelID string
20+
VariantName string
21+
Namespace string
22+
23+
// Resource requirements
24+
AcceleratorType string
25+
GPUsPerReplica int
26+
27+
// Current state
28+
CurrentReplicas int32
29+
30+
// Proposed action
31+
DesiredReplicas int32
32+
Action ScalingAction
33+
34+
// Priority information (for algorithm ordering)
35+
SpareCapacity float64
36+
Priority int
37+
Weight float64
38+
39+
// Cost information
40+
Cost float64
41+
42+
// Reference to scale target (uses existing K8s type)
43+
ScaleTargetRef *autoscalingv1.CrossVersionObjectReference
44+
}
45+
46+
// AllocationResult captures the outcome for a single proposal.
47+
type AllocationResult struct {
48+
Proposal ScalingProposal
49+
GPUsAllocated int
50+
ReplicasAdded int
51+
Partial bool
52+
Reason string
53+
}
54+
55+
// ScalingDecision represents the final scaling decision after limiting.
56+
type ScalingDecision struct {
57+
ScalingProposal
58+
59+
// Adjusted target (may differ from DesiredReplicas due to limits)
60+
TargetReplicas int32
61+
62+
// Allocation details
63+
GPUsAllocated int
64+
65+
// Limiting metadata
66+
LimitedBy string
67+
LimitReason string
68+
WasConstrained bool
69+
}

0 commit comments

Comments
 (0)