Skip to content

Commit 728a6d6

Browse files
committed
Retry gateway model-name discovery for KAITO llama.cpp
1 parent 054ba06 commit 728a6d6

3 files changed

Lines changed: 104 additions & 23 deletions

File tree

controller/internal/controller/gateway_reconciler.go

Lines changed: 46 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -45,12 +45,12 @@ import (
4545

4646
// reconcileGateway creates or updates InferencePool and HTTPRoute resources
4747
// for a ModelDeployment that has gateway integration enabled.
48-
func (r *ModelDeploymentReconciler) reconcileGateway(ctx context.Context, md *airunwayv1alpha1.ModelDeployment) error {
48+
func (r *ModelDeploymentReconciler) reconcileGateway(ctx context.Context, md *airunwayv1alpha1.ModelDeployment) (bool, error) {
4949
logger := log.FromContext(ctx)
5050

5151
// Skip if no gateway detector configured
5252
if r.GatewayDetector == nil {
53-
return nil
53+
return false, nil
5454
}
5555

5656
// Skip if gateway CRDs are not available
@@ -60,21 +60,21 @@ func (r *ModelDeploymentReconciler) reconcileGateway(ctx context.Context, md *ai
6060
logger.Info("Gateway explicitly enabled but Gateway API Inference Extension CRDs not found", "name", md.Name)
6161
r.setCondition(md, airunwayv1alpha1.ConditionTypeGatewayReady, metav1.ConditionFalse, "CRDsNotAvailable", "Gateway API Inference Extension CRDs are not installed in the cluster")
6262
}
63-
return nil
63+
return false, nil
6464
}
6565

6666
// Skip if explicitly disabled
6767
if md.Spec.Gateway != nil && md.Spec.Gateway.Enabled != nil && !*md.Spec.Gateway.Enabled {
6868
logger.V(1).Info("Gateway integration explicitly disabled", "name", md.Name)
69-
return nil
69+
return false, nil
7070
}
7171

7272
// Resolve gateway configuration
7373
gwConfig, err := r.resolveGatewayConfig(ctx, md)
7474
if err != nil {
7575
logger.Info("No gateway found for routing, skipping gateway reconciliation", "reason", err.Error())
7676
r.setCondition(md, airunwayv1alpha1.ConditionTypeGatewayReady, metav1.ConditionFalse, "NoGateway", err.Error())
77-
return nil
77+
return false, nil
7878
}
7979

8080
// Determine target port for InferencePool (needs the pod/container port, not service port)
@@ -97,25 +97,26 @@ func (r *ModelDeploymentReconciler) reconcileGateway(ctx context.Context, md *ai
9797
// Create or update InferencePool
9898
if err := r.reconcileInferencePool(ctx, md, port); err != nil {
9999
r.setCondition(md, airunwayv1alpha1.ConditionTypeGatewayReady, metav1.ConditionFalse, "InferencePoolFailed", err.Error())
100-
return fmt.Errorf("reconciling InferencePool: %w", err)
100+
return false, fmt.Errorf("reconciling InferencePool: %w", err)
101101
}
102102

103103
// Create or update EPP (Endpoint Picker Proxy) for the InferencePool
104104
if err := r.reconcileEPP(ctx, md); err != nil {
105105
r.setCondition(md, airunwayv1alpha1.ConditionTypeGatewayReady, metav1.ConditionFalse, "EPPFailed", err.Error())
106-
return fmt.Errorf("reconciling EPP: %w", err)
106+
return false, fmt.Errorf("reconciling EPP: %w", err)
107107
}
108108

109109
// Resolve model name early (needed for HTTPRoute header match and status)
110-
modelName := r.resolveModelName(ctx, md)
110+
resolution := r.resolveModelNameResolution(ctx, md)
111+
modelName := resolution.name
111112

112113
// Create or update HTTPRoute (skip if user provides their own)
113114
if md.Spec.Gateway != nil && md.Spec.Gateway.HTTPRouteRef != "" {
114115
logger.V(1).Info("Using user-provided HTTPRoute", "httpRouteRef", md.Spec.Gateway.HTTPRouteRef)
115116
} else {
116117
if err := r.reconcileHTTPRoute(ctx, md, gwConfig, modelName); err != nil {
117118
r.setCondition(md, airunwayv1alpha1.ConditionTypeGatewayReady, metav1.ConditionFalse, "HTTPRouteFailed", err.Error())
118-
return fmt.Errorf("reconciling HTTPRoute: %w", err)
119+
return false, fmt.Errorf("reconciling HTTPRoute: %w", err)
119120
}
120121
}
121122

@@ -128,7 +129,7 @@ func (r *ModelDeploymentReconciler) reconcileGateway(ctx context.Context, md *ai
128129
r.setCondition(md, airunwayv1alpha1.ConditionTypeGatewayReady, metav1.ConditionTrue, "GatewayConfigured", "InferencePool and HTTPRoute created")
129130

130131
logger.Info("Gateway resources reconciled", "name", md.Name, "gateway", gwConfig.GatewayName, "model", modelName)
131-
return nil
132+
return resolution.retry, nil
132133
}
133134

134135
// resolveGatewayConfig determines which Gateway to use as the HTTPRoute parent.
@@ -647,15 +648,24 @@ func (r *ModelDeploymentReconciler) resolveGatewayEndpoint(ctx context.Context,
647648
return ""
648649
}
649650

651+
// modelNameResolution is the result of resolving the model name used for
// gateway routing: the chosen name plus a hint on whether resolution should
// be attempted again later.
type modelNameResolution struct {
652+
// name is the model name selected for gateway routing.
name string
653+
// retry reports whether the caller should schedule another resolution
// attempt; reconcileGateway returns this value to its caller.
retry bool
654+
}
655+
650656
// resolveModelName determines the model name for gateway routing.
651657
// Priority: spec.gateway.modelName > spec.model.servedName > auto-discovered from /v1/models > spec.model.id
652658
func (r *ModelDeploymentReconciler) resolveModelName(ctx context.Context, md *airunwayv1alpha1.ModelDeployment) string {
659+
// Delegate to resolveModelNameResolution and keep only the resolved name;
// the retry hint carried alongside it is discarded here.
return r.resolveModelNameResolution(ctx, md).name
660+
}
661+
662+
func (r *ModelDeploymentReconciler) resolveModelNameResolution(ctx context.Context, md *airunwayv1alpha1.ModelDeployment) modelNameResolution {
653663
// Use explicit overrides first
654664
if md.Spec.Gateway != nil && md.Spec.Gateway.ModelName != "" {
655-
return md.Spec.Gateway.ModelName
665+
return modelNameResolution{name: md.Spec.Gateway.ModelName}
656666
}
657667
if shouldUseServedNameForGateway(md) {
658-
return md.Spec.Model.ServedName
668+
return modelNameResolution{name: md.Spec.Model.ServedName}
659669
}
660670

661671
// Auto-discover from the running model server
@@ -670,11 +680,19 @@ func (r *ModelDeploymentReconciler) resolveModelName(ctx context.Context, md *ai
670680
}
671681
if discovered := r.discoverModelName(ctx, md.Status.Endpoint.Service, md.Namespace, port); discovered != "" {
672682
log.FromContext(ctx).Info("Auto-discovered model name from server", "name", md.Name, "modelName", discovered)
673-
return discovered
683+
return modelNameResolution{name: discovered}
684+
}
685+
686+
return modelNameResolution{
687+
name: md.Spec.Model.ID,
688+
retry: shouldRetryGatewayModelNameDiscovery(md),
674689
}
675690
}
676691

677-
return md.Spec.Model.ID
692+
return modelNameResolution{
693+
name: md.Spec.Model.ID,
694+
retry: shouldRetryGatewayModelNameDiscovery(md),
695+
}
678696
}
679697

680698
func shouldUseServedNameForGateway(md *airunwayv1alpha1.ModelDeployment) bool {
@@ -689,6 +707,20 @@ func shouldUseServedNameForGateway(md *airunwayv1alpha1.ModelDeployment) bool {
689707
return true
690708
}
691709

710+
// shouldRetryGatewayModelNameDiscovery reports whether model-name resolution
// that fell back to spec.model.id should be retried later. It returns true
// only when the resolved engine type is llama.cpp, the resolved provider name
// is "kaito", and no explicit spec.gateway.modelName is set.
func shouldRetryGatewayModelNameDiscovery(md *airunwayv1alpha1.ModelDeployment) bool {
711+
// Only the llama.cpp engine qualifies for a retry.
if md.ResolvedEngineType() != airunwayv1alpha1.EngineTypeLlamaCpp {
712+
return false
713+
}
714+
// Only the "kaito" provider qualifies for a retry.
if resolvedProviderName(md) != "kaito" {
715+
return false
716+
}
717+
// An explicit gateway model name never needs discovery, so there is
// nothing to retry.
if md.Spec.Gateway != nil && md.Spec.Gateway.ModelName != "" {
718+
return false
719+
}
720+
721+
return true
722+
}
723+
692724
func resolvedProviderName(md *airunwayv1alpha1.ModelDeployment) string {
693725
if md.Spec.Provider != nil && md.Spec.Provider.Name != "" {
694726
return md.Spec.Provider.Name

controller/internal/controller/gateway_reconciler_test.go

Lines changed: 50 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -260,7 +260,7 @@ func TestGateway_DisabledSkipsCreation(t *testing.T) {
260260
r := newTestReconciler(scheme, detector, md)
261261
ctx := context.Background()
262262

263-
err := r.reconcileGateway(ctx, md)
263+
_, err := r.reconcileGateway(ctx, md)
264264
if err != nil {
265265
t.Fatalf("reconcileGateway failed: %v", err)
266266
}
@@ -395,7 +395,7 @@ func TestGateway_NotAvailableSkipsSilently(t *testing.T) {
395395
r := newTestReconciler(scheme, detector, md)
396396
ctx := context.Background()
397397

398-
err := r.reconcileGateway(ctx, md)
398+
_, err := r.reconcileGateway(ctx, md)
399399
if err != nil {
400400
t.Fatalf("expected no error when gateway not available, got: %v", err)
401401
}
@@ -415,7 +415,7 @@ func TestGateway_NilDetectorSkipsSilently(t *testing.T) {
415415
r := newTestReconciler(scheme, nil, md)
416416
ctx := context.Background()
417417

418-
err := r.reconcileGateway(ctx, md)
418+
_, err := r.reconcileGateway(ctx, md)
419419
if err != nil {
420420
t.Fatalf("expected no error when detector is nil, got: %v", err)
421421
}
@@ -428,7 +428,7 @@ func TestGateway_StatusUpdate(t *testing.T) {
428428
r := newTestReconciler(scheme, detector, md)
429429
ctx := context.Background()
430430

431-
err := r.reconcileGateway(ctx, md)
431+
_, err := r.reconcileGateway(ctx, md)
432432
if err != nil {
433433
t.Fatalf("reconcileGateway failed: %v", err)
434434
}
@@ -480,7 +480,7 @@ func TestGateway_StatusEndpointFromGatewayAddress(t *testing.T) {
480480
r := newTestReconciler(scheme, detector, md, gw)
481481
ctx := context.Background()
482482

483-
err := r.reconcileGateway(ctx, md)
483+
_, err := r.reconcileGateway(ctx, md)
484484
if err != nil {
485485
t.Fatalf("reconcileGateway failed: %v", err)
486486
}
@@ -503,7 +503,7 @@ func TestGateway_StatusModelNameOverride(t *testing.T) {
503503
r := newTestReconciler(scheme, detector, md)
504504
ctx := context.Background()
505505

506-
err := r.reconcileGateway(ctx, md)
506+
_, err := r.reconcileGateway(ctx, md)
507507
if err != nil {
508508
t.Fatalf("reconcileGateway failed: %v", err)
509509
}
@@ -521,7 +521,7 @@ func TestGateway_StatusServedNameFallback(t *testing.T) {
521521
r := newTestReconciler(scheme, detector, md)
522522
ctx := context.Background()
523523

524-
err := r.reconcileGateway(ctx, md)
524+
_, err := r.reconcileGateway(ctx, md)
525525
if err != nil {
526526
t.Fatalf("reconcileGateway failed: %v", err)
527527
}
@@ -604,6 +604,49 @@ func TestGateway_KaitoLlamaCppServedNameFallsBackToModelID(t *testing.T) {
604604
}
605605
}
606606

607+
// TestGateway_KaitoLlamaCppFallbackRequestsRetry verifies that a Kaito
// llama.cpp deployment whose model name cannot be auto-discovered falls back
// to spec.model.id and asks for the resolution to be retried.
func TestGateway_KaitoLlamaCppFallbackRequestsRetry(t *testing.T) {
608+
scheme := newTestScheme()
609+
md := newModelDeployment("test-model", "default")
610+
// Kaito provider + llama.cpp engine is the combination under test.
md.Spec.Provider = &airunwayv1alpha1.ProviderSpec{Name: "kaito"}
611+
md.Spec.Engine.Type = airunwayv1alpha1.EngineTypeLlamaCpp
612+
// Point the endpoint at a service named so that discovery is expected to
// return nothing — presumably the lookup fails; confirm against
// discoverModelName.
md.Status.Endpoint = &airunwayv1alpha1.EndpointStatus{
613+
Service: "nonexistent-svc",
614+
Port: 8080,
615+
}
616+
detector := fakeDetector(true, "my-gateway", "gateway-ns")
617+
r := newTestReconciler(scheme, detector, md)
618+
ctx := context.Background()
619+
620+
resolution := r.resolveModelNameResolution(ctx, md)
621+
// The expected name is presumably the spec.model.id that
// newModelDeployment sets by default — verify against that helper.
if resolution.name != "meta-llama/Llama-3-8B" {
622+
t.Fatalf("expected fallback to spec.model.id %q, got %q", "meta-llama/Llama-3-8B", resolution.name)
623+
}
624+
// The fallback must carry the retry hint.
if !resolution.retry {
625+
t.Fatal("expected failed Kaito llama.cpp discovery to request a retry")
626+
}
627+
}
628+
629+
// TestGateway_ModelNameOverrideSkipsRetry verifies that an explicit
// spec.gateway.modelName is returned as-is and does not request a retry, even
// for a Kaito llama.cpp deployment.
func TestGateway_ModelNameOverrideSkipsRetry(t *testing.T) {
630+
scheme := newTestScheme()
631+
md := newModelDeployment("test-model", "default")
632+
// Same provider/engine combination that would otherwise allow a retry.
md.Spec.Provider = &airunwayv1alpha1.ProviderSpec{Name: "kaito"}
633+
md.Spec.Engine.Type = airunwayv1alpha1.EngineTypeLlamaCpp
634+
// The explicit override is what should suppress the retry.
md.Spec.Gateway = &airunwayv1alpha1.GatewaySpec{
635+
ModelName: "stable-name",
636+
}
637+
detector := fakeDetector(true, "my-gateway", "gateway-ns")
638+
r := newTestReconciler(scheme, detector, md)
639+
ctx := context.Background()
640+
641+
resolution := r.resolveModelNameResolution(ctx, md)
642+
if resolution.name != "stable-name" {
643+
t.Fatalf("expected explicit override %q, got %q", "stable-name", resolution.name)
644+
}
645+
if resolution.retry {
646+
t.Fatal("expected explicit model name override to skip retry")
647+
}
648+
}
649+
607650
func TestGateway_ModelNameNoEndpointFallsBack(t *testing.T) {
608651
scheme := newTestScheme()
609652
md := newModelDeployment("test-model", "default")

controller/internal/controller/modeldeployment_controller.go

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ import (
2121
"encoding/json"
2222
"fmt"
2323
"strings"
24+
"time"
2425

2526
"github.com/google/cel-go/cel"
2627
"github.com/google/cel-go/common/types"
@@ -48,6 +49,8 @@ type ModelDeploymentReconciler struct {
4849
GatewayDetector *gateway.Detector
4950
}
5051

52+
// gatewayModelNameRetryDelay is the RequeueAfter interval Reconcile sets when
// reconcileGateway reports that model-name discovery should be retried.
const gatewayModelNameRetryDelay = time.Minute
53+
5154
// +kubebuilder:rbac:groups=airunway.ai,resources=modeldeployments,verbs=get;list;watch;create;update;patch;delete
5255
// +kubebuilder:rbac:groups=airunway.ai,resources=modeldeployments/status,verbs=get;update;patch
5356
// +kubebuilder:rbac:groups=airunway.ai,resources=modeldeployments/finalizers,verbs=update
@@ -75,6 +78,7 @@ type ModelDeploymentReconciler struct {
7578
// matches their name and handle the actual resource creation.
7679
func (r *ModelDeploymentReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
7780
logger := log.FromContext(ctx)
81+
result := ctrl.Result{}
7882

7983
// Fetch the ModelDeployment
8084
var md airunwayv1alpha1.ModelDeployment
@@ -179,22 +183,24 @@ func (r *ModelDeploymentReconciler) Reconcile(ctx context.Context, req ctrl.Requ
179183
logger.Error(err, "Failed to clean up gateway resources")
180184
}
181185
} else {
182-
if err := r.reconcileGateway(ctx, &md); err != nil {
186+
if retryModelNameDiscovery, err := r.reconcileGateway(ctx, &md); err != nil {
183187
logger.Error(err, "Gateway reconciliation failed", "name", md.Name)
184188
// If the error suggests CRDs were removed, refresh the detection cache
185189
if isNoMatchError(err) && r.GatewayDetector != nil {
186190
logger.Info("Gateway CRDs may have been removed, refreshing detection cache")
187191
r.GatewayDetector.Refresh()
188192
}
189193
// Non-fatal: don't block overall reconciliation
194+
} else if retryModelNameDiscovery {
195+
result.RequeueAfter = gatewayModelNameRetryDelay
190196
}
191197
}
192198
}
193199
// Kubernetes garbage collection will handle cleanup when the ModelDeployment is deleted.
194200

195201
logger.Info("Reconciliation complete", "name", md.Name, "phase", md.Status.Phase, "provider", md.Status.Provider)
196202

197-
return ctrl.Result{}, r.Status().Patch(ctx, &md, client.MergeFrom(base))
203+
return result, r.Status().Patch(ctx, &md, client.MergeFrom(base))
198204
}
199205

200206
// isNoMatchError checks if an error indicates that a CRD/resource type is not registered.

0 commit comments

Comments
 (0)