Skip to content

Commit 83a5514

Browse files
committed
Improve error logging; don't retry for launcher in its early lifecycle stage
Signed-off-by: Jun Duan <jun.duan.phd@outlook.com>
1 parent: 6e27f32 · commit: 83a5514

1 file changed

Lines changed: 4 additions & 4 deletions

File tree

pkg/controller/dual-pods/inference-server.go

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -88,20 +88,20 @@ func ensureNamedLauncherInstance(
88 88
return inst, nil
89 89
}
90 90
if !IsInstanceNotFoundError(err) {
91-
return nil, fmt.Errorf("get vLLM instance %q: %w", instanceID, err)
91+
return nil, fmt.Errorf("failed to get vLLM instance %q: %w", instanceID, err)
92 92
}
93 93

94 94
result, err := lClient.CreateNamedInstance(ctx, instanceID, cfg)
95 95
if err != nil {
96 96
if IsInstanceAlreadyExistsError(err) {
97 97
inst, retryErr := lClient.GetInstanceState(ctx, instanceID)
98 98
if retryErr != nil {
99-
return nil, fmt.Errorf("get existing vLLM instance %q after conflict: %w", instanceID, retryErr)
99+
return nil, fmt.Errorf("failed to get existing vLLM instance %q after conflict: %w", instanceID, retryErr)
100 100
}
101 101
launcherDat.Instances[instanceID] = time.Now()
102 102
return inst, nil
103 103
}
104-
return nil, fmt.Errorf("create vLLM instance %q: %w", instanceID, err)
104+
return nil, fmt.Errorf("failed to create vLLM instance %q: %w", instanceID, err)
105 105
}
106 106
launcherDat.Instances[instanceID] = time.Now()
107 107
return &InstanceState{
@@ -435,7 +435,7 @@ func (item infSvrItem) process(urCtx context.Context, ctl *controller, nodeDat *
435 435
// ensureNamedLauncherInstance recovers or creates the instance.
436 436
if launcherBased && serverDat.InstanceID == "" {
437 437
if providingPod.Status.PodIP == "" || !utils.IsPodReady(providingPod) {
438-
return nil, true
438+
return nil, false
439 439
}
440 440
cfg, iscHash, err := ctl.configInferenceServer(isc, serverDat.GPUIDs)
441 441
if err != nil {

0 commit comments

Comments
 (0)