@@ -106,10 +106,13 @@ func (item launcherPodItem) process(ctx context.Context, ctl *controller, nodeDa
106106
107107func (item infSvrItem ) process (urCtx context.Context , ctl * controller , nodeDat * nodeData ) (error , bool ) {
108108 logger := klog .FromContext (urCtx ).WithValues ("serverUID" , item .UID , "requesterName" , item .RequesterName )
109+ serverDat := ctl .getServerData (nodeDat , item .RequesterName , item .UID )
110+ if serverDat .InstanceID != "" {
111+ logger = logger .WithValues ("instanceID" , serverDat .InstanceID )
112+ }
109113 ctx := klog .NewContext (urCtx , logger )
110114 requesterRV := "(non existent)"
111115 providerRV := "(non existent)"
112- serverDat := ctl .getServerData (nodeDat , item .RequesterName , item .UID )
113116 var requesterDeletionTimestamp , providerDeletionTimestamp * string
114117 var requesterRCS , providerRCS * reducedContainerState
115118
@@ -356,11 +359,10 @@ func (item infSvrItem) process(urCtx context.Context, ctl *controller, nodeDat *
356359 serverDat .Sleeping = & sleeping
357360 }
358361 if * (serverDat .Sleeping ) {
359- err = ctl .wakeSleeper (ctx , serverDat , requestingPod , providingPod , serverPort )
362+ err = ctl .wakeSleeper (ctx , serverDat , requestingPod , providingPod , serverPort , "discovered-bound" )
360363 if err != nil {
361364 return err , true
362365 }
363- logger .V (2 ).Info ("Woke discovered-bound inference server" )
364366 }
365367 if err := ctl .ensureSleepingLabel (ctx , providingPod , * (serverDat .Sleeping )); err != nil {
366368 return err , true
@@ -433,7 +435,7 @@ func (item infSvrItem) process(urCtx context.Context, ctl *controller, nodeDat *
433435 logger .V (2 ).Info ("Unexpected: multiple sleeping Pods match; using the first" , "requesterName" , requestingPod .Name )
434436 }
435437 providingPod = sleepingAnys [0 ].(* corev1.Pod )
436- return ctl .bind (ctx , serverDat , requestingPod , providingPod , false , - 1 )
438+ return ctl .bind (ctx , serverDat , requestingPod , providingPod , nil , - 1 )
437439 }
438440 // What remains is to make a new server-providing Pod --- if the sleeper budget allows.
439441
@@ -531,7 +533,7 @@ func (item infSvrItem) process(urCtx context.Context, ctl *controller, nodeDat *
531533 }
532534 launcherDat .Instances [iscHash ] = time .Now ()
533535 // TODO(waltforme): the bind method may need more revision to fully handle launcher-based server providing Pods
534- return ctl .bind (ctx , serverDat , requestingPod , launcherPod , true , int16 (isc .Spec .ModelServerConfig .Port ))
536+ return ctl .bind (ctx , serverDat , requestingPod , launcherPod , & iscHash , int16 (isc .Spec .ModelServerConfig .Port ))
535537 } else {
536538 // Slower path: create new instance in launcher with capacity
537539 logger .V (5 ).Info ("Creating new vLLM instance" , "iscHash" , iscHash )
@@ -545,7 +547,7 @@ func (item infSvrItem) process(urCtx context.Context, ctl *controller, nodeDat *
545547 )
546548 launcherDat .Instances [iscHash ] = time .Now ()
547549 // TODO(waltforme): the bind method may need more revision to fully handle launcher-based server providing Pods
548- return ctl .bind (ctx , serverDat , requestingPod , launcherPod , true , int16 (isc .Spec .ModelServerConfig .Port ))
550+ return ctl .bind (ctx , serverDat , requestingPod , launcherPod , & iscHash , int16 (isc .Spec .ModelServerConfig .Port ))
549551 }
550552 }
551553 }
@@ -693,11 +695,12 @@ func (ctl *controller) configInferenceServer(isc *fmav1alpha1.InferenceServerCon
693695
// wakeupInstance asks one vLLM instance to wake up by issuing a POST (via doPost)
// to "http://<host>:<instancePort>/wake_up", where <host> is the hostname part of
// lClient.baseURL. It returns a wrapped error naming instanceID when the POST
// fails, and nil after logging success at verbosity 2.
//
// This span is a diff hunk: lines marked "-" are the pre-change statements and the
// "+" lines that follow them are their replacements. The change factors the
// host:port string into a local `endpoint` so that it can be reported in both the
// error message and the success log entry.
//
// NOTE(review): url.URL.Hostname() strips the square brackets from an IPv6 literal,
// so plain concatenation with ":" would yield a malformed authority if baseURL can
// ever hold an IPv6 address; net.JoinHostPort would cover that case. Confirm whether
// IPv6 launcher addresses are possible before changing anything.
// NOTE(review): ctx is used here only to obtain the logger; whether doPost applies
// any timeout or cancellation is not visible in this hunk.
694696func (ctl * controller ) wakeupInstance (ctx context.Context , lClient * LauncherClient , instanceID string , instancePort int32 ) error {
695697 logger := klog .FromContext (ctx )
696- err := doPost ("http://" + lClient .baseURL .Hostname () + ":" + strconv .Itoa (int (instancePort )) + "/wake_up" )
// endpoint is "<host>:<port>" without scheme or path; it is reused below for diagnostics.
698+ endpoint := lClient .baseURL .Hostname () + ":" + strconv .Itoa (int (instancePort ))
699+ err := doPost ("http://" + endpoint + "/wake_up" )
697700 if err != nil {
698- return fmt .Errorf ("failed to wake up vLLM instance %q: %w" , instanceID , err )
// The new message adds the endpoint so a failure identifies the address that was contacted.
701+ return fmt .Errorf ("failed to wake up vLLM instance %q (at %s) : %w" , instanceID , endpoint , err )
699702 }
700- logger .V (2 ).Info ("Woke up vLLM instance" , "instanceID" , instanceID )
703+ logger .V (2 ).Info ("Woke up vLLM instance" , "instanceID" , instanceID , "endpoint" , endpoint )
701704 return nil
702705}
703706
@@ -794,7 +797,8 @@ func (ctl *controller) enforceSleeperBudget(ctx context.Context, serverDat *serv
794797}
795798
796799// Note: instPort is used only for launcher-based server-providing Pods.
797- func (ctl * controller ) bind (ctx context.Context , serverDat * serverData , requestingPod , providingPod * corev1.Pod , launcherBased bool , instPort int16 ) (error , bool ) {
800+ // instanceID is non-nil iff launcher-based
801+ func (ctl * controller ) bind (ctx context.Context , serverDat * serverData , requestingPod , providingPod * corev1.Pod , instanceID * string , instPort int16 ) (error , bool ) {
798802 logger := klog .FromContext (ctx )
799803 providingPod = providingPod .DeepCopy ()
800804 providingPod .Annotations [requesterAnnotationKey ] = string (requestingPod .UID ) + " " + requestingPod .Name
@@ -807,8 +811,12 @@ func (ctl *controller) bind(ctx context.Context, serverDat *serverData, requesti
807811 if err != nil {
808812 return fmt .Errorf ("failed to bind server-providing Pod %s: %w" , providingPod .Name , err ), true
809813 }
814+ launcherBased := instanceID != nil
810815 serverDat .ProvidingPodName = providingPod .Name
811- logger .V (2 ).Info ("Bound server-providing Pod" , "name" , providingPod .Name , "node" , requestingPod .Spec .NodeName , "gpus" , serverDat .GPUIDsStr , "newResourceVersion" , echo .ResourceVersion )
816+ if launcherBased {
817+ serverDat .InstanceID = * instanceID
818+ }
819+ logger .V (2 ).Info ("Bound server-providing Pod" , "name" , providingPod .Name , "node" , requestingPod .Spec .NodeName , "gpus" , serverDat .GPUIDsStr , "newResourceVersion" , echo .ResourceVersion , "instanceID" , serverDat .InstanceID )
812820 var serverPort int16
813821 if launcherBased {
814822 serverPort = instPort
@@ -824,25 +832,27 @@ func (ctl *controller) bind(ctx context.Context, serverDat *serverData, requesti
824832 if launcherBased {
825833 serverDat .ServerPort = serverPort
826834 }
827- err = ctl .wakeSleeper (ctx , serverDat , requestingPod , providingPod , serverPort )
835+ err = ctl .wakeSleeper (ctx , serverDat , requestingPod , providingPod , serverPort , "freshly-bound" )
828836 if err != nil {
829837 return err , true
830838 }
831- logger .V (2 ).Info ("Woke freshly-bound inference server" , "providingPod" , providingPod .Name )
832839 return ctl .ensureReqState (ctx , requestingPod , serverDat , ! slices .Contains (requestingPod .Finalizers , requesterFinalizer ), false )
833840}
834841
835- func (ctl * controller ) wakeSleeper (ctx context.Context , serverDat * serverData , requestingPod , providingPod * corev1.Pod , serverPort int16 ) error {
842+ func (ctl * controller ) wakeSleeper (ctx context.Context , serverDat * serverData , requestingPod , providingPod * corev1.Pod , serverPort int16 , description string ) error {
836843 if ctl .debugAccelMemory {
837844 if err := ctl .accelMemoryIsLowEnough (ctx , requestingPod , serverDat ); err != nil {
838845 return err
839846 }
840847 }
841- wakeURL := fmt .Sprintf ("http://%s:%d/wake_up" , providingPod .Status .PodIP , serverPort )
848+ endpoint := fmt .Sprintf ("%s:%d" , providingPod .Status .PodIP , serverPort )
849+ wakeURL := "http://" + endpoint + "/wake_up"
842850 err := doPost (wakeURL )
843851 if err != nil {
844852 return err
845853 }
854+ logger := klog .FromContext (ctx )
855+ logger .V (2 ).Info ("Woke inference server" , "endpoint" , endpoint , "description" , description )
846856 if err := ctl .ensureSleepingLabel (ctx , providingPod , false ); err != nil {
847857 return err
848858 }
@@ -949,7 +959,8 @@ func (ctl *controller) ensureUnbound(ctx context.Context, serverDat *serverData,
949959 }
950960 }
951961 }
952- sleepURL := fmt .Sprintf ("http://%s:%d/sleep" , providingPod .Status .PodIP , serverPort )
962+ endpoint := fmt .Sprintf ("%s:%d" , providingPod .Status .PodIP , serverPort )
963+ sleepURL := "http://" + endpoint + "/sleep"
953964 resp , err := http .Post (sleepURL , "" , nil )
954965 if err != nil {
955966 return fmt .Errorf ("failed to put provider %q to sleep, POST %s got error: %w" , serverDat .ProvidingPodName , sleepURL , err )
@@ -958,7 +969,7 @@ func (ctl *controller) ensureUnbound(ctx context.Context, serverDat *serverData,
958969 return fmt .Errorf ("failed to put provider %q to sleep, POST %s returned status %d" , serverDat .ProvidingPodName , sleepURL , sc )
959970 }
960971 serverDat .Sleeping = ptr .To (true )
961- logger .V (2 ).Info ("Put inference server to sleep" )
972+ logger .V (2 ).Info ("Put inference server to sleep" , "endpoint" , endpoint )
962973 }
963974 providingPod = providingPod .DeepCopy ()
964975 var aChange , fChange bool
0 commit comments