@@ -409,7 +409,7 @@ func (item infSvrItem) process(urCtx context.Context, ctl *controller, nodeDat *
409409 logger .V (2 ).Info ("Unexpected: multiple sleeping Pods match; using the first" , "requesterName" , requestingPod .Name )
410410 }
411411 providingPod = sleepingAnys [0 ].(* corev1.Pod )
412- return ctl .bind (ctx , serverDat , requestingPod , providingPod , false )
412+ return ctl .bind (ctx , serverDat , requestingPod , providingPod , false , - 1 )
413413 }
414414 // What remains is to make a new server-providing Pod --- if the sleeper budget allows.
415415
@@ -507,7 +507,7 @@ func (item infSvrItem) process(urCtx context.Context, ctl *controller, nodeDat *
507507 }
508508 launcherDat .Instances [iscHash ] = time .Now ()
509509 // TODO(waltforme): the bind method may need more revision to fully handle launcher-based server providing Pods
510- return ctl .bind (ctx , serverDat , requestingPod , launcherPod , true )
510+ return ctl .bind (ctx , serverDat , requestingPod , launcherPod , true , int16 ( isc . Spec . ModelServerConfig . Port ) )
511511 } else {
512512 // Slower path: create new instance in launcher with capacity
513513 result , err := lClient .CreateNamedInstance (ctx , iscHash , * cfg )
@@ -520,7 +520,7 @@ func (item infSvrItem) process(urCtx context.Context, ctl *controller, nodeDat *
520520 )
521521 launcherDat .Instances [iscHash ] = time .Now ()
522522 // TODO(waltforme): the bind method may need more revision to fully handle launcher-based server providing Pods
523- return ctl .bind (ctx , serverDat , requestingPod , launcherPod , true )
523+ return ctl .bind (ctx , serverDat , requestingPod , launcherPod , true , int16 ( isc . Spec . ModelServerConfig . Port ) )
524524 }
525525 }
526526 }
@@ -768,7 +768,8 @@ func (ctl *controller) enforceSleeperBudget(ctx context.Context, serverDat *serv
768768 return nil , len (gonerNames ) > 0
769769}
770770
771- func (ctl * controller ) bind (ctx context.Context , serverDat * serverData , requestingPod , providingPod * corev1.Pod , launcherBased bool ) (error , bool ) {
771+ // Note: instPort is used only for launcher-based server-providing Pods.
772+ func (ctl * controller ) bind (ctx context.Context , serverDat * serverData , requestingPod , providingPod * corev1.Pod , launcherBased bool , instPort int16 ) (error , bool ) {
772773 logger := klog .FromContext (ctx )
773774 providingPod = providingPod .DeepCopy ()
774775 providingPod .Annotations [requesterAnnotationKey ] = string (requestingPod .UID ) + " " + requestingPod .Name
@@ -783,9 +784,20 @@ func (ctl *controller) bind(ctx context.Context, serverDat *serverData, requesti
783784 }
784785 serverDat .ProvidingPodName = providingPod .Name
785786 logger .V (2 ).Info ("Bound server-providing Pod" , "name" , providingPod .Name , "node" , requestingPod .Spec .NodeName , "gpus" , serverDat .GPUIndicesStr , "newResourceVersion" , echo .ResourceVersion )
786- _ , serverPort , err := utils .GetInferenceServerPort (providingPod , launcherBased )
787- if err != nil { // Impossible, because such a providingPod would never be created by this controller
788- return fmt .Errorf ("unable to wake up server because port not known: %w" , err ), true
787+ var serverPort int16
788+ if launcherBased {
789+ serverPort = instPort
790+ } else {
791+ _ , serverPort , err = utils .GetInferenceServerPort (providingPod , false )
792+ if err != nil { // Impossible, because such a providingPod would never be created by this controller
793+ return fmt .Errorf ("unable to wake up server because port not known: %w" , err ), true
794+ }
795+ }
796+ // For launcher-based server-providing Pods, ServerPort is written when binding.
797+ // For direct server-providing Pods, ServerPort is written (earlier) when
798+ // constructing the server-providing Pod's spec in getNominalServerProvidingPod.
799+ if launcherBased {
800+ serverDat .ServerPort = serverPort
789801 }
790802 err = ctl .wakeSleeper (ctx , serverDat , requestingPod , providingPod , serverPort )
791803 if err != nil {
@@ -900,11 +912,16 @@ func (ctl *controller) ensureUnbound(ctx context.Context, serverDat *serverData,
900912 // If providingPod is stale then the update will fail.
901913 if (serverDat .Sleeping == nil || ! * (serverDat .Sleeping )) && providingPod .Status .PodIP != "" { // need to put to sleep
902914 serverPort := serverDat .ServerPort
903- if serverDat .NominalProvidingPod == nil {
904- var err error
905- _ , serverPort , err = utils .GetInferenceServerPort (providingPod , launcherBased )
906- if err != nil { // Impossible, because such a providingPod would never be created by this controller
907- return fmt .Errorf ("unable to put server to sleep because port not known: %w" , err )
915+ // TODO(waltforme): Is serverPort always set correctly for launcher-based server-providing Pods upon unbinding?
916+ // E.g. What if requestingPod is deleted during a crash and restart of the dual-pods controller?
917+ // To find the port in this case, I think the best-effort approach is to recompute the hash of every InferenceServerConfig object and try to match.
918+ if ! launcherBased {
919+ if serverDat .NominalProvidingPod == nil {
920+ var err error
921+ _ , serverPort , err = utils .GetInferenceServerPort (providingPod , false )
922+ if err != nil { // Impossible, because such a providingPod would never be created by this controller
923+ return fmt .Errorf ("unable to put server to sleep because port not known: %w" , err )
924+ }
908925 }
909926 }
910927 sleepURL := fmt .Sprintf ("http://%s:%d/sleep" , providingPod .Status .PodIP , serverPort )
@@ -947,6 +964,7 @@ func (ctl *controller) ensureUnbound(ctx context.Context, serverDat *serverData,
947964 logger .V (3 ).Info ("Server-providing Pod remains unbound" , "name" , providingPod .Name , "resourceVersion" , providingPod .ResourceVersion )
948965 }
949966 serverDat .ProvidingPodName = ""
967+ serverDat .ServerPort = - 1
950968 return nil
951969}
952970
0 commit comments