@@ -490,6 +490,7 @@ func (item infSvrItem) process(urCtx context.Context, ctl *controller, nodeDat *
490490 if err != nil {
491491 return fmt .Errorf ("failed to configure inference server config: %w" , err ), true
492492 }
493+ desiredPort := isc .Spec .ModelServerConfig .Port
493494 logger .V (5 ).Info ("Nominal hash of InferenceServerConfig" , "hash" , iscHash )
494495
495496 if len (launcherPodAnys ) > 0 {
@@ -498,7 +499,7 @@ func (item infSvrItem) process(urCtx context.Context, ctl *controller, nodeDat *
498499 // then those with capacity for new instances.
499500 // Note that multiple vLLM instances could exist in one launcher Pod, but at most one instance could be awake at a time.
500501
501- launcherPod , hasSleepingInstance , someNotReady , err := ctl .selectBestLauncherPod (ctx , launcherPodAnys , iscHash , int (lc .Spec .MaxSleepingInstances ), nodeDat )
502+ launcherPod , hasSleepingInstance , someNotReady , err := ctl .selectBestLauncherPod (ctx , launcherPodAnys , iscHash , desiredPort , int (lc .Spec .MaxSleepingInstances ), nodeDat )
502503 if err != nil {
503504 return err , true
504505 }
@@ -591,6 +592,7 @@ func (ctl *controller) selectBestLauncherPod(
591592 ctx context.Context ,
592593 launcherPodAnys []interface {},
593594 iscHash string ,
595+ desiredPort int32 ,
594596 maxOthers int ,
595597 nodeDat * nodeData ,
596598) (* corev1.Pod , bool , bool , error ) {
@@ -605,6 +607,11 @@ func (ctl *controller) selectBestLauncherPod(
605607 if launcherPod .Status .Phase == corev1 .PodFailed || launcherPod .DeletionTimestamp != nil {
606608 continue
607609 }
610+ requesterParts := strings .Split (launcherPod .Annotations [requesterAnnotationKey ], " " )
611+ if len (requesterParts ) == 2 {
612+ logger .V (5 ).Info ("Launcher Pod already bound to another requester, skipping" , "name" , launcherPod .Name , "boundRequester" , requesterParts [1 ])
613+ continue
614+ }
608615
609616 // Track pods that are not ready yet - we should give them time instead of
610617 // failing and creating new launcher Pods immediately.
@@ -622,12 +629,33 @@ func (ctl *controller) selectBestLauncherPod(
622629
623630 // Check if this launcher has a sleeping instance matching the iscHash
624631 hasSleepingInstance := false
632+ hasPortConflict := false
625633 for _ , inst := range insts .Instances {
634+ instPort , err := getVLLMInstancePort (inst .Options )
635+ if err != nil {
636+ logger .V (5 ).Info ("Skipping launcher Pod because an instance has unparseable options" ,
637+ "name" , launcherPod .Name ,
638+ "instanceID" , inst .InstanceID ,
639+ "options" , inst .Options ,
640+ "err" , err )
641+ hasPortConflict = true
642+ break
643+ }
644+ if instPort == desiredPort && inst .InstanceID != iscHash {
645+ logger .V (5 ).Info ("Skipping launcher Pod because a different instance already uses the desired port" ,
646+ "name" , launcherPod .Name ,
647+ "instanceID" , inst .InstanceID ,
648+ "port" , desiredPort )
649+ hasPortConflict = true
650+ break
651+ }
626652 if inst .InstanceID == iscHash {
627653 hasSleepingInstance = true
628- break
629654 }
630655 }
656+ if hasPortConflict {
657+ continue
658+ }
631659 if hasSleepingInstance {
632660 // Priority 1: Found a sleeping instance
633661 logger .V (5 ).Info ("Found launcher with sleeping instance (fastest path)" ,
@@ -693,6 +721,30 @@ func (ctl *controller) configInferenceServer(isc *fmav1alpha1.InferenceServerCon
693721 return & vllmCfg , nominalHash , nil
694722}
695723
// getVLLMInstancePort extracts the TCP port from a vLLM instance's
// whitespace-separated command-line options.
//
// Both "--port <n>" and "--port=<n>" are recognized; the first occurrence
// found decides the result. An error is returned when no --port flag is
// present, when "--port" is the last token, or when the value is not a
// base-10 integer in the range 1-65535.
//
// NOTE(review): tokens are split with strings.Fields, so quoting inside
// options is not interpreted — confirm options never carry quoted arguments
// that contain "--port".
// NOTE(review): if the same flag is given twice the first one wins here —
// confirm this matches how the instance's own argument parser resolves
// repeated flags.
func getVLLMInstancePort(options string) (int32, error) {
	// parsePort converts one raw flag value and rejects anything that cannot
	// be a usable TCP port, so callers never compare against a bogus number.
	parsePort := func(raw string) (int32, error) {
		port, err := strconv.ParseInt(raw, 10, 32)
		if err != nil {
			return 0, fmt.Errorf("parse --port value %q: %w", raw, err)
		}
		if port < 1 || port > 65535 {
			return 0, fmt.Errorf("parse --port value %q: out of range 1-65535", raw)
		}
		return int32(port), nil
	}

	parts := strings.Fields(options)
	for idx, part := range parts {
		if part == "--port" {
			// Separate-token form: the value is the next field, if any.
			if idx+1 >= len(parts) {
				return 0, fmt.Errorf("missing value for --port")
			}
			return parsePort(parts[idx+1])
		}
		if value, ok := strings.CutPrefix(part, "--port="); ok {
			return parsePort(value)
		}
	}
	return 0, fmt.Errorf("missing --port in options %q", options)
}
747+
696748func (ctl * controller ) wakeupInstance (ctx context.Context , lClient * LauncherClient , instanceID string , instancePort int32 ) error {
697749 logger := klog .FromContext (ctx )
698750 endpoint := lClient .baseURL .Hostname () + ":" + strconv .Itoa (int (instancePort ))
0 commit comments