Skip to content

Commit 6c4d68f

Browse files
authored
fix(llm): honor SLLM device override when building container spec (#24799)
1 parent 3fcde78 commit 6c4d68f

6 files changed

Lines changed: 42 additions & 11 deletions

File tree

pkg/llm/drivers/llm_container/comfyui.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -88,8 +88,9 @@ func (c *comfyui) GetContainerSpec(ctx context.Context, llm *models.SLLM, image
8888
},
8989
}
9090

91-
if len(devices) == 0 && (sku.Devices != nil && len(*sku.Devices) > 0) {
92-
for i := range *sku.Devices {
91+
effDevs := models.GetEffectiveDevices(llm, sku)
92+
if len(devices) == 0 && effDevs != nil && len(*effDevs) > 0 {
93+
for i := range *effDevs {
9394
index := i
9495
spec.Devices = append(spec.Devices, &computeapi.ContainerDevice{
9596
Type: commonapi.CONTAINER_DEVICE_TYPE_ISOLATED_DEVICE,

pkg/llm/drivers/llm_container/ollama.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -81,8 +81,9 @@ func (o *ollama) GetContainerSpec(ctx context.Context, llm *models.SLLM, image *
8181
},
8282
}
8383

84-
if len(devices) == 0 && (sku.Devices != nil && len(*sku.Devices) > 0) {
85-
for i := range *sku.Devices {
84+
effDevs := models.GetEffectiveDevices(llm, sku)
85+
if len(devices) == 0 && effDevs != nil && len(*effDevs) > 0 {
86+
for i := range *effDevs {
8687
index := i
8788
spec.Devices = append(spec.Devices, &computeapi.ContainerDevice{
8889
Type: commonapi.CONTAINER_DEVICE_TYPE_ISOLATED_DEVICE,

pkg/llm/drivers/llm_container/vllm.go

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -372,8 +372,9 @@ func (v *vllm) GetContainerSpec(ctx context.Context, llm *models.SLLM, image *mo
372372
}
373373

374374
// GPU Devices
375-
if len(devices) == 0 && (sku.Devices != nil && len(*sku.Devices) > 0) {
376-
for i := range *sku.Devices {
375+
effDevs := models.GetEffectiveDevices(llm, sku)
376+
if len(devices) == 0 && effDevs != nil && len(*effDevs) > 0 {
377+
for i := range *effDevs {
377378
index := i
378379
spec.Devices = append(spec.Devices, &computeapi.ContainerDevice{
379380
Type: commonapi.CONTAINER_DEVICE_TYPE_ISOLATED_DEVICE,
@@ -498,8 +499,8 @@ func (v *vllm) StartLLM(ctx context.Context, userCred mcclient.TokenCredential,
498499
return errors.Wrap(err, "get llm sku")
499500
}
500501
tensorParallelSize := 1
501-
if sku.Devices != nil && len(*sku.Devices) > 0 {
502-
tensorParallelSize = len(*sku.Devices)
502+
if effDevs := models.GetEffectiveDevices(llm, sku); effDevs != nil && len(*effDevs) > 0 {
503+
tensorParallelSize = len(*effDevs)
503504
}
504505
swapSpaceGiB := (sku.Memory * 1) / (2 * 1024)
505506
if swapSpaceGiB < 1 {

pkg/llm/models/llm.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -80,8 +80,8 @@ func (llm *SLLM) CustomizeCreate(ctx context.Context, userCred mcclient.TokenCre
8080
return nil
8181
}
8282

83-
// getEffectiveMountedModels returns the effective mounted model ids: llm's override takes priority over sku's when non-empty.
84-
func getEffectiveMountedModels(llm *SLLM, sku *SLLMSku) []string {
83+
// GetEffectiveMountedModels returns the effective mounted model ids: llm's override takes priority over sku's when non-empty.
84+
func GetEffectiveMountedModels(llm *SLLM, sku *SLLMSku) []string {
8585
if llm != nil && len(llm.MountedModels) > 0 {
8686
return llm.MountedModels
8787
}

pkg/llm/models/llm_base.go

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,21 @@ type SLLMBase struct {
7272
Devices *api.Devices `charset:"utf8" length:"medium" nullable:"true" list:"user" update:"user" create:"optional"`
7373
}
7474

75+
// GetEffectiveDevices returns the devices to apply: llm's override takes priority over sku's when non-empty.
76+
// Drivers should call this when materializing container device specs so that SLLM overrides on the
77+
// instance are honored (not just on server-level IsolatedDevices allocation).
78+
func GetEffectiveDevices(llm *SLLM, sku *SLLMSku) *api.Devices {
79+
var llmBase *SLLMBase
80+
var skuBase *SLLMSkuBase
81+
if llm != nil {
82+
llmBase = &llm.SLLMBase
83+
}
84+
if sku != nil {
85+
skuBase = &sku.SLLMSkuBase
86+
}
87+
return getEffectiveDevices(llmBase, skuBase)
88+
}
89+
7590
func getEffectiveDevices(llmBase *SLLMBase, skuBase *SLLMSkuBase) *api.Devices {
7691
if llmBase != nil && llmBase.Devices != nil && !llmBase.Devices.IsZero() {
7792
return llmBase.Devices
@@ -82,6 +97,19 @@ func getEffectiveDevices(llmBase *SLLMBase, skuBase *SLLMSkuBase) *api.Devices {
8297
return nil
8398
}
8499

100+
// GetEffectiveHostPaths returns the host_paths to apply: llm's override takes priority over sku's when non-empty.
101+
func GetEffectiveHostPaths(llm *SLLM, sku *SLLMSku) *api.HostPaths {
102+
var llmBase *SLLMBase
103+
var skuBase *SLLMSkuBase
104+
if llm != nil {
105+
llmBase = &llm.SLLMBase
106+
}
107+
if sku != nil {
108+
skuBase = &sku.SLLMSkuBase
109+
}
110+
return getEffectiveHostPaths(llmBase, skuBase)
111+
}
112+
85113
func getEffectiveHostPaths(llmBase *SLLMBase, skuBase *SLLMSkuBase) *api.HostPaths {
86114
if llmBase != nil && llmBase.HostPaths != nil && !llmBase.HostPaths.IsZero() {
87115
return llmBase.HostPaths

pkg/llm/models/llm_instant_model_sync.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -691,7 +691,7 @@ func (llm *SLLM) UpdateMountedModelFullNames(ctx context.Context, userCred mccli
691691
return errors.Wrap(err, "getDeletedModelIds")
692692
}
693693
}
694-
mountedModels := getEffectiveMountedModels(llm, sku)
694+
mountedModels := GetEffectiveMountedModels(llm, sku)
695695
for i := range mountedModels {
696696
instMdl, err := GetInstantModelManager().FetchByIdOrName(ctx, userCred, mountedModels[i])
697697
if err != nil {

0 commit comments

Comments
 (0)