Skip to content

Commit af1ab6f

Browse files
committed
update: add env variable for main container(vllm) when DRA is enabled
for Intel - currently, when DRA is enabled, it skips adding the 3 env variables for intel-i915 and the one VLLM_WORKER_MULTIPROC_METHOD for intel-xe - the three env variables were previously only injected when using the legacy accelerators (Intel i915 or the new Xe) Signed-off-by: Wen Zhou <wenzhou@redhat.com>
1 parent c94bd89 commit af1ab6f

File tree

6 files changed

+143
-16
lines changed

6 files changed

+143
-16
lines changed

charts/llm-d-modelservice/templates/_helpers.tpl

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -246,7 +246,22 @@ nvidia.com/gpu
246246

247247
{{/* Get accelerator environment variables based on type */}}
248248
{{- define "llm-d-modelservice.acceleratorEnv" -}}
249-
{{- $acceleratorType := .Values.accelerator.type | default "nvidia" -}}
249+
{{- $acceleratorType := "" -}}
250+
{{- if .Values.dra.enabled -}}
251+
{{- /* Use explicit acceleratorEnvType if provided, otherwise map DRA type */ -}}
252+
{{- if .Values.dra.acceleratorEnvType -}}
253+
{{- $acceleratorType = .Values.dra.acceleratorEnvType -}}
254+
{{- else -}}
255+
{{- $draType := .Values.dra.type | default "nvidia" -}}
256+
{{- if eq $draType "intel" -}}
257+
{{- $acceleratorType = "intel-i915" -}}
258+
{{- else -}}
259+
{{- $acceleratorType = $draType -}}
260+
{{- end -}}
261+
{{- end -}}
262+
{{- else -}}
263+
{{- $acceleratorType = .Values.accelerator.type | default "nvidia" -}}
264+
{{- end -}}
250265
{{- if and (ne $acceleratorType "cpu") (hasKey .Values.accelerator.env $acceleratorType) -}}
251266
{{- $envVars := index .Values.accelerator.env $acceleratorType -}}
252267
{{- range $envVars }}

charts/llm-d-modelservice/values.schema.json

Lines changed: 54 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,58 @@
77
"description": " Supported types: nvidia, intel-i915, intel-xe, intel-gaudi, amd, google",
88
"properties": {
99
"env": {
10-
"additionalProperties": false,
11-
"description": "Environment variables specific to accelerator types",
10+
"additionalProperties": true,
11+
"description": " @schema additionalProperties: true @schema",
1212
"properties": {
1313
"intel-i915": {
14+
"items": {
15+
"anyOf": [
16+
{
17+
"additionalProperties": false,
18+
"properties": {
19+
"name": {
20+
"default": "VLLM_USE_V1",
21+
"required": [],
22+
"title": "name",
23+
"type": "string"
24+
},
25+
"value": {
26+
"default": "1",
27+
"required": [],
28+
"title": "value",
29+
"type": "string"
30+
}
31+
},
32+
"required": [],
33+
"type": "object"
34+
},
35+
{
36+
"additionalProperties": false,
37+
"properties": {
38+
"name": {
39+
"default": "TORCH_LLM_ALLREDUCE",
40+
"required": [],
41+
"title": "name",
42+
"type": "string"
43+
},
44+
"value": {
45+
"default": "1",
46+
"required": [],
47+
"title": "value",
48+
"type": "string"
49+
}
50+
},
51+
"required": [],
52+
"type": "object"
53+
}
54+
],
55+
"required": []
56+
},
57+
"required": [],
58+
"title": "intel-i915",
59+
"type": "array"
60+
},
61+
"intel-xe": {
1462
"items": {
1563
"anyOf": [
1664
{
@@ -74,13 +122,12 @@
74122
"required": []
75123
},
76124
"required": [],
77-
"title": "intel-i915",
125+
"title": "intel-xe",
78126
"type": "array"
79127
}
80128
},
81129
"required": [],
82-
"title": "env",
83-
"type": "object"
130+
"title": "env"
84131
},
85132
"resources": {
86133
"additionalProperties": false,
@@ -1660,6 +1707,7 @@
16601707
"description": " additionalProperties: true @schema",
16611708
"properties": {
16621709
"claimTemplates": {
1710+
"description": " acceleratorEnvType: \"\"",
16631711
"items": {
16641712
"anyOf": [
16651713
{
@@ -1861,8 +1909,7 @@
18611909
"required": []
18621910
},
18631911
"required": [],
1864-
"title": "claimTemplates",
1865-
"type": "array"
1912+
"title": "claimTemplates"
18661913
},
18671914
"enabled": {
18681915
"default": false,

charts/llm-d-modelservice/values.schema.tmpl.json

Lines changed: 54 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,58 @@
77
"description": " Supported types: nvidia, intel-i915, intel-xe, intel-gaudi, amd, google",
88
"properties": {
99
"env": {
10-
"additionalProperties": false,
11-
"description": "Environment variables specific to accelerator types",
10+
"additionalProperties": true,
11+
"description": " @schema additionalProperties: true @schema",
1212
"properties": {
1313
"intel-i915": {
14+
"items": {
15+
"anyOf": [
16+
{
17+
"additionalProperties": false,
18+
"properties": {
19+
"name": {
20+
"default": "VLLM_USE_V1",
21+
"required": [],
22+
"title": "name",
23+
"type": "string"
24+
},
25+
"value": {
26+
"default": "1",
27+
"required": [],
28+
"title": "value",
29+
"type": "string"
30+
}
31+
},
32+
"required": [],
33+
"type": "object"
34+
},
35+
{
36+
"additionalProperties": false,
37+
"properties": {
38+
"name": {
39+
"default": "TORCH_LLM_ALLREDUCE",
40+
"required": [],
41+
"title": "name",
42+
"type": "string"
43+
},
44+
"value": {
45+
"default": "1",
46+
"required": [],
47+
"title": "value",
48+
"type": "string"
49+
}
50+
},
51+
"required": [],
52+
"type": "object"
53+
}
54+
],
55+
"required": []
56+
},
57+
"required": [],
58+
"title": "intel-i915",
59+
"type": "array"
60+
},
61+
"intel-xe": {
1462
"items": {
1563
"anyOf": [
1664
{
@@ -74,13 +122,12 @@
74122
"required": []
75123
},
76124
"required": [],
77-
"title": "intel-i915",
125+
"title": "intel-xe",
78126
"type": "array"
79127
}
80128
},
81129
"required": [],
82-
"title": "env",
83-
"type": "object"
130+
"title": "env"
84131
},
85132
"resources": {
86133
"additionalProperties": false,
@@ -346,6 +393,7 @@
346393
"description": " additionalProperties: true @schema",
347394
"properties": {
348395
"claimTemplates": {
396+
"description": " acceleratorEnvType: \"\"",
349397
"items": {
350398
"anyOf": [
351399
{
@@ -547,8 +595,7 @@
547595
"required": []
548596
},
549597
"required": [],
550-
"title": "claimTemplates",
551-
"type": "array"
598+
"title": "claimTemplates"
552599
},
553600
"enabled": {
554601
"default": false,

charts/llm-d-modelservice/values.yaml

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ modelArtifacts:
7474
multinode: false
7575

7676
# Global accelerator configuration
77-
# Supported types: nvidia, intel-i915, intel-xe, intel-gaudi, amd, google
77+
# Supported types: nvidia, intel-i915, intel-xe, intel-gaudi, amd, google, cpu
7878
accelerator:
7979
# Type of accelerator to use
8080
type: nvidia
@@ -87,6 +87,9 @@ accelerator:
8787
amd: "amd.com/gpu"
8888
google: "google.com/tpu"
8989
# Environment variables specific to accelerator types
90+
# @schema
91+
# additionalProperties: true
92+
# @schema
9093
env:
9194
intel-i915:
9295
- name: VLLM_USE_V1
@@ -95,6 +98,9 @@ accelerator:
9598
value: "1"
9699
- name: VLLM_WORKER_MULTIPROC_METHOD
97100
value: "spawn"
101+
intel-xe:
102+
- name: VLLM_WORKER_MULTIPROC_METHOD
103+
value: "spawn"
98104

99105
# @schema
100106
# additionalProperties: true
@@ -122,6 +128,8 @@ requester:
122128
dra:
123129
enabled: false # true: use this block instead of the `accelerator`
124130
type: nvidia # which claimTemplates entry to use
131+
# Optional: accelerator type whose env variables are injected into the vLLM container (e.g., "intel-i915" or "intel-xe"). Must match a key in the accelerator.env map
132+
# acceleratorEnvType: ""
125133
claimTemplates:
126134
- name: nvidia
127135
class: gpu.nvidia.com

examples/output-dra.yaml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,13 @@ spec:
9090
name: llm-d-hf-token
9191
key: HF_TOKEN
9292

93+
- name: VLLM_USE_V1
94+
value: "1"
95+
- name: TORCH_LLM_ALLREDUCE
96+
value: "1"
97+
- name: VLLM_WORKER_MULTIPROC_METHOD
98+
value: "spawn"
99+
93100
resources:
94101
claims:
95102
- name: intel-resource-claim

examples/output-xpu.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,9 @@ spec:
9292
- name: HF_HOME
9393
value: /model-cache
9494

95+
- name: VLLM_WORKER_MULTIPROC_METHOD
96+
value: "spawn"
97+
9598
ports:
9699
- containerPort: 8200
97100
protocol: TCP

0 commit comments

Comments
 (0)