Skip to content

Commit 57516ac

Browse files
committed
spyre fixes
Signed-off-by: Michael Kalantar <kalantar@us.ibm.com>
1 parent 16cd8ff commit 57516ac

3 files changed

Lines changed: 5 additions & 42 deletions

File tree

scenarios/examples/spyre.sh

Lines changed: 3 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -90,8 +90,6 @@ cat << EOF > $LLMDBENCH_VLLM_COMMON_ENVVARS_TO_YAML
9090
value: '1024,256'
9191
- name: DTCOMPILER_KEEP_EXPORT
9292
value: 'true'
93-
- name: TENSOR_PARALLEL_SIZE
94-
value: "REPLACE_ENV_LLMDBENCH_VLLM_COMMON_TENSOR_PARALLELISM"
9593
- name: PORT
9694
value: "REPLACE_ENV_LLMDBENCH_VLLM_COMMON_INFERENCE_PORT"
9795
- name: DTCOMPILER_KEEP_EXPORT
@@ -117,7 +115,7 @@ EOF
117115
# Prefill parameters: 0 prefill pod
118116
export LLMDBENCH_VLLM_MODELSERVICE_PREFILL_REPLICAS=0
119117
export LLMDBENCH_VLLM_MODELSERVICE_PREFILL_ACCELERATOR_RESOURCE=ibm.com/spyre_pf
120-
export LLMDBENCH_VLLM_MODELSERVICE_PREFILL_ACCELERATOR_NR=0
118+
# export LLMDBENCH_VLLM_MODELSERVICE_PREFILL_ACCELERATOR_NR=0
121119

122120
# Decode parameters: 2 decode pods
123121
export LLMDBENCH_VLLM_MODELSERVICE_DECODE_TENSOR_PARALLELISM=1
@@ -136,14 +134,14 @@ cat << EOF > $LLMDBENCH_VLLM_MODELSERVICE_DECODE_EXTRA_ARGS
136134
/home/senuser/container-scripts/simple_vllm_serve.sh REPLACE_ENV_LLMDBENCH_DEPLOY_CURRENT_MODEL; \
137135
--port REPLACE_ENV_LLMDBENCH_VLLM_COMMON_INFERENCE_PORT \
138136
--max-model-len REPLACE_ENV_LLMDBENCH_VLLM_COMMON_MAX_MODEL_LEN \
139-
--tensor-parallel-size REPLACE_ENV_LLMDBENCH_VLLM_COMMON_TENSOR_PARALLELISM \
137+
--tensor-parallel-size \$TP_SIZE \
140138
--max-num-seqs 32 \
141139
--enable-auto-tool-choice \
142140
--tool-call-parser granite
143141
EOF
144142

145143
export LLMDBENCH_VLLM_MODELSERVICE_DECODE_ACCELERATOR_RESOURCE=ibm.com/spyre_pf
146-
export LLMDBENCH_VLLM_MODELSERVICE_DECODE_ACCELERATOR_NR=1
144+
# export LLMDBENCH_VLLM_MODELSERVICE_DECODE_ACCELERATOR_NR=1
147145

148146
# Workload parameters
149147

setup/functions.py

Lines changed: 0 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -1238,20 +1238,6 @@ def add_resources(ev:dict, identifier: str) -> [str, str]:
12381238
identifier = f"modelservice_{identifier}"
12391239
section_indent = " " * 8
12401240

1241-
accelerator_resource = ev[f"vllm_{identifier}_accelerator_resource"]
1242-
1243-
if accelerator_resource == "auto":
1244-
accelerator_resource = "nvidia.com/gpu"
1245-
1246-
accelerator_nr = ev[f"vllm_{identifier}_accelerator_nr"]
1247-
1248-
data_parallelism = ev[f"vllm_{identifier}_data_parallelism"]
1249-
tensor_parallelism = ev[f"vllm_{identifier}_tensor_parallelism"]
1250-
1251-
accelerator_count = get_accelerator_nr(
1252-
accelerator_nr, tensor_parallelism, data_parallelism
1253-
)
1254-
12551241
cpu_mem = ev[f"vllm_{identifier}_cpu_mem"]
12561242
cpu_nr = ev[f"vllm_{identifier}_cpu_nr"]
12571243

@@ -1278,26 +1264,6 @@ def add_resources(ev:dict, identifier: str) -> [str, str]:
12781264
f'{section_indent}{ephemeral_storage_resource}: "{ephemeral_storage_nr}"'
12791265
)
12801266

1281-
if (
1282-
accelerator_resource
1283-
and accelerator_count
1284-
and str(accelerator_count) != "0"
1285-
):
1286-
limits_resources.append(
1287-
f'{section_indent}{accelerator_resource}: "{accelerator_count}"'
1288-
)
1289-
requests_resources.append(
1290-
f'{section_indent}{accelerator_resource}: "{accelerator_count}"'
1291-
)
1292-
1293-
if accelerator_resource != "nvidia.com/gpu" :
1294-
limits_resources.append(
1295-
f'{section_indent}nvidia.com/gpu: "0"'
1296-
)
1297-
requests_resources.append(
1298-
f'{section_indent}nvidia.com/gpu: "0"'
1299-
)
1300-
13011267
if network_resource and network_nr:
13021268
limits_resources.append(
13031269
f'{section_indent}{network_resource}: "{network_nr}"'

setup/steps/09_deploy_via_modelservice.py

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,6 @@
2525
get_image,
2626
add_command,
2727
add_command_line_options,
28-
get_accelerator_nr,
2928
add_annotations,
3029
add_additional_env_to_yaml,
3130
add_config,
@@ -236,10 +235,11 @@ def generate_ms_values_yaml(
236235
connector: {proxy_connector}
237236
debugLevel: {proxy_debug_level}
238237
238+
{add_affinity(ev, "")}
239+
239240
decode:
240241
create: {decode_create}
241242
replicas: {decode_replicas}
242-
{add_affinity(ev, " ")}
243243
parallelism:
244244
data: {decode_data_parallelism}
245245
tensor: {decode_tensor_parallelism}
@@ -294,7 +294,6 @@ def generate_ms_values_yaml(
294294
prefill:
295295
create: {prefill_create}
296296
replicas: {prefill_replicas}
297-
{add_affinity(ev, " ")}
298297
parallelism:
299298
data: {prefill_data_parallelism}
300299
tensor: {prefill_tensor_parallelism}

0 commit comments

Comments
 (0)