Skip to content

Commit 007b9f8

Browse files
Fix vmagent worker argument expansion (#107)
1 parent b2867a4 commit 007b9f8

1 file changed

Lines changed: 1 addition & 7 deletions

File tree

src/swiss_ai_model_launch/assets/template.jinja

Lines changed: 1 addition & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -207,13 +207,7 @@ done
207 207
# The batch node (index 0) runs directly; worker nodes run via srun --overlap.
208 208
# The batch node scrapes framework metrics (8080) + DCGM (9400); workers scrape only DCGM (9400).
209 209
if [ -n "$METRICS_REMOTE_WRITE_URL" ] && [ -x "$METRICS_AGENT_BIN" ]; then
210-
VMAGENT_COMMON_ARGS="
211-
-remoteWrite.url=${METRICS_REMOTE_WRITE_URL}
212-
-remoteWrite.label=slurm_job_id=${SLURM_JOB_ID}
213-
-remoteWrite.label=model=${SERVED_MODEL_NAME}
214-
-remoteWrite.label=framework=${FRAMEWORK}
215-
-remoteWrite.label=user=${USER}
216-
"
210+
VMAGENT_COMMON_ARGS="-remoteWrite.url=${METRICS_REMOTE_WRITE_URL} -remoteWrite.label=slurm_job_id=${SLURM_JOB_ID} -remoteWrite.label=model=${SERVED_MODEL_NAME} -remoteWrite.label=framework=${FRAMEWORK} -remoteWrite.label=user=${USER}"
217 211
METRICS_CONFIG_DIR="/capstor/store/cscs/swissai/infra01/ocf-share"
218 212
DCGM_COMMON_ARGS="--address 0.0.0.0:9400 -f $METRICS_CONFIG_DIR/default-counters.csv"
219 213
DCGM_LOG="/tmp/dcgm-exporter-${SLURM_JOB_ID}.log"

0 commit comments

Comments
 (0)