@@ -239,51 +239,51 @@ steps:
239239 queue: gh200_queue
240240 command: nvidia-smi && bash .buildkite/run-gh200-test.sh
241241
# AMD Tests: one Buildkite group holding a block gate, the ROCm image build,
# and one gated test step for every entry of `steps` that lists "amd" in its
# mirror_hardwares.
# NOTE(review): indentation is reconstructed; align this item with the
# enclosing steps list of the full template.
- group: "AMD Tests"
  # `~` is YAML null: an explicit "no dependencies" for the group.
  depends_on: ~
  steps:
    # Gate in front of the image build; nothing below runs until it is unblocked.
    - block: "Block AMD build"
      depends_on: ~
      key: block-amd-build

    - label: "AMD: :docker: build image"
      depends_on: block-amd-build
      soft_fail: true
      commands:
        # Builds the `test` target when Dockerfile.rocm declares it. Because
        # the line has the shape `A && B || C`, the untargeted build runs both
        # when grep finds no such stage and when the targeted build fails.
        - "grep -i 'from base as test' Dockerfile.rocm && docker build --build-arg max_jobs=16 --tag {{ docker_image_amd }} -f Dockerfile.rocm --target test --progress plain . || docker build --build-arg max_jobs=16 --tag {{ docker_image_amd }} -f Dockerfile.rocm --progress plain ."
        - "docker push {{ docker_image_amd }}"
      key: amd-build
      env:
        DOCKER_BUILDKIT: "1"
      retry:
        automatic:
          - exit_status: -1  # Agent was lost
            limit: 1
          - exit_status: -10  # Agent was lost
            limit: 1
          - exit_status: 1  # Machine occasionally fails
            limit: 1
      agents:
        queue: amd-cpu
    {% for step in steps %}
    {% if step.mirror_hardwares and "amd" in step.mirror_hardwares %}
    {# Slug of the step label, computed once so the block key and the test step's depends_on cannot drift apart. #}
    {% set amd_step_slug = step.label | replace(" ", "-") | lower | replace("(", "") | replace(")", "") | replace("%", "") | replace(",", "-") %}
    - block: "Run AMD: {{ step.label }}"
      depends_on: amd-build
      key: "block-amd-{{ amd_step_slug }}"

    - label: "AMD: {{ step.label }}"
      depends_on: "block-amd-{{ amd_step_slug }}"
      agents:
        {# Only the step labelled exactly "Benchmarks" goes to the amd_mi300 queue; every other step uses amd_gpu. #}
        {% if step.label and step.label == "Benchmarks" %}
        queue: amd_mi300
        {% else %}
        queue: amd_gpu
        {% endif %}

      {# The whole `or` expression is parenthesised so `safe` covers step.command as well as the joined step.commands; a filter binds tighter than `or`. #}
      {# NOTE(review): `cd ... ;` lets the test command run even when the cd fails - confirm that is intended. #}
      command: bash .buildkite/run-amd-test.sh "(command rocm-smi || true) && export VLLM_LOGGING_LEVEL=DEBUG && export VLLM_ALLOW_DEPRECATED_BEAM_SEARCH=1 && cd {{ (step.working_dir or default_working_dir) | safe }} ; {{ (step.command or (step.commands | join(" && "))) | safe }}"
      env:
        DOCKER_BUILDKIT: "1"
      priority: 100
      soft_fail: true
    {% endif %}
    {% endfor %}
0 commit comments