Skip to content

Commit afd05c7

Browse files
Merge remote-tracking branch 'upstream/main' into HEAD
2 parents d482e8f + 0d6516f commit afd05c7

File tree

114 files changed

+8186
-2051
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

114 files changed

+8186
-2051
lines changed

.buildkite/models/Qwen_Qwen3-30B-A3B.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ steps:
4848
TEST_MODEL: Qwen/Qwen3-30B-A3B
4949
TENSOR_PARALLEL_SIZE: 4
5050
MINIMUM_ACCURACY_THRESHOLD: 0.89
51+
MODEL_IMPL_TYPE: vllm
5152
commands:
5253
- |
5354
.buildkite/scripts/run_in_docker.sh bash /workspace/tpu_inference/tests/e2e/benchmarking/test_accuracy.sh

.buildkite/models/meta-llama_Llama-3_1-8B-Instruct.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ steps:
4646
soft_fail: true
4747
env:
4848
TEST_MODEL: meta-llama/Llama-3.1-8B-Instruct
49-
TENSOR_PARALLEL_SIZE: 1
49+
TPU_VERSION: "${TPU_VERSION:-tpu6e}"
5050
MINIMUM_ACCURACY_THRESHOLD: 0.75
5151
commands:
5252
- |
@@ -73,7 +73,7 @@ steps:
7373
queue: "${TPU_QUEUE_SINGLE:-tpu_v6e_queue}"
7474
env:
7575
TEST_MODEL: meta-llama/Llama-3.1-8B-Instruct
76-
TENSOR_PARALLEL_SIZE: 1
76+
TPU_VERSION: "${TPU_VERSION:-tpu6e}"
7777
MINIMUM_THROUGHPUT_THRESHOLD: 10.77
7878
INPUT_LEN: 1800
7979
OUTPUT_LEN: 128

.buildkite/models/meta-llama_Llama-Guard-4-12B_Multimodal.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ steps:
5858
env:
5959
TEST_MODEL: meta-llama/Llama-Guard-4-12B
6060
TENSOR_PARALLEL_SIZE: 1
61-
MINIMUM_ACCURACY_THRESHOLD: 0.31
61+
MINIMUM_ACCURACY_THRESHOLD: 0.02  # TODO: increase threshold when this model becomes higher priority
6262
commands:
6363
- |
6464
.buildkite/scripts/run_in_docker.sh bash /workspace/tpu_inference/tests/e2e/benchmarking/safety_model_benchmark.sh --mode accuracy --benchmark multimodal

.buildkite/parallelism/PP.yml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ steps:
1919
key: "${TPU_VERSION:-tpu6e}_PP_CorrectnessTest_Single_Host"
2020
soft_fail: true
2121
agents:
22-
queue: "${TPU_QUEUE_SINGLE:-tpu_v6e_queue}"
22+
queue: "${TPU_QUEUE_MULTI:-tpu_v6e_8_queue}"
2323
commands:
2424
- |
2525
.buildkite/scripts/run_in_docker.sh \
@@ -43,7 +43,7 @@ steps:
4343
depends_on: "${TPU_VERSION:-tpu6e}_record_PP_CorrectnessTest_Single_Host"
4444
soft_fail: true
4545
agents:
46-
queue: "${TPU_QUEUE_SINGLE:-tpu_v6e_queue}"
46+
queue: "${TPU_QUEUE_MULTI:-tpu_v6e_8_queue}"
4747
commands:
4848
- |
4949
.buildkite/scripts/run_in_docker.sh \
@@ -66,7 +66,7 @@ steps:
6666
key: "${TPU_VERSION:-tpu6e}_PP_CorrectnessTest_Multi_Host"
6767
soft_fail: true
6868
agents:
69-
queue: "${TPU_QUEUE_SINGLE:-tpu_v6e_queue}"
69+
queue: "${TPU_QUEUE_MULTI:-tpu_v6e_8_queue}"
7070
commands:
7171
- |
7272
.buildkite/scripts/run_in_docker.sh \
@@ -89,7 +89,7 @@ steps:
8989
depends_on: "${TPU_VERSION:-tpu6e}_record_PP_CorrectnessTest_Multi_Host"
9090
soft_fail: true
9191
agents:
92-
queue: "${TPU_QUEUE_SINGLE:-tpu_v6e_queue}"
92+
queue: "${TPU_QUEUE_MULTI:-tpu_v6e_8_queue}"
9393
commands:
9494
- |
9595
.buildkite/scripts/run_in_docker.sh \

.buildkite/pipeline_jax.yml

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,28 @@
1515
steps:
1616
- group: "${TESTS_GROUP_LABEL:-[jax] TPU6e Tests Group}"
1717
steps:
18+
19+
# -----------------------------------------------------------------
20+
# Centralized Build Step (Runs on the CPU queue)
21+
# -----------------------------------------------------------------
22+
- label: ":docker: Build and Push Base Image (${TPU_VERSION:-tpu6e})"
23+
key: "${TPU_VERSION:-tpu6e}_build_docker"
24+
agents:
25+
queue: cpu_64_core
26+
env:
27+
TPU_VERSION: "${TPU_VERSION:-tpu6e}"
28+
commands:
29+
- bash -c 'source .buildkite/scripts/setup_docker_env.sh && setup_environment "vllm-tpu" "false" "true"'
30+
1831
# -----------------------------------------------------------------
1932
# TEST STEPS - Calling wrapper
2033
# -----------------------------------------------------------------
2134
- label: "${TPU_VERSION:-tpu6e} E2E MLPerf tests for JAX models"
2235
key: "${TPU_VERSION:-tpu6e}_test_0"
36+
depends_on: "${TPU_VERSION:-tpu6e}_build_docker"
2337
soft_fail: true
2438
env:
39+
USE_PREBUILT_IMAGE: "1"
2540
TPU_VERSION: "${TPU_VERSION:-tpu6e}"
2641
agents:
2742
queue: ${TPU_QUEUE_SINGLE:-tpu_v6e_queue}
@@ -54,8 +69,10 @@ steps:
5469

5570
- label: "${TPU_VERSION:-tpu6e} E2E MLPerf tests for JAX + vLLM models on single chip"
5671
key: ${TPU_VERSION:-tpu6e}_test_3
72+
depends_on: "${TPU_VERSION:-tpu6e}_build_docker"
5773
soft_fail: true
5874
env:
75+
USE_PREBUILT_IMAGE: "1"
5976
MODEL_IMPL_TYPE: "vllm"
6077
TPU_VERSION: "${TPU_VERSION:-tpu6e}"
6178
agents:
@@ -78,8 +95,10 @@ steps:
7895

7996
- label: "${TPU_VERSION:-tpu6e} E2E speculative decoding test"
8097
key: ${TPU_VERSION:-tpu6e}_test_6
98+
depends_on: "${TPU_VERSION:-tpu6e}_build_docker"
8199
soft_fail: true
82100
env:
101+
USE_PREBUILT_IMAGE: "1"
83102
TPU_VERSION: "${TPU_VERSION:-tpu6e}"
84103
agents:
85104
queue: ${TPU_QUEUE_SINGLE:-tpu_v6e_queue}
@@ -90,8 +109,12 @@ steps:
90109
91110
- label: "${TPU_VERSION:-tpu6e} JAX unit tests part1"
92111
key: ${TPU_VERSION:-tpu6e}_test_7_1
112+
depends_on: "${TPU_VERSION:-tpu6e}_build_docker"
93113
soft_fail: true
94114
artifact_paths: ".coverage.part1.${TPU_VERSION:-tpu6e}"
115+
env:
116+
USE_PREBUILT_IMAGE: "1"
117+
TPU_VERSION: "${TPU_VERSION:-tpu6e}"
95118
agents:
96119
queue: ${TPU_QUEUE_SINGLE:-tpu_v6e_queue}
97120
commands:
@@ -102,8 +125,12 @@ steps:
102125

103126
- label: "${TPU_VERSION:-tpu6e} JAX unit tests part2"
104127
key: ${TPU_VERSION:-tpu6e}_test_7_2
128+
depends_on: "${TPU_VERSION:-tpu6e}_build_docker"
105129
soft_fail: true
106130
artifact_paths: ".coverage.part2.${TPU_VERSION:-tpu6e}"
131+
env:
132+
USE_PREBUILT_IMAGE: "1"
133+
TPU_VERSION: "${TPU_VERSION:-tpu6e}"
107134
agents:
108135
queue: ${TPU_QUEUE_SINGLE:-tpu_v6e_queue}
109136
commands:
@@ -139,7 +166,11 @@ steps:
139166
140167
- label: "${TPU_VERSION:-tpu6e} JAX unit tests - kernels"
141168
key: ${TPU_VERSION:-tpu6e}_test_8
169+
depends_on: "${TPU_VERSION:-tpu6e}_build_docker"
142170
soft_fail: true
171+
env:
172+
USE_PREBUILT_IMAGE: "1"
173+
TPU_VERSION: "${TPU_VERSION:-tpu6e}"
143174
agents:
144175
queue: ${TPU_QUEUE_SINGLE:-tpu_v6e_queue}
145176
commands:
@@ -162,7 +193,11 @@ steps:
162193
163194
- label: "${TPU_VERSION:-tpu6e} JAX unit tests - collective kernels"
164195
key: ${TPU_VERSION:-tpu6e}_test_9
196+
depends_on: "${TPU_VERSION:-tpu6e}_build_docker"
165197
soft_fail: true
198+
env:
199+
USE_PREBUILT_IMAGE: "1"
200+
TPU_VERSION: "${TPU_VERSION:-tpu6e}"
166201
agents:
167202
queue: ${TPU_QUEUE_MULTI:-tpu_v6e_8_queue}
168203
commands:
@@ -218,8 +253,11 @@ steps:
218253

219254
- label: "${TPU_VERSION:-tpu6e} lora e2e tests for JAX + vLLM models multi chips"
220255
key: ${TPU_VERSION:-tpu6e}_test_13
256+
depends_on: "${TPU_VERSION:-tpu6e}_build_docker"
221257
soft_fail: true
222258
env:
259+
USE_PREBUILT_IMAGE: "1"
260+
TPU_VERSION: "${TPU_VERSION:-tpu6e}"
223261
TEST_LORA_TP: "True"
224262
VLLM_LOG_LEVEL: "INFO"
225263
agents:
@@ -229,7 +267,11 @@ steps:
229267

230268
- label: "${TPU_VERSION:-tpu6e} lora unit tests on single chip"
231269
key: ${TPU_VERSION:-tpu6e}_test_15
270+
depends_on: "${TPU_VERSION:-tpu6e}_build_docker"
232271
soft_fail: true
272+
env:
273+
USE_PREBUILT_IMAGE: "1"
274+
TPU_VERSION: "${TPU_VERSION:-tpu6e}"
233275
agents:
234276
queue: ${TPU_QUEUE_SINGLE:-tpu_v6e_queue}
235277
commands:
@@ -240,8 +282,11 @@ steps:
240282
241283
- label: "${TPU_VERSION:-tpu6e} lora unit tests on multi chips"
242284
key: ${TPU_VERSION:-tpu6e}_test_16
285+
depends_on: "${TPU_VERSION:-tpu6e}_build_docker"
243286
soft_fail: true
244287
env:
288+
USE_PREBUILT_IMAGE: "1"
289+
TPU_VERSION: "${TPU_VERSION:-tpu6e}"
245290
USE_V6E8_QUEUE: "True"
246291
VLLM_LOG_LEVEL: "INFO"
247292
agents:
@@ -251,8 +296,10 @@ steps:
251296

252297
- label: "${TPU_VERSION:-tpu6e} E2E lm_eval accuracy check qwen3 coder with fused moe."
253298
key: ${TPU_VERSION:-tpu6e}_test_17
299+
depends_on: "${TPU_VERSION:-tpu6e}_build_docker"
254300
soft_fail: true
255301
env:
302+
USE_PREBUILT_IMAGE: "1"
256303
TPU_VERSION: "${TPU_VERSION:-tpu6e}"
257304
agents:
258305
queue: ${TPU_QUEUE_MULTI:-tpu_v6e_8_queue}
@@ -267,8 +314,10 @@ steps:
267314
268315
- label: "${TPU_VERSION:-tpu6e} E2E lm_eval accuracy check qwen3 coder with gmm kernel."
269316
key: ${TPU_VERSION:-tpu6e}_test_18
317+
depends_on: "${TPU_VERSION:-tpu6e}_build_docker"
270318
soft_fail: true
271319
env:
320+
USE_PREBUILT_IMAGE: "1"
272321
TPU_VERSION: "${TPU_VERSION:-tpu6e}"
273322
agents:
274323
queue: ${TPU_QUEUE_MULTI:-tpu_v6e_8_queue}
@@ -283,8 +332,10 @@ steps:
283332
284333
- label: "${TPU_VERSION:-tpu6e} E2E lm_eval accuracy check gpt oss."
285334
key: ${TPU_VERSION:-tpu6e}_test_19
335+
depends_on: "${TPU_VERSION:-tpu6e}_build_docker"
286336
soft_fail: true
287337
env:
338+
USE_PREBUILT_IMAGE: "1"
288339
TPU_VERSION: "${TPU_VERSION:-tpu6e}"
289340
agents:
290341
queue: ${TPU_QUEUE_MULTI:-tpu_v6e_8_queue}
@@ -313,8 +364,10 @@ steps:
313364
314365
- label: "${TPU_VERSION:-tpu6e} Perf regression test for qwen3 coder 8k 1k with fused moe kernel."
315366
key: ${TPU_VERSION:-tpu6e}_test_21
367+
depends_on: "${TPU_VERSION:-tpu6e}_build_docker"
316368
soft_fail: true
317369
env:
370+
USE_PREBUILT_IMAGE: "1"
318371
TPU_VERSION: "${TPU_VERSION:-tpu6e}"
319372
agents:
320373
queue: ${TPU_QUEUE_MULTI:-tpu_v6e_8_queue}
@@ -340,8 +393,10 @@ steps:
340393
341394
- label: "${TPU_VERSION:-tpu6e} Perf regression test for qwen3 coder 8k 1k with gmm kernel."
342395
key: ${TPU_VERSION:-tpu6e}_test_23
396+
depends_on: "${TPU_VERSION:-tpu6e}_build_docker"
343397
soft_fail: true
344398
env:
399+
USE_PREBUILT_IMAGE: "1"
345400
TPU_VERSION: "${TPU_VERSION:-tpu6e}"
346401
agents:
347402
queue: ${TPU_QUEUE_MULTI:-tpu_v6e_8_queue}
@@ -353,8 +408,10 @@ steps:
353408
354409
- label: "${TPU_VERSION:-tpu6e} Test EP recompilation."
355410
key: ${TPU_VERSION:-tpu6e}_test_24
411+
depends_on: "${TPU_VERSION:-tpu6e}_build_docker"
356412
soft_fail: true
357413
env:
414+
USE_PREBUILT_IMAGE: "1"
358415
TPU_VERSION: "${TPU_VERSION:-tpu6e}"
359416
agents:
360417
queue: ${TPU_QUEUE_MULTI:-tpu_v6e_8_queue}
@@ -366,8 +423,10 @@ steps:
366423
367424
- label: "${TPU_VERSION:-tpu6e} E2E test for DCN-based P/D disaggregation"
368425
key: ${TPU_VERSION:-tpu6e}_test_25
426+
depends_on: "${TPU_VERSION:-tpu6e}_build_docker"
369427
soft_fail: true
370428
env:
429+
USE_PREBUILT_IMAGE: "1"
371430
TPU_VERSION: "${TPU_VERSION:-tpu6e}"
372431
MODEL: "Qwen/Qwen3-0.6B"
373432
INPUT_LEN: 1024
@@ -414,23 +473,35 @@ steps:
414473
415474
- label: "${TPU_VERSION:-tpu6e} Correctness Test | Runai Model Streamer JAX UniProcExecutor"
416475
key: "${TPU_VERSION:-tpu6e}_test_27"
476+
depends_on: "${TPU_VERSION:-tpu6e}_build_docker"
417477
soft_fail: true
478+
env:
479+
USE_PREBUILT_IMAGE: "1"
480+
TPU_VERSION: "${TPU_VERSION:-tpu6e}"
418481
agents:
419482
queue: "${TPU_QUEUE_SINGLE:-tpu_v6e_queue}"
420483
commands:
421484
- .buildkite/scripts/run_in_docker.sh python3 -m pytest -s -v /workspace/tpu_inference/tests/e2e/test_runai_model_streamer_loader.py::test_correctness_jax_uni_proc_executor
422485

423486
- label: "${TPU_VERSION:-tpu6e} Correctness Test | Runai Model Streamer Torchax UniProcExecutor"
424487
key: "${TPU_VERSION:-tpu6e}_test_28"
488+
depends_on: "${TPU_VERSION:-tpu6e}_build_docker"
425489
soft_fail: true
490+
env:
491+
USE_PREBUILT_IMAGE: "1"
492+
TPU_VERSION: "${TPU_VERSION:-tpu6e}"
426493
agents:
427494
queue: "${TPU_QUEUE_SINGLE:-tpu_v6e_queue}"
428495
commands:
429496
- .buildkite/scripts/run_in_docker.sh python3 -m pytest -s -v /workspace/tpu_inference/tests/e2e/test_runai_model_streamer_loader.py::test_correctness_torchax_uni_proc_executor
430497

431498
- label: "${TPU_VERSION:-tpu6e} Correctness Test | Runai Model Streamer Torchax RayDistributedExecutor"
432499
key: "${TPU_VERSION:-tpu6e}_test_29"
500+
depends_on: "${TPU_VERSION:-tpu6e}_build_docker"
433501
soft_fail: true
502+
env:
503+
USE_PREBUILT_IMAGE: "1"
504+
TPU_VERSION: "${TPU_VERSION:-tpu6e}"
434505
agents:
435506
queue: "${TPU_QUEUE_MULTI:-tpu_v6e_8_queue}" # Using a queue with more devices for distributed tests
436507
commands:

.buildkite/scripts/bootstrap.sh

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -60,14 +60,9 @@ if [ "$BUILDKITE_PULL_REQUEST" != "false" ]; then
6060
else
6161
echo "Code files changed. Proceeding with pipeline upload."
6262
fi
63-
64-
# Validate modified YAML pipelines using bk pipeline validate
65-
if .buildkite/scripts/validate_all_pipelines.sh "$NON_SKIPPABLE_FILES"; then
66-
echo "All pipelines syntax are valid. Proceeding with pipeline upload."
67-
else
68-
echo "Some pipelines syntax are invalid. Failing build."
69-
exit 1
70-
fi
63+
# TODO(#2066): Temporarily disabled static pipeline validation due to upstream schema breakage.
64+
# Re-evaluate restoring the validation once Buildkite supports dynamic interpolation in strict mode.
65+
echo "Skipping static yaml validation to allow dynamic variables."
7166
else
7267
echo "Non-PR build. Bypassing file change check."
7368
FILES_CHANGED=$(git diff-tree --no-commit-id --name-only -r -m "$BUILDKITE_COMMIT")

.buildkite/scripts/setup_docker_env.sh

Lines changed: 32 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,10 @@ cleanup_docker_resource() {
8181
setup_environment() {
8282
local image_name_param=${1:-"vllm-tpu"}
8383
local should_push=${2:-"false"}
84+
local push_to_ci_cache=${3:-"false"}
8485
IMAGE_NAME="$image_name_param"
86+
local CI_IMAGE_REPO="us-central1-docker.pkg.dev/cloud-ullm-inference-ci-cd/tpu-inference-ci/${IMAGE_NAME}"
87+
local LOCAL_TPU_VERSION="${TPU_VERSION:-tpu6e}"
8588

8689
local DOCKERFILE_NAME="Dockerfile"
8790

@@ -106,20 +109,47 @@ setup_environment() {
106109
cleanup_docker_resource "${IMAGE_NAME}"
107110

108111
if [ -z "${BUILDKITE:-}" ]; then
109-
VLLM_COMMIT_HASH=""
112+
if [ "${USE_VLLM_LKG:-false}" == "true" ] && [ -f ".buildkite/vllm_lkg.version" ]; then
113+
VLLM_COMMIT_HASH=$(cat .buildkite/vllm_lkg.version)
114+
else
115+
VLLM_COMMIT_HASH=""
116+
fi
110117
TPU_INFERENCE_HASH=$(git log -n 1 --pretty="%H")
111118
else
112119
VLLM_COMMIT_HASH=$(buildkite-agent meta-data get "VLLM_COMMIT_HASH" --default "")
113120
TPU_INFERENCE_HASH="$BUILDKITE_COMMIT"
114121
fi
115122

123+
local CACHE_TAG="${TPU_INFERENCE_HASH}-${LOCAL_TPU_VERSION}"
124+
125+
# ==========================================
126+
# Pull-Only Mode for TPU execution nodes
127+
# ==========================================
128+
if [[ "${USE_PREBUILT_IMAGE:-0}" == "1" ]]; then
129+
echo "Pulling pre-built Docker image: ${CI_IMAGE_REPO}:${CACHE_TAG} ..."
130+
docker pull "${CI_IMAGE_REPO}:${CACHE_TAG}"
131+
docker tag "${CI_IMAGE_REPO}:${CACHE_TAG}" "${IMAGE_NAME}:${TPU_INFERENCE_HASH}"
132+
docker tag "${CI_IMAGE_REPO}:${CACHE_TAG}" "${IMAGE_NAME}:latest"
133+
return 0
134+
fi
135+
116136
# Build with specific hash and 'latest' tag for convenience
117137
docker build \
118138
--build-arg VLLM_COMMIT_HASH="${VLLM_COMMIT_HASH}" \
119139
--build-arg IS_TEST="true" \
120140
--no-cache -f docker/"${DOCKERFILE_NAME}" \
121141
-t "${IMAGE_NAME}:${TPU_INFERENCE_HASH}" \
122-
-t "${IMAGE_NAME}:latest" .
142+
-t "${IMAGE_NAME}:latest" \
143+
-t "${IMAGE_NAME}:${CACHE_TAG}" .
144+
145+
# ==========================================
146+
# Push to CI Image Registry (executed by the dedicated CPU builder)
147+
# ==========================================
148+
if [[ "$push_to_ci_cache" == "true" ]]; then
149+
echo "Pushing Docker image to CI Image Registry..."
150+
docker tag "${IMAGE_NAME}:${CACHE_TAG}" "${CI_IMAGE_REPO}:${CACHE_TAG}"
151+
docker push "${CI_IMAGE_REPO}:${CACHE_TAG}"
152+
fi
123153

124154
# Push logic if requested
125155
if [[ "$should_push" == "true" ]]; then

0 commit comments

Comments
 (0)