ZehaoLu98
diff --git a/‎.buildkite/ci_config.yaml‎
Lines changed: 3 additions & 2 deletions b/‎.buildkite/ci_config.yaml‎
Lines changed: 3 additions & 2 deletions
diff --git a/‎.buildkite/ci_config_intel.yaml‎
Lines changed: 23 additions & 0 deletions b/‎.buildkite/ci_config_intel.yaml‎
Lines changed: 23 additions & 0 deletions
diff --git a/‎.buildkite/hardware_tests/amd.yaml‎
Lines changed: 1 addition & 9 deletions b/‎.buildkite/hardware_tests/amd.yaml‎
Lines changed: 1 addition & 9 deletions
diff --git a/‎.buildkite/hardware_tests/cpu.yaml‎
Lines changed: 55 additions & 15 deletions b/‎.buildkite/hardware_tests/cpu.yaml‎
Lines changed: 55 additions & 15 deletions
diff --git a/‎.buildkite/hardware_tests/intel.yaml‎
Lines changed: 0 additions & 7 deletions b/‎.buildkite/hardware_tests/intel.yaml‎
Lines changed: 0 additions & 7 deletions
diff --git a/‎.buildkite/image_build/image_build.sh‎
Lines changed: 3 additions & 2 deletions b/‎.buildkite/image_build/image_build.sh‎
Lines changed: 3 additions & 2 deletions
diff --git a/‎.buildkite/image_build/image_build_cpu.sh‎
Lines changed: 2 additions & 4 deletions b/‎.buildkite/image_build/image_build_cpu.sh‎
Lines changed: 2 additions & 4 deletions
diff --git a/‎.buildkite/image_build/image_build_cpu_arm64.sh‎
Lines changed: 1 addition & 1 deletion b/‎.buildkite/image_build/image_build_cpu_arm64.sh‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎.buildkite/image_build/image_build_hpu.sh‎
Lines changed: 1 addition & 1 deletion b/‎.buildkite/image_build/image_build_hpu.sh‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎.buildkite/image_build/image_build_torch_nightly.sh‎
Lines changed: 68 additions & 0 deletions b/‎.buildkite/image_build/image_build_torch_nightly.sh‎
Lines changed: 68 additions & 0 deletions
@@ -8,8 +8,9 @@ run_all_patterns:
   - "CMakeLists.txt"
   - "requirements/common.txt"
   - "requirements/cuda.txt"
-  - "requirements/build.txt"
-  - "requirements/test.txt"
+  - "requirements/kv_connectors.txt"
+  - "requirements/build/cuda.txt"
+  - "requirements/test/cuda.txt"
   - "setup.py"
   - "csrc/"
   - "cmake/"
 
@@ -0,0 +1,23 @@
+name: vllm_intel_ci
+job_dirs:
+  - ".buildkite/intel_jobs"
+run_all_patterns:
+  - "docker/Dockerfile"
+  - "CMakeLists.txt"
+  - "requirements/common.txt"
+  - "requirements/xpu.txt"
+  - "requirements/build/cuda.txt"
+  - "requirements/test/cuda.txt"
+  - "setup.py"
+  - "csrc/"
+  - "cmake/"
+run_all_exclude_patterns:
+  - "docker/Dockerfile."
+  - "csrc/cpu/"
+  - "csrc/rocm/"
+  - "cmake/hipify.py"
+  - "cmake/cpu_extension.cmake"
+registries: public.ecr.aws/q9t5s3a7
+repositories:
+  main: "vllm-ci-test-repo"
+  premerge: "vllm-ci-test-repo"
@@ -10,7 +10,7 @@ steps:
       docker build
       --build-arg max_jobs=16
       --build-arg REMOTE_VLLM=1
-      --build-arg ARG_PYTORCH_ROCM_ARCH='gfx942;gfx950'
+      --build-arg ARG_PYTORCH_ROCM_ARCH='gfx90a;gfx942;gfx950'
       --build-arg VLLM_BRANCH=$BUILDKITE_COMMIT
       --tag "rocm/vllm-ci:${BUILDKITE_COMMIT}"
       -f docker/Dockerfile.rocm
@@ -20,11 +20,3 @@ steps:
     - docker push "rocm/vllm-ci:${BUILDKITE_COMMIT}"
     env:
       DOCKER_BUILDKIT: "1"
-    retry:
-      automatic:
-        - exit_status: -1  # Agent was lost
-          limit: 1
-        - exit_status: -10  # Agent was lost
-          limit: 1
-        - exit_status: 1  # Machine occasionally fail
-          limit: 1
@@ -3,7 +3,6 @@ depends_on: []
 steps:
 - label: CPU-Kernel Tests
   depends_on: []
-  soft_fail: true
   device: intel_cpu
   no_plugin: true
   source_file_dependencies:
@@ -13,17 +12,35 @@ steps:
   - vllm/_custom_ops.py
   - tests/kernels/attention/test_cpu_attn.py
   - tests/kernels/moe/test_cpu_fused_moe.py
+  - tests/kernels/moe/test_cpu_quant_fused_moe.py
   - tests/kernels/test_onednn.py
+  - tests/kernels/test_awq_int4_to_int8.py
+  - tests/kernels/quantization/test_cpu_fp8_scaled_mm.py
   commands:
     - |
-      bash .buildkite/scripts/hardware_ci/run-cpu-test.sh 20m "
+      bash .buildkite/scripts/hardware_ci/run-cpu-test.sh 30m "
       pytest -x -v -s tests/kernels/attention/test_cpu_attn.py
       pytest -x -v -s tests/kernels/moe/test_cpu_fused_moe.py
-      pytest -x -v -s tests/kernels/test_onednn.py"
+      pytest -x -v -s tests/kernels/moe/test_cpu_quant_fused_moe.py
+      pytest -x -v -s tests/kernels/test_onednn.py
+      pytest -x -v -s tests/kernels/test_awq_int4_to_int8.py
+      pytest -x -v -s tests/kernels/quantization/test_cpu_fp8_scaled_mm.py"
+
+- label: CPU-Compatibility Tests
+  depends_on: []
+  device: intel_cpu
+  no_plugin: true
+  source_file_dependencies:
+  - cmake/cpu_extension.cmake
+  - setup.py
+  - vllm/platforms/cpu.py
+  commands:
+    - |
+      bash .buildkite/scripts/hardware_ci/run-cpu-test.sh 20m "
+      bash .buildkite/scripts/hardware_ci/run-cpu-compatibility-test.sh"
 
 - label: CPU-Language Generation and Pooling Model Tests
   depends_on: []
-  soft_fail: true
   device: intel_cpu
   no_plugin: true
   source_file_dependencies:
@@ -33,36 +50,49 @@ steps:
   - tests/models/language/pooling/
   commands:
     - |
-      bash .buildkite/scripts/hardware_ci/run-cpu-test.sh 30m "
+      bash .buildkite/scripts/hardware_ci/run-cpu-test.sh 40m "
       pytest -x -v -s tests/models/language/generation -m cpu_model
       pytest -x -v -s tests/models/language/pooling -m cpu_model"
 
-- label: CPU-Quantization Model Tests
+- label: CPU-ModelRunnerV2 Tests
   depends_on: []
+  device: intel_cpu
+  no_plugin: true
   soft_fail: true
+  source_file_dependencies:
+  - vllm/v1/worker/cpu/
+  - vllm/v1/worker/gpu/
+  commands:
+    - |
+      bash .buildkite/scripts/hardware_ci/run-cpu-test.sh 30m "
+      uv pip install git+https://github.com/triton-lang/triton-cpu.git@270e696d
+      VLLM_USE_V2_MODEL_RUNNER=1 pytest -x -v -s tests/models/language/generation/test_granite.py -m cpu_model"
+
+- label: CPU-Quantization Model Tests
+  depends_on: []
   device: intel_cpu
   no_plugin: true
   source_file_dependencies:
   - csrc/cpu/
   - vllm/model_executor/layers/quantization/cpu_wna16.py
-  - vllm/model_executor/layers/quantization/gptq_marlin.py
+  - vllm/model_executor/layers/quantization/auto_gptq.py
   - vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_int8.py
   - vllm/model_executor/layers/quantization/kernels/scaled_mm/cpu.py
   - vllm/model_executor/layers/quantization/kernels/mixed_precision/cpu.py
+  - vllm/model_executor/layers/fused_moe/experts/cpu_moe.py
   - tests/quantization/test_compressed_tensors.py
   - tests/quantization/test_cpu_wna16.py
   commands:
     - |
-      bash .buildkite/scripts/hardware_ci/run-cpu-test.sh 20m "
+      bash .buildkite/scripts/hardware_ci/run-cpu-test.sh 30m "
       pytest -x -v -s tests/quantization/test_compressed_tensors.py::test_compressed_tensors_w8a8_logprobs
       pytest -x -v -s tests/quantization/test_cpu_wna16.py"
       
-- label: CPU-Distributed Tests
+- label: CPU-Distributed Tests (PP+TP)
   depends_on: []
-  soft_fail: true
   device: intel_cpu
   no_plugin: true
-  source_file_dependencies:
+  source_file_dependencies: &cpu_distributed_deps
   - csrc/cpu/shm.cpp
   - vllm/v1/worker/cpu_worker.py
   - vllm/v1/worker/gpu_worker.py
@@ -71,14 +101,24 @@ steps:
   - vllm/platforms/cpu.py
   - vllm/distributed/parallel_state.py
   - vllm/distributed/device_communicators/cpu_communicator.py
+  - .buildkite/scripts/hardware_ci/run-cpu-distributed-smoke-test.sh
   commands:
     - |
       bash .buildkite/scripts/hardware_ci/run-cpu-test.sh 10m "
-      bash .buildkite/scripts/hardware_ci/run-cpu-distributed-smoke-test.sh"
+      bash .buildkite/scripts/hardware_ci/run-cpu-distributed-smoke-test.sh tp_pp"
+
+- label: CPU-Distributed Tests (DP+TP)
+  depends_on: []
+  device: intel_cpu
+  no_plugin: true
+  source_file_dependencies: *cpu_distributed_deps
+  commands:
+    - |
+      bash .buildkite/scripts/hardware_ci/run-cpu-test.sh 10m "
+      bash .buildkite/scripts/hardware_ci/run-cpu-distributed-smoke-test.sh dp_tp"
 
 - label: CPU-Multi-Modal Model Tests %N
   depends_on: []
-  soft_fail: true
   device: intel_cpu
   no_plugin: true
   source_file_dependencies:
@@ -89,11 +129,11 @@ steps:
     - |
       bash .buildkite/scripts/hardware_ci/run-cpu-test.sh 45m "
       pytest -x -v -s tests/models/multimodal/generation --ignore=tests/models/multimodal/generation/test_pixtral.py -m cpu_model --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT --shard-id=$$BUILDKITE_PARALLEL_JOB"
-  parallelism: 2
+  parallelism: 3
 
 - label: "Arm CPU Test"
   depends_on: []
-  soft_fail: true
+  soft_fail: false
   device: arm_cpu
   no_plugin: true
   commands: 
 
@@ -8,10 +8,3 @@ steps:
     commands: 
     - bash .buildkite/scripts/hardware_ci/run-hpu-test.sh
 
-  - label: "Intel GPU Test"
-    depends_on: []
-    soft_fail: true
-    device: intel_gpu
-    no_plugin: true
-    commands: 
-    - bash .buildkite/scripts/hardware_ci/run-xpu-test.sh
@@ -92,8 +92,8 @@ check_and_skip_if_image_exists() {
 }
 
 ecr_login() {
-    aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin "$REGISTRY"
-    aws ecr get-login-password --region us-east-1 | docker login --username AWS --password-stdin 936637512419.dkr.ecr.us-east-1.amazonaws.com
+    aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin "$REGISTRY" || true
+    aws ecr get-login-password --region us-east-1 | docker login --username AWS --password-stdin 936637512419.dkr.ecr.us-east-1.amazonaws.com || true
 }
 
 prepare_cache_tags() {
@@ -192,6 +192,7 @@ export BUILDKITE_COMMIT
 export PARENT_COMMIT
 export IMAGE_TAG
 export IMAGE_TAG_LATEST
+export COMMIT="${COMMIT:-${BUILDKITE_COMMIT}}"
 export CACHE_FROM
 export CACHE_FROM_BASE_BRANCH
 export CACHE_FROM_MAIN
 
@@ -11,7 +11,7 @@ REPO=$2
 BUILDKITE_COMMIT=$3
 
 # authenticate with AWS ECR
-aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin "$REGISTRY"
+aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin "$REGISTRY" || true
 
 # skip build if image already exists
 if [[ -z $(docker manifest inspect "$REGISTRY"/"$REPO":"$BUILDKITE_COMMIT"-cpu) ]]; then
@@ -25,9 +25,7 @@ fi
 docker build --file docker/Dockerfile.cpu \
   --build-arg max_jobs=16 \
   --build-arg buildkite_commit="$BUILDKITE_COMMIT" \
-  --build-arg VLLM_CPU_AVX512BF16=true \
-  --build-arg VLLM_CPU_AVX512VNNI=true \
-  --build-arg VLLM_CPU_AMXBF16=true \
+  --build-arg VLLM_CPU_X86=true \
   --tag "$REGISTRY"/"$REPO":"$BUILDKITE_COMMIT"-cpu \
   --target vllm-test \
   --progress plain .
 
@@ -11,7 +11,7 @@ REPO=$2
 BUILDKITE_COMMIT=$3
 
 # authenticate with AWS ECR
-aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin "$REGISTRY"
+aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin "$REGISTRY" || true
 
 # skip build if image already exists
 if [[ -z $(docker manifest inspect "$REGISTRY"/"$REPO":"$BUILDKITE_COMMIT"-arm64-cpu) ]]; then
 
@@ -11,7 +11,7 @@ REPO=$2
 BUILDKITE_COMMIT=$3
 
 # authenticate with AWS ECR
-aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin "$REGISTRY"
+aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin "$REGISTRY" || true
 
 # skip build if image already exists
 if [[ -z $(docker manifest inspect "$REGISTRY"/"$REPO":"$BUILDKITE_COMMIT"-hpu) ]]; then
 
@@ -0,0 +1,68 @@
+#!/bin/bash
+set -euo pipefail
+
+# Build a vLLM test image with PyTorch nightly installed.
+# Called by the pipeline generator's "vLLM Against PyTorch Nightly" group.
+
+if [[ $# -lt 5 ]]; then
+  echo "Usage: $0 <registry> <repo> <commit> <branch> <image_tag>"
+  exit 1
+fi
+
+REGISTRY=$1
+REPO=$2
+BUILDKITE_COMMIT=$3
+BRANCH=$4
+IMAGE_TAG=$5
+
+# --- Arguments ---
+echo "--- :mag: Arguments"
+echo "REGISTRY: ${REGISTRY}"
+echo "REPO: ${REPO}"
+echo "BUILDKITE_COMMIT: ${BUILDKITE_COMMIT}"
+echo "BRANCH: ${BRANCH}"
+echo "IMAGE_TAG: ${IMAGE_TAG}"
+
+# --- ECR login ---
+echo "--- :key: ECR login"
+aws ecr-public get-login-password --region us-east-1 \
+  | docker login --username AWS --password-stdin "$REGISTRY"
+aws ecr get-login-password --region us-east-1 \
+  | docker login --username AWS --password-stdin 936637512419.dkr.ecr.us-east-1.amazonaws.com
+
+# --- Set up buildx ---
+echo "--- :docker: Setting up buildx"
+docker buildx create --name vllm-builder --driver docker-container --use || true
+docker buildx inspect --bootstrap
+docker buildx ls
+
+# --- Skip if image already exists ---
+echo "--- :mag: Checking if image already exists"
+if docker manifest inspect "$IMAGE_TAG" >/dev/null 2>&1; then
+  echo "Image found: $IMAGE_TAG — skipping build"
+  exit 0
+fi
+echo "Image not found, proceeding with build..."
+
+# --- CUDA 13.0 for nightly builds ---
+# Nightly CI uses CUDA 13.0 while regular CI stays on CUDA 12.9
+NIGHTLY_CUDA_VERSION="13.0.2"
+NIGHTLY_BUILD_BASE_IMAGE="nvidia/cuda:${NIGHTLY_CUDA_VERSION}-devel-ubuntu22.04"
+NIGHTLY_FINAL_BASE_IMAGE="nvidia/cuda:${NIGHTLY_CUDA_VERSION}-base-ubuntu22.04"
+
+echo "--- :docker: Building torch nightly image (CUDA ${NIGHTLY_CUDA_VERSION})"
+docker buildx build --file docker/Dockerfile \
+  --build-arg max_jobs=16 \
+  --build-arg buildkite_commit="$BUILDKITE_COMMIT" \
+  --build-arg USE_SCCACHE=1 \
+  --build-arg PYTORCH_NIGHTLY=1 \
+  --build-arg CUDA_VERSION="${NIGHTLY_CUDA_VERSION}" \
+  --build-arg BUILD_BASE_IMAGE="${NIGHTLY_BUILD_BASE_IMAGE}" \
+  --build-arg FINAL_BASE_IMAGE="${NIGHTLY_FINAL_BASE_IMAGE}" \
+  --build-arg torch_cuda_arch_list="8.0 8.9 9.0 10.0 12.0" \
+  --tag "$IMAGE_TAG" \
+  --push \
+  --target test \
+  --progress plain .
+
+echo "--- :white_check_mark: Torch nightly image build complete: $IMAGE_TAG"