From 8a6611842f47fbc73fbf4a2e9b8c1bcc510f7ce1 Mon Sep 17 00:00:00 2001 From: zejunchen-zejun Date: Wed, 11 Mar 2026 10:57:52 +0800 Subject: [PATCH 01/15] [plugin][CI/CD] establish CI/CD workflow and docker release for OOT Signed-off-by: zejunchen-zejun --- .github/workflows/atom-test.yaml | 244 ++++++++++++++++++ .../plugin/test_plugin_config_translation.py | 82 ++++++ tests/plugin/test_plugin_env_flags.py | 56 ++++ tests/plugin/test_plugin_mode_status.py | 36 +++ tests/plugin/test_plugin_registries.py | 57 ++++ .../plugin/test_plugin_unsupported_models.py | 23 ++ tests/plugin/test_plugin_vllm_import_paths.py | 85 ++++++ 7 files changed, 583 insertions(+) create mode 100644 tests/plugin/test_plugin_config_translation.py create mode 100644 tests/plugin/test_plugin_env_flags.py create mode 100644 tests/plugin/test_plugin_mode_status.py create mode 100644 tests/plugin/test_plugin_registries.py create mode 100644 tests/plugin/test_plugin_unsupported_models.py create mode 100644 tests/plugin/test_plugin_vllm_import_paths.py diff --git a/.github/workflows/atom-test.yaml b/.github/workflows/atom-test.yaml index 9736f09f..3c061ca3 100644 --- a/.github/workflows/atom-test.yaml +++ b/.github/workflows/atom-test.yaml @@ -470,3 +470,247 @@ jobs: docker rm "$CONTAINER_NAME" || true # Remove the pre-built image to free disk space on the runner docker rmi "rocm/atom-dev:pre-build-${{ env.GITHUB_COMMIT_SHA }}" || true + + atom-vllm-oot: + needs: [pre-checks, build_atom_image] + if: ${{ needs.pre-checks.result == 'success' && needs.build_atom_image.result == 'success' && (!github.event.pull_request || github.event.pull_request.draft == false) }} + name: ATOM vLLM OOT Test + strategy: + fail-fast: false + matrix: + include: + # Keep CI runtime under control: enable only one OOT model for now. 
+ - model_name: "Kimi-K2-Thinking-MXFP4" + model_path: "amd/Kimi-K2-Thinking-MXFP4" + accuracy_test_threshold: "0.90" + runner: atom-mi355-8gpu.predownload + runs-on: ${{ matrix.runner }} + timeout-minutes: 180 + env: + CONTAINER_NAME: atom_vllm_oot_${{ strategy.job-index }} + OOT_IMAGE_TAG: atom_vllm_oot_test:${{ github.sha }}-${{ strategy.job-index }} + VLLM_COMMIT: b31e9326a7d9394aab8c767f8ebe225c65594b60 + VLLM_VERSION: "0.17" + + steps: + - name: Clean up containers and workspace + run: | + echo "=== Cleaning up containers on $(hostname) ===" + containers=$(docker ps -q) + if [ -n "$containers" ]; then + docker kill $containers || true + fi + docker rm -f "$CONTAINER_NAME" 2>/dev/null || true + docker run --rm -v "${GITHUB_WORKSPACE:-$PWD}":/workspace -w /workspace --privileged rocm/pytorch:latest bash -lc "ls -la /workspace/ && rm -rf /workspace/*" || true + + - name: Checkout ATOM repo + uses: actions/checkout@v4 + + - name: Docker Login + if: ${{ !github.event.pull_request.head.repo.fork }} + run: | + docker login -u ${{ secrets.DOCKER_USERNAME }} -p ${{ secrets.DOCKER_PASSWORD }} + + - name: Prepare OOT base image for forked repo + if: ${{ github.event.pull_request.head.repo.fork }} + run: | + cat < Dockerfile.mod + FROM ${{ env.ATOM_BASE_NIGTHLY_IMAGE }} + RUN pip install -U lm-eval[api] + RUN pip show lm-eval || true + RUN pip install hf_transfer + RUN pip show hf_transfer || true + RUN echo "=== Aiter version BEFORE uninstall ===" && pip show amd-aiter || true + RUN pip uninstall -y amd-aiter + RUN pip install --upgrade "pybind11>=3.0.1" + RUN pip show pybind11 + RUN rm -rf /app/aiter-test + RUN git clone https://github.com/ROCm/aiter.git /app/aiter-test && \\ + cd /app/aiter-test && \\ + git checkout HEAD && \\ + git submodule sync && git submodule update --init --recursive && \\ + MAX_JOBS=64 PREBUILD_KERNELS=0 GPU_ARCHS=gfx950 python3 setup.py develop + RUN echo "=== Aiter version AFTER installation ===" && pip show amd-aiter || true + RUN echo "=== 
ATOM version BEFORE uninstall ===" && pip show atom || true + RUN pip uninstall -y atom + RUN rm -rf /app/ATOM + RUN git clone ${{ env.GITHUB_REPO_URL }} /app/ATOM && \\ + cd /app/ATOM && \\ + git checkout ${{ env.GITHUB_COMMIT_SHA }} && \\ + pip install -e . + RUN echo "=== ATOM version AFTER installation ===" && pip show atom || true + EOF + + docker build --pull --network=host \ + --no-cache \ + -t atom_oot_base:ci \ + -f Dockerfile.mod . + echo "OOT_BASE_IMAGE=atom_oot_base:ci" >> "$GITHUB_ENV" + + - name: Select OOT base image from pre-built ATOM image + if: ${{ !github.event.pull_request.head.repo.fork }} + run: | + echo "OOT_BASE_IMAGE=rocm/atom-dev:pre-build-${{ env.GITHUB_COMMIT_SHA }}" >> "$GITHUB_ENV" + + - name: Build OOT vLLM image + run: | + chmod +x docker/plugin/build_OOT_vLLM.sh + IMAGE_TAG="${OOT_IMAGE_TAG}" \ + BASE_IMAGE="${OOT_BASE_IMAGE}" \ + VLLM_COMMIT="${VLLM_COMMIT}" \ + VLLM_VERSION="${VLLM_VERSION}" \ + INSTALL_LM_EVAL=1 \ + BUILD_NO_CACHE=1 \ + docker/plugin/build_OOT_vLLM.sh + + - name: Start OOT test container + run: | + if [ -f "/etc/podinfo/gha-render-devices" ]; then + DEVICE_FLAG=$(cat /etc/podinfo/gha-render-devices) + else + DEVICE_FLAG="--device /dev/dri" + fi + + if [ -d "/models" ]; then + MODEL_MOUNT="-v /models:/models" + else + echo "Warning: /models directory not found on runner; skipping /models mount." 
+ MODEL_MOUNT="" + fi + + docker run -dt --device=/dev/kfd $DEVICE_FLAG \ + -v "${GITHUB_WORKSPACE:-$PWD}":/workspace \ + $MODEL_MOUNT \ + -w /workspace \ + --ipc=host --group-add video \ + --shm-size=16G \ + --privileged \ + --cap-add=SYS_PTRACE \ + -e HF_TOKEN="${HF_TOKEN:-}" \ + --security-opt seccomp=unconfined \ + --ulimit memlock=-1 \ + --ulimit stack=67108864 \ + -v "${{ github.workspace }}:/workspace" \ + -w /workspace \ + --name "$CONTAINER_NAME" \ + "$OOT_IMAGE_TAG" + env: + GITHUB_WORKSPACE: ${{ github.workspace }} + + - name: Resolve model path + run: | + if [ -f "/models/${{ matrix.model_path }}/config.json" ]; then + echo "MODEL_PATH=/models/${{ matrix.model_path }}" >> "$GITHUB_ENV" + echo "Use pre-downloaded model path: /models/${{ matrix.model_path }}" + else + echo "MODEL_PATH=${{ matrix.model_path }}" >> "$GITHUB_ENV" + echo "Use HuggingFace model path: ${{ matrix.model_path }}" + fi + + - name: Download model if needed + run: | + if [ -d "/models" ] && [ ! -f "/models/${{ matrix.model_path }}/config.json" ]; then + echo "Downloading model to /models/${{ matrix.model_path }}" + docker exec -e HF_TOKEN=${{ secrets.AMD_HF_TOKEN }} "$CONTAINER_NAME" bash -lc "hf download ${{ matrix.model_path }} --local-dir /models/${{ matrix.model_path }}" + else + echo "Skip model download" + fi + + - name: Launch vLLM server with ATOM OOT plugin + run: | + docker exec "$CONTAINER_NAME" bash -lc " + set -euo pipefail + export SAFETENSORS_FAST_GPU=1 + export VLLM_ROCM_USE_AITER=1 + export VLLM_RPC_TIMEOUT=1800000 + export VLLM_CACHE_ROOT=/tmp/.cache/vllm + export TORCHINDUCTOR_CACHE_DIR=/tmp/.cache/inductor + rm -rf /tmp/.cache + + nohup vllm serve \"$MODEL_PATH\" \ + --host 0.0.0.0 \ + --port 8000 \ + --tensor-parallel-size 8 \ + --enable-expert-parallel \ + --trust-remote-code \ + --disable-log-requests \ + --gpu-memory-utilization 0.9 \ + --async-scheduling \ + --load-format fastsafetensors \ + --kv-cache-dtype fp8 \ + --max-model-len 16384 \ + > 
/tmp/vllm_oot.log 2>&1 & + echo \$! > /tmp/vllm_oot.pid + echo \"Server PID: \$(cat /tmp/vllm_oot.pid)\" + " + + - name: Wait for vLLM readiness + timeout-minutes: 30 + run: | + set -euo pipefail + for i in $(seq 1 60); do + if docker exec "$CONTAINER_NAME" bash -lc "curl -sS http://127.0.0.1:8000/v1/models >/dev/null"; then + echo "vLLM server is ready." + exit 0 + fi + echo "Waiting for server... ($i/60)" + sleep 30 + done + echo "vLLM server did not become ready in time." + docker exec "$CONTAINER_NAME" bash -lc "tail -n 200 /tmp/vllm_oot.log || true" + exit 1 + + - name: Run OOT accuracy test (gsm8k) + timeout-minutes: 45 + run: | + docker exec "$CONTAINER_NAME" bash -lc " + set -euo pipefail + if ! command -v lm_eval >/dev/null 2>&1; then + pip install 'lm-eval[api]' + fi + mkdir -p /tmp/oot_accuracy_results + RESULT_FILE=/tmp/oot_accuracy_results/\$(date +%Y%m%d%H%M%S).json + lm_eval --model local-completions \ + --model_args model=\"$MODEL_PATH\",base_url=http://127.0.0.1:8000/v1/completions,num_concurrent=65,max_retries=1,tokenized_requests=False \ + --tasks gsm8k \ + --num_fewshot 3 \ + --output_path \"\$RESULT_FILE\" 2>&1 | tee /tmp/oot_accuracy_output.txt + echo \"OOT_RESULT_FILE=\$RESULT_FILE\" + " + + - name: Check OOT accuracy threshold + run: | + docker exec "$CONTAINER_NAME" bash -lc " + set -euo pipefail + python -c \"import json, glob; files=sorted(glob.glob('/tmp/oot_accuracy_results/*.json')); assert files, 'ERROR: No OOT accuracy result JSON found.'; threshold=float('${{ matrix.accuracy_test_threshold }}'); result_file=files[-1]; data=json.load(open(result_file)); value=data['results']['gsm8k']['exact_match,flexible-extract']; print('RESULT_FILE:', result_file); print('Flexible extract value:', value); print('Accuracy threshold:', threshold); assert value >= threshold, f'Accuracy test failed: {value} < {threshold}'; print(f'Accuracy test passed: {value} >= {threshold}')\" + " + + - name: Collect OOT accuracy summary + if: success() + run: | + 
echo "OOT Accuracy Test Summary for ${{ matrix.model_name }}:" >> $GITHUB_STEP_SUMMARY + docker exec "$CONTAINER_NAME" bash -lc "awk '/\|Tasks\|Version\|/,/^$/ { if (NF > 0) print }' /tmp/oot_accuracy_output.txt" >> $GITHUB_STEP_SUMMARY || true + + - name: Collect OOT logs and results + if: always() + run: | + docker cp "$CONTAINER_NAME":/tmp/vllm_oot.log ./vllm_oot.log || true + docker cp "$CONTAINER_NAME":/tmp/oot_accuracy_output.txt ./oot_accuracy_output.txt || true + docker cp "$CONTAINER_NAME":/tmp/oot_accuracy_results ./oot_accuracy_results || true + + - name: Upload OOT artifacts + if: always() + uses: actions/upload-artifact@v4 + with: + name: oot-${{ matrix.model_name }}-artifacts + path: | + vllm_oot.log + oot_accuracy_output.txt + oot_accuracy_results + + - name: Clean up OOT test + if: always() + run: | + docker exec "$CONTAINER_NAME" bash -lc "if [ -f /tmp/vllm_oot.pid ]; then kill \$(cat /tmp/vllm_oot.pid) || true; fi" || true + docker stop "$CONTAINER_NAME" || true + docker rm "$CONTAINER_NAME" || true diff --git a/tests/plugin/test_plugin_config_translation.py b/tests/plugin/test_plugin_config_translation.py new file mode 100644 index 00000000..d85fcf8d --- /dev/null +++ b/tests/plugin/test_plugin_config_translation.py @@ -0,0 +1,82 @@ +import pytest + +import atom.plugin.config as plugin_config + + +class _Obj: + def __init__(self, **kwargs): + for k, v in kwargs.items(): + setattr(self, k, v) + + +class _FakeConfig: + def __init__(self, **kwargs): + for k, v in kwargs.items(): + setattr(self, k, v) + + +class _FakeCompilationConfig: + def __init__(self, level, use_cudagraph, cudagraph_mode): + self.level = level + self.use_cudagraph = use_cudagraph + self.cudagraph_mode = cudagraph_mode + + +def _patch_atom_config_module(monkeypatch): + import atom.config as atom_config_module + + monkeypatch.setattr(atom_config_module, "Config", _FakeConfig, raising=False) + monkeypatch.setattr( + atom_config_module, "CompilationConfig", _FakeCompilationConfig, 
raising=False + ) + + +def test_generate_from_vllm_translates_core_fields(monkeypatch): + _patch_atom_config_module(monkeypatch) + monkeypatch.setenv("ATOM_DISABLE_VLLM_PLUGIN_ATTENTION", "0") + + vllm_cfg = _Obj( + model_config=_Obj(model="m1", max_model_len=4096), + scheduler_config=_Obj(max_num_batched_tokens=2048, max_num_seqs=8), + cache_config=_Obj( + gpu_memory_utilization=0.5, + block_size=16, + num_gpu_blocks=1024, + cache_dtype="auto", + enable_prefix_caching=True, + ), + parallel_config=_Obj( + rank=1, tensor_parallel_size=2, enable_expert_parallel=False + ), + compilation_config=_Obj(mode=3), + quant_config=_Obj(name="q"), + ) + + cfg = plugin_config._generate_atom_config_from_vllm_config(vllm_cfg) + + assert cfg.model == "m1" + assert cfg.max_num_batched_tokens == 2048 + assert cfg.max_num_seqs == 8 + assert cfg.max_model_len == 4096 + assert cfg.tensor_parallel_size == 2 + assert cfg.enforce_eager is True + assert cfg.compilation_config.level == 3 + assert cfg.plugin_config.is_plugin_mode is True + assert cfg.plugin_config.is_vllm is True + assert cfg.plugin_config.is_sglang is False + assert cfg.plugin_config.vllm_use_atom_attention is True + + +def test_generate_atom_config_requires_plugin_mode(monkeypatch): + import atom.plugin.config as config_module + import atom.plugin as plugin_module + import atom.config as atom_config_module + + monkeypatch.setattr(plugin_module, "is_vllm", lambda: False, raising=False) + monkeypatch.setattr(plugin_module, "is_sglang", lambda: False, raising=False) + monkeypatch.setattr( + atom_config_module, "set_current_atom_config", lambda _cfg: None, raising=False + ) + + with pytest.raises(ValueError, match="running in plugin mode"): + config_module.generate_atom_config_for_plugin_mode(config=None) diff --git a/tests/plugin/test_plugin_env_flags.py b/tests/plugin/test_plugin_env_flags.py new file mode 100644 index 00000000..6ca39018 --- /dev/null +++ b/tests/plugin/test_plugin_env_flags.py @@ -0,0 +1,56 @@ +import 
importlib +import importlib.util +import sys +import types + +import pytest + + +def test_disable_vllm_plugin_flag_disables_platform(monkeypatch): + # ATOM_DISABLE_VLLM_PLUGIN takes precedence: + # when it is 1, vLLM should not get ATOM platform/attention at all. + for disable_attention in ("0", "1"): + monkeypatch.setenv("ATOM_DISABLE_VLLM_PLUGIN", "1") + monkeypatch.setenv("ATOM_DISABLE_VLLM_PLUGIN_ATTENTION", disable_attention) + + import atom.plugin.vllm.platform as platform_module + import atom.plugin.vllm.register as register_module + + importlib.reload(platform_module) + importlib.reload(register_module) + + assert platform_module.ATOMPlatform is None + assert register_module.register_platform() is None + + +@pytest.mark.skipif( + importlib.util.find_spec("vllm") is None, + reason="vllm is not installed in current test environment", +) +def test_disable_vllm_plugin_attention_fallbacks_to_non_atom_backend(monkeypatch): + rocm_module = types.ModuleType("vllm.platforms.rocm") + + class _RocmPlatform: + @classmethod + def get_attn_backend_cls(cls, selected_backend, attn_selector_config): + return "vllm.default.backend" + + rocm_module.RocmPlatform = _RocmPlatform + + monkeypatch.setitem(sys.modules, "vllm", types.ModuleType("vllm")) + monkeypatch.setitem( + sys.modules, "vllm.platforms", types.ModuleType("vllm.platforms") + ) + monkeypatch.setitem(sys.modules, "vllm.platforms.rocm", rocm_module) + monkeypatch.setenv("ATOM_DISABLE_VLLM_PLUGIN", "0") + monkeypatch.setenv("ATOM_DISABLE_VLLM_PLUGIN_ATTENTION", "1") + + import atom.plugin.vllm.platform as platform_module + + importlib.reload(platform_module) + + result = platform_module.ATOMPlatform.get_attn_backend_cls( + selected_backend="x", + attn_selector_config=types.SimpleNamespace(use_mla=True), + ) + assert result == "vllm.default.backend" diff --git a/tests/plugin/test_plugin_mode_status.py b/tests/plugin/test_plugin_mode_status.py new file mode 100644 index 00000000..6ce6e2e3 --- /dev/null +++ 
b/tests/plugin/test_plugin_mode_status.py @@ -0,0 +1,36 @@ +import pytest + +from atom.plugin import prepare as plugin_prepare + + +@pytest.fixture(autouse=True) +def _reset_framework_state(): + # Autouse fixture: pytest runs this before/after every test. + plugin_prepare._set_framework_backbone("atom") + yield + plugin_prepare._set_framework_backbone("atom") + + +def test_default_mode_is_server_mode(): + assert plugin_prepare.is_plugin_mode() is False + assert plugin_prepare.is_vllm() is False + assert plugin_prepare.is_sglang() is False + + +def test_set_framework_to_vllm(): + plugin_prepare._set_framework_backbone("vllm") + assert plugin_prepare.is_plugin_mode() is True + assert plugin_prepare.is_vllm() is True + assert plugin_prepare.is_sglang() is False + + +def test_set_framework_to_sgl_alias(): + plugin_prepare._set_framework_backbone("sgl") + assert plugin_prepare.is_plugin_mode() is True + assert plugin_prepare.is_vllm() is False + assert plugin_prepare.is_sglang() is True + + +def test_set_framework_unsupported_raises(): + with pytest.raises(ValueError, match="Unsupported framework"): + plugin_prepare._set_framework_backbone("tensorflow") diff --git a/tests/plugin/test_plugin_registries.py b/tests/plugin/test_plugin_registries.py new file mode 100644 index 00000000..79dbe323 --- /dev/null +++ b/tests/plugin/test_plugin_registries.py @@ -0,0 +1,57 @@ +import sys +import types +import importlib +import importlib.util + +import pytest + +from atom.plugin import prepare as plugin_prepare +import atom.plugin.vllm.register as vllm_register + + +@pytest.fixture(autouse=True) +def _reset_framework_state(): + plugin_prepare._set_framework_backbone("atom") + yield + plugin_prepare._set_framework_backbone("atom") + + +@pytest.mark.skipif( + importlib.util.find_spec("vllm") is None, + reason="vllm is not installed in current test environment", +) +def test_register_platform_returns_oot_platform(monkeypatch): + rocm_module = types.ModuleType("vllm.platforms.rocm") + + 
class _RocmPlatform: + pass + + rocm_module.RocmPlatform = _RocmPlatform + vllm_platforms = types.ModuleType("vllm.platforms") + vllm_platforms.current_platform = None + + monkeypatch.setitem(sys.modules, "vllm", types.ModuleType("vllm")) + monkeypatch.setitem(sys.modules, "vllm.platforms", vllm_platforms) + monkeypatch.setitem(sys.modules, "vllm.platforms.rocm", rocm_module) + + monkeypatch.setenv("ATOM_DISABLE_VLLM_PLUGIN", "0") + monkeypatch.setenv("ATOM_DISABLE_VLLM_PLUGIN_ATTENTION", "0") + + import atom.plugin.vllm.platform as platform_module + + importlib.reload(platform_module) + importlib.reload(vllm_register) + + platform_path = vllm_register.register_platform() + module_name, class_name = platform_path.rsplit(".", 1) + vllm_platforms.current_platform = getattr( + importlib.import_module(module_name), class_name + ) + + # get current platform from vllm side and validate it is ATOM platform. + assert vllm_platforms.current_platform is platform_module.ATOMPlatform + + +def test_register_platform_can_be_disabled(monkeypatch): + monkeypatch.setattr(vllm_register, "disable_vllm_plugin", True, raising=False) + assert vllm_register.register_platform() is None diff --git a/tests/plugin/test_plugin_unsupported_models.py b/tests/plugin/test_plugin_unsupported_models.py new file mode 100644 index 00000000..0419d4a3 --- /dev/null +++ b/tests/plugin/test_plugin_unsupported_models.py @@ -0,0 +1,23 @@ +import importlib.util +import importlib +import sys +import types + +import pytest + + +# FIXME: remove it later when enabling fallback for unsupported models +@pytest.mark.skipif( + importlib.util.find_spec("vllm") is None, + reason="vllm is not installed in current test environment", +) +def test_vllm_wrapper_rejects_unsupported_model_arch(monkeypatch): + # Avoid importing deep model-loader dependencies during test collection/import. 
+ fake_loader = types.ModuleType("atom.model_loader.loader") + fake_loader.load_model_in_plugin_mode = lambda **kwargs: set() + monkeypatch.setitem(sys.modules, "atom.model_loader.loader", fake_loader) + + model_wrapper = importlib.import_module("atom.plugin.vllm.model_wrapper") + + with pytest.raises(ValueError, match="not supported by ATOM OOT backend"): + model_wrapper._get_atom_model_cls("UnknownModelForCausalLM") diff --git a/tests/plugin/test_plugin_vllm_import_paths.py b/tests/plugin/test_plugin_vllm_import_paths.py new file mode 100644 index 00000000..523e0798 --- /dev/null +++ b/tests/plugin/test_plugin_vllm_import_paths.py @@ -0,0 +1,85 @@ +import importlib.util + +import pytest + + +@pytest.mark.skipif( + importlib.util.find_spec("vllm") is None, + reason="vllm is not installed in current test environment", +) +def test_vllm_import_paths_guardrail(): + """Guardrail for OOT vLLM import paths used by ATOM plugin mode.""" + # attention.py / paged_attention.py (new path with legacy fallback) + try: + from vllm.attention.layer import Attention, MLAAttention, AttentionType + except ImportError: + from vllm.model_executor.layers.attention import Attention, MLAAttention + from vllm.v1.attention.backend import AttentionType + + # attention.py + from vllm.config import ( + VllmConfig, + get_current_vllm_config, + get_layers_from_vllm_config, + ) + from vllm.model_executor.layers.attention.mla_attention import ( + MLACommonMetadataBuilder, + QueryLenSupport, + ) + from vllm.utils.math_utils import cdiv, round_down + from vllm.v1.attention.backend import AttentionCGSupport, AttentionMetadataBuilder + from vllm.v1.attention.backends.utils import ( + get_dcp_local_seq_lens, + split_decodes_and_prefills, + split_decodes_prefills_and_extends, + ) + from vllm.v1.attention.ops.common import cp_lse_ag_out_rs + from vllm.v1.attention.ops.merge_attn_states import merge_attn_states + + # model_wrapper.py (core vLLM model interfaces) + from 
vllm.model_executor.models.interfaces import SupportsPP, SupportsQuant + from vllm.model_executor.models.interfaces_base import ( + VllmModel, + VllmModelForTextGeneration, + ) + from vllm.model_executor.models.registry import ModelRegistry + from vllm.sequence import IntermediateTensors + + # attention_mla.py / platform.py / register.py + from vllm import _custom_ops + from vllm.distributed.parallel_state import get_dcp_group + from vllm.platforms import current_platform + from vllm.platforms.rocm import RocmPlatform + + assert all( + obj is not None + for obj in [ + Attention, + MLAAttention, + AttentionType, + QueryLenSupport, + MLACommonMetadataBuilder, + cdiv, + round_down, + AttentionCGSupport, + AttentionMetadataBuilder, + get_dcp_local_seq_lens, + split_decodes_and_prefills, + split_decodes_prefills_and_extends, + cp_lse_ag_out_rs, + merge_attn_states, + VllmConfig, + get_current_vllm_config, + get_layers_from_vllm_config, + SupportsPP, + SupportsQuant, + VllmModel, + VllmModelForTextGeneration, + ModelRegistry, + IntermediateTensors, + _custom_ops, + get_dcp_group, + current_platform, + RocmPlatform, + ] + ) From 50e59403386fad73b588039ea1b0bbcc3a14a427 Mon Sep 17 00:00:00 2001 From: zejunchen-zejun Date: Wed, 11 Mar 2026 22:54:31 +0800 Subject: [PATCH 02/15] add Signed-off-by: zejunchen-zejun --- .github/workflows/atom-test.yaml | 243 ------------- .../workflows/atom-vllm-oot-full-test.yaml | 319 ++++++++++++++++ .github/workflows/atom-vllm-oot-test.yaml | 343 ++++++++++++++++++ 3 files changed, 662 insertions(+), 243 deletions(-) create mode 100644 .github/workflows/atom-vllm-oot-full-test.yaml create mode 100644 .github/workflows/atom-vllm-oot-test.yaml diff --git a/.github/workflows/atom-test.yaml b/.github/workflows/atom-test.yaml index 3c061ca3..dfa4a0b9 100644 --- a/.github/workflows/atom-test.yaml +++ b/.github/workflows/atom-test.yaml @@ -471,246 +471,3 @@ jobs: # Remove the pre-built image to free disk space on the runner docker rmi 
"rocm/atom-dev:pre-build-${{ env.GITHUB_COMMIT_SHA }}" || true - atom-vllm-oot: - needs: [pre-checks, build_atom_image] - if: ${{ needs.pre-checks.result == 'success' && needs.build_atom_image.result == 'success' && (!github.event.pull_request || github.event.pull_request.draft == false) }} - name: ATOM vLLM OOT Test - strategy: - fail-fast: false - matrix: - include: - # Keep CI runtime under control: enable only one OOT model for now. - - model_name: "Kimi-K2-Thinking-MXFP4" - model_path: "amd/Kimi-K2-Thinking-MXFP4" - accuracy_test_threshold: "0.90" - runner: atom-mi355-8gpu.predownload - runs-on: ${{ matrix.runner }} - timeout-minutes: 180 - env: - CONTAINER_NAME: atom_vllm_oot_${{ strategy.job-index }} - OOT_IMAGE_TAG: atom_vllm_oot_test:${{ github.sha }}-${{ strategy.job-index }} - VLLM_COMMIT: b31e9326a7d9394aab8c767f8ebe225c65594b60 - VLLM_VERSION: "0.17" - - steps: - - name: Clean up containers and workspace - run: | - echo "=== Cleaning up containers on $(hostname) ===" - containers=$(docker ps -q) - if [ -n "$containers" ]; then - docker kill $containers || true - fi - docker rm -f "$CONTAINER_NAME" 2>/dev/null || true - docker run --rm -v "${GITHUB_WORKSPACE:-$PWD}":/workspace -w /workspace --privileged rocm/pytorch:latest bash -lc "ls -la /workspace/ && rm -rf /workspace/*" || true - - - name: Checkout ATOM repo - uses: actions/checkout@v4 - - - name: Docker Login - if: ${{ !github.event.pull_request.head.repo.fork }} - run: | - docker login -u ${{ secrets.DOCKER_USERNAME }} -p ${{ secrets.DOCKER_PASSWORD }} - - - name: Prepare OOT base image for forked repo - if: ${{ github.event.pull_request.head.repo.fork }} - run: | - cat < Dockerfile.mod - FROM ${{ env.ATOM_BASE_NIGTHLY_IMAGE }} - RUN pip install -U lm-eval[api] - RUN pip show lm-eval || true - RUN pip install hf_transfer - RUN pip show hf_transfer || true - RUN echo "=== Aiter version BEFORE uninstall ===" && pip show amd-aiter || true - RUN pip uninstall -y amd-aiter - RUN pip install --upgrade 
"pybind11>=3.0.1" - RUN pip show pybind11 - RUN rm -rf /app/aiter-test - RUN git clone https://github.com/ROCm/aiter.git /app/aiter-test && \\ - cd /app/aiter-test && \\ - git checkout HEAD && \\ - git submodule sync && git submodule update --init --recursive && \\ - MAX_JOBS=64 PREBUILD_KERNELS=0 GPU_ARCHS=gfx950 python3 setup.py develop - RUN echo "=== Aiter version AFTER installation ===" && pip show amd-aiter || true - RUN echo "=== ATOM version BEFORE uninstall ===" && pip show atom || true - RUN pip uninstall -y atom - RUN rm -rf /app/ATOM - RUN git clone ${{ env.GITHUB_REPO_URL }} /app/ATOM && \\ - cd /app/ATOM && \\ - git checkout ${{ env.GITHUB_COMMIT_SHA }} && \\ - pip install -e . - RUN echo "=== ATOM version AFTER installation ===" && pip show atom || true - EOF - - docker build --pull --network=host \ - --no-cache \ - -t atom_oot_base:ci \ - -f Dockerfile.mod . - echo "OOT_BASE_IMAGE=atom_oot_base:ci" >> "$GITHUB_ENV" - - - name: Select OOT base image from pre-built ATOM image - if: ${{ !github.event.pull_request.head.repo.fork }} - run: | - echo "OOT_BASE_IMAGE=rocm/atom-dev:pre-build-${{ env.GITHUB_COMMIT_SHA }}" >> "$GITHUB_ENV" - - - name: Build OOT vLLM image - run: | - chmod +x docker/plugin/build_OOT_vLLM.sh - IMAGE_TAG="${OOT_IMAGE_TAG}" \ - BASE_IMAGE="${OOT_BASE_IMAGE}" \ - VLLM_COMMIT="${VLLM_COMMIT}" \ - VLLM_VERSION="${VLLM_VERSION}" \ - INSTALL_LM_EVAL=1 \ - BUILD_NO_CACHE=1 \ - docker/plugin/build_OOT_vLLM.sh - - - name: Start OOT test container - run: | - if [ -f "/etc/podinfo/gha-render-devices" ]; then - DEVICE_FLAG=$(cat /etc/podinfo/gha-render-devices) - else - DEVICE_FLAG="--device /dev/dri" - fi - - if [ -d "/models" ]; then - MODEL_MOUNT="-v /models:/models" - else - echo "Warning: /models directory not found on runner; skipping /models mount." 
- MODEL_MOUNT="" - fi - - docker run -dt --device=/dev/kfd $DEVICE_FLAG \ - -v "${GITHUB_WORKSPACE:-$PWD}":/workspace \ - $MODEL_MOUNT \ - -w /workspace \ - --ipc=host --group-add video \ - --shm-size=16G \ - --privileged \ - --cap-add=SYS_PTRACE \ - -e HF_TOKEN="${HF_TOKEN:-}" \ - --security-opt seccomp=unconfined \ - --ulimit memlock=-1 \ - --ulimit stack=67108864 \ - -v "${{ github.workspace }}:/workspace" \ - -w /workspace \ - --name "$CONTAINER_NAME" \ - "$OOT_IMAGE_TAG" - env: - GITHUB_WORKSPACE: ${{ github.workspace }} - - - name: Resolve model path - run: | - if [ -f "/models/${{ matrix.model_path }}/config.json" ]; then - echo "MODEL_PATH=/models/${{ matrix.model_path }}" >> "$GITHUB_ENV" - echo "Use pre-downloaded model path: /models/${{ matrix.model_path }}" - else - echo "MODEL_PATH=${{ matrix.model_path }}" >> "$GITHUB_ENV" - echo "Use HuggingFace model path: ${{ matrix.model_path }}" - fi - - - name: Download model if needed - run: | - if [ -d "/models" ] && [ ! -f "/models/${{ matrix.model_path }}/config.json" ]; then - echo "Downloading model to /models/${{ matrix.model_path }}" - docker exec -e HF_TOKEN=${{ secrets.AMD_HF_TOKEN }} "$CONTAINER_NAME" bash -lc "hf download ${{ matrix.model_path }} --local-dir /models/${{ matrix.model_path }}" - else - echo "Skip model download" - fi - - - name: Launch vLLM server with ATOM OOT plugin - run: | - docker exec "$CONTAINER_NAME" bash -lc " - set -euo pipefail - export SAFETENSORS_FAST_GPU=1 - export VLLM_ROCM_USE_AITER=1 - export VLLM_RPC_TIMEOUT=1800000 - export VLLM_CACHE_ROOT=/tmp/.cache/vllm - export TORCHINDUCTOR_CACHE_DIR=/tmp/.cache/inductor - rm -rf /tmp/.cache - - nohup vllm serve \"$MODEL_PATH\" \ - --host 0.0.0.0 \ - --port 8000 \ - --tensor-parallel-size 8 \ - --enable-expert-parallel \ - --trust-remote-code \ - --disable-log-requests \ - --gpu-memory-utilization 0.9 \ - --async-scheduling \ - --load-format fastsafetensors \ - --kv-cache-dtype fp8 \ - --max-model-len 16384 \ - > 
/tmp/vllm_oot.log 2>&1 & - echo \$! > /tmp/vllm_oot.pid - echo \"Server PID: \$(cat /tmp/vllm_oot.pid)\" - " - - - name: Wait for vLLM readiness - timeout-minutes: 30 - run: | - set -euo pipefail - for i in $(seq 1 60); do - if docker exec "$CONTAINER_NAME" bash -lc "curl -sS http://127.0.0.1:8000/v1/models >/dev/null"; then - echo "vLLM server is ready." - exit 0 - fi - echo "Waiting for server... ($i/60)" - sleep 30 - done - echo "vLLM server did not become ready in time." - docker exec "$CONTAINER_NAME" bash -lc "tail -n 200 /tmp/vllm_oot.log || true" - exit 1 - - - name: Run OOT accuracy test (gsm8k) - timeout-minutes: 45 - run: | - docker exec "$CONTAINER_NAME" bash -lc " - set -euo pipefail - if ! command -v lm_eval >/dev/null 2>&1; then - pip install 'lm-eval[api]' - fi - mkdir -p /tmp/oot_accuracy_results - RESULT_FILE=/tmp/oot_accuracy_results/\$(date +%Y%m%d%H%M%S).json - lm_eval --model local-completions \ - --model_args model=\"$MODEL_PATH\",base_url=http://127.0.0.1:8000/v1/completions,num_concurrent=65,max_retries=1,tokenized_requests=False \ - --tasks gsm8k \ - --num_fewshot 3 \ - --output_path \"\$RESULT_FILE\" 2>&1 | tee /tmp/oot_accuracy_output.txt - echo \"OOT_RESULT_FILE=\$RESULT_FILE\" - " - - - name: Check OOT accuracy threshold - run: | - docker exec "$CONTAINER_NAME" bash -lc " - set -euo pipefail - python -c \"import json, glob; files=sorted(glob.glob('/tmp/oot_accuracy_results/*.json')); assert files, 'ERROR: No OOT accuracy result JSON found.'; threshold=float('${{ matrix.accuracy_test_threshold }}'); result_file=files[-1]; data=json.load(open(result_file)); value=data['results']['gsm8k']['exact_match,flexible-extract']; print('RESULT_FILE:', result_file); print('Flexible extract value:', value); print('Accuracy threshold:', threshold); assert value >= threshold, f'Accuracy test failed: {value} < {threshold}'; print(f'Accuracy test passed: {value} >= {threshold}')\" - " - - - name: Collect OOT accuracy summary - if: success() - run: | - 
echo "OOT Accuracy Test Summary for ${{ matrix.model_name }}:" >> $GITHUB_STEP_SUMMARY - docker exec "$CONTAINER_NAME" bash -lc "awk '/\|Tasks\|Version\|/,/^$/ { if (NF > 0) print }' /tmp/oot_accuracy_output.txt" >> $GITHUB_STEP_SUMMARY || true - - - name: Collect OOT logs and results - if: always() - run: | - docker cp "$CONTAINER_NAME":/tmp/vllm_oot.log ./vllm_oot.log || true - docker cp "$CONTAINER_NAME":/tmp/oot_accuracy_output.txt ./oot_accuracy_output.txt || true - docker cp "$CONTAINER_NAME":/tmp/oot_accuracy_results ./oot_accuracy_results || true - - - name: Upload OOT artifacts - if: always() - uses: actions/upload-artifact@v4 - with: - name: oot-${{ matrix.model_name }}-artifacts - path: | - vllm_oot.log - oot_accuracy_output.txt - oot_accuracy_results - - - name: Clean up OOT test - if: always() - run: | - docker exec "$CONTAINER_NAME" bash -lc "if [ -f /tmp/vllm_oot.pid ]; then kill \$(cat /tmp/vllm_oot.pid) || true; fi" || true - docker stop "$CONTAINER_NAME" || true - docker rm "$CONTAINER_NAME" || true diff --git a/.github/workflows/atom-vllm-oot-full-test.yaml b/.github/workflows/atom-vllm-oot-full-test.yaml new file mode 100644 index 00000000..9081b47f --- /dev/null +++ b/.github/workflows/atom-vllm-oot-full-test.yaml @@ -0,0 +1,319 @@ +name: ATOM vLLM OOT Validation + +on: + workflow_dispatch: + inputs: + vllm_commit: + description: "vLLM commit to validate" + required: false + type: string + default: "b31e9326a7d9394aab8c767f8ebe225c65594b60" + vllm_version: + description: "vLLM version label in image tag" + required: false + type: string + default: "0.17" + base_image: + description: "ATOM base image for rebuild" + required: false + type: string + default: "rocm/atom-dev:latest" + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: false + +env: + BASE_IMAGE: ${{ inputs.base_image || 'rocm/atom-dev:latest' }} + GITHUB_REPO_URL: ${{ github.event.pull_request.head.repo.clone_url || 
'https://github.com/ROCm/ATOM.git' }} + GITHUB_COMMIT_SHA: ${{ github.event.pull_request.head.sha || github.sha }} + VALIDATION_IMAGE_REPO: rocm/atom-dev + +jobs: + build-oot-image: + name: Build OOT validation image + runs-on: linux-atom-mi355-1 + outputs: + oot_image_tag: ${{ steps.meta.outputs.oot_image_tag }} + steps: + - name: Checkout ATOM repo + uses: actions/checkout@v4 + + - name: Docker Login + run: | + docker login -u ${{ secrets.DOCKER_USERNAME }} -p ${{ secrets.DOCKER_PASSWORD }} + + - name: Build ATOM base image from current commit + run: | + cat <<EOF > Dockerfile.mod + FROM ${{ env.BASE_IMAGE }} + RUN pip install -U lm-eval[api] + RUN pip show lm-eval || true + RUN pip install hf_transfer + RUN pip show hf_transfer || true + RUN echo "=== Aiter version BEFORE uninstall ===" && pip show amd-aiter || true + RUN pip uninstall -y amd-aiter + RUN pip install --upgrade "pybind11>=3.0.1" + RUN pip show pybind11 + RUN rm -rf /app/aiter-test + RUN git clone --depth 1 https://github.com/ROCm/aiter.git /app/aiter-test && \\ + cd /app/aiter-test && \\ + git checkout HEAD && \\ + git submodule sync && git submodule update --init --recursive && \\ + MAX_JOBS=64 PREBUILD_KERNELS=0 GPU_ARCHS=gfx950 python3 setup.py develop + RUN echo "=== Aiter version AFTER installation ===" && pip show amd-aiter || true + RUN echo "=== ATOM version BEFORE uninstall ===" && pip show atom || true + RUN pip uninstall -y atom + RUN rm -rf /app/ATOM + RUN git clone ${{ env.GITHUB_REPO_URL }} /app/ATOM && \\ + cd /app/ATOM && \\ + git checkout ${{ env.GITHUB_COMMIT_SHA }} && \\ + pip install -e . + RUN echo "=== ATOM version AFTER installation ===" && pip show atom || true + EOF + + docker build --pull --network=host \ + --no-cache \ + -t atom_oot_base:ci \ + -f Dockerfile.mod . 
+ + - name: Build OOT vLLM image from rebuilt ATOM base + id: meta + run: | + chmod +x docker/plugin/build_OOT_vLLM.sh + OOT_IMAGE_TAG="${VALIDATION_IMAGE_REPO}:oot-vllm-validation-${GITHUB_COMMIT_SHA}-${{ github.run_id }}" + IMAGE_TAG="${OOT_IMAGE_TAG}" \ + BASE_IMAGE="atom_oot_base:ci" \ + VLLM_COMMIT="${{ inputs.vllm_commit || 'b31e9326a7d9394aab8c767f8ebe225c65594b60' }}" \ + VLLM_VERSION="${{ inputs.vllm_version || '0.17' }}" \ + INSTALL_LM_EVAL=1 \ + BUILD_NO_CACHE=1 \ + docker/plugin/build_OOT_vLLM.sh + + echo "oot_image_tag=${OOT_IMAGE_TAG}" >> "$GITHUB_OUTPUT" + + - name: Push OOT validation image + run: | + docker push "${{ steps.meta.outputs.oot_image_tag }}" + + plugin-ut: + name: Plugin UT (OOT image) + needs: [build-oot-image] + runs-on: linux-atom-mi355-1 + timeout-minutes: 60 + steps: + - name: Checkout ATOM repo + uses: actions/checkout@v4 + + - name: Docker Login + run: | + docker login -u ${{ secrets.DOCKER_USERNAME }} -p ${{ secrets.DOCKER_PASSWORD }} + + - name: Pull built OOT image + run: | + docker pull "${{ needs.build-oot-image.outputs.oot_image_tag }}" + + - name: Run plugin unit tests + run: | + docker run --rm \ + -v "${{ github.workspace }}:/workspace" \ + -w /workspace \ + "${{ needs.build-oot-image.outputs.oot_image_tag }}" \ + bash -lc "pytest -q tests/plugin" + + oot-model-accuracy: + name: OOT Model Accuracy (${{ matrix.model_name }}) + needs: [build-oot-image, plugin-ut] + strategy: + fail-fast: false + matrix: + include: + # This matrix targets model architectures supported by ATOM OOT plugin. 
+ - model_name: "Qwen3 Dense" + model_path: "Qwen/Qwen3-8B" + extra_args: "--trust-remote-code --kv-cache-dtype fp8 --tensor-parallel-size 1" + env_vars: "" + accuracy_test_threshold: "0.70" + runner: linux-atom-mi355-1 + - model_name: "Qwen3 MoE" + model_path: "Qwen/Qwen3-235B-A22B-Instruct-2507-FP8" + extra_args: "--trust-remote-code --kv-cache-dtype fp8 --tensor-parallel-size 8 --enable-expert-parallel" + env_vars: "" + accuracy_test_threshold: "0.87" + runner: atom-mi355-8gpu.predownload + - model_name: "DeepSeek-V3 family" + model_path: "deepseek-ai/DeepSeek-R1-0528" + extra_args: "--trust-remote-code --kv-cache-dtype fp8 --tensor-parallel-size 8" + env_vars: "" + accuracy_test_threshold: "0.93" + runner: atom-mi355-8gpu.predownload + - model_name: "GPT-OSS" + model_path: "openai/gpt-oss-120b" + extra_args: "--trust-remote-code --kv-cache-dtype fp8 --tensor-parallel-size 2 --enable-dp-attention --enable-expert-parallel --gpu-memory-utilization 0.3" + env_vars: | + ATOM_GPT_OSS_MODEL=1 + accuracy_test_threshold: "0.38" + runner: linux-atom-mi355-4 + - model_name: "Kimi-K2" + model_path: "amd/Kimi-K2-Thinking-MXFP4" + extra_args: "--trust-remote-code --kv-cache-dtype fp8 --tensor-parallel-size 8 --enable-expert-parallel" + env_vars: "" + accuracy_test_threshold: "0.90" + runner: atom-mi355-8gpu.predownload + runs-on: ${{ matrix.runner }} + timeout-minutes: 240 + env: + CONTAINER_NAME: atom_vllm_oot_validation_${{ strategy.job-index }} + OOT_IMAGE_TAG: ${{ needs.build-oot-image.outputs.oot_image_tag }} + steps: + - name: Checkout ATOM repo + uses: actions/checkout@v4 + + - name: Docker Login + run: | + docker login -u ${{ secrets.DOCKER_USERNAME }} -p ${{ secrets.DOCKER_PASSWORD }} + + - name: Pull built OOT image + run: | + docker pull "${OOT_IMAGE_TAG}" + + - name: Clean up old containers + run: | + containers=$(docker ps -q) + if [ -n "$containers" ]; then + docker kill $containers || true + fi + docker rm -f "$CONTAINER_NAME" 2>/dev/null || true + + - name: 
Start validation container + run: | + if [ -f "/etc/podinfo/gha-render-devices" ]; then + DEVICE_FLAG=$(cat /etc/podinfo/gha-render-devices) + else + DEVICE_FLAG="--device /dev/dri" + fi + + if [ -d "/models" ]; then + MODEL_MOUNT="-v /models:/models" + else + MODEL_MOUNT="" + fi + + cat > /tmp/oot_env_file.txt << 'EOF' + ${{ matrix.env_vars }} + EOF + + docker run -dt --device=/dev/kfd $DEVICE_FLAG \ + -v "${GITHUB_WORKSPACE:-$PWD}":/workspace \ + $MODEL_MOUNT \ + -w /workspace \ + --ipc=host --group-add video \ + --shm-size=16G \ + --privileged \ + --cap-add=SYS_PTRACE \ + --security-opt seccomp=unconfined \ + --ulimit memlock=-1 \ + --ulimit stack=67108864 \ + --env-file /tmp/oot_env_file.txt \ + -e HF_TOKEN="${HF_TOKEN:-}" \ + --name "$CONTAINER_NAME" \ + "${OOT_IMAGE_TAG}" + env: + GITHUB_WORKSPACE: ${{ github.workspace }} + + - name: Resolve and download model + run: | + if [ -f "/models/${{ matrix.model_path }}/config.json" ]; then + echo "MODEL_PATH=/models/${{ matrix.model_path }}" >> "$GITHUB_ENV" + else + echo "MODEL_PATH=${{ matrix.model_path }}" >> "$GITHUB_ENV" + if [ -d "/models" ]; then + docker exec -e HF_TOKEN=${{ secrets.AMD_HF_TOKEN }} "$CONTAINER_NAME" bash -lc "hf download ${{ matrix.model_path }} --local-dir /models/${{ matrix.model_path }}" + echo "MODEL_PATH=/models/${{ matrix.model_path }}" >> "$GITHUB_ENV" + fi + fi + + - name: Launch vLLM server with ATOM OOT plugin + run: | + docker exec "$CONTAINER_NAME" bash -lc " + set -euo pipefail + export SAFETENSORS_FAST_GPU=1 + export VLLM_ROCM_USE_AITER=1 + export VLLM_RPC_TIMEOUT=1800000 + export VLLM_CACHE_ROOT=/tmp/.cache/vllm + export TORCHINDUCTOR_CACHE_DIR=/tmp/.cache/inductor + rm -rf /tmp/.cache + + nohup vllm serve \"$MODEL_PATH\" \ + --host 0.0.0.0 \ + --port 8000 \ + ${{ matrix.extra_args }} \ + > /tmp/vllm_oot.log 2>&1 & + echo \$! 
> /tmp/vllm_oot.pid + " + + - name: Wait for vLLM readiness + timeout-minutes: 30 + run: | + set -euo pipefail + for i in $(seq 1 60); do + if docker exec "$CONTAINER_NAME" bash -lc "curl -sS http://127.0.0.1:8000/v1/models >/dev/null"; then + echo "vLLM server is ready." + exit 0 + fi + echo "Waiting for server... ($i/60)" + sleep 30 + done + docker exec "$CONTAINER_NAME" bash -lc "tail -n 200 /tmp/vllm_oot.log || true" + exit 1 + + - name: Run gsm8k accuracy + timeout-minutes: 60 + run: | + docker exec "$CONTAINER_NAME" bash -lc " + set -euo pipefail + mkdir -p /tmp/oot_accuracy_results + RESULT_FILE=/tmp/oot_accuracy_results/\$(date +%Y%m%d%H%M%S).json + lm_eval --model local-completions \ + --model_args model=\"$MODEL_PATH\",base_url=http://127.0.0.1:8000/v1/completions,num_concurrent=65,max_retries=1,tokenized_requests=False \ + --tasks gsm8k \ + --num_fewshot 3 \ + --output_path \"\$RESULT_FILE\" 2>&1 | tee /tmp/oot_accuracy_output.txt + " + + - name: Check accuracy threshold + run: | + docker exec "$CONTAINER_NAME" bash -lc " + set -euo pipefail + python -c \"import json, glob; files=sorted(glob.glob('/tmp/oot_accuracy_results/*.json')); assert files, 'No accuracy JSON found'; threshold=float('${{ matrix.accuracy_test_threshold }}'); result_file=files[-1]; data=json.load(open(result_file)); value=data['results']['gsm8k']['exact_match,flexible-extract']; print('RESULT_FILE:', result_file); print('value:', value, 'threshold:', threshold); assert value >= threshold, f'Accuracy failed: {value} < {threshold}'\" + " + + - name: Collect summary + if: success() + run: | + echo "OOT gsm8k summary for ${{ matrix.model_name }}:" >> $GITHUB_STEP_SUMMARY + docker exec "$CONTAINER_NAME" bash -lc "awk '/\|Tasks\|Version\|/,/^$/ { if (NF > 0) print }' /tmp/oot_accuracy_output.txt" >> $GITHUB_STEP_SUMMARY || true + + - name: Collect artifacts + if: always() + run: | + docker cp "$CONTAINER_NAME":/tmp/vllm_oot.log ./vllm_oot.log || true + docker cp 
"$CONTAINER_NAME":/tmp/oot_accuracy_output.txt ./oot_accuracy_output.txt || true + docker cp "$CONTAINER_NAME":/tmp/oot_accuracy_results ./oot_accuracy_results || true + + - name: Upload model artifacts + if: always() + uses: actions/upload-artifact@v4 + with: + name: oot-validation-${{ matrix.model_name }}-${{ github.run_id }} + path: | + vllm_oot.log + oot_accuracy_output.txt + oot_accuracy_results + + - name: Cleanup + if: always() + run: | + docker exec "$CONTAINER_NAME" bash -lc "if [ -f /tmp/vllm_oot.pid ]; then kill \$(cat /tmp/vllm_oot.pid) || true; fi" || true + docker stop "$CONTAINER_NAME" || true + docker rm "$CONTAINER_NAME" || true diff --git a/.github/workflows/atom-vllm-oot-test.yaml b/.github/workflows/atom-vllm-oot-test.yaml new file mode 100644 index 00000000..83d8d8de --- /dev/null +++ b/.github/workflows/atom-vllm-oot-test.yaml @@ -0,0 +1,343 @@ +name: ATOM vLLM OOT Test + +on: + push: + branches: [main] + pull_request: + branches: [main] # Triggers on PRs targeting `main` + types: [opened, synchronize, reopened, ready_for_review] + paths-ignore: + - '**/*.md' + - 'docs/**' + - 'LICENSE' + - '.gitignore' + schedule: + # Nightly at 00:00 Beijing time (16:00 UTC) + - cron: '0 16 * * *' + workflow_dispatch: + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: ${{ github.ref != 'refs/heads/main' }} + +env: + ATOM_BASE_NIGTHLY_IMAGE: rocm/atom-dev:latest + GITHUB_REPO_URL: ${{ github.event.pull_request.head.repo.clone_url || 'https://github.com/ROCm/ATOM.git' }} + GITHUB_COMMIT_SHA: ${{ github.event.pull_request.head.sha || github.event.head_commit.id }} + +jobs: + pre-checks: + uses: ./.github/workflows/pre-checks.yaml + with: + black: true + ruff: true + + build_atom_image: + if: ${{ needs.pre-checks.result == 'success' && (!github.event.pull_request || github.event.pull_request.draft == false) }} + needs: [pre-checks] + name: Build ATOM image + runs-on: build-only-atom + steps: + - name: Checkout ATOM repo + if: 
${{ !github.event.pull_request.head.repo.fork }} + uses: actions/checkout@v4 + + - name: Generate Dockerfile + if: ${{ !github.event.pull_request.head.repo.fork }} + run: | + cat <<EOF > Dockerfile.mod + FROM ${{ env.ATOM_BASE_NIGTHLY_IMAGE }} + RUN pip install -U lm-eval[api] + RUN pip show lm-eval || true + RUN pip install hf_transfer + RUN pip show hf_transfer || true + RUN echo "=== Aiter version BEFORE uninstall ===" && pip show amd-aiter || true + RUN pip uninstall -y amd-aiter + RUN pip install --upgrade "pybind11>=3.0.1" + RUN pip show pybind11 + RUN wget https://github.com/stedolan/jq/releases/download/jq-1.7/jq-linux64 -O jq + RUN chmod +x jq + RUN mv jq /usr/local/bin/jq + RUN rm -rf /app/aiter-test + RUN git clone --depth 1 https://github.com/ROCm/aiter.git /app/aiter-test && \\ + cd /app/aiter-test && \\ + git checkout HEAD && \\ + git submodule sync && git submodule update --init --recursive && \\ + MAX_JOBS=64 PREBUILD_KERNELS=0 GPU_ARCHS=gfx950 python3 setup.py develop + RUN echo "=== Aiter version AFTER installation ===" && pip show amd-aiter || true + + RUN echo "=== ATOM version BEFORE uninstall ===" && pip show atom || true + RUN pip uninstall -y atom + RUN rm -rf /app/ATOM + RUN git clone ${{ env.GITHUB_REPO_URL }} /app/ATOM && \\ + cd /app/ATOM && \\ + git checkout ${{ env.GITHUB_COMMIT_SHA }} && \\ + pip install -e . + + RUN echo "=== ATOM version AFTER installation ===" && pip show atom || true + EOF + + - name: Build Docker image + if: ${{ !github.event.pull_request.head.repo.fork }} + run: | + docker build --pull --network=host \ + --no-cache \ + -t atom_test:ci \ + -f Dockerfile.mod . 
+ + - name: Push Docker image + if: ${{ !github.event.pull_request.head.repo.fork }} + run: | + IMAGE_TAG=rocm/atom-dev:pre-build-${{ env.GITHUB_COMMIT_SHA }} + docker tag atom_test:ci $IMAGE_TAG + docker login -u ${{ secrets.DOCKER_USERNAME }} -p ${{ secrets.DOCKER_PASSWORD }} + docker push $IMAGE_TAG + + - name: Success message + if: ${{ !github.event.pull_request.head.repo.fork }} + run: | + echo "Successfully prepared image: $IMAGE_TAG" + + atom-vllm-oot: + needs: [pre-checks, build_atom_image] + if: ${{ needs.pre-checks.result == 'success' && needs.build_atom_image.result == 'success' && (!github.event.pull_request || github.event.pull_request.draft == false) }} + name: ATOM vLLM OOT Test + strategy: + fail-fast: false + matrix: + include: + # Keep CI runtime under control: enable only one OOT model for now. + - model_name: "Kimi-K2-Thinking-MXFP4" + model_path: "amd/Kimi-K2-Thinking-MXFP4" + accuracy_test_threshold: "0.90" + runner: atom-mi355-8gpu.predownload + runs-on: ${{ matrix.runner }} + timeout-minutes: 180 + env: + CONTAINER_NAME: atom_vllm_oot_${{ strategy.job-index }} + OOT_IMAGE_TAG: atom_vllm_oot_test:${{ github.sha }}-${{ strategy.job-index }} + VLLM_COMMIT: b31e9326a7d9394aab8c767f8ebe225c65594b60 + VLLM_VERSION: "0.17" + + steps: + - name: Clean up containers and workspace + run: | + echo "=== Cleaning up containers on $(hostname) ===" + containers=$(docker ps -q) + if [ -n "$containers" ]; then + docker kill $containers || true + fi + docker rm -f "$CONTAINER_NAME" 2>/dev/null || true + docker run --rm -v "${GITHUB_WORKSPACE:-$PWD}":/workspace -w /workspace --privileged rocm/pytorch:latest bash -lc "ls -la /workspace/ && rm -rf /workspace/*" || true + + - name: Checkout ATOM repo + uses: actions/checkout@v4 + + - name: Docker Login + if: ${{ !github.event.pull_request.head.repo.fork }} + run: | + docker login -u ${{ secrets.DOCKER_USERNAME }} -p ${{ secrets.DOCKER_PASSWORD }} + + - name: Prepare OOT base image for forked repo + if: ${{ 
github.event.pull_request.head.repo.fork }} + run: | + cat <<EOF > Dockerfile.mod + FROM ${{ env.ATOM_BASE_NIGTHLY_IMAGE }} + RUN pip install -U lm-eval[api] + RUN pip show lm-eval || true + RUN pip install hf_transfer + RUN pip show hf_transfer || true + RUN echo "=== Aiter version BEFORE uninstall ===" && pip show amd-aiter || true + RUN pip uninstall -y amd-aiter + RUN pip install --upgrade "pybind11>=3.0.1" + RUN pip show pybind11 + RUN rm -rf /app/aiter-test + RUN git clone https://github.com/ROCm/aiter.git /app/aiter-test && \\ + cd /app/aiter-test && \\ + git checkout HEAD && \\ + git submodule sync && git submodule update --init --recursive && \\ + MAX_JOBS=64 PREBUILD_KERNELS=0 GPU_ARCHS=gfx950 python3 setup.py develop + RUN echo "=== Aiter version AFTER installation ===" && pip show amd-aiter || true + RUN echo "=== ATOM version BEFORE uninstall ===" && pip show atom || true + RUN pip uninstall -y atom + RUN rm -rf /app/ATOM + RUN git clone ${{ env.GITHUB_REPO_URL }} /app/ATOM && \\ + cd /app/ATOM && \\ + git checkout ${{ env.GITHUB_COMMIT_SHA }} && \\ + pip install -e . + RUN echo "=== ATOM version AFTER installation ===" && pip show atom || true + EOF + + docker build --pull --network=host \ + --no-cache \ + -t atom_oot_base:ci \ + -f Dockerfile.mod . 
+ echo "OOT_BASE_IMAGE=atom_oot_base:ci" >> "$GITHUB_ENV" + + - name: Select OOT base image from pre-built ATOM image + if: ${{ !github.event.pull_request.head.repo.fork }} + run: | + echo "OOT_BASE_IMAGE=rocm/atom-dev:pre-build-${{ env.GITHUB_COMMIT_SHA }}" >> "$GITHUB_ENV" + + - name: Build OOT vLLM image + run: | + chmod +x docker/plugin/build_OOT_vLLM.sh + IMAGE_TAG="${OOT_IMAGE_TAG}" \ + BASE_IMAGE="${OOT_BASE_IMAGE}" \ + VLLM_COMMIT="${VLLM_COMMIT}" \ + VLLM_VERSION="${VLLM_VERSION}" \ + INSTALL_LM_EVAL=1 \ + BUILD_NO_CACHE=1 \ + docker/plugin/build_OOT_vLLM.sh + + - name: Start OOT test container + run: | + if [ -f "/etc/podinfo/gha-render-devices" ]; then + DEVICE_FLAG=$(cat /etc/podinfo/gha-render-devices) + else + DEVICE_FLAG="--device /dev/dri" + fi + + if [ -d "/models" ]; then + MODEL_MOUNT="-v /models:/models" + else + echo "Warning: /models directory not found on runner; skipping /models mount." + MODEL_MOUNT="" + fi + + docker run -dt --device=/dev/kfd $DEVICE_FLAG \ + -v "${GITHUB_WORKSPACE:-$PWD}":/workspace \ + $MODEL_MOUNT \ + -w /workspace \ + --ipc=host --group-add video \ + --shm-size=16G \ + --privileged \ + --cap-add=SYS_PTRACE \ + -e HF_TOKEN="${HF_TOKEN:-}" \ + --security-opt seccomp=unconfined \ + --ulimit memlock=-1 \ + --ulimit stack=67108864 \ + -v "${{ github.workspace }}:/workspace" \ + -w /workspace \ + --name "$CONTAINER_NAME" \ + "$OOT_IMAGE_TAG" + env: + GITHUB_WORKSPACE: ${{ github.workspace }} + + - name: Resolve model path + run: | + if [ -f "/models/${{ matrix.model_path }}/config.json" ]; then + echo "MODEL_PATH=/models/${{ matrix.model_path }}" >> "$GITHUB_ENV" + echo "Use pre-downloaded model path: /models/${{ matrix.model_path }}" + else + echo "MODEL_PATH=${{ matrix.model_path }}" >> "$GITHUB_ENV" + echo "Use HuggingFace model path: ${{ matrix.model_path }}" + fi + + - name: Download model if needed + run: | + if [ -d "/models" ] && [ ! 
-f "/models/${{ matrix.model_path }}/config.json" ]; then + echo "Downloading model to /models/${{ matrix.model_path }}" + docker exec -e HF_TOKEN=${{ secrets.AMD_HF_TOKEN }} "$CONTAINER_NAME" bash -lc "hf download ${{ matrix.model_path }} --local-dir /models/${{ matrix.model_path }}" + else + echo "Skip model download" + fi + + - name: Launch vLLM server with ATOM OOT plugin + run: | + docker exec "$CONTAINER_NAME" bash -lc " + set -euo pipefail + export SAFETENSORS_FAST_GPU=1 + export VLLM_ROCM_USE_AITER=1 + export VLLM_RPC_TIMEOUT=1800000 + export VLLM_CACHE_ROOT=/tmp/.cache/vllm + export TORCHINDUCTOR_CACHE_DIR=/tmp/.cache/inductor + rm -rf /tmp/.cache + + nohup vllm serve \"$MODEL_PATH\" \ + --host 0.0.0.0 \ + --port 8000 \ + --tensor-parallel-size 8 \ + --enable-expert-parallel \ + --trust-remote-code \ + --disable-log-requests \ + --gpu-memory-utilization 0.9 \ + --async-scheduling \ + --load-format fastsafetensors \ + --kv-cache-dtype fp8 \ + --max-model-len 16384 \ + > /tmp/vllm_oot.log 2>&1 & + echo \$! > /tmp/vllm_oot.pid + echo \"Server PID: \$(cat /tmp/vllm_oot.pid)\" + " + + - name: Wait for vLLM readiness + timeout-minutes: 30 + run: | + set -euo pipefail + for i in $(seq 1 60); do + if docker exec "$CONTAINER_NAME" bash -lc "curl -sS http://127.0.0.1:8000/v1/models >/dev/null"; then + echo "vLLM server is ready." + exit 0 + fi + echo "Waiting for server... ($i/60)" + sleep 30 + done + echo "vLLM server did not become ready in time." + docker exec "$CONTAINER_NAME" bash -lc "tail -n 200 /tmp/vllm_oot.log || true" + exit 1 + + - name: Run OOT accuracy test (gsm8k) + timeout-minutes: 45 + run: | + docker exec "$CONTAINER_NAME" bash -lc " + set -euo pipefail + if ! 
command -v lm_eval >/dev/null 2>&1; then + pip install 'lm-eval[api]' + fi + mkdir -p /tmp/oot_accuracy_results + RESULT_FILE=/tmp/oot_accuracy_results/\$(date +%Y%m%d%H%M%S).json + lm_eval --model local-completions \ + --model_args model=\"$MODEL_PATH\",base_url=http://127.0.0.1:8000/v1/completions,num_concurrent=65,max_retries=1,tokenized_requests=False \ + --tasks gsm8k \ + --num_fewshot 3 \ + --output_path \"\$RESULT_FILE\" 2>&1 | tee /tmp/oot_accuracy_output.txt + echo \"OOT_RESULT_FILE=\$RESULT_FILE\" + " + + - name: Check OOT accuracy threshold + run: | + docker exec "$CONTAINER_NAME" bash -lc " + set -euo pipefail + python -c \"import json, glob; files=sorted(glob.glob('/tmp/oot_accuracy_results/*.json')); assert files, 'ERROR: No OOT accuracy result JSON found.'; threshold=float('${{ matrix.accuracy_test_threshold }}'); result_file=files[-1]; data=json.load(open(result_file)); value=data['results']['gsm8k']['exact_match,flexible-extract']; print('RESULT_FILE:', result_file); print('Flexible extract value:', value); print('Accuracy threshold:', threshold); assert value >= threshold, f'Accuracy test failed: {value} < {threshold}'; print(f'Accuracy test passed: {value} >= {threshold}')\" + " + + - name: Collect OOT accuracy summary + if: success() + run: | + echo "OOT Accuracy Test Summary for ${{ matrix.model_name }}:" >> $GITHUB_STEP_SUMMARY + docker exec "$CONTAINER_NAME" bash -lc "awk '/\|Tasks\|Version\|/,/^$/ { if (NF > 0) print }' /tmp/oot_accuracy_output.txt" >> $GITHUB_STEP_SUMMARY || true + + - name: Collect OOT logs and results + if: always() + run: | + docker cp "$CONTAINER_NAME":/tmp/vllm_oot.log ./vllm_oot.log || true + docker cp "$CONTAINER_NAME":/tmp/oot_accuracy_output.txt ./oot_accuracy_output.txt || true + docker cp "$CONTAINER_NAME":/tmp/oot_accuracy_results ./oot_accuracy_results || true + + - name: Upload OOT artifacts + if: always() + uses: actions/upload-artifact@v4 + with: + name: oot-${{ matrix.model_name }}-artifacts + path: | + 
vllm_oot.log + oot_accuracy_output.txt + oot_accuracy_results + + - name: Clean up OOT test + if: always() + run: | + docker exec "$CONTAINER_NAME" bash -lc "if [ -f /tmp/vllm_oot.pid ]; then kill \$(cat /tmp/vllm_oot.pid) || true; fi" || true + docker stop "$CONTAINER_NAME" || true + docker rm "$CONTAINER_NAME" || true From a6567a8c1a421f1f500965ffd248a9ac4cc0d87e Mon Sep 17 00:00:00 2001 From: zejunchen-zejun Date: Thu, 12 Mar 2026 09:36:24 +0800 Subject: [PATCH 03/15] add Signed-off-by: zejunchen-zejun --- .github/workflows/atom-vllm-oot-test.yaml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/workflows/atom-vllm-oot-test.yaml b/.github/workflows/atom-vllm-oot-test.yaml index 83d8d8de..24e17810 100644 --- a/.github/workflows/atom-vllm-oot-test.yaml +++ b/.github/workflows/atom-vllm-oot-test.yaml @@ -181,12 +181,18 @@ jobs: - name: Build OOT vLLM image run: | + if [ "${{ github.event_name }}" = "pull_request" ] && [ "${{ github.event.pull_request.head.repo.fork }}" = "true" ]; then + pull_base_image=0 + else + pull_base_image=1 + fi chmod +x docker/plugin/build_OOT_vLLM.sh IMAGE_TAG="${OOT_IMAGE_TAG}" \ BASE_IMAGE="${OOT_BASE_IMAGE}" \ VLLM_COMMIT="${VLLM_COMMIT}" \ VLLM_VERSION="${VLLM_VERSION}" \ INSTALL_LM_EVAL=1 \ + PULL_BASE_IMAGE="${pull_base_image}" \ BUILD_NO_CACHE=1 \ docker/plugin/build_OOT_vLLM.sh From 21f66f66c38f3cf7bf8cdb3c50c503a1ccc5692a Mon Sep 17 00:00:00 2001 From: zejunchen-zejun Date: Thu, 12 Mar 2026 11:25:57 +0800 Subject: [PATCH 04/15] add Signed-off-by: zejunchen-zejun --- .github/scripts/atom_oot_test.sh | 232 ++++++++++++++++++ .github/workflows/atom-test.yaml | 6 +- .../workflows/atom-vllm-oot-full-test.yaml | 67 +---- .github/workflows/atom-vllm-oot-test.yaml | 212 +++++++++------- tests/plugin/test_plugin_env_flags.py | 6 +- 5 files changed, 371 insertions(+), 152 deletions(-) create mode 100644 .github/scripts/atom_oot_test.sh diff --git a/.github/scripts/atom_oot_test.sh b/.github/scripts/atom_oot_test.sh new file 
mode 100644 index 00000000..12143436 --- /dev/null +++ b/.github/scripts/atom_oot_test.sh @@ -0,0 +1,232 @@ +#!/bin/bash +set -euo pipefail + +# Usage: +# .github/scripts/atom_oot_test.sh launch [model_name] +# .github/scripts/atom_oot_test.sh accuracy [model_name] +# +# TYPE: +# launch - launch vLLM server and wait until ready +# accuracy - run gsm8k accuracy test (and threshold check) +# +# MODE: +# ci - only Kimi-K2 +# full - all OOT-supported models +# +# Optional model_name can be used to run a single model in full mode. + +TYPE=${1:-launch} +MODE=${2:-ci} +SELECTED_MODEL=${3:-} + +if [[ "$TYPE" != "launch" && "$TYPE" != "accuracy" ]]; then + echo "Invalid TYPE: $TYPE. Expected: launch or accuracy" + exit 2 +fi + +if [[ "$MODE" != "ci" && "$MODE" != "full" ]]; then + echo "Invalid MODE: $MODE. Expected: ci or full" + exit 2 +fi + +MAX_WAIT_RETRIES=${MAX_WAIT_RETRIES:-60} +WAIT_INTERVAL_SEC=${WAIT_INTERVAL_SEC:-30} +VLLM_PORT=${VLLM_PORT:-8000} +VLLM_HOST=${VLLM_HOST:-0.0.0.0} +VLLM_PID_FILE=${VLLM_PID_FILE:-/tmp/vllm_oot.pid} +VLLM_LOG_FILE=${VLLM_LOG_FILE:-/tmp/vllm_oot.log} +RESULT_DIR=${RESULT_DIR:-/tmp/oot_accuracy_results} +ACCURACY_LOG_FILE=${ACCURACY_LOG_FILE:-/tmp/oot_accuracy_output.txt} + +# Format: +# MODEL_NAME|MODEL_PATH|EXTRA_ARGS|THRESHOLD +CI_MODE_MODELS=( + "Kimi-K2|amd/Kimi-K2-Thinking-MXFP4|--trust-remote-code --kv-cache-dtype fp8 --tensor-parallel-size 8 --enable-expert-parallel|0.90" +) + +FULL_MODE_MODELS=( + "Qwen3 Dense|Qwen/Qwen3-8B|--trust-remote-code --kv-cache-dtype fp8 --tensor-parallel-size 1|0.70" + "Qwen3 MoE|Qwen/Qwen3-235B-A22B-Instruct-2507-FP8|--trust-remote-code --kv-cache-dtype fp8 --tensor-parallel-size 8 --enable-expert-parallel|0.87" + "DeepSeek-V3 family|deepseek-ai/DeepSeek-R1-0528|--trust-remote-code --kv-cache-dtype fp8 --tensor-parallel-size 8|0.93" + "GPT-OSS|openai/gpt-oss-120b|--trust-remote-code --kv-cache-dtype fp8 --tensor-parallel-size 2 --enable-dp-attention --enable-expert-parallel --gpu-memory-utilization 
0.3|0.38" + "Kimi-K2|amd/Kimi-K2-Thinking-MXFP4|--trust-remote-code --kv-cache-dtype fp8 --tensor-parallel-size 8 --enable-expert-parallel|0.90" +) + +declare -a ACTIVE_MODELS=() +if [[ "$MODE" == "ci" ]]; then + ACTIVE_MODELS=("${CI_MODE_MODELS[@]}") +else + ACTIVE_MODELS=("${FULL_MODE_MODELS[@]}") +fi + +resolve_model_path() { + local model_path="$1" + if [[ -f "/models/${model_path}/config.json" ]]; then + echo "/models/${model_path}" + else + echo "${model_path}" + fi +} + +wait_server_ready() { + local model_name="$1" + echo "" + echo "========== Waiting for vLLM server (${model_name}) ==========" + for ((i=1; i<=MAX_WAIT_RETRIES; i++)); do + if curl -sS "http://127.0.0.1:${VLLM_PORT}/v1/models" >/dev/null; then + echo "vLLM server is ready for ${model_name}." + return 0 + fi + + if [[ -f "${VLLM_PID_FILE}" ]]; then + local pid + pid=$(cat "${VLLM_PID_FILE}") + if ! kill -0 "${pid}" 2>/dev/null; then + echo "vLLM process exited early for ${model_name}." + tail -n 200 "${VLLM_LOG_FILE}" || true + return 1 + fi + fi + + echo "Waiting for vLLM server... (${i}/${MAX_WAIT_RETRIES})" + sleep "${WAIT_INTERVAL_SEC}" + done + + echo "vLLM server did not become ready in time for ${model_name}." 
+ tail -n 200 "${VLLM_LOG_FILE}" || true + return 1 +} + +stop_server() { + if [[ -f "${VLLM_PID_FILE}" ]]; then + local pid + pid=$(cat "${VLLM_PID_FILE}") + kill "${pid}" 2>/dev/null || true + rm -f "${VLLM_PID_FILE}" || true + fi +} + +launch_one_model() { + local model_name="$1" + local model_path="$2" + local extra_args="$3" + + local resolved_model_path + resolved_model_path=$(resolve_model_path "${model_path}") + + echo "" + echo "========== Launching vLLM server ==========" + echo "Model name: ${model_name}" + echo "Model path: ${resolved_model_path}" + echo "Extra args: ${extra_args}" + + export SAFETENSORS_FAST_GPU=1 + export VLLM_ROCM_USE_AITER=1 + export VLLM_RPC_TIMEOUT=1800000 + export VLLM_CACHE_ROOT=/tmp/.cache/vllm + export TORCHINDUCTOR_CACHE_DIR=/tmp/.cache/inductor + rm -rf /tmp/.cache + + rm -f "${VLLM_PID_FILE}" || true + + nohup vllm serve "${resolved_model_path}" \ + --host "${VLLM_HOST}" \ + --port "${VLLM_PORT}" \ + --disable-log-requests \ + --async-scheduling \ + --load-format fastsafetensors \ + --max-model-len 16384 \ + ${extra_args} \ + > "${VLLM_LOG_FILE}" 2>&1 & + echo $! > "${VLLM_PID_FILE}" + echo "Server PID: $(cat "${VLLM_PID_FILE}")" + + wait_server_ready "${model_name}" +} + +accuracy_one_model() { + local model_name="$1" + local model_path="$2" + local extra_args="$3" + local threshold="$4" + + local resolved_model_path + resolved_model_path=$(resolve_model_path "${model_path}") + + if ! 
command -v lm_eval >/dev/null 2>&1; then + echo "========== Installing lm-eval ==========" + pip install 'lm-eval[api]' + fi + + mkdir -p "${RESULT_DIR}" + local result_file="${RESULT_DIR}/$(date +%Y%m%d%H%M%S)_${model_name// /_}.json" + + echo "" + echo "========== Running OOT gsm8k accuracy ==========" + echo "Model name: ${model_name}" + echo "Threshold: ${threshold}" + + lm_eval --model local-completions \ + --model_args model="${resolved_model_path}",base_url="http://127.0.0.1:${VLLM_PORT}/v1/completions",num_concurrent=65,max_retries=1,tokenized_requests=False \ + --tasks gsm8k \ + --num_fewshot 3 \ + --output_path "${result_file}" 2>&1 | tee -a "${ACCURACY_LOG_FILE}" + + local value + value=$(python - <<PY +import json +result_file = "${result_file}" +data = json.load(open(result_file)) +value = data["results"]["gsm8k"]["exact_match,flexible-extract"] +threshold = float("${threshold}") +print("RESULT_FILE:", result_file) +print("value:", value, "threshold:", threshold) +assert value >= threshold, f"Accuracy failed: {value} < {threshold}" +print(f"Accuracy passed: {value} >= {threshold}") +PY +) +} + +run_for_models() { + local action="$1" + local matched=0 + + for entry in "${ACTIVE_MODELS[@]}"; do + IFS='|' read -r model_name model_path extra_args threshold <<< "${entry}" + + if [[ -n "${SELECTED_MODEL}" && "${SELECTED_MODEL}" != "${model_name}" ]]; then + continue + fi + matched=1 + + if [[ "${action}" == "launch" ]]; then + launch_one_model "${model_name}" "${model_path}" "${extra_args}" + break + fi + + # accuracy mode: launch + evaluate each selected model, then stop server. 
+ launch_one_model "${model_name}" "${model_path}" "${extra_args}" + accuracy_one_model "${model_name}" "${model_path}" "${extra_args}" "${threshold}" + stop_server + done + + if [[ "${matched}" -eq 0 ]]; then + echo "No model matched MODE=${MODE}, SELECTED_MODEL=${SELECTED_MODEL}" + exit 2 + fi +} + +trap 'stop_server' EXIT + +if [[ "${TYPE}" == "launch" ]]; then + run_for_models "launch" +else + run_for_models "accuracy" +fi + diff --git a/.github/workflows/atom-test.yaml b/.github/workflows/atom-test.yaml index dfa4a0b9..51c19325 100644 --- a/.github/workflows/atom-test.yaml +++ b/.github/workflows/atom-test.yaml @@ -21,7 +21,7 @@ concurrency: cancel-in-progress: ${{ github.ref != 'refs/heads/main' }} env: - ATOM_BASE_NIGTHLY_IMAGE: rocm/atom-dev:latest + ATOM_BASE_NIGHTLY_IMAGE: rocm/atom-dev:latest GITHUB_REPO_URL: ${{ github.event.pull_request.head.repo.clone_url || 'https://github.com/ROCm/ATOM.git' }} GITHUB_COMMIT_SHA: ${{ github.event.pull_request.head.sha || github.event.head_commit.id }} @@ -46,7 +46,7 @@ jobs: if: ${{ !github.event.pull_request.head.repo.fork }} run: | cat < Dockerfile.mod - FROM ${{ env.ATOM_BASE_NIGTHLY_IMAGE }} + FROM ${{ env.ATOM_BASE_NIGHTLY_IMAGE }} RUN pip install -U lm-eval[api] RUN pip show lm-eval || true RUN pip install hf_transfer @@ -234,7 +234,7 @@ jobs: if: (matrix.run_on_pr == true || github.event_name != 'pull_request') && github.event.pull_request.head.repo.fork run: | cat < Dockerfile.mod - FROM ${{ env.ATOM_BASE_NIGTHLY_IMAGE }} + FROM ${{ env.ATOM_BASE_NIGHTLY_IMAGE }} RUN pip install -U lm-eval[api] RUN pip show lm-eval || true RUN pip install hf_transfer diff --git a/.github/workflows/atom-vllm-oot-full-test.yaml b/.github/workflows/atom-vllm-oot-full-test.yaml index 9081b47f..82432e8e 100644 --- a/.github/workflows/atom-vllm-oot-full-test.yaml +++ b/.github/workflows/atom-vllm-oot-full-test.yaml @@ -113,7 +113,7 @@ jobs: run: | docker pull "${{ needs.build-oot-image.outputs.oot_image_tag }}" - - name: Run plugin 
unit tests + - name: Run all plugin unit tests run: | docker run --rm \ -v "${{ github.workspace }}:/workspace" \ @@ -221,71 +221,20 @@ jobs: env: GITHUB_WORKSPACE: ${{ github.workspace }} - - name: Resolve and download model + - name: Pre-download model if /models exists run: | - if [ -f "/models/${{ matrix.model_path }}/config.json" ]; then - echo "MODEL_PATH=/models/${{ matrix.model_path }}" >> "$GITHUB_ENV" + if [ -d "/models" ] && [ ! -f "/models/${{ matrix.model_path }}/config.json" ]; then + docker exec -e HF_TOKEN=${{ secrets.AMD_HF_TOKEN }} "$CONTAINER_NAME" bash -lc "hf download ${{ matrix.model_path }} --local-dir /models/${{ matrix.model_path }}" else - echo "MODEL_PATH=${{ matrix.model_path }}" >> "$GITHUB_ENV" - if [ -d "/models" ]; then - docker exec -e HF_TOKEN=${{ secrets.AMD_HF_TOKEN }} "$CONTAINER_NAME" bash -lc "hf download ${{ matrix.model_path }} --local-dir /models/${{ matrix.model_path }}" - echo "MODEL_PATH=/models/${{ matrix.model_path }}" >> "$GITHUB_ENV" - fi + echo "Skip model pre-download" fi - - name: Launch vLLM server with ATOM OOT plugin + - name: Run OOT launch and gsm8k accuracy via script (full mode) + timeout-minutes: 120 run: | docker exec "$CONTAINER_NAME" bash -lc " set -euo pipefail - export SAFETENSORS_FAST_GPU=1 - export VLLM_ROCM_USE_AITER=1 - export VLLM_RPC_TIMEOUT=1800000 - export VLLM_CACHE_ROOT=/tmp/.cache/vllm - export TORCHINDUCTOR_CACHE_DIR=/tmp/.cache/inductor - rm -rf /tmp/.cache - - nohup vllm serve \"$MODEL_PATH\" \ - --host 0.0.0.0 \ - --port 8000 \ - ${{ matrix.extra_args }} \ - > /tmp/vllm_oot.log 2>&1 & - echo \$! > /tmp/vllm_oot.pid - " - - - name: Wait for vLLM readiness - timeout-minutes: 30 - run: | - set -euo pipefail - for i in $(seq 1 60); do - if docker exec "$CONTAINER_NAME" bash -lc "curl -sS http://127.0.0.1:8000/v1/models >/dev/null"; then - echo "vLLM server is ready." - exit 0 - fi - echo "Waiting for server... 
($i/60)" - sleep 30 - done - docker exec "$CONTAINER_NAME" bash -lc "tail -n 200 /tmp/vllm_oot.log || true" - exit 1 - - - name: Run gsm8k accuracy - timeout-minutes: 60 - run: | - docker exec "$CONTAINER_NAME" bash -lc " - set -euo pipefail - mkdir -p /tmp/oot_accuracy_results - RESULT_FILE=/tmp/oot_accuracy_results/\$(date +%Y%m%d%H%M%S).json - lm_eval --model local-completions \ - --model_args model=\"$MODEL_PATH\",base_url=http://127.0.0.1:8000/v1/completions,num_concurrent=65,max_retries=1,tokenized_requests=False \ - --tasks gsm8k \ - --num_fewshot 3 \ - --output_path \"\$RESULT_FILE\" 2>&1 | tee /tmp/oot_accuracy_output.txt - " - - - name: Check accuracy threshold - run: | - docker exec "$CONTAINER_NAME" bash -lc " - set -euo pipefail - python -c \"import json, glob; files=sorted(glob.glob('/tmp/oot_accuracy_results/*.json')); assert files, 'No accuracy JSON found'; threshold=float('${{ matrix.accuracy_test_threshold }}'); result_file=files[-1]; data=json.load(open(result_file)); value=data['results']['gsm8k']['exact_match,flexible-extract']; print('RESULT_FILE:', result_file); print('value:', value, 'threshold:', threshold); assert value >= threshold, f'Accuracy failed: {value} < {threshold}'\" + bash .github/scripts/atom_oot_test.sh accuracy full '${{ matrix.model_name }}' " - name: Collect summary diff --git a/.github/workflows/atom-vllm-oot-test.yaml b/.github/workflows/atom-vllm-oot-test.yaml index 24e17810..56015240 100644 --- a/.github/workflows/atom-vllm-oot-test.yaml +++ b/.github/workflows/atom-vllm-oot-test.yaml @@ -21,32 +21,125 @@ concurrency: cancel-in-progress: ${{ github.ref != 'refs/heads/main' }} env: - ATOM_BASE_NIGTHLY_IMAGE: rocm/atom-dev:latest + ATOM_BASE_NIGHTLY_IMAGE: rocm/atom-dev:latest GITHUB_REPO_URL: ${{ github.event.pull_request.head.repo.clone_url || 'https://github.com/ROCm/ATOM.git' }} GITHUB_COMMIT_SHA: ${{ github.event.pull_request.head.sha || github.event.head_commit.id }} jobs: + wait-atom-test-success: + name: Wait for 
ATOM Test success + runs-on: ubuntu-latest + timeout-minutes: 180 + outputs: + atom_test_ok: ${{ steps.wait.outputs.atom_test_ok }} + steps: + - name: Wait until ATOM Test is completed for this commit + id: wait + uses: actions/github-script@v7 + with: + script: | + const owner = context.repo.owner; + const repo = context.repo.repo; + const eventName = context.eventName; + const headSha = context.payload.pull_request?.head?.sha ?? context.sha; + const maxAttempts = 180; + const sleepMs = 60000; + + const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms)); + + if (eventName === "workflow_dispatch") { + core.info("workflow_dispatch detected: bypass ATOM Test gate."); + core.setOutput("atom_test_ok", "true"); + return; + } + + let foundCompletedRun = null; + + for (let attempt = 1; attempt <= maxAttempts; attempt++) { + const resp = await github.rest.actions.listWorkflowRuns({ + owner, + repo, + workflow_id: "atom-test.yaml", + event: eventName, + head_sha: headSha, + per_page: 20, + }); + + const candidates = (resp.data.workflow_runs || []) + .filter((run) => run.name === "ATOM Test" && run.id !== context.runId) + .sort((a, b) => new Date(b.created_at) - new Date(a.created_at)); + + if (candidates.length > 0) { + const latest = candidates[0]; + core.info( + `Attempt ${attempt}/${maxAttempts}: latest ATOM Test run id=${latest.id}, status=${latest.status}, conclusion=${latest.conclusion}` + ); + if (latest.status === "completed") { + foundCompletedRun = latest; + break; + } + } else { + core.info(`Attempt ${attempt}/${maxAttempts}: no ATOM Test run found yet for this sha.`); + } + + await sleep(sleepMs); + } + + if (!foundCompletedRun) { + core.warning("Timeout waiting for ATOM Test workflow completion. OOT workflow will be skipped."); + core.setOutput("atom_test_ok", "false"); + return; + } + + const ok = foundCompletedRun.conclusion === "success"; + core.setOutput("atom_test_ok", ok ? 
"true" : "false"); + if (!ok) { + core.warning( + `Skip OOT workflow: ATOM Test conclusion is '${foundCompletedRun.conclusion}'.` + ); + } + pre-checks: + needs: [wait-atom-test-success] + if: ${{ needs.wait-atom-test-success.outputs.atom_test_ok == 'true' }} uses: ./.github/workflows/pre-checks.yaml with: black: true ruff: true build_atom_image: - if: ${{ needs.pre-checks.result == 'success' && (!github.event.pull_request || github.event.pull_request.draft == false) }} - needs: [pre-checks] + if: ${{ needs.wait-atom-test-success.outputs.atom_test_ok == 'true' && needs.pre-checks.result == 'success' && (!github.event.pull_request || github.event.pull_request.draft == false) }} + needs: [wait-atom-test-success, pre-checks] name: Build ATOM image runs-on: build-only-atom steps: - - name: Checkout ATOM repo + - name: Docker Login if: ${{ !github.event.pull_request.head.repo.fork }} + run: | + docker login -u ${{ secrets.DOCKER_USERNAME }} -p ${{ secrets.DOCKER_PASSWORD }} + + - name: Try pull pre-built ATOM image + if: ${{ !github.event.pull_request.head.repo.fork }} + id: pull_prebuilt + run: | + IMAGE_TAG=rocm/atom-dev:pre-build-${{ env.GITHUB_COMMIT_SHA }} + if docker pull "$IMAGE_TAG"; then + echo "image_ready=true" >> "$GITHUB_OUTPUT" + echo "Reusing existing image: $IMAGE_TAG" + else + echo "image_ready=false" >> "$GITHUB_OUTPUT" + echo "Pre-built image not found, will rebuild: $IMAGE_TAG" + fi + + - name: Checkout ATOM repo + if: ${{ !github.event.pull_request.head.repo.fork && steps.pull_prebuilt.outputs.image_ready != 'true' }} uses: actions/checkout@v4 - name: Generate Dockerfile - if: ${{ !github.event.pull_request.head.repo.fork }} + if: ${{ !github.event.pull_request.head.repo.fork && steps.pull_prebuilt.outputs.image_ready != 'true' }} run: | cat < Dockerfile.mod - FROM ${{ env.ATOM_BASE_NIGTHLY_IMAGE }} + FROM ${{ env.ATOM_BASE_NIGHTLY_IMAGE }} RUN pip install -U lm-eval[api] RUN pip show lm-eval || true RUN pip install hf_transfer @@ -78,7 +171,7 @@ 
jobs: EOF - name: Build Docker image - if: ${{ !github.event.pull_request.head.repo.fork }} + if: ${{ !github.event.pull_request.head.repo.fork && steps.pull_prebuilt.outputs.image_ready != 'true' }} run: | docker build --pull --network=host \ --no-cache \ @@ -86,21 +179,25 @@ jobs: -f Dockerfile.mod . - name: Push Docker image - if: ${{ !github.event.pull_request.head.repo.fork }} + if: ${{ !github.event.pull_request.head.repo.fork && steps.pull_prebuilt.outputs.image_ready != 'true' }} run: | IMAGE_TAG=rocm/atom-dev:pre-build-${{ env.GITHUB_COMMIT_SHA }} docker tag atom_test:ci $IMAGE_TAG - docker login -u ${{ secrets.DOCKER_USERNAME }} -p ${{ secrets.DOCKER_PASSWORD }} docker push $IMAGE_TAG - name: Success message if: ${{ !github.event.pull_request.head.repo.fork }} run: | - echo "Successfully prepared image: $IMAGE_TAG" + IMAGE_TAG=rocm/atom-dev:pre-build-${{ env.GITHUB_COMMIT_SHA }} + if [ "${{ steps.pull_prebuilt.outputs.image_ready }}" = "true" ]; then + echo "Successfully reused image: $IMAGE_TAG" + else + echo "Successfully rebuilt and pushed image: $IMAGE_TAG" + fi atom-vllm-oot: - needs: [pre-checks, build_atom_image] - if: ${{ needs.pre-checks.result == 'success' && needs.build_atom_image.result == 'success' && (!github.event.pull_request || github.event.pull_request.draft == false) }} + needs: [wait-atom-test-success, pre-checks, build_atom_image] + if: ${{ needs.wait-atom-test-success.outputs.atom_test_ok == 'true' && needs.pre-checks.result == 'success' && needs.build_atom_image.result == 'success' && (!github.event.pull_request || github.event.pull_request.draft == false) }} name: ATOM vLLM OOT Test strategy: fail-fast: false @@ -142,7 +239,7 @@ jobs: if: ${{ github.event.pull_request.head.repo.fork }} run: | cat < Dockerfile.mod - FROM ${{ env.ATOM_BASE_NIGTHLY_IMAGE }} + FROM ${{ env.ATOM_BASE_NIGHTLY_IMAGE }} RUN pip install -U lm-eval[api] RUN pip show lm-eval || true RUN pip install hf_transfer @@ -159,6 +256,8 @@ jobs: MAX_JOBS=64 
PREBUILD_KERNELS=0 GPU_ARCHS=gfx950 python3 setup.py develop RUN echo "=== Aiter version AFTER installation ===" && pip show amd-aiter || true RUN echo "=== ATOM version BEFORE uninstall ===" && pip show atom || true + # Fork PR fallback: this workflow cannot rely on pre-built images from + # other workflows, so reinstall ATOM from the current PR commit. RUN pip uninstall -y atom RUN rm -rf /app/ATOM RUN git clone ${{ env.GITHUB_REPO_URL }} /app/ATOM && \\ @@ -196,6 +295,14 @@ jobs: BUILD_NO_CACHE=1 \ docker/plugin/build_OOT_vLLM.sh + - name: Run all plugin unit tests + run: | + docker run --rm \ + -v "${{ github.workspace }}:/workspace" \ + -w /workspace \ + "$OOT_IMAGE_TAG" \ + bash -lc "pytest -q tests/plugin" + - name: Start OOT test container run: | if [ -f "/etc/podinfo/gha-render-devices" ]; then @@ -223,23 +330,11 @@ jobs: --security-opt seccomp=unconfined \ --ulimit memlock=-1 \ --ulimit stack=67108864 \ - -v "${{ github.workspace }}:/workspace" \ - -w /workspace \ --name "$CONTAINER_NAME" \ "$OOT_IMAGE_TAG" env: GITHUB_WORKSPACE: ${{ github.workspace }} - - name: Resolve model path - run: | - if [ -f "/models/${{ matrix.model_path }}/config.json" ]; then - echo "MODEL_PATH=/models/${{ matrix.model_path }}" >> "$GITHUB_ENV" - echo "Use pre-downloaded model path: /models/${{ matrix.model_path }}" - else - echo "MODEL_PATH=${{ matrix.model_path }}" >> "$GITHUB_ENV" - echo "Use HuggingFace model path: ${{ matrix.model_path }}" - fi - - name: Download model if needed run: | if [ -d "/models" ] && [ ! 
-f "/models/${{ matrix.model_path }}/config.json" ]; then @@ -249,73 +344,12 @@ jobs: echo "Skip model download" fi - - name: Launch vLLM server with ATOM OOT plugin - run: | - docker exec "$CONTAINER_NAME" bash -lc " - set -euo pipefail - export SAFETENSORS_FAST_GPU=1 - export VLLM_ROCM_USE_AITER=1 - export VLLM_RPC_TIMEOUT=1800000 - export VLLM_CACHE_ROOT=/tmp/.cache/vllm - export TORCHINDUCTOR_CACHE_DIR=/tmp/.cache/inductor - rm -rf /tmp/.cache - - nohup vllm serve \"$MODEL_PATH\" \ - --host 0.0.0.0 \ - --port 8000 \ - --tensor-parallel-size 8 \ - --enable-expert-parallel \ - --trust-remote-code \ - --disable-log-requests \ - --gpu-memory-utilization 0.9 \ - --async-scheduling \ - --load-format fastsafetensors \ - --kv-cache-dtype fp8 \ - --max-model-len 16384 \ - > /tmp/vllm_oot.log 2>&1 & - echo \$! > /tmp/vllm_oot.pid - echo \"Server PID: \$(cat /tmp/vllm_oot.pid)\" - " - - - name: Wait for vLLM readiness - timeout-minutes: 30 - run: | - set -euo pipefail - for i in $(seq 1 60); do - if docker exec "$CONTAINER_NAME" bash -lc "curl -sS http://127.0.0.1:8000/v1/models >/dev/null"; then - echo "vLLM server is ready." - exit 0 - fi - echo "Waiting for server... ($i/60)" - sleep 30 - done - echo "vLLM server did not become ready in time." - docker exec "$CONTAINER_NAME" bash -lc "tail -n 200 /tmp/vllm_oot.log || true" - exit 1 - - - name: Run OOT accuracy test (gsm8k) - timeout-minutes: 45 - run: | - docker exec "$CONTAINER_NAME" bash -lc " - set -euo pipefail - if ! 
command -v lm_eval >/dev/null 2>&1; then - pip install 'lm-eval[api]' - fi - mkdir -p /tmp/oot_accuracy_results - RESULT_FILE=/tmp/oot_accuracy_results/\$(date +%Y%m%d%H%M%S).json - lm_eval --model local-completions \ - --model_args model=\"$MODEL_PATH\",base_url=http://127.0.0.1:8000/v1/completions,num_concurrent=65,max_retries=1,tokenized_requests=False \ - --tasks gsm8k \ - --num_fewshot 3 \ - --output_path \"\$RESULT_FILE\" 2>&1 | tee /tmp/oot_accuracy_output.txt - echo \"OOT_RESULT_FILE=\$RESULT_FILE\" - " - - - name: Check OOT accuracy threshold + - name: Run OOT launch and gsm8k accuracy via script (ci mode) + timeout-minutes: 90 run: | docker exec "$CONTAINER_NAME" bash -lc " set -euo pipefail - python -c \"import json, glob; files=sorted(glob.glob('/tmp/oot_accuracy_results/*.json')); assert files, 'ERROR: No OOT accuracy result JSON found.'; threshold=float('${{ matrix.accuracy_test_threshold }}'); result_file=files[-1]; data=json.load(open(result_file)); value=data['results']['gsm8k']['exact_match,flexible-extract']; print('RESULT_FILE:', result_file); print('Flexible extract value:', value); print('Accuracy threshold:', threshold); assert value >= threshold, f'Accuracy test failed: {value} < {threshold}'; print(f'Accuracy test passed: {value} >= {threshold}')\" + bash .github/scripts/atom_oot_test.sh accuracy ci " - name: Collect OOT accuracy summary diff --git a/tests/plugin/test_plugin_env_flags.py b/tests/plugin/test_plugin_env_flags.py index 6ca39018..e71ca95f 100644 --- a/tests/plugin/test_plugin_env_flags.py +++ b/tests/plugin/test_plugin_env_flags.py @@ -32,7 +32,10 @@ def test_disable_vllm_plugin_attention_fallbacks_to_non_atom_backend(monkeypatch class _RocmPlatform: @classmethod - def get_attn_backend_cls(cls, selected_backend, attn_selector_config): + def get_attn_backend_cls( + cls, selected_backend, attn_selector_config, num_heads + ): + assert num_heads == 16 return "vllm.default.backend" rocm_module.RocmPlatform = _RocmPlatform @@ -52,5 
+55,6 @@ def get_attn_backend_cls(cls, selected_backend, attn_selector_config): result = platform_module.ATOMPlatform.get_attn_backend_cls( selected_backend="x", attn_selector_config=types.SimpleNamespace(use_mla=True), + num_heads=16, ) assert result == "vllm.default.backend" From 7efac3dedfdbf5a26c6b1c0bd640fa94dc238aec Mon Sep 17 00:00:00 2001 From: zejunchen-zejun Date: Thu, 12 Mar 2026 13:11:39 +0800 Subject: [PATCH 05/15] add Signed-off-by: zejunchen-zejun --- tests/plugin/test_plugin_env_flags.py | 41 --------- tests/plugin/test_plugin_registries.py | 41 --------- .../plugin/test_plugin_unsupported_models.py | 23 ----- tests/plugin/test_plugin_vllm_import_paths.py | 85 ------------------- 4 files changed, 190 deletions(-) delete mode 100644 tests/plugin/test_plugin_unsupported_models.py delete mode 100644 tests/plugin/test_plugin_vllm_import_paths.py diff --git a/tests/plugin/test_plugin_env_flags.py b/tests/plugin/test_plugin_env_flags.py index e71ca95f..2e7888a1 100644 --- a/tests/plugin/test_plugin_env_flags.py +++ b/tests/plugin/test_plugin_env_flags.py @@ -1,9 +1,4 @@ import importlib -import importlib.util -import sys -import types - -import pytest def test_disable_vllm_plugin_flag_disables_platform(monkeypatch): @@ -22,39 +17,3 @@ def test_disable_vllm_plugin_flag_disables_platform(monkeypatch): assert platform_module.ATOMPlatform is None assert register_module.register_platform() is None - -@pytest.mark.skipif( - importlib.util.find_spec("vllm") is None, - reason="vllm is not installed in current test environment", -) -def test_disable_vllm_plugin_attention_fallbacks_to_non_atom_backend(monkeypatch): - rocm_module = types.ModuleType("vllm.platforms.rocm") - - class _RocmPlatform: - @classmethod - def get_attn_backend_cls( - cls, selected_backend, attn_selector_config, num_heads - ): - assert num_heads == 16 - return "vllm.default.backend" - - rocm_module.RocmPlatform = _RocmPlatform - - monkeypatch.setitem(sys.modules, "vllm", 
types.ModuleType("vllm")) - monkeypatch.setitem( - sys.modules, "vllm.platforms", types.ModuleType("vllm.platforms") - ) - monkeypatch.setitem(sys.modules, "vllm.platforms.rocm", rocm_module) - monkeypatch.setenv("ATOM_DISABLE_VLLM_PLUGIN", "0") - monkeypatch.setenv("ATOM_DISABLE_VLLM_PLUGIN_ATTENTION", "1") - - import atom.plugin.vllm.platform as platform_module - - importlib.reload(platform_module) - - result = platform_module.ATOMPlatform.get_attn_backend_cls( - selected_backend="x", - attn_selector_config=types.SimpleNamespace(use_mla=True), - num_heads=16, - ) - assert result == "vllm.default.backend" diff --git a/tests/plugin/test_plugin_registries.py b/tests/plugin/test_plugin_registries.py index 79dbe323..e9d6b263 100644 --- a/tests/plugin/test_plugin_registries.py +++ b/tests/plugin/test_plugin_registries.py @@ -1,8 +1,3 @@ -import sys -import types -import importlib -import importlib.util - import pytest from atom.plugin import prepare as plugin_prepare @@ -16,42 +11,6 @@ def _reset_framework_state(): plugin_prepare._set_framework_backbone("atom") -@pytest.mark.skipif( - importlib.util.find_spec("vllm") is None, - reason="vllm is not installed in current test environment", -) -def test_register_platform_returns_oot_platform(monkeypatch): - rocm_module = types.ModuleType("vllm.platforms.rocm") - - class _RocmPlatform: - pass - - rocm_module.RocmPlatform = _RocmPlatform - vllm_platforms = types.ModuleType("vllm.platforms") - vllm_platforms.current_platform = None - - monkeypatch.setitem(sys.modules, "vllm", types.ModuleType("vllm")) - monkeypatch.setitem(sys.modules, "vllm.platforms", vllm_platforms) - monkeypatch.setitem(sys.modules, "vllm.platforms.rocm", rocm_module) - - monkeypatch.setenv("ATOM_DISABLE_VLLM_PLUGIN", "0") - monkeypatch.setenv("ATOM_DISABLE_VLLM_PLUGIN_ATTENTION", "0") - - import atom.plugin.vllm.platform as platform_module - - importlib.reload(platform_module) - importlib.reload(vllm_register) - - platform_path = 
vllm_register.register_platform() - module_name, class_name = platform_path.rsplit(".", 1) - vllm_platforms.current_platform = getattr( - importlib.import_module(module_name), class_name - ) - - # get current platform from vllm side and validate it is ATOM platform. - assert vllm_platforms.current_platform is platform_module.ATOMPlatform - - def test_register_platform_can_be_disabled(monkeypatch): monkeypatch.setattr(vllm_register, "disable_vllm_plugin", True, raising=False) assert vllm_register.register_platform() is None diff --git a/tests/plugin/test_plugin_unsupported_models.py b/tests/plugin/test_plugin_unsupported_models.py deleted file mode 100644 index 0419d4a3..00000000 --- a/tests/plugin/test_plugin_unsupported_models.py +++ /dev/null @@ -1,23 +0,0 @@ -import importlib.util -import importlib -import sys -import types - -import pytest - - -# FIXME: remove it later when enabling fallback for unsupported models -@pytest.mark.skipif( - importlib.util.find_spec("vllm") is None, - reason="vllm is not installed in current test environment", -) -def test_vllm_wrapper_rejects_unsupported_model_arch(monkeypatch): - # Avoid importing deep model-loader dependencies during test collection/import. 
- fake_loader = types.ModuleType("atom.model_loader.loader") - fake_loader.load_model_in_plugin_mode = lambda **kwargs: set() - monkeypatch.setitem(sys.modules, "atom.model_loader.loader", fake_loader) - - model_wrapper = importlib.import_module("atom.plugin.vllm.model_wrapper") - - with pytest.raises(ValueError, match="not supported by ATOM OOT backend"): - model_wrapper._get_atom_model_cls("UnknownModelForCausalLM") diff --git a/tests/plugin/test_plugin_vllm_import_paths.py b/tests/plugin/test_plugin_vllm_import_paths.py deleted file mode 100644 index 523e0798..00000000 --- a/tests/plugin/test_plugin_vllm_import_paths.py +++ /dev/null @@ -1,85 +0,0 @@ -import importlib.util - -import pytest - - -@pytest.mark.skipif( - importlib.util.find_spec("vllm") is None, - reason="vllm is not installed in current test environment", -) -def test_vllm_import_paths_guardrail(): - """Guardrail for OOT vLLM import paths used by ATOM plugin mode.""" - # attention.py / paged_attention.py (new path with legacy fallback) - try: - from vllm.attention.layer import Attention, MLAAttention, AttentionType - except ImportError: - from vllm.model_executor.layers.attention import Attention, MLAAttention - from vllm.v1.attention.backend import AttentionType - - # attention.py - from vllm.config import ( - VllmConfig, - get_current_vllm_config, - get_layers_from_vllm_config, - ) - from vllm.model_executor.layers.attention.mla_attention import ( - MLACommonMetadataBuilder, - QueryLenSupport, - ) - from vllm.utils.math_utils import cdiv, round_down - from vllm.v1.attention.backend import AttentionCGSupport, AttentionMetadataBuilder - from vllm.v1.attention.backends.utils import ( - get_dcp_local_seq_lens, - split_decodes_and_prefills, - split_decodes_prefills_and_extends, - ) - from vllm.v1.attention.ops.common import cp_lse_ag_out_rs - from vllm.v1.attention.ops.merge_attn_states import merge_attn_states - - # model_wrapper.py (core vLLM model interfaces) - from 
vllm.model_executor.models.interfaces import SupportsPP, SupportsQuant - from vllm.model_executor.models.interfaces_base import ( - VllmModel, - VllmModelForTextGeneration, - ) - from vllm.model_executor.models.registry import ModelRegistry - from vllm.sequence import IntermediateTensors - - # attention_mla.py / platform.py / register.py - from vllm import _custom_ops - from vllm.distributed.parallel_state import get_dcp_group - from vllm.platforms import current_platform - from vllm.platforms.rocm import RocmPlatform - - assert all( - obj is not None - for obj in [ - Attention, - MLAAttention, - AttentionType, - QueryLenSupport, - MLACommonMetadataBuilder, - cdiv, - round_down, - AttentionCGSupport, - AttentionMetadataBuilder, - get_dcp_local_seq_lens, - split_decodes_and_prefills, - split_decodes_prefills_and_extends, - cp_lse_ag_out_rs, - merge_attn_states, - VllmConfig, - get_current_vllm_config, - get_layers_from_vllm_config, - SupportsPP, - SupportsQuant, - VllmModel, - VllmModelForTextGeneration, - ModelRegistry, - IntermediateTensors, - _custom_ops, - get_dcp_group, - current_platform, - RocmPlatform, - ] - ) From e1ae8af904da8ef9a4d80eebc3d99c44b9717619 Mon Sep 17 00:00:00 2001 From: zejunchen-zejun Date: Thu, 12 Mar 2026 13:17:48 +0800 Subject: [PATCH 06/15] add Signed-off-by: zejunchen-zejun --- tests/plugin/test_plugin_registries.py | 16 ---------------- 1 file changed, 16 deletions(-) delete mode 100644 tests/plugin/test_plugin_registries.py diff --git a/tests/plugin/test_plugin_registries.py b/tests/plugin/test_plugin_registries.py deleted file mode 100644 index e9d6b263..00000000 --- a/tests/plugin/test_plugin_registries.py +++ /dev/null @@ -1,16 +0,0 @@ -import pytest - -from atom.plugin import prepare as plugin_prepare -import atom.plugin.vllm.register as vllm_register - - -@pytest.fixture(autouse=True) -def _reset_framework_state(): - plugin_prepare._set_framework_backbone("atom") - yield - plugin_prepare._set_framework_backbone("atom") - - -def 
test_register_platform_can_be_disabled(monkeypatch): - monkeypatch.setattr(vllm_register, "disable_vllm_plugin", True, raising=False) - assert vllm_register.register_platform() is None From 4a3763eb04c4a080fb8b6b4fdd5f4afa249ac4f0 Mon Sep 17 00:00:00 2001 From: zejunchen-zejun Date: Thu, 12 Mar 2026 14:11:16 +0800 Subject: [PATCH 07/15] add Signed-off-by: zejunchen-zejun --- .github/scripts/atom_oot_test.sh | 2 +- .github/workflows/atom-vllm-oot-test.yaml | 194 +--------------------- 2 files changed, 8 insertions(+), 188 deletions(-) diff --git a/.github/scripts/atom_oot_test.sh b/.github/scripts/atom_oot_test.sh index 12143436..ec6ce080 100644 --- a/.github/scripts/atom_oot_test.sh +++ b/.github/scripts/atom_oot_test.sh @@ -41,7 +41,7 @@ ACCURACY_LOG_FILE=${ACCURACY_LOG_FILE:-/tmp/oot_accuracy_output.txt} # Format: # MODEL_NAME|MODEL_PATH|EXTRA_ARGS|THRESHOLD CI_MODE_MODELS=( - "Kimi-K2|amd/Kimi-K2-Thinking-MXFP4|--trust-remote-code --kv-cache-dtype fp8 --tensor-parallel-size 8 --enable-expert-parallel|0.90" + "Kimi-K2|amd/Kimi-K2-Thinking-MXFP4|--trust-remote-code --kv-cache-dtype fp8 --tensor-parallel-size 4 --enable-expert-parallel|0.90" ) FULL_MODE_MODELS=( diff --git a/.github/workflows/atom-vllm-oot-test.yaml b/.github/workflows/atom-vllm-oot-test.yaml index 56015240..1919c300 100644 --- a/.github/workflows/atom-vllm-oot-test.yaml +++ b/.github/workflows/atom-vllm-oot-test.yaml @@ -26,178 +26,15 @@ env: GITHUB_COMMIT_SHA: ${{ github.event.pull_request.head.sha || github.event.head_commit.id }} jobs: - wait-atom-test-success: - name: Wait for ATOM Test success - runs-on: ubuntu-latest - timeout-minutes: 180 - outputs: - atom_test_ok: ${{ steps.wait.outputs.atom_test_ok }} - steps: - - name: Wait until ATOM Test is completed for this commit - id: wait - uses: actions/github-script@v7 - with: - script: | - const owner = context.repo.owner; - const repo = context.repo.repo; - const eventName = context.eventName; - const headSha = 
context.payload.pull_request?.head?.sha ?? context.sha; - const maxAttempts = 180; - const sleepMs = 60000; - - const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms)); - - if (eventName === "workflow_dispatch") { - core.info("workflow_dispatch detected: bypass ATOM Test gate."); - core.setOutput("atom_test_ok", "true"); - return; - } - - let foundCompletedRun = null; - - for (let attempt = 1; attempt <= maxAttempts; attempt++) { - const resp = await github.rest.actions.listWorkflowRuns({ - owner, - repo, - workflow_id: "atom-test.yaml", - event: eventName, - head_sha: headSha, - per_page: 20, - }); - - const candidates = (resp.data.workflow_runs || []) - .filter((run) => run.name === "ATOM Test" && run.id !== context.runId) - .sort((a, b) => new Date(b.created_at) - new Date(a.created_at)); - - if (candidates.length > 0) { - const latest = candidates[0]; - core.info( - `Attempt ${attempt}/${maxAttempts}: latest ATOM Test run id=${latest.id}, status=${latest.status}, conclusion=${latest.conclusion}` - ); - if (latest.status === "completed") { - foundCompletedRun = latest; - break; - } - } else { - core.info(`Attempt ${attempt}/${maxAttempts}: no ATOM Test run found yet for this sha.`); - } - - await sleep(sleepMs); - } - - if (!foundCompletedRun) { - core.warning("Timeout waiting for ATOM Test workflow completion. OOT workflow will be skipped."); - core.setOutput("atom_test_ok", "false"); - return; - } - - const ok = foundCompletedRun.conclusion === "success"; - core.setOutput("atom_test_ok", ok ? 
"true" : "false"); - if (!ok) { - core.warning( - `Skip OOT workflow: ATOM Test conclusion is '${foundCompletedRun.conclusion}'.` - ); - } - pre-checks: - needs: [wait-atom-test-success] - if: ${{ needs.wait-atom-test-success.outputs.atom_test_ok == 'true' }} uses: ./.github/workflows/pre-checks.yaml with: black: true ruff: true - build_atom_image: - if: ${{ needs.wait-atom-test-success.outputs.atom_test_ok == 'true' && needs.pre-checks.result == 'success' && (!github.event.pull_request || github.event.pull_request.draft == false) }} - needs: [wait-atom-test-success, pre-checks] - name: Build ATOM image - runs-on: build-only-atom - steps: - - name: Docker Login - if: ${{ !github.event.pull_request.head.repo.fork }} - run: | - docker login -u ${{ secrets.DOCKER_USERNAME }} -p ${{ secrets.DOCKER_PASSWORD }} - - - name: Try pull pre-built ATOM image - if: ${{ !github.event.pull_request.head.repo.fork }} - id: pull_prebuilt - run: | - IMAGE_TAG=rocm/atom-dev:pre-build-${{ env.GITHUB_COMMIT_SHA }} - if docker pull "$IMAGE_TAG"; then - echo "image_ready=true" >> "$GITHUB_OUTPUT" - echo "Reusing existing image: $IMAGE_TAG" - else - echo "image_ready=false" >> "$GITHUB_OUTPUT" - echo "Pre-built image not found, will rebuild: $IMAGE_TAG" - fi - - - name: Checkout ATOM repo - if: ${{ !github.event.pull_request.head.repo.fork && steps.pull_prebuilt.outputs.image_ready != 'true' }} - uses: actions/checkout@v4 - - - name: Generate Dockerfile - if: ${{ !github.event.pull_request.head.repo.fork && steps.pull_prebuilt.outputs.image_ready != 'true' }} - run: | - cat < Dockerfile.mod - FROM ${{ env.ATOM_BASE_NIGHTLY_IMAGE }} - RUN pip install -U lm-eval[api] - RUN pip show lm-eval || true - RUN pip install hf_transfer - RUN pip show hf_transfer || true - RUN echo "=== Aiter version BEFORE uninstall ===" && pip show amd-aiter || true - RUN pip uninstall -y amd-aiter - RUN pip install --upgrade "pybind11>=3.0.1" - RUN pip show pybind11 - RUN wget 
https://github.com/stedolan/jq/releases/download/jq-1.7/jq-linux64 -O jq - RUN chmod +x jq - RUN mv jq /usr/local/bin/jq - RUN rm -rf /app/aiter-test - RUN git clone --depth 1 https://github.com/ROCm/aiter.git /app/aiter-test && \\ - cd /app/aiter-test && \\ - git checkout HEAD && \\ - git submodule sync && git submodule update --init --recursive && \\ - MAX_JOBS=64 PREBUILD_KERNELS=0 GPU_ARCHS=gfx950 python3 setup.py develop - RUN echo "=== Aiter version AFTER installation ===" && pip show amd-aiter || true - - RUN echo "=== ATOM version BEFORE uninstall ===" && pip show atom || true - RUN pip uninstall -y atom - RUN rm -rf /app/ATOM - RUN git clone ${{ env.GITHUB_REPO_URL }} /app/ATOM && \\ - cd /app/ATOM && \\ - git checkout ${{ env.GITHUB_COMMIT_SHA }} && \\ - pip install -e . - - RUN echo "=== ATOM version AFTER installation ===" && pip show atom || true - EOF - - - name: Build Docker image - if: ${{ !github.event.pull_request.head.repo.fork && steps.pull_prebuilt.outputs.image_ready != 'true' }} - run: | - docker build --pull --network=host \ - --no-cache \ - -t atom_test:ci \ - -f Dockerfile.mod . 
- - - name: Push Docker image - if: ${{ !github.event.pull_request.head.repo.fork && steps.pull_prebuilt.outputs.image_ready != 'true' }} - run: | - IMAGE_TAG=rocm/atom-dev:pre-build-${{ env.GITHUB_COMMIT_SHA }} - docker tag atom_test:ci $IMAGE_TAG - docker push $IMAGE_TAG - - - name: Success message - if: ${{ !github.event.pull_request.head.repo.fork }} - run: | - IMAGE_TAG=rocm/atom-dev:pre-build-${{ env.GITHUB_COMMIT_SHA }} - if [ "${{ steps.pull_prebuilt.outputs.image_ready }}" = "true" ]; then - echo "Successfully reused image: $IMAGE_TAG" - else - echo "Successfully rebuilt and pushed image: $IMAGE_TAG" - fi - atom-vllm-oot: - needs: [wait-atom-test-success, pre-checks, build_atom_image] - if: ${{ needs.wait-atom-test-success.outputs.atom_test_ok == 'true' && needs.pre-checks.result == 'success' && needs.build_atom_image.result == 'success' && (!github.event.pull_request || github.event.pull_request.draft == false) }} + needs: [pre-checks] + if: ${{ needs.pre-checks.result == 'success' && (!github.event.pull_request || github.event.pull_request.draft == false) }} name: ATOM vLLM OOT Test strategy: fail-fast: false @@ -230,13 +67,7 @@ jobs: - name: Checkout ATOM repo uses: actions/checkout@v4 - - name: Docker Login - if: ${{ !github.event.pull_request.head.repo.fork }} - run: | - docker login -u ${{ secrets.DOCKER_USERNAME }} -p ${{ secrets.DOCKER_PASSWORD }} - - - name: Prepare OOT base image for forked repo - if: ${{ github.event.pull_request.head.repo.fork }} + - name: Build ATOM base image run: | cat < Dockerfile.mod FROM ${{ env.ATOM_BASE_NIGHTLY_IMAGE }} @@ -256,8 +87,6 @@ jobs: MAX_JOBS=64 PREBUILD_KERNELS=0 GPU_ARCHS=gfx950 python3 setup.py develop RUN echo "=== Aiter version AFTER installation ===" && pip show amd-aiter || true RUN echo "=== ATOM version BEFORE uninstall ===" && pip show atom || true - # Fork PR fallback: this workflow cannot rely on pre-built images from - # other workflows, so reinstall ATOM from the current PR commit. 
RUN pip uninstall -y atom RUN rm -rf /app/ATOM RUN git clone ${{ env.GITHUB_REPO_URL }} /app/ATOM && \\ @@ -271,27 +100,16 @@ jobs: --no-cache \ -t atom_oot_base:ci \ -f Dockerfile.mod . - echo "OOT_BASE_IMAGE=atom_oot_base:ci" >> "$GITHUB_ENV" - - - name: Select OOT base image from pre-built ATOM image - if: ${{ !github.event.pull_request.head.repo.fork }} - run: | - echo "OOT_BASE_IMAGE=rocm/atom-dev:pre-build-${{ env.GITHUB_COMMIT_SHA }}" >> "$GITHUB_ENV" - name: Build OOT vLLM image run: | - if [ "${{ github.event_name }}" = "pull_request" ] && [ "${{ github.event.pull_request.head.repo.fork }}" = "true" ]; then - pull_base_image=0 - else - pull_base_image=1 - fi chmod +x docker/plugin/build_OOT_vLLM.sh IMAGE_TAG="${OOT_IMAGE_TAG}" \ - BASE_IMAGE="${OOT_BASE_IMAGE}" \ + BASE_IMAGE="atom_oot_base:ci" \ VLLM_COMMIT="${VLLM_COMMIT}" \ VLLM_VERSION="${VLLM_VERSION}" \ INSTALL_LM_EVAL=1 \ - PULL_BASE_IMAGE="${pull_base_image}" \ + PULL_BASE_IMAGE=0 \ BUILD_NO_CACHE=1 \ docker/plugin/build_OOT_vLLM.sh @@ -336,6 +154,7 @@ jobs: GITHUB_WORKSPACE: ${{ github.workspace }} - name: Download model if needed + if: success() run: | if [ -d "/models" ] && [ ! 
-f "/models/${{ matrix.model_path }}/config.json" ]; then echo "Downloading model to /models/${{ matrix.model_path }}" @@ -345,6 +164,7 @@ jobs: fi - name: Run OOT launch and gsm8k accuracy via script (ci mode) + if: success() timeout-minutes: 90 run: | docker exec "$CONTAINER_NAME" bash -lc " From c2aaffa9ba8564314a0959d198ae291c73a3481a Mon Sep 17 00:00:00 2001 From: zejunchen-zejun Date: Thu, 12 Mar 2026 15:37:45 +0800 Subject: [PATCH 08/15] add Signed-off-by: zejunchen-zejun --- .../workflows/atom-vllm-oot-full-test.yaml | 18 +-- .github/workflows/atom-vllm-oot-test.yaml | 19 ++-- .github/workflows/docker-release.yaml | 48 ++++++++ docker/Dockerfile | 81 +++++++++++++- docker/plugin/Dockerfile_OOT_vLLM | 105 ------------------ docker/plugin/build_OOT_vLLM.sh | 90 --------------- 6 files changed, 148 insertions(+), 213 deletions(-) delete mode 100644 docker/plugin/Dockerfile_OOT_vLLM delete mode 100644 docker/plugin/build_OOT_vLLM.sh diff --git a/.github/workflows/atom-vllm-oot-full-test.yaml b/.github/workflows/atom-vllm-oot-full-test.yaml index 82432e8e..02cce215 100644 --- a/.github/workflows/atom-vllm-oot-full-test.yaml +++ b/.github/workflows/atom-vllm-oot-full-test.yaml @@ -80,15 +80,17 @@ jobs: - name: Build OOT vLLM image from rebuilt ATOM base id: meta run: | - chmod +x docker/plugin/build_OOT_vLLM.sh OOT_IMAGE_TAG="${VALIDATION_IMAGE_REPO}:oot-vllm-validation-${GITHUB_COMMIT_SHA}-${{ github.run_id }}" - IMAGE_TAG="${OOT_IMAGE_TAG}" \ - BASE_IMAGE="atom_oot_base:ci" \ - VLLM_COMMIT="${{ inputs.vllm_commit || 'b31e9326a7d9394aab8c767f8ebe225c65594b60' }}" \ - VLLM_VERSION="${{ inputs.vllm_version || '0.17' }}" \ - INSTALL_LM_EVAL=1 \ - BUILD_NO_CACHE=1 \ - docker/plugin/build_OOT_vLLM.sh + docker build --network=host \ + --no-cache \ + --target oot_image \ + -t "${OOT_IMAGE_TAG}" \ + --build-arg BASE_IMAGE="atom_oot_base:ci" \ + --build-arg MAX_JOBS=64 \ + --build-arg VLLM_COMMIT="${{ inputs.vllm_commit || 'b31e9326a7d9394aab8c767f8ebe225c65594b60' }}" \ 
+ --build-arg INSTALL_LM_EVAL=1 \ + --build-arg INSTALL_FASTSAFETENSORS=1 \ + -f docker/Dockerfile . echo "oot_image_tag=${OOT_IMAGE_TAG}" >> "$GITHUB_OUTPUT" diff --git a/.github/workflows/atom-vllm-oot-test.yaml b/.github/workflows/atom-vllm-oot-test.yaml index 1919c300..f84a9842 100644 --- a/.github/workflows/atom-vllm-oot-test.yaml +++ b/.github/workflows/atom-vllm-oot-test.yaml @@ -103,15 +103,16 @@ jobs: - name: Build OOT vLLM image run: | - chmod +x docker/plugin/build_OOT_vLLM.sh - IMAGE_TAG="${OOT_IMAGE_TAG}" \ - BASE_IMAGE="atom_oot_base:ci" \ - VLLM_COMMIT="${VLLM_COMMIT}" \ - VLLM_VERSION="${VLLM_VERSION}" \ - INSTALL_LM_EVAL=1 \ - PULL_BASE_IMAGE=0 \ - BUILD_NO_CACHE=1 \ - docker/plugin/build_OOT_vLLM.sh + docker build --network=host \ + --no-cache \ + --target oot_image \ + -t "${OOT_IMAGE_TAG}" \ + --build-arg BASE_IMAGE="atom_oot_base:ci" \ + --build-arg MAX_JOBS=64 \ + --build-arg VLLM_COMMIT="${VLLM_COMMIT}" \ + --build-arg INSTALL_LM_EVAL=1 \ + --build-arg INSTALL_FASTSAFETENSORS=1 \ + -f docker/Dockerfile . 
- name: Run all plugin unit tests run: | diff --git a/.github/workflows/docker-release.yaml b/.github/workflows/docker-release.yaml index a2330451..8f7c2082 100644 --- a/.github/workflows/docker-release.yaml +++ b/.github/workflows/docker-release.yaml @@ -27,6 +27,19 @@ on: description: "Runner label to use" type: string default: "atom-mi355-8gpu.predownload" + build_oot_image: + description: "Build OOT vLLM image in addition to ATOM image" + type: boolean + default: false + oot_base_image: + description: "Base image for OOT build (empty means use local atom_release:ci)" + default: "" + vllm_commit: + description: "vLLM commit for OOT image" + default: "b31e9326a7d9394aab8c767f8ebe225c65594b60" + vllm_version: + description: "vLLM version label for OOT image tags" + default: "0.17.0" jobs: docker-release: @@ -46,6 +59,8 @@ jobs: RCCL_REPO: "https://github.com/ROCm/rccl.git" RCCL_BRANCH: "29e1567b95e28823b0beb1a988adc587bfab5b4f" GPU_ARCH: "gfx942;gfx950" + VLLM_COMMIT: "b31e9326a7d9394aab8c767f8ebe225c65594b60" + VLLM_VERSION: "0.17.0" steps: - name: Checkout ATOM repo @@ -68,6 +83,7 @@ jobs: timeout-minutes: 120 run: | docker build --pull --network=host -t atom_release:ci \ + --target atom_image \ --build-arg BASE_IMAGE="${{ matrix.base_image }}" \ --build-arg GPU_ARCH="${{ env.GPU_ARCH }}" \ --build-arg ATOM_REPO="${{ inputs.atom_repo || env.ATOM_REPO }}" \ @@ -136,6 +152,38 @@ jobs: docker tag atom_release:ci rocm/atom-dev:${TAG} docker push rocm/atom-dev:${TAG} + - name: Build OOT Docker image + if: ${{ success() && inputs.build_oot_image == true }} + timeout-minutes: 180 + run: | + if [ -n "${{ inputs.oot_base_image }}" ]; then + OOT_BASE_IMAGE="${{ inputs.oot_base_image }}" + else + OOT_BASE_IMAGE="rocm/atom-dev:latest" + fi + + echo "Using OOT base image: ${OOT_BASE_IMAGE}" + docker pull "${OOT_BASE_IMAGE}" + docker build --network=host -t atom_oot_release:ci \ + --target oot_image \ + --build-arg BASE_IMAGE="${OOT_BASE_IMAGE}" \ + --build-arg MAX_JOBS=64 \ 
+ --build-arg VLLM_COMMIT="${{ inputs.vllm_commit || env.VLLM_COMMIT }}" \ + --build-arg INSTALL_LM_EVAL=1 \ + --build-arg INSTALL_FASTSAFETENSORS=1 \ + -f docker/Dockerfile . + docker inspect atom_oot_release:ci + + - name: Push OOT Docker image + if: ${{ success() && inputs.build_oot_image == true }} + run: | + TAG=nightly_$(date +%Y%m%d) + VLLM_VER="${{ inputs.vllm_version || env.VLLM_VERSION }}" + docker tag atom_oot_release:ci rocm/atom-vllm:latest + docker push rocm/atom-vllm:latest + docker tag atom_oot_release:ci rocm/atom-vllm-v${VLLM_VER}:${TAG} + docker push rocm/atom-vllm-v${VLLM_VER}:${TAG} + - name: Clean Up if: always() run: | diff --git a/docker/Dockerfile b/docker/Dockerfile index 85c99daa..e97864ac 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -2,7 +2,86 @@ ARG BASE_IMAGE="rocm/pytorch:latest" ARG GPU_ARCH="gfx942;gfx950" -FROM $BASE_IMAGE +# -------------------------------------------------------------------- +# OOT image stage: extends an ATOM base image with vLLM + OOT deps. +# Build with: docker build --target oot_image --build-arg BASE_IMAGE=... 
+# -------------------------------------------------------------------- +FROM ${BASE_IMAGE} as oot_image + +ARG MAX_JOBS=64 +ARG VENV_PYTHON="/opt/venv/bin/python" +ARG VLLM_REPO="https://github.com/vllm-project/vllm.git" +ARG VLLM_COMMIT="b31e9326a7d9394aab8c767f8ebe225c65594b60" +ARG INSTALL_LM_EVAL=1 +ARG INSTALL_FASTSAFETENSORS=1 + +ENV DEBIAN_FRONTEND=noninteractive +ENV PATH="/opt/venv/bin:${PATH}" +ENV VLLM_TARGET_DEVICE=rocm +ENV CMAKE_MAKE_PROGRAM=/usr/local/bin/ninja +ENV MAX_JOBS=${MAX_JOBS} +ENV LD_LIBRARY_PATH="/opt/venv/lib/python3.12/site-packages/torch/lib:${LD_LIBRARY_PATH}" +WORKDIR /app + +RUN echo "========== [OOT 1/7] Prepare build tools ==========" \ + && apt-get update \ + && apt --fix-broken install -y \ + && apt-get install -y --no-install-recommends git ca-certificates ninja-build vim \ + && mkdir -p /usr/local/bin \ + && ln -sf "$(command -v ninja)" /usr/local/bin/ninja \ + && /usr/local/bin/ninja --version \ + && rm -rf /var/lib/apt/lists/* + +RUN echo "========== [OOT 2/7] Verify base packages (atom/aiter/mori) ==========" \ + && "${VENV_PYTHON}" -m pip show atom || true \ + && "${VENV_PYTHON}" -m pip show amd-aiter || true \ + && "${VENV_PYTHON}" -m pip show mori || true + +RUN echo "========== [OOT 3/7] Clone vLLM ==========" \ + && git clone "${VLLM_REPO}" /app/vllm \ + && cd /app/vllm \ + && git checkout "${VLLM_COMMIT}" \ + && git submodule update --init --recursive \ + && echo "vLLM commit:" \ + && git rev-parse HEAD + +RUN echo "========== [OOT 4/7] Install vLLM ROCm build dependencies ==========" \ + && cd /app/vllm \ + && "${VENV_PYTHON}" -m pip install --upgrade pip \ + && sed -i -e '/xgrammar/d' -e '/compressed-tensors/d' requirements/common.txt \ + && "${VENV_PYTHON}" -m pip install --no-deps xgrammar==0.1.29 compressed-tensors==0.13.0 loguru \ + && sed -i -e '/peft/d' -e '/tensorizer/d' -e '/runai/d' -e '/timm/d' requirements/rocm.txt \ + && "${VENV_PYTHON}" -m pip install --no-deps peft tensorizer==2.10.1 
runai-model-streamer[s3,gcs]==0.15.3 timm>=1.0.17 \ + && "${VENV_PYTHON}" -m pip install -r requirements/rocm.txt + +RUN echo "========== [OOT 5/7] Build and install amd-smi wheel ==========" \ + && cd /opt/rocm/share/amd_smi \ + && pip wheel . --wheel-dir=dist \ + && pip install dist/*.whl + +RUN echo "========== [OOT 6/7] Build vLLM wheel ==========" \ + && cd /app/vllm \ + && VLLM_TARGET_DEVICE=rocm "${VENV_PYTHON}" setup.py clean --all \ + && MAX_JOBS="${MAX_JOBS}" VLLM_TARGET_DEVICE=rocm "${VENV_PYTHON}" setup.py bdist_wheel --dist-dir=/tmp/vllm-wheels \ + && ls -lh /tmp/vllm-wheels + +RUN echo "========== [OOT 7/7] Install vLLM runtime dependencies ==========" \ + && cd /app/vllm \ + && "${VENV_PYTHON}" -m pip uninstall -y vllm || true \ + && "${VENV_PYTHON}" -m pip install /tmp/vllm-wheels/*.whl \ + && "${VENV_PYTHON}" -m pip install uvloop \ + && if [ "${INSTALL_LM_EVAL}" = "1" ]; then "${VENV_PYTHON}" -m pip install "lm-eval[api]"; else echo "Skip lm-eval install"; fi \ + && if [ "${INSTALL_FASTSAFETENSORS}" = "1" ]; then "${VENV_PYTHON}" -m pip install "git+https://github.com/foundation-model-stack/fastsafetensors.git"; else echo "Skip fastsafetensors install"; fi \ + && "${VENV_PYTHON}" -c "import glob, os, torch; print(f'torch.version.hip: {torch.version.hip}'); print(f'torch.version.cuda: {torch.version.cuda}'); torch_lib_dir=os.path.join(os.path.dirname(torch.__file__), 'lib'); print(f'torch lib dir: {torch_lib_dir}'); print(f'libtorch_hip candidates: {glob.glob(os.path.join(torch_lib_dir, \"libtorch_hip.so*\"))}'); assert torch.version.hip is not None, 'Torch is not ROCm build (torch.version.hip is None).'" \ + && "${VENV_PYTHON}" -m pip show vllm torch triton torchvision torchaudio amdsmi amd-aiter atom mori || true + +CMD ["/bin/bash"] + +# -------------------------------------------------------------------- +# ATOM image stage: original Dockerfile flow for atom-dev image. +# Build with: docker build --target atom_image --build-arg BASE_IMAGE=... 
+# -------------------------------------------------------------------- +FROM ${BASE_IMAGE} as atom_image ARG GPU_ARCH ENV GPU_ARCH_LIST=$GPU_ARCH diff --git a/docker/plugin/Dockerfile_OOT_vLLM b/docker/plugin/Dockerfile_OOT_vLLM deleted file mode 100644 index a02e50c7..00000000 --- a/docker/plugin/Dockerfile_OOT_vLLM +++ /dev/null @@ -1,105 +0,0 @@ -ARG BASE_IMAGE="rocm/atom-dev:latest" -FROM ${BASE_IMAGE} - -ARG VLLM_REPO="https://github.com/vllm-project/vllm.git" -ARG VLLM_COMMIT="b31e9326a7d9394aab8c767f8ebe225c65594b60" -ARG MAX_JOBS=64 -ARG INSTALL_LM_EVAL=1 -ARG INSTALL_FASTSAFETENSORS=1 -ARG VENV_PYTHON="/opt/venv/bin/python" - -ENV DEBIAN_FRONTEND=noninteractive -ENV PATH="/opt/venv/bin:${PATH}" -ENV VLLM_TARGET_DEVICE=rocm -ENV CMAKE_MAKE_PROGRAM=/usr/local/bin/ninja -ENV MAX_JOBS=${MAX_JOBS} -ENV LD_LIBRARY_PATH="/opt/venv/lib/python3.12/site-packages/torch/lib:${LD_LIBRARY_PATH}" -WORKDIR /app - -RUN echo "========== [1/7] Prepare build tools ==========" \ - && apt-get update \ - && apt --fix-broken install -y \ - && apt-get install -y --no-install-recommends git ca-certificates ninja-build vim \ - && mkdir -p /usr/local/bin \ - && ln -sf "$(command -v ninja)" /usr/local/bin/ninja \ - && /usr/local/bin/ninja --version \ - && rm -rf /var/lib/apt/lists/* - -RUN echo "========== [2/7] Verify base packages (atom/aiter/mori) ==========" \ - && "${VENV_PYTHON}" -m pip show atom || true \ - && "${VENV_PYTHON}" -m pip show amd-aiter || true \ - && "${VENV_PYTHON}" -m pip show mori || true - -RUN echo "========== [3/7] Clone vLLM ==========" \ - && git clone "${VLLM_REPO}" /app/vllm \ - && cd /app/vllm \ - && git checkout "${VLLM_COMMIT}" \ - && git submodule update --init --recursive \ - && echo "vLLM commit:" \ - && git rev-parse HEAD - -# Follow vLLM ROCm standard but DO NOT reinstall torch as already existing. 
-RUN echo "========== [4/7] Install vLLM ROCm build dependencies ==========" \ - && cd /app/vllm \ - && "${VENV_PYTHON}" -m pip install --upgrade pip \ - && sed -i -e '/xgrammar/d' -e '/compressed-tensors/d' requirements/common.txt \ - && "${VENV_PYTHON}" -m pip install --no-deps xgrammar==0.1.29 compressed-tensors==0.13.0 loguru \ - && sed -i -e '/peft/d' -e '/tensorizer/d' -e '/runai/d' -e '/timm/d' requirements/rocm.txt \ - && "${VENV_PYTHON}" -m pip install --no-deps peft tensorizer==2.10.1 runai-model-streamer[s3,gcs]==0.15.3 timm>=1.0.17 \ - && "${VENV_PYTHON}" -m pip install -r requirements/rocm.txt - - -RUN echo "========== [5/7] Build and install amd-smi wheel ==========" \ - && cd /opt/rocm/share/amd_smi \ - && pip wheel . --wheel-dir=dist \ - && pip install dist/*.whl - -RUN echo "========== [6/7] Build vLLM wheel ==========" \ - && cd /app/vllm \ - && VLLM_TARGET_DEVICE=rocm "${VENV_PYTHON}" setup.py clean --all \ - && MAX_JOBS="${MAX_JOBS}" VLLM_TARGET_DEVICE=rocm "${VENV_PYTHON}" setup.py bdist_wheel --dist-dir=/tmp/vllm-wheels \ - && ls -lh /tmp/vllm-wheels - -RUN echo "========== [7/7] Install vLLM runtime dependencies ==========" \ - && cd /app/vllm \ - && "${VENV_PYTHON}" -m pip uninstall -y vllm || true \ - && "${VENV_PYTHON}" -m pip install /tmp/vllm-wheels/*.whl \ - && "${VENV_PYTHON}" -m pip install uvloop - -RUN echo "========== [8/8] Optional tools and final checks ==========" \ - && if [ "${INSTALL_LM_EVAL}" = "1" ]; then "${VENV_PYTHON}" -m pip install "lm-eval[api]"; else echo "Skip lm-eval install"; fi \ - && if [ "${INSTALL_FASTSAFETENSORS}" = "1" ]; then "${VENV_PYTHON}" -m pip install "git+https://github.com/foundation-model-stack/fastsafetensors.git"; else echo "Skip fastsafetensors install"; fi \ - && "${VENV_PYTHON}" - <<'PY' -import importlib.metadata as m -import glob -import os -import torch - -print(f"torch.version.hip: {torch.version.hip}") -print(f"torch.version.cuda: {torch.version.cuda}") -torch_lib_dir = 
os.path.join(os.path.dirname(torch.__file__), "lib") -print(f"torch lib dir: {torch_lib_dir}") -print(f"libtorch_hip candidates: {glob.glob(os.path.join(torch_lib_dir, 'libtorch_hip.so*'))}") -if torch.version.hip is None: - raise RuntimeError("Torch is not ROCm build (torch.version.hip is None).") - -pkgs = [ - "vllm", - "torch", - "triton", - "torchvision", - "torchaudio", - "amdsmi", - "amd-aiter", - "atom", - "mori", -] -print("Final package versions:") -for p in pkgs: - try: - print(f" {p}: {m.version(p)}") - except Exception: - print(f" {p}: ") -PY - -CMD ["/bin/bash"] diff --git a/docker/plugin/build_OOT_vLLM.sh b/docker/plugin/build_OOT_vLLM.sh deleted file mode 100644 index a87483cb..00000000 --- a/docker/plugin/build_OOT_vLLM.sh +++ /dev/null @@ -1,90 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -REPO_ROOT="$(cd "${SCRIPT_DIR}/../../.." && pwd)" -LOG_DIR="${LOG_DIR:-${SCRIPT_DIR}/logs}" -LOG_FILE="${LOG_FILE:-${LOG_DIR}/build_OOT_vLLM_$(date +%Y%m%d_%H%M%S).log}" - -mkdir -p "${LOG_DIR}" -# Mirror all stdout/stderr to terminal and log file. 
-exec > >(tee -a "${LOG_FILE}") 2>&1 - -DOCKERFILE_PATH="${SCRIPT_DIR}/Dockerfile_OOT_vLLM" -BASE_IMAGE="${BASE_IMAGE:-rocm/atom-dev:latest}" -VLLM_REPO="${VLLM_REPO:-https://github.com/vllm-project/vllm.git}" -VLLM_COMMIT="${VLLM_COMMIT:-b31e9326a7d9394aab8c767f8ebe225c65594b60}" -VLLM_VERSION="${VLLM_VERSION:-0.17}" -VLLM_COMMIT_SHORT="$(printf '%s' "${VLLM_COMMIT}" | cut -c1-6)" -IMAGE_REPO="${IMAGE_REPO:-rocm/atom-vllm-dev}" -IMAGE_TAG="${IMAGE_TAG:-${IMAGE_REPO}:v${VLLM_VERSION}-${VLLM_COMMIT_SHORT}}" -MAX_JOBS="${MAX_JOBS:-64}" -INSTALL_LM_EVAL="${INSTALL_LM_EVAL:-1}" -PULL_BASE_IMAGE="${PULL_BASE_IMAGE:-1}" -BUILD_NO_CACHE="${BUILD_NO_CACHE:-1}" - -print_banner() { - echo "============================================================" - echo "$1" - echo "============================================================" -} - -print_banner "Build vLLM on top of ATOM base image" -echo "Log file : ${LOG_FILE}" -echo "Dockerfile : ${DOCKERFILE_PATH}" -echo "Build context : ${REPO_ROOT}" -echo "Target image : ${IMAGE_TAG}" -echo "Base image : ${BASE_IMAGE}" -echo "vLLM repo : ${VLLM_REPO}" -echo "vLLM version : ${VLLM_VERSION}" -echo "vLLM commit : ${VLLM_COMMIT}" -echo "commit short : ${VLLM_COMMIT_SHORT}" -echo "MAX_JOBS : ${MAX_JOBS}" -echo "INSTALL_LM_EVAL : ${INSTALL_LM_EVAL}" -echo "BUILD_NO_CACHE : ${BUILD_NO_CACHE}" -echo -echo "Build plan:" -echo " Step 1/4: (optional) pull base image" -echo " Step 2/4: check/remove existing target image" -echo " Step 3/4: build image from Dockerfile_OOT_vLLM" -echo " Step 4/4: print final image info" -echo - -if [[ "${PULL_BASE_IMAGE}" == "1" ]]; then - print_banner "Step 1/4 - Pull base image: ${BASE_IMAGE}" - docker pull "${BASE_IMAGE}" -else - print_banner "Step 1/4 - Skip base image pull (PULL_BASE_IMAGE=${PULL_BASE_IMAGE})" -fi - -print_banner "Step 2/4 - Check whether target image already exists" -if docker image inspect "${IMAGE_TAG}" >/dev/null 2>&1; then - echo "Target image already exists: ${IMAGE_TAG}" - docker 
image inspect "${IMAGE_TAG}" --format 'Existing image -> ID={{.Id}} Created={{.Created}}' - echo "Removing existing target image: ${IMAGE_TAG}" - docker image rm -f "${IMAGE_TAG}" -else - echo "Target image does not exist yet: ${IMAGE_TAG}" -fi -echo - -print_banner "Step 3/4 - Build target image: ${IMAGE_TAG}" -NO_CACHE_FLAG="" -if [[ "${BUILD_NO_CACHE}" == "1" ]]; then - NO_CACHE_FLAG="--no-cache" -fi - -DOCKER_BUILDKIT=1 docker build \ - ${NO_CACHE_FLAG} \ - -f "${DOCKERFILE_PATH}" \ - -t "${IMAGE_TAG}" \ - --build-arg "BASE_IMAGE=${BASE_IMAGE}" \ - --build-arg "VLLM_REPO=${VLLM_REPO}" \ - --build-arg "VLLM_COMMIT=${VLLM_COMMIT}" \ - --build-arg "MAX_JOBS=${MAX_JOBS}" \ - --build-arg "INSTALL_LM_EVAL=${INSTALL_LM_EVAL}" \ - "$@" \ - "${REPO_ROOT}" - -print_banner "Step 4/4 - Build completed" -docker image inspect "${IMAGE_TAG}" --format 'Image={{.RepoTags}} ID={{.Id}} Created={{.Created}}' From b768afd63ae3aeaecf8ebcf0c0df40a1f36f2519 Mon Sep 17 00:00:00 2001 From: zejunchen-zejun Date: Thu, 12 Mar 2026 15:42:39 +0800 Subject: [PATCH 09/15] add Signed-off-by: zejunchen-zejun --- .github/workflows/atom-vllm-oot-full-test.yaml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/atom-vllm-oot-full-test.yaml b/.github/workflows/atom-vllm-oot-full-test.yaml index 02cce215..a1c2d046 100644 --- a/.github/workflows/atom-vllm-oot-full-test.yaml +++ b/.github/workflows/atom-vllm-oot-full-test.yaml @@ -1,10 +1,13 @@ -name: ATOM vLLM OOT Validation +name: ATOM vLLM OOT Full Validation on: workflow_dispatch: inputs: vllm_commit: description: "vLLM commit to validate" + # NOTE: For full validation, set this commit explicitly when your PR + # adapts ATOM to a newer vLLM version; using an old default commit can + # hide real compatibility issues. 
required: false type: string default: "b31e9326a7d9394aab8c767f8ebe225c65594b60" From 23d700c7405bc4a3cfbe3dd3fcbfac11455f9e72 Mon Sep 17 00:00:00 2001 From: zejunchen-zejun Date: Thu, 12 Mar 2026 15:54:22 +0800 Subject: [PATCH 10/15] add Signed-off-by: zejunchen-zejun --- .github/workflows/docker-release.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/docker-release.yaml b/.github/workflows/docker-release.yaml index 8f7c2082..b9a67887 100644 --- a/.github/workflows/docker-release.yaml +++ b/.github/workflows/docker-release.yaml @@ -179,10 +179,10 @@ jobs: run: | TAG=nightly_$(date +%Y%m%d) VLLM_VER="${{ inputs.vllm_version || env.VLLM_VERSION }}" - docker tag atom_oot_release:ci rocm/atom-vllm:latest - docker push rocm/atom-vllm:latest - docker tag atom_oot_release:ci rocm/atom-vllm-v${VLLM_VER}:${TAG} - docker push rocm/atom-vllm-v${VLLM_VER}:${TAG} + docker tag atom_oot_release:ci rocm/atom-dev-vllm:latest + docker push rocm/atom-dev-vllm:latest + docker tag atom_oot_release:ci rocm/atom-dev-vllm-v${VLLM_VER}:${TAG} + docker push rocm/atom-dev-vllm-v${VLLM_VER}:${TAG} - name: Clean Up if: always() From 4b7ac8f8046ca7f349b262b79cb5ff3723726350 Mon Sep 17 00:00:00 2001 From: zejunchen-zejun Date: Thu, 12 Mar 2026 16:03:49 +0800 Subject: [PATCH 11/15] make lint happy Signed-off-by: zejunchen-zejun --- tests/plugin/test_plugin_env_flags.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/plugin/test_plugin_env_flags.py b/tests/plugin/test_plugin_env_flags.py index 2e7888a1..6760cc3b 100644 --- a/tests/plugin/test_plugin_env_flags.py +++ b/tests/plugin/test_plugin_env_flags.py @@ -16,4 +16,3 @@ def test_disable_vllm_plugin_flag_disables_platform(monkeypatch): assert platform_module.ATOMPlatform is None assert register_module.register_platform() is None - From 7794a50d34d4e7bdcf174b499239417ca275088b Mon Sep 17 00:00:00 2001 From: zejunchen-zejun Date: Thu, 12 Mar 2026 17:09:08 +0800 Subject: [PATCH 12/15] add 
Signed-off-by: zejunchen-zejun --- .github/scripts/atom_oot_test.sh | 3 +-- .github/workflows/atom-vllm-oot-full-test.yaml | 5 ----- .github/workflows/atom-vllm-oot-test.yaml | 1 - .github/workflows/docker-release.yaml | 3 +++ 4 files changed, 4 insertions(+), 8 deletions(-) diff --git a/.github/scripts/atom_oot_test.sh b/.github/scripts/atom_oot_test.sh index ec6ce080..8785af69 100644 --- a/.github/scripts/atom_oot_test.sh +++ b/.github/scripts/atom_oot_test.sh @@ -47,7 +47,7 @@ CI_MODE_MODELS=( FULL_MODE_MODELS=( "Qwen3 Dense|Qwen/Qwen3-8B|--trust-remote-code --kv-cache-dtype fp8 --tensor-parallel-size 1|0.70" "Qwen3 MoE|Qwen/Qwen3-235B-A22B-Instruct-2507-FP8|--trust-remote-code --kv-cache-dtype fp8 --tensor-parallel-size 8 --enable-expert-parallel|0.87" - "DeepSeek-V3 family|deepseek-ai/DeepSeek-R1-0528|--trust-remote-code --kv-cache-dtype fp8 --tensor-parallel-size 8|0.93" + "DeepSeek-V3 family|deepseek-ai/DeepSeek-R1-0528|--trust-remote-code --kv-cache-dtype fp8 --tensor-parallel-size 8|0.94" "GPT-OSS|openai/gpt-oss-120b|--trust-remote-code --kv-cache-dtype fp8 --tensor-parallel-size 2 --enable-dp-attention --enable-expert-parallel --gpu-memory-utilization 0.3|0.38" "Kimi-K2|amd/Kimi-K2-Thinking-MXFP4|--trust-remote-code --kv-cache-dtype fp8 --tensor-parallel-size 8 --enable-expert-parallel|0.90" ) @@ -132,7 +132,6 @@ launch_one_model() { nohup vllm serve "${resolved_model_path}" \ --host "${VLLM_HOST}" \ --port "${VLLM_PORT}" \ - --disable-log-requests \ --async-scheduling \ --load-format fastsafetensors \ --max-model-len 16384 \ diff --git a/.github/workflows/atom-vllm-oot-full-test.yaml b/.github/workflows/atom-vllm-oot-full-test.yaml index a1c2d046..79200b4f 100644 --- a/.github/workflows/atom-vllm-oot-full-test.yaml +++ b/.github/workflows/atom-vllm-oot-full-test.yaml @@ -138,32 +138,27 @@ jobs: model_path: "Qwen/Qwen3-8B" extra_args: "--trust-remote-code --kv-cache-dtype fp8 --tensor-parallel-size 1" env_vars: "" - accuracy_test_threshold: "0.70" runner: 
linux-atom-mi355-1 - model_name: "Qwen3 MoE" model_path: "Qwen/Qwen3-235B-A22B-Instruct-2507-FP8" extra_args: "--trust-remote-code --kv-cache-dtype fp8 --tensor-parallel-size 8 --enable-expert-parallel" env_vars: "" - accuracy_test_threshold: "0.87" runner: atom-mi355-8gpu.predownload - model_name: "DeepSeek-V3 family" model_path: "deepseek-ai/DeepSeek-R1-0528" extra_args: "--trust-remote-code --kv-cache-dtype fp8 --tensor-parallel-size 8" env_vars: "" - accuracy_test_threshold: "0.93" runner: atom-mi355-8gpu.predownload - model_name: "GPT-OSS" model_path: "openai/gpt-oss-120b" extra_args: "--trust-remote-code --kv-cache-dtype fp8 --tensor-parallel-size 2 --enable-dp-attention --enable-expert-parallel --gpu-memory-utilization 0.3" env_vars: | ATOM_GPT_OSS_MODEL=1 - accuracy_test_threshold: "0.38" runner: linux-atom-mi355-4 - model_name: "Kimi-K2" model_path: "amd/Kimi-K2-Thinking-MXFP4" extra_args: "--trust-remote-code --kv-cache-dtype fp8 --tensor-parallel-size 8 --enable-expert-parallel" env_vars: "" - accuracy_test_threshold: "0.90" runner: atom-mi355-8gpu.predownload runs-on: ${{ matrix.runner }} timeout-minutes: 240 diff --git a/.github/workflows/atom-vllm-oot-test.yaml b/.github/workflows/atom-vllm-oot-test.yaml index f84a9842..cba67f6f 100644 --- a/.github/workflows/atom-vllm-oot-test.yaml +++ b/.github/workflows/atom-vllm-oot-test.yaml @@ -43,7 +43,6 @@ jobs: # Keep CI runtime under control: enable only one OOT model for now. 
- model_name: "Kimi-K2-Thinking-MXFP4" model_path: "amd/Kimi-K2-Thinking-MXFP4" - accuracy_test_threshold: "0.90" runner: atom-mi355-8gpu.predownload runs-on: ${{ matrix.runner }} timeout-minutes: 180 diff --git a/.github/workflows/docker-release.yaml b/.github/workflows/docker-release.yaml index b9a67887..1d95cffb 100644 --- a/.github/workflows/docker-release.yaml +++ b/.github/workflows/docker-release.yaml @@ -192,5 +192,8 @@ jobs: # Remove build and tagged images to free disk space docker rmi atom_release:ci || true docker rmi rocm/atom-dev:latest || true + docker rmi atom_oot_release:ci || true + docker rmi rocm/atom-dev-vllm:latest || true # Remove nightly tagged image if it exists docker images "rocm/atom-dev:nightly_*" -q | xargs -r docker rmi || true + docker images "rocm/atom-dev-vllm-v*:*" -q | xargs -r docker rmi || true From b8215da9f00a41e2e0157fc5ed2ee786dc51bbe8 Mon Sep 17 00:00:00 2001 From: zejunchen-zejun Date: Thu, 12 Mar 2026 17:32:32 +0800 Subject: [PATCH 13/15] add Signed-off-by: zejunchen-zejun --- .github/workflows/docker-release.yaml | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/.github/workflows/docker-release.yaml b/.github/workflows/docker-release.yaml index 1d95cffb..aad79fe6 100644 --- a/.github/workflows/docker-release.yaml +++ b/.github/workflows/docker-release.yaml @@ -28,11 +28,11 @@ on: type: string default: "atom-mi355-8gpu.predownload" build_oot_image: - description: "Build OOT vLLM image in addition to ATOM image" + description: "Build OOT vLLM image" type: boolean default: false oot_base_image: - description: "Base image for OOT build (empty means use local atom_release:ci)" + description: "Base image for OOT vLLM (empty means rocm/atom-dev:latest)" default: "" vllm_commit: description: "vLLM commit for OOT image" @@ -177,12 +177,10 @@ jobs: - name: Push OOT Docker image if: ${{ success() && inputs.build_oot_image == true }} run: | - TAG=nightly_$(date +%Y%m%d) VLLM_VER="${{ 
inputs.vllm_version || env.VLLM_VERSION }}" - docker tag atom_oot_release:ci rocm/atom-dev-vllm:latest - docker push rocm/atom-dev-vllm:latest - docker tag atom_oot_release:ci rocm/atom-dev-vllm-v${VLLM_VER}:${TAG} - docker push rocm/atom-dev-vllm-v${VLLM_VER}:${TAG} + OOT_TAG="vllm-v${VLLM_VER}-nightly_$(date +%Y%m%d)" + docker tag atom_oot_release:ci rocm/atom-dev:${OOT_TAG} + docker push rocm/atom-dev:${OOT_TAG} - name: Clean Up if: always() @@ -193,7 +191,6 @@ jobs: docker rmi atom_release:ci || true docker rmi rocm/atom-dev:latest || true docker rmi atom_oot_release:ci || true - docker rmi rocm/atom-dev-vllm:latest || true # Remove nightly tagged image if it exists docker images "rocm/atom-dev:nightly_*" -q | xargs -r docker rmi || true - docker images "rocm/atom-dev-vllm-v*:*" -q | xargs -r docker rmi || true + docker images "rocm/atom-dev:vllm-v*-nightly_*" -q | xargs -r docker rmi || true From 744bb8fde2ac981fb071db377e2319dc7f55ad14 Mon Sep 17 00:00:00 2001 From: zejunchen-zejun Date: Thu, 12 Mar 2026 17:51:50 +0800 Subject: [PATCH 14/15] add Signed-off-by: zejunchen-zejun --- .github/scripts/atom_oot_test.sh | 3 +++ docker/Dockerfile | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/scripts/atom_oot_test.sh b/.github/scripts/atom_oot_test.sh index 8785af69..4eff1dfc 100644 --- a/.github/scripts/atom_oot_test.sh +++ b/.github/scripts/atom_oot_test.sh @@ -40,6 +40,9 @@ ACCURACY_LOG_FILE=${ACCURACY_LOG_FILE:-/tmp/oot_accuracy_output.txt} # Format: # MODEL_NAME|MODEL_PATH|EXTRA_ARGS|THRESHOLD +# Note: CI runs Kimi-K2 with TP=4 on an 8-GPU runner to reduce runtime and +# improve CI stability. Full mode uses TP=8 on the same class of runner for +# higher-fidelity validation. 
CI_MODE_MODELS=( "Kimi-K2|amd/Kimi-K2-Thinking-MXFP4|--trust-remote-code --kv-cache-dtype fp8 --tensor-parallel-size 4 --enable-expert-parallel|0.90" ) diff --git a/docker/Dockerfile b/docker/Dockerfile index e97864ac..7d4f0a51 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -6,7 +6,7 @@ ARG GPU_ARCH="gfx942;gfx950" # OOT image stage: extends an ATOM base image with vLLM + OOT deps. # Build with: docker build --target oot_image --build-arg BASE_IMAGE=... # -------------------------------------------------------------------- -FROM ${BASE_IMAGE} as oot_image +FROM ${BASE_IMAGE} AS oot_image ARG MAX_JOBS=64 ARG VENV_PYTHON="/opt/venv/bin/python" From 5fa84b69bdd20a7b9d930c506f1a418d2eb15498 Mon Sep 17 00:00:00 2001 From: zejunchen-zejun Date: Thu, 12 Mar 2026 20:36:41 +0800 Subject: [PATCH 15/15] add Signed-off-by: zejunchen-zejun --- .github/scripts/atom_oot_test.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/scripts/atom_oot_test.sh b/.github/scripts/atom_oot_test.sh index 4eff1dfc..dd4a8577 100644 --- a/.github/scripts/atom_oot_test.sh +++ b/.github/scripts/atom_oot_test.sh @@ -169,7 +169,7 @@ accuracy_one_model() { echo "Threshold: ${threshold}" lm_eval --model local-completions \ - --model_args model="${resolved_model_path}",base_url="http://127.0.0.1:${VLLM_PORT}/v1/completions",num_concurrent=65,max_retries=1,tokenized_requests=False \ + --model_args model="${resolved_model_path}",base_url="http://127.0.0.1:${VLLM_PORT}/v1/completions",num_concurrent=65,max_retries=1,tokenized_requests=False,trust_remote_code=True \ --tasks gsm8k \ --num_fewshot 3 \ --output_path "${result_file}" 2>&1 | tee -a "${ACCURACY_LOG_FILE}"