From 8a6611842f47fbc73fbf4a2e9b8c1bcc510f7ce1 Mon Sep 17 00:00:00 2001 From: zejunchen-zejun Date: Wed, 11 Mar 2026 10:57:52 +0800 Subject: [PATCH 01/15] [plugin][CI/CD] establish CI/CD workflow and docker release for OOT Signed-off-by: zejunchen-zejun --- .github/workflows/atom-test.yaml | 244 ++++++++++++++++++ .../plugin/test_plugin_config_translation.py | 82 ++++++ tests/plugin/test_plugin_env_flags.py | 56 ++++ tests/plugin/test_plugin_mode_status.py | 36 +++ tests/plugin/test_plugin_registries.py | 57 ++++ .../plugin/test_plugin_unsupported_models.py | 23 ++ tests/plugin/test_plugin_vllm_import_paths.py | 85 ++++++ 7 files changed, 583 insertions(+) create mode 100644 tests/plugin/test_plugin_config_translation.py create mode 100644 tests/plugin/test_plugin_env_flags.py create mode 100644 tests/plugin/test_plugin_mode_status.py create mode 100644 tests/plugin/test_plugin_registries.py create mode 100644 tests/plugin/test_plugin_unsupported_models.py create mode 100644 tests/plugin/test_plugin_vllm_import_paths.py diff --git a/.github/workflows/atom-test.yaml b/.github/workflows/atom-test.yaml index 9736f09f..3c061ca3 100644 --- a/.github/workflows/atom-test.yaml +++ b/.github/workflows/atom-test.yaml @@ -470,3 +470,247 @@ jobs: docker rm "$CONTAINER_NAME" || true # Remove the pre-built image to free disk space on the runner docker rmi "rocm/atom-dev:pre-build-${{ env.GITHUB_COMMIT_SHA }}" || true + + atom-vllm-oot: + needs: [pre-checks, build_atom_image] + if: ${{ needs.pre-checks.result == 'success' && needs.build_atom_image.result == 'success' && (!github.event.pull_request || github.event.pull_request.draft == false) }} + name: ATOM vLLM OOT Test + strategy: + fail-fast: false + matrix: + include: + # Keep CI runtime under control: enable only one OOT model for now. 
+ - model_name: "Kimi-K2-Thinking-MXFP4" + model_path: "amd/Kimi-K2-Thinking-MXFP4" + accuracy_test_threshold: "0.90" + runner: atom-mi355-8gpu.predownload + runs-on: ${{ matrix.runner }} + timeout-minutes: 180 + env: + CONTAINER_NAME: atom_vllm_oot_${{ strategy.job-index }} + OOT_IMAGE_TAG: atom_vllm_oot_test:${{ github.sha }}-${{ strategy.job-index }} + VLLM_COMMIT: b31e9326a7d9394aab8c767f8ebe225c65594b60 + VLLM_VERSION: "0.17" + + steps: + - name: Clean up containers and workspace + run: | + echo "=== Cleaning up containers on $(hostname) ===" + containers=$(docker ps -q) + if [ -n "$containers" ]; then + docker kill $containers || true + fi + docker rm -f "$CONTAINER_NAME" 2>/dev/null || true + docker run --rm -v "${GITHUB_WORKSPACE:-$PWD}":/workspace -w /workspace --privileged rocm/pytorch:latest bash -lc "ls -la /workspace/ && rm -rf /workspace/*" || true + + - name: Checkout ATOM repo + uses: actions/checkout@v4 + + - name: Docker Login + if: ${{ !github.event.pull_request.head.repo.fork }} + run: | + docker login -u ${{ secrets.DOCKER_USERNAME }} -p ${{ secrets.DOCKER_PASSWORD }} + + - name: Prepare OOT base image for forked repo + if: ${{ github.event.pull_request.head.repo.fork }} + run: | + cat < Dockerfile.mod + FROM ${{ env.ATOM_BASE_NIGTHLY_IMAGE }} + RUN pip install -U lm-eval[api] + RUN pip show lm-eval || true + RUN pip install hf_transfer + RUN pip show hf_transfer || true + RUN echo "=== Aiter version BEFORE uninstall ===" && pip show amd-aiter || true + RUN pip uninstall -y amd-aiter + RUN pip install --upgrade "pybind11>=3.0.1" + RUN pip show pybind11 + RUN rm -rf /app/aiter-test + RUN git clone https://github.com/ROCm/aiter.git /app/aiter-test && \\ + cd /app/aiter-test && \\ + git checkout HEAD && \\ + git submodule sync && git submodule update --init --recursive && \\ + MAX_JOBS=64 PREBUILD_KERNELS=0 GPU_ARCHS=gfx950 python3 setup.py develop + RUN echo "=== Aiter version AFTER installation ===" && pip show amd-aiter || true + RUN echo "=== 
ATOM version BEFORE uninstall ===" && pip show atom || true + RUN pip uninstall -y atom + RUN rm -rf /app/ATOM + RUN git clone ${{ env.GITHUB_REPO_URL }} /app/ATOM && \\ + cd /app/ATOM && \\ + git checkout ${{ env.GITHUB_COMMIT_SHA }} && \\ + pip install -e . + RUN echo "=== ATOM version AFTER installation ===" && pip show atom || true + EOF + + docker build --pull --network=host \ + --no-cache \ + -t atom_oot_base:ci \ + -f Dockerfile.mod . + echo "OOT_BASE_IMAGE=atom_oot_base:ci" >> "$GITHUB_ENV" + + - name: Select OOT base image from pre-built ATOM image + if: ${{ !github.event.pull_request.head.repo.fork }} + run: | + echo "OOT_BASE_IMAGE=rocm/atom-dev:pre-build-${{ env.GITHUB_COMMIT_SHA }}" >> "$GITHUB_ENV" + + - name: Build OOT vLLM image + run: | + chmod +x docker/plugin/build_OOT_vLLM.sh + IMAGE_TAG="${OOT_IMAGE_TAG}" \ + BASE_IMAGE="${OOT_BASE_IMAGE}" \ + VLLM_COMMIT="${VLLM_COMMIT}" \ + VLLM_VERSION="${VLLM_VERSION}" \ + INSTALL_LM_EVAL=1 \ + BUILD_NO_CACHE=1 \ + docker/plugin/build_OOT_vLLM.sh + + - name: Start OOT test container + run: | + if [ -f "/etc/podinfo/gha-render-devices" ]; then + DEVICE_FLAG=$(cat /etc/podinfo/gha-render-devices) + else + DEVICE_FLAG="--device /dev/dri" + fi + + if [ -d "/models" ]; then + MODEL_MOUNT="-v /models:/models" + else + echo "Warning: /models directory not found on runner; skipping /models mount." 
+ MODEL_MOUNT="" + fi + + docker run -dt --device=/dev/kfd $DEVICE_FLAG \ + -v "${GITHUB_WORKSPACE:-$PWD}":/workspace \ + $MODEL_MOUNT \ + -w /workspace \ + --ipc=host --group-add video \ + --shm-size=16G \ + --privileged \ + --cap-add=SYS_PTRACE \ + -e HF_TOKEN="${HF_TOKEN:-}" \ + --security-opt seccomp=unconfined \ + --ulimit memlock=-1 \ + --ulimit stack=67108864 \ + -v "${{ github.workspace }}:/workspace" \ + -w /workspace \ + --name "$CONTAINER_NAME" \ + "$OOT_IMAGE_TAG" + env: + GITHUB_WORKSPACE: ${{ github.workspace }} + + - name: Resolve model path + run: | + if [ -f "/models/${{ matrix.model_path }}/config.json" ]; then + echo "MODEL_PATH=/models/${{ matrix.model_path }}" >> "$GITHUB_ENV" + echo "Use pre-downloaded model path: /models/${{ matrix.model_path }}" + else + echo "MODEL_PATH=${{ matrix.model_path }}" >> "$GITHUB_ENV" + echo "Use HuggingFace model path: ${{ matrix.model_path }}" + fi + + - name: Download model if needed + run: | + if [ -d "/models" ] && [ ! -f "/models/${{ matrix.model_path }}/config.json" ]; then + echo "Downloading model to /models/${{ matrix.model_path }}" + docker exec -e HF_TOKEN=${{ secrets.AMD_HF_TOKEN }} "$CONTAINER_NAME" bash -lc "hf download ${{ matrix.model_path }} --local-dir /models/${{ matrix.model_path }}" + else + echo "Skip model download" + fi + + - name: Launch vLLM server with ATOM OOT plugin + run: | + docker exec "$CONTAINER_NAME" bash -lc " + set -euo pipefail + export SAFETENSORS_FAST_GPU=1 + export VLLM_ROCM_USE_AITER=1 + export VLLM_RPC_TIMEOUT=1800000 + export VLLM_CACHE_ROOT=/tmp/.cache/vllm + export TORCHINDUCTOR_CACHE_DIR=/tmp/.cache/inductor + rm -rf /tmp/.cache + + nohup vllm serve \"$MODEL_PATH\" \ + --host 0.0.0.0 \ + --port 8000 \ + --tensor-parallel-size 8 \ + --enable-expert-parallel \ + --trust-remote-code \ + --disable-log-requests \ + --gpu-memory-utilization 0.9 \ + --async-scheduling \ + --load-format fastsafetensors \ + --kv-cache-dtype fp8 \ + --max-model-len 16384 \ + > 
/tmp/vllm_oot.log 2>&1 & + echo \$! > /tmp/vllm_oot.pid + echo \"Server PID: \$(cat /tmp/vllm_oot.pid)\" + " + + - name: Wait for vLLM readiness + timeout-minutes: 30 + run: | + set -euo pipefail + for i in $(seq 1 60); do + if docker exec "$CONTAINER_NAME" bash -lc "curl -sS http://127.0.0.1:8000/v1/models >/dev/null"; then + echo "vLLM server is ready." + exit 0 + fi + echo "Waiting for server... ($i/60)" + sleep 30 + done + echo "vLLM server did not become ready in time." + docker exec "$CONTAINER_NAME" bash -lc "tail -n 200 /tmp/vllm_oot.log || true" + exit 1 + + - name: Run OOT accuracy test (gsm8k) + timeout-minutes: 45 + run: | + docker exec "$CONTAINER_NAME" bash -lc " + set -euo pipefail + if ! command -v lm_eval >/dev/null 2>&1; then + pip install 'lm-eval[api]' + fi + mkdir -p /tmp/oot_accuracy_results + RESULT_FILE=/tmp/oot_accuracy_results/\$(date +%Y%m%d%H%M%S).json + lm_eval --model local-completions \ + --model_args model=\"$MODEL_PATH\",base_url=http://127.0.0.1:8000/v1/completions,num_concurrent=65,max_retries=1,tokenized_requests=False \ + --tasks gsm8k \ + --num_fewshot 3 \ + --output_path \"\$RESULT_FILE\" 2>&1 | tee /tmp/oot_accuracy_output.txt + echo \"OOT_RESULT_FILE=\$RESULT_FILE\" + " + + - name: Check OOT accuracy threshold + run: | + docker exec "$CONTAINER_NAME" bash -lc " + set -euo pipefail + python -c \"import json, glob; files=sorted(glob.glob('/tmp/oot_accuracy_results/*.json')); assert files, 'ERROR: No OOT accuracy result JSON found.'; threshold=float('${{ matrix.accuracy_test_threshold }}'); result_file=files[-1]; data=json.load(open(result_file)); value=data['results']['gsm8k']['exact_match,flexible-extract']; print('RESULT_FILE:', result_file); print('Flexible extract value:', value); print('Accuracy threshold:', threshold); assert value >= threshold, f'Accuracy test failed: {value} < {threshold}'; print(f'Accuracy test passed: {value} >= {threshold}')\" + " + + - name: Collect OOT accuracy summary + if: success() + run: | + 
echo "OOT Accuracy Test Summary for ${{ matrix.model_name }}:" >> $GITHUB_STEP_SUMMARY + docker exec "$CONTAINER_NAME" bash -lc "awk '/\|Tasks\|Version\|/,/^$/ { if (NF > 0) print }' /tmp/oot_accuracy_output.txt" >> $GITHUB_STEP_SUMMARY || true + + - name: Collect OOT logs and results + if: always() + run: | + docker cp "$CONTAINER_NAME":/tmp/vllm_oot.log ./vllm_oot.log || true + docker cp "$CONTAINER_NAME":/tmp/oot_accuracy_output.txt ./oot_accuracy_output.txt || true + docker cp "$CONTAINER_NAME":/tmp/oot_accuracy_results ./oot_accuracy_results || true + + - name: Upload OOT artifacts + if: always() + uses: actions/upload-artifact@v4 + with: + name: oot-${{ matrix.model_name }}-artifacts + path: | + vllm_oot.log + oot_accuracy_output.txt + oot_accuracy_results + + - name: Clean up OOT test + if: always() + run: | + docker exec "$CONTAINER_NAME" bash -lc "if [ -f /tmp/vllm_oot.pid ]; then kill \$(cat /tmp/vllm_oot.pid) || true; fi" || true + docker stop "$CONTAINER_NAME" || true + docker rm "$CONTAINER_NAME" || true diff --git a/tests/plugin/test_plugin_config_translation.py b/tests/plugin/test_plugin_config_translation.py new file mode 100644 index 00000000..d85fcf8d --- /dev/null +++ b/tests/plugin/test_plugin_config_translation.py @@ -0,0 +1,82 @@ +import pytest + +import atom.plugin.config as plugin_config + + +class _Obj: + def __init__(self, **kwargs): + for k, v in kwargs.items(): + setattr(self, k, v) + + +class _FakeConfig: + def __init__(self, **kwargs): + for k, v in kwargs.items(): + setattr(self, k, v) + + +class _FakeCompilationConfig: + def __init__(self, level, use_cudagraph, cudagraph_mode): + self.level = level + self.use_cudagraph = use_cudagraph + self.cudagraph_mode = cudagraph_mode + + +def _patch_atom_config_module(monkeypatch): + import atom.config as atom_config_module + + monkeypatch.setattr(atom_config_module, "Config", _FakeConfig, raising=False) + monkeypatch.setattr( + atom_config_module, "CompilationConfig", _FakeCompilationConfig, 
raising=False + ) + + +def test_generate_from_vllm_translates_core_fields(monkeypatch): + _patch_atom_config_module(monkeypatch) + monkeypatch.setenv("ATOM_DISABLE_VLLM_PLUGIN_ATTENTION", "0") + + vllm_cfg = _Obj( + model_config=_Obj(model="m1", max_model_len=4096), + scheduler_config=_Obj(max_num_batched_tokens=2048, max_num_seqs=8), + cache_config=_Obj( + gpu_memory_utilization=0.5, + block_size=16, + num_gpu_blocks=1024, + cache_dtype="auto", + enable_prefix_caching=True, + ), + parallel_config=_Obj( + rank=1, tensor_parallel_size=2, enable_expert_parallel=False + ), + compilation_config=_Obj(mode=3), + quant_config=_Obj(name="q"), + ) + + cfg = plugin_config._generate_atom_config_from_vllm_config(vllm_cfg) + + assert cfg.model == "m1" + assert cfg.max_num_batched_tokens == 2048 + assert cfg.max_num_seqs == 8 + assert cfg.max_model_len == 4096 + assert cfg.tensor_parallel_size == 2 + assert cfg.enforce_eager is True + assert cfg.compilation_config.level == 3 + assert cfg.plugin_config.is_plugin_mode is True + assert cfg.plugin_config.is_vllm is True + assert cfg.plugin_config.is_sglang is False + assert cfg.plugin_config.vllm_use_atom_attention is True + + +def test_generate_atom_config_requires_plugin_mode(monkeypatch): + import atom.plugin.config as config_module + import atom.plugin as plugin_module + import atom.config as atom_config_module + + monkeypatch.setattr(plugin_module, "is_vllm", lambda: False, raising=False) + monkeypatch.setattr(plugin_module, "is_sglang", lambda: False, raising=False) + monkeypatch.setattr( + atom_config_module, "set_current_atom_config", lambda _cfg: None, raising=False + ) + + with pytest.raises(ValueError, match="running in plugin mode"): + config_module.generate_atom_config_for_plugin_mode(config=None) diff --git a/tests/plugin/test_plugin_env_flags.py b/tests/plugin/test_plugin_env_flags.py new file mode 100644 index 00000000..6ca39018 --- /dev/null +++ b/tests/plugin/test_plugin_env_flags.py @@ -0,0 +1,56 @@ +import 
importlib +import importlib.util +import sys +import types + +import pytest + + +def test_disable_vllm_plugin_flag_disables_platform(monkeypatch): + # ATOM_DISABLE_VLLM_PLUGIN takes precedence: + # when it is 1, vLLM should not get ATOM platform/attention at all. + for disable_attention in ("0", "1"): + monkeypatch.setenv("ATOM_DISABLE_VLLM_PLUGIN", "1") + monkeypatch.setenv("ATOM_DISABLE_VLLM_PLUGIN_ATTENTION", disable_attention) + + import atom.plugin.vllm.platform as platform_module + import atom.plugin.vllm.register as register_module + + importlib.reload(platform_module) + importlib.reload(register_module) + + assert platform_module.ATOMPlatform is None + assert register_module.register_platform() is None + + +@pytest.mark.skipif( + importlib.util.find_spec("vllm") is None, + reason="vllm is not installed in current test environment", +) +def test_disable_vllm_plugin_attention_fallbacks_to_non_atom_backend(monkeypatch): + rocm_module = types.ModuleType("vllm.platforms.rocm") + + class _RocmPlatform: + @classmethod + def get_attn_backend_cls(cls, selected_backend, attn_selector_config): + return "vllm.default.backend" + + rocm_module.RocmPlatform = _RocmPlatform + + monkeypatch.setitem(sys.modules, "vllm", types.ModuleType("vllm")) + monkeypatch.setitem( + sys.modules, "vllm.platforms", types.ModuleType("vllm.platforms") + ) + monkeypatch.setitem(sys.modules, "vllm.platforms.rocm", rocm_module) + monkeypatch.setenv("ATOM_DISABLE_VLLM_PLUGIN", "0") + monkeypatch.setenv("ATOM_DISABLE_VLLM_PLUGIN_ATTENTION", "1") + + import atom.plugin.vllm.platform as platform_module + + importlib.reload(platform_module) + + result = platform_module.ATOMPlatform.get_attn_backend_cls( + selected_backend="x", + attn_selector_config=types.SimpleNamespace(use_mla=True), + ) + assert result == "vllm.default.backend" diff --git a/tests/plugin/test_plugin_mode_status.py b/tests/plugin/test_plugin_mode_status.py new file mode 100644 index 00000000..6ce6e2e3 --- /dev/null +++ 
b/tests/plugin/test_plugin_mode_status.py @@ -0,0 +1,36 @@ +import pytest + +from atom.plugin import prepare as plugin_prepare + + +@pytest.fixture(autouse=True) +def _reset_framework_state(): + # Autouse fixture: pytest runs this before/after every test. + plugin_prepare._set_framework_backbone("atom") + yield + plugin_prepare._set_framework_backbone("atom") + + +def test_default_mode_is_server_mode(): + assert plugin_prepare.is_plugin_mode() is False + assert plugin_prepare.is_vllm() is False + assert plugin_prepare.is_sglang() is False + + +def test_set_framework_to_vllm(): + plugin_prepare._set_framework_backbone("vllm") + assert plugin_prepare.is_plugin_mode() is True + assert plugin_prepare.is_vllm() is True + assert plugin_prepare.is_sglang() is False + + +def test_set_framework_to_sgl_alias(): + plugin_prepare._set_framework_backbone("sgl") + assert plugin_prepare.is_plugin_mode() is True + assert plugin_prepare.is_vllm() is False + assert plugin_prepare.is_sglang() is True + + +def test_set_framework_unsupported_raises(): + with pytest.raises(ValueError, match="Unsupported framework"): + plugin_prepare._set_framework_backbone("tensorflow") diff --git a/tests/plugin/test_plugin_registries.py b/tests/plugin/test_plugin_registries.py new file mode 100644 index 00000000..79dbe323 --- /dev/null +++ b/tests/plugin/test_plugin_registries.py @@ -0,0 +1,57 @@ +import sys +import types +import importlib +import importlib.util + +import pytest + +from atom.plugin import prepare as plugin_prepare +import atom.plugin.vllm.register as vllm_register + + +@pytest.fixture(autouse=True) +def _reset_framework_state(): + plugin_prepare._set_framework_backbone("atom") + yield + plugin_prepare._set_framework_backbone("atom") + + +@pytest.mark.skipif( + importlib.util.find_spec("vllm") is None, + reason="vllm is not installed in current test environment", +) +def test_register_platform_returns_oot_platform(monkeypatch): + rocm_module = types.ModuleType("vllm.platforms.rocm") + + 
class _RocmPlatform: + pass + + rocm_module.RocmPlatform = _RocmPlatform + vllm_platforms = types.ModuleType("vllm.platforms") + vllm_platforms.current_platform = None + + monkeypatch.setitem(sys.modules, "vllm", types.ModuleType("vllm")) + monkeypatch.setitem(sys.modules, "vllm.platforms", vllm_platforms) + monkeypatch.setitem(sys.modules, "vllm.platforms.rocm", rocm_module) + + monkeypatch.setenv("ATOM_DISABLE_VLLM_PLUGIN", "0") + monkeypatch.setenv("ATOM_DISABLE_VLLM_PLUGIN_ATTENTION", "0") + + import atom.plugin.vllm.platform as platform_module + + importlib.reload(platform_module) + importlib.reload(vllm_register) + + platform_path = vllm_register.register_platform() + module_name, class_name = platform_path.rsplit(".", 1) + vllm_platforms.current_platform = getattr( + importlib.import_module(module_name), class_name + ) + + # get current platform from vllm side and validate it is ATOM platform. + assert vllm_platforms.current_platform is platform_module.ATOMPlatform + + +def test_register_platform_can_be_disabled(monkeypatch): + monkeypatch.setattr(vllm_register, "disable_vllm_plugin", True, raising=False) + assert vllm_register.register_platform() is None diff --git a/tests/plugin/test_plugin_unsupported_models.py b/tests/plugin/test_plugin_unsupported_models.py new file mode 100644 index 00000000..0419d4a3 --- /dev/null +++ b/tests/plugin/test_plugin_unsupported_models.py @@ -0,0 +1,23 @@ +import importlib.util +import importlib +import sys +import types + +import pytest + + +# FIXME: remove it later when enabling fallback for unsupported models +@pytest.mark.skipif( + importlib.util.find_spec("vllm") is None, + reason="vllm is not installed in current test environment", +) +def test_vllm_wrapper_rejects_unsupported_model_arch(monkeypatch): + # Avoid importing deep model-loader dependencies during test collection/import. 
+ fake_loader = types.ModuleType("atom.model_loader.loader") + fake_loader.load_model_in_plugin_mode = lambda **kwargs: set() + monkeypatch.setitem(sys.modules, "atom.model_loader.loader", fake_loader) + + model_wrapper = importlib.import_module("atom.plugin.vllm.model_wrapper") + + with pytest.raises(ValueError, match="not supported by ATOM OOT backend"): + model_wrapper._get_atom_model_cls("UnknownModelForCausalLM") diff --git a/tests/plugin/test_plugin_vllm_import_paths.py b/tests/plugin/test_plugin_vllm_import_paths.py new file mode 100644 index 00000000..523e0798 --- /dev/null +++ b/tests/plugin/test_plugin_vllm_import_paths.py @@ -0,0 +1,85 @@ +import importlib.util + +import pytest + + +@pytest.mark.skipif( + importlib.util.find_spec("vllm") is None, + reason="vllm is not installed in current test environment", +) +def test_vllm_import_paths_guardrail(): + """Guardrail for OOT vLLM import paths used by ATOM plugin mode.""" + # attention.py / paged_attention.py (new path with legacy fallback) + try: + from vllm.attention.layer import Attention, MLAAttention, AttentionType + except ImportError: + from vllm.model_executor.layers.attention import Attention, MLAAttention + from vllm.v1.attention.backend import AttentionType + + # attention.py + from vllm.config import ( + VllmConfig, + get_current_vllm_config, + get_layers_from_vllm_config, + ) + from vllm.model_executor.layers.attention.mla_attention import ( + MLACommonMetadataBuilder, + QueryLenSupport, + ) + from vllm.utils.math_utils import cdiv, round_down + from vllm.v1.attention.backend import AttentionCGSupport, AttentionMetadataBuilder + from vllm.v1.attention.backends.utils import ( + get_dcp_local_seq_lens, + split_decodes_and_prefills, + split_decodes_prefills_and_extends, + ) + from vllm.v1.attention.ops.common import cp_lse_ag_out_rs + from vllm.v1.attention.ops.merge_attn_states import merge_attn_states + + # model_wrapper.py (core vLLM model interfaces) + from 
vllm.model_executor.models.interfaces import SupportsPP, SupportsQuant + from vllm.model_executor.models.interfaces_base import ( + VllmModel, + VllmModelForTextGeneration, + ) + from vllm.model_executor.models.registry import ModelRegistry + from vllm.sequence import IntermediateTensors + + # attention_mla.py / platform.py / register.py + from vllm import _custom_ops + from vllm.distributed.parallel_state import get_dcp_group + from vllm.platforms import current_platform + from vllm.platforms.rocm import RocmPlatform + + assert all( + obj is not None + for obj in [ + Attention, + MLAAttention, + AttentionType, + QueryLenSupport, + MLACommonMetadataBuilder, + cdiv, + round_down, + AttentionCGSupport, + AttentionMetadataBuilder, + get_dcp_local_seq_lens, + split_decodes_and_prefills, + split_decodes_prefills_and_extends, + cp_lse_ag_out_rs, + merge_attn_states, + VllmConfig, + get_current_vllm_config, + get_layers_from_vllm_config, + SupportsPP, + SupportsQuant, + VllmModel, + VllmModelForTextGeneration, + ModelRegistry, + IntermediateTensors, + _custom_ops, + get_dcp_group, + current_platform, + RocmPlatform, + ] + ) From 50e59403386fad73b588039ea1b0bbcc3a14a427 Mon Sep 17 00:00:00 2001 From: zejunchen-zejun Date: Wed, 11 Mar 2026 22:54:31 +0800 Subject: [PATCH 02/15] add Signed-off-by: zejunchen-zejun --- .github/workflows/atom-test.yaml | 243 ------------- .../workflows/atom-vllm-oot-full-test.yaml | 319 ++++++++++++++++ .github/workflows/atom-vllm-oot-test.yaml | 343 ++++++++++++++++++ 3 files changed, 662 insertions(+), 243 deletions(-) create mode 100644 .github/workflows/atom-vllm-oot-full-test.yaml create mode 100644 .github/workflows/atom-vllm-oot-test.yaml diff --git a/.github/workflows/atom-test.yaml b/.github/workflows/atom-test.yaml index 3c061ca3..dfa4a0b9 100644 --- a/.github/workflows/atom-test.yaml +++ b/.github/workflows/atom-test.yaml @@ -471,246 +471,3 @@ jobs: # Remove the pre-built image to free disk space on the runner docker rmi 
"rocm/atom-dev:pre-build-${{ env.GITHUB_COMMIT_SHA }}" || true - atom-vllm-oot: - needs: [pre-checks, build_atom_image] - if: ${{ needs.pre-checks.result == 'success' && needs.build_atom_image.result == 'success' && (!github.event.pull_request || github.event.pull_request.draft == false) }} - name: ATOM vLLM OOT Test - strategy: - fail-fast: false - matrix: - include: - # Keep CI runtime under control: enable only one OOT model for now. - - model_name: "Kimi-K2-Thinking-MXFP4" - model_path: "amd/Kimi-K2-Thinking-MXFP4" - accuracy_test_threshold: "0.90" - runner: atom-mi355-8gpu.predownload - runs-on: ${{ matrix.runner }} - timeout-minutes: 180 - env: - CONTAINER_NAME: atom_vllm_oot_${{ strategy.job-index }} - OOT_IMAGE_TAG: atom_vllm_oot_test:${{ github.sha }}-${{ strategy.job-index }} - VLLM_COMMIT: b31e9326a7d9394aab8c767f8ebe225c65594b60 - VLLM_VERSION: "0.17" - - steps: - - name: Clean up containers and workspace - run: | - echo "=== Cleaning up containers on $(hostname) ===" - containers=$(docker ps -q) - if [ -n "$containers" ]; then - docker kill $containers || true - fi - docker rm -f "$CONTAINER_NAME" 2>/dev/null || true - docker run --rm -v "${GITHUB_WORKSPACE:-$PWD}":/workspace -w /workspace --privileged rocm/pytorch:latest bash -lc "ls -la /workspace/ && rm -rf /workspace/*" || true - - - name: Checkout ATOM repo - uses: actions/checkout@v4 - - - name: Docker Login - if: ${{ !github.event.pull_request.head.repo.fork }} - run: | - docker login -u ${{ secrets.DOCKER_USERNAME }} -p ${{ secrets.DOCKER_PASSWORD }} - - - name: Prepare OOT base image for forked repo - if: ${{ github.event.pull_request.head.repo.fork }} - run: | - cat < Dockerfile.mod - FROM ${{ env.ATOM_BASE_NIGTHLY_IMAGE }} - RUN pip install -U lm-eval[api] - RUN pip show lm-eval || true - RUN pip install hf_transfer - RUN pip show hf_transfer || true - RUN echo "=== Aiter version BEFORE uninstall ===" && pip show amd-aiter || true - RUN pip uninstall -y amd-aiter - RUN pip install --upgrade 
"pybind11>=3.0.1" - RUN pip show pybind11 - RUN rm -rf /app/aiter-test - RUN git clone https://github.com/ROCm/aiter.git /app/aiter-test && \\ - cd /app/aiter-test && \\ - git checkout HEAD && \\ - git submodule sync && git submodule update --init --recursive && \\ - MAX_JOBS=64 PREBUILD_KERNELS=0 GPU_ARCHS=gfx950 python3 setup.py develop - RUN echo "=== Aiter version AFTER installation ===" && pip show amd-aiter || true - RUN echo "=== ATOM version BEFORE uninstall ===" && pip show atom || true - RUN pip uninstall -y atom - RUN rm -rf /app/ATOM - RUN git clone ${{ env.GITHUB_REPO_URL }} /app/ATOM && \\ - cd /app/ATOM && \\ - git checkout ${{ env.GITHUB_COMMIT_SHA }} && \\ - pip install -e . - RUN echo "=== ATOM version AFTER installation ===" && pip show atom || true - EOF - - docker build --pull --network=host \ - --no-cache \ - -t atom_oot_base:ci \ - -f Dockerfile.mod . - echo "OOT_BASE_IMAGE=atom_oot_base:ci" >> "$GITHUB_ENV" - - - name: Select OOT base image from pre-built ATOM image - if: ${{ !github.event.pull_request.head.repo.fork }} - run: | - echo "OOT_BASE_IMAGE=rocm/atom-dev:pre-build-${{ env.GITHUB_COMMIT_SHA }}" >> "$GITHUB_ENV" - - - name: Build OOT vLLM image - run: | - chmod +x docker/plugin/build_OOT_vLLM.sh - IMAGE_TAG="${OOT_IMAGE_TAG}" \ - BASE_IMAGE="${OOT_BASE_IMAGE}" \ - VLLM_COMMIT="${VLLM_COMMIT}" \ - VLLM_VERSION="${VLLM_VERSION}" \ - INSTALL_LM_EVAL=1 \ - BUILD_NO_CACHE=1 \ - docker/plugin/build_OOT_vLLM.sh - - - name: Start OOT test container - run: | - if [ -f "/etc/podinfo/gha-render-devices" ]; then - DEVICE_FLAG=$(cat /etc/podinfo/gha-render-devices) - else - DEVICE_FLAG="--device /dev/dri" - fi - - if [ -d "/models" ]; then - MODEL_MOUNT="-v /models:/models" - else - echo "Warning: /models directory not found on runner; skipping /models mount." 
- MODEL_MOUNT="" - fi - - docker run -dt --device=/dev/kfd $DEVICE_FLAG \ - -v "${GITHUB_WORKSPACE:-$PWD}":/workspace \ - $MODEL_MOUNT \ - -w /workspace \ - --ipc=host --group-add video \ - --shm-size=16G \ - --privileged \ - --cap-add=SYS_PTRACE \ - -e HF_TOKEN="${HF_TOKEN:-}" \ - --security-opt seccomp=unconfined \ - --ulimit memlock=-1 \ - --ulimit stack=67108864 \ - -v "${{ github.workspace }}:/workspace" \ - -w /workspace \ - --name "$CONTAINER_NAME" \ - "$OOT_IMAGE_TAG" - env: - GITHUB_WORKSPACE: ${{ github.workspace }} - - - name: Resolve model path - run: | - if [ -f "/models/${{ matrix.model_path }}/config.json" ]; then - echo "MODEL_PATH=/models/${{ matrix.model_path }}" >> "$GITHUB_ENV" - echo "Use pre-downloaded model path: /models/${{ matrix.model_path }}" - else - echo "MODEL_PATH=${{ matrix.model_path }}" >> "$GITHUB_ENV" - echo "Use HuggingFace model path: ${{ matrix.model_path }}" - fi - - - name: Download model if needed - run: | - if [ -d "/models" ] && [ ! -f "/models/${{ matrix.model_path }}/config.json" ]; then - echo "Downloading model to /models/${{ matrix.model_path }}" - docker exec -e HF_TOKEN=${{ secrets.AMD_HF_TOKEN }} "$CONTAINER_NAME" bash -lc "hf download ${{ matrix.model_path }} --local-dir /models/${{ matrix.model_path }}" - else - echo "Skip model download" - fi - - - name: Launch vLLM server with ATOM OOT plugin - run: | - docker exec "$CONTAINER_NAME" bash -lc " - set -euo pipefail - export SAFETENSORS_FAST_GPU=1 - export VLLM_ROCM_USE_AITER=1 - export VLLM_RPC_TIMEOUT=1800000 - export VLLM_CACHE_ROOT=/tmp/.cache/vllm - export TORCHINDUCTOR_CACHE_DIR=/tmp/.cache/inductor - rm -rf /tmp/.cache - - nohup vllm serve \"$MODEL_PATH\" \ - --host 0.0.0.0 \ - --port 8000 \ - --tensor-parallel-size 8 \ - --enable-expert-parallel \ - --trust-remote-code \ - --disable-log-requests \ - --gpu-memory-utilization 0.9 \ - --async-scheduling \ - --load-format fastsafetensors \ - --kv-cache-dtype fp8 \ - --max-model-len 16384 \ - > 
/tmp/vllm_oot.log 2>&1 & - echo \$! > /tmp/vllm_oot.pid - echo \"Server PID: \$(cat /tmp/vllm_oot.pid)\" - " - - - name: Wait for vLLM readiness - timeout-minutes: 30 - run: | - set -euo pipefail - for i in $(seq 1 60); do - if docker exec "$CONTAINER_NAME" bash -lc "curl -sS http://127.0.0.1:8000/v1/models >/dev/null"; then - echo "vLLM server is ready." - exit 0 - fi - echo "Waiting for server... ($i/60)" - sleep 30 - done - echo "vLLM server did not become ready in time." - docker exec "$CONTAINER_NAME" bash -lc "tail -n 200 /tmp/vllm_oot.log || true" - exit 1 - - - name: Run OOT accuracy test (gsm8k) - timeout-minutes: 45 - run: | - docker exec "$CONTAINER_NAME" bash -lc " - set -euo pipefail - if ! command -v lm_eval >/dev/null 2>&1; then - pip install 'lm-eval[api]' - fi - mkdir -p /tmp/oot_accuracy_results - RESULT_FILE=/tmp/oot_accuracy_results/\$(date +%Y%m%d%H%M%S).json - lm_eval --model local-completions \ - --model_args model=\"$MODEL_PATH\",base_url=http://127.0.0.1:8000/v1/completions,num_concurrent=65,max_retries=1,tokenized_requests=False \ - --tasks gsm8k \ - --num_fewshot 3 \ - --output_path \"\$RESULT_FILE\" 2>&1 | tee /tmp/oot_accuracy_output.txt - echo \"OOT_RESULT_FILE=\$RESULT_FILE\" - " - - - name: Check OOT accuracy threshold - run: | - docker exec "$CONTAINER_NAME" bash -lc " - set -euo pipefail - python -c \"import json, glob; files=sorted(glob.glob('/tmp/oot_accuracy_results/*.json')); assert files, 'ERROR: No OOT accuracy result JSON found.'; threshold=float('${{ matrix.accuracy_test_threshold }}'); result_file=files[-1]; data=json.load(open(result_file)); value=data['results']['gsm8k']['exact_match,flexible-extract']; print('RESULT_FILE:', result_file); print('Flexible extract value:', value); print('Accuracy threshold:', threshold); assert value >= threshold, f'Accuracy test failed: {value} < {threshold}'; print(f'Accuracy test passed: {value} >= {threshold}')\" - " - - - name: Collect OOT accuracy summary - if: success() - run: | - 
echo "OOT Accuracy Test Summary for ${{ matrix.model_name }}:" >> $GITHUB_STEP_SUMMARY - docker exec "$CONTAINER_NAME" bash -lc "awk '/\|Tasks\|Version\|/,/^$/ { if (NF > 0) print }' /tmp/oot_accuracy_output.txt" >> $GITHUB_STEP_SUMMARY || true - - - name: Collect OOT logs and results - if: always() - run: | - docker cp "$CONTAINER_NAME":/tmp/vllm_oot.log ./vllm_oot.log || true - docker cp "$CONTAINER_NAME":/tmp/oot_accuracy_output.txt ./oot_accuracy_output.txt || true - docker cp "$CONTAINER_NAME":/tmp/oot_accuracy_results ./oot_accuracy_results || true - - - name: Upload OOT artifacts - if: always() - uses: actions/upload-artifact@v4 - with: - name: oot-${{ matrix.model_name }}-artifacts - path: | - vllm_oot.log - oot_accuracy_output.txt - oot_accuracy_results - - - name: Clean up OOT test - if: always() - run: | - docker exec "$CONTAINER_NAME" bash -lc "if [ -f /tmp/vllm_oot.pid ]; then kill \$(cat /tmp/vllm_oot.pid) || true; fi" || true - docker stop "$CONTAINER_NAME" || true - docker rm "$CONTAINER_NAME" || true diff --git a/.github/workflows/atom-vllm-oot-full-test.yaml b/.github/workflows/atom-vllm-oot-full-test.yaml new file mode 100644 index 00000000..9081b47f --- /dev/null +++ b/.github/workflows/atom-vllm-oot-full-test.yaml @@ -0,0 +1,319 @@ +name: ATOM vLLM OOT Validation + +on: + workflow_dispatch: + inputs: + vllm_commit: + description: "vLLM commit to validate" + required: false + type: string + default: "b31e9326a7d9394aab8c767f8ebe225c65594b60" + vllm_version: + description: "vLLM version label in image tag" + required: false + type: string + default: "0.17" + base_image: + description: "ATOM base image for rebuild" + required: false + type: string + default: "rocm/atom-dev:latest" + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: false + +env: + BASE_IMAGE: ${{ inputs.base_image || 'rocm/atom-dev:latest' }} + GITHUB_REPO_URL: ${{ github.event.pull_request.head.repo.clone_url || 
'https://github.com/ROCm/ATOM.git' }} + GITHUB_COMMIT_SHA: ${{ github.event.pull_request.head.sha || github.sha }} + VALIDATION_IMAGE_REPO: rocm/atom-dev + +jobs: + build-oot-image: + name: Build OOT validation image + runs-on: linux-atom-mi355-1 + outputs: + oot_image_tag: ${{ steps.meta.outputs.oot_image_tag }} + steps: + - name: Checkout ATOM repo + uses: actions/checkout@v4 + + - name: Docker Login + run: | + docker login -u ${{ secrets.DOCKER_USERNAME }} -p ${{ secrets.DOCKER_PASSWORD }} + + - name: Build ATOM base image from current commit + run: | + cat <<EOF > Dockerfile.mod + FROM ${{ env.BASE_IMAGE }} + RUN pip install -U lm-eval[api] + RUN pip show lm-eval || true + RUN pip install hf_transfer + RUN pip show hf_transfer || true + RUN echo "=== Aiter version BEFORE uninstall ===" && pip show amd-aiter || true + RUN pip uninstall -y amd-aiter + RUN pip install --upgrade "pybind11>=3.0.1" + RUN pip show pybind11 + RUN rm -rf /app/aiter-test + RUN git clone --depth 1 https://github.com/ROCm/aiter.git /app/aiter-test && \\ + cd /app/aiter-test && \\ + git checkout HEAD && \\ + git submodule sync && git submodule update --init --recursive && \\ + MAX_JOBS=64 PREBUILD_KERNELS=0 GPU_ARCHS=gfx950 python3 setup.py develop + RUN echo "=== Aiter version AFTER installation ===" && pip show amd-aiter || true + RUN echo "=== ATOM version BEFORE uninstall ===" && pip show atom || true + RUN pip uninstall -y atom + RUN rm -rf /app/ATOM + RUN git clone ${{ env.GITHUB_REPO_URL }} /app/ATOM && \\ + cd /app/ATOM && \\ + git checkout ${{ env.GITHUB_COMMIT_SHA }} && \\ + pip install -e . + RUN echo "=== ATOM version AFTER installation ===" && pip show atom || true + EOF + + docker build --pull --network=host \ + --no-cache \ + -t atom_oot_base:ci \ + -f Dockerfile.mod . 
+ + - name: Build OOT vLLM image from rebuilt ATOM base + id: meta + run: | + chmod +x docker/plugin/build_OOT_vLLM.sh + OOT_IMAGE_TAG="${VALIDATION_IMAGE_REPO}:oot-vllm-validation-${GITHUB_COMMIT_SHA}-${{ github.run_id }}" + IMAGE_TAG="${OOT_IMAGE_TAG}" \ + BASE_IMAGE="atom_oot_base:ci" \ + VLLM_COMMIT="${{ inputs.vllm_commit || 'b31e9326a7d9394aab8c767f8ebe225c65594b60' }}" \ + VLLM_VERSION="${{ inputs.vllm_version || '0.17' }}" \ + INSTALL_LM_EVAL=1 \ + BUILD_NO_CACHE=1 \ + docker/plugin/build_OOT_vLLM.sh + + echo "oot_image_tag=${OOT_IMAGE_TAG}" >> "$GITHUB_OUTPUT" + + - name: Push OOT validation image + run: | + docker push "${{ steps.meta.outputs.oot_image_tag }}" + + plugin-ut: + name: Plugin UT (OOT image) + needs: [build-oot-image] + runs-on: linux-atom-mi355-1 + timeout-minutes: 60 + steps: + - name: Checkout ATOM repo + uses: actions/checkout@v4 + + - name: Docker Login + run: | + docker login -u ${{ secrets.DOCKER_USERNAME }} -p ${{ secrets.DOCKER_PASSWORD }} + + - name: Pull built OOT image + run: | + docker pull "${{ needs.build-oot-image.outputs.oot_image_tag }}" + + - name: Run plugin unit tests + run: | + docker run --rm \ + -v "${{ github.workspace }}:/workspace" \ + -w /workspace \ + "${{ needs.build-oot-image.outputs.oot_image_tag }}" \ + bash -lc "pytest -q tests/plugin" + + oot-model-accuracy: + name: OOT Model Accuracy (${{ matrix.model_name }}) + needs: [build-oot-image, plugin-ut] + strategy: + fail-fast: false + matrix: + include: + # This matrix targets model architectures supported by ATOM OOT plugin. 
+ - model_name: "Qwen3 Dense" + model_path: "Qwen/Qwen3-8B" + extra_args: "--trust-remote-code --kv-cache-dtype fp8 --tensor-parallel-size 1" + env_vars: "" + accuracy_test_threshold: "0.70" + runner: linux-atom-mi355-1 + - model_name: "Qwen3 MoE" + model_path: "Qwen/Qwen3-235B-A22B-Instruct-2507-FP8" + extra_args: "--trust-remote-code --kv-cache-dtype fp8 --tensor-parallel-size 8 --enable-expert-parallel" + env_vars: "" + accuracy_test_threshold: "0.87" + runner: atom-mi355-8gpu.predownload + - model_name: "DeepSeek-V3 family" + model_path: "deepseek-ai/DeepSeek-R1-0528" + extra_args: "--trust-remote-code --kv-cache-dtype fp8 --tensor-parallel-size 8" + env_vars: "" + accuracy_test_threshold: "0.93" + runner: atom-mi355-8gpu.predownload + - model_name: "GPT-OSS" + model_path: "openai/gpt-oss-120b" + extra_args: "--trust-remote-code --kv-cache-dtype fp8 --tensor-parallel-size 2 --enable-dp-attention --enable-expert-parallel --gpu-memory-utilization 0.3" + env_vars: | + ATOM_GPT_OSS_MODEL=1 + accuracy_test_threshold: "0.38" + runner: linux-atom-mi355-4 + - model_name: "Kimi-K2" + model_path: "amd/Kimi-K2-Thinking-MXFP4" + extra_args: "--trust-remote-code --kv-cache-dtype fp8 --tensor-parallel-size 8 --enable-expert-parallel" + env_vars: "" + accuracy_test_threshold: "0.90" + runner: atom-mi355-8gpu.predownload + runs-on: ${{ matrix.runner }} + timeout-minutes: 240 + env: + CONTAINER_NAME: atom_vllm_oot_validation_${{ strategy.job-index }} + OOT_IMAGE_TAG: ${{ needs.build-oot-image.outputs.oot_image_tag }} + steps: + - name: Checkout ATOM repo + uses: actions/checkout@v4 + + - name: Docker Login + run: | + docker login -u ${{ secrets.DOCKER_USERNAME }} -p ${{ secrets.DOCKER_PASSWORD }} + + - name: Pull built OOT image + run: | + docker pull "${OOT_IMAGE_TAG}" + + - name: Clean up old containers + run: | + containers=$(docker ps -q) + if [ -n "$containers" ]; then + docker kill $containers || true + fi + docker rm -f "$CONTAINER_NAME" 2>/dev/null || true + + - name: 
Start validation container + run: | + if [ -f "/etc/podinfo/gha-render-devices" ]; then + DEVICE_FLAG=$(cat /etc/podinfo/gha-render-devices) + else + DEVICE_FLAG="--device /dev/dri" + fi + + if [ -d "/models" ]; then + MODEL_MOUNT="-v /models:/models" + else + MODEL_MOUNT="" + fi + + cat > /tmp/oot_env_file.txt << 'EOF' + ${{ matrix.env_vars }} + EOF + + docker run -dt --device=/dev/kfd $DEVICE_FLAG \ + -v "${GITHUB_WORKSPACE:-$PWD}":/workspace \ + $MODEL_MOUNT \ + -w /workspace \ + --ipc=host --group-add video \ + --shm-size=16G \ + --privileged \ + --cap-add=SYS_PTRACE \ + --security-opt seccomp=unconfined \ + --ulimit memlock=-1 \ + --ulimit stack=67108864 \ + --env-file /tmp/oot_env_file.txt \ + -e HF_TOKEN="${HF_TOKEN:-}" \ + --name "$CONTAINER_NAME" \ + "${OOT_IMAGE_TAG}" + env: + GITHUB_WORKSPACE: ${{ github.workspace }} + + - name: Resolve and download model + run: | + if [ -f "/models/${{ matrix.model_path }}/config.json" ]; then + echo "MODEL_PATH=/models/${{ matrix.model_path }}" >> "$GITHUB_ENV" + else + echo "MODEL_PATH=${{ matrix.model_path }}" >> "$GITHUB_ENV" + if [ -d "/models" ]; then + docker exec -e HF_TOKEN=${{ secrets.AMD_HF_TOKEN }} "$CONTAINER_NAME" bash -lc "hf download ${{ matrix.model_path }} --local-dir /models/${{ matrix.model_path }}" + echo "MODEL_PATH=/models/${{ matrix.model_path }}" >> "$GITHUB_ENV" + fi + fi + + - name: Launch vLLM server with ATOM OOT plugin + run: | + docker exec "$CONTAINER_NAME" bash -lc " + set -euo pipefail + export SAFETENSORS_FAST_GPU=1 + export VLLM_ROCM_USE_AITER=1 + export VLLM_RPC_TIMEOUT=1800000 + export VLLM_CACHE_ROOT=/tmp/.cache/vllm + export TORCHINDUCTOR_CACHE_DIR=/tmp/.cache/inductor + rm -rf /tmp/.cache + + nohup vllm serve \"$MODEL_PATH\" \ + --host 0.0.0.0 \ + --port 8000 \ + ${{ matrix.extra_args }} \ + > /tmp/vllm_oot.log 2>&1 & + echo \$! 
> /tmp/vllm_oot.pid + " + + - name: Wait for vLLM readiness + timeout-minutes: 30 + run: | + set -euo pipefail + for i in $(seq 1 60); do + if docker exec "$CONTAINER_NAME" bash -lc "curl -sS http://127.0.0.1:8000/v1/models >/dev/null"; then + echo "vLLM server is ready." + exit 0 + fi + echo "Waiting for server... ($i/60)" + sleep 30 + done + docker exec "$CONTAINER_NAME" bash -lc "tail -n 200 /tmp/vllm_oot.log || true" + exit 1 + + - name: Run gsm8k accuracy + timeout-minutes: 60 + run: | + docker exec "$CONTAINER_NAME" bash -lc " + set -euo pipefail + mkdir -p /tmp/oot_accuracy_results + RESULT_FILE=/tmp/oot_accuracy_results/\$(date +%Y%m%d%H%M%S).json + lm_eval --model local-completions \ + --model_args model=\"$MODEL_PATH\",base_url=http://127.0.0.1:8000/v1/completions,num_concurrent=65,max_retries=1,tokenized_requests=False \ + --tasks gsm8k \ + --num_fewshot 3 \ + --output_path \"\$RESULT_FILE\" 2>&1 | tee /tmp/oot_accuracy_output.txt + " + + - name: Check accuracy threshold + run: | + docker exec "$CONTAINER_NAME" bash -lc " + set -euo pipefail + python -c \"import json, glob; files=sorted(glob.glob('/tmp/oot_accuracy_results/*.json')); assert files, 'No accuracy JSON found'; threshold=float('${{ matrix.accuracy_test_threshold }}'); result_file=files[-1]; data=json.load(open(result_file)); value=data['results']['gsm8k']['exact_match,flexible-extract']; print('RESULT_FILE:', result_file); print('value:', value, 'threshold:', threshold); assert value >= threshold, f'Accuracy failed: {value} < {threshold}'\" + " + + - name: Collect summary + if: success() + run: | + echo "OOT gsm8k summary for ${{ matrix.model_name }}:" >> $GITHUB_STEP_SUMMARY + docker exec "$CONTAINER_NAME" bash -lc "awk '/\|Tasks\|Version\|/,/^$/ { if (NF > 0) print }' /tmp/oot_accuracy_output.txt" >> $GITHUB_STEP_SUMMARY || true + + - name: Collect artifacts + if: always() + run: | + docker cp "$CONTAINER_NAME":/tmp/vllm_oot.log ./vllm_oot.log || true + docker cp 
"$CONTAINER_NAME":/tmp/oot_accuracy_output.txt ./oot_accuracy_output.txt || true + docker cp "$CONTAINER_NAME":/tmp/oot_accuracy_results ./oot_accuracy_results || true + + - name: Upload model artifacts + if: always() + uses: actions/upload-artifact@v4 + with: + name: oot-validation-${{ matrix.model_name }}-${{ github.run_id }} + path: | + vllm_oot.log + oot_accuracy_output.txt + oot_accuracy_results + + - name: Cleanup + if: always() + run: | + docker exec "$CONTAINER_NAME" bash -lc "if [ -f /tmp/vllm_oot.pid ]; then kill \$(cat /tmp/vllm_oot.pid) || true; fi" || true + docker stop "$CONTAINER_NAME" || true + docker rm "$CONTAINER_NAME" || true diff --git a/.github/workflows/atom-vllm-oot-test.yaml b/.github/workflows/atom-vllm-oot-test.yaml new file mode 100644 index 00000000..83d8d8de --- /dev/null +++ b/.github/workflows/atom-vllm-oot-test.yaml @@ -0,0 +1,343 @@ +name: ATOM vLLM OOT Test + +on: + push: + branches: [main] + pull_request: + branches: [main] # Triggers on PRs targeting `main` + types: [opened, synchronize, reopened, ready_for_review] + paths-ignore: + - '**/*.md' + - 'docs/**' + - 'LICENSE' + - '.gitignore' + schedule: + # Nightly at 00:00 Beijing time (16:00 UTC) + - cron: '0 16 * * *' + workflow_dispatch: + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: ${{ github.ref != 'refs/heads/main' }} + +env: + ATOM_BASE_NIGTHLY_IMAGE: rocm/atom-dev:latest + GITHUB_REPO_URL: ${{ github.event.pull_request.head.repo.clone_url || 'https://github.com/ROCm/ATOM.git' }} + GITHUB_COMMIT_SHA: ${{ github.event.pull_request.head.sha || github.event.head_commit.id }} + +jobs: + pre-checks: + uses: ./.github/workflows/pre-checks.yaml + with: + black: true + ruff: true + + build_atom_image: + if: ${{ needs.pre-checks.result == 'success' && (!github.event.pull_request || github.event.pull_request.draft == false) }} + needs: [pre-checks] + name: Build ATOM image + runs-on: build-only-atom + steps: + - name: Checkout ATOM repo + if: 
${{ !github.event.pull_request.head.repo.fork }} + uses: actions/checkout@v4 + + - name: Generate Dockerfile + if: ${{ !github.event.pull_request.head.repo.fork }} + run: | + cat <<EOF > Dockerfile.mod + FROM ${{ env.ATOM_BASE_NIGTHLY_IMAGE }} + RUN pip install -U lm-eval[api] + RUN pip show lm-eval || true + RUN pip install hf_transfer + RUN pip show hf_transfer || true + RUN echo "=== Aiter version BEFORE uninstall ===" && pip show amd-aiter || true + RUN pip uninstall -y amd-aiter + RUN pip install --upgrade "pybind11>=3.0.1" + RUN pip show pybind11 + RUN wget https://github.com/stedolan/jq/releases/download/jq-1.7/jq-linux64 -O jq + RUN chmod +x jq + RUN mv jq /usr/local/bin/jq + RUN rm -rf /app/aiter-test + RUN git clone --depth 1 https://github.com/ROCm/aiter.git /app/aiter-test && \\ + cd /app/aiter-test && \\ + git checkout HEAD && \\ + git submodule sync && git submodule update --init --recursive && \\ + MAX_JOBS=64 PREBUILD_KERNELS=0 GPU_ARCHS=gfx950 python3 setup.py develop + RUN echo "=== Aiter version AFTER installation ===" && pip show amd-aiter || true + + RUN echo "=== ATOM version BEFORE uninstall ===" && pip show atom || true + RUN pip uninstall -y atom + RUN rm -rf /app/ATOM + RUN git clone ${{ env.GITHUB_REPO_URL }} /app/ATOM && \\ + cd /app/ATOM && \\ + git checkout ${{ env.GITHUB_COMMIT_SHA }} && \\ + pip install -e . + + RUN echo "=== ATOM version AFTER installation ===" && pip show atom || true + EOF + + - name: Build Docker image + if: ${{ !github.event.pull_request.head.repo.fork }} + run: | + docker build --pull --network=host \ + --no-cache \ + -t atom_test:ci \ + -f Dockerfile.mod . 
+ + - name: Push Docker image + if: ${{ !github.event.pull_request.head.repo.fork }} + run: | + IMAGE_TAG=rocm/atom-dev:pre-build-${{ env.GITHUB_COMMIT_SHA }} + docker tag atom_test:ci $IMAGE_TAG + docker login -u ${{ secrets.DOCKER_USERNAME }} -p ${{ secrets.DOCKER_PASSWORD }} + docker push $IMAGE_TAG + + - name: Success message + if: ${{ !github.event.pull_request.head.repo.fork }} + run: | + echo "Successfully prepared image: $IMAGE_TAG" + + atom-vllm-oot: + needs: [pre-checks, build_atom_image] + if: ${{ needs.pre-checks.result == 'success' && needs.build_atom_image.result == 'success' && (!github.event.pull_request || github.event.pull_request.draft == false) }} + name: ATOM vLLM OOT Test + strategy: + fail-fast: false + matrix: + include: + # Keep CI runtime under control: enable only one OOT model for now. + - model_name: "Kimi-K2-Thinking-MXFP4" + model_path: "amd/Kimi-K2-Thinking-MXFP4" + accuracy_test_threshold: "0.90" + runner: atom-mi355-8gpu.predownload + runs-on: ${{ matrix.runner }} + timeout-minutes: 180 + env: + CONTAINER_NAME: atom_vllm_oot_${{ strategy.job-index }} + OOT_IMAGE_TAG: atom_vllm_oot_test:${{ github.sha }}-${{ strategy.job-index }} + VLLM_COMMIT: b31e9326a7d9394aab8c767f8ebe225c65594b60 + VLLM_VERSION: "0.17" + + steps: + - name: Clean up containers and workspace + run: | + echo "=== Cleaning up containers on $(hostname) ===" + containers=$(docker ps -q) + if [ -n "$containers" ]; then + docker kill $containers || true + fi + docker rm -f "$CONTAINER_NAME" 2>/dev/null || true + docker run --rm -v "${GITHUB_WORKSPACE:-$PWD}":/workspace -w /workspace --privileged rocm/pytorch:latest bash -lc "ls -la /workspace/ && rm -rf /workspace/*" || true + + - name: Checkout ATOM repo + uses: actions/checkout@v4 + + - name: Docker Login + if: ${{ !github.event.pull_request.head.repo.fork }} + run: | + docker login -u ${{ secrets.DOCKER_USERNAME }} -p ${{ secrets.DOCKER_PASSWORD }} + + - name: Prepare OOT base image for forked repo + if: ${{ 
github.event.pull_request.head.repo.fork }} + run: | + cat <<EOF > Dockerfile.mod + FROM ${{ env.ATOM_BASE_NIGTHLY_IMAGE }} + RUN pip install -U lm-eval[api] + RUN pip show lm-eval || true + RUN pip install hf_transfer + RUN pip show hf_transfer || true + RUN echo "=== Aiter version BEFORE uninstall ===" && pip show amd-aiter || true + RUN pip uninstall -y amd-aiter + RUN pip install --upgrade "pybind11>=3.0.1" + RUN pip show pybind11 + RUN rm -rf /app/aiter-test + RUN git clone https://github.com/ROCm/aiter.git /app/aiter-test && \\ + cd /app/aiter-test && \\ + git checkout HEAD && \\ + git submodule sync && git submodule update --init --recursive && \\ + MAX_JOBS=64 PREBUILD_KERNELS=0 GPU_ARCHS=gfx950 python3 setup.py develop + RUN echo "=== Aiter version AFTER installation ===" && pip show amd-aiter || true + RUN echo "=== ATOM version BEFORE uninstall ===" && pip show atom || true + RUN pip uninstall -y atom + RUN rm -rf /app/ATOM + RUN git clone ${{ env.GITHUB_REPO_URL }} /app/ATOM && \\ + cd /app/ATOM && \\ + git checkout ${{ env.GITHUB_COMMIT_SHA }} && \\ + pip install -e . + RUN echo "=== ATOM version AFTER installation ===" && pip show atom || true + EOF + + docker build --pull --network=host \ + --no-cache \ + -t atom_oot_base:ci \ + -f Dockerfile.mod . 
+ echo "OOT_BASE_IMAGE=atom_oot_base:ci" >> "$GITHUB_ENV" + + - name: Select OOT base image from pre-built ATOM image + if: ${{ !github.event.pull_request.head.repo.fork }} + run: | + echo "OOT_BASE_IMAGE=rocm/atom-dev:pre-build-${{ env.GITHUB_COMMIT_SHA }}" >> "$GITHUB_ENV" + + - name: Build OOT vLLM image + run: | + chmod +x docker/plugin/build_OOT_vLLM.sh + IMAGE_TAG="${OOT_IMAGE_TAG}" \ + BASE_IMAGE="${OOT_BASE_IMAGE}" \ + VLLM_COMMIT="${VLLM_COMMIT}" \ + VLLM_VERSION="${VLLM_VERSION}" \ + INSTALL_LM_EVAL=1 \ + BUILD_NO_CACHE=1 \ + docker/plugin/build_OOT_vLLM.sh + + - name: Start OOT test container + run: | + if [ -f "/etc/podinfo/gha-render-devices" ]; then + DEVICE_FLAG=$(cat /etc/podinfo/gha-render-devices) + else + DEVICE_FLAG="--device /dev/dri" + fi + + if [ -d "/models" ]; then + MODEL_MOUNT="-v /models:/models" + else + echo "Warning: /models directory not found on runner; skipping /models mount." + MODEL_MOUNT="" + fi + + docker run -dt --device=/dev/kfd $DEVICE_FLAG \ + -v "${GITHUB_WORKSPACE:-$PWD}":/workspace \ + $MODEL_MOUNT \ + -w /workspace \ + --ipc=host --group-add video \ + --shm-size=16G \ + --privileged \ + --cap-add=SYS_PTRACE \ + -e HF_TOKEN="${HF_TOKEN:-}" \ + --security-opt seccomp=unconfined \ + --ulimit memlock=-1 \ + --ulimit stack=67108864 \ + -v "${{ github.workspace }}:/workspace" \ + -w /workspace \ + --name "$CONTAINER_NAME" \ + "$OOT_IMAGE_TAG" + env: + GITHUB_WORKSPACE: ${{ github.workspace }} + + - name: Resolve model path + run: | + if [ -f "/models/${{ matrix.model_path }}/config.json" ]; then + echo "MODEL_PATH=/models/${{ matrix.model_path }}" >> "$GITHUB_ENV" + echo "Use pre-downloaded model path: /models/${{ matrix.model_path }}" + else + echo "MODEL_PATH=${{ matrix.model_path }}" >> "$GITHUB_ENV" + echo "Use HuggingFace model path: ${{ matrix.model_path }}" + fi + + - name: Download model if needed + run: | + if [ -d "/models" ] && [ ! 
-f "/models/${{ matrix.model_path }}/config.json" ]; then + echo "Downloading model to /models/${{ matrix.model_path }}" + docker exec -e HF_TOKEN=${{ secrets.AMD_HF_TOKEN }} "$CONTAINER_NAME" bash -lc "hf download ${{ matrix.model_path }} --local-dir /models/${{ matrix.model_path }}" + else + echo "Skip model download" + fi + + - name: Launch vLLM server with ATOM OOT plugin + run: | + docker exec "$CONTAINER_NAME" bash -lc " + set -euo pipefail + export SAFETENSORS_FAST_GPU=1 + export VLLM_ROCM_USE_AITER=1 + export VLLM_RPC_TIMEOUT=1800000 + export VLLM_CACHE_ROOT=/tmp/.cache/vllm + export TORCHINDUCTOR_CACHE_DIR=/tmp/.cache/inductor + rm -rf /tmp/.cache + + nohup vllm serve \"$MODEL_PATH\" \ + --host 0.0.0.0 \ + --port 8000 \ + --tensor-parallel-size 8 \ + --enable-expert-parallel \ + --trust-remote-code \ + --disable-log-requests \ + --gpu-memory-utilization 0.9 \ + --async-scheduling \ + --load-format fastsafetensors \ + --kv-cache-dtype fp8 \ + --max-model-len 16384 \ + > /tmp/vllm_oot.log 2>&1 & + echo \$! > /tmp/vllm_oot.pid + echo \"Server PID: \$(cat /tmp/vllm_oot.pid)\" + " + + - name: Wait for vLLM readiness + timeout-minutes: 30 + run: | + set -euo pipefail + for i in $(seq 1 60); do + if docker exec "$CONTAINER_NAME" bash -lc "curl -sS http://127.0.0.1:8000/v1/models >/dev/null"; then + echo "vLLM server is ready." + exit 0 + fi + echo "Waiting for server... ($i/60)" + sleep 30 + done + echo "vLLM server did not become ready in time." + docker exec "$CONTAINER_NAME" bash -lc "tail -n 200 /tmp/vllm_oot.log || true" + exit 1 + + - name: Run OOT accuracy test (gsm8k) + timeout-minutes: 45 + run: | + docker exec "$CONTAINER_NAME" bash -lc " + set -euo pipefail + if ! 
command -v lm_eval >/dev/null 2>&1; then + pip install 'lm-eval[api]' + fi + mkdir -p /tmp/oot_accuracy_results + RESULT_FILE=/tmp/oot_accuracy_results/\$(date +%Y%m%d%H%M%S).json + lm_eval --model local-completions \ + --model_args model=\"$MODEL_PATH\",base_url=http://127.0.0.1:8000/v1/completions,num_concurrent=65,max_retries=1,tokenized_requests=False \ + --tasks gsm8k \ + --num_fewshot 3 \ + --output_path \"\$RESULT_FILE\" 2>&1 | tee /tmp/oot_accuracy_output.txt + echo \"OOT_RESULT_FILE=\$RESULT_FILE\" + " + + - name: Check OOT accuracy threshold + run: | + docker exec "$CONTAINER_NAME" bash -lc " + set -euo pipefail + python -c \"import json, glob; files=sorted(glob.glob('/tmp/oot_accuracy_results/*.json')); assert files, 'ERROR: No OOT accuracy result JSON found.'; threshold=float('${{ matrix.accuracy_test_threshold }}'); result_file=files[-1]; data=json.load(open(result_file)); value=data['results']['gsm8k']['exact_match,flexible-extract']; print('RESULT_FILE:', result_file); print('Flexible extract value:', value); print('Accuracy threshold:', threshold); assert value >= threshold, f'Accuracy test failed: {value} < {threshold}'; print(f'Accuracy test passed: {value} >= {threshold}')\" + " + + - name: Collect OOT accuracy summary + if: success() + run: | + echo "OOT Accuracy Test Summary for ${{ matrix.model_name }}:" >> $GITHUB_STEP_SUMMARY + docker exec "$CONTAINER_NAME" bash -lc "awk '/\|Tasks\|Version\|/,/^$/ { if (NF > 0) print }' /tmp/oot_accuracy_output.txt" >> $GITHUB_STEP_SUMMARY || true + + - name: Collect OOT logs and results + if: always() + run: | + docker cp "$CONTAINER_NAME":/tmp/vllm_oot.log ./vllm_oot.log || true + docker cp "$CONTAINER_NAME":/tmp/oot_accuracy_output.txt ./oot_accuracy_output.txt || true + docker cp "$CONTAINER_NAME":/tmp/oot_accuracy_results ./oot_accuracy_results || true + + - name: Upload OOT artifacts + if: always() + uses: actions/upload-artifact@v4 + with: + name: oot-${{ matrix.model_name }}-artifacts + path: | + 
vllm_oot.log + oot_accuracy_output.txt + oot_accuracy_results + + - name: Clean up OOT test + if: always() + run: | + docker exec "$CONTAINER_NAME" bash -lc "if [ -f /tmp/vllm_oot.pid ]; then kill \$(cat /tmp/vllm_oot.pid) || true; fi" || true + docker stop "$CONTAINER_NAME" || true + docker rm "$CONTAINER_NAME" || true From a6567a8c1a421f1f500965ffd248a9ac4cc0d87e Mon Sep 17 00:00:00 2001 From: zejunchen-zejun Date: Thu, 12 Mar 2026 09:36:24 +0800 Subject: [PATCH 03/15] add Signed-off-by: zejunchen-zejun --- .github/workflows/atom-vllm-oot-test.yaml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/workflows/atom-vllm-oot-test.yaml b/.github/workflows/atom-vllm-oot-test.yaml index 83d8d8de..24e17810 100644 --- a/.github/workflows/atom-vllm-oot-test.yaml +++ b/.github/workflows/atom-vllm-oot-test.yaml @@ -181,12 +181,18 @@ jobs: - name: Build OOT vLLM image run: | + if [ "${{ github.event_name }}" = "pull_request" ] && [ "${{ github.event.pull_request.head.repo.fork }}" = "true" ]; then + pull_base_image=0 + else + pull_base_image=1 + fi chmod +x docker/plugin/build_OOT_vLLM.sh IMAGE_TAG="${OOT_IMAGE_TAG}" \ BASE_IMAGE="${OOT_BASE_IMAGE}" \ VLLM_COMMIT="${VLLM_COMMIT}" \ VLLM_VERSION="${VLLM_VERSION}" \ INSTALL_LM_EVAL=1 \ + PULL_BASE_IMAGE="${pull_base_image}" \ BUILD_NO_CACHE=1 \ docker/plugin/build_OOT_vLLM.sh From 21f66f66c38f3cf7bf8cdb3c50c503a1ccc5692a Mon Sep 17 00:00:00 2001 From: zejunchen-zejun Date: Thu, 12 Mar 2026 11:25:57 +0800 Subject: [PATCH 04/15] add Signed-off-by: zejunchen-zejun --- .github/scripts/atom_oot_test.sh | 232 ++++++++++++++++++ .github/workflows/atom-test.yaml | 6 +- .../workflows/atom-vllm-oot-full-test.yaml | 67 +---- .github/workflows/atom-vllm-oot-test.yaml | 212 +++++++++------- tests/plugin/test_plugin_env_flags.py | 6 +- 5 files changed, 371 insertions(+), 152 deletions(-) create mode 100644 .github/scripts/atom_oot_test.sh diff --git a/.github/scripts/atom_oot_test.sh b/.github/scripts/atom_oot_test.sh new file 
mode 100644 index 00000000..12143436 --- /dev/null +++ b/.github/scripts/atom_oot_test.sh @@ -0,0 +1,232 @@ +#!/bin/bash +set -euo pipefail + +# Usage: +# .github/scripts/atom_oot_test.sh launch [model_name] +# .github/scripts/atom_oot_test.sh accuracy [model_name] +# +# TYPE: +# launch - launch vLLM server and wait until ready +# accuracy - run gsm8k accuracy test (and threshold check) +# +# MODE: +# ci - only Kimi-K2 +# full - all OOT-supported models +# +# Optional model_name can be used to run a single model in full mode. + +TYPE=${1:-launch} +MODE=${2:-ci} +SELECTED_MODEL=${3:-} + +if [[ "$TYPE" != "launch" && "$TYPE" != "accuracy" ]]; then + echo "Invalid TYPE: $TYPE. Expected: launch or accuracy" + exit 2 +fi + +if [[ "$MODE" != "ci" && "$MODE" != "full" ]]; then + echo "Invalid MODE: $MODE. Expected: ci or full" + exit 2 +fi + +MAX_WAIT_RETRIES=${MAX_WAIT_RETRIES:-60} +WAIT_INTERVAL_SEC=${WAIT_INTERVAL_SEC:-30} +VLLM_PORT=${VLLM_PORT:-8000} +VLLM_HOST=${VLLM_HOST:-0.0.0.0} +VLLM_PID_FILE=${VLLM_PID_FILE:-/tmp/vllm_oot.pid} +VLLM_LOG_FILE=${VLLM_LOG_FILE:-/tmp/vllm_oot.log} +RESULT_DIR=${RESULT_DIR:-/tmp/oot_accuracy_results} +ACCURACY_LOG_FILE=${ACCURACY_LOG_FILE:-/tmp/oot_accuracy_output.txt} + +# Format: +# MODEL_NAME|MODEL_PATH|EXTRA_ARGS|THRESHOLD +CI_MODE_MODELS=( + "Kimi-K2|amd/Kimi-K2-Thinking-MXFP4|--trust-remote-code --kv-cache-dtype fp8 --tensor-parallel-size 8 --enable-expert-parallel|0.90" +) + +FULL_MODE_MODELS=( + "Qwen3 Dense|Qwen/Qwen3-8B|--trust-remote-code --kv-cache-dtype fp8 --tensor-parallel-size 1|0.70" + "Qwen3 MoE|Qwen/Qwen3-235B-A22B-Instruct-2507-FP8|--trust-remote-code --kv-cache-dtype fp8 --tensor-parallel-size 8 --enable-expert-parallel|0.87" + "DeepSeek-V3 family|deepseek-ai/DeepSeek-R1-0528|--trust-remote-code --kv-cache-dtype fp8 --tensor-parallel-size 8|0.93" + "GPT-OSS|openai/gpt-oss-120b|--trust-remote-code --kv-cache-dtype fp8 --tensor-parallel-size 2 --enable-dp-attention --enable-expert-parallel --gpu-memory-utilization 
0.3|0.38" + "Kimi-K2|amd/Kimi-K2-Thinking-MXFP4|--trust-remote-code --kv-cache-dtype fp8 --tensor-parallel-size 8 --enable-expert-parallel|0.90" +) + +declare -a ACTIVE_MODELS=() +if [[ "$MODE" == "ci" ]]; then + ACTIVE_MODELS=("${CI_MODE_MODELS[@]}") +else + ACTIVE_MODELS=("${FULL_MODE_MODELS[@]}") +fi + +resolve_model_path() { + local model_path="$1" + if [[ -f "/models/${model_path}/config.json" ]]; then + echo "/models/${model_path}" + else + echo "${model_path}" + fi +} + +wait_server_ready() { + local model_name="$1" + echo "" + echo "========== Waiting for vLLM server (${model_name}) ==========" + for ((i=1; i<=MAX_WAIT_RETRIES; i++)); do + if curl -sS "http://127.0.0.1:${VLLM_PORT}/v1/models" >/dev/null; then + echo "vLLM server is ready for ${model_name}." + return 0 + fi + + if [[ -f "${VLLM_PID_FILE}" ]]; then + local pid + pid=$(cat "${VLLM_PID_FILE}") + if ! kill -0 "${pid}" 2>/dev/null; then + echo "vLLM process exited early for ${model_name}." + tail -n 200 "${VLLM_LOG_FILE}" || true + return 1 + fi + fi + + echo "Waiting for vLLM server... (${i}/${MAX_WAIT_RETRIES})" + sleep "${WAIT_INTERVAL_SEC}" + done + + echo "vLLM server did not become ready in time for ${model_name}." 
+ tail -n 200 "${VLLM_LOG_FILE}" || true + return 1 +} + +stop_server() { + if [[ -f "${VLLM_PID_FILE}" ]]; then + local pid + pid=$(cat "${VLLM_PID_FILE}") + kill "${pid}" 2>/dev/null || true + rm -f "${VLLM_PID_FILE}" || true + fi +} + +launch_one_model() { + local model_name="$1" + local model_path="$2" + local extra_args="$3" + + local resolved_model_path + resolved_model_path=$(resolve_model_path "${model_path}") + + echo "" + echo "========== Launching vLLM server ==========" + echo "Model name: ${model_name}" + echo "Model path: ${resolved_model_path}" + echo "Extra args: ${extra_args}" + + export SAFETENSORS_FAST_GPU=1 + export VLLM_ROCM_USE_AITER=1 + export VLLM_RPC_TIMEOUT=1800000 + export VLLM_CACHE_ROOT=/tmp/.cache/vllm + export TORCHINDUCTOR_CACHE_DIR=/tmp/.cache/inductor + rm -rf /tmp/.cache + + rm -f "${VLLM_PID_FILE}" || true + + nohup vllm serve "${resolved_model_path}" \ + --host "${VLLM_HOST}" \ + --port "${VLLM_PORT}" \ + --disable-log-requests \ + --async-scheduling \ + --load-format fastsafetensors \ + --max-model-len 16384 \ + ${extra_args} \ + > "${VLLM_LOG_FILE}" 2>&1 & + echo $! > "${VLLM_PID_FILE}" + echo "Server PID: $(cat "${VLLM_PID_FILE}")" + + wait_server_ready "${model_name}" +} + +accuracy_one_model() { + local model_name="$1" + local model_path="$2" + local extra_args="$3" + local threshold="$4" + + local resolved_model_path + resolved_model_path=$(resolve_model_path "${model_path}") + + if ! 
command -v lm_eval >/dev/null 2>&1; then + echo "========== Installing lm-eval ==========" + pip install 'lm-eval[api]' + fi + + mkdir -p "${RESULT_DIR}" + local result_file="${RESULT_DIR}/$(date +%Y%m%d%H%M%S)_${model_name// /_}.json" + + echo "" + echo "========== Running OOT gsm8k accuracy ==========" + echo "Model name: ${model_name}" + echo "Threshold: ${threshold}" + + lm_eval --model local-completions \ + --model_args model="${resolved_model_path}",base_url="http://127.0.0.1:${VLLM_PORT}/v1/completions",num_concurrent=65,max_retries=1,tokenized_requests=False \ + --tasks gsm8k \ + --num_fewshot 3 \ + --output_path "${result_file}" 2>&1 | tee -a "${ACCURACY_LOG_FILE}" + + local value + value=$(python - <<PY +import json +result_file = "${result_file}" +data = json.load(open(result_file)) +value = data["results"]["gsm8k"]["exact_match,flexible-extract"] +threshold = float("${threshold}") +print("RESULT_FILE:", result_file) +print("value:", value, "threshold:", threshold) +assert value >= threshold, f"Accuracy failed: {value} < {threshold}" +print(f"Accuracy passed: {value} >= {threshold}") +PY +) +} + +run_for_models() { + local action="$1" + local matched=0 + + for entry in "${ACTIVE_MODELS[@]}"; do + IFS='|' read -r model_name model_path extra_args threshold <<< "${entry}" + + if [[ -n "${SELECTED_MODEL}" && "${SELECTED_MODEL}" != "${model_name}" ]]; then + continue + fi + matched=1 + + if [[ "${action}" == "launch" ]]; then + launch_one_model "${model_name}" "${model_path}" "${extra_args}" + break + fi + + # accuracy mode: launch + evaluate each selected model, then stop server. 
+ launch_one_model "${model_name}" "${model_path}" "${extra_args}" + accuracy_one_model "${model_name}" "${model_path}" "${extra_args}" "${threshold}" + stop_server + done + + if [[ "${matched}" -eq 0 ]]; then + echo "No model matched MODE=${MODE}, SELECTED_MODEL=${SELECTED_MODEL}" + exit 2 + fi +} + +trap 'stop_server' EXIT + +if [[ "${TYPE}" == "launch" ]]; then + run_for_models "launch" +else + run_for_models "accuracy" +fi + diff --git a/.github/workflows/atom-test.yaml b/.github/workflows/atom-test.yaml index dfa4a0b9..51c19325 100644 --- a/.github/workflows/atom-test.yaml +++ b/.github/workflows/atom-test.yaml @@ -21,7 +21,7 @@ concurrency: cancel-in-progress: ${{ github.ref != 'refs/heads/main' }} env: - ATOM_BASE_NIGTHLY_IMAGE: rocm/atom-dev:latest + ATOM_BASE_NIGHTLY_IMAGE: rocm/atom-dev:latest GITHUB_REPO_URL: ${{ github.event.pull_request.head.repo.clone_url || 'https://github.com/ROCm/ATOM.git' }} GITHUB_COMMIT_SHA: ${{ github.event.pull_request.head.sha || github.event.head_commit.id }} @@ -46,7 +46,7 @@ jobs: if: ${{ !github.event.pull_request.head.repo.fork }} run: | cat < Dockerfile.mod - FROM ${{ env.ATOM_BASE_NIGTHLY_IMAGE }} + FROM ${{ env.ATOM_BASE_NIGHTLY_IMAGE }} RUN pip install -U lm-eval[api] RUN pip show lm-eval || true RUN pip install hf_transfer @@ -234,7 +234,7 @@ jobs: if: (matrix.run_on_pr == true || github.event_name != 'pull_request') && github.event.pull_request.head.repo.fork run: | cat < Dockerfile.mod - FROM ${{ env.ATOM_BASE_NIGTHLY_IMAGE }} + FROM ${{ env.ATOM_BASE_NIGHTLY_IMAGE }} RUN pip install -U lm-eval[api] RUN pip show lm-eval || true RUN pip install hf_transfer diff --git a/.github/workflows/atom-vllm-oot-full-test.yaml b/.github/workflows/atom-vllm-oot-full-test.yaml index 9081b47f..82432e8e 100644 --- a/.github/workflows/atom-vllm-oot-full-test.yaml +++ b/.github/workflows/atom-vllm-oot-full-test.yaml @@ -113,7 +113,7 @@ jobs: run: | docker pull "${{ needs.build-oot-image.outputs.oot_image_tag }}" - - name: Run plugin 
unit tests + - name: Run all plugin unit tests run: | docker run --rm \ -v "${{ github.workspace }}:/workspace" \ @@ -221,71 +221,20 @@ jobs: env: GITHUB_WORKSPACE: ${{ github.workspace }} - - name: Resolve and download model + - name: Pre-download model if /models exists run: | - if [ -f "/models/${{ matrix.model_path }}/config.json" ]; then - echo "MODEL_PATH=/models/${{ matrix.model_path }}" >> "$GITHUB_ENV" + if [ -d "/models" ] && [ ! -f "/models/${{ matrix.model_path }}/config.json" ]; then + docker exec -e HF_TOKEN=${{ secrets.AMD_HF_TOKEN }} "$CONTAINER_NAME" bash -lc "hf download ${{ matrix.model_path }} --local-dir /models/${{ matrix.model_path }}" else - echo "MODEL_PATH=${{ matrix.model_path }}" >> "$GITHUB_ENV" - if [ -d "/models" ]; then - docker exec -e HF_TOKEN=${{ secrets.AMD_HF_TOKEN }} "$CONTAINER_NAME" bash -lc "hf download ${{ matrix.model_path }} --local-dir /models/${{ matrix.model_path }}" - echo "MODEL_PATH=/models/${{ matrix.model_path }}" >> "$GITHUB_ENV" - fi + echo "Skip model pre-download" fi - - name: Launch vLLM server with ATOM OOT plugin + - name: Run OOT launch and gsm8k accuracy via script (full mode) + timeout-minutes: 120 run: | docker exec "$CONTAINER_NAME" bash -lc " set -euo pipefail - export SAFETENSORS_FAST_GPU=1 - export VLLM_ROCM_USE_AITER=1 - export VLLM_RPC_TIMEOUT=1800000 - export VLLM_CACHE_ROOT=/tmp/.cache/vllm - export TORCHINDUCTOR_CACHE_DIR=/tmp/.cache/inductor - rm -rf /tmp/.cache - - nohup vllm serve \"$MODEL_PATH\" \ - --host 0.0.0.0 \ - --port 8000 \ - ${{ matrix.extra_args }} \ - > /tmp/vllm_oot.log 2>&1 & - echo \$! > /tmp/vllm_oot.pid - " - - - name: Wait for vLLM readiness - timeout-minutes: 30 - run: | - set -euo pipefail - for i in $(seq 1 60); do - if docker exec "$CONTAINER_NAME" bash -lc "curl -sS http://127.0.0.1:8000/v1/models >/dev/null"; then - echo "vLLM server is ready." - exit 0 - fi - echo "Waiting for server... 
($i/60)" - sleep 30 - done - docker exec "$CONTAINER_NAME" bash -lc "tail -n 200 /tmp/vllm_oot.log || true" - exit 1 - - - name: Run gsm8k accuracy - timeout-minutes: 60 - run: | - docker exec "$CONTAINER_NAME" bash -lc " - set -euo pipefail - mkdir -p /tmp/oot_accuracy_results - RESULT_FILE=/tmp/oot_accuracy_results/\$(date +%Y%m%d%H%M%S).json - lm_eval --model local-completions \ - --model_args model=\"$MODEL_PATH\",base_url=http://127.0.0.1:8000/v1/completions,num_concurrent=65,max_retries=1,tokenized_requests=False \ - --tasks gsm8k \ - --num_fewshot 3 \ - --output_path \"\$RESULT_FILE\" 2>&1 | tee /tmp/oot_accuracy_output.txt - " - - - name: Check accuracy threshold - run: | - docker exec "$CONTAINER_NAME" bash -lc " - set -euo pipefail - python -c \"import json, glob; files=sorted(glob.glob('/tmp/oot_accuracy_results/*.json')); assert files, 'No accuracy JSON found'; threshold=float('${{ matrix.accuracy_test_threshold }}'); result_file=files[-1]; data=json.load(open(result_file)); value=data['results']['gsm8k']['exact_match,flexible-extract']; print('RESULT_FILE:', result_file); print('value:', value, 'threshold:', threshold); assert value >= threshold, f'Accuracy failed: {value} < {threshold}'\" + bash .github/scripts/atom_oot_test.sh accuracy full '${{ matrix.model_name }}' " - name: Collect summary diff --git a/.github/workflows/atom-vllm-oot-test.yaml b/.github/workflows/atom-vllm-oot-test.yaml index 24e17810..56015240 100644 --- a/.github/workflows/atom-vllm-oot-test.yaml +++ b/.github/workflows/atom-vllm-oot-test.yaml @@ -21,32 +21,125 @@ concurrency: cancel-in-progress: ${{ github.ref != 'refs/heads/main' }} env: - ATOM_BASE_NIGTHLY_IMAGE: rocm/atom-dev:latest + ATOM_BASE_NIGHTLY_IMAGE: rocm/atom-dev:latest GITHUB_REPO_URL: ${{ github.event.pull_request.head.repo.clone_url || 'https://github.com/ROCm/ATOM.git' }} GITHUB_COMMIT_SHA: ${{ github.event.pull_request.head.sha || github.event.head_commit.id }} jobs: + wait-atom-test-success: + name: Wait for 
ATOM Test success + runs-on: ubuntu-latest + timeout-minutes: 180 + outputs: + atom_test_ok: ${{ steps.wait.outputs.atom_test_ok }} + steps: + - name: Wait until ATOM Test is completed for this commit + id: wait + uses: actions/github-script@v7 + with: + script: | + const owner = context.repo.owner; + const repo = context.repo.repo; + const eventName = context.eventName; + const headSha = context.payload.pull_request?.head?.sha ?? context.sha; + const maxAttempts = 180; + const sleepMs = 60000; + + const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms)); + + if (eventName === "workflow_dispatch") { + core.info("workflow_dispatch detected: bypass ATOM Test gate."); + core.setOutput("atom_test_ok", "true"); + return; + } + + let foundCompletedRun = null; + + for (let attempt = 1; attempt <= maxAttempts; attempt++) { + const resp = await github.rest.actions.listWorkflowRuns({ + owner, + repo, + workflow_id: "atom-test.yaml", + event: eventName, + head_sha: headSha, + per_page: 20, + }); + + const candidates = (resp.data.workflow_runs || []) + .filter((run) => run.name === "ATOM Test" && run.id !== context.runId) + .sort((a, b) => new Date(b.created_at) - new Date(a.created_at)); + + if (candidates.length > 0) { + const latest = candidates[0]; + core.info( + `Attempt ${attempt}/${maxAttempts}: latest ATOM Test run id=${latest.id}, status=${latest.status}, conclusion=${latest.conclusion}` + ); + if (latest.status === "completed") { + foundCompletedRun = latest; + break; + } + } else { + core.info(`Attempt ${attempt}/${maxAttempts}: no ATOM Test run found yet for this sha.`); + } + + await sleep(sleepMs); + } + + if (!foundCompletedRun) { + core.warning("Timeout waiting for ATOM Test workflow completion. OOT workflow will be skipped."); + core.setOutput("atom_test_ok", "false"); + return; + } + + const ok = foundCompletedRun.conclusion === "success"; + core.setOutput("atom_test_ok", ok ? 
"true" : "false"); + if (!ok) { + core.warning( + `Skip OOT workflow: ATOM Test conclusion is '${foundCompletedRun.conclusion}'.` + ); + } + pre-checks: + needs: [wait-atom-test-success] + if: ${{ needs.wait-atom-test-success.outputs.atom_test_ok == 'true' }} uses: ./.github/workflows/pre-checks.yaml with: black: true ruff: true build_atom_image: - if: ${{ needs.pre-checks.result == 'success' && (!github.event.pull_request || github.event.pull_request.draft == false) }} - needs: [pre-checks] + if: ${{ needs.wait-atom-test-success.outputs.atom_test_ok == 'true' && needs.pre-checks.result == 'success' && (!github.event.pull_request || github.event.pull_request.draft == false) }} + needs: [wait-atom-test-success, pre-checks] name: Build ATOM image runs-on: build-only-atom steps: - - name: Checkout ATOM repo + - name: Docker Login if: ${{ !github.event.pull_request.head.repo.fork }} + run: | + docker login -u ${{ secrets.DOCKER_USERNAME }} -p ${{ secrets.DOCKER_PASSWORD }} + + - name: Try pull pre-built ATOM image + if: ${{ !github.event.pull_request.head.repo.fork }} + id: pull_prebuilt + run: | + IMAGE_TAG=rocm/atom-dev:pre-build-${{ env.GITHUB_COMMIT_SHA }} + if docker pull "$IMAGE_TAG"; then + echo "image_ready=true" >> "$GITHUB_OUTPUT" + echo "Reusing existing image: $IMAGE_TAG" + else + echo "image_ready=false" >> "$GITHUB_OUTPUT" + echo "Pre-built image not found, will rebuild: $IMAGE_TAG" + fi + + - name: Checkout ATOM repo + if: ${{ !github.event.pull_request.head.repo.fork && steps.pull_prebuilt.outputs.image_ready != 'true' }} uses: actions/checkout@v4 - name: Generate Dockerfile - if: ${{ !github.event.pull_request.head.repo.fork }} + if: ${{ !github.event.pull_request.head.repo.fork && steps.pull_prebuilt.outputs.image_ready != 'true' }} run: | cat < Dockerfile.mod - FROM ${{ env.ATOM_BASE_NIGTHLY_IMAGE }} + FROM ${{ env.ATOM_BASE_NIGHTLY_IMAGE }} RUN pip install -U lm-eval[api] RUN pip show lm-eval || true RUN pip install hf_transfer @@ -78,7 +171,7 @@ 
jobs: EOF - name: Build Docker image - if: ${{ !github.event.pull_request.head.repo.fork }} + if: ${{ !github.event.pull_request.head.repo.fork && steps.pull_prebuilt.outputs.image_ready != 'true' }} run: | docker build --pull --network=host \ --no-cache \ @@ -86,21 +179,25 @@ jobs: -f Dockerfile.mod . - name: Push Docker image - if: ${{ !github.event.pull_request.head.repo.fork }} + if: ${{ !github.event.pull_request.head.repo.fork && steps.pull_prebuilt.outputs.image_ready != 'true' }} run: | IMAGE_TAG=rocm/atom-dev:pre-build-${{ env.GITHUB_COMMIT_SHA }} docker tag atom_test:ci $IMAGE_TAG - docker login -u ${{ secrets.DOCKER_USERNAME }} -p ${{ secrets.DOCKER_PASSWORD }} docker push $IMAGE_TAG - name: Success message if: ${{ !github.event.pull_request.head.repo.fork }} run: | - echo "Successfully prepared image: $IMAGE_TAG" + IMAGE_TAG=rocm/atom-dev:pre-build-${{ env.GITHUB_COMMIT_SHA }} + if [ "${{ steps.pull_prebuilt.outputs.image_ready }}" = "true" ]; then + echo "Successfully reused image: $IMAGE_TAG" + else + echo "Successfully rebuilt and pushed image: $IMAGE_TAG" + fi atom-vllm-oot: - needs: [pre-checks, build_atom_image] - if: ${{ needs.pre-checks.result == 'success' && needs.build_atom_image.result == 'success' && (!github.event.pull_request || github.event.pull_request.draft == false) }} + needs: [wait-atom-test-success, pre-checks, build_atom_image] + if: ${{ needs.wait-atom-test-success.outputs.atom_test_ok == 'true' && needs.pre-checks.result == 'success' && needs.build_atom_image.result == 'success' && (!github.event.pull_request || github.event.pull_request.draft == false) }} name: ATOM vLLM OOT Test strategy: fail-fast: false @@ -142,7 +239,7 @@ jobs: if: ${{ github.event.pull_request.head.repo.fork }} run: | cat < Dockerfile.mod - FROM ${{ env.ATOM_BASE_NIGTHLY_IMAGE }} + FROM ${{ env.ATOM_BASE_NIGHTLY_IMAGE }} RUN pip install -U lm-eval[api] RUN pip show lm-eval || true RUN pip install hf_transfer @@ -159,6 +256,8 @@ jobs: MAX_JOBS=64 
PREBUILD_KERNELS=0 GPU_ARCHS=gfx950 python3 setup.py develop RUN echo "=== Aiter version AFTER installation ===" && pip show amd-aiter || true RUN echo "=== ATOM version BEFORE uninstall ===" && pip show atom || true + # Fork PR fallback: this workflow cannot rely on pre-built images from + # other workflows, so reinstall ATOM from the current PR commit. RUN pip uninstall -y atom RUN rm -rf /app/ATOM RUN git clone ${{ env.GITHUB_REPO_URL }} /app/ATOM && \\ @@ -196,6 +295,14 @@ jobs: BUILD_NO_CACHE=1 \ docker/plugin/build_OOT_vLLM.sh + - name: Run all plugin unit tests + run: | + docker run --rm \ + -v "${{ github.workspace }}:/workspace" \ + -w /workspace \ + "$OOT_IMAGE_TAG" \ + bash -lc "pytest -q tests/plugin" + - name: Start OOT test container run: | if [ -f "/etc/podinfo/gha-render-devices" ]; then @@ -223,23 +330,11 @@ jobs: --security-opt seccomp=unconfined \ --ulimit memlock=-1 \ --ulimit stack=67108864 \ - -v "${{ github.workspace }}:/workspace" \ - -w /workspace \ --name "$CONTAINER_NAME" \ "$OOT_IMAGE_TAG" env: GITHUB_WORKSPACE: ${{ github.workspace }} - - name: Resolve model path - run: | - if [ -f "/models/${{ matrix.model_path }}/config.json" ]; then - echo "MODEL_PATH=/models/${{ matrix.model_path }}" >> "$GITHUB_ENV" - echo "Use pre-downloaded model path: /models/${{ matrix.model_path }}" - else - echo "MODEL_PATH=${{ matrix.model_path }}" >> "$GITHUB_ENV" - echo "Use HuggingFace model path: ${{ matrix.model_path }}" - fi - - name: Download model if needed run: | if [ -d "/models" ] && [ ! 
-f "/models/${{ matrix.model_path }}/config.json" ]; then @@ -249,73 +344,12 @@ jobs: echo "Skip model download" fi - - name: Launch vLLM server with ATOM OOT plugin - run: | - docker exec "$CONTAINER_NAME" bash -lc " - set -euo pipefail - export SAFETENSORS_FAST_GPU=1 - export VLLM_ROCM_USE_AITER=1 - export VLLM_RPC_TIMEOUT=1800000 - export VLLM_CACHE_ROOT=/tmp/.cache/vllm - export TORCHINDUCTOR_CACHE_DIR=/tmp/.cache/inductor - rm -rf /tmp/.cache - - nohup vllm serve \"$MODEL_PATH\" \ - --host 0.0.0.0 \ - --port 8000 \ - --tensor-parallel-size 8 \ - --enable-expert-parallel \ - --trust-remote-code \ - --disable-log-requests \ - --gpu-memory-utilization 0.9 \ - --async-scheduling \ - --load-format fastsafetensors \ - --kv-cache-dtype fp8 \ - --max-model-len 16384 \ - > /tmp/vllm_oot.log 2>&1 & - echo \$! > /tmp/vllm_oot.pid - echo \"Server PID: \$(cat /tmp/vllm_oot.pid)\" - " - - - name: Wait for vLLM readiness - timeout-minutes: 30 - run: | - set -euo pipefail - for i in $(seq 1 60); do - if docker exec "$CONTAINER_NAME" bash -lc "curl -sS http://127.0.0.1:8000/v1/models >/dev/null"; then - echo "vLLM server is ready." - exit 0 - fi - echo "Waiting for server... ($i/60)" - sleep 30 - done - echo "vLLM server did not become ready in time." - docker exec "$CONTAINER_NAME" bash -lc "tail -n 200 /tmp/vllm_oot.log || true" - exit 1 - - - name: Run OOT accuracy test (gsm8k) - timeout-minutes: 45 - run: | - docker exec "$CONTAINER_NAME" bash -lc " - set -euo pipefail - if ! 
command -v lm_eval >/dev/null 2>&1; then - pip install 'lm-eval[api]' - fi - mkdir -p /tmp/oot_accuracy_results - RESULT_FILE=/tmp/oot_accuracy_results/\$(date +%Y%m%d%H%M%S).json - lm_eval --model local-completions \ - --model_args model=\"$MODEL_PATH\",base_url=http://127.0.0.1:8000/v1/completions,num_concurrent=65,max_retries=1,tokenized_requests=False \ - --tasks gsm8k \ - --num_fewshot 3 \ - --output_path \"\$RESULT_FILE\" 2>&1 | tee /tmp/oot_accuracy_output.txt - echo \"OOT_RESULT_FILE=\$RESULT_FILE\" - " - - - name: Check OOT accuracy threshold + - name: Run OOT launch and gsm8k accuracy via script (ci mode) + timeout-minutes: 90 run: | docker exec "$CONTAINER_NAME" bash -lc " set -euo pipefail - python -c \"import json, glob; files=sorted(glob.glob('/tmp/oot_accuracy_results/*.json')); assert files, 'ERROR: No OOT accuracy result JSON found.'; threshold=float('${{ matrix.accuracy_test_threshold }}'); result_file=files[-1]; data=json.load(open(result_file)); value=data['results']['gsm8k']['exact_match,flexible-extract']; print('RESULT_FILE:', result_file); print('Flexible extract value:', value); print('Accuracy threshold:', threshold); assert value >= threshold, f'Accuracy test failed: {value} < {threshold}'; print(f'Accuracy test passed: {value} >= {threshold}')\" + bash .github/scripts/atom_oot_test.sh accuracy ci " - name: Collect OOT accuracy summary diff --git a/tests/plugin/test_plugin_env_flags.py b/tests/plugin/test_plugin_env_flags.py index 6ca39018..e71ca95f 100644 --- a/tests/plugin/test_plugin_env_flags.py +++ b/tests/plugin/test_plugin_env_flags.py @@ -32,7 +32,10 @@ def test_disable_vllm_plugin_attention_fallbacks_to_non_atom_backend(monkeypatch class _RocmPlatform: @classmethod - def get_attn_backend_cls(cls, selected_backend, attn_selector_config): + def get_attn_backend_cls( + cls, selected_backend, attn_selector_config, num_heads + ): + assert num_heads == 16 return "vllm.default.backend" rocm_module.RocmPlatform = _RocmPlatform @@ -52,5 
+55,6 @@ def get_attn_backend_cls(cls, selected_backend, attn_selector_config): result = platform_module.ATOMPlatform.get_attn_backend_cls( selected_backend="x", attn_selector_config=types.SimpleNamespace(use_mla=True), + num_heads=16, ) assert result == "vllm.default.backend" From 7efac3dedfdbf5a26c6b1c0bd640fa94dc238aec Mon Sep 17 00:00:00 2001 From: zejunchen-zejun Date: Thu, 12 Mar 2026 13:11:39 +0800 Subject: [PATCH 05/15] add Signed-off-by: zejunchen-zejun --- tests/plugin/test_plugin_env_flags.py | 41 --------- tests/plugin/test_plugin_registries.py | 41 --------- .../plugin/test_plugin_unsupported_models.py | 23 ----- tests/plugin/test_plugin_vllm_import_paths.py | 85 ------------------- 4 files changed, 190 deletions(-) delete mode 100644 tests/plugin/test_plugin_unsupported_models.py delete mode 100644 tests/plugin/test_plugin_vllm_import_paths.py diff --git a/tests/plugin/test_plugin_env_flags.py b/tests/plugin/test_plugin_env_flags.py index e71ca95f..2e7888a1 100644 --- a/tests/plugin/test_plugin_env_flags.py +++ b/tests/plugin/test_plugin_env_flags.py @@ -1,9 +1,4 @@ import importlib -import importlib.util -import sys -import types - -import pytest def test_disable_vllm_plugin_flag_disables_platform(monkeypatch): @@ -22,39 +17,3 @@ def test_disable_vllm_plugin_flag_disables_platform(monkeypatch): assert platform_module.ATOMPlatform is None assert register_module.register_platform() is None - -@pytest.mark.skipif( - importlib.util.find_spec("vllm") is None, - reason="vllm is not installed in current test environment", -) -def test_disable_vllm_plugin_attention_fallbacks_to_non_atom_backend(monkeypatch): - rocm_module = types.ModuleType("vllm.platforms.rocm") - - class _RocmPlatform: - @classmethod - def get_attn_backend_cls( - cls, selected_backend, attn_selector_config, num_heads - ): - assert num_heads == 16 - return "vllm.default.backend" - - rocm_module.RocmPlatform = _RocmPlatform - - monkeypatch.setitem(sys.modules, "vllm", 
types.ModuleType("vllm")) - monkeypatch.setitem( - sys.modules, "vllm.platforms", types.ModuleType("vllm.platforms") - ) - monkeypatch.setitem(sys.modules, "vllm.platforms.rocm", rocm_module) - monkeypatch.setenv("ATOM_DISABLE_VLLM_PLUGIN", "0") - monkeypatch.setenv("ATOM_DISABLE_VLLM_PLUGIN_ATTENTION", "1") - - import atom.plugin.vllm.platform as platform_module - - importlib.reload(platform_module) - - result = platform_module.ATOMPlatform.get_attn_backend_cls( - selected_backend="x", - attn_selector_config=types.SimpleNamespace(use_mla=True), - num_heads=16, - ) - assert result == "vllm.default.backend" diff --git a/tests/plugin/test_plugin_registries.py b/tests/plugin/test_plugin_registries.py index 79dbe323..e9d6b263 100644 --- a/tests/plugin/test_plugin_registries.py +++ b/tests/plugin/test_plugin_registries.py @@ -1,8 +1,3 @@ -import sys -import types -import importlib -import importlib.util - import pytest from atom.plugin import prepare as plugin_prepare @@ -16,42 +11,6 @@ def _reset_framework_state(): plugin_prepare._set_framework_backbone("atom") -@pytest.mark.skipif( - importlib.util.find_spec("vllm") is None, - reason="vllm is not installed in current test environment", -) -def test_register_platform_returns_oot_platform(monkeypatch): - rocm_module = types.ModuleType("vllm.platforms.rocm") - - class _RocmPlatform: - pass - - rocm_module.RocmPlatform = _RocmPlatform - vllm_platforms = types.ModuleType("vllm.platforms") - vllm_platforms.current_platform = None - - monkeypatch.setitem(sys.modules, "vllm", types.ModuleType("vllm")) - monkeypatch.setitem(sys.modules, "vllm.platforms", vllm_platforms) - monkeypatch.setitem(sys.modules, "vllm.platforms.rocm", rocm_module) - - monkeypatch.setenv("ATOM_DISABLE_VLLM_PLUGIN", "0") - monkeypatch.setenv("ATOM_DISABLE_VLLM_PLUGIN_ATTENTION", "0") - - import atom.plugin.vllm.platform as platform_module - - importlib.reload(platform_module) - importlib.reload(vllm_register) - - platform_path = 
vllm_register.register_platform() - module_name, class_name = platform_path.rsplit(".", 1) - vllm_platforms.current_platform = getattr( - importlib.import_module(module_name), class_name - ) - - # get current platform from vllm side and validate it is ATOM platform. - assert vllm_platforms.current_platform is platform_module.ATOMPlatform - - def test_register_platform_can_be_disabled(monkeypatch): monkeypatch.setattr(vllm_register, "disable_vllm_plugin", True, raising=False) assert vllm_register.register_platform() is None diff --git a/tests/plugin/test_plugin_unsupported_models.py b/tests/plugin/test_plugin_unsupported_models.py deleted file mode 100644 index 0419d4a3..00000000 --- a/tests/plugin/test_plugin_unsupported_models.py +++ /dev/null @@ -1,23 +0,0 @@ -import importlib.util -import importlib -import sys -import types - -import pytest - - -# FIXME: remove it later when enabling fallback for unsupported models -@pytest.mark.skipif( - importlib.util.find_spec("vllm") is None, - reason="vllm is not installed in current test environment", -) -def test_vllm_wrapper_rejects_unsupported_model_arch(monkeypatch): - # Avoid importing deep model-loader dependencies during test collection/import. 
- fake_loader = types.ModuleType("atom.model_loader.loader") - fake_loader.load_model_in_plugin_mode = lambda **kwargs: set() - monkeypatch.setitem(sys.modules, "atom.model_loader.loader", fake_loader) - - model_wrapper = importlib.import_module("atom.plugin.vllm.model_wrapper") - - with pytest.raises(ValueError, match="not supported by ATOM OOT backend"): - model_wrapper._get_atom_model_cls("UnknownModelForCausalLM") diff --git a/tests/plugin/test_plugin_vllm_import_paths.py b/tests/plugin/test_plugin_vllm_import_paths.py deleted file mode 100644 index 523e0798..00000000 --- a/tests/plugin/test_plugin_vllm_import_paths.py +++ /dev/null @@ -1,85 +0,0 @@ -import importlib.util - -import pytest - - -@pytest.mark.skipif( - importlib.util.find_spec("vllm") is None, - reason="vllm is not installed in current test environment", -) -def test_vllm_import_paths_guardrail(): - """Guardrail for OOT vLLM import paths used by ATOM plugin mode.""" - # attention.py / paged_attention.py (new path with legacy fallback) - try: - from vllm.attention.layer import Attention, MLAAttention, AttentionType - except ImportError: - from vllm.model_executor.layers.attention import Attention, MLAAttention - from vllm.v1.attention.backend import AttentionType - - # attention.py - from vllm.config import ( - VllmConfig, - get_current_vllm_config, - get_layers_from_vllm_config, - ) - from vllm.model_executor.layers.attention.mla_attention import ( - MLACommonMetadataBuilder, - QueryLenSupport, - ) - from vllm.utils.math_utils import cdiv, round_down - from vllm.v1.attention.backend import AttentionCGSupport, AttentionMetadataBuilder - from vllm.v1.attention.backends.utils import ( - get_dcp_local_seq_lens, - split_decodes_and_prefills, - split_decodes_prefills_and_extends, - ) - from vllm.v1.attention.ops.common import cp_lse_ag_out_rs - from vllm.v1.attention.ops.merge_attn_states import merge_attn_states - - # model_wrapper.py (core vLLM model interfaces) - from 
vllm.model_executor.models.interfaces import SupportsPP, SupportsQuant - from vllm.model_executor.models.interfaces_base import ( - VllmModel, - VllmModelForTextGeneration, - ) - from vllm.model_executor.models.registry import ModelRegistry - from vllm.sequence import IntermediateTensors - - # attention_mla.py / platform.py / register.py - from vllm import _custom_ops - from vllm.distributed.parallel_state import get_dcp_group - from vllm.platforms import current_platform - from vllm.platforms.rocm import RocmPlatform - - assert all( - obj is not None - for obj in [ - Attention, - MLAAttention, - AttentionType, - QueryLenSupport, - MLACommonMetadataBuilder, - cdiv, - round_down, - AttentionCGSupport, - AttentionMetadataBuilder, - get_dcp_local_seq_lens, - split_decodes_and_prefills, - split_decodes_prefills_and_extends, - cp_lse_ag_out_rs, - merge_attn_states, - VllmConfig, - get_current_vllm_config, - get_layers_from_vllm_config, - SupportsPP, - SupportsQuant, - VllmModel, - VllmModelForTextGeneration, - ModelRegistry, - IntermediateTensors, - _custom_ops, - get_dcp_group, - current_platform, - RocmPlatform, - ] - ) From e1ae8af904da8ef9a4d80eebc3d99c44b9717619 Mon Sep 17 00:00:00 2001 From: zejunchen-zejun Date: Thu, 12 Mar 2026 13:17:48 +0800 Subject: [PATCH 06/15] add Signed-off-by: zejunchen-zejun --- tests/plugin/test_plugin_registries.py | 16 ---------------- 1 file changed, 16 deletions(-) delete mode 100644 tests/plugin/test_plugin_registries.py diff --git a/tests/plugin/test_plugin_registries.py b/tests/plugin/test_plugin_registries.py deleted file mode 100644 index e9d6b263..00000000 --- a/tests/plugin/test_plugin_registries.py +++ /dev/null @@ -1,16 +0,0 @@ -import pytest - -from atom.plugin import prepare as plugin_prepare -import atom.plugin.vllm.register as vllm_register - - -@pytest.fixture(autouse=True) -def _reset_framework_state(): - plugin_prepare._set_framework_backbone("atom") - yield - plugin_prepare._set_framework_backbone("atom") - - -def 
test_register_platform_can_be_disabled(monkeypatch): - monkeypatch.setattr(vllm_register, "disable_vllm_plugin", True, raising=False) - assert vllm_register.register_platform() is None From 4a3763eb04c4a080fb8b6b4fdd5f4afa249ac4f0 Mon Sep 17 00:00:00 2001 From: zejunchen-zejun Date: Thu, 12 Mar 2026 14:11:16 +0800 Subject: [PATCH 07/15] add Signed-off-by: zejunchen-zejun --- .github/scripts/atom_oot_test.sh | 2 +- .github/workflows/atom-vllm-oot-test.yaml | 194 +--------------------- 2 files changed, 8 insertions(+), 188 deletions(-) diff --git a/.github/scripts/atom_oot_test.sh b/.github/scripts/atom_oot_test.sh index 12143436..ec6ce080 100644 --- a/.github/scripts/atom_oot_test.sh +++ b/.github/scripts/atom_oot_test.sh @@ -41,7 +41,7 @@ ACCURACY_LOG_FILE=${ACCURACY_LOG_FILE:-/tmp/oot_accuracy_output.txt} # Format: # MODEL_NAME|MODEL_PATH|EXTRA_ARGS|THRESHOLD CI_MODE_MODELS=( - "Kimi-K2|amd/Kimi-K2-Thinking-MXFP4|--trust-remote-code --kv-cache-dtype fp8 --tensor-parallel-size 8 --enable-expert-parallel|0.90" + "Kimi-K2|amd/Kimi-K2-Thinking-MXFP4|--trust-remote-code --kv-cache-dtype fp8 --tensor-parallel-size 4 --enable-expert-parallel|0.90" ) FULL_MODE_MODELS=( diff --git a/.github/workflows/atom-vllm-oot-test.yaml b/.github/workflows/atom-vllm-oot-test.yaml index 56015240..1919c300 100644 --- a/.github/workflows/atom-vllm-oot-test.yaml +++ b/.github/workflows/atom-vllm-oot-test.yaml @@ -26,178 +26,15 @@ env: GITHUB_COMMIT_SHA: ${{ github.event.pull_request.head.sha || github.event.head_commit.id }} jobs: - wait-atom-test-success: - name: Wait for ATOM Test success - runs-on: ubuntu-latest - timeout-minutes: 180 - outputs: - atom_test_ok: ${{ steps.wait.outputs.atom_test_ok }} - steps: - - name: Wait until ATOM Test is completed for this commit - id: wait - uses: actions/github-script@v7 - with: - script: | - const owner = context.repo.owner; - const repo = context.repo.repo; - const eventName = context.eventName; - const headSha = 
context.payload.pull_request?.head?.sha ?? context.sha; - const maxAttempts = 180; - const sleepMs = 60000; - - const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms)); - - if (eventName === "workflow_dispatch") { - core.info("workflow_dispatch detected: bypass ATOM Test gate."); - core.setOutput("atom_test_ok", "true"); - return; - } - - let foundCompletedRun = null; - - for (let attempt = 1; attempt <= maxAttempts; attempt++) { - const resp = await github.rest.actions.listWorkflowRuns({ - owner, - repo, - workflow_id: "atom-test.yaml", - event: eventName, - head_sha: headSha, - per_page: 20, - }); - - const candidates = (resp.data.workflow_runs || []) - .filter((run) => run.name === "ATOM Test" && run.id !== context.runId) - .sort((a, b) => new Date(b.created_at) - new Date(a.created_at)); - - if (candidates.length > 0) { - const latest = candidates[0]; - core.info( - `Attempt ${attempt}/${maxAttempts}: latest ATOM Test run id=${latest.id}, status=${latest.status}, conclusion=${latest.conclusion}` - ); - if (latest.status === "completed") { - foundCompletedRun = latest; - break; - } - } else { - core.info(`Attempt ${attempt}/${maxAttempts}: no ATOM Test run found yet for this sha.`); - } - - await sleep(sleepMs); - } - - if (!foundCompletedRun) { - core.warning("Timeout waiting for ATOM Test workflow completion. OOT workflow will be skipped."); - core.setOutput("atom_test_ok", "false"); - return; - } - - const ok = foundCompletedRun.conclusion === "success"; - core.setOutput("atom_test_ok", ok ? 
"true" : "false"); - if (!ok) { - core.warning( - `Skip OOT workflow: ATOM Test conclusion is '${foundCompletedRun.conclusion}'.` - ); - } - pre-checks: - needs: [wait-atom-test-success] - if: ${{ needs.wait-atom-test-success.outputs.atom_test_ok == 'true' }} uses: ./.github/workflows/pre-checks.yaml with: black: true ruff: true - build_atom_image: - if: ${{ needs.wait-atom-test-success.outputs.atom_test_ok == 'true' && needs.pre-checks.result == 'success' && (!github.event.pull_request || github.event.pull_request.draft == false) }} - needs: [wait-atom-test-success, pre-checks] - name: Build ATOM image - runs-on: build-only-atom - steps: - - name: Docker Login - if: ${{ !github.event.pull_request.head.repo.fork }} - run: | - docker login -u ${{ secrets.DOCKER_USERNAME }} -p ${{ secrets.DOCKER_PASSWORD }} - - - name: Try pull pre-built ATOM image - if: ${{ !github.event.pull_request.head.repo.fork }} - id: pull_prebuilt - run: | - IMAGE_TAG=rocm/atom-dev:pre-build-${{ env.GITHUB_COMMIT_SHA }} - if docker pull "$IMAGE_TAG"; then - echo "image_ready=true" >> "$GITHUB_OUTPUT" - echo "Reusing existing image: $IMAGE_TAG" - else - echo "image_ready=false" >> "$GITHUB_OUTPUT" - echo "Pre-built image not found, will rebuild: $IMAGE_TAG" - fi - - - name: Checkout ATOM repo - if: ${{ !github.event.pull_request.head.repo.fork && steps.pull_prebuilt.outputs.image_ready != 'true' }} - uses: actions/checkout@v4 - - - name: Generate Dockerfile - if: ${{ !github.event.pull_request.head.repo.fork && steps.pull_prebuilt.outputs.image_ready != 'true' }} - run: | - cat < Dockerfile.mod - FROM ${{ env.ATOM_BASE_NIGHTLY_IMAGE }} - RUN pip install -U lm-eval[api] - RUN pip show lm-eval || true - RUN pip install hf_transfer - RUN pip show hf_transfer || true - RUN echo "=== Aiter version BEFORE uninstall ===" && pip show amd-aiter || true - RUN pip uninstall -y amd-aiter - RUN pip install --upgrade "pybind11>=3.0.1" - RUN pip show pybind11 - RUN wget 
https://github.com/stedolan/jq/releases/download/jq-1.7/jq-linux64 -O jq - RUN chmod +x jq - RUN mv jq /usr/local/bin/jq - RUN rm -rf /app/aiter-test - RUN git clone --depth 1 https://github.com/ROCm/aiter.git /app/aiter-test && \\ - cd /app/aiter-test && \\ - git checkout HEAD && \\ - git submodule sync && git submodule update --init --recursive && \\ - MAX_JOBS=64 PREBUILD_KERNELS=0 GPU_ARCHS=gfx950 python3 setup.py develop - RUN echo "=== Aiter version AFTER installation ===" && pip show amd-aiter || true - - RUN echo "=== ATOM version BEFORE uninstall ===" && pip show atom || true - RUN pip uninstall -y atom - RUN rm -rf /app/ATOM - RUN git clone ${{ env.GITHUB_REPO_URL }} /app/ATOM && \\ - cd /app/ATOM && \\ - git checkout ${{ env.GITHUB_COMMIT_SHA }} && \\ - pip install -e . - - RUN echo "=== ATOM version AFTER installation ===" && pip show atom || true - EOF - - - name: Build Docker image - if: ${{ !github.event.pull_request.head.repo.fork && steps.pull_prebuilt.outputs.image_ready != 'true' }} - run: | - docker build --pull --network=host \ - --no-cache \ - -t atom_test:ci \ - -f Dockerfile.mod . 
- - - name: Push Docker image - if: ${{ !github.event.pull_request.head.repo.fork && steps.pull_prebuilt.outputs.image_ready != 'true' }} - run: | - IMAGE_TAG=rocm/atom-dev:pre-build-${{ env.GITHUB_COMMIT_SHA }} - docker tag atom_test:ci $IMAGE_TAG - docker push $IMAGE_TAG - - - name: Success message - if: ${{ !github.event.pull_request.head.repo.fork }} - run: | - IMAGE_TAG=rocm/atom-dev:pre-build-${{ env.GITHUB_COMMIT_SHA }} - if [ "${{ steps.pull_prebuilt.outputs.image_ready }}" = "true" ]; then - echo "Successfully reused image: $IMAGE_TAG" - else - echo "Successfully rebuilt and pushed image: $IMAGE_TAG" - fi - atom-vllm-oot: - needs: [wait-atom-test-success, pre-checks, build_atom_image] - if: ${{ needs.wait-atom-test-success.outputs.atom_test_ok == 'true' && needs.pre-checks.result == 'success' && needs.build_atom_image.result == 'success' && (!github.event.pull_request || github.event.pull_request.draft == false) }} + needs: [pre-checks] + if: ${{ needs.pre-checks.result == 'success' && (!github.event.pull_request || github.event.pull_request.draft == false) }} name: ATOM vLLM OOT Test strategy: fail-fast: false @@ -230,13 +67,7 @@ jobs: - name: Checkout ATOM repo uses: actions/checkout@v4 - - name: Docker Login - if: ${{ !github.event.pull_request.head.repo.fork }} - run: | - docker login -u ${{ secrets.DOCKER_USERNAME }} -p ${{ secrets.DOCKER_PASSWORD }} - - - name: Prepare OOT base image for forked repo - if: ${{ github.event.pull_request.head.repo.fork }} + - name: Build ATOM base image run: | cat < Dockerfile.mod FROM ${{ env.ATOM_BASE_NIGHTLY_IMAGE }} @@ -256,8 +87,6 @@ jobs: MAX_JOBS=64 PREBUILD_KERNELS=0 GPU_ARCHS=gfx950 python3 setup.py develop RUN echo "=== Aiter version AFTER installation ===" && pip show amd-aiter || true RUN echo "=== ATOM version BEFORE uninstall ===" && pip show atom || true - # Fork PR fallback: this workflow cannot rely on pre-built images from - # other workflows, so reinstall ATOM from the current PR commit. 
RUN pip uninstall -y atom RUN rm -rf /app/ATOM RUN git clone ${{ env.GITHUB_REPO_URL }} /app/ATOM && \\ @@ -271,27 +100,16 @@ jobs: --no-cache \ -t atom_oot_base:ci \ -f Dockerfile.mod . - echo "OOT_BASE_IMAGE=atom_oot_base:ci" >> "$GITHUB_ENV" - - - name: Select OOT base image from pre-built ATOM image - if: ${{ !github.event.pull_request.head.repo.fork }} - run: | - echo "OOT_BASE_IMAGE=rocm/atom-dev:pre-build-${{ env.GITHUB_COMMIT_SHA }}" >> "$GITHUB_ENV" - name: Build OOT vLLM image run: | - if [ "${{ github.event_name }}" = "pull_request" ] && [ "${{ github.event.pull_request.head.repo.fork }}" = "true" ]; then - pull_base_image=0 - else - pull_base_image=1 - fi chmod +x docker/plugin/build_OOT_vLLM.sh IMAGE_TAG="${OOT_IMAGE_TAG}" \ - BASE_IMAGE="${OOT_BASE_IMAGE}" \ + BASE_IMAGE="atom_oot_base:ci" \ VLLM_COMMIT="${VLLM_COMMIT}" \ VLLM_VERSION="${VLLM_VERSION}" \ INSTALL_LM_EVAL=1 \ - PULL_BASE_IMAGE="${pull_base_image}" \ + PULL_BASE_IMAGE=0 \ BUILD_NO_CACHE=1 \ docker/plugin/build_OOT_vLLM.sh @@ -336,6 +154,7 @@ jobs: GITHUB_WORKSPACE: ${{ github.workspace }} - name: Download model if needed + if: success() run: | if [ -d "/models" ] && [ ! 
-f "/models/${{ matrix.model_path }}/config.json" ]; then echo "Downloading model to /models/${{ matrix.model_path }}" @@ -345,6 +164,7 @@ jobs: fi - name: Run OOT launch and gsm8k accuracy via script (ci mode) + if: success() timeout-minutes: 90 run: | docker exec "$CONTAINER_NAME" bash -lc " From c2aaffa9ba8564314a0959d198ae291c73a3481a Mon Sep 17 00:00:00 2001 From: zejunchen-zejun Date: Thu, 12 Mar 2026 15:37:45 +0800 Subject: [PATCH 08/15] add Signed-off-by: zejunchen-zejun --- .../workflows/atom-vllm-oot-full-test.yaml | 18 +-- .github/workflows/atom-vllm-oot-test.yaml | 19 ++-- .github/workflows/docker-release.yaml | 48 ++++++++ docker/Dockerfile | 81 +++++++++++++- docker/plugin/Dockerfile_OOT_vLLM | 105 ------------------ docker/plugin/build_OOT_vLLM.sh | 90 --------------- 6 files changed, 148 insertions(+), 213 deletions(-) delete mode 100644 docker/plugin/Dockerfile_OOT_vLLM delete mode 100644 docker/plugin/build_OOT_vLLM.sh diff --git a/.github/workflows/atom-vllm-oot-full-test.yaml b/.github/workflows/atom-vllm-oot-full-test.yaml index 82432e8e..02cce215 100644 --- a/.github/workflows/atom-vllm-oot-full-test.yaml +++ b/.github/workflows/atom-vllm-oot-full-test.yaml @@ -80,15 +80,17 @@ jobs: - name: Build OOT vLLM image from rebuilt ATOM base id: meta run: | - chmod +x docker/plugin/build_OOT_vLLM.sh OOT_IMAGE_TAG="${VALIDATION_IMAGE_REPO}:oot-vllm-validation-${GITHUB_COMMIT_SHA}-${{ github.run_id }}" - IMAGE_TAG="${OOT_IMAGE_TAG}" \ - BASE_IMAGE="atom_oot_base:ci" \ - VLLM_COMMIT="${{ inputs.vllm_commit || 'b31e9326a7d9394aab8c767f8ebe225c65594b60' }}" \ - VLLM_VERSION="${{ inputs.vllm_version || '0.17' }}" \ - INSTALL_LM_EVAL=1 \ - BUILD_NO_CACHE=1 \ - docker/plugin/build_OOT_vLLM.sh + docker build --network=host \ + --no-cache \ + --target oot_image \ + -t "${OOT_IMAGE_TAG}" \ + --build-arg BASE_IMAGE="atom_oot_base:ci" \ + --build-arg MAX_JOBS=64 \ + --build-arg VLLM_COMMIT="${{ inputs.vllm_commit || 'b31e9326a7d9394aab8c767f8ebe225c65594b60' }}" \ 
+ --build-arg INSTALL_LM_EVAL=1 \ + --build-arg INSTALL_FASTSAFETENSORS=1 \ + -f docker/Dockerfile . echo "oot_image_tag=${OOT_IMAGE_TAG}" >> "$GITHUB_OUTPUT" diff --git a/.github/workflows/atom-vllm-oot-test.yaml b/.github/workflows/atom-vllm-oot-test.yaml index 1919c300..f84a9842 100644 --- a/.github/workflows/atom-vllm-oot-test.yaml +++ b/.github/workflows/atom-vllm-oot-test.yaml @@ -103,15 +103,16 @@ jobs: - name: Build OOT vLLM image run: | - chmod +x docker/plugin/build_OOT_vLLM.sh - IMAGE_TAG="${OOT_IMAGE_TAG}" \ - BASE_IMAGE="atom_oot_base:ci" \ - VLLM_COMMIT="${VLLM_COMMIT}" \ - VLLM_VERSION="${VLLM_VERSION}" \ - INSTALL_LM_EVAL=1 \ - PULL_BASE_IMAGE=0 \ - BUILD_NO_CACHE=1 \ - docker/plugin/build_OOT_vLLM.sh + docker build --network=host \ + --no-cache \ + --target oot_image \ + -t "${OOT_IMAGE_TAG}" \ + --build-arg BASE_IMAGE="atom_oot_base:ci" \ + --build-arg MAX_JOBS=64 \ + --build-arg VLLM_COMMIT="${VLLM_COMMIT}" \ + --build-arg INSTALL_LM_EVAL=1 \ + --build-arg INSTALL_FASTSAFETENSORS=1 \ + -f docker/Dockerfile . 
- name: Run all plugin unit tests run: | diff --git a/.github/workflows/docker-release.yaml b/.github/workflows/docker-release.yaml index a2330451..8f7c2082 100644 --- a/.github/workflows/docker-release.yaml +++ b/.github/workflows/docker-release.yaml @@ -27,6 +27,19 @@ on: description: "Runner label to use" type: string default: "atom-mi355-8gpu.predownload" + build_oot_image: + description: "Build OOT vLLM image in addition to ATOM image" + type: boolean + default: false + oot_base_image: + description: "Base image for OOT build (empty means use local atom_release:ci)" + default: "" + vllm_commit: + description: "vLLM commit for OOT image" + default: "b31e9326a7d9394aab8c767f8ebe225c65594b60" + vllm_version: + description: "vLLM version label for OOT image tags" + default: "0.17.0" jobs: docker-release: @@ -46,6 +59,8 @@ jobs: RCCL_REPO: "https://github.com/ROCm/rccl.git" RCCL_BRANCH: "29e1567b95e28823b0beb1a988adc587bfab5b4f" GPU_ARCH: "gfx942;gfx950" + VLLM_COMMIT: "b31e9326a7d9394aab8c767f8ebe225c65594b60" + VLLM_VERSION: "0.17.0" steps: - name: Checkout ATOM repo @@ -68,6 +83,7 @@ jobs: timeout-minutes: 120 run: | docker build --pull --network=host -t atom_release:ci \ + --target atom_image \ --build-arg BASE_IMAGE="${{ matrix.base_image }}" \ --build-arg GPU_ARCH="${{ env.GPU_ARCH }}" \ --build-arg ATOM_REPO="${{ inputs.atom_repo || env.ATOM_REPO }}" \ @@ -136,6 +152,38 @@ jobs: docker tag atom_release:ci rocm/atom-dev:${TAG} docker push rocm/atom-dev:${TAG} + - name: Build OOT Docker image + if: ${{ success() && inputs.build_oot_image == true }} + timeout-minutes: 180 + run: | + if [ -n "${{ inputs.oot_base_image }}" ]; then + OOT_BASE_IMAGE="${{ inputs.oot_base_image }}" + else + OOT_BASE_IMAGE="rocm/atom-dev:latest" + fi + + echo "Using OOT base image: ${OOT_BASE_IMAGE}" + docker pull "${OOT_BASE_IMAGE}" + docker build --network=host -t atom_oot_release:ci \ + --target oot_image \ + --build-arg BASE_IMAGE="${OOT_BASE_IMAGE}" \ + --build-arg MAX_JOBS=64 \ 
+ --build-arg VLLM_COMMIT="${{ inputs.vllm_commit || env.VLLM_COMMIT }}" \ + --build-arg INSTALL_LM_EVAL=1 \ + --build-arg INSTALL_FASTSAFETENSORS=1 \ + -f docker/Dockerfile . + docker inspect atom_oot_release:ci + + - name: Push OOT Docker image + if: ${{ success() && inputs.build_oot_image == true }} + run: | + TAG=nightly_$(date +%Y%m%d) + VLLM_VER="${{ inputs.vllm_version || env.VLLM_VERSION }}" + docker tag atom_oot_release:ci rocm/atom-vllm:latest + docker push rocm/atom-vllm:latest + docker tag atom_oot_release:ci rocm/atom-vllm-v${VLLM_VER}:${TAG} + docker push rocm/atom-vllm-v${VLLM_VER}:${TAG} + - name: Clean Up if: always() run: | diff --git a/docker/Dockerfile b/docker/Dockerfile index 85c99daa..e97864ac 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -2,7 +2,86 @@ ARG BASE_IMAGE="rocm/pytorch:latest" ARG GPU_ARCH="gfx942;gfx950" -FROM $BASE_IMAGE +# -------------------------------------------------------------------- +# OOT image stage: extends an ATOM base image with vLLM + OOT deps. +# Build with: docker build --target oot_image --build-arg BASE_IMAGE=... 
+# -------------------------------------------------------------------- +FROM ${BASE_IMAGE} as oot_image + +ARG MAX_JOBS=64 +ARG VENV_PYTHON="/opt/venv/bin/python" +ARG VLLM_REPO="https://github.com/vllm-project/vllm.git" +ARG VLLM_COMMIT="b31e9326a7d9394aab8c767f8ebe225c65594b60" +ARG INSTALL_LM_EVAL=1 +ARG INSTALL_FASTSAFETENSORS=1 + +ENV DEBIAN_FRONTEND=noninteractive +ENV PATH="/opt/venv/bin:${PATH}" +ENV VLLM_TARGET_DEVICE=rocm +ENV CMAKE_MAKE_PROGRAM=/usr/local/bin/ninja +ENV MAX_JOBS=${MAX_JOBS} +ENV LD_LIBRARY_PATH="/opt/venv/lib/python3.12/site-packages/torch/lib:${LD_LIBRARY_PATH}" +WORKDIR /app + +RUN echo "========== [OOT 1/7] Prepare build tools ==========" \ + && apt-get update \ + && apt --fix-broken install -y \ + && apt-get install -y --no-install-recommends git ca-certificates ninja-build vim \ + && mkdir -p /usr/local/bin \ + && ln -sf "$(command -v ninja)" /usr/local/bin/ninja \ + && /usr/local/bin/ninja --version \ + && rm -rf /var/lib/apt/lists/* + +RUN echo "========== [OOT 2/7] Verify base packages (atom/aiter/mori) ==========" \ + && "${VENV_PYTHON}" -m pip show atom || true \ + && "${VENV_PYTHON}" -m pip show amd-aiter || true \ + && "${VENV_PYTHON}" -m pip show mori || true + +RUN echo "========== [OOT 3/7] Clone vLLM ==========" \ + && git clone "${VLLM_REPO}" /app/vllm \ + && cd /app/vllm \ + && git checkout "${VLLM_COMMIT}" \ + && git submodule update --init --recursive \ + && echo "vLLM commit:" \ + && git rev-parse HEAD + +RUN echo "========== [OOT 4/7] Install vLLM ROCm build dependencies ==========" \ + && cd /app/vllm \ + && "${VENV_PYTHON}" -m pip install --upgrade pip \ + && sed -i -e '/xgrammar/d' -e '/compressed-tensors/d' requirements/common.txt \ + && "${VENV_PYTHON}" -m pip install --no-deps xgrammar==0.1.29 compressed-tensors==0.13.0 loguru \ + && sed -i -e '/peft/d' -e '/tensorizer/d' -e '/runai/d' -e '/timm/d' requirements/rocm.txt \ + && "${VENV_PYTHON}" -m pip install --no-deps peft tensorizer==2.10.1 
runai-model-streamer[s3,gcs]==0.15.3 timm>=1.0.17 \ + && "${VENV_PYTHON}" -m pip install -r requirements/rocm.txt + +RUN echo "========== [OOT 5/7] Build and install amd-smi wheel ==========" \ + && cd /opt/rocm/share/amd_smi \ + && pip wheel . --wheel-dir=dist \ + && pip install dist/*.whl + +RUN echo "========== [OOT 6/7] Build vLLM wheel ==========" \ + && cd /app/vllm \ + && VLLM_TARGET_DEVICE=rocm "${VENV_PYTHON}" setup.py clean --all \ + && MAX_JOBS="${MAX_JOBS}" VLLM_TARGET_DEVICE=rocm "${VENV_PYTHON}" setup.py bdist_wheel --dist-dir=/tmp/vllm-wheels \ + && ls -lh /tmp/vllm-wheels + +RUN echo "========== [OOT 7/7] Install vLLM runtime dependencies ==========" \ + && cd /app/vllm \ + && "${VENV_PYTHON}" -m pip uninstall -y vllm || true \ + && "${VENV_PYTHON}" -m pip install /tmp/vllm-wheels/*.whl \ + && "${VENV_PYTHON}" -m pip install uvloop \ + && if [ "${INSTALL_LM_EVAL}" = "1" ]; then "${VENV_PYTHON}" -m pip install "lm-eval[api]"; else echo "Skip lm-eval install"; fi \ + && if [ "${INSTALL_FASTSAFETENSORS}" = "1" ]; then "${VENV_PYTHON}" -m pip install "git+https://github.com/foundation-model-stack/fastsafetensors.git"; else echo "Skip fastsafetensors install"; fi \ + && "${VENV_PYTHON}" -c "import glob, os, torch; print(f'torch.version.hip: {torch.version.hip}'); print(f'torch.version.cuda: {torch.version.cuda}'); torch_lib_dir=os.path.join(os.path.dirname(torch.__file__), 'lib'); print(f'torch lib dir: {torch_lib_dir}'); print(f'libtorch_hip candidates: {glob.glob(os.path.join(torch_lib_dir, \"libtorch_hip.so*\"))}'); assert torch.version.hip is not None, 'Torch is not ROCm build (torch.version.hip is None).'" \ + && "${VENV_PYTHON}" -m pip show vllm torch triton torchvision torchaudio amdsmi amd-aiter atom mori || true + +CMD ["/bin/bash"] + +# -------------------------------------------------------------------- +# ATOM image stage: original Dockerfile flow for atom-dev image. +# Build with: docker build --target atom_image --build-arg BASE_IMAGE=... 
+# -------------------------------------------------------------------- +FROM ${BASE_IMAGE} as atom_image ARG GPU_ARCH ENV GPU_ARCH_LIST=$GPU_ARCH diff --git a/docker/plugin/Dockerfile_OOT_vLLM b/docker/plugin/Dockerfile_OOT_vLLM deleted file mode 100644 index a02e50c7..00000000 --- a/docker/plugin/Dockerfile_OOT_vLLM +++ /dev/null @@ -1,105 +0,0 @@ -ARG BASE_IMAGE="rocm/atom-dev:latest" -FROM ${BASE_IMAGE} - -ARG VLLM_REPO="https://github.com/vllm-project/vllm.git" -ARG VLLM_COMMIT="b31e9326a7d9394aab8c767f8ebe225c65594b60" -ARG MAX_JOBS=64 -ARG INSTALL_LM_EVAL=1 -ARG INSTALL_FASTSAFETENSORS=1 -ARG VENV_PYTHON="/opt/venv/bin/python" - -ENV DEBIAN_FRONTEND=noninteractive -ENV PATH="/opt/venv/bin:${PATH}" -ENV VLLM_TARGET_DEVICE=rocm -ENV CMAKE_MAKE_PROGRAM=/usr/local/bin/ninja -ENV MAX_JOBS=${MAX_JOBS} -ENV LD_LIBRARY_PATH="/opt/venv/lib/python3.12/site-packages/torch/lib:${LD_LIBRARY_PATH}" -WORKDIR /app - -RUN echo "========== [1/7] Prepare build tools ==========" \ - && apt-get update \ - && apt --fix-broken install -y \ - && apt-get install -y --no-install-recommends git ca-certificates ninja-build vim \ - && mkdir -p /usr/local/bin \ - && ln -sf "$(command -v ninja)" /usr/local/bin/ninja \ - && /usr/local/bin/ninja --version \ - && rm -rf /var/lib/apt/lists/* - -RUN echo "========== [2/7] Verify base packages (atom/aiter/mori) ==========" \ - && "${VENV_PYTHON}" -m pip show atom || true \ - && "${VENV_PYTHON}" -m pip show amd-aiter || true \ - && "${VENV_PYTHON}" -m pip show mori || true - -RUN echo "========== [3/7] Clone vLLM ==========" \ - && git clone "${VLLM_REPO}" /app/vllm \ - && cd /app/vllm \ - && git checkout "${VLLM_COMMIT}" \ - && git submodule update --init --recursive \ - && echo "vLLM commit:" \ - && git rev-parse HEAD - -# Follow vLLM ROCm standard but DO NOT reinstall torch as already existing. 
-RUN echo "========== [4/7] Install vLLM ROCm build dependencies ==========" \ - && cd /app/vllm \ - && "${VENV_PYTHON}" -m pip install --upgrade pip \ - && sed -i -e '/xgrammar/d' -e '/compressed-tensors/d' requirements/common.txt \ - && "${VENV_PYTHON}" -m pip install --no-deps xgrammar==0.1.29 compressed-tensors==0.13.0 loguru \ - && sed -i -e '/peft/d' -e '/tensorizer/d' -e '/runai/d' -e '/timm/d' requirements/rocm.txt \ - && "${VENV_PYTHON}" -m pip install --no-deps peft tensorizer==2.10.1 runai-model-streamer[s3,gcs]==0.15.3 timm>=1.0.17 \ - && "${VENV_PYTHON}" -m pip install -r requirements/rocm.txt - - -RUN echo "========== [5/7] Build and install amd-smi wheel ==========" \ - && cd /opt/rocm/share/amd_smi \ - && pip wheel . --wheel-dir=dist \ - && pip install dist/*.whl - -RUN echo "========== [6/7] Build vLLM wheel ==========" \ - && cd /app/vllm \ - && VLLM_TARGET_DEVICE=rocm "${VENV_PYTHON}" setup.py clean --all \ - && MAX_JOBS="${MAX_JOBS}" VLLM_TARGET_DEVICE=rocm "${VENV_PYTHON}" setup.py bdist_wheel --dist-dir=/tmp/vllm-wheels \ - && ls -lh /tmp/vllm-wheels - -RUN echo "========== [7/7] Install vLLM runtime dependencies ==========" \ - && cd /app/vllm \ - && "${VENV_PYTHON}" -m pip uninstall -y vllm || true \ - && "${VENV_PYTHON}" -m pip install /tmp/vllm-wheels/*.whl \ - && "${VENV_PYTHON}" -m pip install uvloop - -RUN echo "========== [8/8] Optional tools and final checks ==========" \ - && if [ "${INSTALL_LM_EVAL}" = "1" ]; then "${VENV_PYTHON}" -m pip install "lm-eval[api]"; else echo "Skip lm-eval install"; fi \ - && if [ "${INSTALL_FASTSAFETENSORS}" = "1" ]; then "${VENV_PYTHON}" -m pip install "git+https://github.com/foundation-model-stack/fastsafetensors.git"; else echo "Skip fastsafetensors install"; fi \ - && "${VENV_PYTHON}" - <<'PY' -import importlib.metadata as m -import glob -import os -import torch - -print(f"torch.version.hip: {torch.version.hip}") -print(f"torch.version.cuda: {torch.version.cuda}") -torch_lib_dir = 
os.path.join(os.path.dirname(torch.__file__), "lib") -print(f"torch lib dir: {torch_lib_dir}") -print(f"libtorch_hip candidates: {glob.glob(os.path.join(torch_lib_dir, 'libtorch_hip.so*'))}") -if torch.version.hip is None: - raise RuntimeError("Torch is not ROCm build (torch.version.hip is None).") - -pkgs = [ - "vllm", - "torch", - "triton", - "torchvision", - "torchaudio", - "amdsmi", - "amd-aiter", - "atom", - "mori", -] -print("Final package versions:") -for p in pkgs: - try: - print(f" {p}: {m.version(p)}") - except Exception: - print(f" {p}: ") -PY - -CMD ["/bin/bash"] diff --git a/docker/plugin/build_OOT_vLLM.sh b/docker/plugin/build_OOT_vLLM.sh deleted file mode 100644 index a87483cb..00000000 --- a/docker/plugin/build_OOT_vLLM.sh +++ /dev/null @@ -1,90 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -REPO_ROOT="$(cd "${SCRIPT_DIR}/../../.." && pwd)" -LOG_DIR="${LOG_DIR:-${SCRIPT_DIR}/logs}" -LOG_FILE="${LOG_FILE:-${LOG_DIR}/build_OOT_vLLM_$(date +%Y%m%d_%H%M%S).log}" - -mkdir -p "${LOG_DIR}" -# Mirror all stdout/stderr to terminal and log file. 
-exec > >(tee -a "${LOG_FILE}") 2>&1 - -DOCKERFILE_PATH="${SCRIPT_DIR}/Dockerfile_OOT_vLLM" -BASE_IMAGE="${BASE_IMAGE:-rocm/atom-dev:latest}" -VLLM_REPO="${VLLM_REPO:-https://github.com/vllm-project/vllm.git}" -VLLM_COMMIT="${VLLM_COMMIT:-b31e9326a7d9394aab8c767f8ebe225c65594b60}" -VLLM_VERSION="${VLLM_VERSION:-0.17}" -VLLM_COMMIT_SHORT="$(printf '%s' "${VLLM_COMMIT}" | cut -c1-6)" -IMAGE_REPO="${IMAGE_REPO:-rocm/atom-vllm-dev}" -IMAGE_TAG="${IMAGE_TAG:-${IMAGE_REPO}:v${VLLM_VERSION}-${VLLM_COMMIT_SHORT}}" -MAX_JOBS="${MAX_JOBS:-64}" -INSTALL_LM_EVAL="${INSTALL_LM_EVAL:-1}" -PULL_BASE_IMAGE="${PULL_BASE_IMAGE:-1}" -BUILD_NO_CACHE="${BUILD_NO_CACHE:-1}" - -print_banner() { - echo "============================================================" - echo "$1" - echo "============================================================" -} - -print_banner "Build vLLM on top of ATOM base image" -echo "Log file : ${LOG_FILE}" -echo "Dockerfile : ${DOCKERFILE_PATH}" -echo "Build context : ${REPO_ROOT}" -echo "Target image : ${IMAGE_TAG}" -echo "Base image : ${BASE_IMAGE}" -echo "vLLM repo : ${VLLM_REPO}" -echo "vLLM version : ${VLLM_VERSION}" -echo "vLLM commit : ${VLLM_COMMIT}" -echo "commit short : ${VLLM_COMMIT_SHORT}" -echo "MAX_JOBS : ${MAX_JOBS}" -echo "INSTALL_LM_EVAL : ${INSTALL_LM_EVAL}" -echo "BUILD_NO_CACHE : ${BUILD_NO_CACHE}" -echo -echo "Build plan:" -echo " Step 1/4: (optional) pull base image" -echo " Step 2/4: check/remove existing target image" -echo " Step 3/4: build image from Dockerfile_OOT_vLLM" -echo " Step 4/4: print final image info" -echo - -if [[ "${PULL_BASE_IMAGE}" == "1" ]]; then - print_banner "Step 1/4 - Pull base image: ${BASE_IMAGE}" - docker pull "${BASE_IMAGE}" -else - print_banner "Step 1/4 - Skip base image pull (PULL_BASE_IMAGE=${PULL_BASE_IMAGE})" -fi - -print_banner "Step 2/4 - Check whether target image already exists" -if docker image inspect "${IMAGE_TAG}" >/dev/null 2>&1; then - echo "Target image already exists: ${IMAGE_TAG}" - docker 
image inspect "${IMAGE_TAG}" --format 'Existing image -> ID={{.Id}} Created={{.Created}}' - echo "Removing existing target image: ${IMAGE_TAG}" - docker image rm -f "${IMAGE_TAG}" -else - echo "Target image does not exist yet: ${IMAGE_TAG}" -fi -echo - -print_banner "Step 3/4 - Build target image: ${IMAGE_TAG}" -NO_CACHE_FLAG="" -if [[ "${BUILD_NO_CACHE}" == "1" ]]; then - NO_CACHE_FLAG="--no-cache" -fi - -DOCKER_BUILDKIT=1 docker build \ - ${NO_CACHE_FLAG} \ - -f "${DOCKERFILE_PATH}" \ - -t "${IMAGE_TAG}" \ - --build-arg "BASE_IMAGE=${BASE_IMAGE}" \ - --build-arg "VLLM_REPO=${VLLM_REPO}" \ - --build-arg "VLLM_COMMIT=${VLLM_COMMIT}" \ - --build-arg "MAX_JOBS=${MAX_JOBS}" \ - --build-arg "INSTALL_LM_EVAL=${INSTALL_LM_EVAL}" \ - "$@" \ - "${REPO_ROOT}" - -print_banner "Step 4/4 - Build completed" -docker image inspect "${IMAGE_TAG}" --format 'Image={{.RepoTags}} ID={{.Id}} Created={{.Created}}' From b768afd63ae3aeaecf8ebcf0c0df40a1f36f2519 Mon Sep 17 00:00:00 2001 From: zejunchen-zejun Date: Thu, 12 Mar 2026 15:42:39 +0800 Subject: [PATCH 09/15] add Signed-off-by: zejunchen-zejun --- .github/workflows/atom-vllm-oot-full-test.yaml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/atom-vllm-oot-full-test.yaml b/.github/workflows/atom-vllm-oot-full-test.yaml index 02cce215..a1c2d046 100644 --- a/.github/workflows/atom-vllm-oot-full-test.yaml +++ b/.github/workflows/atom-vllm-oot-full-test.yaml @@ -1,10 +1,13 @@ -name: ATOM vLLM OOT Validation +name: ATOM vLLM OOT Full Validation on: workflow_dispatch: inputs: vllm_commit: description: "vLLM commit to validate" + # NOTE: For full validation, set this commit explicitly when your PR + # adapts ATOM to a newer vLLM version; using an old default commit can + # hide real compatibility issues. 
required: false type: string default: "b31e9326a7d9394aab8c767f8ebe225c65594b60" From 23d700c7405bc4a3cfbe3dd3fcbfac11455f9e72 Mon Sep 17 00:00:00 2001 From: zejunchen-zejun Date: Thu, 12 Mar 2026 15:54:22 +0800 Subject: [PATCH 10/15] add Signed-off-by: zejunchen-zejun --- .github/workflows/docker-release.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/docker-release.yaml b/.github/workflows/docker-release.yaml index 8f7c2082..b9a67887 100644 --- a/.github/workflows/docker-release.yaml +++ b/.github/workflows/docker-release.yaml @@ -179,10 +179,10 @@ jobs: run: | TAG=nightly_$(date +%Y%m%d) VLLM_VER="${{ inputs.vllm_version || env.VLLM_VERSION }}" - docker tag atom_oot_release:ci rocm/atom-vllm:latest - docker push rocm/atom-vllm:latest - docker tag atom_oot_release:ci rocm/atom-vllm-v${VLLM_VER}:${TAG} - docker push rocm/atom-vllm-v${VLLM_VER}:${TAG} + docker tag atom_oot_release:ci rocm/atom-dev-vllm:latest + docker push rocm/atom-dev-vllm:latest + docker tag atom_oot_release:ci rocm/atom-dev-vllm-v${VLLM_VER}:${TAG} + docker push rocm/atom-dev-vllm-v${VLLM_VER}:${TAG} - name: Clean Up if: always() From 4b7ac8f8046ca7f349b262b79cb5ff3723726350 Mon Sep 17 00:00:00 2001 From: zejunchen-zejun Date: Thu, 12 Mar 2026 16:03:49 +0800 Subject: [PATCH 11/15] make lint happy Signed-off-by: zejunchen-zejun --- tests/plugin/test_plugin_env_flags.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/plugin/test_plugin_env_flags.py b/tests/plugin/test_plugin_env_flags.py index 2e7888a1..6760cc3b 100644 --- a/tests/plugin/test_plugin_env_flags.py +++ b/tests/plugin/test_plugin_env_flags.py @@ -16,4 +16,3 @@ def test_disable_vllm_plugin_flag_disables_platform(monkeypatch): assert platform_module.ATOMPlatform is None assert register_module.register_platform() is None - From 7794a50d34d4e7bdcf174b499239417ca275088b Mon Sep 17 00:00:00 2001 From: zejunchen-zejun Date: Thu, 12 Mar 2026 17:09:08 +0800 Subject: [PATCH 12/15] add 
Signed-off-by: zejunchen-zejun --- .github/scripts/atom_oot_test.sh | 3 +-- .github/workflows/atom-vllm-oot-full-test.yaml | 5 ----- .github/workflows/atom-vllm-oot-test.yaml | 1 - .github/workflows/docker-release.yaml | 3 +++ 4 files changed, 4 insertions(+), 8 deletions(-) diff --git a/.github/scripts/atom_oot_test.sh b/.github/scripts/atom_oot_test.sh index ec6ce080..8785af69 100644 --- a/.github/scripts/atom_oot_test.sh +++ b/.github/scripts/atom_oot_test.sh @@ -47,7 +47,7 @@ CI_MODE_MODELS=( FULL_MODE_MODELS=( "Qwen3 Dense|Qwen/Qwen3-8B|--trust-remote-code --kv-cache-dtype fp8 --tensor-parallel-size 1|0.70" "Qwen3 MoE|Qwen/Qwen3-235B-A22B-Instruct-2507-FP8|--trust-remote-code --kv-cache-dtype fp8 --tensor-parallel-size 8 --enable-expert-parallel|0.87" - "DeepSeek-V3 family|deepseek-ai/DeepSeek-R1-0528|--trust-remote-code --kv-cache-dtype fp8 --tensor-parallel-size 8|0.93" + "DeepSeek-V3 family|deepseek-ai/DeepSeek-R1-0528|--trust-remote-code --kv-cache-dtype fp8 --tensor-parallel-size 8|0.94" "GPT-OSS|openai/gpt-oss-120b|--trust-remote-code --kv-cache-dtype fp8 --tensor-parallel-size 2 --enable-dp-attention --enable-expert-parallel --gpu-memory-utilization 0.3|0.38" "Kimi-K2|amd/Kimi-K2-Thinking-MXFP4|--trust-remote-code --kv-cache-dtype fp8 --tensor-parallel-size 8 --enable-expert-parallel|0.90" ) @@ -132,7 +132,6 @@ launch_one_model() { nohup vllm serve "${resolved_model_path}" \ --host "${VLLM_HOST}" \ --port "${VLLM_PORT}" \ - --disable-log-requests \ --async-scheduling \ --load-format fastsafetensors \ --max-model-len 16384 \ diff --git a/.github/workflows/atom-vllm-oot-full-test.yaml b/.github/workflows/atom-vllm-oot-full-test.yaml index a1c2d046..79200b4f 100644 --- a/.github/workflows/atom-vllm-oot-full-test.yaml +++ b/.github/workflows/atom-vllm-oot-full-test.yaml @@ -138,32 +138,27 @@ jobs: model_path: "Qwen/Qwen3-8B" extra_args: "--trust-remote-code --kv-cache-dtype fp8 --tensor-parallel-size 1" env_vars: "" - accuracy_test_threshold: "0.70" runner: 
linux-atom-mi355-1 - model_name: "Qwen3 MoE" model_path: "Qwen/Qwen3-235B-A22B-Instruct-2507-FP8" extra_args: "--trust-remote-code --kv-cache-dtype fp8 --tensor-parallel-size 8 --enable-expert-parallel" env_vars: "" - accuracy_test_threshold: "0.87" runner: atom-mi355-8gpu.predownload - model_name: "DeepSeek-V3 family" model_path: "deepseek-ai/DeepSeek-R1-0528" extra_args: "--trust-remote-code --kv-cache-dtype fp8 --tensor-parallel-size 8" env_vars: "" - accuracy_test_threshold: "0.93" runner: atom-mi355-8gpu.predownload - model_name: "GPT-OSS" model_path: "openai/gpt-oss-120b" extra_args: "--trust-remote-code --kv-cache-dtype fp8 --tensor-parallel-size 2 --enable-dp-attention --enable-expert-parallel --gpu-memory-utilization 0.3" env_vars: | ATOM_GPT_OSS_MODEL=1 - accuracy_test_threshold: "0.38" runner: linux-atom-mi355-4 - model_name: "Kimi-K2" model_path: "amd/Kimi-K2-Thinking-MXFP4" extra_args: "--trust-remote-code --kv-cache-dtype fp8 --tensor-parallel-size 8 --enable-expert-parallel" env_vars: "" - accuracy_test_threshold: "0.90" runner: atom-mi355-8gpu.predownload runs-on: ${{ matrix.runner }} timeout-minutes: 240 diff --git a/.github/workflows/atom-vllm-oot-test.yaml b/.github/workflows/atom-vllm-oot-test.yaml index f84a9842..cba67f6f 100644 --- a/.github/workflows/atom-vllm-oot-test.yaml +++ b/.github/workflows/atom-vllm-oot-test.yaml @@ -43,7 +43,6 @@ jobs: # Keep CI runtime under control: enable only one OOT model for now. 
- model_name: "Kimi-K2-Thinking-MXFP4" model_path: "amd/Kimi-K2-Thinking-MXFP4" - accuracy_test_threshold: "0.90" runner: atom-mi355-8gpu.predownload runs-on: ${{ matrix.runner }} timeout-minutes: 180 diff --git a/.github/workflows/docker-release.yaml b/.github/workflows/docker-release.yaml index b9a67887..1d95cffb 100644 --- a/.github/workflows/docker-release.yaml +++ b/.github/workflows/docker-release.yaml @@ -192,5 +192,8 @@ jobs: # Remove build and tagged images to free disk space docker rmi atom_release:ci || true docker rmi rocm/atom-dev:latest || true + docker rmi atom_oot_release:ci || true + docker rmi rocm/atom-dev-vllm:latest || true # Remove nightly tagged image if it exists docker images "rocm/atom-dev:nightly_*" -q | xargs -r docker rmi || true + docker images "rocm/atom-dev-vllm-v*:*" -q | xargs -r docker rmi || true From b8215da9f00a41e2e0157fc5ed2ee786dc51bbe8 Mon Sep 17 00:00:00 2001 From: zejunchen-zejun Date: Thu, 12 Mar 2026 17:32:32 +0800 Subject: [PATCH 13/15] add Signed-off-by: zejunchen-zejun --- .github/workflows/docker-release.yaml | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/.github/workflows/docker-release.yaml b/.github/workflows/docker-release.yaml index 1d95cffb..aad79fe6 100644 --- a/.github/workflows/docker-release.yaml +++ b/.github/workflows/docker-release.yaml @@ -28,11 +28,11 @@ on: type: string default: "atom-mi355-8gpu.predownload" build_oot_image: - description: "Build OOT vLLM image in addition to ATOM image" + description: "Build OOT vLLM image" type: boolean default: false oot_base_image: - description: "Base image for OOT build (empty means use local atom_release:ci)" + description: "Base image for OOT vLLM (empty means rocm/atom-dev:latest)" default: "" vllm_commit: description: "vLLM commit for OOT image" @@ -177,12 +177,10 @@ jobs: - name: Push OOT Docker image if: ${{ success() && inputs.build_oot_image == true }} run: | - TAG=nightly_$(date +%Y%m%d) VLLM_VER="${{ 
inputs.vllm_version || env.VLLM_VERSION }}" - docker tag atom_oot_release:ci rocm/atom-dev-vllm:latest - docker push rocm/atom-dev-vllm:latest - docker tag atom_oot_release:ci rocm/atom-dev-vllm-v${VLLM_VER}:${TAG} - docker push rocm/atom-dev-vllm-v${VLLM_VER}:${TAG} + OOT_TAG="vllm-v${VLLM_VER}-nightly_$(date +%Y%m%d)" + docker tag atom_oot_release:ci rocm/atom-dev:${OOT_TAG} + docker push rocm/atom-dev:${OOT_TAG} - name: Clean Up if: always() @@ -193,7 +191,6 @@ jobs: docker rmi atom_release:ci || true docker rmi rocm/atom-dev:latest || true docker rmi atom_oot_release:ci || true - docker rmi rocm/atom-dev-vllm:latest || true # Remove nightly tagged image if it exists docker images "rocm/atom-dev:nightly_*" -q | xargs -r docker rmi || true - docker images "rocm/atom-dev-vllm-v*:*" -q | xargs -r docker rmi || true + docker images "rocm/atom-dev:vllm-v*-nightly_*" -q | xargs -r docker rmi || true From 744bb8fde2ac981fb071db377e2319dc7f55ad14 Mon Sep 17 00:00:00 2001 From: zejunchen-zejun Date: Thu, 12 Mar 2026 17:51:50 +0800 Subject: [PATCH 14/15] add Signed-off-by: zejunchen-zejun --- .github/scripts/atom_oot_test.sh | 3 +++ docker/Dockerfile | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/scripts/atom_oot_test.sh b/.github/scripts/atom_oot_test.sh index 8785af69..4eff1dfc 100644 --- a/.github/scripts/atom_oot_test.sh +++ b/.github/scripts/atom_oot_test.sh @@ -40,6 +40,9 @@ ACCURACY_LOG_FILE=${ACCURACY_LOG_FILE:-/tmp/oot_accuracy_output.txt} # Format: # MODEL_NAME|MODEL_PATH|EXTRA_ARGS|THRESHOLD +# Note: CI runs Kimi-K2 with TP=4 on an 8-GPU runner to reduce runtime and +# improve CI stability. Full mode uses TP=8 on the same class of runner for +# higher-fidelity validation. 
CI_MODE_MODELS=( "Kimi-K2|amd/Kimi-K2-Thinking-MXFP4|--trust-remote-code --kv-cache-dtype fp8 --tensor-parallel-size 4 --enable-expert-parallel|0.90" ) diff --git a/docker/Dockerfile b/docker/Dockerfile index e97864ac..7d4f0a51 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -6,7 +6,7 @@ ARG GPU_ARCH="gfx942;gfx950" # OOT image stage: extends an ATOM base image with vLLM + OOT deps. # Build with: docker build --target oot_image --build-arg BASE_IMAGE=... # -------------------------------------------------------------------- -FROM ${BASE_IMAGE} as oot_image +FROM ${BASE_IMAGE} AS oot_image ARG MAX_JOBS=64 ARG VENV_PYTHON="/opt/venv/bin/python" From 5fa84b69bdd20a7b9d930c506f1a418d2eb15498 Mon Sep 17 00:00:00 2001 From: zejunchen-zejun Date: Thu, 12 Mar 2026 20:36:41 +0800 Subject: [PATCH 15/15] add Signed-off-by: zejunchen-zejun --- .github/scripts/atom_oot_test.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/scripts/atom_oot_test.sh b/.github/scripts/atom_oot_test.sh index 4eff1dfc..dd4a8577 100644 --- a/.github/scripts/atom_oot_test.sh +++ b/.github/scripts/atom_oot_test.sh @@ -169,7 +169,7 @@ accuracy_one_model() { echo "Threshold: ${threshold}" lm_eval --model local-completions \ - --model_args model="${resolved_model_path}",base_url="http://127.0.0.1:${VLLM_PORT}/v1/completions",num_concurrent=65,max_retries=1,tokenized_requests=False \ + --model_args model="${resolved_model_path}",base_url="http://127.0.0.1:${VLLM_PORT}/v1/completions",num_concurrent=65,max_retries=1,tokenized_requests=False,trust_remote_code=True \ --tasks gsm8k \ --num_fewshot 3 \ --output_path "${result_file}" 2>&1 | tee -a "${ACCURACY_LOG_FILE}"