Add CK-free fallback for fused QKNorm+RoPE+Cache #52
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| name: ATOM Test | |
| on: | |
| push: | |
| branches: [main] | |
| pull_request: | |
| branches: [main] # Triggers on PRs targeting `main` | |
| types: [opened, synchronize, reopened, ready_for_review] | |
| paths-ignore: | |
| - '**/*.md' | |
| - 'docs/**' | |
| - 'LICENSE' | |
| - '.gitignore' | |
| schedule: | |
| # Nightly at 00:00 Beijing time (16:00 UTC) | |
| - cron: '0 16 * * *' | |
| workflow_dispatch: | |
| concurrency: | |
| group: ${{ github.workflow }}-${{ github.ref }} | |
| cancel-in-progress: ${{ github.ref != 'refs/heads/main' }} | |
| env: | |
| ATOM_BASE_NIGTHLY_IMAGE: rocm/atom-dev:latest | |
| GITHUB_REPO_URL: ${{ github.event.pull_request.head.repo.clone_url || 'https://github.com/ROCm/ATOM.git' }} | |
| GITHUB_COMMIT_SHA: ${{ github.event.pull_request.head.sha || github.event.head_commit.id }} | |
| jobs: | |
| pre-checks: | |
| uses: ./.github/workflows/pre-checks.yaml | |
| with: | |
| black: true | |
| ruff: true | |
| build_atom_image: | |
| if: ${{ needs.pre-checks.result == 'success' && (!github.event.pull_request || github.event.pull_request.draft == false) }} | |
| needs: [pre-checks] | |
| name: Build ATOM image | |
| runs-on: build-only-atom | |
| steps: | |
| - name: Checkout ATOM repo | |
| if: ${{ !github.event.pull_request.head.repo.fork }} | |
| uses: actions/checkout@v4 | |
| - name: Generate Dockerfile | |
| if: ${{ !github.event.pull_request.head.repo.fork }} | |
| run: | | |
| cat <<EOF > Dockerfile.mod | |
| FROM ${{ env.ATOM_BASE_NIGTHLY_IMAGE }} | |
| RUN pip install -U lm-eval[api] | |
| RUN pip show lm-eval || true | |
| RUN pip install hf_transfer | |
| RUN pip show hf_transfer || true | |
| RUN echo "=== Aiter version BEFORE uninstall ===" && pip show amd-aiter || true | |
| RUN pip uninstall -y amd-aiter | |
| RUN pip install --upgrade "pybind11>=3.0.1" | |
| RUN pip show pybind11 | |
| RUN wget https://github.com/stedolan/jq/releases/download/jq-1.7/jq-linux64 -O jq | |
| RUN chmod +x jq | |
| RUN mv jq /usr/local/bin/jq | |
| RUN rm -rf /app/aiter-test | |
| RUN git clone --depth 1 https://github.com/ROCm/aiter.git /app/aiter-test && \\ | |
| cd /app/aiter-test && \\ | |
| git checkout HEAD && \\ | |
| git submodule sync && git submodule update --init --recursive && \\ | |
| MAX_JOBS=64 PREBUILD_KERNELS=0 GPU_ARCHS=gfx950 python3 setup.py develop | |
| RUN echo "=== Aiter version AFTER installation ===" && pip show amd-aiter || true | |
| RUN echo "=== ATOM version BEFORE uninstall ===" && pip show atom || true | |
| RUN pip uninstall -y atom | |
| RUN rm -rf /app/ATOM | |
| RUN git clone ${{ env.GITHUB_REPO_URL }} /app/ATOM && \\ | |
| cd /app/ATOM && \\ | |
| git checkout ${{ env.GITHUB_COMMIT_SHA }} && \\ | |
| pip install -e . | |
| RUN echo "=== ATOM version AFTER installation ===" && pip show atom || true | |
| EOF | |
| - name: Build Docker image | |
| if: ${{ !github.event.pull_request.head.repo.fork }} | |
| run: | | |
| docker build --network=host \ | |
| --no-cache \ | |
| -t atom_test:ci \ | |
| -f Dockerfile.mod . | |
| - name: Push Docker image | |
| if: ${{ !github.event.pull_request.head.repo.fork }} | |
| run: | | |
| IMAGE_TAG=rocm/atom-dev:pre-build-${{ env.GITHUB_COMMIT_SHA }} | |
| docker tag atom_test:ci $IMAGE_TAG | |
| docker login -u ${{ secrets.DOCKER_USERNAME }} -p ${{ secrets.DOCKER_PASSWORD }} | |
| docker push $IMAGE_TAG | |
| - name: Success message | |
| if: ${{ !github.event.pull_request.head.repo.fork }} | |
| run: | | |
| echo "Successfully prepared image: $IMAGE_TAG" | |
| atom: | |
| needs: [pre-checks, build_atom_image] | |
| name: ATOM Test | |
| strategy: | |
| fail-fast: false | |
| matrix: | |
| include: | |
| # run_on_pr: true = run on all events; false = skip on PR (still runs on push/schedule/workflow_dispatch) | |
| - model_name: "Meta-Llama-3-8B-Instruct" | |
| model_path: "meta-llama/Meta-Llama-3-8B-Instruct" | |
| extraArgs: "--kv_cache_dtype fp8 --gpu-memory-utilization 0.3" | |
| env_vars: "" | |
| accuracy_test_threshold: "0.73" | |
| runner: linux-atom-mi355-1 | |
| run_on_pr: true | |
| - model_name: "Llama-3.3-70B-Instruct-MXFP4-Preview" | |
| model_path: "amd/Llama-3.3-70B-Instruct-MXFP4-Preview" | |
| extraArgs: "--kv_cache_dtype fp8 --gpu-memory-utilization 0.3" | |
| env_vars: "" | |
| accuracy_test_threshold: "0.88" | |
| runner: linux-atom-mi355-1 | |
| run_on_pr: true | |
| - model_name: "DeepSeek-R1-0528" | |
| model_path: "deepseek-ai/DeepSeek-R1-0528" | |
| extraArgs: "--kv_cache_dtype fp8 -tp 8" | |
| env_vars: "" | |
| accuracy_test_threshold: "0.94" | |
| runner: atom-mi355-8gpu.predownload | |
| run_on_pr: true | |
| - model_name: "DeepSeek-R1-0528 MTP" | |
| model_path: "deepseek-ai/DeepSeek-R1-0528" | |
| extraArgs: "--kv_cache_dtype fp8 -tp 8 --method mtp" | |
| env_vars: "" | |
| accuracy_test_threshold: "0.94" | |
| runner: atom-mi355-8gpu.predownload | |
| run_on_pr: true | |
| - model_name: "DeepSeek-R1-0528-FP4" | |
| model_path: "amd/DeepSeek-R1-0528-mtp-mxfp4" | |
| extraArgs: "--kv_cache_dtype fp8 -tp 8" | |
| env_vars: "" | |
| accuracy_test_threshold: "0.93" | |
| runner: atom-mi355-8gpu.predownload | |
| run_on_pr: true | |
| - model_name: "DeepSeek-R1-0528-FP4 MTP" | |
| model_path: "amd/DeepSeek-R1-0528-mtp-mxfp4" | |
| extraArgs: "--kv_cache_dtype fp8 -tp 8 --method mtp" | |
| env_vars: "" | |
| accuracy_test_threshold: "0.93" | |
| runner: atom-mi355-8gpu.predownload | |
| run_on_pr: true | |
| - model_name: "gpt-oss-120b" | |
| model_path: "openai/gpt-oss-120b" | |
| extraArgs: "--kv_cache_dtype fp8 --gpu-memory-utilization 0.3" | |
| env_vars: | | |
| ATOM_GPT_OSS_MODEL=1 | |
| accuracy_test_threshold: "0.38" | |
| runner: linux-atom-mi355-1 | |
| run_on_pr: true | |
| - model_name: "gpt-oss-120b (2 GPUs)" | |
| model_path: "openai/gpt-oss-120b" | |
| extraArgs: "--kv_cache_dtype fp8 -tp 2 --enable-dp-attention --enable-expert-parallel --gpu-memory-utilization 0.3" | |
| env_vars: | | |
| ATOM_GPT_OSS_MODEL=1 | |
| accuracy_test_threshold: "0.38" | |
| runner: linux-atom-mi355-4 | |
| run_on_pr: true | |
| - model_name: "Qwen3-235B-A22B-Instruct-2507-FP8" | |
| model_path: "Qwen/Qwen3-235B-A22B-Instruct-2507-FP8" | |
| extraArgs: "--kv_cache_dtype fp8 -tp 8 --enable-expert-parallel" | |
| env_vars: | | |
| ATOM_ENABLE_QK_NORM_ROPE_CACHE_QUANT_FUSION=1 | |
| accuracy_test_threshold: "0.87" | |
| runner: atom-mi355-8gpu.predownload | |
| run_on_pr: true | |
| - model_name: "Qwen3-235B-A22B-Instruct-2507-MXFP4" | |
| model_path: "amd/Qwen3-235B-A22B-Instruct-2507-MXFP4" | |
| extraArgs: "--kv_cache_dtype fp8 -tp 8 --enable-expert-parallel" | |
| env_vars: | | |
| ATOM_ENABLE_QK_NORM_ROPE_CACHE_QUANT_FUSION=1 | |
| accuracy_test_threshold: "0.87" | |
| runner: atom-mi355-8gpu.predownload | |
| run_on_pr: false | |
| - model_name: "Qwen3-Next-80B-A3B-Thinking" | |
| model_path: "Qwen/Qwen3-Next-80B-A3B-Thinking" | |
| extraArgs: "-tp 8" | |
| env_vars: "" | |
| accuracy_test_threshold: "0.65" | |
| runner: atom-mi355-8gpu.predownload | |
| run_on_pr: true | |
| if: ${{ needs.pre-checks.result == 'success' && (!github.event.pull_request || github.event.pull_request.draft == false) }} | |
| runs-on: ${{ matrix.runner }} | |
| env: | |
| CONTAINER_NAME: atom_test_${{ strategy.job-index }} | |
| steps: | |
| - name: Kill all Docker containers and clean up workspace | |
| if: (matrix.run_on_pr == true || github.event_name != 'pull_request') && matrix.runner == 'atom-mi355-8gpu.predownload' | |
| run: | | |
| echo "=== Cleaning up containers on $(hostname) ===" | |
| containers=$(docker ps -q) | |
| if [ -n "$containers" ]; then | |
| docker kill $containers || true | |
| fi | |
| docker run --rm -v "${GITHUB_WORKSPACE:-$PWD}":/workspace -w /workspace --privileged rocm/pytorch:latest bash -lc "ls -la /workspace/ && rm -rf /workspace/*" || true | |
| - name: Show Docker containers | |
| if: (matrix.run_on_pr == true || github.event_name != 'pull_request') && matrix.runner == 'atom-mi355-8gpu.predownload' | |
| run: docker ps -a | |
| - name: Show ROCm memory usage | |
| if: (matrix.run_on_pr == true || github.event_name != 'pull_request') && matrix.runner == 'atom-mi355-8gpu.predownload' | |
| run: rocm-smi --showmemuse | |
| - name: Show ROCm GPU processes | |
| if: (matrix.run_on_pr == true || github.event_name != 'pull_request') && matrix.runner == 'atom-mi355-8gpu.predownload' | |
| run: rocm-smi --showpidgpus | |
| - name: Checkout ATOM repo | |
| if: matrix.run_on_pr == true || github.event_name != 'pull_request' | |
| uses: actions/checkout@v4 | |
| - name: Docker Login | |
| if: (matrix.run_on_pr == true || github.event_name != 'pull_request') && !github.event.pull_request.head.repo.fork | |
| run: | | |
| docker login -u ${{ secrets.DOCKER_USERNAME }} -p ${{ secrets.DOCKER_PASSWORD }} | |
| - name: Generate Dockerfile for forked repo | |
| if: (matrix.run_on_pr == true || github.event_name != 'pull_request') && github.event.pull_request.head.repo.fork | |
| run: | | |
| cat <<EOF > Dockerfile.mod | |
| FROM ${{ env.ATOM_BASE_NIGTHLY_IMAGE }} | |
| RUN pip install -U lm-eval[api] | |
| RUN pip show lm-eval || true | |
| RUN pip install hf_transfer | |
| RUN pip show hf_transfer || true | |
| RUN echo "=== Aiter version BEFORE uninstall ===" && pip show amd-aiter || true | |
| RUN pip uninstall -y amd-aiter | |
| RUN pip install --upgrade "pybind11>=3.0.1" | |
| RUN pip show pybind11 | |
| RUN rm -rf /app/aiter-test | |
| RUN git clone https://github.com/ROCm/aiter.git /app/aiter-test && \\ | |
| cd /app/aiter-test && \\ | |
| git checkout HEAD && \\ | |
| git submodule sync && git submodule update --init --recursive && \\ | |
| MAX_JOBS=64 PREBUILD_KERNELS=0 GPU_ARCHS=gfx950 python3 setup.py develop | |
| RUN echo "=== Aiter version AFTER installation ===" && pip show amd-aiter || true | |
| RUN echo "=== ATOM version BEFORE uninstall ===" && pip show atom || true | |
| RUN pip uninstall -y atom | |
| RUN rm -rf /app/ATOM | |
| RUN git clone ${{ env.GITHUB_REPO_URL }} /app/ATOM && \\ | |
| cd /app/ATOM && \\ | |
| git checkout ${{ env.GITHUB_COMMIT_SHA }} && \\ | |
| pip install -e . | |
| RUN echo "=== ATOM version AFTER installation ===" && pip show atom || true | |
| EOF | |
| - name: Build Docker image for forked repo | |
| if: (matrix.run_on_pr == true || github.event_name != 'pull_request') && github.event.pull_request.head.repo.fork | |
| run: | | |
| docker build --network=host \ | |
| --no-cache \ | |
| -t atom_test:ci \ | |
| -f Dockerfile.mod . | |
| - name: Start CI container | |
| if: matrix.run_on_pr == true || github.event_name != 'pull_request' | |
| run: | | |
| echo "Clean up containers..." | |
| (docker ps -aq -f name="^${CONTAINER_NAME}$" | xargs -r docker stop) || true | |
| (docker ps -aq -f name="^${CONTAINER_NAME}$" | xargs -r docker rm) || true | |
| if [ -f "/etc/podinfo/gha-render-devices" ]; then | |
| DEVICE_FLAG=$(cat /etc/podinfo/gha-render-devices) | |
| else | |
| DEVICE_FLAG="--device /dev/dri" | |
| fi | |
| if [ -d "/models" ]; then | |
| MODEL_MOUNT="-v /models:/models" | |
| else | |
| echo "Warning: /models directory not found on runner; skipping /models mount and disabling model pre-download optimization." | |
| MODEL_MOUNT="" | |
| fi | |
| cat > /tmp/env_file.txt << 'EOF' | |
| ${{ matrix.env_vars }} | |
| EOF | |
| echo "Starting container: atom_test:ci" | |
| echo "Model-specific environment variables for ${{ matrix.model_name }}:" | |
| cat /tmp/env_file.txt | |
| if [ "${{ github.event.pull_request.head.repo.fork }}" = "true" ]; then | |
| IMAGE_TAG=atom_test:ci | |
| else | |
| IMAGE_TAG=rocm/atom-dev:pre-build-${{ env.GITHUB_COMMIT_SHA }} | |
| fi | |
| docker run -dt --device=/dev/kfd $DEVICE_FLAG \ | |
| -v "${GITHUB_WORKSPACE:-$PWD}":/workspace \ | |
| $MODEL_MOUNT \ | |
| -w /workspace \ | |
| --ipc=host --group-add video \ | |
| --shm-size=16G \ | |
| --privileged \ | |
| --cap-add=SYS_PTRACE \ | |
| -e HF_TOKEN="${HF_TOKEN:-}" \ | |
| --env-file /tmp/env_file.txt \ | |
| --security-opt seccomp=unconfined \ | |
| --ulimit memlock=-1 \ | |
| --ulimit stack=67108864 \ | |
| -e ATOM_DISABLE_MMAP=true \ | |
| -v "${{ github.workspace }}:/workspace" \ | |
| -w /workspace \ | |
| --name "$CONTAINER_NAME" \ | |
| $IMAGE_TAG | |
| env: | |
| GITHUB_WORKSPACE: ${{ github.workspace }} | |
| - name: Check shm size | |
| if: matrix.run_on_pr == true || github.event_name != 'pull_request' | |
| run: | | |
| docker exec "$CONTAINER_NAME" df -h /dev/shm | |
| - name: Download models | |
| if: matrix.run_on_pr == true || github.event_name != 'pull_request' | |
| run: | | |
| if [ -d "/models" ]; then | |
| echo "/models directory found, downloading model to /models/${{ matrix.model_path }}" | |
| if ! docker exec -e HF_TOKEN=${{ secrets.AMD_HF_TOKEN }} "$CONTAINER_NAME" bash -lc "hf download ${{ matrix.model_path }} --local-dir /models/${{ matrix.model_path }}"; then | |
| echo "Model download failed for '${{ matrix.model_path }}'. Aborting." | |
| exit 1 | |
| fi | |
| else | |
| echo "/models directory not found, skipping model download" | |
| fi | |
| - name: Run ATOM simple inference | |
| if: matrix.run_on_pr == true || github.event_name != 'pull_request' | |
| timeout-minutes: 30 | |
| run: | | |
| # Run the inference and capture output | |
| set -euo pipefail | |
| echo "" | |
| echo "========== Running test ==========" | |
| if [ -d "/models" ]; then | |
| model_path="/models/${{ matrix.model_path }}" | |
| else | |
| model_path="${{ matrix.model_path }}" | |
| fi | |
| echo "Model path: $model_path" | |
| ls -la $model_path || true | |
| # Print debug logs | |
| echo "========= Runner debug logs ===============" | |
| ps aux | |
| rocm-smi --showmemuse | |
| rocm-smi --showpids | |
| docker ps -a | |
| echo "========= End runner debug logs ===============" | |
| docker exec "$CONTAINER_NAME" bash -lc " | |
| set -euo pipefail | |
| python3 -m atom.examples.simple_inference \ | |
| --model \"$model_path\" \ | |
| ${{ matrix.extraArgs }} \ | |
| --temperature 0 \ | |
| | grep -E '^Prompt: |^Completion:' | |
| " > atom_test_output.txt | |
| echo "" | |
| echo "========== Showing test output below ==========" | |
| cat atom_test_output.txt | |
| - name: Compare output with golden outputs | |
| if: (matrix.run_on_pr == true || github.event_name != 'pull_request') && false | |
| timeout-minutes: 30 | |
| # TODO: skip for all test until it's fixed | |
| run: | | |
| echo "========== Comparing output with golden outputs ==========" | |
| if ! diff -u -B -w --strip-trailing-cr \ | |
| atom_test_output.txt \ | |
| ".github/workflows/golden_outputs/${{ matrix.model_name }}_golden_output.txt"; then | |
| echo "Failed: Output does not match golden outputs." | |
| exit 1 | |
| else | |
| echo "Success: Output matches golden outputs." | |
| fi | |
| - name: Run ATOM accuracy test | |
| if: matrix.run_on_pr == true || github.event_name != 'pull_request' | |
| timeout-minutes: 30 | |
| run: | | |
| set -euo pipefail | |
| echo "" | |
| echo "========== Launching ATOM server ==========" | |
| if [ -d "/models" ]; then | |
| model_path="/models/${{ matrix.model_path }}" | |
| else | |
| model_path="${{ matrix.model_path }}" | |
| fi | |
| docker exec "$CONTAINER_NAME" bash -lc " | |
| .github/scripts/atom_test.sh launch $model_path ${{ matrix.extraArgs }} | |
| " | |
| echo "" | |
| echo "========== Running accuracy test ==========" | |
| docker exec "$CONTAINER_NAME" bash -lc " | |
| .github/scripts/atom_test.sh accuracy $model_path | |
| " 2>&1 | tee atom_accuracy_output.txt | |
| - name: Check accuracy test results | |
| if: (matrix.run_on_pr == true || github.event_name != 'pull_request') && success() | |
| run: | | |
| result_file=$(ls -1t accuracy_test_results/*.json 2>/dev/null | head -n 1) | |
| if [ -z "$result_file" ] || [ ! -f "$result_file" ]; then | |
| echo "ERROR: No results JSON file found in accuracy_test_results/" | |
| exit 2 | |
| else | |
| echo "RESULT_FILE: $result_file" | |
| fi | |
| flexible_extract_value=$(jq '.results.gsm8k["exact_match,flexible-extract"]' "$result_file") | |
| echo "Flexible extract value: $flexible_extract_value" | |
| echo "Accuracy test threshold: ${{ matrix.accuracy_test_threshold }}" | |
| # Compare as float: use awk for decimal value comparison | |
| result=$(awk -v val="$flexible_extract_value" -v threshold="${{ matrix.accuracy_test_threshold }}" 'BEGIN {print (val < threshold) ? 1 : 0}') | |
| if [ "$result" -eq 1 ]; then | |
| echo "Accuracy test failed: Flexible extract value $flexible_extract_value is less than the threshold ${{ matrix.accuracy_test_threshold }}." | |
| exit 1 | |
| else | |
| echo "Accuracy test passed: Flexible extract value $flexible_extract_value is greater than or equal to the threshold ${{ matrix.accuracy_test_threshold }}." | |
| exit 0 | |
| fi | |
| - name: Collect Test Summary | |
| if: (matrix.run_on_pr == true || github.event_name != 'pull_request') && success() | |
| run: | | |
| echo "Accuracy Test Summary for ${{ matrix.model_name }}:" >> $GITHUB_STEP_SUMMARY | |
| awk '/\|Tasks\|Version\|/,/^$/ { if (NF > 0) print }' atom_accuracy_output.txt >> $GITHUB_STEP_SUMMARY | |
| - name: Upload output | |
| if: (matrix.run_on_pr == true || github.event_name != 'pull_request') && always() | |
| uses: actions/upload-artifact@v4 | |
| with: | |
| name: ${{ matrix.model_name }}_atom_test_output.txt | |
| path: atom_test_output.txt | |
| - name: Clean Up | |
| if: (matrix.run_on_pr == true || github.event_name != 'pull_request') && always() | |
| run: | | |
| # TODO: run a separate container for cleanup of the workspace due to permission issue to remove some pyc files under __pycache__ whose owners are root. | |
| # We should use non-root user to run the test to avoid this issue. | |
| set -x | |
| echo "========== Cleaning up workspace ==========" | |
| if [[ ${{ matrix.runner }} == atom-mi355-8gpu.predownload ]]; then | |
| docker run --rm -v "${GITHUB_WORKSPACE:-$PWD}":/workspace -w /workspace --privileged rocm/pytorch:latest bash -lc "ls -la /workspace/ && rm -rf /workspace/*" || true | |
| fi | |
| docker stop "$CONTAINER_NAME" || true | |
| docker rm "$CONTAINER_NAME" || true |