ROCm · gyohuangxin · Mar 10, 2026 · Mar 10, 2026 · Mar 10, 2026 · Mar 10, 2026
diff --git a/.github/scripts/atom_test.sh b/.github/scripts/atom_test.sh
@@ -1,111 +1,217 @@
 #!/bin/bash
-set -euo pipefail
-
-TYPE=${1:-launch}
-MODEL_PATH=${2:-meta-llama/Meta-Llama-3-8B-Instruct}
-EXTRA_ARGS=("${@:3}")
-
-
-if [ "$TYPE" == "launch" ]; then
-  echo ""
-  echo "========== Launching ATOM server =========="
-  PROFILER_ARGS=""
-  if [ "${ENABLE_TORCH_PROFILER:-0}" == "1" ]; then
-    PROFILER_ARGS="--torch-profiler-dir /app/trace"
-    echo "Torch profiler enabled, trace output: /app/trace"
-  fi
-  ATOM_SERVER_LOG="/tmp/atom_server.log"
-  python -m atom.entrypoints.openai_server --model "$MODEL_PATH" $PROFILER_ARGS "${EXTRA_ARGS[@]}" 2>&1 | tee "$ATOM_SERVER_LOG" &
-  atom_server_pid=$!
-
-  echo ""
-  echo "========== Waiting for ATOM server to start =========="
-  max_retries=30
-  retry_interval=60
-  for ((i=1; i<=max_retries; i++)); do
-      if curl -s http://localhost:8000/v1/completions -o /dev/null; then
-          echo "ATOM server is up."
-          break
-      fi
-      echo "Waiting for ATOM server to be ready... ($i/$max_retries)"
-      sleep $retry_interval
-  done
-  if ! curl -s http://localhost:8000/v1/completions -o /dev/null; then
-      echo "ATOM server did not start after $((max_retries * retry_interval)) seconds."
-      kill $atom_server_pid
-      exit 1
-  fi
+
+#############################################
+# GPU load test for the DeepSeek-R1 model.
+# Times model loading per GPU inside the ATOM container
+# and prints a comparison table followed by a summary.
+#############################################
+MODEL_NAME='deepseek-ai/DeepSeek-R1-0528'
+MODEL_LOCAL_PATH='/models/deepseek-ai/DeepSeek-R1-0528'
+TENSOR_PARALLEL=8
+KV_CACHE_DTYPE='fp8'
+TEMPERATURE=0
+
+# One log file per host and start time
+LOG_FILE="/tmp/gpu_load_test_$(hostname)_$(date +%Y%m%d_%H%M%S).log"
+
+banner_rule='========================================='
+printf '%s\n' \
+    "$banner_rule" "GPU Load Test - DeepSeek-R1" "$banner_rule" \
+    "Hostname: $(hostname)" \
+    "Date: $(date)" \
+    "Log: $LOG_FILE" ""
+
+# Check if model exists locally
+if [ -f "$MODEL_LOCAL_PATH/config.json" ]; then  # config.json serves as the marker that the checkpoint is present
+    echo "✅ Found model at: $MODEL_LOCAL_PATH"
+    MODEL_PATH="$MODEL_LOCAL_PATH"  # the local directory is what gets passed to --model
+else
+    echo "⚠️  Model not found locally, will download: $MODEL_NAME"
+    MODEL_PATH="$MODEL_NAME"  # otherwise the model name itself is passed to --model
 fi
+echo ""
+
+# GPU status. grep -c prints 0 and exits non-zero on no match, so the count is defaulted afterwards instead of chaining an echo fallback.
+if command -v rocm-smi &> /dev/null; then
+    GPU_COUNT=$(rocm-smi --showid 2>/dev/null | grep -c 'GPU')
+    [ "${GPU_COUNT:-0}" -gt 0 ] || GPU_COUNT=8  # assume 8 GPUs when rocm-smi reported none
+    printf 'GPU Count: %s\n\n' "$GPU_COUNT"
+fi
+
+# Remove existing container
+docker rm -f atom_inference 2>/dev/null
+
+# The container can read host files only through bind mounts, so the local
+# checkpoint directory is mounted whenever it was selected as MODEL_PATH. Without
+# this the /models path handed to --model does not exist inside the container.
+LOCAL_MODEL_MOUNT=""
+[ "$MODEL_PATH" = "$MODEL_LOCAL_PATH" ] && LOCAL_MODEL_MOUNT="$MODEL_LOCAL_PATH:$MODEL_LOCAL_PATH"
+
+# Run the test
+echo "Starting model load test..."
+echo ""
+
+docker run \
+  --name atom_inference \
+  --network=host \
+  --device=/dev/kfd --device=/dev/dri --group-add video \
+  --cap-add=SYS_PTRACE --security-opt seccomp=unconfined \
+  -v /data:/data ${LOCAL_MODEL_MOUNT:+-v "$LOCAL_MODEL_MOUNT"} \
+  -e HF_HOME=/data/huggingface_cache \
+  -e NCCL_DEBUG=WARN -e RCCL_DEBUG=WARN \
+  --shm-size=16G --ulimit memlock=-1 --ulimit stack=67108864 \
+  rocm/atom-dev:latest \
+  bash -c '
+    MODEL_RUNNER="/app/ATOM/atom/model_engine/model_runner.py"
+
+    # Add timing instrumentation
+    if ! grep -q "^import time$" "$MODEL_RUNNER"; then
+        sed -i "1a import time" "$MODEL_RUNNER"
+    fi
+
+    # Instrument model loading
+    sed -i "/load_model(self.model, config.model, config.hf_config, config.load_dummy)/i\\
+        load_start_time = time.time()\\
+        logger.info(f\"[LOAD_START] GPU {self.rank} | Time: {load_start_time:.6f}\")" \
+        "$MODEL_RUNNER"
+
+    sed -i "/load_model(self.model, config.model, config.hf_config, config.load_dummy)/a\\
+        load_elapsed = time.time() - load_start_time\\
+        logger.info(f\"[LOAD_DONE] GPU {self.rank} | Duration: {load_elapsed:.2f}s\")" \
+        "$MODEL_RUNNER"
+
+    # Run inference
+    python3 -m atom.examples.simple_inference \
+      --model "'"$MODEL_PATH"'" \
+      --kv_cache_dtype "'"$KV_CACHE_DTYPE"'" \
+      -tp "'"$TENSOR_PARALLEL"'" \
+      --temperature "'"$TEMPERATURE"'"
+  ' 2>&1 | tee "$LOG_FILE"
+
+# Analyze results
+echo ""
+echo "========================================="
+echo "GPU LOAD TIME ANALYSIS"
+echo "========================================="
+echo ""
+
+# Count completed loads. grep -c prints 0 itself when nothing matches, so only an empty result is defaulted.
+LOAD_COUNT=$(grep -c "\[LOAD_DONE\]" "$LOG_FILE" 2>/dev/null); LOAD_COUNT=${LOAD_COUNT:-0}
 
-if [ "$TYPE" == "accuracy" ]; then
-  echo ""
-  if ! command -v lm_eval >/dev/null 2>&1; then
-    echo "========== Installing lm-eval =========="
-    pip install lm-eval[api]
-  else
-    echo "========== lm-eval already installed; skipping installation =========="
-  fi
-
-  echo ""
-  echo "========== Running accuracy test =========="
-  mkdir -p accuracy_test_results
-  RESULT_FILENAME=accuracy_test_results/$(date +%Y%m%d%H%M%S).json
-  lm_eval --model local-completions \
-          --model_args model="$MODEL_PATH",base_url=http://localhost:8000/v1/completions,num_concurrent=65,max_retries=1,tokenized_requests=False \
-          --tasks gsm8k \
-          --num_fewshot 3 \
-          --output_path "${RESULT_FILENAME}"
-  echo "Accuracy test results saved to ${RESULT_FILENAME}"
-  chmod -R 777 accuracy_test_results
+if [ "$LOAD_COUNT" -eq 0 ]; then  # no [LOAD_DONE] line was counted in the log
+    echo "❌ Test failed - no GPU load completion found"
+    echo "Check log: $LOG_FILE"
+    exit 1  # stop before the per-GPU analysis that follows
 fi
 
-if [ "$TYPE" == "benchmark" ]; then
-  echo ""
-  echo "========== Cloning bench_serving =========="
-  git clone https://github.com/kimbochen/bench_serving.git && chmod +x bench_serving/benchmark_serving.py
-  echo "========== Running benchmark test =========="
-  if [ "${ENABLE_TORCH_PROFILER:-0}" == "1" ]; then
-    echo "Starting torch profiler..."
-    curl -s -S -X POST http://127.0.0.1:8000/start_profile || echo "Warning: failed to start profiler"
-  fi
-  python bench_serving/benchmark_serving.py \
-    --model=$MODEL_PATH --backend=vllm --base-url="http://localhost:8000" \
-    --dataset-name=random \
-    --random-input-len=$ISL --random-output-len=$OSL --random-range-ratio=$RANDOM_RANGE_RATIO \
-    --max-concurrency=$CONC \
-    --num-prompts=$(( $CONC * 10 )) \
-    --trust-remote-code \
-    --request-rate=inf --ignore-eos \
-    --save-result --percentile-metrics="ttft,tpot,itl,e2el" \
-    --result-dir=. --result-filename=${RESULT_FILENAME}.json
-
-  if [ "${ENABLE_TORCH_PROFILER:-0}" == "1" ]; then
-    echo "Stopping torch profiler..."
-    curl -s -S -X POST http://127.0.0.1:8000/stop_profile || echo "Warning: failed to stop profiler"
-    ATOM_SERVER_LOG="/tmp/atom_server.log"
-    echo "Waiting for 'Profiler stopped.' in server log ..."
-    profiler_done=false
-    for i in $(seq 1 300); do
-      if grep -q "Profiler stopped." "$ATOM_SERVER_LOG" 2>/dev/null; then
-        echo "Profiler stopped after ${i}s"
-        ls -lhR /app/trace/
-        profiler_done=true
-        break
-      fi
-      echo "Waiting for profiler to finish... ($i/300)"
-      sleep 1
-    done
-    if [ "$profiler_done" = false ]; then
-      echo "Warning: 'Profiler stopped.' not found in server log after 300s"
-      ls -lhR /app/trace/ 2>/dev/null || true
+# Collect each GPU load duration from the log, keyed by GPU number.
+declare -A gpu_times
+gpu_re='GPU ([0-9]+)'
+dur_re='Duration: ([0-9.]+)'
+while IFS= read -r entry; do
+    # In-shell regex matching needs no PCRE-enabled grep, forks nothing per line,
+    # and always takes the first match; entries lacking either field are skipped.
+    [[ $entry =~ $gpu_re ]] || continue
+    gpu_num=${BASH_REMATCH[1]}
+    [[ $entry =~ $dur_re ]] && gpu_times[$gpu_num]=${BASH_REMATCH[1]}
+done < <(grep "\[LOAD_DONE\]" "$LOG_FILE")
+
+# Find min and max times
+min_time=""  # stays empty until the first duration has been seen
+max_time=""
+for gpu in "${!gpu_times[@]}"; do
+    time=${gpu_times[$gpu]}  # ordinary variable that happens to share its name with the time keyword
+    if [ -z "$min_time" ] || (( $(awk -v t="$time" -v m="$min_time" 'BEGIN {print (t < m)}') )); then  # awk prints 1 or 0 for the decimal comparison
+        min_time=$time
     fi
-  fi
-
-  # Inject ISL/OSL into result JSON for summary table
-  if [ -f "${RESULT_FILENAME}.json" ]; then
-    jq --argjson isl "$ISL" --argjson osl "$OSL" \
-      '. + {random_input_len: $isl, random_output_len: $osl}' \
-      "${RESULT_FILENAME}.json" > "${RESULT_FILENAME}.tmp" && \
-      mv "${RESULT_FILENAME}.tmp" "${RESULT_FILENAME}.json"
-  fi
-fi
+    if [ -z "$max_time" ] || (( $(awk -v t="$time" -v m="$max_time" 'BEGIN {print (t > m)}') )); then  # same test mirrored for the maximum
+        max_time=$time
+    fi
+done
+
+# Order the GPUs from fastest to slowest load time
+sorted_gpus=$(
+    for rank in "${!gpu_times[@]}"; do printf '%s %s\n' "$rank" "${gpu_times[$rank]}"; done |
+        sort -k2 -n)
+
+# Print table header
+row_sep="├───────┼───────────┼──────────────┼──────────────────────┤"
+echo "┌───────┬───────────┬──────────────┬──────────────────────┐"
+echo "│  GPU  │ Load Time │    Status    │  Delta from Fastest  │"
+echo "$row_sep"
+
+# Print each GPU. Rows arrive sorted ascending, so the first row is the fastest.
+fastest_printed=false
+while read -r gpu load_time; do
+    # Rule between rows only: the header drew one already and the footer closes the last row
+    if [ "$fastest_printed" = true ]; then
+        echo "$row_sep"
+    fi
+
+    # Calculate delta to the fastest GPU
+    delta=$(awk -v t="$load_time" -v m="$min_time" 'BEGIN {printf "%.2f", t - m}')
+
+    # Determine status from the delta thresholds of 1s, 5s, 10s and 100s
+    if [ "$fastest_printed" = false ]; then
+        status="✅ Fastest"
+        delta_str="-"
+        fastest_printed=true
+    elif (( $(awk -v d="$delta" 'BEGIN {print (d < 1)}') )); then
+        status="✅ Excellent"
+        delta_str=$(printf "+%.2fs" "$delta")
+    elif (( $(awk -v d="$delta" 'BEGIN {print (d < 5)}') )); then
+        status="✅ Good"
+        delta_str=$(printf "+%.2fs" "$delta")
+    elif (( $(awk -v d="$delta" 'BEGIN {print (d < 10)}') )); then
+        status="⚠️  Moderate"
+        delta_str=$(printf "+%.2fs" "$delta")
+    else
+        # SLOW below 100s, VERY SLOW otherwise; the ratio is guarded against a zero baseline
+        status="❌ VERY SLOW"
+        if (( $(awk -v d="$delta" 'BEGIN {print (d < 100)}') )); then status="❌ SLOW"; fi
+        multiplier=$(awk -v t="$load_time" -v m="$min_time" 'BEGIN {printf "%.1f", ((m > 0) ? t / m : 0)}')
+        delta_str=$(printf "+%.0fs (%.1fx slower!)" "$delta" "$multiplier")
+    fi
+
+    printf "│ GPU %1s │ %-9s │ %-12s │ %-20s │\n" "$gpu" "${load_time}s" "$status" "$delta_str"
+done <<< "$sorted_gpus"
+
+# Table footer
+echo "└───────┴───────────┴──────────────┴──────────────────────┘"
+echo ""
+
+# Summary statistics: mean load time, absolute spread, and spread relative to the mean
+avg_time=$(awk '{ total += $2 } END { if (NR > 0) printf "%.2f", total / NR }' <<< "$sorted_gpus")
+delta_range=$(awk -v hi="$max_time" -v lo="$min_time" 'BEGIN { printf "%.2f", hi - lo }')
+variance=$(awk -v hi="$max_time" -v lo="$min_time" -v mean="$avg_time" 'BEGIN { if (mean > 0) printf "%.2f", ((hi - lo) / mean) * 100; else print "0" }')
+
+printf '%s\n' "Summary:" \
+    "  GPUs Tested:  $LOAD_COUNT" \
+    "  Fastest:      ${min_time}s" \
+    "  Slowest:      ${max_time}s" \
+    "  Average:      ${avg_time}s" \
+    "  Delta:        ${delta_range}s" \
+    "  Variance:     ${variance}%" \
+    ""
+
+# Overall assessment
+if (( $(awk -v d="$delta_range" 'BEGIN {print (d < 1)}') )); then
+    echo "✅ EXCELLENT - All GPUs load within 1s of each other"
+elif (( $(awk -v d="$delta_range" 'BEGIN {print (d < 5)}') )); then
+    echo "✅ GOOD - GPUs load within 5s variance"
+elif (( $(awk -v d="$delta_range" 'BEGIN {print (d < 10)}') )); then
+    echo "⚠️  MODERATE - Some variance detected (${delta_range}s)"
+else
+    echo "❌ HIGH VARIANCE - Investigate slow GPUs (${delta_range}s difference)"
+    echo ""
+    echo "Recommended actions:"
+    echo "  1. Check which GPUs are slow (see table above)"
+    echo "  2. Run test again to verify consistency"
+    echo "  3. Check firmware versions: rocm-smi --showfw"
+    echo "  4. Check PCIe links: lspci -vv | grep LnkSta"
+fi
+
+echo ""
+echo "Full log: $LOG_FILE"
+echo "========================================="
+