ATOM/.github/scripts/atom_test.sh at bb94f882e91e8f81f9a90e2786f77c6174591975 · ROCm/ATOM · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
#!/bin/bash
set -euo pipefail

TYPE=${1:-launch}
MODEL_PATH=${2:-meta-llama/Meta-Llama-3-8B-Instruct}
EXTRA_ARGS=("${@:3}")


if [ "$TYPE" == "launch" ]; then
  echo ""
  echo "========== Launching ATOM server =========="
  PROFILER_ARGS=""
  if [ "${ENABLE_TORCH_PROFILER:-0}" == "1" ]; then
    PROFILER_ARGS="--torch-profiler-dir /app/trace"
    echo "Torch profiler enabled, trace output: /app/trace"
  fi
  ATOM_SERVER_LOG="/tmp/atom_server.log"
  python -m atom.entrypoints.openai_server --model "$MODEL_PATH" $PROFILER_ARGS "${EXTRA_ARGS[@]}" 2>&1 | tee "$ATOM_SERVER_LOG" &
  atom_server_pid=$!

  echo ""
  echo "========== Waiting for ATOM server to start =========="
  max_retries=30
  retry_interval=60
  for ((i=1; i<=max_retries; i++)); do
      if curl -s http://localhost:8000/v1/completions -o /dev/null; then
          echo "ATOM server is up."
          break
      fi
      echo "Waiting for ATOM server to be ready... ($i/$max_retries)"
      sleep $retry_interval
  done
  if ! curl -s http://localhost:8000/v1/completions -o /dev/null; then
      echo "ATOM server did not start after $((max_retries * retry_interval)) seconds."
      kill $atom_server_pid
      exit 1
  fi
fi

if [ "$TYPE" == "accuracy" ]; then
  echo ""
  if ! command -v lm_eval >/dev/null 2>&1; then
    echo "========== Installing lm-eval =========="
    pip install lm-eval[api]
  else
    echo "========== lm-eval already installed; skipping installation =========="
  fi

  echo ""
  echo "========== Running accuracy test =========="
  mkdir -p accuracy_test_results
  RESULT_FILENAME=accuracy_test_results/$(date +%Y%m%d%H%M%S).json
  lm_eval --model local-completions \
          --model_args model="$MODEL_PATH",base_url=http://localhost:8000/v1/completions,num_concurrent=65,max_retries=1,tokenized_requests=False \
          --tasks gsm8k \
          --num_fewshot 3 \
          --output_path "${RESULT_FILENAME}"
  echo "Accuracy test results saved to ${RESULT_FILENAME}"
  chmod -R 777 accuracy_test_results
fi

if [ "$TYPE" == "benchmark" ]; then
  echo ""
  echo "========== Cloning bench_serving =========="
  git clone https://github.com/kimbochen/bench_serving.git && chmod +x bench_serving/benchmark_serving.py
  echo "========== Running benchmark test =========="
  if [ "${ENABLE_TORCH_PROFILER:-0}" == "1" ]; then
    echo "Starting torch profiler..."
    curl -s -S -X POST http://127.0.0.1:8000/start_profile || echo "Warning: failed to start profiler"
  fi
  python bench_serving/benchmark_serving.py \
    --model=$MODEL_PATH --backend=vllm --base-url="http://localhost:8000" \
    --dataset-name=random \
    --random-input-len=$ISL --random-output-len=$OSL --random-range-ratio=$RANDOM_RANGE_RATIO \
    --max-concurrency=$CONC \
    --num-prompts=$(( $CONC * 10 )) \
    --trust-remote-code \
    --num-warmups=1 \
    --request-rate=inf --ignore-eos \
    --save-result --percentile-metrics="ttft,tpot,itl,e2el" \
    --result-dir=. --result-filename=${RESULT_FILENAME}.json

  if [ "${ENABLE_TORCH_PROFILER:-0}" == "1" ]; then
    echo "Stopping torch profiler..."
    curl -s -S -X POST http://127.0.0.1:8000/stop_profile || echo "Warning: failed to stop profiler"
    ATOM_SERVER_LOG="/tmp/atom_server.log"
    echo "Waiting for 'Profiler stopped.' in server log ..."
    profiler_done=false
    for i in $(seq 1 300); do
      if grep -q "Profiler stopped." "$ATOM_SERVER_LOG" 2>/dev/null; then
        echo "Profiler stopped after ${i}s"
        ls -lhR /app/trace/
        profiler_done=true
        break
      fi
      echo "Waiting for profiler to finish... ($i/300)"
      sleep 1
    done
    if [ "$profiler_done" = false ]; then
      echo "Warning: 'Profiler stopped.' not found in server log after 300s"
      ls -lhR /app/trace/ 2>/dev/null || true
    fi
  fi

  # Inject ISL/OSL into result JSON for summary table
  if [ -f "${RESULT_FILENAME}.json" ]; then
    jq --argjson isl "$ISL" --argjson osl "$OSL" \
      '. + {random_input_len: $isl, random_output_len: $osl}' \
      "${RESULT_FILENAME}.json" > "${RESULT_FILENAME}.tmp" && \
      mv "${RESULT_FILENAME}.tmp" "${RESULT_FILENAME}.json"
  fi
fi