
Commit 649489d

Merge pull request #931 from SkqLiao/main
Add Human Eval Benchmark Test for CI/CD
2 parents: 8be56a0 + bad334f

File tree: 2 files changed, +159 −0 lines changed

.github/workflows/score.yml (new file, +24 lines)

@@ -0,0 +1,24 @@
name: Human Eval Score KTransformers
run-name: Human Eval Score KTransformers
on: workflow_dispatch
jobs:
  Human-Eval-Score-KTransformers:
    runs-on: self-hosted
    steps:
      - run: echo "🎉 The job was automatically triggered by a ${{ github.event_name }} event."
      - run: echo "🔎 The name of your branch is ${{ github.ref }} and your repository is ${{ github.repository }}."
      - name: Check out repository code
        uses: actions/checkout@v4
      - run: echo "💡 The ${{ github.repository }} repository has been cloned to the runner."
      - name: Human Eval Run
        run: |
          set -e
          source /home/qujing3/anaconda3/etc/profile.d/conda.sh
          conda activate ktransformers-dev
          export PATH=/usr/local/cuda-12.4/bin:$PATH
          export LD_LIBRARY_PATH=/usr/local/cuda-12.4/lib64:$LD_LIBRARY_PATH
          export CUDA_HOME=/usr/local/cuda-12.4
          cd ${{ github.workspace }}
          python ktransformers/tests/score.py

      - run: echo "This job's status is ${{ job.status }}."

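Note: the workflow above is manual-trigger only (on: workflow_dispatch), so it is started from the Actions tab or through GitHub's workflow-dispatch REST endpoint. Below is a minimal sketch of the latter, assuming the upstream repository is kvcache-ai/ktransformers, the workflow is registered under the file name score.yml, and a token with workflow permissions is exported as GITHUB_TOKEN; these are illustrative assumptions, not part of this commit.

import os
import requests

# Illustrative values -- adjust the repository slug, workflow file, and ref as needed.
OWNER_REPO = "kvcache-ai/ktransformers"   # assumed upstream repository
WORKFLOW_FILE = "score.yml"               # the workflow added in this commit
REF = "main"                              # branch the workflow should run on

# A token with permission to dispatch workflows is assumed in GITHUB_TOKEN.
token = os.environ["GITHUB_TOKEN"]

resp = requests.post(
    f"https://api.github.com/repos/{OWNER_REPO}/actions/workflows/{WORKFLOW_FILE}/dispatches",
    headers={
        "Accept": "application/vnd.github+json",
        "Authorization": f"Bearer {token}",
    },
    json={"ref": REF},
)

# GitHub answers 204 No Content when the dispatch is accepted.
resp.raise_for_status()
print(f"Dispatch accepted: HTTP {resp.status_code}")
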
ktransformers/tests/score.py (new file, +135 lines)

@@ -0,0 +1,135 @@
import subprocess
import time
import requests
import sys
import os

def wait_for_server(base_url: str, timeout: int = None) -> None:
    start_time = time.time()
    while True:
        try:
            response = requests.get(
                f"{base_url}/v1/models",
                headers={"Authorization": "Bearer None"},
            )
            if response.status_code == 200:
                time.sleep(5)
                print("Server is ready.")
                break
        except requests.exceptions.RequestException:
            time.sleep(1)
        if timeout and time.time() - start_time > timeout:
            raise TimeoutError("Server did not become ready within timeout period")

server_cmd = [
    "numactl", "-N", "1", "-m", "1",
    "/home/qujing3/anaconda3/envs/ktransformers-dev/bin/ktransformers",
    "--model_path", "/home/qujing3/models/DeepSeek-R1-Q4_K_M/config",
    "--gguf_path", "/home/qujing3/models/DeepSeek-R1-Q4_K_M/",
    "--port", "10002",
    "--cpu_infer", "64"
]

print("Starting ktransformers server...")
print(" ".join(server_cmd))
with open("/tmp/server_log.txt", "w") as f:
    server_process = subprocess.Popen(server_cmd, stdout=f, stderr=f, text=True)

try:
    wait_for_server("http://localhost:10002", timeout=300)

    eval_cmd = ["python", "ktransformers/tests/humaneval/eval_api.py"]
    print("Running eval_api.py...")
    print(f"Command: {' '.join(eval_cmd)}")

    env = os.environ.copy()
    env["PYTHONUNBUFFERED"] = "1"

    eval_process = subprocess.Popen(
        eval_cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
        bufsize=1,
        env=env,
        universal_newlines=True
    )

    import threading
    import queue

    def enqueue_output(out, queue):
        for line in iter(out.readline, ''):
            queue.put(line)
        out.close()

    stdout_queue = queue.Queue()
    stderr_queue = queue.Queue()

    stdout_thread = threading.Thread(target=enqueue_output, args=(eval_process.stdout, stdout_queue))
    stderr_thread = threading.Thread(target=enqueue_output, args=(eval_process.stderr, stderr_queue))

    stdout_thread.daemon = True
    stderr_thread.daemon = True
    stdout_thread.start()
    stderr_thread.start()

    while eval_process.poll() is None:
        try:
            line = stdout_queue.get_nowait()
            print(line, end='', flush=True)
        except queue.Empty:
            pass

        try:
            line = stderr_queue.get_nowait()
            print(line, end='', file=sys.stderr, flush=True)
        except queue.Empty:
            pass

        time.sleep(1)

    while not stdout_queue.empty():
        print(stdout_queue.get(), end='', flush=True)
    while not stderr_queue.empty():
        print(stderr_queue.get(), end='', file=sys.stderr, flush=True)

    eval_process.wait()
    print(f"eval_api.py completed with exit code: {eval_process.returncode}")

    evaluate_cmd = [
        "evaluate_functional_correctness",
        "ktransformers/tests/humaneval/results/api/eval_b.jsonl"
    ]
    print("Running evaluate_functional_correctness...")
    print(f"Command: {' '.join(evaluate_cmd)}")

    evaluate_process = subprocess.Popen(
        evaluate_cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
        bufsize=1,
        universal_newlines=True
    )

    for line in evaluate_process.stdout:
        print(line, end='', flush=True)
    for line in evaluate_process.stderr:
        print(line, end='', file=sys.stderr, flush=True)

    evaluate_process.wait()

    print(f"evaluate_functional_correctness completed with exit code: {evaluate_process.returncode}")
    if evaluate_process.returncode != 0:
        print(f"evaluate_functional_correctness exited with code {evaluate_process.returncode}")
        sys.exit(evaluate_process.returncode)

finally:
    print("Stopping ktransformers server...")
    server_process.terminate()
    try:
        server_process.wait(timeout=30)
    except subprocess.TimeoutExpired:
        print("Server did not terminate gracefully, forcing...")
        server_process.kill()

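Note: score.py only relays the evaluator's output; the pass@k numbers come from evaluate_functional_correctness (OpenAI's human-eval package), which also writes a per-sample results file by appending _results.jsonl to the input path. Below is a minimal sketch of turning that file into a CI gate, assuming that naming convention and an illustrative 0.9 pass@1 floor; neither the path nor the threshold is part of this commit.

import json
import sys

# Assumed output location: human-eval appends "_results.jsonl" to the sample file name.
RESULTS = "ktransformers/tests/humaneval/results/api/eval_b.jsonl_results.jsonl"
THRESHOLD = 0.9  # illustrative pass@1 floor, not part of this commit

passed = total = 0
with open(RESULTS) as f:
    for line in f:
        record = json.loads(line)
        total += 1
        passed += bool(record.get("passed"))

pass_at_1 = passed / total if total else 0.0
print(f"pass@1 = {pass_at_1:.3f} ({passed}/{total})")
if pass_at_1 < THRESHOLD:
    sys.exit(1)  # fail the CI job when the score drops below the floor
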