
Commit 8287746

Rewrite AI benchmark to default to llama.cpp, break out variables into group_data/all.py.
1 parent d491318 commit 8287746

File tree: 7 files changed, +200 −61 lines

README.md

Lines changed: 1 addition & 1 deletion

@@ -30,7 +30,7 @@ I often test a variety of other board-specific features, too, though it depends
 
 The benchmark scripts are run using `pyinfra`. It can be installed with `pip3 install pyinfra`.
 
-Inside the `benchmark` directory, modify `inventory.py` to point at the system under test, and run:
+Inside the `benchmark` directory, modify `inventory.py` to point at the system under test, modify `group_data/all.py` with the variables appropriate for your system, and run:
 
 ```
 pyinfra inventory.py main.py -y

benchmark/group_data/all.py

Lines changed: 59 additions & 0 deletions

@@ -0,0 +1,59 @@
+# Inventory variables applied to all hosts.
+
+# These two variables, when multiplied, should generally equal the core count.
+hpl_ps = 1
+hpl_qs = 4
+
+# PHP version available in system package manager (used for PTS installation).
+php_version = "8.3"
+
+# Select from 'llama.cpp' or 'ollama'.
+ai_benchmark = 'llama.cpp'
+
+# llama.cpp build options (e.g. '-DGGML_VULKAN=1' or '-DGGML_CUDA=1')
+llama_cpp_build_opts = '-DGGML_VULKAN=1'
+# For Nvidia DGX Spark / GB10 systems:
+# llama_cpp_build_opts = '-DGGML_CUDA=1 -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc'
+
+# https://github.com/ggml-org/llama.cpp/blob/master/tools/llama-bench/README.md
+llama_bench_opts = '-n 128 -p 512,4096 -pg 4096,128 -ngl 99 -r 2'
+
+# Select which models to benchmark. Ideally they will run entirely in VRAM.
+# The `urls` list can include multiple URLs for larger multi-part models.
+llama_cpp_models = {
+    'tinyllama-1.1b-1t-openorca.Q4_K_M.gguf': {
+        'urls': ['https://huggingface.co/TheBloke/TinyLlama-1.1B-1T-OpenOrca-GGUF/resolve/main/tinyllama-1.1b-1t-openorca.Q4_K_M.gguf'],
+        'size_in_gb': 0.7,
+    },
+    'Llama-3.2-3B-Instruct-Q4_K_M.gguf': {
+        'urls': ['https://huggingface.co/bartowski/Llama-3.2-3B-Instruct-GGUF/resolve/main/Llama-3.2-3B-Instruct-Q4_K_M.gguf'],
+        'size_in_gb': 1.9,
+    },
+    # 'llama-2-13b.Q4_K_M.gguf': {
+    #     'urls': ['https://huggingface.co/TheBloke/Llama-2-13B-GGUF/resolve/main/llama-2-13b.Q4_K_M.gguf'],
+    #     'size_in_gb': 7.87,
+    # },
+    # 'DeepSeek-R1-Distill-Qwen-14B-Q4_K_M.gguf': {
+    #     'urls': ['https://huggingface.co/unsloth/DeepSeek-R1-Distill-Qwen-14B-GGUF/resolve/main/DeepSeek-R1-Distill-Qwen-14B-Q4_K_M.gguf'],
+    #     'size_in_gb': 9.0,
+    # },
+    # 'gpt-oss-20b-Q4_K_M.gguf': {
+    #     'urls': ['https://huggingface.co/unsloth/gpt-oss-20b-GGUF/resolve/main/gpt-oss-20b-Q4_K_M.gguf'],
+    #     'size_in_gb': 11.6,
+    # },
+    # 'Qwen3-30B-A3B-Q4_K_M.gguf': {
+    #     'urls': ['https://huggingface.co/bartowski/Qwen_Qwen3-32B-GGUF/resolve/main/Qwen_Qwen3-32B-Q4_K_M.gguf'],
+    #     'size_in_gb': 18.6,
+    # },
+    # 'Meta-Llama-3.1-70B-Instruct-Q4_K_M.gguf': {
+    #     'urls': ['https://huggingface.co/bartowski/Meta-Llama-3.1-70B-Instruct-GGUF/resolve/main/Meta-Llama-3.1-70B-Instruct-Q4_K_M.gguf'],
+    #     'size_in_gb': 42.5,
+    # },
+    # 'gpt-oss-120b-Q4_K_M-00001-of-00002.gguf': {
+    #     'urls': [
+    #         'https://huggingface.co/unsloth/gpt-oss-120b-GGUF/resolve/main/Q4_K_M/gpt-oss-120b-Q4_K_M-00001-of-00002.gguf',
+    #         'https://huggingface.co/unsloth/gpt-oss-120b-GGUF/resolve/main/Q4_K_M/gpt-oss-120b-Q4_K_M-00002-of-00002.gguf',
+    #     ],
+    #     'size_in_gb': 62.9,
+    # },
+}
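
A note on the HPL values: as the comment in this file says, `hpl_ps * hpl_qs` should generally equal the core count, with Q conventionally the larger of the two. A minimal helper for picking a balanced split, purely as an illustration (not part of this commit):

```
def pick_hpl_grid(core_count):
    """Split core_count into the most balanced P x Q pair (P <= Q)."""
    best = (1, core_count)
    for p in range(1, int(core_count ** 0.5) + 1):
        if core_count % p == 0:
            best = (p, core_count // p)
    return best

# Examples: pick_hpl_grid(6) -> (2, 3), pick_hpl_grid(16) -> (4, 4).
```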

benchmark/inventory.py

Lines changed: 1 addition & 1 deletion

@@ -1,4 +1,4 @@
 hosts = (
-    ["framework-13.local"],
+    ["10.0.2.202"],
     {"ssh_user": "jgeerling"},
 )
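
pyinfra inventories can also list more than one host per group, and individual hosts can carry their own data overrides. A hypothetical variant (hostnames and users are placeholders, not from the commit):

```
# Hypothetical inventory.py targeting two systems; the second host overrides
# the group-level ssh_user for itself only.
hosts = (
    [
        "10.0.2.202",
        ("rpi5.local", {"ssh_user": "pi"}),
    ],
    {"ssh_user": "jgeerling"},
)
```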

benchmark/main.py

Lines changed: 5 additions & 5 deletions

@@ -8,11 +8,11 @@
 
 pause_seconds = 60
 tasks = [
-    'tinymembench.py',
-    'geekbench.py',
-    'disk-benchmark.py',
-    'top500.py',
-    'sbc-general-benchmark.py',
+    #'tinymembench.py',
+    #'geekbench.py',
+    #'disk-benchmark.py',
+    #'top500.py',
+    #'sbc-general-benchmark.py',
     'ai-benchmark.py',
 ]
 
benchmark/tasks/ai-benchmark.py

Lines changed: 131 additions & 51 deletions

@@ -1,63 +1,143 @@
+import os
 from pyinfra import host, logger
 from pyinfra.facts.files import File
 from pyinfra.facts.hardware import Memory
-from pyinfra.facts.server import Arch, Home
-from pyinfra.operations import files, git, python, server
+from pyinfra.facts.server import Arch, Home, LinuxName
+from pyinfra.operations import apt, dnf, files, git, python, server
+from urllib.parse import urlparse
 
 host_ram_size=host.get_fact(Memory)
 working_dir=host.get_fact(Home) + "/Downloads"
-ollama_models={
-    'llama3.2:3b': 2000,
-    'llama3.1:8b': 4900,
-    'llama2:13b': 7400,
-    'deepseek-r1:1.5b': 1100,
-    'deepseek-r1:8b': 4900,
-    'deepseek-r1:14b': 9000,
-    'deepseek-r1:70b': 43000,
-}
-
-files.download(
-    name="Download Ollama Install Script",
-    src="https://ollama.com/install.sh",
-    dest="{}/install.sh".format(working_dir),
-)
-
-# Install Ollama if necessary (but not on RISC-V, for now). For RISC-V, see:
-# https://github.com/geerlingguy/sbc-reviews/issues/65#issuecomment-2637866212
-host_arch = host.get_fact(Arch)
-if not host_arch == 'riscv64':
-    if not host.get_fact(File, path='/usr/local/bin/ollama'):
-        server.shell(
-            name="Run Ollama Install Script",
-            commands="sh {}/install.sh".format(working_dir),
-        )
 
-git.repo(
-    name="Clone ai-benchmarks with git.",
-    src="https://github.com/geerlingguy/ai-benchmarks.git",
-    dest="{}/ai-benchmarks".format(working_dir),
-)
-
-def ollama_loop_callback():
-    for model, model_size in ollama_models.items():
-        # Skip a model if it's larger than the system RAM.
-        if (host_ram_size - (host_ram_size / 8)) < model_size:
-            logger.info(f"\nSkipping model {model} as it is too large.\n\n")
-            continue
-
-        server.shell(
-            name="Download Ollama model: {}".format(model),
-            commands="ollama pull {}".format(model),
+if host.data.ai_benchmark == 'llama.cpp':
+    linux_name=host.get_fact(LinuxName)
+
+    if linux_name in ["Debian", "Ubuntu"]:
+        apt.packages(
+            name="Ensure prerequisites are installed (Debian).",
+            packages=[
+                "libvulkan-dev",
+                "glslc",
+                "cmake",
+                "libcurl4-openssl-dev",
+            ],
+            _sudo=True,
         )
 
-        ollama_benchmark_result = server.shell(
-            name="Benchmark Ollama model: {}".format(model),
-            commands="{}/ai-benchmarks/obench.sh -m {} -c 3 --markdown".format(working_dir, model),
+    if linux_name in ["CentOS", "RedHat", "Fedora"]:
+        dnf.packages(
+            name="Ensure prerequisites are installed (RedHat).",
+            packages=[
+                "vulkan-loader-devel",
+                "vulkan-validation-layers-devel",
+                "vulkan-tools",
+                "glslc",
+                "cmake",
+                "libcurl-devel",
+            ],
+            _sudo=True,
        )
 
-        logger.info(f"\n{ollama_benchmark_result.stdout}\n\n")
+    git.repo(
+        name="Clone llama.cpp with git.",
+        src="https://github.com/ggerganov/llama.cpp.git",
+        dest="{}/llama.cpp".format(working_dir),
+    )
+
+    llama_cpp_build_opts=host.data.llama_cpp_build_opts
+    server.shell(
+        name="Build llama.cpp",
+        commands=[
+            "cd {}/llama.cpp && cmake -B build {}".format(working_dir, llama_cpp_build_opts),
+            "cd {}/llama.cpp && cmake --build build --config Release".format(working_dir)
+        ]
+    )
+
+    llama_bench_opts=host.data.llama_bench_opts
+    def llama_cpp_loop_callback():
+        for model, model_details in host.data.llama_cpp_models.items():
+            # Accounting for multiple URL models.
+            counter = 0
+            total = len(model_details['urls'])
+
+            for url in model_details['urls']:
+                counter = counter + 1
+                filename = os.path.basename(urlparse(url).path)
+                files.download(
+                    name="Downloading model: {} (file {} of {})".format(model, counter, total),
+                    src=url,
+                    dest="{}/llama.cpp/models/{}".format(working_dir, filename),
+                )
+
+            llama_bench_result = server.shell(
+                name="Run llama-bench",
+                commands="cd {}/llama.cpp && ./build/bin/llama-bench -m models/{} {}".format(working_dir, model, llama_bench_opts),
+            )
+
+            logger.info(f"\n{llama_bench_result.stdout}\n")
+
+    python.call(
+        name="Execute llama.cpp loop",
+        function=llama_cpp_loop_callback,
+    )
+
+# TODO: Currently breaks, see https://github.com/pyinfra-dev/pyinfra/issues/1355
+elif host.data.ai_benchmark == 'ollama':
+    ollama_models={
+        'llama3.2:3b': 2000,
+        'llama3.1:8b': 4900,
+        'llama2:13b': 7400,
+        'deepseek-r1:1.5b': 1100,
+        'deepseek-r1:8b': 4900,
+        'deepseek-r1:14b': 9000,
+        'deepseek-r1:70b': 43000,
+    }
+
+    files.download(
+        name="Download Ollama Install Script",
+        src="https://ollama.com/install.sh",
+        dest="{}/install.sh".format(working_dir),
+    )
+
+    # Install Ollama if necessary (but not on RISC-V, for now). For RISC-V, see:
+    # https://github.com/geerlingguy/sbc-reviews/issues/65#issuecomment-2637866212
+    host_arch = host.get_fact(Arch)
+    if not host_arch == 'riscv64':
+        if not host.get_fact(File, path='/usr/local/bin/ollama'):
+            server.shell(
+                name="Run Ollama Install Script",
+                commands="sh {}/install.sh".format(working_dir),
+            )
+
+    git.repo(
+        name="Clone ai-benchmarks with git.",
+        src="https://github.com/geerlingguy/ai-benchmarks.git",
+        dest="{}/ai-benchmarks".format(working_dir),
+    )
+
+    def ollama_loop_callback():
+        for model, model_size in ollama_models.items():
+            # Skip a model if it's larger than the system RAM.
+            if (host_ram_size - (host_ram_size / 8)) < model_size:
+                logger.info(f"\nSkipping model {model} as it is too large.\n\n")
+                continue
+
+            server.shell(
+                name="Download Ollama model: {}".format(model),
+                commands="ollama pull {}".format(model),
+            )
+
+            ollama_benchmark_result = server.shell(
+                name="Benchmark Ollama model: {}".format(model),
+                commands="{}/ai-benchmarks/obench.sh -m {} -c 3 --markdown".format(working_dir, model),
+            )
+
+            logger.info(f"\n{ollama_benchmark_result.stdout}\n\n")
+
+    python.call(
+        name="Execute Ollama loop",
+        function=ollama_loop_callback,
+    )
 
-python.call(
-    name="Execute Ollama loop",
-    function=ollama_loop_callback,
-)
+else:
+    logger.info(f"Please specify a valid ai-benchmark option.")

benchmark/tasks/sbc-general-benchmark.py

Lines changed: 1 addition & 1 deletion

@@ -2,7 +2,7 @@
 from pyinfra.operations import files, python, server
 from pyinfra.facts.server import Home
 
-php_version="8.4" # TODO Maybe map 8.x for different Ubuntu / Debian versions.
+php_version=host.data.php_version # TODO Map 8.x for different deb versions.
 working_dir=host.get_fact(Home) + "/Downloads"
 
 files.download(
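
The TODO here suggests deriving the PHP version from the distribution release rather than hard-coding it in `group_data/all.py`. A rough sketch of that idea, assuming pyinfra's `LinuxDistribution` fact (which reports the distro name and major release) and a hand-maintained table; the versions shown are illustrative, so check what your release actually packages:

```
from pyinfra import host
from pyinfra.facts.server import LinuxDistribution

# Illustrative (distro name, major release) -> packaged PHP version table.
PHP_VERSIONS = {
    ("Ubuntu", 24): "8.3",
    ("Ubuntu", 22): "8.1",
    ("Debian", 12): "8.2",
}

distro = host.get_fact(LinuxDistribution)
php_version = PHP_VERSIONS.get(
    (distro["name"], distro["major"]),
    host.data.php_version,  # fall back to the group_data default
)
```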

benchmark/tasks/top500.py

Lines changed: 2 additions & 2 deletions

@@ -7,8 +7,8 @@
 
 # TODO: Make this dynamic based on CPU core count?
 # See: https://gist.github.com/CJCShadowsan/94efdf21539f3156414c1224b1c76605
-hpl_ps=1
-hpl_qs=6
+hpl_ps=host.data.hpl_ps
+hpl_qs=host.data.hpl_qs
 
 git.repo(
     name="Clone top500 with git.",
