+import os
+from urllib.parse import urlparse
+
 from pyinfra import host, logger
 from pyinfra.facts.files import File
 from pyinfra.facts.hardware import Memory
-from pyinfra.facts.server import Arch, Home
-from pyinfra.operations import files, git, python, server
+from pyinfra.facts.server import Arch, Home, LinuxName
+from pyinfra.operations import apt, dnf, files, git, python, server

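+# Host facts: Memory is the total system RAM in MB, Home is the remote user's
+# home directory. Everything below is downloaded and built under ~/Downloads.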
 host_ram_size=host.get_fact(Memory)
 working_dir=host.get_fact(Home) + "/Downloads"
-ollama_models={
-    'llama3.2:3b': 2000,
-    'llama3.1:8b': 4900,
-    'llama2:13b': 7400,
-    'deepseek-r1:1.5b': 1100,
-    'deepseek-r1:8b': 4900,
-    'deepseek-r1:14b': 9000,
-    'deepseek-r1:70b': 43000,
-}
-
-files.download(
-    name="Download Ollama Install Script",
-    src="https://ollama.com/install.sh",
-    dest="{}/install.sh".format(working_dir),
-)
-
-# Install Ollama if necessary (but not on RISC-V, for now). For RISC-V, see:
-# https://github.com/geerlingguy/sbc-reviews/issues/65#issuecomment-2637866212
-host_arch = host.get_fact(Arch)
-if not host_arch == 'riscv64':
-    if not host.get_fact(File, path='/usr/local/bin/ollama'):
-        server.shell(
-            name="Run Ollama Install Script",
-            commands="sh {}/install.sh".format(working_dir),
-        )

-git.repo(
-    name="Clone ai-benchmarks with git.",
-    src="https://github.com/geerlingguy/ai-benchmarks.git",
-    dest="{}/ai-benchmarks".format(working_dir),
-)
-
-def ollama_loop_callback():
-    for model, model_size in ollama_models.items():
-        # Skip a model if it's larger than the system RAM.
-        if (host_ram_size - (host_ram_size / 8)) < model_size:
-            logger.info(f"\nSkipping model {model} as it is too large.\n\n")
-            continue
-
-        server.shell(
-            name="Download Ollama model: {}".format(model),
-            commands="ollama pull {}".format(model),
+if host.data.ai_benchmark == 'llama.cpp':
+    linux_name=host.get_fact(LinuxName)
+
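+    # Build prerequisites for llama.cpp: CMake, libcurl headers, and the Vulkan
+    # headers plus the glslc shader compiler for the (optional) Vulkan backend.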
+    if linux_name in ["Debian", "Ubuntu"]:
+        apt.packages(
+            name="Ensure prerequisites are installed (Debian).",
+            packages=[
+                "libvulkan-dev",
+                "glslc",
+                "cmake",
+                "libcurl4-openssl-dev",
+            ],
+            _sudo=True,
         )

-        ollama_benchmark_result = server.shell(
-            name="Benchmark Ollama model: {}".format(model),
-            commands="{}/ai-benchmarks/obench.sh -m {} -c 3 --markdown".format(working_dir, model),
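+    # Equivalent prerequisites for RPM-based distributions.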
+    if linux_name in ["CentOS", "RedHat", "Fedora"]:
+        dnf.packages(
+            name="Ensure prerequisites are installed (RedHat).",
+            packages=[
+                "vulkan-loader-devel",
+                "vulkan-validation-layers-devel",
+                "vulkan-tools",
+                "glslc",
+                "cmake",
+                "libcurl-devel",
+            ],
+            _sudo=True,
         )

-        logger.info(f"\n{ollama_benchmark_result.stdout}\n\n")
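+    # Fetch the llama.cpp sources so they can be built and benchmarked below.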
+    git.repo(
+        name="Clone llama.cpp with git.",
+        src="https://github.com/ggerganov/llama.cpp.git",
+        dest="{}/llama.cpp".format(working_dir),
+    )
+
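+    # Configure and build llama.cpp with CMake; any extra flags (e.g. to enable
+    # a GPU backend) come from llama_cpp_build_opts in host data. server.shell
+    # is not idempotent, so the build re-runs on every deploy.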
+    llama_cpp_build_opts=host.data.llama_cpp_build_opts
+    server.shell(
+        name="Build llama.cpp",
+        commands=[
+            "cd {}/llama.cpp && cmake -B build {}".format(working_dir, llama_cpp_build_opts),
+            "cd {}/llama.cpp && cmake --build build --config Release".format(working_dir),
+        ],
+    )
+
+    llama_bench_opts=host.data.llama_bench_opts
+    def llama_cpp_loop_callback():
+        for model, model_details in host.data.llama_cpp_models.items():
+            # Some models are split across multiple files, so download every
+            # URL listed for the model and track progress as we go.
+            counter = 0
+            total = len(model_details['urls'])
+
+            for url in model_details['urls']:
+                counter += 1
+                filename = os.path.basename(urlparse(url).path)
+                files.download(
+                    name="Downloading model: {} (file {} of {})".format(model, counter, total),
+                    src=url,
+                    dest="{}/llama.cpp/models/{}".format(working_dir, filename),
+                )
+
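+            # Benchmark the downloaded model. The dict key is assumed to match
+            # the filename of the (first) GGUF fetched above; llama-bench prints
+            # a markdown results table by default.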
+            llama_bench_result = server.shell(
+                name="Run llama-bench",
+                commands="cd {}/llama.cpp && ./build/bin/llama-bench -m models/{} {}".format(working_dir, model, llama_bench_opts),
+            )
+
+            logger.info(f"\n{llama_bench_result.stdout}\n")
+
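+    # python.call defers the loop to execution time, so the nested download and
+    # benchmark operations are generated while the deploy runs.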
+    python.call(
+        name="Execute llama.cpp loop",
+        function=llama_cpp_loop_callback,
+    )
+
+# TODO: Currently breaks, see https://github.com/pyinfra-dev/pyinfra/issues/1355
+elif host.data.ai_benchmark == 'ollama':
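+    # Approximate size of each model in MB, compared against total RAM below so
+    # models that won't fit are skipped.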
+    ollama_models={
+        'llama3.2:3b': 2000,
+        'llama3.1:8b': 4900,
+        'llama2:13b': 7400,
+        'deepseek-r1:1.5b': 1100,
+        'deepseek-r1:8b': 4900,
+        'deepseek-r1:14b': 9000,
+        'deepseek-r1:70b': 43000,
+    }
+
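+    # Fetch Ollama's official install script; it is only executed below if
+    # /usr/local/bin/ollama is missing (and never on RISC-V, for now).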
+    files.download(
+        name="Download Ollama Install Script",
+        src="https://ollama.com/install.sh",
+        dest="{}/install.sh".format(working_dir),
+    )
+
+    # Install Ollama if necessary (but not on RISC-V, for now). For RISC-V, see:
+    # https://github.com/geerlingguy/sbc-reviews/issues/65#issuecomment-2637866212
+    host_arch = host.get_fact(Arch)
+    if not host_arch == 'riscv64':
+        if not host.get_fact(File, path='/usr/local/bin/ollama'):
+            server.shell(
+                name="Run Ollama Install Script",
+                commands="sh {}/install.sh".format(working_dir),
+            )
+
+    git.repo(
+        name="Clone ai-benchmarks with git.",
+        src="https://github.com/geerlingguy/ai-benchmarks.git",
+        dest="{}/ai-benchmarks".format(working_dir),
+    )
+
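+    # Pull and benchmark each model with obench.sh from the ai-benchmarks repo
+    # cloned above (-c 3 --markdown: three runs per model, markdown output).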
+    def ollama_loop_callback():
+        for model, model_size in ollama_models.items():
+            # Skip a model if it won't fit in system RAM (keep ~1/8 of RAM free).
+            if (host_ram_size - (host_ram_size / 8)) < model_size:
+                logger.info(f"\nSkipping model {model} as it is too large.\n\n")
+                continue
+
+            server.shell(
+                name="Download Ollama model: {}".format(model),
+                commands="ollama pull {}".format(model),
+            )
+
+            ollama_benchmark_result = server.shell(
+                name="Benchmark Ollama model: {}".format(model),
+                commands="{}/ai-benchmarks/obench.sh -m {} -c 3 --markdown".format(working_dir, model),
+            )
+
+            logger.info(f"\n{ollama_benchmark_result.stdout}\n\n")
+
+    python.call(
+        name="Execute Ollama loop",
+        function=ollama_loop_callback,
+    )

-python.call(
-    name="Execute Ollama loop",
-    function=ollama_loop_callback,
-)
+else:
+    logger.info("Please specify a valid 'ai_benchmark' value ('llama.cpp' or 'ollama') in host data.")