-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgpulayers.zsh
More file actions
37 lines (35 loc) · 1.7 KB
/
gpulayers.zsh
File metadata and controls
37 lines (35 loc) · 1.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
#!/bin/zsh
# Utility that records VRAM use per number of offloaded layers. It writes, in
# the current directory:
#   - $filem        : a zsh array definition holding the VRAM in use, indexed
#                     by the number of offloaded layers;
#   - results$filem : a '|'-delimited table with one row per layer count.
# Iterates over 1..ml layers, where ml is determined empirically per model and
# GPU and is preset in qlm.cfg ($gpulayers assoc. array).
ml=34 # For Gemma3-27B_Q5_K_L
# Model name as passed to qlm. It doubles as the name of the output file AND of
# the array stored in it (used by qlm for dynamic -ngl N); the file contents
# are pasted to the end of qlm.cfg.
# Please see the bottom of qlm.cfg to understand what this script does.
filem='Gemma3_27B'
# Start both output files from scratch. The results table is filled with '>>'
# and later patched by line number, so rows left over from a previous run would
# make the patching hit the wrong (stale) lines.
: > results$filem
echo -n "${filem}=(" > $filem
# For every layer count 1..ml: start a generation in the background, give it
# time to load, sample GPU memory while it runs, then wait for it to finish so
# that its log in /dev/shm is complete.
for i in {1..$ml}; do
  qlm --log-file /dev/shm/log$i -ngl $i --simple-io -- $filem "List the first 30 digits of sqrt(2)." > output.log &
  LLAMA_PID=$!
  sleep 11 # Increase sleep duration if machine is slow or decrease if needed.
  # $! is always set after '&', so testing it for emptiness cannot detect a
  # failed launch. Check instead that the job is still alive at sampling time;
  # if it is not, the reading below may not include the model.
  if ! kill -0 $LLAMA_PID 2> /dev/null; then
    echo "Warning: qlm (-ngl $i) was no longer running when VRAM was sampled; adjust the sleep duration." >&2
  fi
  str=$(nvidia-smi --query-gpu=memory.free,memory.used,memory.reserved,memory.total --format=csv,nounits,noheader)
  # Split "free, used, reserved, total" on ", "; only the first two are used.
  memvals=(${(s:, :)str})
  # The array file is space-separated, so an empty or garbled value would
  # silently shift every later layer index. Abort before writing anything.
  if [[ ! $memvals[2] =~ '^[0-9]+$' ]]; then
    echo "Error: could not read GPU memory from nvidia-smi (-ngl $i)." >&2
    kill $LLAMA_PID 2> /dev/null
    return 1
  fi
  # 'ts' is a placeholder for the tokens-per-second column.
  echo "|$i|$memvals[1]|$memvals[2]|ts|" >> results$filem
  echo -n "$memvals[2] " >> $filem
  wait $LLAMA_PID
done
# Close the array definition and terminate the line, so that anything appended
# to qlm.cfg afterwards starts on a line of its own.
echo ')' >> $filem
# Assuming this script is run from the directory that holds qlm.cfg.
# If qlm.cfg does not end in a newline, add one first; otherwise the array
# definition would be glued to the last existing line. ($(...) strips a
# trailing newline, so a non-empty result means the last byte is not one.)
if [[ -s qlm.cfg && -n $(tail -c 1 qlm.cfg) ]]; then
  echo >> qlm.cfg
fi
cat $filem >> qlm.cfg
# Fill in the 'ts' placeholder of every results row with tokens per second.
# The next will work until llama-cli changes the standard output format: the
# last log line is expected to carry the elapsed time as its 5th
# whitespace-separated field and the token count as its 8th.
# NOTE(review): the factor 1000 implies the time is in milliseconds - confirm.
for i in {1..$ml}; do
  last_line=$(tail -n 1 /dev/shm/log$i)
  # Split on whitespace in the shell instead of spawning awk twice per row.
  fields=(${=last_line})
  elapsed=$fields[5]
  ntokens=$fields[8]
  # A missing log or a changed format would otherwise feed an empty or zero
  # operand into the arithmetic below; keep the placeholder and say so.
  if [[ ! $elapsed =~ '^[0-9]+([.][0-9]+)?$' || ! $ntokens =~ '^[0-9]+$' ]] || (( elapsed <= 0 )); then
    echo "Warning: no usable timing in /dev/shm/log$i; row $i keeps its 'ts' placeholder." >&2
    continue
  fi
  result=$(( 1000 * ntokens / elapsed ))
  # Row i of the results table belongs to layer count i.
  sed -i "${i}s/ts/${result}/" results$filem
done
# Convert the '|'-delimited table to CSV: drop the leading and the trailing
# bar of each row, then turn every remaining bar into a comma.
sed -e 's/^|//' -e 's/|$//' -e 's/|/,/g' results$filem > results$filem.csv