-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrun-checkpoint-timings.sh
More file actions
executable file
·213 lines (176 loc) · 5.58 KB
/
Copy pathrun-checkpoint-timings.sh
File metadata and controls
executable file
·213 lines (176 loc) · 5.58 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
#!/usr/bin/env bash
#
# Checkpoint timing driver: strict mode, paths, and the commit tables that the
# sections further down iterate over.
#
# Environment overrides (each falls back to the default below when it is unset
# or empty):
#   CHECKOUT  scratch clone the listed commits are checked out into
#   OUT       TSV file the timing rows are written to
#   LOG_DIR   directory receiving the per-checkpoint build/run logs
#   MODE      section selector: all, early, python-baseline, or speedup
set -euo pipefail

# Directory holding this script, and the repository root three levels above it.
SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "${SCRIPT_DIR}/../../.." && pwd)"

: "${CHECKOUT:=/tmp/zdisamar-perf-checkpoints}"
: "${OUT:=/tmp/zdisamar-perf-checkpoints.tsv}"
: "${LOG_DIR:=/tmp/zdisamar-perf-checkpoint-logs}"
: "${MODE:=all}"

# Entries are "<commit> <label>".
SPEEDUP_COMMITS=(
  "5ef6c71 line_spectroscopy_and_grid"
  "b0a9e0f reusable_storage"
  "97088cf fused_doubling_math"
  "0ae1cad direct_matrix_math"
  "f42445d skip_empty_layers"
  "c423f4a fourier_tail"
  "f295ace layer_activity_orders"
  "07b19f3 zero_scatter_layers"
  "9138e6a pre_qseries_skips"
  "286c5b8 skip_zero_qseries"
  "63df87e qseries_precheck_reuse"
  "4791c22 separate_matrix_outputs"
  "baf0b4f fused_d_update"
  "862511b final_checkpoint"
)

# Entries are "<commit> <label> <build_mode>"; build_mode "profile_bin" selects
# the dedicated profile-binary build step, anything else a plain install.
EARLY_SPLIT_COMMITS=(
  "511061b first_split_timer profile_bin"
  "e23035b shared_grid_fast_intermediate profile_bin"
  "56ec761 rtm_prep_tightened profile_bin"
  "f8f495d tracked_plot_bundle profile_bin"
  "207034e spectroscopy_partition install"
)

# Entries are "<commit> <label>".
PYTHON_BASELINE_COMMITS=(
  "163db7e python_validation_baseline"
)
#######################################
# Report a failed checkpoint phase and abort the script.
# Globals:   LOG_DIR (read)
# Arguments: $1 - checkpoint label
#            $2 - name of the phase that failed
# Outputs:   two diagnostic lines on stderr
# Returns:   does not return; exits with status 1
#######################################
fail() {
  local checkpoint="$1" stage="$2"
  {
    printf 'checkpoint %s failed during %s\n' "$checkpoint" "$stage"
    printf 'logs are in %s\n' "$LOG_DIR"
  } >&2
  exit 1
}
#######################################
# Decide whether a section of the script should run under the current MODE.
# Globals:   MODE (read)
# Arguments: $1 - section name (early, python-baseline, or speedup)
# Outputs:   on an unrecognised MODE, an error plus the list of valid modes
#            on stderr
# Returns:   0 when MODE is "all" or equals the requested section, non-zero
#            when MODE names a different section; exits the script with
#            status 1 when MODE is not a recognised value
#######################################
want_section() {
  local requested="$1"
  case "$MODE" in
    all)
      return 0
      ;;
    early | python-baseline | speedup)
      # MODE is one specific section: run only the section that matches it.
      [[ "$requested" == "$MODE" ]]
      return
      ;;
  esac
  printf 'unknown MODE=%s\n' "$MODE" >&2
  printf 'valid modes: all, early, python-baseline, speedup\n' >&2
  exit 1
}
#######################################
# Check whether a revision resolves to a commit object in the scratch clone.
# Globals:   CHECKOUT (read)
# Arguments: $1 - revision to look up
# Outputs:   nothing; git's stderr is discarded
# Returns:   the exit status of `git cat-file -e` (0 when the object exists)
#######################################
commit_available() {
  local rev="$1"
  # The ^{commit} suffix asks git to peel the revision to a commit object.
  local object_spec="${rev}^{commit}"
  git -C "${CHECKOUT}" cat-file -e "${object_spec}" 2> /dev/null
}
#######################################
# Put the scratch clone on a given commit (detached HEAD) and, when the source
# repository has a vendor/ directory, mirror it into the clone.
# Globals:   CHECKOUT, REPO_ROOT (read)
# Arguments: $1 - commit to check out
# Returns:   0 when there is no vendor/ directory to copy, otherwise the
#            status of the final rsync
#######################################
prepare_checkout() {
  local target="$1"
  git -C "${CHECKOUT}" checkout --quiet --detach "${target}"

  local vendor_src="${REPO_ROOT}/vendor"
  local vendor_dst="${CHECKOUT}/vendor"
  [ -d "${vendor_src}" ] || return 0

  mkdir -p "${vendor_dst}"
  # --delete also removes files from the clone's vendor/ that are absent from
  # the source tree, so the copy ends up an exact mirror.
  rsync -a --delete "${vendor_src}/" "${vendor_dst}/"
}
# --- Workspace setup ---------------------------------------------------------
# Validate MODE up front, before the old checkout and logs are deleted and the
# repository is cloned. want_section returns 0 when asked about the current
# mode itself (and for MODE=all); for an unrecognised MODE it prints the valid
# modes and exits 1.
want_section "$MODE"

# Start from an empty scratch clone and log directory. The :? guards abort
# instead of letting rm -rf run with an empty operand.
rm -rf -- "${CHECKOUT:?}" "${LOG_DIR:?}"
mkdir -p -- "$(dirname -- "$OUT")" "$LOG_DIR"

# Clone the local repository; --no-hardlinks makes git copy the object files
# rather than hard-linking them into the scratch clone.
git clone --quiet --no-hardlinks "$REPO_ROOT" "$CHECKOUT" >/dev/null

# Seed the clone with the source tree's vendor/ directory when there is one.
if [[ -d "$REPO_ROOT/vendor" ]]; then
  mkdir -p -- "$CHECKOUT/vendor"
  rsync -a "$REPO_ROOT/vendor/" "$CHECKOUT/vendor/"
fi

# Truncate the result file and write the TSV header the sections append to.
printf 'checkpoint\tprepare_s\tforward_s\ttotal_s\n' >"$OUT"
# --- Early split-timer checkpoints -------------------------------------------
# For each early commit: build the forward-profile binary, run it once, and
# append one TSV row to OUT taken from the summary.json it writes. Commits whose
# objects are missing from the scratch clone are skipped with a note on stderr.
if want_section "early"; then
  for entry in "${EARLY_SPLIT_COMMITS[@]}"; do
    # Each entry is "<commit> <label> <build_mode>".
    read -r commit label build_mode <<<"$entry"

    if ! commit_available "$commit"; then
      printf 'skipping early checkpoint %s %s; commit object is not available in this checkout\n' \
        "$commit" "$label" >&2
      continue
    fi

    prepare_checkout "$commit"
    output_dir="$CHECKOUT/out/perf_timeline/$label"
    rm -rf -- "$output_dir"

    # Commands are held in arrays so that a path containing whitespace
    # (CHECKOUT can be overridden from the environment) stays one argument.
    if [[ "$build_mode" == "profile_bin" ]]; then
      build_cmd=(zig build o2a-forward-profile-bin)
    else
      build_cmd=(zig build install)
    fi
    run_cmd=(
      zig-out/bin/zdisamar-o2a-forward-profile
      --repeat 1
      --output-dir "$output_dir"
    )

    # errexit is not in effect inside a subshell on the left of `||`, so the
    # cd is chained with && to keep the command from running in the wrong
    # directory if the cd fails.
    (
      cd "$CHECKOUT" && "${build_cmd[@]}"
    ) >"$LOG_DIR/build-${label}.log" 2>&1 || fail "$label" "build"
    (
      cd "$CHECKOUT" && "${run_cmd[@]}"
    ) >"$LOG_DIR/run-${label}.log" 2>&1 || fail "$label" "timing run"

    # The fourth argument records the commands that were run; the Python
    # snippet below only reads the first three.
    python3 - "$output_dir/summary.json" "$commit" "$label" \
      "${build_cmd[*]}; ${run_cmd[*]}" <<'PY' >>"$OUT"
import json
import sys

with open(sys.argv[1]) as handle:
    summary = json.load(handle)

# Only the forward_s column is filled for these checkpoints; prepare_s and
# total_s are emitted as empty fields. The mean is divided by 1e9 (ns -> s).
print(
    f"{sys.argv[2]} {sys.argv[3]}",
    "",
    summary["total_forward_ns"]["mean_ns"] / 1.0e9,
    "",
    sep="\t",
)
PY
  done
fi
# --- Python validation baseline ----------------------------------------------
# For each baseline commit: build, run the Python O2A validation harness, and
# append a TSV row (prepare, forward, and their sum) read from the summary.json
# the harness writes.
if want_section "python-baseline"; then
  for entry in "${PYTHON_BASELINE_COMMITS[@]}"; do
    # Each entry is "<commit> <label>".
    read -r commit label <<<"$entry"
    prepare_checkout "$commit"

    summary_dir="$CHECKOUT/out/ci/o2a_validation_spectrum"
    # Drop any earlier output so the row below can only come from this run.
    rm -rf -- "$summary_dir"

    # One array per command: the same definition is executed and recorded.
    build_cmd=(zig build install)
    run_cmd=(uv run scripts/testing_harness/python_o2a_validation_spectrum.py)

    # errexit is not in effect inside a subshell on the left of `||`, so the
    # cd is chained with && to keep the command from running in the wrong
    # directory if the cd fails.
    (
      cd "$CHECKOUT" && "${build_cmd[@]}"
    ) >"$LOG_DIR/build-${label}.log" 2>&1 || fail "$label" "build"
    (
      cd "$CHECKOUT" && PYTHONPATH="$CHECKOUT" "${run_cmd[@]}"
    ) >"$LOG_DIR/run-${label}.log" 2>&1 || fail "$label" "validation run"

    # The fourth argument records the commands that were run; the Python
    # snippet below only reads the first three.
    python3 - "$summary_dir/summary.json" "$commit" "$label" \
      "${build_cmd[*]}; PYTHONPATH=$CHECKOUT ${run_cmd[*]}" <<'PY' >>"$OUT"
import json
import sys

with open(sys.argv[1]) as handle:
    timing = json.load(handle)["timing"]

prepare_s = timing["prepare_o2a_s"]
forward_s = timing["forward_model_s"]
print(
    f"{sys.argv[2]} {sys.argv[3]}",
    prepare_s,
    forward_s,
    prepare_s + forward_s,
    sep="\t",
)
PY
    rm -rf -- "$summary_dir"
  done
fi
# --- Speedup checkpoints -----------------------------------------------------
# For each speedup commit: build, run the Python O2A validation harness, and
# append a TSV row (prepare, forward, and their sum) read from the summary.json
# the harness writes.
if want_section "speedup"; then
  for entry in "${SPEEDUP_COMMITS[@]}"; do
    # Each entry is "<commit> <label>".
    read -r commit label <<<"$entry"
    prepare_checkout "$commit"

    summary_dir="$CHECKOUT/out/ci/o2a_validation_spectrum"
    # Remove earlier output before the run, as the python-baseline section
    # does, so a leftover summary.json can never be reported for this commit.
    rm -rf -- "$summary_dir"

    build_cmd=(zig build install)
    run_cmd=(uv run scripts/testing_harness/python_o2a_validation_spectrum.py)

    # errexit is not in effect inside a subshell on the left of `||`, so the
    # cd is chained with && to keep the command from running in the wrong
    # directory if the cd fails.
    (
      cd "$CHECKOUT" && "${build_cmd[@]}"
    ) >"$LOG_DIR/build-${label}.log" 2>&1 || fail "$label" "build"
    (
      cd "$CHECKOUT" && PYTHONPATH="$CHECKOUT" "${run_cmd[@]}"
    ) >"$LOG_DIR/run-${label}.log" 2>&1 || fail "$label" "validation run"

    # The fourth argument records the commands that were run; the Python
    # snippet below only reads the first three.
    python3 - "$summary_dir/summary.json" "$commit" "$label" \
      "${build_cmd[*]}; PYTHONPATH=$CHECKOUT ${run_cmd[*]}" <<'PY' >>"$OUT"
import json
import sys

with open(sys.argv[1]) as handle:
    timing = json.load(handle)["timing"]

prepare_s = timing["prepare_o2a_s"]
forward_s = timing["forward_model_s"]
print(
    f"{sys.argv[2]} {sys.argv[3]}",
    prepare_s,
    forward_s,
    prepare_s + forward_s,
    sep="\t",
)
PY
    rm -rf -- "$summary_dir"
  done
fi
# Print the collected timing table (header plus every appended row) to stdout.
cat "$OUT"