#!/bin/bash
#
# test_drich_gdml.sh
# ===================
# End-to-end test: Opticks GPU vs Geant4 CPU for the standalone (no-DD4hep)
# dRICH GDML geometry (examples/drich). The driver runs both transports in one
# process and prints "gpu_hits=N cpu_hits=M" per event; this wraps it with a
# statistical pass/fail on the GPU/CPU sensor-hit agreement.
#
# Geometry: examples/drich/drich_ag02.gdml (single-sector dRICH, 0.2 mm airgap)
#
# Comparison uses Poisson statistics (CLAUDE.md house rule): for counts
# G (GPU) and C (CPU) the error on the difference is sqrt(G+C), so the test
# passes when
#     |G - C| <= NSIGMA * sqrt(G + C)
# i.e. GPU and CPU agree within NSIGMA standard deviations. The validated
# dRICH agreement is ~1 sigma; NSIGMA defaults to 5 for a robust CI gate that
# still flags the gross divergences the geometry fixes addressed (0.6x..1.25x).
#
# The G4 CPU optical navigator has a rare (~1-in-150k photon) hang on the
# aerogel/airgap coincident face (a Geant4 11.04 bug, not Opticks). We run with
# a step watchdog + per-try timeout and retry a few seeds; if no seed yields a
# clean result because the runs timed out / aborted we SKIP (exit 0) rather
# than fail, keeping CI non-flaky.
#
# A SKIP is NOT granted when the evidence points at a real problem:
#   * the driver could not be launched at all (timeout exit 125/126/127), or
#   * a run exited 0 (ran to completion) yet one side reported zero hits and
#     no later seed produced a clean result.
# Both cases are reported as FAIL.
#
# Env knobs (all optional): DRICH_BIN MULT NEVENTS NSIGMA TIMEOUT_S SEEDS
#                           OPTICKS_MAX_SLOT MAX_STEP_WATCHDOG TMPDIR
#                           CUDA_VISIBLE_DEVICES OPTICKS_PREFIX
#
# Usage:  ./tests/test_drich_gdml.sh
# Exit:   0 = PASS or SKIP, 1 = FAIL
#

# Only nounset: errexit/pipefail are deliberately off because the script
# inspects non-zero statuses itself (driver timeouts, grep without matches).
set -u

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
GDML="$REPO_DIR/examples/drich/drich_ag02.gdml"

MULT="${MULT:-20}"
NEVENTS="${NEVENTS:-2}"
NSIGMA="${NSIGMA:-5}"
TIMEOUT_S="${TIMEOUT_S:-300}"
SEEDS="${SEEDS:-12345 23456 34567}"

# Sum every "<key>=<integer>" token found in the text on stdin.
# Arguments: $1 - key name (e.g. gpu_hits)
# Outputs:   the integer sum on stdout; 0 when the key never appears.
sum_hits() {
    grep -oE "$1=[0-9]+" | awk -F= '{ s += $2 } END { print s + 0 }'
}

echo "=============================================="
echo " dRICH GDML test: Opticks GPU vs Geant4 CPU"
echo "=============================================="

# --- GPU availability: this is a GPU test, SKIP (benign) where there is none.
if ! command -v nvidia-smi >/dev/null 2>&1 || ! nvidia-smi -L >/dev/null 2>&1; then
    echo "SKIPPED: no GPU available"
    exit 0
fi

if [ ! -f "$GDML" ]; then
    echo "SKIPPED: $GDML not found"
    exit 0
fi

# --- NSIGMA must be a plain non-negative number. awk compares a number with a
# non-numeric string as strings, which would make the gate pass unconditionally.
if ! [[ "$NSIGMA" =~ ^([0-9]+([.][0-9]*)?|[.][0-9]+)$ ]]; then
    echo "FAIL: NSIGMA must be a non-negative number, got '$NSIGMA'"
    exit 1
fi

# --- Optional environment setup (best effort, silent). nounset is relaxed
# while sourcing: an unset-variable reference inside the env script would
# otherwise terminate this shell, and '|| true' cannot catch that.
ENV_SCRIPT="/opt/eic-opticks/eic-opticks-env.sh"
if [ -r "$ENV_SCRIPT" ]; then
    set +u
    # shellcheck source=/dev/null
    source "$ENV_SCRIPT" 2>/dev/null || true
    set -u
fi

# --- Obtain the driver: prefer an installed/explicit binary, else build the
# example against the installed library (also exercises examples/drich/CMakeLists.txt).
BIN="${DRICH_BIN:-}"
if [ -z "$BIN" ] && command -v drich_gdml_main >/dev/null 2>&1; then
    BIN="drich_gdml_main"
fi
if [ -z "$BIN" ]; then
    BUILD_DIR="${TMPDIR:-/tmp}/drich_ci_build"
    LOG="${TMPDIR:-/tmp}/drich_ci_build.log"
    echo "[build] configuring + building examples/drich ..."
    if cmake -S "$REPO_DIR/examples/drich" -B "$BUILD_DIR" \
            -DCMAKE_PREFIX_PATH="${OPTICKS_PREFIX:-/opt/eic-opticks}" >"$LOG" 2>&1 \
        && cmake --build "$BUILD_DIR" >>"$LOG" 2>&1; then
        BIN="$BUILD_DIR/drich_gdml_main"
        echo "[build] OK -> $BIN"
    else
        echo "FAIL: could not build examples/drich"
        tail -25 "$LOG"
        exit 1
    fi
fi

echo "  driver:   $BIN"
echo "  geometry: $GDML"
echo "  config:   MULT=$MULT NEVENTS=$NEVENTS NSIGMA=$NSIGMA TIMEOUT_S=$TIMEOUT_S"
echo ""

# --- Run: G4 CPU + Opticks GPU side-by-side. Retry seeds to survive the rare
# CPU navigator hang; first clean run (gpu>0 && cpu>0) wins.
GPU=0
CPU=0
USED_SEED=""
TRIES=0
HARD_FAIL=""   # reason text once an attempt failed for a non-hang reason
FAIL_OUT=""    # driver output of that attempt, shown on FAIL
# SEEDS is a whitespace-separated list, so word splitting is intended here.
for SEED in $SEEDS; do
    TRIES=$((TRIES + 1))
    echo "[run] seed=$SEED ..."
    rc=0
    OUT=$(QBND_FILTER_POINT=1 KILL_OPTICAL=0 \
        GDML_FILE="$GDML" OPTICKS_MAX_SLOT="${OPTICKS_MAX_SLOT:-2000000}" \
        MULT="$MULT" SEED="$SEED" NEVENTS="$NEVENTS" PHI_DEG=30 ETA=2.0 \
        MAX_STEP_WATCHDOG="${MAX_STEP_WATCHDOG:-100000}" \
        timeout --signal=KILL "${TIMEOUT_S}s" "$BIN" 2>&1) || rc=$?
    g=$(printf '%s\n' "$OUT" | sum_hits gpu_hits)
    c=$(printf '%s\n' "$OUT" | sum_hits cpu_hits)
    echo "    gpu_hits=$g cpu_hits=$c"
    if [ "$g" -gt 0 ] && [ "$c" -gt 0 ]; then
        GPU=$g; CPU=$c; USED_SEED=$SEED
        break
    fi
    case "$rc" in
        125|126|127)
            # timeout(1): 125 = timeout itself failed, 126 = command not
            # invocable, 127 = command not found. Another seed cannot help.
            HARD_FAIL="could not launch driver '$BIN' (exit $rc)"
            FAIL_OUT=$OUT
            break
            ;;
        0)
            # The driver ran to completion, so this is not a hang: a zero
            # count here is a genuine GPU/CPU disagreement.
            HARD_FAIL="driver exited 0 with gpu_hits=$g cpu_hits=$c (seed $SEED)"
            FAIL_OUT=$OUT
            echo "    (run completed without hits on both sides, retrying)"
            ;;
        *)
            # 124/137 = killed by the timeout; anything else = driver abort.
            echo "    (no clean result this seed, exit $rc -- likely CPU navigator hang/timeout, retrying)"
            ;;
    esac
done

if [ -z "$USED_SEED" ]; then
    if [ -n "$HARD_FAIL" ]; then
        echo "FAIL: $HARD_FAIL"
        printf '%s\n' "$FAIL_OUT" | tail -25
        exit 1
    fi
    echo "SKIPPED: no clean G4+GPU run obtained in $TRIES tries"
    echo "  (known Geant4 11.04 navigator hang at the aerogel/airgap face)"
    exit 0
fi

# --- Compare with Poisson (sqrt(N)) statistics.
echo ""
echo "[compare] seed=$USED_SEED GPU=$GPU CPU=$CPU"
awk -v g="$GPU" -v c="$CPU" -v ns="$NSIGMA" 'BEGIN {
    ns   += 0;                          # force a numeric comparison below
    adiff = (g > c) ? g - c : c - g;    # |G - C|
    err   = sqrt(g + c);                # Poisson error on the difference
    sig   = adiff / err;
    printf "  ratio GPU/CPU = %.4f\n", g / c;
    printf "  |GPU-CPU| = %d ; sqrt(G+C) = %.1f ; significance = %.2f sigma (tol %g-sigma => %.0f)\n", adiff, err, sig, ns, ns * err;
    if (sig <= ns) { print "PASS: GPU and CPU hit counts agree within tolerance"; exit 0 }
    else           { print "FAIL: GPU vs CPU hit-count difference exceeds tolerance"; exit 1 }
}'
exit $?