-
Notifications
You must be signed in to change notification settings - Fork 5.5k
Expand file tree
/
Copy pathrun_multi_benchmark.sh
More file actions
executable file
·61 lines (53 loc) · 1.78 KB
/
run_multi_benchmark.sh
File metadata and controls
executable file
·61 lines (53 loc) · 1.78 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
#!/bin/bash
# Multi-run benchmark script for credible statistical results.
# Runs each agent once per seed in SEEDS (5 seeds) so variance can be computed
# across runs.
#
# Environment:
#   GAUNTLET_ENV_FILE  Optional path of an env file to source before running.
#                      Defaults to /Users/sohom/gauntlet/.env.

# -e          abort on the first failing command.
# -o pipefail make a pipeline fail when any of its stages fails; without it a
#             failing `gauntlet run ... | tail -5` reports tail's (successful)
#             status and the failure goes unnoticed.
set -eo pipefail

# Work from the directory that contains this script so the relative paths used
# below (agents/..., ./benchmark_results) do not depend on the caller's cwd.
SCRIPT_DIR=$(dirname -- "$0")
cd -- "$SCRIPT_DIR"

# Source environment. This stays best-effort: a missing file is skipped
# silently and a file that fails to load only produces a warning.
# NOTE(review): plain assignments in the env file are not exported to child
# processes such as `gauntlet` unless the file itself uses `export` - confirm.
ENV_FILE="${GAUNTLET_ENV_FILE:-/Users/sohom/gauntlet/.env}"
if [[ -r "$ENV_FILE" ]]; then
  # shellcheck disable=SC1090  # path is only known at runtime
  source "$ENV_FILE" || echo "warning: failed to source $ENV_FILE" >&2
fi

# Timestamped directory so repeated invocations never overwrite each other.
OUTPUT_DIR="./benchmark_results/multi_run_$(date +%Y%m%d_%H%M%S)"
mkdir -p "$OUTPUT_DIR"

# One benchmark run is performed per seed, for every agent.
SEEDS=(12345 23456 34567 45678 56789)
AGENTS=("agents/always_execute_agent.py" "agents/smart_agent.py")
# Announce the benchmark: banner, then where results go and which seeds and
# agents are configured, followed by a blank separator line.
printf '%s\n' \
  "============================================================" \
  "MULTI-RUN BENCHMARK" \
  "============================================================" \
  "Output: $OUTPUT_DIR" \
  "Seeds: ${SEEDS[*]}" \
  "Agents: ${AGENTS[*]}" \
  ""
#######################################
# Benchmark a single agent once per seed in SEEDS.
# Globals:   SEEDS (read), OUTPUT_DIR (read)
# Arguments: $1 - display name used in the progress messages
#            $2 - agent script path passed to `gauntlet run --agent`
#            $3 - sub-directory of OUTPUT_DIR that receives run_<n>/ results
# Outputs:   progress lines and the last 5 lines of each run's combined
#            stdout/stderr on stdout; an error message on stderr on failure
# Returns:   0 when every run succeeded, 1 on the first failing run
#######################################
run_agent_benchmark() {
  local name=$1 agent=$2 label=$3
  # Derive the run count from SEEDS so the messages stay correct when seeds
  # are added or removed.
  local total=${#SEEDS[@]}
  local i seed run_num run_dir

  echo "=== Running $name ($total runs) ==="
  for i in "${!SEEDS[@]}"; do
    seed=${SEEDS[$i]}
    run_num=$((i + 1))
    run_dir="$OUTPUT_DIR/$label/run_$run_num"
    echo " Run $run_num/$total (seed=$seed)..."
    mkdir -p "$run_dir" || return 1
    # The pipeline runs in a subshell with pipefail enabled so that a failing
    # `gauntlet run` is detected instead of being masked by tail's exit
    # status, without changing the shell options of the caller.
    if ! (
      set -o pipefail
      gauntlet run \
        --agent "$agent" \
        --mock \
        --seed "$seed" \
        --output "$run_dir" \
        2>&1 | tail -5
    ); then
      echo "ERROR: $name run $run_num/$total (seed=$seed) failed" >&2
      return 1
    fi
  done
}

# Run always_execute_agent (one run per seed)
run_agent_benchmark "always_execute_agent" "agents/always_execute_agent.py" "always_execute"

# Run smart_agent (one run per seed)
echo ""
run_agent_benchmark "smart_agent" "agents/smart_agent.py" "smart_agent"
# Closing banner: blank separator, completion notice, and the directory that
# holds the results of this invocation.
printf '%s\n' \
  "" \
  "============================================================" \
  "MULTI-RUN COMPLETE" \
  "============================================================" \
  "Results saved to: $OUTPUT_DIR"