Skip to content

Commit 7abdd4d

Browse files
authored
Merge pull request #29 from yogesh1801/pr/agent-option
added agent selection in CLI
2 parents 13c5fa1 + 3b55007 commit 7abdd4d

File tree

7 files changed

+39
-19
lines changed

benchmarks/arteval_bench/run.sh

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,10 @@
22

33
set -e # Exit immediately on error.
44

5-
if [ $# -ne 1 ]; then
6-
echo "Usage: $0 <model_location>"
5+
if [ $# -lt 1 ] || [ $# -gt 2 ]; then
6+
echo "Usage: $0 <model_location> <agent>"
77
echo "Example: $0 Qwen/Qwen2.5-7B-Instruct"
8+
echo "Note: agent parameter is accepted for consistency but not used by this benchmark"
89
exit 1
910
fi
1011

benchmarks/cache_algo_bench/run.sh

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,10 @@
22

33
set -e # Exit immediately on error.
44

5-
if [ $# -ne 1 ]; then
6-
echo "Usage: $0 <model_location>"
5+
if [ $# -lt 1 ] || [ $# -gt 2 ]; then
6+
echo "Usage: $0 <model_location> <agent>"
77
echo "Example: $0 Qwen/Qwen2.5-7B-Instruct"
8+
echo "Note: agent parameter is accepted for consistency but not used by this benchmark"
89
exit 1
910
fi
1011

benchmarks/course_exam_bench/run.sh

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,10 @@
22

33
set -e # Exit immediately on error.
44

5-
if [ $# -ne 1 ]; then
6-
echo "Usage: $0 <model_location>"
5+
if [ $# -lt 1 ] || [ $# -gt 2 ]; then
6+
echo "Usage: $0 <model_location> <agent>"
77
echo "Example: $0 Qwen/Qwen2.5-7B-Instruct"
8+
echo "Note: agent parameter is accepted for consistency but not used by this benchmark"
89
exit 1
910
fi
1011

benchmarks/course_lab_bench/run.sh

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,15 @@
22

33
set -e # Exit immediately on error.
44

5-
if [ $# -ne 1 ]; then
6-
echo "Usage: $0 <model_name>"
5+
if [ $# -lt 1 ] || [ $# -gt 2 ]; then
6+
echo "Usage: $0 <model_name> <agent>"
77
echo "Example: $0 claude-sonnet-4-5-20250929"
8+
echo "Example: $0 gpt-4o claudecode"
89
exit 1
910
fi
1011

1112
MODEL_NAME="$1"
13+
AGENT="${2:-claudecode}"
1214
NEW_MODEL_NAME="${MODEL_NAME//\//_}"
1315

1416
# Note: set it to "openai" if you are using your own model server (vllm)
@@ -24,7 +26,7 @@ echo "==> Start to run CourseLabBench"
2426
# in your code to enable task selection.
2527

2628
python src/main.py \
27-
--agent "claudecode" \
29+
--agent "$AGENT" \
2830
--model "$MODEL_NAME" \
2931
# --task "test"
3032
# --save_path "./outputs/course_lab_bench__${NEW_MODEL_NAME}__$(date +"%Y-%m-%d_%H-%M-%S")" \

benchmarks/example_bench/run.sh

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,10 @@
22

33
set -e # Exit immediately on error.
44

5-
if [ $# -ne 1 ]; then
6-
echo "Usage: $0 <model_location>"
5+
if [ $# -lt 1 ] || [ $# -gt 2 ]; then
6+
echo "Usage: $0 <model_location> <agent>"
77
echo "Example: $0 Qwen/Qwen2.5-7B-Instruct"
8+
echo "Note: agent parameter is accepted for consistency but not used by this benchmark"
89
exit 1
910
fi
1011

benchmarks/sysmobench/run.sh

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,16 @@
22

33
set -e
44

5-
if [ $# -ne 1 ]; then
6-
echo "Usage: $0 <model_name>"
5+
if [ $# -lt 1 ] || [ $# -gt 2 ]; then
6+
echo "Usage: $0 <model_name> <agent>"
77
echo "Example: $0 gpt-4o"
88
echo "Example: $0 claude-3-5-sonnet-20241022"
9+
echo "Example: $0 gpt-4o trace_based"
910
exit 1
1011
fi
1112

1213
MODEL_NAME="$1"
14+
AGENT="${2:-agent_based}"
1315
NEW_MODEL_NAME="${MODEL_NAME//\//_}"
1416

1517
# Activate venv if it exists
@@ -20,7 +22,7 @@ fi
2022
echo "==> Start to run SysMoBench"
2123
python3 src/main.py \
2224
--model_name "${MODEL_NAME}" \
23-
--agent agent_based \
25+
--agent "${AGENT}" \
2426
--max_iterations 3
2527

2628
# Deactivate if we activated

cli/run_all_local.sh

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,18 @@
11
#!/bin/bash
22

33
# Script to run install.sh and run.sh for all benchmarks
4-
# Usage: ./run_all_benchmarks.sh <model>
4+
# Usage: ./run_all_local.sh <model> <agent>
55

66
set -e # Exit immediately on error.
77

88
MODEL="$1"
9+
AGENT="${2:-}"
910

1011
if [ -z "$MODEL" ]; then
1112
echo "Error: Model parameter is required"
12-
echo "Usage: $0 <model>"
13+
echo "Usage: $0 <model> <agent>"
14+
echo "Example: $0 gpt-4o"
15+
echo "Example: $0 gpt-4o agent_based"
1316
exit 1
1417
fi
1518

@@ -21,7 +24,11 @@ if [ ! -d "$BENCHMARKS_DIR" ]; then
2124
exit 1
2225
fi
2326

24-
echo "Running all benchmarks with model: $MODEL"
27+
if [ -n "$AGENT" ]; then
28+
echo "Running all benchmarks with model: $MODEL and agent: $AGENT"
29+
else
30+
echo "Running all benchmarks with model: $MODEL"
31+
fi
2532
echo ""
2633

2734
# Iterate through each subdirectory in benchmarks
@@ -43,8 +50,13 @@ for bench_dir in "$BENCHMARKS_DIR"/*/; do
4350

4451
# Run run.sh if it exists
4552
if [ -f "$bench_dir/run.sh" ]; then
46-
echo "Running run.sh for $bench_name with model $MODEL..."
47-
cd "$bench_dir" && bash run.sh "$MODEL"
53+
if [ -n "$AGENT" ]; then
54+
echo "Running run.sh for $bench_name with model $MODEL and agent $AGENT..."
55+
cd "$bench_dir" && bash run.sh "$MODEL" "$AGENT"
56+
else
57+
echo "Running run.sh for $bench_name with model $MODEL..."
58+
cd "$bench_dir" && bash run.sh "$MODEL"
59+
fi
4860
cd - > /dev/null
4961
else
5062
echo "Warning: run.sh not found in $bench_dir"

0 commit comments

Comments
 (0)