Merge pull request #29 from yogesh1801/pr/agent-option

xuafeng · web-flow · commit 7abdd4d6d382 · 2025-12-04T00:10:22.000-08:00
Add optional agent selection to the CLI and benchmark run scripts
diff --git a/benchmarks/arteval_bench/run.sh b/benchmarks/arteval_bench/run.sh
@@ -2,9 +2,10 @@
 
 set -e  # Exit immediately on error.
 
-if [ $# -ne 1 ]; then
-    echo "Usage: $0 <model_location>"
+if [ $# -lt 1 ] || [ $# -gt 2 ]; then
+    echo "Usage: $0 <model_location> <agent>"
     echo "Example: $0 Qwen/Qwen2.5-7B-Instruct"
+    echo "Note: agent parameter is accepted for consistency but not used by this benchmark"
     exit 1
 fi
 
diff --git a/benchmarks/cache_algo_bench/run.sh b/benchmarks/cache_algo_bench/run.sh
@@ -2,9 +2,10 @@
 
 set -e  # Exit immediately on error.
 
-if [ $# -ne 1 ]; then
-    echo "Usage: $0 <model_location>"
+if [ $# -lt 1 ] || [ $# -gt 2 ]; then
+    echo "Usage: $0 <model_location> <agent>"
     echo "Example: $0 Qwen/Qwen2.5-7B-Instruct"
+    echo "Note: agent parameter is accepted for consistency but not used by this benchmark"
     exit 1
 fi
 
diff --git a/benchmarks/course_exam_bench/run.sh b/benchmarks/course_exam_bench/run.sh
@@ -2,9 +2,10 @@
 
 set -e  # Exit immediately on error.
 
-if [ $# -ne 1 ]; then
-    echo "Usage: $0 <model_location>"
+if [ $# -lt 1 ] || [ $# -gt 2 ]; then
+    echo "Usage: $0 <model_location> <agent>"
     echo "Example: $0 Qwen/Qwen2.5-7B-Instruct"
+    echo "Note: agent parameter is accepted for consistency but not used by this benchmark"
     exit 1
 fi
 
diff --git a/benchmarks/course_lab_bench/run.sh b/benchmarks/course_lab_bench/run.sh
@@ -2,13 +2,15 @@
 
 set -e  # Exit immediately on error.
 
-if [ $# -ne 1 ]; then
-    echo "Usage: $0 <model_name>"
+if [ $# -lt 1 ] || [ $# -gt 2 ]; then
+    echo "Usage: $0 <model_name> <agent>"
     echo "Example: $0 claude-sonnet-4-5-20250929"
+    echo "Example: $0 gpt-4o claudecode"
     exit 1
 fi
 
 MODEL_NAME="$1"
+AGENT="${2:-claudecode}"
 NEW_MODEL_NAME="${MODEL_NAME//\//_}"
 
 # Note: set it to "openai" if you are using your own model server (vllm)
@@ -24,7 +26,7 @@ echo "==> Start to run CourseLabBench"
 # in your code to enable task selection.
 
 python src/main.py \
-    --agent "claudecode" \
+    --agent "$AGENT" \
     --model "$MODEL_NAME" \
     # --task "test"
     # --save_path "./outputs/course_lab_bench__${NEW_MODEL_NAME}__$(date +"%Y-%m-%d_%H-%M-%S")" \
diff --git a/benchmarks/example_bench/run.sh b/benchmarks/example_bench/run.sh
@@ -2,9 +2,10 @@
 
 set -e  # Exit immediately on error.
 
-if [ $# -ne 1 ]; then
-    echo "Usage: $0 <model_location>"
+if [ $# -lt 1 ] || [ $# -gt 2 ]; then
+    echo "Usage: $0 <model_location> <agent>"
     echo "Example: $0 Qwen/Qwen2.5-7B-Instruct"
+    echo "Note: agent parameter is accepted for consistency but not used by this benchmark"
     exit 1
 fi
 
diff --git a/benchmarks/sysmobench/run.sh b/benchmarks/sysmobench/run.sh
@@ -2,14 +2,16 @@
 
 set -e
 
-if [ $# -ne 1 ]; then
-    echo "Usage: $0 <model_name>"
+if [ $# -lt 1 ] || [ $# -gt 2 ]; then
+    echo "Usage: $0 <model_name> <agent>"
     echo "Example: $0 gpt-4o"
     echo "Example: $0 claude-3-5-sonnet-20241022"
+    echo "Example: $0 gpt-4o trace_based"
     exit 1
 fi
 
 MODEL_NAME="$1"
+AGENT="${2:-agent_based}"
 NEW_MODEL_NAME="${MODEL_NAME//\//_}"
 
 # Activate venv if it exists
@@ -20,7 +22,7 @@ fi
 echo "==> Start to run SysMoBench"
 python3 src/main.py \
     --model_name "${MODEL_NAME}" \
-    --agent agent_based \
+    --agent "${AGENT}" \
     --max_iterations 3
 
 # Deactivate if we activated
diff --git a/cli/run_all_local.sh b/cli/run_all_local.sh
@@ -1,15 +1,18 @@
 #!/bin/bash
 
 # Script to run install.sh and run.sh for all benchmarks
-# Usage: ./run_all_benchmarks.sh <model>
+# Usage: ./run_all_local.sh <model> [agent]
 
 set -e  # Exit immediately on error.
 
 MODEL="$1"
+AGENT="${2:-}"
 
 if [ -z "$MODEL" ]; then
     echo "Error: Model parameter is required"
-    echo "Usage: $0 <model>"
+    echo "Usage: $0 <model> <agent>"
+    echo "Example: $0 gpt-4o"
+    echo "Example: $0 gpt-4o agent_based"
     exit 1
 fi
 
@@ -21,7 +24,11 @@ if [ ! -d "$BENCHMARKS_DIR" ]; then
     exit 1
 fi
 
-echo "Running all benchmarks with model: $MODEL"
+if [ -n "$AGENT" ]; then
+    echo "Running all benchmarks with model: $MODEL and agent: $AGENT"
+else
+    echo "Running all benchmarks with model: $MODEL"
+fi
 echo ""
 
 # Iterate through each subdirectory in benchmarks
@@ -43,8 +50,13 @@ for bench_dir in "$BENCHMARKS_DIR"/*/; do
 
         # Run run.sh if it exists
         if [ -f "$bench_dir/run.sh" ]; then
-            echo "Running run.sh for $bench_name with model $MODEL..."
-            cd "$bench_dir" && bash run.sh "$MODEL"
+            if [ -n "$AGENT" ]; then
+                echo "Running run.sh for $bench_name with model $MODEL and agent $AGENT..."
+                cd "$bench_dir" && bash run.sh "$MODEL" "$AGENT"
+            else
+                echo "Running run.sh for $bench_name with model $MODEL..."
+                cd "$bench_dir" && bash run.sh "$MODEL"
+            fi
             cd - > /dev/null
         else
             echo "Warning: run.sh not found in $bench_dir"