-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrun_orion_labbench.sh
More file actions
executable file
·59 lines (48 loc) · 2.44 KB
/
Copy pathrun_orion_labbench.sh
File metadata and controls
executable file
·59 lines (48 loc) · 2.44 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
#!/bin/bash
#
# Launch a LabBench QA evaluation via run_orion_labbench.py.
# Switch BENCHMARK between dbqa / litqa2 / figqa for the three subsets.
#
# Abort on the first failing command so a broken setup step never reaches the
# evaluation run.
set -e

# Run from the Orion root (the parent of this script's directory) regardless of
# where the script is invoked from.
#   * BASH_SOURCE[0] rather than $0: it still names this file when the script
#     is sourced instead of executed.
#   * CDPATH is cleared for this one command: with a relative script path
#     (e.g. "scripts/foo.sh") an inherited CDPATH could otherwise send cd to a
#     different directory.
#   * "--" keeps a path that starts with "-" from being parsed as an option.
CDPATH='' cd -- "$(dirname -- "${BASH_SOURCE[0]}")/.." || {
  printf 'error: cannot change to the Orion root (from %s)\n' "${BASH_SOURCE[0]}" >&2
  exit 1
}

# Load defaults from .env (HOST_VM_FILE, VM_USER, VM_PASSWORD, GITHUB_API_KEY, ...)
# `set -a` auto-exports every variable assigned while the file is sourced, so
# child processes see them. Note that plain assignments in .env replace
# same-named variables inherited from the caller's environment.
if [[ -f .env ]]; then
  set -a
  # The explicit "./" matters: for a name without a slash, bash's `source`
  # searches $PATH before the current directory, so a stray ".env" on PATH
  # could be loaded instead of the project's own file.
  # shellcheck source=/dev/null
  source ./.env
  set +a
fi
# ---------------------------------------------------------------------------
# Run configuration
#
# Every setting below keeps the value it already has (from the caller's
# environment or from .env) and only falls back to the default written here
# when the variable is unset or empty. All of them are exported in one go at
# the end of this section.
# ---------------------------------------------------------------------------

# Resume / output
: "${RESUME_MODE:=true}"
: "${RESUME_OUTPUT_DIR:=/Users/machang/Documents/research-work/CellMMAgent/results/LabBench_1_5}"

# Agent YAML config (lives under Orion/config/)
: "${CONFIG_FILE:=vmware_labbench.yaml}"

# Benchmark split: "dbqa", "litqa2", or "figqa"
: "${BENCHMARK:=figqa}"

# Screen resolution (passed both to DesktopEnv and to set_guest_resolution)
: "${SCREEN_WIDTH:=1200}"
: "${SCREEN_HEIGHT:=800}"

# VM / host paths (typically inherited from .env; shown here for reference)
: "${HOST_VM_FILE:=/Users/machang/Documents/research-work/os-agent/OSWorld/vmware_vm_data/UbuntuBio/Ubuntu.vmx}"
: "${VM_WORKING_DIR:=/home/user/Documents/WorkingDir}"
: "${VM_USER:=user}"
: "${VM_PASSWORD:=password}"
: "${ENV_NAME:=python39}"

# Publish the resolved values to child processes.
export RESUME_MODE RESUME_OUTPUT_DIR \
  CONFIG_FILE BENCHMARK \
  SCREEN_WIDTH SCREEN_HEIGHT \
  HOST_VM_FILE VM_WORKING_DIR VM_USER VM_PASSWORD ENV_NAME
# Print the effective settings, one per line, before starting the run.
printf '%s\n' \
  "RESUME_MODE = $RESUME_MODE" \
  "RESUME_OUTPUT_DIR = $RESUME_OUTPUT_DIR" \
  "CONFIG_FILE = $CONFIG_FILE" \
  "BENCHMARK = $BENCHMARK" \
  "SCREEN = ${SCREEN_WIDTH}x${SCREEN_HEIGHT}" \
  "HOST_VM_FILE = $HOST_VM_FILE"

# Start the evaluation. It takes no command-line arguments here; the exported
# variables above are the only configuration handed over by this script.
python -m sweagent.run.run_orion_labbench

# Blank separator line followed by the completion notice.
printf '\n%s\n' "Script execution for $BENCHMARK completed!"