Designing resilient toolkits and scalable RL environments for CAMEL terminal agents
# Clone the repository
git clone https://github.com/camel-ai/seta.git
cd seta
bash setup.sh

#=========================================
# Run single developer agent / workforce
#=========================================
cd evaluation/terminal_bench_run/
bash run_agent.sh \
-a <attempt,0..n> \
-n <total_attempts> \
-e <conda env name> \
-w <use_workforce> # can have a try, focus on single chat agent now.

play-zork
└── play-zork.1-of-1.test_run                # trial name
    ├── CAMEL_WORKDIR                        # not used at the moment
    ├── agent-logs                           # not used at the moment
    ├── commands.txt                         # not used at the moment
    ├── chatagent.log                        # ✔️✔️ full history of running agent including test results
    ├── eigent_logs.json                     # ⚠️ exists only when running workforce
    ├── panes                                # not used at the moment
    ├── sessions                             # session logs
    ├── agent.cast                           # not used at the moment
    ├── agent.log                            # not used at the moment
    ├── session_logs                         # ✔️✔️ session logs for terminal toolkit
    │   ├── blocking_commands.log            # ✔️✔️ all block mode commands + output
    │   ├── session_run_zork_1_correct_path.log  # ✔️✔️ non-block mode single session command + output
    │   ├── session_zork-1.log               # ✔️✔️ same as above, session_{id}.log
    │   └── session_zork_start.log           # ✔️✔️ same as above, session_{id}.log
    ├── tests.cast                           # not used at the moment
    ├── tests.log                            # ✔️✔️ test log
    └── tests.log.strip                      # ✔️✔️ test log with ANSI control characters removed
cd evaluation/terminal_bench_eval/
# terminal bench 1.0
bash run_eval.sh
# terminal bench 2.0
bash run_tb2.sh
- The agent class is implemented in `tbench_camel_agent.py`; final results will be in `evaluation/terminal_bench_eval/run/{run_id}/results.json`
- Task-specific terminal session logs will be in `evaluation/terminal_bench_eval/logs/camel_logs/{task_id}/`
Everything is under the `training` folder.
Please refer to Training Setup for detailed instructions.
Note: for the new TerminalToolkit design, see the "Terminal Toolkit Design" document.
