Skip to content

Commit 192ce4b

Browse files
fix: pass absolute --config-path to verl; hydra resolved ours under verl/trainer
1 parent 6220897 commit 192ce4b

1 file changed

Lines changed: 7 additions & 3 deletions

File tree

chtc/train_grpo.sh

Lines changed: 7 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -58,7 +58,7 @@ pip install -e ".[dev,gpu]" --quiet
5858
# Pin to v0.7.0 (same as llm-starter, known to work with this image).
5959
echo "==> cloning + installing verl@v0.7.0"
6060
if [ ! -d "verl" ]; then
61-
git clone https://github.com/volcengine/verl.git -b v0.7.0 --depth 1
61+
git clone https://github.com/volcengine/verl.git -b v0.7.0
6262
fi
6363
pip install -e verl --quiet
6464

@@ -103,13 +103,17 @@ else
103103
fi
104104

105105
# --- Launch verl ---
106-
echo "==> python -m verl.trainer.main_ppo --config-path=configs --config-name=${CONFIG_NAME}"
106+
# Hydra resolves a relative --config-path against the directory of the Python
107+
# file that declares @hydra.main (here verl/trainer/), NOT the current working
108+
# directory. Pass an absolute path so it picks up our project's configs/ instead.
109+
CONFIG_PATH_ABS="$(pwd)/configs"
110+
echo "==> python -m verl.trainer.main_ppo --config-path=${CONFIG_PATH_ABS} --config-name=${CONFIG_NAME}"
107111

108112
# Don't bail on training failure — we still want the placeholder tarball
109113
# transferred so the job doesn't end up held.
110114
set +e
111115
python -m verl.trainer.main_ppo \
112-
--config-path=configs \
116+
--config-path="${CONFIG_PATH_ABS}" \
113117
--config-name="${CONFIG_NAME}" \
114118
${MODEL_OVERRIDE}
115119
TRAIN_EXIT=$?

0 commit comments

Comments
 (0)