-
Notifications
You must be signed in to change notification settings - Fork 6
Expand file tree
/
Copy pathtrain_qwen2p5_3b_cot.sh
More file actions
69 lines (63 loc) · 2.24 KB
/
train_qwen2p5_3b_cot.sh
File metadata and controls
69 lines (63 loc) · 2.24 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
#!/bin/bash
# Stage-2 GRPO fine-tuning of Qwen2.5 (3B) with chain-of-thought on ReasonSeg.
# Usage: bash train_qwen2p5_3b_cot.sh   (single node, 4 GPUs)
#
# Fix notes vs. original:
#   - shebang moved to line 1 (it was after the first command and therefore ignored)
#   - strict mode added so a failed backup/mkdir aborts instead of launching training
#   - all expansions quoted; ${PYTHONPATH:-} default so 'set -u' does not trip on an unset var
set -euo pipefail

# Make the repository root importable by the training script.
export PYTHONPATH="${PYTHONPATH:-}:$(pwd)"

# Distributed launch topology (single-node defaults).
NNODES=1
NODE_RANK=0
MASTER_ADDR=localhost
MASTER_PORT=12345

# MODIFY HERE: please prepare the env related variables
PR1_PATH="./"
CHECKPOINT_PATH="./outputs" # directory to save the checkpoint
RUN_NAME="qwen2p5_stage2_reasonseg_ft_cot" # describe what your experiment is about

# Default Setting
OUTPUT_DIR="${CHECKPOINT_PATH}/${RUN_NAME}" # path to save the output
SRC_PATH="${OUTPUT_DIR}/src" # path to backup the source code
export LOG_DIR="${OUTPUT_DIR}/logs" # path to save the log
export WANDB_PROJECT="LENS" # project name in wandb
export WANDB_TAGS="qwen2p5_stage2_cot" # tags for the experiment in wandb
export WANDB_MODE=offline

# Backup the source code. mkdir -p is idempotent, so no existence check needed.
mkdir -p "${SRC_PATH}"
cp -r "${PR1_PATH}/src" "${SRC_PATH}"
mkdir -p "${LOG_DIR}"

# ReasonSeg Training needs change the dataset in src/open_r1/grpo_vllm_sam_stage2.py
# ReasonSeg Finetune: --question_template "pr1_grounding"
# CoT: --question_template "samr1_v4"
torchrun \
  --nproc_per_node="4" \
  --nnodes="${NNODES}" \
  --node_rank="${NODE_RANK}" \
  --master_addr="${MASTER_ADDR}" \
  --master_port="${MASTER_PORT}" \
  "${PR1_PATH}/src/open_r1/grpo_vllm_sam_stage2.py" \
  --deepspeed "${PR1_PATH}/configs/zero3.json" \
  --output_dir "${OUTPUT_DIR}" \
  --model_name_or_path ./pretrained/qwen2p5_refcoco_1500step \
  --max_prompt_length 2048 \
  --max_completion_length 768 \
  --per_device_train_batch_size 4 \
  --gradient_accumulation_steps 4 \
  --dataloader_num_workers 8 \
  --num_generations 8 \
  --logging_steps 1 \
  --bf16 \
  --gradient_checkpointing true \
  --attn_implementation flash_attention_2 \
  --report_to wandb \
  --max_pixels 1000000 \
  --num_train_epochs 40 \
  --run_name "${RUN_NAME}" \
  --save_steps 100 \
  --learning_rate 3e-6 \
  --reward_funcs "pr1_grounding" "think_format" \
  --save_only_model false \
  --system_prompt_template "default" \
  --question_template "samr1_v4" \
  --train_sample_size 5000000000000 \
  --skip_special_tokens false \
  --answer_template "default" \
  --if_detach_res_loss false \
  --if_use_mask_iou_reward true \
  --if_square_mask_iou_as_reward true \
  --coord_norm_type "qwen2p5vl"