From 3774ad4db72b93224ac24110fdb8ab6f1870d0b2 Mon Sep 17 00:00:00 2001
From: FindHao
Date: Thu, 13 Apr 2023 17:16:01 -0400
Subject: [PATCH] fix run_6.7b.sh for single gpu

---
 .../training_scripts/single_gpu/run_6.7b.sh | 31 ++++++++++++
 .../training_scripts/single_gpu/run_6.7b.sh | 48 +++++++++++++++++++
 2 files changed, 79 insertions(+)
 create mode 100644 applications/DeepSpeed-Chat/training/step1_supervised_finetuning/training_scripts/single_gpu/run_6.7b.sh
 create mode 100644 applications/DeepSpeed-Chat/training/step3_rlhf_finetuning/training_scripts/single_gpu/run_6.7b.sh

diff --git a/applications/DeepSpeed-Chat/training/step1_supervised_finetuning/training_scripts/single_gpu/run_6.7b.sh b/applications/DeepSpeed-Chat/training/step1_supervised_finetuning/training_scripts/single_gpu/run_6.7b.sh
new file mode 100644
index 000000000..bc8ca718f
--- /dev/null
+++ b/applications/DeepSpeed-Chat/training/step1_supervised_finetuning/training_scripts/single_gpu/run_6.7b.sh
@@ -0,0 +1,31 @@
+#!/bin/bash
+# Copyright (c) Microsoft Corporation.
+# SPDX-License-Identifier: Apache-2.0
+
+# DeepSpeed Team
+
+# Note that LoRA usually needs a larger learning rate
+OUTPUT_PATH=./output
+mkdir -p $OUTPUT_PATH
+
+deepspeed --num_gpus 1 main.py \
+   --data_path Dahoas/rm-static Dahoas/full-hh-rlhf Dahoas/synthetic-instruct-gptj-pairwise yitingxie/rlhf-reward-datasets openai/webgpt_comparisons stanfordnlp/SHP \
+   --data_split 2,4,4 \
+   --model_name_or_path facebook/opt-6.7b \
+   --per_device_train_batch_size 8 \
+   --per_device_eval_batch_size 8 \
+   --max_seq_len 512 \
+   --learning_rate 1e-3 \
+   --weight_decay 0.1 \
+   --num_train_epochs 2 \
+   --gradient_accumulation_steps 16 \
+   --lr_scheduler_type cosine \
+   --num_warmup_steps 0 \
+   --seed 1234 \
+   --gradient_checkpointing \
+   --zero_stage 0 \
+   --lora_dim 128 \
+   --lora_module_name decoder.layers. \
+   --deepspeed \
+   --output_dir $OUTPUT_PATH \
+   &> $OUTPUT_PATH/training.log
diff --git a/applications/DeepSpeed-Chat/training/step3_rlhf_finetuning/training_scripts/single_gpu/run_6.7b.sh b/applications/DeepSpeed-Chat/training/step3_rlhf_finetuning/training_scripts/single_gpu/run_6.7b.sh
new file mode 100644
index 000000000..65177977d
--- /dev/null
+++ b/applications/DeepSpeed-Chat/training/step3_rlhf_finetuning/training_scripts/single_gpu/run_6.7b.sh
@@ -0,0 +1,48 @@
+#!/bin/bash
+# Copyright (c) Microsoft Corporation.
+# SPDX-License-Identifier: Apache-2.0
+
+# DeepSpeed Team
+ACTOR_MODEL_PATH=${1:-"../output/actor_model"}
+CRITIC_MODEL_PATH=${2:-"../output/critic_model"}
+ACTOR_ZERO_STAGE=${3:-"--actor_zero_stage 0"}
+CRITIC_ZERO_STAGE=${4:-"--critic_zero_stage 0"}
+OUTPUT=${5:-"./output"}
+
+
+Num_Padding_at_Beginning=1 # this is model related
+
+Actor_Lr=5e-4
+Critic_Lr=5e-6
+
+mkdir -p $OUTPUT
+
+deepspeed --num_gpus 1 main.py \
+   --data_path Dahoas/rm-static Dahoas/full-hh-rlhf Dahoas/synthetic-instruct-gptj-pairwise yitingxie/rlhf-reward-datasets openai/webgpt_comparisons stanfordnlp/SHP \
+   --data_split 2,4,4 \
+   --actor_model_name_or_path $ACTOR_MODEL_PATH \
+   --critic_model_name_or_path $CRITIC_MODEL_PATH \
+   --num_padding_at_beginning ${Num_Padding_at_Beginning} \
+   --per_device_train_batch_size 8 \
+   --per_device_mini_train_batch_size 8 \
+   --generation_batch_numbers 1 \
+   --ppo_epochs 1 \
+   --max_answer_seq_len 256 \
+   --max_prompt_seq_len 256 \
+   --actor_learning_rate ${Actor_Lr} \
+   --critic_learning_rate ${Critic_Lr} \
+   --actor_weight_decay 0.1 \
+   --critic_weight_decay 0.1 \
+   --num_train_epochs 1 \
+   --lr_scheduler_type cosine \
+   --gradient_accumulation_steps 16 \
+   --num_warmup_steps 100 \
+   --deepspeed --seed 1234 \
+   ${ACTOR_ZERO_STAGE} \
+   ${CRITIC_ZERO_STAGE} \
+   --actor_lora_dim 128 \
+   --actor_gradient_checkpointing \
+   --critic_gradient_checkpointing \
+   --enable_hybrid_engine \
+   --output_dir $OUTPUT \
+   &> $OUTPUT/training.log
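
Usage note (not part of the patch): the step3 script accepts up to five positional arguments (actor model path, critic model path, actor ZeRO-stage flag, critic ZeRO-stage flag, output directory) and falls back to the defaults shown above when they are omitted. A minimal invocation sketch, assuming the script is run from the step3_rlhf_finetuning directory as the paths in the diff imply; the two checkpoint paths below are placeholders, not values taken from the patch:

    cd applications/DeepSpeed-Chat/training/step3_rlhf_finetuning
    # positional args: ACTOR_MODEL_PATH CRITIC_MODEL_PATH ACTOR_ZERO_STAGE CRITIC_ZERO_STAGE OUTPUT
    # the two model paths below are placeholders for the step1 (SFT) and step2 (reward model) outputs
    bash training_scripts/single_gpu/run_6.7b.sh \
        ../step1_supervised_finetuning/output \
        ../step2_reward_model_finetuning/output \
        "--actor_zero_stage 0" "--critic_zero_stage 0" ./output

Because the script expands ${ACTOR_ZERO_STAGE} and ${CRITIC_ZERO_STAGE} unquoted, each quoted flag string above is split back into a flag and its value when the deepspeed command runs.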