Skip to content

Commit 37051cd

Browse files
committed
[chore] Cleanup
1 parent 31e967b commit 37051cd

File tree

6 files changed

+25
-38
lines changed

6 files changed

+25
-38
lines changed

quickstart/scripts/train_ddp_i2v.sh

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -30,16 +30,12 @@ TRAIN_ARGS=(
3030
--batch_size 1
3131
--gradient_accumulation_steps 1
3232
--mixed_precision "bf16" # ["no", "fp16"]
33-
--learning_rate 2e-5
33+
--learning_rate 5e-5
3434

3535
# Note:
3636
# for CogVideoX series models, number of training frames should be **8N+1**
3737
# for CogVideoX1.5 series models, number of training frames should be **16N+1**
3838
--train_resolution "81x768x1360" # (frames x height x width)
39-
40-
# enable --low_vram will slow down validation speed and enable quantization during training
41-
# Note: --low_vram currently does not support multi-GPU training
42-
--low_vram false
4339
)
4440

4541
# System Configuration

quickstart/scripts/train_ddp_t2i.sh

Lines changed: 12 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -7,58 +7,46 @@ export TOKENIZERS_PARALLELISM=false
77
# Model Configuration
88
MODEL_ARGS=(
99
--model_path "THUDM/CogView4-6B"
10-
# --model_path "/home/lhy/code/CogKit/CogView4-6B"
1110
--model_name "cogview4-6b" # candidate: ["cogview4-6b"]
1211
--model_type "t2i"
1312
--training_type "lora"
1413
)
1514

1615
# Output Configuration
1716
OUTPUT_ARGS=(
18-
--output_dir "/home/lhy/code/CogKit/quickstart/train_result/t2i/pixelart-packing-unquantized-512x512"
19-
# --output_dir "/home/lhy/code/CogKit/quickstart/scripts/train_result/debug"
17+
--output_dir "/path/to/output"
2018
--report_to "tensorboard"
2119
)
2220

2321
# Data Configuration
2422
DATA_ARGS=(
25-
# --data_root "/home/lhy/code/CogKit/quickstart/data/t2i"
26-
# --data_root "/home/lhy/code/CogKit/quickstart/data/t2i-nieta"
27-
--data_root "/home/lhy/code/CogKit/quickstart/data/t2i-pixelart"
23+
--data_root "/path/to/data"
2824
)
2925

3026
# Training Configuration
3127
TRAIN_ARGS=(
3228
--seed 42 # random seed
33-
# --train_epochs 3 # number of training epochs
34-
# --batch_size 1
29+
--train_epochs 1 # number of training epochs
30+
--batch_size 1
3531

36-
--train_epochs 100 # number of training epochs
37-
--batch_size 8
38-
# --batch_size 64
32+
--train_epochs 1 # number of training epochs
33+
34+
--batch_size 1
3935

4036
--gradient_accumulation_steps 1
41-
# --gradient_accumulation_steps 2
4237

4338
# Note: For CogView4 series models, height and width should be **32N** (multiple of 32)
44-
--train_resolution "512x512" # (height x width)
45-
# --train_resolution "1024x1024" # (height x width)
39+
--train_resolution "1024x1024" # (height x width)
4640

4741
# When enable_packing is true, training will use the native image resolution
4842
# (otherwise all images will be resized to train_resolution, which may distort the original aspect ratio).
4943
#
50-
# Note: Since images won't be resized, you must ensure all images have total pixels
51-
# less than what's specified in train_resolution.
52-
#
53-
# Packing is not supported for CogVideo series models currently.
54-
#
5544
# IMPORTANT: When changing enable_packing from true to false (or vice versa),
5645
# make sure to clear the .cache directories in your data_root/train and data_root/test folders if they exist.
57-
--enable_packing true
46+
--enable_packing false
5847

5948
--mixed_precision "bf16" # ["no", "fp16"]
60-
--learning_rate 2e-5
61-
49+
--learning_rate 5e-5
6250

6351
# Enabling --low_vram will slow down validation and enable quantization during training
6452
# Note: --low_vram currently does not support multi-GPU training
@@ -83,11 +71,11 @@ CHECKPOINT_ARGS=(
8371
# Validation Configuration
8472
VALIDATION_ARGS=(
8573
--do_validation true # ["true", "false"]
86-
--validation_steps 30 # should be multiple of checkpointing_steps
74+
--validation_steps 10 # should be a multiple of checkpointing_steps
8775
)
8876

8977
# Combine all arguments and launch training
90-
accelerate launch train.py \
78+
accelerate launch --main_process_port 29502 train.py \
9179
"${MODEL_ARGS[@]}" \
9280
"${OUTPUT_ARGS[@]}" \
9381
"${DATA_ARGS[@]}" \

quickstart/scripts/train_ddp_t2v.sh

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -29,16 +29,12 @@ TRAIN_ARGS=(
2929
--batch_size 1
3030
--gradient_accumulation_steps 1
3131
--mixed_precision "bf16" # ["no", "fp16"] Note: CogVideoX-2B only supports fp16 training
32-
--learning_rate 2e-5
32+
--learning_rate 5e-5
3333

3434
# Note:
3535
# for CogVideoX series models, number of training frames should be **8N+1**
3636
# for CogVideoX1.5 series models, number of training frames should be **16N+1**
3737
--train_resolution "81x768x1360" # (frames x height x width)
38-
39-
# enable --low_vram will slow down validation speed and enable quantization during training
40-
# Note: --low_vram currently does not support multi-GPU training
41-
--low_vram false
4238
)
4339

4440
# System Configuration

quickstart/scripts/train_zero_i2v.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ TRAIN_ARGS=(
2727
--seed 42 # random seed
2828
--train_epochs 1 # number of training epochs
2929

30-
--learning_rate 2e-5
30+
--learning_rate 5e-5
3131

3232
######### Please keep consistent with deepspeed config file ##########
3333
--batch_size 1

quickstart/scripts/train_zero_t2i.sh

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ TRAIN_ARGS=(
2727
--seed 42 # random seed
2828
--train_epochs 1 # number of training epochs
2929

30-
--learning_rate 2e-5
30+
--learning_rate 5e-5
3131

3232
# Note: For CogView4 series models, height and width should be **32N** (multiple of 32)
3333
--train_resolution "1024x1024" # (height x width)
@@ -38,6 +38,13 @@ TRAIN_ARGS=(
3838
--mixed_precision "bf16" # ["no", "fp16"] Note: CogVideoX-2B only supports fp16 training
3939
########################################################################
4040

41+
# When enable_packing is true, training will use the native image resolution
42+
# (otherwise all images will be resized to train_resolution, which may distort the original aspect ratio).
43+
#
44+
# IMPORTANT: When changing enable_packing from true to false (or vice versa),
45+
# make sure to clear the .cache directories in your data_root/train and data_root/test folders if they exist.
46+
--enable_packing false
47+
4148
)
4249

4350
# System Configuration
@@ -61,7 +68,7 @@ VALIDATION_ARGS=(
6168
)
6269

6370
# Combine all arguments and launch training
64-
accelerate launch --config_file ../configs/accelerate_config.yaml train.py \
71+
accelerate launch --config_file ../configs/accelerate_config.yaml train.py \
6572
"${MODEL_ARGS[@]}" \
6673
"${OUTPUT_ARGS[@]}" \
6774
"${DATA_ARGS[@]}" \

quickstart/scripts/train_zero_t2v.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ TRAIN_ARGS=(
2727
--seed 42 # random seed
2828
--train_epochs 1 # number of training epochs
2929

30-
--learning_rate 2e-5
30+
--learning_rate 5e-5
3131

3232
######### Please keep consistent with deepspeed config file ##########
3333
--batch_size 1

0 commit comments

Comments
 (0)