Skip to content

Commit e4f6da0

Browse files
committed
[chore] Cleanup
1 parent 31e967b commit e4f6da0

File tree

6 files changed

+22
-40
lines changed

6 files changed

+22
-40
lines changed

quickstart/scripts/train_ddp_i2v.sh

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -30,16 +30,12 @@ TRAIN_ARGS=(
3030
--batch_size 1
3131
--gradient_accumulation_steps 1
3232
--mixed_precision "bf16" # ["no", "fp16"]
33-
--learning_rate 2e-5
33+
--learning_rate 5e-5
3434

3535
# Note:
3636
# for CogVideoX series models, number of training frames should be **8N+1**
3737
# for CogVideoX1.5 series models, number of training frames should be **16N+1**
3838
--train_resolution "81x768x1360" # (frames x height x width)
39-
40-
# enable --low_vram will slow down validation speed and enable quantization during training
41-
# Note: --low_vram currently does not support multi-GPU training
42-
--low_vram false
4339
)
4440

4541
# System Configuration

quickstart/scripts/train_ddp_t2i.sh

Lines changed: 9 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -7,58 +7,42 @@ export TOKENIZERS_PARALLELISM=false
77
# Model Configuration
88
MODEL_ARGS=(
99
--model_path "THUDM/CogView4-6B"
10-
# --model_path "/home/lhy/code/CogKit/CogView4-6B"
1110
--model_name "cogview4-6b" # candidate: ["cogview4-6b"]
1211
--model_type "t2i"
1312
--training_type "lora"
1413
)
1514

1615
# Output Configuration
1716
OUTPUT_ARGS=(
18-
--output_dir "/home/lhy/code/CogKit/quickstart/train_result/t2i/pixelart-packing-unquantized-512x512"
19-
# --output_dir "/home/lhy/code/CogKit/quickstart/scripts/train_result/debug"
17+
--output_dir "/path/to/output"
2018
--report_to "tensorboard"
2119
)
2220

2321
# Data Configuration
2422
DATA_ARGS=(
25-
# --data_root "/home/lhy/code/CogKit/quickstart/data/t2i"
26-
# --data_root "/home/lhy/code/CogKit/quickstart/data/t2i-nieta"
27-
--data_root "/home/lhy/code/CogKit/quickstart/data/t2i-pixelart"
23+
--data_root "/path/to/data"
2824
)
2925

3026
# Training Configuration
3127
TRAIN_ARGS=(
3228
--seed 42 # random seed
33-
# --train_epochs 3 # number of training epochs
34-
# --batch_size 1
35-
36-
--train_epochs 100 # number of training epochs
37-
--batch_size 8
38-
# --batch_size 64
29+
--train_epochs 1 # number of training epochs
30+
--batch_size 1
3931

4032
--gradient_accumulation_steps 1
41-
# --gradient_accumulation_steps 2
4233

4334
# Note: For CogView4 series models, height and width should be **32N** (multiple of 32)
44-
--train_resolution "512x512" # (height x width)
45-
# --train_resolution "1024x1024" # (height x width)
35+
--train_resolution "1024x1024" # (height x width)
4636

4737
# When enable_packing is true, training will use the native image resolution
4838
# (otherwise all images will be resized to train_resolution, which may distort the original aspect ratio).
4939
#
50-
# Note: Since images won't be resized, you must ensure all images have total pixels
51-
# less than what's specified in train_resolution.
52-
#
53-
# Packing is not supported for CogVideo series models currently.
54-
#
5540
# IMPORTANT: When changing enable_packing from true to false (or vice versa),
5641
# make sure to clear the .cache directories in your data_root/train and data_root/test folders if they exist.
57-
--enable_packing true
42+
--enable_packing false
5843

5944
--mixed_precision "bf16" # ["no", "fp16"]
60-
--learning_rate 2e-5
61-
45+
--learning_rate 5e-5
6246

6347
# Enabling --low_vram slows down validation and enables quantization during training
6448
# Note: --low_vram currently does not support multi-GPU training
@@ -74,16 +58,15 @@ SYSTEM_ARGS=(
7458

7559
# Checkpointing Configuration
7660
CHECKPOINT_ARGS=(
77-
# --checkpointing_steps 10 # save checkpoint every x steps
78-
--checkpointing_steps 30 # save checkpoint every x steps
61+
--checkpointing_steps 10 # save checkpoint every x steps
7962
--checkpointing_limit 2 # maximum number of checkpoints to keep, after which the oldest one is deleted
8063
# --resume_from_checkpoint "/absolute/path/to/checkpoint_dir" # if you want to resume from a checkpoint
8164
)
8265

8366
# Validation Configuration
8467
VALIDATION_ARGS=(
8568
--do_validation true # ["true", "false"]
86-
--validation_steps 30 # should be multiple of checkpointing_steps
69+
--validation_steps 10 # should be a multiple of checkpointing_steps
8770
)
8871

8972
# Combine all arguments and launch training

quickstart/scripts/train_ddp_t2v.sh

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -29,16 +29,12 @@ TRAIN_ARGS=(
2929
--batch_size 1
3030
--gradient_accumulation_steps 1
3131
--mixed_precision "bf16" # ["no", "fp16"] Note: CogVideoX-2B only supports fp16 training
32-
--learning_rate 2e-5
32+
--learning_rate 5e-5
3333

3434
# Note:
3535
# for CogVideoX series models, number of training frames should be **8N+1**
3636
# for CogVideoX1.5 series models, number of training frames should be **16N+1**
3737
--train_resolution "81x768x1360" # (frames x height x width)
38-
39-
# enable --low_vram will slow down validation speed and enable quantization during training
40-
# Note: --low_vram currently does not support multi-GPU training
41-
--low_vram false
4238
)
4339

4440
# System Configuration

quickstart/scripts/train_zero_i2v.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ TRAIN_ARGS=(
2727
--seed 42 # random seed
2828
--train_epochs 1 # number of training epochs
2929

30-
--learning_rate 2e-5
30+
--learning_rate 5e-5
3131

3232
######### Please keep consistent with deepspeed config file ##########
3333
--batch_size 1

quickstart/scripts/train_zero_t2i.sh

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ TRAIN_ARGS=(
2727
--seed 42 # random seed
2828
--train_epochs 1 # number of training epochs
2929

30-
--learning_rate 2e-5
30+
--learning_rate 5e-5
3131

3232
# Note: For CogView4 series models, height and width should be **32N** (multiple of 32)
3333
--train_resolution "1024x1024" # (height x width)
@@ -38,6 +38,13 @@ TRAIN_ARGS=(
3838
--mixed_precision "bf16" # ["no", "fp16"] Note: CogVideoX-2B only supports fp16 training
3939
########################################################################
4040

41+
# When enable_packing is true, training will use the native image resolution
42+
# (otherwise all images will be resized to train_resolution, which may distort the original aspect ratio).
43+
#
44+
# IMPORTANT: When changing enable_packing from true to false (or vice versa),
45+
# make sure to clear the .cache directories in your data_root/train and data_root/test folders if they exist.
46+
--enable_packing false
47+
4148
)
4249

4350
# System Configuration
@@ -61,7 +68,7 @@ VALIDATION_ARGS=(
6168
)
6269

6370
# Combine all arguments and launch training
64-
accelerate launch --config_file ../configs/accelerate_config.yaml train.py \
71+
accelerate launch --config_file ../configs/accelerate_config.yaml train.py\
6572
"${MODEL_ARGS[@]}" \
6673
"${OUTPUT_ARGS[@]}" \
6774
"${DATA_ARGS[@]}" \

quickstart/scripts/train_zero_t2v.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ TRAIN_ARGS=(
2727
--seed 42 # random seed
2828
--train_epochs 1 # number of training epochs
2929

30-
--learning_rate 2e-5
30+
--learning_rate 5e-5
3131

3232
######### Please keep consistent with deepspeed config file ##########
3333
--batch_size 1

0 commit comments

Comments
 (0)