[chore] Cleanup: replace local paths with placeholders and update quickstart training script defaults

OleehyO · OleehyO · commit 37051cda29a5 · 2025-04-17T05:39:52.000Z
diff --git a/quickstart/scripts/train_ddp_i2v.sh b/quickstart/scripts/train_ddp_i2v.sh
@@ -30,16 +30,12 @@ TRAIN_ARGS=(
     --batch_size 1
     --gradient_accumulation_steps 1
     --mixed_precision "bf16"  # ["no", "fp16"]
-    --learning_rate 2e-5
+    --learning_rate 5e-5
 
     # Note:
     #  for CogVideoX series models, number of training frames should be **8N+1**
     #  for CogVideoX1.5 series models, number of training frames should be **16N+1**
     --train_resolution "81x768x1360"  # (frames x height x width)
-
-    # enable --low_vram will slow down validation speed and enable quantization during training
-    # Note: --low_vram currently does not support multi-GPU training
-    --low_vram false
 )
 
 # System Configuration
diff --git a/quickstart/scripts/train_ddp_t2i.sh b/quickstart/scripts/train_ddp_t2i.sh
@@ -7,58 +7,46 @@ export TOKENIZERS_PARALLELISM=false
 # Model Configuration
 MODEL_ARGS=(
     --model_path "THUDM/CogView4-6B"
-    # --model_path "/home/lhy/code/CogKit/CogView4-6B"
     --model_name "cogview4-6b"  # candidate: ["cogview4-6b"]
     --model_type "t2i"
     --training_type "lora"
 )
 
 # Output Configuration
 OUTPUT_ARGS=(
-    --output_dir "/home/lhy/code/CogKit/quickstart/train_result/t2i/pixelart-packing-unquantized-512x512"
-    # --output_dir "/home/lhy/code/CogKit/quickstart/scripts/train_result/debug"
+    --output_dir "/path/to/output"
     --report_to "tensorboard"
 )
 
 # Data Configuration
 DATA_ARGS=(
-    # --data_root "/home/lhy/code/CogKit/quickstart/data/t2i"
-    # --data_root "/home/lhy/code/CogKit/quickstart/data/t2i-nieta"
-    --data_root "/home/lhy/code/CogKit/quickstart/data/t2i-pixelart"
+    --data_root "/path/to/data"
 )
 
 # Training Configuration
 TRAIN_ARGS=(
     --seed 42  # random seed
-    # --train_epochs 3  # number of training epochs
-    # --batch_size 1
+    --train_epochs 1  # number of training epochs
+    --batch_size 1
 
-    --train_epochs 100  # number of training epochs
-    --batch_size 8
-    # --batch_size 64
+    --train_epochs 1  # number of training epochs
+
+    --batch_size 1
 
     --gradient_accumulation_steps 1
-    # --gradient_accumulation_steps 2
 
     # Note: For CogView4 series models, height and width should be **32N** (multiple of 32)
-    --train_resolution "512x512"  # (height x width)
-    # --train_resolution "1024x1024"  # (height x width)
+    --train_resolution "1024x1024"  # (height x width)
 
     # When enable_packing is true, training will use the native image resolution
     # (otherwise all images will be resized to train_resolution, which may distort the original aspect ratio).
     #
-    # Note: Since images won't be resized, you must ensure all images have total pixels
-    #     less than what's specified in train_resolution.
-    #
-    # Packing is not supported for CogVideo series models currently.
-    #
     # IMPORTANT: When changing enable_packing from true to false (or vice versa),
     # make sure to clear the .cache directories in your data_root/train and data_root/test folders if they exist.
-    --enable_packing true
+    --enable_packing false
 
     --mixed_precision "bf16"  # ["no", "fp16"]
-    --learning_rate 2e-5
-
+    --learning_rate 5e-5
 
     # enable --low_vram will slow down validation speed and enable quantization during training
     # Note: --low_vram currently does not support multi-GPU training
@@ -83,11 +71,11 @@ CHECKPOINT_ARGS=(
 # Validation Configuration
 VALIDATION_ARGS=(
     --do_validation true   # ["true", "false"]
-    --validation_steps 30  # should be multiple of checkpointing_steps
+    --validation_steps 10  # should be a multiple of checkpointing_steps
 )
 
 # Combine all arguments and launch training
-accelerate launch train.py \
+accelerate launch --main_process_port 29502  train.py \
     "${MODEL_ARGS[@]}" \
     "${OUTPUT_ARGS[@]}" \
     "${DATA_ARGS[@]}" \
diff --git a/quickstart/scripts/train_ddp_t2v.sh b/quickstart/scripts/train_ddp_t2v.sh
@@ -29,16 +29,12 @@ TRAIN_ARGS=(
     --batch_size 1
     --gradient_accumulation_steps 1
     --mixed_precision "bf16"  # ["no", "fp16"]  Note: CogVideoX-2B only supports fp16 training
-    --learning_rate 2e-5
+    --learning_rate 5e-5
 
     # Note:
     #  for CogVideoX series models, number of training frames should be **8N+1**
     #  for CogVideoX1.5 series models, number of training frames should be **16N+1**
     --train_resolution "81x768x1360"  # (frames x height x width)
-
-    # enable --low_vram will slow down validation speed and enable quantization during training
-    # Note: --low_vram currently does not support multi-GPU training
-    --low_vram false
 )
 
 # System Configuration
diff --git a/quickstart/scripts/train_zero_i2v.sh b/quickstart/scripts/train_zero_i2v.sh
@@ -27,7 +27,7 @@ TRAIN_ARGS=(
     --seed 42  # random seed
     --train_epochs 1  # number of training epochs
 
-    --learning_rate 2e-5
+    --learning_rate 5e-5
 
     #########   Please keep consistent with deepspeed config file ##########
     --batch_size 1
diff --git a/quickstart/scripts/train_zero_t2i.sh b/quickstart/scripts/train_zero_t2i.sh
@@ -27,7 +27,7 @@ TRAIN_ARGS=(
     --seed 42  # random seed
     --train_epochs 1  # number of training epochs
 
-    --learning_rate 2e-5
+    --learning_rate 5e-5
 
     # Note: For CogView4 series models, height and width should be **32N** (multiple of 32)
     --train_resolution "1024x1024"  # (height x width)
@@ -38,6 +38,13 @@ TRAIN_ARGS=(
     --mixed_precision "bf16"  # ["no", "fp16"]   Note: CogVideoX-2B only supports fp16 training
     ########################################################################
 
+    # When enable_packing is true, training will use the native image resolution
+    # (otherwise all images will be resized to train_resolution, which may distort the original aspect ratio).
+    #
+    # IMPORTANT: When changing enable_packing from true to false (or vice versa),
+    # make sure to clear the .cache directories in your data_root/train and data_root/test folders if they exist.
+    --enable_packing false
+
 )
 
 # System Configuration
@@ -61,7 +68,7 @@ VALIDATION_ARGS=(
 )
 
 # Combine all arguments and launch training
-accelerate launch --config_file ../configs/accelerate_config.yaml train.py \
+accelerate launch --config_file ../configs/accelerate_config.yaml train.py\
     "${MODEL_ARGS[@]}" \
     "${OUTPUT_ARGS[@]}" \
     "${DATA_ARGS[@]}" \
diff --git a/quickstart/scripts/train_zero_t2v.sh b/quickstart/scripts/train_zero_t2v.sh
@@ -27,7 +27,7 @@ TRAIN_ARGS=(
     --seed 42  # random seed
     --train_epochs 1  # number of training epochs
 
-    --learning_rate 2e-5
+    --learning_rate 5e-5
 
     #########   Please keep consistent with deepspeed config file ##########
     --batch_size 1