# Source: FastVideo/examples/train/dfsft_wangame_causal_v3.yaml
# at commit 9da01fa56fb30a23eaaf8f639e5da34c3a477190
# (FoundationResearch/FastVideo on GitHub)

# V3 config: WanGame causal Diffusion-Forcing SFT (DFSFT).
#
# Uses _target_-based instantiation — each model role is an independent
# class instance; the method class is resolved directly from the YAML.

# Model roles for this run. Only one role, `student`, is declared here.
models:
  student:
    # Dotted import path of the class instantiated for this role.
    _target_: fastvideo.train.models.wangame.WanGameCausalModel
    # Location the role is initialized from. This is an absolute,
    # machine-specific path; it has to be edited to run elsewhere.
    init_from: /mnt/weka/home/hao.zhang/kaiqin/wg_models/WanGame-2.1-0223-9000steps
    # NOTE(review): presumably marks this role's weights as updated by the
    # optimizer — confirm against the model class.
    trainable: true

# Training method. `_target_` is the dotted import path of the method class;
# the remaining keys are its settings.
method:
  _target_: fastvideo.train.methods.fine_tuning.dfsft.DiffusionForcingSFTMethod
  attn_kind: dense
  # Left commented out, so this file does not set `use_ema`; the effective
  # value is whatever the method class defaults to.
  # use_ema: true
  # NOTE(review): unit of chunk_size is not visible here (presumably latent
  # frames per causal chunk) — confirm against the method class.
  chunk_size: 3
  # Lower/upper bounds for timestep selection, expressed as ratios.
  # NOTE(review): presumably fractions of the full noise schedule — confirm.
  min_timestep_ratio: 0.02
  max_timestep_ratio: 0.98

# Trainer settings, grouped by concern: distributed layout, data, optimizer,
# loop length, checkpointing, experiment tracking, and model memory options.
training:
  distributed:
    # hsdp_replicate_dim (8) * hsdp_shard_dim (1) equals num_gpus (8).
    # NOTE(review): presumably these must be kept consistent when the GPU
    # count changes — confirm against the trainer.
    num_gpus: 8
    sp_size: 1
    tp_size: 1
    hsdp_replicate_dim: 8
    hsdp_shard_dim: 1

  data:
    # `>-` folds the lines below into a single string joined by single spaces,
    # with no trailing newline. Entries are comma-separated and each has the
    # form `<path>:<number>`; the first two end in `:0`, the rest in `:1`.
    # NOTE(review): the meaning of the numeric suffix is not visible in this
    # file — confirm against the data loader. All paths are absolute and
    # machine-specific. Do not put comments inside the scalar: they would
    # become part of the value.
    data_path: >-
      /mnt/weka/home/hao.zhang/mhuo/traindata_0204_2130/preprocessed:0,
      /mnt/weka/home/hao.zhang/mhuo/traindata_0204_1600/preprocessed:0,
      /mnt/weka/home/hao.zhang/mhuo/traindata_0205_1330/data/0_static_plus_w_only/preprocessed:1,
      /mnt/weka/home/hao.zhang/mhuo/traindata_0205_1330/data/1_wasd_only/preprocessed:1,
      /mnt/weka/home/hao.zhang/mhuo/traindata_0206_1200/data/wasdonly_alpha1/preprocessed:1,
      /mnt/weka/home/hao.zhang/mhuo/traindata_0206_1200/data/camera/preprocessed:1,
      /mnt/weka/home/hao.zhang/mhuo/traindata_0208_2000/data/camera4hold_alpha1/preprocessed:1,
      /mnt/weka/home/hao.zhang/mhuo/traindata_0208_2000/data/wasd4holdrandview_simple_1key1mouse1/preprocessed:1
    dataloader_num_workers: 4
    # NOTE(review): not visible here whether this is per GPU or global —
    # confirm before computing the effective batch size.
    train_batch_size: 1
    training_cfg_rate: 0.0
    seed: 1000
    # NOTE(review): 77 frames and 20 latent steps are consistent with
    # (77 - 1) / 4 + 1 = 20, i.e. a 4x temporal compression that keeps the
    # first frame — confirm against the VAE before changing either value.
    num_latent_t: 20
    num_height: 352
    num_width: 640
    num_frames: 77

  optimizer:
    # Constant schedule with zero warmup steps: the learning rate below is
    # the only rate configured in this file.
    learning_rate: 1.0e-5
    betas: [0.9, 0.999]
    weight_decay: 1.0e-4
    lr_scheduler: constant
    lr_warmup_steps: 0

  loop:
    max_train_steps: 20000
    gradient_accumulation_steps: 1

  checkpoint:
    # Relative path.
    # NOTE(review): presumably resolved against the launch directory — confirm.
    output_dir: outputs/wangame_dfsft_causal_v3
    training_state_checkpointing_steps: 1000
    # NOTE(review): presumably the maximum number of checkpoints kept on
    # disk — confirm against the checkpoint manager.
    checkpoints_total_limit: 2

  tracker:
    # NOTE(review): the project name says "distillation" while the file
    # header describes this run as SFT — confirm this is the intended project.
    project_name: distillation_wangame_r
    run_name: wangame_dfsft_causal_v3

  model:
    enable_gradient_checkpointing_type: full

# Training callbacks. Each entry names its class via `_target_`; the other
# keys are that callback's settings.
# NOTE(review): whether callbacks run in the order listed here is not visible
# in this file — avoid reordering entries without checking the trainer.
callbacks:
  grad_clip:
    _target_: fastvideo.train.callbacks.grad_clip.GradNormClipCallback
    max_grad_norm: 1.0
  # EMA callback is disabled: the whole entry below is commented out.
  # ema:
  #   _target_: fastvideo.train.callbacks.ema.EMACallback
  #   beta: 0.9999
  validation:
    _target_: fastvideo.train.callbacks.validation.ValidationCallback
    # NOTE(review): the pipeline class is named "...DMDPipeline" although this
    # config trains with the DFSFT method — confirm it is the intended
    # validation pipeline.
    pipeline_target: fastvideo.pipelines.basic.wan.wangame_causal_dmd_pipeline.WanGameCausalDMDPipeline
    # Relative path to the validation set description.
    dataset_file: examples/training/finetune/WanGame2.1_1.3b_i2v/validation_random_8.json
    every_steps: 100
    # One-element list: a single sampling-step setting (40) is configured.
    sampling_steps: [40]
    rollout_mode: streaming
    # Same value as the top-level `pipeline.sampler_kind`.
    sampler_kind: ode
    scheduler_target: fastvideo.models.schedulers.scheduling_flow_match_euler_discrete.FlowMatchEulerDiscreteScheduler
    guidance_scale: 1.0
    # NOTE(review): differs from training.data.num_frames (77) — confirm the
    # shorter validation length is intentional.
    num_frames: 69

# Pipeline-level sampling settings.
pipeline:
  flow_shift: 3
  # Repeats the value set under `callbacks.validation.sampler_kind`.
  # NOTE(review): presumably the two should be changed together — confirm.
  sampler_kind: ode