-
Notifications
You must be signed in to change notification settings - Fork 164
Expand file tree
/
Copy pathqwen3_vl_moe.yaml
More file actions
47 lines (45 loc) · 1019 Bytes
/
qwen3_vl_moe.yaml
File metadata and controls
47 lines (45 loc) · 1019 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
# Model section: which model to load and which implementation to use for its
# MoE and attention ops.
# NOTE(review): nesting was reconstructed from a copy whose indentation had
# been stripped — confirm against the consumer's config schema.
model:
  # NOTE(review): bare name rather than an absolute path — presumably resolved
  # by the loader as a local directory or hub ID; confirm.
  model_path: Qwen3-VL-30B-A3B-Instruct
  ops_implementation:
    moe_implementation: fused
    attn_implementation: flash_attention_2
# Data section: training source, conversation formatting, sequence length and
# multimodal preprocessing limits.
# NOTE(review): nesting was reconstructed from a copy whose indentation had
# been stripped — confirm against the consumer's config schema.
data:
  # Points at another YAML file rather than directly at a dataset.
  train_path: configs/multimodal/data/tulu_sharegpt4v_llavavideo.yaml
  data_type: conversation
  chat_template: qwen3vl
  max_seq_len: 4096
  # NOTE(review): unit is not visible here (samples vs. tokens) — confirm.
  train_size: 80000000
  mm_configs:
    # Image and video share the same pixel budget.
    image_max_pixels: 602112  # 28 * 28 * 768
    video_max_pixels: 602112  # 28 * 28 * 768
    max_frames: 16
    fps: 2.0
    use_audio_in_video: false
# Training section: parallelism, logging, optimizer, batch/step budget,
# profiling and checkpointing.
# NOTE(review): nesting was reconstructed from a copy whose indentation had
# been stripped. Without nesting, the two `enable` keys (wandb, profile)
# collide as duplicates — confirm the layout against the consumer's schema.
train:
  accelerator:
    # NOTE(review): presumably the expert-parallel group size — confirm.
    ep_size: 1
    fsdp_config:
      fsdp_mode: fsdp2
  gradient_checkpointing:
    enable_reentrant: false
  wandb:
    enable: false
    project: qwen3_vl_moe
    name: qwen3_vl_moe
  freeze_vit: false
  optimizer:
    lr: 1.0e-5
    # NOTE(review): placement under `optimizer` (vs. directly under `train`)
    # could not be determined from the flattened text — confirm.
    lr_decay_style: cosine
  num_train_epochs: 2
  micro_batch_size: 1
  global_batch_size: 32
  # NOTE(review): both an epoch count and a step cap are set; which one ends
  # the run first depends on the trainer — confirm.
  max_steps: 500
  init_device: meta
  profile:
    # Profiling is switched on for the step window 20..21.
    enable: true
    start_step: 20
    end_step: 21
    record_shapes: true
  checkpoint:
    output_dir: qwen3_vl_moe_sft
    manager: dcp