-
Notifications
You must be signed in to change notification settings - Fork 164
Expand file tree
/
Copy pathqwen25_omni.yaml
More file actions
43 lines (41 loc) · 955 Bytes
/
qwen25_omni.yaml
File metadata and controls
43 lines (41 loc) · 955 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
# Model section: names the checkpoint to load.
# NOTE(review): nesting restored on the assumption that model_path is a child
# of model (the flattened form leaves model as null) — confirm against the
# consuming loader's schema.
model:
  model_path: qwen2_5-omni-7b
# Data section: dataset location, sequence/sample limits, loader settings and
# multimodal preprocessing parameters.
# NOTE(review): indentation was reconstructed from key names (the flattened
# form leaves data, dataloader and mm_configs as null) — confirm that
# dataloader and mm_configs are children of data and that every key from
# scale_factor through use_audio_in_video belongs to mm_configs.
data:
  train_path: configs/multimodal/data/tulu_sharegpt4v_llavavideo_voiceassistant.yaml
  data_type: conversation
  # NOTE(review): 8196 is not a power of two; possibly meant 8192 — confirm.
  max_seq_len: 8196
  train_size: 80000000
  dataloader:
    num_workers: 0
  mm_configs:
    scale_factor: 28
    # Pixel bounds are expressed as multiples of 28 * 28 = 784.
    image_min_pixels: 3136  # 4 * 28 * 28
    image_max_pixels: 12845056  # 16384 * 28 * 28
    video_min_pixels: 100352  # 128 * 28 * 28
    video_max_pixels: 602112  # 768 * 28 * 28
    max_ratio: 200
    min_frames: 4
    # Alternative frame cap kept for reference; the active value is below.
    # max_frames: 768
    max_frames: 20
    frame_factor: 2
    sample_rate: 16000
    fps: 2.0
    use_audio_in_video: true
# Training section: parallelism, logging, optimizer, batch/step limits and
# checkpoint output.
# NOTE(review): indentation was reconstructed from key names (the flattened
# form leaves train and every sub-section as null). Placements to confirm
# against the consuming schema:
#   - fsdp_config is assumed to be a child of accelerator (it could instead be
#     a direct child of train);
#   - lr_decay_style is assumed to be a child of optimizer (it could instead
#     be a direct child of train).
train:
  accelerator:
    ulysses_size: 1
    fsdp_config:
      fsdp_mode: fsdp2
  wandb:
    project: qwen2_5_omni
    name: qwen2_5_omni
  optimizer:
    lr: 1.0e-5
    lr_decay_style: cosine
  num_train_epochs: 3
  micro_batch_size: 1
  global_batch_size: 8
  # NOTE(review): a 20-step cap alongside num_train_epochs: 3 looks like a
  # smoke-test setting — confirm before launching a full run.
  max_steps: 20
  init_device: meta
  checkpoint:
    output_dir: qwen2_5_omni_sft