# Source: lmms-engine/examples/qwen2_5_omni/example_config.yaml (main branch, EvolvingLMMs-Lab/lmms-engine on GitHub)
# Name of the trainer implementation to use.
# NOTE(review): presumably resolved by the training framework's trainer registry — confirm.
trainer_type: fsdp2_trainer

# Dataset settings: where the data lives, how samples are preprocessed, and how
# sequences are filtered/packed. Keys are grouped by concern; a YAML mapping is
# unordered, so the grouping does not change the loaded values.
dataset_config:
  # --- Dataset identity and location ---
  dataset_type: qwen_omni_iterable
  dataset_format: json
  # Placeholder path: point this at your own dataset file before running.
  dataset_path: /path/to/your/dataset.json
  datasets: null
  eval_dataset_path: null
  shuffle: true

  # --- Remote storage ---
  # `none` is the plain string "none", not YAML null (null is spelled `null`).
  object_storage: none
  bucket_name: null

  # --- Processor ---
  # NOTE(review): pixel values look like per-input pixel budgets and
  # audio_max_length looks like a duration cap — units are not stated here; confirm.
  processor_config:
    processor_type: Qwen2_5OmniProcessor
    processor_name: Qwen/Qwen2.5-Omni-7B
    audio_max_length: 60
    image_min_pixels: 28800
    image_max_pixels: 602112
    video_min_pixels: 28800
    video_max_pixels: 602112

  # --- Video decoding / frame sampling ---
  video_backend: qwen_omni_utils
  video_sampling_strategy: fps
  fps: 1
  video_max_frames: 512
  # Same value as processor_config.video_max_pixels.
  # NOTE(review): keep the two in sync; confirm which one the loader reads.
  video_max_pixels: 602112

  # --- Sequence length handling ---
  max_length: null
  filter_overlong: true
  filter_overlong_workers: 8
  # Packing is disabled; packing_strategy / packing_length are still set and are
  # presumably only consulted when `packing` is true — confirm.
  packing: false
  packing_strategy: first_fit
  packing_length: 51200

  # --- Extra dataset keyword arguments ---
  extra_kwargs:
    use_audio_in_video: true

# Trainer arguments.
# NOTE(review): the key set looks like a serialized dump of Hugging Face
# `TrainingArguments` followed by project-specific keys (use_muon ..
# profiler_config) — confirm against the trainer's argument schema.
trainer_args:
  # --- Output location and run mode ---
  output_dir: ./output/qwen_omni_debug
  overwrite_output_dir: false
  do_train: false
  do_eval: false
  do_predict: false
  # Quoted so the value stays the string 'no'; a bare `no` is a boolean under
  # YAML 1.1 parsers.
  eval_strategy: 'no'
  prediction_loss_only: false

  # --- Batch sizing ---
  per_device_train_batch_size: 1
  gradient_accumulation_steps: 1
  eval_accumulation_steps: null
  eval_delay: 0

  # --- Optimizer hyperparameters ---
  learning_rate: 0.00001
  weight_decay: 0.0
  adam_beta1: 0.9
  adam_beta2: 0.999
  adam_epsilon: 1.0e-08
  max_grad_norm: 1.0

  # --- Run length and learning-rate schedule ---
  num_train_epochs: 1
  # Very short run; together with the "debug" output_dir this reads as a
  # smoke-test setting. NOTE(review): raise for real training.
  max_steps: 5
  lr_scheduler_type: constant
  lr_scheduler_kwargs: {}
  # NOTE(review): warmup settings may have no effect with a plain `constant`
  # schedule — confirm.
  warmup_ratio: 0.1
  warmup_steps: 0

  # --- Logging ---
  log_level: passive
  log_level_replica: warning
  log_on_each_node: true
  logging_dir: ./output/qwen_omni_debug/runs
  logging_strategy: steps
  logging_first_step: false
  logging_steps: 1
  logging_nan_inf_filter: true

  # --- Checkpointing ---
  # Saves every step but keeps at most one checkpoint.
  save_strategy: steps
  save_steps: 1
  save_total_limit: 1
  save_safetensors: true
  save_on_each_node: false
  save_only_model: false
  restore_callback_states_from_checkpoint: false

  # --- Device selection ---
  no_cuda: false
  use_cpu: false
  use_mps_device: false

  # --- Reproducibility ---
  seed: 42
  data_seed: null
  jit_mode_eval: false

  # --- Numeric precision: bf16 on, fp16 off ---
  bf16: true
  fp16: false
  fp16_opt_level: O1
  half_precision_backend: auto
  bf16_full_eval: false
  fp16_full_eval: false
  tf32: null

  # --- Distributed / accelerator plumbing ---
  # NOTE(review): local_rank is hard-coded to 0 here; presumably overridden by
  # the launcher at runtime — confirm.
  local_rank: 0
  ddp_backend: null
  tpu_num_cores: null
  tpu_metrics_debug: false
  debug: []

  # --- Data loading and evaluation cadence ---
  dataloader_drop_last: false
  eval_steps: null
  dataloader_num_workers: 0
  dataloader_prefetch_factor: null
  past_index: -1
  run_name: qwen2_5_omni
  disable_tqdm: false
  remove_unused_columns: true
  label_names: null
  load_best_model_at_end: false
  metric_for_best_model: null
  greater_is_better: null
  ignore_data_skip: false

  # --- FSDP ---
  # `fsdp` is an empty list while `fsdp2` (further below) is true.
  # NOTE(review): presumably the fsdp2 trainer reads `fsdp2` / `fsdp_config`
  # rather than `fsdp` — confirm.
  fsdp: []
  fsdp_min_num_params: 0
  fsdp_config:
    # Layer class name(s) to wrap.
    transformer_layer_cls_to_wrap:
    - Qwen2_5OmniDecoderLayer
    reshard_after_forward: false
    min_num_params: 0
    xla: false
    xla_fsdp_v2: false
    xla_fsdp_grad_ckpt: false
  fsdp_transformer_layer_cls_to_wrap: null
  accelerator_config:
    split_batches: false
    dispatch_batches: null
    even_batches: true
    use_seedable_sampler: true
    non_blocking: false
    gradient_accumulation_kwargs: null
  parallelism_config: null
  deepspeed: null

  # --- Loss / optimizer selection ---
  label_smoothing_factor: 0.0
  optim: adamw_torch_fused
  optim_args: null
  adafactor: false
  group_by_length: false
  length_column_name: length

  # --- Experiment tracking ---
  report_to:
  - wandb
  project: huggingface
  trackio_space_id: trackio

  # --- DDP / dataloader details ---
  ddp_find_unused_parameters: null
  ddp_bucket_cap_mb: null
  ddp_broadcast_buffers: null
  dataloader_pin_memory: true
  dataloader_persistent_workers: false
  skip_memory_metrics: true
  use_legacy_prediction_loop: false

  # --- Hub upload (disabled: push_to_hub is false) ---
  push_to_hub: false
  resume_from_checkpoint: null
  hub_model_id: null
  hub_strategy: every_save
  # No token is stored in this file. The previous value `<HUB_TOKEN>` was a
  # redaction placeholder that loads as a literal string; provide real
  # credentials out-of-band (e.g. environment or CLI login) if pushing is enabled.
  hub_token: null
  hub_private_repo: null
  hub_always_push: false
  hub_revision: null

  # --- Memory saving ---
  gradient_checkpointing: true
  gradient_checkpointing_kwargs: null

  # --- Metrics / evaluation details ---
  include_inputs_for_metrics: false
  include_for_metrics: []
  eval_do_concat_batches: true
  fp16_backend: auto
  push_to_hub_model_id: null
  push_to_hub_organization: null
  # Explicit empty string (not null).
  mp_parameters: ''
  auto_find_batch_size: false
  full_determinism: false
  torchdynamo: null
  ray_scope: last
  ddp_timeout: 1800
  torch_compile: false
  torch_compile_backend: null
  torch_compile_mode: null
  include_tokens_per_second: false
  # Quoted so the value stays the string 'no' rather than a boolean.
  include_num_input_tokens_seen: 'no'
  neftune_noise_alpha: null
  optim_target_modules: null
  batch_eval_metrics: false
  eval_on_start: false
  use_liger_kernel: true
  liger_kernel_config: null
  eval_use_gather_object: false
  average_tokens_across_devices: true

  # --- Project-specific trainer options ---
  # NOTE(review): semantics of the keys below are defined by the training
  # framework, not visible in this file — confirm before changing.
  use_muon: false
  freeze_modules: null
  use_rmpad: true
  fsdp2: true
  sp_ulysses_degree: 1
  reduce_dtype: bfloat16
  output_dtype: bfloat16
  print_batch_input_steps: 5

  # --- Profiler (disabled; the step window below applies only if enabled) ---
  enable_profiler: false
  profiler_config:
    start_step: 1
    end_step: 3

# Model settings: which architecture to build, where its weights come from, and
# the numeric/attention configuration. Keys are grouped by concern; mapping
# order does not affect the loaded values.
model_config:
  # --- Architecture ---
  model_type: qwen2_5_omni

  # --- Weights source ---
  # Weights come from a pretrained checkpoint id; load_from_config is unset.
  load_from_pretrained_path: ngqtrung/Qwen2.5-Omni-Thinker-7B
  load_from_config: null

  # --- Numerics and attention kernel ---
  torch_dtype: bfloat16
  attn_implementation: flash_attention_2

  # --- Optional overrides (none supplied) ---
  overwrite_config: null
  monkey_patch_kwargs: null
  # Explicit empty mapping (not null).
  extra_kwargs: {}

# Top-level extra keyword arguments; explicitly null (none supplied).
extra_kwargs: null