Skip to content

Commit 83317ba

Browse files
committed
Support LLaDA2.0-mini
1 parent 1343a21 commit 83317ba

File tree

9 files changed

+592
-137
lines changed

9 files changed

+592
-137
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ LittleAcademia[<a href="https://github.com/foldl/little-academia" style="text-
3131

3232
**What's New:**
3333

34+
* 2025-10-25: LLaDA2.0-mini
3435
* 2025-10-14: Nanonets-OCR2
3536
* 2025-10-13: dots.ocr
3637
* 2025-10-10: [I can draw](./docs/multimodal.md): Janus-Pro

convert.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -219,6 +219,7 @@ class ModelType(Enum):
219219
GroveMoE = 0x2D00
220220

221221
BailingMoE2 = 0x2E00
222+
LlaDA2 = 0x2E01
222223

223224
BCE_Embedding = 0x10000100
224225
BCE_ReRanker = 0x10000101
@@ -242,6 +243,7 @@ class ModelType(Enum):
242243

243244
Qwen2_5VL = ModelTypeTagChatImageVideoIn + 0x0000001
244245
Qwen2_VL = ModelTypeTagChatImageVideoIn + 0x0000002
246+
Qwen3_VL = ModelTypeTagChatImageVideoIn + 0x0000003
245247
KimiVL = ModelTypeTagChatImageVideoIn + 0x0000100
246248
SmolVLM = ModelTypeTagChatImageVideoIn + 0x0000200
247249

@@ -5079,7 +5081,7 @@ class QWen3Converter(BaseConverter):
50795081
@staticmethod
50805082
def dump_config(f, config, ggml_type):
50815083
MAX_LAYERS = 128
5082-
assert config.use_sliding_window == False, "use_sliding_window must be False"
5084+
assert (config.use_sliding_window is None) or (config.use_sliding_window == False), "use_sliding_window must be False"
50835085
assert not config.attention_bias
50845086
assert (config.output_router_logits is None) or (not config.output_router_logits)
50855087

@@ -5184,6 +5186,9 @@ def get_weight_names(config):
51845186

51855187
return weight_names
51865188

5189+
class Qwen3VLConverter(BaseConverter):
5190+
MODEL_TYPE = ModelType.Qwen3_VL
5191+
51875192
class QWen3EmbConverter(BaseConverter):
51885193
MODEL_TYPE = ModelType.QWen3_Embedding
51895194

@@ -6477,10 +6482,10 @@ def state_dict_pp(cls, config, state_dict):
64776482
def dump_config(f, config, ggml_type):
64786483

64796484
assert config.rope_scaling is None
6480-
assert config.use_qk_norm
6485+
assert (config.use_qk_norm is None) or config.use_qk_norm
64816486
assert config.moe_router_enable_expert_bias
64826487
assert (config.num_nextn_predict_layers is None) or (config.num_nextn_predict_layers == 0)
6483-
assert config.moe_shared_expert_intermediate_size == config.moe_intermediate_size
6488+
assert (config.moe_shared_expert_intermediate_size is None) or (config.moe_shared_expert_intermediate_size == config.moe_intermediate_size)
64846489

64856490
BailingMoeConverter.dump_config(f, config, ggml_type)
64866491

@@ -8684,6 +8689,9 @@ def main():
86848689
BailingMoeConverter.convert(config, model_files, vocab, ggml_type, args.save_path)
86858690
elif arch == 'BailingMoeV2ForCausalLM':
86868691
BailingMoe2Converter.convert(config, model_files, vocab, ggml_type, args.save_path)
8692+
elif arch == 'LLaDA2MoeModelLM':
8693+
BailingMoe2Converter.MODEL_TYPE = ModelType.LlaDA2
8694+
BailingMoe2Converter.convert(config, model_files, vocab, ggml_type, args.save_path)
86878695
elif arch == 'AprielForCausalLM':
86888696
AprielConverter.convert(config, model_files, vocab, ggml_type, args.save_path)
86898697
elif arch in ['Qwen3MoeForCausalLM', 'Qwen3ForCausalLM']:

docs/models.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,9 @@
121121
* [x] v1.5: [Ling-lite-1.5-2507](https://huggingface.co/inclusionAI/Ling-lite-1.5-2507/tree/6656efdc763a77102207fc66b176e4c5d07a316b), [Ring-lite2507](https://huggingface.co/inclusionAI/Ring-lite-2507/commit/8cf0ec244871c90102b353cef3568e061fd2504f)
122122
* [x] v2: [Ling-mini-2.0](https://huggingface.co/inclusionAI/Ling-mini-2.0/tree/56c261e07b78d95dad61336fcbdb21ef4fdbcabe), [Ring-mini-2.0](https://huggingface.co/inclusionAI/Ring-mini-2.0/tree/d4eac003b34b59b733f05039a876616d840a37d6)
123123

124+
* LLaDA (`LLaDA2MoeModelLM`)
125+
* [x] [mini-preview](https://huggingface.co/inclusionAI/LLaDA2.0-mini-preview/tree/d25d3b2ac0b966b64da11d6c791f8bf4bc31e90c)
126+
124127
* LlaMA-like (`LlamaForCausalLM`, `Llama4ForConditionalGeneration`):
125128
* [x] All LlaMA-1 models
126129
* [x] LlaMA-2: [Chat-7B](https://huggingface.co/meta-llama/Llama-2-7b-chat-hf), etc

0 commit comments

Comments (0)