Skip to content

Commit 83317ba

Browse files
committed
Support LLaDA2.0-mini
1 parent 1343a21 commit 83317ba

File tree

9 files changed

+592
-137
lines changed

9 files changed

+592
-137
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ LittleAcademia[<a href="https://github.com/foldl/little-academia" style="text-
3131

3232
**What's New:**
3333

34+
* 2025-10-25: LLaDA2.0-mini
3435
* 2025-10-14: Nanonets-OCR2
3536
* 2025-10-13: dots.ocr
3637
* 2025-10-10: [I can draw](./docs/multimodal.md): Janus-Pro

convert.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -219,6 +219,7 @@ class ModelType(Enum):
219219
GroveMoE = 0x2D00
220220

221221
BailingMoE2 = 0x2E00
222+
LlaDA2 = 0x2E01
222223

223224
BCE_Embedding = 0x10000100
224225
BCE_ReRanker = 0x10000101
@@ -242,6 +243,7 @@ class ModelType(Enum):
242243

243244
Qwen2_5VL = ModelTypeTagChatImageVideoIn + 0x0000001
244245
Qwen2_VL = ModelTypeTagChatImageVideoIn + 0x0000002
246+
Qwen3_VL = ModelTypeTagChatImageVideoIn + 0x0000003
245247
KimiVL = ModelTypeTagChatImageVideoIn + 0x0000100
246248
SmolVLM = ModelTypeTagChatImageVideoIn + 0x0000200
247249

@@ -5079,7 +5081,7 @@ class QWen3Converter(BaseConverter):
50795081
@staticmethod
50805082
def dump_config(f, config, ggml_type):
50815083
MAX_LAYERS = 128
5082-
assert config.use_sliding_window == False, "use_sliding_window must be False"
5084+
assert (config.use_sliding_window is None) or (config.use_sliding_window == False), "use_sliding_window must be False"
50835085
assert not config.attention_bias
50845086
assert (config.output_router_logits is None) or (not config.output_router_logits)
50855087

@@ -5184,6 +5186,9 @@ def get_weight_names(config):
51845186

51855187
return weight_names
51865188

5189+
class Qwen3VLConverter(BaseConverter):
5190+
MODEL_TYPE = ModelType.Qwen3_VL
5191+
51875192
class QWen3EmbConverter(BaseConverter):
51885193
MODEL_TYPE = ModelType.QWen3_Embedding
51895194

@@ -6477,10 +6482,10 @@ def state_dict_pp(cls, config, state_dict):
64776482
def dump_config(f, config, ggml_type):
64786483

64796484
assert config.rope_scaling is None
6480-
assert config.use_qk_norm
6485+
assert (config.use_qk_norm is None) or config.use_qk_norm
64816486
assert config.moe_router_enable_expert_bias
64826487
assert (config.num_nextn_predict_layers is None) or (config.num_nextn_predict_layers == 0)
6483-
assert config.moe_shared_expert_intermediate_size == config.moe_intermediate_size
6488+
assert (config.moe_shared_expert_intermediate_size is None) or (config.moe_shared_expert_intermediate_size == config.moe_intermediate_size)
64846489

64856490
BailingMoeConverter.dump_config(f, config, ggml_type)
64866491

@@ -8684,6 +8689,9 @@ def main():
86848689
BailingMoeConverter.convert(config, model_files, vocab, ggml_type, args.save_path)
86858690
elif arch == 'BailingMoeV2ForCausalLM':
86868691
BailingMoe2Converter.convert(config, model_files, vocab, ggml_type, args.save_path)
8692+
elif arch == 'LLaDA2MoeModelLM':
8693+
BailingMoe2Converter.MODEL_TYPE = ModelType.LlaDA2
8694+
BailingMoe2Converter.convert(config, model_files, vocab, ggml_type, args.save_path)
86878695
elif arch == 'AprielForCausalLM':
86888696
AprielConverter.convert(config, model_files, vocab, ggml_type, args.save_path)
86898697
elif arch in ['Qwen3MoeForCausalLM', 'Qwen3ForCausalLM']:

docs/models.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,9 @@
121121
* [x] v1.5: [Ling-lite-1.5-2507](https://huggingface.co/inclusionAI/Ling-lite-1.5-2507/tree/6656efdc763a77102207fc66b176e4c5d07a316b), [Ring-lite2507](https://huggingface.co/inclusionAI/Ring-lite-2507/commit/8cf0ec244871c90102b353cef3568e061fd2504f)
122122
* [x] v2: [Ling-mini-2.0](https://huggingface.co/inclusionAI/Ling-mini-2.0/tree/56c261e07b78d95dad61336fcbdb21ef4fdbcabe), [Ring-mini-2.0](https://huggingface.co/inclusionAI/Ring-mini-2.0/tree/d4eac003b34b59b733f05039a876616d840a37d6)
123123

124+
* LLaDA (`LLaDA2MoeModelLM`)
125+
* [x] [mini-preview](https://huggingface.co/inclusionAI/LLaDA2.0-mini-preview/tree/d25d3b2ac0b966b64da11d6c791f8bf4bc31e90c)
126+
124127
* LlaMA-like (`LlamaForCausalLM`, `Llama4ForConditionalGeneration`):
125128
* [x] All LlaMA-1 models
126129
* [x] LlaMA-2: [Chat-7B](https://huggingface.co/meta-llama/Llama-2-7b-chat-hf), etc

0 commit comments

Comments (0)