
Commit 58350b8

support Ouro

1 parent 4d37adf

15 files changed, with 450 additions and 18 deletions.

CMakeLists.txt: 1 addition & 0 deletions

```diff
@@ -106,6 +106,7 @@ set(core_files src/backend.cpp
 models/orpheus.cpp
 models/openchat.cpp
 models/orion.cpp
+models/ouro.cpp
 models/oute.cpp
 models/pangu.cpp
 models/phi.cpp
```

README.md: 1 addition & 0 deletions

```diff
@@ -33,6 +33,7 @@ LittleAcademia[<a href="https://github.com/foldl/little-academia" style="text-
 
 **What's New:**
 
+* 2025-11-03: Ouro
 * 2025-10-31: Megrez2-3x7B-A3B
 * 2025-10-25: LLaDA2.0-mini
 * 2025-10-14: Nanonets-OCR2
```

convert.py: 41 additions & 2 deletions

```diff
@@ -223,6 +223,8 @@ class ModelType(Enum):
 
     MegrezMoE = 0x2F00
 
+    OURO = 0x3000
+
     BCE_Embedding = 0x10000100
     BCE_ReRanker = 0x10000101
     BGE_M3 = 0x10000102
@@ -8175,6 +8177,41 @@ def get_weight_names(config):
 
     return weight_names
 
+class OuroConverter(BaseConverter):
+    MODEL_TYPE = ModelType.OURO
+
+    @classmethod
+    def pp(cls, config, name: str, tensor):
+        return Llama3Converter.pp(config, name, tensor)
+
+    @staticmethod
+    def dump_config(f, config, ggml_type):
+        assert config.rope_scaling is None
+        assert (config.layer_types.count('full_attention') == config.num_hidden_layers) or \
+               (config.use_sliding_window is None) or (not config.use_sliding_window)
+        assert not config.tie_word_embeddings is None
+
+        dump_llama_like_config(f, config, ggml_type)
+        config_values = [
+            config.num_key_value_heads,
+            config.rope_theta,
+        ]
+        f.write(struct.pack("<if", *config_values))
+
+    @staticmethod
+    def get_weight_names(config):
+        weight_names = Llama3Converter.get_weight_names(config)
+        for i in range(config.num_hidden_layers):
+            weight_names += [
+                f"model.layers.{i}.input_layernorm_2.weight",
+                f"model.layers.{i}.post_attention_layernorm_2.weight",
+            ]
+        weight_names += [
+            f"model.early_exit_gate.bias",
+            f"model.early_exit_gate.weight",
+        ]
+        return weight_names
+
 def convert_grok_1_base(args, vocab, ggml_type):
     def ffn_size(emb_size, widening_factor):
         _ffn_size = int(widening_factor * emb_size) * 2 // 3
@@ -8793,10 +8830,12 @@ def main():
     elif arch == 'MultiModalityCausalLM':
         assert JanusConverter.is_proper_config(config)
         JanusConverter.convert(config, model_files, vocab, ggml_type, args.save_path)
-    elif arch.endswith('DotsOCRForCausalLM'):
+    elif arch == 'DotsOCRForCausalLM':
         DotsOCRConverter.convert(config, model_files, vocab, ggml_type, args.save_path)
-    elif arch.endswith('MegrezMoeForCausalLM'):
+    elif arch == 'MegrezMoeForCausalLM':
         MegrezMoEConverter.convert(config, model_files, vocab, ggml_type, args.save_path)
+    elif arch == 'OuroForCausalLM':
+        OuroConverter.convert(config, model_files, vocab, ggml_type, args.save_path)
     elif arch == 'deepseek-r1-distill-qwen3':
         QWen3Converter.MODEL_TYPE = ModelType.DeepSeek_R1_Distill_QWen3
         QWen3Converter.convert(config, model_files, vocab, ggml_type, args.save_path)
```
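For context on the converter addition above: `OuroConverter.dump_config` writes the usual llama-like header via `dump_llama_like_config` and then appends exactly two extra fields with `struct.pack("<if", ...)`. Below is a minimal sketch of what that packing produces, with illustrative values only (the real ones come from the model's `config.json`):

```python
import struct

# Illustrative values only; the converter takes these from the model config.
num_key_value_heads = 8
rope_theta = 10000.0

blob = struct.pack("<if", num_key_value_heads, rope_theta)
print(len(blob))                   # 8 bytes: little-endian int32 followed by float32
print(struct.unpack("<if", blob))  # (8, 10000.0)
```

The corresponding loader (presumably `models/ouro.cpp`) would need to read these two fields, plus the extra per-layer `input_layernorm_2` / `post_attention_layernorm_2` weights and the `early_exit_gate` tensors, in the same order the converter emits them.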

docs/fun.md: 4 additions & 0 deletions

````diff
@@ -51,6 +51,10 @@ Before shuffling a model's layers, use `--show` to view basic information about
 ...
 ```
 
+> [!CAUTION]
+> Not all models support this. For example, an ordinary dense model like LLaMA does, but a heterogeneous model might
+> not (special treatment is needed to map a layer to its type).
+
 ## Talk to Each Other
 
 [Here](../scripts/crosstask.py) is a simple program to let two LLMs talk to each other.
````
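The caution added above comes down to whether every layer is interchangeable. Here is a minimal sketch of the kind of check a layer-shuffling tool would need, assuming a per-layer `layer_types` list like the one asserted on in the Ouro converter; the helper name and return convention are hypothetical:

```python
def can_shuffle_layers(layer_types: list[str]) -> bool:
    # Hypothetical helper: shuffling is only safe when all layers share a single
    # type, i.e. any layer can stand in for any other.
    return len(set(layer_types)) <= 1

# An ordinary dense model passes; a heterogeneous one does not.
print(can_shuffle_layers(['full_attention'] * 4))                    # True
print(can_shuffle_layers(['full_attention', 'sliding_window'] * 2))  # False
```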

docs/models.md: 7 additions & 0 deletions

```diff
@@ -228,6 +228,13 @@
     * [x] OLMoE: [Instruct-7B](https://huggingface.co/allenai/OLMoE-1B-7B-0924-Instruct)
     * [x] OLMo-2: [Instruct-7B](https://huggingface.co/allenai/OLMo-2-1124-7B-Instruct), [Instruct-13B](https://huggingface.co/allenai/OLMo-2-1124-13B-Instruct), [Instruct-32B](https://huggingface.co/allenai/OLMo-2-0325-32B-Instruct/tree/5942a2f5e0bc38c2a5f5200cec2ea236d5984547)
 
+* Ouro (`OuroForCausalLM`)
+    * [x] [2.6B-Thinking](https://huggingface.co/ByteDance/Ouro-2.6B-Thinking/tree/a2d3a54cea06168ba371ec3e089734f715824d5b)
+
+    Note: additional options supported (`--set ...`)
+    * `total_ut_steps`: default 4
+    * `exit_threshold`: default 1.0
+
 * Orion (`OrionForCausalLM`)
     * [x] [Chat-14B](https://huggingface.co/OrionStarAI/Orion-14B-Chat)
 
```
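A hedged example of overriding the two defaults listed above at run time. Only the `--set` option and the two keys come from the note in docs/models.md; the binary name, flags, model path, and key/value syntax are assumptions that may differ in your build:

```python
import subprocess

# Hypothetical invocation: binary name, model path, and the --set key/value form
# are assumptions; consult your build's --help for the exact syntax.
subprocess.run([
    "./build/bin/main", "-m", "ouro-2.6b-thinking.bin",
    "--set", "total_ut_steps", "4",
    "--set", "exit_threshold", "1.0",
    "-i",
])
```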
