-
Notifications
You must be signed in to change notification settings - Fork 14
Expand file tree
/
Copy pathexample_create_tiny.py
More file actions
51 lines (40 loc) · 1.46 KB
/
example_create_tiny.py
File metadata and controls
51 lines (40 loc) · 1.46 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
#!/usr/bin/env python3
#
# Randomly initialize a tiny model and its quantized version
# Then it can be trained in example_train_tiny.py
import torch
from transformers import AutoTokenizer, BitsAndBytesConfig, Qwen3MoeConfig
from qwen3_moe_fused.modular_qwen3_moe_fused import Qwen3MoeFusedForCausalLM
from qwen3_moe_fused.quantize.quantizer import patch_bnb_quantizer
def main():
patch_bnb_quantizer()
model_dir = "./pretrained/qwen-moe-tiny-lm"
model_quantized_dir = "./pretrained/qwen-moe-tiny-lm-quantized"
# Create the model
config = Qwen3MoeConfig(
hidden_size=16,
intermediate_size=5,
num_hidden_layers=2,
num_attention_heads=8,
num_key_value_heads=4,
max_window_layers=2,
moe_intermediate_size=3,
num_experts=16,
norm_topk_prob=True,
)
model = Qwen3MoeFusedForCausalLM(config)
model.save_pretrained(model_dir)
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen3-0.6B")
tokenizer.save_pretrained(model_dir)
# Load and quantize the model
bnb_config = BitsAndBytesConfig(
load_in_4bit=True,
bnb_4bit_compute_dtype=torch.bfloat16,
bnb_4bit_quant_type="nf4",
bnb_4bit_use_double_quant=True,
)
model = Qwen3MoeFusedForCausalLM.from_pretrained(model_dir, quantization_config=bnb_config)
model.save_pretrained(model_quantized_dir)
tokenizer.save_pretrained(model_quantized_dir)
if __name__ == "__main__":
main()