Skip to content

Commit ac1d40d

Browse files
committed
support SmolLM-3
1 parent 4d9f0c7 commit ac1d40d

File tree

13 files changed

+1020
-793
lines changed

13 files changed

+1020
-793
lines changed

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ set(core_files src/backend.cpp
6060
models/llama.cpp
6161
models/qwen.cpp
6262
models/pangu.cpp
63+
models/smol.cpp
6364
)
6465

6566
add_library(libchatllm SHARED EXCLUDE_FROM_ALL src/main.cpp ${core_files})

README.md

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ pure C++ implementation based on [@ggerganov](https://github.com/ggerganov)'s [g
1313

1414
**What's New:**
1515

16+
* 2025-07-10: SmolLM-3
1617
* 2025-07-05: Pangu-Pro-MoE
1718
* 2025-07-04: ERNIE-MoE
1819
* 2025-06-30: Hunyuan-A13B, ERNIE-Dense
@@ -28,21 +29,12 @@ pure C++ implementation based on [@ggerganov](https://github.com/ggerganov)'s [g
2829
* 2025-05-17: [I can speak](./docs/multimodal.md): Orpheus-TTS
2930
* 2025-05-11: Seed-Coder
3031
* 2025-04-30: QWen3, MiMo
31-
* 2025-04-23: [ggml updated](https://github.com/ggml-org/llama.cpp/tree/658987cfc9d752dca7758987390d5fb1a7a0a54a) again
32-
* 2025-04-18: Apriel
33-
* 2025-04-15: GLM-4-0414
34-
* 2025-04-10: LlaMA4 (Language model)
35-
* 2025-03-27: Ling (Bailing)
36-
* 2025-03-26: AquilaChat2
37-
* 2025-03-25: DeepSeek v1 & GigaChat
3832
* 2025-03-24: [GGMM](./docs/ggmm.md) file format
3933
* 2025-02-21: [Distributed inference](./docs/rpc.md)
40-
* 2025-02-17: [ggml updated](https://github.com/ggml-org/llama.cpp/tree/0f2bbe656473177538956d22b6842bcaa0449fab) again
4134
* 2025-02-10: [GPU acceleration](./docs/gpu.md) 🔥
4235
* 2024-12-09: [Reversed role](./docs/fun.md#reversed-role)
4336
* 2024-11-21: [Continued generation](./docs/fun.md#continued-generation)
4437
* 2024-11-01: [generation steering](./docs/fun.md#generation-steering)
45-
* 2024-07-14: [ggml updated](https://github.com/ggerganov/ggml/tree/3e7e5e26f90fecf4f7c2808df7d94454630b219c)
4638
* 2024-06-15: [Tool calling](./docs/tool_calling.md)
4739
* 2024-05-29: [ggml](https://github.com/ggerganov/ggml) is forked instead of submodule
4840
* 2024-05-14: [OpenAI API](./docs/binding.md#openai-compatible-api), CodeGemma Base & Instruct supported

convert.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,8 @@ class ModelType(Enum):
201201

202202
PenguMoE = 0x2600
203203

204+
SmolLM3 = 0x2700
205+
204206
BCE_Embedding = 0x10000100
205207
BCE_ReRanker = 0x10000101
206208
BGE_M3 = 0x10000102
@@ -1880,6 +1882,31 @@ def get_weight_names(config):
18801882
r = Llama3Converter.get_weight_names(config)
18811883
return r[:-1]
18821884

1885+
class SmolLM3Converter(BaseConverter):
    """Converter for HuggingFace SmolLM3 checkpoints (`SmolLM3ForCausalLM`).

    Writes a Llama-like GGML config header followed by SmolLM3-specific
    fields, then reuses Llama3's weight-name list (dropping `lm_head`
    when the word embeddings are tied).
    """
    MODEL_TYPE = ModelType.SmolLM3

    # Mutable class-level flag: dump_config() records whether the model
    # ties input/output embeddings so get_weight_names() can consult it.
    # NOTE(review): this couples the two static methods — dump_config()
    # must run before get_weight_names(); presumably BaseConverter.convert
    # guarantees that order — confirm against the base class.
    tie_word_embeddings = True

    @staticmethod
    def dump_config(f, config, ggml_type):
        # RoPE scaling is not supported by the C++ runtime for this model.
        assert config.rope_scaling is None
        # Only full attention is supported: either every layer is
        # 'full_attention', or sliding-window attention is absent/disabled.
        assert (config.layer_types.count('full_attention') == config.num_hidden_layers) or \
               (config.use_sliding_window is None) or (not config.use_sliding_window)
        # Missing `tie_word_embeddings` in config defaults to tied (True).
        SmolLM3Converter.tie_word_embeddings = (config.tie_word_embeddings is None) or (config.tie_word_embeddings)

        # Common Llama-style header first, then SmolLM3-specific ints:
        # KV-head count, NoPE interval (every Nth layer skips RoPE),
        # and the tied-embeddings flag as 0/1.
        dump_llama_like_config(f, config, ggml_type)
        config_values = [
            config.num_key_value_heads,
            config.no_rope_layer_interval,
            1 if SmolLM3Converter.tie_word_embeddings else 0,
        ]
        f.write(struct.pack("i" * len(config_values), *config_values))
        # rope_theta is written separately as a little-endian float32.
        f.write(struct.pack("<f", config.rope_theta))

    @staticmethod
    def get_weight_names(config):
        # Llama3's list with the last entry (presumably `lm_head.weight`
        # — confirm in Llama3Converter) dropped when embeddings are tied.
        r = Llama3Converter.get_weight_names(config)
        return r[:-1] if SmolLM3Converter.tie_word_embeddings else r
1909+
18831910
class SmolVLMConverter(BaseConverter):
18841911
MODEL_TYPE = ModelType.SmolVLM
18851912

@@ -7404,6 +7431,8 @@ def main():
74047431
Llama3Converter.convert(config, model_files, vocab, ggml_type, args.save_path)
74057432
elif arch == 'smollm':
74067433
SmolLMConverter.convert(config, model_files, vocab, ggml_type, args.save_path)
7434+
elif arch == 'SmolLM3ForCausalLM':
7435+
SmolLM3Converter.convert(config, model_files, vocab, ggml_type, args.save_path)
74077436
elif arch == 'SmolVLMForConditionalGeneration':
74087437
SmolVLMConverter.convert(config, model_files, vocab, ggml_type, args.save_path)
74097438
elif arch == 'XverseForCausalLM':

docs/models.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -237,6 +237,9 @@
237237
* [x] Confucius3-Math: [14B](https://huggingface.co/netease-youdao/Confucius3-Math/tree/62621490d5dccf5fea997be9df62dd8dc017f777) (`-a DeepSeek-R1-Distill-QWen`)
238238
* [x] Jan-Nano: [4B](https://huggingface.co/Menlo/Jan-nano/tree/5f4e450c127322db9477400890a0dd951c9f6ab7)
239239

240+
* SmolLM-3 (`SmolLM3ForCausalLM`)
241+
* [x] [3B](https://huggingface.co/HuggingFaceTB/SmolLM3-3B/tree/297fd6336cf21656d5f9d30a1db612ceeca67619)
242+
240243
* Solar (`SolarForCausalLM`)
241244
* [x] [Pro](https://huggingface.co/upstage/solar-pro-preview-instruct/tree/dd4bcf7006df9b1ce3f87711e702e4063832aae3)
242245

models/qwen.cpp

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -995,11 +995,7 @@ namespace chatllm::qwen::ds_r1_distill_v3
995995
// Constructs the ds_r1_distill_v3 tokenizer: delegates to the base
// ds_r1_distill::Tokenizer, then installs the DeepSeek-R1 Chinese system
// prompt with today's date embedded ("YYYY-MM-DD, Weekday").
Tokenizer::Tokenizer(BaseConfig config)
    : ds_r1_distill::Tokenizer(config)
{
    // This commit replaces the hand-rolled std::time/strftime sequence
    // with the shared utils::now() helper.
    std::string date_str = utils::now("%Y-%m-%d, %A");
    // The prompt reads: "This assistant is DeepSeek-R1, created by
    // DeepSeek. Today is <date>".
    // NOTE(review): the trailing empty literal "" looks like a "。"
    // lost in page extraction — confirm against the repository source.
    sys_prompt = "该助手为DeepSeek-R1,由深度求索公司创造。\n今天是" + date_str + "";
}
10051001
}

0 commit comments

Comments
 (0)