Skip to content

Commit 808685b

Browse files
authored
[Model] Add DeepSeek-R1-Distill and Hermes-3-Llama-3.2 (#652)
This PR adds the following models to the prebuilt list:
- `DeepSeek-R1-Distill-Qwen-7B-q4f16_1-MLC`
- `DeepSeek-R1-Distill-Qwen-7B-q4f32_1-MLC`
- `DeepSeek-R1-Distill-Llama-8B-q4f16_1-MLC`
- `DeepSeek-R1-Distill-Llama-8B-q4f32_1-MLC`
- `Hermes-3-Llama-3.2-3B-q4f16_1-MLC`
- `Hermes-3-Llama-3.2-3B-q4f32_1-MLC`
We will add `DeepSeek-R1-Distill-Qwen-1.5B` afterward; it is currently experiencing correctness issues. Separately, we fix the handling of `role_content_sep` and `role_empty_sep` when either is `""`: because the empty string is falsy, we previously fell back to `": "`, which is inconsistent with what the model expects.
1 parent 8b77b3d commit 808685b

File tree

2 files changed

+123
-7
lines changed

2 files changed

+123
-7
lines changed

Diff for: src/config.ts

+113-1
Original file line numberDiff line numberDiff line change
@@ -309,6 +309,7 @@ export const functionCallingModelIds = [
309309
export const prebuiltAppConfig: AppConfig = {
310310
useIndexedDBCache: false,
311311
model_list: [
312+
// Llama-3.2
312313
{
313314
model: "https://huggingface.co/mlc-ai/Llama-3.2-1B-Instruct-q4f32_1-MLC",
314315
model_id: "Llama-3.2-1B-Instruct-q4f32_1-MLC",
@@ -440,7 +441,92 @@ export const prebuiltAppConfig: AppConfig = {
440441
context_window_size: 4096,
441442
},
442443
},
443-
// Hermes-2
444+
// DeepSeek-R1-Distill-Qwen
445+
// TODO(Charlie): Qwen2-1.5B is experiencing a correctness issue, hence commented out for now.
446+
// {
447+
// model: "https://huggingface.co/mlc-ai/DeepSeek-R1-Distill-Qwen-1.5B-q4f16_1-MLC",
448+
// model_id: "DeepSeek-R1-Distill-Qwen-1.5B-q4f16_1-MLC",
449+
// model_lib:
450+
// modelLibURLPrefix +
451+
// modelVersion +
452+
// "/Qwen2-1.5B-Instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm",
453+
// low_resource_required: true,
454+
// vram_required_MB: 1629.75,
455+
// overrides: {
456+
// context_window_size: 4096,
457+
// },
458+
// },
459+
// {
460+
// model: "https://huggingface.co/mlc-ai/DeepSeek-R1-Distill-Qwen-1.5B-q4f32_1-MLC",
461+
// model_id: "DeepSeek-R1-Distill-Qwen-1.5B-q4f32_1-MLC",
462+
// model_lib:
463+
// modelLibURLPrefix +
464+
// modelVersion +
465+
// "/Qwen2-1.5B-Instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm",
466+
// low_resource_required: true,
467+
// vram_required_MB: 1888.97,
468+
// overrides: {
469+
// context_window_size: 4096,
470+
// },
471+
// },
472+
{
473+
model:
474+
"https://huggingface.co/mlc-ai/DeepSeek-R1-Distill-Qwen-7B-q4f16_1-MLC",
475+
model_id: "DeepSeek-R1-Distill-Qwen-7B-q4f16_1-MLC",
476+
model_lib:
477+
modelLibURLPrefix +
478+
modelVersion +
479+
"/Qwen2-7B-Instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm",
480+
low_resource_required: false,
481+
vram_required_MB: 5106.67,
482+
overrides: {
483+
context_window_size: 4096,
484+
},
485+
},
486+
{
487+
model:
488+
"https://huggingface.co/mlc-ai/DeepSeek-R1-Distill-Qwen-7B-q4f32_1-MLC",
489+
model_id: "DeepSeek-R1-Distill-Qwen-7B-q4f32_1-MLC",
490+
model_lib:
491+
modelLibURLPrefix +
492+
modelVersion +
493+
"/Qwen2-7B-Instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm",
494+
low_resource_required: false,
495+
vram_required_MB: 5900.09,
496+
overrides: {
497+
context_window_size: 4096,
498+
},
499+
},
500+
// DeepSeek-R1-Distill-Llama
501+
{
502+
model:
503+
"https://huggingface.co/mlc-ai/DeepSeek-R1-Distill-Llama-8B-q4f32_1-MLC",
504+
model_id: "DeepSeek-R1-Distill-Llama-8B-q4f32_1-MLC",
505+
model_lib:
506+
modelLibURLPrefix +
507+
modelVersion +
508+
"/Llama-3_1-8B-Instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm",
509+
vram_required_MB: 6101.01,
510+
low_resource_required: false,
511+
overrides: {
512+
context_window_size: 4096,
513+
},
514+
},
515+
{
516+
model:
517+
"https://huggingface.co/mlc-ai/DeepSeek-R1-Distill-Llama-8B-q4f16_1-MLC",
518+
model_id: "DeepSeek-R1-Distill-Llama-8B-q4f16_1-MLC",
519+
model_lib:
520+
modelLibURLPrefix +
521+
modelVersion +
522+
"/Llama-3_1-8B-Instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm",
523+
vram_required_MB: 5001.0,
524+
low_resource_required: false,
525+
overrides: {
526+
context_window_size: 4096,
527+
},
528+
},
529+
// Hermes-3 and Hermes-2
444530
{
445531
model:
446532
"https://huggingface.co/mlc-ai/Hermes-2-Theta-Llama-3-8B-q4f16_1-MLC",
@@ -497,6 +583,32 @@ export const prebuiltAppConfig: AppConfig = {
497583
context_window_size: 4096,
498584
},
499585
},
586+
{
587+
model: "https://huggingface.co/mlc-ai/Hermes-3-Llama-3.2-3B-q4f32_1-MLC",
588+
model_id: "Hermes-3-Llama-3.2-3B-q4f32_1-MLC",
589+
model_lib:
590+
modelLibURLPrefix +
591+
modelVersion +
592+
"/Llama-3.2-3B-Instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm",
593+
vram_required_MB: 2951.51,
594+
low_resource_required: true,
595+
overrides: {
596+
context_window_size: 4096,
597+
},
598+
},
599+
{
600+
model: "https://huggingface.co/mlc-ai/Hermes-3-Llama-3.2-3B-q4f16_1-MLC",
601+
model_id: "Hermes-3-Llama-3.2-3B-q4f16_1-MLC",
602+
model_lib:
603+
modelLibURLPrefix +
604+
modelVersion +
605+
"/Llama-3.2-3B-Instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm",
606+
vram_required_MB: 2263.69,
607+
low_resource_required: true,
608+
overrides: {
609+
context_window_size: 4096,
610+
},
611+
},
500612
{
501613
model: "https://huggingface.co/mlc-ai/Hermes-3-Llama-3.1-8B-q4f32_1-MLC",
502614
model_id: "Hermes-3-Llama-3.1-8B-q4f32_1-MLC",

Diff for: src/conversation.ts

+10-6
Original file line numberDiff line numberDiff line change
@@ -93,9 +93,11 @@ export class Conversation {
9393
"message for a reply header.",
9494
);
9595
}
96-
const empty_sep = this.config.role_empty_sep
97-
? this.config.role_empty_sep
98-
: ": ";
96+
// Fall back to ": " only if the field is absent. If it is "", add no separator.
97+
const empty_sep =
98+
this.config.role_empty_sep || this.config.role_empty_sep == ""
99+
? this.config.role_empty_sep
100+
: ": ";
99101
ret.push(role_str + empty_sep);
100102
continue;
101103
}
@@ -153,9 +155,11 @@ export class Conversation {
153155
) {
154156
role_prefix = "";
155157
} else {
156-
const content_sep = this.config.role_content_sep
157-
? this.config.role_content_sep
158-
: ": ";
158+
// Fall back to ": " only if the field is absent. If it is "", add no separator.
159+
const content_sep =
160+
this.config.role_content_sep || this.config.role_content_sep == ""
161+
? this.config.role_content_sep
162+
: ": ";
159163
role_prefix = role_str + content_sep;
160164
}
161165

0 commit comments

Comments
 (0)