
Commit ac629bc

[Model] Add Qwen3 and UI to toggle between thinking and non-thinking modes (#75)
* Add Qwen3 models
* Add UI to toggle between thinking and non-thinking modes for Qwen3 models
* Remove <think> tags in chat titles and non-thinking mode

Co-authored-by: Nestor Qin <[email protected]>
1 parent d52d954 commit ac629bc

13 files changed: +177 −25 lines

app/client/api.ts

Lines changed: 1 addition & 0 deletions
```diff
@@ -33,6 +33,7 @@ export interface LLMConfig {
   stream?: boolean;
   presence_penalty?: number;
   frequency_penalty?: number;
+  enable_thinking?: boolean;
 }
 
 export interface ChatOptions {
```
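For orientation, a minimal sketch of a caller-side config carrying the new flag. Only `enable_thinking` comes from this commit; the other field values here are illustrative, not taken from the repo:

```ts
// Hypothetical usage sketch; LLMConfig is the interface extended above.
const config: LLMConfig = {
  model: "Qwen3-4B-q4f16_1-MLC", // a Qwen3 model id from constant.ts
  stream: true,
  presence_penalty: 0,
  frequency_penalty: 0,
  enable_thinking: true, // the new optional flag
};
```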

app/client/webllm.ts

Lines changed: 46 additions & 2 deletions
```diff
@@ -84,6 +84,20 @@ export class WebLLMApi implements LLMApi {
   async chat(options: ChatOptions): Promise<void> {
     if (!this.initialized || this.isDifferentConfig(options.config)) {
       this.llmConfig = { ...(this.llmConfig || {}), ...options.config };
+      // Check if this is a Qwen3 model with thinking mode enabled
+      const isQwen3Model = this.llmConfig?.model
+        ?.toLowerCase()
+        .startsWith("qwen3");
+      const isThinkingEnabled = this.llmConfig?.enable_thinking === true;
+
+      // Apply special config for Qwen3 models with thinking mode enabled
+      if (isQwen3Model && isThinkingEnabled && this.llmConfig) {
+        this.llmConfig = {
+          ...this.llmConfig,
+          temperature: 0.6,
+          top_p: 0.95,
+        };
+      }
       try {
         await this.initModel(options.onUpdate);
       } catch (err: any) {
@@ -160,13 +174,14 @@ export class WebLLMApi implements LLMApi {
       "stream",
       "presence_penalty",
       "frequency_penalty",
+      "enable_thinking",
     ];
 
     for (const field of optionalFields) {
       if (
         this.llmConfig[field] !== undefined &&
         config[field] !== undefined &&
-        config[field] !== config[field]
+        this.llmConfig[field] !== config[field]
       ) {
         return true;
       }
@@ -184,10 +199,39 @@ export class WebLLMApi implements LLMApi {
       usage?: CompletionUsage,
     ) => void,
   ) {
+    // For Qwen3 models, we need to filter out the <think>...</think> content
+    // Do not do it inplace, create a new messages array
+    let newMessages: RequestMessage[] | undefined;
+    const isQwen3Model = this.llmConfig?.model
+      ?.toLowerCase()
+      .startsWith("qwen3");
+    if (isQwen3Model) {
+      newMessages = messages.map((message) => {
+        const newMessage = { ...message };
+        if (
+          message.role === "assistant" &&
+          typeof message.content === "string"
+        ) {
+          newMessage.content = message.content.replace(
+            /^<think>[\s\S]*?<\/think>\n?\n?/,
+            "",
+          );
+        }
+        return newMessage;
+      });
+    }
+
+    // Prepare extra_body with enable_thinking option for Qwen3 models
+    const extraBody: Record<string, any> = {};
+    if (isQwen3Model) {
+      extraBody.enable_thinking = this.llmConfig?.enable_thinking ?? false;
+    }
+
     const completion = await this.webllm.engine.chatCompletion({
       stream: stream,
-      messages: messages as ChatCompletionMessageParam[],
+      messages: (newMessages || messages) as ChatCompletionMessageParam[],
       ...(stream ? { stream_options: { include_usage: true } } : {}),
+      ...(Object.keys(extraBody).length > 0 ? { extra_body: extraBody } : {}),
     });
 
     if (stream) {
```
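Two behaviors land here: assistant turns are scrubbed of a leading `<think>…</think>` block before the history is resent, and thinking mode pins sampling to temperature 0.6 / top_p 0.95. A self-contained sketch of both, using the exact regex and values from the diff but simplified message types:

```ts
// Minimal sketch, independent of the app's RequestMessage type.
type Msg = { role: "user" | "assistant"; content: string };

// Same regex as the diff: strip one leading <think>...</think> block
// (plus up to two trailing newlines) from each assistant message.
function stripThink(history: Msg[]): Msg[] {
  return history.map((m) =>
    m.role === "assistant"
      ? {
          ...m,
          content: m.content.replace(/^<think>[\s\S]*?<\/think>\n?\n?/, ""),
        }
      : m,
  );
}

const history: Msg[] = [
  { role: "user", content: "Hi" },
  { role: "assistant", content: "<think>chain of thought...</think>\n\nHello!" },
];
console.log(stripThink(history)[1].content); // "Hello!"

// Thinking mode overrides the sampling config (values from the diff).
const baseConfig = { temperature: 0.7, top_p: 0.8 };
const thinkingConfig = { ...baseConfig, temperature: 0.6, top_p: 0.95 };
console.log(thinkingConfig); // { temperature: 0.6, top_p: 0.95 }
```

Note the regex is anchored with `^` and non-greedy, so only a single think block at the very start of a message is removed; reasoning quoted mid-message survives.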

app/components/chat.module.scss

Lines changed: 4 additions & 0 deletions
```diff
@@ -71,6 +71,10 @@
   width: var(--icon-width);
   overflow: hidden;
 
+  &.selected {
+    background-color: var(--second);
+  }
+
   &:not(:last-child) {
     margin-right: 5px;
   }
```

app/components/chat.tsx

Lines changed: 16 additions & 2 deletions
```diff
@@ -27,6 +27,7 @@ import DeleteIcon from "../icons/clear.svg";
 import EditIcon from "../icons/rename.svg";
 import ConfirmIcon from "../icons/confirm.svg";
 import ImageIcon from "../icons/image.svg";
+import BrainIcon from "../icons/brain.svg";
 
 import BottomIcon from "../icons/bottom.svg";
 import StopIcon from "../icons/pause.svg";
@@ -385,6 +386,7 @@ function ChatAction(props: {
   icon: JSX.Element;
   onClick: () => void;
   fullWidth?: boolean;
+  selected?: boolean;
 }) {
   const iconRef = useRef<HTMLDivElement>(null);
   const textRef = useRef<HTMLDivElement>(null);
@@ -406,7 +408,7 @@ function ChatAction(props: {
 
   return props.fullWidth ? (
     <div
-      className={`${styles["chat-input-action"]} clickable ${styles["full-width"]}`}
+      className={`${styles["chat-input-action"]} clickable ${styles["full-width"]} ${props.selected ? styles["selected"] : ""}`}
       onClick={props.onClick}
     >
       <div ref={iconRef} className={styles["icon"]}>
@@ -418,7 +420,7 @@ function ChatAction(props: {
     </div>
   ) : (
     <div
-      className={`${styles["chat-input-action"]} clickable`}
+      className={`${styles["chat-input-action"]} clickable ${props.selected ? styles["selected"] : ""}`}
      onClick={() => {
         props.onClick();
         setTimeout(updateWidth, 1);
@@ -535,6 +537,18 @@ export function ChatActions(props: {
           });
         }}
       />
+      {config.modelConfig.model.toLowerCase().startsWith("qwen3") && (
+        <ChatAction
+          onClick={() =>
+            config.update(
+              (config) => (config.enableThinking = !config.enableThinking),
+            )
+          }
+          text={Locale.Settings.THINKING}
+          icon={<BrainIcon />}
+          selected={config.enableThinking}
+        />
+      )}
       <ChatAction
         onClick={() => setShowModelSelector(true)}
         text={currentModel}
```
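The `selected` prop only appends a modifier class, which the new `&.selected` SCSS rule then styles. A reduced sketch of the same composition, with the CSS-module lookups replaced by literal class names:

```ts
// Reduced sketch; styles["..."] lookups are plain strings here.
function actionClassName(fullWidth: boolean, selected: boolean): string {
  const classes = ["chat-input-action", "clickable"];
  if (fullWidth) classes.push("full-width");
  if (selected) classes.push("selected"); // picked up by the &.selected rule
  return classes.join(" ");
}

console.log(actionClassName(false, true)); // "chat-input-action clickable selected"
```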

app/components/model-config.tsx

Lines changed: 18 additions & 0 deletions
```diff
@@ -83,6 +83,24 @@ export function ModelConfigList() {
       </Select>
     </ListItem>
 
+    {config.modelConfig.model.toLowerCase().startsWith("qwen3") && (
+      <ListItem
+        title={Locale.Settings.EnableThinking.Title}
+        subTitle={Locale.Settings.EnableThinking.SubTitle}
+      >
+        <input
+          type="checkbox"
+          checked={config.enableThinking}
+          onChange={(e) =>
+            config.update(
+              (config) => (config.enableThinking = e.currentTarget.checked),
+            )
+          }
+        ></input>
+      </ListItem>
+    )}
+
     {/* New setting item for LLM model context window length */}
     <ListItem
       title={Locale.Settings.ContextWindowLength.Title}
```

app/constant.ts

Lines changed: 63 additions & 12 deletions
```diff
@@ -79,6 +79,20 @@ export enum ModelFamily {
   DEEPSEEK = "DeepSeek",
 }
 
+const qwen3_common_configs = {
+  display_name: "Qwen",
+  provider: "Alibaba",
+  family: ModelFamily.QWEN,
+  // Recommended config is for non-thinking mode
+  // For thinking mode, see webllm.ts where temperature=0.6 and top_p=0.95 are applied
+  recommended_config: {
+    temperature: 0.7,
+    presence_penalty: 0,
+    frequency_penalty: 0,
+    top_p: 0.8,
+  },
+};
+
 const DEFAULT_MODEL_BASES: ModelRecord[] = [
   // Phi-3.5 Vision
   {
@@ -410,6 +424,7 @@ const DEFAULT_MODEL_BASES: ModelRecord[] = [
       top_p: 1,
     },
   },
+  // Mistral
   {
     name: "Mistral-7B-Instruct-v0.3-q4f16_1-MLC",
     display_name: "Mistral",
@@ -464,6 +479,7 @@ const DEFAULT_MODEL_BASES: ModelRecord[] = [
       top_p: 0.95,
     },
   },
+  // WizardMath
   {
     name: "WizardMath-7B-V1.1-q4f16_1-MLC",
     display_name: "WizardMath",
@@ -571,20 +587,50 @@ const DEFAULT_MODEL_BASES: ModelRecord[] = [
       top_p: 1,
     },
   },
+  // Qwen3
   {
-    name: "Qwen2.5-0.5B-Instruct-q4f16_1-MLC",
-    display_name: "Qwen",
-    provider: "Alibaba",
-    family: ModelFamily.QWEN,
-    recommended_config: {
-      temperature: 0.7,
-      presence_penalty: 0,
-      frequency_penalty: 0,
-      top_p: 0.8,
-    },
+    name: "Qwen3-0.6B-q4f16_1-MLC",
+    ...qwen3_common_configs,
   },
   {
-    name: "Qwen2.5-0.5B-Instruct-q4f32_1-MLC",
+    name: "Qwen3-0.6B-q4f32_1-MLC",
+    ...qwen3_common_configs,
+  },
+  {
+    name: "Qwen3-0.6B-q0f16-MLC",
+    ...qwen3_common_configs,
+  },
+  {
+    name: "Qwen3-0.6B-q0f32-MLC",
+    ...qwen3_common_configs,
+  },
+  {
+    name: "Qwen3-1.7B-q4f16_1-MLC",
+    ...qwen3_common_configs,
+  },
+  {
+    name: "Qwen3-1.7B-q4f32_1-MLC",
+    ...qwen3_common_configs,
+  },
+  {
+    name: "Qwen3-4B-q4f16_1-MLC",
+    ...qwen3_common_configs,
+  },
+  {
+    name: "Qwen3-4B-q4f32_1-MLC",
+    ...qwen3_common_configs,
+  },
+  {
+    name: "Qwen3-8B-q4f16_1-MLC",
+    ...qwen3_common_configs,
+  },
+  {
+    name: "Qwen3-8B-q4f32_1-MLC",
+    ...qwen3_common_configs,
+  },
+  // Qwen2.5
+  {
+    name: "Qwen2.5-0.5B-Instruct-q4f16_1-MLC",
     display_name: "Qwen",
     provider: "Alibaba",
     family: ModelFamily.QWEN,
@@ -596,7 +642,7 @@ const DEFAULT_MODEL_BASES: ModelRecord[] = [
     },
   },
   {
-    name: "Qwen2.5-0.5B-Instruct-q4f16_1-MLC",
+    name: "Qwen2.5-0.5B-Instruct-q4f32_1-MLC",
     display_name: "Qwen",
     provider: "Alibaba",
     family: ModelFamily.QWEN,
@@ -873,6 +919,7 @@ const DEFAULT_MODEL_BASES: ModelRecord[] = [
       top_p: 0.8,
     },
   },
+  // Gemma 2
   {
     name: "gemma-2-2b-it-q4f16_1-MLC",
     display_name: "Gemma",
@@ -969,6 +1016,7 @@ const DEFAULT_MODEL_BASES: ModelRecord[] = [
       top_p: 0.9,
     },
   },
+  // StableLM
   {
     name: "stablelm-2-zephyr-1_6b-q4f16_1-MLC",
     display_name: "StableLM",
@@ -1017,6 +1065,7 @@ const DEFAULT_MODEL_BASES: ModelRecord[] = [
       top_p: 0.95,
     },
   },
+  // RedPajama
   {
     name: "RedPajama-INCITE-Chat-3B-v1-q4f16_1-MLC",
     display_name: "RedPajama",
@@ -1057,6 +1106,7 @@ const DEFAULT_MODEL_BASES: ModelRecord[] = [
       top_p: 0.95,
     },
   },
+  // TinyLlama
   {
     name: "TinyLlama-1.1B-Chat-v1.0-q4f16_1-MLC",
     display_name: "TinyLlama",
@@ -1105,6 +1155,7 @@ const DEFAULT_MODEL_BASES: ModelRecord[] = [
       top_p: 1,
     },
   },
+  // Older models
   {
     name: "Llama-3.1-70B-Instruct-q3f16_1-MLC",
     display_name: "Llama",
```

app/locales/cn.ts

Lines changed: 4 additions & 0 deletions
```diff
@@ -228,6 +228,10 @@ const cn = {
       Title: "频率惩罚度 (frequency_penalty)",
       SubTitle: "值越大,越有可能降低重复字词",
     },
+    EnableThinking: {
+      Title: "深度思考",
+      SubTitle: "允许模型在回答时进行深度思考",
+    },
   },
   Store: {
     DefaultTopic: "新的聊天",
```

(The new Chinese strings read roughly as "Deep Thinking" / "Allow the model to think deeply when answering.")

app/locales/en.ts

Lines changed: 5 additions & 0 deletions
```diff
@@ -206,6 +206,7 @@ const en = {
         "Will compress if uncompressed messages length exceeds the value",
     },
 
+    THINKING: "Thinking",
     Usage: {
       Title: "Account Balance",
       SubTitle(used: any, total: any) {
@@ -264,6 +265,10 @@ const en = {
       Title: "Logging Level",
       SubTitle: "Adjust how much detail should be printed to console",
     },
+    EnableThinking: {
+      Title: "Enable Thinking",
+      SubTitle: "Allow reasoning models to think step-by-step",
+    },
   },
   Store: {
     DefaultTopic: "New Conversation",
```

app/store/chat.ts

Lines changed: 6 additions & 0 deletions
```diff
@@ -347,6 +347,7 @@ export const useChatStore = createPersistStore(
           ...modelConfig,
           cache: useAppConfig.getState().cacheType,
           stream: true,
+          enable_thinking: useAppConfig.getState().enableThinking,
         },
         onUpdate(message) {
           botMessage.streaming = true;
@@ -362,6 +363,9 @@ export const useChatStore = createPersistStore(
           botMessage.usage = usage;
           botMessage.stopReason = stopReason;
           if (message) {
+            if (!this.config.enable_thinking) {
+              message = message.replace(/<think>\s*<\/think>/g, "");
+            }
             botMessage.content = message;
             get().onNewMessage(botMessage, llm);
           }
@@ -532,6 +536,7 @@ export const useChatStore = createPersistStore(
           model: modelConfig.model,
           cache: useAppConfig.getState().cacheType,
           stream: false,
+          enable_thinking: false, // never think for topic
         },
         onFinish(message) {
           get().updateCurrentSession(
@@ -615,6 +620,7 @@ export const useChatStore = createPersistStore(
           stream: true,
           model: modelConfig.model,
           cache: useAppConfig.getState().cacheType,
+          enable_thinking: false, // never think for summarization
         },
         onUpdate(message) {
           session.memoryPrompt = message;
```
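Note the cleanup here uses a different regex from the one in webllm.ts: `/<think>\s*<\/think>/g` matches only whitespace-only pairs, presumably because Qwen3 still emits an empty `<think></think>` stub when thinking is disabled, while populated reasoning blocks are left intact. A quick check:

```ts
// The non-thinking cleanup from the diff applied to a sample completion.
const raw = "<think>\n\n</think>Sure, here is the answer.";
const cleaned = raw.replace(/<think>\s*<\/think>/g, "");
console.log(cleaned); // "Sure, here is the answer."
```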
