
Commit ac629bc

[Model] Add Qwen3 and UI to toggle between thinking and non-thinking modes (#75)
* Add Qwen3 models
* Add UI to toggle between thinking and non-thinking modes for Qwen3 models
* Remove <think> tags in chat titles and non-thinking mode

Co-authored-by: Nestor Qin <[email protected]>
1 parent d52d954 commit ac629bc

13 files changed: +177 −25 lines

app/client/api.ts

Lines changed: 1 addition & 0 deletions
```diff
@@ -33,6 +33,7 @@ export interface LLMConfig {
   stream?: boolean;
   presence_penalty?: number;
   frequency_penalty?: number;
+  enable_thinking?: boolean;
 }
 
 export interface ChatOptions {
```
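For orientation, a minimal sketch of a caller-side config carrying the new flag. Only `enable_thinking` comes from this commit; the other field values here are illustrative, not taken from the repo:

```ts
// Hypothetical usage sketch; LLMConfig is the interface extended above.
const config: LLMConfig = {
  model: "Qwen3-4B-q4f16_1-MLC", // a Qwen3 model id from constant.ts
  stream: true,
  presence_penalty: 0,
  frequency_penalty: 0,
  enable_thinking: true, // the new optional flag
};
```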

app/client/webllm.ts

Lines changed: 46 additions & 2 deletions
```diff
@@ -84,6 +84,20 @@ export class WebLLMApi implements LLMApi {
   async chat(options: ChatOptions): Promise<void> {
     if (!this.initialized || this.isDifferentConfig(options.config)) {
       this.llmConfig = { ...(this.llmConfig || {}), ...options.config };
+      // Check if this is a Qwen3 model with thinking mode enabled
+      const isQwen3Model = this.llmConfig?.model
+        ?.toLowerCase()
+        .startsWith("qwen3");
+      const isThinkingEnabled = this.llmConfig?.enable_thinking === true;
+
+      // Apply special config for Qwen3 models with thinking mode enabled
+      if (isQwen3Model && isThinkingEnabled && this.llmConfig) {
+        this.llmConfig = {
+          ...this.llmConfig,
+          temperature: 0.6,
+          top_p: 0.95,
+        };
+      }
       try {
         await this.initModel(options.onUpdate);
       } catch (err: any) {
@@ -160,13 +174,14 @@ export class WebLLMApi implements LLMApi {
       "stream",
       "presence_penalty",
       "frequency_penalty",
+      "enable_thinking",
     ];
 
     for (const field of optionalFields) {
       if (
         this.llmConfig[field] !== undefined &&
         config[field] !== undefined &&
-        config[field] !== config[field]
+        this.llmConfig[field] !== config[field]
       ) {
         return true;
       }
@@ -184,10 +199,39 @@ export class WebLLMApi implements LLMApi {
       usage?: CompletionUsage,
     ) => void,
   ) {
+    // For Qwen3 models, we need to filter out the <think>...</think> content
+    // Do not do it inplace, create a new messages array
+    let newMessages: RequestMessage[] | undefined;
+    const isQwen3Model = this.llmConfig?.model
+      ?.toLowerCase()
+      .startsWith("qwen3");
+    if (isQwen3Model) {
+      newMessages = messages.map((message) => {
+        const newMessage = { ...message };
+        if (
+          message.role === "assistant" &&
+          typeof message.content === "string"
+        ) {
+          newMessage.content = message.content.replace(
+            /^<think>[\s\S]*?<\/think>\n?\n?/,
+            "",
+          );
+        }
+        return newMessage;
+      });
+    }
+
+    // Prepare extra_body with enable_thinking option for Qwen3 models
+    const extraBody: Record<string, any> = {};
+    if (isQwen3Model) {
+      extraBody.enable_thinking = this.llmConfig?.enable_thinking ?? false;
+    }
+
     const completion = await this.webllm.engine.chatCompletion({
       stream: stream,
-      messages: messages as ChatCompletionMessageParam[],
+      messages: (newMessages || messages) as ChatCompletionMessageParam[],
       ...(stream ? { stream_options: { include_usage: true } } : {}),
+      ...(Object.keys(extraBody).length > 0 ? { extra_body: extraBody } : {}),
     });
 
     if (stream) {
```
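Two behaviors land here: assistant turns are scrubbed of a leading `<think>…</think>` block before the history is resent, and thinking mode pins sampling to temperature 0.6 / top_p 0.95. A self-contained sketch of both, using the exact regex and values from the diff but simplified message types:

```ts
// Minimal sketch, independent of the app's RequestMessage type.
type Msg = { role: "user" | "assistant"; content: string };

// Same regex as the diff: strip one leading <think>...</think> block
// (plus up to two trailing newlines) from each assistant message.
function stripThink(history: Msg[]): Msg[] {
  return history.map((m) =>
    m.role === "assistant"
      ? {
          ...m,
          content: m.content.replace(/^<think>[\s\S]*?<\/think>\n?\n?/, ""),
        }
      : m,
  );
}

const history: Msg[] = [
  { role: "user", content: "Hi" },
  { role: "assistant", content: "<think>chain of thought...</think>\n\nHello!" },
];
console.log(stripThink(history)[1].content); // "Hello!"

// Thinking mode overrides the sampling config (values from the diff).
const baseConfig = { temperature: 0.7, top_p: 0.8 };
const thinkingConfig = { ...baseConfig, temperature: 0.6, top_p: 0.95 };
console.log(thinkingConfig); // { temperature: 0.6, top_p: 0.95 }
```

Note the regex is anchored with `^` and non-greedy, so only a single think block at the very start of a message is removed; reasoning quoted mid-message survives.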

app/components/chat.module.scss

Lines changed: 4 additions & 0 deletions
```diff
@@ -71,6 +71,10 @@
   width: var(--icon-width);
   overflow: hidden;
 
+  &.selected {
+    background-color: var(--second);
+  }
+
   &:not(:last-child) {
     margin-right: 5px;
   }
```

app/components/chat.tsx

Lines changed: 16 additions & 2 deletions
```diff
@@ -27,6 +27,7 @@ import DeleteIcon from "../icons/clear.svg";
 import EditIcon from "../icons/rename.svg";
 import ConfirmIcon from "../icons/confirm.svg";
 import ImageIcon from "../icons/image.svg";
+import BrainIcon from "../icons/brain.svg";
 
 import BottomIcon from "../icons/bottom.svg";
 import StopIcon from "../icons/pause.svg";
@@ -385,6 +386,7 @@ function ChatAction(props: {
   icon: JSX.Element;
   onClick: () => void;
   fullWidth?: boolean;
+  selected?: boolean;
 }) {
   const iconRef = useRef<HTMLDivElement>(null);
   const textRef = useRef<HTMLDivElement>(null);
@@ -406,7 +408,7 @@ function ChatAction(props: {
 
   return props.fullWidth ? (
     <div
-      className={`${styles["chat-input-action"]} clickable ${styles["full-width"]}`}
+      className={`${styles["chat-input-action"]} clickable ${styles["full-width"]} ${props.selected ? styles["selected"] : ""}`}
       onClick={props.onClick}
     >
       <div ref={iconRef} className={styles["icon"]}>
@@ -418,7 +420,7 @@ function ChatAction(props: {
     </div>
   ) : (
     <div
-      className={`${styles["chat-input-action"]} clickable`}
+      className={`${styles["chat-input-action"]} clickable ${props.selected ? styles["selected"] : ""}`}
      onClick={() => {
         props.onClick();
         setTimeout(updateWidth, 1);
@@ -535,6 +537,18 @@ export function ChatActions(props: {
           });
         }}
       />
+      {config.modelConfig.model.toLowerCase().startsWith("qwen3") && (
+        <ChatAction
+          onClick={() =>
+            config.update(
+              (config) => (config.enableThinking = !config.enableThinking),
+            )
+          }
+          text={Locale.Settings.THINKING}
+          icon={<BrainIcon />}
+          selected={config.enableThinking}
+        />
+      )}
       <ChatAction
         onClick={() => setShowModelSelector(true)}
         text={currentModel}
```
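The `selected` prop only appends a modifier class, which the new `&.selected` SCSS rule then styles. A reduced sketch of the same composition, with the CSS-module lookups replaced by literal class names:

```ts
// Reduced sketch; styles["..."] lookups are plain strings here.
function actionClassName(fullWidth: boolean, selected: boolean): string {
  const classes = ["chat-input-action", "clickable"];
  if (fullWidth) classes.push("full-width");
  if (selected) classes.push("selected"); // picked up by the &.selected rule
  return classes.join(" ");
}

console.log(actionClassName(false, true)); // "chat-input-action clickable selected"
```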

app/components/model-config.tsx

Lines changed: 18 additions & 0 deletions
```diff
@@ -83,6 +83,24 @@ export function ModelConfigList() {
       </Select>
     </ListItem>
 
+    {config.modelConfig.model.toLowerCase().startsWith("qwen3") && (
+      <ListItem
+        title={Locale.Settings.EnableThinking.Title}
+        subTitle={Locale.Settings.EnableThinking.SubTitle}
+      >
+        <input
+          type="checkbox"
+          checked={config.enableThinking}
+          onChange={(e) =>
+            config.update(
+              (config) => (config.enableThinking = e.currentTarget.checked),
+            )
+          }
+        ></input>
+      </ListItem>
+    )}
+
     {/* New setting item for LLM model context window length */}
     <ListItem
       title={Locale.Settings.ContextWindowLength.Title}
```

app/constant.ts

Lines changed: 63 additions & 12 deletions
```diff
@@ -79,6 +79,20 @@ export enum ModelFamily {
   DEEPSEEK = "DeepSeek",
 }
 
+const qwen3_common_configs = {
+  display_name: "Qwen",
+  provider: "Alibaba",
+  family: ModelFamily.QWEN,
+  // Recommended config is for non-thinking mode
+  // For thinking mode, see webllm.ts where temperature=0.6 and top_p=0.95 are applied
+  recommended_config: {
+    temperature: 0.7,
+    presence_penalty: 0,
+    frequency_penalty: 0,
+    top_p: 0.8,
+  },
+};
+
 const DEFAULT_MODEL_BASES: ModelRecord[] = [
   // Phi-3.5 Vision
   {
@@ -410,6 +424,7 @@ const DEFAULT_MODEL_BASES: ModelRecord[] = [
       top_p: 1,
     },
   },
+  // Mistral
   {
     name: "Mistral-7B-Instruct-v0.3-q4f16_1-MLC",
     display_name: "Mistral",
@@ -464,6 +479,7 @@ const DEFAULT_MODEL_BASES: ModelRecord[] = [
       top_p: 0.95,
     },
   },
+  // WizardMath
   {
     name: "WizardMath-7B-V1.1-q4f16_1-MLC",
     display_name: "WizardMath",
@@ -571,20 +587,50 @@ const DEFAULT_MODEL_BASES: ModelRecord[] = [
       top_p: 1,
     },
   },
+  // Qwen3
   {
-    name: "Qwen2.5-0.5B-Instruct-q4f16_1-MLC",
-    display_name: "Qwen",
-    provider: "Alibaba",
-    family: ModelFamily.QWEN,
-    recommended_config: {
-      temperature: 0.7,
-      presence_penalty: 0,
-      frequency_penalty: 0,
-      top_p: 0.8,
-    },
+    name: "Qwen3-0.6B-q4f16_1-MLC",
+    ...qwen3_common_configs,
   },
   {
-    name: "Qwen2.5-0.5B-Instruct-q4f32_1-MLC",
+    name: "Qwen3-0.6B-q4f32_1-MLC",
+    ...qwen3_common_configs,
+  },
+  {
+    name: "Qwen3-0.6B-q0f16-MLC",
+    ...qwen3_common_configs,
+  },
+  {
+    name: "Qwen3-0.6B-q0f32-MLC",
+    ...qwen3_common_configs,
+  },
+  {
+    name: "Qwen3-1.7B-q4f16_1-MLC",
+    ...qwen3_common_configs,
+  },
+  {
+    name: "Qwen3-1.7B-q4f32_1-MLC",
+    ...qwen3_common_configs,
+  },
+  {
+    name: "Qwen3-4B-q4f16_1-MLC",
+    ...qwen3_common_configs,
+  },
+  {
+    name: "Qwen3-4B-q4f32_1-MLC",
+    ...qwen3_common_configs,
+  },
+  {
+    name: "Qwen3-8B-q4f16_1-MLC",
+    ...qwen3_common_configs,
+  },
+  {
+    name: "Qwen3-8B-q4f32_1-MLC",
+    ...qwen3_common_configs,
+  },
+  // Qwen2.5
+  {
+    name: "Qwen2.5-0.5B-Instruct-q4f16_1-MLC",
     display_name: "Qwen",
     provider: "Alibaba",
     family: ModelFamily.QWEN,
@@ -596,7 +642,7 @@ const DEFAULT_MODEL_BASES: ModelRecord[] = [
     },
   },
   {
-    name: "Qwen2.5-0.5B-Instruct-q4f16_1-MLC",
+    name: "Qwen2.5-0.5B-Instruct-q4f32_1-MLC",
     display_name: "Qwen",
     provider: "Alibaba",
     family: ModelFamily.QWEN,
@@ -873,6 +919,7 @@ const DEFAULT_MODEL_BASES: ModelRecord[] = [
       top_p: 0.8,
     },
   },
+  // Gemma 2
   {
     name: "gemma-2-2b-it-q4f16_1-MLC",
     display_name: "Gemma",
@@ -969,6 +1016,7 @@ const DEFAULT_MODEL_BASES: ModelRecord[] = [
       top_p: 0.9,
     },
   },
+  // StableLM
   {
     name: "stablelm-2-zephyr-1_6b-q4f16_1-MLC",
     display_name: "StableLM",
@@ -1017,6 +1065,7 @@ const DEFAULT_MODEL_BASES: ModelRecord[] = [
       top_p: 0.95,
     },
   },
+  // RedPajama
   {
     name: "RedPajama-INCITE-Chat-3B-v1-q4f16_1-MLC",
     display_name: "RedPajama",
@@ -1057,6 +1106,7 @@ const DEFAULT_MODEL_BASES: ModelRecord[] = [
       top_p: 0.95,
     },
   },
+  // TinyLlama
   {
     name: "TinyLlama-1.1B-Chat-v1.0-q4f16_1-MLC",
     display_name: "TinyLlama",
@@ -1105,6 +1155,7 @@ const DEFAULT_MODEL_BASES: ModelRecord[] = [
       top_p: 1,
     },
   },
+  // Older models
   {
     name: "Llama-3.1-70B-Instruct-q3f16_1-MLC",
     display_name: "Llama",
```

app/locales/cn.ts

Lines changed: 4 additions & 0 deletions
```diff
@@ -228,6 +228,10 @@ const cn = {
       Title: "频率惩罚度 (frequency_penalty)",
       SubTitle: "值越大,越有可能降低重复字词",
     },
+    EnableThinking: {
+      Title: "深度思考",
+      SubTitle: "允许模型在回答时进行深度思考",
+    },
   },
   Store: {
     DefaultTopic: "新的聊天",
```

(The new Chinese strings read roughly as "Deep Thinking" / "Allow the model to think deeply when answering.")

app/locales/en.ts

Lines changed: 5 additions & 0 deletions
```diff
@@ -206,6 +206,7 @@ const en = {
         "Will compress if uncompressed messages length exceeds the value",
     },
 
+    THINKING: "Thinking",
     Usage: {
       Title: "Account Balance",
       SubTitle(used: any, total: any) {
@@ -264,6 +265,10 @@ const en = {
       Title: "Logging Level",
       SubTitle: "Adjust how much detail should be printed to console",
     },
+    EnableThinking: {
+      Title: "Enable Thinking",
+      SubTitle: "Allow reasoning models to think step-by-step",
+    },
   },
   Store: {
     DefaultTopic: "New Conversation",
```

app/store/chat.ts

Lines changed: 6 additions & 0 deletions
```diff
@@ -347,6 +347,7 @@ export const useChatStore = createPersistStore(
           ...modelConfig,
           cache: useAppConfig.getState().cacheType,
           stream: true,
+          enable_thinking: useAppConfig.getState().enableThinking,
         },
         onUpdate(message) {
           botMessage.streaming = true;
@@ -362,6 +363,9 @@ export const useChatStore = createPersistStore(
           botMessage.usage = usage;
           botMessage.stopReason = stopReason;
           if (message) {
+            if (!this.config.enable_thinking) {
+              message = message.replace(/<think>\s*<\/think>/g, "");
+            }
             botMessage.content = message;
             get().onNewMessage(botMessage, llm);
           }
@@ -532,6 +536,7 @@ export const useChatStore = createPersistStore(
           model: modelConfig.model,
           cache: useAppConfig.getState().cacheType,
           stream: false,
+          enable_thinking: false, // never think for topic
         },
         onFinish(message) {
           get().updateCurrentSession(
@@ -615,6 +620,7 @@ export const useChatStore = createPersistStore(
           stream: true,
           model: modelConfig.model,
           cache: useAppConfig.getState().cacheType,
+          enable_thinking: false, // never think for summarization
         },
         onUpdate(message) {
           session.memoryPrompt = message;
```
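Note the cleanup here uses a different regex from the one in webllm.ts: `/<think>\s*<\/think>/g` matches only whitespace-only pairs, presumably because Qwen3 still emits an empty `<think></think>` stub when thinking is disabled, while populated reasoning blocks are left intact. A quick check:

```ts
// The non-thinking cleanup from the diff applied to a sample completion.
const raw = "<think>\n\n</think>Sure, here is the answer.";
const cleaned = raw.replace(/<think>\s*<\/think>/g, "");
console.log(cleaned); // "Sure, here is the answer."
```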
