Skip to content

Commit e5eab54

Browse files
committed
handle override jinja template
1 parent ff37b33 commit e5eab54

5 files changed

Lines changed: 50 additions & 13 deletions

File tree

embd_res/klite.embd

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4370,7 +4370,7 @@ Current version indicated by LITEVER below.
43704370
saved_alltalk_url: default_alltalk_base,
43714371
saved_mcp_urls: "",
43724372
prev_custom_endpoint_type: 0, //show a reconnect box to custom endpoint if needed. 0 is horde, otherwise its dropdown value+1
4373-
prev_custom_endpoint_model: "", //we may not be able to match, but set it if we do
4373+
prev_custom_endpoint_model_dict: {}, //we may not be able to match, but set it if we do
43744374
prev_custom_endpoint_ischatcmpl: true,
43754375
generate_images_mode: (localflag?0:1), //0=off, 1=horde, 2=a1111, 3=dalle, 4=comfy, 5=pollinations
43764376

@@ -5595,7 +5595,7 @@ Current version indicated by LITEVER below.
55955595

55965596
//offer to reconnect
55975597
let pending_eptype = localsettings.prev_custom_endpoint_type;
5598-
let pending_custmodel = localsettings.prev_custom_endpoint_model;
5598+
let pending_custmodel = (localsettings.prev_custom_endpoint_model_dict?(localsettings.prev_custom_endpoint_model_dict[pending_eptype] || ""):"");
55995599
let pending_ischatcmpl = localsettings.prev_custom_endpoint_ischatcmpl;
56005600
if((!localflag && pending_eptype>0) || (localflag && pending_eptype==2))
56015601
{
@@ -8111,7 +8111,7 @@ Current version indicated by LITEVER below.
81118111
if(data.data && data.data.length > 0)
81128112
{
81138113
let dropdown = get_custom_ep_model_dropdown("2");
8114-
update_oai_model_list(data,dropdown);
8114+
update_oai_model_list(data,dropdown,2);
81158115
onDone(data.data); //probe success
81168116
}
81178117
else
@@ -13491,7 +13491,7 @@ Current version indicated by LITEVER below.
1349113491
});
1349213492
});
1349313493
}
13494-
function update_oai_model_list(data, dropdown)
13494+
function update_oai_model_list(data, dropdown, epchoice)
1349513495
{
1349613496
var lastOption = dropdown.lastElementChild;
1349713497
for (var i = dropdown.options.length - 1; i >= 0; i--) {
@@ -13505,12 +13505,13 @@ Current version indicated by LITEVER below.
1350513505
sortedarr.push(opt.id);
1350613506
}
1350713507
sortedarr.sort((a, b) => a.localeCompare(b, undefined, { sensitivity: 'base' }));
13508+
let pending_custmodel = (localsettings.prev_custom_endpoint_model_dict?(localsettings.prev_custom_endpoint_model_dict[epchoice] || ""):"");
1350813509
for(var i=0;i<sortedarr.length;++i)
1350913510
{
1351013511
var el = document.createElement("option");
1351113512
el.textContent = sortedarr[i];
1351213513
el.value = sortedarr[i];
13513-
if(localsettings.prev_custom_endpoint_model && sortedarr[i]==localsettings.prev_custom_endpoint_model)
13514+
if(pending_custmodel && sortedarr[i]==pending_custmodel)
1351413515
{
1351513516
selidx = i;
1351613517
document.getElementById("useoaichatcompl").checked = localsettings.prev_custom_endpoint_ischatcmpl;
@@ -13566,7 +13567,7 @@ Current version indicated by LITEVER below.
1356613567

1356713568
if (!data.error && data.data && data.data.length > 0)
1356813569
{
13569-
update_oai_model_list(data,dropdown);
13570+
update_oai_model_list(data,dropdown,epchoice);
1357013571
}
1357113572
else
1357213573
{
@@ -26575,7 +26576,10 @@ Current version indicated by LITEVER below.
2657526576
function update_prev_custom_endpoint_type()
2657626577
{
2657726578
localsettings.prev_custom_endpoint_type = 0;
26578-
localsettings.prev_custom_endpoint_model = "";
26579+
if(!localsettings.prev_custom_endpoint_model_dict)
26580+
{
26581+
localsettings.prev_custom_endpoint_model_dict = {};
26582+
}
2657926583
localsettings.prev_custom_endpoint_ischatcmpl = true;
2658026584
if (custom_kobold_endpoint != "") {
2658126585
localsettings.prev_custom_endpoint_type = 1;
@@ -26621,7 +26625,7 @@ Current version indicated by LITEVER below.
2662126625
let dropdown = get_custom_ep_model_dropdown(localsettings.prev_custom_endpoint_type);
2662226626
if(dropdown && dropdown.value)
2662326627
{
26624-
localsettings.prev_custom_endpoint_model = dropdown.value;
26628+
localsettings.prev_custom_endpoint_model_dict[localsettings.prev_custom_endpoint_type] = dropdown.value;
2662526629
localsettings.prev_custom_endpoint_ischatcmpl = document.getElementById("useoaichatcompl").checked?true:false;
2662626630
}
2662726631
}
@@ -27953,7 +27957,7 @@ Current version indicated by LITEVER below.
2795327957
var childInput = element.querySelector('.colorpickerchild');
2795427958
if (childInput && newColor.includes("rgb")) {
2795527959
childInput.value = rgb_to_hex(newColor);
27956-
} else {
27960+
} else if(childInput) {
2795727961
childInput.value = newColor;
2795827962
}
2795927963
}

expose.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@ struct load_model_inputs
7474
const int quant_k = 0;
7575
const int quant_v = 0;
7676
const bool check_slowness = false;
77+
const char * jinja_template = nullptr;
7778
const bool highpriority = false;
7879
const bool swa_support = false;
7980
const int swa_padding = 0;

ggml/src/ggml-cuda/ggml-cuda.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3126,7 +3126,7 @@ static bool ggml_cuda_graph_update_required(ggml_backend_cuda_context * cuda_ctx
31263126

31273127
if (cgraph->uid != 0 &&
31283128
cgraph->uid == graph->uid) {
3129-
GGML_LOG_DEBUG("CUDA Graph id %zu reused\n", cgraph->uid);
3129+
// GGML_LOG_DEBUG("CUDA Graph id %zu reused\n", cgraph->uid);
31303130
GGML_ASSERT((int)graph->node_props.size() == cgraph->n_nodes);
31313131
return false;
31323132
}

gpttype_adapter.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,7 @@ static bool check_slowness = false; //will display a suggestion to use highprior
156156
static bool showed_rnn_warning = false;
157157
static bool highpriority = false;
158158
static int rnn_reusable_slot_idx = -1;
159+
static std::string overridden_jinja_template = ""; //if set, overrides jinja template
159160

160161
static int delayed_generated_tokens_limit = 0;
161162
std::deque<std::string> delayed_generated_tokens; //for use with antislop sampling
@@ -2225,6 +2226,7 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
22252226
audio_multimodal_supported = false;
22262227
vision_multimodal_supported = false;
22272228
use_mrope = false;
2229+
overridden_jinja_template = inputs.jinja_template;
22282230

22292231
auto clamped_max_context_length = inputs.max_context_length;
22302232

@@ -3272,6 +3274,10 @@ std::string gpttype_get_chat_template()
32723274
printf("\nWarning: KCPP text generation not initialized!\n");
32733275
return "";
32743276
}
3277+
if(overridden_jinja_template!="")
3278+
{
3279+
return overridden_jinja_template;
3280+
}
32753281
if(file_format!=FileFormat::GGUF_GENERIC || !llama_ctx_v4)
32763282
{
32773283
return "";

koboldcpp.py

Lines changed: 29 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,7 @@
157157
embedded_lcpp_ui_gz = None
158158
embedded_musicui = None
159159
embedded_musicui_gz = None
160+
preloaded_custom_jinja = ""
160161
voicebank = {}
161162
voicelist = ["kobo","cheery","sleepy","shouty","chatty"]
162163
sslvalid = False
@@ -259,6 +260,7 @@ class load_model_inputs(ctypes.Structure):
259260
("quant_k", ctypes.c_int),
260261
("quant_v", ctypes.c_int),
261262
("check_slowness", ctypes.c_bool),
263+
("jinja_template", ctypes.c_char_p),
262264
("highpriority", ctypes.c_bool),
263265
("swa_support", ctypes.c_bool),
264266
("swa_padding", ctypes.c_int),
@@ -1955,6 +1957,7 @@ def load_model(model_filename):
19551957
inputs.override_tensors = args.overridetensors.encode("UTF-8") if args.overridetensors else "".encode("UTF-8")
19561958
inputs.moecpu = (200 if args.moecpu > 200 else args.moecpu)
19571959
inputs.check_slowness = (not args.highpriority and os.name == 'nt' and 'Intel' in platform.processor())
1960+
inputs.jinja_template = preloaded_custom_jinja.encode("UTF-8")
19581961
inputs.highpriority = args.highpriority
19591962
inputs.swa_support = args.useswa
19601963
inputs.swa_padding = args.swapadding if args.useswa else 0
@@ -7287,6 +7290,7 @@ def hide_tooltip(event):
72877290
customrope_base = ctk.StringVar(value="10000")
72887291
customrope_nativectx = ctk.StringVar(value=str(default_native_ctx))
72897292
chatcompletionsadapter_var = ctk.StringVar(value="AutoGuess")
7293+
jinjatemplate_var = ctk.StringVar()
72907294
jinja_var = ctk.IntVar(value=0)
72917295
jinja_tools_var = ctk.IntVar(value=0)
72927296
jinja_kwargs_var = ctk.StringVar()
@@ -8091,10 +8095,11 @@ def pickpremadetemplate():
80918095
if fnam:
80928096
chatcompletionsadapter_var.set(fnam)
80938097
ctk.CTkButton(model_tab, 64, text="Pick Premade", command=pickpremadetemplate).grid(row=24, column=0, padx=(350), pady=2, stick="nw")
8098+
makefileentry(model_tab, "Jinja Template:", "Select a custom Jinja chat template", jinjatemplate_var, 30, width=280, filetypes=[("Jinja Template", "*.jinja")], singlerow=True, tooltiptxt="Select a custom Jinja chat template, will overwrite model jinja chat template")
80948099

80958100
mmproj_var.trace_add("write", gui_changed_modelfile)
80968101
draftmodel_var.trace_add("write", gui_changed_modelfile)
8097-
makefileentry(model_tab, "Download Dir:", "Select directory to store all model downloads", download_dir_var, 27, width=280, singlerow=True, dialog_type=2, tooltiptxt="Specify a directory to store any downloaded models.")
8102+
makefileentry(model_tab, "Download Dir:", "Select directory to store all model downloads", download_dir_var, 35, width=280, singlerow=True, dialog_type=2, tooltiptxt="Specify a directory to store any downloaded models.")
80988103
makecheckbox(model_tab, "Allow Launch Without Models", nomodel, 40, tooltiptxt="Allows running the WebUI with no model loaded.")
80998104

81008105
# Network Tab
@@ -8431,6 +8436,8 @@ def export_vars():
84318436
args.jinja_tools = (jinja_tools_var.get()==1)
84328437
if jinja_kwargs_var.get() != "":
84338438
args.jinja_kwargs = jinja_kwargs_var.get()
8439+
if jinjatemplate_var.get() != "":
8440+
args.jinjatemplate = jinjatemplate_var.get()
84348441
args.enableguidance = (enableguidance_var.get()==1)
84358442
args.overridekv = None if override_kv_var.get() == "" else override_kv_var.get()
84368443
args.overridetensors = None if override_tensors_var.get() == "" else override_tensors_var.get()
@@ -8719,6 +8726,7 @@ def import_vars(mydict):
87198726
if isinstance(jinja_kwargs, type({})):
87208727
jinja_kwargs = json.dumps(jinja_kwargs)
87218728
jinja_kwargs_var.set(jinja_kwargs)
8729+
jinjatemplate_var.set(mydict["jinjatemplate"] if ("jinjatemplate" in mydict and mydict["jinjatemplate"]) else "")
87228730

87238731
enableguidance_var.set(mydict["enableguidance"] if ("enableguidance" in mydict) else 0)
87248732
if "overridekv" in mydict and mydict["overridekv"]:
@@ -10177,7 +10185,7 @@ def disableSwappedFieldsInConfig(args, swapReqType):
1017710185

1017810186
def kcpp_main_process(launch_args, g_memory=None, gui_launcher=False):
1017910187
global embedded_kailite, embedded_kcpp_docs, embedded_kcpp_sdui, embedded_kailite_gz, embedded_kcpp_docs_gz, embedded_kcpp_sdui_gz, embedded_lcpp_ui_gz, embedded_musicui, embedded_musicui_gz, start_time, exitcounter, global_memory, using_gui_launcher
10180-
global libname, args, friendlymodelname, friendlysdmodelname, fullsdmodelpath, password, fullwhispermodelpath, ttsmodelpath, embeddingsmodelpath, musicdiffusionmodelpath, musicllmmodelpath, friendlyembeddingsmodelname, has_audio_support, has_vision_support, cached_chat_template
10188+
global libname, args, friendlymodelname, friendlysdmodelname, fullsdmodelpath, password, fullwhispermodelpath, ttsmodelpath, embeddingsmodelpath, musicdiffusionmodelpath, musicllmmodelpath, friendlyembeddingsmodelname, has_audio_support, has_vision_support, cached_chat_template, preloaded_custom_jinja
1018110189

1018210190
start_server = True
1018310191

@@ -10408,6 +10416,23 @@ def kcpp_main_process(launch_args, g_memory=None, gui_launcher=False):
1040810416
dlfile = download_model_from_url(args.musicvae,[".gguf"],min_file_size=500000)
1040910417
if dlfile:
1041010418
args.musicvae = dlfile
10419+
if args.mcpfile and args.mcpfile!="":
10420+
dlfile = download_model_from_url(args.mcpfile,[".json"],min_file_size=64)
10421+
if dlfile:
10422+
args.mcpfile = dlfile
10423+
if args.jinjatemplate and args.jinjatemplate!="":
10424+
dlfile = download_model_from_url(args.jinjatemplate,[".jinja"],min_file_size=64)
10425+
if dlfile:
10426+
args.jinjatemplate = dlfile
10427+
10428+
if args.jinjatemplate and os.path.exists(args.jinjatemplate):
10429+
try:
10430+
print(f"Using custom Jinja template: {args.jinjatemplate}")
10431+
with open(args.jinjatemplate, 'r', encoding='utf-8', errors='ignore') as f:
10432+
preloaded_custom_jinja = f.read()
10433+
except Exception as e:
10434+
print(f"Error loading jinja template: {e}")
10435+
preloaded_custom_jinja = ""
1041110436

1041210437
# sanitize and replace the default vanity name. remember me....
1041310438
if args.model_param and args.model_param!="":
@@ -11224,7 +11249,8 @@ def range_checker(arg: str):
1122411249
advparser.add_argument("--chatcompletionsadapter", metavar=('[filename]'), help="Select an optional ChatCompletions Adapter JSON file to force custom instruct tags.", default="AutoGuess")
1122511250
advparser.add_argument("--jinja", help="Enables using jinja chat template formatting for chat completions endpoint. Other endpoints are unaffected. Tool calls are done without jinja.", action='store_true')
1122611251
advparser.add_argument("--jinja_tools","--jinja-tools","--jinjatools", help="Enables using jinja chat template formatting for chat completions endpoint. Other endpoints are unaffected. Tool calls are done with jinja.", action='store_true')
11227-
advparser.add_argument("--jinja_kwargs","--jinja-kwargs","--jinjakwargs","--chat-template-kwargs", metavar=('{"parameter":"value",...}'), help="Set additiona fields for Jinja JSON template parser, must be a valid JSON object.", default="")
11252+
advparser.add_argument("--jinja_kwargs","--jinja-kwargs","--jinjakwargs","--chat-template-kwargs", metavar=('{"parameter":"value",...}'), help="Set additional fields for Jinja JSON template parser, must be a valid JSON object.", default="")
11253+
advparser.add_argument("--jinjatemplate","--chat-template-file", metavar=('[filename]'), help="Select a custom Jinja chat template, will overwrite model jinja chat template", default="")
1122811254
advparser.add_argument("--noflashattention","--no-flash-attn","-nofa", help="Disables flash attention.", action='store_true')
1122911255
advparser.add_argument("--lowvram","-nkvo","--no-kv-offload", help="If supported by the backend, do not offload KV to GPU (lowvram mode). Not recommended, will be slow.", action='store_true')
1123011256
advparser.add_argument("--quantkv", help="Sets the KV cache data type quantization, options are f16/bf16/q8_0/q5_1/q4_0. Requires Flash Attention for full effect, otherwise only K cache is quantized.",metavar=('[quantization level f16/bf16/q8_0/q5_1/q4_0]'), type=str, choices=["f16","bf16","q8_0","q5_1","q4_0","0","1","2","3"], default="f16")

0 commit comments

Comments
 (0)