Skip to content

Commit e5eab54

Browse files
committed
handle override jinja template
1 parent ff37b33 commit e5eab54

5 files changed

Lines changed: 50 additions & 13 deletions

File tree

embd_res/klite.embd

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4370,7 +4370,7 @@ Current version indicated by LITEVER below.
43704370
saved_alltalk_url: default_alltalk_base,
43714371
saved_mcp_urls: "",
43724372
prev_custom_endpoint_type: 0, //show a reconnect box to custom endpoint if needed. 0 is horde, otherwise its dropdown value+1
4373-
prev_custom_endpoint_model: "", //we may not be able to match, but set it if we do
4373+
prev_custom_endpoint_model_dict: {}, //we may not be able to match, but set it if we do
43744374
prev_custom_endpoint_ischatcmpl: true,
43754375
generate_images_mode: (localflag?0:1), //0=off, 1=horde, 2=a1111, 3=dalle, 4=comfy, 5=pollinations
43764376

@@ -5595,7 +5595,7 @@ Current version indicated by LITEVER below.
55955595

55965596
//offer to reconnect
55975597
let pending_eptype = localsettings.prev_custom_endpoint_type;
5598-
let pending_custmodel = localsettings.prev_custom_endpoint_model;
5598+
let pending_custmodel = (localsettings.prev_custom_endpoint_model_dict?(localsettings.prev_custom_endpoint_model_dict[pending_eptype] || ""):"");
55995599
let pending_ischatcmpl = localsettings.prev_custom_endpoint_ischatcmpl;
56005600
if((!localflag && pending_eptype>0) || (localflag && pending_eptype==2))
56015601
{
@@ -8111,7 +8111,7 @@ Current version indicated by LITEVER below.
81118111
if(data.data && data.data.length > 0)
81128112
{
81138113
let dropdown = get_custom_ep_model_dropdown("2");
8114-
update_oai_model_list(data,dropdown);
8114+
update_oai_model_list(data,dropdown,2);
81158115
onDone(data.data); //probe success
81168116
}
81178117
else
@@ -13491,7 +13491,7 @@ Current version indicated by LITEVER below.
1349113491
});
1349213492
});
1349313493
}
13494-
function update_oai_model_list(data, dropdown)
13494+
function update_oai_model_list(data, dropdown, epchoice)
1349513495
{
1349613496
var lastOption = dropdown.lastElementChild;
1349713497
for (var i = dropdown.options.length - 1; i >= 0; i--) {
@@ -13505,12 +13505,13 @@ Current version indicated by LITEVER below.
1350513505
sortedarr.push(opt.id);
1350613506
}
1350713507
sortedarr.sort((a, b) => a.localeCompare(b, undefined, { sensitivity: 'base' }));
13508+
let pending_custmodel = (localsettings.prev_custom_endpoint_model_dict?(localsettings.prev_custom_endpoint_model_dict[epchoice] || ""):"");
1350813509
for(var i=0;i<sortedarr.length;++i)
1350913510
{
1351013511
var el = document.createElement("option");
1351113512
el.textContent = sortedarr[i];
1351213513
el.value = sortedarr[i];
13513-
if(localsettings.prev_custom_endpoint_model && sortedarr[i]==localsettings.prev_custom_endpoint_model)
13514+
if(pending_custmodel && sortedarr[i]==pending_custmodel)
1351413515
{
1351513516
selidx = i;
1351613517
document.getElementById("useoaichatcompl").checked = localsettings.prev_custom_endpoint_ischatcmpl;
@@ -13566,7 +13567,7 @@ Current version indicated by LITEVER below.
1356613567

1356713568
if (!data.error && data.data && data.data.length > 0)
1356813569
{
13569-
update_oai_model_list(data,dropdown);
13570+
update_oai_model_list(data,dropdown,epchoice);
1357013571
}
1357113572
else
1357213573
{
@@ -26575,7 +26576,10 @@ Current version indicated by LITEVER below.
2657526576
function update_prev_custom_endpoint_type()
2657626577
{
2657726578
localsettings.prev_custom_endpoint_type = 0;
26578-
localsettings.prev_custom_endpoint_model = "";
26579+
if(!localsettings.prev_custom_endpoint_model_dict)
26580+
{
26581+
localsettings.prev_custom_endpoint_model_dict = {};
26582+
}
2657926583
localsettings.prev_custom_endpoint_ischatcmpl = true;
2658026584
if (custom_kobold_endpoint != "") {
2658126585
localsettings.prev_custom_endpoint_type = 1;
@@ -26621,7 +26625,7 @@ Current version indicated by LITEVER below.
2662126625
let dropdown = get_custom_ep_model_dropdown(localsettings.prev_custom_endpoint_type);
2662226626
if(dropdown && dropdown.value)
2662326627
{
26624-
localsettings.prev_custom_endpoint_model = dropdown.value;
26628+
localsettings.prev_custom_endpoint_model_dict[localsettings.prev_custom_endpoint_type] = dropdown.value;
2662526629
localsettings.prev_custom_endpoint_ischatcmpl = document.getElementById("useoaichatcompl").checked?true:false;
2662626630
}
2662726631
}
@@ -27953,7 +27957,7 @@ Current version indicated by LITEVER below.
2795327957
var childInput = element.querySelector('.colorpickerchild');
2795427958
if (childInput && newColor.includes("rgb")) {
2795527959
childInput.value = rgb_to_hex(newColor);
27956-
} else {
27960+
} else if(childInput) {
2795727961
childInput.value = newColor;
2795827962
}
2795927963
}

expose.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@ struct load_model_inputs
7474
const int quant_k = 0;
7575
const int quant_v = 0;
7676
const bool check_slowness = false;
77+
const char * jinja_template = nullptr;
7778
const bool highpriority = false;
7879
const bool swa_support = false;
7980
const int swa_padding = 0;

ggml/src/ggml-cuda/ggml-cuda.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3126,7 +3126,7 @@ static bool ggml_cuda_graph_update_required(ggml_backend_cuda_context * cuda_ctx
31263126

31273127
if (cgraph->uid != 0 &&
31283128
cgraph->uid == graph->uid) {
3129-
GGML_LOG_DEBUG("CUDA Graph id %zu reused\n", cgraph->uid);
3129+
// GGML_LOG_DEBUG("CUDA Graph id %zu reused\n", cgraph->uid);
31303130
GGML_ASSERT((int)graph->node_props.size() == cgraph->n_nodes);
31313131
return false;
31323132
}

gpttype_adapter.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,7 @@ static bool check_slowness = false; //will display a suggestion to use highprior
156156
static bool showed_rnn_warning = false;
157157
static bool highpriority = false;
158158
static int rnn_reusable_slot_idx = -1;
159+
static std::string overridden_jinja_template = ""; //if set, overrides jinja template
159160

160161
static int delayed_generated_tokens_limit = 0;
161162
std::deque<std::string> delayed_generated_tokens; //for use with antislop sampling
@@ -2225,6 +2226,7 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
22252226
audio_multimodal_supported = false;
22262227
vision_multimodal_supported = false;
22272228
use_mrope = false;
2229+
overridden_jinja_template = inputs.jinja_template;
22282230

22292231
auto clamped_max_context_length = inputs.max_context_length;
22302232

@@ -3272,6 +3274,10 @@ std::string gpttype_get_chat_template()
32723274
printf("\nWarning: KCPP text generation not initialized!\n");
32733275
return "";
32743276
}
3277+
if(overridden_jinja_template!="")
3278+
{
3279+
return overridden_jinja_template;
3280+
}
32753281
if(file_format!=FileFormat::GGUF_GENERIC || !llama_ctx_v4)
32763282
{
32773283
return "";

koboldcpp.py

Lines changed: 29 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,7 @@
157157
embedded_lcpp_ui_gz = None
158158
embedded_musicui = None
159159
embedded_musicui_gz = None
160+
preloaded_custom_jinja = ""
160161
voicebank = {}
161162
voicelist = ["kobo","cheery","sleepy","shouty","chatty"]
162163
sslvalid = False
@@ -259,6 +260,7 @@ class load_model_inputs(ctypes.Structure):
259260
("quant_k", ctypes.c_int),
260261
("quant_v", ctypes.c_int),
261262
("check_slowness", ctypes.c_bool),
263+
("jinja_template", ctypes.c_char_p),
262264
("highpriority", ctypes.c_bool),
263265
("swa_support", ctypes.c_bool),
264266
("swa_padding", ctypes.c_int),
@@ -1955,6 +1957,7 @@ def load_model(model_filename):
19551957
inputs.override_tensors = args.overridetensors.encode("UTF-8") if args.overridetensors else "".encode("UTF-8")
19561958
inputs.moecpu = (200 if args.moecpu > 200 else args.moecpu)
19571959
inputs.check_slowness = (not args.highpriority and os.name == 'nt' and 'Intel' in platform.processor())
1960+
inputs.jinja_template = preloaded_custom_jinja.encode("UTF-8")
19581961
inputs.highpriority = args.highpriority
19591962
inputs.swa_support = args.useswa
19601963
inputs.swa_padding = args.swapadding if args.useswa else 0
@@ -7287,6 +7290,7 @@ def hide_tooltip(event):
72877290
customrope_base = ctk.StringVar(value="10000")
72887291
customrope_nativectx = ctk.StringVar(value=str(default_native_ctx))
72897292
chatcompletionsadapter_var = ctk.StringVar(value="AutoGuess")
7293+
jinjatemplate_var = ctk.StringVar()
72907294
jinja_var = ctk.IntVar(value=0)
72917295
jinja_tools_var = ctk.IntVar(value=0)
72927296
jinja_kwargs_var = ctk.StringVar()
@@ -8091,10 +8095,11 @@ def pickpremadetemplate():
80918095
if fnam:
80928096
chatcompletionsadapter_var.set(fnam)
80938097
ctk.CTkButton(model_tab, 64, text="Pick Premade", command=pickpremadetemplate).grid(row=24, column=0, padx=(350), pady=2, stick="nw")
8098+
makefileentry(model_tab, "Jinja Template:", "Select a custom Jinja chat template", jinjatemplate_var, 30, width=280, filetypes=[("Jinja Template", "*.jinja")], singlerow=True, tooltiptxt="Select a custom Jinja chat template, will overwrite model jinja chat template")
80948099

80958100
mmproj_var.trace_add("write", gui_changed_modelfile)
80968101
draftmodel_var.trace_add("write", gui_changed_modelfile)
8097-
makefileentry(model_tab, "Download Dir:", "Select directory to store all model downloads", download_dir_var, 27, width=280, singlerow=True, dialog_type=2, tooltiptxt="Specify a directory to store any downloaded models.")
8102+
makefileentry(model_tab, "Download Dir:", "Select directory to store all model downloads", download_dir_var, 35, width=280, singlerow=True, dialog_type=2, tooltiptxt="Specify a directory to store any downloaded models.")
80988103
makecheckbox(model_tab, "Allow Launch Without Models", nomodel, 40, tooltiptxt="Allows running the WebUI with no model loaded.")
80998104

81008105
# Network Tab
@@ -8431,6 +8436,8 @@ def export_vars():
84318436
args.jinja_tools = (jinja_tools_var.get()==1)
84328437
if jinja_kwargs_var.get() != "":
84338438
args.jinja_kwargs = jinja_kwargs_var.get()
8439+
if jinjatemplate_var.get() != "":
8440+
args.jinjatemplate = jinjatemplate_var.get()
84348441
args.enableguidance = (enableguidance_var.get()==1)
84358442
args.overridekv = None if override_kv_var.get() == "" else override_kv_var.get()
84368443
args.overridetensors = None if override_tensors_var.get() == "" else override_tensors_var.get()
@@ -8719,6 +8726,7 @@ def import_vars(mydict):
87198726
if isinstance(jinja_kwargs, type({})):
87208727
jinja_kwargs = json.dumps(jinja_kwargs)
87218728
jinja_kwargs_var.set(jinja_kwargs)
8729+
jinjatemplate_var.set(mydict["jinjatemplate"] if ("jinjatemplate" in mydict and mydict["jinjatemplate"]) else "")
87228730

87238731
enableguidance_var.set(mydict["enableguidance"] if ("enableguidance" in mydict) else 0)
87248732
if "overridekv" in mydict and mydict["overridekv"]:
@@ -10177,7 +10185,7 @@ def disableSwappedFieldsInConfig(args, swapReqType):
1017710185

1017810186
def kcpp_main_process(launch_args, g_memory=None, gui_launcher=False):
1017910187
global embedded_kailite, embedded_kcpp_docs, embedded_kcpp_sdui, embedded_kailite_gz, embedded_kcpp_docs_gz, embedded_kcpp_sdui_gz, embedded_lcpp_ui_gz, embedded_musicui, embedded_musicui_gz, start_time, exitcounter, global_memory, using_gui_launcher
10180-
global libname, args, friendlymodelname, friendlysdmodelname, fullsdmodelpath, password, fullwhispermodelpath, ttsmodelpath, embeddingsmodelpath, musicdiffusionmodelpath, musicllmmodelpath, friendlyembeddingsmodelname, has_audio_support, has_vision_support, cached_chat_template
10188+
global libname, args, friendlymodelname, friendlysdmodelname, fullsdmodelpath, password, fullwhispermodelpath, ttsmodelpath, embeddingsmodelpath, musicdiffusionmodelpath, musicllmmodelpath, friendlyembeddingsmodelname, has_audio_support, has_vision_support, cached_chat_template, preloaded_custom_jinja
1018110189

1018210190
start_server = True
1018310191

@@ -10408,6 +10416,23 @@ def kcpp_main_process(launch_args, g_memory=None, gui_launcher=False):
1040810416
dlfile = download_model_from_url(args.musicvae,[".gguf"],min_file_size=500000)
1040910417
if dlfile:
1041010418
args.musicvae = dlfile
10419+
if args.mcpfile and args.mcpfile!="":
10420+
dlfile = download_model_from_url(args.mcpfile,[".json"],min_file_size=64)
10421+
if dlfile:
10422+
args.mcpfile = dlfile
10423+
if args.jinjatemplate and args.jinjatemplate!="":
10424+
dlfile = download_model_from_url(args.jinjatemplate,[".jinja"],min_file_size=64)
10425+
if dlfile:
10426+
args.jinjatemplate = dlfile
10427+
10428+
if args.jinjatemplate and os.path.exists(args.jinjatemplate):
10429+
try:
10430+
print(f"Using custom Jinja template: {args.jinjatemplate}")
10431+
with open(args.jinjatemplate, 'r', encoding='utf-8', errors='ignore') as f:
10432+
preloaded_custom_jinja = f.read()
10433+
except Exception as e:
10434+
print(f"Error loading jinja template: {e}")
10435+
preloaded_custom_jinja = ""
1041110436

1041210437
# sanitize and replace the default vanity name. remember me....
1041310438
if args.model_param and args.model_param!="":
@@ -11224,7 +11249,8 @@ def range_checker(arg: str):
1122411249
advparser.add_argument("--chatcompletionsadapter", metavar=('[filename]'), help="Select an optional ChatCompletions Adapter JSON file to force custom instruct tags.", default="AutoGuess")
1122511250
advparser.add_argument("--jinja", help="Enables using jinja chat template formatting for chat completions endpoint. Other endpoints are unaffected. Tool calls are done without jinja.", action='store_true')
1122611251
advparser.add_argument("--jinja_tools","--jinja-tools","--jinjatools", help="Enables using jinja chat template formatting for chat completions endpoint. Other endpoints are unaffected. Tool calls are done with jinja.", action='store_true')
11227-
advparser.add_argument("--jinja_kwargs","--jinja-kwargs","--jinjakwargs","--chat-template-kwargs", metavar=('{"parameter":"value",...}'), help="Set additiona fields for Jinja JSON template parser, must be a valid JSON object.", default="")
11252+
advparser.add_argument("--jinja_kwargs","--jinja-kwargs","--jinjakwargs","--chat-template-kwargs", metavar=('{"parameter":"value",...}'), help="Set additional fields for Jinja JSON template parser, must be a valid JSON object.", default="")
11253+
advparser.add_argument("--jinjatemplate","--chat-template-file", metavar=('[filename]'), help="Select a custom Jinja chat template, will overwrite model jinja chat template", default="")
1122811254
advparser.add_argument("--noflashattention","--no-flash-attn","-nofa", help="Disables flash attention.", action='store_true')
1122911255
advparser.add_argument("--lowvram","-nkvo","--no-kv-offload", help="If supported by the backend, do not offload KV to GPU (lowvram mode). Not recommended, will be slow.", action='store_true')
1123011256
advparser.add_argument("--quantkv", help="Sets the KV cache data type quantization, options are f16/bf16/q8_0/q5_1/q4_0. Requires Flash Attention for full effect, otherwise only K cache is quantized.",metavar=('[quantization level f16/bf16/q8_0/q5_1/q4_0]'), type=str, choices=["f16","bf16","q8_0","q5_1","q4_0","0","1","2","3"], default="f16")

0 commit comments

Comments
 (0)