Skip to content

Commit 8b1272b

Browse files
committed
update LLM_service to latest llama.cpp
1 parent e9bc447 commit 8b1272b

2 files changed

Lines changed: 34 additions & 34 deletions

File tree

include/LLM_service.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -236,7 +236,7 @@ class UNDREAMAI_API LLMService : public LLMProvider
236236
/// @brief Auto-detect appropriate chat template
237237
/// @return Detected chat template string
238238
/// @details Analyzes the model to determine the best chat template format
239-
const std::string detect_chat_template();
239+
// const std::string detect_chat_template();
240240

241241
/// @brief Escape reasoning by adding think tokens
242242
/// @param server_http_req request with original prompt

src/LLM_service.cpp

Lines changed: 33 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -220,7 +220,7 @@ void LLMService::init(int argc, char **argv)
220220

221221
// for consistency between server router mode and single-model mode, we set the same model name as alias
222222
if (params->model_alias.empty() && !params->model.name.empty()) {
223-
params->model_alias = params->model.name;
223+
params->model_alias.insert(params->model.name);
224224
}
225225

226226
common_init();
@@ -244,8 +244,8 @@ void LLMService::init(int argc, char **argv)
244244
routes = new server_routes(*params, *ctx_server);
245245
routes->update_meta(*ctx_server);
246246

247-
params->chat_template = detect_chat_template();
248-
LOG_INF("chat_template: %s\n", params->chat_template.c_str());
247+
// params->chat_template = detect_chat_template();
248+
// LOG_INF("chat_template: %s\n", params->chat_template.c_str());
249249

250250
ctx_server->impl->queue_tasks.on_new_task([this](server_task && task)
251251
{ this->ctx_server->impl->process_single_task(std::move(task)); });
@@ -261,31 +261,31 @@ void LLMService::init(int argc, char **argv)
261261

262262
void LLMService::enable_reasoning(bool reasoning) {
263263
LLMProvider::enable_reasoning(reasoning);
264-
if (ctx_server != nullptr) ctx_server->impl->oai_parser_opt.enable_thinking = reasoning_enabled;
265-
}
266-
267-
const std::string LLMService::detect_chat_template()
268-
{
269-
const char *chat_template_jinja = common_chat_templates_source(ctx_server->impl->chat_templates.get());
270-
int chat_template_value = llm_chat_detect_template(chat_template_jinja);
271-
std::vector<const char *> supported_tmpl;
272-
int res = llama_chat_builtin_templates(nullptr, 0);
273-
if (res > 0)
274-
{
275-
supported_tmpl.resize(res);
276-
llama_chat_builtin_templates(supported_tmpl.data(), supported_tmpl.size());
277-
for (const auto &key : supported_tmpl)
278-
{
279-
llm_chat_template val = llm_chat_template_from_str(key);
280-
if ((int)val == chat_template_value)
281-
{
282-
return key;
283-
break;
284-
}
285-
}
286-
}
287-
return "";
288-
}
264+
if (ctx_server != nullptr) ctx_server->impl->chat_params.enable_thinking = reasoning_enabled;
265+
}
266+
267+
// const std::string LLMService::detect_chat_template()
268+
// {
269+
// const char *chat_template_jinja = common_chat_templates_source(ctx_server->impl->chat_templates.get());
270+
// int chat_template_value = llm_chat_detect_template(chat_template_jinja);
271+
// std::vector<const char *> supported_tmpl;
272+
// int res = llama_chat_builtin_templates(nullptr, 0);
273+
// if (res > 0)
274+
// {
275+
// supported_tmpl.resize(res);
276+
// llama_chat_builtin_templates(supported_tmpl.data(), supported_tmpl.size());
277+
// for (const auto &key : supported_tmpl)
278+
// {
279+
// llm_chat_template val = llm_chat_template_from_str(key);
280+
// if ((int)val == chat_template_value)
281+
// {
282+
// return key;
283+
// break;
284+
// }
285+
// }
286+
// }
287+
// return "";
288+
// }
289289

290290
void LLMService::debug(int debug_level)
291291
{
@@ -532,7 +532,7 @@ std::string LLMService::encapsulate_route(const json &body, server_http_context:
532532

533533
try
534534
{
535-
server_http_req req{ {}, {}, "", body.dump(), always_false };
535+
server_http_req req{ {}, {}, "", "", body.dump(), always_false };
536536
return route_handler(req)->data;
537537
}
538538
catch (...)
@@ -550,7 +550,7 @@ std::string LLMService::apply_template_json(const json &body)
550550
json copy = body;
551551
json data = oaicompat_chat_params_parse(
552552
copy,
553-
ctx_server->impl->oai_parser_opt,
553+
ctx_server->impl->chat_params,
554554
files);
555555
return safe_json_to_str({{"prompt", std::move(data.at("prompt"))}});
556556
}
@@ -591,7 +591,7 @@ std::string LLMService::completion_json(const json &data_in, CharArrayFn callbac
591591
json data = data_in;
592592
data["stream"] = stream;
593593

594-
server_http_req req{ {}, {}, "", data.dump(), always_false };
594+
server_http_req req{ {}, {}, "", "", data.dump(), always_false };
595595
auto result = routes->post_completions(req);
596596
if (result->status != 200)
597597
{
@@ -652,7 +652,7 @@ std::string LLMService::slot_json(const json &data)
652652

653653
server_task task(task_type);
654654
task.id = ctx_server->impl->queue_tasks.get_new_id();
655-
task.slot_action.slot_id = id_slot;
655+
task.slot_action.id_slot = id_slot;
656656

657657
if (action == "save" || action == "restore")
658658
{
@@ -712,7 +712,7 @@ std::unique_ptr<server_http_res> LLMService::get_props(){
712712
if (get_status_code() < 0 || setjmp(get_jump_point()) != 0)
713713
return nullptr;
714714

715-
server_http_req req{ {}, {}, "", "", always_false };
715+
server_http_req req{ {}, {}, "", "", "", always_false };
716716
auto result = routes->get_props(req);
717717

718718
json data = json::parse(result->data);

0 commit comments

Comments
 (0)