Commit 3c2d40a

fix compatibility with llama.cpp webui

1 parent: fffc988

1 file changed: bindings/server.nim (48 additions, 10 deletions)
@@ -410,6 +410,9 @@ proc handle_completions(req: Request) {.async gcsafe.} =
     streamer.history[^1].messages.add (role: "assistant", content: @[(t: "text", content: streamer.acc)])
     streamer.history[^1].pos = streamer.get_cursor()
 
+    if streamer.tokens_start > streamer.history[^1].pos:
+      streamer.tokens_start = streamer.history[^1].pos
+
     let total_tokens = streamer.history[^1].pos - streamer.tokens_start
     let prompt_tokens = total_tokens div 2
 
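A note on the guard above: total_tokens is computed as pos - tokens_start, so if tokens_start ever drifts past the cursor (for instance after the history is trimmed or rewound), the count goes negative. A standalone sketch of the clamp's effect, with made-up values:

    # Sketch only; names mirror the diff, values are illustrative.
    let pos = 100            # stands in for streamer.history[^1].pos
    var tokens_start = 120   # a stale marker that drifted past the cursor
    if tokens_start > pos:   # the guard added in this commit
      tokens_start = pos
    let total_tokens = pos - tokens_start   # now 0 instead of -20
    echo total_tokens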
@@ -492,6 +495,35 @@ proc handle_index(req: Request) {.async gcsafe.} =
     headers.add ("Content-Encoding", "gzip")
   await req.respond(Http200, readFile(fn_ui), headers.newHttpHeaders())
 
+proc handle_oai_models(req: Request) {.async gcsafe.} =
+  type
+    Meta = object
+      n_params: int
+      n_ctx_train: int
+
+    Info = object
+      id: string
+      `object`: string = "model"
+      created: int
+      owned_by: string = "You"
+      meta: Meta
+
+    Infos = object
+      `object`: string = "list"
+      data: seq[Info]
+
+  var infos = Infos()
+  let streamer = get_streamer(StreamerType.Chat)
+  if streamer != nil:
+    let info = parseJson(streamer.model_info)
+    var m = Info(id: info["name"].getStr(), created: now().toTime().toUnix())
+    m.meta.n_params = info["param_num"].getInt()
+    m.meta.n_ctx_train = info["context_length"].getInt()
+    infos.data.add(m)
+
+  let headers = {"Content-type": "application/json"}
+  await req.respond(Http200, $(%* infos), headers.newHttpHeaders())
+
 proc handle_ollama_tags(req: Request) {.async gcsafe.} =
   type
     Info = object
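For orientation, $(%* infos) in the new handler serializes the objects above into an OpenAI-style model list roughly like the following; every value here is illustrative, not taken from a real model:

    {
      "object": "list",
      "data": [{
        "id": "some-model",
        "object": "model",
        "created": 1700000000,
        "owned_by": "You",
        "meta": { "n_params": 8000000000, "n_ctx_train": 8192 }
      }]
    }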
@@ -550,33 +582,38 @@ proc handle_ollama_show(req: Request) {.async gcsafe.} =
 
 proc handle_llama_props(req: Request) {.async gcsafe.} =
   type
+    Empty = object
     Modalities = object
       vision: bool = false
-    Empty = object
+      audio: bool = false
+    GenerationSettings = object
+      id: int = 0
+      id_task: int = -1
+      n_ctx: int = 0
+      speculative: bool = false
+      is_processing: bool
+      params: Empty
     Props = object
-      default_generation_settings: string = ""
+      default_generation_settings: GenerationSettings
       total_slots: int = 1
       model_alias: string = ""
-      model_path: string = ""
+      model_path: string = "/some/where"
       modalities: Modalities
-      endpoint_slots: int = 0
-      endpoint_props: Empty
-      endpoint_metrics: int = 0
-      webui: int = 0
-      chat_template: string = ""
-      bos_token: seq[int] = @[]
-      eos_token: seq[int] = @[]
       build_info: string = "Today"
 
   var props = Props()
   var streamer = get_streamer(StreamerType.Chat)
   if streamer != nil:
     let info = parseJson(streamer.model_info)
     let capabilities = info.getOrDefault("capabilities")
+    props.default_generation_settings.n_ctx = info["context_length"].getInt()
+    props.default_generation_settings.is_processing = streamer.busy()
     props.model_alias = info.getOrDefault("name").getStr()
     for c in capabilities.getElems():
       if c.getStr() == "Image Input":
         props.modalities.vision = true
+      elif c.getStr() == "Audio Input":
+        props.modalities.audio = true
 
   let headers = {"Content-type": "application/json"}
   await req.respond(Http200, $(%* props), headers.newHttpHeaders())
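The reshaped Props presumably tracks what the llama.cpp webui parses from /props: default_generation_settings becomes a nested object instead of a string, an audio modality flag is reported, and several placeholder fields (endpoint_slots, webui, chat_template, bos_token, eos_token, and so on) are dropped. With the defaults above, a /props response would look roughly like this (n_ctx and model_alias come from the model info at runtime; values here are illustrative):

    {
      "default_generation_settings": {
        "id": 0, "id_task": -1, "n_ctx": 8192,
        "speculative": false, "is_processing": false, "params": {}
      },
      "total_slots": 1,
      "model_alias": "some-model",
      "model_path": "/some/where",
      "modalities": { "vision": false, "audio": false },
      "build_info": "Today"
    }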
@@ -618,6 +655,7 @@ proc run {.async.} =
   # OAI-compatible
   router.post "/v1/chat/completions", handle_completions
   router.post "/v1/embeddings", handle_embeddings
+  router.get "/v1/models", handle_oai_models
 
   # llama-compatible
   router.get "/props", handle_llama_props
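With this route registered, clients such as the llama.cpp webui that enumerate models via GET /v1/models now receive the list built by handle_oai_models; before this commit that path was simply not served.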
