
Commit fffc988

add server.nim
1 parent e3042d8 commit fffc988

10 files changed: +1070 -51 lines changed

bindings/chatllm.py

Lines changed: 4 additions & 2 deletions
@@ -191,6 +191,10 @@ def __init__(self, lib: str = '', model_storage: str = '', init_params: list[str
         def callback_print(user_data: int, print_type: c_int, s: bytes) -> None:
             obj = LibChatLLM._id2obj[user_data]
 
+            if print_type == PrintType.PRINT_EVT_ASYNC_COMPLETED.value:
+                obj.callback_async_done()
+                return
+
             txt = s.decode()
             if print_type == PrintType.PRINT_CHAT_CHUNK.value:
                 obj.callback_print(txt)
@@ -222,8 +226,6 @@ def callback_print(user_data: int, print_type: c_int, s: bytes) -> None:
                 obj.callback_print_beam_search(txt)
             elif print_type == PrintType.PRINT_EVT_ASYNC_COMPLETED.value:
                 obj.callback_async_done()
-            elif print_type == PrintType.PRINT_EVT_THOUGHT_COMPLETED.value:
-                obj.callback_thought_done()
             elif print_type == PrintType.PRINTLN_MODEL_INFO.value:
                 obj._model_info = json.loads(txt)
             else:

bindings/libchatllm.h

Lines changed: 33 additions & 0 deletions
@@ -196,6 +196,39 @@ enum RoleType
  */
 DLL_DECL void API_CALL chatllm_history_append(struct chatllm_obj *obj, int role_type, const char *utf8_str);
 
+/**
+ * @brief push back the current multimedia message to the end of chat history.
+ *
+ * See `chatllm_history_append`.
+ *
+ * @param[in] obj model object
+ * @param[in] role_type message type (see `RoleType`)
+ * @return >= 0 on success, < 0 otherwise
+ */
+DLL_DECL int API_CALL chatllm_history_append_multimedia_msg(struct chatllm_obj *obj, int role_type);
+
+/**
+ * @brief get the current position of the "cursor": the total number of processed/generated tokens
+ *
+ * Possible use case: token usage statistics.
+ *
+ * @param[in] obj model object
+ * @return position of the cursor
+ */
+DLL_DECL int API_CALL chatllm_get_cursor(struct chatllm_obj *obj);
+
+/**
+ * @brief set the current position of the "cursor"
+ *
+ * Possible use case: rewind and re-generate.
+ *
+ * Note: once used, the history in a saved session is no longer reliable.
+ *
+ * @param[in] obj model object
+ * @return position of the cursor
+ */
+DLL_DECL int API_CALL chatllm_set_cursor(struct chatllm_obj *obj, int pos);
+
 /**
  * @brief user input
  *
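A minimal sketch of how the new cursor API could be used, written against the Nim bindings declared in the next file; it assumes `llm` is a `chatllm_obj` that has already been created and started via `chatllm_start`, and the prompts are placeholders:

# Sketch: token accounting and rewind with the new cursor API.
let before = chatllm_get_cursor(llm)          # cursor before this round
discard chatllm_user_input(llm, "Tell me a joke.")
echo "tokens used this round: ", chatllm_get_cursor(llm) - before

# Rewind and re-generate from the same point. Note: after
# chatllm_set_cursor, the history in a saved session is unreliable.
discard chatllm_set_cursor(llm, before)
discard chatllm_user_input(llm, "Tell me a better joke.")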

bindings/libchatllm.nim

Lines changed: 230 additions & 7 deletions
@@ -1,3 +1,5 @@
+import tables
+
 type
   PrintType* = enum
     PRINT_CHAT_CHUNK = 0, ## below items share the same value with BaseStreamer::TextType
@@ -25,7 +27,7 @@ type
     PRINT_EVT_THOUGHT_COMPLETED = 101, ## thought completed
 
 type
-  chatllm_obj = object
+  chatllm_obj* = object
   f_chatllm_print* = proc (user_data: pointer; print_type: cint; utf8_str: cstring) {.cdecl.}
   f_chatllm_end* = proc (user_data: pointer) {.cdecl.}
 
@@ -96,7 +98,7 @@ proc chatllm_restart*(obj: ptr chatllm_obj; utf8_sys_prompt: cstring) {.stdcall,
 ## @param[in] obj model object
 ## @return 0 if succeeded
 ##
-proc chatllm_multimedia_msg_prepare(obj: ptr chatllm_obj) {.stdcall, dynlib: libName, importc.}
+proc chatllm_multimedia_msg_prepare*(obj: ptr chatllm_obj) {.stdcall, dynlib: libName, importc.}
 
 ##
 ## @brief add a piece to a multimedia message
@@ -108,7 +110,7 @@ proc chatllm_multimedia_msg_prepare(obj: ptr chatllm_obj) {.stdcall, dynlib: lib
 ## @param[in] utf8_str content, i.e. utf8 text content, or base64 encoded data of multimedia data.
 ## @return 0 if succeeded
 ##
-proc chatllm_multimedia_msg_append(obj: ptr chatllm_obj; content_type: cstring; utf8_str: cstring): cint {.stdcall, dynlib: libName, importc.}
+proc chatllm_multimedia_msg_append*(obj: ptr chatllm_obj; content_type: cstring; utf8_str: cstring): cint {.stdcall, dynlib: libName, importc.}
 
 type
   RoleType* = enum
@@ -126,7 +128,40 @@ type
 ## @param[in] role_type message type (see `RoleType`)
 ## @param[in] utf8_str content
 ##
-proc chatllm_history_append*(obj: ptr chatllm_obj; role_type: int; utf8_str: cstring) {.stdcall, dynlib: libName, importc.}
+proc chatllm_history_append*(obj: ptr chatllm_obj; role_type: cint; utf8_str: cstring) {.stdcall, dynlib: libName, importc.}
+
+##
+## @brief push back the current multimedia message to the end of chat history.
+##
+## See `chatllm_history_append`.
+##
+## @param[in] obj model object
+## @param[in] role_type message type (see `RoleType`)
+## @return >= 0 on success, < 0 otherwise
+##
+proc chatllm_history_append_multimedia_msg*(obj: ptr chatllm_obj; role_type: cint): cint {.stdcall, dynlib: libName, importc.}
+
+##
+## @brief get the current position of the "cursor": the total number of processed/generated tokens
+##
+## Possible use case: token usage statistics.
+##
+## @param[in] obj model object
+## @return position of the cursor
+##
+proc chatllm_get_cursor*(obj: ptr chatllm_obj): cint {.stdcall, dynlib: libName, importc.}
+
+##
+## @brief set the current position of the "cursor"
+##
+## Possible use case: rewind and re-generate.
+##
+## Note: once used, the history in a saved session is no longer reliable.
+##
+## @param[in] obj model object
+## @return position of the cursor
+##
+proc chatllm_set_cursor*(obj: ptr chatllm_obj; pos: cint): cint {.stdcall, dynlib: libName, importc.}
 
 ##
 ## @brief user input
@@ -147,7 +182,7 @@ proc chatllm_user_input*(obj: ptr chatllm_obj; utf8_str: cstring): cint {.stdcal
 ## @param[in] obj model object
 ## @return 0 if succeeded
 ##
-proc chatllm_user_input_multimedia_msg(obj: ptr chatllm_obj): cint {.stdcall, dynlib: libName, importc.}
+proc chatllm_user_input_multimedia_msg*(obj: ptr chatllm_obj): cint {.stdcall, dynlib: libName, importc.}
 
 ##
 ## @brief set prefix for AI generation
@@ -318,7 +353,7 @@ proc chatllm_async_user_input*(obj: ptr chatllm_obj; utf8_str: cstring): cint {.
 ## @param ...
 ## @return 0 if started else -1
 ##
-proc chatllm_async_user_input_multimedia_msg(obj: ptr chatllm_obj): cint {.stdcall, dynlib: libName, importc.}
+proc chatllm_async_user_input_multimedia_msg*(obj: ptr chatllm_obj): cint {.stdcall, dynlib: libName, importc.}
 
 ##
 ## @brief async version of `chatllm_tool_input`
@@ -351,4 +386,192 @@ proc chatllm_async_text_embedding*(obj: ptr chatllm_obj; utf8_str: cstring; purp
 ## @return 0 if started else -1
 ##
 proc chatllm_async_qa_rank*(obj: ptr chatllm_obj; utf8_str_q: cstring;
-                            utf8_str_a: cstring): cint {.stdcall, dynlib: libName, importc.}
+                            utf8_str_a: cstring): cint {.stdcall, dynlib: libName, importc.}
+
+## Streamer in OOP style
+type
+  StreamerMessageType = enum
+    Done = 0,
+    Chunk = 1,
+    ThoughtChunk = 2,
+    Meta = 3
+
+  StreamerMessage = tuple[t: StreamerMessageType, chunk: string]
+
+  ChunkType* = enum
+    Chat = 0
+    Thought = 1
+
+  Streamer* = ref object of RootObj
+    llm*: ptr chatllm_obj
+    auto_restart: bool
+    system_prompt*: string
+    system_prompt_updating: bool
+    acc*: string
+    thought_acc*: string
+    is_generating: bool
+    input_id: int
+    tool_input_id: int
+    references: seq[string]
+    rewritten_query: string
+    result_embedding*: string
+    result_ranking*: string
+    result_token_ids*: string
+    result_beam_search: seq[string]
+    model_info*: string
+    chan_output: Channel[StreamerMessage]
+
+var streamer_dict = initTable[int, Streamer]()
+
+proc get_streamer(id: pointer): Streamer =
+  return streamer_dict[cast[int](id)]
+
+method on_call_tool(streamer: Streamer, query: string) {.base.} =
+  raise newException(IOError, "call_tool not implemented (must be overridden)!")
+
+method on_logging(streamer: Streamer, text: string) {.base.} =
+  discard
+
+method on_error(streamer: Streamer, text: string) {.base.} =
+  raise newException(IOError, "Error: " & text)
+
+method on_thought_completed(streamer: Streamer) {.base.} =
+  discard
+
+method on_async_completed(streamer: Streamer) {.base.} =
+  streamer.chan_output.send((t: StreamerMessageType.Done, chunk: ""))
+
+proc streamer_on_print(user_data: pointer, print_type: cint, utf8_str: cstring) {.cdecl.} =
+  var streamer = get_streamer(user_data)
+  case cast[PrintType](print_type):
+  of PrintType.PRINT_CHAT_CHUNK:
+    streamer.chan_output.send((t: StreamerMessageType.Chunk, chunk: $utf8_str))
+  of PrintType.PRINTLN_META:
+    streamer.chan_output.send((t: StreamerMessageType.Meta, chunk: $utf8_str))
+  of PrintType.PRINTLN_ERROR:
+    on_error(streamer, $utf8_str)
+  of PrintType.PRINTLN_REF:
+    streamer.references.add $utf8_str
+  of PrintType.PRINTLN_REWRITTEN_QUERY:
+    streamer.rewritten_query = $utf8_str
+  of PrintType.PRINTLN_HISTORY_USER:
+    discard
+  of PrintType.PRINTLN_HISTORY_AI:
+    discard
+  of PrintType.PRINTLN_TOOL_CALLING:
+    on_call_tool(streamer, $utf8_str)
+  of PrintType.PRINTLN_EMBEDDING:
+    streamer.result_embedding = $utf8_str
+  of PrintType.PRINTLN_RANKING:
+    streamer.result_ranking = $utf8_str
+  of PrintType.PRINTLN_TOKEN_IDS:
+    streamer.result_token_ids = $utf8_str
+  of PrintType.PRINTLN_LOGGING:
+    on_logging(streamer, $utf8_str)
+  of PrintType.PRINTLN_BEAM_SEARCH:
+    streamer.result_beam_search.add $utf8_str
+  of PrintType.PRINTLN_MODEL_INFO:
+    streamer.model_info = $utf8_str
+  of PrintType.PRINT_THOUGHT_CHUNK:
+    streamer.chan_output.send((t: StreamerMessageType.ThoughtChunk, chunk: $utf8_str))
+  of PrintType.PRINT_EVT_ASYNC_COMPLETED:
+    on_async_completed(streamer)
+  of PrintType.PRINT_EVT_THOUGHT_COMPLETED:
+    on_thought_completed(streamer)
+
+proc streamer_on_end(user_data: pointer) {.cdecl.} =
+  var streamer = get_streamer(user_data)
+  streamer.is_generating = false
+
+proc initStreamer*(streamer: Streamer; args: openArray[string]; auto_restart: bool = false): bool =
+  let id = streamer_dict.len + 1
+  streamer_dict[id] = streamer
+  streamer.llm = chatllm_create()
+  streamer.chan_output.open()
+  streamer.system_prompt = ""
+  streamer.system_prompt_updating = false
+  streamer.auto_restart = auto_restart
+  streamer.is_generating = false
+  streamer.input_id = 0
+  streamer.tool_input_id = 0
+  streamer.references = @[]
+  streamer.result_embedding = ""
+  streamer.result_ranking = ""
+  streamer.result_token_ids = ""
+  streamer.model_info = ""
+  for s in args:
+    chatllm_append_param(streamer.llm, s.cstring)
+
+  let r = chatllm_start(streamer.llm, streamer_on_print, streamer_on_end, cast[pointer](id))
+  result = r == 0
+
+proc newStreamer*(args: openArray[string]; auto_restart: bool = false): Streamer =
+  var streamer: Streamer
+  new(streamer)
+  let r = initStreamer(streamer, args, auto_restart)
+  result = if r: streamer else: nil
+
+proc set_system_prompt*(streamer: Streamer, prompt: string) =
+  if streamer.system_prompt == prompt: return
+  streamer.system_prompt = prompt
+  streamer.system_prompt_updating = true
+
+proc abort*(streamer: Streamer) =
+  if streamer.is_generating:
+    chatllm_abort_generation(streamer.llm)
+
+method restart*(streamer: Streamer) {.base, gcsafe.} =
+  if not streamer.is_generating:
+    chatllm_restart(streamer.llm, if streamer.system_prompt_updating: streamer.system_prompt.cstring else: nil)
+
+proc clear(chan: var Channel[StreamerMessage]) =
+  while chan.tryRecv().dataAvailable:
+    discard
+
+proc start_chat*(streamer: Streamer, user_input: string): bool =
+  if streamer.is_generating:
+    return false
+  inc streamer.input_id
+  if streamer.auto_restart or streamer.system_prompt_updating:
+    streamer.restart()
+  streamer.acc = ""
+  streamer.thought_acc = ""
+  streamer.references = @[]
+  streamer.result_embedding = ""
+  streamer.result_ranking = ""
+  streamer.result_token_ids = ""
+  streamer.result_beam_search = @[]
+  streamer.chan_output.clear()
+  result = chatllm_async_user_input(streamer.llm, user_input.cstring) == 0
+  if result:
+    streamer.is_generating = true
+
+iterator chunks*(streamer: Streamer): tuple[t: ChunkType; chunk: string] =
+  while true:
+    let msg = streamer.chan_output.recv()
+    case msg.t:
+    of StreamerMessageType.Chunk:
+      streamer.acc &= msg.chunk
+      yield (t: ChunkType.Chat, chunk: msg.chunk)
+    of StreamerMessageType.ThoughtChunk:
+      streamer.thought_acc &= msg.chunk
+      yield (t: ChunkType.Thought, chunk: msg.chunk)
+    of StreamerMessageType.Done:
+      break
+    of StreamerMessageType.Meta:
+      discard
+
+proc set_max_gen_tokens*(streamer: Streamer, max_new_tokens: int) =
+  chatllm_set_gen_max_tokens(streamer.llm, cint(max_new_tokens))
+
+proc id*(streamer: Streamer): int = streamer.input_id
+
+proc busy*(streamer: Streamer): bool = streamer.is_generating
+
+proc get_cursor*(streamer: Streamer): int =
+  result = chatllm_get_cursor(streamer.llm)
+
+proc set_cursor*(streamer: Streamer, pos: int): int =
+  result = chatllm_set_cursor(streamer.llm, cint(pos))
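Taken together, the new Streamer wrapper reduces a console chat loop to a few lines. A minimal sketch, assuming the bindings are imported as `libchatllm`, the program is compiled with `--threads:on` (the wrapper uses a `Channel` internally), and the `-m` flag plus model path are placeholders:

# Sketch: streaming chat loop over the new Streamer API.
import libchatllm   # assumed module name for the bindings above

let streamer = newStreamer(["-m", "path/to/model.bin"], auto_restart = true)
if streamer == nil:
  quit("failed to start the model", 1)

streamer.set_max_gen_tokens(512)
while true:
  stdout.write("You  > ")
  let line = stdin.readLine()
  if line.len == 0: continue
  if not streamer.start_chat(line): continue
  stdout.write("A.I. > ")
  for (t, chunk) in streamer.chunks():
    if t == ChunkType.Chat:   # thought chunks accumulate in streamer.thought_acc
      stdout.write(chunk)
      stdout.flushFile()
  echo ""
  echo "cursor (total tokens so far): ", streamer.get_cursor()

The channel-based design lets the library's cdecl callbacks run on its worker thread while `chunks` blocks on `recv` in the caller, ending each round when the Done message arrives from `on_async_completed`.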

bindings/nim.cfg

Lines changed: 2 additions & 0 deletions
@@ -0,0 +1,2 @@
+-d:Release
+-d:ssl
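With this nim.cfg beside the sources, building the new server is presumably just `nim c --threads:on server.nim` (the `--threads:on` flag is an assumption, needed for the channel-based Streamer above); the `-d:ssl` define enables TLS support in Nim's standard networking modules.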
