Added progress bar updates and streaming to the summarize tool

TikaaVo · TikaaVo · commit bb4b2d1bc3af · 2026-05-30T15:47:03.000+02:00
Signed-off-by: TikaaVo <tikavod6@gmail.com>
diff --git a/WRITEUP.md b/WRITEUP.md
@@ -0,0 +1,35 @@
+# Coding Challenge - Issue #241
+
+Total time spent: 4 hours
+
+## 1. Environment Setup
+
+Setting up the environment took significantly longer than expected (approximately 2 hours) due to several issues. These centered on the fact that I based my Docker Compose file on an outdated source and was therefore running an older Nextcloud version.
+
+- `host.docker.internal` didn't work: the Nextcloud container couldn't reach llm2 running on the host, giving the following error: `nc_py_api._exceptions.NextcloudException: [400] Bad Request <request: PUT /ocs/v1.php/apps/app_api/ex-app/status>`, from which it was difficult to gauge the underlying issue. I eventually fixed it by switching to the Docker bridge IP `172.17.0.1` and adding it to Nextcloud's `trusted_domains`.
+- llm2's task processing provider registration endpoint requires Nextcloud 30+, but my usage of `nextcloud:29` returned `ERROR - Failed to register llama-2-7b-chat.Q4_K_M - core:text2text:summary, Error: [501] <request: POST /ocs/v1.php/apps/app_api/api/v1/ai_provider/task_processing>`. Upgrading fixed this.
+- The version of nc_py_api installed by Poetry (`0.24.2`) was out of sync with AppAPI 3.2.3, so I upgraded to `0.30.1`.
+
+After fixing these, llm2 initialized successfully, and upon running a summarization task I reproduced the issue to be fixed: the progress bar stayed at 0.00% until the task was completed.
+
+## 2. Investigating the issue
+
+Firstly, I wanted to locate the code that set the progress, so I looked through the source code of `nc_py_api` until I found `set_progress` in the `_TaskProcessingProviderAPI` class, which accepted the task_id and the progress as a float value from 0.00 to 100.00. 
+
+## 3. Pass needed information
+
+I decided to start with the summarization task in `summarize.py`. In order to call `set_progress`, `SummarizeProcessor` needs access to `nc` and `task_id`, so they are passed as parameters to its constructor, and `task_processors.py` and `main.py` were modified to supply them.
+
+## 4. Investigate how to estimate the response length
+
+Initially, I was thinking about whether the context window `n_ctx` could be used to estimate the response length. However, then I found that inside `task_processors.py`, the model's `max_tokens` can be extracted from the model config, so I extracted that and passed it to `SummarizeProcessor` as another parameter, as this can be used as a more accurate estimation of response length.
+
+## 5. Updating the Progress Bar
+
+The `__call__` method in `SummarizeProcessor` used `invoke`, which doesn't provide progress. Therefore, I wrote a helper function `_invoke_progress`, which streams the generation and calls `set_progress`, using `max_tokens` as the upper bound. For multiple splits, I assumed that each split takes roughly equal time, so if there are N splits, then split M (1 <= M <= N) takes the progress bar from `(100/N) * (M-1)` to `(100/N) * M`; the `_invoke_progress` function therefore accepts the current (zero-based) split index and the total number of splits.
+
+## 6. Testing and Limitations
+
+Upon testing, I noticed that the GUI displayed the progress on a scale from 0.00% to 1.00%: if my code passed 25 to `set_progress`, the GUI would show 0.25%. This seems to be an issue in either the `set_progress` function or the GUI, as a division by 100 appears to be happening somewhere.
+
+One limitation of using `max_tokens` as an upper bound is that most responses don't hit this limit or even come close, so it is common to see the progress bar jump from a few percentage points straight to completed, or to the boundary of the next split. This upper bound is safe, as responses cannot exceed it, but conservative.
diff --git a/lib/main.py b/lib/main.py
@@ -132,7 +132,7 @@ def background_thread_task():
                     task["id"], error_message="Requested model is not available"
                 )
                 continue
-            task_processor = task_processor_loader()
+            task_processor = task_processor_loader(nc, task["id"])
             log(nc, LogLvl.INFO, "Generating reply")
             time_start = perf_counter()
             log(nc, LogLvl.INFO, task.get("input"))
diff --git a/lib/summarize.py b/lib/summarize.py
@@ -7,6 +7,7 @@
 from langchain.schema.prompt_template import BasePromptTemplate
 from langchain_core.runnables import Runnable
 from langchain.text_splitter import RecursiveCharacterTextSplitter
+from nc_py_api import NextcloudApp
 
 
 class SummarizeProcessor:
@@ -35,8 +36,12 @@ class SummarizeProcessor:
 """
     )
 
-    def __init__(self, runnable: Runnable, n_ctx: int = 8000):
+    def __init__(self, runnable: Runnable, nc: NextcloudApp, task_id: int, n_ctx: int = 8000, max_tokens: int = 512):
         self.runnable = runnable
+        self.nc = nc
+        self.task_id = task_id
+        self.n_ctx = n_ctx
+        self.max_tokens = max_tokens if max_tokens > 0 else 512
         self.text_splitter = RecursiveCharacterTextSplitter(
             separators=['\n\n|\\.|\\?|\\!'],
             is_separator_regex=True,
@@ -46,6 +51,29 @@ def __init__(self, runnable: Runnable, n_ctx: int = 8000):
             length_function=len,
         )
 
+    def _invoke_progress(self, messages, max_tokens: int, idx: int, total_splits: int) -> str:
+        # Stream the response and update progress
+
+        start_pct = (idx / total_splits) * 100.0
+        end_pct = ((idx + 1) / total_splits) * 100.0
+
+        tokens_generated = 0
+        full_response = ""
+        total_range = end_pct - start_pct
+
+        for chunk in self.runnable.stream(messages):
+            token = chunk.content if hasattr(chunk, 'content') else str(chunk)
+            full_response += token
+            tokens_generated += 1
+
+            fraction = min(1.0, tokens_generated / max_tokens)
+            progress = start_pct + fraction * total_range
+            self.nc.providers.task_processing.set_progress(self.task_id, progress)
+
+        # Ensure the end percentage is set after completion
+        self.nc.providers.task_processing.set_progress(self.task_id, end_pct)
+        return full_response
+
     def __call__(self, inputs: dict[str, Any]) -> dict[str, Any]:
         # Split text if needed
         splits = self.text_splitter.split_text(inputs['input'])
@@ -55,23 +83,29 @@ def __call__(self, inputs: dict[str, Any]) -> dict[str, Any]:
                 SystemMessage(content=self.system_prompt),
                 HumanMessage(content=self.user_prompt.format(input=splits[0]))
             ]
-            output = self.runnable.invoke(messages)
-            return {'output': output.content}
+
+            output = self._invoke_progress(messages, self.max_tokens, 0, 1)
+            return {'output': output}
 
         # Process each split
+        total_splits = len(splits)
         summaries = []
-        for split in splits:
+
+        for idx, split in enumerate(splits):
+            
+
             messages = [
                 SystemMessage(content=self.system_prompt),
                 HumanMessage(content=self.user_prompt.format(input=split))
             ]
-            output = self.runnable.invoke(messages)
-            summaries.append(output.content)
 
-        # Merge summaries
-        messages = [
+            split_output = self._invoke_progress(messages, self.max_tokens, idx, total_splits)
+            summaries.append(split_output)
+
+        merge_messages = [
             SystemMessage(content=self.system_prompt),
             HumanMessage(content=self.merge_prompt.format(input="\n\n".join(summaries)))
         ]
-        final_output = self.runnable.invoke(messages)
+        final_output = self.runnable.invoke(merge_messages)
+        self.nc.providers.task_processing.set_progress(self.task_id, 100.0)
         return {'output': final_output.content}
diff --git a/lib/task_processors.py b/lib/task_processors.py
@@ -128,9 +128,12 @@ def generate_task_processors(task_processors = {}):
 
 def generate_task_processors_for_model(file_name, task_processors):
     model_name = file_name.split('.gguf')[0]
-    n_ctx = get_model_config(file_name)["loader_config"]["n_ctx"]
+    model_config = get_model_config(file_name)
+    n_ctx = model_config["loader_config"]["n_ctx"]
+    max_tokens = model_config["loader_config"].get("max_tokens")
+
 
-    task_processors[model_name + ":core:text2text:summary"] = lambda: SummarizeProcessor(generate_chat_chain(file_name), n_ctx)
+    task_processors[model_name + ":core:text2text:summary"] = lambda nc, task_id: SummarizeProcessor(generate_chat_chain(file_name), nc, task_id, n_ctx, max_tokens)
     task_processors[model_name + ":core:text2text:headline"] = lambda: HeadlineProcessor(generate_chat_chain(file_name))
     task_processors[model_name + ":core:text2text:topics"] = lambda: TopicsProcessor(generate_chat_chain(file_name))
     task_processors[model_name + ":core:text2text:simplification"] = lambda: SimplifyProcessor(generate_chat_chain(file_name))

Original file line number	Diff line number	Diff line change
`@@ -132,7 +132,7 @@ def background_thread_task():`
`132`	`132`	`task["id"], error_message="Requested model is not available"`
`133`	`133`	`)`
`134`	`134`	`continue`
`135`		`- task_processor = task_processor_loader()`
	`135`	`+ task_processor = task_processor_loader(nc, task["id"])`
`136`	`136`	`log(nc, LogLvl.INFO, "Generating reply")`
`137`	`137`	`time_start = perf_counter()`
`138`	`138`	`log(nc, LogLvl.INFO, task.get("input"))`