Commit b83ca7d (v17.1.0)

1 parent 810a09d

21 files changed (+299, -287 lines)

CHANGELOG.md

Lines changed: 25 additions & 0 deletions

```diff
@@ -6,6 +6,31 @@ Keep in mind that these are only the more relevant changes.
 
 ---
 
+## 5-2-2026 (commit `v17.1.0`)
+
+### Server changes
+
+- Fixed bugs.
+- Added `BASE_FLASH_ATTN_MAX_JOBS` environment variable to the requirements installation.
+- (chatbot module) Removed unnecessary configuration.
+- (chatbot module) Replaced chatbot `test_inference_files` and `test_inference_text` parameters with `test_inference_conversation` and `test_inference_configuration`.
+- (chatbot module) Removed reasoning, since it will be provided by the client.
+- (chatbot module) Added a `stop_tokens` parameter. This will stop the inference when any of the tokens in the list is generated.
+- (musicgen module) Added a warning log when loading a HeartMuLa model.
+- Implemented a new module: **stt** (*Speech To Text*).
+- If you are using Qwen3-ASR with ForcedAligner, keep in mind that it has not been fully tested yet.
+
+### Client changes
+
+- (Basic CLI Client) Updated tools detection when using a chatbot.
+- (Basic CLI Client) Added a message when receiving the `extra` parameter in the tokens response.
+
+### Other changes
+
+- Updated server documentation.
+
+---
+
 ## 1-2-2026 (commit `v17.0.0`)
 
 ### Other changes
```
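
The new chatbot `stop_tokens` parameter does not appear in the diffs below, so here is a minimal sketch of the behavior the changelog describes: truncating generated text at the first occurrence of any configured stop token. The helper name and the standalone demo are hypothetical, not the server's code.

```python
# Minimal sketch of the described `stop_tokens` behavior: truncate the
# generated text at the first occurrence of any configured stop token.
# `apply_stop_tokens` is a hypothetical helper, not the server's code.
def apply_stop_tokens(text: str, stop_tokens: list[str]) -> str:
    cut = len(text)

    for token in stop_tokens:
        index = text.find(token)

        if (index != -1):
            # Keep the earliest stop-token position found so far.
            cut = min(cut, index)

    return text[:cut]

print(apply_stop_tokens("Hello there<|end|> ignored", ["<|end|>"]))  # "Hello there"
```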

Client/I40Client/__main__.py

Lines changed: 6 additions & 2 deletions

```diff
@@ -133,8 +133,12 @@ async def __send__(AllowTools: bool = True) -> None:
 
             print(f"\nFile saved at '{fileName}'.", flush = True)
 
-        if ("tools" in token["response"]):
-            tools += token["response"]["tools"]
+        if ("extra" in token["response"] and "tools" in token["response"]["extra"]):
+            tools += token["response"]["extra"]["tools"]
+            token["response"]["extra"].pop("tools")
+
+        if ("extra" in token["response"] and len(token["response"]["extra"]) > 0):
+            print(f"Received extra data: {token['response']['extra']}", flush = True)
 
         if ("warnings" in token):
             for warning in token["warnings"]:
```
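
For context, a sketch of the token response this client code appears to expect. Only the `response`, `extra`, `tools`, and `warnings` fields are visible in the diff; everything else in the example (the `text` field, the tool-call shape, `latency_ms`) is an assumption for illustration.

```python
# Hypothetical token response, inferred from the fields the client reads.
token = {
    "response": {
        "text": "Sure, checking the weather.",  # Assumed field.
        "extra": {
            "tools": [{"name": "get_weather", "arguments": {"city": "Madrid"}}],  # Assumed shape.
            "latency_ms": 120  # Assumed extra field.
        }
    },
    "warnings": []
}
tools = []

# Same logic as the updated client code above: collect tools, then report
# whatever else arrived in `extra`.
if ("extra" in token["response"] and "tools" in token["response"]["extra"]):
    tools += token["response"]["extra"]["tools"]
    token["response"]["extra"].pop("tools")

if ("extra" in token["response"] and len(token["response"]["extra"]) > 0):
    print(f"Received extra data: {token['response']['extra']}", flush = True)
```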

Client/I40Client/server_connection.py

Lines changed: 1 addition & 1 deletion

```diff
@@ -7,7 +7,7 @@
 import base64
 import asyncio
 
-VERSION: int = 170000
+VERSION: int = 170100
 TRANSFER_RATE = 8192 * 1024
 
 class ClientSocket():
```
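
The `VERSION` integer appears to encode the package version as `major * 10000 + minor * 100 + patch` (170000 for 17.0.0, 170100 for 17.1.0). A small sketch of that assumed scheme; the helper names are hypothetical and not part of the client code.

```python
# Hypothetical helpers for the assumed encoding: major * 10000 + minor * 100 + patch.
def encode_version(major: int, minor: int, patch: int) -> int:
    return major * 10000 + minor * 100 + patch

def decode_version(version: int) -> str:
    major, rest = divmod(version, 10000)
    minor, patch = divmod(rest, 100)
    return f"{major}.{minor}.{patch}"

assert encode_version(17, 1, 0) == 170100
assert decode_version(170100) == "17.1.0"
```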

Client/pyproject.toml

Lines changed: 1 addition & 1 deletion

```diff
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "I4_0-Client-PY"
-version = "17.0.0"
+version = "17.1.0"
 description = "Python bindings for I4.0, Client-side."
 authors = [{name = "TAO71-AI"}]
 license = {text = "TAO71 I4.0 License (version 2)"}
```

Documentation/ServerDocs/01 Installation and hardware requirements.md

Lines changed: 1 addition & 0 deletions

```diff
@@ -61,6 +61,7 @@ Second, execute the `requirements.py` script. This will automatically install al
 |FORCE_UPGRADE|bool|false|-|Forces to upgrade all packages.|
 |VERBOSE|bool|false|-|Prints more information when installing.|
 |INSTALL_OPTIONAL|bool|false|-|Installs optional (but recommended) packages.|
+|BASE_FLASH_ATTN_MAX_JOBS|int|-|-|Sets the `MAX_JOBS` environment variable when installing `flash-attn`.|
 |BASE_TORCH_CIDX|string|-|-|Sets a custom PIP index url for installing PyTorch.|
 |BASE_TORCH_IDX|string|cpu|-|Sets a pre-defined PIP index url for installing PyTorch. Values: `cuda13.0` for **NVIDIA** cards and CUDA >= 13.0, `cuda12.8` for **NVIDIA** cards and CUDA >= 12.8, < 13.0, `cuda12.6` for **NVIDIA** cards and CUDA >= 12.6, < 12.8, `rocm6.4` for **AMD** cards and ROCm >= 6.4, `sycl` for **INTEL** cards with SYCL, `cpu` (default) for no GPU cards, `disable` to skip PyTorch installation.|
 |BASE_FORCE_UPGRADE|bool|false|-|Forces to upgrade PIP packages.|
```
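
`MAX_JOBS` is the environment variable that flash-attn's source build reads to cap parallel compile jobs, which matters on machines with limited RAM. A sketch of how an installer might forward the new variable, assuming it is read from the environment; this is illustrative, not the actual `requirements.py` code.

```python
# Illustrative only: forward BASE_FLASH_ATTN_MAX_JOBS to MAX_JOBS so the
# flash-attn source build caps its parallel compile jobs. Not the actual
# requirements.py code.
import os
import subprocess
import sys

env = os.environ.copy()
max_jobs = env.get("BASE_FLASH_ATTN_MAX_JOBS")

if (max_jobs is not None):
    env["MAX_JOBS"] = max_jobs  # Read by flash-attn's build during compilation.

subprocess.check_call(
    [sys.executable, "-m", "pip", "install", "flash-attn", "--no-build-isolation"],
    env = env
)
```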

Server/Services/chatbot/default_service_configuration.yaml

Lines changed: 17 additions & 15 deletions

```diff
@@ -25,24 +25,26 @@ frequency_penalty:
 repeat_penalty:
   default: 1
   modified_by_user: true
-tools:
-  default: []
-  modified_by_user: true
-tool_choice:
-  default: "auto"
-  modified_by_user: true
-extra_system_prompt:
-  default: ""
-  modified_by_user: true
 tool_start_token: "<tool_call>"
 tool_end_token: "</tool_call>"
 max_length:
   default: 999999
   modified_by_user: true
   allow_greater_than_default: false
-test_inference_files:
-  - {"type": "image", "data": "./TestAssets/test_image.png"}
-  - {"type": "audio", "data": "./TestAssets/test_audio.wav"}
-  - {"type": "video", "data": "./TestAssets/test_video.mp4"}
-test_inference_prompt: "Hey!"
-test_inference_max_length: 1000
+test_inference_conversation:
+  - role: "system"
+    content:
+      - type: "text"
+        text: "Your name is 'I4.0'. You are a nekomimi with blue hair and green eyes."
+  - role: "user"
+    content:
+      - type: "image"
+        image: "./TestAssets/test_image.png"
+      - type: "audio"
+        audio: "./TestAssets/test_audio.wav"
+      - type: "video"
+        video: "./TestAssets/test_video.mp4"
+      - type: "text"
+        text: "Hey! How are you?"
+test_inference_configuration:
+  max_length: 1024
```
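
A sketch of how the new test-inference keys might be consumed: each message carries a role and a list of typed content parts (text, image, audio, video), so one multimodal test prompt fits in a single message. The loader below assumes PyYAML and the file shown above; it is illustrative, not the chatbot service's code.

```python
# Illustrative loader for the new test-inference keys. Assumes PyYAML and
# the YAML layout shown above; not the chatbot service's actual code.
import yaml

with open("default_service_configuration.yaml", "r") as f:
    config = yaml.safe_load(f)

conversation = config["test_inference_conversation"]
inference_config = config["test_inference_configuration"]

# Print each message's role and the types of its content parts.
for message in conversation:
    parts = ", ".join(part["type"] for part in message["content"])
    print(f"{message['role']}: {parts}")

print(f"max_length = {inference_config['max_length']}")
```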

Server/Services/chatbot/llama_utils.py

Lines changed: 3 additions & 130 deletions

```diff
@@ -277,7 +277,7 @@ def StringToChatHandler(
     UseGPU: bool,
     ImageTokens: tuple[int, int],
     Verbose: bool
-) -> CH_Llava15 | CH_Llava16 | CH_Llama3VisionAlpha | CH_MiniCPMv26 | CH_Moondream | CH_NanoLlava | CH_Qwen25VL | None:
+) -> CH_Llava15 | None:
     """
     Converts a string (chat handler name) into a class.
 
@@ -288,7 +288,7 @@ def StringToChatHandler(
         ImageTokens (tuple[int, int]): Min and max image tokens.
 
     Returns:
-        CH_Llava15 | CH_Llava16 | CH_Llama3VisionAlpha | CH_MiniCPMv26 | CH_Moondream | CH_NanoLlava | CH_Qwen25VL | CH_Qwen3VL | None
+        CH_Llava15 | None
     """
     # Lower the chat handler name
     chatHandler = ChatHandler.lower()
@@ -727,132 +727,6 @@ def LoadLlamaModel(Configuration: dict[str, Any]) -> dict[str, Llama | Any]:
         cacheType = None
         logs.WriteLog(logs.INFO, "[llama_utils] `_private_cache_type` not defined. Set to None.")
 
-    # Set reasoning configuration
-    if ("reasoning" in Configuration):
-        reasoningConfiguration = Configuration["reasoning"]
-        autoReasoningClassifier = None  # Requires the `text-classification` service
-        autoReasoningConvert = {}  # {"classifier_output": "level_name", "default": "level_name"}
-        reasoningLevels = []
-        reasoningDefaultMode = "auto"
-        nonReasoningLevel = None
-        defaultReasoningLevel = None
-        reasoningStartToken = "<think>"
-        reasoningEndToken = "</think>"
-        reasoningParameters = {}
-        reasoningUserPrompt = {"position": "end", "separator": " ", "levels": []}
-        reasoningSystemPrompt = {"position": "end", "separator": " ", "levels": []}
-
-        if ("levels" in reasoningConfiguration):
-            reasoningLevels = reasoningConfiguration["levels"]
-
-        if ("_private_auto" in reasoningConfiguration):
-            if ("classifier" in reasoningConfiguration["_private_auto"]):
-                autoReasoningClassifier = reasoningConfiguration["_private_auto"]["classifier"]
-
-            if ("convert" in reasoningConfiguration["_private_auto"]):
-                autoReasoningConvert = reasoningConfiguration["_private_auto"]["convert"]
-
-        if ("default_mode" in reasoningConfiguration):
-            defaultMode = reasoningConfiguration["default_mode"]
-
-            if (defaultMode != "reasoning" and defaultMode != "nonreasoning" and defaultMode != "auto"):
-                logs.PrintLog(logs.WARNING, "[llama_utils] Default reasoning mode is expected to be `reasoning`, `nonreasoning`, or `auto`. Setting to default.")
-                defaultMode = "auto"
-
-        if ("non_reasoning_level" in reasoningConfiguration):
-            nonReasoningLevel = reasoningConfiguration["non_reasoning_level"]
-
-        if ("default_reasoning_level" in reasoningConfiguration):
-            defaultReasoningLevel = reasoningConfiguration["default_reasoning_level"]
-
-        if (nonReasoningLevel not in reasoningLevels):
-            raise ValueError(f"Non-reasoning level `{nonReasoningLevel}` not in the levels list `{reasoningLevels}`.")
-
-        if (defaultReasoningLevel not in reasoningLevels):
-            raise ValueError(f"Reasoning level `{defaultReasoningLevel}` not in the levels list `{reasoningLevels}`.")
-
-        if ("start_token" in reasoningConfiguration):
-            reasoningStartToken = reasoningConfiguration["start_token"]
-        else:
-            logs.WriteLog(logs.INFO, f"[llama_utils] Reasoning start token not detected in config. Using default `{reasoningStartToken}`.")
-
-        if ("end_token" in reasoningConfiguration):
-            reasoningStartToken = reasoningConfiguration["end_token"]
-        else:
-            logs.WriteLog(logs.INFO, f"[llama_utils] Reasoning end token not detected in config. Using default `{reasoningEndToken}`.")
-
-        if ("_private_parameters" in reasoningConfiguration):
-            reasoningParameters = reasoningConfiguration["_private_parameters"]
-
-        if ("_private_user_prompt" in reasoningConfiguration):
-            if ("position" in reasoningConfiguration["_private_user_prompt"]):
-                reasoningUserPrompt["position"] = reasoningConfiguration["_private_user_prompt"]["position"]
-            else:
-                logs.PrintLog(logs.INFO, f"[llama_utils] Position not set at user prompt (reasoning). Using default `{reasoningUserPrompt['position']}`.")
-
-            if ("separator" in reasoningConfiguration["_private_user_prompt"]):
-                reasoningUserPrompt["separator"] = reasoningConfiguration["_private_user_prompt"]["separator"]
-            else:
-                logs.PrintLog(logs.INFO, f"[llama_utils] Separator not set at user prompt (reasoning). Using default `{reasoningUserPrompt['separator']}`.")
-
-            if ("levels" in reasoningConfiguration["_private_user_prompt"]):
-                reasoningUserPrompt["levels"] = reasoningConfiguration["_private_user_prompt"]["levels"]
-
-        if ("_private_system_prompt" in reasoningConfiguration):
-            if ("position" in reasoningConfiguration["_private_system_prompt"]):
-                reasoningSystemPrompt["position"] = reasoningConfiguration["_private_system_prompt"]["position"]
-            else:
-                logs.PrintLog(logs.INFO, f"[llama_utils] Position not set at system prompt (reasoning). Using default `{reasoningSystemPrompt['position']}`.")
-
-            if ("separator" in reasoningConfiguration["_private_system_prompt"]):
-                reasoningSystemPrompt["separator"] = reasoningConfiguration["_private_system_prompt"]["separator"]
-            else:
-                logs.PrintLog(logs.INFO, f"[llama_utils] Separator not set at system prompt (reasoning). Using default `{reasoningSystemPrompt['separator']}`.")
-
-            if ("levels" in reasoningConfiguration["_private_system_prompt"]):
-                reasoningSystemPrompt["levels"] = reasoningConfiguration["_private_system_prompt"]["levels"]
-
-        reasoning = {
-            "auto": {
-                "classifier": autoReasoningClassifier,
-                "convert": autoReasoningConvert
-            },
-            "levels": reasoningLevels,
-            "default_mode": reasoningDefaultMode,
-            "non_reasoning_level": nonReasoningLevel,
-            "default_reasoning_level": defaultReasoningLevel,
-            "start_token": reasoningStartToken,
-            "end_token": reasoningEndToken,
-            "parameters": reasoningParameters,
-            "user_prompt": reasoningUserPrompt,
-            "system_prompt": reasoningSystemPrompt
-        }
-    else:
-        reasoning = {
-            "auto": {
-                "classifier": None,
-                "convert": {}
-            },
-            "levels": ["no_reasoning"],
-            "default_mode": "nonreasoning",
-            "non_reasoning_level": "no_reasoning",
-            "default_reasoning_level": "no_reasoning",
-            "start_token": "<think>",
-            "end_token": "</think>",
-            "parameters": {},
-            "user_prompt": {
-                "position": "end",
-                "separator": " ",
-                "levels": {}
-            },
-            "system_prompt": {
-                "position": "end",
-                "separator": " ",
-                "levels": {}
-            }
-        }
-        logs.WriteLog(logs.INFO, f"[llama_utils] `reasoning` not defined. Set to default mode; {reasoning}.")
-
     # Set multimodal type
     if ("multimodal" in Configuration):
         multimodal = Configuration["multimodal"]
@@ -926,6 +800,5 @@ def LoadLlamaModel(Configuration: dict[str, Any]) -> dict[str, Llama | Any]:
     logs.WriteLog(logs.INFO, f"[llama_utils] Model loaded in {loadingTime} seconds.")
     return {
         "_private_model": model,
-        "_private_type": "lcpp",
-        "reasoning": reasoning
+        "_private_type": "lcpp"
     }
```
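
Since reasoning handling moves out of the server ("it will be provided by the client", per the changelog), a client is presumably left to separate the think span from the visible answer itself. A minimal sketch, assuming the default `<think>`/`</think>` tokens the removed server code used; the helper is hypothetical and not part of the client library.

```python
# Hypothetical client-side helper: split a model response into its
# reasoning span and the visible answer, using the default <think> tokens
# that the removed server-side configuration assumed.
def split_reasoning(text: str, start_token: str = "<think>", end_token: str = "</think>") -> tuple[str, str]:
    start = text.find(start_token)
    end = text.find(end_token)

    if (start == -1 or end == -1 or end < start):
        # No complete reasoning span; treat everything as the answer.
        return ("", text)

    reasoning = text[start + len(start_token):end].strip()
    answer = (text[:start] + text[end + len(end_token):]).strip()
    return (reasoning, answer)

reasoning, answer = split_reasoning("<think>The user greeted me.</think>Hi!")
print(reasoning)  # "The user greeted me."
print(answer)     # "Hi!"
```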
