Commit b83ca7d (v17.1.0)

1 parent 810a09d

21 files changed (+299, -287 lines)

CHANGELOG.md

Lines changed: 25 additions & 0 deletions

```diff
@@ -6,6 +6,31 @@ Keep in mind that these are only the more relevant changes.
 
 ---
 
+## 5-2-2026 (commit `v17.1.0`)
+
+### Server changes
+
+- Fixed bugs.
+- Added `BASE_FLASH_ATTN_MAX_JOBS` environment variable to the requirements installation.
+- (chatbot module) Removed unnecessary configuration.
+- (chatbot module) Replaced chatbot `test_inference_files` and `test_inference_text` parameters with `test_inference_conversation` and `test_inference_configuration`.
+- (chatbot module) Removed reasoning, since it will be provided by the client.
+- (chatbot module) Added a `stop_tokens` parameter. This will stop the inference when any of the tokens in the list is generated.
+- (musicgen module) Added a warning log when loading a HeartMuLa model.
+- Implemented a new module: **stt** (*Speech To Text*).
+- If you are using Qwen3-ASR with ForcedAligner, keep in mind that it has not been fully tested yet.
+
+### Client changes
+
+- (Basic CLI Client) Updated tools detection when using a chatbot.
+- (Basic CLI Client) Added a message when receiving the `extra` parameter in the tokens response.
+
+### Other changes
+
+- Updated server documentation.
+
+---
+
 ## 1-2-2026 (commit `v17.0.0`)
 
 ### Other changes
```
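
The new chatbot `stop_tokens` parameter does not appear in the diffs below, so here is a minimal sketch of the behavior the changelog describes: truncating generated text at the first occurrence of any configured stop token. The helper name and the standalone demo are hypothetical, not the server's code.

```python
# Minimal sketch of the described `stop_tokens` behavior: truncate the
# generated text at the first occurrence of any configured stop token.
# `apply_stop_tokens` is a hypothetical helper, not the server's code.
def apply_stop_tokens(text: str, stop_tokens: list[str]) -> str:
    cut = len(text)

    for token in stop_tokens:
        index = text.find(token)

        if (index != -1):
            # Keep the earliest stop-token position found so far.
            cut = min(cut, index)

    return text[:cut]

print(apply_stop_tokens("Hello there<|end|> ignored", ["<|end|>"]))  # "Hello there"
```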

Client/I40Client/__main__.py

Lines changed: 6 additions & 2 deletions

```diff
@@ -133,8 +133,12 @@ async def __send__(AllowTools: bool = True) -> None:
 
             print(f"\nFile saved at '{fileName}'.", flush = True)
 
-        if ("tools" in token["response"]):
-            tools += token["response"]["tools"]
+        if ("extra" in token["response"] and "tools" in token["response"]["extra"]):
+            tools += token["response"]["extra"]["tools"]
+            token["response"]["extra"].pop("tools")
+
+        if ("extra" in token["response"] and len(token["response"]["extra"]) > 0):
+            print(f"Received extra data: {token['response']['extra']}", flush = True)
 
         if ("warnings" in token):
             for warning in token["warnings"]:
```
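
For context, a sketch of the token response this client code appears to expect. Only the `response`, `extra`, `tools`, and `warnings` fields are visible in the diff; everything else in the example (the `text` field, the tool-call shape, `latency_ms`) is an assumption for illustration.

```python
# Hypothetical token response, inferred from the fields the client reads.
token = {
    "response": {
        "text": "Sure, checking the weather.",  # Assumed field.
        "extra": {
            "tools": [{"name": "get_weather", "arguments": {"city": "Madrid"}}],  # Assumed shape.
            "latency_ms": 120  # Assumed extra field.
        }
    },
    "warnings": []
}
tools = []

# Same logic as the updated client code above: collect tools, then report
# whatever else arrived in `extra`.
if ("extra" in token["response"] and "tools" in token["response"]["extra"]):
    tools += token["response"]["extra"]["tools"]
    token["response"]["extra"].pop("tools")

if ("extra" in token["response"] and len(token["response"]["extra"]) > 0):
    print(f"Received extra data: {token['response']['extra']}", flush = True)
```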

Client/I40Client/server_connection.py

Lines changed: 1 addition & 1 deletion

```diff
@@ -7,7 +7,7 @@
 import base64
 import asyncio
 
-VERSION: int = 170000
+VERSION: int = 170100
 TRANSFER_RATE = 8192 * 1024
 
 class ClientSocket():
```
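
The `VERSION` integer appears to encode the package version as `major * 10000 + minor * 100 + patch` (170000 for 17.0.0, 170100 for 17.1.0). A small sketch of that assumed scheme; the helper names are hypothetical and not part of the client code.

```python
# Hypothetical helpers for the assumed encoding: major * 10000 + minor * 100 + patch.
def encode_version(major: int, minor: int, patch: int) -> int:
    return major * 10000 + minor * 100 + patch

def decode_version(version: int) -> str:
    major, rest = divmod(version, 10000)
    minor, patch = divmod(rest, 100)
    return f"{major}.{minor}.{patch}"

assert encode_version(17, 1, 0) == 170100
assert decode_version(170100) == "17.1.0"
```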

Client/pyproject.toml

Lines changed: 1 addition & 1 deletion

```diff
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "I4_0-Client-PY"
-version = "17.0.0"
+version = "17.1.0"
 description = "Python bindings for I4.0, Client-side."
 authors = [{name = "TAO71-AI"}]
 license = {text = "TAO71 I4.0 License (version 2)"}
```

Documentation/ServerDocs/01 Installation and hardware requirements.md

Lines changed: 1 addition & 0 deletions

```diff
@@ -61,6 +61,7 @@ Second, execute the `requirements.py` script. This will automatically install al
 |FORCE_UPGRADE|bool|false|-|Forces to upgrade all packages.|
 |VERBOSE|bool|false|-|Prints more information when installing.|
 |INSTALL_OPTIONAL|bool|false|-|Installs optional (but recommended) packages.|
+|BASE_FLASH_ATTN_MAX_JOBS|int|-|-|Sets the `MAX_JOBS` environment variable when installing `flash-attn`.|
 |BASE_TORCH_CIDX|string|-|-|Sets a custom PIP index url for installing PyTorch.|
 |BASE_TORCH_IDX|string|cpu|-|Sets a pre-defined PIP index url for installing PyTorch. Values: `cuda13.0` for **NVIDIA** cards and CUDA >= 13.0, `cuda12.8` for **NVIDIA** cards and CUDA >= 12.8, < 13.0, `cuda12.6` for **NVIDIA** cards and CUDA >= 12.6, < 12.8, `rocm6.4` for **AMD** cards and ROCm >= 6.4, `sycl` for **INTEL** cards with SYCL, `cpu` (default) for no GPU cards, `disable` to skip PyTorch installation.|
 |BASE_FORCE_UPGRADE|bool|false|-|Forces to upgrade PIP packages.|
```
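
`MAX_JOBS` is the environment variable that flash-attn's source build reads to cap parallel compile jobs, which matters on machines with limited RAM. A sketch of how an installer might forward the new variable, assuming it is read from the environment; this is illustrative, not the actual `requirements.py` code.

```python
# Illustrative only: forward BASE_FLASH_ATTN_MAX_JOBS to MAX_JOBS so the
# flash-attn source build caps its parallel compile jobs. Not the actual
# requirements.py code.
import os
import subprocess
import sys

env = os.environ.copy()
max_jobs = env.get("BASE_FLASH_ATTN_MAX_JOBS")

if (max_jobs is not None):
    env["MAX_JOBS"] = max_jobs  # Read by flash-attn's build during compilation.

subprocess.check_call(
    [sys.executable, "-m", "pip", "install", "flash-attn", "--no-build-isolation"],
    env = env
)
```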

Server/Services/chatbot/default_service_configuration.yaml

Lines changed: 17 additions & 15 deletions

```diff
@@ -25,24 +25,26 @@ frequency_penalty:
 repeat_penalty:
   default: 1
   modified_by_user: true
-tools:
-  default: []
-  modified_by_user: true
-tool_choice:
-  default: "auto"
-  modified_by_user: true
-extra_system_prompt:
-  default: ""
-  modified_by_user: true
 tool_start_token: "<tool_call>"
 tool_end_token: "</tool_call>"
 max_length:
   default: 999999
   modified_by_user: true
   allow_greater_than_default: false
-test_inference_files:
-  - {"type": "image", "data": "./TestAssets/test_image.png"}
-  - {"type": "audio", "data": "./TestAssets/test_audio.wav"}
-  - {"type": "video", "data": "./TestAssets/test_video.mp4"}
-test_inference_prompt: "Hey!"
-test_inference_max_length: 1000
+test_inference_conversation:
+  - role: "system"
+    content:
+      - type: "text"
+        text: "Your name is 'I4.0'. You are a nekomimi with blue hair and green eyes."
+  - role: "user"
+    content:
+      - type: "image"
+        image: "./TestAssets/test_image.png"
+      - type: "audio"
+        audio: "./TestAssets/test_audio.wav"
+      - type: "video"
+        video: "./TestAssets/test_video.mp4"
+      - type: "text"
+        text: "Hey! How are you?"
+test_inference_configuration:
+  max_length: 1024
```
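
A sketch of how the new test-inference keys might be consumed: each message carries a role and a list of typed content parts (text, image, audio, video), so one multimodal test prompt fits in a single message. The loader below assumes PyYAML and the file shown above; it is illustrative, not the chatbot service's code.

```python
# Illustrative loader for the new test-inference keys. Assumes PyYAML and
# the YAML layout shown above; not the chatbot service's actual code.
import yaml

with open("default_service_configuration.yaml", "r") as f:
    config = yaml.safe_load(f)

conversation = config["test_inference_conversation"]
inference_config = config["test_inference_configuration"]

# Print each message's role and the types of its content parts.
for message in conversation:
    parts = ", ".join(part["type"] for part in message["content"])
    print(f"{message['role']}: {parts}")

print(f"max_length = {inference_config['max_length']}")
```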

Server/Services/chatbot/llama_utils.py

Lines changed: 3 additions & 130 deletions

```diff
@@ -277,7 +277,7 @@ def StringToChatHandler(
     UseGPU: bool,
     ImageTokens: tuple[int, int],
     Verbose: bool
-) -> CH_Llava15 | CH_Llava16 | CH_Llama3VisionAlpha | CH_MiniCPMv26 | CH_Moondream | CH_NanoLlava | CH_Qwen25VL | None:
+) -> CH_Llava15 | None:
     """
     Converts a string (chat handler name) into a class.
 
@@ -288,7 +288,7 @@ def StringToChatHandler(
         ImageTokens (tuple[int, int]): Min and max image tokens.
 
     Returns:
-        CH_Llava15 | CH_Llava16 | CH_Llama3VisionAlpha | CH_MiniCPMv26 | CH_Moondream | CH_NanoLlava | CH_Qwen25VL | CH_Qwen3VL | None
+        CH_Llava15 | None
     """
     # Lower the chat handler name
     chatHandler = ChatHandler.lower()
@@ -727,132 +727,6 @@ def LoadLlamaModel(Configuration: dict[str, Any]) -> dict[str, Llama | Any]:
         cacheType = None
         logs.WriteLog(logs.INFO, "[llama_utils] `_private_cache_type` not defined. Set to None.")
 
-    # Set reasoning configuration
-    if ("reasoning" in Configuration):
-        reasoningConfiguration = Configuration["reasoning"]
-        autoReasoningClassifier = None  # Requires the `text-classification` service
-        autoReasoningConvert = {}  # {"classifier_output": "level_name", "default": "level_name"}
-        reasoningLevels = []
-        reasoningDefaultMode = "auto"
-        nonReasoningLevel = None
-        defaultReasoningLevel = None
-        reasoningStartToken = "<think>"
-        reasoningEndToken = "</think>"
-        reasoningParameters = {}
-        reasoningUserPrompt = {"position": "end", "separator": " ", "levels": []}
-        reasoningSystemPrompt = {"position": "end", "separator": " ", "levels": []}
-
-        if ("levels" in reasoningConfiguration):
-            reasoningLevels = reasoningConfiguration["levels"]
-
-        if ("_private_auto" in reasoningConfiguration):
-            if ("classifier" in reasoningConfiguration["_private_auto"]):
-                autoReasoningClassifier = reasoningConfiguration["_private_auto"]["classifier"]
-
-            if ("convert" in reasoningConfiguration["_private_auto"]):
-                autoReasoningConvert = reasoningConfiguration["_private_auto"]["convert"]
-
-        if ("default_mode" in reasoningConfiguration):
-            defaultMode = reasoningConfiguration["default_mode"]
-
-            if (defaultMode != "reasoning" and defaultMode != "nonreasoning" and defaultMode != "auto"):
-                logs.PrintLog(logs.WARNING, "[llama_utils] Default reasoning mode is expected to be `reasoning`, `nonreasoning`, or `auto`. Setting to default.")
-                defaultMode = "auto"
-
-        if ("non_reasoning_level" in reasoningConfiguration):
-            nonReasoningLevel = reasoningConfiguration["non_reasoning_level"]
-
-        if ("default_reasoning_level" in reasoningConfiguration):
-            defaultReasoningLevel = reasoningConfiguration["default_reasoning_level"]
-
-        if (nonReasoningLevel not in reasoningLevels):
-            raise ValueError(f"Non-reasoning level `{nonReasoningLevel}` not in the levels list `{reasoningLevels}`.")
-
-        if (defaultReasoningLevel not in reasoningLevels):
-            raise ValueError(f"Reasoning level `{defaultReasoningLevel}` not in the levels list `{reasoningLevels}`.")
-
-        if ("start_token" in reasoningConfiguration):
-            reasoningStartToken = reasoningConfiguration["start_token"]
-        else:
-            logs.WriteLog(logs.INFO, f"[llama_utils] Reasoning start token not detected in config. Using default `{reasoningStartToken}`.")
-
-        if ("end_token" in reasoningConfiguration):
-            reasoningStartToken = reasoningConfiguration["end_token"]
-        else:
-            logs.WriteLog(logs.INFO, f"[llama_utils] Reasoning end token not detected in config. Using default `{reasoningEndToken}`.")
-
-        if ("_private_parameters" in reasoningConfiguration):
-            reasoningParameters = reasoningConfiguration["_private_parameters"]
-
-        if ("_private_user_prompt" in reasoningConfiguration):
-            if ("position" in reasoningConfiguration["_private_user_prompt"]):
-                reasoningUserPrompt["position"] = reasoningConfiguration["_private_user_prompt"]["position"]
-            else:
-                logs.PrintLog(logs.INFO, f"[llama_utils] Position not set at user prompt (reasoning). Using default `{reasoningUserPrompt['position']}`.")
-
-            if ("separator" in reasoningConfiguration["_private_user_prompt"]):
-                reasoningUserPrompt["separator"] = reasoningConfiguration["_private_user_prompt"]["separator"]
-            else:
-                logs.PrintLog(logs.INFO, f"[llama_utils] Separator not set at user prompt (reasoning). Using default `{reasoningUserPrompt['separator']}`.")
-
-            if ("levels" in reasoningConfiguration["_private_user_prompt"]):
-                reasoningUserPrompt["levels"] = reasoningConfiguration["_private_user_prompt"]["levels"]
-
-        if ("_private_system_prompt" in reasoningConfiguration):
-            if ("position" in reasoningConfiguration["_private_system_prompt"]):
-                reasoningSystemPrompt["position"] = reasoningConfiguration["_private_system_prompt"]["position"]
-            else:
-                logs.PrintLog(logs.INFO, f"[llama_utils] Position not set at system prompt (reasoning). Using default `{reasoningSystemPrompt['position']}`.")
-
-            if ("separator" in reasoningConfiguration["_private_system_prompt"]):
-                reasoningSystemPrompt["separator"] = reasoningConfiguration["_private_system_prompt"]["separator"]
-            else:
-                logs.PrintLog(logs.INFO, f"[llama_utils] Separator not set at system prompt (reasoning). Using default `{reasoningSystemPrompt['separator']}`.")
-
-            if ("levels" in reasoningConfiguration["_private_system_prompt"]):
-                reasoningSystemPrompt["levels"] = reasoningConfiguration["_private_system_prompt"]["levels"]
-
-        reasoning = {
-            "auto": {
-                "classifier": autoReasoningClassifier,
-                "convert": autoReasoningConvert
-            },
-            "levels": reasoningLevels,
-            "default_mode": reasoningDefaultMode,
-            "non_reasoning_level": nonReasoningLevel,
-            "default_reasoning_level": defaultReasoningLevel,
-            "start_token": reasoningStartToken,
-            "end_token": reasoningEndToken,
-            "parameters": reasoningParameters,
-            "user_prompt": reasoningUserPrompt,
-            "system_prompt": reasoningSystemPrompt
-        }
-    else:
-        reasoning = {
-            "auto": {
-                "classifier": None,
-                "convert": {}
-            },
-            "levels": ["no_reasoning"],
-            "default_mode": "nonreasoning",
-            "non_reasoning_level": "no_reasoning",
-            "default_reasoning_level": "no_reasoning",
-            "start_token": "<think>",
-            "end_token": "</think>",
-            "parameters": {},
-            "user_prompt": {
-                "position": "end",
-                "separator": " ",
-                "levels": {}
-            },
-            "system_prompt": {
-                "position": "end",
-                "separator": " ",
-                "levels": {}
-            }
-        }
-        logs.WriteLog(logs.INFO, f"[llama_utils] `reasoning` not defined. Set to default mode; {reasoning}.")
-
     # Set multimodal type
     if ("multimodal" in Configuration):
         multimodal = Configuration["multimodal"]
@@ -926,6 +800,5 @@ def LoadLlamaModel(Configuration: dict[str, Any]) -> dict[str, Llama | Any]:
     logs.WriteLog(logs.INFO, f"[llama_utils] Model loaded in {loadingTime} seconds.")
     return {
         "_private_model": model,
-        "_private_type": "lcpp",
-        "reasoning": reasoning
+        "_private_type": "lcpp"
     }
```
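
Since reasoning handling moves out of the server ("it will be provided by the client", per the changelog), a client is presumably left to separate the think span from the visible answer itself. A minimal sketch, assuming the default `<think>`/`</think>` tokens the removed server code used; the helper is hypothetical and not part of the client library.

```python
# Hypothetical client-side helper: split a model response into its
# reasoning span and the visible answer, using the default <think> tokens
# that the removed server-side configuration assumed.
def split_reasoning(text: str, start_token: str = "<think>", end_token: str = "</think>") -> tuple[str, str]:
    start = text.find(start_token)
    end = text.find(end_token)

    if (start == -1 or end == -1 or end < start):
        # No complete reasoning span; treat everything as the answer.
        return ("", text)

    reasoning = text[start + len(start_token):end].strip()
    answer = (text[:start] + text[end + len(end_token):]).strip()
    return (reasoning, answer)

reasoning, answer = split_reasoning("<think>The user greeted me.</think>Hi!")
print(reasoning)  # "The user greeted me."
print(answer)     # "Hi!"
```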
