diff --git a/.clang-format b/.clang-format
index 87d39608..06eb3d4b 100644
--- a/.clang-format
+++ b/.clang-format
@@ -163,5 +163,4 @@ StatementMacros:
 - QT_REQUIRE_VERSION
 TabWidth: 4
 UseCRLF: false
-UseTab: Never
-...
\ No newline at end of file
+UseTab: Never
\ No newline at end of file
diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
new file mode 100644
index 00000000..5396ec89
--- /dev/null
+++ b/.github/workflows/benchmark.yml
@@ -0,0 +1,18 @@
+name: Benchmark Test
+on:
+  workflow_dispatch:
+  push:
+    branches:
+      - dev
+jobs:
+  build:
+    runs-on: [self-hosted, linux, arm64]
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Start Benchmark Test
+        run: |
+          echo "This job runs on a self-hosted runner!"
+          echo "Running benchmark test..."
+          python3 benchmark/benchmodulellm.py
\ No newline at end of file
diff --git a/benchmark/README.md b/benchmark/README.md
new file mode 100644
index 00000000..e21ed4a5
--- /dev/null
+++ b/benchmark/README.md
@@ -0,0 +1,10 @@
+benchmodulellm can be used to test LLM unit inference performance.
+
+Only the LLM unit definition files (model JSON) are required.
+
+If no model is specified, the default list is benchmarked. More model networks may be added later.
+
+Usage
+```shell
+python benchmodulellm.py --host 192.168.20.100 --port 10001 --test-items default.yaml
+```
\ No newline at end of file
diff --git a/benchmark/RESULTS.md b/benchmark/RESULTS.md
new file mode 100644
index 00000000..da44e3f6
--- /dev/null
+++ b/benchmark/RESULTS.md
@@ -0,0 +1,39 @@
+# Results
+
+## ModuleLLM (AX630C)
+
+### LLM
+| model                           | ttft (ms)  | avg-token/s | model version | llm version |
+|---------------------------------|------------|-------------|---------------|-------------|
+| qwen2.5-0.5B-prefill-20e        | 359.8      | 10.32       | v0.2          | v1.8        |
+| qwen2.5-0.5B-p256-ax630c        | 1126.19    | 10.30       | v0.4          | v1.8        |
+| qwen2.5-0.5B-Int4-ax630c        | 442.95     | 12.52       | v0.4          | v1.8        |
+| qwen2.5-coder-0.5B-ax630c       | 361.81     | 10.28       | v0.2          | v1.8        |
+| qwen2.5-1.5B-ax630c             | 1029.41    | 3.59        | v0.3          | v1.8        |
+| qwen2.5-1.5B-p256-ax630c        | 3056.54    | 3.57        | v0.4          | v1.8        |
+| qwen2.5-1.5B-Int4-ax630c        | 1219.54    | 4.63        | v0.4          | v1.8        |
+| deepseek-r1-1.5B-ax630c         | 1075.04    | 3.57        | v0.3          | v1.8        |
+| deepseek-r1-1.5B-p256-ax630c    | 3056.86    | 3.57        | v0.4          | v1.8        |
+| llama3.2-1B-prefill-ax630c      | 891.00     | 4.48        | v0.2          | v1.8        |
+| llama3.2-1B-p256-ax630c         | 2601.11    | 4.49        | v0.4          | v1.8        |
+| openbuddy-llama3.2-1B-ax630c    | 891.02     | 4.52        | v0.2          | v1.8        |
+
+`The input text used by the LLM test is "hello!"`
+
+### VLM
+| model                           | ttft (ms)  | avg-token/s | image encode (ms) | model version | vlm version |
+|---------------------------------|------------|-------------|-------------------|---------------|-------------|
+| internvl2.5-1B-364-ax630c       | 1117.27    | 10.56       | 1164.61           | v0.4          | v1.7        |
+| smolvlm-256M-ax630c             | 185.75     | 30.16       | 799.11            | v0.4          | v1.7        |
+| smolvlm-500M-ax630c             | 365.69     | 13.14      | 838.30            | v0.4          | v1.7        |
+
+`The image encoding test uses a JPEG image with a size of 810×1080`
+
+### STT
+| model              | encode (ms) | avg-decode (ms) | model version | whisper version |
+|--------------------|-------------|-----------------|---------------|-----------------|
+| whisper-tiny       | 248.0       | 32.54           | v0.4          | v1.7            |
+| whisper-base       | 660.31      | 51.11           | v0.4          | v1.7            |
+| whisper-small      | 1606.08     | 148.92          | v0.4          | v1.7            |
+
+`The STT test uses a 30-second English WAV audio clip`
\ No newline at end of file
diff --git a/benchmark/benchmodulellm.py b/benchmark/benchmodulellm.py
new file mode 100644
index
00000000..d534e7e1
--- /dev/null
+++ b/benchmark/benchmodulellm.py
@@ -0,0 +1,126 @@
+import argparse
+import os
+import sys
+
+import yaml
+import logging
+
+from pathlib import Path
+
+from utils import LLMClient
+
+FILE = Path(__file__).resolve()
+ROOT = FILE.parents[0]
+if str(ROOT) not in sys.path:
+    sys.path.append(str(ROOT))
+ROOT = Path(os.path.relpath(ROOT, Path.cwd()))
+
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s - %(levelname)s - %(message)s",
+    datefmt="%Y-%m-%d %H:%M:%S",
+)
+
+def parse_opt(known=False):
+    """
+    Parse command-line options.
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--host", type=str, default="127.0.0.1", help="ModuleLLM IP Address")
+    parser.add_argument("--port", type=int, default=10001, help="ModuleLLM TCP Port")
+    parser.add_argument("--test-items", type=str, default=ROOT / "default.yaml", help="testitems.yaml path")
+
+    args = parser.parse_known_args()[0] if known else parser.parse_args()
+
+    return args
+
+def read_yaml(file_path):
+    """
+    Read a YAML file and return its content.
+    """
+    if not os.path.exists(file_path):
+        logging.error(f"YAML file '{file_path}' does not exist.")
+        sys.exit(1)
+
+    try:
+        with open(file_path, "r") as file:
+            data = yaml.safe_load(file)
+            if data is None:
+                logging.warning(f"YAML file '{file_path}' is empty.")
+                return {}
+
+            logging.info(f"YAML file '{file_path}' read successfully.")
+
+            if "items" in data:
+                return data["items"]
+            else:
+                logging.warning(f"'items' not found in YAML file.")
+                return []
+    except Exception as e:
+        logging.error(f"Failed to read YAML file '{file_path}': {e}")
+        sys.exit(1)
+
+def write_yaml(file_path, data):
+    """
+    Write data to a YAML file.
+    """
+    try:
+        with open(file_path, "w") as file:
+            yaml.safe_dump(data, file)
+            logging.info(f"YAML file '{file_path}' written successfully.")
+    except Exception as e:
+        logging.error(f"Failed to write YAML file '{file_path}': {e}")
+        sys.exit(1)
+
+def categorize_and_deduplicate(items):
+    """
+    Categorize items by 'type' and remove duplicate 'model_name'.
+    """
+    categorized = {}
+    for item in items:
+        item_type = item.get("type")
+        model_name = item.get("model_name")
+        if not item_type or not model_name:
+            continue
+
+        if item_type not in categorized:
+            categorized[item_type] = set()
+
+        categorized[item_type].add(model_name)
+
+    # Convert sets back to lists for easier usage
+    return {key: list(value) for key, value in categorized.items()}
+
+def main(opt):
+    items = read_yaml(opt.test_items)
+    if not items:
+        logging.warning(f"No items found in YAML file '{opt.test_items}'.")
+        return
+
+    categorized_items = categorize_and_deduplicate(items)
+
+    logging.info("Categorized items:")
+    for item_type, models in categorized_items.items():
+        logging.info(f"Type: {item_type}, Models: {models}")
+
+        if item_type == "llm":
+            logging.info("Initializing LLMClient...")
+            llm_client = LLMClient(opt.host, opt.port)
+
+            for model_name in models:
+                logging.info(f"Testing model: {model_name}")
+                input_text = "Tell me an adventure story."
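+                # Each model is set up once, queried with this prompt, and torn down;
+                # failures are logged and skipped so the remaining models still run.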
+                try:
+                    result = llm_client.test(model_name, input_text)
+                    logging.info(f"Test result for model '{model_name}': {result}")
+                except Exception as e:
+                    logging.error(f"Error testing model '{model_name}': {e}")
+
+            del llm_client
+            logging.info("LLMClient deleted successfully.")
+
+    return categorized_items
+
+if __name__ == "__main__":
+    opt = parse_opt()
+    main(opt)
diff --git a/benchmark/default.yaml b/benchmark/default.yaml
new file mode 100644
index 00000000..b10f7f2a
--- /dev/null
+++ b/benchmark/default.yaml
@@ -0,0 +1,31 @@
+items:
+- model_name: qwen2.5-0.5B-p256-ax630c
+  type: llm
+- model_name: internvl2.5-1B-364-ax630c
+  type: vlm
+- model_name: whisper-tiny
+  type: whisper
+- model_name: whisper-base
+  type: whisper
+- model_name: whisper-small
+  type: whisper
+- model_name: sherpa-ncnn-streaming-zipformer-20M-2023-02-17
+  type: asr
+- model_name: sherpa-ncnn-streaming-zipformer-zh-14M-2023-02-23
+  type: asr
+- model_name: sherpa-onnx-kws-zipformer-gigaspeech-3.3M-2024-01-01
+  type: kws
+- model_name: sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01
+  type: kws
+- model_name: melotts-zh-cn
+  type: melotts
+- model_name: single_speaker_english_fast
+  type: tts
+- model_name: single_speaker_fast
+  type: tts
+- model_name: yolo11n
+  type: yolo
+- model_name: yolo11n-seg
+  type: yolo
+- model_name: yolo11n-pose
+  type: yolo
\ No newline at end of file
diff --git a/benchmark/utils/__init__.py b/benchmark/utils/__init__.py
new file mode 100644
index 00000000..5714a538
--- /dev/null
+++ b/benchmark/utils/__init__.py
@@ -0,0 +1,3 @@
+from .llm import LLMClient
+
+__all__ = ["LLMClient"]
\ No newline at end of file
diff --git a/benchmark/utils/llm.py b/benchmark/utils/llm.py
new file mode 100644
index 00000000..6c62d859
--- /dev/null
+++ b/benchmark/utils/llm.py
@@ -0,0 +1,168 @@
+import socket
+import json
+import time
+import logging
+import uuid
+# from .token_calc import calculate_token_length
+
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+
+class LLMClient:
+    def __init__(self, host, port):
+        self.host = host
+        self.port = port
+        self.work_id = None
+        self.response_format = None
+        self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+        self.sock.connect((self.host, self.port))
+
+    def generate_request_id(self):
+        return str(uuid.uuid4())
+
+    def send_request_stream(self, request):
+        self.sock.sendall(json.dumps(request).encode('utf-8'))
+        response = b""
+        parsed_responses = []
+        output_text = ""
+        token_count = 0
+
+        start_time = time.time()
+        first_packet_time = None
+
+        while True:
+            chunk = self.sock.recv(4096)
+            response += chunk
+
+            while b'\n' in response:
+                line, response = response.split(b'\n', 1)
+                try:
+                    parsed_response = json.loads(line.decode('utf-8'))
+                    parsed_responses.append(parsed_response)
+
+                    if "data" in parsed_response and "delta" in parsed_response["data"]:
+                        if first_packet_time is None:
+                            first_packet_time = time.time()
+                        output_text += parsed_response["data"]["delta"]
+                        token_count += 3
+
+                    if "data" in parsed_response and parsed_response["data"].get("finish", False):
+                        end_time = time.time()
+                        total_time = end_time - start_time
+                        first_packet_latency = first_packet_time - start_time if first_packet_time else None
+
+                        # token_count = calculate_token_length(output_text)
+                        token_speed = token_count / total_time if total_time > 0 else 0
+
+                        logging.info("Stream reception completed.")
+                        logging.info("First packet latency: %.2f seconds", first_packet_latency if first_packet_latency else 0)
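+                        # token_count is a rough estimate (~3 tokens per delta packet);
+                        # utils/token_calc.py provides an exact tiktoken-based count.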
logging.info("Total reception time: %.2f seconds", total_time) + logging.info("Total tokens received: %d", token_count) + logging.info("Token reception speed: %.2f tokens/second", token_speed) + logging.info("Total output text length: %d characters", len(output_text)) + + return { + "responses": parsed_responses, + "output_text": output_text, + "token_count": token_count, + "first_packet_latency": first_packet_latency, + "total_time": total_time, + "token_speed": token_speed + } + except json.JSONDecodeError: + logging.warning("Failed to decode JSON, skipping line.") + continue + + def send_request_non_stream(self, request): + self.sock.sendall(json.dumps(request).encode('utf-8')) + response = b"" + while True: + chunk = self.sock.recv(4096) + response += chunk + if b'\n' in chunk: + break + return json.loads(response.decode('utf-8')) + + def setup(self, model): + setup_request = { + "request_id": self.generate_request_id(), + "work_id": "llm", + "action": "setup", + "object": "llm.setup", + "data": { + "model": model, + "response_format": "llm.utf-8.stream", + "input": "llm.utf-8", + "enoutput": True, + "max_token_len": 256, + "prompt": "You are a knowledgeable assistant capable of answering various questions and providing information." + } + } + response = self.send_request_non_stream(setup_request) + self.work_id = response.get("work_id") + self.response_format = setup_request["data"]["response_format"] + return response + + def inference(self, input_text): + if not self.work_id: + raise ValueError("work_id is not set. Please call setup() first.") + + inference_request = { + "request_id": self.generate_request_id(), + "work_id": self.work_id, + "action": "inference", + "object": self.response_format, + "data": { + "delta": input_text, + "index": 0, + "finish": True + } + } + if "stream" in self.response_format: + logging.info("Sending stream request...") + result = self.send_request_stream(inference_request) + return { + "output_text": result["output_text"], + "token_count": result["token_count"], + "first_packet_latency": result["first_packet_latency"], + "total_time": result["total_time"], + "token_speed": result["token_speed"] + } + else: + logging.info("Sending non-stream request...") + response = self.send_request_non_stream(inference_request) + return { + "output_text": response.get("data", ""), + "token_count": len(response.get("data", "").split()) + } + + def exit(self): + if not self.work_id: + raise ValueError("work_id is not set. Please call setup() first.") + + exit_request = { + "request_id": self.generate_request_id(), + "work_id": self.work_id, + "action": "exit" + } + response = self.send_request_non_stream(exit_request) + return response + + def test(self, model, input_text): + logging.info("Setting up...") + setup_response = self.setup(model) + + logging.info("Running inference...") + inference_result = self.inference(input_text) + + logging.info("Exiting...") + exit_response = self.exit() + + return {} + +if __name__ == "__main__": + host = "192.168.20.186" + port = 10001 + client = LLMClient(host, port) + model_name = "qwen2.5-0.5B-p256-ax630c" + input_text = "This is a test input for the LLM." 
+    client.test(model_name, input_text)
\ No newline at end of file
diff --git a/benchmark/utils/token_calc.py b/benchmark/utils/token_calc.py
new file mode 100644
index 00000000..47154bb2
--- /dev/null
+++ b/benchmark/utils/token_calc.py
@@ -0,0 +1,20 @@
+import tiktoken
+
+def calculate_token_length(input_string: str) -> int:
+    """
+    Calculate the token length of a given string using tiktoken.
+
+    Args:
+        input_string (str): The input string to calculate token length for.
+
+    Returns:
+        int: The length of the tokens.
+    """
+    # Initialize the tokenizer (you can specify a model if needed, e.g., 'gpt-4')
+    tokenizer = tiktoken.get_encoding("cl100k_base")
+
+    # Encode the input string to tokens
+    tokens = tokenizer.encode(input_string)
+
+    # Return the length of the tokens
+    return len(tokens)
\ No newline at end of file
diff --git a/doc/projects_llm_framework_doc/llm_asr_en.md b/doc/projects_llm_framework_doc/llm_asr_en.md
index adf681a6..282bb685 100644
--- a/doc/projects_llm_framework_doc/llm_asr_en.md
+++ b/doc/projects_llm_framework_doc/llm_asr_en.md
@@ -16,7 +16,7 @@ Send JSON:
     "action": "setup",
     "object": "asr.setup",
     "data": {
-      "model": "sherpa-ncnn-streaming-zipformer-zh-14M-2023-02-23",
+      "model": "sherpa-ncnn-streaming-zipformer-20M-2023-02-17",
       "response_format": "asr.utf-8.stream",
       "input": "sys.pcm",
       "enoutput": true,
@@ -34,7 +34,7 @@ Send JSON:
 - work_id: For configuration units, it is `asr`.
 - action: The method to be called is `setup`.
 - object: The type of data being transmitted is `asr.setup`.
-- model: The model used is the Chinese model `sherpa-ncnn-streaming-zipformer-zh-14M-2023-02-23`.
+- model: The model used is the English model `sherpa-ncnn-streaming-zipformer-20M-2023-02-17`.
 - response_format: The result format is `asr.utf-8.stream`, a UTF-8 stream output.
 - input: The input is `sys.pcm`, representing system audio.
 - enoutput: Whether to enable user result output.
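The setup fields above map directly onto a TCP JSON request. A minimal Python sketch, assuming a reachable device at the given host and the newline-delimited JSON framing used by benchmark/utils/llm.py earlier in this patch (host/port here are illustrative):

```python
import json
import socket

def asr_setup(host="192.168.20.100", port=10001):
    # Open the ModuleLLM TCP JSON channel and send the ASR setup request.
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    sock.connect((host, port))
    request = {
        "request_id": "1",
        "work_id": "asr",
        "action": "setup",
        "object": "asr.setup",
        "data": {
            "model": "sherpa-ncnn-streaming-zipformer-20M-2023-02-17",
            "response_format": "asr.utf-8.stream",
            "input": "sys.pcm",
            "enoutput": True,
        },
    }
    sock.sendall(json.dumps(request).encode("utf-8"))
    # The unit replies with one JSON object per line; the setup reply carries
    # the allocated work_id (e.g. "asr.1001") used by later requests.
    reply = json.loads(sock.makefile().readline())
    return reply.get("work_id")
```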
@@ -109,7 +109,7 @@ Example:
     "action": "setup",
     "object": "asr.setup",
     "data": {
-      "model": "sherpa-ncnn-streaming-zipformer-zh-14M-2023-02-23",
+      "model": "sherpa-ncnn-streaming-zipformer-20M-2023-02-17",
       "response_format": "asr.utf-8.stream",
       "input": [
         "sys.pcm",
@@ -119,9 +119,9 @@ Example:
       "endpoint_config.rule1.min_trailing_silence": 2.4,
       "endpoint_config.rule2.min_trailing_silence": 1.2,
       "endpoint_config.rule3.min_trailing_silence": 30.1,
-      "endpoint_config.rule1.must_contain_nonsilence": false,
-      "endpoint_config.rule2.must_contain_nonsilence": false,
-      "endpoint_config.rule3.must_contain_nonsilence": false
+      "endpoint_config.rule1.must_contain_nonsilence": true,
+      "endpoint_config.rule2.must_contain_nonsilence": true,
+      "endpoint_config.rule3.must_contain_nonsilence": true
     }
   }
 ```
@@ -310,7 +310,7 @@ Response JSON:
         "inputs_": [
             "sys.pcm"
         ],
-        "model": "sherpa-ncnn-streaming-zipformer-zh-14M-2023-02-23",
+        "model": "sherpa-ncnn-streaming-zipformer-20M-2023-02-17",
         "response_format": "asr.utf-8-stream"
     },
     "error": {
diff --git a/doc/projects_llm_framework_doc/llm_asr_zh.md b/doc/projects_llm_framework_doc/llm_asr_zh.md
index e139f287..97c8e90e 100644
--- a/doc/projects_llm_framework_doc/llm_asr_zh.md
+++ b/doc/projects_llm_framework_doc/llm_asr_zh.md
@@ -117,9 +117,9 @@ error::code 为 0 表示执行成功。
       "endpoint_config.rule1.min_trailing_silence": 2.4,
       "endpoint_config.rule2.min_trailing_silence": 1.2,
       "endpoint_config.rule3.min_trailing_silence": 30.1,
-      "endpoint_config.rule1.must_contain_nonsilence": false,
-      "endpoint_config.rule2.must_contain_nonsilence": false,
-      "endpoint_config.rule3.must_contain_nonsilence": false
+      "endpoint_config.rule1.must_contain_nonsilence": true,
+      "endpoint_config.rule2.must_contain_nonsilence": true,
+      "endpoint_config.rule3.must_contain_nonsilence": true
     }
   }
 ```
diff --git a/doc/projects_llm_framework_doc/llm_camera_en.md b/doc/projects_llm_framework_doc/llm_camera_en.md
index f7a93f94..25a0f05c 100644
--- a/doc/projects_llm_framework_doc/llm_camera_en.md
+++ b/doc/projects_llm_framework_doc/llm_camera_en.md
@@ -15,25 +15,28 @@ Send JSON:
     "action": "setup",
     "object": "camera.setup",
     "data": {
-        "response_format": "camera.raw",
+        "response_format": "image.yuyv422.base64",
         "input": "/dev/video0",
         "enoutput": false,
         "frame_width": 320,
-        "frame_height": 320
+        "frame_height": 320,
+        "enable_webstream": false,
+        "rtsp": "rtsp.1280x720.h265"
     }
 }
 ```
 
-- request_id: Reference basic data explanation.
+- request_id: Refer to the basic data explanation.
 - work_id: When configuring the unit, it is `camera`.
-- action: The method being called is `setup`.
-- object: The data type being transmitted is `camera.setup`.
-- response_format: The returned result is `camera.raw`, which is in YUV422 format.
-- input: The name of the device being read.
-- frame_width: The output video frame width.
-- frame_height: The output video frame height.
-- enoutput: Whether to enable the user result output. If camera images are not needed, do not enable this parameter, as
-  the video stream will increase the communication load of the channel.
+- action: The method called is `setup`.
+- object: The type of data transmitted is `camera.setup`.
+- response_format: The output format is `image.yuyv422.base64`, i.e. yuyv422 data encoded in base64. An optional format is `image.jpeg.base64`.
+- input: The device name to be read. Example: "/dev/video0", "axera_single_sc850sl"
+- frame_width: The width of the video frame output.
+- frame_height: The height of the video frame output.
+- enoutput: Whether to enable user result output. If you do not need to obtain camera images, do not enable this parameter, as the video stream will increase the communication pressure on the channel.
+- enable_webstream: Whether to enable the web stream output. When enabled, the unit listens on TCP port 8989 and, once a client connects, pushes JPEG images over HTTP as multipart/x-mixed-replace.
+- rtsp: Whether to enable the RTSP stream output. When enabled, an RTSP server is created at rtsp://{DevIp}:8554/axstream0, and the video stream can be pulled from it using the RTSP protocol. The stream format is 1280x720 H265. Note that this stream is only available on the AX630C MIPI camera; UVC cameras cannot use RTSP.
 
 Response JSON:
 
@@ -137,7 +140,7 @@ Response JSON:
     "created": 1731652344,
     "data": {
         "enoutput": false,
-        "response_format": "camera.raw",
+        "response_format": "image.yuyv422.base64",
         "input": "/dev/video0",
         "frame_width": 320,
         "frame_height": 320
diff --git a/doc/projects_llm_framework_doc/llm_camera_zh.md b/doc/projects_llm_framework_doc/llm_camera_zh.md
index 3610a81a..5675df39 100644
--- a/doc/projects_llm_framework_doc/llm_camera_zh.md
+++ b/doc/projects_llm_framework_doc/llm_camera_zh.md
@@ -15,11 +15,13 @@
     "action": "setup",
     "object": "camera.setup",
     "data": {
-        "response_format": "camera.raw",
+        "response_format": "image.yuyv422.base64",
         "input": "/dev/video0",
         "enoutput": false,
         "frame_width": 320,
-        "frame_height": 320
+        "frame_height": 320,
+        "enable_webstream": false,
+        "rtsp": "rtsp.1280x720.h265"
     }
 }
 ```
@@ -28,11 +30,13 @@
 - work_id:配置单元时,为 `camera`。
 - action:调用的方法为 `setup`。
 - object:传输的数据类型为 `camera.setup`。
-- response_format:返回结果为 `camera.raw`,是 yuv422 格式。
-- input:读取的设备名。
+- response_format:返回结果为 `image.yuyv422.base64`,是 yuyv422 格式。可选 image.jpeg.base64 格式输出。
+- input:读取的设备名。示例:"/dev/video0", "axera_single_sc850sl"
 - frame_width:输出的视频帧宽。
 - frame_height:输出的视频帧高。
 - enoutput:是否起用用户结果输出。如果不需要获取摄像头图片,请不要开启该参数,视频流会增加信道的通信压力。
+- enable_webstream:是否启用 webstream 流输出,webstream 会监听 tcp:8989 端口,一旦收到客户端连接,将会以 HTTP 协议 multipart/x-mixed-replace 类型推送 jpeg 图片。
+- rtsp:是否启用 rtsp 流输出,rtsp 会建立一个 rtsp://{DevIp}:8554/axstream0 RTSP TCP 服务端,可使用 RTSP 协议向该端口拉取视频流。视频流的格式为 1280x720 H265。注意,该视频流只在 AX630C MIPI 摄像头上有效,UVC 摄像头无法使用 RTSP。
 
 响应 json:
 
@@ -136,7 +140,7 @@ error::code 为 0 表示执行成功。
     "created": 1731652344,
     "data": {
         "enoutput": false,
-        "response_format": "camera.raw",
+        "response_format": "image.yuyv422.base64",
         "input": "/dev/video0",
         "frame_width": 320,
         "frame_height": 320
@@ -151,4 +155,32 @@ error::code 为 0 表示执行成功。
 }
 ```
 
+获取本机的摄像头列表。
+
+发送 json:
+
+```json
+{
+    "request_id": "2",
+    "work_id": "camera",
+    "action": "list_camera"
+}
+```
+
+响应 json:
+
+```json
+{
+    "created":1746515639,
+    "data":[],
+    "error":{
+        "code":0,
+        "message":""
+    },
+    "object":"camera.devices",
+    "request_id":"2",
+    "work_id":"camera"
+}
+```
+
 > **注意:work_id 是按照单元的初始化注册顺序增加的,并不是固定的索引值。**
\ No newline at end of file
diff --git a/doc/projects_llm_framework_doc/llm_depth_anything_en.md b/doc/projects_llm_framework_doc/llm_depth_anything_en.md
index 301cc543..b1d0b51c 100644
--- a/doc/projects_llm_framework_doc/llm_depth_anything_en.md
+++ b/doc/projects_llm_framework_doc/llm_depth_anything_en.md
@@ -27,7 +27,7 @@ Send JSON:
 - work_id: When configuring the unit, it is `depth_anything`.
 - action: The method called is `setup`.
 - object: The data type being transmitted is `depth_anything.setup`.
-- model: The model used is the `depth_anything` model.
+- model: The model used is the `depth-anything-ax630c` model.
 - response_format: The return result is `jpeg.base64.stream`.
 - input: The input is `camera.1001`, which refers to the input from the camera unit, as detailed in the camera unit documentation.
diff --git a/doc/projects_llm_framework_doc/llm_depth_anything_zh.md b/doc/projects_llm_framework_doc/llm_depth_anything_zh.md
index 846ca34f..85f07f44 100644
--- a/doc/projects_llm_framework_doc/llm_depth_anything_zh.md
+++ b/doc/projects_llm_framework_doc/llm_depth_anything_zh.md
@@ -27,7 +27,7 @@ depth_anything 视觉单元,用于提供图片深度信息。
 - work_id:配置单元时,为 `depth_anything`。
 - action:调用的方法为 `setup`。
 - object:传输的数据类型为 `depth_anything.setup`。
-- model:使用的模型为 `depth_anything` 模型。
+- model:使用的模型为 `depth-anything-ax630c` 模型。
 - response_format:返回结果为 `jpeg.base64.stream`。
 - input:输入的为 `camera.1001`,代表的是从 camera 单元内部输入,详见 camera 单位文档。
 - enoutput:是否启用用户结果输出。
diff --git a/doc/projects_llm_framework_doc/llm_kws_en.md b/doc/projects_llm_framework_doc/llm_kws_en.md
index 7504e264..4ab85b47 100644
--- a/doc/projects_llm_framework_doc/llm_kws_en.md
+++ b/doc/projects_llm_framework_doc/llm_kws_en.md
@@ -16,11 +16,12 @@ Send JSON:
     "action": "setup",
     "object": "kws.setup",
     "data": {
-        "model": "sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01",
+        "model": "sherpa-onnx-kws-zipformer-gigaspeech-3.3M-2024-01-01",
         "response_format": "kws.bool",
         "input": "sys.pcm",
         "enoutput": true,
-        "kws": "你好你好"
+        "kws": "HELLO",
+        "enwake_audio": true
     }
 }
 ```
 
@@ -29,11 +30,12 @@ Send JSON:
 - work_id: When configuring the unit, it is `kws`.
 - action: The method called is `setup`.
 - object: The type of data being transmitted is `kws.setup`.
-- model: The model used is the Chinese model `sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01`.
+- model: The model used is the English model `sherpa-onnx-kws-zipformer-gigaspeech-3.3M-2024-01-01`.
 - response_format: The result returned is in `kws.bool` format.
 - input: The input is `sys.pcm`, representing system audio.
 - enoutput: Whether to enable user result output.
 - kws: The wake-up word, here the English `"HELLO"`.
+- enwake_audio: Whether to enable wake-up audio output. Default is true.
 
 Response JSON:
 
@@ -204,7 +206,7 @@ Response JSON:
         "inputs_": [
             "sys.pcm"
         ],
-        "model": "sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01",
+        "model": "sherpa-onnx-kws-zipformer-gigaspeech-3.3M-2024-01-01",
         "response_format": "kws.bool"
     },
     "error": {
diff --git a/doc/projects_llm_framework_doc/llm_kws_zh.md b/doc/projects_llm_framework_doc/llm_kws_zh.md
index 86dcbfae..5bda61b4 100644
--- a/doc/projects_llm_framework_doc/llm_kws_zh.md
+++ b/doc/projects_llm_framework_doc/llm_kws_zh.md
@@ -19,7 +19,8 @@
       "response_format": "kws.bool",
       "input": "sys.pcm",
       "enoutput": true,
-      "kws": "你好你好"
+      "kws": "你好你好",
+      "enwake_audio": true
     }
 }
 ```
@@ -33,6 +34,7 @@
 - input:输入的为 `sys.pcm`,代表的是系统音频。
 - enoutput:是否起用用户结果输出。
 - kws:中文唤醒词为 `"你好你好"`。
+- enwake_audio:是否启用唤醒音频输出。默认是 true。
 
 响应 json:
 
diff --git a/doc/projects_llm_framework_doc/llm_melotts_en.md b/doc/projects_llm_framework_doc/llm_melotts_en.md
index f45eb0e2..08222dfc 100644
--- a/doc/projects_llm_framework_doc/llm_melotts_en.md
+++ b/doc/projects_llm_framework_doc/llm_melotts_en.md
@@ -16,7 +16,7 @@ Send JSON:
     "action": "setup",
     "object": "melotts.setup",
     "data": {
-        "model": "melotts_zh-cn",
+        "model": "melotts-en-us",
         "response_format": "sys.pcm",
         "input": "tts.utf-8",
         "enoutput": false
@@ -28,7 +28,7 @@ Send JSON:
 
 - work_id: For configuration, it is `melotts`.
 - action: The method to be called is `setup`.
 - object: The data type being transmitted is `melotts.setup`.
-- model: The model being used is the Chinese model `melotts_zh-cn`.
+- model: The model being used is the English model `melotts-en-us`.
 - response_format: The result is returned as `sys.pcm`, system audio data, which is directly sent to the llm-audio module for playback.
 - input: The input is `tts.utf-8`, representing user input.
@@ -139,7 +139,7 @@ Example:
     "action": "setup",
     "object": "melotts.setup",
     "data": {
-        "model": "melotts_zh-cn",
+        "model": "melotts-en-us",
         "response_format": "sys.pcm",
         "input": [
             "tts.utf-8",
@@ -335,7 +335,7 @@ Response JSON:
         "inputs_": [
             "tts.utf-8"
         ],
-        "model": "melotts_zh-cn",
+        "model": "melotts-en-us",
         "response_format": "sys.pcm"
     },
     "error": {
diff --git a/doc/projects_llm_framework_doc/llm_melotts_zh.md b/doc/projects_llm_framework_doc/llm_melotts_zh.md
index 7d23b510..8c86e827 100644
--- a/doc/projects_llm_framework_doc/llm_melotts_zh.md
+++ b/doc/projects_llm_framework_doc/llm_melotts_zh.md
@@ -15,7 +15,7 @@
     "action": "setup",
     "object": "melotts.setup",
     "data": {
-        "model": "melotts_zh-cn",
+        "model": "melotts-zh-cn",
         "response_format": "sys.pcm",
         "input": "tts.utf-8",
         "enoutput": false
@@ -27,7 +27,7 @@
 - work_id:配置单元时,为 `melotts`。
 - action:调用的方法为 `setup`。
 - object:传输的数据类型为 `melotts.setup`。
-- model:使用的模型为 `melotts_zh-cn` 中文模型。
+- model:使用的模型为 `melotts-zh-cn` 中文模型。
 - response_format:返回结果为 `sys.pcm`, 系统音频数据,并直接发送到 llm-audio 模块进行播放。
 - input:输入的为 `tts.utf-8`,代表的是从用户输入。
 - enoutput:是否起用用户结果输出。
@@ -134,7 +134,7 @@ error::code 为 0 表示执行成功。
     "action": "setup",
     "object": "melotts.setup",
     "data": {
-        "model": "melotts_zh-cn",
+        "model": "melotts-zh-cn",
         "response_format": "sys.pcm",
         "input": [
             "tts.utf-8",
@@ -328,7 +328,7 @@ error::code 为 0 表示执行成功。
         "inputs_": [
             "tts.utf-8"
         ],
-        "model": "melotts_zh-cn",
+        "model": "melotts-zh-cn",
         "response_format": "sys.pcm"
     },
     "error": {
diff --git a/doc/projects_llm_framework_doc/llm_tts_en.md b/doc/projects_llm_framework_doc/llm_tts_en.md
index 78338289..61d0e45e 100644
--- a/doc/projects_llm_framework_doc/llm_tts_en.md
+++ b/doc/projects_llm_framework_doc/llm_tts_en.md
@@ -16,7 +16,7 @@ Send JSON:
     "action": "setup",
     "object": "tts.setup",
     "data": {
-        "model": "single_speaker_fast",
+        "model": "single-speaker-english-fast",
         "response_format": "sys.pcm",
         "input": "tts.utf-8",
         "enoutput": false
@@ -28,7 +28,7 @@ Send JSON:
 
 - work_id: For configuring the unit, it is `tts`.
 - action: The method to call is `setup`.
 - object: The type of data being transmitted is `tts.setup`.
-- model: The model used is the `single_speaker_fast` Chinese model.
+- model: The model used is the `single-speaker-english-fast` English model.
 - response_format: The returned result is `sys.pcm`, system audio data, which is directly sent to the llm-audio module
   for playback.
 - input: Input is `tts.utf-8`, representing user input.
@@ -46,13 +46,50 @@ Response JSON:
     },
     "object": "None",
     "request_id": "2",
-    "work_id": "llm.1003"
+    "work_id": "tts.1003"
 }
 ```
 
 - created: Message creation time, in Unix time.
 - work_id: The successfully created work_id unit.
 
+## inference
+
+### streaming input
+
+```json
+{
+    "request_id": "2",
+    "work_id": "tts.1003",
+    "action": "inference",
+    "object": "tts.utf-8.stream",
+    "data": {
+        "delta": "What's your name?",
+        "index": 0,
+        "finish": true
+    }
+}
+```
+- object: The data type transmitted is `tts.utf-8.stream`, indicating streaming UTF-8 input from the user.
+- delta: Segment data of the streaming input.
+- index: Index of the segment in the streaming input.
+- finish: A flag indicating whether the streaming input has completed.
+
+### non-streaming input
+
+```json
+{
+    "request_id": "2",
+    "work_id": "tts.1003",
+    "action": "inference",
+    "object": "tts.utf-8",
+    "data": "What's your name?"
+}
+```
+
+- object: The data type transmitted is `tts.utf-8`, indicating non-streaming UTF-8 input from the user.
+- data: Data for non-streaming input.
+
 ## link
 
 Link the output of the upper unit.
@@ -102,7 +139,7 @@ Example:
     "action": "setup",
     "object": "tts.setup",
     "data": {
-        "model": "single_speaker_fast",
+        "model": "single-speaker-fast",
         "response_format": "sys.pcm",
         "input": [
             "tts.utf-8",
@@ -298,7 +335,7 @@ Response JSON:
         "inputs_": [
             "tts.utf-8"
         ],
-        "model": "single_speaker_fast",
+        "model": "single-speaker-fast",
         "response_format": "sys.pcm"
     },
     "error": {
diff --git a/doc/projects_llm_framework_doc/llm_tts_zh.md b/doc/projects_llm_framework_doc/llm_tts_zh.md
index 719d4fbb..7e4ab189 100644
--- a/doc/projects_llm_framework_doc/llm_tts_zh.md
+++ b/doc/projects_llm_framework_doc/llm_tts_zh.md
@@ -15,7 +15,7 @@
     "action": "setup",
     "object": "tts.setup",
     "data": {
-        "model": "single_speaker_fast",
+        "model": "single-speaker-fast",
         "response_format": "sys.pcm",
         "input": "tts.utf-8",
         "enoutput": false
@@ -27,7 +27,7 @@
 - work_id:配置单元时,为 `tts`。
 - action:调用的方法为 `setup`。
 - object:传输的数据类型为 `tts.setup`。
-- model:使用的模型为 `single_speaker_fast` 中文模型。
+- model:使用的模型为 `single-speaker-fast` 中文模型。
 - response_format:返回结果为 `sys.pcm`, 系统音频数据,并直接发送到 llm-audio 模块进行播放。
 - input:输入的为 `tts.utf-8`,代表的是从用户输入。
 - enoutput:是否起用用户结果输出。
@@ -44,13 +44,49 @@
     },
     "object": "None",
     "request_id": "2",
-    "work_id": "llm.1003"
+    "work_id": "tts.1003"
 }
 ```
 
 - created:消息创建时间,unix 时间。
 - work_id:返回成功创建的 work_id 单元。
 
+## inference
+
+### 流式输入
+
+```json
+{
+    "request_id": "2",
+    "work_id": "tts.1003",
+    "action": "inference",
+    "object": "tts.utf-8.stream",
+    "data": {
+        "delta": "今天天气真好!",
+        "index": 0,
+        "finish": true
+    }
+}
+```
+- object:传输的数据类型为 `tts.utf-8.stream` 代表的是从用户 utf-8 的流式输入
+- delta:流式输入的分段数据
+- index:流式输入的分段索引
+- finish:流式输入是否完成的标志位
+
+### 非流式输入
+
+```json
+{
+    "request_id": "2",
+    "work_id": "tts.1003",
+    "action": "inference",
+    "object": "tts.utf-8",
+    "data": "今天天气真好!"
+}
+```
+- object:传输的数据类型为 `tts.utf-8` 代表的是从用户 utf-8 的非流式输入
+- data:非流式输入的数据
+
 ## link
 
 链接上级单元的输出。
@@ -98,7 +134,7 @@ error::code 为 0 表示执行成功。
     "action": "setup",
     "object": "tts.setup",
     "data": {
-        "model": "single_speaker_fast",
+        "model": "single-speaker-fast",
         "response_format": "sys.pcm",
         "input": [
             "tts.utf-8",
@@ -294,7 +330,7 @@ error::code 为 0 表示执行成功。
         "inputs_": [
             "tts.utf-8"
         ],
-        "model": "single_speaker_fast",
+        "model": "single-speaker-fast",
         "response_format": "sys.pcm"
     },
     "error": {
diff --git a/ext_components/StackFlow/stackflow/StackFlow.cpp b/ext_components/StackFlow/stackflow/StackFlow.cpp
index 236cc7ab..2991162e 100644
--- a/ext_components/StackFlow/stackflow/StackFlow.cpp
+++ b/ext_components/StackFlow/stackflow/StackFlow.cpp
@@ -14,7 +14,8 @@ std::string llm_channel_obj::uart_push_url;
 #define RPC_PARSE_TO_PARAM_OLD(obj) \
     sample_json_str_get(obj, "zmq_com"), sample_unescapeString(sample_json_str_get(obj, "raw_data"))
-#define RPC_PARSE_TO_PARAM(obj) RPC_PARSE_TO_FIRST(obj), RPC_PARSE_TO_SECOND(obj)
+#define RPC_PARSE_TO_PARAM(obj)     RPC_PARSE_TO_FIRST(obj), RPC_PARSE_TO_SECOND(obj)
+#define PTR_RPC_PARSE_TO_PARAM(obj) PTR_RPC_PARSE_TO_FIRST(obj), PTR_RPC_PARSE_TO_SECOND(obj)
 
 llm_channel_obj::llm_channel_obj(const std::string &_publisher_url, const std::string &inference_url,
                                  const std::string &unit_name)
@@ -30,23 +31,24 @@ llm_channel_obj::~llm_channel_obj()
 }
 
 void llm_channel_obj::subscriber_event_call(const std::function<void(const std::string &, const std::string &)> &call,
-                                            pzmq *_pzmq, const std::string &raw)
+                                            pzmq *_pzmq, const std::shared_ptr<pzmq_data> &raw)
 {
+    auto _raw = raw->string();
     const char *user_inference_flage_str = "\"action\"";
-    std::size_t pos = raw.find(user_inference_flage_str);
+    std::size_t pos = _raw.find(user_inference_flage_str);
     while (true) {
         if (pos == std::string::npos) {
             break;
-        } else if ((pos > 0) && (raw[pos - 1] != '\\')) {
-            std::string zmq_com = sample_json_str_get(raw, "zmq_com");
+        } else if ((pos > 0) && (_raw[pos - 1] != '\\')) {
+            std::string zmq_com = sample_json_str_get(_raw, "zmq_com");
             if (!zmq_com.empty()) set_push_url(zmq_com);
-            request_id_ = sample_json_str_get(raw, "request_id");
-            work_id_    = sample_json_str_get(raw, "work_id");
+            request_id_ = sample_json_str_get(_raw, "request_id");
+            work_id_    = sample_json_str_get(_raw, "work_id");
             break;
         }
-        pos = raw.find(user_inference_flage_str, pos + sizeof(user_inference_flage_str));
+        pos = _raw.find(user_inference_flage_str, pos + sizeof(user_inference_flage_str));
     }
-    call(sample_json_str_get(raw, "object"), sample_json_str_get(raw, "data"));
+    call(sample_json_str_get(_raw, "object"), sample_json_str_get(_raw, "data"));
 }
 
 int llm_channel_obj::subscriber_work_id(const std::string &work_id,
@@ -151,26 +153,16 @@ int llm_channel_obj::output_to_uart(const std::string &data)
 
 StackFlow::StackFlow::StackFlow(const std::string &unit_name)
     : work_id_num_cout_(1000), unit_name_(unit_name), rpc_ctx_(std::make_unique<pzmq>(unit_name))
 {
-    event_queue_.appendListener(EVENT_NONE,
-                                std::bind(&StackFlow::_none_event, this, std::placeholders::_1, std::placeholders::_2));
-    event_queue_.appendListener(EVENT_PAUSE,
-                                std::bind(&StackFlow::_pause, this, std::placeholders::_1, std::placeholders::_2));
-    event_queue_.appendListener(EVENT_WORK,
-                                std::bind(&StackFlow::_work, this, std::placeholders::_1, std::placeholders::_2));
-    event_queue_.appendListener(EVENT_EXIT,
-                                std::bind(&StackFlow::_exit, this, std::placeholders::_1, std::placeholders::_2));
-    event_queue_.appendListener(EVENT_SETUP,
-                                std::bind(&StackFlow::_setup, this, std::placeholders::_1,
 std::placeholders::_2));
-    event_queue_.appendListener(EVENT_LINK,
-                                std::bind(&StackFlow::_link, this, std::placeholders::_1, std::placeholders::_2));
-    event_queue_.appendListener(EVENT_UNLINK,
-                                std::bind(&StackFlow::_unlink, this, std::placeholders::_1, std::placeholders::_2));
-    event_queue_.appendListener(EVENT_TASKINFO,
-                                std::bind(&StackFlow::_taskinfo, this, std::placeholders::_1, std::placeholders::_2));
-    event_queue_.appendListener(EVENT_SYS_INIT,
-                                std::bind(&StackFlow::_sys_init, this, std::placeholders::_1, std::placeholders::_2));
-    event_queue_.appendListener(
-        EVENT_REPEAT_EVENT, std::bind(&StackFlow::_repeat_loop, this, std::placeholders::_1, std::placeholders::_2));
+    event_queue_.appendListener(EVENT_NONE, std::bind(&StackFlow::_none_event, this, std::placeholders::_1));
+    event_queue_.appendListener(EVENT_PAUSE, std::bind(&StackFlow::_pause, this, std::placeholders::_1));
+    event_queue_.appendListener(EVENT_WORK, std::bind(&StackFlow::_work, this, std::placeholders::_1));
+    event_queue_.appendListener(EVENT_EXIT, std::bind(&StackFlow::_exit, this, std::placeholders::_1));
+    event_queue_.appendListener(EVENT_SETUP, std::bind(&StackFlow::_setup, this, std::placeholders::_1));
+    event_queue_.appendListener(EVENT_LINK, std::bind(&StackFlow::_link, this, std::placeholders::_1));
+    event_queue_.appendListener(EVENT_UNLINK, std::bind(&StackFlow::_unlink, this, std::placeholders::_1));
+    event_queue_.appendListener(EVENT_TASKINFO, std::bind(&StackFlow::_taskinfo, this, std::placeholders::_1));
+    event_queue_.appendListener(EVENT_SYS_INIT, std::bind(&StackFlow::_sys_init, this, std::placeholders::_1));
+    event_queue_.appendListener(EVENT_REPEAT_EVENT, std::bind(&StackFlow::_repeat_loop, this, std::placeholders::_1));
     rpc_ctx_->register_rpc_action(
         "setup", std::bind(&StackFlow::_rpc_setup, this, std::placeholders::_1, std::placeholders::_2));
     rpc_ctx_->register_rpc_action(
@@ -215,7 +207,7 @@ StackFlow::~StackFlow()
         llm_task_channel_.erase(iteam->first);
     }
     exit_flage_.store(true);
-    event_queue_.enqueue(EVENT_NONE, "", "");
+    event_queue_.enqueue(EVENT_NONE, nullptr);
     even_loop_thread_->join();
 }
 
@@ -227,18 +219,19 @@ void StackFlow::even_loop()
     }
 }
 
-void StackFlow::_none_event(const std::string &data1, const std::string &data2)
+void StackFlow::_none_event(const std::shared_ptr<void> &arg)
 {
+    // std::shared_ptr<stackflow_data> originalPtr = std::static_pointer_cast<stackflow_data>(arg);
 }
 
-void StackFlow::_sys_init(const std::string &zmq_url, const std::string &data)
+void StackFlow::_sys_init(const std::shared_ptr<void> &arg)
 {
     // todo:...
 }
 
-std::string StackFlow::_rpc_setup(pzmq *_pzmq, const std::string &data)
+std::string StackFlow::_rpc_setup(pzmq *_pzmq, const std::shared_ptr<pzmq_data> &data)
 {
-    event_queue_.enqueue(EVENT_SETUP, RPC_PARSE_TO_PARAM(data));
+    event_queue_.enqueue(EVENT_SETUP, std::make_shared<stackflow_data>(data->get_param(0), data->get_param(1)));
     return std::string("None");
 }
 
@@ -270,9 +263,9 @@ int StackFlow::setup(const std::string &work_id, const std::string &object, cons
     return -1;
 }
 
-std::string StackFlow::_rpc_link(pzmq *_pzmq, const std::string &data)
+std::string StackFlow::_rpc_link(pzmq *_pzmq, const std::shared_ptr<pzmq_data> &data)
 {
-    event_queue_.enqueue(EVENT_LINK, RPC_PARSE_TO_PARAM(data));
+    event_queue_.enqueue(EVENT_LINK, std::make_shared<stackflow_data>(data->get_param(0), data->get_param(1)));
     return std::string("None");
 }
 
@@ -301,9 +294,9 @@ void StackFlow::link(const std::string &work_id, const std::string &object, cons
     send("None", "None", error_body, work_id);
 }
 
-std::string StackFlow::_rpc_unlink(pzmq *_pzmq, const std::string &data)
+std::string StackFlow::_rpc_unlink(pzmq *_pzmq, const std::shared_ptr<pzmq_data> &data)
 {
-    event_queue_.enqueue(EVENT_UNLINK, RPC_PARSE_TO_PARAM(data));
+    event_queue_.enqueue(EVENT_UNLINK, std::make_shared<stackflow_data>(data->get_param(0), data->get_param(1)));
     return std::string("None");
 }
 
@@ -332,9 +325,9 @@ void StackFlow::unlink(const std::string &work_id, const std::string &object, co
     send("None", "None", error_body, work_id);
 }
 
-std::string StackFlow::_rpc_work(pzmq *_pzmq, const std::string &data)
+std::string StackFlow::_rpc_work(pzmq *_pzmq, const std::shared_ptr<pzmq_data> &data)
 {
-    event_queue_.enqueue(EVENT_WORK, RPC_PARSE_TO_PARAM(data));
+    event_queue_.enqueue(EVENT_WORK, std::make_shared<stackflow_data>(data->get_param(0), data->get_param(1)));
     return std::string("None");
 }
 
@@ -363,9 +356,9 @@ void StackFlow::work(const std::string &work_id, const std::string &object, cons
     send("None", "None", error_body, work_id);
 }
 
-std::string StackFlow::_rpc_exit(pzmq *_pzmq, const std::string &data)
+std::string StackFlow::_rpc_exit(pzmq *_pzmq, const std::shared_ptr<pzmq_data> &data)
 {
-    event_queue_.enqueue(EVENT_EXIT, RPC_PARSE_TO_PARAM(data));
+    event_queue_.enqueue(EVENT_EXIT, std::make_shared<stackflow_data>(data->get_param(0), data->get_param(1)));
     return std::string("None");
 }
 
@@ -397,9 +390,9 @@ int StackFlow::exit(const std::string &work_id, const std::string &object, const
     return 0;
 }
 
-std::string StackFlow::_rpc_pause(pzmq *_pzmq, const std::string &data)
+std::string StackFlow::_rpc_pause(pzmq *_pzmq, const std::shared_ptr<pzmq_data> &data)
 {
-    event_queue_.enqueue(EVENT_PAUSE, RPC_PARSE_TO_PARAM(data));
+    event_queue_.enqueue(EVENT_PAUSE, std::make_shared<stackflow_data>(data->get_param(0), data->get_param(1)));
     return std::string("None");
 }
 
@@ -428,15 +421,15 @@ void StackFlow::pause(const std::string &work_id, const std::string &object, con
     send("None", "None", error_body, work_id);
 }
 
-std::string StackFlow::_rpc_taskinfo(pzmq *_pzmq, const std::string &data)
+std::string StackFlow::_rpc_taskinfo(pzmq *_pzmq, const std::shared_ptr<pzmq_data> &data)
 {
-    event_queue_.enqueue(EVENT_TASKINFO, RPC_PARSE_TO_PARAM(data));
+    event_queue_.enqueue(EVENT_TASKINFO, std::make_shared<stackflow_data>(data->get_param(0), data->get_param(1)));
     return std::string("None");
 }
 
 void StackFlow::taskinfo(const std::string &zmq_url, const std::string &raw)
 {
-    SLOGI("StackFlow::taskinfo raw");
+    // SLOGI("StackFlow::taskinfo raw");
     std::string work_id = sample_json_str_get(raw, "work_id");
     try {
         auto task_channel = get_channel(sample_get_work_id_num(work_id));
@@ -448,7 +441,7 @@ void StackFlow::taskinfo(const std::string
&zmq_url, const std::string &raw) void StackFlow::taskinfo(const std::string &work_id, const std::string &object, const std::string &data) { - SLOGI("StackFlow::taskinfo"); + // SLOGI("StackFlow::taskinfo"); if (_taskinfo_) { _taskinfo_(work_id, object, data); return; @@ -462,13 +455,17 @@ void StackFlow::taskinfo(const std::string &work_id, const std::string &object, int StackFlow::sys_register_unit(const std::string &unit_name) { int work_id_number; - std::string component_msg = unit_call("sys", "register_unit", unit_name); - std::string str_port = RPC_PARSE_TO_FIRST(component_msg); - work_id_number = std::stoi(str_port); - std::string tmp_buf = RPC_PARSE_TO_SECOND(component_msg); - std::string out_port = RPC_PARSE_TO_FIRST(tmp_buf); - std::string inference_port = RPC_PARSE_TO_SECOND(tmp_buf); - + std::string str_port; + std::string out_port; + std::string inference_port; + + unit_call("sys", "register_unit", unit_name, [&](const std::shared_ptr &pzmg_msg) { + str_port = pzmg_msg->get_param(1); + out_port = pzmg_msg->get_param(0, str_port); + inference_port = pzmg_msg->get_param(1, str_port); + str_port = pzmg_msg->get_param(0); + }); + work_id_number = std::stoi(str_port); SLOGI("work_id_number:%d, out_port:%s, inference_port:%s ", work_id_number, out_port.c_str(), inference_port.c_str()); llm_task_channel_[work_id_number] = std::make_shared(out_port, inference_port, unit_name_); @@ -511,20 +508,23 @@ void StackFlow::sys_sql_unset(const std::string &key) unit_call("sys", "sql_unset", key); } -void StackFlow::_repeat_loop(const std::string &action, const std::string &ms) +void StackFlow::_repeat_loop(const std::shared_ptr &arg) { + std::shared_ptr originalPtr = std::static_pointer_cast(arg); + std::string action = originalPtr->string(0); + int ms = originalPtr->integer(0); repeat_callback_fun_mutex_.lock(); const auto call_fun = repeat_callback_fun_[action]; repeat_callback_fun_mutex_.unlock(); if (call_fun()) { - int delayms = std::stoi(ms); + int delayms = ms; if (delayms) std::thread([this, action, delayms, ms]() { std::this_thread::sleep_for(std::chrono::milliseconds(delayms)); - this->event_queue_.enqueue(EVENT_REPEAT_EVENT, action, ms); + this->event_queue_.enqueue(EVENT_REPEAT_EVENT, std::make_shared(action, ms)); }).detach(); else { - event_queue_.enqueue(EVENT_REPEAT_EVENT, action, ms); + event_queue_.enqueue(EVENT_REPEAT_EVENT, std::make_shared(action, ms)); } } else { repeat_callback_fun_mutex_.lock(); @@ -542,9 +542,9 @@ void StackFlow::repeat_event(int ms, std::function repeat_fun, bool n if (!now) std::thread([this, action, ms]() { std::this_thread::sleep_for(std::chrono::milliseconds(ms)); - this->event_queue_.enqueue(EVENT_REPEAT_EVENT, action, std::to_string(ms)); + this->event_queue_.enqueue(EVENT_REPEAT_EVENT, std::make_shared(action, ms)); }).detach(); else { - event_queue_.enqueue(EVENT_REPEAT_EVENT, action, std::to_string(ms)); + event_queue_.enqueue(EVENT_REPEAT_EVENT, std::make_shared(action, ms)); } } diff --git a/ext_components/StackFlow/stackflow/StackFlow.h b/ext_components/StackFlow/stackflow/StackFlow.h index 2098458c..25ac6743 100644 --- a/ext_components/StackFlow/stackflow/StackFlow.h +++ b/ext_components/StackFlow/stackflow/StackFlow.h @@ -98,7 +98,7 @@ class llm_channel_obj { return enstream_; } void subscriber_event_call(const std::function &call, pzmq *_pzmq, - const std::string &raw); + const std::shared_ptr &raw); int subscriber_work_id(const std::string &work_id, const std::function &call); void stop_subscriber_work_id(const std::string &work_id); 
@@ -185,6 +185,37 @@ class llm_channel_obj {
     }
 };
 
+class stackflow_data {
+public:
+    stackflow_data()
+    {
+    }
+    stackflow_data(const std::string &_data1)
+    {
+        str_data[0] = _data1;
+    }
+    stackflow_data(const std::string &_data1, const std::string &_data2)
+    {
+        str_data[0] = _data1;
+        str_data[1] = _data2;
+    }
+    stackflow_data(const std::string &_data1, int _data2)
+    {
+        str_data[0] = _data1;
+        int_data[0] = _data2;
+    }
+    std::string string(int index = 0)
+    {
+        return str_data[index];
+    }
+    int integer(int index = 0)
+    {
+        return int_data[index];
+    }
+    std::string str_data[2];
+    int int_data[2];
+};
+
 class StackFlow {
 private:
     std::atomic_int work_id_num_cout_;
@@ -209,7 +240,7 @@ class StackFlow {
         EVENT_EXPORT,
     } local_event_t;
 
-    eventpp::EventQueue<local_event_t, void(const std::string &, const std::string &)> event_queue_;
+    eventpp::EventQueue<local_event_t, void(const std::shared_ptr<void> &)> event_queue_;
     std::unique_ptr<std::thread> even_loop_thread_;
     std::unique_ptr<pzmq> rpc_ctx_;
     std::atomic<int> status_;
@@ -217,7 +248,7 @@ class StackFlow {
     std::unordered_map<std::string, std::function<int(void)>> repeat_callback_fun_;
     std::mutex repeat_callback_fun_mutex_;
 
-    void _repeat_loop(const std::string &zmq_url, const std::string &raw);
+    void _repeat_loop(const std::shared_ptr<void> &arg);
 
 public:
     std::string request_id_;
@@ -236,7 +267,7 @@ class StackFlow {
     StackFlow(const std::string &unit_name);
     void even_loop();
 
-    void _none_event(const std::string &data1, const std::string &data2);
+    void _none_event(const std::shared_ptr<void> &arg);
 
     template <typename T>
     std::shared_ptr<llm_channel_obj> get_channel(T workid)
     {
@@ -252,9 +283,12 @@ class StackFlow {
         return llm_task_channel_.at(_work_id_num);
     }
 
-    std::string _rpc_setup(pzmq *_pzmq, const std::string &data);
-    void _setup(const std::string &zmq_url, const std::string &data)
+    std::string _rpc_setup(pzmq *_pzmq, const std::shared_ptr<pzmq_data> &data);
+    void _setup(const std::shared_ptr<void> &arg)
     {
+        std::shared_ptr<stackflow_data> originalPtr = std::static_pointer_cast<stackflow_data>(arg);
+        std::string zmq_url = originalPtr->string(0);
+        std::string data    = originalPtr->string(1);
         // printf("void _setup run \n");
         request_id_  = sample_json_str_get(data, "request_id");
         out_zmq_url_ = zmq_url;
@@ -263,9 +297,12 @@ class StackFlow {
     virtual int setup(const std::string &zmq_url, const std::string &raw);
     virtual int setup(const std::string &work_id, const std::string &object, const std::string &data);
 
-    std::string _rpc_link(pzmq *_pzmq, const std::string &data);
-    void _link(const std::string &zmq_url, const std::string &data)
+    std::string _rpc_link(pzmq *_pzmq, const std::shared_ptr<pzmq_data> &data);
+    void _link(const std::shared_ptr<void> &arg)
     {
+        std::shared_ptr<stackflow_data> originalPtr = std::static_pointer_cast<stackflow_data>(arg);
+        std::string zmq_url = originalPtr->string(0);
+        std::string data    = originalPtr->string(1);
         // printf("void _link run \n");
         request_id_  = sample_json_str_get(data, "request_id");
         out_zmq_url_ = zmq_url;
@@ -274,9 +311,12 @@ class StackFlow {
     virtual void link(const std::string &zmq_url, const std::string &raw);
     virtual void link(const std::string &work_id, const std::string &object, const std::string &data);
 
-    std::string _rpc_unlink(pzmq *_pzmq, const std::string &data);
-    void _unlink(const std::string &zmq_url, const std::string &data)
+    std::string _rpc_unlink(pzmq *_pzmq, const std::shared_ptr<pzmq_data> &data);
+    void _unlink(const std::shared_ptr<void> &arg)
     {
+        std::shared_ptr<stackflow_data> originalPtr = std::static_pointer_cast<stackflow_data>(arg);
+        std::string zmq_url = originalPtr->string(0);
+        std::string data    = originalPtr->string(1);
         // printf("void _unlink run \n");
         request_id_  = sample_json_str_get(data, "request_id");
         out_zmq_url_ = zmq_url;
@@ -285,47 +325,59 @@ class StackFlow {
     virtual void unlink(const std::string
&zmq_url, const std::string &raw); virtual void unlink(const std::string &work_id, const std::string &object, const std::string &data); - std::string _rpc_exit(pzmq *_pzmq, const std::string &data); - void _exit(const std::string &zmq_url, const std::string &data) + std::string _rpc_exit(pzmq *_pzmq, const std::shared_ptr &data); + void _exit(const std::shared_ptr &arg) { - request_id_ = sample_json_str_get(data, "request_id"); - out_zmq_url_ = zmq_url; + std::shared_ptr originalPtr = std::static_pointer_cast(arg); + std::string zmq_url = originalPtr->string(0); + std::string data = originalPtr->string(1); + request_id_ = sample_json_str_get(data, "request_id"); + out_zmq_url_ = zmq_url; if (status_.load()) exit(zmq_url, data); } virtual int exit(const std::string &zmq_url, const std::string &raw); virtual int exit(const std::string &work_id, const std::string &object, const std::string &data); - std::string _rpc_work(pzmq *_pzmq, const std::string &data); - void _work(const std::string &zmq_url, const std::string &data) + std::string _rpc_work(pzmq *_pzmq, const std::shared_ptr &data); + void _work(const std::shared_ptr &arg) { - request_id_ = sample_json_str_get(data, "request_id"); - out_zmq_url_ = zmq_url; + std::shared_ptr originalPtr = std::static_pointer_cast(arg); + std::string zmq_url = originalPtr->string(0); + std::string data = originalPtr->string(1); + request_id_ = sample_json_str_get(data, "request_id"); + out_zmq_url_ = zmq_url; if (status_.load()) work(zmq_url, data); } virtual void work(const std::string &zmq_url, const std::string &raw); virtual void work(const std::string &work_id, const std::string &object, const std::string &data); - std::string _rpc_pause(pzmq *_pzmq, const std::string &data); - void _pause(const std::string &zmq_url, const std::string &data) + std::string _rpc_pause(pzmq *_pzmq, const std::shared_ptr &data); + void _pause(const std::shared_ptr &arg) { - request_id_ = sample_json_str_get(data, "request_id"); - out_zmq_url_ = zmq_url; + std::shared_ptr originalPtr = std::static_pointer_cast(arg); + std::string zmq_url = originalPtr->string(0); + std::string data = originalPtr->string(1); + request_id_ = sample_json_str_get(data, "request_id"); + out_zmq_url_ = zmq_url; if (status_.load()) pause(zmq_url, data); } virtual void pause(const std::string &zmq_url, const std::string &raw); virtual void pause(const std::string &work_id, const std::string &object, const std::string &data); - std::string _rpc_taskinfo(pzmq *_pzmq, const std::string &data); - void _taskinfo(const std::string &zmq_url, const std::string &data) + std::string _rpc_taskinfo(pzmq *_pzmq, const std::shared_ptr &data); + void _taskinfo(const std::shared_ptr &arg) { - request_id_ = sample_json_str_get(data, "request_id"); - out_zmq_url_ = zmq_url; + std::shared_ptr originalPtr = std::static_pointer_cast(arg); + std::string zmq_url = originalPtr->string(0); + std::string data = originalPtr->string(1); + request_id_ = sample_json_str_get(data, "request_id"); + out_zmq_url_ = zmq_url; if (status_.load()) taskinfo(zmq_url, data); } virtual void taskinfo(const std::string &zmq_url, const std::string &raw); virtual void taskinfo(const std::string &work_id, const std::string &object, const std::string &data); - void _sys_init(const std::string &zmq_url, const std::string &data); + void _sys_init(const std::shared_ptr &arg); void user_output(const std::string &zmq_url, const std::string &request_id, const std::string &data); template @@ -379,7 +431,7 @@ class StackFlow { return false; } pzmq 
_call("sys"); - _call.call_rpc_action("release_unit", _work_id, [](pzmq *_pzmq, const std::string &data) {}); + _call.call_rpc_action("release_unit", _work_id, [](pzmq *_pzmq, const std::shared_ptr &data) {}); llm_task_channel_[_work_id_num].reset(); llm_task_channel_.erase(_work_id_num); // SLOGI("release work_id %s success", _work_id.c_str()); diff --git a/ext_components/StackFlow/stackflow/StackFlowUtil.cpp b/ext_components/StackFlow/stackflow/StackFlowUtil.cpp index a5cebea0..7da33547 100644 --- a/ext_components/StackFlow/stackflow/StackFlowUtil.cpp +++ b/ext_components/StackFlow/stackflow/StackFlowUtil.cpp @@ -358,10 +358,17 @@ std::string StackFlows::unit_call(const std::string &unit_name, const std::strin { std::string value; pzmq _call(unit_name); - _call.call_rpc_action(unit_action, data, [&value](pzmq *_pzmq, const std::string &raw) { value = raw; }); + _call.call_rpc_action(unit_action, data, [&value](pzmq *_pzmq, const std::shared_ptr &raw) { value = raw->string(); }); return value; } +void StackFlows::unit_call(const std::string &unit_name, const std::string &unit_action, const std::string &data, std::function &)> callback) +{ + std::string value; + StackFlows::pzmq _call(unit_name); + _call.call_rpc_action(unit_action, data, [callback](StackFlows::pzmq *_pzmq, const std::shared_ptr &raw) { callback(raw); }); +} + std::list StackFlows::get_config_file_paths(std::string &base_model_path, std::string &base_model_config_path, const std::string &mode_name) diff --git a/ext_components/StackFlow/stackflow/StackFlowUtil.h b/ext_components/StackFlow/stackflow/StackFlowUtil.h index 9b3df868..ed867191 100644 --- a/ext_components/StackFlow/stackflow/StackFlowUtil.h +++ b/ext_components/StackFlow/stackflow/StackFlowUtil.h @@ -9,6 +9,9 @@ #include #include #include +#include +#include "pzmq.hpp" +#include #define WORK_ID_NONE -100 #define RPC_PUSH_PARAM(_obj, _data1, _data2) \ @@ -33,6 +36,7 @@ bool decode_stream(const std::string &in, std::string &out, std::unordered_map &)> callback); std::list get_config_file_paths(std::string &base_model_path, std::string &base_model_config_path, const std::string &mode_name); std::vector glob_files(const std::vector &patterns); bool file_exists(const std::string& filePath); diff --git a/ext_components/StackFlow/stackflow/pzmq.hpp b/ext_components/StackFlow/stackflow/pzmq.hpp index 1a7baf90..5d51ac32 100644 --- a/ext_components/StackFlow/stackflow/pzmq.hpp +++ b/ext_components/StackFlow/stackflow/pzmq.hpp @@ -18,10 +18,75 @@ #define ZMQ_RPC_CALL (ZMQ_REQ | 0x80) namespace StackFlows { + +class pzmq_data { +private: + zmq_msg_t msg; + +public: + pzmq_data(/* args */) + { + zmq_msg_init(&msg); + } + std::shared_ptr get_string() + { + auto len = zmq_msg_size(&msg); + return std::make_shared((const char *)zmq_msg_data(&msg), zmq_msg_size(&msg)); + } + std::string string() + { + auto len = zmq_msg_size(&msg); + return std::string((const char *)zmq_msg_data(&msg), zmq_msg_size(&msg)); + } + void *data() + { + return zmq_msg_data(&msg); + } + size_t size() + { + return zmq_msg_size(&msg); + } + zmq_msg_t *get() + { + return &msg; + } + + std::string get_param(int index, const std::string &idata = "") + { + const char *data = NULL; + int size = 0; + if (idata.length() > 0) { + data = idata.c_str(); + size = idata.length(); + } else { + data = (const char *)zmq_msg_data(&msg); + size = zmq_msg_size(&msg); + } + + if ((index % 2) == 0) { + return std::string((const char *)(data + 1), data[0]); + } else { + return std::string((const char *)(data + data[0] + 1), 
zmq_msg_size(&msg) - data[0] - 1); + } + } + + static std::string set_param(std::string param0, std::string param1) + { + std::string data = " " + param0 + param1; + data[0] = param0.length(); + return data; + } + + ~pzmq_data() + { + zmq_msg_close(&msg); + } +}; + class pzmq { public: - typedef std::function rpc_callback_fun; - typedef std::function msg_callback_fun; + typedef std::function &)> rpc_callback_fun; + typedef std::function &)> msg_callback_fun; private: const int rpc_url_head_length = 6; @@ -85,7 +150,7 @@ class pzmq { } return zmq_url_; } - std::string _rpc_list_action(pzmq *self, const std::string &_None) + std::string _rpc_list_action(pzmq *self, const std::shared_ptr &_None) { std::string action_list; action_list.reserve(128); @@ -131,8 +196,7 @@ class pzmq { int call_rpc_action(const std::string &action, const std::string &data, const msg_callback_fun &raw_call) { int ret; - zmq_msg_t msg; - zmq_msg_init(&msg); + std::shared_ptr msg_ptr = std::make_shared(); try { if (NULL == zmq_socket_) { if (rpc_server_.empty()) return -1; @@ -150,13 +214,13 @@ class pzmq { } // action { - zmq_msg_recv(&msg, zmq_socket_, 0); + zmq_msg_recv(msg_ptr->get(), zmq_socket_, 0); } - raw_call(this, std::string((const char *)zmq_msg_data(&msg), zmq_msg_size(&msg))); + raw_call(this, msg_ptr); } catch (int e) { ret = e; } - zmq_msg_close(&msg); + msg_ptr.reset(); close_zmq(); return ret; } @@ -293,8 +357,7 @@ class pzmq { items[0].revents = 0; }; while (!flage_.load()) { - zmq_msg_t msg; - zmq_msg_init(&msg); + std::shared_ptr msg_ptr = std::make_shared(); if (mode_ == ZMQ_PULL) { ret = zmq_poll(items, 1, -1); if (ret == -1) { @@ -305,30 +368,28 @@ class pzmq { continue; } } - ret = zmq_msg_recv(&msg, zmq_socket_, 0); + ret = zmq_msg_recv(msg_ptr->get(), zmq_socket_, 0); if (ret <= 0) { - zmq_msg_close(&msg); + msg_ptr.reset(); continue; } - std::string raw_data((const char *)zmq_msg_data(&msg), zmq_msg_size(&msg)); + if (mode_ == ZMQ_RPC_FUN) { - zmq_msg_t msg1; - zmq_msg_init(&msg1); - zmq_msg_recv(&msg1, zmq_socket_, 0); - std::string _raw_data((const char *)zmq_msg_data(&msg1), zmq_msg_size(&msg1)); + std::shared_ptr msg1_ptr = std::make_shared(); + zmq_msg_recv(msg1_ptr->get(), zmq_socket_, 0); std::string retval; try { std::unique_lock lock(zmq_fun_mtx_); - retval = zmq_fun_.at(raw_data)(this, _raw_data); + retval = zmq_fun_.at(msg_ptr->string())(this, msg1_ptr); } catch (...) { retval = "NotAction"; } zmq_send(zmq_socket_, retval.c_str(), retval.length(), 0); - zmq_msg_close(&msg1); + msg1_ptr.reset(); } else { - raw_call(this, raw_data); + raw_call(this, msg_ptr); } - zmq_msg_close(&msg); + msg_ptr.reset(); } } void close_zmq() diff --git a/ext_components/ax_msp/Kconfig b/ext_components/ax_msp/Kconfig index ba264e39..73824242 100644 --- a/ext_components/ax_msp/Kconfig +++ b/ext_components/ax_msp/Kconfig @@ -12,6 +12,10 @@ menuconfig AX_MSP_ENABLED bool "enable ax620e bsp" help build by AXERA! + config AX_650N_MSP_ENABLED + bool "enable ax650n bsp" + help + build by AXERA! 
config AX_520_MSP_ENABLED bool "enable ax520 bsp" help diff --git a/projects/llm_framework/main/SConstruct b/projects/llm_framework/main/SConstruct index 32841632..77237295 100644 --- a/projects/llm_framework/main/SConstruct +++ b/projects/llm_framework/main/SConstruct @@ -24,10 +24,9 @@ STATIC_FILES += [AFile('../static_lib/sherpa/ncnn/libsherpa-ncnn-core.so'), AFile('../static_lib/sherpa/ncnn/libncnn.so'), AFile('../static_lib/libtts.so'), AFile('../static_lib/sherpa/ncnn/libkaldi-native-fbank-core.so'), - AFile('../static_lib/libonnxruntime.so.1.14.0') ] -env['COMPONENTS'].append({'target':'static_file', +env['COMPONENTS'].append({'target':'static_file-1.0', 'SRCS':SRCS, 'INCLUDE':INCLUDE, 'PRIVATE_INCLUDE':PRIVATE_INCLUDE, diff --git a/projects/llm_framework/main/src/main.cpp b/projects/llm_framework/main/src/main.cpp index 116a52da..89423c94 100644 --- a/projects/llm_framework/main/src/main.cpp +++ b/projects/llm_framework/main/src/main.cpp @@ -73,8 +73,17 @@ class llm_task { { } + void start() + { + } + + void stop() + { + } + ~llm_task() { + stop(); } }; @@ -229,7 +238,7 @@ class llm_llm : public StackFlow { req_body["model"] = llm_task_obj->model_; req_body["response_format"] = llm_task_obj->response_format_; req_body["enoutput"] = llm_task_obj->enoutput_; - req_body["inputs"] = llm_task_obj->inputs_; + req_body["inputs"] = llm_task_obj->inputs_; send("llm.taskinfo", req_body, LLM_NO_ERROR, work_id); } } @@ -244,6 +253,7 @@ send("None", "None", error_body, work_id); return -1; } + llm_task_[work_id_num]->stop(); auto llm_channel = get_channel(work_id_num); llm_channel->stop_subscriber(""); llm_task_.erase(work_id_num); @@ -258,6 +268,7 @@ if (iteam == llm_task_.end()) { break; } + iteam->second->stop(); get_channel(iteam->first)->stop_subscriber(""); iteam->second.reset(); llm_task_.erase(iteam->first); diff --git a/projects/llm_framework/main_asr/SConstruct b/projects/llm_framework/main_asr/SConstruct index c947a9c5..344162bb 100644 --- a/projects/llm_framework/main_asr/SConstruct +++ b/projects/llm_framework/main_asr/SConstruct @@ -26,7 +26,7 @@ REQUIREMENTS += ['ncnn', 'sherpa-ncnn-core'] STATIC_FILES += Glob('mode_*.json') -env['COMPONENTS'].append({'target':'llm_asr', +env['COMPONENTS'].append({'target':'llm_asr-1.6', 'SRCS':SRCS, 'INCLUDE':INCLUDE, 'PRIVATE_INCLUDE':PRIVATE_INCLUDE, diff --git a/projects/llm_framework/main_asr/src/main.cpp b/projects/llm_framework/main_asr/src/main.cpp index 0648433b..c3bd64f2 100644 --- a/projects/llm_framework/main_asr/src/main.cpp +++ b/projects/llm_framework/main_asr/src/main.cpp @@ -101,6 +101,7 @@ class llm_task { SLOGW("config file :%s miss", file_name.c_str()); continue; } + SLOGI("config file :%s read", file_name.c_str()); config_file >> file_body; config_file.close(); break; @@ -247,11 +248,17 @@ class llm_task { pcmdata = buffer_create(); } + void start() + { + } + + void stop() + { + } + ~llm_task() { - if (recognizer_stream_) { - recognizer_stream_.reset(); - } + stop(); buffer_destroy(pcmdata); } }; @@ -271,8 +278,7 @@ class llm_asr : public StackFlow { llm_asr() : StackFlow("asr") { task_count_ = 1; - event_queue_.appendListener( - EVENT_TASK_PAUSE, std::bind(&llm_asr::_task_pause, this, std::placeholders::_1, std::placeholders::_2)); + event_queue_.appendListener(EVENT_TASK_PAUSE, std::bind(&llm_asr::_task_pause, this, std::placeholders::_1)); } void task_output(const std::weak_ptr<llm_task> llm_task_obj_weak, @@ -389,9 +395,10 @@
llm_task_obj->sys_pcm_on_data((*next_data)); } - void _task_pause(const std::string &work_id, const std::string &data) + void _task_pause(const std::shared_ptr<void> &arg) { - int work_id_num = sample_get_work_id_num(work_id); + std::shared_ptr<std::string> work_id = std::static_pointer_cast<std::string>(arg); + int work_id_num = sample_get_work_id_num(*work_id); if (llm_task_.find(work_id_num) == llm_task_.end()) { return; } @@ -405,7 +412,7 @@ void task_pause(const std::string &work_id, const std::string &data) { - event_queue_.enqueue(EVENT_TASK_PAUSE, work_id, ""); + event_queue_.enqueue(EVENT_TASK_PAUSE, std::make_shared<std::string>(work_id)); } void task_work(const std::weak_ptr<llm_task> llm_task_obj_weak, @@ -419,8 +426,8 @@ llm_task_obj->kws_awake(); if ((!audio_url_.empty()) && (llm_task_obj->audio_flage_ == false)) { std::weak_ptr<llm_task> _llm_task_obj = llm_task_obj; - llm_channel->subscriber(audio_url_, [_llm_task_obj](pzmq *_pzmq, const std::string &raw) { - _llm_task_obj.lock()->sys_pcm_on_data(raw); + llm_channel->subscriber(audio_url_, [_llm_task_obj](pzmq *_pzmq, const std::shared_ptr<pzmq_data> &raw) { + _llm_task_obj.lock()->sys_pcm_on_data(raw->string()); }); llm_task_obj->audio_flage_ = true; } @@ -508,9 +515,10 @@ if (input.find("sys") != std::string::npos) { audio_url_ = unit_call("audio", "cap", input); std::weak_ptr<llm_task> _llm_task_obj = llm_task_obj; - llm_channel->subscriber(audio_url_, [_llm_task_obj](pzmq *_pzmq, const std::string &raw) { - _llm_task_obj.lock()->sys_pcm_on_data(raw); - }); + llm_channel->subscriber(audio_url_, + [_llm_task_obj](pzmq *_pzmq, const std::shared_ptr<pzmq_data> &raw) { + _llm_task_obj.lock()->sys_pcm_on_data(raw->string()); + }); llm_task_obj->audio_flage_ = true; } else if (input.find("asr") != std::string::npos) { llm_channel->subscriber_work_id( @@ -556,8 +564,8 @@ if (data.find("sys") != std::string::npos) { if (audio_url_.empty()) audio_url_ = unit_call("audio", "cap", data); std::weak_ptr<llm_task> _llm_task_obj = llm_task_obj; - llm_channel->subscriber(audio_url_, [_llm_task_obj](pzmq *_pzmq, const std::string &raw) { - _llm_task_obj.lock()->sys_pcm_on_data(raw); + llm_channel->subscriber(audio_url_, [_llm_task_obj](pzmq *_pzmq, const std::shared_ptr<pzmq_data> &raw) { + _llm_task_obj.lock()->sys_pcm_on_data(raw->string()); }); llm_task_obj->audio_flage_ = true; llm_task_obj->inputs_.push_back(data); @@ -643,6 +651,7 @@ send("None", "None", error_body, work_id); return -1; } + llm_task_[work_id_num]->stop(); auto llm_channel = get_channel(work_id_num); llm_channel->stop_subscriber(""); if (llm_task_[work_id_num]->audio_flage_) { @@ -660,6 +669,7 @@ if (iteam == llm_task_.end()) { break; } + iteam->second->stop(); if (iteam->second->audio_flage_) { unit_call("audio", "cap_stop", "None"); } diff --git a/projects/llm_framework/main_audio/SConstruct b/projects/llm_framework/main_audio/SConstruct index afbad26d..27a04b68 100644 --- a/projects/llm_framework/main_audio/SConstruct +++ b/projects/llm_framework/main_audio/SConstruct @@ -29,7 +29,7 @@ REQUIREMENTS += ['tinyalsa', 'opus', 'samplerate', 'fdk-aac'] STATIC_FILES += [AFile('audio.json')] STATIC_FILES += Glob('mode_*.json') -env['COMPONENTS'].append({'target':'llm_audio', +env['COMPONENTS'].append({'target':'llm_audio-1.6', 'SRCS':SRCS, 'INCLUDE':INCLUDE, 'PRIVATE_INCLUDE':PRIVATE_INCLUDE, diff --git a/projects/llm_framework/main_audio/src/main.cpp
b/projects/llm_framework/main_audio/src/main.cpp index 3e023ea9..80da841c 100644 --- a/projects/llm_framework/main_audio/src/main.cpp +++ b/projects/llm_framework/main_audio/src/main.cpp @@ -48,14 +48,15 @@ self->pub_ctx_->send_data((const char *)data, size); } - void hw_queue_play(const std::string &audio_data, const std::string &None) + void hw_queue_play(const std::shared_ptr<void> &arg) { if (audio_clear_flage_) { return; } + std::shared_ptr<pzmq_data> originalPtr = std::static_pointer_cast<pzmq_data>(arg); std::lock_guard<std::mutex> guard(ax_play_mtx); ax_play(play_config.card, play_config.device, play_config.volume, play_config.channel, play_config.rate, - play_config.bit, audio_data.c_str(), audio_data.length()); + play_config.bit, originalPtr->data(), originalPtr->size()); } void hw_play(const std::string &audio_data) @@ -109,8 +110,8 @@ public: llm_audio() : StackFlow("audio") { - event_queue_.appendListener( - EVENT_QUEUE_PLAY, std::bind(&llm_audio::hw_queue_play, this, std::placeholders::_1, std::placeholders::_2)); + event_queue_.appendListener(EVENT_QUEUE_PLAY, + std::bind(&llm_audio::hw_queue_play, this, std::placeholders::_1)); setup("", "audio.play", "{\"None\":\"None\"}"); setup("", "audio.cap", "{\"None\":\"None\"}"); self = this; @@ -151,6 +152,7 @@ SLOGW("config file :%s miss", file_name.c_str()); continue; } + SLOGI("config file :%s read", file_name.c_str()); config_file >> file_body; config_file.close(); break; @@ -388,10 +390,10 @@ return LLM_NONE; } - std::string play(pzmq *_pzmq, const std::string &rawdata) + std::string play(pzmq *_pzmq, const std::shared_ptr<pzmq_data> &rawdata) { - std::string zmq_url = RPC_PARSE_TO_FIRST(rawdata); - std::string audio_json = RPC_PARSE_TO_SECOND(rawdata); + std::string zmq_url = rawdata->get_param(0); + std::string audio_json = rawdata->get_param(1); std::string ret_val = parse_data(sample_json_str_get(audio_json, "object"), sample_json_str_get(audio_json, "data")); request_id_ = sample_json_str_get(audio_json, "request_id"); @@ -399,29 +401,31 @@ return ret_val; } - std::string play_raw(pzmq *_pzmq, const std::string &rawdata) + std::string play_raw(pzmq *_pzmq, const std::shared_ptr<pzmq_data> &rawdata) { - if (rawdata.empty()) return std::string("rawdata empty"); - _play(rawdata); + auto _rawdata = rawdata->string(); + if (_rawdata.empty()) return std::string("rawdata empty"); + _play(_rawdata); return LLM_NONE; } - std::string enqueue_play(pzmq *_pzmq, const std::string &rawdata) + std::string enqueue_play(pzmq *_pzmq, const std::shared_ptr<pzmq_data> &rawdata) { audio_clear_flage_ = false; - event_queue_.enqueue(EVENT_QUEUE_PLAY, rawdata, ""); + event_queue_.enqueue(EVENT_QUEUE_PLAY, rawdata); return LLM_NONE; } - std::string audio_status(pzmq *_pzmq, const std::string &rawdata) + std::string audio_status(pzmq *_pzmq, const std::shared_ptr<pzmq_data> &rawdata) { - if (rawdata == "play") { + std::string _rawdata = rawdata->string(); + if (_rawdata == "play") { if (ax_play_status()) { return std::string("None"); } else { return std::string("Runing"); } - } else if (rawdata == "cap") { + } else if (_rawdata == "cap") { if (ax_cap_status()) { return std::string("None"); } else { @@ -446,19 +450,19 @@ } } - std::string play_stop(pzmq *_pzmq, const std::string &rawdata) + std::string play_stop(pzmq *_pzmq, const std::shared_ptr<pzmq_data> &rawdata) { _play_stop(); return LLM_NONE; } - std::string
queue_play_stop(pzmq *_pzmq, const std::string &rawdata) + std::string queue_play_stop(pzmq *_pzmq, const std::shared_ptr<pzmq_data> &rawdata) { audio_clear_flage_ = true; return LLM_NONE; } - std::string cap(pzmq *_pzmq, const std::string &rawdata) + std::string cap(pzmq *_pzmq, const std::shared_ptr<pzmq_data> &rawdata) { if (cap_status_ == 0) { _cap(); @@ -467,7 +471,7 @@ return sys_pcm_cap_channel; } - std::string cap_stop(pzmq *_pzmq, const std::string &rawdata) + std::string cap_stop(pzmq *_pzmq, const std::shared_ptr<pzmq_data> &rawdata) { if (cap_status_ > 0) { cap_status_--; @@ -478,7 +482,7 @@ return LLM_NONE; } - std::string cap_stop_all(pzmq *_pzmq, const std::string &rawdata) + std::string cap_stop_all(pzmq *_pzmq, const std::shared_ptr<pzmq_data> &rawdata) { cap_status_ = 0; _cap_stop(); diff --git a/projects/llm_framework/main_camera/SConstruct b/projects/llm_framework/main_camera/SConstruct index 56707dbd..eb5190c7 100644 --- a/projects/llm_framework/main_camera/SConstruct +++ b/projects/llm_framework/main_camera/SConstruct @@ -6,9 +6,9 @@ with open(env['PROJECT_TOOL_S']) as f: # SRCS = append_srcs_dir(ADir('src')) SRCS = Glob('src/*.c*') -INCLUDE = [ADir('include'), ADir('.')] +INCLUDE = [ADir('../include'), ADir('.')] PRIVATE_INCLUDE = [] -REQUIREMENTS = ['pthread', 'utilities', 'ax_msp', 'eventpp', 'StackFlow', 'single_header_libs'] +REQUIREMENTS = ['hv', 'pthread', 'utilities', 'ax_msp', 'eventpp', 'StackFlow', 'single_header_libs'] STATIC_LIB = [] DYNAMIC_LIB = [] DEFINITIONS = [] @@ -17,11 +17,26 @@ LDFLAGS = [] LINK_SEARCH_PATH = [] STATIC_FILES = [] -DEFINITIONS += ['-std=c++17', '-O2'] +DEFINITIONS += ['-O2'] +DEFINITIONS += ['-std=c++17'] LDFLAGS+=['-Wl,-rpath=/opt/m5stack/lib', '-Wl,-rpath=/usr/local/m5stack/lib', '-Wl,-rpath=/usr/local/m5stack/lib/gcc-10.3', '-Wl,-rpath=/opt/lib', '-Wl,-rpath=/opt/usr/lib', '-Wl,-rpath=./'] # INCLUDE += [ADir('../include'), ADir('../include/cppzmq'), ADir('../include/libzmq')] LINK_SEARCH_PATH += [ADir('../static_lib')] +# REQUIREMENTS += ['Backward_cpp'] +# DYNAMIC_LIB += [ AFile('../static_lib/libdw.so.1'), +# AFile('../static_lib/libelf.so.1'), +# AFile('../static_lib/libz.so.1'), +# AFile('../static_lib/liblzma.so.5'), +# AFile('../static_lib/libbz2.so.1.0')] +# DEFINITIONS += ["-DENABLE_BACKWARD"] + +INCLUDE.append(os.path.join(env["MSP_PATH"], 'third-party/live/out/arm64/glibc/include/liveMedia')) +INCLUDE.append(os.path.join(env["MSP_PATH"], 'third-party/live/out/arm64/glibc/include/groupsock')) +INCLUDE.append(os.path.join(env["MSP_PATH"], 'third-party/live/out/arm64/glibc/include/UsageEnvironment')) +INCLUDE.append(os.path.join(env["MSP_PATH"], 'third-party/live/out/arm64/glibc/include/BasicUsageEnvironment')) +INCLUDE.append(os.path.join(env["MSP_PATH"], 'sample/rtsp')) +SRCS += Glob(os.path.join(env["MSP_PATH"], 'sample/rtsp/*.c*')) # INCLUDE.append(ADir('ax_sample')) # SRCS += Glob('ax_sample/*.c*') @@ -30,7 +45,7 @@ REQUIREMENTS += ['ax_sys', 'ax_interpreter'] REQUIREMENTS += ['ax_engine', 'ax_interpreter', 'ax_sys', 'ax_vo'] REQUIREMENTS += ['ax_ae', 'ax_skel', 'ax_venc', 'ax_mipi', 'ax_ives', 'ax_ivps', 'ax_proton', 'ax_audio', 'tinyalsa'] REQUIREMENTS += ['ax_audio_3a','samplerate', 'ax_fdk', 'fdk-aac', 'ax_awb', 'ax_af', 'ax_nt_stream', 'ax_nt_ctrl'] - +REQUIREMENTS += ['liveMedia', 'UsageEnvironment', 'groupsock', 'BasicUsageEnvironment'] INCLUDE += [ADir('../include/opencv4')] static_file = [] @@ -53,7 +68,7 @@ STATIC_LIB += static_file * 4 STATIC_FILES += [AFile('camera.json')]
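(Editor's aside: the asr and audio hunks above replace their two-string eventpp events with a single type-erased payload that listeners downcast. A standalone sketch of that pattern follows; the exact queue prototype and event id are assumptions inferred from the call sites, not the project's declarations.)

```cpp
#include <eventpp/eventqueue.h>

#include <iostream>
#include <memory>
#include <string>

enum { EVENT_TASK_PAUSE = 0 };  // placeholder event id

int main()
{
    // One shared_ptr<void> per event, matching the new
    // _task_pause(const std::shared_ptr<void> &) signature.
    eventpp::EventQueue<int, void(const std::shared_ptr<void> &)> event_queue_;

    event_queue_.appendListener(EVENT_TASK_PAUSE, [](const std::shared_ptr<void> &arg) {
        // The listener recovers the concrete type, as _task_pause() and
        // hw_queue_play() do with std::static_pointer_cast.
        auto work_id = std::static_pointer_cast<std::string>(arg);
        std::cout << "pause " << *work_id << "\n";
    });

    // Producers allocate the payload once; the queue copies only a pointer,
    // which is what lets enqueue_play() forward raw audio without a copy.
    event_queue_.enqueue(EVENT_TASK_PAUSE, std::make_shared<std::string>("asr.1001"));
    event_queue_.process();  // dispatch queued events on this thread
    return 0;
}
```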
STATIC_FILES += Glob('mode_*.json') -env['COMPONENTS'].append({'target':'llm_camera', +env['COMPONENTS'].append({'target':'llm_camera-1.8', 'SRCS':SRCS, 'INCLUDE':INCLUDE, 'PRIVATE_INCLUDE':PRIVATE_INCLUDE, diff --git a/projects/llm_framework/main_camera/camera.json b/projects/llm_framework/main_camera/camera.json index 608b163c..22fbd880 100644 --- a/projects/llm_framework/main_camera/camera.json +++ b/projects/llm_framework/main_camera/camera.json @@ -2,17 +2,119 @@ "mode": "None", "type": "camera", "capabilities": [ - "play", - "cap" + "camera cap" ], "input_type": [ - "rpc.camera.wav.base64", - "rpc.camera.pcm.base64" + "camera.v4l2_dev", + "camera.axera_dev" ], "output_type": [ - "camera.pcm.stream" + "image.yuyv422.base64", + "image.jpeg.base64" ], "cap_param": { "None": "None" + }, + "jpeg_config_param": { + "stVencAttr.enType": 26, + "stVencAttr.u32MaxPicWidth": 32768, + "stVencAttr.u32MaxPicHeight": 32768, + "stVencAttr.u32PicWidthSrc": 32768, + "stVencAttr.u32PicHeightSrc": 32768, + "stVencAttr.enLinkMode": 1 + }, + "h264_config_param": { + "stVencAttr.enType": 96, + "stVencAttr.u32MaxPicWidth": 3840, + "stVencAttr.u32MaxPicHeight": 2160, + "stVencAttr.enMemSource": 0, + "stVencAttr.u32BufSize": 16588800, + "stVencAttr.enProfile": 10, + "stVencAttr.enLevel": 52, + "stVencAttr.enTier": 0, + "stVencAttr.u32PicWidthSrc": 1280, + "stVencAttr.u32PicHeightSrc": 720, + "stVencAttr.stCropCfg.bEnable": 0, + "stVencAttr.stCropCfg.stRect.s32X": 0, + "stVencAttr.stCropCfg.stRect.s32Y": 0, + "stVencAttr.stCropCfg.stRect.u32Width": 0, + "stVencAttr.stCropCfg.stRect.u32Height": 0, + "stVencAttr.enRotation": 0, + "stVencAttr.enLinkMode": 1, + "stVencAttr.bDeBreathEffect": 0, + "stVencAttr.bRefRingbuf": 0, + "stVencAttr.s32StopWaitTime": 0, + "stVencAttr.u8InFifoDepth": 4, + "stVencAttr.u8OutFifoDepth": 4, + "stVencAttr.u32SliceNum": 0, + "stVencAttr.stAttrH265e.bRcnRefShareBuf": 0, + "stRcAttr.enRcMode": 1, + "stRcAttr.s32FirstFrameStartQp": -1, + "stRcAttr.stFrameRate.fSrcFrameRate": 30.0, + "stRcAttr.stFrameRate.fDstFrameRate": 30.0, + "stRcAttr.stH264Cbr.u32Gop": 120, + "stRcAttr.stH264Cbr.u32StatTime": 0, + "stRcAttr.stH264Cbr.u32BitRate": 2048, + "stRcAttr.stH264Cbr.u32MinQp": 10, + "stRcAttr.stH264Cbr.u32MaxQp": 51, + "stRcAttr.stH264Cbr.u32MinIQp": 10, + "stRcAttr.stH264Cbr.u32MaxIQp": 51, + "stRcAttr.stH264Cbr.u32MaxIprop": 40, + "stRcAttr.stH264Cbr.u32MinIprop": 10, + "stRcAttr.stH264Cbr.s32IntraQpDelta": -2, + "stRcAttr.stH264Cbr.s32DeBreathQpDelta": 0, + "stRcAttr.stH264Cbr.u32IdrQpDeltaRange": 0, + "stRcAttr.stH264Cbr.stQpmapInfo.enCtbRcMode": 0, + "stRcAttr.stH264Cbr.stQpmapInfo.enQpmapQpType": 0, + "stRcAttr.stH264Cbr.stQpmapInfo.enQpmapBlockType": 0, + "stRcAttr.stH264Cbr.stQpmapInfo.enQpmapBlockUnit": 0, + "stGopAttr.enGopMode": 0 + }, + "h265_config_param": { + "stVencAttr.enType": 265, + "stVencAttr.u32MaxPicWidth": 3840, + "stVencAttr.u32MaxPicHeight": 2160, + "stVencAttr.enMemSource": 0, + "stVencAttr.u32BufSize": 16588800, + "stVencAttr.enProfile": 0, + "stVencAttr.enLevel": 153, + "stVencAttr.enTier": 0, + "stVencAttr.u32PicWidthSrc": 1280, + "stVencAttr.u32PicHeightSrc": 720, + "stVencAttr.stCropCfg.bEnable": 0, + "stVencAttr.stCropCfg.stRect.s32X": 0, + "stVencAttr.stCropCfg.stRect.s32Y": 0, + "stVencAttr.stCropCfg.stRect.u32Width": 0, + "stVencAttr.stCropCfg.stRect.u32Height": 0, + "stVencAttr.enRotation": 0, + "stVencAttr.enLinkMode": 1, + "stVencAttr.bDeBreathEffect": 0, + "stVencAttr.bRefRingbuf": 0, + "stVencAttr.s32StopWaitTime": 0, + "stVencAttr.u8InFifoDepth": 4, 
+ "stVencAttr.u8OutFifoDepth": 4, + "stVencAttr.u32SliceNum": 0, + "stVencAttr.stAttrH265e.bRcnRefShareBuf": 0, + "stRcAttr.enRcMode": 11, + "stRcAttr.s32FirstFrameStartQp": -1, + "stRcAttr.stFrameRate.fSrcFrameRate": 30.0, + "stRcAttr.stFrameRate.fDstFrameRate": 30.0, + "stRcAttr.stH265Cbr.u32Gop": 120, + "stRcAttr.stH265Cbr.u32StatTime": 0, + "stRcAttr.stH265Cbr.u32BitRate": 2048, + "stRcAttr.stH265Cbr.u32MinQp": 10, + "stRcAttr.stH265Cbr.u32MaxQp": 51, + "stRcAttr.stH265Cbr.u32MinIQp": 10, + "stRcAttr.stH265Cbr.u32MaxIQp": 51, + "stRcAttr.stH265Cbr.u32MaxIprop": 40, + "stRcAttr.stH265Cbr.u32MinIprop": 30, + "stRcAttr.stH265Cbr.s32IntraQpDelta": -2, + "stRcAttr.stH265Cbr.s32DeBreathQpDelta": 0, + "stRcAttr.stH265Cbr.u32IdrQpDeltaRange": 0, + "stRcAttr.stH265Cbr.stQpmapInfo.enCtbRcMode": 0, + "stRcAttr.stH265Cbr.stQpmapInfo.enQpmapQpType": 0, + "stRcAttr.stH265Cbr.stQpmapInfo.enQpmapBlockType": 0, + "stRcAttr.stH265Cbr.stQpmapInfo.enQpmapBlockUnit": 0, + "stGopAttr.enGopMode": 0 } } \ No newline at end of file diff --git a/projects/llm_framework/main_camera/src/axera_camera.c b/projects/llm_framework/main_camera/src/axera_camera.c index d929a218..841a6b56 100644 --- a/projects/llm_framework/main_camera/src/axera_camera.c +++ b/projects/llm_framework/main_camera/src/axera_camera.c @@ -16,7 +16,10 @@ #include #include #include - +#include +#include +#include "AXRtspWrapper.h" +#include "ax_venc_api.h" #include "ax_global_type.h" #include "common_isp.h" #include "common_sys.h" @@ -28,13 +31,16 @@ #include "ax_ivps_api.h" #ifndef ALIGN_UP -#define ALIGN_UP(x, a) ((((x) + ((a) - 1)) / a) * a) +#define ALIGN_UP(x, a) ((((x) + ((a) - 1)) / a) * a) #endif +#define USER_OUTPUT_CHN 2 +#define RTSP_OUTPUT_CHN 1 + AX_MIPI_RX_ATTR_T gSc850slMipiAttr = { - .ePhyMode = AX_MIPI_PHY_TYPE_DPHY, - .eLaneNum = AX_MIPI_DATA_LANE_4, - .nDataRate = 80, + .ePhyMode = AX_MIPI_PHY_TYPE_DPHY, + .eLaneNum = AX_MIPI_DATA_LANE_4, + .nDataRate = 80, .nDataLaneMap[0] = 0, .nDataLaneMap[1] = 1, .nDataLaneMap[2] = 3, @@ -44,30 +50,30 @@ AX_MIPI_RX_ATTR_T gSc850slMipiAttr = { }; AX_SNS_ATTR_T gSc850slSnsAttr = { - .nWidth = 3840, - .nHeight = 2160, - .fFrameRate = 30, - .eSnsMode = AX_SNS_LINEAR_MODE, - .eRawType = AX_RT_RAW10, - .eBayerPattern = AX_BP_RGGB, + .nWidth = 3840, + .nHeight = 2160, + .fFrameRate = 30, + .eSnsMode = AX_SNS_LINEAR_MODE, + .eRawType = AX_RT_RAW10, + .eBayerPattern = AX_BP_RGGB, .bTestPatternEnable = AX_FALSE, // .nSettingIndex = 12, }; AX_SNS_CLK_ATTR_T gSc850slSnsClkAttr = { - .nSnsClkIdx = 0, + .nSnsClkIdx = 0, .eSnsClkRate = AX_SNS_CLK_24M, }; AX_VIN_DEV_ATTR_T gSc850slDevAttr = { - .bImgDataEnable = AX_TRUE, + .bImgDataEnable = AX_TRUE, .bNonImgDataEnable = AX_FALSE, - .eDevMode = AX_VIN_DEV_ONLINE, - .eSnsIntfType = AX_SNS_INTF_TYPE_MIPI_RAW, - .tDevImgRgn[0] = {0, 0, 3840, 2160}, - .tDevImgRgn[1] = {0, 0, 3840, 2160}, - .tDevImgRgn[2] = {0, 0, 3840, 2160}, - .tDevImgRgn[3] = {0, 0, 3840, 2160}, + .eDevMode = AX_VIN_DEV_ONLINE, + .eSnsIntfType = AX_SNS_INTF_TYPE_MIPI_RAW, + .tDevImgRgn[0] = {0, 0, 3840, 2160}, + .tDevImgRgn[1] = {0, 0, 3840, 2160}, + .tDevImgRgn[2] = {0, 0, 3840, 2160}, + .tDevImgRgn[3] = {0, 0, 3840, 2160}, /* When users transfer special data, they need to configure VC&DT for szImgVc/szImgDt/szInfoVc/szInfoDt */ //.tMipiIntfAttr.szImgVc[0] = 0, @@ -79,33 +85,33 @@ AX_VIN_DEV_ATTR_T gSc850slDevAttr = { //.tMipiIntfAttr.szInfoDt[0] = 63, //.tMipiIntfAttr.szInfoDt[1] = 63, - .ePixelFmt = AX_FORMAT_BAYER_RAW_10BPP_PACKED, - .eBayerPattern = AX_BP_RGGB, - .eSnsMode = 
AX_SNS_LINEAR_MODE, + .ePixelFmt = AX_FORMAT_BAYER_RAW_10BPP_PACKED, + .eBayerPattern = AX_BP_RGGB, + .eSnsMode = AX_SNS_LINEAR_MODE, .eSnsOutputMode = AX_SNS_NORMAL, - .tCompressInfo = {AX_COMPRESS_MODE_NONE, 0}, - .tFrameRateCtrl= {AX_INVALID_FRMRATE, AX_INVALID_FRMRATE}, + .tCompressInfo = {AX_COMPRESS_MODE_NONE, 0}, + .tFrameRateCtrl = {AX_INVALID_FRMRATE, AX_INVALID_FRMRATE}, }; AX_VIN_PIPE_ATTR_T gSc850slPipeAttr = { - .ePipeWorkMode = AX_VIN_PIPE_NORMAL_MODE1, - .tPipeImgRgn = {0, 0, 3840, 2160}, - .nWidthStride = 3840, - .eBayerPattern = AX_BP_RGGB, - .ePixelFmt = AX_FORMAT_BAYER_RAW_10BPP_PACKED, - .eSnsMode = AX_SNS_LINEAR_MODE, - .tCompressInfo = {AX_COMPRESS_MODE_LOSSY, 0}, - .tNrAttr = {{0, {AX_COMPRESS_MODE_LOSSLESS, 0}}, {0, {AX_COMPRESS_MODE_NONE, 0}}}, + .ePipeWorkMode = AX_VIN_PIPE_NORMAL_MODE1, + .tPipeImgRgn = {0, 0, 3840, 2160}, + .nWidthStride = 3840, + .eBayerPattern = AX_BP_RGGB, + .ePixelFmt = AX_FORMAT_BAYER_RAW_10BPP_PACKED, + .eSnsMode = AX_SNS_LINEAR_MODE, + .tCompressInfo = {AX_COMPRESS_MODE_LOSSY, 0}, + .tNrAttr = {{0, {AX_COMPRESS_MODE_LOSSLESS, 0}}, {0, {AX_COMPRESS_MODE_NONE, 0}}}, .tFrameRateCtrl = {AX_INVALID_FRMRATE, AX_INVALID_FRMRATE}, }; AX_VIN_CHN_ATTR_T gSc850slChn0Attr = { - .nWidth = 3840, - .nHeight = 2160, - .nWidthStride = 3840, - .eImgFormat = AX_FORMAT_YUV420_SEMIPLANAR, - .nDepth = 1, - .tCompressInfo = {AX_COMPRESS_MODE_LOSSY, 4}, + .nWidth = 3840, + .nHeight = 2160, + .nWidthStride = 3840, + .eImgFormat = AX_FORMAT_YUV420_SEMIPLANAR, + .nDepth = 1, + .tCompressInfo = {AX_COMPRESS_MODE_LOSSY, 4}, .tFrameRateCtrl = {AX_INVALID_FRMRATE, AX_INVALID_FRMRATE}, }; @@ -131,28 +137,25 @@ typedef enum { SAMPLE_VIN_BUTT } SAMPLE_VIN_CASE_E; -struct axera_camera_index_t -{ +struct axera_camera_index_t { char name[48]; SAMPLE_VIN_CASE_E index; -}axera_camera_index[] = { - {"axera_single_dummy", SAMPLE_VIN_SINGLE_DUMMY}, - {"axera_single_os04a10", SAMPLE_VIN_SINGLE_OS04A10}, - {"axera_double_os04a10", SAMPLE_VIN_DOUBLE_OS04A10}, - {"axera_single_sc450ai", SAMPLE_VIN_SINGLE_SC450AI}, - {"axera_double_sc450ai", SAMPLE_VIN_DOUBLE_SC450AI}, - {"axera_double_os04a10_and_bt656", SAMPLE_VIN_DOUBLE_OS04A10_AND_BT656}, - {"axera_single_s5kjn1sq03", SAMPLE_VIN_SINGLE_S5KJN1SQ03}, - {"axera_single_os04a10_dcg_hdr", SAMPLE_VIN_SINGLE_OS04A10_DCG_HDR}, - {"axera_single_os04a10_dcg_vs_hdr", SAMPLE_VIN_SINGLE_OS04A10_DCG_VS_HDR}, - {"axera_single_dvp", SYS_CASE_SINGLE_DVP}, - {"axera_single_bt601", SYS_CASE_SINGLE_BT601}, - {"axera_single_bt656", SYS_CASE_SINGLE_BT656}, - {"axera_single_bt1120", SYS_CASE_SINGLE_BT1120}, - {"axera_single_lvds", SYS_CASE_SINGLE_LVDS}, - {"axera_single_os04a10_online", SYS_CASE_SINGLE_OS04A10_ONLINE}, - {"axera_single_sc850sl", SAMPLE_VIN_SINGLE_SC850SL} -}; +} axera_camera_index[] = {{"axera_single_dummy", SAMPLE_VIN_SINGLE_DUMMY}, + {"axera_single_os04a10", SAMPLE_VIN_SINGLE_OS04A10}, + {"axera_double_os04a10", SAMPLE_VIN_DOUBLE_OS04A10}, + {"axera_single_sc450ai", SAMPLE_VIN_SINGLE_SC450AI}, + {"axera_double_sc450ai", SAMPLE_VIN_DOUBLE_SC450AI}, + {"axera_double_os04a10_and_bt656", SAMPLE_VIN_DOUBLE_OS04A10_AND_BT656}, + {"axera_single_s5kjn1sq03", SAMPLE_VIN_SINGLE_S5KJN1SQ03}, + {"axera_single_os04a10_dcg_hdr", SAMPLE_VIN_SINGLE_OS04A10_DCG_HDR}, + {"axera_single_os04a10_dcg_vs_hdr", SAMPLE_VIN_SINGLE_OS04A10_DCG_VS_HDR}, + {"axera_single_dvp", SYS_CASE_SINGLE_DVP}, + {"axera_single_bt601", SYS_CASE_SINGLE_BT601}, + {"axera_single_bt656", SYS_CASE_SINGLE_BT656}, + {"axera_single_bt1120", SYS_CASE_SINGLE_BT1120}, + 
{"axera_single_lvds", SYS_CASE_SINGLE_LVDS}, + {"axera_single_os04a10_online", SYS_CASE_SINGLE_OS04A10_ONLINE}, + {"axera_single_sc850sl", SAMPLE_VIN_SINGLE_SC850SL}}; typedef struct { SAMPLE_VIN_CASE_E eSysCase; @@ -260,7 +263,7 @@ COMMON_SYS_POOL_CFG_T gtPrivatePoolSingleOs450aiSdr[] = { /*************************************/ COMMON_SYS_POOL_CFG_T gtSysCommPoolSingleSc850SlSdr[] = { - {3840, 2160, 3840, AX_FORMAT_YUV420_SEMIPLANAR, 4, AX_COMPRESS_MODE_LOSSY, 4}, /* vin nv21/nv21 use */ + {3840, 2160, 3840, AX_FORMAT_YUV420_SEMIPLANAR, 8, AX_COMPRESS_MODE_LOSSY, 8}, /* vin nv21/nv21 use */ {2688, 1520, 2688, AX_FORMAT_YUV420_SEMIPLANAR, 4}, /* vin nv21/nv21 use */ {1920, 1080, 1920, AX_FORMAT_YUV420_SEMIPLANAR, 3}, /* vin nv21/nv21 use */ {720, 576, 720, AX_FORMAT_YUV420_SEMIPLANAR, 3}, /* vin nv21/nv21 use */ @@ -385,24 +388,54 @@ static AX_VOID __set_vin_attr(AX_CAMERA_T *pCam, SAMPLE_SNS_TYPE_E eSnsType, AX_ return; } - -AX_S32 CUSTOM_COMMON_VIN_GetSnsConfig(SAMPLE_SNS_TYPE_E eSnsType, - AX_MIPI_RX_ATTR_T *ptMipiAttr, AX_SNS_ATTR_T *ptSnsAttr, - AX_SNS_CLK_ATTR_T *ptSnsClkAttr, AX_VIN_DEV_ATTR_T *pDevAttr, - AX_VIN_PIPE_ATTR_T *pPipeAttr, AX_VIN_CHN_ATTR_T *pChnAttr) { - if(eSnsType == SMARTSENS_SC850SL) - { +AX_S32 CUSTOM_COMMON_VIN_GetSnsConfig(SAMPLE_SNS_TYPE_E eSnsType, AX_MIPI_RX_ATTR_T *ptMipiAttr, + AX_SNS_ATTR_T *ptSnsAttr, AX_SNS_CLK_ATTR_T *ptSnsClkAttr, + AX_VIN_DEV_ATTR_T *pDevAttr, AX_VIN_PIPE_ATTR_T *pPipeAttr, + AX_VIN_CHN_ATTR_T *pChnAttr) +{ + if (eSnsType == SMARTSENS_SC850SL) { memcpy(ptMipiAttr, &gSc850slMipiAttr, sizeof(AX_MIPI_RX_ATTR_T)); memcpy(ptSnsAttr, &gSc850slSnsAttr, sizeof(AX_SNS_ATTR_T)); memcpy(ptSnsClkAttr, &gSc850slSnsClkAttr, sizeof(AX_SNS_CLK_ATTR_T)); memcpy(pDevAttr, &gSc850slDevAttr, sizeof(AX_VIN_DEV_ATTR_T)); memcpy(pPipeAttr, &gSc850slPipeAttr, sizeof(AX_VIN_PIPE_ATTR_T)); memcpy(&pChnAttr[0], &gSc850slChn0Attr, sizeof(AX_VIN_CHN_ATTR_T)); + return 0; } - return COMMON_VIN_GetSnsConfig(eSnsType, ptMipiAttr, ptSnsAttr, - ptSnsClkAttr, pDevAttr, pPipeAttr, pChnAttr); + return COMMON_VIN_GetSnsConfig(eSnsType, ptMipiAttr, ptSnsAttr, ptSnsClkAttr, pDevAttr, pPipeAttr, pChnAttr); } +AX_SENSOR_REGISTER_FUNC_T *CUSTOM_COMMON_ISP_GetSnsObj(SAMPLE_SNS_TYPE_E eSnsType) +{ + if (eSnsType == SMARTSENS_SC850SL) { + AX_SENSOR_REGISTER_FUNC_T *ptSnsHdl = NULL; + void *handler = NULL; + AX_CHAR *err = NULL; + AX_U16 i = 0; + AX_CHAR *pSnsPath = "/opt/lib/libsns_sc850sl.so"; + AX_CHAR *pObjName = "gSnssc850slObj"; + + if ((NULL != pSnsPath) && (NULL != pObjName)) { + handler = dlopen((void *)pSnsPath, RTLD_LAZY); + if (NULL == handler) { + COMM_ISP_PRT("open %s fail!---%s\n", pSnsPath, dlerror()); + return NULL; + } + ptSnsHdl = (AX_SENSOR_REGISTER_FUNC_T *)dlsym(handler, pObjName); + err = dlerror(); + if (NULL != err) { + ptSnsHdl = NULL; + COMM_ISP_PRT("dlsym %s fail!\n", pObjName); + } + } else { + ptSnsHdl = NULL; + COMM_ISP_PRT("not find eSnsType = %d\n", eSnsType); + } + + return ptSnsHdl; + } + return COMMON_ISP_GetSnsObj(eSnsType); +} static AX_U32 __sample_case_single_dummy(AX_CAMERA_T *pCamList, SAMPLE_SNS_TYPE_E eSnsType, SAMPLE_VIN_PARAM_T *pVinParam, COMMON_SYS_ARGS_T *pCommonArgs) @@ -419,14 +452,14 @@ static AX_U32 __sample_case_single_dummy(AX_CAMERA_T *pCamList, SAMPLE_SNS_TYPE_ pCam = &pCamList[i]; pCam->nPipeId = 0; CUSTOM_COMMON_VIN_GetSnsConfig(eSnsType, &pCam->tMipiAttr, &pCam->tSnsAttr, &pCam->tSnsClkAttr, &pCam->tDevAttr, - &pCam->tPipeAttr[pCam->nPipeId], pCam->tChnAttr); + &pCam->tPipeAttr[pCam->nPipeId], pCam->tChnAttr); 
pCam->nDevId = 0; pCam->nRxDev = 0; pCam->tSnsClkAttr.nSnsClkIdx = 0; pCam->tDevBindPipe.nNum = 1; pCam->tDevBindPipe.nPipeId[0] = pCam->nPipeId; - pCam->ptSnsHdl[pCam->nPipeId] = COMMON_ISP_GetSnsObj(eSnsType); + pCam->ptSnsHdl[pCam->nPipeId] = CUSTOM_COMMON_ISP_GetSnsObj(eSnsType); pCam->eBusType = COMMON_ISP_GetSnsBusType(eSnsType); pCam->eLoadRawNode = eLoadRawNode; pCam->eInputMode = AX_INPUT_MODE_MIPI; @@ -452,13 +485,13 @@ static AX_U32 __sample_case_single_dvp(AX_CAMERA_T *pCamList, SAMPLE_SNS_TYPE_E pCam = &pCamList[0]; pCam->nPipeId = 0; CUSTOM_COMMON_VIN_GetSnsConfig(eSnsType, &pCam->tMipiAttr, &pCam->tSnsAttr, &pCam->tSnsClkAttr, &pCam->tDevAttr, - &pCam->tPipeAttr[pCam->nPipeId], pCam->tChnAttr); + &pCam->tPipeAttr[pCam->nPipeId], pCam->tChnAttr); pCam->nDevId = 0; pCam->nRxDev = 0; pCam->tSnsClkAttr.nSnsClkIdx = 0; pCam->tDevBindPipe.nNum = 1; pCam->tDevBindPipe.nPipeId[0] = pCam->nPipeId; - pCam->ptSnsHdl[pCam->nPipeId] = COMMON_ISP_GetSnsObj(eSnsType); + pCam->ptSnsHdl[pCam->nPipeId] = CUSTOM_COMMON_ISP_GetSnsObj(eSnsType); pCam->eBusType = COMMON_ISP_GetSnsBusType(eSnsType); pCam->eInputMode = AX_INPUT_MODE_DVP; __set_pipe_hdr_mode(&pCam->tDevBindPipe.nHDRSel[0], eHdrMode); @@ -482,13 +515,13 @@ static AX_U32 __sample_case_single_bt656(AX_CAMERA_T *pCamList, SAMPLE_SNS_TYPE_ pCam = &pCamList[0]; pCam->nPipeId = 2; CUSTOM_COMMON_VIN_GetSnsConfig(eSnsType, &pCam->tMipiAttr, &pCam->tSnsAttr, &pCam->tSnsClkAttr, &pCam->tDevAttr, - &pCam->tPipeAttr[pCam->nPipeId], pCam->tChnAttr); + &pCam->tPipeAttr[pCam->nPipeId], pCam->tChnAttr); pCam->nDevId = 2; pCam->nRxDev = 2; pCam->tSnsClkAttr.nSnsClkIdx = 0; pCam->tDevBindPipe.nNum = 1; pCam->tDevBindPipe.nPipeId[0] = pCam->nPipeId; - pCam->ptSnsHdl[pCam->nPipeId] = COMMON_ISP_GetSnsObj(eSnsType); + pCam->ptSnsHdl[pCam->nPipeId] = CUSTOM_COMMON_ISP_GetSnsObj(eSnsType); pCam->eBusType = COMMON_ISP_GetSnsBusType(eSnsType); pCam->eInputMode = AX_INPUT_MODE_BT656; pCam->tPipeAttr[pCam->nPipeId].ePipeWorkMode = AX_VIN_PIPE_ISP_BYPASS_MODE; @@ -510,13 +543,13 @@ static AX_U32 __sample_case_single_bt1120(AX_CAMERA_T *pCamList, SAMPLE_SNS_TYPE pCam = &pCamList[0]; pCam->nPipeId = 0; CUSTOM_COMMON_VIN_GetSnsConfig(eSnsType, &pCam->tMipiAttr, &pCam->tSnsAttr, &pCam->tSnsClkAttr, &pCam->tDevAttr, - &pCam->tPipeAttr[pCam->nPipeId], pCam->tChnAttr); + &pCam->tPipeAttr[pCam->nPipeId], pCam->tChnAttr); pCam->nDevId = 2; pCam->nRxDev = 2; pCam->tSnsClkAttr.nSnsClkIdx = 0; pCam->tDevBindPipe.nNum = 1; pCam->tDevBindPipe.nPipeId[0] = pCam->nPipeId; - pCam->ptSnsHdl[pCam->nPipeId] = COMMON_ISP_GetSnsObj(eSnsType); + pCam->ptSnsHdl[pCam->nPipeId] = CUSTOM_COMMON_ISP_GetSnsObj(eSnsType); pCam->eBusType = COMMON_ISP_GetSnsBusType(eSnsType); pCam->eInputMode = AX_INPUT_MODE_BT1120; pCam->tPipeAttr[pCam->nPipeId].ePipeWorkMode = AX_VIN_PIPE_ISP_BYPASS_MODE; @@ -538,13 +571,13 @@ static AX_U32 __sample_case_single_lvds(AX_CAMERA_T *pCamList, SAMPLE_SNS_TYPE_E pCam = &pCamList[0]; pCam->nPipeId = 0; CUSTOM_COMMON_VIN_GetSnsConfig(eSnsType, &pCam->tMipiAttr, &pCam->tSnsAttr, &pCam->tSnsClkAttr, &pCam->tDevAttr, - &pCam->tPipeAttr[pCam->nPipeId], pCam->tChnAttr); + &pCam->tPipeAttr[pCam->nPipeId], pCam->tChnAttr); pCam->nDevId = 0; pCam->nRxDev = 0; pCam->tSnsClkAttr.nSnsClkIdx = 0; pCam->tDevBindPipe.nNum = 1; pCam->tDevBindPipe.nPipeId[0] = pCam->nPipeId; - pCam->ptSnsHdl[pCam->nPipeId] = COMMON_ISP_GetSnsObj(eSnsType); + pCam->ptSnsHdl[pCam->nPipeId] = CUSTOM_COMMON_ISP_GetSnsObj(eSnsType); pCam->eBusType = COMMON_ISP_GetSnsBusType(eSnsType); pCam->eInputMode 
= AX_INPUT_MODE_LVDS; __set_pipe_hdr_mode(&pCam->tDevBindPipe.nHDRSel[0], eHdrMode); @@ -570,14 +603,14 @@ static AX_U32 __sample_case_single_os04a10(AX_CAMERA_T *pCamList, SAMPLE_SNS_TYP pCam = &pCamList[0]; pCam->nPipeId = 0; CUSTOM_COMMON_VIN_GetSnsConfig(eSnsType, &pCam->tMipiAttr, &pCam->tSnsAttr, &pCam->tSnsClkAttr, &pCam->tDevAttr, - &pCam->tPipeAttr[pCam->nPipeId], pCam->tChnAttr); + &pCam->tPipeAttr[pCam->nPipeId], pCam->tChnAttr); pCam->nDevId = 0; pCam->nRxDev = 0; pCam->tSnsClkAttr.nSnsClkIdx = 0; pCam->tDevBindPipe.nNum = 1; pCam->tDevBindPipe.nPipeId[0] = pCam->nPipeId; pCam->eLoadRawNode = eLoadRawNode; - pCam->ptSnsHdl[pCam->nPipeId] = COMMON_ISP_GetSnsObj(eSnsType); + pCam->ptSnsHdl[pCam->nPipeId] = CUSTOM_COMMON_ISP_GetSnsObj(eSnsType); pCam->eBusType = COMMON_ISP_GetSnsBusType(eSnsType); pCam->eInputMode = AX_INPUT_MODE_MIPI; __set_pipe_hdr_mode(&pCam->tDevBindPipe.nHDRSel[0], eHdrMode); @@ -602,14 +635,14 @@ static AX_U32 __sample_case_single_sc850sl(AX_CAMERA_T *pCamList, SAMPLE_SNS_TYP pCam = &pCamList[0]; pCam->nPipeId = 0; CUSTOM_COMMON_VIN_GetSnsConfig(eSnsType, &pCam->tMipiAttr, &pCam->tSnsAttr, &pCam->tSnsClkAttr, &pCam->tDevAttr, - &pCam->tPipeAttr[pCam->nPipeId], pCam->tChnAttr); + &pCam->tPipeAttr[pCam->nPipeId], pCam->tChnAttr); pCam->nDevId = 0; pCam->nRxDev = 0; pCam->tSnsClkAttr.nSnsClkIdx = 0; pCam->tDevBindPipe.nNum = 1; pCam->tDevBindPipe.nPipeId[0] = pCam->nPipeId; pCam->eLoadRawNode = eLoadRawNode; - pCam->ptSnsHdl[pCam->nPipeId] = COMMON_ISP_GetSnsObj(eSnsType); + pCam->ptSnsHdl[pCam->nPipeId] = CUSTOM_COMMON_ISP_GetSnsObj(eSnsType); pCam->eBusType = COMMON_ISP_GetSnsBusType(eSnsType); pCam->eInputMode = AX_INPUT_MODE_MIPI; __set_pipe_hdr_mode(&pCam->tDevBindPipe.nHDRSel[0], eHdrMode); @@ -619,8 +652,7 @@ static AX_U32 __sample_case_single_sc850sl(AX_CAMERA_T *pCamList, SAMPLE_SNS_TYP pCam->tPipeInfo[j].bAiispEnable = pVinParam->bAiispEnable; if (pCam->tPipeInfo[j].bAiispEnable) { if (eHdrMode <= AX_SNS_LINEAR_MODE) { - strncpy(pCam->tPipeInfo[j].szBinPath, "/opt/etc/sc850sl_sdr.bin", - sizeof(pCam->tPipeInfo[j].szBinPath)); + strncpy(pCam->tPipeInfo[j].szBinPath, "/opt/etc/sc850sl_sdr.bin", sizeof(pCam->tPipeInfo[j].szBinPath)); } else { strncpy(pCam->tPipeInfo[j].szBinPath, "/opt/etc/sc850sl_hdr_2x.bin", sizeof(pCam->tPipeInfo[j].szBinPath)); @@ -646,7 +678,7 @@ static AX_U32 __sample_case_double_os04a10(AX_CAMERA_T *pCamList, SAMPLE_SNS_TYP pCam->nNumber = i; pCam->nPipeId = i; CUSTOM_COMMON_VIN_GetSnsConfig(eSnsType, &pCam->tMipiAttr, &pCam->tSnsAttr, &pCam->tSnsClkAttr, &pCam->tDevAttr, - &pCam->tPipeAttr[pCam->nPipeId], pCam->tChnAttr); + &pCam->tPipeAttr[pCam->nPipeId], pCam->tChnAttr); pCam->nDevId = i; if (i == 0) { @@ -659,7 +691,7 @@ static AX_U32 __sample_case_double_os04a10(AX_CAMERA_T *pCamList, SAMPLE_SNS_TYP pCam->tSnsClkAttr.nSnsClkIdx = 0; pCam->tDevBindPipe.nNum = 1; pCam->tDevBindPipe.nPipeId[0] = pCam->nPipeId; - pCam->ptSnsHdl[pCam->nPipeId] = COMMON_ISP_GetSnsObj(eSnsType); + pCam->ptSnsHdl[pCam->nPipeId] = CUSTOM_COMMON_ISP_GetSnsObj(eSnsType); pCam->eBusType = COMMON_ISP_GetSnsBusType(eSnsType); if (eHdrMode == AX_SNS_LINEAR_MODE) pCam->tSnsAttr.nSettingIndex = 33; @@ -694,14 +726,14 @@ static AX_U32 __sample_case_single_sc450ai(AX_CAMERA_T *pCamList, SAMPLE_SNS_TYP pCam = &pCamList[0]; pCam->nPipeId = 0; CUSTOM_COMMON_VIN_GetSnsConfig(eSnsType, &pCam->tMipiAttr, &pCam->tSnsAttr, &pCam->tSnsClkAttr, &pCam->tDevAttr, - &pCam->tPipeAttr[pCam->nPipeId], pCam->tChnAttr); + &pCam->tPipeAttr[pCam->nPipeId], pCam->tChnAttr); 
pCam->nDevId = 0; pCam->nRxDev = 0; pCam->tSnsClkAttr.nSnsClkIdx = 0; pCam->tDevBindPipe.nNum = 1; pCam->eLoadRawNode = eLoadRawNode; pCam->tDevBindPipe.nPipeId[0] = pCam->nPipeId; - pCam->ptSnsHdl[pCam->nPipeId] = COMMON_ISP_GetSnsObj(eSnsType); + pCam->ptSnsHdl[pCam->nPipeId] = CUSTOM_COMMON_ISP_GetSnsObj(eSnsType); pCam->eBusType = COMMON_ISP_GetSnsBusType(eSnsType); pCam->eInputMode = AX_INPUT_MODE_MIPI; __set_pipe_hdr_mode(&pCam->tDevBindPipe.nHDRSel[0], eHdrMode); @@ -738,7 +770,7 @@ static AX_U32 __sample_case_double_sc450ai(AX_CAMERA_T *pCamList, SAMPLE_SNS_TYP pCam->nNumber = i; pCam->nPipeId = i; CUSTOM_COMMON_VIN_GetSnsConfig(eSnsType, &pCam->tMipiAttr, &pCam->tSnsAttr, &pCam->tSnsClkAttr, &pCam->tDevAttr, - &pCam->tPipeAttr[pCam->nPipeId], pCam->tChnAttr); + &pCam->tPipeAttr[pCam->nPipeId], pCam->tChnAttr); pCam->nDevId = i; if (i == 0) { @@ -751,7 +783,7 @@ static AX_U32 __sample_case_double_sc450ai(AX_CAMERA_T *pCamList, SAMPLE_SNS_TYP pCam->tSnsClkAttr.nSnsClkIdx = 0; pCam->tDevBindPipe.nNum = 1; pCam->tDevBindPipe.nPipeId[0] = pCam->nPipeId; - pCam->ptSnsHdl[pCam->nPipeId] = COMMON_ISP_GetSnsObj(eSnsType); + pCam->ptSnsHdl[pCam->nPipeId] = CUSTOM_COMMON_ISP_GetSnsObj(eSnsType); pCam->eBusType = COMMON_ISP_GetSnsBusType(eSnsType); if (eHdrMode == AX_SNS_LINEAR_MODE) pCam->tSnsAttr.nSettingIndex = 33; @@ -804,7 +836,7 @@ static AX_U32 __sample_case_double_os04a10_and_bt656(AX_CAMERA_T *pCamList, SAMP pCam->nNumber = i; pCam->nPipeId = i; CUSTOM_COMMON_VIN_GetSnsConfig(eSnsType, &pCam->tMipiAttr, &pCam->tSnsAttr, &pCam->tSnsClkAttr, &pCam->tDevAttr, - &pCam->tPipeAttr[pCam->nPipeId], pCam->tChnAttr); + &pCam->tPipeAttr[pCam->nPipeId], pCam->tChnAttr); pCam->nDevId = i; pCam->nRxDev = i; @@ -816,7 +848,7 @@ static AX_U32 __sample_case_double_os04a10_and_bt656(AX_CAMERA_T *pCamList, SAMP pCam->tSnsClkAttr.nSnsClkIdx = 0; pCam->tDevBindPipe.nNum = 1; pCam->tDevBindPipe.nPipeId[0] = pCam->nPipeId; - pCam->ptSnsHdl[pCam->nPipeId] = COMMON_ISP_GetSnsObj(eSnsType); + pCam->ptSnsHdl[pCam->nPipeId] = CUSTOM_COMMON_ISP_GetSnsObj(eSnsType); pCam->eBusType = COMMON_ISP_GetSnsBusType(eSnsType); if (eHdrMode == AX_SNS_LINEAR_MODE) pCam->tSnsAttr.nSettingIndex = 33; @@ -861,14 +893,14 @@ static AX_U32 __sample_case_single_s5kjn1sq03(AX_CAMERA_T *pCamList, SAMPLE_SNS_ pCam = &pCamList[0]; pCam->nPipeId = 0; CUSTOM_COMMON_VIN_GetSnsConfig(eSnsType, &pCam->tMipiAttr, &pCam->tSnsAttr, &pCam->tSnsClkAttr, &pCam->tDevAttr, - &pCam->tPipeAttr[pCam->nPipeId], pCam->tChnAttr); + &pCam->tPipeAttr[pCam->nPipeId], pCam->tChnAttr); pCam->nDevId = 0; pCam->nRxDev = 0; pCam->tSnsClkAttr.nSnsClkIdx = 0; pCam->tDevBindPipe.nNum = 1; pCam->tDevBindPipe.nPipeId[0] = pCam->nPipeId; pCam->eLoadRawNode = eLoadRawNode; - pCam->ptSnsHdl[pCam->nPipeId] = COMMON_ISP_GetSnsObj(eSnsType); + pCam->ptSnsHdl[pCam->nPipeId] = CUSTOM_COMMON_ISP_GetSnsObj(eSnsType); pCam->eBusType = COMMON_ISP_GetSnsBusType(eSnsType); pCam->eInputMode = AX_INPUT_MODE_MIPI; __set_pipe_hdr_mode(&pCam->tDevBindPipe.nHDRSel[0], eHdrMode); @@ -881,12 +913,8 @@ static AX_U32 __sample_case_single_s5kjn1sq03(AX_CAMERA_T *pCamList, SAMPLE_SNS_ return 0; } - - - - -static AX_U32 __sample_case_config(AX_CAMERA_T *gCams, SAMPLE_VIN_PARAM_T *pVinParam, - COMMON_SYS_ARGS_T *pCommonArgs, COMMON_SYS_ARGS_T *pPrivArgs) +static AX_U32 __sample_case_config(AX_CAMERA_T *gCams, SAMPLE_VIN_PARAM_T *pVinParam, COMMON_SYS_ARGS_T *pCommonArgs, + COMMON_SYS_ARGS_T *pPrivArgs) { AX_CAMERA_T *pCamList = gCams; SAMPLE_SNS_TYPE_E eSnsType = OMNIVISION_OS04A10; @@ 
-1067,20 +1095,20 @@ static AX_U32 __sample_case_config(AX_CAMERA_T *gCams, SAMPLE_VIN_PARAM_T *pVinP /* cams config */ __sample_case_single_sc450ai(pCamList, eSnsType, pVinParam, pCommonArgs); break; - case SAMPLE_VIN_SINGLE_SC850SL: - eSnsType = SMARTSENS_SC850SL; - /* comm pool config */ - __cal_dump_pool(gtSysCommPoolSingleSc850SlSdr, pVinParam->eHdrMode, pVinParam->nDumpFrameNum); - pCommonArgs->nPoolCfgCnt = sizeof(gtSysCommPoolSingleSc850SlSdr) / sizeof(gtSysCommPoolSingleSc850SlSdr[0]); - pCommonArgs->pPoolCfg = gtSysCommPoolSingleSc850SlSdr; - - /* private pool config */ - __cal_dump_pool(gtPrivatePoolSingleSc850SlSdr, pVinParam->eHdrMode, pVinParam->nDumpFrameNum); - pPrivArgs->nPoolCfgCnt = sizeof(gtPrivatePoolSingleSc850SlSdr) / sizeof(gtPrivatePoolSingleSc850SlSdr[0]); - pPrivArgs->pPoolCfg = gtPrivatePoolSingleSc850SlSdr; - - /* cams config */ - __sample_case_single_sc850sl(pCamList, eSnsType, pVinParam, pCommonArgs); + case SAMPLE_VIN_SINGLE_SC850SL: + eSnsType = SMARTSENS_SC850SL; + /* comm pool config */ + __cal_dump_pool(gtSysCommPoolSingleSc850SlSdr, pVinParam->eHdrMode, pVinParam->nDumpFrameNum); + pCommonArgs->nPoolCfgCnt = sizeof(gtSysCommPoolSingleSc850SlSdr) / sizeof(gtSysCommPoolSingleSc850SlSdr[0]); + pCommonArgs->pPoolCfg = gtSysCommPoolSingleSc850SlSdr; + + /* private pool config */ + __cal_dump_pool(gtPrivatePoolSingleSc850SlSdr, pVinParam->eHdrMode, pVinParam->nDumpFrameNum); + pPrivArgs->nPoolCfgCnt = sizeof(gtPrivatePoolSingleSc850SlSdr) / sizeof(gtPrivatePoolSingleSc850SlSdr[0]); + pPrivArgs->pPoolCfg = gtPrivatePoolSingleSc850SlSdr; + + /* cams config */ + __sample_case_single_sc850sl(pCamList, eSnsType, pVinParam, pCommonArgs); break; case SAMPLE_VIN_DOUBLE_OS04A10_AND_BT656: /* comm pool config */ @@ -1165,9 +1193,15 @@ struct axera_camera_t { AX_IMG_INFO_T ax_img; AX_VIDEO_FRAME_T out_img; int Chn; -} axera_obj; - -static int camera_capture_callback_set(struct camera_t* camera, vcamera_frame_get pcallback) + AX_VENC_CHN_ATTR_T stVencChnAttr; + AX_IVPS_PIPELINE_ATTR_T stPipelineAttr; + AX_RTSP_HANDLE pRtspHandle; + AX_RTSP_ATTR_T stRtspAttr[MAX_RTSP_MAX_CHANNEL_NUM]; + pthread_t venc_thread_id_; + int venc_run_; +} axera_obj = {0}; + +static int camera_capture_callback_set(struct camera_t *camera, vcamera_frame_get pcallback) { if (camera->state_ == CAMERA_SATTE_CAP) { SLOGW("Set capture callback failed"); @@ -1177,10 +1211,10 @@ static int camera_capture_callback_set(struct camera_t* camera, vcamera_frame_ge return 0; } -static void* camera_capture_thread(void* param) +static void *camera_capture_thread(void *param) { int Ret = -1; - camera_t* camera = (camera_t*)param; + camera_t *camera = (camera_t *)param; struct v4l2_buffer EnQueueBuf; struct v4l2_buffer DeQueueBuf; @@ -1188,21 +1222,20 @@ static void* camera_capture_thread(void* param) while (camera->state_ & AX_SENSOR_GET_FRAME_THREAD) { AX_S32 axRet = AX_VIN_GetYuvFrame(axera_obj.gCams.nPipeId, axera_obj.Chn, &axera_obj.ax_img, 500); - if (axRet == 0) - { - // axera_obj.ax_img.tFrameInfo.stVFrame.u64VirAddr[0] = (AX_U64)AX_POOL_GetBlockVirAddr(axera_obj.ax_img.tFrameInfo.stVFrame.u32BlkId[0]); - // axera_obj.ax_img.tFrameInfo.stVFrame.u64PhyAddr[0] = AX_POOL_Handle2PhysAddr(axera_obj.ax_img.tFrameInfo.stVFrame.u32BlkId[0]); - // AX_S32 AX_IVPS_CropResizeTdp(const AX_VIDEO_FRAME_T *ptSrc, AX_VIDEO_FRAME_T *ptDst, + if (axRet == 0) { + // axera_obj.ax_img.tFrameInfo.stVFrame.u64VirAddr[0] = + // (AX_U64)AX_POOL_GetBlockVirAddr(axera_obj.ax_img.tFrameInfo.stVFrame.u32BlkId[0]); + // 
axera_obj.ax_img.tFrameInfo.stVFrame.u64PhyAddr[0] = + // AX_POOL_Handle2PhysAddr(axera_obj.ax_img.tFrameInfo.stVFrame.u32BlkId[0]); AX_S32 + // AX_IVPS_CropResizeTdp(const AX_VIDEO_FRAME_T *ptSrc, AX_VIDEO_FRAME_T *ptDst, // const AX_IVPS_CROP_RESIZE_ATTR_T *ptAttr); - AX_IVPS_CROP_RESIZE_ATTR_T tAttr = {0}; - AX_IVPS_CropResizeTdp(&axera_obj.ax_img.tFrameInfo.stVFrame, &axera_obj.out_img, &tAttr); - AX_VIN_ReleaseYuvFrame(axera_obj.gCams.nPipeId, axera_obj.Chn, &axera_obj.ax_img); - camera->pcallback_((void*)axera_obj.out_img.u64VirAddr[0], axera_obj.out_img.u32Width, axera_obj.out_img.u32Height, - axera_obj.out_img.u32FrameSize, camera->ctx_); - } - else - { - // ALOGD("get ax img error! code:0x%x", axRet); + // AX_IVPS_CROP_RESIZE_ATTR_T tAttr = {0}; + // AX_IVPS_CropResizeTdp(&axera_obj.ax_img.tFrameInfo.stVFrame, &axera_obj.out_img, &tAttr); + // AX_VIN_ReleaseYuvFrame(axera_obj.gCams.nPipeId, axera_obj.Chn, &axera_obj.ax_img); + // camera->pcallback_((void *)axera_obj.out_img.u64VirAddr[0], axera_obj.out_img.u32Width, + // axera_obj.out_img.u32Height, axera_obj.out_img.u32FrameSize, camera->ctx_); + } else { + SLOGI("get ax img error! code:0x%x", axRet); usleep(10 * 1000); } } @@ -1212,7 +1245,49 @@ static void* camera_capture_thread(void* param) return NULL; } -static int camera_capture_start(struct camera_t* camera) +static AX_VOID *IVPS_GetFrameThread(AX_VOID *pArg) +{ + AX_S32 ret = 0; + AX_VIDEO_FRAME_T tSrcFrame; + AX_BLK BlkId; + struct camera_t *camera = (struct camera_t *)pArg; + SLOGI("IVPS Grp: %d, Chn: %d", 0, USER_OUTPUT_CHN - 1); + uint64_t totalGetStream = 0; + while (camera->state_ & AX_SENSOR_GET_FRAME_THREAD) { + // sleep(1); + ret = AX_IVPS_GetChnFrame(0, USER_OUTPUT_CHN - 1, &tSrcFrame, 1000); + if (ret) { + /* reach EOF */ + SLOGI("[WARN] CHN[%d] is empty ret:0x%x\n", 2, ret); + usleep(10000); + continue; + } + + // SLOGI( + // "AX_IVPS_GetChnFrame(%lld): Chn:%d, (%d x %d) Stride:%d, FrameSize:%d, Phy:%llx, UserData:%llx, PTS:%llx, + // " "BlockId:%x\n", tSrcFrame.u64SeqNum, USER_OUTPUT_CHN-1, tSrcFrame.u32Width, tSrcFrame.u32Height, + // tSrcFrame.u32PicStride[0], tSrcFrame.u32FrameSize, tSrcFrame.u64PhyAddr[0], tSrcFrame.u64UserData, + // tSrcFrame.u64PTS, tSrcFrame.u32BlkId[0]); + void *pFrame = (void *)AX_POOL_GetBlockVirAddr(tSrcFrame.u32BlkId[0]); + camera->pcallback_(pFrame, tSrcFrame.u32Width, tSrcFrame.u32Height, + tSrcFrame.u32Width * tSrcFrame.u32Height * 2, camera->ctx_); + if ((totalGetStream % 60) == 0) { + memset(pFrame, 128, tSrcFrame.u32Width * tSrcFrame.u32Height * 2); + } + ret = AX_IVPS_ReleaseChnFrame(0, USER_OUTPUT_CHN - 1, &tSrcFrame); + if (ret) { + SLOGI("AX_IVPS_ReleaseFrame fail, ret=0x%x", ret); + usleep(10000); + continue; + } + // SLOGI("AX_IVPS_GetFrame"); + totalGetStream++; + } + SLOGI("IVPS END"); + return NULL; +} + +static int camera_capture_start(struct camera_t *camera) { SLOGI("Start capture thread"); if (!camera->pcallback_) { @@ -1224,12 +1299,11 @@ static int camera_capture_start(struct camera_t* camera) return -1; } camera->state_ |= AX_SENSOR_GET_FRAME_THREAD; - pthread_create(&camera->capture_thread_id_, NULL, camera_capture_thread, camera); - + pthread_create(&camera->capture_thread_id_, NULL, IVPS_GetFrameThread, camera); return 0; } -static int camera_capture_stop(struct camera_t* camera) +static int camera_capture_stop(struct camera_t *camera) { SLOGI("Stop capture thread"); camera->state_ &= ~((int)AX_SENSOR_GET_FRAME_THREAD); @@ -1238,12 +1312,301 @@ static int camera_capture_stop(struct camera_t* camera) return 
0; } -static void camera_set_ctx(struct camera_t* camera, void* ctx) +static void camera_set_ctx(struct camera_t *camera, void *ctx) { camera->ctx_ = ctx; } -int axera_camera_open_from(camera_t* camera) +/* venc get stream task */ +static void *VencGetStreamProc(void *arg) +{ + AX_S32 s32Ret = -1; + AX_VENC_RECV_PIC_PARAM_T stRecvParam; + AX_VENC_STREAM_T stStream = {0}; + AX_S16 syncType = -1; + FILE *pStrm = NULL; + int totalGetStream = 0; + s32Ret = AX_VENC_StartRecvFrame(RTSP_OUTPUT_CHN - 1, &stRecvParam); + if (AX_SUCCESS != s32Ret) { + SLOGI("AX_VENC_StartRecvFrame failed, s32Ret:0x%x", s32Ret); + return NULL; + } + while (axera_obj.venc_run_) { + s32Ret = AX_VENC_GetStream(RTSP_OUTPUT_CHN - 1, &stStream, 1000); + if (AX_SUCCESS == s32Ret) { + totalGetStream++; + /* Send to RTSP */ + AX_BOOL bIFrame = (AX_VENC_INTRA_FRAME == stStream.stPack.enCodingType) ? AX_TRUE : AX_FALSE; + AX_Rtsp_SendNalu(axera_obj.pRtspHandle, 0, stStream.stPack.pu8Addr, stStream.stPack.u32Len, + stStream.stPack.u64PTS, bIFrame); + // SLOGI("VencChn %d: u64PTS:%lld pu8Addr:%p u32Len:%d enCodingType:%d", 2, stStream.stPack.u64PTS, + // stStream.stPack.pu8Addr, stStream.stPack.u32Len, stStream.stPack.enCodingType); + s32Ret = AX_VENC_ReleaseStream(RTSP_OUTPUT_CHN - 1, &stStream); + if (AX_SUCCESS != s32Ret) { + SLOGE("VencChn %d: AX_VENC_ReleaseStream failed!s32Ret:0x%x", 0, s32Ret); + usleep(10000); + continue; + } + } else if (AX_ERR_VENC_FLOW_END == s32Ret) { + SLOGE("VencChn %d: AX_VENC_GetStream end flow,exit!", 0); + usleep(10000); + continue; + } + } +EXIT: + s32Ret = AX_VENC_StopRecvFrame(RTSP_OUTPUT_CHN - 1); + if (0 != s32Ret) { + SLOGE("VencChn %d:AX_VENC_StopRecvFrame failed,s32Ret:0x%x", 0, s32Ret); + return NULL; + } + SLOGI("VencChn %d: Total get %u encoded frames. 
getStream Exit!", 0, totalGetStream); + return NULL; +} + +static AX_S32 SAMPLE_VENC_Init() +{ + AX_S32 VencChn = 0, s32Ret = 0; + AX_VENC_MOD_ATTR_T stModAttr = { + .enVencType = AX_VENC_MULTI_ENCODER, + .stModThdAttr.u32TotalThreadNum = 1, + .stModThdAttr.bExplicitSched = AX_FALSE, + }; + s32Ret = AX_VENC_Init(&stModAttr); + if (AX_SUCCESS != s32Ret) { + SLOGE("AX_VENC_Init failed, s32Ret:0x%x", s32Ret); + return s32Ret; + } + AX_S32 ret = AX_VENC_CreateChn(VencChn, &axera_obj.stVencChnAttr); + if (AX_SUCCESS != ret) { + SLOGE("VencChn %d: AX_VENC_CreateChn failed, s32Ret:0x%x", VencChn, ret); + return -1; + } + AX_MOD_INFO_T srcMod, dstMod; + srcMod.enModId = AX_ID_IVPS; + srcMod.s32GrpId = 0; + srcMod.s32ChnId = RTSP_OUTPUT_CHN - 1; + dstMod.enModId = AX_ID_VENC; + dstMod.s32GrpId = 0; + dstMod.s32ChnId = RTSP_OUTPUT_CHN - 1; + AX_SYS_Link(&srcMod, &dstMod); + axera_obj.stRtspAttr[0].nChannel = 0; + axera_obj.stRtspAttr[0].stVideoAttr.bEnable = AX_TRUE; + axera_obj.stRtspAttr[0].stVideoAttr.ePt = axera_obj.stVencChnAttr.stVencAttr.enType; + AX_Rtsp_Init(&axera_obj.pRtspHandle, &axera_obj.stRtspAttr[0], 1, 0); + AX_Rtsp_Start(axera_obj.pRtspHandle); + pthread_create(&axera_obj.venc_thread_id_, NULL, VencGetStreamProc, NULL); + SLOGI("RTSP pRtspHandle:%p nChnNum:%d", axera_obj.pRtspHandle, 1); + return 0; +} + +static AX_S32 SAMPLE_VENC_DeInit() +{ + AX_S32 VencChn = 0, s32Ret = 0, s32Retry = 5; + + if (axera_obj.stVencChnAttr.stVencAttr.enType == PT_PCMU) { + return s32Ret; + } + axera_obj.venc_run_ = 0; + pthread_join(axera_obj.venc_thread_id_, NULL); + AX_Rtsp_Stop(axera_obj.pRtspHandle); + AX_Rtsp_Deinit(axera_obj.pRtspHandle); + AX_MOD_INFO_T srcMod, dstMod; + srcMod.enModId = AX_ID_IVPS; + srcMod.s32GrpId = 0; + srcMod.s32ChnId = RTSP_OUTPUT_CHN - 1; + dstMod.enModId = AX_ID_VENC; + dstMod.s32GrpId = 0; + dstMod.s32ChnId = RTSP_OUTPUT_CHN - 1; + AX_SYS_UnLink(&srcMod, &dstMod); + + s32Retry = 5; + do { + s32Ret = AX_VENC_DestroyChn(VencChn); + if (AX_ERR_VENC_BUSY == s32Ret) { + SLOGE("VencChn %d:AX_VENC_DestroyChn return AX_ERR_VENC_BUSY,retry...", VencChn); + --s32Retry; + usleep(100 * 1000); + } else { + break; + } + } while (s32Retry >= 0); + + if (s32Retry == -1 || AX_SUCCESS != s32Ret) { + SLOGE("VencChn %d: AX_VENC_DestroyChn failed, s32Retry=%d, s32Ret=0x%x\n", VencChn, s32Retry, s32Ret); + } + s32Ret = AX_VENC_Deinit(); + if (AX_SUCCESS != s32Ret) { + SLOGE("AX_VENC_Deinit failed, s32Ret=0x%x", s32Ret); + return s32Ret; + } + return 0; +} + +void init_rtsp(AX_VENC_CHN_ATTR_T *stVencChnAttr) +{ + axera_obj.stVencChnAttr = *stVencChnAttr; + axera_obj.venc_run_ = 1; +} + +static int SAMPLE_IVPS_Init(AX_S32 nGrpId, camera_t *camera) +{ + AX_S32 s32Ret = 0, nChn; + AX_IVPS_GRP_ATTR_T stGrpAttr = {0}; + AX_IVPS_PIPELINE_ATTR_T stPipelineAttr = {0}; + + s32Ret = AX_IVPS_Init(); + if (AX_SUCCESS != s32Ret) { + SLOGE("AX_IVPS_Init failed,s32Ret:0x%x", s32Ret); + return s32Ret; + } + + stGrpAttr.nInFifoDepth = 2; + stGrpAttr.ePipeline = AX_IVPS_PIPELINE_DEFAULT; + s32Ret = AX_IVPS_CreateGrp(nGrpId, &stGrpAttr); + if (AX_SUCCESS != s32Ret) { + SLOGE("AX_IVPS_CreateGrp failed,nGrp %d,s32Ret:0x%x", nGrpId, s32Ret); + return s32Ret; + } + stPipelineAttr.nOutChnNum = 3; + // vin 输入 + stPipelineAttr.tFilter[0][0].bEngage = AX_TRUE; + if (axera_obj.stVencChnAttr.stVencAttr.u32PicWidthSrc > 1920) { + stPipelineAttr.tFilter[0][0].nDstPicWidth = axera_obj.gCams.tChnAttr[0].nWidth; + stPipelineAttr.tFilter[0][0].nDstPicHeight = axera_obj.gCams.tChnAttr[0].nHeight; + 
stPipelineAttr.tFilter[0][0].nDstPicStride = axera_obj.gCams.tChnAttr[0].nWidthStride; + } else { + stPipelineAttr.tFilter[0][0].nDstPicWidth = 1920; + stPipelineAttr.tFilter[0][0].nDstPicHeight = 1080; + stPipelineAttr.tFilter[0][0].nDstPicStride = 1920; + } + + stPipelineAttr.tFilter[0][0].eDstPicFormat = AX_FORMAT_YUV420_SEMIPLANAR; + stPipelineAttr.tFilter[0][0].eEngine = AX_IVPS_ENGINE_VPP; + // rtsp output chn0 + stPipelineAttr.tFilter[RTSP_OUTPUT_CHN][0].bEngage = AX_TRUE; + if (axera_obj.stVencChnAttr.stVencAttr.enType != PT_PCMU) { + stPipelineAttr.tFilter[RTSP_OUTPUT_CHN][0].nDstPicWidth = axera_obj.stVencChnAttr.stVencAttr.u32PicWidthSrc; + stPipelineAttr.tFilter[RTSP_OUTPUT_CHN][0].nDstPicHeight = axera_obj.stVencChnAttr.stVencAttr.u32PicHeightSrc; + } else { + stPipelineAttr.tFilter[RTSP_OUTPUT_CHN][0].nDstPicWidth = 1280; + stPipelineAttr.tFilter[RTSP_OUTPUT_CHN][0].nDstPicHeight = 720; + } + stPipelineAttr.tFilter[RTSP_OUTPUT_CHN][0].nDstPicStride = + ALIGN_UP(stPipelineAttr.tFilter[RTSP_OUTPUT_CHN][0].nDstPicWidth, 16); + stPipelineAttr.tFilter[RTSP_OUTPUT_CHN][0].eDstPicFormat = AX_FORMAT_YUV420_SEMIPLANAR; + stPipelineAttr.tFilter[RTSP_OUTPUT_CHN][0].eEngine = AX_IVPS_ENGINE_VPP; + + // device capture chn2 + stPipelineAttr.tFilter[USER_OUTPUT_CHN][0].bEngage = AX_TRUE; + stPipelineAttr.tFilter[USER_OUTPUT_CHN][0].nDstPicWidth = camera->width_; + stPipelineAttr.tFilter[USER_OUTPUT_CHN][0].nDstPicHeight = camera->height_; + stPipelineAttr.tFilter[USER_OUTPUT_CHN][0].nDstPicStride = + ALIGN_UP(stPipelineAttr.tFilter[USER_OUTPUT_CHN][0].nDstPicWidth, 16); + stPipelineAttr.tFilter[USER_OUTPUT_CHN][0].eDstPicFormat = AX_FORMAT_YUV422_INTERLEAVED_YUYV; + stPipelineAttr.tFilter[USER_OUTPUT_CHN][0].eEngine = AX_IVPS_ENGINE_TDP; + stPipelineAttr.tFilter[USER_OUTPUT_CHN][0].tTdpCfg.eRotation = AX_IVPS_ROTATION_0; + stPipelineAttr.tFilter[USER_OUTPUT_CHN][0].tAspectRatio.eMode = AX_IVPS_ASPECT_RATIO_MANUAL; + stPipelineAttr.tFilter[USER_OUTPUT_CHN][0].tAspectRatio.tRect.nX = 0; + stPipelineAttr.tFilter[USER_OUTPUT_CHN][0].tAspectRatio.tRect.nY = + (int)(stPipelineAttr.tFilter[USER_OUTPUT_CHN][0].nDstPicHeight - + (((stPipelineAttr.tFilter[USER_OUTPUT_CHN][0].nDstPicWidth * 1.0f) / + (stPipelineAttr.tFilter[0][0].nDstPicWidth * 1.0f)) * + (stPipelineAttr.tFilter[0][0].nDstPicHeight * 1.0f))); + stPipelineAttr.tFilter[USER_OUTPUT_CHN][0].tAspectRatio.tRect.nW = camera->width_; + stPipelineAttr.tFilter[USER_OUTPUT_CHN][0].tAspectRatio.tRect.nH = camera->height_; + stPipelineAttr.nOutFifoDepth[USER_OUTPUT_CHN - 1] = 2; + + // // jpeg output chn1 + // memcpy(&stPipelineAttr.tFilter[2][0], &stPipelineAttr.tFilter[3][0], sizeof(AX_IVPS_FILTER_T)); + // stPipelineAttr.tFilter[2][0].eDstPicFormat = AX_FORMAT_YUV420_SEMIPLANAR; + +#ifdef SAMPLE_IVPS_CROPRESIZE_ENABLE + AX_S32 nChnGetId = 0; + stPipelineAttr.nOutFifoDepth[nChnGetId] = 1; +#endif + s32Ret = AX_IVPS_SetPipelineAttr(nGrpId, &stPipelineAttr); + if (AX_SUCCESS != s32Ret) { + SLOGE("AX_IVPS_SetPipelineAttr failed,nGrp %d,s32Ret:0x%x", nGrpId, s32Ret); + return s32Ret; + } + for (nChn = 0; nChn < stPipelineAttr.nOutChnNum; nChn++) { + s32Ret = AX_IVPS_EnableChn(nGrpId, nChn); + if (AX_SUCCESS != s32Ret) { + SLOGE("AX_IVPS_EnableChn failed,nGrp %d,nChn %d,s32Ret:0x%x", nGrpId, nChn, s32Ret); + return s32Ret; + } + } + s32Ret = AX_IVPS_StartGrp(nGrpId); + if (AX_SUCCESS != s32Ret) { + SLOGE("AX_IVPS_StartGrp failed,nGrp %d,s32Ret:0x%x", nGrpId, s32Ret); + return s32Ret; + } +#ifdef SAMPLE_IVPS_CROPRESIZE_ENABLE + s32Ret = IVPS_CropResizeThreadStart(nGrpId,
nChnGetId); + if (AX_SUCCESS != s32Ret) { + SLOGE("IVPS_CropResizeThreadStart failed,nGrp %d,s32Ret:0x%x", nGrpId, s32Ret); + return s32Ret; + } +#endif + AX_MOD_INFO_T srcMod, dstMod; + srcMod.enModId = AX_ID_VIN; + srcMod.s32GrpId = 0; + srcMod.s32ChnId = 0; + + dstMod.enModId = AX_ID_IVPS; + dstMod.s32GrpId = nGrpId; + dstMod.s32ChnId = 0; + AX_SYS_Link(&srcMod, &dstMod); + return 0; +} + +static AX_S32 SAMPLE_IVPS_DeInit(AX_S32 nGrpId) +{ + AX_S32 s32Ret = 0, nChn = 0; + + AX_MOD_INFO_T srcMod, dstMod; + srcMod.enModId = AX_ID_VIN; + srcMod.s32GrpId = 0; + srcMod.s32ChnId = 0; + dstMod.enModId = AX_ID_IVPS; + dstMod.s32GrpId = nGrpId; + dstMod.s32ChnId = 0; + AX_SYS_UnLink(&srcMod, &dstMod); + +#ifdef SAMPLE_IVPS_CROPRESIZE_ENABLE + IVPS_CropResizeThreadStop(); +#endif + + s32Ret = AX_IVPS_StopGrp(nGrpId); + if (AX_SUCCESS != s32Ret) { + SLOGE("AX_IVPS_StopGrp failed,nGrp %d,s32Ret:0x%x", nGrpId, s32Ret); + return s32Ret; + } + + for (nChn = 0; nChn < 3; nChn++) { + s32Ret = AX_IVPS_DisableChn(nGrpId, nChn); + if (AX_SUCCESS != s32Ret) { + SLOGE("AX_IVPS_DisableChn failed,nGrp %d,nChn %d,s32Ret:0x%x", nGrpId, nChn, s32Ret); + return s32Ret; + } + } + + s32Ret = AX_IVPS_DestoryGrp(nGrpId); + if (AX_SUCCESS != s32Ret) { + SLOGE("AX_IVPS_DestoryGrp failed,nGrp %d,s32Ret:0x%x", nGrpId, s32Ret); + return s32Ret; + } + + s32Ret = AX_IVPS_Deinit(); + if (AX_SUCCESS != s32Ret) { + SLOGE("AX_IVPS_Deinit failed,s32Ret:0x%x", s32Ret); + return s32Ret; + } + + return 0; +} + +int axera_camera_open_from(camera_t *camera) { int Ret = -1; AX_S32 axRet; @@ -1255,28 +1618,29 @@ int axera_camera_open_from(camera_t* camera) goto ErrorHandle; } axera_obj.VinParam.eSysCase = SAMPLE_VIN_BUTT; - for (int i = 0; i < sizeof(axera_camera_index) / sizeof(axera_camera_index[0]); i++) - { - if(strcmp(axera_camera_index[i].name, camera->dev_name_) == 0) - { + for (int i = 0; i < sizeof(axera_camera_index) / sizeof(axera_camera_index[0]); i++) { + if (strcmp(axera_camera_index[i].name, camera->dev_name_) == 0) { axera_obj.VinParam.eSysCase = axera_camera_index[i].index; break; } } - if(axera_obj.VinParam.eSysCase == SAMPLE_VIN_BUTT) - { + if (axera_obj.VinParam.eSysCase == SAMPLE_VIN_BUTT) { SLOGE("Error: camera not support %s", camera->dev_name_); return -10; } - axera_obj.VinParam.eSysMode = COMMON_VIN_SENSOR; - axera_obj.VinParam.eHdrMode = AX_SNS_LINEAR_MODE; + axera_obj.VinParam.eSysMode = COMMON_VIN_SENSOR; + axera_obj.VinParam.eHdrMode = AX_SNS_LINEAR_MODE; axera_obj.VinParam.bAiispEnable = AX_TRUE; // axera_obj.gCams.tChnAttr __sample_case_config(&axera_obj.gCams, &axera_obj.VinParam, &axera_obj.tCommonArgs, &axera_obj.tPrivArgs); COMMON_SYS_Init(&axera_obj.tCommonArgs); COMMON_NPU_Init(); - AX_IVPS_Init(); + SAMPLE_IVPS_Init(0, camera); + if (axera_obj.venc_run_) { + SAMPLE_VENC_Init(); + } + axRet = COMMON_CAM_Init(); if (axRet) { COMM_ISP_PRT("COMMON_CAM_Init fail, ret:0x%x", axRet); @@ -1296,14 +1660,15 @@ int axera_camera_open_from(camera_t* camera) return -3; } axera_obj.Chn = AX_VIN_CHN_ID_MAIN; - camera->state_ |= AX_SENSOR_CAM_OPEN; + camera->state_ |= AX_SENSOR_CAM_OPEN; - axera_obj.out_img.u32Width = camera->width_; - axera_obj.out_img.u32Height = camera->height_; - axera_obj.out_img.u32PicStride[0] = ALIGN_UP(camera->width_, 16); - axera_obj.out_img.enImgFormat = AX_FORMAT_YUV420_SEMIPLANAR; - axera_obj.out_img.u32FrameSize = camera->width_ * camera->height_ * 3 / 2; - AX_SYS_MemAlloc(&axera_obj.out_img.u64PhyAddr[0], (AX_VOID **)&axera_obj.out_img.u64VirAddr[0], 
ALIGN_UP(axera_obj.out_img.u32FrameSize, 0x100), 0x100, (AX_S8 *)"StackFlow_camera_output_buff"); + // axera_obj.out_img.u32Width = camera->width_; + // axera_obj.out_img.u32Height = camera->height_; + // axera_obj.out_img.u32PicStride[0] = ALIGN_UP(camera->width_, 16); + // axera_obj.out_img.enImgFormat = AX_FORMAT_YUV420_SEMIPLANAR; + // axera_obj.out_img.u32FrameSize = camera->width_ * camera->height_ * 3 / 2; + // AX_SYS_MemAlloc(&axera_obj.out_img.u64PhyAddr[0], (AX_VOID **)&axera_obj.out_img.u64VirAddr[0], + // ALIGN_UP(axera_obj.out_img.u32FrameSize, 0x100), 0x100, (AX_S8 *)"StackFlowCameraBuff"); camera->camera_capture_callback_set = camera_capture_callback_set; camera->camera_capture_start = camera_capture_start; @@ -1317,10 +1682,10 @@ int axera_camera_open_from(camera_t* camera) return -1; } -camera_t* axera_camera_open(const char* pdev_name, int width, int height, int fps) +camera_t *axera_camera_open(const char *pdev_name, int width, int height, int fps) { int Ret = -1; - camera_t* camera = (camera_t*)malloc(sizeof(camera_t)); + camera_t *camera = (camera_t *)malloc(sizeof(camera_t)); if (camera == NULL) return NULL; memset(camera, 0, sizeof(camera_t)); camera->buffer_cnt_ = CONFIG_CAPTURE_BUF_CNT; @@ -1342,6 +1707,7 @@ camera_t* axera_camera_open(const char* pdev_name, int width, int height, int fp goto ErrorHandle; } camera->is_alloc_ = 1; + SLOGI("camera %s open success", camera->dev_name_); return camera; ErrorHandle: @@ -1350,20 +1716,21 @@ camera_t* axera_camera_open(const char* pdev_name, int width, int height, int fp return NULL; } -int axera_camera_close(camera_t* camera) +int axera_camera_close(camera_t *camera) { if (camera == NULL) return -1; if (camera->state_ & AX_SENSOR_CAM_OPEN) { COMMON_CAM_Close(&axera_obj.gCams, axera_obj.tCommonArgs.nCamCnt); camera->state_ &= ~((int)AX_SENSOR_CAM_OPEN); } - + if (camera->state_ & AX_SENSOR_CAM_ENABLE) { COMMON_CAM_Deinit(); camera->state_ &= ~((int)AX_SENSOR_CAM_ENABLE); } camera->state_ = AX_SENSOR_NONT; - AX_IVPS_Deinit(); + SAMPLE_IVPS_DeInit(0); + SAMPLE_VENC_DeInit(); COMMON_SYS_DeInit(); if (camera->is_alloc_) free(camera); diff --git a/projects/llm_framework/main_camera/src/axera_camera.h b/projects/llm_framework/main_camera/src/axera_camera.h index 6035c8c5..6b8644d3 100644 --- a/projects/llm_framework/main_camera/src/axera_camera.h +++ b/projects/llm_framework/main_camera/src/axera_camera.h @@ -5,7 +5,7 @@ */ #ifndef AXERA_CAMERA_H #define AXERA_CAMERA_H - +#include "common_venc.h" #if __cplusplus extern "C" { #endif @@ -29,6 +29,9 @@ int axera_camera_open_from(camera_t* camera); */ int axera_camera_close(camera_t* camera); +void init_rtsp(AX_VENC_CHN_ATTR_T *stVencChnAttr); +void init_jpeg(); + #if __cplusplus } #endif diff --git a/projects/llm_framework/main_camera/src/main.cpp b/projects/llm_framework/main_camera/src/main.cpp index b53467f0..dad26993 100644 --- a/projects/llm_framework/main_camera/src/main.cpp +++ b/projects/llm_framework/main_camera/src/main.cpp @@ -16,9 +16,44 @@ #include "axera_camera.h" #include #include +#include "hv/TcpServer.h" +#include +#include +#include +// #include + +#ifdef ENABLE_BACKWARD +#define BACKWARD_HAS_DW 1 +#include "backward.hpp" +#include "backward.h" +#endif + +#define MAX_TASK_NUM 1 using namespace StackFlows; int main_exit_flage = 0; + +const char *http_response = + "HTTP/1.0 200 OK\n" + "Server: BaseHTTP/0.6 Python/3.10.12\n" + "Date: %s\n" + "Cache-Control: no-store, no-cache, must-revalidate, pre-check=0, post-check=0, max-age=0\n" + "Connection: close\n" + 
"Content-Type: multipart/x-mixed-replace;boundary=--boundarydonotcross\n" + "Expires: Mon, 1 Jan 2130 00:00:00 GMT\n" + "Pragma: no-cache\n" + "Access-Control-Allow-Origin: *\n"; +const char *http_jpeg_response = + "\n" + "--boundarydonotcross\n" + "X-Timestamp: %lf\n" + "Content-Length: %d\n" + "Content-Type: image/jpeg\n" + "\n"; + +char http_response_buff[1024]; +char http_response_buff1[1024]; + static void __sigint(int iSigNo) { main_exit_flage = 1; @@ -26,26 +61,29 @@ static void __sigint(int iSigNo) typedef std::function task_callback_t; -typedef camera_t* (*hal_camera_open_fun)(const char* pdev_name, int width, int height, int fps); -typedef int (*hal_camera_close_fun)(camera_t* camera); - +typedef camera_t *(*hal_camera_open_fun)(const char *pdev_name, int width, int height, int fps); +typedef int (*hal_camera_close_fun)(camera_t *camera); -#define CONFIG_AUTO_SET(obj, key) \ - if (config_body.contains(#key)) \ - mode_config_.key = config_body[#key]; \ - else if (obj.contains(#key)) \ - mode_config_.key = obj[#key]; +#define CONFIG_AUTO_SET(obj, key) \ + if (config_body.contains(#key)) \ + stVencChnAttr.key = config_body[#key]; \ + else if (obj.contains(#key)) \ + stVencChnAttr.key = obj[#key]; class llm_task { private: camera_t *cam; hal_camera_open_fun hal_camera_open; hal_camera_close_fun hal_camera_close; + public: std::string response_format_; task_callback_t out_callback_; bool enoutput_; bool enstream_; + bool enjpegout_; + std::string rtsp_config_; + bool enable_webstream_; std::atomic_int cap_status_; std::unique_ptr camera_cap_thread_; std::atomic_bool camera_clear_flage_; @@ -55,6 +93,8 @@ class llm_task { int frame_height_; cv::Mat yuv_dist_; + std::unique_ptr hv_tcpserver_; + static void on_cap_fream(void *pData, uint32_t width, uint32_t height, uint32_t Length, void *ctx) { llm_task *self = static_cast(ctx); @@ -127,21 +167,360 @@ class llm_task { devname_ = config_body.at("input"); frame_width_ = config_body.at("frame_width"); frame_height_ = config_body.at("frame_height"); + if (config_body.contains("rtsp")) { + rtsp_config_ = config_body.at("rtsp"); + } + if (config_body.contains("enable_webstream")) { + enable_webstream_ = config_body.at("enable_webstream"); + } else { + enable_webstream_ = false; + } } catch (...) 
{ return true; } - enstream_ = (response_format_.find("stream") != std::string::npos); - yuv_dist_ = cv::Mat(frame_height_, frame_width_, CV_8UC2, cv::Scalar(0, 128)); - if(devname_.find("/dev/video") != std::string::npos){ - hal_camera_open = camera_open; + enstream_ = (response_format_.find("stream") != std::string::npos); + enjpegout_ = (response_format_.find("jpeg") != std::string::npos); + yuv_dist_ = cv::Mat(frame_height_, frame_width_, CV_8UC2, cv::Scalar(0, 128)); + if (devname_.find("/dev/video") != std::string::npos) { + hal_camera_open = camera_open; hal_camera_close = camera_close; - }else if(devname_.find("axera_") != std::string::npos){ - hal_camera_open = axera_camera_open; + } else if (devname_.find("axera_") != std::string::npos) { + hal_camera_open = axera_camera_open; hal_camera_close = axera_camera_close; - }else { + if (!rtsp_config_.empty()) { + nlohmann::json error_body; + nlohmann::json file_body; + std::string base_model_path; + std::string base_model_config_path; + std::list config_file_paths = + get_config_file_paths(base_model_path, base_model_config_path, "camera"); + try { + for (auto file_name : config_file_paths) { + std::ifstream config_file(file_name); + if (!config_file.is_open()) { + SLOGW("config file :%s miss", file_name.c_str()); + continue; + } + SLOGI("config file :%s read", file_name.c_str()); + config_file >> file_body; + config_file.close(); + break; + } + if (file_body.empty()) { + SLOGE("all config file miss"); + return true; + } + AX_VENC_CHN_ATTR_T stVencChnAttr; + memset(&stVencChnAttr, 0, sizeof(AX_VENC_CHN_ATTR_T)); + if (rtsp_config_.find("h264") != std::string::npos) { + CONFIG_AUTO_SET(file_body["h264_config_param"], stVencAttr.enType); + CONFIG_AUTO_SET(file_body["h264_config_param"], stVencAttr.u32MaxPicWidth); + CONFIG_AUTO_SET(file_body["h264_config_param"], stVencAttr.u32MaxPicHeight); + CONFIG_AUTO_SET(file_body["h264_config_param"], stVencAttr.enMemSource); + CONFIG_AUTO_SET(file_body["h264_config_param"], stVencAttr.u32BufSize); + CONFIG_AUTO_SET(file_body["h264_config_param"], stVencAttr.enProfile); + CONFIG_AUTO_SET(file_body["h264_config_param"], stVencAttr.enLevel); + CONFIG_AUTO_SET(file_body["h264_config_param"], stVencAttr.enTier); + CONFIG_AUTO_SET(file_body["h264_config_param"], stVencAttr.u32PicWidthSrc); + CONFIG_AUTO_SET(file_body["h264_config_param"], stVencAttr.u32PicHeightSrc); + CONFIG_AUTO_SET(file_body["h264_config_param"], stVencAttr.stCropCfg.bEnable); + CONFIG_AUTO_SET(file_body["h264_config_param"], stVencAttr.stCropCfg.stRect.s32X); + CONFIG_AUTO_SET(file_body["h264_config_param"], stVencAttr.stCropCfg.stRect.s32Y); + CONFIG_AUTO_SET(file_body["h264_config_param"], stVencAttr.stCropCfg.stRect.u32Width); + CONFIG_AUTO_SET(file_body["h264_config_param"], stVencAttr.stCropCfg.stRect.u32Height); + CONFIG_AUTO_SET(file_body["h264_config_param"], stVencAttr.enRotation); + CONFIG_AUTO_SET(file_body["h264_config_param"], stVencAttr.enLinkMode); + CONFIG_AUTO_SET(file_body["h264_config_param"], stVencAttr.bDeBreathEffect); + CONFIG_AUTO_SET(file_body["h264_config_param"], stVencAttr.bRefRingbuf); + CONFIG_AUTO_SET(file_body["h264_config_param"], stVencAttr.s32StopWaitTime); + CONFIG_AUTO_SET(file_body["h264_config_param"], stVencAttr.u8InFifoDepth); + CONFIG_AUTO_SET(file_body["h264_config_param"], stVencAttr.u8OutFifoDepth); + CONFIG_AUTO_SET(file_body["h264_config_param"], stVencAttr.u32SliceNum); + CONFIG_AUTO_SET(file_body["h264_config_param"], stVencAttr.stAttrH265e.bRcnRefShareBuf); + 
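// Each CONFIG_AUTO_SET(obj, key) in this block copies one encoder field: #key stringizes the
+ // dotted member path, the per-request config_body takes priority, and the on-disk profile in
+ // file_body is the fallback. For example, CONFIG_AUTO_SET(file_body["h264_config_param"], stRcAttr.enRcMode)
+ // expands roughly to:
+ //   if (config_body.contains("stRcAttr.enRcMode"))
+ //       stVencChnAttr.stRcAttr.enRcMode = config_body["stRcAttr.enRcMode"];
+ //   else if (file_body["h264_config_param"].contains("stRcAttr.enRcMode"))
+ //       stVencChnAttr.stRcAttr.enRcMode = file_body["h264_config_param"]["stRcAttr.enRcMode"];
+ // so a setup request can override any single field of the stock h264 profile.
+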
CONFIG_AUTO_SET(file_body["h264_config_param"], stRcAttr.enRcMode); + CONFIG_AUTO_SET(file_body["h264_config_param"], stRcAttr.s32FirstFrameStartQp); + CONFIG_AUTO_SET(file_body["h264_config_param"], stRcAttr.stFrameRate.fSrcFrameRate); + CONFIG_AUTO_SET(file_body["h264_config_param"], stRcAttr.stFrameRate.fDstFrameRate); + CONFIG_AUTO_SET(file_body["h264_config_param"], stRcAttr.stH264Cbr.u32Gop); + CONFIG_AUTO_SET(file_body["h264_config_param"], stRcAttr.stH264Cbr.u32StatTime); + CONFIG_AUTO_SET(file_body["h264_config_param"], stRcAttr.stH264Cbr.u32BitRate); + CONFIG_AUTO_SET(file_body["h264_config_param"], stRcAttr.stH264Cbr.u32MinQp); + CONFIG_AUTO_SET(file_body["h264_config_param"], stRcAttr.stH264Cbr.u32MaxQp); + CONFIG_AUTO_SET(file_body["h264_config_param"], stRcAttr.stH264Cbr.u32MinIQp); + CONFIG_AUTO_SET(file_body["h264_config_param"], stRcAttr.stH264Cbr.u32MaxIQp); + CONFIG_AUTO_SET(file_body["h264_config_param"], stRcAttr.stH264Cbr.u32MaxIprop); + CONFIG_AUTO_SET(file_body["h264_config_param"], stRcAttr.stH264Cbr.u32MinIprop); + CONFIG_AUTO_SET(file_body["h264_config_param"], stRcAttr.stH264Cbr.s32IntraQpDelta); + CONFIG_AUTO_SET(file_body["h264_config_param"], stRcAttr.stH264Cbr.s32DeBreathQpDelta); + CONFIG_AUTO_SET(file_body["h264_config_param"], stRcAttr.stH264Cbr.u32IdrQpDeltaRange); + CONFIG_AUTO_SET(file_body["h264_config_param"], stRcAttr.stH264Cbr.stQpmapInfo.enCtbRcMode); + CONFIG_AUTO_SET(file_body["h264_config_param"], stRcAttr.stH264Cbr.stQpmapInfo.enQpmapQpType); + CONFIG_AUTO_SET(file_body["h264_config_param"], + stRcAttr.stH264Cbr.stQpmapInfo.enQpmapBlockType); + CONFIG_AUTO_SET(file_body["h264_config_param"], + stRcAttr.stH264Cbr.stQpmapInfo.enQpmapBlockUnit); + + CONFIG_AUTO_SET(file_body["h264_config_param"], stRcAttr.stH264Vbr.u32Gop); + CONFIG_AUTO_SET(file_body["h264_config_param"], stRcAttr.stH264Vbr.u32StatTime); + CONFIG_AUTO_SET(file_body["h264_config_param"], stRcAttr.stH264Vbr.u32MaxBitRate); + CONFIG_AUTO_SET(file_body["h264_config_param"], stRcAttr.stH264Vbr.enVQ); + CONFIG_AUTO_SET(file_body["h264_config_param"], stRcAttr.stH264Vbr.u32MaxQp); + CONFIG_AUTO_SET(file_body["h264_config_param"], stRcAttr.stH264Vbr.u32MinQp); + CONFIG_AUTO_SET(file_body["h264_config_param"], stRcAttr.stH264Vbr.u32MaxIQp); + CONFIG_AUTO_SET(file_body["h264_config_param"], stRcAttr.stH264Vbr.u32MinIQp); + CONFIG_AUTO_SET(file_body["h264_config_param"], stRcAttr.stH264Vbr.s32IntraQpDelta); + CONFIG_AUTO_SET(file_body["h264_config_param"], stRcAttr.stH264Vbr.s32DeBreathQpDelta); + CONFIG_AUTO_SET(file_body["h264_config_param"], stRcAttr.stH264Vbr.u32IdrQpDeltaRange); + CONFIG_AUTO_SET(file_body["h264_config_param"], stRcAttr.stH264Vbr.stQpmapInfo.enCtbRcMode); + CONFIG_AUTO_SET(file_body["h264_config_param"], stRcAttr.stH264Vbr.stQpmapInfo.enQpmapQpType); + CONFIG_AUTO_SET(file_body["h264_config_param"], + stRcAttr.stH264Vbr.stQpmapInfo.enQpmapBlockType); + CONFIG_AUTO_SET(file_body["h264_config_param"], + stRcAttr.stH264Vbr.stQpmapInfo.enQpmapBlockUnit); + + CONFIG_AUTO_SET(file_body["h264_config_param"], stRcAttr.stH264AVbr.u32Gop); + CONFIG_AUTO_SET(file_body["h264_config_param"], stRcAttr.stH264AVbr.u32StatTime); + CONFIG_AUTO_SET(file_body["h264_config_param"], stRcAttr.stH264AVbr.u32MaxBitRate); + CONFIG_AUTO_SET(file_body["h264_config_param"], stRcAttr.stH264AVbr.u32MaxQp); + CONFIG_AUTO_SET(file_body["h264_config_param"], stRcAttr.stH264AVbr.u32MinQp); + CONFIG_AUTO_SET(file_body["h264_config_param"], stRcAttr.stH264AVbr.u32MaxIQp); + 
CONFIG_AUTO_SET(file_body["h264_config_param"], stRcAttr.stH264AVbr.u32MinIQp); + CONFIG_AUTO_SET(file_body["h264_config_param"], stRcAttr.stH264AVbr.s32IntraQpDelta); + CONFIG_AUTO_SET(file_body["h264_config_param"], stRcAttr.stH264AVbr.s32DeBreathQpDelta); + CONFIG_AUTO_SET(file_body["h264_config_param"], stRcAttr.stH264AVbr.u32IdrQpDeltaRange); + CONFIG_AUTO_SET(file_body["h264_config_param"], stRcAttr.stH264AVbr.stQpmapInfo.enCtbRcMode); + CONFIG_AUTO_SET(file_body["h264_config_param"], stRcAttr.stH264AVbr.stQpmapInfo.enQpmapQpType); + CONFIG_AUTO_SET(file_body["h264_config_param"], + stRcAttr.stH264AVbr.stQpmapInfo.enQpmapBlockType); + CONFIG_AUTO_SET(file_body["h264_config_param"], + stRcAttr.stH264AVbr.stQpmapInfo.enQpmapBlockUnit); + + CONFIG_AUTO_SET(file_body["h264_config_param"], stRcAttr.stH264QVbr.u32Gop); + CONFIG_AUTO_SET(file_body["h264_config_param"], stRcAttr.stH264QVbr.u32StatTime); + CONFIG_AUTO_SET(file_body["h264_config_param"], stRcAttr.stH264QVbr.u32TargetBitRate); + + CONFIG_AUTO_SET(file_body["h264_config_param"], stRcAttr.stH264CVbr.u32Gop); + CONFIG_AUTO_SET(file_body["h264_config_param"], stRcAttr.stH264CVbr.u32StatTime); + CONFIG_AUTO_SET(file_body["h264_config_param"], stRcAttr.stH264CVbr.u32MaxQp); + CONFIG_AUTO_SET(file_body["h264_config_param"], stRcAttr.stH264CVbr.u32MinQp); + CONFIG_AUTO_SET(file_body["h264_config_param"], stRcAttr.stH264CVbr.u32MaxIQp); + CONFIG_AUTO_SET(file_body["h264_config_param"], stRcAttr.stH264CVbr.u32MinIQp); + CONFIG_AUTO_SET(file_body["h264_config_param"], stRcAttr.stH264CVbr.u32MinQpDelta); + CONFIG_AUTO_SET(file_body["h264_config_param"], stRcAttr.stH264CVbr.u32MaxQpDelta); + CONFIG_AUTO_SET(file_body["h264_config_param"], stRcAttr.stH264CVbr.s32DeBreathQpDelta); + CONFIG_AUTO_SET(file_body["h264_config_param"], stRcAttr.stH264CVbr.u32IdrQpDeltaRange); + CONFIG_AUTO_SET(file_body["h264_config_param"], stRcAttr.stH264CVbr.u32MaxIprop); + CONFIG_AUTO_SET(file_body["h264_config_param"], stRcAttr.stH264CVbr.u32MinIprop); + CONFIG_AUTO_SET(file_body["h264_config_param"], stRcAttr.stH264CVbr.u32MaxBitRate); + CONFIG_AUTO_SET(file_body["h264_config_param"], stRcAttr.stH264CVbr.u32ShortTermStatTime); + CONFIG_AUTO_SET(file_body["h264_config_param"], stRcAttr.stH264CVbr.u32LongTermStatTime); + CONFIG_AUTO_SET(file_body["h264_config_param"], stRcAttr.stH264CVbr.u32LongTermMaxBitrate); + CONFIG_AUTO_SET(file_body["h264_config_param"], stRcAttr.stH264CVbr.u32LongTermMinBitrate); + CONFIG_AUTO_SET(file_body["h264_config_param"], stRcAttr.stH264CVbr.u32ExtraBitPercent); + CONFIG_AUTO_SET(file_body["h264_config_param"], stRcAttr.stH264CVbr.u32LongTermStatTimeUnit); + CONFIG_AUTO_SET(file_body["h264_config_param"], stRcAttr.stH264CVbr.s32IntraQpDelta); + CONFIG_AUTO_SET(file_body["h264_config_param"], stRcAttr.stH264CVbr.stQpmapInfo.enCtbRcMode); + CONFIG_AUTO_SET(file_body["h264_config_param"], stRcAttr.stH264CVbr.stQpmapInfo.enQpmapQpType); + CONFIG_AUTO_SET(file_body["h264_config_param"], + stRcAttr.stH264CVbr.stQpmapInfo.enQpmapBlockType); + CONFIG_AUTO_SET(file_body["h264_config_param"], + stRcAttr.stH264CVbr.stQpmapInfo.enQpmapBlockUnit); + + CONFIG_AUTO_SET(file_body["h264_config_param"], stRcAttr.stH264FixQp.u32Gop); + CONFIG_AUTO_SET(file_body["h264_config_param"], stRcAttr.stH264FixQp.u32IQp); + CONFIG_AUTO_SET(file_body["h264_config_param"], stRcAttr.stH264FixQp.u32PQp); + CONFIG_AUTO_SET(file_body["h264_config_param"], stRcAttr.stH264FixQp.u32BQp); + + CONFIG_AUTO_SET(file_body["h264_config_param"], stRcAttr.stH264QpMap.u32Gop); 
+ CONFIG_AUTO_SET(file_body["h264_config_param"], stRcAttr.stH264QpMap.u32StatTime); + CONFIG_AUTO_SET(file_body["h264_config_param"], stRcAttr.stH264QpMap.u32TargetBitRate); + CONFIG_AUTO_SET(file_body["h264_config_param"], stRcAttr.stH264QpMap.stQpmapInfo.enCtbRcMode); + CONFIG_AUTO_SET(file_body["h264_config_param"], stRcAttr.stH264QpMap.stQpmapInfo.enQpmapQpType); + CONFIG_AUTO_SET(file_body["h264_config_param"], + stRcAttr.stH264QpMap.stQpmapInfo.enQpmapBlockType); + CONFIG_AUTO_SET(file_body["h264_config_param"], + stRcAttr.stH264QpMap.stQpmapInfo.enQpmapBlockUnit); + + CONFIG_AUTO_SET(file_body["h264_config_param"], stGopAttr.enGopMode); + CONFIG_AUTO_SET(file_body["h264_config_param"], stGopAttr.stNormalP.stPicConfig.s32QpOffset); + CONFIG_AUTO_SET(file_body["h264_config_param"], stGopAttr.stNormalP.stPicConfig.f32QpFactor); + CONFIG_AUTO_SET(file_body["h264_config_param"], stGopAttr.stOneLTR.stPicConfig.s32QpOffset); + CONFIG_AUTO_SET(file_body["h264_config_param"], stGopAttr.stOneLTR.stPicConfig.f32QpFactor); + CONFIG_AUTO_SET(file_body["h264_config_param"], + stGopAttr.stOneLTR.stPicSpecialConfig.s32QpOffset); + CONFIG_AUTO_SET(file_body["h264_config_param"], + stGopAttr.stOneLTR.stPicSpecialConfig.f32QpFactor); + CONFIG_AUTO_SET(file_body["h264_config_param"], + stGopAttr.stOneLTR.stPicSpecialConfig.s32Interval); + CONFIG_AUTO_SET(file_body["h264_config_param"], stGopAttr.stSvcT.u32GopSize); + } else if (rtsp_config_.find("h265") != std::string::npos) { + CONFIG_AUTO_SET(file_body["h265_config_param"], stVencAttr.enType); + CONFIG_AUTO_SET(file_body["h265_config_param"], stVencAttr.u32MaxPicWidth); + CONFIG_AUTO_SET(file_body["h265_config_param"], stVencAttr.u32MaxPicHeight); + CONFIG_AUTO_SET(file_body["h265_config_param"], stVencAttr.enMemSource); + CONFIG_AUTO_SET(file_body["h265_config_param"], stVencAttr.u32BufSize); + CONFIG_AUTO_SET(file_body["h265_config_param"], stVencAttr.enProfile); + CONFIG_AUTO_SET(file_body["h265_config_param"], stVencAttr.enLevel); + CONFIG_AUTO_SET(file_body["h265_config_param"], stVencAttr.enTier); + CONFIG_AUTO_SET(file_body["h265_config_param"], stVencAttr.u32PicWidthSrc); + CONFIG_AUTO_SET(file_body["h265_config_param"], stVencAttr.u32PicHeightSrc); + CONFIG_AUTO_SET(file_body["h265_config_param"], stVencAttr.stCropCfg.bEnable); + CONFIG_AUTO_SET(file_body["h265_config_param"], stVencAttr.stCropCfg.stRect.s32X); + CONFIG_AUTO_SET(file_body["h265_config_param"], stVencAttr.stCropCfg.stRect.s32Y); + CONFIG_AUTO_SET(file_body["h265_config_param"], stVencAttr.stCropCfg.stRect.u32Width); + CONFIG_AUTO_SET(file_body["h265_config_param"], stVencAttr.stCropCfg.stRect.u32Height); + CONFIG_AUTO_SET(file_body["h265_config_param"], stVencAttr.enRotation); + CONFIG_AUTO_SET(file_body["h265_config_param"], stVencAttr.enLinkMode); + CONFIG_AUTO_SET(file_body["h265_config_param"], stVencAttr.bDeBreathEffect); + CONFIG_AUTO_SET(file_body["h265_config_param"], stVencAttr.bRefRingbuf); + CONFIG_AUTO_SET(file_body["h265_config_param"], stVencAttr.s32StopWaitTime); + CONFIG_AUTO_SET(file_body["h265_config_param"], stVencAttr.u8InFifoDepth); + CONFIG_AUTO_SET(file_body["h265_config_param"], stVencAttr.u8OutFifoDepth); + CONFIG_AUTO_SET(file_body["h265_config_param"], stVencAttr.u32SliceNum); + CONFIG_AUTO_SET(file_body["h265_config_param"], stVencAttr.stAttrH265e.bRcnRefShareBuf); + CONFIG_AUTO_SET(file_body["h265_config_param"], stRcAttr.enRcMode); + CONFIG_AUTO_SET(file_body["h265_config_param"], stRcAttr.s32FirstFrameStartQp); + 
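// The h265 branch below mirrors the h264 branch above field for field. In both cases every
+ // rate-control block (CBR/VBR/AVBR/QVBR/CVBR/FixQp/QpMap) is populated from the profile,
+ // but the encoder reads only the block selected by stRcAttr.enRcMode, so a profile only
+ // needs to carry keys for the mode it actually enables.
+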
CONFIG_AUTO_SET(file_body["h265_config_param"], stRcAttr.stFrameRate.fSrcFrameRate); + CONFIG_AUTO_SET(file_body["h265_config_param"], stRcAttr.stFrameRate.fDstFrameRate); + CONFIG_AUTO_SET(file_body["h265_config_param"], stRcAttr.stH265Cbr.u32Gop); + CONFIG_AUTO_SET(file_body["h265_config_param"], stRcAttr.stH265Cbr.u32StatTime); + CONFIG_AUTO_SET(file_body["h265_config_param"], stRcAttr.stH265Cbr.u32BitRate); + CONFIG_AUTO_SET(file_body["h265_config_param"], stRcAttr.stH265Cbr.u32MinQp); + CONFIG_AUTO_SET(file_body["h265_config_param"], stRcAttr.stH265Cbr.u32MaxQp); + CONFIG_AUTO_SET(file_body["h265_config_param"], stRcAttr.stH265Cbr.u32MinIQp); + CONFIG_AUTO_SET(file_body["h265_config_param"], stRcAttr.stH265Cbr.u32MaxIQp); + CONFIG_AUTO_SET(file_body["h265_config_param"], stRcAttr.stH265Cbr.u32MaxIprop); + CONFIG_AUTO_SET(file_body["h265_config_param"], stRcAttr.stH265Cbr.u32MinIprop); + CONFIG_AUTO_SET(file_body["h265_config_param"], stRcAttr.stH265Cbr.s32IntraQpDelta); + CONFIG_AUTO_SET(file_body["h265_config_param"], stRcAttr.stH265Cbr.s32DeBreathQpDelta); + CONFIG_AUTO_SET(file_body["h265_config_param"], stRcAttr.stH265Cbr.u32IdrQpDeltaRange); + CONFIG_AUTO_SET(file_body["h265_config_param"], stRcAttr.stH265Cbr.stQpmapInfo.enCtbRcMode); + CONFIG_AUTO_SET(file_body["h265_config_param"], stRcAttr.stH265Cbr.stQpmapInfo.enQpmapQpType); + CONFIG_AUTO_SET(file_body["h265_config_param"], + stRcAttr.stH265Cbr.stQpmapInfo.enQpmapBlockType); + CONFIG_AUTO_SET(file_body["h265_config_param"], + stRcAttr.stH265Cbr.stQpmapInfo.enQpmapBlockUnit); + + CONFIG_AUTO_SET(file_body["h265_config_param"], stRcAttr.stH265Vbr.u32Gop); + CONFIG_AUTO_SET(file_body["h265_config_param"], stRcAttr.stH265Vbr.u32StatTime); + CONFIG_AUTO_SET(file_body["h265_config_param"], stRcAttr.stH265Vbr.u32MaxBitRate); + CONFIG_AUTO_SET(file_body["h265_config_param"], stRcAttr.stH265Vbr.enVQ); + CONFIG_AUTO_SET(file_body["h265_config_param"], stRcAttr.stH265Vbr.u32MaxQp); + CONFIG_AUTO_SET(file_body["h265_config_param"], stRcAttr.stH265Vbr.u32MinQp); + CONFIG_AUTO_SET(file_body["h265_config_param"], stRcAttr.stH265Vbr.u32MaxIQp); + CONFIG_AUTO_SET(file_body["h265_config_param"], stRcAttr.stH265Vbr.u32MinIQp); + CONFIG_AUTO_SET(file_body["h265_config_param"], stRcAttr.stH265Vbr.s32IntraQpDelta); + CONFIG_AUTO_SET(file_body["h265_config_param"], stRcAttr.stH265Vbr.s32DeBreathQpDelta); + CONFIG_AUTO_SET(file_body["h265_config_param"], stRcAttr.stH265Vbr.u32IdrQpDeltaRange); + CONFIG_AUTO_SET(file_body["h265_config_param"], stRcAttr.stH265Vbr.stQpmapInfo.enCtbRcMode); + CONFIG_AUTO_SET(file_body["h265_config_param"], stRcAttr.stH265Vbr.stQpmapInfo.enQpmapQpType); + CONFIG_AUTO_SET(file_body["h265_config_param"], + stRcAttr.stH265Vbr.stQpmapInfo.enQpmapBlockType); + CONFIG_AUTO_SET(file_body["h265_config_param"], + stRcAttr.stH265Vbr.stQpmapInfo.enQpmapBlockUnit); + + CONFIG_AUTO_SET(file_body["h265_config_param"], stRcAttr.stH265AVbr.u32Gop); + CONFIG_AUTO_SET(file_body["h265_config_param"], stRcAttr.stH265AVbr.u32StatTime); + CONFIG_AUTO_SET(file_body["h265_config_param"], stRcAttr.stH265AVbr.u32MaxBitRate); + CONFIG_AUTO_SET(file_body["h265_config_param"], stRcAttr.stH265AVbr.u32MaxQp); + CONFIG_AUTO_SET(file_body["h265_config_param"], stRcAttr.stH265AVbr.u32MinQp); + CONFIG_AUTO_SET(file_body["h265_config_param"], stRcAttr.stH265AVbr.u32MaxIQp); + CONFIG_AUTO_SET(file_body["h265_config_param"], stRcAttr.stH265AVbr.u32MinIQp); + CONFIG_AUTO_SET(file_body["h265_config_param"], stRcAttr.stH265AVbr.s32IntraQpDelta); + 
CONFIG_AUTO_SET(file_body["h265_config_param"], stRcAttr.stH265AVbr.s32DeBreathQpDelta); + CONFIG_AUTO_SET(file_body["h265_config_param"], stRcAttr.stH265AVbr.u32IdrQpDeltaRange); + CONFIG_AUTO_SET(file_body["h265_config_param"], stRcAttr.stH265AVbr.stQpmapInfo.enCtbRcMode); + CONFIG_AUTO_SET(file_body["h265_config_param"], stRcAttr.stH265AVbr.stQpmapInfo.enQpmapQpType); + CONFIG_AUTO_SET(file_body["h265_config_param"], + stRcAttr.stH265AVbr.stQpmapInfo.enQpmapBlockType); + CONFIG_AUTO_SET(file_body["h265_config_param"], + stRcAttr.stH265AVbr.stQpmapInfo.enQpmapBlockUnit); + + CONFIG_AUTO_SET(file_body["h265_config_param"], stRcAttr.stH265QVbr.u32Gop); + CONFIG_AUTO_SET(file_body["h265_config_param"], stRcAttr.stH265QVbr.u32StatTime); + CONFIG_AUTO_SET(file_body["h265_config_param"], stRcAttr.stH265QVbr.u32TargetBitRate); + + CONFIG_AUTO_SET(file_body["h265_config_param"], stRcAttr.stH265CVbr.u32Gop); + CONFIG_AUTO_SET(file_body["h265_config_param"], stRcAttr.stH265CVbr.u32StatTime); + CONFIG_AUTO_SET(file_body["h265_config_param"], stRcAttr.stH265CVbr.u32MaxQp); + CONFIG_AUTO_SET(file_body["h265_config_param"], stRcAttr.stH265CVbr.u32MinQp); + CONFIG_AUTO_SET(file_body["h265_config_param"], stRcAttr.stH265CVbr.u32MaxIQp); + CONFIG_AUTO_SET(file_body["h265_config_param"], stRcAttr.stH265CVbr.u32MinIQp); + CONFIG_AUTO_SET(file_body["h265_config_param"], stRcAttr.stH265CVbr.u32MinQpDelta); + CONFIG_AUTO_SET(file_body["h265_config_param"], stRcAttr.stH265CVbr.u32MaxQpDelta); + CONFIG_AUTO_SET(file_body["h265_config_param"], stRcAttr.stH265CVbr.s32DeBreathQpDelta); + CONFIG_AUTO_SET(file_body["h265_config_param"], stRcAttr.stH265CVbr.u32IdrQpDeltaRange); + CONFIG_AUTO_SET(file_body["h265_config_param"], stRcAttr.stH265CVbr.u32MaxIprop); + CONFIG_AUTO_SET(file_body["h265_config_param"], stRcAttr.stH265CVbr.u32MinIprop); + CONFIG_AUTO_SET(file_body["h265_config_param"], stRcAttr.stH265CVbr.u32MaxBitRate); + CONFIG_AUTO_SET(file_body["h265_config_param"], stRcAttr.stH265CVbr.u32ShortTermStatTime); + CONFIG_AUTO_SET(file_body["h265_config_param"], stRcAttr.stH265CVbr.u32LongTermStatTime); + CONFIG_AUTO_SET(file_body["h265_config_param"], stRcAttr.stH265CVbr.u32LongTermMaxBitrate); + CONFIG_AUTO_SET(file_body["h265_config_param"], stRcAttr.stH265CVbr.u32LongTermMinBitrate); + CONFIG_AUTO_SET(file_body["h265_config_param"], stRcAttr.stH265CVbr.u32ExtraBitPercent); + CONFIG_AUTO_SET(file_body["h265_config_param"], stRcAttr.stH265CVbr.u32LongTermStatTimeUnit); + CONFIG_AUTO_SET(file_body["h265_config_param"], stRcAttr.stH265CVbr.s32IntraQpDelta); + CONFIG_AUTO_SET(file_body["h265_config_param"], stRcAttr.stH265CVbr.stQpmapInfo.enCtbRcMode); + CONFIG_AUTO_SET(file_body["h265_config_param"], stRcAttr.stH265CVbr.stQpmapInfo.enQpmapQpType); + CONFIG_AUTO_SET(file_body["h265_config_param"], + stRcAttr.stH265CVbr.stQpmapInfo.enQpmapBlockType); + CONFIG_AUTO_SET(file_body["h265_config_param"], + stRcAttr.stH265CVbr.stQpmapInfo.enQpmapBlockUnit); + + CONFIG_AUTO_SET(file_body["h265_config_param"], stRcAttr.stH265FixQp.u32Gop); + CONFIG_AUTO_SET(file_body["h265_config_param"], stRcAttr.stH265FixQp.u32IQp); + CONFIG_AUTO_SET(file_body["h265_config_param"], stRcAttr.stH265FixQp.u32PQp); + CONFIG_AUTO_SET(file_body["h265_config_param"], stRcAttr.stH265FixQp.u32BQp); + + CONFIG_AUTO_SET(file_body["h265_config_param"], stRcAttr.stH265QpMap.u32Gop); + CONFIG_AUTO_SET(file_body["h265_config_param"], stRcAttr.stH265QpMap.u32StatTime); + CONFIG_AUTO_SET(file_body["h265_config_param"], 
stRcAttr.stH265QpMap.u32TargetBitRate); + CONFIG_AUTO_SET(file_body["h265_config_param"], stRcAttr.stH265QpMap.stQpmapInfo.enCtbRcMode); + CONFIG_AUTO_SET(file_body["h265_config_param"], stRcAttr.stH265QpMap.stQpmapInfo.enQpmapQpType); + CONFIG_AUTO_SET(file_body["h265_config_param"], + stRcAttr.stH265QpMap.stQpmapInfo.enQpmapBlockType); + CONFIG_AUTO_SET(file_body["h265_config_param"], + stRcAttr.stH265QpMap.stQpmapInfo.enQpmapBlockUnit); + + CONFIG_AUTO_SET(file_body["h265_config_param"], stGopAttr.enGopMode); + CONFIG_AUTO_SET(file_body["h265_config_param"], stGopAttr.stNormalP.stPicConfig.s32QpOffset); + CONFIG_AUTO_SET(file_body["h265_config_param"], stGopAttr.stNormalP.stPicConfig.f32QpFactor); + CONFIG_AUTO_SET(file_body["h265_config_param"], stGopAttr.stOneLTR.stPicConfig.s32QpOffset); + CONFIG_AUTO_SET(file_body["h265_config_param"], stGopAttr.stOneLTR.stPicConfig.f32QpFactor); + CONFIG_AUTO_SET(file_body["h265_config_param"], + stGopAttr.stOneLTR.stPicSpecialConfig.s32QpOffset); + CONFIG_AUTO_SET(file_body["h265_config_param"], + stGopAttr.stOneLTR.stPicSpecialConfig.f32QpFactor); + CONFIG_AUTO_SET(file_body["h265_config_param"], + stGopAttr.stOneLTR.stPicSpecialConfig.s32Interval); + CONFIG_AUTO_SET(file_body["h265_config_param"], stGopAttr.stSvcT.u32GopSize); + } + try { + std::regex pattern(R"(rtsp\.(\d+)[xX-](\d+)\.h(264|265))"); + std::smatch matches; + if (std::regex_search(rtsp_config_, matches, pattern)) { + if (matches.size() >= 3) { + stVencChnAttr.stVencAttr.u32PicWidthSrc = std::stoi(matches[1].str()); + stVencChnAttr.stVencAttr.u32PicHeightSrc = std::stoi(matches[2].str()); + } + } + } catch (...) { + return true; + } + if ((stVencChnAttr.stVencAttr.u32PicWidthSrc < frame_width_) || + (stVencChnAttr.stVencAttr.u32PicHeightSrc < frame_height_)) { + return true; + } + init_rtsp(&stVencChnAttr); + } catch (...) 
{ + return true; + } + } + } else { return true; } + return false; } @@ -177,13 +556,26 @@ class llm_task { cam = NULL; } - ~llm_task() + void start() + { + } + + void stop() { if (cam) { cam->camera_capture_stop(cam); hal_camera_close(cam); cam = NULL; } + if (hv_tcpserver_) { + hv_tcpserver_->stop(); + hv_tcpserver_.reset(); + } + } + + ~llm_task() + { + stop(); } }; @@ -198,11 +590,12 @@ class llm_camera : public StackFlow { "list_camera", std::bind(&llm_camera::list_camera, this, std::placeholders::_1, std::placeholders::_2)); } - std::string list_camera(pzmq *_pzmq, const std::string &rawdata) + std::string list_camera(pzmq *_pzmq, const std::shared_ptr &rawdata) { + auto _rawdata = rawdata->string(); nlohmann::json req_body; - std::string zmq_url = RPC_PARSE_TO_FIRST(rawdata); - std::string param_json = RPC_PARSE_TO_SECOND(rawdata); + std::string zmq_url = rawdata->get_param(0); + std::string param_json = rawdata->get_param(1); std::vector devices; glob_t glob_result; glob("/dev/video*", GLOB_TILDE, NULL, &glob_result); @@ -222,27 +615,92 @@ class llm_camera : public StackFlow { if (!(llm_task_obj && llm_channel)) { return; } + std::vector jpeg_image; + // StackFlow output std::string out_data((char *)data, size); llm_channel->send_raw_to_pub(out_data); + // user output if (llm_task_obj->enoutput_) { std::string base64_data; - int ret = StackFlows::encode_base64(out_data, base64_data); + if (llm_task_obj->enjpegout_) { + cv::Mat yuv_image(llm_task_obj->frame_height_, llm_task_obj->frame_width_, CV_8UC2, (void *)data); + cv::Mat bgr_image; + cv::cvtColor(yuv_image, bgr_image, cv::COLOR_YUV2BGR_YUYV); + cv::imencode(".jpg", bgr_image, jpeg_image); + std::string in_data((char *)jpeg_image.data(), jpeg_image.size()); + StackFlows::encode_base64(in_data, base64_data); + } else { + StackFlows::encode_base64(out_data, base64_data); + } std::string out_json_str; - out_json_str.reserve(llm_channel->request_id_.size() + llm_channel->work_id_.size() + base64_data.size() + 108); + out_json_str.reserve(llm_channel->request_id_.size() + llm_channel->work_id_.size() + base64_data.size() + + 128); out_json_str += R"({"request_id":")"; out_json_str += llm_channel->request_id_; out_json_str += R"(","work_id":")"; out_json_str += llm_channel->work_id_; - out_json_str += R"(","object":"image.yuvraw.base64","error":{"code":0, "message":""},"data":")"; + out_json_str += R"(","object":")"; + out_json_str += llm_task_obj->response_format_; + out_json_str += R"(","error":{"code":0, "message":""},"data":")"; out_json_str += base64_data; out_json_str += "\"}\n"; llm_channel->send_raw_to_usr(out_json_str); } + // webstream output + if (llm_task_obj->enable_webstream_) { + if (!llm_task_obj->hv_tcpserver_) { + llm_task_obj->hv_tcpserver_ = std::make_unique(); + int listenfd = llm_task_obj->hv_tcpserver_->createsocket(8989); + if (listenfd < 0) { + llm_task_obj->hv_tcpserver_.reset(); + return; + } + llm_task_obj->hv_tcpserver_->onConnection = [](const hv::SocketChannelPtr &channel) { + std::string peeraddr = channel->peeraddr(); + if (channel->isConnected()) { + memset(http_response_buff, 0, 1024); + time_t current_time; + struct tm *time_info; + time(¤t_time); + time_info = gmtime(¤t_time); + char time_str[30]; + strftime(time_str, sizeof(time_str), "%a, %d %b %Y %H:%M:%S GMT", time_info); + sprintf(http_response_buff, http_response, time_str); + channel->write(http_response_buff); + } + }; + llm_task_obj->hv_tcpserver_->onMessage = [](const hv::SocketChannelPtr &channel, hv::Buffer *buf) {}; + 
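// The web stream is Motion-JPEG over HTTP: on connect a client receives the
+ // multipart/x-mixed-replace header block once, then the foreachChannel loop below
+ // writes one "--boundarydonotcross" part carrying the frame's JPEG bytes per capture.
+ // Any MJPEG-capable client should be able to render it, e.g. (device IP assumed):
+ //   ffplay http://<device-ip>:8989/
+ // or a browser pointed at the same URL; the port is hard-coded to 8989 above.
+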
llm_task_obj->hv_tcpserver_->setThreadNum(1); + llm_task_obj->hv_tcpserver_->start(); + } + llm_task_obj->hv_tcpserver_->foreachChannel([&](const hv::SocketChannelPtr &channel) { + if (jpeg_image.empty()) { + cv::Mat yuv_image(llm_task_obj->frame_height_, llm_task_obj->frame_width_, CV_8UC2, (void *)data); + cv::Mat bgr_image; + cv::cvtColor(yuv_image, bgr_image, cv::COLOR_YUV2BGR_YUYV); + cv::imencode(".jpg", bgr_image, jpeg_image); + } + char tmpsdas[256]; + struct timeval tv; + gettimeofday(&tv, NULL); + double timestamp = (double)tv.tv_sec + (double)tv.tv_usec / 1000000.0; + memset(http_response_buff1, 0, 1024); + sprintf(http_response_buff1, http_jpeg_response, timestamp, jpeg_image.size()); + channel->write(http_response_buff1); + channel->write(jpeg_image.data(), jpeg_image.size()); + }); + } } int setup(const std::string &work_id, const std::string &object, const std::string &data) override { nlohmann::json error_body; + if ((llm_task_channel_.size() - 1) == MAX_TASK_NUM) { + error_body["code"] = -21; + error_body["message"] = "task full"; + send("None", "None", error_body, "llm"); + return -1; + } int work_id_num = sample_get_work_id_num(work_id); auto llm_channel = get_channel(work_id); auto llm_task_obj = std::make_shared(work_id); @@ -310,6 +768,7 @@ class llm_camera : public StackFlow { send("None", "None", error_body, work_id); return -1; } + llm_task_[work_id_num]->stop(); auto llm_channel = get_channel(work_id_num); llm_channel->stop_subscriber(""); llm_task_.erase(work_id_num); @@ -324,6 +783,7 @@ class llm_camera : public StackFlow { if (iteam == llm_task_.end()) { break; } + iteam->second->stop(); get_channel(iteam->first)->stop_subscriber(""); iteam->second.reset(); llm_task_.erase(iteam->first); diff --git a/projects/llm_framework/main_depth_anything/SConstruct b/projects/llm_framework/main_depth_anything/SConstruct index 8500311d..a90887f6 100644 --- a/projects/llm_framework/main_depth_anything/SConstruct +++ b/projects/llm_framework/main_depth_anything/SConstruct @@ -16,7 +16,7 @@ LDFLAGS = [] LINK_SEARCH_PATH = [] STATIC_FILES = [] -DEFINITIONS += ['-std=c++17'] +DEFINITIONS += ['-O3', '-std=c++17'] LDFLAGS+=['-Wl,-rpath=/opt/m5stack/lib', '-Wl,-rpath=/usr/local/m5stack/lib', '-Wl,-rpath=/usr/local/m5stack/lib/gcc-10.3', '-Wl,-rpath=/opt/lib', '-Wl,-rpath=/opt/usr/lib', '-Wl,-rpath=./'] LINK_SEARCH_PATH += [ADir('../static_lib')] REQUIREMENTS += ['ax_engine', 'ax_interpreter', 'ax_sys'] @@ -30,7 +30,7 @@ STATIC_LIB += static_file * 2 STATIC_FILES += Glob('mode_*.json') -env['COMPONENTS'].append({'target':'llm_depth_anything', +env['COMPONENTS'].append({'target':'llm_depth_anything-1.6', 'SRCS':SRCS, 'INCLUDE':INCLUDE, 'PRIVATE_INCLUDE':PRIVATE_INCLUDE, diff --git a/projects/llm_framework/main_depth_anything/mode_depth-anything-ax630c.json b/projects/llm_framework/main_depth_anything/mode_depth-anything-ax630c.json index ddf0a6ce..fd91eb3e 100644 --- a/projects/llm_framework/main_depth_anything/mode_depth-anything-ax630c.json +++ b/projects/llm_framework/main_depth_anything/mode_depth-anything-ax630c.json @@ -1,7 +1,9 @@ { - "mode":"depth_anything", + "mode":"depth-anything-ax630c", "type":"cv", "homepage":"https://github.com/DepthAnything/Depth-Anything-V2", + "compile_flage":"pulsar2 build --input depth_anything.onnx --config config.json --output_dir output --output_name depth_anything.axmodel --target_hardware AX620E", + "pulsar_version":"2.5-2d5d0fa3", "capabilities":[ "Segmentation" ], diff --git a/projects/llm_framework/main_depth_anything/src/EngineWrapper.cpp 
b/projects/llm_framework/main_depth_anything/src/EngineWrapper.cpp index 09d70670..7a7ec61f 100644 --- a/projects/llm_framework/main_depth_anything/src/EngineWrapper.cpp +++ b/projects/llm_framework/main_depth_anything/src/EngineWrapper.cpp @@ -7,13 +7,20 @@ * written consent of Axera Semiconductor (Ningbo) Co., Ltd. * **************************************************************************************************/ -#include "base/detection.hpp" -#define UNUSE_STRUCT_OBJECT #include "EngineWrapper.hpp" #include "utils/io.hpp" + #include -static const char* strAlgoModelType[AX_ENGINE_VIRTUAL_NPU_BUTT] = {"1.6T", "3.2T"}; +#include + +#if defined(CONFIG_AX_650N_MSP_ENABLED) +static const char *strAlgoModelType[AX_ENGINE_MODEL_TYPE_BUTT] = {"3.6T", "7.2T", "18T"}; +#endif + +#if defined(CONFIG_AX_620E_MSP_ENABLED) || defined(CONFIG_AX_620Q_MSP_ENABLED) +static const char *strAlgoModelType[AX_ENGINE_MODEL_TYPE_BUTT] = {"HalfOCM", "FullOCM"}; +#endif /// @brief npu type typedef enum axNPU_TYPE_E { @@ -25,8 +32,9 @@ typedef enum axNPU_TYPE_E { AX_BL_VNPU_2 = (1 << 4) /* running under BIG-LITTLE VNPU2 */ } AX_NPU_TYPE_E; -static AX_S32 CheckModelVNpu(const std::string& strModel, const AX_ENGINE_MODEL_TYPE_T& eModelType, - const AX_S32& nNpuType, AX_U32& nNpuSet) +#if defined(CHIP_AX650) +static AX_S32 CheckModelVNpu(const std::string &strModel, const AX_ENGINE_MODEL_TYPE_T &eModelType, + const AX_S32 &nNpuType, AX_U32 &nNpuSet) { AX_ENGINE_NPU_ATTR_T stNpuAttr; memset(&stNpuAttr, 0x00, sizeof(stNpuAttr)); @@ -36,33 +44,42 @@ static AX_S32 CheckModelVNpu(const std::string& strModel, const AX_ENGINE_MODEL_ // VNPU DISABLE if (stNpuAttr.eHardMode == AX_ENGINE_VIRTUAL_NPU_DISABLE) { nNpuSet = 0x01; // NON-VNPU (0b111) + // printf("%s will run under VNPU-DISABLE [%s]\n", strModel.c_str(), strAlgoModelType[eModelType]); } // STD VNPU - else if (stNpuAttr.eHardMode == AX_ENGINE_VIRTUAL_NPU_BUTT) { + else if (stNpuAttr.eHardMode == AX_ENGINE_VIRTUAL_NPU_STD) { // 7.2T & 10.8T no allow - if (eModelType == AX_ENGINE_MODEL_TYPE1 || eModelType == AX_ENGINE_MODEL_TYPE1) { + if (eModelType == AX_ENGINE_MODEL_TYPE1 || eModelType == AX_ENGINE_MODEL_TYPE2) { + // printf("%s model type%d: [%s], no allow run under STD VNPU\n", strModel.c_str(), eModelType, + // strAlgoModelType[eModelType]); return -1; } // default STD VNPU2 if (nNpuType == 0) { nNpuSet = 0x02; // VNPU2 (0b010) + // printf("%s will run under default STD-VNPU2 [%s]\n", strModel.c_str(), strAlgoModelType[eModelType]); } else { if (nNpuType & AX_STD_VNPU_1) { nNpuSet |= 0x01; // VNPU1 (0b001) + // printf("%s will run under STD-VNPU1 [%s]\n", strModel.c_str(), strAlgoModelType[eModelType]); } if (nNpuType & AX_STD_VNPU_2) { nNpuSet |= 0x02; // VNPU2 (0b010) + // printf("%s will run under STD-VNPU2 [%s]\n", strModel.c_str(), strAlgoModelType[eModelType]); } if (nNpuType & AX_STD_VNPU_3) { nNpuSet |= 0x04; // VNPU3 (0b100) + // printf("%s will run under STD-VNPU3 [%s]\n", strModel.c_str(), strAlgoModelType[eModelType]); } } } // BL VNPU - else if (stNpuAttr.eHardMode == AX_ENGINE_VIRTUAL_NPU_BUTT) { + else if (stNpuAttr.eHardMode == AX_ENGINE_VIRTUAL_NPU_BIG_LITTLE) { // 10.8T no allow - if (eModelType == AX_ENGINE_MODEL_TYPE1) { + if (eModelType == AX_ENGINE_MODEL_TYPE2) { + // printf("%s model type%d: [%s], no allow run under BL VNPU\n", strModel.c_str(), eModelType, + // strAlgoModelType[eModelType]); return -1; } @@ -71,29 +88,38 @@ static AX_S32 CheckModelVNpu(const std::string& strModel, const AX_ENGINE_MODEL_ // 7.2T default BL VNPU1 if 
(eModelType == AX_ENGINE_MODEL_TYPE1) { nNpuSet = 0x01; // VNPU1 (0b001) + // printf("%s will run under default BL-VNPU1 [%s]\n", strModel.c_str(), + // strAlgoModelType[eModelType]); } // 3.6T default BL VNPU2 else { nNpuSet = 0x02; // VNPU2 (0b010) + // printf("%s will run under default BL-VNPU2 [%s]\n", strModel.c_str(), + // strAlgoModelType[eModelType]); } } else { // 7.2T if (eModelType == AX_ENGINE_MODEL_TYPE1) { // no allow set to BL VNPU2 if (nNpuType & AX_BL_VNPU_2) { + // printf("%s model type%d: [%s], no allow run under BL VNPU2\n", strModel.c_str(), eModelType, + // strAlgoModelType[eModelType]); return -1; } if (nNpuType & AX_BL_VNPU_1) { nNpuSet |= 0x01; // VNPU1 (0b001) + // printf("%s will run under BL-VNPU1 [%s]\n", strModel.c_str(), strAlgoModelType[eModelType]); } } // 3.6T else { if (nNpuType & AX_BL_VNPU_1) { nNpuSet |= 0x01; // VNPU1 (0b001) + // printf("%s will run under BL-VNPU1 [%s]\n", strModel.c_str(), strAlgoModelType[eModelType]); } if (nNpuType & AX_BL_VNPU_2) { nNpuSet |= 0x02; // VNPU2 (0b010) + // printf("%s will run under BL-VNPU2 [%s]\n", strModel.c_str(), strAlgoModelType[eModelType]); } } } @@ -104,21 +130,69 @@ static AX_S32 CheckModelVNpu(const std::string& strModel, const AX_ENGINE_MODEL_ return ret; } +#endif -int EngineWrapper::Init(const char* strModelPath, uint32_t nNpuType) +#if defined(CONFIG_AX_620E_MSP_ENABLED) || defined(CONFIG_AX_620Q_MSP_ENABLED) +static AX_S32 CheckModelVNpu(const std::string &strModel, const AX_ENGINE_MODEL_TYPE_T &eModelType, + const AX_S32 &nNpuType, AX_U32 &nNpuSet) +{ + AX_ENGINE_NPU_ATTR_T stNpuAttr; + memset(&stNpuAttr, 0x00, sizeof(stNpuAttr)); + + auto ret = AX_ENGINE_GetVNPUAttr(&stNpuAttr); + if (ret == 0) { + // VNPU DISABLE + if (stNpuAttr.eHardMode == AX_ENGINE_VIRTUAL_NPU_DISABLE) { + nNpuSet = 0x01; // NON-VNPU (0b111) + // ALOGN("%s will run under VNPU-DISABLE [%s]", strModel.c_str(), strAlgoModelType[eModelType]); + } + // STD VNPU + else if (stNpuAttr.eHardMode == AX_ENGINE_VIRTUAL_NPU_ENABLE) { + // full ocm model was no allowned + if (eModelType == AX_ENGINE_MODEL_TYPE1) { + // printf("%s model type%d: [%s], no allow run under STD VNPU", strModel.c_str(), eModelType, + // strAlgoModelType[eModelType]); + return -1; + } + + // default STD VNPU2 + if (nNpuType == 0) { + nNpuSet = 0x02; // VNPU2 (0b010) + // printf("%s will run under default STD-VNPU2 [%s]", strModel.c_str(), strAlgoModelType[eModelType]); + } else { + if (nNpuType & AX_STD_VNPU_1) { + nNpuSet |= 0x01; // VNPU1 (0b001) + // printf("%s will run under STD-VNPU1 [%s]", strModel.c_str(), strAlgoModelType[eModelType]); + } + if (nNpuType & AX_STD_VNPU_2) { + nNpuSet |= 0x02; // VNPU2 (0b010) + // printf("%s will run under STD-VNPU2 [%s]", strModel.c_str(), strAlgoModelType[eModelType]); + } + } + } + } else { + printf("AX_ENGINE_GetVNPUAttr fail ret = %x", ret); + } + + return ret; +} +#endif + +int EngineWrapper::Init(const char *strModelPath, uint32_t nNpuType) { AX_S32 ret = 0; // 1. 
load model - AX_BOOL bLoadModelUseCmm = AX_FALSE; - AX_CHAR* pModelBufferVirAddr = nullptr; + AX_BOOL bLoadModelUseCmm = AX_TRUE; + AX_CHAR *pModelBufferVirAddr = nullptr; AX_U64 u64ModelBufferPhyAddr = 0; AX_U32 nModelBufferSize = 0; std::vector model_buffer; if (bLoadModelUseCmm) { - if (!utils::read_file(strModelPath, (AX_VOID**)&pModelBufferVirAddr, u64ModelBufferPhyAddr, nModelBufferSize)) { + if (!utils::read_file(strModelPath, (AX_VOID **)&pModelBufferVirAddr, u64ModelBufferPhyAddr, + nModelBufferSize)) { printf("ALGO read model(%s) fail\n", strModelPath); return -1; } @@ -220,7 +294,7 @@ int EngineWrapper::Init(const char* strModelPath, uint32_t nNpuType) // 6. prepare io // AX_U32 nIoDepth = (stCtx.vecOutputBufferFlag.size() == 0) ? 1 : stCtx.vecOutputBufferFlag.size(); - ret = utils::prepare_io(strModelPath, m_io_info, m_io, utils::IO_BUFFER_STRATEGY_DEFAULT); + ret = utils::prepare_io(strModelPath, m_io_info, m_io, utils::IO_BUFFER_STRATEGY_CACHED); if (0 != ret) { printf("prepare io failed!\n"); utils::free_io(m_io); @@ -233,12 +307,12 @@ int EngineWrapper::Init(const char* strModelPath, uint32_t nNpuType) return 0; } -int EngineWrapper::SetInput(void* pInput, int index) +int EngineWrapper::SetInput(void *pInput, int index) { return utils::push_io_input(pInput, index, m_io); } -int EngineWrapper::RunSync() +int EngineWrapper::Run() { if (!m_hasInit) return -1; @@ -252,12 +326,43 @@ int EngineWrapper::RunSync() return 0; } -void post_process(AX_ENGINE_IO_INFO_T* io_info, AX_ENGINE_IO_T* io_data, const cv::Mat& mat, std::string& model_type, - std::string& byteString) +int EngineWrapper::GetOutput(void *pOutput, int index) +{ + return utils::push_io_output(pOutput, index, m_io); +} + +int EngineWrapper::GetInputSize(int index) +{ + return m_io.pInputs[index].nSize; +} + +int EngineWrapper::GetOutputSize(int index) +{ + return m_io.pOutputs[index].nSize; +} + +void *EngineWrapper::GetOutputPtr(int index) +{ + utils::cache_io_flush(&m_io.pOutputs[index]); + return m_io.pOutputs[index].pVirAddr; +} + +int EngineWrapper::Release() +{ + if (m_handle) { + utils::free_io(m_io); + AX_ENGINE_DestroyHandle(m_handle); + m_handle = nullptr; + } + return 0; +} + +void post_process(AX_ENGINE_IO_INFO_T *io_info, AX_ENGINE_IO_T *io_data, const cv::Mat &mat, std::string &model_type, + std::string &byteString) { if (model_type == "segment") { - auto& output = io_data->pOutputs[0]; - auto& info = io_info->pOutputs[0]; + auto &output = io_data->pOutputs[0]; + auto &info = io_info->pOutputs[0]; cv::Mat feature(info.pShape[2], info.pShape[3], CV_32FC1, output.pVirAddr); double minVal, maxVal; @@ -280,33 +385,8 @@ void post_process(AX_ENGINE_IO_INFO_T* io_info, AX_ENGINE_IO_T* io_data, const c } } -int EngineWrapper::Post_Process(cv::Mat& mat, std::string& model_type, std::string& byteString) +int EngineWrapper::Post_Process(cv::Mat &mat, std::string &model_type, std::string &byteString) { post_process(m_io_info, &m_io, mat, model_type, byteString); return 0; -} - -int EngineWrapper::GetOutput(void* pOutput, int index) -{ - return utils::push_io_output(pOutput, index, m_io); -} - -int EngineWrapper::GetInputSize(int index) -{ - return m_io.pInputs[index].nSize; -} - -int EngineWrapper::GetOutputSize(int index) -{ - return m_io.pOutputs[index].nSize; -} - -int EngineWrapper::Release() -{ - if (m_handle) { - utils::free_io(m_io); - AX_ENGINE_DestroyHandle(m_handle); - m_handle = nullptr; - } - return 0; -} +} \ No newline at end of file diff --git 
a/projects/llm_framework/main_depth_anything/src/EngineWrapper.hpp b/projects/llm_framework/main_depth_anything/src/EngineWrapper.hpp index 520a313f..c3f848ea 100644 --- a/projects/llm_framework/main_depth_anything/src/EngineWrapper.hpp +++ b/projects/llm_framework/main_depth_anything/src/EngineWrapper.hpp @@ -10,27 +10,14 @@ #pragma once +#include +#include +#include +#include #include #include -#include "ax_engine_api.h" - -#ifndef UNUSE_STRUCT_OBJECT -namespace detection { -typedef struct Object { - cv::Rect_ rect; - int label; - float prob; - cv::Point2f landmark[5]; - /* for yolov5-seg */ - cv::Mat mask; - std::vector mask_feat; - std::vector kps_feat; - /* for yolov8-obb */ - float angle; -} Object; -} // namespace detection -#endif +#include "ax_engine_api.h" class EngineWrapper { public: @@ -47,21 +34,23 @@ class EngineWrapper { int SetInput(void* pInput, int index); - int RunSync(); - - int Post_Process(cv::Mat& mat, std::string& model_type, std::string& byteString); + int Run(); int GetOutput(void* pOutput, int index); int GetInputSize(int index); int GetOutputSize(int index); + void* GetOutputPtr(int index); + int Release(); + int Post_Process(cv::Mat& mat, std::string& model_type, std::string& byteString); + protected: bool m_hasInit; AX_ENGINE_HANDLE m_handle; AX_ENGINE_IO_INFO_T* m_io_info{}; AX_ENGINE_IO_T m_io{}; int m_input_num{}, m_output_num{}; -}; +}; \ No newline at end of file diff --git a/projects/llm_framework/main_depth_anything/src/main.cpp b/projects/llm_framework/main_depth_anything/src/main.cpp index 4e9369c1..7685bc83 100644 --- a/projects/llm_framework/main_depth_anything/src/main.cpp +++ b/projects/llm_framework/main_depth_anything/src/main.cpp @@ -98,6 +98,7 @@ class llm_task { SLOGW("config file :%s miss", file_name.c_str()); continue; } + SLOGI("config file :%s read", file_name.c_str()); config_file >> file_body; config_file.close(); break; @@ -197,11 +198,10 @@ class llm_task { common::get_input_data_no_letterbox(src, image, mode_config_.img_h, mode_config_.img_w, bgr2rgb); cv::Mat img_mat(mode_config_.img_h, mode_config_.img_w, CV_8UC3, image.data()); depth_anything_->SetInput((void *)image.data(), 0); - if (0 != depth_anything_->RunSync()) { + if (0 != depth_anything_->Run()) { SLOGE("Run depth_anything model failed!\n"); throw std::string("depth_anything_ RunSync error"); } - std::vector objects; std::string depth_anything_output; depth_anything_->Post_Process(img_mat, mode_config_.model_type, depth_anything_output); if (out_callback_) out_callback_(depth_anything_output, true); @@ -245,8 +245,18 @@ class llm_task { _ax_init(); } + void start() + { + } + + void stop() + { + } + ~llm_task() { + stop(); + if (depth_anything_) depth_anything_->Release(); _ax_deinit(); } }; @@ -409,8 +419,8 @@ class llm_depth_anything : public StackFlow { std::weak_ptr _llm_task_obj = llm_task_obj; std::weak_ptr _llm_channel = llm_channel; llm_channel->subscriber( - input_url, [this, _llm_task_obj, _llm_channel](pzmq *_pzmq, const std::string &raw) { - this->task_camera_data(_llm_task_obj, _llm_channel, raw); + input_url, [this, _llm_task_obj, _llm_channel](pzmq *_pzmq, const std::shared_ptr &raw) { + this->task_camera_data(_llm_task_obj, _llm_channel, raw->string()); }); } } @@ -456,8 +466,8 @@ class llm_depth_anything : public StackFlow { std::weak_ptr _llm_task_obj = llm_task_obj; std::weak_ptr _llm_channel = llm_channel; llm_channel->subscriber(input_url, - [this, _llm_task_obj, _llm_channel](pzmq *_pzmq, const std::string &raw) { - 
this->task_camera_data(_llm_task_obj, _llm_channel, raw); + [this, _llm_task_obj, _llm_channel](pzmq *_pzmq, const std::shared_ptr &raw) { + this->task_camera_data(_llm_task_obj, _llm_channel, raw->string()); }); } llm_task_obj->inputs_.push_back(data); @@ -536,6 +546,7 @@ class llm_depth_anything : public StackFlow { send("None", "None", error_body, work_id); return -1; } + llm_task_[work_id_num]->stop(); auto llm_channel = get_channel(work_id_num); llm_channel->stop_subscriber(""); llm_task_.erase(work_id_num); @@ -550,6 +561,7 @@ class llm_depth_anything : public StackFlow { if (iteam == llm_task_.end()) { break; } + iteam->second->stop(); get_channel(iteam->first)->stop_subscriber(""); iteam->second.reset(); llm_task_.erase(iteam->first); diff --git a/projects/llm_framework/main_kws/SConstruct b/projects/llm_framework/main_kws/SConstruct index 5a484071..c09ca41a 100644 --- a/projects/llm_framework/main_kws/SConstruct +++ b/projects/llm_framework/main_kws/SConstruct @@ -16,6 +16,8 @@ LDFLAGS = [] LINK_SEARCH_PATH = [] STATIC_FILES = [] +python_venv = check_wget_down("https://m5stack.oss-cn-shenzhen.aliyuncs.com/resource/linux/llm/m5stack_llm-kws-python-venv_v1.6.tar.gz", 'm5stack_llm-kws-python-venv_v1.6.tar.gz') + DEFINITIONS += ['-std=c++17'] LDFLAGS+=['-Wl,-rpath=/opt/m5stack/lib', '-Wl,-rpath=/usr/local/m5stack/lib', '-Wl,-rpath=/usr/local/m5stack/lib/gcc-10.3', '-Wl,-rpath=/opt/lib', '-Wl,-rpath=/opt/usr/lib', '-Wl,-rpath=./'] LINK_SEARCH_PATH += [ADir('../static_lib')] @@ -31,10 +33,29 @@ LDFLAGS += ['-l:libcargs.a', '-l:libonnxruntime.a', '-l:libsherpa-onnx-core.a', '-l:libkaldi-native-fbank-core.a', '-l:libkaldi-decoder-core.a', '-l:libssentencepiece_core.a'] +STATIC_FILES += [os.path.join(python_venv, 'sherpa-onnx')] STATIC_FILES += Glob('llm-kws_text2token.py') STATIC_FILES += Glob('mode_*.json') -env['COMPONENTS'].append({'target':'llm_kws', +IGNORE_FILES = [] +IGNORE_FILES += ['sherpa-onnx'] + +import json +if not os.path.exists('../dist'): + os.makedirs('../dist') +ignore = {'ignore':[]} +try: + with open('../dist/fileignore', 'a+') as f: + f.seek(0) + ignore = json.load(f) +except: + pass +ignore['ignore'] += IGNORE_FILES +ignore['ignore'] = list(set(ignore['ignore'])) +with open('../dist/fileignore', 'w') as f: + json.dump(ignore, f, indent=4) + +env['COMPONENTS'].append({'target':'llm_kws-1.7', 'SRCS':SRCS, 'INCLUDE':INCLUDE, 'PRIVATE_INCLUDE':PRIVATE_INCLUDE, diff --git a/projects/llm_framework/main_kws/src/main.cpp b/projects/llm_framework/main_kws/src/main.cpp index 4c51002d..9c05ce82 100644 --- a/projects/llm_framework/main_kws/src/main.cpp +++ b/projects/llm_framework/main_kws/src/main.cpp @@ -107,6 +107,7 @@ class llm_task { SLOGW("config file :%s miss", file_name.c_str()); continue; } + SLOGI("config file :%s read", file_name.c_str()); config_file >> file_body; config_file.close(); break; @@ -176,9 +177,11 @@ class llm_task { temp_awake_key.close(); std::ostringstream awake_key_compile_cmd; if (file_exists("/opt/m5stack/scripts/text2token.py")) - awake_key_compile_cmd << "/usr/bin/python3 /opt/m5stack/scripts/text2token.py "; + awake_key_compile_cmd << "PYTHONPATH=/opt/m5stack/lib/sherpa-onnx/site-packages /usr/bin/python3 " + "/opt/m5stack/scripts/text2token.py "; else if (file_exists("/opt/m5stack/scripts/llm-kws_text2token.py")) - awake_key_compile_cmd << "/usr/bin/python3 /opt/m5stack/scripts/llm-kws_text2token.py "; + awake_key_compile_cmd << "PYTHONPATH=/opt/m5stack/lib/sherpa-onnx/site-packages /usr/bin/python3 " + 
"/opt/m5stack/scripts/llm-kws_text2token.py "; else { SLOGE("text2token.py or llm-kws_text2token.py not found!"); } @@ -255,11 +258,17 @@ class llm_task { pcmdata = buffer_create(); } + void start() + { + } + + void stop() + { + } + ~llm_task() { - if (spotter_stream_) { - spotter_stream_.reset(); - } + stop(); buffer_destroy(pcmdata); } }; @@ -327,8 +336,8 @@ class llm_kws : public StackFlow { } if ((!audio_url_.empty()) && (llm_task_obj->audio_flage_ == false)) { std::weak_ptr _llm_task_obj = llm_task_obj; - llm_channel->subscriber(audio_url_, [_llm_task_obj](pzmq *_pzmq, const std::string &raw) { - _llm_task_obj.lock()->sys_pcm_on_data(raw); + llm_channel->subscriber(audio_url_, [_llm_task_obj](pzmq *_pzmq, const std::shared_ptr &raw) { + _llm_task_obj.lock()->sys_pcm_on_data(raw->string()); }); llm_task_obj->audio_flage_ = true; } @@ -447,8 +456,8 @@ class llm_kws : public StackFlow { if (input.find("sys") != std::string::npos) { audio_url_ = unit_call("audio", "cap", "None"); std::weak_ptr _llm_task_obj = llm_task_obj; - llm_channel->subscriber(audio_url_, [_llm_task_obj](pzmq *_pzmq, const std::string &raw) { - _llm_task_obj.lock()->sys_pcm_on_data(raw); + llm_channel->subscriber(audio_url_, [_llm_task_obj](pzmq *_pzmq, const std::shared_ptr &raw) { + _llm_task_obj.lock()->sys_pcm_on_data(raw->string()); }); llm_task_obj->audio_flage_ = true; } else if (input.find("kws") != std::string::npos) { @@ -509,6 +518,7 @@ class llm_kws : public StackFlow { send("None", "None", error_body, work_id); return -1; } + llm_task_[work_id_num]->stop(); auto llm_channel = get_channel(work_id_num); llm_channel->stop_subscriber(""); if (llm_task_[work_id_num]->audio_flage_) { @@ -526,6 +536,7 @@ class llm_kws : public StackFlow { if (iteam == llm_task_.end()) { break; } + iteam->second->stop(); if (iteam->second->audio_flage_) { unit_call("audio", "cap_stop", "None"); } diff --git a/projects/llm_framework/main_llm/SConstruct b/projects/llm_framework/main_llm/SConstruct index e744507e..1b12643b 100644 --- a/projects/llm_framework/main_llm/SConstruct +++ b/projects/llm_framework/main_llm/SConstruct @@ -17,6 +17,8 @@ LDFLAGS = [] LINK_SEARCH_PATH = [] STATIC_FILES = [] +python_venv = check_wget_down("https://m5stack.oss-cn-shenzhen.aliyuncs.com/resource/linux/llm/m5stack_llm-llm-python-venv_v1.7.tar.gz", 'm5stack_llm-llm-python-venv_v1.7.tar.gz') + # REQUIREMENTS += ['Backward_cpp'] # DYNAMIC_LIB += [ AFile('../static_lib/libdw.so.1'), # AFile('../static_lib/libelf.so.1'), @@ -41,11 +43,30 @@ static_file = Glob('../static_lib/module-llm/libabsl_*') static_file += [AFile('../static_lib/module-llm/libre2.a'), AFile('../static_lib/module-llm/libsentencepiece.a'), AFile('../static_lib/module-llm/libsentencepiece_train.a')] STATIC_LIB += static_file * 4 +STATIC_FILES += [os.path.join(python_venv, 'llm')] STATIC_FILES += Glob('scripts/tokenizer_*.py') STATIC_FILES += Glob('models/mode_*.json') STATIC_FILES += [AFile('scripts/llm-llm_tokenizer_auto.py')] -env['COMPONENTS'].append({'target':'llm_llm', +IGNORE_FILES = [] +IGNORE_FILES += ['llm'] + +import json +if not os.path.exists('../dist'): + os.makedirs('../dist') +ignore = {'ignore':[]} +try: + with open('../dist/fileignore', 'a+') as f: + f.seek(0) + ignore = json.load(f) +except: + pass +ignore['ignore'] += IGNORE_FILES +ignore['ignore'] = list(set(ignore['ignore'])) +with open('../dist/fileignore', 'w') as f: + json.dump(ignore, f, indent=4) + +env['COMPONENTS'].append({'target':'llm_llm-1.8', 'SRCS':SRCS, 'INCLUDE':INCLUDE, 
'PRIVATE_INCLUDE':PRIVATE_INCLUDE, diff --git a/projects/llm_framework/main_llm/models/mode_qwen2.5-0.5B-p256-ax630c.json b/projects/llm_framework/main_llm/models/mode_qwen2.5-0.5B-p256-ax630c.json index dc988104..cd16feca 100644 --- a/projects/llm_framework/main_llm/models/mode_qwen2.5-0.5B-p256-ax630c.json +++ b/projects/llm_framework/main_llm/models/mode_qwen2.5-0.5B-p256-ax630c.json @@ -2,6 +2,8 @@ "mode":"qwen2.5-0.5B-p256-ax630c", "type":"llm", "homepage":"https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct", + "compile_flage":"pulsar2 llm_build --input_path Qwen/Qwen2-0.5B-Instruct/ --output_path Qwen/Qwen2-0.5B-w8a16/ --kv_cache_len 1023 --hidden_state_type bf16 --prefill_len 256 --chip AX620E", + "pulsar_version":"3.4-983bb35e", "capabilities":[ "text_generation", "chat" diff --git a/projects/llm_framework/main_llm/models/mode_qwen2.5-0.5B-prefill-20e.json b/projects/llm_framework/main_llm/models/mode_qwen2.5-0.5B-prefill-20e.json index 86bda882..b5502b5c 100644 --- a/projects/llm_framework/main_llm/models/mode_qwen2.5-0.5B-prefill-20e.json +++ b/projects/llm_framework/main_llm/models/mode_qwen2.5-0.5B-prefill-20e.json @@ -2,6 +2,8 @@ "mode":"qwen2.5-0.5B-prefill-20e", "type":"llm", "homepage":"https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct", + "compile_flage":"pulsar2 llm_build --input_path Qwen/Qwen2-0.5B-Instruct/ --output_path Qwen/Qwen2-0.5B-w8a16/ --kv_cache_len 1023 --hidden_state_type bf16 --prefill_len 128 --chip AX620E;./tools/embed_process.sh Qwen/Qwen2-0.5B-Instruct/ Qwen/Qwen2-0.5B-w8a16/", + "pulsar_version":"3.4-983bb35e", "capabilities":[ "text_generation", "chat" diff --git a/projects/llm_framework/main_llm/models/mode_qwen3-0.6B-ax630c.json b/projects/llm_framework/main_llm/models/mode_qwen3-0.6B-ax630c.json new file mode 100644 index 00000000..6b349d36 --- /dev/null +++ b/projects/llm_framework/main_llm/models/mode_qwen3-0.6B-ax630c.json @@ -0,0 +1,35 @@ +{ + "mode":"qwen3-0.6B-ax630c", + "type":"llm", + "homepage":"https://huggingface.co/Qwen/Qwen3-0.6B", + "capabilities":[ + "text_generation", + "chat" + ], + "input_type":[ + "llm.utf-8", + "llm.utf-8.stream", + "llm.chat_completion", + "llm.chat_completion.stream" + ], + "output_type":[ + "llm.utf-8", + "llm.utf-8.stream" + ], + "mode_param":{ + "tokenizer_type":2, + "filename_tokenizer_model":"http://localhost:8080", + "filename_tokens_embed":"model.embed_tokens.weight.bfloat16.bin", + "filename_post_axmodel":"qwen3_post.axmodel", + "template_filename_axmodel":"qwen3_p128_l%d_together.axmodel", + "b_use_topk":false, + "b_bos":false, + "b_eos":false, + "axmodel_num":28, + "tokens_embed_num":151936, + "tokens_embed_size":1024, + "b_use_mmap_load_embed":true, + "b_dynamic_load_axmodel_layer":false, + "ext_scripts":["tokenizer_qwen3-0.6B-ax630c.py"] + } +} \ No newline at end of file diff --git a/projects/llm_framework/main_llm/scripts/tokenizer_qwen3-0.6B-ax630c.py b/projects/llm_framework/main_llm/scripts/tokenizer_qwen3-0.6B-ax630c.py new file mode 100644 index 00000000..652335ac --- /dev/null +++ b/projects/llm_framework/main_llm/scripts/tokenizer_qwen3-0.6B-ax630c.py @@ -0,0 +1,131 @@ +from transformers import AutoTokenizer, PreTrainedTokenizerFast +from http.server import HTTPServer, BaseHTTPRequestHandler +import json +import argparse + +class Tokenizer_Http(): + + def __init__(self, model_id): + self.tokenizer = AutoTokenizer.from_pretrained(model_id) + + def encode(self, prompt, content): + messages = [ + {"role": "system", "content": content}, + {"role": "user", "content": prompt} + ] + text = 
self.tokenizer.apply_chat_template( + messages, + tokenize=False, + add_generation_prompt=True + ) + print(text) + token_ids = self.tokenizer.encode(text) + return token_ids + + def decode(self, token_ids): + return self.tokenizer.decode(token_ids) + + @property + def bos_id(self): + return self.tokenizer.bos_token_id + + @property + def eos_id(self): + return self.tokenizer.eos_token_id + + @property + def bos_token(self): + return self.tokenizer.bos_token + + @property + def eos_token(self): + return self.tokenizer.eos_token + +class Request(BaseHTTPRequestHandler): + # Request handler defined by subclassing BaseHTTPRequestHandler + timeout = 5 + server_version = 'Apache' + + def do_GET(self): + print(self.path) + # Serve GET requests from clients + self.send_response(200) + self.send_header("type", "get") # set a response header (optional, more may be added) + self.end_headers() + + if self.path == '/bos_id': + bos_id = tokenizer.bos_id + # print(bos_id) + # to json + if bos_id is None: + msg = json.dumps({'bos_id': -1}) + else: + msg = json.dumps({'bos_id': bos_id}) + elif self.path == '/eos_id': + eos_id = tokenizer.eos_id + if eos_id is None: + msg = json.dumps({'eos_id': -1}) + else: + msg = json.dumps({'eos_id': eos_id}) + else: + msg = 'error' + + print(msg) + msg = str(msg).encode() # convert to str, then to bytes + + self.wfile.write(msg) # send the bytes back to the client + + def do_POST(self): + # Serve POST requests from clients + data = self.rfile.read(int( + self.headers['content-length'])) # read the request body sent by the client (bytes) + data = data.decode() # convert bytes to str + + self.send_response(200) + self.send_header("type", "post") # set a response header (optional, more may be added) + self.end_headers() + + if self.path == '/encode': + req = json.loads(data) + prompt = req['text'] + + token_ids = tokenizer.encode(prompt, args.content) + if token_ids is None: + msg = json.dumps({'token_ids': -1}) + else: + msg = json.dumps({'token_ids': token_ids}) + + elif self.path == '/decode': + req = json.loads(data) + token_ids = req['token_ids'] + text = tokenizer.decode(token_ids) + if text is None: + msg = json.dumps({'text': ""}) + else: + msg = json.dumps({'text': text}) + else: + msg = 'error' + print(msg) + msg = str(msg).encode() # convert to str, then to bytes + + self.wfile.write(msg) # send the bytes back to the client + + +if __name__ == "__main__": + + args = argparse.ArgumentParser() + args.add_argument('--host', type=str, default='localhost') + args.add_argument('--port', type=int, default=8080) + args.add_argument('--model_id', type=str, default='qwen3_0.6B_tokenizer') + args.add_argument('--content', type=str, default='You are Qwen, created by Alibaba Cloud. You are a helpful assistant.') + args = args.parse_args() + + tokenizer = Tokenizer_Http(args.model_id) + + # print(tokenizer.bos_id, tokenizer.bos_token, tokenizer.eos_id, tokenizer.eos_token) + # print(tokenizer.encode("hello world", args.content)) + + host = (args.host, args.port) # bind address and port; 'localhost' is equivalent to '127.0.0.1' + print('http://%s:%s' % host) + server = HTTPServer(host, Request) # build the server from the address/port and the handler class above + server.serve_forever() # start serving
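A quick way to sanity-check this tokenizer service is to hit its endpoints directly. The sketch below assumes the script is already running with its default `--host localhost --port 8080` and uses only the `/bos_id`, `/eos_id`, `/encode` and `/decode` routes defined above:

```python
import json
import urllib.request

BASE = 'http://localhost:8080'  # default --host/--port of the tokenizer script

def get_json(path):
    with urllib.request.urlopen(BASE + path, timeout=5) as r:
        return json.loads(r.read())

def post_json(path, payload):
    # urllib sends a POST (with Content-Length set) when data= is given
    req = urllib.request.Request(BASE + path, data=json.dumps(payload).encode())
    with urllib.request.urlopen(req, timeout=5) as r:
        return json.loads(r.read())

print(get_json('/bos_id'), get_json('/eos_id'))  # ids are model-dependent; -1 means unset
ids = post_json('/encode', {'text': 'hello'})['token_ids']
print(post_json('/decode', {'token_ids': ids})['text'])
```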
diff --git a/projects/llm_framework/main_llm/src/main.cpp b/projects/llm_framework/main_llm/src/main.cpp index c6672096..7cbb9fbe 100644 --- a/projects/llm_framework/main_llm/src/main.cpp +++ b/projects/llm_framework/main_llm/src/main.cpp @@ -23,6 +23,9 @@ using namespace StackFlows; #include "backward.hpp" #include "backward.h" #endif + +#define MAX_TASK_NUM 2 + int main_exit_flage = 0; static void __sigint(int iSigNo) { @@ -108,6 +111,7 @@ class llm_task { SLOGW("config file :%s miss", file_name.c_str()); continue; } + SLOGI("config file :%s read", file_name.c_str()); config_file >> file_body; config_file.close(); break; @@ -155,6 +159,7 @@ class llm_task { if (!tokenizer_server_flage_.load()) { tokenizer_pid_ = fork(); if (tokenizer_pid_ == 0) { + setenv("PYTHONPATH", "/opt/m5stack/lib/llm/site-packages", 1); execl("/usr/bin/python3", "python3", tokenizer_file.c_str(), "--host", "localhost", "--port", std::to_string(port_).c_str(), "--model_id", (base_model + "tokenizer").c_str(), "--content", ("'" + prompt_ + "'").c_str(), nullptr); @@ -287,8 +292,7 @@ class llm_task { bool pause() { - if(lLaMa_) - lLaMa_->Stop(); + if (lLaMa_) lLaMa_->Stop(); return true; } @@ -331,12 +335,11 @@ class llm_task { if (inference_run_) { std::string par; async_list_.put(par); - if(lLaMa_) - lLaMa_->Stop(); + if (lLaMa_) lLaMa_->Stop(); inference_run_->join(); inference_run_.reset(); } - } + } ~llm_task() { @@ -357,13 +360,11 @@ std::atomic<int> llm_task::next_port_{8080}; class llm_llm : public StackFlow { private: - int task_count_; std::unordered_map<int, std::shared_ptr<llm_task>> llm_task_; public: llm_llm() : StackFlow("llm") { - task_count_ = 2; } void task_output(const std::weak_ptr<llm_task> llm_task_obj_weak, @@ -499,7 +500,7 @@ class llm_llm : public StackFlow { int setup(const std::string &work_id, const std::string &object, const std::string &data) override { nlohmann::json error_body; - if ((llm_task_channel_.size() - 1) == task_count_) { + if ((llm_task_channel_.size() - 1) == MAX_TASK_NUM) { error_body["code"] = -21; error_body["message"] = "task full"; send("None", "None", error_body, "llm"); diff --git a/projects/llm_framework/main_melotts/SConstruct b/projects/llm_framework/main_melotts/SConstruct index e54608b9..6663ca30 100644 --- a/projects/llm_framework/main_melotts/SConstruct +++ b/projects/llm_framework/main_melotts/SConstruct @@ -21,7 +21,7 @@ DEFINITIONS += ['-O3', '-fopenmp', '-std=c++17'] LDFLAGS+=['-Wl,-rpath=/opt/m5stack/lib', '-Wl,-rpath=/usr/local/m5stack/lib', '-Wl,-rpath=/usr/local/m5stack/lib/gcc-10.3', '-Wl,-rpath=/opt/lib', '-Wl,-rpath=/opt/usr/lib', '-Wl,-rpath=./'] LINK_SEARCH_PATH += [ADir('../static_lib')] REQUIREMENTS += ['ax_engine', 'ax_interpreter', 'ax_sys'] -REQUIREMENTS += ['onnxruntime', 'samplerate'] +REQUIREMENTS += ['samplerate'] INCLUDE += [ADir('../include')] INCLUDE += [ADir('src/runner'), ADir('../include/onnxruntime/core/session')] @@ -31,7 +31,7 @@ LDFLAGS += ['-l:libcargs.a', '-l:libonnxruntime.a'] STATIC_FILES += Glob('mode_*.json') -env['COMPONENTS'].append({'target':'llm_melotts', +env['COMPONENTS'].append({'target':'llm_melotts-1.7', 'SRCS':SRCS, 
'INCLUDE':INCLUDE, 'PRIVATE_INCLUDE':PRIVATE_INCLUDE, diff --git a/projects/llm_framework/main_melotts/mode_melotts-en-default.json b/projects/llm_framework/main_melotts/mode_melotts-en-default.json new file mode 100644 index 00000000..18945145 --- /dev/null +++ b/projects/llm_framework/main_melotts/mode_melotts-en-default.json @@ -0,0 +1,29 @@ +{ + "mode": "melotts-en-default", + "type": "tts", + "homepage": "https://github.com/ml-inory/melotts.axera/tree/main/model_convert", + "compile_flage": "pulsar2 build --input decoder-en.onnx --config config_decoder_u16.json --output_dir decoder --output_name decoder-en.axmodel --target_hardware AX620E", + "pulsar_version": "3.4-983bb35e", + "capabilities": [ + "tts", + "English" + ], + "input_type": [ + "tts.utf-8" + ], + "output_type": [ + "tts.wav", + "sys.play.0_1" + ], + "mode_param": { + "encoder": "encoder-en-default.ort", + "decoder": "decoder-en-default.axmodel", + "gbin": "g-en-default.bin", + "tokens": "tokens-en.txt", + "lexicon": "lexicon-en.txt", + "spacker_speed": 1.2, + "mode_rate": 44100, + "audio_rate": 16000, + "awake_delay": 1000 + } +} \ No newline at end of file diff --git a/projects/llm_framework/main_melotts/mode_melotts-en-us.json b/projects/llm_framework/main_melotts/mode_melotts-en-us.json new file mode 100644 index 00000000..6a375c93 --- /dev/null +++ b/projects/llm_framework/main_melotts/mode_melotts-en-us.json @@ -0,0 +1,29 @@ +{ + "mode": "melotts-en-us", + "type": "tts", + "homepage":"https://huggingface.co/myshell-ai/MeloTTS-English", + "compile_flage":"pulsar2 build --input decoder-en.onnx --config config_decoder_u16.json --output_dir decoder-en --output_name decoder-en.axmodel --target_hardware AX620E --npu_mode NPU2 --compiler.check 0", + "pulsar_version":"3.4-3dfd5692", + "capabilities": [ + "tts", + "English" + ], + "input_type": [ + "tts.utf-8" + ], + "output_type": [ + "tts.wav", + "sys.play.0_1" + ], + "mode_param": { + "encoder": "encoder-en.ort", + "decoder": "decoder-en.axmodel", + "gbin": "g-en.bin", + "tokens": "tokens.txt", + "lexicon": "lexicon.txt", + "spacker_speed": 1.0, + "mode_rate": 44100, + "audio_rate": 16000, + "awake_delay": 1000 + } +} \ No newline at end of file diff --git a/projects/llm_framework/main_melotts/mode_melotts-ja-jp.json b/projects/llm_framework/main_melotts/mode_melotts-ja-jp.json new file mode 100644 index 00000000..d2df3e12 --- /dev/null +++ b/projects/llm_framework/main_melotts/mode_melotts-ja-jp.json @@ -0,0 +1,29 @@ +{ + "mode": "melotts-ja-jp", + "type": "tts", + "homepage": "https://github.com/ml-inory/melotts.axera/tree/main/model_convert", + "compile_flage": "pulsar2 build --input decoder-jp.onnx --config config_decoder_u16.json --output_dir decoder --output_name decoder-jp.axmodel --target_hardware AX620E", + "pulsar_version": "3.4-983bb35e", + "capabilities": [ + "tts", + "Japanese" + ], + "input_type": [ + "tts.utf-8" + ], + "output_type": [ + "tts.wav", + "sys.play.0_1" + ], + "mode_param": { + "encoder": "encoder-jp.ort", + "decoder": "decoder-jp.axmodel", + "gbin": "g-jp.bin", + "tokens": "tokens-jp.txt", + "lexicon": "lexicon-jp.txt", + "spacker_speed": 1.1, + "mode_rate": 44100, + "audio_rate": 16000, + "awake_delay": 1000 + } +} \ No newline at end of file diff --git a/projects/llm_framework/main_melotts/mode_melotts-zh-cn.json b/projects/llm_framework/main_melotts/mode_melotts-zh-cn.json index e7af2c5f..b5edfe02 100644 --- a/projects/llm_framework/main_melotts/mode_melotts-zh-cn.json +++ b/projects/llm_framework/main_melotts/mode_melotts-zh-cn.json @@ -1,7 
+1,9 @@ { - "mode": "melotts_zh-cn", + "mode": "melotts-zh-cn", "type": "tts", - "homepage":"https://huggingface.co/myshell-ai/MeloTTS-Chinese", + "homepage": "https://huggingface.co/myshell-ai/MeloTTS-Chinese", + "compile_flage": "pulsar2 build --input decoder.onnx --config config_decoder_u16.json --output_dir decoder --output_name decoder.axmodel --target_hardware AX620E --npu_mode NPU2 --compiler.check 0", + "pulsar_version": "3.2-99f14d0a", "capabilities": [ "tts", "Chinese" @@ -19,7 +21,7 @@ "gbin": "g-zh_mix_en.bin", "tokens": "tokens.txt", "lexicon": "lexicon.txt", - "spacker_speed": 1.0, + "spacker_speed": 1.1, "mode_rate": 44100, "audio_rate": 16000, "awake_delay": 1000 diff --git a/projects/llm_framework/main_melotts/src/main.cpp b/projects/llm_framework/main_melotts/src/main.cpp index 6b9c89ef..4c25df80 100644 --- a/projects/llm_framework/main_melotts/src/main.cpp +++ b/projects/llm_framework/main_melotts/src/main.cpp @@ -29,7 +29,7 @@ using namespace StackFlows; int main_exit_flage = 0; static void __sigint(int iSigNo) { - SLOGW("llm_sys will be exit!"); + SLOGW("llm_melotts will be exit!"); main_exit_flage = 1; } @@ -37,6 +37,7 @@ static std::string base_model_path_; static std::string base_model_config_path_; typedef struct { + std::string mode; std::string encoder; std::string decoder; std::string lexicon; @@ -112,6 +113,9 @@ class llm_task { return false; } + std::unordered_map MELOTTS_LANG_IDS_MAP{ + {"melotts-ja-jp", 1}, {"melotts-en-us", 2}, {"melotts_zh-cn", 3}, {"melotts-zh-cn", 3}}; + std::vector intersperse(const std::vector &lst, int item) { std::vector result(lst.size() * 2 + 1, item); @@ -140,6 +144,7 @@ class llm_task { SLOGW("config file :%s miss", file_name.c_str()); continue; } + SLOGI("config file :%s read", file_name.c_str()); config_file >> file_body; config_file.close(); break; @@ -240,66 +245,281 @@ class llm_task { if (msg_str.empty()) { SLOGI("empty"); if (out_callback_) { - std::string output = wav_pcm_data.empty() ? - std::string() : - std::string((char *)wav_pcm_data.data(), wav_pcm_data.size() * sizeof(int16_t)); + std::string output = wav_pcm_data.empty() ? 
std::string() + : std::string((char *)wav_pcm_data.data(), + wav_pcm_data.size() * sizeof(int16_t)); out_callback_(output, finish); } return false; } + + // Convert text to phonemes and tones std::vector phones_bef, tones_bef; lexicon_->convert(msg_str, phones_bef, tones_bef); - // Add blank between words auto phones = intersperse(phones_bef, 0); auto tones = intersperse(tones_bef, 0); int phone_len = phones.size(); std::vector langids(phone_len, 3); + + // Run the encoder to generate hidden representations auto encoder_output = encoder_->Run(phones, tones, langids, g_matrix, mode_config_.noise_scale, mode_config_.noise_scale_w, mode_config_.get_length_scale(), mode_config_.sdp_ratio); - float *zp_data = encoder_output.at(0).GetTensorMutableData(); - int audio_len = encoder_output.at(2).GetTensorMutableData()[0]; - auto zp_info = encoder_output.at(0).GetTensorTypeAndShapeInfo(); - auto zp_shape = zp_info.GetShape(); + float *zp_data = encoder_output.at(0).GetTensorMutableData(); + int audio_len = encoder_output.at(2).GetTensorMutableData()[0]; + auto zp_info = encoder_output.at(0).GetTensorTypeAndShapeInfo(); + auto zp_shape = zp_info.GetShape(); + + // Calculate decoder parameters int zp_size = decoder_->GetInputSize(0) / sizeof(float); int dec_len = zp_size / zp_shape[1]; int audio_slice_len = decoder_->GetOutputSize(0) / sizeof(float); - std::vector decoder_output(audio_slice_len); - int dec_slice_num = int(std::ceil(zp_shape[2] * 1.0 / dec_len)); + + const int pad_frames = 24; + const int samples_per_frame = 512; + + const int effective_frames = dec_len - 2 * pad_frames; + + int dec_slice_num = + static_cast(std::ceil(static_cast(zp_shape[2]) / static_cast(effective_frames))); + + // SOLA parameters setup + const int sola_buffer_frame = pad_frames * samples_per_frame; // Overlap buffer length + const int sola_search_frame = pad_frames * samples_per_frame; // Search window length + const int block_frame = (dec_len - 2 * pad_frames) * samples_per_frame; // Effective block length + + // Create fade-in/fade-out windows for smooth transitions + std::vector fade_in_window(sola_buffer_frame); + std::vector fade_out_window(sola_buffer_frame); + + for (int i = 0; i < sola_buffer_frame; i++) { + fade_in_window[i] = static_cast(i) / sola_buffer_frame; + fade_out_window[i] = 1.0f - fade_in_window[i]; + } + + // Initialize SOLA buffer + std::vector sola_buffer(sola_buffer_frame, 0.0f); + bool first_frame = true; + std::vector pcmlist; + + // Main decoding loop - process each slice for (int i = 0; i < dec_slice_num; i++) { + // Calculate start position for current batch input + int input_start = i * effective_frames; + // Consider forward padding, but ensure non-negative + if (i > 0) { + input_start -= pad_frames; + } + input_start = std::max(0, input_start); + + // Actual input length + int actual_len = std::min(dec_len, static_cast(zp_shape[2] - input_start)); + + // Calculate effective output range (frame level) + int output_start_frame, output_end_frame; + + if (i == 0) { + // First frame: skip padding at beginning + output_start_frame = 0; + output_end_frame = effective_frames - 1; + } else if (i == dec_slice_num - 1) { + // Last frame: calculate from current segment start + output_start_frame = i * effective_frames; + // Last frame extends to encoder's maximum output length + output_end_frame = static_cast(zp_shape[2]) - 1; + } else { + // Middle frames: standard calculation + output_start_frame = i * effective_frames; + output_end_frame = (i + 1) * effective_frames - 1; + } + // Prepare 
decoder input, initialize all to zero std::vector zp(zp_size, 0); - int actual_size = (i + 1) * dec_len < zp_shape[2] ? dec_len : zp_shape[2] - i * dec_len; + + // Copy data to decoder input for (int n = 0; n < zp_shape[1]; n++) { - memcpy(zp.data() + n * dec_len, zp_data + n * zp_shape[2] + i * dec_len, - sizeof(float) * actual_size); + int copy_size = std::min(actual_len, static_cast(zp_shape[2] - input_start)); + if (copy_size > 0) { + memcpy(zp.data() + n * dec_len, zp_data + n * zp_shape[2] + input_start, + sizeof(float) * copy_size); + } } + + // Run decoder + std::vector decoder_output(audio_slice_len); decoder_->SetInput(zp.data(), 0); decoder_->SetInput(g_matrix.data(), 1); - if (0 != decoder_->RunSync()) { - printf("Run decoder model failed!\n"); + + if (0 != decoder_->Run()) { + SLOGI("Inference #%d: decoding failed", i + 1); throw std::string("decoder_ RunSync error"); } + decoder_->GetOutput(decoder_output.data(), 0); - actual_size = (i + 1) * audio_slice_len < audio_len ? audio_slice_len : audio_len - i * audio_slice_len; - if (decoder_output.size() > actual_size) { - pcmlist.reserve(pcmlist.size() + actual_size); - std::copy(decoder_output.begin(), decoder_output.begin() + actual_size, - std::back_inserter(pcmlist)); + + // === SOLA Processing Logic === + if (first_frame) { + // Special handling for first frame - should not skip initial content + // First frame starts directly from decoder output without skipping + int audio_start = 0; // Start from beginning, don't skip pad_frames + + // Calculate data length for first frame + // First frame should preserve complete decoder output, only reserving sola_buffer_frame at the end + // for next frame alignment + int audio_len = decoder_output.size() - sola_buffer_frame; + + // Boundary check + audio_len = std::max(0, audio_len); // Ensure non-negative + + // Add first frame data + if (audio_len > 0) { + pcmlist.insert(pcmlist.end(), decoder_output.begin() + audio_start, + decoder_output.begin() + audio_start + audio_len); + } + + // Save sola_buffer_frame length from the end to SOLA buffer for next frame alignment + int buffer_start = audio_len; + + // Ensure sufficient data is available for copying + if (buffer_start + sola_buffer_frame <= decoder_output.size()) { + std::copy(decoder_output.begin() + buffer_start, + decoder_output.begin() + buffer_start + sola_buffer_frame, sola_buffer.begin()); + } else { + // Possible case: first frame data is shorter than sola_buffer_frame + int available = static_cast(decoder_output.size() - buffer_start); + if (available > 0) { + std::copy(decoder_output.begin() + buffer_start, decoder_output.end(), sola_buffer.begin()); + // Fill with zeros + std::fill(sola_buffer.begin() + available, sola_buffer.end(), 0.0f); + } else { + // Completely insufficient data, fill all with zeros + std::fill(sola_buffer.begin(), sola_buffer.end(), 0.0f); + } + } + + first_frame = false; + } else { - pcmlist.reserve(pcmlist.size() + decoder_output.size()); - std::copy(decoder_output.begin(), decoder_output.end(), std::back_inserter(pcmlist)); + // Non-first frame: SOLA alignment required + int audio_start = pad_frames * samples_per_frame; + + // 1. Prepare search window - beginning portion of current frame + std::vector search_window(sola_buffer_frame + sola_search_frame); + std::copy(decoder_output.begin() + audio_start, + decoder_output.begin() + audio_start + search_window.size(), search_window.begin()); + + // 2. 
Find best alignment point (calculate cross-correlation) + int best_offset = 0; + float best_correlation = -1.0; + + for (int offset = 0; offset <= sola_search_frame; offset++) { + float correlation = 0.0; + float energy = 0.0; + + for (int j = 0; j < sola_buffer_frame; j++) { + correlation += sola_buffer[j] * search_window[j + offset]; + energy += search_window[j + offset] * search_window[j + offset]; + } + + // Normalize correlation (avoid division by zero) + float normalized_correlation = (energy > 1e-8) ? correlation / std::sqrt(energy) : 0.0f; + + if (normalized_correlation > best_correlation) { + best_correlation = normalized_correlation; + best_offset = offset; + } + } + + // 3. Apply alignment offset + int aligned_start = audio_start + best_offset; + + // 4. Smooth transition processing (crossfade in alignment region) + std::vector crossfade_region(sola_buffer_frame); + + for (int j = 0; j < sola_buffer_frame; j++) { + // Apply fade-in/fade-out window functions + crossfade_region[j] = + decoder_output[aligned_start + j] * fade_in_window[j] + sola_buffer[j] * fade_out_window[j]; + } + + // 5. Add crossfade region to output + pcmlist.insert(pcmlist.end(), crossfade_region.begin(), crossfade_region.end()); + + int remaining_start = aligned_start + sola_buffer_frame; + + if (i == dec_slice_num - 1) { + int total_expected_samples = audio_len * samples_per_frame / 512; + + int processed_samples = static_cast(pcmlist.size()); + + int remaining_needed = total_expected_samples - processed_samples; + remaining_needed = std::max(0, remaining_needed); + + int remaining_len = + std::min(remaining_needed, static_cast(decoder_output.size() - remaining_start)); + + if (remaining_len > 0) { + pcmlist.insert(pcmlist.end(), decoder_output.begin() + remaining_start, + decoder_output.begin() + remaining_start + remaining_len); + } + + } else { + int remaining_len = (dec_len - 2 * pad_frames) * samples_per_frame - sola_buffer_frame; + + remaining_len = + std::min(remaining_len, static_cast(decoder_output.size() - remaining_start)); + + if (remaining_len > 0) { + pcmlist.insert(pcmlist.end(), decoder_output.begin() + remaining_start, + decoder_output.begin() + remaining_start + remaining_len); + } + + int buffer_start = remaining_start + remaining_len; + + if (buffer_start + sola_buffer_frame <= decoder_output.size()) { + std::copy(decoder_output.begin() + buffer_start, + decoder_output.begin() + buffer_start + sola_buffer_frame, sola_buffer.begin()); + } else { + int avail = static_cast(decoder_output.size() - buffer_start); + if (avail > 0) { + std::copy(decoder_output.begin() + buffer_start, decoder_output.end(), + sola_buffer.begin()); + } + std::fill(sola_buffer.begin() + avail, sola_buffer.end(), 0.0f); + } + } } } - double src_ratio = (mode_config_.audio_rate * 1.0f) / (mode_config_.mode_rate * 1.0f); + + if (pcmlist.size() > audio_len) { + pcmlist.resize(audio_len); + } + + // Post-processing: resample and convert to int16 + double src_ratio = + static_cast(mode_config_.audio_rate) / static_cast(mode_config_.mode_rate); std::vector tmp_pcm((pcmlist.size() * src_ratio + 1)); int len; + resample_audio(pcmlist.data(), pcmlist.size(), tmp_pcm.data(), &len, src_ratio); + + // Convert to 16-bit PCM + wav_pcm_data.reserve(len); std::transform(tmp_pcm.begin(), tmp_pcm.begin() + len, std::back_inserter(wav_pcm_data), - [](const auto val) { return (int16_t)(val * INT16_MAX); }); - if (out_callback_) - out_callback_(std::string((char *)wav_pcm_data.data(), wav_pcm_data.size() * sizeof(int16_t)), finish); + 
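The decoding loop above stitches chunks together with SOLA: the head of each new chunk is slid across a search window to maximize normalized cross-correlation with the overlap saved from the previous chunk, and the two are then blended with the linear fade windows built earlier. A minimal pure-Python sketch of that merge step, on toy data rather than the runtime buffers:

```python
import math

def sola_merge(prev_tail, chunk, search):
    """Align `chunk` to `prev_tail` within `search` samples, then crossfade.

    prev_tail : overlap saved from the previous chunk (len = overlap)
    chunk     : new chunk; its head must cover overlap + search samples
    """
    overlap = len(prev_tail)
    # 1. pick the offset with the highest normalized cross-correlation
    best_off, best_corr = 0, -1.0
    for off in range(search + 1):
        corr = sum(a * b for a, b in zip(prev_tail, chunk[off:off + overlap]))
        energy = sum(x * x for x in chunk[off:off + overlap])
        norm = corr / math.sqrt(energy) if energy > 1e-8 else 0.0
        if norm > best_corr:
            best_corr, best_off = norm, off
    # 2. linear crossfade over the aligned overlap region
    fade = [i / overlap for i in range(overlap)]
    merged = [prev_tail[i] * (1 - fade[i]) + chunk[best_off + i] * fade[i]
              for i in range(overlap)]
    # 3. append the rest of the chunk after the crossfade
    return merged + list(chunk[best_off + overlap:])

print(sola_merge([0.0, 0.5, 1.0], [0.1, 0.0, 0.5, 1.0, 0.8, 0.2], search=2))
```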
[](const auto val) { return static_cast(val * INT16_MAX); }); + + // Call the output callback function with the result + if (out_callback_) { + out_callback_( + std::string(reinterpret_cast(wav_pcm_data.data()), wav_pcm_data.size() * sizeof(int16_t)), + finish); + } + + } catch (const std::exception &e) { + SLOGI("TTS processing exception: %s", e.what()); + return true; } catch (...) { + SLOGI("TTS processing encountered an unknown exception"); return true; } return false; @@ -350,11 +570,18 @@ class llm_task { _ax_init(); } + void start() + { + } + + void stop() + { + } + ~llm_task() { - if (decoder_) { - decoder_->Release(); - } + stop(); + if (decoder_) decoder_->Release(); _ax_deinit(); } }; @@ -661,7 +888,7 @@ class llm_tts : public StackFlow { int exit(const std::string &work_id, const std::string &object, const std::string &data) override { - SLOGI("llm_tts::exit:%s", data.c_str()); + SLOGI("llm_melotts::exit:%s", data.c_str()); nlohmann::json error_body; int work_id_num = sample_get_work_id_num(work_id); @@ -671,6 +898,7 @@ class llm_tts : public StackFlow { send("None", "None", error_body, work_id); return -1; } + llm_task_[work_id_num]->stop(); auto llm_channel = get_channel(work_id_num); llm_channel->stop_subscriber(""); llm_task_.erase(work_id_num); @@ -685,6 +913,7 @@ class llm_tts : public StackFlow { if (iteam == llm_task_.end()) { break; } + iteam->second->stop(); get_channel(iteam->first)->stop_subscriber(""); iteam->second.reset(); llm_task_.erase(iteam->first); diff --git a/projects/llm_framework/main_melotts/src/runner/EngineWrapper.cpp b/projects/llm_framework/main_melotts/src/runner/EngineWrapper.cpp index d604104f..0dda3e7b 100644 --- a/projects/llm_framework/main_melotts/src/runner/EngineWrapper.cpp +++ b/projects/llm_framework/main_melotts/src/runner/EngineWrapper.cpp @@ -11,8 +11,16 @@ #include "utils/io.hpp" #include +#include + +#if defined(CONFIG_AX_650N_MSP_ENABLED) +static const char *strAlgoModelType[AX_ENGINE_MODEL_TYPE_BUTT] = {"3.6T", "7.2T", "18T"}; +#endif + +#if defined(CONFIG_AX_620E_MSP_ENABLED) || defined(CONFIG_AX_620Q_MSP_ENABLED) +static const char *strAlgoModelType[AX_ENGINE_MODEL_TYPE_BUTT] = {"HalfOCM", "FullOCM"}; +#endif -static const char *strAlgoModelType[AX_ENGINE_VIRTUAL_NPU_BUTT] = {"1.6T", "3.2T"}; /// @brief npu type typedef enum axNPU_TYPE_E { @@ -24,6 +32,7 @@ typedef enum axNPU_TYPE_E { AX_BL_VNPU_2 = (1 << 4) /* running under BIG-LITTLE VNPU2 */ } AX_NPU_TYPE_E; +#if defined(CHIP_AX650) static AX_S32 CheckModelVNpu(const std::string &strModel, const AX_ENGINE_MODEL_TYPE_T &eModelType, const AX_S32 &nNpuType, AX_U32 &nNpuSet) { AX_ENGINE_NPU_ATTR_T stNpuAttr; memset(&stNpuAttr, 0x00, sizeof(stNpuAttr)); @@ -33,35 +42,42 @@ static AX_S32 CheckModelVNpu(const std::string &strModel, const AX_ENGINE_MODEL_ // VNPU DISABLE if (stNpuAttr.eHardMode == AX_ENGINE_VIRTUAL_NPU_DISABLE) { nNpuSet = 0x01; // NON-VNPU (0b111) + // printf("%s will run under VNPU-DISABLE [%s]\n", strModel.c_str(), strAlgoModelType[eModelType]); } // STD VNPU - else if (stNpuAttr.eHardMode == AX_ENGINE_VIRTUAL_NPU_BUTT) { + else if (stNpuAttr.eHardMode == AX_ENGINE_VIRTUAL_NPU_STD) { // 7.2T & 10.8T no allow if (eModelType == AX_ENGINE_MODEL_TYPE1 - || eModelType == AX_ENGINE_MODEL_TYPE1) { + || eModelType == AX_ENGINE_MODEL_TYPE2) { + // printf("%s model type%d: [%s], no allow run under STD VNPU\n", strModel.c_str(), eModelType, strAlgoModelType[eModelType]); return -1; } // default STD VNPU2 if (nNpuType == 0) { nNpuSet = 0x02; // VNPU2 (0b010) + // printf("%s 
will run under default STD-VNPU2 [%s]\n", strModel.c_str(), strAlgoModelType[eModelType]); } else { if (nNpuType & AX_STD_VNPU_1) { nNpuSet |= 0x01; // VNPU1 (0b001) + // printf("%s will run under STD-VNPU1 [%s]\n", strModel.c_str(), strAlgoModelType[eModelType]); } if (nNpuType & AX_STD_VNPU_2) { nNpuSet |= 0x02; // VNPU2 (0b010) + // printf("%s will run under STD-VNPU2 [%s]\n", strModel.c_str(), strAlgoModelType[eModelType]); } if (nNpuType & AX_STD_VNPU_3) { nNpuSet |= 0x04; // VNPU3 (0b100) + // printf("%s will run under STD-VNPU3 [%s]\n", strModel.c_str(), strAlgoModelType[eModelType]); } } } // BL VNPU - else if (stNpuAttr.eHardMode == AX_ENGINE_VIRTUAL_NPU_BUTT) { + else if (stNpuAttr.eHardMode == AX_ENGINE_VIRTUAL_NPU_BIG_LITTLE) { // 10.8T no allow - if (eModelType == AX_ENGINE_MODEL_TYPE1) { + if (eModelType == AX_ENGINE_MODEL_TYPE2) { + // printf("%s model type%d: [%s], no allow run under BL VNPU\n", strModel.c_str(), eModelType, strAlgoModelType[eModelType]); return -1; } @@ -70,10 +86,12 @@ static AX_S32 CheckModelVNpu(const std::string &strModel, const AX_ENGINE_MODEL_ // 7.2T default BL VNPU1 if (eModelType == AX_ENGINE_MODEL_TYPE1) { nNpuSet = 0x01; // VNPU1 (0b001) + // printf("%s will run under default BL-VNPU1 [%s]\n", strModel.c_str(), strAlgoModelType[eModelType]); } // 3.6T default BL VNPU2 else { nNpuSet = 0x02; // VNPU2 (0b010) + // printf("%s will run under default BL-VNPU2 [%s]\n", strModel.c_str(), strAlgoModelType[eModelType]); } } else { @@ -81,19 +99,23 @@ static AX_S32 CheckModelVNpu(const std::string &strModel, const AX_ENGINE_MODEL_ if (eModelType == AX_ENGINE_MODEL_TYPE1) { // no allow set to BL VNPU2 if (nNpuType & AX_BL_VNPU_2) { + // printf("%s model type%d: [%s], no allow run under BL VNPU2\n", strModel.c_str(), eModelType, strAlgoModelType[eModelType]); return -1; } if (nNpuType & AX_BL_VNPU_1) { nNpuSet |= 0x01; // VNPU1 (0b001) + // printf("%s will run under BL-VNPU1 [%s]\n", strModel.c_str(), strAlgoModelType[eModelType]); } } // 3.6T else { if (nNpuType & AX_BL_VNPU_1) { nNpuSet |= 0x01; // VNPU1 (0b001) + // printf("%s will run under BL-VNPU1 [%s]\n", strModel.c_str(), strAlgoModelType[eModelType]); } if (nNpuType & AX_BL_VNPU_2) { nNpuSet |= 0x02; // VNPU2 (0b010) + // printf("%s will run under BL-VNPU2 [%s]\n", strModel.c_str(), strAlgoModelType[eModelType]); } } } @@ -105,6 +127,52 @@ static AX_S32 CheckModelVNpu(const std::string &strModel, const AX_ENGINE_MODEL_ return ret; } +#endif + +#if defined(CONFIG_AX_620E_MSP_ENABLED) || defined(CONFIG_AX_620Q_MSP_ENABLED) +static AX_S32 CheckModelVNpu(const std::string &strModel, const AX_ENGINE_MODEL_TYPE_T &eModelType, const AX_S32 &nNpuType, AX_U32 &nNpuSet) { + AX_ENGINE_NPU_ATTR_T stNpuAttr; + memset(&stNpuAttr, 0x00, sizeof(stNpuAttr)); + + auto ret = AX_ENGINE_GetVNPUAttr(&stNpuAttr); + if (ret == 0) { + // VNPU DISABLE + if (stNpuAttr.eHardMode == AX_ENGINE_VIRTUAL_NPU_DISABLE) { + nNpuSet = 0x01; // NON-VNPU (0b111) + // ALOGN("%s will run under VNPU-DISABLE [%s]", strModel.c_str(), strAlgoModelType[eModelType]); + } + // STD VNPU + else if (stNpuAttr.eHardMode == AX_ENGINE_VIRTUAL_NPU_ENABLE) { + // full ocm model was no allowned + if (eModelType == AX_ENGINE_MODEL_TYPE1) { + // printf("%s model type%d: [%s], no allow run under STD VNPU", strModel.c_str(), eModelType, strAlgoModelType[eModelType]); + return -1; + } + + // default STD VNPU2 + if (nNpuType == 0) { + nNpuSet = 0x02; // VNPU2 (0b010) + // printf("%s will run under default STD-VNPU2 [%s]", strModel.c_str(), 
strAlgoModelType[eModelType]); + } + else { + if (nNpuType & AX_STD_VNPU_1) { + nNpuSet |= 0x01; // VNPU1 (0b001) + // printf("%s will run under STD-VNPU1 [%s]", strModel.c_str(), strAlgoModelType[eModelType]); + } + if (nNpuType & AX_STD_VNPU_2) { + nNpuSet |= 0x02; // VNPU2 (0b010) + // printf("%s will run under STD-VNPU2 [%s]", strModel.c_str(), strAlgoModelType[eModelType]); + } + } + } + } + else { + printf("AX_ENGINE_GetVNPUAttr fail ret = %x", ret); + } + + return ret; +} +#endif int EngineWrapper::Init(const char* strModelPath, uint32_t nNpuType) @@ -112,7 +180,7 @@ int EngineWrapper::Init(const char* strModelPath, uint32_t nNpuType) AX_S32 ret = 0; // 1. load model - AX_BOOL bLoadModelUseCmm = AX_FALSE; + AX_BOOL bLoadModelUseCmm = AX_TRUE; AX_CHAR *pModelBufferVirAddr = nullptr; AX_U64 u64ModelBufferPhyAddr = 0; AX_U32 nModelBufferSize = 0; @@ -224,7 +292,7 @@ int EngineWrapper::Init(const char* strModelPath, uint32_t nNpuType) // 6. prepare io // AX_U32 nIoDepth = (stCtx.vecOutputBufferFlag.size() == 0) ? 1 : stCtx.vecOutputBufferFlag.size(); - ret = utils::prepare_io(strModelPath, m_io_info, m_io, utils::IO_BUFFER_STRATEGY_DEFAULT); + ret = utils::prepare_io(strModelPath, m_io_info, m_io, utils::IO_BUFFER_STRATEGY_CACHED); if (0 != ret) { printf("prepare io failed!\n"); utils::free_io(m_io); @@ -241,7 +309,7 @@ int EngineWrapper::SetInput(void* pInput, int index) { return utils::push_io_input(pInput, index, m_io); } -int EngineWrapper::RunSync() +int EngineWrapper::Run() { if (!m_hasInit) return -1; @@ -268,6 +336,11 @@ int EngineWrapper::GetOutputSize(int index) { return m_io.pOutputs[index].nSize; } +void* EngineWrapper::GetOutputPtr(int index) { + utils::cache_io_flush(&m_io.pOutputs[index]); + return m_io.pOutputs[index].pVirAddr; +} + int EngineWrapper::Release() { if (m_handle) { diff --git a/projects/llm_framework/main_melotts/src/runner/EngineWrapper.hpp b/projects/llm_framework/main_melotts/src/runner/EngineWrapper.hpp index e249d546..c0d53ba6 100644 --- a/projects/llm_framework/main_melotts/src/runner/EngineWrapper.hpp +++ b/projects/llm_framework/main_melotts/src/runner/EngineWrapper.hpp @@ -33,13 +33,15 @@ class EngineWrapper { int SetInput(void* pInput, int index); - int RunSync(); + int Run(); int GetOutput(void* pOutput, int index); int GetInputSize(int index); int GetOutputSize(int index); + void* GetOutputPtr(int index); + int Release(); protected: diff --git a/projects/llm_framework/main_melotts/src/runner/Lexicon.hpp b/projects/llm_framework/main_melotts/src/runner/Lexicon.hpp index 8b6255d2..d1bcbe90 100644 --- a/projects/llm_framework/main_melotts/src/runner/Lexicon.hpp +++ b/projects/llm_framework/main_melotts/src/runner/Lexicon.hpp @@ -1,152 +1,326 @@ #pragma once - #include #include #include #include -#include - -std::vector split (const std::string &s, char delim) { +#include +#include +#include +#include +#include "../../../../../SDK/components/utilities/include/sample_log.h" +// Debug logging switch - set to true to enable debug logs +static bool DEBUG_LOGGING = false; +// Macro for debug logging +#define DEBUG_LOG(fmt, ...) 
\ + do { \ + if (DEBUG_LOGGING) { \ + SLOGI(fmt, ##__VA_ARGS__); \ + } \ + } while (0) +std::vector split(const std::string& s, char delim) +{ std::vector result; - std::stringstream ss (s); + std::stringstream ss(s); std::string item; - while (getline (ss, item, delim)) { - result.push_back (item); + while (getline(ss, item, delim)) { + if (!item.empty()) { + result.push_back(item); + } } return result; } - class Lexicon { private: std::unordered_map, std::vector>> lexicon; + size_t max_phrase_length; + std::pair, std::vector> unknown_token; + std::unordered_map reverse_tokens; public: - Lexicon(const std::string& lexicon_filename, const std::string& tokens_filename) { + // Setter for debug logging + static void setDebugLogging(bool enable) + { + DEBUG_LOGGING = enable; + } + Lexicon(const std::string& lexicon_filename, const std::string& tokens_filename) : max_phrase_length(0) + { + DEBUG_LOG("Dictionary loading: %s Pronunciation table loading: %s", tokens_filename.c_str(), + lexicon_filename.c_str()); + std::unordered_map tokens; std::ifstream ifs(tokens_filename); assert(ifs.is_open()); - std::string line; - while ( std::getline(ifs, line) ) { + while (std::getline(ifs, line)) { auto splitted_line = split(line, ' '); - tokens.insert({splitted_line[0], std::stoi(splitted_line[1])}); + if (splitted_line.size() >= 2) { + int token_id = std::stoi(splitted_line[1]); + tokens.insert({splitted_line[0], token_id}); + reverse_tokens[token_id] = splitted_line[0]; + } } ifs.close(); - ifs.open(lexicon_filename); assert(ifs.is_open()); - while ( std::getline(ifs, line) ) { + while (std::getline(ifs, line)) { auto splitted_line = split(line, ' '); + if (splitted_line.empty()) continue; std::string word_or_phrase = splitted_line[0]; - size_t phone_tone_len = splitted_line.size() - 1; - size_t half_len = phone_tone_len / 2; + auto chars = splitEachChar(word_or_phrase); + max_phrase_length = std::max(max_phrase_length, chars.size()); + size_t phone_tone_len = splitted_line.size() - 1; + size_t half_len = phone_tone_len / 2; std::vector phones, tones; for (size_t i = 0; i < phone_tone_len; i++) { auto phone_or_tone = splitted_line[i + 1]; if (i < half_len) { - phones.push_back(tokens[phone_or_tone]); + if (tokens.find(phone_or_tone) != tokens.end()) { + phones.push_back(tokens[phone_or_tone]); + } } else { tones.push_back(std::stoi(phone_or_tone)); } } - - lexicon.insert({word_or_phrase, std::make_pair(phones, tones)}); + lexicon[word_or_phrase] = std::make_pair(phones, tones); } - - lexicon["呣"] = lexicon["母"]; - lexicon["嗯"] = lexicon["恩"]; - const std::vector punctuation{"!", "?", "…", ",", ".", "'", "-"}; - for (auto p : punctuation) { - int i = tokens[p]; - int tone = 0; - lexicon[p] = std::make_pair(std::vector{i}, std::vector{tone}); + for (const auto& p : punctuation) { + if (tokens.find(p) != tokens.end()) { + int i = tokens[p]; + lexicon[p] = std::make_pair(std::vector{i}, std::vector{0}); + } } - lexicon[" "] = std::make_pair(std::vector{tokens["_"]}, std::vector{0}); + assert(tokens.find("_") != tokens.end()); + unknown_token = std::make_pair(std::vector{tokens["_"]}, std::vector{0}); + lexicon[" "] = unknown_token; + lexicon[","] = lexicon[","]; + lexicon["。"] = lexicon["."]; + lexicon["!"] = lexicon["!"]; + lexicon["?"] = lexicon["?"]; + DEBUG_LOG("Dictionary loading complete, containing %zu entries, longest phrase length: %zu", lexicon.size(), + max_phrase_length); } std::vector splitEachChar(const std::string& text) { std::vector words; - std::string input(text); - int len = 
input.length(); - int i = 0; - + int len = text.length(); + int i = 0; while (i < len) { - int next = 1; - if ((input[i] & 0x80) == 0x00) { - // std::cout << "one character: " << input[i] << std::endl; - } else if ((input[i] & 0xE0) == 0xC0) { - next = 2; - // std::cout << "two character: " << input.substr(i, next) << std::endl; - } else if ((input[i] & 0xF0) == 0xE0) { - next = 3; - // std::cout << "three character: " << input.substr(i, next) << std::endl; - } else if ((input[i] & 0xF8) == 0xF0) { - next = 4; - // std::cout << "four character: " << input.substr(i, next) << std::endl; - } - words.push_back(input.substr(i, next)); - i += next; + int next = 1; + if ((text[i] & 0x80) == 0x00) { + // ASCII + } else if ((text[i] & 0xE0) == 0xC0) { + next = 2; // 2-byte UTF-8 + } else if ((text[i] & 0xF0) == 0xE0) { + next = 3; // 3-byte UTF-8 + } else if ((text[i] & 0xF8) == 0xF0) { + next = 4; // 4-byte UTF-8 + } + words.push_back(text.substr(i, next)); + i += next; } return words; - } + } - bool is_english(std::string s) { - if (s.size() == 1) - return (s[0] >= 'A' && s[0] <= 'Z') || (s[0] >= 'a' && s[0] <= 'z'); - else - return false; + bool is_english(const std::string& s) + { + return s.size() == 1 && ((s[0] >= 'A' && s[0] <= 'Z') || (s[0] >= 'a' && s[0] <= 'z')); + } + bool is_english_token_char(const std::string& s) + { + if (s.size() != 1) return false; + char c = s[0]; + return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') || c == '-' || c == '_'; + } + void process_unknown_english(const std::string& word, std::vector& phones, std::vector& tones) + { + DEBUG_LOG("Processing unknown term: %s", word.c_str()); + std::string orig_word = word; + std::vector parts; + std::vector phonetic_parts; + size_t start = 0; + while (start < word.size()) { + bool matched = false; + for (size_t len = std::min(word.size() - start, (size_t)10); len > 0 && !matched; --len) { + std::string sub_word = word.substr(start, len); + std::string lower_sub_word = sub_word; + std::transform(lower_sub_word.begin(), lower_sub_word.end(), lower_sub_word.begin(), + [](unsigned char c) { return std::tolower(c); }); + if (lexicon.find(lower_sub_word) != lexicon.end()) { + // Substring found in lexicon + auto& [sub_phones, sub_tones] = lexicon[lower_sub_word]; + phones.insert(phones.end(), sub_phones.begin(), sub_phones.end()); + tones.insert(tones.end(), sub_tones.begin(), sub_tones.end()); + parts.push_back(sub_word); + phonetic_parts.push_back(phonesToString(sub_phones)); + DEBUG_LOG(" Matched: '%s' -> %s", sub_word.c_str(), phonesToString(sub_phones).c_str()); + start += len; + matched = true; + break; + } + } + if (!matched) { + std::string single_char = word.substr(start, 1); + std::string lower_char = single_char; + std::transform(lower_char.begin(), lower_char.end(), lower_char.begin(), + [](unsigned char c) { return std::tolower(c); }); + if (lexicon.find(lower_char) != lexicon.end()) { + auto& [char_phones, char_tones] = lexicon[lower_char]; + phones.insert(phones.end(), char_phones.begin(), char_phones.end()); + tones.insert(tones.end(), char_tones.begin(), char_tones.end()); + parts.push_back(single_char); + phonetic_parts.push_back(phonesToString(char_phones)); + DEBUG_LOG(" Single char: '%s' -> %s", single_char.c_str(), phonesToString(char_phones).c_str()); + } else { + phones.insert(phones.end(), unknown_token.first.begin(), unknown_token.first.end()); + tones.insert(tones.end(), unknown_token.second.begin(), unknown_token.second.end()); + parts.push_back(single_char); + 
phonetic_parts.push_back("_unknown_"); + DEBUG_LOG(" Unknown: '%s'", single_char.c_str()); + } + start++; + } + } + std::string parts_str, phonetic_str; + for (size_t i = 0; i < parts.size(); i++) { + if (i > 0) { + parts_str += " "; + phonetic_str += " "; + } + parts_str += parts[i]; + phonetic_str += phonetic_parts[i]; + } + DEBUG_LOG("%s\t|\tDecomposed: %s\t|\tPhonetics: %s", orig_word.c_str(), parts_str.c_str(), + phonetic_str.c_str()); } - std::vector merge_english(const std::vector& splitted_text) { - std::vector words; - int i = 0; - while (i < splitted_text.size()) { - std::string s; - if (is_english(splitted_text[i])) { - while (i < splitted_text.size()) { - if (!is_english(splitted_text[i])) { - break; - } - s += splitted_text[i]; - i++; + void convert(const std::string& text, std::vector& phones, std::vector& tones) + { + DEBUG_LOG("\nStarting text processing: \"%s\"", text.c_str()); + DEBUG_LOG("=======Matching Results======="); + DEBUG_LOG("Unit\t|\tPhonemes\t|\tTones"); + DEBUG_LOG("-----------------------------"); + phones.insert(phones.end(), unknown_token.first.begin(), unknown_token.first.end()); + tones.insert(tones.end(), unknown_token.second.begin(), unknown_token.second.end()); + DEBUG_LOG("\t|\t%s\t|\t%s", phonesToString(unknown_token.first).c_str(), + tonesToString(unknown_token.second).c_str()); + auto chars = splitEachChar(text); + int i = 0; + while (i < chars.size()) { + if (is_english(chars[i])) { + std::string eng_word; + int start = i; + while (i < chars.size() && is_english(chars[i])) { + eng_word += chars[i++]; + } + std::string orig_word = eng_word; + std::transform(eng_word.begin(), eng_word.end(), eng_word.begin(), + [](unsigned char c) { return std::tolower(c); }); + if (lexicon.find(eng_word) != lexicon.end()) { + auto& [eng_phones, eng_tones] = lexicon[eng_word]; + phones.insert(phones.end(), eng_phones.begin(), eng_phones.end()); + tones.insert(tones.end(), eng_tones.begin(), eng_tones.end()); + DEBUG_LOG("%s\t|\t%s\t|\t%s", orig_word.c_str(), phonesToString(eng_phones).c_str(), + tonesToString(eng_tones).c_str()); + } else { + process_unknown_english(orig_word, phones, tones); + } + continue; + } + std::string c = chars[i++]; + if (c == " ") continue; + i--; + bool matched = false; + for (size_t len = std::min(max_phrase_length, chars.size() - i); len > 0 && !matched; --len) { + std::string phrase; + for (size_t j = 0; j < len; ++j) { + phrase += chars[i + j]; } - // to lowercase - std::transform(s.begin(), s.end(), s.begin(), - [](unsigned char c){ return std::tolower(c); }); - words.push_back(s); - if (i >= splitted_text.size()) + if (lexicon.find(phrase) != lexicon.end()) { + auto& [phrase_phones, phrase_tones] = lexicon[phrase]; + phones.insert(phones.end(), phrase_phones.begin(), phrase_phones.end()); + tones.insert(tones.end(), phrase_tones.begin(), phrase_tones.end()); + DEBUG_LOG("%s\t|\t%s\t|\t%s", phrase.c_str(), phonesToString(phrase_phones).c_str(), + tonesToString(phrase_tones).c_str()); + i += len; + matched = true; break; + } } - else { - words.push_back(splitted_text[i]); - i++; + if (!matched) { + std::string c = chars[i++]; + std::string s = c; + std::string orig_char = s; + if (s == ",") + s = ","; + else if (s == "。") + s = "."; + else if (s == "!") + s = "!"; + else if (s == "?") + s = "?"; + if (lexicon.find(s) != lexicon.end()) { + auto& [char_phones, char_tones] = lexicon[s]; + phones.insert(phones.end(), char_phones.begin(), char_phones.end()); + tones.insert(tones.end(), char_tones.begin(), char_tones.end()); + 
DEBUG_LOG("%s\t|\t%s\t|\t%s", orig_char.c_str(), phonesToString(char_phones).c_str(), + tonesToString(char_tones).c_str()); + } else { + phones.insert(phones.end(), unknown_token.first.begin(), unknown_token.first.end()); + tones.insert(tones.end(), unknown_token.second.begin(), unknown_token.second.end()); + DEBUG_LOG("%s\t|\t%s (Not matched)\t|\t%s", orig_char.c_str(), + phonesToString(unknown_token.first).c_str(), tonesToString(unknown_token.second).c_str()); + } } } - return words; + phones.insert(phones.end(), unknown_token.first.begin(), unknown_token.first.end()); + tones.insert(tones.end(), unknown_token.second.begin(), unknown_token.second.end()); + DEBUG_LOG("\t|\t%s\t|\t%s", phonesToString(unknown_token.first).c_str(), + tonesToString(unknown_token.second).c_str()); + DEBUG_LOG("\nProcessing Summary:"); + DEBUG_LOG("Original text: %s", text.c_str()); + DEBUG_LOG("Phonemes: %s", phonesToString(phones).c_str()); + DEBUG_LOG("Tones: %s", tonesToString(tones).c_str()); + DEBUG_LOG("===================="); } - void convert(const std::string& text, std::vector& phones, std::vector& tones) { - auto splitted_text = splitEachChar(text); - auto zh_mix_en = merge_english(splitted_text); - for (auto c : zh_mix_en) { - std::string s{c}; - if (s == ",") - s = ","; - else if (s == "。") - s = "."; - else if (s == "!") - s = "!"; - else if (s == "?") - s = "?"; - - auto phones_and_tones = lexicon[" "]; - if (lexicon.find(s) != lexicon.end()) { - phones_and_tones = lexicon[s]; +private: + void processChar(const std::string& c, std::vector& phones, std::vector& tones) + { + std::string s = c; + if (s == ",") + s = ","; + else if (s == "。") + s = "."; + else if (s == "!") + s = "!"; + else if (s == "?") + s = "?"; + auto& phones_and_tones = (lexicon.find(s) != lexicon.end()) ? 
lexicon[s] : unknown_token; + phones.insert(phones.end(), phones_and_tones.first.begin(), phones_and_tones.first.end()); + tones.insert(tones.end(), phones_and_tones.second.begin(), phones_and_tones.second.end()); + } + std::string phonesToString(const std::vector& phones) + { + std::string result; + for (auto id : phones) { + if (!result.empty()) result += " "; + if (reverse_tokens.find(id) != reverse_tokens.end()) { + result += reverse_tokens[id]; + } else { + result += "<" + std::to_string(id) + ">"; } - phones.insert(phones.end(), phones_and_tones.first.begin(), phones_and_tones.first.end()); - tones.insert(tones.end(), phones_and_tones.second.begin(), phones_and_tones.second.end()); } + return result; + } + std::string tonesToString(const std::vector& tones) + { + std::string result; + for (auto tone : tones) { + if (!result.empty()) result += " "; + result += std::to_string(tone); + } + return result; } }; \ No newline at end of file diff --git a/projects/llm_framework/main_openai_api/SConstruct b/projects/llm_framework/main_openai_api/SConstruct index ac778df9..9098a679 100644 --- a/projects/llm_framework/main_openai_api/SConstruct +++ b/projects/llm_framework/main_openai_api/SConstruct @@ -18,8 +18,8 @@ LINK_SEARCH_PATH = [] STATIC_FILES = [] -ModuleLLMOpenAIPluginPath = wget_github_commit('https://github.com/Abandon-ht/ModuleLLM-OpenAI-Plugin.git', '1077efbe201ea3f29517f5ce4a0cfc3b04c25d1d', True) -python_venv = check_wget_down("https://m5stack.oss-cn-shenzhen.aliyuncs.com/resource/linux/llm/m5stack_llm-openai-api-python-venv_v1.5.tar.gz", 'm5stack_llm-llm-openai-api-python-venv_v1.5.tar.gz') +ModuleLLMOpenAIPluginPath = wget_github_commit('https://github.com/m5stack/ModuleLLM-OpenAI-Plugin.git', '9612b20800eb1708e648744e3cb333c4f743811c', True) +python_venv = check_wget_down("https://m5stack.oss-cn-shenzhen.aliyuncs.com/resource/linux/llm/m5stack_llm-openai-api-python-venv_v1.6.tar.gz", 'm5stack_llm-openai-api-python-venv_v1.6.tar.gz') DEFINITIONS += ['-O3', '-fopenmp', '-std=c++17'] @@ -52,7 +52,7 @@ ignore['ignore'] = list(set(ignore['ignore'])) with open('../dist/fileignore', 'w') as f: json.dump(ignore, f, indent=4) -env['COMPONENTS'].append({'target':'llm_openai_api', +env['COMPONENTS'].append({'target':'llm_openai_api-1.7', 'SRCS':SRCS, 'INCLUDE':INCLUDE, 'PRIVATE_INCLUDE':PRIVATE_INCLUDE, diff --git a/projects/llm_framework/main_skel/SConstruct b/projects/llm_framework/main_skel/SConstruct index f40e42d3..bae9b432 100644 --- a/projects/llm_framework/main_skel/SConstruct +++ b/projects/llm_framework/main_skel/SConstruct @@ -31,7 +31,7 @@ STATIC_LIB += static_file * 2 STATIC_FILES += Glob('mode_*.json') -env['COMPONENTS'].append({'target':'llm_skel', +env['COMPONENTS'].append({'target':'llm_skel-0.1', 'SRCS':SRCS, 'INCLUDE':INCLUDE, 'PRIVATE_INCLUDE':PRIVATE_INCLUDE, diff --git a/projects/llm_framework/main_sys/SConstruct b/projects/llm_framework/main_sys/SConstruct index 1168d25c..d9a48304 100644 --- a/projects/llm_framework/main_sys/SConstruct +++ b/projects/llm_framework/main_sys/SConstruct @@ -37,7 +37,7 @@ LDFLAGS+=['-Wl,-rpath=/opt/m5stack/lib', '-Wl,-rpath=/usr/local/m5stack/lib', '- STATIC_FILES += [AFile('sys_config.json')] REQUIREMENTS += ['simdjson_component'] -env['COMPONENTS'].append({'target':'llm_sys', +env['COMPONENTS'].append({'target':'llm_sys-1.6', 'SRCS':SRCS, 'INCLUDE':INCLUDE, 'PRIVATE_INCLUDE':PRIVATE_INCLUDE, diff --git a/projects/llm_framework/main_sys/src/event_loop.cpp b/projects/llm_framework/main_sys/src/event_loop.cpp index 00b9d9b4..16aa2a0c 
100644 --- a/projects/llm_framework/main_sys/src/event_loop.cpp +++ b/projects/llm_framework/main_sys/src/event_loop.cpp @@ -38,7 +38,7 @@ #include "remote_action.h" #include #include "hv/ifconfig.h" - +#include <glob.h> #include "StackFlowUtil.h" void usr_print_error(const std::string &request_id, const std::string &work_id, const std::string &error_msg, @@ -707,7 +707,26 @@ int sys_reset(int com_id, const nlohmann::json &json_obj) int sys_version(int com_id, const nlohmann::json &json_obj) { - usr_out(json_obj["request_id"], json_obj["work_id"], std::string("v1.4"), com_id); + usr_out(json_obj["request_id"], json_obj["work_id"], std::string("v1.6"), com_id); + + int out = 0; + return out; +} + +int sys_version2(int com_id, const nlohmann::json &json_obj) +{ + nlohmann::json data_body = nlohmann::json::array(); + glob_t glob_result; + int ret = glob("/opt/m5stack/bin/llm_*-*", GLOB_TILDE, NULL, &glob_result); // list every installed llm_* unit binary + if (ret == 0) { + for (size_t i = 0; i < glob_result.gl_pathc; i++) { + const char *separator = strrchr(glob_result.gl_pathv[i], '/'); + const char *filename = (separator != NULL) ? separator + 1 : glob_result.gl_pathv[i]; + data_body.push_back(std::string(filename)); + } + } + globfree(&glob_result); + usr_out(json_obj["request_id"], json_obj["work_id"], data_body, com_id); int out = 0; return out; } @@ -739,6 +758,7 @@ void server_work() key_sql["sys.rmmode"] = sys_rmmode; key_sql["sys.unit_call"] = sys_unit_call; key_sql["sys.cmminfo"] = sys_cmminfo; + key_sql["sys.version2"] = sys_version2; } void server_stop_work() diff --git a/projects/llm_framework/main_sys/src/remote_action.cpp b/projects/llm_framework/main_sys/src/remote_action.cpp index 25933692..a6818c8d 100644 --- a/projects/llm_framework/main_sys/src/remote_action.cpp +++ b/projects/llm_framework/main_sys/src/remote_action.cpp @@ -20,12 +20,10 @@ int remote_call(int com_id, const std::string &json_str) std::string work_unit = work_id.substr(0, work_id.find(".")); std::string action = sample_json_str_get(json_str, "action"); char com_url[256]; - int length = snprintf(com_url, 255, zmq_c_format.c_str(), com_id); - std::string send_data; - std::string com_urls(com_url); - RPC_PUSH_PARAM(send_data, com_urls, json_str); + snprintf(com_url, 255, zmq_c_format.c_str(), com_id); pzmq clent(work_unit); - return clent.call_rpc_action(action, send_data, [](pzmq *_pzmq, const std::string &val) {}); + return clent.call_rpc_action(action, pzmq_data::set_param(com_url, json_str), + [](pzmq *_pzmq, const std::shared_ptr<pzmq_data> &val) {}); } void remote_action_work() diff --git a/projects/llm_framework/main_sys/src/remote_server.cpp b/projects/llm_framework/main_sys/src/remote_server.cpp index 5a4567f7..64d65dba 100644 --- a/projects/llm_framework/main_sys/src/remote_server.cpp +++ b/projects/llm_framework/main_sys/src/remote_server.cpp @@ -171,40 +171,36 @@ int c_sys_release_unit(char const *unit) return sys_release_unit(unit); } -std::string rpc_allocate_unit(pzmq *_pzmq, const std::string &raw) +std::string rpc_allocate_unit(pzmq *_pzmq, const std::shared_ptr<pzmq_data> &raw) { - unit_data *unit_info = sys_allocate_unit(raw); - std::string send_data; - std::string send_data1; - std::string str_port = std::to_string(unit_info->port_); - RPC_PUSH_PARAM(send_data1, unit_info->output_url, unit_info->inference_url); - RPC_PUSH_PARAM(send_data, str_port, send_data1); - return send_data; + unit_data *unit_info = sys_allocate_unit(raw->string()); + return pzmq_data::set_param(std::to_string(unit_info->port_), + pzmq_data::set_param(unit_info->output_url, 
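The new `sys.version2` action gives clients a way to enumerate every installed unit with its version. A sketch of querying it over the unit's JSON-over-TCP interface; the envelope fields and newline-delimited framing here follow the usual ModuleLLM request shape, and the host/port are taken from the benchmark example, so adjust both if your setup differs:

```python
import json
import socket

# Request envelope assumed to match the standard ModuleLLM shape.
req = {
    "request_id": "version2_demo",
    "work_id": "sys",
    "action": "version2",
    "object": "None",
    "data": "None",
}
with socket.create_connection(("192.168.20.100", 10001), timeout=5) as s:
    s.sendall((json.dumps(req) + "\n").encode())
    reply = s.makefile().readline()  # assumes one newline-terminated JSON reply
print(json.loads(reply)["data"])     # e.g. ["llm_sys-1.6", "llm_melotts-1.7", ...]
```

The names it returns come straight from the versioned component targets (`llm_llm-1.8`, `llm_melotts-1.7`, `llm_sys-1.6`, and so on) introduced throughout this change.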
unit_info->inference_url)); } -std::string rpc_release_unit(pzmq *_pzmq, const std::string &raw) +std::string rpc_release_unit(pzmq *_pzmq, const std::shared_ptr &raw) { - sys_release_unit(raw); + sys_release_unit(raw->string()); return "Success"; } -std::string rpc_sql_select(pzmq *_pzmq, const std::string &raw) +std::string rpc_sql_select(pzmq *_pzmq, const std::shared_ptr &raw) { - return sys_sql_select(raw); + return sys_sql_select(raw->string()); } -std::string rpc_sql_set(pzmq *_pzmq, const std::string &raw) +std::string rpc_sql_set(pzmq *_pzmq, const std::shared_ptr &raw) { - std::string key = sample_json_str_get(raw, "key"); - std::string val = sample_json_str_get(raw, "val"); + std::string key = sample_json_str_get(raw->string(), "key"); + std::string val = sample_json_str_get(raw->string(), "val"); if (key.empty()) return "False"; sys_sql_set(key, val); return "Success"; } -std::string rpc_sql_unset(pzmq *_pzmq, const std::string &raw) +std::string rpc_sql_unset(pzmq *_pzmq, const std::shared_ptr &raw) { - sys_sql_unset(raw); + sys_sql_unset(raw->string()); return "Success"; } diff --git a/projects/llm_framework/main_sys/src/zmq_bus.cpp b/projects/llm_framework/main_sys/src/zmq_bus.cpp index 60693c93..b62196eb 100644 --- a/projects/llm_framework/main_sys/src/zmq_bus.cpp +++ b/projects/llm_framework/main_sys/src/zmq_bus.cpp @@ -42,7 +42,7 @@ void zmq_bus_com::work(const std::string &zmq_url_format, int port) _zmq_url = std::string((char *)buff.data()); SAFE_SETTING("serial_zmq_url", _zmq_url); user_chennal_ = std::make_unique(_zmq_url, ZMQ_PULL, - [this](pzmq *_pzmq, const std::string &data) { this->send_data(data); }); + [this](pzmq *_pzmq, const std::shared_ptr &data) { this->send_data(data->string()); }); reace_data_event_thread = std::make_unique(std::bind(&zmq_bus_com::reace_data_event, this)); } diff --git a/projects/llm_framework/main_tts/SConstruct b/projects/llm_framework/main_tts/SConstruct index c21e0aa8..a38e56f9 100644 --- a/projects/llm_framework/main_tts/SConstruct +++ b/projects/llm_framework/main_tts/SConstruct @@ -27,7 +27,7 @@ INCLUDE += [ADir('src/runner/eigen-3.4.0'), ADir('src/runner/src/tn/header'), AD STATIC_FILES += Glob('mode_*.json') -env['COMPONENTS'].append({'target':'llm_tts', +env['COMPONENTS'].append({'target':'llm_tts-1.6', 'SRCS':SRCS, 'INCLUDE':INCLUDE, 'PRIVATE_INCLUDE':PRIVATE_INCLUDE, diff --git a/projects/llm_framework/main_tts/mode_single-speaker-english-fast.json b/projects/llm_framework/main_tts/mode_single-speaker-english-fast.json index e311a4ad..cbaccaac 100644 --- a/projects/llm_framework/main_tts/mode_single-speaker-english-fast.json +++ b/projects/llm_framework/main_tts/mode_single-speaker-english-fast.json @@ -1,5 +1,5 @@ { - "mode": "single_speaker_english_fast", + "mode": "single-speaker-english-fast", "type": "tts", "homepage":"https://github.com/huakunyang/SummerTTS", "capabilities": [ diff --git a/projects/llm_framework/main_tts/mode_single-speaker-fast.json b/projects/llm_framework/main_tts/mode_single-speaker-fast.json index ca1ede7f..6fadf2f5 100644 --- a/projects/llm_framework/main_tts/mode_single-speaker-fast.json +++ b/projects/llm_framework/main_tts/mode_single-speaker-fast.json @@ -1,5 +1,5 @@ { - "mode": "single_speaker_fast", + "mode": "single-speaker-fast", "type": "tts", "homepage":"https://github.com/huakunyang/SummerTTS", "capabilities": [ diff --git a/projects/llm_framework/main_tts/src/main.cpp b/projects/llm_framework/main_tts/src/main.cpp index 055412e2..88ef4a8e 100644 --- 
a/projects/llm_framework/main_tts/src/main.cpp +++ b/projects/llm_framework/main_tts/src/main.cpp @@ -111,6 +111,7 @@ class llm_task { SLOGW("config file :%s miss", file_name.c_str()); continue; } + SLOGI("config file :%s read", file_name.c_str()); config_file >> file_body; config_file.close(); break; @@ -166,8 +167,17 @@ class llm_task { { } + void start() + { + } + + void stop() + { + } + ~llm_task() { + stop(); } }; @@ -495,6 +505,7 @@ class llm_tts : public StackFlow { send("None", "None", error_body, work_id); return -1; } + llm_task_[work_id_num]->stop(); auto llm_channel = get_channel(work_id_num); llm_channel->stop_subscriber(""); llm_task_.erase(work_id_num); @@ -509,6 +520,7 @@ class llm_tts : public StackFlow { if (iteam == llm_task_.end()) { break; } + iteam->second->stop(); get_channel(iteam->first)->stop_subscriber(""); iteam->second.reset(); llm_task_.erase(iteam->first); diff --git a/projects/llm_framework/main_vad/SConstruct b/projects/llm_framework/main_vad/SConstruct index f7ad0932..2493e159 100644 --- a/projects/llm_framework/main_vad/SConstruct +++ b/projects/llm_framework/main_vad/SConstruct @@ -28,7 +28,7 @@ LDFLAGS += ['-l:libsherpa-onnx-core.a', STATIC_FILES += Glob('mode_*.json') -env['COMPONENTS'].append({'target':'llm_vad', +env['COMPONENTS'].append({'target':'llm_vad-1.6', 'SRCS':SRCS, 'INCLUDE':INCLUDE, 'PRIVATE_INCLUDE':PRIVATE_INCLUDE, diff --git a/projects/llm_framework/main_vad/mode_silero-vad.json b/projects/llm_framework/main_vad/mode_silero-vad.json index d262af4d..a4d8eae9 100644 --- a/projects/llm_framework/main_vad/mode_silero-vad.json +++ b/projects/llm_framework/main_vad/mode_silero-vad.json @@ -1,5 +1,5 @@ { - "mode": "silero_vad", + "mode": "silero-vad", "type": "vad", "homepage":"https://github.com/snakers4/silero-vad", "capabilities": [ diff --git a/projects/llm_framework/main_vad/src/main.cpp b/projects/llm_framework/main_vad/src/main.cpp index d9c1f7d9..3bd53f3f 100644 --- a/projects/llm_framework/main_vad/src/main.cpp +++ b/projects/llm_framework/main_vad/src/main.cpp @@ -106,6 +106,7 @@ class llm_task { SLOGW("config file :%s miss", file_name.c_str()); continue; } + SLOGI("config file :%s read", file_name.c_str()); config_file >> file_body; config_file.close(); break; @@ -216,11 +217,17 @@ class llm_task { pcmdata = buffer_create(); } + void start() + { + } + + void stop() + { + } + ~llm_task() { - if (vad_) { - vad_.reset(); - } + stop(); buffer_destroy(pcmdata); } }; @@ -237,8 +244,7 @@ class llm_vad : public StackFlow { llm_vad() : StackFlow("vad") { task_count_ = 1; - event_queue_.appendListener( - EVENT_TASK_PAUSE, std::bind(&llm_vad::_task_pause, this, std::placeholders::_1, std::placeholders::_2)); + event_queue_.appendListener(EVENT_TASK_PAUSE, std::bind(&llm_vad::_task_pause, this, std::placeholders::_1)); } void task_output(const std::weak_ptr llm_task_obj_weak, @@ -299,9 +305,10 @@ class llm_vad : public StackFlow { llm_task_obj->sys_pcm_on_data((*next_data)); } - void _task_pause(const std::string &work_id, const std::string &data) + void _task_pause(const std::shared_ptr &arg) { - int work_id_num = sample_get_work_id_num(work_id); + std::shared_ptr work_id = std::static_pointer_cast(arg); + int work_id_num = sample_get_work_id_num(*work_id); if (llm_task_.find(work_id_num) == llm_task_.end()) { return; } @@ -315,7 +322,7 @@ class llm_vad : public StackFlow { void task_pause(const std::string &work_id, const std::string &data) { - event_queue_.enqueue(EVENT_TASK_PAUSE, work_id, ""); + event_queue_.enqueue(EVENT_TASK_PAUSE, 
std::make_shared(work_id)); } void task_work(const std::weak_ptr llm_task_obj_weak, @@ -328,8 +335,8 @@ class llm_vad : public StackFlow { } if ((!audio_url_.empty()) && (llm_task_obj->audio_flage_ == false)) { std::weak_ptr _llm_task_obj = llm_task_obj; - llm_channel->subscriber(audio_url_, [_llm_task_obj](pzmq *_pzmq, const std::string &raw) { - _llm_task_obj.lock()->sys_pcm_on_data(raw); + llm_channel->subscriber(audio_url_, [_llm_task_obj](pzmq *_pzmq, const std::shared_ptr &raw) { + _llm_task_obj.lock()->sys_pcm_on_data(raw->string()); }); llm_task_obj->audio_flage_ = true; } @@ -416,9 +423,10 @@ class llm_vad : public StackFlow { if (input.find("sys") != std::string::npos) { audio_url_ = unit_call("audio", "cap", "None"); std::weak_ptr _llm_task_obj = llm_task_obj; - llm_channel->subscriber(audio_url_, [_llm_task_obj](pzmq *_pzmq, const std::string &raw) { - _llm_task_obj.lock()->sys_pcm_on_data(raw); - }); + llm_channel->subscriber(audio_url_, + [_llm_task_obj](pzmq *_pzmq, const std::shared_ptr &raw) { + _llm_task_obj.lock()->sys_pcm_on_data(raw->string()); + }); llm_task_obj->audio_flage_ = true; } else if (input.find("vad") != std::string::npos) { llm_channel->subscriber_work_id( @@ -464,8 +472,8 @@ class llm_vad : public StackFlow { if (data.find("sys") != std::string::npos) { if (audio_url_.empty()) audio_url_ = unit_call("audio", "cap", data); std::weak_ptr _llm_task_obj = llm_task_obj; - llm_channel->subscriber(audio_url_, [_llm_task_obj](pzmq *_pzmq, const std::string &raw) { - _llm_task_obj.lock()->sys_pcm_on_data(raw); + llm_channel->subscriber(audio_url_, [_llm_task_obj](pzmq *_pzmq, const std::shared_ptr &raw) { + _llm_task_obj.lock()->sys_pcm_on_data(raw->string()); }); llm_task_obj->audio_flage_ = true; llm_task_obj->inputs_.push_back(data); @@ -553,6 +561,7 @@ class llm_vad : public StackFlow { send("None", "None", error_body, work_id); return -1; } + llm_task_[work_id_num]->stop(); auto llm_channel = get_channel(work_id_num); llm_channel->stop_subscriber(""); if (llm_task_[work_id_num]->audio_flage_) { @@ -570,6 +579,7 @@ class llm_vad : public StackFlow { if (iteam == llm_task_.end()) { break; } + iteam->second->stop(); if (iteam->second->audio_flage_) { unit_call("audio", "cap_stop", "None"); } diff --git a/projects/llm_framework/main_vlm/SConstruct b/projects/llm_framework/main_vlm/SConstruct index 31539576..d1046f23 100644 --- a/projects/llm_framework/main_vlm/SConstruct +++ b/projects/llm_framework/main_vlm/SConstruct @@ -17,7 +17,9 @@ LDFLAGS = [] LINK_SEARCH_PATH = [] STATIC_FILES = [] +python_venv = check_wget_down("https://m5stack.oss-cn-shenzhen.aliyuncs.com/resource/linux/llm/m5stack_llm-vlm-python-venv_v1.6.tar.gz", 'm5stack_llm-vlm-python-venv_v1.6.tar.gz') +DEFINITIONS += ['-O2'] DEFINITIONS += ['-std=c++17'] LDFLAGS+=['-Wl,-rpath=/opt/m5stack/lib', '-Wl,-rpath=/usr/local/m5stack/lib', '-Wl,-rpath=/usr/local/m5stack/lib/gcc-10.3', '-Wl,-rpath=/opt/lib', '-Wl,-rpath=/opt/usr/lib', '-Wl,-rpath=./'] REQUIREMENTS += ['ax_engine', 'ax_interpreter', 'ax_sys'] @@ -49,10 +51,29 @@ static_file += [AFile('../static_lib/libopencv-4.6-aarch64-none/lib/libtegra_hal static_file += [AFile('../static_lib/libopencv-4.6-aarch64-none/lib/libzlib.a')] STATIC_LIB += static_file * 4 +STATIC_FILES += [os.path.join(python_venv, 'vlm')] STATIC_FILES += Glob('scripts/tokenizer_*.py') STATIC_FILES += Glob('models/mode_*.json') -env['COMPONENTS'].append({'target':'llm_vlm', +IGNORE_FILES = [] +IGNORE_FILES += ['vlm'] + +import json +if not os.path.exists('../dist'): + 
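The pause-event refactor above narrows the listener to a single type-erased argument: the work id is packed into a `std::shared_ptr` at the enqueue site and cast back in the listener. A tiny sketch of that pack/unpack pattern, with `std::queue` standing in for the project's event queue:

```cpp
#include <iostream>
#include <memory>
#include <queue>
#include <string>

int main() {
    std::queue<std::shared_ptr<void>> q;                  // type-erased payloads
    q.push(std::make_shared<std::string>("vad.1001"));    // enqueue(EVENT_TASK_PAUSE, ...)

    auto arg = q.front(); q.pop();
    auto work_id = std::static_pointer_cast<std::string>(arg);  // listener side
    std::cout << "pause " << *work_id << "\n";
}
```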
os.makedirs('../dist') +ignore = {'ignore':[]} +try: + with open('../dist/fileignore', 'a+') as f: + f.seek(0) + ignore = json.load(f) +except: + pass +ignore['ignore'] += IGNORE_FILES +ignore['ignore'] = list(set(ignore['ignore'])) +with open('../dist/fileignore', 'w') as f: + json.dump(ignore, f, indent=4) + +env['COMPONENTS'].append({'target':'llm_vlm-1.7', 'SRCS':SRCS, 'INCLUDE':INCLUDE, 'PRIVATE_INCLUDE':PRIVATE_INCLUDE, diff --git a/projects/llm_framework/main_vlm/models/mode_internvl2.5-1B-364-ax630c.json b/projects/llm_framework/main_vlm/models/mode_internvl2.5-1B-364-ax630c.json new file mode 100644 index 00000000..8f6a396a --- /dev/null +++ b/projects/llm_framework/main_vlm/models/mode_internvl2.5-1B-364-ax630c.json @@ -0,0 +1,35 @@ +{ + "mode":"internvl2.5-1B-364-ax630c", + "type":"vlm", + "homepage":"https://huggingface.co/AXERA-TECH/InternVL2_5-1B", + "capabilities":[ + "text_generation", + "chat" + ], + "input_type":[ + "vlm.chat_completion", + "vlm.chat_completion.stream" + ], + "output_type":[ + "vlm.utf-8", + "vlm.utf-8.stream" + ], + "mode_param":{ + "tokenizer_type":2, + "filename_tokenizer_model":"http://localhost:8080", + "filename_tokens_embed":"model.embed_tokens.weight.bfloat16.bin", + "filename_post_axmodel":"qwen2_post.axmodel", + "template_filename_axmodel":"qwen2_p256_l%d_together.axmodel", + "filename_vpm_resampler_axmodedl":"vit_intern_sim_space2depth.axmodel", + "b_use_topk":false, + "b_bos":false, + "b_eos":false, + "axmodel_num":24, + "tokens_embed_num":151674, + "img_token_id":151667, + "tokens_embed_size":896, + "b_use_mmap_load_embed":true, + "b_dynamic_load_axmodel_layer":false, + "ext_scripts":["tokenizer_internvl2.5-1B-364-ax630c.py"] + } +} \ No newline at end of file diff --git a/projects/llm_framework/main_vlm/models/mode_smolvlm-256M-ax630c.json b/projects/llm_framework/main_vlm/models/mode_smolvlm-256M-ax630c.json new file mode 100644 index 00000000..1d3a2931 --- /dev/null +++ b/projects/llm_framework/main_vlm/models/mode_smolvlm-256M-ax630c.json @@ -0,0 +1,35 @@ +{ + "mode":"smolvlm-256M-ax630c", + "type":"vlm", + "homepage":"https://huggingface.co/HuggingFaceTB/SmolVLM-256M-Instruct", + "capabilities":[ + "text_generation", + "chat" + ], + "input_type":[ + "vlm.chat_completion", + "vlm.chat_completion.stream" + ], + "output_type":[ + "vlm.utf-8", + "vlm.utf-8.stream" + ], + "mode_param":{ + "tokenizer_type":2, + "filename_tokenizer_model":"http://localhost:8080", + "filename_tokens_embed":"model.embed_tokens.weight.bfloat16.bin", + "filename_post_axmodel":"llama_post.axmodel", + "template_filename_axmodel":"llama_p128_l%d_together.axmodel", + "filename_vpm_resampler_axmodedl":"SmolVLM-256M-Instruct_vision_nhwc.axmodel", + "b_use_topk":false, + "b_bos":false, + "b_eos":false, + "axmodel_num":30, + "tokens_embed_num":49280, + "img_token_id":49190, + "tokens_embed_size":576, + "b_use_mmap_load_embed":true, + "b_dynamic_load_axmodel_layer":false, + "ext_scripts":["tokenizer_smolvlm-256M-ax630c.py"] + } +} \ No newline at end of file diff --git a/projects/llm_framework/main_vlm/models/mode_smolvlm-500M-ax630c.json b/projects/llm_framework/main_vlm/models/mode_smolvlm-500M-ax630c.json new file mode 100644 index 00000000..3ce09a08 --- /dev/null +++ b/projects/llm_framework/main_vlm/models/mode_smolvlm-500M-ax630c.json @@ -0,0 +1,37 @@ +{ + "mode":"smolvlm-500M-ax630c", + "type":"vlm", + "homepage":"https://huggingface.co/HuggingFaceTB/SmolVLM-500M-Instruct", + "compile_flage":"pulsar2 build --input 
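For reference, here is the fileignore merge that the SConstruct snippet above performs, sketched in C++ with nlohmann::json (which the framework already uses elsewhere). The path and the `"vlm"` entry mirror the script; error handling is simplified, so treat this as an illustration rather than build logic:

```cpp
#include <fstream>
#include <set>
#include <string>
#include <nlohmann/json.hpp>

int main() {
    nlohmann::json ignore;
    std::ifstream in("../dist/fileignore");
    if (in) ignore = nlohmann::json::parse(in, nullptr, /*allow_exceptions=*/false);
    if (!ignore.is_object() || !ignore["ignore"].is_array())
        ignore = {{"ignore", nlohmann::json::array()}};   // fresh file or parse failure

    ignore["ignore"].push_back("vlm");                    // IGNORE_FILES entry
    std::set<std::string> uniq(ignore["ignore"].begin(), ignore["ignore"].end());
    ignore["ignore"] = uniq;                              // list(set(...)) equivalent

    std::ofstream("../dist/fileignore") << ignore.dump(4);  // indent=4, like json.dump
}
```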
HuggingFaceTB/SmolVLM-500M-w8a16/SmolVLM-500M-Instruct_vision.onnx --config AXERA/SmolVLM-256M-Instruct.axera/model_convert/config.json --output_dir HuggingFaceTB/SmolVLM-500M-w8a16/build-output --output_name SmolVLM-500M-Instruct_vision.axmodel --target_hardware AX620E --compiler.check 0 --npu_mode NPU2",
+    "pulsar_version":"3.4-983bb35e",
+    "capabilities":[
+        "text_generation",
+        "chat"
+    ],
+    "input_type":[
+        "vlm.chat_completion",
+        "vlm.chat_completion.stream"
+    ],
+    "output_type":[
+        "vlm.utf-8",
+        "vlm.utf-8.stream"
+    ],
+    "mode_param":{
+        "tokenizer_type":2,
+        "filename_tokenizer_model":"http://localhost:8080",
+        "filename_tokens_embed":"model.embed_tokens.weight.bfloat16.bin",
+        "filename_post_axmodel":"llama_post.axmodel",
+        "template_filename_axmodel":"llama_p128_l%d_together.axmodel",
+        "filename_vpm_resampler_axmodedl":"SmolVLM-500M-Instruct_vision.axmodel",
+        "b_use_topk":false,
+        "b_bos":false,
+        "b_eos":false,
+        "axmodel_num":32,
+        "tokens_embed_num":49280,
+        "img_token_id":49190,
+        "tokens_embed_size":960,
+        "b_use_mmap_load_embed":true,
+        "b_dynamic_load_axmodel_layer":false,
+        "ext_scripts":["tokenizer_smolvlm-500M-ax630c.py"]
+    }
+}
\ No newline at end of file
diff --git a/projects/llm_framework/main_vlm/scripts/tokenizer_internvl2.5-1B-364-ax630c.py b/projects/llm_framework/main_vlm/scripts/tokenizer_internvl2.5-1B-364-ax630c.py
new file mode 100644
index 00000000..569c5da6
--- /dev/null
+++ b/projects/llm_framework/main_vlm/scripts/tokenizer_internvl2.5-1B-364-ax630c.py
@@ -0,0 +1,138 @@
+from transformers import AutoTokenizer, PreTrainedTokenizerFast
+from http.server import HTTPServer, BaseHTTPRequestHandler
+import json
+import argparse
+
+
+class Tokenizer_Http:
+
+    def __init__(self, model_id):
+        self.tokenizer = AutoTokenizer.from_pretrained(
+            model_id, trust_remote_code=True, use_fast=False
+        )
+
+    def encode(self, prompt, content):
+        prompt = f"<|im_start|>system\n{content}<|im_end|><|im_start|>user\n{prompt}<|im_end|><|im_start|>assistant\n"
+        input_ids = self.tokenizer.encode(prompt)
+        return input_ids
+
+    def encode_vpm(self, prompt, content="Please describe the image shortly."):
+        prompt = f"<|im_start|>system\n{content}<|im_end|><|im_start|>user\n<img>" + "<IMG_CONTEXT>" * 169 + f"</img>\n{prompt}<|im_end|><|im_start|>assistant\n"
+        input_ids = self.tokenizer.encode(prompt)
+        return input_ids
+
+    def decode(self, token_ids):
+        return self.tokenizer.decode(token_ids, clean_up_tokenization_spaces=False)
+
+    @property
+    def bos_id(self):
+        return self.tokenizer.bos_token_id
+
+    @property
+    def eos_id(self):
+        return self.tokenizer.eos_token_id
+
+    @property
+    def bos_token(self):
+        return self.tokenizer.bos_token
+
+    @property
+    def eos_token(self):
+        return self.tokenizer.eos_token
+
+class Request(BaseHTTPRequestHandler):
+    # subclass BaseHTTPRequestHandler to define the tokenizer endpoints
+    timeout = 5
+    server_version = "Apache"
+
+    def do_GET(self):
+        print(self.path)
+        # handle GET requests from the client
+        self.send_response(200)
+        self.send_header("type", "get")  # set a response header (optional, more may be added)
+        self.end_headers()
+
+        if self.path == "/bos_id":
+            bos_id = tokenizer.bos_id
+            # print(bos_id)
+            # to json
+            if bos_id is None:
+                msg = json.dumps({"bos_id": -1})
+            else:
+                msg = json.dumps({"bos_id": bos_id})
+        elif self.path == "/eos_id":
+            eos_id = tokenizer.eos_id
+            if eos_id is None:
+                msg = json.dumps({"eos_id": -1})
+            else:
+                msg = json.dumps({"eos_id": eos_id})
+        else:
+            msg = "error"
+
+        print(msg)
+        msg = str(msg).encode()  # convert to str, then to bytes
+
+        self.wfile.write(msg)  # return the bytes to the client
+
+    def do_POST(self):
+        # handle POST requests from the client
+        data = self.rfile.read(
+            int(self.headers["content-length"])
+        )  # read the request body sent by the client (bytes)
+        data = data.decode()  # decode the bytes to str
+
+        self.send_response(200)
+        self.send_header("type", "post")  # set a response header (optional, more may be added)
+        self.end_headers()
+
+        if self.path == "/encode":
+            req = json.loads(data)
+            print(req)
+            prompt = req["text"]
+            b_img_prompt = False
+            if "img_prompt" in req:
+                b_img_prompt = req["img_prompt"]
+            if b_img_prompt:
+                token_ids = tokenizer.encode_vpm(prompt)
+            else:
+                token_ids = tokenizer.encode(prompt, args.content)
+            if token_ids is None:
+                msg = json.dumps({"token_ids": -1})
+            else:
+                msg = json.dumps({"token_ids": token_ids})
+
+        elif self.path == "/decode":
+            req = json.loads(data)
+            token_ids = req["token_ids"]
+            text = tokenizer.decode(token_ids)
+            if text is None:
+                msg = json.dumps({"text": ""})
+            else:
+                msg = json.dumps({"text": text})
+        else:
+            msg = "error"
+        print(msg)
+        msg = str(msg).encode()  # convert to str, then to bytes
+
+        self.wfile.write(msg)  # return the bytes to the client
+
+
+if __name__ == "__main__":
+
+    args = argparse.ArgumentParser()
+    args.add_argument("--host", type=str, default="localhost")
+    args.add_argument("--port", type=int, default=8080)
+    args.add_argument('--model_id', type=str, default='internvl2_tokenizer')
+    args.add_argument('--content', type=str, default='你是由上海人工智能实验室联合商汤科技开发的书生多模态大模型,英文名叫InternVL, 是一个有用无害的人工智能助手。')
+    args = args.parse_args()
+
+    tokenizer = Tokenizer_Http(args.model_id)
+
+
+    # print(tokenizer.bos_id, tokenizer.bos_token, tokenizer.eos_id, tokenizer.eos_token)
+    # print(tokenizer.encode("hello world", args.content))
+
+    host = (args.host, args.port)  # bind address and port; 'localhost' is equivalent to '127.0.0.1'
+    print("http://%s:%s" % host)
+    server = HTTPServer(host, Request)  # build the server from the address and the handler class above
+    server.serve_forever()  # start serving
diff --git a/projects/llm_framework/main_vlm/scripts/tokenizer_smolvlm-256M-ax630c.py b/projects/llm_framework/main_vlm/scripts/tokenizer_smolvlm-256M-ax630c.py
new file mode 100644
index 00000000..560a71f3
--- /dev/null
+++ b/projects/llm_framework/main_vlm/scripts/tokenizer_smolvlm-256M-ax630c.py
@@ -0,0 +1,248 @@
+from transformers import AutoTokenizer, PreTrainedTokenizerFast
+from transformers.tokenization_utils_base import AddedToken
+from http.server import HTTPServer, BaseHTTPRequestHandler
+import json
+import argparse
+
+def _prompt_split_image(
+    image_seq_len,
+    image_rows,
+    image_cols,
+    fake_token_around_image,
+    image_token,
+    global_img_token,
+):
+    """Prompt with expanded image tokens for when the image is split into patches."""
+    text_split_images = ""
+    for n_h in range(image_rows):
+        for n_w in range(image_cols):
+            text_split_images += (
+                f"{fake_token_around_image}"
+                + f"<row_{n_h + 1}_col_{n_w + 1}>"
+                + f"{image_token}" * image_seq_len
+            )
+        text_split_images += "\n"
+
+    text_split_images += (
+        f"\n{fake_token_around_image}"
+        + f"{global_img_token}"
+        + f"{image_token}" * image_seq_len
+        + f"{fake_token_around_image}"
+    )
+    return text_split_images
+
+
+def _prompt_single_image(
+    image_seq_len, fake_token_around_image, image_token, global_img_token
+):
+    """Prompt with expanded image tokens for a single image."""
+    return (
+        f"{fake_token_around_image}"
+        + f"{global_img_token}"
+        + f"{image_token}" * image_seq_len
+        + f"{fake_token_around_image}"
+    )
+
+
+def get_image_prompt_string(
+    image_rows,
+    image_cols,
+    image_seq_len,
+    fake_token_around_image,
+    image_token,
+    global_img_token,
+):
+    if image_rows == 0 and image_cols == 0:
+        return _prompt_single_image(
+            image_seq_len,
+            fake_token_around_image=fake_token_around_image,
+            image_token=image_token,
+            global_img_token=global_img_token,
+        )
+    return _prompt_split_image(
+        image_seq_len,
+        image_rows,
+        image_cols,
+        fake_token_around_image,
+        image_token,
+        global_img_token,
+    )
+
+class Tokenizer_Http:
+
+    def __init__(self, model_id):
+        self.tokenizer = AutoTokenizer.from_pretrained(
+            model_id, trust_remote_code=True, use_fast=False
+        )
+
+    def encode(self, prompt, content):
+        prompt = f"<|im_start|>User:{content}\nAssistant:"
+        input_ids = self.tokenizer(prompt)
+        return input_ids["input_ids"]
+
+    def encode_vpm(self, prompt, content="Please describe the image shortly."):
+        prompt = f"<|im_start|>User:<image>{prompt}\nAssistant:"
+        text = [prompt]
+        image_rows = [[0]]
+        image_cols = [[0]]
+        image_seq_len = 64
+        image_token = "<image>"
+        fake_image_token = "<fake_token_around_image>"
+        global_img_token = "<global-img>"
+        prompt_strings = []
+        for sample, sample_rows, sample_cols in zip(text, image_rows, image_cols):
+            # Replace the image token with fake tokens around the expanded image token sequence of length `image_seq_len`
+            image_prompt_strings = []
+            for n_rows, n_cols in zip(sample_rows, sample_cols):
+                image_prompt_string = get_image_prompt_string(
+                    n_rows,
+                    n_cols,
+                    image_seq_len,
+                    image_token=image_token,
+                    fake_token_around_image=fake_image_token,
+                    global_img_token=global_img_token,
+                )
+                image_prompt_strings.append(image_prompt_string)
+
+            split_sample = sample.split(image_token)
+            if len(split_sample) == 0:
+                raise ValueError("The image token should be present in the text.")
+
+            # Place in the image prompt strings where the image tokens are
+            sample = split_sample[0]
+            for i, image_prompt_string in enumerate(image_prompt_strings):
+                sample += image_prompt_string + split_sample[i + 1]
+            prompt_strings.append(sample)
+
+        fake_image_token = AddedToken(fake_image_token, normalized=False, special=True)
+        image_token = AddedToken(image_token, normalized=False, special=True)
+        end_of_utterance_token = AddedToken(
+            "<end_of_utterance>", normalized=False, special=True
+        )
+        tokens_to_add = {
+            "additional_special_tokens": [
+                fake_image_token,
+                image_token,
+                end_of_utterance_token,
+            ]
+        }
+        self.tokenizer.add_special_tokens(tokens_to_add)
+
+        input_ids = self.tokenizer(prompt_strings)["input_ids"][0]
+        return input_ids
+
+    def decode(self, token_ids):
+        return self.tokenizer.decode(token_ids, clean_up_tokenization_spaces=False)
+
+    @property
+    def bos_id(self):
+        return self.tokenizer.bos_token_id
+
+    @property
+    def eos_id(self):
+        return self.tokenizer.eos_token_id
+
+    @property
+    def bos_token(self):
+        return self.tokenizer.bos_token
+
+    @property
+    def eos_token(self):
+        return self.tokenizer.eos_token
+
+class Request(BaseHTTPRequestHandler):
+    # subclass BaseHTTPRequestHandler to define the tokenizer endpoints
+    timeout = 5
+    server_version = "Apache"
+
+    def do_GET(self):
+        print(self.path)
+        # handle GET requests from the client
+        self.send_response(200)
+        self.send_header("type", "get")  # set a response header (optional, more may be added)
+        self.end_headers()
+
+        if self.path == "/bos_id":
+            bos_id = tokenizer.bos_id
+            # print(bos_id)
+            # to json
+            if bos_id is None:
+                msg = json.dumps({"bos_id": -1})
+            else:
+                msg = json.dumps({"bos_id": bos_id})
+        elif self.path == "/eos_id":
+            eos_id = tokenizer.eos_id
+            if eos_id is None:
+                msg = json.dumps({"eos_id": -1})
+            else:
+                msg = json.dumps({"eos_id": eos_id})
+        else:
+            msg = "error"
+
+        print(msg)
+        msg = str(msg).encode()  # convert to str, then to bytes
+
+        self.wfile.write(msg)  # return the bytes to the client
+
+    def do_POST(self):
+        # handle POST requests from the client
+        data = self.rfile.read(
+            int(self.headers["content-length"])
+        )  # read the request body sent by the client (bytes)
+        data = data.decode()  # decode the bytes to str
+
+        self.send_response(200)
+        self.send_header("type", "post")  # set a response header (optional, more may be added)
+        self.end_headers()
+
+        if self.path == "/encode":
+            req = json.loads(data)
+            print(req)
+            prompt = req["text"]
+            b_img_prompt = False
+            if "img_prompt" in req:
+                b_img_prompt = req["img_prompt"]
+            if b_img_prompt:
+                token_ids = tokenizer.encode_vpm(prompt)
+            else:
+                token_ids = tokenizer.encode(prompt, args.content)
+            if token_ids is None:
+                msg = json.dumps({"token_ids": -1})
+            else:
+                msg = json.dumps({"token_ids": token_ids})
+
+        elif self.path == "/decode":
+            req = json.loads(data)
+            token_ids = req["token_ids"]
+            text = tokenizer.decode(token_ids)
+            if text is None:
+                msg = json.dumps({"text": ""})
+            else:
+                msg = json.dumps({"text": text})
+        else:
+            msg = "error"
+        print(msg)
+        msg = str(msg).encode()  # convert to str, then to bytes
+
+        self.wfile.write(msg)  # return the bytes to the client
+
+
+if __name__ == "__main__":
+
+    args = argparse.ArgumentParser()
+    args.add_argument("--host", type=str, default="localhost")
+    args.add_argument("--port", type=int, default=8080)
+    args.add_argument('--model_id', type=str, default='internvl2_tokenizer')
+    args.add_argument('--content', type=str, default='')
+    args = args.parse_args()
+
+    tokenizer = Tokenizer_Http(args.model_id)
+
+
+    # print(tokenizer.bos_id, tokenizer.bos_token, tokenizer.eos_id, tokenizer.eos_token)
+    # print(tokenizer.encode("hello world", args.content))
+
+    host = (args.host, args.port)  # bind address and port; 'localhost' is equivalent to '127.0.0.1'
+    print("http://%s:%s" % host)
+    server = HTTPServer(host, Request)  # build the server from the address and the handler class above
+    server.serve_forever()  # start serving
diff --git a/projects/llm_framework/main_vlm/scripts/tokenizer_smolvlm-500M-ax630c.py b/projects/llm_framework/main_vlm/scripts/tokenizer_smolvlm-500M-ax630c.py
new file mode 100644
index 00000000..560a71f3
--- /dev/null
+++ b/projects/llm_framework/main_vlm/scripts/tokenizer_smolvlm-500M-ax630c.py
@@ -0,0 +1,248 @@
+from transformers import AutoTokenizer, PreTrainedTokenizerFast
+from transformers.tokenization_utils_base import AddedToken
+from http.server import HTTPServer, BaseHTTPRequestHandler
+import json
+import argparse
+
+def _prompt_split_image(
+    image_seq_len,
+    image_rows,
+    image_cols,
+    fake_token_around_image,
+    image_token,
+    global_img_token,
+):
+    """Prompt with expanded image tokens for when the image is split into patches."""
+    text_split_images = ""
+    for n_h in range(image_rows):
+        for n_w in range(image_cols):
+            text_split_images += (
+                f"{fake_token_around_image}"
+                + f"<row_{n_h + 1}_col_{n_w + 1}>"
+                + f"{image_token}" * image_seq_len
+            )
+        text_split_images += "\n"
+
+    text_split_images += (
+        f"\n{fake_token_around_image}"
+        + f"{global_img_token}"
+        + f"{image_token}" * image_seq_len
+        + f"{fake_token_around_image}"
+    )
+    return text_split_images
+
+
+def _prompt_single_image(
+    image_seq_len, fake_token_around_image, image_token, global_img_token
+):
+    """Prompt with expanded image tokens for a single image."""
+    return (
+        f"{fake_token_around_image}"
+        + f"{global_img_token}"
+        + f"{image_token}" * image_seq_len
+        + f"{fake_token_around_image}"
+    )
+
+
+def get_image_prompt_string(
+    image_rows,
+    image_cols,
+    image_seq_len,
+    fake_token_around_image,
+    image_token,
+    global_img_token,
+):
+    if image_rows == 0 and image_cols == 0:
+        return _prompt_single_image(
+            image_seq_len,
+            fake_token_around_image=fake_token_around_image,
+            image_token=image_token,
+            global_img_token=global_img_token,
+        )
+    return _prompt_split_image(
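The three tokenizer helpers above all expose the same tiny HTTP protocol, which the C++ side talks to at `filename_tokenizer_model` (http://localhost:8080). A sketch of the JSON bodies exchanged on `/encode`, `/decode`, and `/bos_id`, built with nlohmann::json purely to show the shapes (the token ids below are invented):

```cpp
#include <iostream>
#include <nlohmann/json.hpp>

int main() {
    // POST /encode: "img_prompt": true routes to encode_vpm() and expands image tokens
    nlohmann::json encode_req = {{"text", "Describe the image."}, {"img_prompt", true}};
    nlohmann::json encode_rsp = {{"token_ids", {151644, 872, 198}}};  // or -1 on failure

    // POST /decode turns ids back into text; GET /bos_id and /eos_id return the specials
    nlohmann::json decode_req = {{"token_ids", {151644, 872, 198}}};
    nlohmann::json bos_rsp    = {{"bos_id", -1}};   // -1 when the tokenizer has no BOS

    std::cout << encode_req.dump() << '\n' << encode_rsp.dump() << '\n'
              << decode_req.dump() << '\n' << bos_rsp.dump()    << '\n';
}
```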
+        image_seq_len,
+        image_rows,
+        image_cols,
+        fake_token_around_image,
+        image_token,
+        global_img_token,
+    )
+
+class Tokenizer_Http:
+
+    def __init__(self, model_id):
+        self.tokenizer = AutoTokenizer.from_pretrained(
+            model_id, trust_remote_code=True, use_fast=False
+        )
+
+    def encode(self, prompt, content):
+        prompt = f"<|im_start|>User:{content}\nAssistant:"
+        input_ids = self.tokenizer(prompt)
+        return input_ids["input_ids"]
+
+    def encode_vpm(self, prompt, content="Please describe the image shortly."):
+        prompt = f"<|im_start|>User:<image>{prompt}\nAssistant:"
+        text = [prompt]
+        image_rows = [[0]]
+        image_cols = [[0]]
+        image_seq_len = 64
+        image_token = "<image>"
+        fake_image_token = "<fake_token_around_image>"
+        global_img_token = "<global-img>"
+        prompt_strings = []
+        for sample, sample_rows, sample_cols in zip(text, image_rows, image_cols):
+            # Replace the image token with fake tokens around the expanded image token sequence of length `image_seq_len`
+            image_prompt_strings = []
+            for n_rows, n_cols in zip(sample_rows, sample_cols):
+                image_prompt_string = get_image_prompt_string(
+                    n_rows,
+                    n_cols,
+                    image_seq_len,
+                    image_token=image_token,
+                    fake_token_around_image=fake_image_token,
+                    global_img_token=global_img_token,
+                )
+                image_prompt_strings.append(image_prompt_string)
+
+            split_sample = sample.split(image_token)
+            if len(split_sample) == 0:
+                raise ValueError("The image token should be present in the text.")
+
+            # Place in the image prompt strings where the image tokens are
+            sample = split_sample[0]
+            for i, image_prompt_string in enumerate(image_prompt_strings):
+                sample += image_prompt_string + split_sample[i + 1]
+            prompt_strings.append(sample)
+
+        fake_image_token = AddedToken(fake_image_token, normalized=False, special=True)
+        image_token = AddedToken(image_token, normalized=False, special=True)
+        end_of_utterance_token = AddedToken(
+            "<end_of_utterance>", normalized=False, special=True
+        )
+        tokens_to_add = {
+            "additional_special_tokens": [
+                fake_image_token,
+                image_token,
+                end_of_utterance_token,
+            ]
+        }
+        self.tokenizer.add_special_tokens(tokens_to_add)
+
+        input_ids = self.tokenizer(prompt_strings)["input_ids"][0]
+        return input_ids
+
+    def decode(self, token_ids):
+        return self.tokenizer.decode(token_ids, clean_up_tokenization_spaces=False)
+
+    @property
+    def bos_id(self):
+        return self.tokenizer.bos_token_id
+
+    @property
+    def eos_id(self):
+        return self.tokenizer.eos_token_id
+
+    @property
+    def bos_token(self):
+        return self.tokenizer.bos_token
+
+    @property
+    def eos_token(self):
+        return self.tokenizer.eos_token
+
+class Request(BaseHTTPRequestHandler):
+    # subclass BaseHTTPRequestHandler to define the tokenizer endpoints
+    timeout = 5
+    server_version = "Apache"
+
+    def do_GET(self):
+        print(self.path)
+        # handle GET requests from the client
+        self.send_response(200)
+        self.send_header("type", "get")  # set a response header (optional, more may be added)
+        self.end_headers()
+
+        if self.path == "/bos_id":
+            bos_id = tokenizer.bos_id
+            # print(bos_id)
+            # to json
+            if bos_id is None:
+                msg = json.dumps({"bos_id": -1})
+            else:
+                msg = json.dumps({"bos_id": bos_id})
+        elif self.path == "/eos_id":
+            eos_id = tokenizer.eos_id
+            if eos_id is None:
+                msg = json.dumps({"eos_id": -1})
+            else:
+                msg = json.dumps({"eos_id": eos_id})
+        else:
+            msg = "error"
+
+        print(msg)
+        msg = str(msg).encode()  # convert to str, then to bytes
+
+        self.wfile.write(msg)  # return the bytes to the client
+
+    def do_POST(self):
+        # handle POST requests from the client
+        data = self.rfile.read(
+            int(self.headers["content-length"])
+        )  # read the request body sent by the client (bytes)
+        data = data.decode()  # decode the bytes to str
+
+        self.send_response(200)
+        self.send_header("type", "post")  # set a response header (optional, more may be added)
+        self.end_headers()
+
+        if self.path == "/encode":
+            req = json.loads(data)
+            print(req)
+            prompt = req["text"]
+            b_img_prompt = False
+            if "img_prompt" in req:
+                b_img_prompt = req["img_prompt"]
+            if b_img_prompt:
+                token_ids = tokenizer.encode_vpm(prompt)
+            else:
+                token_ids = tokenizer.encode(prompt, args.content)
+            if token_ids is None:
+                msg = json.dumps({"token_ids": -1})
+            else:
+                msg = json.dumps({"token_ids": token_ids})
+
+        elif self.path == "/decode":
+            req = json.loads(data)
+            token_ids = req["token_ids"]
+            text = tokenizer.decode(token_ids)
+            if text is None:
+                msg = json.dumps({"text": ""})
+            else:
+                msg = json.dumps({"text": text})
+        else:
+            msg = "error"
+        print(msg)
+        msg = str(msg).encode()  # convert to str, then to bytes
+
+        self.wfile.write(msg)  # return the bytes to the client
+
+
+if __name__ == "__main__":
+
+    args = argparse.ArgumentParser()
+    args.add_argument("--host", type=str, default="localhost")
+    args.add_argument("--port", type=int, default=8080)
+    args.add_argument('--model_id', type=str, default='internvl2_tokenizer')
+    args.add_argument('--content', type=str, default='')
+    args = args.parse_args()
+
+    tokenizer = Tokenizer_Http(args.model_id)
+
+
+    # print(tokenizer.bos_id, tokenizer.bos_token, tokenizer.eos_id, tokenizer.eos_token)
+    # print(tokenizer.encode("hello world", args.content))
+
+    host = (args.host, args.port)  # bind address and port; 'localhost' is equivalent to '127.0.0.1'
+    print("http://%s:%s" % host)
+    server = HTTPServer(host, Request)  # build the server from the address and the handler class above
+    server.serve_forever()  # start serving
diff --git a/projects/llm_framework/main_vlm/src/main.cpp b/projects/llm_framework/main_vlm/src/main.cpp
index 758dff2b..b625b5f9 100644
--- a/projects/llm_framework/main_vlm/src/main.cpp
+++ b/projects/llm_framework/main_vlm/src/main.cpp
@@ -50,8 +50,8 @@ class llm_task {
     std::string response_format_;
     std::vector<std::string> inputs_;
     std::vector<unsigned short> prompt_data_;
-    std::vector<std::vector<unsigned char>> image_datas_;
-    std::vector<std::vector<unsigned short>> img_embeds;
+    std::vector<unsigned char> image_data_;
+    std::vector<unsigned short> img_embed;
     std::string prompt_;
     task_callback_t out_callback_;
     bool enoutput_;
@@ -102,6 +102,7 @@ class llm_task {
                 SLOGW("config file :%s miss", file_name.c_str());
                 continue;
             }
+            SLOGI("config file :%s read", file_name.c_str());
             config_file >> file_body;
             config_file.close();
             break;
@@ -125,6 +126,7 @@ class llm_task {
         CONFIG_AUTO_SET(file_body["mode_param"], b_eos);
         CONFIG_AUTO_SET(file_body["mode_param"], axmodel_num);
         CONFIG_AUTO_SET(file_body["mode_param"], tokens_embed_num);
+        CONFIG_AUTO_SET(file_body["mode_param"], img_token_id);
         CONFIG_AUTO_SET(file_body["mode_param"], tokens_embed_size);
         CONFIG_AUTO_SET(file_body["mode_param"], b_use_mmap_load_embed);
         CONFIG_AUTO_SET(file_body["mode_param"], b_dynamic_load_axmodel_layer);
@@ -153,6 +155,7 @@ class llm_task {
         if (!tokenizer_server_flage_.load()) {
             tokenizer_pid_ = fork();
             if (tokenizer_pid_ == 0) {
+                setenv("PYTHONPATH", "/opt/m5stack/lib/vlm/site-packages", 1);
                 execl("/usr/bin/python3", "python3", tokenizer_file.c_str(), "--host", "localhost", "--port",
                       std::to_string(port_).c_str(), "--model_id", (base_model + "tokenizer").c_str(), "--content",
                       ("'" + prompt_ + "'").c_str(), nullptr);
@@ -215,32 +218,25 @@ class llm_task {
                 oss_prompt << input;
                 break;
         }
-        SLOGI("prompt_complete:%s", oss_prompt.str().c_str());
+        // SLOGI("prompt_complete:%s", oss_prompt.str().c_str());
         return oss_prompt.str();
     }
 
     void inference(const std::string &msg)
     {
         try {
-            if (image_datas_.empty()) {
+            if (image_data_.empty()) {
                 lLaMa_->Encode(prompt_data_, prompt_complete(msg));
                 std::string out =
lLaMa_->Run(prompt_data_); if (out_callback_) out_callback_(out, true); } else { - img_embeds.clear(); - for (auto &img_data : image_datas_) { - cv::Mat src = cv::imdecode(img_data, cv::IMREAD_COLOR); - if (src.empty()) continue; - std::vector embed; - lLaMa_->Encode(src, embed); - img_embeds.push_back(embed); - } - image_datas_.clear(); - if (!img_embeds.empty()) { - lLaMa_->Encode(img_embeds, prompt_data_, prompt_complete(msg)); - std::string out = lLaMa_->Run(prompt_data_); - if (out_callback_) out_callback_(out, true); - } + cv::Mat src = cv::imdecode(image_data_, cv::IMREAD_COLOR); + if (src.empty()) return; + image_data_.clear(); + lLaMa_->Encode(src, img_embed); + lLaMa_->Encode(img_embed, prompt_data_, prompt_complete(msg)); + std::string out = lLaMa_->Run(prompt_data_); + if (out_callback_) out_callback_(out, true); } } catch (...) { SLOGW("lLaMa_->Run have error!"); @@ -279,14 +275,25 @@ class llm_task { { } + void start() + { + } + + void stop() + { + } + ~llm_task() { + stop(); if (tokenizer_pid_ != -1) { kill(tokenizer_pid_, SIGTERM); waitpid(tokenizer_pid_, nullptr, WNOHANG); + // tokenizer_pid_ = -1; } if (lLaMa_) { lLaMa_->Deinit(); + // lLaMa_.reset(); } } }; @@ -404,7 +411,7 @@ class llm_llm : public StackFlow { next_data = &tmp_msg2; } if (object.find("jpeg") != std::string::npos) { - llm_task_obj->image_datas_.emplace_back(next_data->begin(), next_data->end()); + llm_task_obj->image_data_.assign(next_data->begin(), next_data->end()); return; } llm_task_obj->inference((*next_data)); @@ -605,6 +612,7 @@ class llm_llm : public StackFlow { send("None", "None", error_body, work_id); return -1; } + llm_task_[work_id_num]->stop(); task_pause(llm_task_[work_id_num], get_channel(work_id_num)); auto llm_channel = get_channel(work_id_num); llm_channel->stop_subscriber(""); @@ -620,6 +628,7 @@ class llm_llm : public StackFlow { if (iteam == llm_task_.end()) { break; } + iteam->second->stop(); get_channel(iteam->first)->stop_subscriber(""); iteam->second.reset(); llm_task_.erase(iteam->first); diff --git a/projects/llm_framework/main_vlm/src/runner/LLM.hpp b/projects/llm_framework/main_vlm/src/runner/LLM.hpp index a1d34edc..2cbbf388 100644 --- a/projects/llm_framework/main_vlm/src/runner/LLM.hpp +++ b/projects/llm_framework/main_vlm/src/runner/LLM.hpp @@ -26,8 +26,6 @@ struct LLMAttrType { std::string filename_post_axmodel = "tinyllama-int8/tinyllama_post.axmodel"; - bool b_use_topk = false; - std::string filename_vpm_encoder_axmodedl = "minicpmv/vpm_resampler_version0_fp16.axmodel"; std::string filename_vpm_resampler_axmodedl = "minicpmv/vpm_resampler_version0_fp16.axmodel"; int vpm_width = 280; @@ -39,6 +37,7 @@ struct LLMAttrType { bool b_bos = true, b_eos = false; std::string filename_tokens_embed = "tinyllama.model.embed_tokens.weight.bfloat16.bin"; int tokens_embed_num = 32000; + int img_token_id = 151667; // InternVL2.5 int tokens_embed_size = 2048; int max_token_len = 127; // auto calc @@ -53,6 +52,9 @@ struct LLMAttrType { bool b_use_mmap_load_layer = true; + bool b_use_topk = false; + std::string post_config_path = "post_config.json"; + // bool b_live_print = true; LLMRuningCallback runing_callback = nullptr; void *reserve = nullptr; @@ -84,36 +86,17 @@ class LLM { bool b_stop = false; - int post_process(unsigned short *p, int n, std::vector &history, float *val = 0) + LLMPostprocess postprocess; + static int post_process(LLMPostprocess &postprocess, unsigned short *p, int n, std::vector &history, + float *val = 0) { std::vector logits(n); for (int i = 0; i < n; i++) { 
unsigned int proc = p[i] << 16; logits[i] = *reinterpret_cast(&proc); } - LLMPostprocess postprocess; - postprocess.set_temperature(true, _attr.temperature); - postprocess.set_repetition_penalty(true, 1.2f); - // postprocess.set_top_k_sampling(true, 40); - postprocess.set_top_p_sampling(true, _attr.top_p); return postprocess.apply(logits, history); - - // float max_val = -MAXFLOAT; - // int max_index = 0; - // for (int i = 0; i < n; i++) - // { - // unsigned int proc = p[i] << 16; - // float tmp = *reinterpret_cast(&proc); - // if (tmp > max_val) - // { - // max_val = tmp; - // max_index = i; - // } - // } - // if (val) - // *val = max_val; - // return max_index; } public: @@ -308,18 +291,24 @@ class LLM { vpm_encoder.inference(); AX_SYS_MinvalidateCache(vpm_encoder.get_output(0).phyAddr, vpm_encoder.get_output(0).pVirAddr, vpm_encoder.get_output(0).nSize); - memcpy(vpm_resampler.get_input("input").pVirAddr, vpm_encoder.get_output(0).pVirAddr, + memcpy(vpm_resampler.get_input(0).pVirAddr, vpm_encoder.get_output(0).pVirAddr, vpm_encoder.get_output(0).nSize); } else { - void *data = vpm_resampler.get_input("input").pVirAddr; + void *data = vpm_resampler.get_input(0).pVirAddr; memcpy(data, dst.data, dst.rows * dst.cols * 3); } vpm_resampler.inference(); - out_embed.resize(vpm_resampler.get_output("output").nSize / sizeof(unsigned short)); - AX_SYS_MinvalidateCache(vpm_resampler.get_output("output").phyAddr, vpm_resampler.get_output("output").pVirAddr, - vpm_resampler.get_output("output").nSize); - memcpy(out_embed.data(), vpm_resampler.get_output("output").pVirAddr, vpm_resampler.get_output("output").nSize); + out_embed.resize(vpm_resampler.get_output(0).nSize / sizeof(float)); + AX_SYS_MinvalidateCache(vpm_resampler.get_output(0).phyAddr, vpm_resampler.get_output(0).pVirAddr, + vpm_resampler.get_output(0).nSize); + + float *output_data = (float *)vpm_resampler.get_output(0).pVirAddr; + for (size_t i = 0; i < out_embed.size(); i++) { + out_embed[i] = bfloat16(output_data[i]).data; + } + + // memcpy(out_embed.data(), vpm_resampler.get_output(0).pVirAddr, vpm_resampler.get_output(0).nSize); ALOGI("image encode time : %f ms, size : %d", t.cost(), out_embed.size()); return 0; } @@ -337,27 +326,49 @@ class LLM { embed_selector.getByIndex(input_ids[i], out_embed.data() + i * _attr.tokens_embed_size); } - // memcpy(out_embed.data() + 5 * _attr.tokens_embed_size, vpm_resampler.get_output("output").pVirAddr, - // vpm_resampler.get_output("output").nSize); + // memcpy(out_embed.data() + 5 * _attr.tokens_embed_size, vpm_resampler.get_output(0).pVirAddr, + // vpm_resampler.get_output(0).nSize); return 0; } - int Encode(std::vector> &img_embeds, std::vector &out_embed, - std::string prompt = "What is in the images?") + int Encode(std::vector &img_embed, std::vector &out_embed, + std::string prompt = "What is in the image?") { std::vector input_ids = tokenizer->Encode(prompt, true); - constexpr int IMG_CONTEXT = 151667; // InternVL2.5 - std::vector img_positions; + // constexpr int img_token_id = 49190; // smolvlm + // constexpr int img_token_id = 151667; // InternVL2.5 + int offset = 0; + int img_context_count = 0; for (size_t i = 0; i < input_ids.size(); i++) { - if (input_ids[i] == IMG_CONTEXT) { - img_positions.push_back(i); + if (input_ids[i] == _attr.img_token_id) { + img_context_count++; + if (img_context_count == 1) { + offset = i; + } } } - if (img_positions.size() > _attr.prefill_token_num) { + if (offset == 0) { + ALOGE("offset == 0"); + return -1; + } + + if (img_context_count != 
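Both directions of the bfloat16 handling above come down to bit manipulation: the post-process path widens stored bfloat16 logits with `p[i] << 16`, and the new vision path narrows the resampler's float output by keeping the top 16 bits. A self-contained round trip (this narrowing truncates; the project's `bfloat16` helper may round, so treat the exact low-bit behavior as an assumption):

```cpp
#include <cstdint>
#include <cstdio>
#include <cstring>

static uint16_t to_bfloat16(float f) {
    uint32_t bits;
    std::memcpy(&bits, &f, sizeof(bits));       // type-pun without UB
    return static_cast<uint16_t>(bits >> 16);   // keep sign + exponent + top mantissa bits
}

static float from_bfloat16(uint16_t h) {
    uint32_t bits = static_cast<uint32_t>(h) << 16;  // same as `p[i] << 16` above
    float f;
    std::memcpy(&f, &bits, sizeof(f));
    return f;
}

int main() {
    float x = 3.14159f;
    uint16_t b = to_bfloat16(x);
    printf("%f -> 0x%04x -> %f\n", x, b, from_bfloat16(b));
}
```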
img_embed.size() / _attr.tokens_embed_size) { + ALOGE("img_context_count(%d) != img_embed.size() / tokens_embed_size(%d)", img_context_count, + img_embed.size() / _attr.tokens_embed_size); + return -1; + } + + // for (size_t i = 0; i < input_ids.size(); i++) + // { + // printf("%d ", input_ids[i]); + // } + // printf("\n"); + + if (input_ids.size() > _attr.prefill_token_num) { ALOGE("input_ids(%d) > prefill_token_num(%d)", input_ids.size(), _attr.prefill_token_num); return -1; } @@ -366,11 +377,8 @@ class LLM { for (size_t i = 0; i < input_ids.size(); i++) { embed_selector.getByIndex(input_ids[i], out_embed.data() + i * _attr.tokens_embed_size); } - for (size_t img_idx = 0; img_idx < img_embeds.size(); img_idx++) { - // int pos = img_positions[img_idx]; - memcpy(out_embed.data() + (14 + img_idx * 64) * _attr.tokens_embed_size, img_embeds[img_idx].data(), - img_embeds[img_idx].size() * sizeof(unsigned short)); - } + memcpy(out_embed.data() + offset * _attr.tokens_embed_size, img_embed.data(), + img_embed.size() * sizeof(unsigned short)); return 0; } @@ -504,7 +512,7 @@ class LLM { AX_SYS_MinvalidateCache(output_post.phyAddr, output_post.pVirAddr, output_post.nSize); unsigned short *post_out = (unsigned short *)output_post.pVirAddr; float max_val = -MAXFLOAT; - max_index = post_process(post_out, _attr.tokens_embed_num, token_ids, &max_val); + max_index = post_process(postprocess, post_out, _attr.tokens_embed_num, token_ids, &max_val); } next_token = max_index; @@ -599,7 +607,7 @@ class LLM { AX_SYS_MinvalidateCache(output_post.phyAddr, output_post.pVirAddr, output_post.nSize); unsigned short *post_out = (unsigned short *)output_post.pVirAddr; float max_val = -MAXFLOAT; - max_index = post_process(post_out, _attr.tokens_embed_num, token_ids, &max_val); + max_index = post_process(postprocess, post_out, _attr.tokens_embed_num, token_ids, &max_val); } next_token = max_index; diff --git a/projects/llm_framework/main_whisper/SConstruct b/projects/llm_framework/main_whisper/SConstruct index 4dee5cf5..4c61edce 100644 --- a/projects/llm_framework/main_whisper/SConstruct +++ b/projects/llm_framework/main_whisper/SConstruct @@ -21,7 +21,7 @@ DEFINITIONS += ['-O3', '-fopenmp', '-std=c++17'] LDFLAGS+=['-Wl,-rpath=/opt/m5stack/lib', '-Wl,-rpath=/usr/local/m5stack/lib', '-Wl,-rpath=/usr/local/m5stack/lib/gcc-10.3', '-Wl,-rpath=/opt/lib', '-Wl,-rpath=/opt/usr/lib', '-Wl,-rpath=./'] LINK_SEARCH_PATH += [ADir('../static_lib')] REQUIREMENTS += ['ax_engine', 'ax_interpreter', 'ax_sys'] -REQUIREMENTS += ['onnxruntime', 'samplerate'] +# REQUIREMENTS += ['onnxruntime', 'samplerate'] INCLUDE += [ADir('../include')] INCLUDE += [ADir('src/runner'), ADir('../include/onnxruntime/core/session')] @@ -33,7 +33,7 @@ LDFLAGS += ['-l:libopencc.a', '-l:libmarisa.a'] STATIC_FILES += Glob('mode_*.json') -env['COMPONENTS'].append({'target':'llm_whisper', +env['COMPONENTS'].append({'target':'llm_whisper-1.7', 'SRCS':SRCS, 'INCLUDE':INCLUDE, 'PRIVATE_INCLUDE':PRIVATE_INCLUDE, diff --git a/projects/llm_framework/main_whisper/mode_whisper-small.json b/projects/llm_framework/main_whisper/mode_whisper-small.json new file mode 100644 index 00000000..db13bef8 --- /dev/null +++ b/projects/llm_framework/main_whisper/mode_whisper-small.json @@ -0,0 +1,44 @@ +{ + "mode": "whisper-small", + "type": "asr", + "homepage":"https://huggingface.co/openai/whisper-small", + "compile_flage":"pulsar2 build --input small-encoder.onnx --config config_whisper_encoder_u16.json --output_dir small_encoder --output_name small-encoder.axmodel --target_hardware 
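The reworked Encode() above boils down to: locate the run of `img_token_id` placeholders in the token stream, check that its length matches the number of embedding rows the vision model produced, then overwrite those rows in place. A toy illustration with invented dimensions:

```cpp
#include <cstdio>
#include <cstring>
#include <vector>

int main() {
    const int embed_size = 4, img_token_id = 99;        // toy values
    std::vector<int> input_ids = {1, 2, 99, 99, 3};     // two image placeholders
    std::vector<unsigned short> out_embed(input_ids.size() * embed_size, 0);
    std::vector<unsigned short> img_embed(2 * embed_size, 0xAAAA);  // vision output

    size_t offset = 0, count = 0;
    for (size_t i = 0; i < input_ids.size(); i++)
        if (input_ids[i] == img_token_id && count++ == 0) offset = i;  // first hit

    if (count == img_embed.size() / embed_size)          // same sanity check as above
        std::memcpy(out_embed.data() + offset * embed_size, img_embed.data(),
                    img_embed.size() * sizeof(unsigned short));
    printf("spliced %zu rows at token offset %zu\n", count, offset);
}
```

Compared with the old version, which copied to a hard-coded position (`14 + img_idx * 64`), deriving the offset from the first placeholder keeps the splice correct for any prompt template.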
AX620E --compiler.check 0 --npu_mode NPU2", + "pulsar_version":"3.3-f0b32d03", + "capabilities": [ + "Automatic_Speech_Recognition", + "English", + "Chinese", + "Japanese" + ], + "input_type": [ + "sys.pcm" + ], + "output_type": [ + "asr.utf-8" + ], + "mode_param": { + "model_type": "small", + "language": "en", + "encoder": "small-encoder.axmodel", + "decoder_main": "small-decoder-main.axmodel", + "decoder_loop": "small-decoder-loop.axmodel", + "positional_embedding": "small-positional_embedding.bin", + "tokens": "small-tokens.txt", + "t2s": "t2s.json", + "whisper_sample_rate": 16000, + "whisper_n_fft": 400, + "awake_delay": 1000, + "whisper_hop_length": 160, + "whisper_chunk_size": 30, + "whisper_n_mels": 80, + "whisper_sot": 50258, + "whisper_eot": 50257, + "whisper_blank": 220, + "whisper_no_timestamps": 50363, + "whisper_no_speech": 50362, + "whisper_translate": 50358, + "whisper_transcribe": 50359, + "whisper_vocab_size": 51865, + "whisper_n_text_ctx": 448 + } +} \ No newline at end of file diff --git a/projects/llm_framework/main_whisper/src/main.cpp b/projects/llm_framework/main_whisper/src/main.cpp index 768e3878..cc3aaa96 100644 --- a/projects/llm_framework/main_whisper/src/main.cpp +++ b/projects/llm_framework/main_whisper/src/main.cpp @@ -192,6 +192,34 @@ class llm_task { return tv.tv_sec * 1000.0 + tv.tv_usec / 1000.0; } + bool is_valid_utf8(const std::string &str) + { + int bytes = 0; + for (unsigned char c : str) { + if (bytes == 0) { + if ((c >> 5) == 0b110) + bytes = 1; + else if ((c >> 4) == 0b1110) + bytes = 2; + else if ((c >> 3) == 0b11110) + bytes = 3; + else if ((c >> 7)) + return false; + } else { + if ((c >> 6) != 0b10) return false; + bytes--; + } + } + return bytes == 0; + } + + void fix_utf8_string(std::string &s) + { + while (!s.empty() && !is_valid_utf8(s)) { + s.pop_back(); + } + } + int load_model(const nlohmann::json &config_body) { if (parse_config(config_body)) { @@ -212,6 +240,7 @@ class llm_task { SLOGW("config file :%s miss", file_name.c_str()); continue; } + SLOGI("config file :%s read", file_name.c_str()); config_file >> file_body; config_file.close(); break; @@ -259,7 +288,7 @@ class llm_task { positional_embedding.resize(mode_config_.whisper_n_text_ctx * WHISPER_N_TEXT_STATE); FILE *fp = fopen(mode_config_.positional_embedding.c_str(), "rb"); if (!fp) { - printf("Open %s failed!\n", mode_config_.positional_embedding.c_str()); + SLOGE("Open %s failed!\n", mode_config_.positional_embedding.c_str()); return -3; } fread(positional_embedding.data(), sizeof(float), mode_config_.whisper_n_text_ctx * WHISPER_N_TEXT_STATE, @@ -281,15 +310,15 @@ class llm_task { decoder_main_ = std::make_unique(); decoder_loop_ = std::make_unique(); if (0 != encoder_->Init(mode_config_.encoder.c_str())) { - printf("encoder init failed!\n"); + SLOGE("encoder init failed!\n"); return -4; } if (0 != decoder_main_->Init(mode_config_.decoder_main.c_str())) { - printf("Init decoder_main model failed!\n"); + SLOGE("Init decoder_main model failed!\n"); return -5; } if (0 != decoder_loop_->Init(mode_config_.decoder_loop.c_str())) { - printf("Init decoder_main model failed!\n"); + SLOGE("Init decoder_main model failed!\n"); return -6; } } catch (...) 
{ @@ -315,6 +344,7 @@ class llm_task { if (endpoint_flage_) return; } endpoint_flage_ = true; + if (delay_audio_frame_ == 0) buffer_resize(pcmdata, 0); buffer_write_char(pcmdata, raw.c_str(), raw.length()); buffer_position_set(pcmdata, 0); count = 0; @@ -390,7 +420,7 @@ class llm_task { return; } end = get_current_time(); - printf("Encoder run take %.2f ms\n", (end - start)); + SLOGI("Encoder run take %.2f ms\n", (end - start)); // detect language SOT_SEQUENCE[1] = detect_language(language_); @@ -416,7 +446,7 @@ class llm_task { supress_tokens(logits, true); max_token_id = argmax(logits); - printf("First token: %d \t take %.2fms\n", max_token_id, (end - start)); + SLOGI("First token: %d \t take %.2fms\n", max_token_id, (end - start)); mode_config_.neg_inf = -std::numeric_limits::infinity(); std::vector mask(mode_config_.whisper_n_text_ctx); for (int n = 0; n < mode_config_.whisper_n_text_ctx - offset - 1; n++) { @@ -445,7 +475,7 @@ class llm_task { ret = decoder_loop_->Run(); if (ret) { - printf("decoder_loop run failed!\n"); + SLOGE("decoder_loop run failed!\n"); return; } @@ -460,11 +490,11 @@ class llm_task { max_token_id = argmax(logits); end = get_current_time(); - printf("Next Token: %d \t take %.2fms\n", max_token_id, (end - start)); + SLOGI("Next Token: %d \t take %.2fms\n", max_token_id, (end - start)); } end_all = get_current_time(); - printf("All take %.2f ms\n", (end_all - start_all)); + SLOGI("All take %.2f ms\n", (end_all - start_all)); std::string s; for (const auto i : results) { @@ -473,14 +503,12 @@ class llm_task { (uint32)mode_config_.token_tables[i].size(), str); s += str; } - + fix_utf8_string(s); if (mode_config_.language == "en" || mode_config_.language == "ja") { - printf("Result: %s\n", s.c_str()); if (out_callback_) out_callback_(s, true); } else { const opencc::SimpleConverter converter(mode_config_.t2s.c_str()); std::string simple_str = converter.Convert(s); - printf("Result: %s\n", simple_str.c_str()); if ((!simple_str.empty()) && out_callback_) { out_callback_(simple_str, true); } @@ -533,8 +561,20 @@ class llm_task { _ax_init(); } + void start() + { + } + + void stop() + { + } + ~llm_task() { + stop(); + if (encoder_) encoder_->Release(); + if (decoder_main_) decoder_main_->Release(); + if (decoder_loop_) decoder_loop_->Release(); _ax_deinit(); buffer_destroy(pcmdata); } @@ -555,8 +595,8 @@ class llm_whisper : public StackFlow { llm_whisper() : StackFlow("whisper") { task_count_ = 1; - event_queue_.appendListener( - EVENT_TASK_PAUSE, std::bind(&llm_whisper::_task_pause, this, std::placeholders::_1, std::placeholders::_2)); + event_queue_.appendListener(EVENT_TASK_PAUSE, + std::bind(&llm_whisper::_task_pause, this, std::placeholders::_1)); } void task_output(const std::weak_ptr llm_task_obj_weak, @@ -673,9 +713,10 @@ class llm_whisper : public StackFlow { llm_task_obj->sys_pcm_on_data((*next_data)); } - void _task_pause(const std::string &work_id, const std::string &data) + void _task_pause(const std::shared_ptr &arg) { - int work_id_num = sample_get_work_id_num(work_id); + std::shared_ptr work_id = std::static_pointer_cast(arg); + int work_id_num = sample_get_work_id_num(*work_id); if (llm_task_.find(work_id_num) == llm_task_.end()) { return; } @@ -689,7 +730,7 @@ class llm_whisper : public StackFlow { void task_pause(const std::string &work_id, const std::string &data) { - event_queue_.enqueue(EVENT_TASK_PAUSE, work_id, ""); + event_queue_.enqueue(EVENT_TASK_PAUSE, std::make_shared(work_id)); } void task_work(const std::weak_ptr llm_task_obj_weak, @@ -703,8 
+744,8 @@ class llm_whisper : public StackFlow { llm_task_obj->kws_awake(); if ((!audio_url_.empty()) && (llm_task_obj->audio_flage_ == false)) { std::weak_ptr _llm_task_obj = llm_task_obj; - llm_channel->subscriber(audio_url_, [_llm_task_obj](pzmq *_pzmq, const std::string &raw) { - _llm_task_obj.lock()->sys_pcm_on_data(raw); + llm_channel->subscriber(audio_url_, [_llm_task_obj](pzmq *_pzmq, const std::shared_ptr &raw) { + _llm_task_obj.lock()->sys_pcm_on_data(raw->string()); }); llm_task_obj->audio_flage_ = true; } @@ -806,11 +847,13 @@ class llm_whisper : public StackFlow { if (input.find("sys") != std::string::npos) { audio_url_ = unit_call("audio", "cap", input); std::weak_ptr _llm_task_obj = llm_task_obj; - llm_channel->subscriber(audio_url_, [_llm_task_obj](pzmq *_pzmq, const std::string &raw) { - _llm_task_obj.lock()->sys_pcm_on_data(raw); - }); + llm_channel->subscriber(audio_url_, + [_llm_task_obj](pzmq *_pzmq, const std::shared_ptr &raw) { + _llm_task_obj.lock()->sys_pcm_on_data(raw->string()); + }); llm_task_obj->audio_flage_ = true; } else if (input.find("whisper") != std::string::npos) { + if (input.find("stream.base64") != std::string::npos) llm_task_obj->delay_audio_frame_ = 0; llm_channel->subscriber_work_id( "", std::bind(&llm_whisper::task_user_data, this, std::weak_ptr(llm_task_obj), std::weak_ptr(llm_channel), std::placeholders::_1, @@ -861,8 +904,8 @@ class llm_whisper : public StackFlow { if (data.find("sys") != std::string::npos) { if (audio_url_.empty()) audio_url_ = unit_call("audio", "cap", data); std::weak_ptr _llm_task_obj = llm_task_obj; - llm_channel->subscriber(audio_url_, [_llm_task_obj](pzmq *_pzmq, const std::string &raw) { - _llm_task_obj.lock()->sys_pcm_on_data(raw); + llm_channel->subscriber(audio_url_, [_llm_task_obj](pzmq *_pzmq, const std::shared_ptr &raw) { + _llm_task_obj.lock()->sys_pcm_on_data(raw->string()); }); llm_task_obj->audio_flage_ = true; llm_task_obj->inputs_.push_back(data); @@ -957,6 +1000,7 @@ class llm_whisper : public StackFlow { send("None", "None", error_body, work_id); return -1; } + llm_task_[work_id_num]->stop(); auto llm_channel = get_channel(work_id_num); llm_channel->stop_subscriber(""); if (llm_task_[work_id_num]->audio_flage_) { @@ -974,6 +1018,7 @@ class llm_whisper : public StackFlow { if (iteam == llm_task_.end()) { break; } + iteam->second->stop(); if (iteam->second->audio_flage_) { unit_call("audio", "cap_stop", "None"); } diff --git a/projects/llm_framework/main_whisper/src/runner/EngineWrapper.cpp b/projects/llm_framework/main_whisper/src/runner/EngineWrapper.cpp index 4bc6aedd..6d219645 100644 --- a/projects/llm_framework/main_whisper/src/runner/EngineWrapper.cpp +++ b/projects/llm_framework/main_whisper/src/runner/EngineWrapper.cpp @@ -12,8 +12,15 @@ #include +#include +#if defined(CONFIG_AX_650N_MSP_ENABLED) +static const char *strAlgoModelType[AX_ENGINE_MODEL_TYPE_BUTT] = {"3.6T", "7.2T", "18T"}; +#endif + +#if defined(CONFIG_AX_620E_MSP_ENABLED) || defined(CONFIG_AX_620Q_MSP_ENABLED) static const char *strAlgoModelType[AX_ENGINE_MODEL_TYPE_BUTT] = {"HalfOCM", "FullOCM"}; +#endif /// @brief npu type @@ -26,7 +33,104 @@ typedef enum axNPU_TYPE_E { AX_BL_VNPU_2 = (1 << 4) /* running under BIG-LITTLE VNPU2 */ } AX_NPU_TYPE_E; +#if defined(CHIP_AX650) +static AX_S32 CheckModelVNpu(const std::string &strModel, const AX_ENGINE_MODEL_TYPE_T &eModelType, const AX_S32 &nNpuType, AX_U32 &nNpuSet) { + AX_ENGINE_NPU_ATTR_T stNpuAttr; + memset(&stNpuAttr, 0x00, sizeof(stNpuAttr)); + + auto ret = 
AX_ENGINE_GetVNPUAttr(&stNpuAttr); + if (ret == 0) { + // VNPU DISABLE + if (stNpuAttr.eHardMode == AX_ENGINE_VIRTUAL_NPU_DISABLE) { + nNpuSet = 0x01; // NON-VNPU (0b111) + // printf("%s will run under VNPU-DISABLE [%s]\n", strModel.c_str(), strAlgoModelType[eModelType]); + } + // STD VNPU + else if (stNpuAttr.eHardMode == AX_ENGINE_VIRTUAL_NPU_STD) { + // 7.2T & 10.8T no allow + if (eModelType == AX_ENGINE_MODEL_TYPE1 + || eModelType == AX_ENGINE_MODEL_TYPE2) { + // printf("%s model type%d: [%s], no allow run under STD VNPU\n", strModel.c_str(), eModelType, strAlgoModelType[eModelType]); + return -1; + } + + // default STD VNPU2 + if (nNpuType == 0) { + nNpuSet = 0x02; // VNPU2 (0b010) + // printf("%s will run under default STD-VNPU2 [%s]\n", strModel.c_str(), strAlgoModelType[eModelType]); + } + else { + if (nNpuType & AX_STD_VNPU_1) { + nNpuSet |= 0x01; // VNPU1 (0b001) + // printf("%s will run under STD-VNPU1 [%s]\n", strModel.c_str(), strAlgoModelType[eModelType]); + } + if (nNpuType & AX_STD_VNPU_2) { + nNpuSet |= 0x02; // VNPU2 (0b010) + // printf("%s will run under STD-VNPU2 [%s]\n", strModel.c_str(), strAlgoModelType[eModelType]); + } + if (nNpuType & AX_STD_VNPU_3) { + nNpuSet |= 0x04; // VNPU3 (0b100) + // printf("%s will run under STD-VNPU3 [%s]\n", strModel.c_str(), strAlgoModelType[eModelType]); + } + } + } + // BL VNPU + else if (stNpuAttr.eHardMode == AX_ENGINE_VIRTUAL_NPU_BIG_LITTLE) { + // 10.8T no allow + if (eModelType == AX_ENGINE_MODEL_TYPE2) { + // printf("%s model type%d: [%s], no allow run under BL VNPU\n", strModel.c_str(), eModelType, strAlgoModelType[eModelType]); + return -1; + } + + // default BL VNPU + if (nNpuType == 0) { + // 7.2T default BL VNPU1 + if (eModelType == AX_ENGINE_MODEL_TYPE1) { + nNpuSet = 0x01; // VNPU1 (0b001) + // printf("%s will run under default BL-VNPU1 [%s]\n", strModel.c_str(), strAlgoModelType[eModelType]); + } + // 3.6T default BL VNPU2 + else { + nNpuSet = 0x02; // VNPU2 (0b010) + // printf("%s will run under default BL-VNPU2 [%s]\n", strModel.c_str(), strAlgoModelType[eModelType]); + } + } + else { + // 7.2T + if (eModelType == AX_ENGINE_MODEL_TYPE1) { + // no allow set to BL VNPU2 + if (nNpuType & AX_BL_VNPU_2) { + // printf("%s model type%d: [%s], no allow run under BL VNPU2\n", strModel.c_str(), eModelType, strAlgoModelType[eModelType]); + return -1; + } + if (nNpuType & AX_BL_VNPU_1) { + nNpuSet |= 0x01; // VNPU1 (0b001) + // printf("%s will run under BL-VNPU1 [%s]\n", strModel.c_str(), strAlgoModelType[eModelType]); + } + } + // 3.6T + else { + if (nNpuType & AX_BL_VNPU_1) { + nNpuSet |= 0x01; // VNPU1 (0b001) + // printf("%s will run under BL-VNPU1 [%s]\n", strModel.c_str(), strAlgoModelType[eModelType]); + } + if (nNpuType & AX_BL_VNPU_2) { + nNpuSet |= 0x02; // VNPU2 (0b010) + // printf("%s will run under BL-VNPU2 [%s]\n", strModel.c_str(), strAlgoModelType[eModelType]); + } + } + } + } + } + else { + printf("AX_ENGINE_GetVNPUAttr fail ret = %x\n", ret); + } + + return ret; +} +#endif +#if defined(CONFIG_AX_620E_MSP_ENABLED) || defined(CONFIG_AX_620Q_MSP_ENABLED) static AX_S32 CheckModelVNpu(const std::string &strModel, const AX_ENGINE_MODEL_TYPE_T &eModelType, const AX_S32 &nNpuType, AX_U32 &nNpuSet) { AX_ENGINE_NPU_ATTR_T stNpuAttr; memset(&stNpuAttr, 0x00, sizeof(stNpuAttr)); @@ -69,13 +173,15 @@ static AX_S32 CheckModelVNpu(const std::string &strModel, const AX_ENGINE_MODEL_ return ret; } +#endif + int EngineWrapper::Init(const char* strModelPath, uint32_t nNpuType) { AX_S32 ret = 0; // 1. 
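Every CheckModelVNpu() variant above follows the same pattern: fold the caller's requested NPU flags into a small affinity bitmask, with a sensible default when no flag is set. A condensed, standalone sketch of the STD-VNPU branch (constants per the axNPU_TYPE_E enum above; the hardware-mode and model-type guards are omitted):

```cpp
#include <cstdio>

enum {
    AX_STD_VNPU_DEFAULT = 0,
    AX_STD_VNPU_1 = 1 << 0,
    AX_STD_VNPU_2 = 1 << 1,
    AX_STD_VNPU_3 = 1 << 2,
};

static unsigned npu_set(int nNpuType) {
    if (nNpuType == 0) return 0x02;                     // default: STD-VNPU2
    unsigned nNpuSet = 0;
    if (nNpuType & AX_STD_VNPU_1) nNpuSet |= 0x01;      // VNPU1 (0b001)
    if (nNpuType & AX_STD_VNPU_2) nNpuSet |= 0x02;      // VNPU2 (0b010)
    if (nNpuType & AX_STD_VNPU_3) nNpuSet |= 0x04;      // VNPU3 (0b100)
    return nNpuSet;
}

int main() {
    printf("0x%02x\n", npu_set(AX_STD_VNPU_1 | AX_STD_VNPU_3));  // prints 0x05
}
```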
load model - AX_BOOL bLoadModelUseCmm = AX_FALSE; + AX_BOOL bLoadModelUseCmm = AX_TRUE; AX_CHAR *pModelBufferVirAddr = nullptr; AX_U64 u64ModelBufferPhyAddr = 0; AX_U32 nModelBufferSize = 0; diff --git a/projects/llm_framework/main_yolo/SConstruct b/projects/llm_framework/main_yolo/SConstruct index 8400aa9a..99f1e1ef 100644 --- a/projects/llm_framework/main_yolo/SConstruct +++ b/projects/llm_framework/main_yolo/SConstruct @@ -39,7 +39,7 @@ STATIC_FILES += Glob('mode_*.json') # AFile('../static_lib/libbz2.so.1.0')] # DEFINITIONS += ["-DENABLE_BACKWARD"] -env['COMPONENTS'].append({'target':'llm_yolo', +env['COMPONENTS'].append({'target':'llm_yolo-1.8', 'SRCS':SRCS, 'INCLUDE':INCLUDE, 'PRIVATE_INCLUDE':PRIVATE_INCLUDE, diff --git a/projects/llm_framework/main_yolo/mode_yolo11n-hand-pose.json b/projects/llm_framework/main_yolo/mode_yolo11n-hand-pose.json index a75f0c01..051518cb 100644 --- a/projects/llm_framework/main_yolo/mode_yolo11n-hand-pose.json +++ b/projects/llm_framework/main_yolo/mode_yolo11n-hand-pose.json @@ -2,6 +2,8 @@ "mode":"yolo11n-hand-pose", "type":"cv", "homepage":"https://github.com/ultralytics/ultralytics", + "compile_flage":"pulsar2 build --target_hardware AX620E --input yolo11n-hand.onnx --output_dir output --config yolo11n-hand_config.json", + "pulsar_version":"3.4-983bb35e", "capabilities":[ "Pose" ], diff --git a/projects/llm_framework/main_yolo/mode_yolo11n-pose.json b/projects/llm_framework/main_yolo/mode_yolo11n-pose.json index 7879c01f..785135bd 100644 --- a/projects/llm_framework/main_yolo/mode_yolo11n-pose.json +++ b/projects/llm_framework/main_yolo/mode_yolo11n-pose.json @@ -2,6 +2,8 @@ "mode":"yolo11n-pose", "type":"cv", "homepage":"https://github.com/ultralytics/ultralytics", + "compile_flage":"pulsar2 build --target_hardware AX620E --input yolo11n-pose.onnx --output_dir output --config yolo11n-pose_config.json", + "pulsar_version":"3.4-983bb35e", "capabilities":[ "Pose" ], diff --git a/projects/llm_framework/main_yolo/mode_yolo11n-seg.json b/projects/llm_framework/main_yolo/mode_yolo11n-seg.json index a992a517..9da34ef9 100644 --- a/projects/llm_framework/main_yolo/mode_yolo11n-seg.json +++ b/projects/llm_framework/main_yolo/mode_yolo11n-seg.json @@ -1,7 +1,9 @@ { - "mode":"yolo11s-seg", + "mode":"yolo11n-seg", "type":"cv", "homepage":"https://github.com/ultralytics/ultralytics", + "compile_flage":"pulsar2 build --target_hardware AX620E --input yolo11n-seg.onnx --output_dir output --config yolo11n-seg_config.json", + "pulsar_version":"3.4-983bb35e", "capabilities":[ "Segmentation" ], diff --git a/projects/llm_framework/main_yolo/mode_yolo11n.json b/projects/llm_framework/main_yolo/mode_yolo11n.json index e8bc8dce..b2c6cf4a 100644 --- a/projects/llm_framework/main_yolo/mode_yolo11n.json +++ b/projects/llm_framework/main_yolo/mode_yolo11n.json @@ -2,6 +2,8 @@ "mode":"yolo11n", "type":"cv", "homepage":"https://github.com/ultralytics/ultralytics", + "compile_flage":"pulsar2 build --target_hardware AX620E --input yolo11n.onnx --output_dir output --config yolo11n_config.json", + "pulsar_version":"3.4-983bb35e", "capabilities":[ "Detection" ], diff --git a/projects/llm_framework/main_yolo/src/EngineWrapper.cpp b/projects/llm_framework/main_yolo/src/EngineWrapper.cpp index 035e259a..84085db0 100644 --- a/projects/llm_framework/main_yolo/src/EngineWrapper.cpp +++ b/projects/llm_framework/main_yolo/src/EngineWrapper.cpp @@ -13,7 +13,15 @@ #include "utils/io.hpp" #include -static const char* strAlgoModelType[AX_ENGINE_VIRTUAL_NPU_BUTT] = {"1.6T", "3.2T"}; +#include + 
+#if defined(CONFIG_AX_650N_MSP_ENABLED) +static const char* strAlgoModelType[AX_ENGINE_MODEL_TYPE_BUTT] = {"3.6T", "7.2T", "18T"}; +#endif + +#if defined(CONFIG_AX_620E_MSP_ENABLED) || defined(CONFIG_AX_620Q_MSP_ENABLED) +static const char* strAlgoModelType[AX_ENGINE_MODEL_TYPE_BUTT] = {"HalfOCM", "FullOCM"}; +#endif /// @brief npu type typedef enum axNPU_TYPE_E { @@ -25,6 +33,7 @@ typedef enum axNPU_TYPE_E { AX_BL_VNPU_2 = (1 << 4) /* running under BIG-LITTLE VNPU2 */ } AX_NPU_TYPE_E; +#if defined(CHIP_AX650) static AX_S32 CheckModelVNpu(const std::string& strModel, const AX_ENGINE_MODEL_TYPE_T& eModelType, const AX_S32& nNpuType, AX_U32& nNpuSet) { @@ -36,33 +45,42 @@ static AX_S32 CheckModelVNpu(const std::string& strModel, const AX_ENGINE_MODEL_ // VNPU DISABLE if (stNpuAttr.eHardMode == AX_ENGINE_VIRTUAL_NPU_DISABLE) { nNpuSet = 0x01; // NON-VNPU (0b111) + // printf("%s will run under VNPU-DISABLE [%s]\n", strModel.c_str(), strAlgoModelType[eModelType]); } // STD VNPU - else if (stNpuAttr.eHardMode == AX_ENGINE_VIRTUAL_NPU_BUTT) { + else if (stNpuAttr.eHardMode == AX_ENGINE_VIRTUAL_NPU_STD) { // 7.2T & 10.8T no allow - if (eModelType == AX_ENGINE_MODEL_TYPE1 || eModelType == AX_ENGINE_MODEL_TYPE1) { + if (eModelType == AX_ENGINE_MODEL_TYPE1 || eModelType == AX_ENGINE_MODEL_TYPE2) { + // printf("%s model type%d: [%s], no allow run under STD VNPU\n", strModel.c_str(), eModelType, + // strAlgoModelType[eModelType]); return -1; } // default STD VNPU2 if (nNpuType == 0) { nNpuSet = 0x02; // VNPU2 (0b010) + // printf("%s will run under default STD-VNPU2 [%s]\n", strModel.c_str(), strAlgoModelType[eModelType]); } else { if (nNpuType & AX_STD_VNPU_1) { nNpuSet |= 0x01; // VNPU1 (0b001) + // printf("%s will run under STD-VNPU1 [%s]\n", strModel.c_str(), strAlgoModelType[eModelType]); } if (nNpuType & AX_STD_VNPU_2) { nNpuSet |= 0x02; // VNPU2 (0b010) + // printf("%s will run under STD-VNPU2 [%s]\n", strModel.c_str(), strAlgoModelType[eModelType]); } if (nNpuType & AX_STD_VNPU_3) { nNpuSet |= 0x04; // VNPU3 (0b100) + // printf("%s will run under STD-VNPU3 [%s]\n", strModel.c_str(), strAlgoModelType[eModelType]); } } } // BL VNPU - else if (stNpuAttr.eHardMode == AX_ENGINE_VIRTUAL_NPU_BUTT) { + else if (stNpuAttr.eHardMode == AX_ENGINE_VIRTUAL_NPU_BIG_LITTLE) { // 10.8T no allow - if (eModelType == AX_ENGINE_MODEL_TYPE1) { + if (eModelType == AX_ENGINE_MODEL_TYPE2) { + // printf("%s model type%d: [%s], no allow run under BL VNPU\n", strModel.c_str(), eModelType, + // strAlgoModelType[eModelType]); return -1; } @@ -71,29 +89,38 @@ static AX_S32 CheckModelVNpu(const std::string& strModel, const AX_ENGINE_MODEL_ // 7.2T default BL VNPU1 if (eModelType == AX_ENGINE_MODEL_TYPE1) { nNpuSet = 0x01; // VNPU1 (0b001) + // printf("%s will run under default BL-VNPU1 [%s]\n", strModel.c_str(), + // strAlgoModelType[eModelType]); } // 3.6T default BL VNPU2 else { nNpuSet = 0x02; // VNPU2 (0b010) + // printf("%s will run under default BL-VNPU2 [%s]\n", strModel.c_str(), + // strAlgoModelType[eModelType]); } } else { // 7.2T if (eModelType == AX_ENGINE_MODEL_TYPE1) { // no allow set to BL VNPU2 if (nNpuType & AX_BL_VNPU_2) { + // printf("%s model type%d: [%s], no allow run under BL VNPU2\n", strModel.c_str(), eModelType, + // strAlgoModelType[eModelType]); return -1; } if (nNpuType & AX_BL_VNPU_1) { nNpuSet |= 0x01; // VNPU1 (0b001) + // printf("%s will run under BL-VNPU1 [%s]\n", strModel.c_str(), strAlgoModelType[eModelType]); } } // 3.6T else { if (nNpuType & AX_BL_VNPU_1) { nNpuSet |= 0x01; // VNPU1 
(0b001) + // printf("%s will run under BL-VNPU1 [%s]\n", strModel.c_str(), strAlgoModelType[eModelType]); } if (nNpuType & AX_BL_VNPU_2) { nNpuSet |= 0x02; // VNPU2 (0b010) + // printf("%s will run under BL-VNPU2 [%s]\n", strModel.c_str(), strAlgoModelType[eModelType]); } } } @@ -104,13 +131,60 @@ static AX_S32 CheckModelVNpu(const std::string& strModel, const AX_ENGINE_MODEL_ return ret; } +#endif + +#if defined(CONFIG_AX_620E_MSP_ENABLED) || defined(CONFIG_AX_620Q_MSP_ENABLED) +static AX_S32 CheckModelVNpu(const std::string& strModel, const AX_ENGINE_MODEL_TYPE_T& eModelType, + const AX_S32& nNpuType, AX_U32& nNpuSet) +{ + AX_ENGINE_NPU_ATTR_T stNpuAttr; + memset(&stNpuAttr, 0x00, sizeof(stNpuAttr)); + + auto ret = AX_ENGINE_GetVNPUAttr(&stNpuAttr); + if (ret == 0) { + // VNPU DISABLE + if (stNpuAttr.eHardMode == AX_ENGINE_VIRTUAL_NPU_DISABLE) { + nNpuSet = 0x01; // NON-VNPU (0b111) + // ALOGN("%s will run under VNPU-DISABLE [%s]", strModel.c_str(), strAlgoModelType[eModelType]); + } + // STD VNPU + else if (stNpuAttr.eHardMode == AX_ENGINE_VIRTUAL_NPU_ENABLE) { + // full ocm model was no allowned + if (eModelType == AX_ENGINE_MODEL_TYPE1) { + // printf("%s model type%d: [%s], no allow run under STD VNPU", strModel.c_str(), eModelType, + // strAlgoModelType[eModelType]); + return -1; + } + + // default STD VNPU2 + if (nNpuType == 0) { + nNpuSet = 0x02; // VNPU2 (0b010) + // printf("%s will run under default STD-VNPU2 [%s]", strModel.c_str(), strAlgoModelType[eModelType]); + } else { + if (nNpuType & AX_STD_VNPU_1) { + nNpuSet |= 0x01; // VNPU1 (0b001) + // printf("%s will run under STD-VNPU1 [%s]", strModel.c_str(), strAlgoModelType[eModelType]); + } + if (nNpuType & AX_STD_VNPU_2) { + nNpuSet |= 0x02; // VNPU2 (0b010) + // printf("%s will run under STD-VNPU2 [%s]", strModel.c_str(), strAlgoModelType[eModelType]); + } + } + } + } else { + printf("AX_ENGINE_GetVNPUAttr fail ret = %x", ret); + } + + return ret; +} +#endif int EngineWrapper::Init(const char* strModelPath, uint32_t nNpuType) { AX_S32 ret = 0; // 1. load model - AX_BOOL bLoadModelUseCmm = AX_FALSE; + AX_BOOL bLoadModelUseCmm = AX_TRUE; AX_CHAR* pModelBufferVirAddr = nullptr; AX_U64 u64ModelBufferPhyAddr = 0; AX_U32 nModelBufferSize = 0; @@ -220,7 +294,7 @@ int EngineWrapper::Init(const char* strModelPath, uint32_t nNpuType) // 6. prepare io // AX_U32 nIoDepth = (stCtx.vecOutputBufferFlag.size() == 0) ? 
1 : stCtx.vecOutputBufferFlag.size(); - ret = utils::prepare_io(strModelPath, m_io_info, m_io, utils::IO_BUFFER_STRATEGY_DEFAULT); + ret = utils::prepare_io(strModelPath, m_io_info, m_io, utils::IO_BUFFER_STRATEGY_CACHED); if (0 != ret) { printf("prepare io failed!\n"); utils::free_io(m_io); @@ -238,7 +312,7 @@ int EngineWrapper::SetInput(void* pInput, int index) return utils::push_io_input(pInput, index, m_io); } -int EngineWrapper::RunSync() +int EngineWrapper::Run() { if (!m_hasInit) return -1; @@ -252,6 +326,37 @@ int EngineWrapper::RunSync() return 0; } +int EngineWrapper::GetOutput(void* pOutput, int index) +{ + return utils::push_io_output(pOutput, index, m_io); +} + +int EngineWrapper::GetInputSize(int index) +{ + return m_io.pInputs[index].nSize; +} + +int EngineWrapper::GetOutputSize(int index) +{ + return m_io.pOutputs[index].nSize; +} + +void* EngineWrapper::GetOutputPtr(int index) +{ + utils::cache_io_flush(&m_io.pOutputs[index]); + return m_io.pOutputs[index].pVirAddr; +} + +int EngineWrapper::Release() +{ + if (m_handle) { + utils::free_io(m_io); + AX_ENGINE_DestroyHandle(m_handle); + m_handle = nullptr; + } + return 0; +} + const char* CLASS_NAMES[] = { "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", @@ -375,29 +480,4 @@ int EngineWrapper::Post_Process(cv::Mat& mat, int& input_w, int& input_h, int& c post_process(m_io_info, &m_io, mat, input_w, input_h, cls_num, point_num, pron_threshold, nms_threshold, objects, model_type); return 0; -} - -int EngineWrapper::GetOutput(void* pOutput, int index) -{ - return utils::push_io_output(pOutput, index, m_io); -} - -int EngineWrapper::GetInputSize(int index) -{ - return m_io.pInputs[index].nSize; -} - -int EngineWrapper::GetOutputSize(int index) -{ - return m_io.pOutputs[index].nSize; -} - -int EngineWrapper::Release() -{ - if (m_handle) { - utils::free_io(m_io); - AX_ENGINE_DestroyHandle(m_handle); - m_handle = nullptr; - } - return 0; -} +} \ No newline at end of file diff --git a/projects/llm_framework/main_yolo/src/EngineWrapper.hpp b/projects/llm_framework/main_yolo/src/EngineWrapper.hpp index 1a5b9e0c..5d42a07e 100644 --- a/projects/llm_framework/main_yolo/src/EngineWrapper.hpp +++ b/projects/llm_framework/main_yolo/src/EngineWrapper.hpp @@ -47,22 +47,24 @@ class EngineWrapper { int SetInput(void* pInput, int index); - int RunSync(); - - int Post_Process(cv::Mat& mat, int& input_w, int& input_, int& cls_num, int& point_num, float& pron_threshold, - float& nms_threshold, std::vector& objects, std::string& model_type); + int Run(); int GetOutput(void* pOutput, int index); int GetInputSize(int index); int GetOutputSize(int index); + void* GetOutputPtr(int index); + int Release(); + int Post_Process(cv::Mat& mat, int& input_w, int& input_, int& cls_num, int& point_num, float& pron_threshold, + float& nms_threshold, std::vector& objects, std::string& model_type); + protected: bool m_hasInit; AX_ENGINE_HANDLE m_handle; AX_ENGINE_IO_INFO_T* m_io_info{}; AX_ENGINE_IO_T m_io{}; int m_input_num{}, m_output_num{}; -}; +}; \ No newline at end of file diff --git a/projects/llm_framework/main_yolo/src/main.cpp b/projects/llm_framework/main_yolo/src/main.cpp index 9e30f5f6..208a3757 100644 --- a/projects/llm_framework/main_yolo/src/main.cpp +++ b/projects/llm_framework/main_yolo/src/main.cpp @@ -111,6 +111,7 @@ class llm_task { SLOGW("config file :%s miss", file_name.c_str()); continue; } + SLOGI("config file :%s read", 
file_name.c_str()); config_file >> file_body; config_file.close(); break; @@ -227,7 +228,7 @@ class llm_task { common::get_input_data_letterbox(src, image, mode_config_.img_h, mode_config_.img_w, bgr2rgb); cv::Mat img_mat(mode_config_.img_h, mode_config_.img_w, CV_8UC3, image.data()); yolo_->SetInput((void *)image.data(), 0); - if (0 != yolo_->RunSync()) { + if (0 != yolo_->Run()) { SLOGE("Run yolo model failed!\n"); throw std::string("yolo_ RunSync error"); } @@ -339,6 +340,7 @@ class llm_task { ~llm_task() { stop(); + if (yolo_) yolo_->Release(); _ax_deinit(); } }; @@ -502,8 +504,8 @@ class llm_yolo : public StackFlow { std::weak_ptr _llm_task_obj = llm_task_obj; std::weak_ptr _llm_channel = llm_channel; llm_channel->subscriber( - input_url, [this, _llm_task_obj, _llm_channel](pzmq *_pzmq, const std::string &raw) { - this->task_camera_data(_llm_task_obj, _llm_channel, raw); + input_url, [this, _llm_task_obj, _llm_channel](pzmq *_pzmq, const std::shared_ptr &raw) { + this->task_camera_data(_llm_task_obj, _llm_channel, raw->string()); }); } } @@ -549,8 +551,8 @@ class llm_yolo : public StackFlow { std::weak_ptr _llm_task_obj = llm_task_obj; std::weak_ptr _llm_channel = llm_channel; llm_channel->subscriber(input_url, - [this, _llm_task_obj, _llm_channel](pzmq *_pzmq, const std::string &raw) { - this->task_camera_data(_llm_task_obj, _llm_channel, raw); + [this, _llm_task_obj, _llm_channel](pzmq *_pzmq, const std::shared_ptr &raw) { + this->task_camera_data(_llm_task_obj, _llm_channel, raw->string()); }); } llm_task_obj->inputs_.push_back(data); diff --git a/projects/llm_framework/tools/llm_pack.py b/projects/llm_framework/tools/llm_pack.py index 257db7ff..9fd6c580 100755 --- a/projects/llm_framework/tools/llm_pack.py +++ b/projects/llm_framework/tools/llm_pack.py @@ -9,6 +9,7 @@ import concurrent.futures import json import glob +from datetime import datetime ''' {package_name}_{version}-{revision}_{architecture}.deb lib-llm_1.0-m5stack1_arm64.deb @@ -96,6 +97,7 @@ def create_lib_deb(package_name, version, src_folder, revision = 'm5stack1'): f.write(f'Section: llm-module\n') f.write(f'Priority: optional\n') f.write(f'Homepage: https://www.m5stack.com\n') + f.write(f'Packaged-Date: {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}\n') f.write(f'Description: llm-module\n') f.write(f' bsp.\n') with open(os.path.join(deb_folder, 'DEBIAN/postinst'),'w') as f: @@ -148,7 +150,7 @@ def create_lib_deb(package_name, version, src_folder, revision = 'm5stack1'): shutil.rmtree(deb_folder) return package_name + " creat success!" 
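For orientation: `llm_pack.py` names its artifacts `{package_name}_{version}-{revision}_{architecture}.deb` (per the module docstring above), and every `create_*_deb` helper now stamps the control file with a `Packaged-Date` field. Below is a minimal sketch of the control stanza those helpers emit; the `control_stanza` helper is hypothetical, and the field values mirror the `f.write` calls in the diff.

```python
from datetime import datetime

def control_stanza(package, version, revision="m5stack1", depends=None):
    """Hypothetical distillation of the control fields the create_*_deb helpers write."""
    fields = {
        "Package": package,
        "Version": f"{version}-{revision}",
        "Architecture": "arm64",
        "Section": "llm-module",
        "Priority": "optional",
        "Homepage": "https://www.m5stack.com",
        # new in this change: record when the .deb was assembled
        "Packaged-Date": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        "Description": "llm-module",
    }
    if depends:  # e.g. 'lib-llm (>= 1.7)'
        fields["Depends"] = depends
    return "".join(f"{k}: {v}\n" for k, v in fields.items())

# e.g. the stanza for the llm-yolo unit:
print(control_stanza("llm-yolo", "1.8", depends="lib-llm (>= 1.7)"))
```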
-def create_data_deb(package_name, version, src_folder, revision = 'm5stack1'): +def create_data_deb(package_name, version, src_folder, revision = 'm5stack1', depends = 'lib-llm (>= 1.6)'): deb_file = f"{package_name}_{version}-{revision}_arm64.deb" deb_folder = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'debian-{}'.format(package_name)) if os.path.exists(deb_folder): @@ -201,12 +203,13 @@ def create_data_deb(package_name, version, src_folder, revision = 'm5stack1'): f.write(f'Original-Maintainer: m5stack \n') f.write(f'Section: llm-module\n') f.write(f'Priority: optional\n') - f.write(f'Depends: lib-llm (>= 1.6)\n') + f.write(f'Depends: {depends}\n') f.write(f'Homepage: https://www.m5stack.com\n') if deb_file.startswith('llm-model-'): deb_name = deb_file[:deb_file.find('_')] old_deb_name = deb_name.replace('model-','').lower() - f.write(f'Conflicts: {old_deb_name}\n') + f.write(f'Conflicts: {old_deb_name}\n') + f.write(f'Packaged-Date: {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}\n') f.write(f'Description: llm-module\n') f.write(f' bsp.\n') with open(os.path.join(deb_folder, 'DEBIAN/postinst'),'w') as f: @@ -222,7 +225,17 @@ def create_data_deb(package_name, version, src_folder, revision = 'm5stack1'): shutil.rmtree(deb_folder) return package_name + " creat success!" -def create_bin_deb(package_name, version, src_folder, revision = 'm5stack1'): +def create_bin_deb(package_name, version, src_folder, revision = 'm5stack1', depends = 'lib-llm (>= 1.7)'): + bin_files = glob.glob(os.path.join(src_folder, package_name.replace("-", "_") + "-*")) + version_info = 0.0 + print(os.path.join(src_folder, package_name.replace("-", "_") + "-*")) + if bin_files: + # keep the highest trailing version number among the matching binaries + for bin_file in bin_files: + file_version = float(bin_file.split('-')[-1]) + if file_version > version_info: + version_info = file_version + version = str(version_info) + deb_file = f"{package_name}_{version}-{revision}_arm64.deb" deb_folder = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'debian-{}'.format(package_name)) # os.makedirs(deb_folder, exist_ok=True) @@ -238,7 +251,23 @@ def create_bin_deb(package_name, version, src_folder, revision = 'm5stack1'): openai_api_dir = os.path.join(src_folder, 'openai-api') if os.path.exists(openai_api_dir): shutil.copytree(openai_api_dir, os.path.join(deb_folder, 'opt/m5stack/lib/openai-api')) - shutil.copy2(os.path.join(src_folder, package_name.replace("-", "_")), os.path.join(deb_folder, 'opt/m5stack/bin', package_name.replace("-", "_"))) + if package_name == 'llm-kws': + sherpa_dir = os.path.join(src_folder, 'sherpa-onnx') + if os.path.exists(sherpa_dir): + shutil.copytree(sherpa_dir, os.path.join(deb_folder, 'opt/m5stack/lib/sherpa-onnx')) + if package_name == 'llm-llm': + llm_dir = os.path.join(src_folder, 'llm') + if os.path.exists(llm_dir): + shutil.copytree(llm_dir, os.path.join(deb_folder, 'opt/m5stack/lib/llm')) + if package_name == 'llm-vlm': + vlm_dir = os.path.join(src_folder, 'vlm') + if os.path.exists(vlm_dir): + shutil.copytree(vlm_dir, os.path.join(deb_folder, 'opt/m5stack/lib/vlm')) + + bin_file_name = package_name.replace("-", "_") + if version_info != 0.0: + bin_file_name = package_name.replace("-", "_") + f'-{version}' + shutil.copy2(os.path.join(src_folder, bin_file_name), os.path.join(deb_folder, 'opt/m5stack/bin', bin_file_name)) ext_scripts_files = glob.glob(os.path.join(src_folder, package_name + "_*")) if ext_scripts_files: os.makedirs(os.path.join(deb_folder, 'opt/m5stack/scripts'), exist_ok = True) @@ -253,8 +282,10 @@ def
create_bin_deb(package_name, version, src_folder, revision = 'm5stack1'): f.write(f'Original-Maintainer: m5stack \n') f.write(f'Section: llm-module\n') f.write(f'Priority: optional\n') - f.write(f'Depends: lib-llm\n') + # f.write(f'Depends: lib-llm\n') + f.write(f'Depends: {depends}\n') f.write(f'Homepage: https://www.m5stack.com\n') + f.write(f'Packaged-Date: {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}\n') f.write(f'Description: llm-module\n') f.write(f' bsp.\n') with open(os.path.join(deb_folder, 'DEBIAN/postinst'),'w') as f: @@ -276,7 +307,7 @@ def create_bin_deb(package_name, version, src_folder, revision = 'm5stack1'): f.write(f'Requires=llm-sys.service\n') f.write(f'\n') f.write(f'[Service]\n') - f.write(f'ExecStart=/opt/m5stack/bin/{package_name.replace("-", "_")}\n') + f.write(f'ExecStart=/opt/m5stack/bin/{bin_file_name}\n') f.write(f'WorkingDirectory=/opt/m5stack\n') f.write(f'Restart=always\n') f.write(f'RestartSec=1\n') @@ -320,82 +351,44 @@ else: cpu_count = cpu_count - 2 # cpu_count = 50 -################################################# NOTE ################################################# -# New model version numbers start at 0.1 and count up from there. -# Increment the major version number when a unit is incompatible with the previous unit. -# Increment the major version number when a model is incompatible with the previous model. -# Keep the major version numbers of acceleration units and model units consistent; some existing packages are left unchanged for now. In effect since April 3, 2025. -########################################################################################################
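The notice above states the version rules applied in the `Tasks` table that follows. As a toy illustration only (the `next_version` helper is hypothetical, not part of llm_pack.py):

```python
def next_version(current: str, incompatible: bool) -> str:
    """Sketch of the policy: new models start at 0.1; an incompatibility with
    the previous unit or model bumps the major version, otherwise the minor."""
    major, minor = (int(part) for part in current.split("."))
    if incompatible:
        return f"{major + 1}.0"
    return f"{major}.{minor + 1}"

assert next_version("0.3", incompatible=False) == "0.4"  # routine model update
assert next_version("1.7", incompatible=True) == "2.0"   # breaking unit change
```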
Tasks = { - 'lib-llm':[create_lib_deb,'lib-llm', 1.6, src_folder, revision], - 'llm-sys':[create_bin_deb,'llm-sys', version, src_folder, revision], - 'llm-audio':[create_bin_deb,'llm-audio', version, src_folder, revision], - 'llm-kws':[create_bin_deb,'llm-kws', version, src_folder, revision], - 'llm-asr':[create_bin_deb,'llm-asr', version, src_folder, revision], - 'llm-llm':[create_bin_deb,'llm-llm', '1.6', src_folder, revision], - 'llm-tts':[create_bin_deb,'llm-tts', version, src_folder, revision], - 'llm-melotts':[create_bin_deb,'llm-melotts', version, src_folder, revision], - 'llm-camera':[create_bin_deb,'llm-camera', '1.6', src_folder, revision], - 'llm-vlm':[create_bin_deb,'llm-vlm', version, src_folder, revision], - 'llm-yolo':[create_bin_deb,'llm-yolo', '1.6', src_folder, revision], + 'lib-llm':[create_lib_deb,'lib-llm', '1.7', src_folder, revision], + 'llm-sys':[create_bin_deb,'llm-sys', '1.6', src_folder, revision], + 'llm-audio':[create_bin_deb,'llm-audio', '1.6', src_folder, revision], + 'llm-kws':[create_bin_deb,'llm-kws', '1.7', src_folder, revision], + 'llm-asr':[create_bin_deb,'llm-asr', '1.6', src_folder, revision], + 'llm-llm':[create_bin_deb,'llm-llm', '1.8', src_folder, revision], + 'llm-tts':[create_bin_deb,'llm-tts', '1.6', src_folder, revision], + 'llm-melotts':[create_bin_deb,'llm-melotts', '1.7', src_folder, revision], + 'llm-camera':[create_bin_deb,'llm-camera', '1.8', src_folder, revision, 'lib-llm'], + 'llm-vlm':[create_bin_deb,'llm-vlm', '1.7', src_folder, revision], + 'llm-yolo':[create_bin_deb,'llm-yolo', '1.8', src_folder, revision], 'llm-skel':[create_bin_deb,'llm-skel', version, src_folder, revision], - 'llm-depth-anything':[create_bin_deb,'llm-depth-anything', version, src_folder, revision], - 'llm-vad':[create_bin_deb,'llm-vad', version, src_folder, revision], - 'llm-whisper':[create_bin_deb,'llm-whisper', version, src_folder, revision], - 'llm-openai-api':[create_bin_deb,'llm-openai-api', version, src_folder, revision], + 'llm-depth-anything':[create_bin_deb,'llm-depth-anything', '1.6', src_folder, revision], + 'llm-vad':[create_bin_deb,'llm-vad', '1.6',
src_folder, revision], + 'llm-whisper':[create_bin_deb,'llm-whisper', '1.7', src_folder, revision], + 'llm-openai-api':[create_bin_deb,'llm-openai-api', '1.7', src_folder, revision], 'llm-model-audio-en-us':[create_data_deb,'llm-model-audio-en-us', data_version, src_folder, revision], 'llm-model-audio-zh-cn':[create_data_deb,'llm-model-audio-zh-cn', data_version, src_folder, revision], 'llm-model-sherpa-ncnn-streaming-zipformer-20M-2023-02-17':[create_data_deb,'llm-model-sherpa-ncnn-streaming-zipformer-20M-2023-02-17', data_version, src_folder, revision], 'llm-model-sherpa-ncnn-streaming-zipformer-zh-14M-2023-02-23':[create_data_deb,'llm-model-sherpa-ncnn-streaming-zipformer-zh-14M-2023-02-23', data_version, src_folder, revision], 'llm-model-sherpa-onnx-kws-zipformer-gigaspeech-3.3M-2024-01-01':[create_data_deb,'llm-model-sherpa-onnx-kws-zipformer-gigaspeech-3.3M-2024-01-01', '0.3', src_folder, revision], 'llm-model-sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01':[create_data_deb,'llm-model-sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01', '0.3', src_folder, revision], - 'llm-model-single-speaker-english-fast':[create_data_deb,'llm-model-single-speaker-english-fast', data_version, src_folder, revision], - 'llm-model-single-speaker-fast':[create_data_deb,'llm-model-single-speaker-fast', data_version, src_folder, revision], - 'llm-model-melotts-zh-cn':[create_data_deb,'llm-model-melotts-zh-cn', '0.4', src_folder, revision], + 'llm-model-single-speaker-english-fast':[create_data_deb,'llm-model-single-speaker-english-fast', '0.3', src_folder, revision], + 'llm-model-single-speaker-fast':[create_data_deb,'llm-model-single-speaker-fast', '0.3', src_folder, revision], + 'llm-model-melotts-zh-cn':[create_data_deb,'llm-model-melotts-zh-cn', '0.5', src_folder, revision], + 'llm-model-melotts-en-us':[create_data_deb,'llm-model-melotts-en-us', '0.5', src_folder, revision], + 'llm-model-melotts-en-default':[create_data_deb,'llm-model-melotts-en-default', '0.5', src_folder, revision], + 'llm-model-melotts-ja-jp':[create_data_deb,'llm-model-melotts-ja-jp', '0.5', src_folder, revision], 'llm-model-yolo11n':[create_data_deb,'llm-model-yolo11n', data_version, src_folder, revision], 'llm-model-yolo11n-pose':[create_data_deb,'llm-model-yolo11n-pose', '0.3', src_folder, revision], 'llm-model-yolo11n-hand-pose':[create_data_deb,'llm-model-yolo11n-hand-pose', '0.3', src_folder, revision], - 'llm-model-yolo11n-seg':[create_data_deb,'llm-model-yolo11n-seg', data_version, src_folder, revision], - 'llm-model-depth-anything-ax630c':[create_data_deb,'llm-model-depth-anything-ax630c', '0.3', src_folder, revision], - 'llm-model-whisper-tiny':[create_data_deb,'llm-model-whisper-tiny', '0.3', src_folder, revision], - 'llm-model-whisper-base':[create_data_deb,'llm-model-whisper-base', '0.3', src_folder, revision], - 'llm-model-silero-vad':[create_data_deb,'llm-model-silero-vad', '0.3', src_folder, revision], + 'llm-model-yolo11n-seg':[create_data_deb,'llm-model-yolo11n-seg', '0.3', src_folder, revision], + 'llm-model-depth-anything-ax630c':[create_data_deb,'llm-model-depth-anything-ax630c', '0.4', src_folder, revision], + 'llm-model-whisper-tiny':[create_data_deb,'llm-model-whisper-tiny', '0.4', src_folder, revision], + 'llm-model-whisper-base':[create_data_deb,'llm-model-whisper-base', '0.4', src_folder, revision], + 'llm-model-whisper-small':[create_data_deb,'llm-model-whisper-small', '0.4', src_folder, revision], + 'llm-model-silero-vad':[create_data_deb,'llm-model-silero-vad', '0.4', src_folder, revision], 
'llm-model-qwen2.5-0.5B-prefill-20e':[create_data_deb,'llm-model-qwen2.5-0.5B-prefill-20e', data_version, src_folder, revision], 'llm-model-qwen2.5-0.5B-p256-ax630c':[create_data_deb,'llm-model-qwen2.5-0.5B-p256-ax630c', '0.4', src_folder, revision], 'llm-model-qwen2.5-0.5B-Int4-ax630c':[create_data_deb,'llm-model-qwen2.5-0.5B-Int4-ax630c', '0.4', src_folder, revision], @@ -403,12 +396,16 @@ def create_bin_deb(package_name, version, src_folder, revision = 'm5stack1'): 'llm-model-qwen2.5-1.5B-p256-ax630c':[create_data_deb,'llm-model-qwen2.5-1.5B-p256-ax630c', '0.4', src_folder, revision], 'llm-model-qwen2.5-1.5B-Int4-ax630c':[create_data_deb,'llm-model-qwen2.5-1.5B-Int4-ax630c', '0.4', src_folder, revision], 'llm-model-qwen2.5-coder-0.5B-ax630c':[create_data_deb,'llm-model-qwen2.5-coder-0.5B-ax630c', data_version, src_folder, revision], + 'llm-model-qwen3-0.6B-ax630c':[create_data_deb,'llm-model-qwen3-0.6B-ax630c', '0.4', src_folder, revision], 'llm-model-llama3.2-1B-prefill-ax630c':[create_data_deb,'llm-model-llama3.2-1B-prefill-ax630c', data_version, src_folder, revision], 'llm-model-llama3.2-1B-p256-ax630c':[create_data_deb,'llm-model-llama3.2-1B-p256-ax630c', '0.4', src_folder, revision], 'llm-model-openbuddy-llama3.2-1B-ax630c':[create_data_deb,'llm-model-openbuddy-llama3.2-1B-ax630c', data_version, src_folder, revision], 'llm-model-internvl2.5-1B-ax630c':[create_data_deb,'llm-model-internvl2.5-1B-ax630c', '0.4', src_folder, revision], + 'llm-model-internvl2.5-1B-364-ax630c':[create_data_deb,'llm-model-internvl2.5-1B-364-ax630c', '0.4', src_folder, revision], 'llm-model-deepseek-r1-1.5B-ax630c':[create_data_deb,'llm-model-deepseek-r1-1.5B-ax630c', '0.3', src_folder, revision], 'llm-model-deepseek-r1-1.5B-p256-ax630c':[create_data_deb,'llm-model-deepseek-r1-1.5B-p256-ax630c', '0.4', src_folder, revision], + 'llm-model-smolvlm-256M-ax630c':[create_data_deb,'llm-model-smolvlm-256M-ax630c', '0.4', src_folder, revision], + 'llm-model-smolvlm-500M-ax630c':[create_data_deb,'llm-model-smolvlm-500M-ax630c', '0.4', src_folder, revision], # 'llm-model-qwen2-0.5B-prefill-20e':[create_data_deb,'llm-model-qwen2-0.5B-prefill-20e', data_version, src_folder, revision], # 'llm-model-qwen2-1.5B-prefill-20e':[create_data_deb,'llm-model-qwen2-1.5B-prefill-20e', data_version, src_folder, revision] } diff --git a/projects/llm_framework/tools/test_tools/test-melo.py b/projects/llm_framework/tools/test_tools/test-melo.py new file mode 100644 index 00000000..b257f2fc --- /dev/null +++ b/projects/llm_framework/tools/test_tools/test-melo.py @@ -0,0 +1,242 @@ +import socket +import json +import argparse +import uuid +import time + +def create_tcp_connection(host, port): + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + sock.connect((host, port)) + return sock + +def send_json(sock, data): + json_data = json.dumps(data, ensure_ascii=False) + '\n' + print(f"Sending: {json_data}") + sock.sendall(json_data.encode('utf-8')) + +def receive_response(sock, timeout=None): + """Receive a response, with an optional timeout.""" + old_timeout = sock.gettimeout() + try: + if timeout is not None: + sock.settimeout(timeout) + response = '' + while True: + part = sock.recv(4096).decode('utf-8') + if not part: # connection closed + return response.strip() + response += part + if '\n' in response: + break + return response.strip() + except socket.timeout: + return None + finally: + sock.settimeout(old_timeout) + +def close_connection(sock): + if sock: + sock.close() + +def create_melotts_setup_data(request_id="melotts_setup"): + return { + "request_id": request_id,
+ "work_id": "melotts", + "action": "setup", + "object": "melotts.setup", + "data": { + "model": "melotts_zh-cn", + "response_format": "sys.pcm", + "input": "tts.utf-8", + "enoutput": False + } + } + +def list_available_tasks(sock): + """获取可用的任务列表""" + request_id = str(uuid.uuid4()) + send_json(sock, { + "request_id": request_id, + "work_id": "melotts", + "action": "taskinfo" + }) + + response = receive_response(sock) + if not response: + return {"error": "No response received"} + try: + return json.loads(response) + except: + return {"error": "Failed to parse response"} + +def parse_setup_response(response_data, sent_request_id): + error = response_data.get('error') + request_id = response_data.get('request_id') + + if request_id != sent_request_id: + print(f"Request ID mismatch: sent {sent_request_id}, received {request_id}") + return None + if error and error.get('code') != 0: + print(f"Error Code: {error['code']}, Message: {error['message']}") + return None + return response_data.get('work_id') + +def setup(sock, setup_data): + sent_request_id = setup_data['request_id'] + send_json(sock, setup_data) + response = receive_response(sock) + if not response: + print("No response received during setup") + return None + try: + response_data = json.loads(response) + return parse_setup_response(response_data, sent_request_id) + except json.JSONDecodeError: + print(f"Invalid JSON response: {response}") + return None + +def melotts_tts_inference(sock, melotts_work_id, text, use_stream=False): + request_id = str(uuid.uuid4()) + + # 根据文档,选择流式或非流式请求格式 + if use_stream: + send_json(sock, { + "request_id": request_id, + "work_id": melotts_work_id, + "action": "inference", + "object": "melotts.utf-8.stream", + "data": { + "delta": text, + "index": 0, + "finish": True + } + }) + else: + # 非流式请求 + send_json(sock, { + "request_id": request_id, + "work_id": melotts_work_id, + "action": "inference", + "object": "melotts.utf-8", + "data": text + }) + + # 关键更改:不等待响应或设置更长的超时时间 + # 由于使用sys.pcm格式,音频会直接播放,可能不会立即返回响应 + print("语音合成请求已发送,正在播放...") + + # 可选:设置一个较短的超时来检查是否有响应,但不要因为没响应就认为失败 + response = receive_response(sock, timeout=0.5) # 设置短超时,只是尝试看有没有响应 + if response: + try: + response_data = json.loads(response) + error = response_data.get('error') + if error and error.get('code') != 0: + print(f"收到错误响应: Code={error['code']}, Message={error['message']}") + return False + print("收到成功响应") + except: + print(f"收到非JSON响应: {response[:100]}...") + else: + # 不收到响应也视为成功,因为服务器可能正忙于播放音频 + print("未收到响应,但这不一定表示失败(服务器可能正忙于处理音频)") + + # 这里给TTS处理一些时间 + # 根据文本长度估计播放时间 + estimated_time = len(text) * 0.1 # 假设每个字符需要0.1秒 + estimated_time = max(1.0, min(estimated_time, 10.0)) # 至少1秒,最多10秒 + print(f"等待大约 {estimated_time:.1f} 秒让音频播放完...") + time.sleep(estimated_time) + + return True + +def exit_session(sock, melotts_work_id): + send_json(sock, { + "request_id": "melotts_exit", + "work_id": melotts_work_id, + "action": "exit" + }) + response = receive_response(sock, timeout=2.0) + if not response: + print("退出命令已发送,但未收到响应") + return True # 假设成功 + try: + response_data = json.loads(response) + print("Exit Response:", response_data) + return response_data.get('error', {}).get('code', -1) == 0 + except: + print("Failed to parse exit response") + return False + +def get_task_info(sock, work_id): + """获取任务的详细信息""" + request_id = str(uuid.uuid4()) + send_json(sock, { + "request_id": request_id, + "work_id": work_id, + "action": "taskinfo" + }) + + response = receive_response(sock) + if not response: + return {"error": "No response received"} + 
try: + return json.loads(response) + except Exception: + return {"error": "Failed to parse response"} + +def main(host, port): + sock = create_tcp_connection(host, port) + try: + print("Setting up MeloTTS...") + setup_data = create_melotts_setup_data() + melotts_work_id = setup(sock, setup_data) + + if not melotts_work_id: + print("Setup failed. Checking available tasks...") + task_list = list_available_tasks(sock) + print("Available tasks:", task_list) + return + + print(f"MeloTTS SETUP finished, work_id: {melotts_work_id}") + + # Fetch and show the task details + task_info = get_task_info(sock, melotts_work_id) + print("Task info:", task_info) + + # Choose streaming or non-streaming mode + use_stream = input("Use streaming input? (y/n, default n): ").lower() == 'y' + + while True: + text = input("Enter the Chinese text to synthesize (type exit to quit): ") + if text.lower() == 'exit': + break + + print("Synthesizing speech...", flush=True) + success = melotts_tts_inference(sock, melotts_work_id, text, use_stream) + + if success: + print("TTS processing finished") + else: + print("TTS processing failed") + + # brief pause between requests + time.sleep(1) + + # exit the session + if exit_session(sock, melotts_work_id): + print("Session exited successfully") + else: + print("There may have been a problem exiting the session") + + except Exception as e: + print(f"Program error: {e}") + finally: + close_connection(sock) + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description='TCP Client for MeloTTS Unit.') + parser.add_argument('--host', type=str, default='localhost', help='Server hostname (default: localhost)') + parser.add_argument('--port', type=int, default=10001, help='Server port (default: 10001)') + args = parser.parse_args() + main(args.host, args.port) diff --git a/projects/llm_framework/tools/test_tools/test-tts.py b/projects/llm_framework/tools/test_tools/test-tts.py new file mode 100644 index 00000000..7368140c --- /dev/null +++ b/projects/llm_framework/tools/test_tools/test-tts.py @@ -0,0 +1,412 @@ +import socket +import json +import argparse +import uuid +import time +import sys + +def create_tcp_connection(host, port): + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + sock.connect((host, port)) + return sock + +def send_json(sock, data): + json_data = json.dumps(data, ensure_ascii=False) + '\n' + print(f"Sending: {json_data}") + sock.sendall(json_data.encode('utf-8')) + +def receive_response(sock, timeout=None): + """Receive a response, with an optional timeout.""" + old_timeout = sock.gettimeout() + try: + if timeout is not None: + sock.settimeout(timeout) + response = '' + while True: + part = sock.recv(4096).decode('utf-8') + if not part: # connection closed + return response.strip() + response += part + if '\n' in response: + break + return response.strip() + except socket.timeout: + return None + finally: + sock.settimeout(old_timeout) + +def close_connection(sock): + if sock: + sock.close() + +def create_tts_setup_data(request_id=None, link_with=None): + if request_id is None: + request_id = str(uuid.uuid4()) + + # base settings + data = { + "model": "single_speaker_fast", + "response_format": "sys.pcm", + "input": "tts.utf-8", + "enoutput": False + } + + # link other units if requested + if link_with: + if isinstance(link_with, list): + inputs = ["tts.utf-8"] + link_with + data["input"] = inputs + else: + inputs = ["tts.utf-8", link_with] + data["input"] = inputs + + return { + "request_id": request_id, + "work_id": "tts", + "action": "setup", + "object": "tts.setup", + "data": data + } + +def list_available_tasks(sock, work_id="tts"): + """List the available tasks.""" + request_id = str(uuid.uuid4()) + send_json(sock, { + "request_id": request_id, + "work_id": work_id, + "action": "taskinfo" + }) + + response = receive_response(sock) + if not response: + return {"error":
"No response received"} + try: + return json.loads(response) + except: + return {"error": "Failed to parse response"} + +def parse_setup_response(response_data, sent_request_id): + error = response_data.get('error') + request_id = response_data.get('request_id') + + if request_id != sent_request_id: + print(f"Request ID mismatch: sent {sent_request_id}, received {request_id}") + return None + if error and error.get('code') != 0: + print(f"Error Code: {error['code']}, Message: {error['message']}") + return None + return response_data.get('work_id') + +def setup(sock, setup_data): + sent_request_id = setup_data['request_id'] + send_json(sock, setup_data) + response = receive_response(sock) + if not response: + print("No response received during setup") + return None + try: + response_data = json.loads(response) + return parse_setup_response(response_data, sent_request_id) + except json.JSONDecodeError: + print(f"Invalid JSON response: {response}") + return None + +def link_units(sock, tts_work_id, target_work_id): + """链接TTS单元与其他单元""" + request_id = str(uuid.uuid4()) + send_json(sock, { + "request_id": request_id, + "work_id": tts_work_id, + "action": "link", + "object": "work_id", + "data": target_work_id + }) + + response = receive_response(sock) + if not response: + print("No response received for link request") + return False + + try: + response_data = json.loads(response) + error = response_data.get('error', {}) + if error.get('code') == 0: + print(f"成功链接 {tts_work_id} 与 {target_work_id}") + return True + else: + print(f"链接失败: {error.get('message', '未知错误')}") + return False + except: + print(f"Failed to parse link response: {response}") + return False + +def unlink_units(sock, tts_work_id, target_work_id): + """取消TTS单元与其他单元的链接""" + request_id = str(uuid.uuid4()) + send_json(sock, { + "request_id": request_id, + "work_id": tts_work_id, + "action": "unlink", + "object": "work_id", + "data": target_work_id + }) + + response = receive_response(sock) + if not response: + print("No response received for unlink request") + return False + + try: + response_data = json.loads(response) + error = response_data.get('error', {}) + if error.get('code') == 0: + print(f"成功取消链接 {tts_work_id} 与 {target_work_id}") + return True + else: + print(f"取消链接失败: {error.get('message', '未知错误')}") + return False + except: + print(f"Failed to parse unlink response: {response}") + return False + +def pause_unit(sock, tts_work_id): + """暂停TTS单元工作""" + request_id = str(uuid.uuid4()) + send_json(sock, { + "request_id": request_id, + "work_id": tts_work_id, + "action": "pause" + }) + + response = receive_response(sock) + if not response: + print("No response received for pause request") + return False + + try: + response_data = json.loads(response) + error = response_data.get('error', {}) + if error.get('code') == 0: + print(f"成功暂停 {tts_work_id}") + return True + else: + print(f"暂停失败: {error.get('message', '未知错误')}") + return False + except: + print(f"Failed to parse pause response: {response}") + return False + +def resume_unit(sock, tts_work_id): + """恢复TTS单元工作""" + request_id = str(uuid.uuid4()) + send_json(sock, { + "request_id": request_id, + "work_id": tts_work_id, + "action": "work" + }) + + response = receive_response(sock) + if not response: + print("No response received for resume request") + return False + + try: + response_data = json.loads(response) + error = response_data.get('error', {}) + if error.get('code') == 0: + print(f"成功恢复 {tts_work_id}") + return True + else: + print(f"恢复失败: {error.get('message', 
'未知错误')}") + return False + except: + print(f"Failed to parse resume response: {response}") + return False + +def tts_inference(sock, tts_work_id, text): + request_id = str(uuid.uuid4()) + + # 非流式请求 + send_json(sock, { + "request_id": request_id, + "work_id": tts_work_id, + "action": "inference", + "object": "tts.utf-8", + "data": text + }) + + print("语音合成请求已发送,正在播放...") + + # 可选:设置一个较短的超时来检查是否有响应,但不要因为没响应就认为失败 + response = receive_response(sock, timeout=0.5) # 设置短超时,只是尝试看有没有响应 + if response: + try: + response_data = json.loads(response) + error = response_data.get('error') + if error and error.get('code') != 0: + print(f"收到错误响应: Code={error['code']}, Message={error['message']}") + return False + print("收到成功响应") + except: + print(f"收到非JSON响应: {response[:100]}...") + else: + # 不收到响应也视为成功,因为服务器可能正忙于播放音频 + print("未收到响应,但这不一定表示失败(服务器可能正忙于处理音频)") + + # 这里给TTS处理一些时间 + # 根据文本长度估计播放时间 + estimated_time = len(text) * 0.1 # 假设每个字符需要0.1秒 + estimated_time = max(1.0, min(estimated_time, 10.0)) # 至少1秒,最多10秒 + print(f"等待大约 {estimated_time:.1f} 秒让音频播放完...") + time.sleep(estimated_time) + + return True + +def exit_session(sock, tts_work_id): + request_id = str(uuid.uuid4()) + send_json(sock, { + "request_id": request_id, + "work_id": tts_work_id, + "action": "exit" + }) + response = receive_response(sock, timeout=2.0) + if not response: + print("退出命令已发送,但未收到响应") + return True # 假设成功 + try: + response_data = json.loads(response) + error = response_data.get('error', {}) + if error.get('code') == 0: + print(f"成功退出 {tts_work_id}") + return True + else: + print(f"退出失败: {error.get('message', '未知错误')}") + return False + except: + print("Failed to parse exit response") + return False + +def get_task_info(sock, work_id): + """获取任务的详细信息""" + request_id = str(uuid.uuid4()) + send_json(sock, { + "request_id": request_id, + "work_id": work_id, + "action": "taskinfo" + }) + + response = receive_response(sock) + if not response: + return {"error": "No response received"} + try: + return json.loads(response) + except: + return {"error": "Failed to parse response"} + +def print_menu(): + print("\n===== TTS控制菜单 =====") + print("1. 合成语音") + print("2. 链接到其他单元") + print("3. 取消链接") + print("4. 暂停TTS单元") + print("5. 恢复TTS单元") + print("6. 获取任务信息") + print("7. 退出TTS单元") + print("0. 退出程序") + print("======================") + +def main(host, port): + sock = create_tcp_connection(host, port) + try: + print("Setting up TTS...") + setup_data = create_tts_setup_data() + tts_work_id = setup(sock, setup_data) + + if not tts_work_id: + print("Setup failed. 
Checking available tasks...") + task_list = list_available_tasks(sock) + print("Available tasks:", task_list) + if task_list.get('data') and isinstance(task_list.get('data'), list) and len(task_list.get('data')) > 0: + tts_work_id = task_list.get('data')[0] + print(f"Using existing TTS task: {tts_work_id}") + else: + print("No usable TTS task found, exiting") + return + + print(f"TTS SETUP finished, work_id: {tts_work_id}") + + # Fetch and show the task details + task_info = get_task_info(sock, tts_work_id) + print("Task info:", task_info) + + while True: + print_menu() + choice = input("Select an operation (0-7): ") + + if choice == '0': + print("Exiting") + break + + elif choice == '1': + text = input("Enter the text to synthesize: ") + if text: + print("Synthesizing speech...", flush=True) + success = tts_inference(sock, tts_work_id, text) + if success: + print("TTS processing finished") + else: + print("TTS processing failed") + else: + print("Empty text, synthesis cancelled") + + elif choice == '2': + target_id = input("Enter the unit ID to link (e.g. kws.1000): ") + if target_id: + link_units(sock, tts_work_id, target_id) + else: + print("Empty unit ID, link cancelled") + + elif choice == '3': + target_id = input("Enter the unit ID to unlink (e.g. kws.1000): ") + if target_id: + unlink_units(sock, tts_work_id, target_id) + else: + print("Empty unit ID, operation cancelled") + + elif choice == '4': + pause_unit(sock, tts_work_id) + + elif choice == '5': + resume_unit(sock, tts_work_id) + + elif choice == '6': + task_info = get_task_info(sock, tts_work_id) + print("Task info:", json.dumps(task_info, indent=2, ensure_ascii=False)) + + elif choice == '7': + if exit_session(sock, tts_work_id): + print("TTS unit exited") + # re-check the available tasks + task_list = list_available_tasks(sock) + print("Available tasks:", task_list) + else: + print("TTS unit exit failed") + + else: + print("Invalid choice, please try again") + + # brief pause between operations + time.sleep(0.5) + + except KeyboardInterrupt: + print("\nInterrupted by user") + except Exception as e: + print(f"Program error: {e}") + finally: + close_connection(sock) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description='TCP Client for TTS Unit.') + parser.add_argument('--host', type=str, default='localhost', help='Server hostname (default: localhost)') + parser.add_argument('--port', type=int, default=10001, help='Server port (default: 10001)') + args = parser.parse_args() + main(args.host, args.port) \ No newline at end of file diff --git a/projects/llm_framework/tools/test_tools/test.py b/projects/llm_framework/tools/test_tools/test.py new file mode 100644 index 00000000..9ea712bf --- /dev/null +++ b/projects/llm_framework/tools/test_tools/test.py @@ -0,0 +1,145 @@ +import socket +import json +import argparse + + +def create_tcp_connection(host, port): + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + sock.connect((host, port)) + return sock + + +def send_json(sock, data): + json_data = json.dumps(data, ensure_ascii=False) + '\n' + sock.sendall(json_data.encode('utf-8')) + + +def receive_response(sock): + response = '' + while True: + part = sock.recv(4096).decode('utf-8') + response += part + if '\n' in response: + break + return response.strip() + + +def close_connection(sock): + if sock: + sock.close() + + +def create_init_data(): + return { + "request_id": "llm_001", + "work_id": "llm", + "action": "setup", + "object": "llm.setup", + "data": { + "model": "qwen2.5-0.5B-prefill-20e", + "response_format": "llm.utf-8.stream", + "input": "llm.utf-8.stream", + "enoutput": True, + "max_token_len": 1023, + "prompt": "You are a knowledgeable assistant capable of answering various questions and providing information."
+ } + } + + +def parse_setup_response(response_data, sent_request_id): + error = response_data.get('error') + request_id = response_data.get('request_id') + + if request_id != sent_request_id: + print(f"Request ID mismatch: sent {sent_request_id}, received {request_id}") + return None + + if error and error.get('code') != 0: + print(f"Error Code: {error['code']}, Message: {error['message']}") + return None + + return response_data.get('work_id') + + +def setup(sock, init_data): + sent_request_id = init_data['request_id'] + send_json(sock, init_data) + response = receive_response(sock) + response_data = json.loads(response) + return parse_setup_response(response_data, sent_request_id) + + +def exit_session(sock, deinit_data): + send_json(sock, deinit_data) + response = receive_response(sock) + response_data = json.loads(response) + print("Exit Response:", response_data) + + +def parse_inference_response(response_data): + error = response_data.get('error') + if error and error.get('code') != 0: + print(f"Error Code: {error['code']}, Message: {error['message']}") + return None + + return response_data.get('data') + + +def main(host, port): + sock = create_tcp_connection(host, port) + + try: + print("Setup LLM...") + init_data = create_init_data() + llm_work_id = setup(sock, init_data) + print("Setup LLM finished.") + + while True: + user_input = input("Enter your message (or 'exit' to quit): ") + if user_input.lower() == 'exit': + break + + send_json(sock, { + "request_id": "llm_001", + "work_id": llm_work_id, + "action": "inference", + "object": "llm.utf-8.stream", + "data": { + "delta": user_input, + "index": 0, + "finish": True + } + }) + + while True: + response = receive_response(sock) + response_data = json.loads(response) + + data = parse_inference_response(response_data) + if data is None: + break + + delta = data.get('delta') + finish = data.get('finish') + print(delta, end='', flush=True) + + if finish: + print() + break + + exit_session(sock, { + "request_id": "llm_exit", + "work_id": llm_work_id, + "action": "exit" + }) + finally: + close_connection(sock) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description='TCP Client to send JSON data.') + parser.add_argument('--host', type=str, default='localhost', help='Server hostname (default: localhost)') + parser.add_argument('--port', type=int, default=10001, help='Server port (default: 10001)') + + args = parser.parse_args() + main(args.host, args.port)
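All three test tools speak the same framing: one JSON object per request, terminated by a newline, answered by one JSON object per line. A minimal sketch of that round trip, assuming only what the scripts above do (the `rpc` helper itself is hypothetical):

```python
import json
import socket

def rpc(host: str, port: int, payload: dict, timeout: float = 10.0) -> dict:
    """One newline-delimited JSON request/response round trip."""
    with socket.create_connection((host, port), timeout=timeout) as sock:
        sock.sendall((json.dumps(payload, ensure_ascii=False) + "\n").encode("utf-8"))
        buf = b""
        while b"\n" not in buf:
            part = sock.recv(4096)
            if not part:  # peer closed the connection
                break
            buf += part
        return json.loads(buf.decode("utf-8").strip())

# e.g. query the TTS unit's task info, as test-tts.py does:
# print(rpc("localhost", 10001, {"request_id": "1", "work_id": "tts", "action": "taskinfo"}))
```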