Dev #15

Merged
merged 73 commits into from May 9, 2025

Commits (73)
52e9074
[fix] yolo11n-seg mode config error
dianjixz Apr 3, 2025
a231d39
[update] llm-model-yolo11n-seg version
dianjixz Apr 3, 2025
6d62b0d
[update] Update llm_asr & llm_kws en docs
Apr 7, 2025
bed467e
[update] Delete the old version of the dynamic library
Apr 7, 2025
d6d8f3c
[update] main_kws & main_llm use their own python environment
Apr 7, 2025
f5c82e4
[update] main_vlm updates the image encoding model and uses its own p…
Apr 7, 2025
033313f
[update] Remove the system environment python package. Add internvl2.…
Apr 7, 2025
a0222ca
[update] add smolvlm-500M-ax630c model & tokenizer server
Apr 8, 2025
8ae949d
[update] update main_vlm version
Apr 8, 2025
c39e691
[fix] Fix tokenizer_smolvlm prompt bug.
Apr 8, 2025
a78367e
[update] main_camera add AXERA VIN && add rtsp && add webstream
dianjixz Apr 9, 2025
9842ffb
[update] llm_task add start and stop
dianjixz Apr 9, 2025
9f257bc
[update] main_camera doc
dianjixz Apr 9, 2025
ce0d5c2
Merge branch 'dev' of github.com:m5stack/StackFlow into dev
dianjixz Apr 9, 2025
3e367c1
[update] kws doc add enwake_audio
dianjixz Apr 10, 2025
b8108f3
[update] Compatible with OpenAI API calls
Apr 15, 2025
a6bc38b
[update] Update log printing. Update fields
Apr 15, 2025
4274e8c
[fix] Fix buffer data overwrite
Apr 16, 2025
f952bb6
[update] Added support for 650N. Enable bLoadModelUseCmm.
Apr 16, 2025
98adcc2
[update] depth_anything, melotts, yolo. Added support for 650N.
Apr 16, 2025
c63bb09
[fix] Fixed the issue that CMM cannot be released after the class Eng…
Apr 16, 2025
8d18ef3
[update] mode config add compile_flage
dianjixz Apr 16, 2025
724bb5b
Merge branch 'dev' of github.com:m5stack/StackFlow into dev
dianjixz Apr 16, 2025
8f673e4
[fix] Fix CMM cannot be released
Apr 16, 2025
91c5bbd
[update] change bsp define
dianjixz Apr 16, 2025
d8b0652
Merge branch 'dev' of github.com:m5stack/StackFlow into dev
dianjixz Apr 16, 2025
9276df6
[update] Release whisper-small model
Apr 16, 2025
3203fea
[update] Update ModuleLLM-OpenAI-Plugin version
Apr 17, 2025
52f1a48
[update] add benchmark test
Apr 17, 2025
c85e4d6
[update] add llm unit test.
Apr 17, 2025
1dd2b14
[update] StackFlow bin add version id
dianjixz Apr 18, 2025
2b169c7
Merge branch 'dev' of github.com:m5stack/StackFlow into dev
dianjixz Apr 18, 2025
23cc53d
[update] Update import package method
Apr 18, 2025
66a4770
[update] Update model name
Apr 21, 2025
783200e
[update] Update openai_api version
Apr 21, 2025
8f2643d
[update] update llm-asr doc
Apr 21, 2025
14d7e5b
[fix] llm_whisper
Apr 21, 2025
4b477ee
[update] Add melotts-en-us model
Apr 22, 2025
aa3441d
[update] Update package version
Apr 22, 2025
8e263d0
[update] Update other package version
Apr 22, 2025
efa978b
[update] update melotts doc
Apr 22, 2025
16dfe70
[update] update model version
Apr 22, 2025
43c3438
[fix] Fix non-utf-8 characters
Apr 23, 2025
dce7e4d
[update] Update OpenAI-Plugin
Apr 23, 2025
076ecac
[update] Update LLM VLM STT benchmark
Apr 24, 2025
3bdf822
[update] Update benchmark
Apr 24, 2025
d636d4d
[update] add pzmq pzmq_data class
dianjixz Apr 29, 2025
afaf857
Merge branch 'dev' of github.com:m5stack/StackFlow into dev
dianjixz Apr 29, 2025
1a90562
Optimize the g2p pipeline to handle polyphonic characters, mixed Chinese-English text, and similar cases
yuyun2000 Apr 30, 2025
2e40ae6
Remove Chinese comments; use log for logging output; format code
yuyun2000 Apr 30, 2025
3ce0205
Add Japanese and English model configurations
yuyun2000 Apr 30, 2025
3897870
Slightly increase speech rate for better listening quality
yuyun2000 Apr 30, 2025
5782f89
Handle unfamiliar English words
yuyun2000 Apr 30, 2025
ef3df9b
Merge pull request #11 from yuyun2000/opt/melotts
Abandon-ht Apr 30, 2025
04961ae
[update] update mode_melotts-en-default.json
Apr 30, 2025
1e85f45
[update] update llm-model-melotts-en-default & llm-model-melotts-ja-jp
Apr 30, 2025
4e301f1
[update] update whisper-tiny
May 6, 2025
2a8a744
[update] StackFlow add stackflow_data && pzmq add get_param set_param
dianjixz May 6, 2025
e3c70bc
Implement Sola algorithm for smoother audio transitions
yuyun2000 May 6, 2025
a151aff
Translate logs in Lexicon.hpp to English and add debug switch
yuyun2000 May 6, 2025
840f739
[update] update whisper-base
May 6, 2025
e1d7e6d
Merge pull request #12 from yuyun2000/opt/melotts
Abandon-ht May 6, 2025
40bfe39
[update] update whisper-small
May 7, 2025
6b285ed
[update] update benchmark
May 8, 2025
b43e73e
[add] add benchmark.yml
May 8, 2025
b1df925
Fix SOLA detail issue causing first frame problems
yuyun2000 May 9, 2025
cbb0afa
Optimize G2P process to skip inference for short audio clips
yuyun2000 May 9, 2025
74603be
[update] update qwen3-0.6B model
May 9, 2025
9e7342f
Fix code formatting and integrate SOLA algorithm into main.py
yuyun2000 May 9, 2025
3b20852
Merge pull request #13 from yuyun2000/opt/melotts
Abandon-ht May 9, 2025
835daf1
Fix SOLA algorithm implementation
yuyun2000 May 9, 2025
e5944f2
Merge pull request #14 from yuyun2000/opt/melotts
Abandon-ht May 9, 2025
6e503a1
[update] delete debug log
May 9, 2025

Changes from all commits

3 changes: 1 addition & 2 deletions .clang-format
@@ -163,5 +163,4 @@ StatementMacros:
- QT_REQUIRE_VERSION
TabWidth: 4
UseCRLF: false
UseTab: Never
...
UseTab: Never
18 changes: 18 additions & 0 deletions .github/workflows/benchmark.yml
@@ -0,0 +1,18 @@
name: Benchmark Test
on:
  workflow_dispatch:
  push:
    branches:
      - dev
jobs:
  build:
    runs-on: [self-hosted, linux, arm64]
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Start Benchmark Test
        run: |
          echo "This job runs on a self-hosted runner!"
          echo "Running benchmark test..."
          python3 benchmark/benchmodulellm.py
10 changes: 10 additions & 0 deletions benchmark/README.md
@@ -0,0 +1,10 @@
benchmodulellm can be used to test llm unit inference performance.

Only the llm unit definition files (model json) are required.

If no model is specified, the default list is benchmarked (a custom list can be passed as sketched below the usage example). More model networks may be added later.

Usage
```shell
python benchmodulellm.py --host 192.168.20.100 --port 10001 --test-items default.yaml
```
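
For reference, a custom test-items file might look like the sketch below. This is an illustrative example, not a file shipped in this PR: it assumes the same schema as benchmark/default.yaml (a top-level `items` list whose entries carry `model_name` and `type`) and restricts the run to a single llm model. The hypothetical filename `my-items.yaml` would then be passed via `--test-items my-items.yaml`.

```yaml
# my-items.yaml (hypothetical) -- same schema as benchmark/default.yaml
items:
  - model_name: qwen2.5-0.5B-p256-ax630c
    type: llm
```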
39 changes: 39 additions & 0 deletions benchmark/RESULTS.md
@@ -0,0 +1,39 @@
# Results

## ModuleLLM (AX630C)

### LLM
| model | ttft (ms) | avg-token/s | model version | llm version |
|---------------------------------|------------|-------------|---------------|-------------|
| qwen2.5-0.5B-prefill-20e | 359.8 | 10.32 | v0.2 | v1.8 |
| qwen2.5-0.5B-p256-ax630c | 1126.19 | 10.30 | v0.4 | v1.8 |
| qwen2.5-0.5B-Int4-ax630c | 442.95 | 12.52 | v0.4 | v1.8 |
| qwen2.5-coder-0.5B-ax630c | 361.81 | 10.28 | v0.2 | v1.8 |
| qwen2.5-1.5B-ax630c | 1029.41 | 3.59 | v0.3 | v1.8 |
| qwen2.5-1.5B-p256-ax630c | 3056.54 | 3.57 | v0.4 | v1.8 |
| qwen2.5-1.5B-Int4-ax630c | 1219.54 | 4.63 | v0.4 | v1.8 |
| deepseek-r1-1.5B-ax630c | 1075.04 | 3.57 | v0.3 | v1.8 |
| deepseek-r1-1.5B-p256-ax630c | 3056.86 | 3.57 | v0.4 | v1.8 |
| llama3.2-1B-prefill-ax630c | 891.00 | 4.48 | v0.2 | v1.8 |
| llama3.2-1B-p256-ax630c | 2601.11 | 4.49 | v0.4 | v1.8 |
| openbuddy-llama3.2-1B-ax630c | 891.02 | 4.52 | v0.2 | v1.8 |

`The input text used by the llm test is "hello!"`
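
As a rough reading of these numbers (an estimate, not a measured figure): assuming ttft covers prompt prefill and avg-token/s reflects decode throughput, a 100-token reply from qwen2.5-0.5B-prefill-20e would take roughly `359.8 ms + 100 / 10.32 token/s ≈ 10.0 s` end to end.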

### VLM
| model | ttft (ms) | avg-token/s | image encode (ms) | model version | vlm version |
|---------------------------------|------------|-------------|-------------------|---------------|-------------|
| internvl2.5-1B-364-ax630c | 1117.27 | 10.56 | 1164.61 | v0.4 | v1.7 |
| smolvlm-256M-ax630c | 185.75 | 30.16 | 799.11 | v0.4 | v1.7 |
| smolvlm-500M-ax630c | 365.69 | 13.14 | 838.30 | v0.4 | v1.7 |

`The image encoding test uses a jpg image with a resolution of 810×1080`

### STT
| model | encode (ms) | avg-decode (ms) | model version | whisper version |
|--------------------|-------------|-----------------|---------------|-----------------|
| whisper-tiny | 248.0 | 32.54 | v0.4 | v1.7 |
| whisper-base | 660.31 | 51.11 | v0.4 | v1.7 |
| whisper-small | 1606.08 | 148.92 | v0.4 | v1.7 |

`The STT test uses a 30-second English wav audio clip`
126 changes: 126 additions & 0 deletions benchmark/benchmodulellm.py
@@ -0,0 +1,126 @@
import argparse
import os
import sys

import yaml
import logging

from pathlib import Path

from utils import LLMClient

# Ensure the benchmark directory is on sys.path so `utils` can be imported.
FILE = Path(__file__).resolve()
ROOT = FILE.parents[0]
if str(ROOT) not in sys.path:
    sys.path.append(str(ROOT))
ROOT = Path(os.path.relpath(ROOT, Path.cwd()))

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
)

def parse_opt(known=False):
    """
    Parse command-line options.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--host", type=str, default="127.0.0.1", help="ModuleLLM IP Address")
    parser.add_argument("--port", type=int, default=10001, help="ModuleLLM TCP Port")
    parser.add_argument("--test-items", type=str, default=ROOT / "default.yaml", help="test items YAML path")

    args = parser.parse_known_args()[0] if known else parser.parse_args()

    return args

def read_yaml(file_path):
    """
    Read a YAML file and return its 'items' list.
    """
    if not os.path.exists(file_path):
        logging.error(f"YAML file '{file_path}' does not exist.")
        sys.exit(1)

    try:
        with open(file_path, "r") as file:
            data = yaml.safe_load(file)
            if data is None:
                logging.warning(f"YAML file '{file_path}' is empty.")
                return {}

            logging.info(f"YAML file '{file_path}' read successfully.")

            if "items" in data:
                return data["items"]
            else:
                logging.warning("'items' not found in YAML file.")
                return []
    except Exception as e:
        logging.error(f"Failed to read YAML file '{file_path}': {e}")
        sys.exit(1)

def write_yaml(file_path, data):
    """
    Write data to a YAML file.
    """
    try:
        with open(file_path, "w") as file:
            yaml.safe_dump(data, file)
        logging.info(f"YAML file '{file_path}' written successfully.")
    except Exception as e:
        logging.error(f"Failed to write YAML file '{file_path}': {e}")
        sys.exit(1)

def categorize_and_deduplicate(items):
    """
    Categorize items by 'type' and remove duplicate 'model_name'.
    """
    categorized = {}
    for item in items:
        item_type = item.get("type")
        model_name = item.get("model_name")
        if not item_type or not model_name:
            continue

        if item_type not in categorized:
            categorized[item_type] = set()

        categorized[item_type].add(model_name)

    # Convert sets back to lists for easier usage (see the example grouping sketched after this file)
    return {key: list(value) for key, value in categorized.items()}

def main(opt):
    items = read_yaml(opt.test_items)
    if not items:
        logging.warning(f"No items found in YAML file '{opt.test_items}'.")
        return

    categorized_items = categorize_and_deduplicate(items)

    logging.info("Categorized items:")
    for item_type, models in categorized_items.items():
        logging.info(f"Type: {item_type}, Models: {models}")

        if item_type == "llm":
            logging.info("Initializing LLMClient...")
            llm_client = LLMClient(opt.host, opt.port)

            for model_name in models:
                logging.info(f"Testing model: {model_name}")
                input_text = "Tell me an adventure story."
                try:
                    result = llm_client.test(model_name, input_text)
                    logging.info(f"Test result for model '{model_name}': {result}")
                except Exception as e:
                    logging.error(f"Error testing model '{model_name}': {e}")

            del llm_client
            logging.info("LLMClient deleted successfully.")

    return categorized_items

if __name__ == "__main__":
    opt = parse_opt()
    main(opt)
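
For orientation (not code from this PR), the dictionary returned by categorize_and_deduplicate() for the default.yaml added below would have the shape sketched here; list order within each type is not guaranteed, since model names pass through a set first, and the current main() only exercises the "llm" group.

```python
# Illustrative shape of categorize_and_deduplicate()'s return value for default.yaml.
expected_groups = {
    "llm": ["qwen2.5-0.5B-p256-ax630c"],
    "vlm": ["internvl2.5-1B-364-ax630c"],
    "whisper": ["whisper-tiny", "whisper-base", "whisper-small"],
    "asr": [
        "sherpa-ncnn-streaming-zipformer-20M-2023-02-17",
        "sherpa-ncnn-streaming-zipformer-zh-14M-2023-02-23",
    ],
    "kws": [
        "sherpa-onnx-kws-zipformer-gigaspeech-3.3M-2024-01-01",
        "sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01",
    ],
    "melotts": ["melotts-zh-cn"],
    "tts": ["single_speaker_english_fast", "single_speaker_fast"],
    "yolo": ["yolo11n", "yolo11n-seg", "yolo11n-pose"],
}
```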
31 changes: 31 additions & 0 deletions benchmark/default.yaml
@@ -0,0 +1,31 @@
items:
  - model_name: qwen2.5-0.5B-p256-ax630c
    type: llm
  - model_name: internvl2.5-1B-364-ax630c
    type: vlm
  - model_name: whisper-tiny
    type: whisper
  - model_name: whisper-base
    type: whisper
  - model_name: whisper-small
    type: whisper
  - model_name: sherpa-ncnn-streaming-zipformer-20M-2023-02-17
    type: asr
  - model_name: sherpa-ncnn-streaming-zipformer-zh-14M-2023-02-23
    type: asr
  - model_name: sherpa-onnx-kws-zipformer-gigaspeech-3.3M-2024-01-01
    type: kws
  - model_name: sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01
    type: kws
  - model_name: melotts-zh-cn
    type: melotts
  - model_name: single_speaker_english_fast
    type: tts
  - model_name: single_speaker_fast
    type: tts
  - model_name: yolo11n
    type: yolo
  - model_name: yolo11n-seg
    type: yolo
  - model_name: yolo11n-pose
    type: yolo
3 changes: 3 additions & 0 deletions benchmark/utils/__init__.py
@@ -0,0 +1,3 @@
from .llm import LLMClient

__all__ = ["LLMClient"]