tenstorrent
diff --git a/‎.cursor/rules/coding-standards.mdc‎
Lines changed: 1 addition & 0 deletions b/‎.cursor/rules/coding-standards.mdc‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎.cursor/rules/project-files.mdc‎
Lines changed: 1 addition & 1 deletion b/‎.cursor/rules/project-files.mdc‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎.cursor/rules/testing.mdc‎
Lines changed: 1 addition & 1 deletion b/‎.cursor/rules/testing.mdc‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎.cursorignore‎
Lines changed: 1 addition & 0 deletions b/‎.cursorignore‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎.github/CODEOWNERS‎
Lines changed: 41 additions & 1 deletion b/‎.github/CODEOWNERS‎
Lines changed: 41 additions & 1 deletion
diff --git a/‎.github/ISSUE_TEMPLATE/model-readiness.yml‎
Lines changed: 2 additions & 2 deletions b/‎.github/ISSUE_TEMPLATE/model-readiness.yml‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎.github/workflows/test-gate.yml‎
Lines changed: 201 additions & 9 deletions b/‎.github/workflows/test-gate.yml‎
Lines changed: 201 additions & 9 deletions
diff --git a/‎.gitignore‎
Lines changed: 5 additions & 0 deletions b/‎.gitignore‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎.pre-commit-config.yaml‎
Lines changed: 3 additions & 3 deletions b/‎.pre-commit-config.yaml‎
Lines changed: 3 additions & 3 deletions
@@ -11,3 +11,4 @@ alwaysApply: true
 - Minimize new dependencies that are not in the Python 3.8 standard library and not already used in the project.
 - Do not provide verbose explanations; writing code that correctly meets the requirements is more important, UNLESS there is a bug found in the code. If there is a bug, it should be explained and then a fix to the bug proposed.
 - Use clear and professional names for all variables, functions, classes, and files.
+- Python string formatting uses f-string style (f"{}"), not C printf style ("%s").
@@ -199,7 +199,7 @@
 │   ├── scripts
 │   │   └── download_sdxl_weights.py
 │   ├── security
-│   │   └── api_key_cheker.py
+│   │   └── api_key_checker.py
 │   ├── static
 │   │   ├── data
 │   │   │   ├── audio_test.json
 
@@ -5,7 +5,7 @@ globs:
 alwaysApply: true
 ---
 
-- source .venv/bin/activate to run pytest locally. IMPORTANT If that environment does not exist you must SKIP trying to run the tests. Do not try to install the python venv.
+- Run `source .pre-commit/bin/activate` before running pytest locally. IMPORTANT: If that environment does not exist, you must SKIP running the tests. Do not try to install the Python venv.
 - Ensure new functionality has test coverage that is meaningful and tests full components with minimal mocking only of external calls.
 - Make sure to run tests via `pytest` to check for breaking changes.
 - If you find a bug in code that is being tested, call it out, don't change tests to go around it.
@@ -15,3 +15,4 @@ dist/
 persistent_volume/
 workflow_logs/
 model_specs_output.json
+docs/model_support/**
@@ -2,5 +2,45 @@
 # the repo. Unless a later match takes precedence. This will be
 # requested for review when someone opens a pull request.
 *       @tenstorrent/tt-inference-server-codeowners
-workflows/run_reports.py @tenstorrent/tt-inference-server-codeowners @acvejicTT @mjeremicTT @mdobrosavljevicTT @vmaksimovicTT
+
+# tt-media-server
+tt-media-server/ @idjuricTT @ztorlakTT @fivanovicTT @ljovanovicTT @dmadicTT @knovokmetTT @vpetrovicTT
 tt-media-server/tt_model_runners/forge_runners/ @vmilosevic
+
+# tt-vllm-plugin
+tt-vllm-plugin/ @idjuricTT @ztorlakTT @fivanovicTT @ljovanovicTT @dmadicTT @bgoelTT @knovokmetTT @vpetrovicTT
+
+# vllm-tt-metal
+vllm-tt-metal-llama3/ @tstescoTT @bgoelTT @stisiTT
+
+# docker entrypoint
+docker-entrypoint.sh @tstescoTT @bgoelTT @idjuricTT @acvejicTT
+
+# workflows
+run.py @tstescoTT @bgoelTT @stisiTT @tenstorrent/tt-inference-server-codeowners
+workflows/ @tstescoTT @tenstorrent/tt-inference-server-codeowners
+workflows/model_spec.py @tstescoTT @bgoelTT @idjuricTT @tenstorrent/tt-inference-server-codeowners
+workflows/run_reports.py @acvejicTT @mjeremicTT @mdobrosavljevicTT @vmaksimovicTT @tenstorrent/tt-inference-server-codeowners
+
+# benchmarking
+benchmarking/ @tstescoTT @bgoelTT @stisiTT @gtobarTT @arobergeTT @ssanjayTT @tenstorrent/tt-inference-server-codeowners
+
+# evals
+evals/ @tstescoTT @bgoelTT @stisiTT @gtobarTT @arobergeTT @ssanjayTT @tenstorrent/tt-inference-server-codeowners
+
+# utils
+utils/ @tenstorrent/tt-inference-server-codeowners
+
+# tests
+tests/ @tenstorrent/tt-inference-server-codeowners
+
+# scripts
+scripts/* @tstescoTT @tenstorrent/tt-inference-server-codeowners
+scripts/release/* @vmaksimovicTT @tstescoTT @bgoelTT
+
+# github actions
+.github/workflows/ @tstescoTT @bgoelTT @idjuricTT @acvejicTT @vmaksimovicTT @mjeremicTT @tenstorrent/tt-inference-server-codeowners
+
+# docs
+docs/ @tenstorrent/tt-inference-server-codeowners
+README.md @tstescoTT @bgoelTT @idjuricTT
@@ -1,5 +1,5 @@
-name: Add a Model to Model Readiness
-description: Add support for a new model in the Model Readiness suite.
+name: Add Model Readiness Support for a New Model
+description: Add support for a new model in the Model Readiness test suite.
 title: "[Model Readiness Support]: "
 labels: ["model_readiness_support"]
 projects: ["tenstorrent/130"]
 
@@ -12,6 +12,7 @@ jobs:
     runs-on: ubuntu-latest
     outputs:
       should-test: ${{ steps.filter.outputs.code }}
+      cpp-server: ${{ steps.filter.outputs.cpp_server }}
     steps:
       - uses: actions/checkout@v4
       - uses: dorny/paths-filter@v3
@@ -23,6 +24,8 @@ jobs:
               - '**/requirements*.txt'
               - 'pyproject.toml'
               - '.github/workflows/*.yml'
+            cpp_server:
+              - 'tt-media-server/cpp_server/**'
 
   lint:
     needs: detect-changes
@@ -38,7 +41,7 @@ jobs:
           python-version: "3.10"
 
       - name: Install ruff
-        run: pip install ruff
+        run: pip install ruff==0.15.0
 
       - name: Run ruff linter
         run: ruff check .
@@ -434,13 +437,16 @@ jobs:
           pytest performance_tests/test_llm_streaming.py -sv 2>&1 | tee test_output.txt
           TEST_EXIT_CODE=${PIPESTATUS[0]}
 
-          # Parse CI report metrics
-          TOKENS=$(grep 'tokens_received=' test_output.txt | cut -d= -f2 || echo "N/A")
-          TOTAL_TIME=$(grep 'total_time_ms=' test_output.txt | cut -d= -f2 || echo "N/A")
-          MEAN_INTERVAL=$(grep 'mean_interval_ms=' test_output.txt | cut -d= -f2 || echo "N/A")
-          THROUGHPUT=$(grep 'throughput_tps=' test_output.txt | cut -d= -f2 || echo "N/A")
-          OVERHEAD=$(grep 'overhead_ms=' test_output.txt | cut -d= -f2 || echo "N/A")
-          THRESHOLD=$(grep 'threshold_ms=' test_output.txt | cut -d= -f2 || echo "N/A")
+          # Extract only the CI report section (between markers) to avoid matching source code
+          sed -n '/::CI_REPORT_START::/,/::CI_REPORT_END::/p' test_output.txt > ci_report.txt
+
+          # Print the value of a "<key>=<value>" line from ci_report.txt, or "N/A"
+          # when the key is absent. The fallback is applied to the captured value
+          # rather than with `|| echo "N/A"` on the pipeline: unless pipefail is
+          # set, the pipeline's status is cut's (0 even when grep matches nothing),
+          # so that fallback never runs and the summary shows empty cells.
+          # `|| true` keeps a grep miss from aborting the step when both errexit
+          # and pipefail are active.
+          report_metric() {
+            local value
+            value=$(grep "^$1=" ci_report.txt | cut -d= -f2 || true)
+            echo "${value:-N/A}"
+          }
+
+          # Parse CI report metrics from the extracted section
+          TOKENS=$(report_metric tokens_received)
+          TOTAL_TIME=$(report_metric total_time_ms)
+          MEAN_INTERVAL=$(report_metric mean_interval_ms)
+          THROUGHPUT=$(report_metric throughput_tps)
+          OVERHEAD=$(report_metric overhead_ms)
+          THRESHOLD=$(report_metric threshold_ms)
 
           # Determine status
           if [ $TEST_EXIT_CODE -eq 0 ]; then
@@ -463,8 +469,194 @@ jobs:
             echo "| **Overhead/Token** | ${OVERHEAD}ms (threshold: ${THRESHOLD}ms) |"
           } >> $GITHUB_STEP_SUMMARY
 
-          exit $TEST_EXIT_CODE
+          # Store exit code for later
+          echo "TEST_EXIT_CODE=$TEST_EXIT_CODE" >> $GITHUB_ENV
+
+      - name: Show server logs
+        if: always()
+        run: |
+          echo "=== Server Logs ==="
+          if [ -f performance_tests/server.log ]; then
+            cat performance_tests/server.log
+          else
+            echo "No server.log file found"
+          fi
+
+      - name: Upload server logs
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: llm-streaming-server-logs
+          path: tt-media-server/performance_tests/server.log
+          retention-days: 1
+          if-no-files-found: warn
+
+      - name: Check test result
+        run: exit ${{ env.TEST_EXIT_CODE }}
+
+  # Builds the C++ server with `./build.sh --ttnn`; no tests are run in this job.
+  # Gated on the detect-changes `cpp-server` output.
+  cpp-server-ttnn-build:
+    name: C++ Server TTNN Build
+    needs: detect-changes
+    if: ${{ needs.detect-changes.outputs.cpp-server == 'true' }}
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+    env:
+      PYTHONPATH: ${{ github.workspace }}/tt-media-server
+    defaults:
+      run:
+        working-directory: tt-media-server
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Install C++ build dependencies
+        run: |
+          sudo apt-get update -qq
+          sudo apt-get install -y -qq \
+            cmake g++ pkg-config libjsoncpp-dev uuid-dev zlib1g-dev
+
+      # Drogon is cloned at a pinned tag into cpp_server/deps; its submodules
+      # are initialised in place.
+      - name: Clone Drogon for build
+        run: |
+          mkdir -p cpp_server/deps
+          git clone --depth 1 --branch v1.9.8 https://github.com/drogonframework/drogon.git cpp_server/deps/drogon
+          git -C cpp_server/deps/drogon submodule update --init
+
+      - name: Build C++ server
+        working-directory: tt-media-server/cpp_server
+        run: ./build.sh --ttnn
+
+  # Builds the C++ server with `./build.sh --test`, runs its ctest suite, then
+  # starts the server with TT_RUNNER_TYPE=llm_test and runs the Python LLM
+  # streaming performance test against it. Gated on the detect-changes
+  # `cpp-server` output.
+  cpp-server-llm-streaming:
+    needs: detect-changes
+    if: ${{ needs.detect-changes.outputs.cpp-server == 'true' }}
+    name: C++ Server LLM Streaming Performance Test
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+    env:
+      PYTHONPATH: ${{ github.workspace }}/tt-media-server
+    defaults:
+      run:
+        working-directory: tt-media-server
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Install C++ build dependencies
+        run: |
+          sudo apt-get update -qq
+          sudo apt-get install -y -qq cmake g++ pkg-config \
+            libjsoncpp-dev uuid-dev zlib1g-dev
+
+      - name: Clone Drogon for build
+        run: |
+          mkdir -p cpp_server/deps
+          git clone --depth 1 --branch v1.9.8 https://github.com/drogonframework/drogon.git cpp_server/deps/drogon
+          cd cpp_server/deps/drogon
+          git submodule update --init
+
+      - name: Build C++ server
+        run: |
+          cd cpp_server
+          ./build.sh --test
+
+      - name: Run C++ unit tests
+        run: |
+          cd cpp_server/build
+          ctest --output-on-failure
+
+      - name: Start C++ server (test runner mode)
+        run: |
+          cd cpp_server/build
+          export TT_RUNNER_TYPE=llm_test
+          export TEST_RUNNER_FREQUENCY_MS=1
+          # Run in the background; the log and PID files are written to
+          # tt-media-server/ so the later steps can find them.
+          ./tt_media_server_cpp -p 8000 > ../../cpp_server.log 2>&1 &
+          echo $! > ../../cpp_server.pid
+          cd ../..
+          # Poll the health endpoint up to 30 times, one second apart, then fail
+          # the step if the server still does not answer.
+          for i in $(seq 1 30); do
+            curl -sf http://127.0.0.1:8000/health | grep -q '"status"' && break
+            sleep 1
+          done
+          curl -sf http://127.0.0.1:8000/health || exit 1
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.10"
+          cache: 'pip'
+
+      - name: Install Python test dependencies
+        run: |
+          pip install --upgrade pip
+          pip install pytest pytest-asyncio aiohttp requests
+
+      - name: Run LLM Streaming test against C++ server
+        env:
+          EXTERNAL_LLM_SERVER: "1"
+          SERVER_BASE_URL: "http://127.0.0.1:8000"
+          TEST_RUNNER_FREQUENCY_MS: "1"
+        run: |
+          pytest performance_tests/test_llm_streaming.py -sv 2>&1 | tee test_output.txt
+          TEST_EXIT_CODE=${PIPESTATUS[0]}
+
+          # Extract the CI report section (between markers) for the job summary
+          sed -n '/::CI_REPORT_START::/,/::CI_REPORT_END::/p' test_output.txt > ci_report.txt
+
+          # Print the value of a "<key>=<value>" line from ci_report.txt, or "N/A"
+          # when the key is absent. The fallback is applied to the captured value
+          # rather than with `|| echo "N/A"` on the pipeline: unless pipefail is
+          # set, the pipeline's status is cut's (0 even when grep matches nothing),
+          # so that fallback never runs and the summary shows empty cells.
+          # `|| true` keeps a grep miss from aborting the step when both errexit
+          # and pipefail are active.
+          report_metric() {
+            local value
+            value=$(grep "^$1=" ci_report.txt | cut -d= -f2 || true)
+            echo "${value:-N/A}"
+          }
+
+          TOKENS=$(report_metric tokens_received)
+          TOTAL_TIME=$(report_metric total_time_ms)
+          MEAN_INTERVAL=$(report_metric mean_interval_ms)
+          THROUGHPUT=$(report_metric throughput_tps)
+          OVERHEAD=$(report_metric overhead_ms)
+          THRESHOLD=$(report_metric threshold_ms)
+          if [ "$TEST_EXIT_CODE" -eq 0 ]; then STATUS="✅ PASSED"; else STATUS="❌ FAILED"; fi
+          {
+            echo "## 🚀 C++ Server LLM Streaming Test"
+            echo ""
+            echo "| Metric | Value |"
+            echo "|--------|-------|"
+            echo "| **Status** | $STATUS |"
+            echo "| **Tokens Received** | $TOKENS |"
+            echo "| **Total Time** | ${TOTAL_TIME}ms |"
+            echo "| **Mean Interval** | ${MEAN_INTERVAL}ms |"
+            echo "| **Throughput** | ${THROUGHPUT} tokens/s |"
+            echo "| **Overhead/Token** | ${OVERHEAD}ms (threshold: ${THRESHOLD}ms) |"
+          } >> "$GITHUB_STEP_SUMMARY"
+
+          # Record pytest's exit code instead of exiting here; the final step
+          # of the job turns it into the job result.
+          echo "TEST_EXIT_CODE=$TEST_EXIT_CODE" >> "$GITHUB_ENV"
+
+      - name: Stop C++ server
+        if: always()
+        run: |
+          [ -f cpp_server.pid ] && kill $(cat cpp_server.pid) 2>/dev/null || true
+
+      - name: Show server logs
+        if: always()
+        run: |
+          echo "=== C++ Server Logs ==="
+          if [ -f cpp_server.log ]; then
+            cat cpp_server.log
+          else
+            echo "No cpp_server.log file found"
+          fi
+
+      - name: Upload server logs
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: cpp-server-llm-streaming-logs
+          path: tt-media-server/cpp_server.log
+          retention-days: 1
+          if-no-files-found: warn
 
+      # Fail the job with the pytest exit code recorded by the test step.
+      - name: Check test result
+        run: exit ${{ env.TEST_EXIT_CODE }}
 
   forge-runner-changes:
     name: Detect Forge Runner Changes
 
@@ -47,6 +47,7 @@ db.sqlite3
 scripts/examples/example_data
 
 # unignore
+!**/CMakeLists.txt
 !requirements*.txt
 !workflows/model_performance_reference.json
 !tests/server_tests/test_config.json
@@ -64,3 +65,7 @@ tt-media-server/huggingface
 
 # ignore downloaded datasets
 tests/server_tests/datasets/*
+
+# ignore generated model support docs
+# use 'git add -f docs/model_support/**' to commit updates
+docs/model_support/**
@@ -4,21 +4,21 @@ repos:
     # Run the linter.
     - id: ruff
       name: ruff
-      entry: bash -c 'source .venv/bin/activate && ruff check "$@"' --
+      entry: bash -c 'source .pre-commit/bin/activate && ruff check "$@"' --
       language: system
       types: [python]
     # Run the formatter.
     - id: ruff-format
       name: ruff-format
-      entry: bash -c 'source .venv/bin/activate && ruff format "$@"' --
+      entry: bash -c 'source .pre-commit/bin/activate && ruff format "$@"' --
       language: system
       types: [python]
 
 - repo: local
   hooks:
     - id: pytest
       name: pytest
-      entry: bash -c 'source .venv/bin/activate && pytest'
+      entry: bash -c 'source .pre-commit/bin/activate && pytest'
       language: system
       pass_filenames: false
       always_run: true