vllm-project · autopear · Nov 7, 2025 · Jul 29, 2025 · Dec 18, 2025
diff --git a/.github/workflows/complete-testing.yml b/.github/workflows/complete-testing.yml
diff --git a/.github/workflows/nightly-performance.yml b/.github/workflows/nightly-performance.yml
@@ -0,0 +1,287 @@
+name: Nightly Performance Tests
+
+on:
+  schedule:
+    - cron: '0 3 * * *'  # Run at 3 AM UTC daily
+  workflow_dispatch:
+    inputs:
+      baseline_sha:
+        description: 'Git SHA to use as baseline for comparison'
+        required: false
+        default: ''
+
+env:
+  GO_VERSION: '1.23'
+  RESULTS_RETENTION_DAYS: 90
+
+jobs:
+  performance-benchmarks:
+    name: Run Performance Benchmarks
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        benchmark:
+          - name: "KVEventProcessingLatency"
+            test: "BenchmarkKVEventProcessingLatency"
+          - name: "KVEventThroughput"
+            test: "BenchmarkKVEventThroughput"
+          - name: "KVEventMemoryUsage"
+            test: "BenchmarkKVEventMemoryUsage"
+          - name: "KVEventConcurrency"
+            test: "BenchmarkKVEventConcurrency"
+          - name: "KVEventLargePrefix"
+            test: "BenchmarkKVEventLargePrefix"
+          - name: "KVEventBurstLoad"
+            test: "BenchmarkKVEventBurstLoad"
+          - name: "KVEventRoutingDecision"
+            test: "BenchmarkKVEventRoutingDecision"
+
+    steps:
+    - uses: actions/checkout@v4
+
+    - name: Set up Go
+      uses: actions/setup-go@v5
+      with:
+        go-version: ${{ env.GO_VERSION }}
+
+    - name: Install dependencies
+      run: |
+        sudo apt-get update
+        sudo apt-get install -y libzmq3-dev pkg-config
+
+    - name: Run benchmark
+      run: |
+        cd test/benchmark
+        go test -bench=${{ matrix.benchmark.test }} -benchmem -benchtime=30s -count=5 -cpu=1,2,4,8 -tags="zmq" \
+          -timeout=30m | tee ${{ matrix.benchmark.name }}-results.txt
+
+    - name: Upload benchmark results
+      uses: actions/upload-artifact@v4
+      with:
+        name: benchmark-${{ matrix.benchmark.name }}-${{ github.sha }}
+        path: test/benchmark/${{ matrix.benchmark.name }}-results.txt
+        retention-days: ${{ env.RESULTS_RETENTION_DAYS }}
+
+  compare-results:
+    name: Compare with Baseline
+    runs-on: ubuntu-latest
+    needs: performance-benchmarks
+    steps:
+    - uses: actions/checkout@v4
+
+    - name: Set up Go
+      uses: actions/setup-go@v5
+      with:
+        go-version: ${{ env.GO_VERSION }}
+
+    - name: Install benchstat
+      run: go install golang.org/x/perf/cmd/benchstat@latest
+
+    - name: Download current results
+      uses: actions/download-artifact@v4
+      with:
+        pattern: benchmark-*-${{ github.sha }}
+        path: current-results
+
+    - name: Determine baseline SHA
+      id: baseline
+      run: |
+        if [ -n "${{ github.event.inputs.baseline_sha }}" ]; then
+          echo "sha=${{ github.event.inputs.baseline_sha }}" >> $GITHUB_OUTPUT
+        else
+          # Get SHA from 7 days ago
+          echo "sha=$(git rev-list -n 1 --before='7 days ago' HEAD)" >> $GITHUB_OUTPUT
+        fi
+
+    - name: Download baseline results
+      uses: dawidd6/action-download-artifact@v3
+      with:
+        workflow: nightly-performance.yml
+        commit: ${{ steps.baseline.outputs.sha }}
+        path: baseline-results
+      continue-on-error: true
+
+    - name: Compare results
+      run: |
+        mkdir -p comparison-reports
+
+        for bench in KVEventProcessingLatency KVEventThroughput KVEventMemoryUsage KVEventConcurrency KVEventLargePrefix KVEventBurstLoad KVEventRoutingDecision; do
+          if [ -f "baseline-results/benchmark-${bench}-${{ steps.baseline.outputs.sha }}/${bench}-results.txt" ] && \
+             [ -f "current-results/benchmark-${bench}-${{ github.sha }}/${bench}-results.txt" ]; then
+            benchstat \
+              "baseline-results/benchmark-${bench}-${{ steps.baseline.outputs.sha }}/${bench}-results.txt" \
+              "current-results/benchmark-${bench}-${{ github.sha }}/${bench}-results.txt" \
+              > "comparison-reports/${bench}-comparison.txt"
+
+            echo "## ${bench} Comparison" >> comparison-reports/summary.md
+            echo '```' >> comparison-reports/summary.md
+            cat "comparison-reports/${bench}-comparison.txt" >> comparison-reports/summary.md
+            echo '```' >> comparison-reports/summary.md
+            echo "" >> comparison-reports/summary.md
+          else
+            echo "## ${bench} Comparison" >> comparison-reports/summary.md
+            echo "No baseline data available for comparison" >> comparison-reports/summary.md
+            echo "" >> comparison-reports/summary.md
+          fi
+        done
+
+    - name: Check for regressions
+      id: regression-check
+      run: |
+        cd comparison-reports
+        regression_found=false
+
+        for file in *-comparison.txt; do
+          if [ -f "$file" ]; then
+            # Check for significant regression (>10% slower)
+            if grep -E "\\+[1-9][0-9]\\..*%" "$file" > /dev/null; then
+              regression_found=true
+              echo "Performance regression detected in $file"
+            fi
+          fi
+        done
+
+        echo "regression_found=$regression_found" >> $GITHUB_OUTPUT
+
+    - name: Upload comparison report
+      uses: actions/upload-artifact@v4
+      with:
+        name: performance-comparison-${{ github.sha }}
+        path: comparison-reports/
+        retention-days: ${{ env.RESULTS_RETENTION_DAYS }}
+
+  generate-dashboard:
+    name: Generate Performance Dashboard
+    runs-on: ubuntu-latest
+    needs: [performance-benchmarks, compare-results]
+    steps:
+    - uses: actions/checkout@v4
+
+    - name: Download all results
+      uses: actions/download-artifact@v4
+      with:
+        path: all-results
+
+    - name: Generate dashboard data
+      run: |
+        mkdir -p dashboard
+
+        # Create JSON data for visualization
+        cat > dashboard/performance-data.json << 'EOF'
+        {
+          "timestamp": "$(date -u +%Y-%m-%dT%H:%M:%SZ)",
+          "commit": "${{ github.sha }}",
+          "benchmarks": []
+        }
+        EOF
+
+        # Process benchmark results and add to JSON
+        # This is a placeholder - implement actual data processing
+
+    - name: Create dashboard HTML
+      run: |
+        cat > dashboard/index.html << 'EOF'
+        <!DOCTYPE html>
+        <html>
+        <head>
+            <title>KV Sync Performance Dashboard</title>
+            <script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
+            <style>
+                body { font-family: Arial, sans-serif; margin: 20px; }
+                .chart-container { width: 80%; margin: 20px auto; }
+                h1, h2 { text-align: center; }
+                .metric { background: #f0f0f0; padding: 10px; margin: 10px; border-radius: 5px; }
+            </style>
+        </head>
+        <body>
+            <h1>KV Event Sync Performance Dashboard</h1>
+            <p>Last updated: ${{ github.sha }} at $(date -u)</p>
+
+            <div class="metric">
+                <h2>Event Processing Latency</h2>
+                <canvas id="latencyChart"></canvas>
+            </div>
+
+            <div class="metric">
+                <h2>Throughput (Events/sec)</h2>
+                <canvas id="throughputChart"></canvas>
+            </div>
+
+            <div class="metric">
+                <h2>Memory Usage</h2>
+                <canvas id="memoryChart"></canvas>
+            </div>
+
+            <script>
+                // Placeholder for chart initialization
+                // Would load actual data from performance-data.json
+            </script>
+        </body>
+        </html>
+        EOF
+
+    - name: Deploy dashboard to GitHub Pages
+      if: github.ref == 'refs/heads/main'
+      uses: peaceiris/actions-gh-pages@v3
+      with:
+        github_token: ${{ secrets.GITHUB_TOKEN }}
+        publish_dir: ./dashboard
+        destination_dir: performance/${{ github.sha }}
+
+  notify-regressions:
+    name: Notify Performance Regressions
+    runs-on: ubuntu-latest
+    needs: compare-results
+    if: needs.compare-results.outputs.regression_found == 'true'
+    steps:
+    - name: Create issue for regression
+      uses: actions/github-script@v7
+      with:
+        script: |
+          const title = `Performance Regression Detected - ${new Date().toISOString().split('T')[0]}`;
+          const body = `A performance regression was detected in the nightly benchmarks.
+
+          **Commit:** ${{ github.sha }}
+          **Workflow Run:** ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+
+          Please review the [performance comparison report](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}) for details.
+
+          cc @vllm-project/aibrix-performance`;
+
+          await github.rest.issues.create({
+            owner: context.repo.owner,
+            repo: context.repo.repo,
+            title: title,
+            body: body,
+            labels: ['performance', 'regression']
+          });
+
+  cleanup-old-results:
+    name: Cleanup Old Results
+    runs-on: ubuntu-latest
+    if: github.event_name == 'schedule'
+    steps:
+    - name: Delete old artifacts
+      uses: actions/github-script@v7
+      with:
+        script: |
+          const days = 90;
+          const ms_per_day = 24 * 60 * 60 * 1000;
+          const cutoff_date = new Date(Date.now() - days * ms_per_day);
+
+          const artifacts = await github.rest.actions.listArtifactsForRepo({
+            owner: context.repo.owner,
+            repo: context.repo.repo,
+            per_page: 100
+          });
+
+          for (const artifact of artifacts.data.artifacts) {
+            if (artifact.name.startsWith('benchmark-') && new Date(artifact.created_at) < cutoff_date) {
+              await github.rest.actions.deleteArtifact({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                artifact_id: artifact.id
+              });
+              console.log(`Deleted old artifact: ${artifact.name}`);
+            }
+          }
diff --git a/docs/source/features/kv-event-sync.rst b/docs/source/features/kv-event-sync.rst
@@ -194,11 +194,12 @@ Published when new KV cache blocks are stored:
 .. code-block:: go
 
    type BlockStoredEvent struct {
-       BlockHashes  []int64    // Hash values of stored blocks
-       TokenIDs     [][]int32  // Token IDs for each block
-       ModelName    string     // Model identifier
-       LoraID       int64      // LoRA adapter ID (-1 if none)
-       PodName      string     // Source pod name
+       BlockHashes     []int64    // Hash values of stored blocks
+       TokenIDs        [][]byte   // Token IDs for each block (each token is a big-endian uint32)
+       ModelName       string     // Model identifier
+       LoraID          int64      // LoRA adapter ID (-1 if none)
+       SourcePod       string     // Source pod name
+       ParentBlockHash *int64     // Hash value of the parent block or nil
    }
 
 BlockRemovedEvent
@@ -212,7 +213,7 @@ Published when blocks are removed from cache:
        BlockHashes  []int64    // Hash values of removed blocks
        ModelName    string     // Model identifier
        LoraID       int64      // LoRA adapter ID
-       PodName      string     // Source pod name
+       SourcePod    string     // Source pod name
    }
 
 Troubleshooting
@@ -321,4 +322,4 @@ Best Practices
 
    - Use dedicated network for ZMQ traffic if possible
    - Configure appropriate timeouts based on network latency
-   - Plan for graceful degradation if KV sync fails
+   - Plan for graceful degradation if KV sync fails
diff --git a/go.mod b/go.mod
@@ -16,16 +16,16 @@ require (
 	github.com/onsi/gomega v1.35.1
 	github.com/open-policy-agent/cert-controller v0.12.0
 	github.com/openai/openai-go v1.12.0
-	github.com/pebbe/zmq4 v1.2.10
+	github.com/pebbe/zmq4 v1.4.0
 	github.com/pkoukk/tiktoken-go v0.1.7
 	github.com/pkoukk/tiktoken-go-loader v0.0.1
 	github.com/prometheus/client_golang v1.20.2
 	github.com/prometheus/client_model v0.6.1
 	github.com/prometheus/common v0.55.0
 	github.com/ray-project/kuberay/ray-operator v1.2.1
 	github.com/redis/go-redis/v9 v9.6.1
-	github.com/shamaton/msgpack/v2 v2.1.1
 	github.com/stretchr/testify v1.10.0
+	github.com/vmihailenco/msgpack/v5 v5.4.1
 	go.uber.org/atomic v1.11.0
 	google.golang.org/grpc v1.65.0
 	k8s.io/api v0.31.8
@@ -91,6 +91,7 @@ require (
 	github.com/tidwall/match v1.1.1 // indirect
 	github.com/tidwall/pretty v1.2.1 // indirect
 	github.com/tidwall/sjson v1.2.5 // indirect
+	github.com/vmihailenco/tagparser/v2 v2.0.0 // indirect
 	github.com/x448/float16 v0.8.4 // indirect
 	go.uber.org/multierr v1.11.0 // indirect
 	go.uber.org/zap v1.27.0 // indirect

diff --git a/go.sum b/go.sum
@@ -134,8 +134,8 @@ github.com/open-policy-agent/frameworks/constraint v0.0.0-20241101234656-e78c8ab
 github.com/open-policy-agent/frameworks/constraint v0.0.0-20241101234656-e78c8abd754a/go.mod h1:tI7nc6H6os2UYZRvSm9Y7bq4oMoXqhwA0WfnqKpoAgc=
 github.com/openai/openai-go v1.12.0 h1:NBQCnXzqOTv5wsgNC36PrFEiskGfO5wccfCWDo9S1U0=
 github.com/openai/openai-go v1.12.0/go.mod h1:g461MYGXEXBVdV5SaR/5tNzNbSfwTBBefwc+LlDCK0Y=
-github.com/pebbe/zmq4 v1.2.10 h1:wQkqRZ3CZeABIeidr3e8uQZMMH5YAykA/WN0L5zkd1c=
-github.com/pebbe/zmq4 v1.2.10/go.mod h1:nqnPueOapVhE2wItZ0uOErngczsJdLOGkebMxaO8r48=
+github.com/pebbe/zmq4 v1.4.0 h1:gO5P92Ayl8GXpPZdYcD62Cwbq0slSBVVQRIXwGSJ6eQ=
+github.com/pebbe/zmq4 v1.4.0/go.mod h1:nqnPueOapVhE2wItZ0uOErngczsJdLOGkebMxaO8r48=
 github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
 github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
 github.com/pkoukk/tiktoken-go v0.1.7 h1:qOBHXX4PHtvIvmOtyg1EeKlwFRiMKAcoMp4Q+bLQDmw=
@@ -159,8 +159,6 @@ github.com/redis/go-redis/v9 v9.6.1 h1:HHDteefn6ZkTtY5fGUE8tj8uy85AHk6zP7CpzIAM0
 github.com/redis/go-redis/v9 v9.6.1/go.mod h1:0C0c6ycQsdpVNQpxb1njEQIqkx5UcsM8FJCQLgE9+RA=
 github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8=
 github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99yedzYV+kq4uf4=
-github.com/shamaton/msgpack/v2 v2.1.1 h1:gAMxOtVJz93R0EwewwUc8tx30n34aV6BzJuwHE8ogAk=
-github.com/shamaton/msgpack/v2 v2.1.1/go.mod h1:aTUEmh31ziGX1Ml7wMPLVY0f4vT3CRsCvZRoSCs+VGg=
 github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=
 github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
 github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
@@ -180,6 +178,10 @@ github.com/tidwall/pretty v1.2.1 h1:qjsOFOWWQl+N3RsoF5/ssm1pHmJJwhjlSbZ51I6wMl4=
 github.com/tidwall/pretty v1.2.1/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU=
 github.com/tidwall/sjson v1.2.5 h1:kLy8mja+1c9jlljvWTlSazM7cKDRfJuR/bOJhcY5NcY=
 github.com/tidwall/sjson v1.2.5/go.mod h1:Fvgq9kS/6ociJEDnK0Fk1cpYF4FIW6ZF7LAe+6jwd28=
+github.com/vmihailenco/msgpack/v5 v5.4.1 h1:cQriyiUvjTwOHg8QZaPihLWeRAAVoCpE00IUPn0Bjt8=
+github.com/vmihailenco/msgpack/v5 v5.4.1/go.mod h1:GaZTsDaehaPpQVyxrf5mtQlH+pc21PIudVV/E3rRQok=
+github.com/vmihailenco/tagparser/v2 v2.0.0 h1:y09buUbR+b5aycVFQs/g70pqKVZNBmxwAhO7/IwNM9g=
+github.com/vmihailenco/tagparser/v2 v2.0.0/go.mod h1:Wri+At7QHww0WTrCBeu4J6bNtoV6mEfg5OIWRZA9qds=
 github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM=
 github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg=
 github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=