forked from FluidInference/FluidAudio
-
Notifications
You must be signed in to change notification settings - Fork 0
198 lines (170 loc) · 10.5 KB
/
offline-pipeline.yml
File metadata and controls
198 lines (170 loc) · 10.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
# GitHub Actions workflow: runs the FluidAudio offline (VBx batch) speaker
# diarization benchmark on each pull request against main, then posts or
# updates a sticky PR comment with DER/RTFx and per-stage timing results.
name: Offline Speaker diarization pipeline

on:
  pull_request:
    branches: [main]
    types: [opened, synchronize, reopened]

# Cancel any in-flight run for the same ref when a newer one starts.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  benchmark:
    runs-on: macos-latest
    permissions:
      contents: read
      pull-requests: write  # required to create/update the results comment
    steps:
      - name: Checkout code
        uses: actions/checkout@v5

      - name: Setup Swift 6.1
        uses: swift-actions/setup-swift@v2
        with:
          swift-version: "6.1"

      - name: Build package
        run: swift build

      - name: Run Offline Pipeline Benchmark
        id: benchmark
        timeout-minutes: 30
        run: |
          echo "Running offline VBx pipeline benchmark..."
          # Record start time
          BENCHMARK_START=$(date +%s)
          # '|| true' keeps the step alive on a non-zero exit: Actions runs
          # this script with 'bash -e', so without it a failed benchmark would
          # abort before the file check below could record SUCCESS=false.
          swift run fluidaudio diarization-benchmark --mode offline --auto-download --single-file ES2004a --output offline_results.json || true
          # Check if results file was generated
          if [ -f offline_results.json ]; then
            echo "SUCCESS=true" >> "$GITHUB_OUTPUT"
          else
            echo "Benchmark failed - no results file generated"
            echo "SUCCESS=false" >> "$GITHUB_OUTPUT"
          fi
          # Calculate execution time
          BENCHMARK_END=$(date +%s)
          EXECUTION_TIME=$((BENCHMARK_END - BENCHMARK_START))
          EXECUTION_MINS=$((EXECUTION_TIME / 60))
          EXECUTION_SECS=$((EXECUTION_TIME % 60))
          echo "EXECUTION_TIME=${EXECUTION_MINS}m ${EXECUTION_SECS}s" >> "$GITHUB_OUTPUT"

      - name: Show offline_results.json
        if: always()
        run: |
          echo "--- offline_results.json ---"
          cat offline_results.json || echo "offline_results.json not found"
          echo "-----------------------------"

      - name: Extract benchmark metrics with jq
        id: extract
        run: |
          # The output is now an array, so we need to access the first element
          DER=$(jq '.[0].der' offline_results.json)
          RTF=$(jq '.[0].rtfx' offline_results.json)
          DURATION="1049" # ES2004a duration in seconds
          SPEAKER_COUNT=$(jq '.[0].detectedSpeakers' offline_results.json)
          # Extract detailed timing information
          TOTAL_TIME=$(jq '.[0].timings.totalProcessingSeconds' offline_results.json)
          MODEL_DOWNLOAD_TIME=$(jq '.[0].timings.modelDownloadSeconds' offline_results.json)
          MODEL_COMPILE_TIME=$(jq '.[0].timings.modelCompilationSeconds' offline_results.json)
          AUDIO_LOAD_TIME=$(jq '.[0].timings.audioLoadingSeconds' offline_results.json)
          SEGMENTATION_TIME=$(jq '.[0].timings.segmentationSeconds' offline_results.json)
          EMBEDDING_TIME=$(jq '.[0].timings.embeddingExtractionSeconds' offline_results.json)
          CLUSTERING_TIME=$(jq '.[0].timings.speakerClusteringSeconds' offline_results.json)
          INFERENCE_TIME=$(jq '.[0].timings.totalInferenceSeconds' offline_results.json)
          echo "DER=${DER}" >> "$GITHUB_OUTPUT"
          echo "RTF=${RTF}" >> "$GITHUB_OUTPUT"
          echo "DURATION=${DURATION}" >> "$GITHUB_OUTPUT"
          echo "SPEAKER_COUNT=${SPEAKER_COUNT}" >> "$GITHUB_OUTPUT"
          echo "TOTAL_TIME=${TOTAL_TIME}" >> "$GITHUB_OUTPUT"
          echo "MODEL_DOWNLOAD_TIME=${MODEL_DOWNLOAD_TIME}" >> "$GITHUB_OUTPUT"
          echo "MODEL_COMPILE_TIME=${MODEL_COMPILE_TIME}" >> "$GITHUB_OUTPUT"
          echo "AUDIO_LOAD_TIME=${AUDIO_LOAD_TIME}" >> "$GITHUB_OUTPUT"
          echo "SEGMENTATION_TIME=${SEGMENTATION_TIME}" >> "$GITHUB_OUTPUT"
          echo "EMBEDDING_TIME=${EMBEDDING_TIME}" >> "$GITHUB_OUTPUT"
          echo "CLUSTERING_TIME=${CLUSTERING_TIME}" >> "$GITHUB_OUTPUT"
          echo "INFERENCE_TIME=${INFERENCE_TIME}" >> "$GITHUB_OUTPUT"

      - name: Comment PR with Offline Pipeline Results
        if: always()
        uses: actions/github-script@v7
        with:
          script: |
            const der = parseFloat('${{ steps.extract.outputs.DER }}');
            const rtf = parseFloat('${{ steps.extract.outputs.RTF }}');
            const duration = parseFloat('${{ steps.extract.outputs.DURATION }}').toFixed(1);
            const speakerCount = '${{ steps.extract.outputs.SPEAKER_COUNT }}';
            const totalTime = parseFloat('${{ steps.extract.outputs.TOTAL_TIME }}');
            const inferenceTime = parseFloat('${{ steps.extract.outputs.INFERENCE_TIME }}');
            const modelDownloadTime = parseFloat('${{ steps.extract.outputs.MODEL_DOWNLOAD_TIME }}');
            const modelCompileTime = parseFloat('${{ steps.extract.outputs.MODEL_COMPILE_TIME }}');
            const audioLoadTime = parseFloat('${{ steps.extract.outputs.AUDIO_LOAD_TIME }}');
            const segmentationTime = parseFloat('${{ steps.extract.outputs.SEGMENTATION_TIME }}');
            const embeddingTime = parseFloat('${{ steps.extract.outputs.EMBEDDING_TIME }}');
            const clusteringTime = parseFloat('${{ steps.extract.outputs.CLUSTERING_TIME }}');
            const executionTime = '${{ steps.benchmark.outputs.EXECUTION_TIME }}' || 'N/A';
            let comment = '## Offline VBx Pipeline Results\n\n';
            comment += '### Speaker Diarization Performance (VBx Batch Mode)\n';
            comment += '_Optimal clustering with Hungarian algorithm for maximum accuracy_\n\n';
            comment += '| Metric | Value | Target | Status | Description |\n';
            comment += '|--------|-------|--------|---------|-------------|\n';
            comment += `| **DER** | **${der.toFixed(1)}%** | <20% | ${der < 20 ? '✅' : '⚠️'} | Diarization Error Rate (lower is better) |\n`;
            comment += `| **RTFx** | **${rtf.toFixed(2)}x** | >1.0x | ${rtf > 1.0 ? '✅' : '⚠️'} | Real-Time Factor (higher is faster) |\n\n`;
            comment += '### Offline VBx Pipeline Timing Breakdown\n';
            comment += '_Time spent in each stage of batch diarization_\n\n';
            comment += '| Stage | Time (s) | % | Description |\n';
            comment += '|-------|----------|---|-------------|\n';
            comment += `| Model Download | ${modelDownloadTime.toFixed(3)} | ${(modelDownloadTime/totalTime*100).toFixed(1)} | Fetching diarization models |\n`;
            comment += `| Model Compile | ${modelCompileTime.toFixed(3)} | ${(modelCompileTime/totalTime*100).toFixed(1)} | CoreML compilation |\n`;
            comment += `| Audio Load | ${audioLoadTime.toFixed(3)} | ${(audioLoadTime/totalTime*100).toFixed(1)} | Loading audio file |\n`;
            comment += `| Segmentation | ${segmentationTime.toFixed(3)} | ${(segmentationTime/totalTime*100).toFixed(1)} | VAD + speech detection |\n`;
            comment += `| Embedding | ${embeddingTime.toFixed(3)} | ${(embeddingTime/totalTime*100).toFixed(1)} | Speaker embedding extraction |\n`;
            comment += `| Clustering (VBx) | ${clusteringTime.toFixed(3)} | ${(clusteringTime/totalTime*100).toFixed(1)} | Hungarian algorithm + VBx clustering |\n`;
            comment += `| **Total** | **${totalTime.toFixed(3)}** | **100** | **Full VBx pipeline** |\n\n`;
            comment += '### Speaker Diarization Research Comparison\n';
            comment += '_Offline VBx achieves competitive accuracy with batch processing_\n\n';
            comment += '| Method | DER | Mode | Description |\n';
            comment += '|--------|-----|------|-------------|\n';
            comment += '| **FluidAudio (Offline)** | **' + der.toFixed(1) + '%** | **VBx Batch** | **On-device CoreML with optimal clustering** |\n';
            comment += '| FluidAudio (Streaming) | 17.7% | Chunk-based | First-occurrence speaker mapping |\n';
            comment += '| Research baseline | 18-30% | Various | Standard dataset performance |\n\n';
            comment += '**Pipeline Details**:\n';
            comment += '- **Mode**: Offline VBx with Hungarian algorithm for optimal speaker-to-cluster assignment\n';
            comment += '- **Segmentation**: VAD-based voice activity detection\n';
            comment += '- **Embeddings**: WeSpeaker-compatible speaker embeddings\n';
            comment += '- **Clustering**: PowerSet with VBx refinement\n';
            comment += '- **Accuracy**: Higher than streaming due to optimal post-hoc mapping\n\n';
            comment += `<sub>🎯 **Offline VBx Test** • AMI Corpus ES2004a • ${duration}s meeting audio • ${inferenceTime.toFixed(1)}s processing • Test runtime: ${executionTime} • ${new Date().toLocaleString('en-US', { timeZone: 'America/New_York', year: 'numeric', month: '2-digit', day: '2-digit', hour: '2-digit', minute: '2-digit', hour12: true })} EST</sub>\n\n`;
            // Add hidden identifier for reliable comment detection
            comment += '<!-- fluidaudio-offline-pipeline -->';
            try {
              // First, try to find existing benchmark comment
              const comments = await github.rest.issues.listComments({
                issue_number: context.issue.number,
                owner: context.repo.owner,
                repo: context.repo.repo,
              });
              // Look for existing offline pipeline comment (identified by the
              // hidden tag). Parameter named 'c' so it does not shadow the
              // 'comment' body string built above.
              const existingComment = comments.data.find(c => {
                const isBot = c.user.type === 'Bot' ||
                  c.user.login === 'github-actions[bot]' ||
                  c.user.login.includes('[bot]');
                const hasIdentifier = c.body.includes('<!-- fluidaudio-offline-pipeline -->');
                const hasHeader = c.body.includes('## Offline VBx Pipeline Results');
                return isBot && (hasIdentifier || hasHeader);
              });
              if (existingComment) {
                // Update existing comment
                await github.rest.issues.updateComment({
                  comment_id: existingComment.id,
                  owner: context.repo.owner,
                  repo: context.repo.repo,
                  body: comment
                });
                console.log('Successfully updated existing offline pipeline comment');
              } else {
                // Create new comment if none exists
                await github.rest.issues.createComment({
                  issue_number: context.issue.number,
                  owner: context.repo.owner,
                  repo: context.repo.repo,
                  body: comment
                });
                console.log('Successfully posted new offline pipeline results comment');
              }
            } catch (error) {
              console.error('Failed to update/post comment:', error.message);
              // Don't fail the workflow just because commenting failed
            }