forked from FluidInference/FluidAudio
-
Notifications
You must be signed in to change notification settings - Fork 0
198 lines (170 loc) · 10.5 KB
/
offline-pipeline.yml
File metadata and controls
198 lines (170 loc) · 10.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
# GitHub Actions workflow: runs the FluidAudio offline (VBx batch) speaker
# diarization benchmark on each pull request against main, then posts or
# updates a sticky PR comment with DER/RTFx and per-stage timing results.
name: Offline Speaker diarization pipeline

on:
  pull_request:
    branches: [main]
    types: [opened, synchronize, reopened]

# Cancel any in-flight run for the same ref when a newer one starts.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  benchmark:
    runs-on: macos-latest
    permissions:
      contents: read
      pull-requests: write  # required to create/update the results comment
    steps:
      - name: Checkout code
        uses: actions/checkout@v5

      - name: Setup Swift 6.1
        uses: swift-actions/setup-swift@v2
        with:
          swift-version: "6.1"

      - name: Build package
        run: swift build

      - name: Run Offline Pipeline Benchmark
        id: benchmark
        timeout-minutes: 30
        run: |
          echo "Running offline VBx pipeline benchmark..."
          # Record start time
          BENCHMARK_START=$(date +%s)
          # '|| true' keeps the step alive on a non-zero exit: Actions runs
          # this script with 'bash -e', so without it a failed benchmark would
          # abort before the file check below could record SUCCESS=false.
          swift run fluidaudio diarization-benchmark --mode offline --auto-download --single-file ES2004a --output offline_results.json || true
          # Check if results file was generated
          if [ -f offline_results.json ]; then
            echo "SUCCESS=true" >> "$GITHUB_OUTPUT"
          else
            echo "Benchmark failed - no results file generated"
            echo "SUCCESS=false" >> "$GITHUB_OUTPUT"
          fi
          # Calculate execution time
          BENCHMARK_END=$(date +%s)
          EXECUTION_TIME=$((BENCHMARK_END - BENCHMARK_START))
          EXECUTION_MINS=$((EXECUTION_TIME / 60))
          EXECUTION_SECS=$((EXECUTION_TIME % 60))
          echo "EXECUTION_TIME=${EXECUTION_MINS}m ${EXECUTION_SECS}s" >> "$GITHUB_OUTPUT"

      - name: Show offline_results.json
        if: always()
        run: |
          echo "--- offline_results.json ---"
          cat offline_results.json || echo "offline_results.json not found"
          echo "-----------------------------"

      - name: Extract benchmark metrics with jq
        id: extract
        run: |
          # The output is now an array, so we need to access the first element
          DER=$(jq '.[0].der' offline_results.json)
          RTF=$(jq '.[0].rtfx' offline_results.json)
          DURATION="1049" # ES2004a duration in seconds
          SPEAKER_COUNT=$(jq '.[0].detectedSpeakers' offline_results.json)
          # Extract detailed timing information
          TOTAL_TIME=$(jq '.[0].timings.totalProcessingSeconds' offline_results.json)
          MODEL_DOWNLOAD_TIME=$(jq '.[0].timings.modelDownloadSeconds' offline_results.json)
          MODEL_COMPILE_TIME=$(jq '.[0].timings.modelCompilationSeconds' offline_results.json)
          AUDIO_LOAD_TIME=$(jq '.[0].timings.audioLoadingSeconds' offline_results.json)
          SEGMENTATION_TIME=$(jq '.[0].timings.segmentationSeconds' offline_results.json)
          EMBEDDING_TIME=$(jq '.[0].timings.embeddingExtractionSeconds' offline_results.json)
          CLUSTERING_TIME=$(jq '.[0].timings.speakerClusteringSeconds' offline_results.json)
          INFERENCE_TIME=$(jq '.[0].timings.totalInferenceSeconds' offline_results.json)
          echo "DER=${DER}" >> "$GITHUB_OUTPUT"
          echo "RTF=${RTF}" >> "$GITHUB_OUTPUT"
          echo "DURATION=${DURATION}" >> "$GITHUB_OUTPUT"
          echo "SPEAKER_COUNT=${SPEAKER_COUNT}" >> "$GITHUB_OUTPUT"
          echo "TOTAL_TIME=${TOTAL_TIME}" >> "$GITHUB_OUTPUT"
          echo "MODEL_DOWNLOAD_TIME=${MODEL_DOWNLOAD_TIME}" >> "$GITHUB_OUTPUT"
          echo "MODEL_COMPILE_TIME=${MODEL_COMPILE_TIME}" >> "$GITHUB_OUTPUT"
          echo "AUDIO_LOAD_TIME=${AUDIO_LOAD_TIME}" >> "$GITHUB_OUTPUT"
          echo "SEGMENTATION_TIME=${SEGMENTATION_TIME}" >> "$GITHUB_OUTPUT"
          echo "EMBEDDING_TIME=${EMBEDDING_TIME}" >> "$GITHUB_OUTPUT"
          echo "CLUSTERING_TIME=${CLUSTERING_TIME}" >> "$GITHUB_OUTPUT"
          echo "INFERENCE_TIME=${INFERENCE_TIME}" >> "$GITHUB_OUTPUT"

      - name: Comment PR with Offline Pipeline Results
        if: always()
        uses: actions/github-script@v7
        with:
          script: |
            const der = parseFloat('${{ steps.extract.outputs.DER }}');
            const rtf = parseFloat('${{ steps.extract.outputs.RTF }}');
            const duration = parseFloat('${{ steps.extract.outputs.DURATION }}').toFixed(1);
            const speakerCount = '${{ steps.extract.outputs.SPEAKER_COUNT }}';
            const totalTime = parseFloat('${{ steps.extract.outputs.TOTAL_TIME }}');
            const inferenceTime = parseFloat('${{ steps.extract.outputs.INFERENCE_TIME }}');
            const modelDownloadTime = parseFloat('${{ steps.extract.outputs.MODEL_DOWNLOAD_TIME }}');
            const modelCompileTime = parseFloat('${{ steps.extract.outputs.MODEL_COMPILE_TIME }}');
            const audioLoadTime = parseFloat('${{ steps.extract.outputs.AUDIO_LOAD_TIME }}');
            const segmentationTime = parseFloat('${{ steps.extract.outputs.SEGMENTATION_TIME }}');
            const embeddingTime = parseFloat('${{ steps.extract.outputs.EMBEDDING_TIME }}');
            const clusteringTime = parseFloat('${{ steps.extract.outputs.CLUSTERING_TIME }}');
            const executionTime = '${{ steps.benchmark.outputs.EXECUTION_TIME }}' || 'N/A';
            let comment = '## Offline VBx Pipeline Results\n\n';
            comment += '### Speaker Diarization Performance (VBx Batch Mode)\n';
            comment += '_Optimal clustering with Hungarian algorithm for maximum accuracy_\n\n';
            comment += '| Metric | Value | Target | Status | Description |\n';
            comment += '|--------|-------|--------|---------|-------------|\n';
            comment += `| **DER** | **${der.toFixed(1)}%** | <20% | ${der < 20 ? '✅' : '⚠️'} | Diarization Error Rate (lower is better) |\n`;
            comment += `| **RTFx** | **${rtf.toFixed(2)}x** | >1.0x | ${rtf > 1.0 ? '✅' : '⚠️'} | Real-Time Factor (higher is faster) |\n\n`;
            comment += '### Offline VBx Pipeline Timing Breakdown\n';
            comment += '_Time spent in each stage of batch diarization_\n\n';
            comment += '| Stage | Time (s) | % | Description |\n';
            comment += '|-------|----------|---|-------------|\n';
            comment += `| Model Download | ${modelDownloadTime.toFixed(3)} | ${(modelDownloadTime/totalTime*100).toFixed(1)} | Fetching diarization models |\n`;
            comment += `| Model Compile | ${modelCompileTime.toFixed(3)} | ${(modelCompileTime/totalTime*100).toFixed(1)} | CoreML compilation |\n`;
            comment += `| Audio Load | ${audioLoadTime.toFixed(3)} | ${(audioLoadTime/totalTime*100).toFixed(1)} | Loading audio file |\n`;
            comment += `| Segmentation | ${segmentationTime.toFixed(3)} | ${(segmentationTime/totalTime*100).toFixed(1)} | VAD + speech detection |\n`;
            comment += `| Embedding | ${embeddingTime.toFixed(3)} | ${(embeddingTime/totalTime*100).toFixed(1)} | Speaker embedding extraction |\n`;
            comment += `| Clustering (VBx) | ${clusteringTime.toFixed(3)} | ${(clusteringTime/totalTime*100).toFixed(1)} | Hungarian algorithm + VBx clustering |\n`;
            comment += `| **Total** | **${totalTime.toFixed(3)}** | **100** | **Full VBx pipeline** |\n\n`;
            comment += '### Speaker Diarization Research Comparison\n';
            comment += '_Offline VBx achieves competitive accuracy with batch processing_\n\n';
            comment += '| Method | DER | Mode | Description |\n';
            comment += '|--------|-----|------|-------------|\n';
            comment += '| **FluidAudio (Offline)** | **' + der.toFixed(1) + '%** | **VBx Batch** | **On-device CoreML with optimal clustering** |\n';
            comment += '| FluidAudio (Streaming) | 17.7% | Chunk-based | First-occurrence speaker mapping |\n';
            comment += '| Research baseline | 18-30% | Various | Standard dataset performance |\n\n';
            comment += '**Pipeline Details**:\n';
            comment += '- **Mode**: Offline VBx with Hungarian algorithm for optimal speaker-to-cluster assignment\n';
            comment += '- **Segmentation**: VAD-based voice activity detection\n';
            comment += '- **Embeddings**: WeSpeaker-compatible speaker embeddings\n';
            comment += '- **Clustering**: PowerSet with VBx refinement\n';
            comment += '- **Accuracy**: Higher than streaming due to optimal post-hoc mapping\n\n';
            comment += `<sub>🎯 **Offline VBx Test** • AMI Corpus ES2004a • ${duration}s meeting audio • ${inferenceTime.toFixed(1)}s processing • Test runtime: ${executionTime} • ${new Date().toLocaleString('en-US', { timeZone: 'America/New_York', year: 'numeric', month: '2-digit', day: '2-digit', hour: '2-digit', minute: '2-digit', hour12: true })} EST</sub>\n\n`;
            // Add hidden identifier for reliable comment detection
            comment += '<!-- fluidaudio-offline-pipeline -->';
            try {
              // First, try to find existing benchmark comment
              const comments = await github.rest.issues.listComments({
                issue_number: context.issue.number,
                owner: context.repo.owner,
                repo: context.repo.repo,
              });
              // Look for existing offline pipeline comment (identified by the
              // hidden tag). Parameter named 'c' so it does not shadow the
              // 'comment' body string built above.
              const existingComment = comments.data.find(c => {
                const isBot = c.user.type === 'Bot' ||
                  c.user.login === 'github-actions[bot]' ||
                  c.user.login.includes('[bot]');
                const hasIdentifier = c.body.includes('<!-- fluidaudio-offline-pipeline -->');
                const hasHeader = c.body.includes('## Offline VBx Pipeline Results');
                return isBot && (hasIdentifier || hasHeader);
              });
              if (existingComment) {
                // Update existing comment
                await github.rest.issues.updateComment({
                  comment_id: existingComment.id,
                  owner: context.repo.owner,
                  repo: context.repo.repo,
                  body: comment
                });
                console.log('Successfully updated existing offline pipeline comment');
              } else {
                // Create new comment if none exists
                await github.rest.issues.createComment({
                  issue_number: context.issue.number,
                  owner: context.repo.owner,
                  repo: context.repo.repo,
                  body: comment
                });
                console.log('Successfully posted new offline pipeline results comment');
              }
            } catch (error) {
              console.error('Failed to update/post comment:', error.message);
              // Don't fail the workflow just because commenting failed
            }