Skip to content

Commit a2b484c

Browse files
committed
ops: Add OTEL metric for STT latency
1 parent beb893e commit a2b484c

File tree

2 files changed

+23
-2
lines changed

2 files changed

+23
-2
lines changed

app/helpers/call_llm.py

+20-2
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
SpanAttributeEnum,
4040
call_answer_latency,
4141
call_cutoff_latency,
42+
call_stt_complete_latency,
4243
gauge_set,
4344
tracer,
4445
)
@@ -233,6 +234,17 @@ async def _commit_answer(
233234
if wait:
234235
await last_chat
235236

237+
async def _compute_stt_metrics() -> None:
238+
"""
239+
Report the recognition latency.
240+
"""
241+
start = time.monotonic()
242+
await stt_complete_gate.wait()
243+
gauge_set(
244+
metric=call_stt_complete_latency,
245+
value=time.monotonic() - start,
246+
)
247+
236248
async def _response_callback(_retry: bool = False) -> None:
237249
"""
238250
Triggered when the audio buffer needs to be processed.
@@ -243,6 +255,9 @@ async def _response_callback(_retry: bool = False) -> None:
243255
nonlocal answer_start
244256
answer_start = time.monotonic()
245257

258+
# Report the STT metrics
259+
stt_metrics_task = asyncio.create_task(_compute_stt_metrics())
260+
246261
# Wait for the complete recognition, for 50ms maximum
247262
try:
248263
await asyncio.wait_for(stt_complete_gate.wait(), timeout=0.05)
@@ -277,8 +292,11 @@ async def _response_callback(_retry: bool = False) -> None:
277292
)
278293
)
279294

280-
# Process the response
281-
await _commit_answer(wait=True)
295+
# Process the response and wait for latency metrics
296+
await asyncio.gather(
297+
_commit_answer(wait=False),
298+
stt_metrics_task,
299+
)
282300

283301
# First call
284302
if len(call.messages) <= 1:

app/helpers/monitoring.py

+3
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,8 @@ class SpanMeterEnum(str, Enum):
6767
"""Audio frames in latency in seconds."""
6868
CALL_FRAMES_OUT_LATENCY = "call.frames.out.latency"
6969
"""Audio frames out latency in seconds."""
70+
CALL_STT_COMPLETE_LATENCY = "call.stt.complete.latency"
71+
"""Latency in seconds until speech-to-text recognition completes."""
7072

7173
def counter(
7274
self,
@@ -127,6 +129,7 @@ def gauge(
127129
call_cutoff_latency = SpanMeterEnum.CALL_CUTOFF_LATENCY.gauge("s")
128130
call_frames_in_latency = SpanMeterEnum.CALL_FRAMES_IN_LATENCY.gauge("s")
129131
call_frames_out_latency = SpanMeterEnum.CALL_FRAMES_OUT_LATENCY.gauge("s")
132+
call_stt_complete_latency = SpanMeterEnum.CALL_STT_COMPLETE_LATENCY.gauge("s")
130133

131134

132135
def gauge_set(

0 commit comments

Comments
 (0)