Skip to content

Commit 47d8472

Browse files
pavanputhraclaude
andcommitted
Add OTEL metrics attributes to all links and add metric validation tests (CON-6)
- Thread link.name and vcon.uuid attributes through all existing metric calls in 16 link modules - Add chain.name attribute to main-loop processing-time histogram and vcon-processed counter - Add conserver.link.execution_time histogram per-link in VconChainRequest._process_link() - Add conserver.lib.vcon_redis.get_vcon_not_found counter in VconRedis.get_vcon() - Add failure counters to datatrails, diet, jq_link, post_analysis_to_slack, sampler, webhook, tag_router - Add conserver/tests/ with InMemoryMetricReader fixture and 25 OTEL metric validation tests Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 741a267 commit 47d8472

23 files changed

Lines changed: 784 additions & 70 deletions

File tree

common/lib/vcon_redis.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from typing import Optional
22
from lib.logging_utils import init_logger
3+
from lib.metrics import increment_counter
34
from redis.commands.json.path import Path
45
from redis_mgr import redis
56
from settings import VCON_REDIS_EXPIRY
@@ -49,6 +50,7 @@ def get_vcon(self, vcon_id: str) -> Optional[vcon.Vcon]:
4950
f"vcon:{vcon_id}", Path.root_path()
5051
)
5152
if not vcon_dict:
53+
increment_counter("conserver.lib.vcon_redis.get_vcon_not_found")
5254
return None
5355
_vcon = vcon.Vcon(vcon_dict)
5456
return _vcon

conserver/links/analyze/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -153,14 +153,14 @@ def run(
153153
)
154154
increment_counter(
155155
"conserver.link.openai.analysis_failures",
156-
attributes={"analysis_type": opts['analysis_type']},
156+
attributes={"analysis_type": opts['analysis_type'], "link.name": link_name, "vcon.uuid": vcon_uuid},
157157
)
158158
raise e
159159

160160
record_histogram(
161161
"conserver.link.openai.analysis_time",
162162
time.time() - start,
163-
attributes={"analysis_type": opts['analysis_type']},
163+
attributes={"analysis_type": opts['analysis_type'], "link.name": link_name, "vcon.uuid": vcon_uuid},
164164
)
165165

166166
vendor_schema = {}

conserver/links/analyze_and_label/__init__.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -149,14 +149,14 @@ def run(
149149
increment_counter(
150150
"conserver.link.openai.labels_added",
151151
value=len(labels),
152-
attributes={"analysis_type": opts['analysis_type']},
152+
attributes={"analysis_type": opts['analysis_type'], "link.name": link_name, "vcon.uuid": vcon_uuid},
153153
)
154154

155155
except json.JSONDecodeError as e:
156156
logger.error(f"Failed to parse JSON response for vCon {vcon_uuid}: {e}")
157157
increment_counter(
158158
"conserver.link.openai.json_parse_failures",
159-
attributes={"analysis_type": opts['analysis_type']},
159+
attributes={"analysis_type": opts['analysis_type'], "link.name": link_name, "vcon.uuid": vcon_uuid},
160160
)
161161
# Add the raw text anyway as the analysis
162162
vCon.add_analysis(
@@ -182,14 +182,14 @@ def run(
182182
)
183183
increment_counter(
184184
"conserver.link.openai.analysis_failures",
185-
attributes={"analysis_type": opts['analysis_type']},
185+
attributes={"analysis_type": opts['analysis_type'], "link.name": link_name, "vcon.uuid": vcon_uuid},
186186
)
187187
raise e
188188

189189
record_histogram(
190190
"conserver.link.openai.analysis_time",
191191
time.time() - start,
192-
attributes={"analysis_type": opts['analysis_type']},
192+
attributes={"analysis_type": opts['analysis_type'], "link.name": link_name, "vcon.uuid": vcon_uuid},
193193
)
194194

195195
vcon_redis.store_vcon(vCon)

conserver/links/analyze_vcon/__init__.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -148,10 +148,10 @@ def run(
148148
)
149149
increment_counter(
150150
"conserver.link.openai.invalid_json",
151-
attributes={"analysis_type": opts['analysis_type']},
151+
attributes={"analysis_type": opts['analysis_type'], "link.name": link_name, "vcon.uuid": vcon_uuid},
152152
)
153153
raise ValueError("Invalid JSON response from OpenAI")
154-
154+
155155
except Exception as e:
156156
logger.error(
157157
"Failed to generate analysis for vCon %s after multiple retries: %s",
@@ -160,14 +160,14 @@ def run(
160160
)
161161
increment_counter(
162162
"conserver.link.openai.analysis_failures",
163-
attributes={"analysis_type": opts['analysis_type']},
163+
attributes={"analysis_type": opts['analysis_type'], "link.name": link_name, "vcon.uuid": vcon_uuid},
164164
)
165165
raise e
166166

167167
record_histogram(
168168
"conserver.link.openai.analysis_time",
169169
time.time() - start,
170-
attributes={"analysis_type": opts['analysis_type']},
170+
attributes={"analysis_type": opts['analysis_type'], "link.name": link_name, "vcon.uuid": vcon_uuid},
171171
)
172172

173173
vendor_schema = {}

conserver/links/check_and_tag/__init__.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -182,7 +182,7 @@ def run(
182182
logger.info(f"Applied tag: {opts['tag_name']}:{opts['tag_value']} (evaluation: {applies})")
183183
increment_counter(
184184
"conserver.link.openai.tags_applied",
185-
attributes={"analysis_type": opts['analysis_type'], "tag_name": opts['tag_name'], "tag_value": opts['tag_value']},
185+
attributes={"analysis_type": opts['analysis_type'], "tag_name": opts['tag_name'], "tag_value": opts['tag_value'], "link.name": link_name, "vcon.uuid": vcon_uuid},
186186
)
187187
else:
188188
logger.info(f"Tag not applied: {opts['tag_name']}:{opts['tag_value']} (evaluation: {applies})")
@@ -194,14 +194,14 @@ def run(
194194
)
195195
increment_counter(
196196
"conserver.link.openai.evaluation_failures",
197-
attributes={"analysis_type": opts['analysis_type'], "tag_name": opts['tag_name'], "tag_value": opts['tag_value']},
197+
attributes={"analysis_type": opts['analysis_type'], "tag_name": opts['tag_name'], "tag_value": opts['tag_value'], "link.name": link_name, "vcon.uuid": vcon_uuid},
198198
)
199199
raise e
200200

201201
record_histogram(
202202
"conserver.link.openai.evaluation_time",
203203
time.time() - start,
204-
attributes={"analysis_type": opts['analysis_type'], "tag_name": opts['tag_name'], "tag_value": opts['tag_value']},
204+
attributes={"analysis_type": opts['analysis_type'], "tag_name": opts['tag_name'], "tag_value": opts['tag_value'], "link.name": link_name, "vcon.uuid": vcon_uuid},
205205
)
206206

207207
vcon_redis.store_vcon(vCon)

conserver/links/datatrails/__init__.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from fastapi import HTTPException
55
from lib.vcon_redis import VconRedis
66
from lib.logging_utils import init_logger
7+
from lib.metrics import increment_counter
78
from starlette.status import HTTP_404_NOT_FOUND, HTTP_501_NOT_IMPLEMENTED
89
from vcon import Vcon
910

@@ -382,7 +383,13 @@ def run(vcon_uuid: str, link_name: str, opts: dict = default_options) -> str:
382383
# }
383384
# )
384385

385-
event = create_asset_event(opts, asset_id, auth, event_attributes)
386+
attrs = {"link.name": link_name, "vcon.uuid": vcon_uuid}
387+
388+
try:
389+
event = create_asset_event(opts, asset_id, auth, event_attributes)
390+
except Exception:
391+
increment_counter("conserver.link.datatrails.event_creation_failures", attributes=attrs)
392+
raise
386393
event_id = event["identity"]
387394
logger.info(f"DataTrails: Event Created: {event_id}")
388395

@@ -395,6 +402,10 @@ def run(vcon_uuid: str, link_name: str, opts: dict = default_options) -> str:
395402
event_id = event["identity"]
396403
logger.info(f"DataTrails: New Event Created: {event_id}")
397404
except:
405+
increment_counter(
406+
"conserver.link.datatrails.event_creation_failures",
407+
attributes={**attrs, "event_type": "asset_free"},
408+
)
398409
logger.info(f"DataTrails: New Event Creation Failure")
399410

400411

conserver/links/deepgram_link/__init__.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,7 @@ def run(
235235
logger.info("Dialog %s already transcribed on vCon: %s", index, vCon.uuid)
236236
continue
237237

238+
attrs = {"link.name": link_name, "vcon.uuid": vcon_uuid}
238239
start = time.time()
239240
try:
240241
if opts.get("LITELLM_PROXY_URL") and opts.get("LITELLM_MASTER_KEY"):
@@ -248,26 +249,26 @@ def run(
248249
result = transcribe_dg(dg_client, dialog, opts["api"], vcon_uuid=vcon_uuid, run_opts=opts)
249250
except Exception as e:
250251
logger.error("Failed to transcribe vCon %s after multiple retries: %s", vcon_uuid, e, exc_info=True)
251-
increment_counter("conserver.link.deepgram.transcription_failures")
252+
increment_counter("conserver.link.deepgram.transcription_failures", attributes=attrs)
252253
raise e
253254
elapsed = time.time() - start
254-
record_histogram("conserver.link.deepgram.transcription_time", elapsed)
255+
record_histogram("conserver.link.deepgram.transcription_time", elapsed, attributes=attrs)
255256
logger.info(f"Transcription for dialog {index} took {elapsed:.2f} seconds.")
256257

257258
if not result:
258259
logger.warning("No transcription generated for vCon %s, dialog %s", vcon_uuid, index)
259-
increment_counter("conserver.link.deepgram.transcription_failures")
260+
increment_counter("conserver.link.deepgram.transcription_failures", attributes=attrs)
260261
break
261262

262263
# Log and track confidence (not available for LiteLLM/OpenAI-format transcription)
263264
confidence = result.get("confidence")
264265
if confidence is not None:
265-
record_histogram("conserver.link.deepgram.confidence", confidence)
266+
record_histogram("conserver.link.deepgram.confidence", confidence, attributes=attrs)
266267
logger.info(f"Transcription confidence for dialog {index}: {confidence}")
267268
# If the confidence is too low, don't store the transcript
268269
if confidence < opts["minimum_confidence"]:
269270
logger.warning("Low confidence result for vCon %s, dialog %s: %s", vcon_uuid, index, confidence)
270-
increment_counter("conserver.link.deepgram.transcription_failures")
271+
increment_counter("conserver.link.deepgram.transcription_failures", attributes=attrs)
271272
continue
272273
else:
273274
logger.info(f"Confidence not available for dialog {index} (LiteLLM path), skipping threshold check")

conserver/links/detect_engagement/__init__.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,7 @@ def run(
144144
increment_counter(
145145
"conserver.link.openai.engagement_detected",
146146
value=1 if is_engaged else 0,
147-
attributes={"analysis_type": opts['analysis_type']},
147+
attributes={"analysis_type": opts['analysis_type'], "link.name": link_name, "vcon.uuid": vcon_uuid},
148148
)
149149

150150
except Exception as e:
@@ -158,14 +158,14 @@ def run(
158158
)
159159
increment_counter(
160160
"conserver.link.openai.engagement_analysis_failures",
161-
attributes={"analysis_type": opts['analysis_type']},
161+
attributes={"analysis_type": opts['analysis_type'], "link.name": link_name, "vcon.uuid": vcon_uuid},
162162
)
163163
raise e
164164

165165
record_histogram(
166166
"conserver.link.openai.engagement_analysis_time",
167167
time.time() - start,
168-
attributes={"analysis_type": opts['analysis_type']},
168+
attributes={"analysis_type": opts['analysis_type'], "link.name": link_name, "vcon.uuid": vcon_uuid},
169169
)
170170

171171
vcon_redis.store_vcon(vCon)

conserver/links/diet/__init__.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from redis_mgr import redis
22
from lib.logging_utils import init_logger
3+
from lib.metrics import increment_counter
34
import json
45
import requests
56
import uuid
@@ -134,6 +135,8 @@ def run(vcon_uuid, link_name, opts=default_options):
134135
for key, value in options.items():
135136
logger.info("diet::%s: %s", key, _redact_option_value(key, value))
136137

138+
attrs = {"link.name": link_name, "vcon.uuid": vcon_uuid}
139+
137140
# Load vCon from Redis using JSON.GET
138141
vcon = redis.json().get(f"vcon:{vcon_uuid}")
139142
if not vcon:
@@ -180,9 +183,11 @@ def run(vcon_uuid, link_name, opts=default_options):
180183
else:
181184
dialog["body"] = ""
182185
else:
186+
increment_counter("conserver.link.diet.media_post_failures", attributes=attrs)
183187
logger.error(f"Failed to post media: {response.status_code}")
184188
dialog["body"] = ""
185189
except Exception as e:
190+
increment_counter("conserver.link.diet.media_post_failures", attributes=attrs)
186191
logger.error(f"Exception posting media: {e}")
187192
dialog["body"] = ""
188193
else:

conserver/links/groq_whisper/__init__.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -321,6 +321,7 @@ def run(
321321
vCon.uuid)
322322
continue
323323

324+
attrs = {"link.name": link_name, "vcon.uuid": vcon_uuid}
324325
try:
325326
# Attempt transcription with timing metrics
326327
start = time.time()
@@ -329,25 +330,26 @@ def run(
329330
result = transcribe_groq_whisper(dialog, opts)
330331
record_histogram(
331332
"conserver.link.groq_whisper.transcription_time",
332-
time.time() - start
333+
time.time() - start,
334+
attributes=attrs,
333335
)
334336
except RetryError as re:
335337
logger.error(
336338
"Failed to transcribe vCon %s after multiple retry attempts: %s",
337339
vcon_uuid, re)
338-
increment_counter("conserver.link.groq_whisper.transcription_failures")
340+
increment_counter("conserver.link.groq_whisper.transcription_failures", attributes=attrs)
339341
break
340342
except Exception as e:
341343
logger.error(
342344
"Unexpected error transcribing vCon %s: %s",
343345
vcon_uuid, e)
344-
increment_counter("conserver.link.groq_whisper.transcription_failures")
346+
increment_counter("conserver.link.groq_whisper.transcription_failures", attributes=attrs)
345347
break
346348

347349
if not result:
348350
logger.warning("No transcription generated for vCon %s", vcon_uuid)
349351
increment_counter(
350-
"conserver.link.groq_whisper.transcription_failures")
352+
"conserver.link.groq_whisper.transcription_failures", attributes=attrs)
351353
break
352354

353355
logger.info("Transcribed vCon: %s", vCon.uuid)
@@ -357,7 +359,7 @@ def run(
357359
# Check if result is a successful transcription
358360
if not hasattr(result, 'text'):
359361
logger.warning(f"Unexpected result format: {result}")
360-
increment_counter("conserver.link.groq_whisper.transcription_failures")
362+
increment_counter("conserver.link.groq_whisper.transcription_failures", attributes=attrs)
361363
break
362364

363365
# Handle different response formats from the Groq API

0 commit comments

Comments
 (0)