Commit cc51c22
pavanputhra and claude committed
Fix Copilot review issues: fd leak, audio streaming, confidence handling, duplicate patch
- chatgpt_files: use a context manager for the file upload so the fd is closed after the upload
- milvus: fix comment "opts/env" -> "provided in opts"
- deepgram: stream audio to a temp file instead of loading it all into RAM
- deepgram: omit fabricated confidence/detected_language from the LiteLLM path; run() skips the minimum_confidence threshold when confidence is absent (LiteLLM path); change the low-confidence break -> continue so the remaining dialogs still get transcribed
- test_analyze_and_label: remove the duplicate @patch decorator on test_run_analysis_exception that caused an argument mismatch

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 27c3f60 commit cc51c22

4 files changed: 30 additions & 22 deletions

server/links/analyze_and_label/tests/test_analyze_and_label.py

Lines changed: 0 additions & 1 deletion
@@ -344,7 +344,6 @@ def test_run_json_parse_error(mock_sampling, mock_is_included, mock_generate_ana
     assert tags_attachment is None or len(tags_attachment["body"]) == 0


-@patch('server.links.analyze_and_label.get_openai_client')
 @patch('server.links.analyze_and_label.get_openai_client')
 @patch('server.links.analyze_and_label.generate_analysis_with_labels')
 @patch('server.links.analyze_and_label.is_included', return_value=True)
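
For context on the argument-mismatch failure: each stacked @patch injects one extra mock into the test's parameter list, bottom-most decorator first, so a duplicated decorator hands the test one more positional argument than its signature accepts. A minimal sketch with hypothetical patch targets:

from unittest.mock import patch

# Each @patch injects one mock argument, bottom-most decorator first.
# Duplicating a decorator would inject an extra mock, so the call would
# fail with a TypeError about an unexpected positional argument.
@patch('os.path.getsize')   # applied second -> second mock parameter
@patch('os.path.exists')    # applied first  -> first mock parameter
def test_example(mock_exists, mock_getsize):
    mock_exists.return_value = True
    mock_getsize.return_value = 0
    assert mock_exists('/tmp/x') is True

test_example()  # patch supplies both mocks automatically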

server/links/deepgram_link/__init__.py

Lines changed: 27 additions & 19 deletions
@@ -49,30 +49,35 @@ def transcribe_via_litellm(url: str, opts: dict) -> Optional[dict]:
     if not litellm_url or not litellm_key:
         return None
     model = opts.get("model") or (opts.get("api") or {}).get("model", "nova-3")
-    # Download audio
+    # Download audio (stream to temp file to avoid loading large files into memory)
     audio_response = requests.get(url, stream=True, timeout=60)
     audio_response.raise_for_status()
-    content = audio_response.content
-    if not content:
-        logger.warning("Empty audio content from %s", url)
-        return None
-    # OpenAI client expects a file-like object with optional .name
     ext = os.path.splitext(url.split("?")[0])[-1] or ".mp3"
     with tempfile.NamedTemporaryFile(delete=False, suffix=ext) as tmp:
-        tmp.write(content)
+        bytes_written = 0
+        for chunk in audio_response.iter_content(chunk_size=8192):
+            if chunk:
+                tmp.write(chunk)
+                bytes_written += len(chunk)
         tmp_path = tmp.name
+    if bytes_written == 0:
+        logger.warning("Empty audio content from %s", url)
+        try:
+            os.unlink(tmp_path)
+        except OSError:
+            pass
+        return None
     try:
         client = get_openai_client(opts)
         with open(tmp_path, "rb") as f:
             response = client.audio.transcriptions.create(model=model, file=f)
         text = getattr(response, "text", None) or (response.model_dump().get("text") if hasattr(response, "model_dump") else str(response))
         if text is None:
             return None
-        # Shape expected by rest of link (confidence not provided by OpenAI-format response)
+        # confidence and detected_language are not available in the OpenAI-format response;
+        # omit them so callers can skip confidence filtering instead of applying a fake threshold.
         return {
             "transcript": text,
-            "confidence": 1.0,
-            "detected_language": "en",
         }
     finally:
         try:
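
The streaming-download pattern the hunk above introduces, shown standalone — a minimal sketch, where the helper name, the default suffix, and the ValueError on an empty body are illustrative choices, not the link's actual API:

import os
import tempfile

import requests

def download_to_tempfile(url: str, suffix: str = ".mp3") -> str:
    # Stream the HTTP body into a temp file in 8 KiB chunks so the
    # whole payload never has to sit in memory at once.
    response = requests.get(url, stream=True, timeout=60)
    response.raise_for_status()
    bytes_written = 0
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
        for chunk in response.iter_content(chunk_size=8192):
            if chunk:  # skip keep-alive chunks
                tmp.write(chunk)
                bytes_written += len(chunk)
        path = tmp.name
    if bytes_written == 0:
        os.unlink(path)  # don't leave an empty temp file behind
        raise ValueError(f"empty response body from {url}")
    return path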
@@ -254,15 +259,18 @@ def run(
             increment_counter("conserver.link.deepgram.transcription_failures")
             break

-        # Log and track confidence
-        record_histogram("conserver.link.deepgram.confidence", result["confidence"])
-        logger.info(f"Transcription confidence for dialog {index}: {result['confidence']}")
-
-        # If the confidence is too low, don't store the transcript
-        if result["confidence"] < opts["minimum_confidence"]:
-            logger.warning("Low confidence result for vCon %s, dialog %s: %s", vcon_uuid, index, result["confidence"])
-            increment_counter("conserver.link.deepgram.transcription_failures")
-            break
+        # Log and track confidence (not available for LiteLLM/OpenAI-format transcription)
+        confidence = result.get("confidence")
+        if confidence is not None:
+            record_histogram("conserver.link.deepgram.confidence", confidence)
+            logger.info(f"Transcription confidence for dialog {index}: {confidence}")
+            # If the confidence is too low, don't store the transcript
+            if confidence < opts["minimum_confidence"]:
+                logger.warning("Low confidence result for vCon %s, dialog %s: %s", vcon_uuid, index, confidence)
+                increment_counter("conserver.link.deepgram.transcription_failures")
+                continue
+        else:
+            logger.info(f"Confidence not available for dialog {index} (LiteLLM path), skipping threshold check")

         logger.info("Transcribed vCon: %s, dialog: %s", vCon.uuid, index)
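
A minimal sketch (hypothetical data) of why the break -> continue change matters: with break, one low-confidence dialog aborted the whole loop; with continue, only that dialog is skipped, and a result without a confidence key (the LiteLLM path) bypasses the threshold entirely:

results = [
    {"transcript": "hello", "confidence": 0.95},
    {"transcript": "???", "confidence": 0.20},  # below threshold
    {"transcript": "goodbye"},                  # LiteLLM path: no confidence key
]
minimum_confidence = 0.5

for index, result in enumerate(results):
    confidence = result.get("confidence")  # None when the key is absent
    if confidence is not None and confidence < minimum_confidence:
        print(f"dialog {index}: skipped (confidence {confidence})")
        continue  # skip this dialog only; the loop keeps going
    print(f"dialog {index}: stored {result['transcript']!r}")
# dialogs 0 and 2 are stored; only dialog 1 is skipped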

server/storage/chatgpt_files/__init__.py

Lines changed: 2 additions & 1 deletion
@@ -30,7 +30,8 @@ def save(vcon_uuid: str, options: dict = default_options) -> None:
         with open(file_name, "w") as file:
             json.dump(vcon, file)
         client = get_openai_client(options)
-        file = client.files.create(file=open(file_name, "rb"), purpose=options["purpose"])
+        with open(file_name, "rb") as upload_file:
+            file = client.files.create(file=upload_file, purpose=options["purpose"])
         os.remove(file_name)
         client.beta.vector_stores.files.create(vector_store_id=options["vector_store_id"], file_id=file.id)
     except Exception as error:
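
The leak this hunk closes, in isolation — a file object passed inline to a call is never explicitly closed, so its descriptor lives until the object is finalized; a hypothetical upload() stands in for client.files.create:

def upload(fileobj) -> None:
    fileobj.read()  # hypothetical stand-in for client.files.create

# Leaky: nothing calls close(); CPython's refcounting usually closes the
# fd promptly, but that's an implementation detail, not a guarantee.
upload(open("vcon.json", "rb"))

# Fixed: the context manager closes the fd the moment the upload returns.
with open("vcon.json", "rb") as upload_file:
    upload(upload_file)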

server/storage/milvus/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -446,7 +446,7 @@ def save(vcon_uuid: str, opts=default_options) -> None:
         logger.info(f"vCon {vcon_uuid} already exists in Milvus collection {collection_name}, skipping")
         return

-    # Initialize OpenAI client (supports LiteLLM proxy via LITELLM_PROXY_URL + LITELLM_MASTER_KEY in opts/env)
+    # Initialize OpenAI client (supports LiteLLM proxy via LITELLM_PROXY_URL + LITELLM_MASTER_KEY provided in opts)
     openai_client = get_openai_client(opts)

     # Extract text content from vCon
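
For the comment's claim about proxy support, a hypothetical sketch of what get_openai_client(opts) could look like when the LiteLLM settings are provided in opts — the repository's actual helper and option key names may differ:

from openai import OpenAI

def get_openai_client(opts: dict) -> OpenAI:
    # Route through a LiteLLM proxy when both settings are present in opts;
    # otherwise fall back to a plain OpenAI client. Key names are assumptions.
    proxy_url = opts.get("LITELLM_PROXY_URL")
    proxy_key = opts.get("LITELLM_MASTER_KEY")
    if proxy_url and proxy_key:
        return OpenAI(base_url=proxy_url, api_key=proxy_key)
    return OpenAI(api_key=opts.get("OPENAI_API_KEY"))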
