Commit cc51c22
pavanputhra and claude committed
Fix Copilot review issues: fd leak, audio streaming, confidence handling, duplicate patch
- chatgpt_files: use a context manager for the file upload so the fd is closed after the upload
- milvus: fix comment "opts/env" -> "provided in opts"
- deepgram: stream audio to a temp file instead of loading it all into RAM
- deepgram: omit fabricated confidence/detected_language from the LiteLLM path; run() skips the minimum_confidence threshold when confidence is absent (LiteLLM path); change the low-confidence break -> continue so the remaining dialogs still get transcribed
- test_analyze_and_label: remove the duplicate @patch decorator on test_run_analysis_exception that caused an argument mismatch

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 27c3f60 commit cc51c22

4 files changed: 30 additions & 22 deletions

server/links/analyze_and_label/tests/test_analyze_and_label.py

Lines changed: 0 additions & 1 deletion
@@ -344,7 +344,6 @@ def test_run_json_parse_error(mock_sampling, mock_is_included, mock_generate_ana
     assert tags_attachment is None or len(tags_attachment["body"]) == 0


-@patch('server.links.analyze_and_label.get_openai_client')
 @patch('server.links.analyze_and_label.get_openai_client')
 @patch('server.links.analyze_and_label.generate_analysis_with_labels')
 @patch('server.links.analyze_and_label.is_included', return_value=True)
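
For context on the argument-mismatch failure: each stacked @patch injects one extra mock into the test's parameter list, bottom-most decorator first, so a duplicated decorator hands the test one more positional argument than its signature accepts. A minimal sketch with hypothetical patch targets:

from unittest.mock import patch

# Each @patch injects one mock argument, bottom-most decorator first.
# Duplicating a decorator would inject an extra mock, so the call would
# fail with a TypeError about an unexpected positional argument.
@patch('os.path.getsize')   # applied second -> second mock parameter
@patch('os.path.exists')    # applied first  -> first mock parameter
def test_example(mock_exists, mock_getsize):
    mock_exists.return_value = True
    mock_getsize.return_value = 0
    assert mock_exists('/tmp/x') is True

test_example()  # patch supplies both mocks automatically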

server/links/deepgram_link/__init__.py

Lines changed: 27 additions & 19 deletions
@@ -49,30 +49,35 @@ def transcribe_via_litellm(url: str, opts: dict) -> Optional[dict]:
     if not litellm_url or not litellm_key:
         return None
     model = opts.get("model") or (opts.get("api") or {}).get("model", "nova-3")
-    # Download audio
+    # Download audio (stream to temp file to avoid loading large files into memory)
     audio_response = requests.get(url, stream=True, timeout=60)
     audio_response.raise_for_status()
-    content = audio_response.content
-    if not content:
-        logger.warning("Empty audio content from %s", url)
-        return None
-    # OpenAI client expects a file-like object with optional .name
     ext = os.path.splitext(url.split("?")[0])[-1] or ".mp3"
     with tempfile.NamedTemporaryFile(delete=False, suffix=ext) as tmp:
-        tmp.write(content)
+        bytes_written = 0
+        for chunk in audio_response.iter_content(chunk_size=8192):
+            if chunk:
+                tmp.write(chunk)
+                bytes_written += len(chunk)
         tmp_path = tmp.name
+    if bytes_written == 0:
+        logger.warning("Empty audio content from %s", url)
+        try:
+            os.unlink(tmp_path)
+        except OSError:
+            pass
+        return None
     try:
         client = get_openai_client(opts)
         with open(tmp_path, "rb") as f:
             response = client.audio.transcriptions.create(model=model, file=f)
         text = getattr(response, "text", None) or (response.model_dump().get("text") if hasattr(response, "model_dump") else str(response))
         if text is None:
             return None
-        # Shape expected by rest of link (confidence not provided by OpenAI-format response)
+        # confidence and detected_language are not available in the OpenAI-format response;
+        # omit them so callers can skip confidence filtering instead of applying a fake threshold.
         return {
             "transcript": text,
-            "confidence": 1.0,
-            "detected_language": "en",
         }
     finally:
         try:
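
The streaming-download pattern the hunk above introduces, shown standalone — a minimal sketch, where the helper name, the default suffix, and the ValueError on an empty body are illustrative choices, not the link's actual API:

import os
import tempfile

import requests

def download_to_tempfile(url: str, suffix: str = ".mp3") -> str:
    # Stream the HTTP body into a temp file in 8 KiB chunks so the
    # whole payload never has to sit in memory at once.
    response = requests.get(url, stream=True, timeout=60)
    response.raise_for_status()
    bytes_written = 0
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
        for chunk in response.iter_content(chunk_size=8192):
            if chunk:  # skip keep-alive chunks
                tmp.write(chunk)
                bytes_written += len(chunk)
        path = tmp.name
    if bytes_written == 0:
        os.unlink(path)  # don't leave an empty temp file behind
        raise ValueError(f"empty response body from {url}")
    return path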
@@ -254,15 +259,18 @@ def run(
             increment_counter("conserver.link.deepgram.transcription_failures")
             break

-        # Log and track confidence
-        record_histogram("conserver.link.deepgram.confidence", result["confidence"])
-        logger.info(f"Transcription confidence for dialog {index}: {result['confidence']}")
-
-        # If the confidence is too low, don't store the transcript
-        if result["confidence"] < opts["minimum_confidence"]:
-            logger.warning("Low confidence result for vCon %s, dialog %s: %s", vcon_uuid, index, result["confidence"])
-            increment_counter("conserver.link.deepgram.transcription_failures")
-            break
+        # Log and track confidence (not available for LiteLLM/OpenAI-format transcription)
+        confidence = result.get("confidence")
+        if confidence is not None:
+            record_histogram("conserver.link.deepgram.confidence", confidence)
+            logger.info(f"Transcription confidence for dialog {index}: {confidence}")
+            # If the confidence is too low, don't store the transcript
+            if confidence < opts["minimum_confidence"]:
+                logger.warning("Low confidence result for vCon %s, dialog %s: %s", vcon_uuid, index, confidence)
+                increment_counter("conserver.link.deepgram.transcription_failures")
+                continue
+        else:
+            logger.info(f"Confidence not available for dialog {index} (LiteLLM path), skipping threshold check")

         logger.info("Transcribed vCon: %s, dialog: %s", vCon.uuid, index)
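
A minimal sketch (hypothetical data) of why the break -> continue change matters: with break, one low-confidence dialog aborted the whole loop; with continue, only that dialog is skipped, and a result without a confidence key (the LiteLLM path) bypasses the threshold entirely:

results = [
    {"transcript": "hello", "confidence": 0.95},
    {"transcript": "???", "confidence": 0.20},  # below threshold
    {"transcript": "goodbye"},                  # LiteLLM path: no confidence key
]
minimum_confidence = 0.5

for index, result in enumerate(results):
    confidence = result.get("confidence")  # None when the key is absent
    if confidence is not None and confidence < minimum_confidence:
        print(f"dialog {index}: skipped (confidence {confidence})")
        continue  # skip this dialog only; the loop keeps going
    print(f"dialog {index}: stored {result['transcript']!r}")
# dialogs 0 and 2 are stored; only dialog 1 is skipped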

server/storage/chatgpt_files/__init__.py

Lines changed: 2 additions & 1 deletion
@@ -30,7 +30,8 @@ def save(vcon_uuid: str, options: dict = default_options) -> None:
         with open(file_name, "w") as file:
             json.dump(vcon, file)
         client = get_openai_client(options)
-        file = client.files.create(file=open(file_name, "rb"), purpose=options["purpose"])
+        with open(file_name, "rb") as upload_file:
+            file = client.files.create(file=upload_file, purpose=options["purpose"])
         os.remove(file_name)
         client.beta.vector_stores.files.create(vector_store_id=options["vector_store_id"], file_id=file.id)
     except Exception as error:
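
The leak this hunk closes, in isolation — a file object passed inline to a call is never explicitly closed, so its descriptor lives until the object is finalized; a hypothetical upload() stands in for client.files.create:

def upload(fileobj) -> None:
    fileobj.read()  # hypothetical stand-in for client.files.create

# Leaky: nothing calls close(); CPython's refcounting usually closes the
# fd promptly, but that's an implementation detail, not a guarantee.
upload(open("vcon.json", "rb"))

# Fixed: the context manager closes the fd the moment the upload returns.
with open("vcon.json", "rb") as upload_file:
    upload(upload_file)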

server/storage/milvus/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -446,7 +446,7 @@ def save(vcon_uuid: str, opts=default_options) -> None:
         logger.info(f"vCon {vcon_uuid} already exists in Milvus collection {collection_name}, skipping")
         return

-    # Initialize OpenAI client (supports LiteLLM proxy via LITELLM_PROXY_URL + LITELLM_MASTER_KEY in opts/env)
+    # Initialize OpenAI client (supports LiteLLM proxy via LITELLM_PROXY_URL + LITELLM_MASTER_KEY provided in opts)
     openai_client = get_openai_client(opts)

     # Extract text content from vCon
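
For the comment's claim about proxy support, a hypothetical sketch of what get_openai_client(opts) could look like when the LiteLLM settings are provided in opts — the repository's actual helper and option key names may differ:

from openai import OpenAI

def get_openai_client(opts: dict) -> OpenAI:
    # Route through a LiteLLM proxy when both settings are present in opts;
    # otherwise fall back to a plain OpenAI client. Key names are assumptions.
    proxy_url = opts.get("LITELLM_PROXY_URL")
    proxy_key = opts.get("LITELLM_MASTER_KEY")
    if proxy_url and proxy_key:
        return OpenAI(base_url=proxy_url, api_key=proxy_key)
    return OpenAI(api_key=opts.get("OPENAI_API_KEY"))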
