hteeyeoh
diff --git a/microservices/audio-analyzer/audio_analyzer/api/endpoints/transcription.py b/microservices/audio-analyzer/audio_analyzer/api/endpoints/transcription.py
Lines changed: 7 additions & 2 deletions
diff --git a/microservices/audio-analyzer/audio_analyzer/core/transcriber.py b/microservices/audio-analyzer/audio_analyzer/core/transcriber.py
Lines changed: 8 additions & 3 deletions
diff --git a/microservices/audio-analyzer/audio_analyzer/utils/logger.py b/microservices/audio-analyzer/audio_analyzer/utils/logger.py
Lines changed: 13 additions & 0 deletions
diff --git a/microservices/audio-analyzer/docker/Dockerfile b/microservices/audio-analyzer/docker/Dockerfile
Lines changed: 2 additions & 2 deletions
diff --git a/microservices/audio-analyzer/poetry.lock b/microservices/audio-analyzer/poetry.lock
Lines changed: 27 additions & 9 deletions
diff --git a/microservices/audio-analyzer/pyproject.toml b/microservices/audio-analyzer/pyproject.toml
Lines changed: 2 additions & 1 deletion
diff --git a/microservices/multimodal-embedding-serving/docker/Dockerfile b/microservices/multimodal-embedding-serving/docker/Dockerfile
Lines changed: 1 addition & 1 deletion
@@ -18,7 +18,7 @@
 from audio_analyzer.utils.file_utils import get_file_duration
 from audio_analyzer.utils.validation import RequestValidation
 from audio_analyzer.utils.transcription_utils import get_video_path, store_transcript_output
-from audio_analyzer.utils.logger import logger
+from audio_analyzer.utils.logger import logger, sanitize_for_log
 
 router = APIRouter()
 
@@ -63,7 +63,12 @@ async def transcribe_video(
         RequestValidation.validate_form_data(request)
 
         logger.info(f"Received transcription request for {'file upload' if request.file else 'MinIO video'}")
-        logger.debug(f"Transcription parameters: model={request.model_name}, device={request.device}, language={language}")
+        logger.debug(
+            "Transcription parameters: model=%s, device=%s, language_provided=%s",
+            sanitize_for_log(request.model_name),
+            sanitize_for_log(request.device),
+            language is not None,
+        )
 
         # Get video path either from direct upload or MinIO
         video_path, filename = await get_video_path(request)
 
@@ -13,7 +13,7 @@
 from audio_analyzer.core.settings import settings
 from audio_analyzer.schemas.types import DeviceType, WhisperModel, TranscriptionBackend
 from audio_analyzer.utils.hardware_utils import is_intel_gpu_available
-from audio_analyzer.utils.logger import logger
+from audio_analyzer.utils.logger import logger, sanitize_for_log
 from audio_analyzer.utils.model_manager import ModelManager
 
 class TranscriptionService:
@@ -191,7 +191,12 @@ async def transcribe(
             Tuple containing the job ID and path to the transcription file
         """
         logger.info(f"Starting transcription for audio: {audio_path}")
-        logger.debug(f"Transcription parameters - language: {language}, include_timestamps: {include_timestamps}, video_duration: {video_duration}")
+        logger.debug(
+            "Transcription parameters - language_provided: %s, include_timestamps: %s, video_duration: %s",
+            language is not None,
+            sanitize_for_log(include_timestamps),
+            sanitize_for_log(video_duration),
+        )
 
         try:
             self._load_model()
@@ -271,7 +276,7 @@ async def _transcribe_with_whisper_cpp(
 
             if lang_code:
                 params["language"] = lang_code
-                logger.debug(f"Set language to: {lang_code}")
+                logger.debug("Language override configured for whispercpp transcription")
 
             # Calculate optimal number of processors based on video duration and core count
             # Each processor will handle at least 1 minute (60 seconds) of audio
 
@@ -2,11 +2,24 @@
 # SPDX-License-Identifier: Apache-2.0
 
 import logging
+import re
 import sys
 from typing import Optional
 
 from audio_analyzer.core.settings import settings
 
+# Matches runs of characters that must never reach a log line verbatim: the
+# ASCII C0 controls (which already include CR, LF and TAB), DEL, and the C1
+# control range (U+0080-U+009F, e.g. the single-character CSI U+009B).
+_SAFE_LOG_PATTERN = re.compile(r"[\x00-\x1f\x7f-\x9f]+")
+
+
+def sanitize_for_log(value, max_len: int = 1024) -> str:
+    """
+    Return a compact, single-line representation of a value for safe logging.
+
+    Control characters are replaced by spaces, every run of whitespace is
+    collapsed to a single space, and leading/trailing whitespace is removed,
+    so the returned text cannot start a new log line.
+
+    Args:
+        value: Object to render; ``None`` is rendered as an empty string and
+            anything else is converted with ``str()``.
+        max_len: Maximum length of the returned text. Longer text is cut and
+            terminated with ``"..."`` when the limit leaves room for it.
+
+    Returns:
+        The sanitized text, never longer than ``max_len`` characters (an empty
+        string when ``max_len`` is zero or negative).
+    """
+    ellipsis = "..."
+    text = "" if value is None else str(value)
+    text = _SAFE_LOG_PATTERN.sub(" ", text)
+    # str.split() with no argument also splits on Unicode whitespace, so this
+    # both collapses repeated spaces and trims the ends.
+    text = " ".join(text.split())
+    if len(text) <= max_len:
+        return text
+    if max_len < len(ellipsis):
+        # No room for the ellipsis: a negative slice bound would make the
+        # result longer than max_len, so hard-truncate instead.
+        return text[: max(max_len, 0)]
+    return text[: max_len - len(ellipsis)] + ellipsis
+
 
 def setup_logger(name: Optional[str] = None) -> logging.Logger:
     """
 
@@ -33,7 +33,7 @@ ENV PATH="$PATH:$POETRY_HOME/bin:$VENV_PATH/bin"
 ENV HOME=/home/appuser
 
 # Install curl to be used for health checks in final stages
-RUN apt update -y && apt install -y curl
+RUN apt-get update -y && apt-get install --no-install-recommends -y curl && rm -rf /var/lib/apt/lists/*
 # Upgrade pip to the latest version
 RUN pip install --upgrade pip 
 
@@ -141,7 +141,7 @@ RUN if [ "$COPYLEFT_SOURCES" = "true" ]; then \
         # Get source code for installed Python packages with copyleft licenses \
         mkdir -p /copyleft_sources/python && \
         cd /copyleft_sources/python && \
-        apt-get update && apt-get install -y gcc build-essential libffi-dev python3-dev && \
+        apt-get update && apt-get install --no-install-recommends -y gcc build-essential libffi-dev python3-dev && \
         # Download python package sources with relevant licenses \
         pip3 freeze | cut -d= -f1 | while read pkg; do \
             meta=$(pip3 show $pkg 2>/dev/null); \
 
@@ -14,7 +14,7 @@ gunicorn = "^23.0.0"
 python-multipart = "^0.0.22"
 pydantic = "^2.11.5"
 pydantic-settings = "^2.9.1"
-setuptools = "^80.9.0"
+setuptools = "^82.0.1"
 transformers = "^4.51.3"
 pywhispercpp = { git = "https://github.com/absadiki/pywhispercpp.git", tag = "v1.4.0" }
 moviepy = "^2.2.1"
@@ -30,6 +30,7 @@ torchvision = { version = "^0.22.0", source = "pytorch-cpu" }
 urllib3 = "^2.6.3"
 protobuf = "^6.33.5"
 filelock = "^3.20.3"
+wheel = "^0.46.3"
 
 
 [tool.poetry.group.dev.dependencies]
 
@@ -82,7 +82,7 @@ RUN if [ "$COPYLEFT_SOURCES" = "true" ]; then \
         # Get source code for installed Python packages with copyleft licenses \
         mkdir -p /copyleft_sources/python && \
         cd /copyleft_sources/python && \
-        apt-get update && apt-get install -y gcc build-essential libffi-dev python3-dev && \
+        apt-get update && apt-get install --no-install-recommends -y gcc build-essential libffi-dev python3-dev && \
         # Download python package sources with relevant licenses \
         pip3 freeze | cut -d= -f1 | while read pkg; do \
             meta=$(pip3 show $pkg 2>/dev/null); \