Skip to content

Commit faa6304

Browse files
chore: fix scan issues in video search and summarization stack (open-edge-platform#1959)
1 parent 9d7c7d3 commit faa6304

File tree

56 files changed

+5174
-4322
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

56 files changed

+5174
-4322
lines changed

microservices/audio-analyzer/audio_analyzer/api/endpoints/transcription.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
from audio_analyzer.utils.file_utils import get_file_duration
1919
from audio_analyzer.utils.validation import RequestValidation
2020
from audio_analyzer.utils.transcription_utils import get_video_path, store_transcript_output
21-
from audio_analyzer.utils.logger import logger
21+
from audio_analyzer.utils.logger import logger, sanitize_for_log
2222

2323
router = APIRouter()
2424

@@ -63,7 +63,12 @@ async def transcribe_video(
6363
RequestValidation.validate_form_data(request)
6464

6565
logger.info(f"Received transcription request for {'file upload' if request.file else 'MinIO video'}")
66-
logger.debug(f"Transcription parameters: model={request.model_name}, device={request.device}, language={language}")
66+
logger.debug(
67+
"Transcription parameters: model=%s, device=%s, language_provided=%s",
68+
sanitize_for_log(request.model_name),
69+
sanitize_for_log(request.device),
70+
language is not None,
71+
)
6772

6873
# Get video path either from direct upload or MinIO
6974
video_path, filename = await get_video_path(request)

microservices/audio-analyzer/audio_analyzer/core/transcriber.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
from audio_analyzer.core.settings import settings
1414
from audio_analyzer.schemas.types import DeviceType, WhisperModel, TranscriptionBackend
1515
from audio_analyzer.utils.hardware_utils import is_intel_gpu_available
16-
from audio_analyzer.utils.logger import logger
16+
from audio_analyzer.utils.logger import logger, sanitize_for_log
1717
from audio_analyzer.utils.model_manager import ModelManager
1818

1919
class TranscriptionService:
@@ -191,7 +191,12 @@ async def transcribe(
191191
Tuple containing the job ID and path to the transcription file
192192
"""
193193
logger.info(f"Starting transcription for audio: {audio_path}")
194-
logger.debug(f"Transcription parameters - language: {language}, include_timestamps: {include_timestamps}, video_duration: {video_duration}")
194+
logger.debug(
195+
"Transcription parameters - language_provided: %s, include_timestamps: %s, video_duration: %s",
196+
language is not None,
197+
sanitize_for_log(include_timestamps),
198+
sanitize_for_log(video_duration),
199+
)
195200

196201
try:
197202
self._load_model()
@@ -271,7 +276,7 @@ async def _transcribe_with_whisper_cpp(
271276

272277
if lang_code:
273278
params["language"] = lang_code
274-
logger.debug(f"Set language to: {lang_code}")
279+
logger.debug("Language override configured for whispercpp transcription")
275280

276281
# Calculate optimal number of processors based on video duration and core count
277282
# Each processor will handle at least 1 minute (60 seconds) of audio

microservices/audio-analyzer/audio_analyzer/utils/logger.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,24 @@
22
# SPDX-License-Identifier: Apache-2.0
33

44
import logging
5+
import re
56
import sys
67
from typing import Optional
78

89
from audio_analyzer.core.settings import settings
910

11+
# Runs of ASCII control characters (CR, LF, TAB, the rest of 0x00-0x1F, and
# DEL) are what allow a caller-supplied value to forge extra log lines.
_SAFE_LOG_PATTERN = re.compile(r"[\r\n\t\x00-\x1f\x7f]+")


def sanitize_for_log(value, max_len: int = 1024) -> str:
    """Return a compact, single-line representation for safe logging.

    Args:
        value: Any object. ``None`` becomes an empty string; everything else
            is converted with ``str()``.
        max_len: Upper bound on the length of the returned string. Values
            below zero are treated as zero.

    Returns:
        The text with each run of control characters replaced by a space,
        all whitespace runs collapsed to single spaces, leading/trailing
        whitespace removed, and the result cut to at most ``max_len``
        characters. When there is room for it, a truncated result ends in
        ``"..."``.
    """
    text = "" if value is None else str(value)
    text = _SAFE_LOG_PATTERN.sub(" ", text)
    # split() with no argument also strips the ends and collapses any
    # remaining whitespace runs, so the output is always one compact line.
    text = " ".join(text.split())

    if len(text) <= max_len:
        return text

    ellipsis = "..."
    if max_len < len(ellipsis):
        # No room for the marker: a negative slice bound here would count
        # from the end of the string and return more than max_len characters,
        # so hard-truncate instead.
        return text[: max(max_len, 0)]
    return text[: max_len - len(ellipsis)] + ellipsis
22+
1023

1124
def setup_logger(name: Optional[str] = None) -> logging.Logger:
1225
"""

microservices/audio-analyzer/docker/Dockerfile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ ENV PATH="$PATH:$POETRY_HOME/bin:$VENV_PATH/bin"
3333
ENV HOME=/home/appuser
3434

3535
# Install curl to be used for health checks in final stages
36-
RUN apt update -y && apt install -y curl
36+
RUN apt-get update -y && apt-get install --no-install-recommends -y curl && rm -rf /var/lib/apt/lists/*
3737
# Upgrade pip to the latest version
3838
RUN pip install --upgrade pip
3939

@@ -141,7 +141,7 @@ RUN if [ "$COPYLEFT_SOURCES" = "true" ]; then \
141141
# Get source code for installed Python packages with copyleft licenses \
142142
mkdir -p /copyleft_sources/python && \
143143
cd /copyleft_sources/python && \
144-
apt-get update && apt-get install -y gcc build-essential libffi-dev python3-dev && \
144+
apt-get update && apt-get install --no-install-recommends -y gcc build-essential libffi-dev python3-dev && \
145145
# Download python package sources with relevant licenses \
146146
pip3 freeze | cut -d= -f1 | while read pkg; do \
147147
meta=$(pip3 show $pkg 2>/dev/null); \

microservices/audio-analyzer/poetry.lock

Lines changed: 27 additions & 9 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

microservices/audio-analyzer/pyproject.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ gunicorn = "^23.0.0"
1414
python-multipart = "^0.0.22"
1515
pydantic = "^2.11.5"
1616
pydantic-settings = "^2.9.1"
17-
setuptools = "^80.9.0"
17+
setuptools = "^82.0.1"
1818
transformers = "^4.51.3"
1919
pywhispercpp = { git = "https://github.com/absadiki/pywhispercpp.git", tag = "v1.4.0" }
2020
moviepy = "^2.2.1"
@@ -30,6 +30,7 @@ torchvision = { version = "^0.22.0", source = "pytorch-cpu" }
3030
urllib3 = "^2.6.3"
3131
protobuf = "^6.33.5"
3232
filelock = "^3.20.3"
33+
wheel = "^0.46.3"
3334

3435

3536
[tool.poetry.group.dev.dependencies]

microservices/multimodal-embedding-serving/docker/Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ RUN if [ "$COPYLEFT_SOURCES" = "true" ]; then \
8282
# Get source code for installed Python packages with copyleft licenses \
8383
mkdir -p /copyleft_sources/python && \
8484
cd /copyleft_sources/python && \
85-
apt-get update && apt-get install -y gcc build-essential libffi-dev python3-dev && \
85+
apt-get update && apt-get install --no-install-recommends -y gcc build-essential libffi-dev python3-dev && \
8686
# Download python package sources with relevant licenses \
8787
pip3 freeze | cut -d= -f1 | while read pkg; do \
8888
meta=$(pip3 show $pkg 2>/dev/null); \

0 commit comments

Comments
 (0)