etalab-ia
diff --git a/‎.github/workflows/docker.yml‎
Lines changed: 0 additions & 1 deletion b/‎.github/workflows/docker.yml‎
Lines changed: 0 additions & 1 deletion
diff --git a/‎README.md‎
Lines changed: 33 additions & 2 deletions b/‎README.md‎
Lines changed: 33 additions & 2 deletions
diff --git a/‎app/Dockerfile‎
Lines changed: 1 addition & 1 deletion b/‎app/Dockerfile‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎app/endpoints/audio.py‎
Lines changed: 7 additions & 1 deletion b/‎app/endpoints/audio.py‎
Lines changed: 7 additions & 1 deletion
diff --git a/‎app/main.py‎
Lines changed: 8 additions & 0 deletions b/‎app/main.py‎
Lines changed: 8 additions & 0 deletions
diff --git a/‎app/tests/conftest.py‎
Lines changed: 23 additions & 80 deletions b/‎app/tests/conftest.py‎
Lines changed: 23 additions & 80 deletions
@@ -4,7 +4,6 @@ on:
   push:
     branches:
       - main
-      - feat/make-diarization-optional # tmp for dev
   workflow_dispatch:
 
 jobs:
 
@@ -33,6 +33,36 @@ export LOGGING_CONFIG=logging-config.yaml
 python app/main.py
 ```
 
+## Deployment
+
+### Build the image
+
+```bash
+docker build -f app/Dockerfile -t whisperx-openai-api .
+```
+
+### Run
+
+```bash
+docker run -d \
+  --gpus all \
+  -p 8000:8000 \
+  -e API_KEY=your-api-key \
+  -e HF_TOKEN=your-hf-token \
+  -v /path/to/models:/data/models \
+  whisperx-openai-api
+```
+
+Models are downloaded on first startup and cached in `/data/models`. Mount a persistent volume to avoid re-downloading on restart.
+
+**1 worker is recommended.** GPU inference is serialized internally: each additional worker loads a full copy of the model into VRAM, which does not improve throughput unless you have multiple GPUs.
+
+To scale workers (each worker loads its own model in VRAM):
+
+```bash
+docker run -d --gpus all ... -e WORKERS=2 whisperx-openai-api
+```
+
 ## Testing
 
 Tests mock actual inference and can be run locally:
@@ -52,7 +82,7 @@ Check the [documentation to run integration tests](docs/testing_with_gpu.md) on
 | -------- | ----------- | ------- |
 | API_KEY | API key for API access | Required |
 | HF_TOKEN | Hugging Face token (required for diarization) | Required |
-| TRANSCRIBE_MODEL | WhisperX model to load | `large-v2` |
+| TRANSCRIBE_MODEL | WhisperX model to load | `large-v3-turbo` |
 | BATCH_SIZE | Transcription batch size | `16` |
 | DIARIZE_MODEL | Pyannote diarization model | `pyannote/speaker-diarization-community-1` |
 | PRELOADED_ALIGN_MODEL_LANGUAGES | Languages to pre-load alignment models for | `["en", "fr", "nl", "de"]` |
@@ -61,8 +91,9 @@ Check the [documentation to run integration tests](docs/testing_with_gpu.md) on
 | FILL_NEAREST | Fill nearest gaps in speaker assignment (diarization only) | `false` |
 | TIMEOUT_KEEP_ALIVE | Keep-alive timeout (seconds) | `60` |
 | PORT | Server port | `8000` |
+| WORKERS | Number of uvicorn workers (each loads its own model in VRAM) | `1` |
 | RELOAD | Enable auto-reload | `false` |
 | ROOT_PATH | API root path | `None` |
-| LOGGING_CONFIG | Path to logging config file | `None` |
+| LOGGING_CONFIG | Path to logging config file | `logging-config.yaml` |
 | DEBUG | Enable debug logging | `false` |
 
@@ -75,4 +75,4 @@ COPY --chown=whisperuser:whisperuser ./logging-config.yaml /app/logging-config.y
 
 USER whisperuser
 
-CMD ["python3", "main.py"]
+ENTRYPOINT ["sh", "-c", "gunicorn main:app --workers ${WORKERS:-1} --worker-class uvicorn.workers.UvicornWorker --timeout 120 --bind 0.0.0.0:${PORT:-8000}"]
@@ -1,7 +1,9 @@
+import asyncio
 import logging
 import os
 import tempfile
 import time
+from functools import partial
 from typing import Annotated, Optional
 
 import numpy as np
@@ -16,6 +18,7 @@
     UploadFile,
 )
 from services.transcription import transcribe
+from utils.lifespan import gpu_executor
 import whisperx
 
 from schemas.audio import AudioTranscription, InputTokenDetails, Segment, Usage
@@ -122,6 +125,9 @@ async def audio_transcriptions(
     audio = whisperx.load_audio(temp_file_path)
     os.remove(temp_file_path)
 
-    result = transcribe(audio, settings, language, is_diarize=is_diarize)
+    loop = asyncio.get_event_loop()
+    result = await loop.run_in_executor(
+        gpu_executor, partial(transcribe, audio, settings, language, is_diarize=is_diarize)
+    )
 
     return _build_response(result, audio, is_diarize=is_diarize)
@@ -1,12 +1,19 @@
+import logging
+import logging.config
 from typing import Annotated
 
 from fastapi import Depends, FastAPI
 import uvicorn
+import yaml
 
 from endpoints import audio, models, monitoring
 from utils.config import Settings, get_settings, settings
 from utils.lifespan import lifespan
 
+if settings.logging_config:
+    with open(settings.logging_config) as f:
+        logging.config.dictConfig(yaml.safe_load(f))
+
 # Setup FastAPI
 app = FastAPI(
     title="LaSuite Meet WhisperX",
@@ -39,4 +46,5 @@ async def info(settings: Annotated[Settings, Depends(get_settings)]):
         log_config=settings.logging_config,
         reload=settings.reload,
         timeout_keep_alive=settings.timeout_keep_alive,
+        workers=settings.workers,
     )
@@ -2,115 +2,58 @@
 import sys
 from unittest.mock import MagicMock, patch
 
-# ---------------------------------------------------------------------------
-# 1. Mock heavy third-party libraries in sys.modules BEFORE any app import.
-#    app modules import whisperx, torch, numpy at the top level.
-# ---------------------------------------------------------------------------
-_mock_np = MagicMock()
+import pytest
+
+# Mock libs before import
 _mock_torch = MagicMock()
 _mock_torch.cuda.is_available.return_value = False
 _mock_torch.float32 = "float32"
 
 _mock_whisperx = MagicMock()
-FAKE_LANGUAGES = {"en": "english", "fr": "french", "cz": "czech"}
-FAKE_ALIGN_MODELS_HF = {"en": "WAV2VEC2_ASR_BASE_960H", "fr": "some-fr-model"}
-FAKE_ALIGN_MODELS_TORCH = {"en": "WAV2VEC2_ASR_BASE_960H"}
-
-_mock_whisperx.utils.LANGUAGES = FAKE_LANGUAGES
-_mock_whisperx.alignment.DEFAULT_ALIGN_MODELS_HF = FAKE_ALIGN_MODELS_HF
-_mock_whisperx.alignment.DEFAULT_ALIGN_MODELS_TORCH = FAKE_ALIGN_MODELS_TORCH
+_mock_whisperx.utils.LANGUAGES = {"en": "english", "fr": "french", "cz": "czech"}
+_mock_whisperx.alignment.DEFAULT_ALIGN_MODELS_HF = {"en": "...", "fr": "..."}
+_mock_whisperx.alignment.DEFAULT_ALIGN_MODELS_TORCH = {}
 
-for mod_name, mock_obj in {
-    "numpy": _mock_np,
-    "np": _mock_np,
+for name, mock in {
+    "numpy": MagicMock(),
     "torch": _mock_torch,
     "whisperx": _mock_whisperx,
     "whisperx.utils": _mock_whisperx.utils,
     "whisperx.alignment": _mock_whisperx.alignment,
     "whisperx.asr": _mock_whisperx.asr,
     "whisperx.diarize": _mock_whisperx.diarize,
 }.items():
-    sys.modules.setdefault(mod_name, mock_obj)
+    sys.modules.setdefault(name, mock)
 
-# ---------------------------------------------------------------------------
-# 2. Set required env vars BEFORE any app module is imported.
-#    config.py and security.py evaluate settings at module level.
-# ---------------------------------------------------------------------------
-TEST_API_KEY = "test-api-key"
-TEST_HF_TOKEN = "test-hf-token"
+os.environ.setdefault("API_KEY", "test-key")
+os.environ.setdefault("HF_TOKEN", "test-token")
 
-os.environ.setdefault("API_KEY", TEST_API_KEY)
-os.environ.setdefault("HF_TOKEN", TEST_HF_TOKEN)
+# App imports (must come after mocking)
 
-# ---------------------------------------------------------------------------
-# 3. Now it is safe to import app modules.
-# ---------------------------------------------------------------------------
 from fastapi import FastAPI  # noqa: E402
 from fastapi.testclient import TestClient  # noqa: E402
-import pytest  # noqa: E402
 
 from endpoints import audio  # noqa: E402
 from utils.config import Settings, get_settings  # noqa: E402
 from utils.security import check_api_key  # noqa: E402
 
-_FAKE_WORDS = [
-    {"word": "Hello", "start": 0.0, "end": 0.7, "score": 0.95},
-    {"word": "world.", "start": 0.8, "end": 1.5, "score": 0.92},
-]
-
-MOCK_TRANSCRIPTION_RESULT = {
-    "segments": [
-        {"start": 0.0, "end": 1.5, "text": "Hello world.", "words": _FAKE_WORDS}
-    ],
-    "word_segments": _FAKE_WORDS,
+FAKE_TRANSCRIPTION = {
+    "segments": [{"start": 0.0, "end": 1.5, "text": "Hello world.", "speaker": "SPEAKER_00"}],
 }
 
-FAKE_AUDIO = MagicMock(name="fake_audio_array")
-
-
-def _test_settings() -> Settings:
-    return Settings(
-        api_key=TEST_API_KEY,
-        hf_token=TEST_HF_TOKEN,
-        transcribe_model="large-v2",
-        batch_size=4,
-    )
-
 
 @pytest.fixture()
 def client():
-    """TestClient with dependency overrides and lifespan disabled."""
     app = FastAPI()
     app.include_router(audio.router, prefix="/v1")
+    app.dependency_overrides[get_settings] = lambda: Settings(
+        api_key="test-key", hf_token="test-token", transcribe_model="large-v3-turbo"
+    )
+    app.dependency_overrides[check_api_key] = lambda: "test-key"
 
-    app.dependency_overrides[get_settings] = _test_settings
-    app.dependency_overrides[check_api_key] = lambda: TEST_API_KEY
-
-    with TestClient(app) as c:
+    with (
+        patch.object(audio.whisperx, "load_audio", return_value=MagicMock()),
+        patch.object(audio, "transcribe", return_value=FAKE_TRANSCRIPTION),
+        TestClient(app) as c,
+    ):
         yield c
-
-
-@pytest.fixture()
-def mock_whisperx():
-    """Patch whisperx symbols used directly in the endpoint module."""
-    with patch.object(
-        audio.whisperx, "load_audio", return_value=FAKE_AUDIO
-    ) as load_audio:
-        yield {"load_audio": load_audio, "fake_audio": FAKE_AUDIO}
-
-
-@pytest.fixture()
-def mock_transcribe():
-    """Patch the transcribe service function as imported in the endpoint module."""
-    with patch.object(
-        audio,
-        "transcribe",
-        return_value=MOCK_TRANSCRIPTION_RESULT,
-    ) as mock:
-        yield mock
-
-
-@pytest.fixture()
-def sample_audio_bytes() -> bytes:
-    """Minimal bytes to simulate an uploaded audio file."""
-    return b"\x00" * 1024
Original file line number	Diff line number	Diff line change
`@@ -75,4 +75,4 @@ COPY --chown=whisperuser:whisperuser ./logging-config.yaml /app/logging-config.y`
`75`	`75`
`76`	`76`	`USER whisperuser`
`77`	`77`
`78`		`-CMD ["python3", "main.py"]`
	`78`	`+ENTRYPOINT ["sh", "-c", "gunicorn main:app --workers ${WORKERS:-1} --worker-class uvicorn.workers.UvicornWorker --timeout 120 --bind 0.0.0.0:${PORT:-8000}"]`