fix(audio): fix request_format for Albert integration (#665)

leoguillaume · web-flow · commit f6544fa12236 · 2026-01-29T09:10:28.000+01:00
* fix(tests): audio transcription and chat tests

* Update unit coverage badge

---------

Co-authored-by: leoguillaume <leoguillaume@users.noreply.github.com>
diff --git a/.github/.env.ci.example b/.github/.env.ci.example
@@ -6,10 +6,11 @@
 # CELERY_EXTRA_ARGS="--loglevel DEBUG"
 
 # Dependencies
-POSTGRES_PORT=5432
-REDIS_PORT=6379
-ELASTICSEARCH_PORT=9200
-RABBITMQ_PORT=5672
+POSTGRES_PORT=15432
+REDIS_PORT=16379
+ELASTICSEARCH_PORT=19200
+RABBITMQ_PORT=25672
+RABBITMQ_UI_PORT=15672
 
 # Secrets (to complete)
 # ALBERT_API_KEY=
diff --git a/.github/badges/coverage.json b/.github/badges/coverage.json
@@ -1 +1 @@
-{"schemaVersion":1,"label":"coverage","message":"51.05%","color":"red"}
+{"schemaVersion":1,"label":"coverage","message":"51.16%","color":"red"}
diff --git a/.github/compose.ci.yml b/.github/compose.ci.yml
@@ -34,7 +34,7 @@ services:
     image: rabbitmq:3.13-management
     restart: always
     ports:
-      - "15672:15672"
+      - "${RABBITMQ_UI_PORT:-15672}:15672"
       - "${RABBITMQ_PORT:-5672}:5672"
     environment:
       - RABBITMQ_DEFAULT_USER=rabbitmq
diff --git a/api/endpoints/audio.py b/api/endpoints/audio.py
@@ -1,17 +1,14 @@
 from contextvars import ContextVar
-from typing import Literal
+from typing import Annotated
 
-from fastapi import APIRouter, Depends, File, Form, Request, Security, UploadFile
+from fastapi import APIRouter, Depends, Form, Request, Security
 from fastapi.responses import JSONResponse, PlainTextResponse
 from redis.asyncio import Redis as AsyncRedis
 from sqlalchemy.ext.asyncio import AsyncSession
 
 from api.helpers._accesscontroller import AccessController
 from api.helpers.models import ModelRegistry
-from api.schemas.audio import (
-    AudioTranscription,
-    AudioTranscriptionLanguage,
-)
+from api.schemas.audio import AudioTranscription, CreateAudioTranscription
 from api.schemas.core.context import RequestContext
 from api.schemas.core.models import RequestContent
 from api.utils.dependencies import get_model_registry, get_postgres_session, get_redis_client, get_request_context
@@ -20,16 +17,12 @@
 router = APIRouter(prefix="/v1", tags=[ROUTER__AUDIO.title()])
 
 
-# fmt: off
-@router.post(path=ENDPOINT__AUDIO_TRANSCRIPTIONS, dependencies=[Security(dependency=AccessController())], status_code=200, response_model=AudioTranscription)
+@router.post(
+    path=ENDPOINT__AUDIO_TRANSCRIPTIONS, dependencies=[Security(dependency=AccessController())], status_code=200, response_model=AudioTranscription
+)
 async def audio_transcriptions(
     request: Request,
-    file: UploadFile = File(description="The audio file object (not file name) to transcribe, in one of these formats: mp3 or wav."),
-    model: str = Form(default=..., description="ID of the model to use. Call `/v1/models` endpoint to get the list of available models, only `automatic-speech-recognition` model type is supported."),
-    language: AudioTranscriptionLanguage = Form(default=AudioTranscriptionLanguage.EMPTY, description="The language of the output audio. If the output language is different than the audio language, the audio language will be translated into the output language. Supplying the output language in ISO-639-1 (e.g. en, fr) format will improve accuracy and latency."),
-    prompt: str | None = Form(default=None, description="An optional text to tell the model what to do with the input audio. Default is `Transcribe this audio in this language : {language}`"),
-    response_format: Literal["json", "text"] = Form(default="json", description="The format of the transcript output, in one of these formats: `json` or `text`."),
-    temperature: float | None = Form(default=None, ge=0, le=1, description="The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, the model will use log probability to automatically increase the temperature until certain thresholds are hit."),
+    data: Annotated[CreateAudioTranscription, Form()],
     model_registry: ModelRegistry = Depends(get_model_registry),
     redis_client: AsyncRedis = Depends(get_redis_client),
     postgres_session: AsyncSession = Depends(get_postgres_session),
@@ -39,28 +32,29 @@ async def audio_transcriptions(
     Transcribes audio into the input language.
     """
     model_provider = await model_registry.get_model_provider(
-        model=model,
+        model=data.model,
         endpoint=ENDPOINT__AUDIO_TRANSCRIPTIONS,
         postgres_session=postgres_session,
         redis_client=redis_client,
-        request_context=request_context
+        request_context=request_context,
     )
 
-    file_content = await file.read()
-    form = {"model": model, "response_format": response_format, "temperature": temperature, "language": language.value, "prompt": prompt}
+    file_content = await data.file.read()
+    form = data.model_dump()
+    form.pop("file")
 
     response = await model_provider.forward_request(
         request_content=RequestContent(
             method="POST",
-            model=model,
+            model=data.model,
             endpoint=ENDPOINT__AUDIO_TRANSCRIPTIONS,
-            files={"file": (file.filename, file_content, file.content_type)},
+            files={"file": (data.file.filename, file_content, data.file.content_type)},
             form=form,
         ),
         redis_client=redis_client,
     )
 
-    if response_format == "text":
+    if data.response_format == "text":
         response = PlainTextResponse(content=response.json()["text"], status_code=response.status_code)
     else:
         response = JSONResponse(content=AudioTranscription(**response.json()).model_dump(), status_code=response.status_code)
diff --git a/api/schemas/audio.py b/api/schemas/audio.py
@@ -1,8 +1,10 @@
 import base64
 from enum import Enum
+from typing import Literal
 
+from fastapi import File, UploadFile
 from mistralai.models import AudioChunk, ChatCompletionRequest, TextChunk, UserMessage
-from pydantic import Field
+from pydantic import Field, field_validator
 
 from api.schemas import BaseModel
 from api.schemas.admin.providers import ProviderType
@@ -18,6 +20,17 @@
 
 
 class CreateAudioTranscription(BaseModel):
+    file: UploadFile = File(description="The audio file object (not file name) to transcribe, in one of these formats: mp3 or wav.")  # fmt: off
+    model: str = Field(default=..., description="ID of the model to use. Call `/v1/models` endpoint to get the list of available models, only `automatic-speech-recognition` model type is supported.")  # fmt: off
+    language: AudioTranscriptionLanguage = Field(default=AudioTranscriptionLanguage.EMPTY, description="The language of the output audio. If the output language is different than the audio language, the audio language will be translated into the output language. Supplying the output language in ISO-639-1 (e.g. en, fr) format will improve accuracy and latency.")  # fmt: off
+    prompt: str | None = Field(default=None, description="An optional text to tell the model what to do with the input audio. Default is `Transcribe this audio in this language : {language}`")  # fmt: off
+    response_format: Literal["json", "text"] = Field(default="json", description="The format of the transcript output, in one of these formats: `json` or `text`.")  # fmt: off
+    temperature: float | None = Field(default=None, ge=0, le=1, description="The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, the model will use log probability to automatically increase the temperature until certain thresholds are hit.")  # fmt: off
+
+    @field_validator("language", mode="after")
+    def extract_value_language(cls, language: AudioTranscriptionLanguage) -> str:
+        return language.value
+
     @staticmethod
     def format_request(provider_type: ProviderType, request_content: RequestContent):
         match provider_type:
@@ -43,13 +56,10 @@ def format_request(provider_type: ProviderType, request_content: RequestContent)
                 return request_content
 
             case ProviderType.VLLM:
-                if request_content.form["language"] == AudioTranscriptionLanguage.EMPTY:
-                    request_content.form.pop("language")
+                request_content.form["language"] = "en" if request_content.form["language"] == "" else request_content.form["language"]
+                request_content.form["temperature"] = 0 if request_content.form["temperature"] is None else request_content.form["temperature"]
+                request_content.form["prompt"] = "" if request_content.form["prompt"] is None else request_content.form["prompt"]
 
-                if request_content.form.get("temperature") is None:
-                    request_content.form.pop("temperature")
-
-                request_content.form["response_format"] = "json"
                 return request_content
 
             case _:
diff --git a/api/tests/integ/test_chat.py b/api/tests/integ/test_chat.py
@@ -197,7 +197,7 @@ def test_chat_completions_search_no_collections(self, client: TestClient, setup)
             },
         }
         response = client.post_without_permissions(url=f"/v1{ENDPOINT__CHAT_COMPLETIONS}", json=params)
-        assert response.status_code == 200, response.text
+        assert response.status_code == 422, response.text
 
     def test_chat_completions_search_template(self, client: TestClient, setup):
         """Test the GET /chat/completions search template."""

Original file line number	Diff line number	Diff line change
`@@ -1 +1 @@`
`1`		`-{"schemaVersion":1,"label":"coverage","message":"51.05%","color":"red"}`
	`1`	`+{"schemaVersion":1,"label":"coverage","message":"51.16%","color":"red"}`
Original file line number	Diff line number	Diff line change
`@@ -197,7 +197,7 @@ def test_chat_completions_search_no_collections(self, client: TestClient, setup)`
`197`	`197`	`},`
`198`	`198`	`}`
`199`	`199`	`response = client.post_without_permissions(url=f"/v1{ENDPOINT__CHAT_COMPLETIONS}", json=params)`
`200`		`- assert response.status_code == 200, response.text`
	`200`	`+ assert response.status_code == 422, response.text`
`201`	`201`
`202`	`202`	`def test_chat_completions_search_template(self, client: TestClient, setup):`
`203`	`203`	`"""Test the GET /chat/completions search template."""`