diff --git a/.github/badges/coverage.json b/.github/badges/coverage.json
index 086c0da97..30757f0fc 100644
--- a/.github/badges/coverage.json
+++ b/.github/badges/coverage.json
@@ -1 +1 @@
-{"schemaVersion":1,"label":"coverage","message":"49.88%","color":"red"}
+{"schemaVersion":1,"label":"coverage","message":"51.02%","color":"red"}
diff --git a/api/endpoints/files.py b/api/endpoints/files.py
deleted file mode 100644
index fc73670ff..000000000
--- a/api/endpoints/files.py
+++ /dev/null
@@ -1,120 +0,0 @@
-from contextvars import ContextVar
-from io import BytesIO
-import json
-from pathlib import Path
-
-from elasticsearch import AsyncElasticsearch
-from fastapi import APIRouter, Body, Depends, File, Security, UploadFile
-from fastapi.responses import JSONResponse
-from pydantic import ValidationError
-from redis.asyncio import Redis as AsyncRedis
-from sqlalchemy.ext.asyncio import AsyncSession
-from starlette.datastructures import Headers
-
-from api.helpers._accesscontroller import AccessController
-from api.helpers._elasticsearchvectorstore import ElasticsearchVectorStore
-from api.helpers.models import ModelRegistry
-from api.schemas.core.context import RequestContext
-from api.schemas.core.documents import JsonFile
-from api.schemas.files import ChunkerArgs, FileResponse, FilesRequest
-from api.utils.context import global_context
-from api.utils.dependencies import (
-    get_elasticsearch_client,
-    get_elasticsearch_vector_store,
-    get_model_registry,
-    get_postgres_session,
-    get_redis_client,
-    get_request_context,
-)
-from api.utils.exceptions import CollectionNotFoundException, FileSizeLimitExceededException, InvalidJSONFormatException
-from api.utils.variables import EndpointRoute
-
-router = APIRouter(prefix="/v1", tags=["Legacy"])
-
-
-@router.post(path=EndpointRoute.FILES, status_code=201, response_model=FileResponse, dependencies=[Security(dependency=AccessController())])
-async def upload_file(
-    file: UploadFile = File(...),
-    request: FilesRequest = Body(...),
-    redis_client: AsyncRedis = Depends(get_redis_client),
-    model_registry: ModelRegistry = Depends(get_model_registry),
-    postgres_session: AsyncSession = Depends(get_postgres_session),
-    request_context: ContextVar[RequestContext] = Depends(get_request_context),
-    elasticsearch_vector_store: ElasticsearchVectorStore = Depends(get_elasticsearch_vector_store),
-    elasticsearch_client: AsyncElasticsearch = Depends(get_elasticsearch_client),
-) -> JSONResponse:
-    """
-    **[DEPRECATED]** Upload a file to be processed, chunked, and stored into a vector database. Supported file types : pdf, html, json.
-
-    Supported files types:
-    - pdf: Portable Document Format file.
-    - json: JavaScript Object Notation file.
-        For JSON, file structure like a list of documents: [{"text": "hello world", "title": "my document", "metadata": {"autor": "me"}}, ...]} or [{"text": "hello world", "title": "my document"}, ...]}
-        Each document must have a "text" and "title" keys and "metadata" key (optional) with dict type value.
-    - html: Hypertext Markup Language file.
-    - markdown: Markdown Language file.
-
-    Max file size is 20MB.
-    """
-    if not global_context.document_manager:  # no vector store available
-        raise CollectionNotFoundException()
-
-    file_size = len(file.file.read())
-    if file_size > FileSizeLimitExceededException.MAX_CONTENT_SIZE:
-        raise FileSizeLimitExceededException()
-    file.file.seek(0)  # reset file pointer to the beginning of the file
-
-    if request.chunker:
-        chunker_args = request.chunker.args.model_dump() if request.chunker.args else ChunkerArgs().model_dump()
-        chunker = request.chunker.name
-    else:
-        chunker_args = ChunkerArgs().model_dump()
-        chunker = None
-
-    chunker_args["length_function"] = len if chunker_args["length_function"] == "len" else chunker_args["length_function"]
-
-    filename = file.filename
-    extension = Path(filename).suffix.lower()
-    if extension == ".json" and file.content_type in ["application/json", "application/octet-stream"]:
-        try:
-            file = JsonFile(documents=json.loads(file.file.read())).documents
-        except ValidationError as e:
-            detail = "; ".join([f"{error["loc"][-1]}: {error["msg"]}" for error in e.errors()])
-            raise InvalidJSONFormatException(detail=detail)
-
-        files = list()
-
-        for document in file:
-            document_text = document.model_dump()
-            text = document_text.get("text", "")
-            metadata = document_text.get("metadata", {})
-            name = f"{document_text["title"]}.json" if document_text.get("title") else f"{filename}.json"
-
-            # Convert json into txt file
-            file = UploadFile(filename=name, file=BytesIO(text.encode("utf-8")), headers=Headers({"content-type": "text/txt"}))
-            files.append((file, metadata))
-    else:
-        files = [(file, None)]
-
-    for file, metadata in files:
-        document_id = await global_context.document_manager.create_document(
-            request_context=request_context,
-            postgres_session=postgres_session,
-            elasticsearch_vector_store=elasticsearch_vector_store,
-            elasticsearch_client=elasticsearch_client,
-            redis_client=redis_client,
-            model_registry=model_registry,
-            collection_id=request.collection,
-            file=file,
-            chunker=chunker,
-            chunk_min_size=chunker_args["chunk_min_size"],
-            chunk_size=chunker_args["chunk_size"],
-            chunk_overlap=chunker_args["chunk_overlap"],
-            separators=chunker_args["separators"],
-            is_separator_regex=chunker_args["is_separator_regex"],
-            metadata=None,
-        )
-
-        file.file.close()
-
-    return JSONResponse(status_code=201, content={"id": document_id})
diff --git a/api/endpoints/ocr.py b/api/endpoints/ocr.py
index f221f16c6..f5f13aa8c 100644
--- a/api/endpoints/ocr.py
+++ b/api/endpoints/ocr.py
@@ -1,25 +1,18 @@
-import base64
 from contextvars import ContextVar
 
-from fastapi import APIRouter, Depends, File, HTTPException, Request, Security, UploadFile
+from fastapi import APIRouter, Depends, Request, Security
 from fastapi.responses import JSONResponse
-import pymupdf
 from redis.asyncio import Redis as AsyncRedis
 from sqlalchemy.ext.asyncio import AsyncSession
 
 from api.helpers._accesscontroller import AccessController
 from api.helpers.models import ModelRegistry
 from api.schemas.core.context import RequestContext
-from api.schemas.core.documents import FileType
 from api.schemas.core.models import RequestContent
 from api.schemas.exception import HTTPExceptionModel
-from api.schemas.ocr import OCR, CreateOCR, DPIForm, ModelForm, PromptForm
-from api.schemas.parse import ParsedDocument, ParsedDocumentMetadata, ParsedDocumentPage
-from api.schemas.usage import Usage
-from api.utils.context import global_context
+from api.schemas.ocr import OCR, CreateOCR
 from api.utils.dependencies import get_model_registry, get_postgres_session, get_redis_client, get_request_context
 from api.utils.exceptions import (
-    FileSizeLimitExceededException,
     ModelIsTooBusyException,
     ModelNotFoundException,
     WrongModelTypeException,
@@ -66,79 +59,3 @@ async def ocr(
     )
 
     return JSONResponse(content=OCR(**response.json()).model_dump(), status_code=response.status_code)
-
-
-@router.post(path=EndpointRoute.OCR_BETA, dependencies=[Security(dependency=AccessController())], status_code=200, response_model=ParsedDocument)
-@hooks
-async def ocr_beta(
-    request: Request,
-    file: UploadFile = File(..., description="The file to parse."),
-    model: str = ModelForm,
-    dpi: int = DPIForm,
-    prompt: str = PromptForm,
-    model_registry: ModelRegistry = Depends(get_model_registry),
-    redis_client: AsyncRedis = Depends(get_redis_client),
-    postgres_session: AsyncSession = Depends(get_postgres_session),
-    request_context: ContextVar[RequestContext] = Depends(get_request_context),
-) -> JSONResponse:
-    """
-    Extracts text from PDF files using OCR.
-    """
-    # check if file is a pdf (raises UnsupportedFileTypeException if not a PDF)
-    global_context.document_manager.parser_manager.check_file_type(file=file, type=FileType.PDF)
-
-    # check file size
-    if file.size > FileSizeLimitExceededException.MAX_CONTENT_SIZE:
-        raise FileSizeLimitExceededException()
-
-    file_content = await file.read()
-    pdf = pymupdf.open(stream=file_content, filetype="pdf")
-    document = ParsedDocument(data=[], usage=Usage())
-    for i, page in enumerate(pdf):
-        image = page.get_pixmap(dpi=dpi)
-        img_byte_arr = image.tobytes("png")
-        payload = {
-            "model": model,
-            "messages": [
-                {
-                    "role": "user",
-                    "content": [
-                        {"type": "text", "text": prompt},
-                        {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{base64.b64encode(img_byte_arr).decode("utf-8")}"}},
-                    ],
-                }
-            ],
-            "n": 1,
-            "stream": False,
-        }
-
-        model_provider = await model_registry.get_model_provider(
-            model=model,
-            endpoint=EndpointRoute.OCR_BETA,
-            postgres_session=postgres_session,
-            redis_client=redis_client,
-            request_context=request_context,
-        )
-
-        response = await model_provider.forward_request(
-            request_content=RequestContent(method="POST", endpoint=EndpointRoute.CHAT_COMPLETIONS, json=payload, model=model),
-            redis_client=redis_client,
-        )
-        status = response.status_code
-        body_json = response.json()
-        if status // 100 != 2:
-            pdf.close()
-            raise HTTPException(status_code=status, detail=body_json.get("detail", "OCR request failed"))
-        text = body_json.get("choices", [{}])[0].get("message", {}).get("content", "")
-        document.data.append(
-            ParsedDocumentPage(
-                content=text,
-                images={},
-                metadata=ParsedDocumentMetadata(page=i, document_name=file.filename, **pdf.metadata),
-            )
-        )
-        if body_json.get("usage"):
-            document.usage = Usage(**body_json["usage"])
-    pdf.close()
-
-    return JSONResponse(content=document.model_dump(), status_code=200)
diff --git a/api/endpoints/parse.py b/api/endpoints/parse.py
index 7f91bb0bb..4f62122a3 100644
--- a/api/endpoints/parse.py
+++ b/api/endpoints/parse.py
@@ -12,7 +12,13 @@
 router = APIRouter(prefix="/v1", tags=[RouterName.PARSE.title()])
 
 
-@router.post(path=EndpointRoute.PARSE, dependencies=[Security(dependency=AccessController())], status_code=200, response_model=ParsedDocument)
+@router.post(
+    path=EndpointRoute.PARSE,
+    dependencies=[Security(dependency=AccessController())],
+    status_code=200,
+    response_model=ParsedDocument,
+    deprecated=True,
+)
 async def parse(
     request: Request,
     data: Annotated[CreateParseForm, Depends(CreateParseForm.as_form)],
diff --git a/api/helpers/_accesscontroller.py b/api/helpers/_accesscontroller.py
index 5ff5b93f2..de42df950 100755
--- a/api/helpers/_accesscontroller.py
+++ b/api/helpers/_accesscontroller.py
@@ -64,9 +64,6 @@ async def __call__(
         if request.url.path.endswith(EndpointRoute.EMBEDDINGS) and request.method in ["POST"]:
             await self._check_embeddings(body=body, user_info=user_info, postgres_session=postgres_session)
 
-        if request.url.path.endswith(EndpointRoute.FILES) and request.method in ["POST"]:
-            await self._check_files(user_info=user_info, postgres_session=postgres_session)
-
         if request.url.path.endswith(EndpointRoute.OCR) and request.method in ["POST"]:
             await self._check_ocr(body=body, user_info=user_info, postgres_session=postgres_session)
 
@@ -160,16 +157,6 @@ async def _check_embeddings(body: dict, user_info: UserInfo, postgres_session: A
         prompt_tokens = global_context.tokenizer.get_prompt_tokens(endpoint=EndpointRoute.EMBEDDINGS, body=body)
         await global_context.limiter.check_user_limits(user_info=user_info, router_id=router_id, prompt_tokens=prompt_tokens)
 
-    @staticmethod
-    async def _check_files(user_info: UserInfo, postgres_session: AsyncSession) -> None:
-        router_id = await global_context.model_registry.get_router_id_from_model_name(
-            model_name=global_context.document_manager.vector_store_model,
-            postgres_session=postgres_session,
-        )
-        if router_id is None:
-            return
-        await global_context.limiter.check_user_limits(user_info=user_info, router_id=router_id)
-
     @staticmethod
     async def _check_ocr(body: dict, user_info: UserInfo, postgres_session: AsyncSession) -> None:
         router_id = await global_context.model_registry.get_router_id_from_model_name(model_name=body.get("model"), postgres_session=postgres_session)
diff --git a/api/helpers/_documentmanager.py b/api/helpers/_documentmanager.py
index 534cf243c..a2b76876a 100755
--- a/api/helpers/_documentmanager.py
+++ b/api/helpers/_documentmanager.py
@@ -200,6 +200,7 @@ async def create_document(
             content = await self.parser_manager.parse(file=file)
         except Exception as e:
             logger.exception(f"failed to parse {document_name} ({e}).")
-            raise ParsingDocumentFailedException()
+            # the error is already logged above; chain it so the parser failure stays attached as __cause__
+            raise ParsingDocumentFailedException() from e
 
         # split the content into chunks
diff --git a/api/helpers/models/_modelregistry.py b/api/helpers/models/_modelregistry.py
index 1976927e2..fbb268ab2 100644
--- a/api/helpers/models/_modelregistry.py
+++ b/api/helpers/models/_modelregistry.py
@@ -80,7 +80,6 @@ class ModelRegistry:
         EndpointRoute.CHAT_COMPLETIONS: [ModelType.TEXT_GENERATION, ModelType.IMAGE_TEXT_TO_TEXT],
         EndpointRoute.EMBEDDINGS: [ModelType.TEXT_EMBEDDINGS_INFERENCE],
         EndpointRoute.OCR: [ModelType.IMAGE_TO_TEXT],
-        EndpointRoute.OCR_BETA: [ModelType.IMAGE_TEXT_TO_TEXT],
         EndpointRoute.RERANK: [ModelType.TEXT_CLASSIFICATION],
     }
 
diff --git a/api/routers/registry.py b/api/routers/registry.py
index c45a9623d..a9fd85a2e 100644
--- a/api/routers/registry.py
+++ b/api/routers/registry.py
@@ -26,7 +26,6 @@ class RouterDefinition:
     RouterDefinition(name=RouterName.COLLECTIONS, module_path="api.endpoints.collections"),
     RouterDefinition(name=RouterName.DOCUMENTS, module_path="api.endpoints.documents"),
     RouterDefinition(name=RouterName.EMBEDDINGS, module_path="api.endpoints.embeddings"),
-    RouterDefinition(name=RouterName.FILES, module_path="api.endpoints.files"),  # Inexistant ?
     RouterDefinition(name=RouterName.MODELS, module_path="api.infrastructure.fastapi.endpoints.models"),
     RouterDefinition(name=RouterName.OCR, module_path="api.endpoints.ocr"),
     RouterDefinition(name=RouterName.PARSE, module_path="api.endpoints.parse"),
diff --git a/api/schemas/core/configuration.py b/api/schemas/core/configuration.py
index 8ef833751..e27c566ad 100644
--- a/api/schemas/core/configuration.py
+++ b/api/schemas/core/configuration.py
@@ -151,7 +151,7 @@ class Model(ConfigBaseModel):
 
     name: constr(strip_whitespace=True, min_length=1, max_length=64) = Field(..., description="Unique name exposed to clients when selecting the model.", examples=["gpt-4o"])  # fmt: off
     type: ModelType = Field(..., description="Type of the model. It will be used to identify the model type.", examples=["text-generation"])  # fmt: off
-    aliases: list[constr(strip_whitespace=True, min_length=1, max_length=64)] = Field(default_factory=list, description="Aliases of the model. It will be used to identify the model by users.", examples=[["model-alias", "model-alias-2"]], json_extra_schema={"default": []})  # fmt: off
+    aliases: list[constr(strip_whitespace=True, min_length=1, max_length=64)] = Field(default_factory=list, description="Aliases of the model. It will be used to identify the model by users.", examples=[["model-alias", "model-alias-2"]], json_schema_extra={"default": []})  # fmt: off
     load_balancing_strategy: RouterLoadBalancingStrategy = Field(default=RouterLoadBalancingStrategy.SHUFFLE, description="Routing strategy for load balancing between providers of the model.", examples=["least_busy"])  # fmt: off
     cost_prompt_tokens: float = Field(default=0.0, ge=0.0, description="Model costs prompt tokens for user budget computation. The cost is by 1M tokens.", examples=[0.1])  # fmt: off
     cost_completion_tokens: float = Field(default=0.0, ge=0.0, description="Model costs completion tokens for user budget computation. The cost is by 1M tokens. Set to `0.0` to disable budget computation for this model.", examples=[0.1])  # fmt: off
@@ -183,7 +183,7 @@ class AlbertDependency(ConfigBaseModel):
     """
 
     url: constr(strip_whitespace=True, min_length=1) = Field(default="https://albert.api.etalab.gouv.fr", description="Albert API url.")  # fmt: off
-    headers: dict[str, str] = Field(default_factory=dict, description="Albert API request headers.", examples=[{"Authorization": "Bearer my-api-key"}], json_extra_schema={"default": {}})  # fmt: off
+    headers: dict[str, str] = Field(default_factory=dict, description="Albert API request headers.", examples=[{"Authorization": "Bearer my-api-key"}], json_schema_extra={"default": {}})  # fmt: off
     timeout: int = Field(default=DEFAULT_TIMEOUT, ge=1, description="Timeout for the Albert API requests.", examples=[10])  # fmt: off
 
 
@@ -375,7 +375,7 @@ class Settings(ConfigBaseModel):
     swagger_contact: dict | None = Field(default=None, description="Contact informations of the API in swagger UI, see https://fastapi.tiangolo.com/tutorial/metadata for more information.")  # fmt: off
     swagger_license_info: dict = Field(default={"name": "MIT Licence", "identifier": "MIT", "url": "https://raw.githubusercontent.com/etalab-ia/opengatellm/refs/heads/main/LICENSE"}, description="Licence informations of the API in swagger UI, see https://fastapi.tiangolo.com/tutorial/metadata for more information.")  # fmt: off
     swagger_terms_of_service: str | None = Field(default=None, description="A URL to the Terms of Service for the API in swagger UI. If provided, this has to be a URL.", examples=["https://example.com/terms-of-service"])  # fmt: off
-    swagger_openapi_tags: list[dict[str, str | dict[str, str]]] = Field(default_factory=list, description="OpenAPI tags of the API in swagger UI, see https://fastapi.tiangolo.com/tutorial/metadata for more information.", json_extra_schema={"default": []})  # fmt: off
+    swagger_openapi_tags: list[dict[str, str | dict[str, str]]] = Field(default_factory=list, description="OpenAPI tags of the API in swagger UI, see https://fastapi.tiangolo.com/tutorial/metadata for more information.", json_schema_extra={"default": []})  # fmt: off
     swagger_openapi_url: str = Field(default="/openapi.json", pattern=r"^/", description="OpenAPI URL of swagger UI, see https://fastapi.tiangolo.com/tutorial/metadata for more information.")  # fmt: off
     swagger_docs_url: str = Field(default="/docs", pattern=r"^/", description="Docs URL of swagger UI, see https://fastapi.tiangolo.com/tutorial/metadata for more information.")  # fmt: off
     swagger_redoc_url: str = Field(default="/redoc", pattern=r"^/", description="Redoc URL of swagger UI, see https://fastapi.tiangolo.com/tutorial/metadata for more information.")  # fmt: off
diff --git a/api/schemas/core/models.py b/api/schemas/core/models.py
index ae27de66b..1fc7b3467 100644
--- a/api/schemas/core/models.py
+++ b/api/schemas/core/models.py
@@ -16,7 +16,6 @@ class ProviderEndpoints(BaseModel):
     embeddings: Annotated[str | None, StringConstraints(strip_whitespace=True, min_length=1, pattern=r"^/", to_lower=True), Field(default=None)]
     models: Annotated[str | None, StringConstraints(strip_whitespace=True, min_length=1, pattern=r"^/", to_lower=True), Field(default=None)]
     ocr: Annotated[str | None, StringConstraints(strip_whitespace=True, min_length=1, pattern=r"^/", to_lower=True), Field(default=None)]
-    ocr_beta: Annotated[str | None, StringConstraints(strip_whitespace=True, min_length=1, pattern=r"^/", to_lower=True), Field(default=None)]
     rerank: Annotated[str | None, StringConstraints(strip_whitespace=True, min_length=1, pattern=r"^/", to_lower=True), Field(default=None)]
 
     def get_endpoint(self, endpoint: EndpointRoute) -> str | None:
@@ -30,8 +29,6 @@ def get_endpoint(self, endpoint: EndpointRoute) -> str | None:
             return self.models
         elif endpoint == EndpointRoute.OCR:
             return self.ocr
-        elif endpoint == EndpointRoute.OCR_BETA:
-            return self.ocr_beta
         elif endpoint == EndpointRoute.RERANK:
             return self.rerank
         else:
diff --git a/api/schemas/files.py b/api/schemas/files.py
deleted file mode 100644
index 230966915..000000000
--- a/api/schemas/files.py
+++ /dev/null
@@ -1,45 +0,0 @@
-import json
-from typing import Literal
-
-from pydantic import Field, field_validator, model_validator
-
-from api.schemas import BaseModel
-from api.schemas.documents import Chunker as DocumentChunker
-
-
-class ChunkerArgs(BaseModel):
-    chunk_size: int = Field(default=2048, description="The size of the chunks to use for the file upload.")  # fmt: off
-    chunk_overlap: int = Field(default=0, description="The overlap of the chunks to use for the file upload.")  # fmt: off
-    length_function: Literal["len"] = Field(default="len", description="The function to use to calculate the length of the chunks to use for the file upload.")  # fmt: off
-    is_separator_regex: bool = Field(default=False, description="Whether the separator is a regex to use for the file upload.")  # fmt: off
-    separators: list[str] = Field(default=["\n\n", "\n", ". ", " "], description="The separators to use for the file upload.")  # fmt: off
-    chunk_min_size: int = Field(default=0, description="The minimum size of the chunks to use for the file upload.")  # fmt: off
-
-
-class Chunker(BaseModel):
-    name: Literal[DocumentChunker.RECURSIVE_CHARACTER_TEXT_SPLITTER, DocumentChunker.NO_SPLITTER, "LangchainRecursiveCharacterTextSplitter", "NoSplitter"] = Field(default=DocumentChunker.RECURSIVE_CHARACTER_TEXT_SPLITTER, description="The name of the chunker to use for the file upload.")  # fmt: off
-    args: ChunkerArgs = Field(default_factory=ChunkerArgs, description="The arguments to use for the chunker to use for the file upload.")  # fmt: off
-
-    @field_validator("name")
-    def validate_name(cls, name):
-        if name == "LangchainRecursiveCharacterTextSplitter":
-            name = DocumentChunker.RECURSIVE_CHARACTER_TEXT_SPLITTER
-        elif name == "NoChunker":
-            name = DocumentChunker.NO_SPLITTER
-        return name
-
-
-class FileResponse(BaseModel):
-    id: int = Field(default=..., description="The ID of the file.")
-
-
-class FilesRequest(BaseModel):
-    collection: int = Field(default=..., description="The collection ID to use for the file upload. The file will be vectorized with model defined by the collection.")  # fmt: off
-    chunker: Chunker = Field(default_factory=Chunker, description="The chunker to use for the file upload.")  # fmt: off
-
-    @model_validator(mode="before")
-    @classmethod
-    def convert_form_to_json(cls, values):
-        if isinstance(values, str):
-            return cls(**json.loads(values))
-        return values
diff --git a/api/tests/integ/assets/json.json b/api/tests/integ/assets/json.json
deleted file mode 100644
index 89b3f9f25..000000000
--- a/api/tests/integ/assets/json.json
+++ /dev/null
@@ -1,34 +0,0 @@
-[
-    {
-        "title": "Lorem ipsum",
-        "text": "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.",
-        "metadata": {
-            "source": "Lorem ipsum dolor sit amet",
-            "metadata_test_type_string": "test"
-        }
-    },
-    {
-        "title": "Li Europan lingues (en)",
-        "text": "The European languages are members of the same family. Their separate existence is a myth. For science, music, sport, etc, Europe uses the same vocabulary. The languages only differ in their grammar, their pronunciation and their most common words. Everyone realizes why a new common language would be desirable: one could refuse to pay expensive translators. To achieve this, it would be necessary to have uniform grammar, pronunciation and more common words. If several languages coalesce, the grammar of the resulting language is more simple and regular than that of the individual languages. The new common language will be more simple and regular than the existing European languages. It will be as simple as Occidental; in fact, it will be Occidental. To an English person, it will seem like simplified English, as a skeptical Cambridge friend of mine told me what Occidental is. The European languages are members of the same family. Their separate existence is a myth. For science, music, sport, etc, Europe uses the same vocabulary. The languages only differ in their grammar, their pronunciation and their most common words. Everyone realizes why a new common language would be desirable: one could refuse to pay expensive translators. To achieve this, it would be necessary to have uniform grammar, pronunciation and more common words. If several languages coalesce, the grammar of the resulting language is more simple and regular than that of the individual languages. The new common language will be more simple and regular than the existing European languages. It will be as simple as Occidental; in fact, it will be Occidental. To an English person, it will seem like simplified English, as a skeptical Cambridge friend of mine told me what Occidental is. The European languages are members of the same family. Their separate existence is a myth. For science, music, sport, etc, Europe uses the same vocabulary. 
The languages only differ in their grammar, their pronunciation and their most common words. Everyone realizes why a new common language would be desirable: one could refuse to pay expensive translators. To achieve this, it would be necessary to have uniform grammar, pronunciation and more common words. If several languages coalesce, the grammar of the resulting language is more simple and regular than that of the individual languages. The new common language will be more simple and regular than the existing European languages. It will be as simple as Occidental; in fact, it will be Occidental. To an English person, it will seem like simplified English, as a skeptical Cambridge friend of mine told me what Occidental is. The European languages are members of the same family. Their separate existence is a myth. For science, music, sport, etc, Europe uses the same vocabulary. The languages only differ in their grammar, their pronunciation and their most common words. Everyone realizes why a new common language would be desirable: one could refuse to pay expensive translators. To achieve this, it would be necessary to have uniform grammar, pronunciation and more common words. If several languages coalesce, the grammar of the resulting language is more simple and regular than that of the individual languages. The new common language will be more simple and regular than the existing European languages. It will be as simple as Occidental; in fact, it will be Occidental. To an English person, it will seem like simplified English, as a skeptical Cambridge friend of mine told me what Occidental is.The European languages are members of the same family. Their separate existence is a myth. For science, music, sport, etc, Europe uses the same vocabulary. The languages only differ in their grammar, their pronunciation and their most common words. Everyone realizes why a new common language would be desirable: one could refuse to pay expensive translators. 
To achieve this, it would be necessary to have uniform grammar, pronunciation and more common words. If several languages coalesce, the grammar of the resulting language is more simple and regular than that of the individual languages. The new common language will be more simple and regular than the existing European languages. It will be as simple as Occidental; in fact, it will be Occidental.",
-        "metadata": {
-            "source": "Lorem ipsum dolor sit amet",
-            "metadata_test_type_bool": true
-        }
-    },
-    {
-        "title": "Qui est Albert ?",
-        "text": "Albert peut tout aussi bien être un scientifique que le modèle ia souverain de l'état français.",
-        "metadata": {
-            "source": "Lorem ipsum dolor sit amet",
-            "metadata_test_type_int": 10
-        }
-    },
-    {
-        "title": "Qui est Erasmus ?",
-        "text": "Erasmus sous entend tout autant le nom d'un savant qu'une programme éducatif de la nation française.",
-        "metadata": {
-            "source": "Lorem ipsum dolor sit amet",
-            "metadata_test_type_float": 101.2
-        }
-    }
-]
\ No newline at end of file
diff --git a/api/tests/integ/assets/json_wrong_format.json b/api/tests/integ/assets/json_wrong_format.json
deleted file mode 100644
index 3bb802a16..000000000
--- a/api/tests/integ/assets/json_wrong_format.json
+++ /dev/null
@@ -1,5 +0,0 @@
-{
-    "fruit": "Apple",
-    "size": "Large",
-    "color": "Red"
-}
diff --git a/api/tests/integ/test_chat.py b/api/tests/integ/test_chat.py
index ea2c42db6..a370f7208 100644
--- a/api/tests/integ/test_chat.py
+++ b/api/tests/integ/test_chat.py
@@ -1,6 +1,7 @@
 import json
 import logging
 import os
+import time
 from uuid import uuid4
 
 from fastapi.testclient import TestClient
@@ -31,19 +32,32 @@ def setup(client: TestClient):
     COLLECTION_ID = response.json()["id"]
 
     # Upload the file to the collection
-    file_path = "api/tests/integ/assets/json.json"
+    data = {
+        "collection": str(COLLECTION_ID),
+        "output_format": "markdown",
+        "force_ocr": "false",
+        "chunk_size": "1000",
+        "chunk_overlap": "200",
+        "use_llm": "false",
+        "paginate_output": "false",
+        "chunker": "RecursiveCharacterTextSplitter",
+        "chunk_min_size": "0",
+        "is_separator_regex": "false",
+        "metadata": json.dumps({"source_title": "test", "source_tags": ["tag-1", "tag-2"]}),
+    }
+
+    file_path = "api/tests/integ/assets/pdf.pdf"
     with open(file_path, "rb") as file:
-        files = {"file": (os.path.basename(file_path), file, "application/json")}
-        data = {"request": '{"collection": "%s", "chunker": {"args": {"chunk_size": 1000}}}' % COLLECTION_ID}
-        response = client.post_without_permissions(url=f"/v1{EndpointRoute.FILES}", data=data, files=files)
+        files = {"file": (os.path.basename(file_path), file, "application/pdf")}
+        response = client.post_without_permissions(url=f"/v1{EndpointRoute.DOCUMENTS}", data=data, files=files)
         file.close()
+
     assert response.status_code == 201, response.text
+    DOCUMENT_ID = response.json()["id"]
 
-    # Get document IDS
-    response = client.get_without_permissions(url=f"/v1{EndpointRoute.DOCUMENTS}", params={"collection": COLLECTION_ID})
-    DOCUMENT_IDS = [row["id"] for row in response.json()["data"]]
+    time.sleep(1)
 
-    yield MODEL_ID, DOCUMENT_IDS, COLLECTION_ID
+    yield MODEL_ID, DOCUMENT_ID, COLLECTION_ID
 
 
 @pytest.fixture(scope="module")
@@ -59,7 +73,7 @@ class TestChat:
     @pytest.mark.asyncio
     async def test_chat_completions_unstreamed_response(self, client: TestClient, setup):
         """Test the POST /chat/completions unstreamed response."""
-        MODEL_ID, DOCUMENT_IDS, COLLECTION_ID = setup
+        MODEL_ID, DOCUMENT_ID, COLLECTION_ID = setup
 
         params = {"model": MODEL_ID, "messages": [{"role": "user", "content": "Hello, how are you?"}], "stream": False, "n": 1, "max_tokens": 10}
         response = client.post_without_permissions(url=f"/v1{EndpointRoute.CHAT_COMPLETIONS}", json=params)
@@ -70,7 +84,7 @@ async def test_chat_completions_unstreamed_response(self, client: TestClient, se
     @pytest.mark.asyncio
     async def test_chat_completions_streamed_response(self, client: TestClient, setup):
         """Test the POST /chat/completions streamed response."""
-        MODEL_ID, DOCUMENT_IDS, COLLECTION_ID = setup
+        MODEL_ID, DOCUMENT_ID, COLLECTION_ID = setup
 
         params = {"model": MODEL_ID, "messages": [{"role": "user", "content": "Hello, how are you?"}], "stream": True, "n": 1, "max_tokens": 10}
 
@@ -87,7 +101,7 @@ async def test_chat_completions_streamed_response(self, client: TestClient, setu
 
     def test_chat_completions_unknown_params(self, client: TestClient, setup):
         """Test the POST /chat/completions unknown params."""
-        MODEL_ID, DOCUMENT_IDS, COLLECTION_ID = setup
+        MODEL_ID, DOCUMENT_ID, COLLECTION_ID = setup
         params = {
             "model": MODEL_ID,
             "messages": [{"role": "user", "content": "Hello, how are you?"}],
@@ -102,7 +116,7 @@ def test_chat_completions_unknown_params(self, client: TestClient, setup):
 
     def test_chat_completions_forward_error(self, client: TestClient, setup):
         """Test the POST /chat/completions forward errors from the model backend. This test works only if the model backend is vLLM."""
-        MODEL_ID, DOCUMENT_IDS, COLLECTION_ID = setup
+        MODEL_ID, DOCUMENT_ID, COLLECTION_ID = setup
 
         params = {
             "model": MODEL_ID,
@@ -118,7 +132,7 @@ def test_chat_completions_forward_error(self, client: TestClient, setup):
 
     def test_chat_completions_search_unstreamed_response(self, client: TestClient, setup):
         """Test the GET /chat/completions search unstreamed response."""
-        MODEL_ID, DOCUMENT_IDS, COLLECTION_ID = setup
+        MODEL_ID, DOCUMENT_ID, COLLECTION_ID = setup
 
         params = {
             "model": MODEL_ID,
@@ -135,11 +149,11 @@ def test_chat_completions_search_unstreamed_response(self, client: TestClient, s
 
         response_json = response.json()
         ChatCompletion(**response_json)  # test output format
-        assert response_json["search_results"][0]["chunk"]["document_id"] in DOCUMENT_IDS
+        assert response_json["search_results"][0]["chunk"]["document_id"] == DOCUMENT_ID
 
     def test_chat_completions_search_streamed_response(self, client: TestClient, setup):
         """Test the GET /chat/completions search streamed response."""
-        MODEL_ID, DOCUMENT_IDS, COLLECTION_ID = setup
+        MODEL_ID, DOCUMENT_ID, COLLECTION_ID = setup
         params = {
             "model": MODEL_ID,
             "messages": [{"role": "user", "content": "Qui est Albert ?"}],
@@ -163,12 +177,12 @@ def test_chat_completions_search_streamed_response(self, client: TestClient, set
                     chunks.append(chunk)
                     continue
                 # check that the last chunk has a search result
-                assert chunks[i - 1].search_results[0].chunk.document_id in DOCUMENT_IDS
+                assert chunks[i - 1].search_results[0].chunk.document_id == DOCUMENT_ID
                 break
 
     def test_chat_completions_search_no_args(self, client: TestClient, setup):
         """Test the GET /chat/completions search template not found."""
-        MODEL_ID, DOCUMENT_IDS, COLLECTION_ID = setup
+        MODEL_ID, DOCUMENT_ID, COLLECTION_ID = setup
         params = {
             "model": MODEL_ID,
             "messages": [{"role": "user", "content": "Qui est Albert ?"}],
@@ -182,7 +196,7 @@ def test_chat_completions_search_no_args(self, client: TestClient, setup):
 
     def test_chat_completions_search_no_collections(self, client: TestClient, setup):
         """Test the GET /chat/completions search no collections."""
-        MODEL_ID, DOCUMENT_IDS, COLLECTION_ID = setup
+        MODEL_ID, DOCUMENT_ID, COLLECTION_ID = setup
         params = {
             "model": MODEL_ID,
             "messages": [{"role": "user", "content": "Qui est Albert ?"}],
@@ -201,7 +215,7 @@ def test_chat_completions_search_no_collections(self, client: TestClient, setup)
 
     def test_chat_completions_search_template(self, client: TestClient, setup):
         """Test the GET /chat/completions search template."""
-        MODEL_ID, DOCUMENT_IDS, COLLECTION_ID = setup
+        MODEL_ID, DOCUMENT_ID, COLLECTION_ID = setup
         params = {
             "model": MODEL_ID,
             "messages": [{"role": "user", "content": "Qui est Albert ?"}],
@@ -223,7 +237,7 @@ def test_chat_completions_search_template(self, client: TestClient, setup):
 
     def test_chat_completions_search_template_missing_placeholders(self, client: TestClient, setup):
         """Test the GET /chat/completions search template missing placeholders."""
-        MODEL_ID, DOCUMENT_IDS, COLLECTION_ID = setup
+        MODEL_ID, DOCUMENT_ID, COLLECTION_ID = setup
         params = {
             "model": MODEL_ID,
             "messages": [{"role": "user", "content": "Qui est Albert ?"}],
@@ -243,7 +257,7 @@ def test_chat_completions_search_template_missing_placeholders(self, client: Tes
 
     def test_chat_completions_search_wrong_collection(self, client: TestClient, setup):
         """Test the GET /chat/completions search wrong collection."""
-        MODEL_ID, DOCUMENT_IDS, COLLECTION_ID = setup
+        MODEL_ID, DOCUMENT_ID, COLLECTION_ID = setup
         params = {
             "model": MODEL_ID,
             "messages": [{"role": "user", "content": "Qui est Albert ?"}],
@@ -258,7 +272,7 @@ def test_chat_completions_search_wrong_collection(self, client: TestClient, setu
 
     def test_chat_completions_usage(self, client: TestClient, setup, tokenizer):
         """Test the GET /chat/completions usage."""
-        MODEL_ID, DOCUMENT_IDS, COLLECTION_ID = setup
+        MODEL_ID, DOCUMENT_ID, COLLECTION_ID = setup
         prompt = "Hi, write a story about a cat."
         params = {
             "model": MODEL_ID,
diff --git a/api/tests/integ/test_chunks.py b/api/tests/integ/test_chunks.py
index 1fb44f2da..56304b3f8 100644
--- a/api/tests/integ/test_chunks.py
+++ b/api/tests/integ/test_chunks.py
@@ -1,4 +1,6 @@
+import json
 import os
+import time
 from uuid import uuid4
 
 from fastapi.testclient import TestClient
@@ -19,19 +21,31 @@ def setup(client: TestClient):
     assert response.status_code == 201
     COLLECTION_ID = response.json()["id"]
 
-    # Upload a file
-    file_path = "api/tests/integ/assets/json.json"
+    # Upload the file to the collection
+    data = {
+        "collection": str(COLLECTION_ID),
+        "output_format": "markdown",
+        "force_ocr": "false",
+        "chunk_size": "1000",
+        "chunk_overlap": "200",
+        "use_llm": "false",
+        "paginate_output": "false",
+        "chunker": "RecursiveCharacterTextSplitter",
+        "chunk_min_size": "0",
+        "is_separator_regex": "false",
+        "metadata": json.dumps({"source_title": "test", "source_tags": ["tag-1", "tag-2"]}),
+    }
+
+    file_path = "api/tests/integ/assets/pdf.pdf"
     with open(file_path, "rb") as file:
-        files = {"file": (os.path.basename(file_path), file, "application/json")}
-        data = {"request": '{"collection": "%s"}' % COLLECTION_ID}
-        response = client.post_without_permissions(url=f"/v1{EndpointRoute.FILES}", data=data, files=files)
+        files = {"file": (os.path.basename(file_path), file, "application/pdf")}
+        response = client.post_without_permissions(url=f"/v1{EndpointRoute.DOCUMENTS}", data=data, files=files)
         file.close()
+
     assert response.status_code == 201, response.text
+    DOCUMENT_ID = response.json()["id"]
 
-    # Retrieve the document ID
-    response = client.get_without_permissions(url=f"/v1{EndpointRoute.DOCUMENTS}", params={"collection": COLLECTION_ID})
-    assert response.status_code == 200, response.text
-    DOCUMENT_ID = response.json()["data"][0]["id"]
+    time.sleep(1)
 
     yield COLLECTION_ID, DOCUMENT_ID
 
@@ -40,6 +54,7 @@ def setup(client: TestClient):
 class TestChunks:
     def test_get_chunks(self, client: TestClient, setup):
         COLLECTION_ID, DOCUMENT_ID = setup
+
         response = client.get_without_permissions(url=f"/v1{EndpointRoute.CHUNKS}/{DOCUMENT_ID}")
         assert response.status_code == 200, response.text
 
diff --git a/api/tests/integ/test_files.py b/api/tests/integ/test_files.py
deleted file mode 100644
index 4a9220555..000000000
--- a/api/tests/integ/test_files.py
+++ /dev/null
@@ -1,160 +0,0 @@
-import os
-from uuid import uuid4
-
-from fastapi.testclient import TestClient
-import pytest
-
-from api.schemas.collections import CollectionVisibility
-from api.utils.variables import EndpointRoute
-
-
-@pytest.fixture(scope="module")
-def setup(client: TestClient):
-    response = client.post_without_permissions(
-        url=f"/v1{EndpointRoute.COLLECTIONS}",
-        json={"name": f"test_collection_{str(uuid4())}", "visibility": CollectionVisibility.PRIVATE},
-    )
-    assert response.status_code == 201, response.text
-    PRIVATE_COLLECTION_ID = response.json()["id"]
-
-    response = client.post_with_permissions(
-        url=f"/v1{EndpointRoute.COLLECTIONS}",
-        json={"name": f"test_collection_{str(uuid4())}", "visibility": CollectionVisibility.PUBLIC},
-    )
-    assert response.status_code == 201, response.text
-    PUBLIC_COLLECTION_ID = response.json()["id"]
-
-    yield PRIVATE_COLLECTION_ID, PUBLIC_COLLECTION_ID
-
-
-@pytest.mark.usefixtures("client", "setup")
-class TestFiles:
-    def test_upload_pdf_file(self, client: TestClient, setup):
-        PRIVATE_COLLECTION_ID, PUBLIC_COLLECTION_ID = setup
-
-        file_path = "api/tests/integ/assets/pdf.pdf"
-        with open(file_path, "rb") as file:
-            files = {"file": (os.path.basename(file_path), file, "application/pdf")}
-            data = {"request": '{"collection": "%s"}' % PRIVATE_COLLECTION_ID}
-            response = client.post_without_permissions(url=f"/v1{EndpointRoute.FILES}", data=data, files=files)
-            file.close()
-
-        assert response.status_code == 201, response.text
-
-    def test_upload_pdf_file_chunker_parameters(self, client: TestClient, setup):
-        PRIVATE_COLLECTION_ID, _ = setup
-
-        file_path = "api/tests/integ/assets/pdf.pdf"
-        with open(file_path, "rb") as file:
-            files = {"file": (os.path.basename(file_path), file, "application/pdf")}
-            data = {"request": '{"collection": "%s", "chunker": {"args": {"chunk_size": 1000}}}' % PRIVATE_COLLECTION_ID}
-            response = client.post_without_permissions(url=f"/v1{EndpointRoute.FILES}", data=data, files=files)
-            file.close()
-
-        assert response.status_code == 201, response.text
-
-    def test_upload_html_file(self, client: TestClient, setup):
-        PRIVATE_COLLECTION_ID, _ = setup
-
-        file_path = "api/tests/integ/assets/html.html"
-        with open(file_path, "rb") as file:
-            files = {"file": (os.path.basename(file_path), file, "application/html")}
-            data = {"request": '{"collection": "%s"}' % PRIVATE_COLLECTION_ID}
-            response = client.post_without_permissions(url=f"/v1{EndpointRoute.FILES}", data=data, files=files)
-            file.close()
-        assert response.status_code == 201, response.text
-
-    def test_upload_html_file_chunker_parameters(self, client: TestClient, setup):
-        PRIVATE_COLLECTION_ID, _ = setup
-
-        file_path = "api/tests/integ/assets/html.html"
-        with open(file_path, "rb") as file:
-            files = {"file": (os.path.basename(file_path), file, "application/html")}
-            data = {"request": '{"collection": "%s", "chunker": {"args": {"chunk_size": 1000}}}' % PRIVATE_COLLECTION_ID}
-            response = client.post_without_permissions(url=f"/v1{EndpointRoute.FILES}", data=data, files=files)
-            file.close()
-
-        assert response.status_code == 201, response.text
-
-    def test_upload_markdown_file(self, client: TestClient, setup):
-        PRIVATE_COLLECTION_ID, _ = setup
-
-        file_path = "api/tests/integ/assets/markdown.md"
-        with open(file_path, "rb") as file:
-            files = {"file": (os.path.basename(file_path), file, "text/markdown")}
-            data = {"request": '{"collection": "%s"}' % PRIVATE_COLLECTION_ID}
-            response = client.post_without_permissions(url=f"/v1{EndpointRoute.FILES}", data=data, files=files)
-            file.close()
-
-        assert response.status_code == 201, response.text
-
-    def test_upload_mardown_file_chunker_parameters(self, client: TestClient, setup):
-        PRIVATE_COLLECTION_ID, PUBLIC_COLLECTION_ID = setup
-
-        file_path = "api/tests/integ/assets/markdown.md"
-        with open(file_path, "rb") as file:
-            files = {"file": (os.path.basename(file_path), file, "text/markdown")}
-            data = {"request": '{"collection": "%s", "chunker": {"args": {"chunk_size": 1000}}}' % PRIVATE_COLLECTION_ID}
-            response = client.post_without_permissions(url=f"/v1{EndpointRoute.FILES}", data=data, files=files)
-            file.close()
-
-        assert response.status_code == 201, response.text
-
-    def test_upload_json_file(self, client: TestClient, setup):
-        PRIVATE_COLLECTION_ID, PUBLIC_COLLECTION_ID = setup
-
-        file_path = "api/tests/integ/assets/json.json"
-        with open(file_path, "rb") as file:
-            files = {"file": (os.path.basename(file_path), file, "application/json")}
-            data = {"request": '{"collection": "%s"}' % PRIVATE_COLLECTION_ID}
-            response = client.post_without_permissions(url=f"/v1{EndpointRoute.FILES}", data=data, files=files)
-            file.close()
-
-        assert response.status_code == 201, response.text
-
-    def test_upload_json_file_wrong_format(self, client: TestClient, setup):
-        PRIVATE_COLLECTION_ID, PUBLIC_COLLECTION_ID = setup
-
-        file_path = "api/tests/integ/assets/json_wrong_format.json"
-        with open(file_path, "rb") as file:
-            files = {"file": (os.path.basename(file_path), file, "application/json")}
-            data = {"request": '{"collection": "%s"}' % PRIVATE_COLLECTION_ID}
-            response = client.post_without_permissions(url=f"/v1{EndpointRoute.FILES}", data=data, files=files)
-            file.close()
-
-        assert response.status_code == 422, response.text
-
-    def test_upload_too_large_file(self, client: TestClient, setup):
-        PRIVATE_COLLECTION_ID, PUBLIC_COLLECTION_ID = setup
-
-        file_path = "api/tests/integ/assets/pdf_too_large.pdf"
-        with open(file_path, "rb") as file:
-            files = {"file": (os.path.basename(file_path), file, "application/pdf")}
-            data = {"request": '{"collection": "%s"}' % PRIVATE_COLLECTION_ID}
-            response = client.post_without_permissions(url=f"/v1{EndpointRoute.FILES}", data=data, files=files)
-            file.close()
-
-        assert response.status_code == 413, response.text
-
-    def test_upload_in_public_collection_with_admin(self, client: TestClient, setup):
-        PRIVATE_COLLECTION_ID, PUBLIC_COLLECTION_ID = setup
-
-        file_path = "api/tests/integ/assets/pdf.pdf"
-        with open(file_path, "rb") as file:
-            files = {"file": (os.path.basename(file_path), file, "application/pdf")}
-            data = {"request": '{"collection": "%s"}' % PUBLIC_COLLECTION_ID}
-            response = client.post_with_permissions(url=f"/v1{EndpointRoute.FILES}", data=data, files=files)
-            file.close()
-        assert response.status_code == 201, response.text
-
-    def test_upload_in_public_collection_with_user(self, client: TestClient, setup):
-        PRIVATE_COLLECTION_ID, PUBLIC_COLLECTION_ID = setup
-
-        file_path = "api/tests/integ/assets/pdf.pdf"
-        with open(file_path, "rb") as file:
-            files = {"file": (os.path.basename(file_path), file, "application/pdf")}
-            data = {"request": '{"collection": "%s"}' % PUBLIC_COLLECTION_ID}
-            response = client.post_without_permissions(url=f"/v1{EndpointRoute.FILES}", data=data, files=files)
-            file.close()
-
-        assert response.status_code == 404, response.text
diff --git a/api/tests/integ/test_search.py b/api/tests/integ/test_search.py
index c31eccd4b..189c43e99 100644
--- a/api/tests/integ/test_search.py
+++ b/api/tests/integ/test_search.py
@@ -1,3 +1,4 @@
+import json
 import logging
 import os
 import time
@@ -20,29 +21,39 @@ def setup(client: TestClient):
     COLLECTION_ID = response.json()["id"]
 
     # Upload the file to the collection
-    file_path = "api/tests/integ/assets/json.json"
+    data = {
+        "collection": str(COLLECTION_ID),
+        "output_format": "markdown",
+        "force_ocr": "false",
+        "chunk_size": "1000",
+        "chunk_overlap": "200",
+        "use_llm": "false",
+        "paginate_output": "false",
+        "chunker": "RecursiveCharacterTextSplitter",
+        "chunk_min_size": "0",
+        "is_separator_regex": "false",
+        "metadata": json.dumps({"source_title": "test", "source_tags": ["tag-1", "tag-2"]}),
+    }
+
+    file_path = "api/tests/integ/assets/pdf.pdf"
     with open(file_path, "rb") as file:
-        files = {"file": (os.path.basename(file_path), file, "application/json")}
-        data = {"request": '{"collection": "%s", "chunker": {"args": {"chunk_size": 1000}}}' % COLLECTION_ID}
-        response = client.post_without_permissions(url=f"/v1{EndpointRoute.FILES}", data=data, files=files)
+        files = {"file": (os.path.basename(file_path), file, "application/pdf")}
+        response = client.post_without_permissions(url=f"/v1{EndpointRoute.DOCUMENTS}", data=data, files=files)
         file.close()
-    assert response.status_code == 201, response.text
 
-    # Get document IDS
-    response = client.get_without_permissions(url=f"/v1{EndpointRoute.DOCUMENTS}", params={"collection": COLLECTION_ID})
-    assert response.status_code == 200, response.text
-    DOCUMENT_IDS = [document["id"] for document in response.json()["data"]]
+    assert response.status_code == 201, response.text
+    DOCUMENT_ID = response.json()["id"]
 
     time.sleep(1)
 
-    yield COLLECTION_ID, DOCUMENT_IDS
+    yield COLLECTION_ID, DOCUMENT_ID
 
 
 @pytest.mark.usefixtures("client", "setup")
 class TestSearch:
     def test_search(self, client: TestClient, setup):
         """Test the POST /search response status code."""
-        COLLECTION_ID, DOCUMENT_IDS = setup
+        COLLECTION_ID, DOCUMENT_ID = setup
 
         data = {"prompt": "Qui est Albert ?", "collections": [COLLECTION_ID], "k": 3}
         response = client.post_without_permissions(url=f"/v1{EndpointRoute.SEARCH}", json=data)
@@ -51,32 +62,32 @@ def test_search(self, client: TestClient, setup):
         searches = Searches(**response.json())  # test output format
 
         search = searches.data[0]
-        assert search.chunk.document_id in DOCUMENT_IDS
+        assert search.chunk.document_id == DOCUMENT_ID
 
     def test_search_with_score_threshold(self, client: TestClient, setup):
         """Test search with a score threshold."""
-        COLLECTION_ID, DOCUMENT_IDS = setup
+        COLLECTION_ID, DOCUMENT_ID = setup
         data = {"prompt": "Erasmus", "collections": [COLLECTION_ID], "k": 3, "score_threshold": 0.5}
         response = client.post_without_permissions(url=f"/v1{EndpointRoute.SEARCH}", json=data)
         assert response.status_code == 200, response.text
 
     def test_search_invalid_collection(self, client: TestClient, setup):
         """Test search with an invalid collection."""
-        COLLECTION_ID, DOCUMENT_IDS = setup
+        COLLECTION_ID, DOCUMENT_ID = setup
         data = {"prompt": "Erasmus", "collections": [100], "k": 3}
         response = client.post_without_permissions(url=f"/v1{EndpointRoute.SEARCH}", json=data)
         assert response.status_code == 404, response.text
 
     def test_search_invalid_k(self, client: TestClient, setup):
         """Test search with an invalid k value."""
-        COLLECTION_ID, DOCUMENT_IDS = setup
+        COLLECTION_ID, DOCUMENT_ID = setup
         data = {"prompt": "Erasmus", "collections": [COLLECTION_ID], "k": 0}
         response = client.post_without_permissions(url=f"/v1{EndpointRoute.SEARCH}", json=data)
         assert response.status_code == 422, response.text
 
     def test_search_empty_prompt(self, client: TestClient, setup):
         """Test search with an empty prompt."""
-        COLLECTION_ID, DOCUMENT_IDS = setup
+        COLLECTION_ID, DOCUMENT_ID = setup
         data = {"prompt": "", "collections": [COLLECTION_ID], "k": 3}
         response = client.post_without_permissions(url=f"/v1{EndpointRoute.SEARCH}", json=data)
         assert response.status_code == 422, response.text
diff --git a/api/utils/variables.py b/api/utils/variables.py
index 01108d7a1..cbbbeb2e3 100644
--- a/api/utils/variables.py
+++ b/api/utils/variables.py
@@ -43,13 +43,11 @@ class EndpointRoute(StrEnum):
     COLLECTIONS = f"/{RouterName.COLLECTIONS}"
     DOCUMENTS = f"/{RouterName.DOCUMENTS}"
     EMBEDDINGS = f"/{RouterName.EMBEDDINGS}"
-    FILES = f"/{RouterName.FILES}"
     ME_INFO = f"/{RouterName.ME}/info"
     ME_KEYS = f"/{RouterName.ME}/keys"
     ME_USAGE = f"/{RouterName.ME}/usage"
     MODELS = f"/{RouterName.MODELS}"
     OCR = f"/{RouterName.OCR}"
-    OCR_BETA = f"/{RouterName.OCR}-beta"
     PARSE = f"/{RouterName.PARSE}-beta"
     RERANK = f"/{RouterName.RERANK}"
     SEARCH = f"/{RouterName.SEARCH}"
diff --git a/docs/docs/getting-started/configuration_file.md b/docs/docs/getting-started/configuration_file.md
index 00a8d9923..70d8f0d01 100644
--- a/docs/docs/getting-started/configuration_file.md
+++ b/docs/docs/getting-started/configuration_file.md
@@ -172,7 +172,7 @@ General settings configuration fields.
 | swagger_description | string | Display description of your API in swagger UI, see https://fastapi.tiangolo.com/tutorial/metadata for more information. | [See documentation](https://github.com/etalab-ia/opengatellm/blob/main/README.md) |  | [See documentation](https://github.com/etalab-ia/opengatellm/blob/main/README.md) |
 | swagger_docs_url | string | Docs URL of swagger UI, see https://fastapi.tiangolo.com/tutorial/metadata for more information. | /docs |  |  |
 | swagger_license_info | object | Licence informations of the API in swagger UI, see https://fastapi.tiangolo.com/tutorial/metadata for more information. | `{'name': 'MIT Licence', 'identifier': 'MIT', 'url': 'https://raw.githubusercontent.com/etalab-ia/opengatellm/refs/heads/main/LICENSE'}` |  |  |
-| swagger_openapi_tags | array | OpenAPI tags of the API in swagger UI, see https://fastapi.tiangolo.com/tutorial/metadata for more information. | **required** |  |  |
+| swagger_openapi_tags | array | OpenAPI tags of the API in swagger UI, see https://fastapi.tiangolo.com/tutorial/metadata for more information. | [] |  |  |
 | swagger_openapi_url | string | OpenAPI URL of swagger UI, see https://fastapi.tiangolo.com/tutorial/metadata for more information. | /openapi.json |  |  |
 | swagger_redoc_url | string | Redoc URL of swagger UI, see https://fastapi.tiangolo.com/tutorial/metadata for more information. | /redoc |  |  |
 | swagger_summary | string | Display summary of your API in swagger UI, see https://fastapi.tiangolo.com/tutorial/metadata for more information. | OpenGateLLM connect to your models. You can configuration this swagger UI in the configuration file, like hide routes or change the title. |  | My API description. |
@@ -195,7 +195,7 @@ For more information to configure model providers, see the [ModelProvider sectio
 
 | Attribute | Type | Description | Default | Values | Examples |
 | --- | --- | --- | --- | --- | --- |
-| aliases | array | Aliases of the model. It will be used to identify the model by users. | **required** |  | ['model-alias', 'model-alias-2'] |
+| aliases | array | Aliases of the model. It will be used to identify the model by users. | [] |  | ['model-alias', 'model-alias-2'] |
 | cost_completion_tokens | number | Model costs completion tokens for user budget computation. The cost is by 1M tokens. Set to `0.0` to disable budget computation for this model. | 0.0 |  | 0.1 |
 | cost_prompt_tokens | number | Model costs prompt tokens for user budget computation. The cost is by 1M tokens. | 0.0 |  | 0.1 |
 | load_balancing_strategy | string | Routing strategy for load balancing between providers of the model. | shuffle | • shuffle<br></br>• least_busy | least_busy |
@@ -310,7 +310,7 @@ Other arguments declared below are used to configure the Elasticsearch index.
 
 | Attribute | Type | Description | Default | Values | Examples |
 | --- | --- | --- | --- | --- | --- |
-| headers | object | Albert API request headers. | **required** |  | `{'Authorization': 'Bearer my-api-key'}` |
+| headers | object | Albert API request headers. | `{}` |  | `{'Authorization': 'Bearer my-api-key'}` |
 | timeout | integer | Timeout for the Albert API requests. | 300 |  | 10 |
 | url | string | Albert API url. | https://albert.api.etalab.gouv.fr |  |  |