docs: update the Swagger doc for the retrieval API

JCHAVEROT · JCHAVEROT · commit f82668a09ca8 · 2026-06-20T18:31:27.000+02:00
diff --git a/src/mmore/run_retriever.py b/src/mmore/run_retriever.py
@@ -140,14 +140,56 @@ def make_router(config_file: str) -> APIRouter:
     retriever_obj = Retriever.from_config(config)
     logger.info("Retriever loaded!")
 
-    @router.get("/list_files", tags=["Files"])
+    @router.get(
+        "/list_files",
+        tags=["Files"],
+        summary="List files in a collection",
+        responses={
+            200: {
+                "description": "Files currently stored in the collection",
+                "content": {
+                    "application/json": {
+                        "example": [
+                            {"id": "doc1", "filename": "report.pdf"},
+                            {"id": "doc2", "filename": "notes.md"},
+                        ]
+                    }
+                },
+            },
+        },
+    )
     def list_files(
         collection_name: str, limit: int = Query(default=16000, ge=1, le=100000)
     ):
-        """List all files currently in the database."""
+        """List the files stored in the given collection."""
         return retriever_obj.list_files(collection_name=collection_name, limit=limit)
 
-    @router.post("/v1/retrieve", tags=["Retrieval"])
+    @router.post(
+        "/v1/retrieve",
+        tags=["Retrieval"],
+        summary="Retrieve the most similar chunks for a query",
+        responses={
+            200: {
+                "description": "Matching chunks ordered by similarity",
+                "content": {
+                    "application/json": {
+                        "example": [
+                            {
+                                "fileId": "doc1",
+                                "chunkId": "3",
+                                "content": "the matched passage...",
+                                "similarity": 0.87,
+                                "metadata": {
+                                    "first": {"page": 0, "paragraph": 2},
+                                    "last": {"page": 0, "paragraph": 2},
+                                },
+                            }
+                        ]
+                    }
+                },
+            },
+        },
+    )
     def retriever(query: RetrieverQuery):
         """Query the retriever"""
 
@@ -180,7 +222,32 @@ def retriever(query: RetrieverQuery):
 
         return docs_info
 
-    @router.get("/v1/chunks/{fileId}/{chunkId}", tags=["Retrieval"])
+    @router.get(
+        "/v1/chunks/{fileId}/{chunkId}",
+        tags=["Retrieval"],
+        summary="Fetch a chunk's content and metadata by reference",
+        responses={
+            200: {
+                "description": "Chunk content and positional metadata",
+                "content": {
+                    "application/json": {
+                        "example": {
+                            "fileId": "doc1",
+                            "chunkId": "3",
+                            "filename": "report.pdf",
+                            "content": "the chunk text...",
+                            "metadata": {
+                                "first": {"page": 0, "paragraph": 2},
+                                "last": {"page": 1, "paragraph": 0},
+                            },
+                        }
+                    }
+                },
+            },
+            400: {"description": "fileId or chunkId contains a forbidden character ('+' or '\"')"},
+            404: {"description": "Chunk not found for the given file"},
+        },
+    )
     def get_chunk(fileId: str, chunkId: str):
         """Fetch a chunk's content and positional metadata by reference."""
         if not _ID_PATTERN.match(fileId) or not _ID_PATTERN.match(chunkId):