feat(vector_store): add lifespan management for vector store without configuration

Tibo Pendino · Tibo Pendino · commit 1da5c3b024c1 · 2026-02-03T12:08:38.000+01:00
- update RouterTable
- add SQLAlchemy migration
- add stronger typing
- add default management with type constraint
- add is_default property for create and update routers
- update tests (not tested yet)
diff --git a/api/alembic/versions/2026_02_02_2204-4ad3859d4c64_add_is_default_property_and_partial_.py b/api/alembic/versions/2026_02_02_2204-4ad3859d4c64_add_is_default_property_and_partial_.py
@@ -0,0 +1,51 @@
+"""add is_default property and partial unique constraint on is_default property for models
+
+Revision ID: 4ad3859d4c64
+Revises: f02a2525b97c
+Create Date: 2026-02-02 22:04:55.231167
+
+"""
+from typing import Sequence, Union
+
+from alembic import op
+import sqlalchemy as sa
+import logging
+
+
+# revision identifiers, used by Alembic.
+revision: str = '4ad3859d4c64'
+down_revision: Union[str, None] = 'f02a2525b97c'
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+logger = logging.getLogger(__name__)
+
+
+def upgrade() -> None:
+    """Add the NOT NULL 'is_default' column to 'router' and a partial unique index on 'router(type)' for rows where is_default IS TRUE."""
+    # ### commands auto generated by Alembic - please adjust! ###
+    logger.warning("Upgrade: adding 'is_default' column to 'router' with server_default=FALSE for existing rows")
+    op.add_column(
+        'router',
+        sa.Column('is_default', sa.Boolean(), nullable=False, server_default=sa.text('FALSE')),  # server default gives existing rows a value so NOT NULL holds
+    )
+    logger.warning("Upgrade: creating partial unique index 'unique_default_per_model_type' on 'router(type)' where is_default IS TRUE")
+    op.create_index(
+        'unique_default_per_model_type',
+        'router',
+        ['type'],
+        unique=True,  # at most one indexed row per type
+        postgresql_where=sa.text('is_default IS TRUE'),  # partial index: only rows flagged as default are indexed
+    )
+    logger.warning("Upgrade: finished")
+    # ### end Alembic commands ###
+
+
+def downgrade() -> None:
+    """Drop the partial unique index 'unique_default_per_model_type', then drop the 'is_default' column from 'router'."""
+    # ### commands auto generated by Alembic - please adjust! ###
+    logger.warning("Downgrade: dropping partial unique index 'unique_default_per_model_type'")
+    op.drop_index('unique_default_per_model_type', table_name='router', postgresql_where=sa.text('is_default IS TRUE'))  # index goes first: its predicate references the column dropped below
+    logger.warning("Downgrade: dropping column 'is_default' from 'router'")
+    op.drop_column('router', 'is_default')
+    logger.warning("Downgrade: finished")
+    # ### end Alembic commands ###
diff --git a/api/endpoints/admin/routers.py b/api/endpoints/admin/routers.py
@@ -30,6 +30,7 @@ async def create_router(
         type=body.type,
         aliases=body.aliases,
         load_balancing_strategy=body.load_balancing_strategy,
+        is_default=body.is_default,
         cost_prompt_tokens=body.cost_prompt_tokens,
         cost_completion_tokens=body.cost_completion_tokens,
         user_id=request_context.get().user_info.id,
@@ -78,6 +79,7 @@ async def update_router(
         type=body.type,
         aliases=body.aliases,
         load_balancing_strategy=body.load_balancing_strategy,
+        is_default=body.is_default,
         cost_prompt_tokens=body.cost_prompt_tokens,
         cost_completion_tokens=body.cost_completion_tokens,
         postgres_session=postgres_session,
diff --git a/api/endpoints/ocr.py b/api/endpoints/ocr.py
@@ -84,7 +84,7 @@ async def ocr_beta(
     """
     Extracts text from PDF files using OCR.
     """
-    # check if file is a pdf (raises UnsupportedFileTypeException if not a PDF)
+    # check if file is a PDF (raises UnsupportedFileTypeException if not a PDF)
     global_context.document_manager.parser_manager._detect_file_type(file=file, type=FileType.PDF)
 
     # check file size
diff --git a/api/helpers/models/_modelregistry.py b/api/helpers/models/_modelregistry.py
@@ -122,6 +122,7 @@ async def setup(self, models: list[ModelConfiguration], postgres_session: AsyncS
                     type=model.type,
                     aliases=model.aliases,
                     load_balancing_strategy=model.load_balancing_strategy,
+                    is_default=False,
                     cost_prompt_tokens=model.cost_prompt_tokens,
                     cost_completion_tokens=model.cost_completion_tokens,
                     user_id=0,  # setup as master user
@@ -181,6 +182,7 @@ async def create_router(
         type: ModelType,
         aliases: list[str],
         load_balancing_strategy: RouterLoadBalancingStrategy,
+        is_default: bool,
         cost_prompt_tokens: float,
         cost_completion_tokens: float,
         user_id: int,
@@ -194,6 +196,7 @@ async def create_router(
             type(ModelType): The type of model
             aliases(List[str]): List of aliases for the model
             load_balancing_strategy(RouterLoadBalancingStrategy): The routing strategy to use
+            is_default(bool): Whether the router is default for its type
             cost_prompt_tokens(float): The cost of a million prompt tokens
             cost_completion_tokens(float): The cost of a million completion tokens
             user_id(int): The user ID of owner of the router
@@ -213,6 +216,7 @@ async def create_router(
                     name=name,
                     type=type.value,
                     load_balancing_strategy=load_balancing_strategy.value,
+                    is_default=is_default,
                     cost_prompt_tokens=cost_prompt_tokens,
                     cost_completion_tokens=cost_completion_tokens,
                 )
@@ -276,6 +280,7 @@ async def update_router(
         type: ModelType | None,
         aliases: list[str] | None,
         load_balancing_strategy: RouterLoadBalancingStrategy | None,
+        is_default: bool | None,
         cost_prompt_tokens: float | None,
         cost_completion_tokens: float | None,
         postgres_session: AsyncSession,
@@ -289,6 +294,7 @@ async def update_router(
             type(Optional[ModelType]): Optional new type
             aliases(Optional[List[str]]): Optional new aliases list (replaces existing)
             load_balancing_strategy(Optional[RouterLoadBalancingStrategy]): Optional new routing strategy
+            is_default(Optional[bool]): Optional new is_default flag (one True per type)
             cost_prompt_tokens(Optional[float]): Optional new cost of a million prompt tokens
             cost_completion_tokens(Optional[float]): Optional new cost of a million completion tokens
             postgres_session(AsyncSession): Database postgres_session
@@ -311,6 +317,8 @@ async def update_router(
             update_values["type"] = type.value
         if load_balancing_strategy is not None:
             update_values["load_balancing_strategy"] = load_balancing_strategy.value
+        if is_default is not None:
+            update_values["is_default"] = is_default
         if name is not None:
             update_values["name"] = name
         if cost_prompt_tokens is not None:
@@ -330,6 +338,7 @@ async def update_router(
                 await postgres_session.execute(query)
 
         await postgres_session.commit()
+        # TODO: Make the update method return the updated router
 
     @staticmethod
     async def get_routers(
@@ -379,6 +388,7 @@ async def get_routers(
                     RouterTable.user_id,
                     RouterTable.type,
                     RouterTable.load_balancing_strategy,
+                    RouterTable.is_default,
                     RouterTable.cost_prompt_tokens,
                     RouterTable.cost_completion_tokens,
                     first_provider_subquery.c.max_context_length,
@@ -429,6 +439,7 @@ async def get_routers(
                     type=ModelType(row["type"]),
                     aliases=aliases.get(row["id"], []),
                     load_balancing_strategy=RouterLoadBalancingStrategy(row["load_balancing_strategy"]),
+                    is_default=row["is_default"],
                     vector_size=row["vector_size"],
                     max_context_length=row["max_context_length"],
                     cost_prompt_tokens=row["cost_prompt_tokens"] or 0.0,
diff --git a/api/schemas/admin/routers.py b/api/schemas/admin/routers.py
@@ -17,6 +17,7 @@ class CreateRouter(BaseModel):
     type: ModelType = Field(..., description="Type of the model router. It will be used to identify the model router type.", examples=["text-generation"])  # fmt: off
     aliases: list[constr(strip_whitespace=True, min_length=1, max_length=64)] = Field(default_factory=list, description="Aliases of the model. It will be used to identify the model by users.", examples=[["model-alias", "model-alias-2"]])  # fmt: off
     load_balancing_strategy: RouterLoadBalancingStrategy = Field(default=RouterLoadBalancingStrategy.SHUFFLE, description="Routing strategy for load balancing between providers of the model. It will be used to identify the model type.", examples=["least_busy"])  # fmt: off
+    is_default: bool = Field(default=False, description="Whether the router is the default one for its type.")
     cost_prompt_tokens: float = Field(default=0.0, ge=0.0, description="Cost of a million prompt tokens (decrease user budget)")
     cost_completion_tokens: float = Field(default=0.0, ge=0.0, description="Cost of a million completion tokens (decrease user budget)")
 
@@ -30,6 +31,7 @@ class UpdateRouter(BaseModel):
     type: ModelType | None = Field(default=None, description="Type of the model router. It will be used to identify the model router type.", examples=["text-generation"])  # fmt: off
     aliases: list[constr(strip_whitespace=True, min_length=1, max_length=64)] | None = Field(default=None, description="Aliases of the model. It will be used to identify the model by users.", examples=[["model-alias", "model-alias-2"]])  # fmt: off
     load_balancing_strategy: RouterLoadBalancingStrategy | None = Field(default=None, description="Routing strategy for load balancing between providers of the model. It will be used to identify the model type.", examples=["least_busy"])  # fmt: off
+    is_default: bool | None = Field(default=None, description="Whether the router is the default one for its type.")
     cost_prompt_tokens: float | None = Field(default=None, ge=0.0, description="Cost of a million prompt tokens (decrease user budget)")
     cost_completion_tokens: float | None = Field(default=None, ge=0.0, description="Cost of a million completion tokens (decrease user budget)")
 
@@ -42,6 +44,7 @@ class Router(BaseModel):
     type: ModelType = Field(..., description="Type of the model router. It will be used to identify the model router type.", examples=["text-generation"])  # fmt: off
     aliases: list[str] | None = Field(default=None, description="Aliases of the model. It will be used to identify the model by users.", examples=[["model-alias", "model-alias-2"]])  # fmt: off
     load_balancing_strategy: RouterLoadBalancingStrategy = Field(..., description="Routing strategy for load balancing between providers of the model. It will be used to identify the model type.", examples=["least_busy"])  # fmt: off
+    is_default: bool = Field(..., description="Whether the router is the default one for its type.")
     vector_size: int | None = Field(default=None, description="Dimension of the vectors, if the models are embeddings. Make sure it is the same for all models.")  # fmt: off
     max_context_length: int | None = Field(default=None, description="Maximum amount of tokens a context could contains. Make sure it is the same for all models.")  # fmt: off
     cost_prompt_tokens: float = Field(description="Cost of a million prompt tokens (decrease user budget)")
diff --git a/api/schemas/core/configuration.py b/api/schemas/core/configuration.py
@@ -361,9 +361,6 @@ class Settings(ConfigBaseModel):
     monitoring_postgres_enabled: bool = Field(default=True, description="If true, the log usage will be written in the PostgreSQL database.")  # fmt: off
     monitoring_prometheus_enabled: bool = Field(default=True, description="If true, Prometheus metrics will be exposed in the `/metrics` endpoint.")  # fmt: off
 
-    # vector store
-    vector_store_model: str | None = Field(default=None, description="Model used to vectorize the text in the vector store database. Is required if a vector store dependency is provided (Elasticsearch or Qdrant). This model must be defined in the `models` section and have type `text-embeddings-inference`.")  # fmt: off
-
     # postgres_session
     session_secret_key: str | None = Field(default=None, description='Secret key for postgres_session middleware. If not provided, the master key will be used.', examples=["knBnU1foGtBEwnOGTOmszldbSwSYLTcE6bdibC8bPGM"])  # fmt: off
 
@@ -420,11 +417,6 @@ def validate_models(self) -> Any:
         if duplicated_models:
             raise ValueError(f"Duplicated model or alias names found: {", ".join(set(duplicated_models))}")
 
-        # check for interdependencies
-        if self.dependencies.vector_store and self.settings.vector_store_model:
-            assert self.settings.vector_store_model in models["all"], "Vector store model must be defined in models section."
-            assert self.settings.vector_store_model in models[ModelType.TEXT_EMBEDDINGS_INFERENCE.value], f"The vector store model must have type {ModelType.TEXT_EMBEDDINGS_INFERENCE}."  # fmt: off
-
         return self
 
 
diff --git a/api/schemas/search.py b/api/schemas/search.py
@@ -19,7 +19,7 @@ class SearchMethod(str, Enum):
 
 class SearchArgs(BaseModel):
     collections: list[int] = Field(min_items=1, description="List of collections ID")
-    rff_k: int = Field(default=20, description="k constant in RFF algorithm")
+    rff_k: int = Field(default=20, description="k constant in RFF algorithm")  # TO FIX: Does this allow zero or negative? Risk of invalid value IMO.
     k: int = Field(gt=0, le=200, default=10, deprecated=True, description="[DEPRECATED: use limit instead]Number of results to return")
     limit: int = Field(gt=0, le=200, default=10, description="Number of results to return")
     offset: int = Field(ge=0, default=0, description="Offset for pagination, specifying how many results to skip from the beginning")
diff --git a/api/sql/models.py b/api/sql/models.py
@@ -2,7 +2,7 @@
 from http import HTTPMethod
 from typing import Optional
 
-from sqlalchemy import ForeignKey, UniqueConstraint, func
+from sqlalchemy import ForeignKey, Index, UniqueConstraint, func, text
 from sqlalchemy.orm import Mapped, declarative_base, mapped_column, relationship
 
 from api.schemas.admin.providers import ProviderCarbonFootprintZone, ProviderType
@@ -191,11 +191,21 @@ class Router(Base):
     name: Mapped[str] = mapped_column(unique=True)
     type: Mapped[ModelType]
     load_balancing_strategy: Mapped[RouterLoadBalancingStrategy]
+    is_default: Mapped[bool] = mapped_column(default=False)
     cost_prompt_tokens: Mapped[float] = mapped_column(default=0.0)
     cost_completion_tokens: Mapped[float] = mapped_column(default=0.0)
     created: Mapped[dt.datetime] = mapped_column(insert_default=func.now())
     updated: Mapped[dt.datetime] = mapped_column(insert_default=func.now(), onupdate=func.now())
 
+    __table_args__ = (
+        Index(
+            "unique_default_per_model_type",
+            "type",
+            unique=True,
+            postgresql_where=text("is_default IS TRUE"),
+        ),
+    )
+
     user: Mapped["User"] = relationship(back_populates="router")
     alias: Mapped[list["RouterAlias"]] = relationship(back_populates="router", cascade="all, delete-orphan", passive_deletes=True)
     provider: Mapped[list["Provider"]] = relationship(back_populates="router", cascade="all, delete-orphan", passive_deletes=True)
diff --git a/api/tests/unit/test_helpers/test_modelregistry/test_routers.py b/api/tests/unit/test_helpers/test_modelregistry/test_routers.py
@@ -78,6 +78,7 @@ async def test_create_router_success(postgres_session: AsyncSession, model_regis
         type=ModelType.TEXT_GENERATION,
         aliases=["alias1", "alias2"],
         load_balancing_strategy=RouterLoadBalancingStrategy.SHUFFLE,
+        is_default=False,
         cost_prompt_tokens=1.0,
         cost_completion_tokens=2.0,
         user_id=1,
@@ -104,6 +105,7 @@ async def test_create_router_master_user(postgres_session: AsyncSession, model_r
         type=ModelType.TEXT_GENERATION,
         aliases=[],
         load_balancing_strategy=RouterLoadBalancingStrategy.LEAST_BUSY,
+        is_default=False,
         cost_prompt_tokens=0.0,
         cost_completion_tokens=0.0,
         user_id=0,  # master user
@@ -124,6 +126,7 @@ async def test_create_router_already_exists(postgres_session: AsyncSession, mode
             type=ModelType.TEXT_GENERATION,
             aliases=[],
             load_balancing_strategy=RouterLoadBalancingStrategy.SHUFFLE,
+            is_default=False,
             cost_prompt_tokens=0.0,
             cost_completion_tokens=0.0,
             user_id=1,
@@ -146,6 +149,7 @@ async def test_create_router_alias_already_exists(postgres_session: AsyncSession
             type=ModelType.TEXT_GENERATION,
             aliases=["existing-alias"],
             load_balancing_strategy=RouterLoadBalancingStrategy.SHUFFLE,
+            is_default=False,
             cost_prompt_tokens=0.0,
             cost_completion_tokens=0.0,
             user_id=1,
@@ -198,6 +202,7 @@ async def test_update_router_success_all_fields(postgres_session: AsyncSession,
         type=ModelType.TEXT_GENERATION,
         aliases=["old-alias"],
         load_balancing_strategy=RouterLoadBalancingStrategy.SHUFFLE,
+        is_default=False,
         vector_size=None,
         max_context_length=4096,
         cost_prompt_tokens=1.0,
@@ -220,6 +225,7 @@ async def test_update_router_success_all_fields(postgres_session: AsyncSession,
         type=ModelType.TEXT_EMBEDDINGS_INFERENCE,
         aliases=["new-alias1", "new-alias2"],
         load_balancing_strategy=RouterLoadBalancingStrategy.LEAST_BUSY,
+        is_default=None,
         cost_prompt_tokens=3.0,
         cost_completion_tokens=4.0,
         postgres_session=postgres_session,
@@ -237,6 +243,7 @@ async def test_update_router_alias_conflict(postgres_session: AsyncSession, mode
         type=ModelType.TEXT_GENERATION,
         aliases=[],
         load_balancing_strategy=RouterLoadBalancingStrategy.SHUFFLE,
+        is_default=False,
         vector_size=None,
         max_context_length=4096,
         cost_prompt_tokens=0.0,
@@ -260,6 +267,7 @@ async def test_update_router_alias_conflict(postgres_session: AsyncSession, mode
             type=None,
             aliases=["conflicting-alias"],
             load_balancing_strategy=None,
+            is_default=None,
             cost_prompt_tokens=None,
             cost_completion_tokens=None,
             postgres_session=postgres_session,
@@ -275,6 +283,7 @@ async def test_update_router_noop(postgres_session: AsyncSession, model_registry
         type=ModelType.TEXT_GENERATION,
         aliases=[],
         load_balancing_strategy=RouterLoadBalancingStrategy.SHUFFLE,
+        is_default=False,
         vector_size=None,
         max_context_length=4096,
         cost_prompt_tokens=0.0,
@@ -293,6 +302,7 @@ async def test_update_router_noop(postgres_session: AsyncSession, model_registry
         type=None,
         aliases=None,
         load_balancing_strategy=None,
+        is_default=None,
         cost_prompt_tokens=None,
         cost_completion_tokens=None,
         postgres_session=postgres_session,
diff --git a/api/utils/dependencies.py b/api/utils/dependencies.py
@@ -1,8 +1,7 @@
+from collections.abc import AsyncGenerator, AsyncIterator
 from contextvars import ContextVar
 
 import redis.asyncio as redis
-from redis.asyncio import Redis as AsyncRedis
-from sqlalchemy.ext.asyncio import AsyncSession
 
 from api.helpers._usagemanager import UsageManager
 from api.helpers.models import ModelRegistry
@@ -32,7 +31,7 @@ def get_model_registry() -> ModelRegistry:
     return global_context.model_registry
 
 
-async def get_redis_client() -> AsyncRedis:
+async def get_redis_client() -> AsyncGenerator:
     """
     Get a Redis client built from the shared connection pool.
 
@@ -47,7 +46,7 @@ async def get_redis_client() -> AsyncRedis:
     await client.aclose()
 
 
-async def get_postgres_session() -> AsyncSession:
+async def get_postgres_session() -> AsyncIterator:
     """
     Get a PostgreSQL postgres_session from the global context.
 
diff --git a/api/utils/lifespan.py b/api/utils/lifespan.py