Allow api_base/api_key in embedding config (#2269)

ctfliblime · pjones9 · web-flow · commit be0c05022205 · 2025-11-06T19:01:28.000-06:00
Currently, to have embedding and completion requests hit two distinct
API base URLs, you have to define the embedding base in an
environment variable and set the completion base in the TOML
config. This patch removes the need to use the environment
at all by allowing api_base and api_key to be set in the
embedding config as well.

Co-authored-by: Clay Fouts <cfouts@ptfs.com>
diff --git a/py/core/base/providers/embedding.py b/py/core/base/providers/embedding.py
@@ -29,6 +29,8 @@ class EmbeddingConfig(ProviderConfig):
     max_retries: int = 3
     initial_backoff: float = 1
     max_backoff: float = 64.0
+    api_base: Optional[str] = None
+    api_key: Optional[str] = None
     quantization_settings: VectorQuantizationSettings = (
         VectorQuantizationSettings()
     )
diff --git a/py/core/providers/embeddings/litellm.py b/py/core/providers/embeddings/litellm.py
@@ -69,6 +69,10 @@ def _get_embedding_kwargs(self, **kwargs):
             "model": self.base_model,
             "dimensions": self.base_dimension,
         }
+        if self.config.api_base:
+            embedding_kwargs["api_base"] = self.config.api_base
+        if self.config.api_key:
+            embedding_kwargs["api_key"] = self.config.api_key
         embedding_kwargs.update(kwargs)
         return embedding_kwargs
 

Original file line number	Diff line number	Diff line change
`@@ -29,6 +29,8 @@ class EmbeddingConfig(ProviderConfig):`
`29`	`29`	`max_retries: int = 3`
`30`	`30`	`initial_backoff: float = 1`
`31`	`31`	`max_backoff: float = 64.0`
	`32`	`+ api_base: Optional[str] = None`
	`33`	`+ api_key: Optional[str] = None`
`32`	`34`	`quantization_settings: VectorQuantizationSettings = (`
`33`	`35`	`VectorQuantizationSettings()`
`34`	`36`	`)`