BerriAI · ishaan-jaff · May 8, 2025 · May 7, 2025 · May 7, 2025 · May 7, 2025
diff --git a/docs/my-website/docs/providers/nscale.md b/docs/my-website/docs/providers/nscale.md
@@ -0,0 +1,178 @@
+import Tabs from '@theme/Tabs';
+import TabItem from '@theme/TabItem';
+
+# Nscale (EU Sovereign)
+
+| Property | Details |
+|-------|-------|
+| Description | European-domiciled full-stack AI cloud platform for LLMs and image generation. |
+| Provider Route on LiteLLM | `nscale/` |
+| Supported Endpoints | `/chat/completions`, `/images/generations` |
+| API Reference | [Nscale docs](https://docs.nscale.com/docs/getting-started/overview) |
+
+## Required Variables
+
+```python showLineNumbers title="Environment Variables"
+os.environ["NSCALE_API_KEY"] = ""  # your Nscale API key
+```
+
+## Supported Models
+
+### Chat Models
+
+| Model Name | Description | Input Cost | Output Cost |
+|------------|-------------|------------|-------------|
+| nscale/meta-llama/Llama-4-Scout-17B-16E-Instruct | 17B active-parameter mixture-of-experts model (16 experts) | $0.09/M tokens | $0.29/M tokens |
+| nscale/Qwen/Qwen2.5-Coder-3B-Instruct | 3B parameter coding model | $0.01/M tokens | $0.03/M tokens |
+| nscale/Qwen/Qwen2.5-Coder-7B-Instruct | 7B parameter coding model | $0.01/M tokens | $0.03/M tokens |
+| nscale/Qwen/Qwen2.5-Coder-32B-Instruct | 32B parameter coding model | $0.06/M tokens | $0.20/M tokens |
+| nscale/Qwen/QwQ-32B | 32B parameter model | $0.18/M tokens | $0.20/M tokens |
+| nscale/deepseek-ai/DeepSeek-R1-Distill-Llama-70B | 70B parameter distilled model | $0.375/M tokens | $0.375/M tokens |
+| nscale/deepseek-ai/DeepSeek-R1-Distill-Llama-8B | 8B parameter distilled model | $0.025/M tokens | $0.025/M tokens |
+| nscale/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B | 1.5B parameter distilled model | $0.09/M tokens | $0.09/M tokens |
+| nscale/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B | 7B parameter distilled model | $0.20/M tokens | $0.20/M tokens |
+| nscale/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B | 14B parameter distilled model | $0.07/M tokens | $0.07/M tokens |
+| nscale/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B | 32B parameter distilled model | $0.15/M tokens | $0.15/M tokens |
+| nscale/mistralai/mixtral-8x22b-instruct-v0.1 | Mixtral 8x22B model | $0.60/M tokens | $0.60/M tokens |
+| nscale/meta-llama/Llama-3.1-8B-Instruct | 8B parameter model | $0.03/M tokens | $0.03/M tokens |
+| nscale/meta-llama/Llama-3.3-70B-Instruct | 70B parameter model | $0.20/M tokens | $0.20/M tokens |
+
+### Image Generation Models
+
+| Model Name | Description | Cost per Pixel |
+|------------|-------------|----------------|
+| nscale/black-forest-labs/FLUX.1-schnell | Fast image generation model | $0.0000000013 |
+| nscale/stabilityai/stable-diffusion-xl-base-1.0 | SDXL base model | $0.000000003 |
+
+## Key Features
+- **EU Sovereign**: Full data sovereignty and compliance with European regulations
+- **Ultra-Low Cost (starting at $0.01 / M tokens)**: Extremely competitive pricing for both text and image generation models
+- **Production Grade**: Reliable serverless deployments with full isolation
+- **No Setup Required**: Instant access to compute without infrastructure management
+- **Full Control**: Your data remains private and isolated
+
+## Usage - LiteLLM Python SDK
+
+### Text Generation
+
+```python showLineNumbers title="Nscale Text Generation"
+from litellm import completion
+import os
+
+os.environ["NSCALE_API_KEY"] = ""  # your Nscale API key
+response = completion(
+    model="nscale/meta-llama/Llama-4-Scout-17B-16E-Instruct",
+    messages=[{"role": "user", "content": "What is LiteLLM?"}]
+)
+print(response)
+```
+
+### Image Generation
+
+```python showLineNumbers title="Nscale Image Generation"
+from litellm import image_generation
+import os
+
+os.environ["NSCALE_API_KEY"] = ""  # your Nscale API key
+response = image_generation(
+    model="nscale/stabilityai/stable-diffusion-xl-base-1.0",
+    prompt="A beautiful sunset over mountains",
+    n=1,
+    size="1024x1024"
+)
+print(response)
+```
+
+## Usage - LiteLLM Proxy
+
+Add the following to your LiteLLM Proxy configuration file:
+
+```yaml showLineNumbers title="config.yaml"
+model_list:
+  - model_name: nscale/meta-llama/Llama-4-Scout-17B-16E-Instruct
+    litellm_params:
+      model: nscale/meta-llama/Llama-4-Scout-17B-16E-Instruct
+      api_key: os.environ/NSCALE_API_KEY
+  - model_name: nscale/meta-llama/Llama-3.3-70B-Instruct
+    litellm_params:
+      model: nscale/meta-llama/Llama-3.3-70B-Instruct
+      api_key: os.environ/NSCALE_API_KEY
+  - model_name: nscale/stabilityai/stable-diffusion-xl-base-1.0
+    litellm_params:
+      model: nscale/stabilityai/stable-diffusion-xl-base-1.0
+      api_key: os.environ/NSCALE_API_KEY
+```
+
+Start your LiteLLM Proxy server:
+
+```bash showLineNumbers title="Start LiteLLM Proxy"
+litellm --config config.yaml
+
+# RUNNING on http://0.0.0.0:4000
+```
+
+<Tabs>
+<TabItem value="openai-sdk" label="OpenAI SDK">
+
+```python showLineNumbers title="Nscale via Proxy - Non-streaming"
+from openai import OpenAI
+
+# Initialize client with your proxy URL
+client = OpenAI(
+    base_url="http://localhost:4000",  # Your proxy URL
+    api_key="your-proxy-api-key"       # Your proxy API key
+)
+
+# Non-streaming response
+response = client.chat.completions.create(
+    model="nscale/meta-llama/Llama-4-Scout-17B-16E-Instruct",
+    messages=[{"role": "user", "content": "What is LiteLLM?"}]
+)
+
+print(response.choices[0].message.content)
+```
+
+</TabItem>
+
+<TabItem value="litellm-sdk" label="LiteLLM SDK">
+
+```python showLineNumbers title="Nscale via Proxy - LiteLLM SDK"
+import litellm
+
+# The `litellm_proxy/` model prefix plus `api_base` routes this call through your proxy
+response = litellm.completion(
+    model="litellm_proxy/nscale/meta-llama/Llama-4-Scout-17B-16E-Instruct",
+    messages=[{"role": "user", "content": "What is LiteLLM?"}],
+    api_base="http://localhost:4000",
+    api_key="your-proxy-api-key"
+)
+
+print(response.choices[0].message.content)
+```
+
+</TabItem>
+
+<TabItem value="curl" label="cURL">
+
+```bash showLineNumbers title="Nscale via Proxy - cURL"
+curl http://localhost:4000/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -H "Authorization: Bearer your-proxy-api-key" \
+  -d '{
+    "model": "nscale/meta-llama/Llama-4-Scout-17B-16E-Instruct",
+    "messages": [{"role": "user", "content": "What is LiteLLM?"}]
+  }'
+```
+
+</TabItem>
+</Tabs>
+
+## Getting Started
+1. Create an account at [console.nscale.com](https://console.nscale.com)
+2. Add credit to your account (minimum $5)
+3. Create an API key in settings
+4. Start making API calls using LiteLLM
+
+## Additional Resources
+- [Nscale Documentation](https://docs.nscale.com/docs/getting-started/overview)
+- [Blog: Sovereign Serverless](https://www.nscale.com/blog/sovereign-serverless-how-we-designed-full-isolation-without-sacrificing-performance)
diff --git a/docs/my-website/sidebars.js b/docs/my-website/sidebars.js
@@ -237,6 +237,7 @@ const sidebars = {
         "providers/watsonx",
         "providers/predibase",
         "providers/nvidia_nim",
+        { type: "doc", id: "providers/nscale", label: "Nscale (EU Sovereign)" },
         "providers/xai",
         "providers/lm_studio",
         "providers/cerebras",

diff --git a/litellm/__init__.py b/litellm/__init__.py
@@ -1032,6 +1032,7 @@ def add_known_models():
 from .llms.deepseek.chat.transformation import DeepSeekChatConfig
 from .llms.lm_studio.chat.transformation import LMStudioChatConfig
 from .llms.lm_studio.embed.transformation import LmStudioEmbeddingConfig
+from .llms.nscale.chat.transformation import NscaleConfig
 from .llms.perplexity.chat.transformation import PerplexityChatConfig
 from .llms.azure.chat.o_series_transformation import AzureOpenAIO1Config
 from .llms.watsonx.completion.transformation import IBMWatsonXAIConfig

diff --git a/litellm/constants.py b/litellm/constants.py
@@ -162,6 +162,7 @@
     "lm_studio",
     "galadriel",
     "meta_llama",
+    "nscale",
 ]
 
 
@@ -223,6 +224,7 @@
     "api.x.ai/v1",
     "api.galadriel.ai/v1",
     "api.llama.com/compat/v1/",
+    "inference.api.nscale.com/v1",
 ]
 
 
@@ -254,6 +256,7 @@
     "lm_studio",
     "galadriel",
     "meta_llama",
+    "nscale",
 ]
 openai_text_completion_compatible_providers: List = (
     [  # providers that support `/v1/completions`

diff --git a/litellm/litellm_core_utils/get_llm_provider_logic.py b/litellm/litellm_core_utils/get_llm_provider_logic.py
@@ -218,6 +218,9 @@ def get_llm_provider(  # noqa: PLR0915
                     elif endpoint == "https://api.llama.com/compat/v1":
                         custom_llm_provider = "meta_llama"
                         dynamic_api_key = api_key or get_secret_str("LLAMA_API_KEY")
+                    elif endpoint == litellm.NscaleConfig.API_BASE_URL:
+                        custom_llm_provider = "nscale"
+                        dynamic_api_key = litellm.NscaleConfig.get_api_key()
 
                     if api_base is not None and not isinstance(api_base, str):
                         raise Exception(
@@ -597,6 +600,13 @@ def _get_openai_compatible_provider_info(  # noqa: PLR0915
             or f"https://{get_secret('SNOWFLAKE_ACCOUNT_ID')}.snowflakecomputing.com/api/v2/cortex/inference:complete"
         )  # type: ignore
         dynamic_api_key = api_key or get_secret_str("SNOWFLAKE_JWT")
+    elif custom_llm_provider == "nscale":
+        (
+            api_base,
+            dynamic_api_key,
+        ) = litellm.NscaleConfig()._get_openai_compatible_provider_info(
+            api_base=api_base, api_key=api_key
+        )
 
     if api_base is not None and not isinstance(api_base, str):
         raise Exception("api base needs to be a string. api_base={}".format(api_base))

diff --git a/litellm/litellm_core_utils/get_supported_openai_params.py b/litellm/litellm_core_utils/get_supported_openai_params.py
@@ -202,6 +202,8 @@ def get_supported_openai_params(  # noqa: PLR0915
         return litellm.DeepInfraConfig().get_supported_openai_params(model=model)
     elif custom_llm_provider == "perplexity":
         return litellm.PerplexityChatConfig().get_supported_openai_params(model=model)
+    elif custom_llm_provider == "nscale":
+        return litellm.NscaleConfig().get_supported_openai_params(model=model)
     elif custom_llm_provider == "anyscale":
         return [
             "temperature",

diff --git a/litellm/llms/nscale/chat/transformation.py b/litellm/llms/nscale/chat/transformation.py
@@ -0,0 +1,44 @@
+from typing import Optional, Tuple
+
+from litellm.llms.openai.chat.gpt_transformation import OpenAIGPTConfig
+from litellm.secret_managers.main import get_secret_str
+
+
+class NscaleConfig(OpenAIGPTConfig):
+    """
+    Reference: Nscale is OpenAI compatible.
+    API Key: NSCALE_API_KEY
+    Default API Base: https://inference.api.nscale.com/v1
+    """
+
+    API_BASE_URL = "https://inference.api.nscale.com/v1"
+
+    @property
+    def custom_llm_provider(self) -> Optional[str]:
+        return "nscale"
+
+    @staticmethod
+    def get_api_key(api_key: Optional[str] = None) -> Optional[str]:
+        return api_key or get_secret_str("NSCALE_API_KEY")
+
+    @staticmethod
+    def get_api_base(api_base: Optional[str] = None) -> Optional[str]:
+        return (
+            api_base or get_secret_str("NSCALE_API_BASE") or NscaleConfig.API_BASE_URL
+        )
+
+    def _get_openai_compatible_provider_info(
+        self, api_base: Optional[str], api_key: Optional[str]
+    ) -> tuple[Optional[str], Optional[str]]:
+        # This method is called by get_llm_provider to resolve api_base and api_key
+        resolved_api_base = NscaleConfig.get_api_base(api_base)
+        resolved_api_key = NscaleConfig.get_api_key(api_key)
+        return resolved_api_base, resolved_api_key
+
+    def get_supported_openai_params(self, model: str) -> list:
+        return [
+            "max_tokens",
+            "n",
+            "temperature",
+            "top_p",
+        ]
diff --git a/litellm/main.py b/litellm/main.py
@@ -4788,7 +4788,7 @@ def image_generation(  # noqa: PLR0915
                 model=model,
                 prompt=prompt,
                 timeout=timeout,
-                api_key=api_key,
+                api_key=api_key or dynamic_api_key,
                 api_base=api_base,
                 logging_obj=litellm_logging_obj,
                 optional_params=optional_params,