import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';

# Nscale (EU Sovereign)

| Property | Details |
|-------|-------|
| Description | European-domiciled full-stack AI cloud platform for LLMs and image generation. |
| Provider Route on LiteLLM | `nscale/` |
| Supported Endpoints | `/chat/completions`, `/images/generations` |
| API Reference | [Nscale docs](https://docs.nscale.com/docs/getting-started/overview) |

## Required Variables

```python showLineNumbers title="Environment Variables"
import os

os.environ["NSCALE_API_KEY"] = ""  # your Nscale API key
```
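
If you prefer not to export a global environment variable, `litellm.completion` also accepts the key per call via its `api_key` parameter. A minimal sketch (the key value below is a placeholder):

```python showLineNumbers title="Per-call API Key"
from litellm import completion

response = completion(
    model="nscale/meta-llama/Llama-3.1-8B-Instruct",
    messages=[{"role": "user", "content": "Hello"}],
    api_key="your-nscale-api-key",  # placeholder - use your real Nscale key
)
print(response)
```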

## Supported Models

### Chat Models

| Model Name | Description | Input Cost | Output Cost |
|------------|-------------|------------|-------------|
| nscale/meta-llama/Llama-4-Scout-17B-16E-Instruct | 17B parameter model | $0.09/M tokens | $0.29/M tokens |
| nscale/Qwen/Qwen2.5-Coder-3B-Instruct | 3B parameter coding model | $0.01/M tokens | $0.03/M tokens |
| nscale/Qwen/Qwen2.5-Coder-7B-Instruct | 7B parameter coding model | $0.01/M tokens | $0.03/M tokens |
| nscale/Qwen/Qwen2.5-Coder-32B-Instruct | 32B parameter coding model | $0.06/M tokens | $0.20/M tokens |
| nscale/Qwen/QwQ-32B | 32B parameter model | $0.18/M tokens | $0.20/M tokens |
| nscale/deepseek-ai/DeepSeek-R1-Distill-Llama-70B | 70B parameter distilled model | $0.375/M tokens | $0.375/M tokens |
| nscale/deepseek-ai/DeepSeek-R1-Distill-Llama-8B | 8B parameter distilled model | $0.025/M tokens | $0.025/M tokens |
| nscale/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B | 1.5B parameter distilled model | $0.09/M tokens | $0.09/M tokens |
| nscale/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B | 7B parameter distilled model | $0.20/M tokens | $0.20/M tokens |
| nscale/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B | 14B parameter distilled model | $0.07/M tokens | $0.07/M tokens |
| nscale/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B | 32B parameter distilled model | $0.15/M tokens | $0.15/M tokens |
| nscale/mistralai/mixtral-8x22b-instruct-v0.1 | Mixtral 8x22B model | $0.60/M tokens | $0.60/M tokens |
| nscale/meta-llama/Llama-3.1-8B-Instruct | 8B parameter model | $0.03/M tokens | $0.03/M tokens |
| nscale/meta-llama/Llama-3.3-70B-Instruct | 70B parameter model | $0.20/M tokens | $0.20/M tokens |

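These per-token prices are tracked in LiteLLM's model cost map, so you can estimate spend directly from a response with `litellm.completion_cost`. A minimal sketch (assumes your installed LiteLLM version includes these models in its cost map):

```python showLineNumbers title="Estimating Request Cost"
from litellm import completion, completion_cost
import os

os.environ["NSCALE_API_KEY"] = ""  # your Nscale API key
response = completion(
    model="nscale/Qwen/Qwen2.5-Coder-3B-Instruct",
    messages=[{"role": "user", "content": "Write a haiku about Python"}],
)
print(f"Estimated cost: ${completion_cost(completion_response=response):.6f}")
```
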
### Image Generation Models

| Model Name | Description | Cost per Pixel |
|------------|-------------|----------------|
| nscale/black-forest-labs/FLUX.1-schnell | Fast image generation model | $0.0000000013 |
| nscale/stabilityai/stable-diffusion-xl-base-1.0 | SDXL base model | $0.000000003 |

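Because pricing is per pixel, the cost of an image scales with its resolution. For example, a 1024x1024 SDXL image is 1,048,576 pixels, or roughly $0.003 at the rate above:

```python showLineNumbers title="Per-Image Cost Estimate"
# 1024 x 1024 image at the SDXL per-pixel rate from the table above
pixels = 1024 * 1024
print(f"${pixels * 0.000000003:.4f}")  # -> $0.0031
```
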
## Key Features
- **EU Sovereign**: Full data sovereignty and compliance with European regulations
- **Ultra-Low Cost (starting at $0.01/M tokens)**: Extremely competitive pricing for both text and image generation models
- **Production Grade**: Reliable serverless deployments with full isolation
- **No Setup Required**: Instant access to compute without infrastructure management
- **Full Control**: Your data remains private and isolated

## Usage - LiteLLM Python SDK

### Text Generation

```python showLineNumbers title="Nscale Text Generation"
from litellm import completion
import os

os.environ["NSCALE_API_KEY"] = ""  # your Nscale API key
response = completion(
    model="nscale/meta-llama/Llama-4-Scout-17B-16E-Instruct",
    messages=[{"role": "user", "content": "What is LiteLLM?"}]
)
print(response)
```
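
Streaming works the same way as with other OpenAI-compatible providers in LiteLLM: pass `stream=True` and iterate over the returned chunks. A minimal sketch:

```python showLineNumbers title="Nscale Streaming"
from litellm import completion
import os

os.environ["NSCALE_API_KEY"] = ""  # your Nscale API key
response = completion(
    model="nscale/meta-llama/Llama-4-Scout-17B-16E-Instruct",
    messages=[{"role": "user", "content": "What is LiteLLM?"}],
    stream=True,
)
for chunk in response:
    print(chunk.choices[0].delta.content or "", end="")
```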

### Image Generation

```python showLineNumbers title="Nscale Image Generation"
from litellm import image_generation
import os

os.environ["NSCALE_API_KEY"] = ""  # your Nscale API key
response = image_generation(
    model="nscale/stabilityai/stable-diffusion-xl-base-1.0",
    prompt="A beautiful sunset over mountains",
    n=1,
    size="1024x1024"
)
print(response)
```
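
For async code paths, the same call is available as `litellm.aimage_generation` (chat has the analogous `litellm.acompletion`). A minimal sketch:

```python showLineNumbers title="Nscale Async Image Generation"
import asyncio
import os
from litellm import aimage_generation

os.environ["NSCALE_API_KEY"] = ""  # your Nscale API key

async def main():
    response = await aimage_generation(
        model="nscale/black-forest-labs/FLUX.1-schnell",
        prompt="A beautiful sunset over mountains",
        n=1,
        size="1024x1024",
    )
    print(response)

asyncio.run(main())
```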

## Usage - LiteLLM Proxy

Add the following to your LiteLLM Proxy configuration file:

```yaml showLineNumbers title="config.yaml"
model_list:
  - model_name: nscale/meta-llama/Llama-4-Scout-17B-16E-Instruct
    litellm_params:
      model: nscale/meta-llama/Llama-4-Scout-17B-16E-Instruct
      api_key: os.environ/NSCALE_API_KEY
  - model_name: nscale/meta-llama/Llama-3.3-70B-Instruct
    litellm_params:
      model: nscale/meta-llama/Llama-3.3-70B-Instruct
      api_key: os.environ/NSCALE_API_KEY
  - model_name: nscale/stabilityai/stable-diffusion-xl-base-1.0
    litellm_params:
      model: nscale/stabilityai/stable-diffusion-xl-base-1.0
      api_key: os.environ/NSCALE_API_KEY
```

Start your LiteLLM Proxy server:

```bash showLineNumbers title="Start LiteLLM Proxy"
litellm --config config.yaml

# RUNNING on http://0.0.0.0:4000
```

<Tabs>
<TabItem value="openai-sdk" label="OpenAI SDK">

```python showLineNumbers title="Nscale via Proxy - Non-streaming"
from openai import OpenAI

# Initialize client with your proxy URL
client = OpenAI(
    base_url="http://localhost:4000",  # Your proxy URL
    api_key="your-proxy-api-key"       # Your proxy API key
)

# Non-streaming response
response = client.chat.completions.create(
    model="nscale/meta-llama/Llama-4-Scout-17B-16E-Instruct",
    messages=[{"role": "user", "content": "What is LiteLLM?"}]
)

print(response.choices[0].message.content)
```

</TabItem>

<TabItem value="litellm-sdk" label="LiteLLM SDK">

```python showLineNumbers title="Nscale via Proxy - LiteLLM SDK"
import litellm

# Route the request through your proxy
response = litellm.completion(
    model="litellm_proxy/nscale/meta-llama/Llama-4-Scout-17B-16E-Instruct",
    messages=[{"role": "user", "content": "What is LiteLLM?"}],
    api_base="http://localhost:4000",
    api_key="your-proxy-api-key"
)

print(response.choices[0].message.content)
```

</TabItem>

<TabItem value="curl" label="cURL">

```bash showLineNumbers title="Nscale via Proxy - cURL"
curl http://localhost:4000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer your-proxy-api-key" \
  -d '{
    "model": "nscale/meta-llama/Llama-4-Scout-17B-16E-Instruct",
    "messages": [{"role": "user", "content": "What is LiteLLM?"}]
  }'
```

</TabItem>
</Tabs>
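
The proxy also exposes the OpenAI-compatible `/v1/images/generations` route, so the SDXL deployment registered in the config above can be called through the OpenAI SDK as well. A minimal sketch (the response item may carry a URL or a base64 payload depending on the backend):

```python showLineNumbers title="Image Generation via Proxy"
from openai import OpenAI

client = OpenAI(
    base_url="http://localhost:4000",  # Your proxy URL
    api_key="your-proxy-api-key"       # Your proxy API key
)

image = client.images.generate(
    model="nscale/stabilityai/stable-diffusion-xl-base-1.0",
    prompt="A beautiful sunset over mountains",
    n=1,
    size="1024x1024"
)
print(image.data[0])
```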

## Getting Started
1. Create an account at [console.nscale.com](https://console.nscale.com)
2. Add credit to your account (minimum $5)
3. Create an API key in settings
4. Start making API calls using LiteLLM

## Additional Resources
- [Nscale Documentation](https://docs.nscale.com/docs/getting-started/overview)
- [Blog: Sovereign Serverless](https://www.nscale.com/blog/sovereign-serverless-how-we-designed-full-isolation-without-sacrificing-performance)