diff --git a/docs/my-website/docs/providers/nscale.md b/docs/my-website/docs/providers/nscale.md
new file mode 100644
index 000000000000..5ebd948db624
--- /dev/null
+++ b/docs/my-website/docs/providers/nscale.md
@@ -0,0 +1,178 @@
+import Tabs from '@theme/Tabs';
+import TabItem from '@theme/TabItem';
+
+# Nscale (EU Sovereign)
+
+| Property | Details |
+|-------|-------|
+| Description | European-domiciled full-stack AI cloud platform for LLMs and image generation. |
+| Provider Route on LiteLLM | `nscale/` |
+| Supported Endpoints | `/chat/completions`, `/images/generations` |
+| API Reference | [Nscale docs](https://docs.nscale.com/docs/getting-started/overview) |
+
+## Required Variables
+
+```python showLineNumbers title="Environment Variables"
+import os
+
+os.environ["NSCALE_API_KEY"] = ""  # your Nscale API key
+```
+
+## Supported Models
+
+### Chat Models
+
+| Model Name | Description | Input Cost | Output Cost |
+|------------|-------------|------------|-------------|
+| nscale/meta-llama/Llama-4-Scout-17B-16E-Instruct | 17B parameter model | $0.09/M tokens | $0.29/M tokens |
+| nscale/Qwen/Qwen2.5-Coder-3B-Instruct | 3B parameter coding model | $0.01/M tokens | $0.03/M tokens |
+| nscale/Qwen/Qwen2.5-Coder-7B-Instruct | 7B parameter coding model | $0.01/M tokens | $0.03/M tokens |
+| nscale/Qwen/Qwen2.5-Coder-32B-Instruct | 32B parameter coding model | $0.06/M tokens | $0.20/M tokens |
+| nscale/Qwen/QwQ-32B | 32B parameter model | $0.18/M tokens | $0.20/M tokens |
+| nscale/deepseek-ai/DeepSeek-R1-Distill-Llama-70B | 70B parameter distilled model | $0.375/M tokens | $0.375/M tokens |
+| nscale/deepseek-ai/DeepSeek-R1-Distill-Llama-8B | 8B parameter distilled model | $0.025/M tokens | $0.025/M tokens |
+| nscale/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B | 1.5B parameter distilled model | $0.09/M tokens | $0.09/M tokens |
+| nscale/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B | 7B parameter distilled model | $0.20/M tokens | $0.20/M tokens |
+| nscale/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B | 14B parameter distilled model | $0.07/M tokens | $0.07/M tokens |
+| nscale/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B | 32B parameter distilled model | $0.15/M tokens | $0.15/M tokens |
+| nscale/mistralai/mixtral-8x22b-instruct-v0.1 | Mixtral 8x22B model | $0.60/M tokens | $0.60/M tokens |
+| nscale/meta-llama/Llama-3.1-8B-Instruct | 8B parameter model | $0.03/M tokens | $0.03/M tokens |
+| nscale/meta-llama/Llama-3.3-70B-Instruct | 70B parameter model | $0.20/M tokens | $0.20/M tokens |
+
+### Image Generation Models
+
+| Model Name | Description | Cost per Pixel |
+|------------|-------------|----------------|
+| nscale/black-forest-labs/FLUX.1-schnell | Fast image generation model | $0.0000000013 |
+| nscale/stabilityai/stable-diffusion-xl-base-1.0 | SDXL base model | $0.000000003 |
+
+## Key Features
+- **EU Sovereign**: Full data sovereignty and compliance with European regulations
+- **Ultra-Low Cost (starting at $0.01 / M tokens)**: Extremely competitive pricing for both text and image generation models
+- **Production Grade**: Reliable serverless deployments with full isolation
+- **No Setup Required**: Instant access to compute without infrastructure management
+- **Full Control**: Your data remains private and isolated
+
+## Usage - LiteLLM Python SDK
+
+### Text Generation
+
+```python showLineNumbers title="Nscale Text Generation"
+from litellm import completion
+import os
+
+os.environ["NSCALE_API_KEY"] = ""  # your Nscale API key
+response = completion(
+    model="nscale/meta-llama/Llama-4-Scout-17B-16E-Instruct",
+    messages=[{"role": "user", "content": "What is LiteLLM?"}]
+)
+print(response)
+```
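+
+### Streaming
+
+Streaming has not been verified separately here; assuming Nscale honours the standard OpenAI-style `stream=True` parameter (as other OpenAI-compatible providers routed through LiteLLM do), usage would look like the sketch below.
+
+```python showLineNumbers title="Nscale Streaming (sketch)"
+from litellm import completion
+import os
+
+os.environ["NSCALE_API_KEY"] = ""  # your Nscale API key
+response = completion(
+    model="nscale/meta-llama/Llama-4-Scout-17B-16E-Instruct",
+    messages=[{"role": "user", "content": "What is LiteLLM?"}],
+    stream=True,  # assumption: provider supports OpenAI-style streaming
+)
+for chunk in response:
+    # each chunk follows the OpenAI streaming delta format
+    print(chunk.choices[0].delta.content or "", end="")
+```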
model="nscale/meta-llama/Llama-4-Scout-17B-16E-Instruct", + messages=[{"role": "user", "content": "What is LiteLLM?"}] +) +print(response) +``` + +### Image Generation + +```python showLineNumbers title="Nscale Image Generation" +from litellm import image_generation +import os + +os.environ["NSCALE_API_KEY"] = "" # your Nscale API key +response = image_generation( + model="nscale/stabilityai/stable-diffusion-xl-base-1.0", + prompt="A beautiful sunset over mountains", + n=1, + size="1024x1024" +) +print(response) +``` + +## Usage - LiteLLM Proxy + +Add the following to your LiteLLM Proxy configuration file: + +```yaml showLineNumbers title="config.yaml" +model_list: + - model_name: nscale/meta-llama/Llama-4-Scout-17B-16E-Instruct + litellm_params: + model: nscale/meta-llama/Llama-4-Scout-17B-16E-Instruct + api_key: os.environ/NSCALE_API_KEY + - model_name: nscale/meta-llama/Llama-3.3-70B-Instruct + litellm_params: + model: nscale/meta-llama/Llama-3.3-70B-Instruct + api_key: os.environ/NSCALE_API_KEY + - model_name: nscale/stabilityai/stable-diffusion-xl-base-1.0 + litellm_params: + model: nscale/stabilityai/stable-diffusion-xl-base-1.0 + api_key: os.environ/NSCALE_API_KEY +``` + +Start your LiteLLM Proxy server: + +```bash showLineNumbers title="Start LiteLLM Proxy" +litellm --config config.yaml + +# RUNNING on http://0.0.0.0:4000 +``` + + + + +```python showLineNumbers title="Nscale via Proxy - Non-streaming" +from openai import OpenAI + +# Initialize client with your proxy URL +client = OpenAI( + base_url="http://localhost:4000", # Your proxy URL + api_key="your-proxy-api-key" # Your proxy API key +) + +# Non-streaming response +response = client.chat.completions.create( + model="nscale/meta-llama/Llama-4-Scout-17B-16E-Instruct", + messages=[{"role": "user", "content": "What is LiteLLM?"}] +) + +print(response.choices[0].message.content) +``` + + + + + +```python showLineNumbers title="Nscale via Proxy - LiteLLM SDK" +import litellm + +# Configure LiteLLM to use your proxy +response = litellm.completion( + model="litellm_proxy/nscale/meta-llama/Llama-4-Scout-17B-16E-Instruct", + messages=[{"role": "user", "content": "What is LiteLLM?"}], + api_base="http://localhost:4000", + api_key="your-proxy-api-key" +) + +print(response.choices[0].message.content) +``` + + + + + +```bash showLineNumbers title="Nscale via Proxy - cURL" +curl http://localhost:4000/v1/chat/completions \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer your-proxy-api-key" \ + -d '{ + "model": "nscale/meta-llama/Llama-4-Scout-17B-16E-Instruct", + "messages": [{"role": "user", "content": "What is LiteLLM?"}] + }' +``` + + + + +## Getting Started +1. Create an account at [console.nscale.com](https://console.nscale.com) +2. Add credit to your account (minimum $5) +3. Create an API key in settings +4. 
+
+## Getting Started
+1. Create an account at [console.nscale.com](https://console.nscale.com)
+2. Add credit to your account (minimum $5)
+3. Create an API key in settings
+4. Start making API calls using LiteLLM
+
+## Additional Resources
+- [Nscale Documentation](https://docs.nscale.com/docs/getting-started/overview)
+- [Blog: Sovereign Serverless](https://www.nscale.com/blog/sovereign-serverless-how-we-designed-full-isolation-without-sacrificing-performance)
\ No newline at end of file
diff --git a/docs/my-website/sidebars.js b/docs/my-website/sidebars.js
index e59b8aae5cd7..b8e8b5929a43 100644
--- a/docs/my-website/sidebars.js
+++ b/docs/my-website/sidebars.js
@@ -237,6 +237,7 @@ const sidebars = {
         "providers/watsonx",
         "providers/predibase",
         "providers/nvidia_nim",
+        { type: "doc", id: "providers/nscale", label: "Nscale (EU Sovereign)" },
         "providers/xai",
         "providers/lm_studio",
         "providers/cerebras",
diff --git a/litellm/__init__.py b/litellm/__init__.py
index a08230c6ed85..aea62a79b0a2 100644
--- a/litellm/__init__.py
+++ b/litellm/__init__.py
@@ -1032,6 +1032,7 @@ def add_known_models():
 from .llms.deepseek.chat.transformation import DeepSeekChatConfig
 from .llms.lm_studio.chat.transformation import LMStudioChatConfig
 from .llms.lm_studio.embed.transformation import LmStudioEmbeddingConfig
+from .llms.nscale.chat.transformation import NscaleConfig
 from .llms.perplexity.chat.transformation import PerplexityChatConfig
 from .llms.azure.chat.o_series_transformation import AzureOpenAIO1Config
 from .llms.watsonx.completion.transformation import IBMWatsonXAIConfig
diff --git a/litellm/constants.py b/litellm/constants.py
index fa944c0dfaa1..329118c3024d 100644
--- a/litellm/constants.py
+++ b/litellm/constants.py
@@ -162,6 +162,7 @@
     "lm_studio",
     "galadriel",
     "meta_llama",
+    "nscale",
 ]
@@ -223,6 +224,7 @@
     "api.x.ai/v1",
     "api.galadriel.ai/v1",
     "api.llama.com/compat/v1/",
+    "inference.api.nscale.com/v1",
 ]
@@ -254,6 +256,7 @@
     "lm_studio",
     "galadriel",
     "meta_llama",
+    "nscale",
 ]
 openai_text_completion_compatible_providers: List = (
     [  # providers that support `/v1/completions`
diff --git a/litellm/litellm_core_utils/get_llm_provider_logic.py b/litellm/litellm_core_utils/get_llm_provider_logic.py
index bdfe0e902523..29f6b2d3f6ce 100644
--- a/litellm/litellm_core_utils/get_llm_provider_logic.py
+++ b/litellm/litellm_core_utils/get_llm_provider_logic.py
@@ -218,6 +218,9 @@ def get_llm_provider(  # noqa: PLR0915
         elif endpoint == "https://api.llama.com/compat/v1":
             custom_llm_provider = "meta_llama"
             dynamic_api_key = api_key or get_secret_str("LLAMA_API_KEY")
+        elif endpoint == litellm.NscaleConfig.API_BASE_URL:
+            custom_llm_provider = "nscale"
+            dynamic_api_key = litellm.NscaleConfig.get_api_key()
     if api_base is not None and not isinstance(api_base, str):
         raise Exception(
@@ -597,6 +600,13 @@ def _get_openai_compatible_provider_info(  # noqa: PLR0915
             or f"https://{get_secret('SNOWFLAKE_ACCOUNT_ID')}.snowflakecomputing.com/api/v2/cortex/inference:complete"
         )  # type: ignore
         dynamic_api_key = api_key or get_secret_str("SNOWFLAKE_JWT")
+    elif custom_llm_provider == "nscale":
+        (
+            api_base,
+            dynamic_api_key,
+        ) = litellm.NscaleConfig()._get_openai_compatible_provider_info(
+            api_base=api_base, api_key=api_key
+        )
     if api_base is not None and not isinstance(api_base, str):
         raise Exception("api base needs to be a string. api_base={}".format(api_base))
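
With the two hooks above, `get_llm_provider` resolves `nscale/...` model strings to the new provider and fills in the default key and base URL. A quick sketch of the expected behaviour (not part of the diff; the key value is a placeholder and assumes `litellm.get_llm_provider` is used directly):

```python
import os
import litellm

os.environ["NSCALE_API_KEY"] = "sk-..."  # placeholder key, for illustration only

# get_llm_provider returns (model, custom_llm_provider, dynamic_api_key, api_base)
model, provider, api_key, api_base = litellm.get_llm_provider(
    model="nscale/meta-llama/Llama-3.3-70B-Instruct"
)
print(provider)  # expected: "nscale"
print(api_base)  # expected: https://inference.api.nscale.com/v1
```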
api_base={}".format(api_base)) diff --git a/litellm/litellm_core_utils/get_supported_openai_params.py b/litellm/litellm_core_utils/get_supported_openai_params.py index 2cb8daa4c57f..11981e627bab 100644 --- a/litellm/litellm_core_utils/get_supported_openai_params.py +++ b/litellm/litellm_core_utils/get_supported_openai_params.py @@ -202,6 +202,8 @@ def get_supported_openai_params( # noqa: PLR0915 return litellm.DeepInfraConfig().get_supported_openai_params(model=model) elif custom_llm_provider == "perplexity": return litellm.PerplexityChatConfig().get_supported_openai_params(model=model) + elif custom_llm_provider == "nscale": + return litellm.NscaleConfig().get_supported_openai_params(model=model) elif custom_llm_provider == "anyscale": return [ "temperature", diff --git a/litellm/llms/nscale/chat/transformation.py b/litellm/llms/nscale/chat/transformation.py new file mode 100644 index 000000000000..4fb9c213fde0 --- /dev/null +++ b/litellm/llms/nscale/chat/transformation.py @@ -0,0 +1,44 @@ +from typing import Optional + +from litellm.llms.openai.chat.gpt_transformation import OpenAIGPTConfig +from litellm.secret_managers.main import get_secret_str + + +class NscaleConfig(OpenAIGPTConfig): + """ + Reference: Nscale is OpenAI compatible. + API Key: NSCALE_API_KEY + Default API Base: https://inference.api.nscale.com/v1 + """ + + API_BASE_URL = "https://inference.api.nscale.com/v1" + + @property + def custom_llm_provider(self) -> Optional[str]: + return "nscale" + + @staticmethod + def get_api_key(api_key: Optional[str] = None) -> Optional[str]: + return api_key or get_secret_str("NSCALE_API_KEY") + + @staticmethod + def get_api_base(api_base: Optional[str] = None) -> Optional[str]: + return ( + api_base or get_secret_str("NSCALE_API_BASE") or NscaleConfig.API_BASE_URL + ) + + def _get_openai_compatible_provider_info( + self, api_base: Optional[str], api_key: Optional[str] + ) -> tuple[Optional[str], Optional[str]]: + # This method is called by get_llm_provider to resolve api_base and api_key + resolved_api_base = NscaleConfig.get_api_base(api_base) + resolved_api_key = NscaleConfig.get_api_key(api_key) + return resolved_api_base, resolved_api_key + + def get_supported_openai_params(self, model: str) -> list: + return [ + "max_tokens", + "n", + "temperature", + "top_p", + ] diff --git a/litellm/main.py b/litellm/main.py index 4ab47398a7c2..2fb495ab8fc8 100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -4788,7 +4788,7 @@ def image_generation( # noqa: PLR0915 model=model, prompt=prompt, timeout=timeout, - api_key=api_key, + api_key=api_key or dynamic_api_key, api_base=api_base, logging_obj=litellm_logging_obj, optional_params=optional_params, diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json index 2e473a683694..8bf06e61b2c6 100644 --- a/litellm/model_prices_and_context_window_backup.json +++ b/litellm/model_prices_and_context_window_backup.json @@ -12197,5 +12197,132 @@ "max_output_tokens": 8192, "litellm_provider": "snowflake", "mode": "chat" + }, + "nscale/meta-llama/Llama-4-Scout-17B-16E-Instruct": { + "input_cost_per_token": 9e-8, + "output_cost_per_token": 2.9e-7, + "litellm_provider": "nscale", + "mode": "chat", + "source": "https://docs.nscale.com/docs/inference/serverless-models/current#chat-models" + }, + "nscale/Qwen/Qwen2.5-Coder-3B-Instruct": { + "input_cost_per_token": 1e-8, + "output_cost_per_token": 3e-8, + "litellm_provider": "nscale", + "mode": "chat", + "source": 
"https://docs.nscale.com/docs/inference/serverless-models/current#chat-models" + }, + "nscale/Qwen/Qwen2.5-Coder-7B-Instruct": { + "input_cost_per_token": 1e-8, + "output_cost_per_token": 3e-8, + "litellm_provider": "nscale", + "mode": "chat", + "source": "https://docs.nscale.com/docs/inference/serverless-models/current#chat-models" + }, + "nscale/Qwen/Qwen2.5-Coder-32B-Instruct": { + "input_cost_per_token": 6e-8, + "output_cost_per_token": 2e-7, + "litellm_provider": "nscale", + "mode": "chat", + "source": "https://docs.nscale.com/docs/inference/serverless-models/current#chat-models" + }, + "nscale/Qwen/QwQ-32B": { + "input_cost_per_token": 1.8e-7, + "output_cost_per_token": 2e-7, + "litellm_provider": "nscale", + "mode": "chat", + "source": "https://docs.nscale.com/docs/inference/serverless-models/current#chat-models" + }, + "nscale/deepseek-ai/DeepSeek-R1-Distill-Llama-70B": { + "input_cost_per_token": 3.75e-7, + "output_cost_per_token": 3.75e-7, + "litellm_provider": "nscale", + "mode": "chat", + "source": "https://docs.nscale.com/docs/inference/serverless-models/current#chat-models", + "notes": "Pricing listed as $0.75/1M tokens total. Assumed 50/50 split for input/output." + }, + "nscale/deepseek-ai/DeepSeek-R1-Distill-Llama-8B": { + "input_cost_per_token": 2.5e-8, + "output_cost_per_token": 2.5e-8, + "litellm_provider": "nscale", + "mode": "chat", + "source": "https://docs.nscale.com/docs/inference/serverless-models/current#chat-models", + "notes": "Pricing listed as $0.05/1M tokens total. Assumed 50/50 split for input/output." + }, + "nscale/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B": { + "input_cost_per_token": 9e-8, + "output_cost_per_token": 9e-8, + "litellm_provider": "nscale", + "mode": "chat", + "source": "https://docs.nscale.com/docs/inference/serverless-models/current#chat-models", + "notes": "Pricing listed as $0.18/1M tokens total. Assumed 50/50 split for input/output." + }, + "nscale/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B": { + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, + "litellm_provider": "nscale", + "mode": "chat", + "source": "https://docs.nscale.com/docs/inference/serverless-models/current#chat-models", + "notes": "Pricing listed as $0.40/1M tokens total. Assumed 50/50 split for input/output." + }, + "nscale/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B": { + "input_cost_per_token": 7e-8, + "output_cost_per_token": 7e-8, + "litellm_provider": "nscale", + "mode": "chat", + "source": "https://docs.nscale.com/docs/inference/serverless-models/current#chat-models", + "notes": "Pricing listed as $0.14/1M tokens total. Assumed 50/50 split for input/output." + }, + "nscale/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B": { + "input_cost_per_token": 1.5e-7, + "output_cost_per_token": 1.5e-7, + "litellm_provider": "nscale", + "mode": "chat", + "source": "https://docs.nscale.com/docs/inference/serverless-models/current#chat-models", + "notes": "Pricing listed as $0.30/1M tokens total. Assumed 50/50 split for input/output." + }, + "nscale/mistralai/mixtral-8x22b-instruct-v0.1": { + "input_cost_per_token": 6e-7, + "output_cost_per_token": 6e-7, + "litellm_provider": "nscale", + "mode": "chat", + "source": "https://docs.nscale.com/docs/inference/serverless-models/current#chat-models", + "notes": "Pricing listed as $1.20/1M tokens total. Assumed 50/50 split for input/output." 
diff --git a/litellm/types/utils.py b/litellm/types/utils.py
index ab3f0d9d2e26..a3eb6f7fda8f 100644
--- a/litellm/types/utils.py
+++ b/litellm/types/utils.py
@@ -2157,6 +2157,7 @@ class LlmProviders(str, Enum):
     ASSEMBLYAI = "assemblyai"
     SNOWFLAKE = "snowflake"
     LLAMA = "meta_llama"
+    NSCALE = "nscale"
 
 
 # Create a set of all provider values for quick lookup
diff --git a/litellm/utils.py b/litellm/utils.py
index e28b0ceca159..0bc579b1182f 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -6383,6 +6383,8 @@ def get_provider_chat_config(  # noqa: PLR0915
             return litellm.LiteLLMProxyChatConfig()
         elif litellm.LlmProviders.OPENAI == provider:
             return litellm.OpenAIGPTConfig()
+        elif litellm.LlmProviders.NSCALE == provider:
+            return litellm.NscaleConfig()
         return None
 
     @staticmethod
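
The `utils.py` branch above routes config lookups for the new provider. A rough sketch of how that lookup would be exercised, assuming `get_provider_chat_config` is the static helper on `ProviderConfigManager` in `litellm/utils.py` (the class name is not shown in this hunk, so treat it as an assumption):

```python
import litellm
from litellm.types.utils import LlmProviders
from litellm.utils import ProviderConfigManager  # assumed location of the helper

config = ProviderConfigManager.get_provider_chat_config(
    model="nscale/meta-llama/Llama-3.3-70B-Instruct",
    provider=LlmProviders.NSCALE,
)
# With the new branch, an NscaleConfig instance is expected back
print(isinstance(config, litellm.NscaleConfig))  # expected: True
```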
"input_cost_per_token": 1.8e-7, + "output_cost_per_token": 2e-7, + "litellm_provider": "nscale", + "mode": "chat", + "source": "https://docs.nscale.com/docs/inference/serverless-models/current#chat-models" + }, + "nscale/deepseek-ai/DeepSeek-R1-Distill-Llama-70B": { + "input_cost_per_token": 3.75e-7, + "output_cost_per_token": 3.75e-7, + "litellm_provider": "nscale", + "mode": "chat", + "source": "https://docs.nscale.com/docs/inference/serverless-models/current#chat-models", + "notes": "Pricing listed as $0.75/1M tokens total. Assumed 50/50 split for input/output." + }, + "nscale/deepseek-ai/DeepSeek-R1-Distill-Llama-8B": { + "input_cost_per_token": 2.5e-8, + "output_cost_per_token": 2.5e-8, + "litellm_provider": "nscale", + "mode": "chat", + "source": "https://docs.nscale.com/docs/inference/serverless-models/current#chat-models", + "notes": "Pricing listed as $0.05/1M tokens total. Assumed 50/50 split for input/output." + }, + "nscale/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B": { + "input_cost_per_token": 9e-8, + "output_cost_per_token": 9e-8, + "litellm_provider": "nscale", + "mode": "chat", + "source": "https://docs.nscale.com/docs/inference/serverless-models/current#chat-models", + "notes": "Pricing listed as $0.18/1M tokens total. Assumed 50/50 split for input/output." + }, + "nscale/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B": { + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, + "litellm_provider": "nscale", + "mode": "chat", + "source": "https://docs.nscale.com/docs/inference/serverless-models/current#chat-models", + "notes": "Pricing listed as $0.40/1M tokens total. Assumed 50/50 split for input/output." + }, + "nscale/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B": { + "input_cost_per_token": 7e-8, + "output_cost_per_token": 7e-8, + "litellm_provider": "nscale", + "mode": "chat", + "source": "https://docs.nscale.com/docs/inference/serverless-models/current#chat-models", + "notes": "Pricing listed as $0.14/1M tokens total. Assumed 50/50 split for input/output." + }, + "nscale/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B": { + "input_cost_per_token": 1.5e-7, + "output_cost_per_token": 1.5e-7, + "litellm_provider": "nscale", + "mode": "chat", + "source": "https://docs.nscale.com/docs/inference/serverless-models/current#chat-models", + "notes": "Pricing listed as $0.30/1M tokens total. Assumed 50/50 split for input/output." + }, + "nscale/mistralai/mixtral-8x22b-instruct-v0.1": { + "input_cost_per_token": 6e-7, + "output_cost_per_token": 6e-7, + "litellm_provider": "nscale", + "mode": "chat", + "source": "https://docs.nscale.com/docs/inference/serverless-models/current#chat-models", + "notes": "Pricing listed as $1.20/1M tokens total. Assumed 50/50 split for input/output." + }, + "nscale/meta-llama/Llama-3.1-8B-Instruct": { + "input_cost_per_token": 3e-8, + "output_cost_per_token": 3e-8, + "litellm_provider": "nscale", + "mode": "chat", + "source": "https://docs.nscale.com/docs/inference/serverless-models/current#chat-models", + "notes": "Pricing listed as $0.06/1M tokens total. Assumed 50/50 split for input/output." + }, + "nscale/meta-llama/Llama-3.3-70B-Instruct": { + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, + "litellm_provider": "nscale", + "mode": "chat", + "source": "https://docs.nscale.com/docs/inference/serverless-models/current#chat-models", + "notes": "Pricing listed as $0.40/1M tokens total. Assumed 50/50 split for input/output." 
diff --git a/tests/litellm/llms/nscale/chat/test_nscale_chat_transformation.py b/tests/litellm/llms/nscale/chat/test_nscale_chat_transformation.py
new file mode 100644
index 000000000000..4fcd79ae2a13
--- /dev/null
+++ b/tests/litellm/llms/nscale/chat/test_nscale_chat_transformation.py
@@ -0,0 +1,55 @@
+import os
+import sys
+from unittest.mock import patch
+
+sys.path.insert(
+    0, os.path.abspath("../../../../..")
+)  # Adds the parent directory to the system path
+
+from litellm.llms.nscale.chat.transformation import NscaleConfig
+
+
+class TestNscaleConfig:
+    def setup_method(self):
+        self.config = NscaleConfig()
+
+    def test_custom_llm_provider(self):
+        """Test that custom_llm_provider returns the correct value"""
+        assert self.config.custom_llm_provider == "nscale"
+
+    def test_get_api_key(self):
+        """Test that get_api_key returns the correct API key"""
+        # Test with provided API key
+        assert self.config.get_api_key("test-key") == "test-key"
+
+        # Test with environment variable
+        with patch(
+            "litellm.llms.nscale.chat.transformation.get_secret_str",
+            return_value="env-key",
+        ):
+            assert self.config.get_api_key() == "env-key"
+
+        # Test with patching environment variable
+        with patch.dict(os.environ, {"NSCALE_API_KEY": "env-key"}):
+            assert self.config.get_api_key() == "env-key"
+
+    def test_get_api_base(self):
+        """Test that get_api_base returns the correct API base URL"""
+        # Test with provided API base
+        assert (
+            self.config.get_api_base("https://custom-base.com")
+            == "https://custom-base.com"
+        )
+
+        # Test with environment variable
+        with patch(
+            "litellm.llms.nscale.chat.transformation.get_secret_str",
+            return_value="https://env-base.com",
+        ):
+            assert self.config.get_api_base() == "https://env-base.com"
+
+        # Test with default API base
+        with patch(
+            "litellm.llms.nscale.chat.transformation.get_secret_str", return_value=None
+        ):
+            assert self.config.get_api_base() == NscaleConfig.API_BASE_URL
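
The suite above covers key and base resolution. A companion check for `_get_openai_compatible_provider_info` and the supported-params list might look like the sketch below (not part of this PR; it relies on the same imports and `patch` target as the tests above):

```python
class TestNscaleConfigExtra:
    """Illustrative sketch only - exercises the remaining NscaleConfig helpers."""

    def setup_method(self):
        self.config = NscaleConfig()

    def test_get_openai_compatible_provider_info_defaults(self):
        # With neither api_base nor api_key supplied, the default base URL is used
        with patch(
            "litellm.llms.nscale.chat.transformation.get_secret_str", return_value=None
        ):
            api_base, api_key = self.config._get_openai_compatible_provider_info(
                api_base=None, api_key=None
            )
        assert api_base == NscaleConfig.API_BASE_URL
        assert api_key is None

    def test_get_supported_openai_params(self):
        params = self.config.get_supported_openai_params(model="nscale/Qwen/QwQ-32B")
        assert params == ["max_tokens", "n", "temperature", "top_p"]
```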