From 20357be696ce7f17c26ad14f14c138cea35f6b3d Mon Sep 17 00:00:00 2001 From: Albert Date: Wed, 29 Apr 2026 17:30:03 +0800 Subject: [PATCH] feat: add NVIDIA NIM provider with streaming support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add nvidia_nim provider to the registry (is_gateway, detect_by_key_prefix=nvapi-, detect_by_base_keyword=api.nvidia.com, default_api_base=https://integrate.api.nvidia.com/v1) - Add supports_stream_options field to ProviderSpec (defaults True, preserves existing provider behavior) - Set supports_stream_options=False for nvidia_nim — NVIDIA NIM silently hangs when stream_options={"include_usage": True} is sent, so we omit it for this provider Co-Authored-By: Claude Opus 4.6 --- .../llm/provider_core/openai_compat_provider.py | 3 ++- deeptutor/services/provider_registry.py | 13 +++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/deeptutor/services/llm/provider_core/openai_compat_provider.py b/deeptutor/services/llm/provider_core/openai_compat_provider.py index 4400170df..a62ace3c7 100644 --- a/deeptutor/services/llm/provider_core/openai_compat_provider.py +++ b/deeptutor/services/llm/provider_core/openai_compat_provider.py @@ -819,7 +819,8 @@ async def _timed_stream(): self._record_responses_failure(model, reasoning_effort) request_kwargs["stream"] = True - request_kwargs["stream_options"] = {"include_usage": True} + if self._spec is None or self._spec.supports_stream_options: + request_kwargs["stream_options"] = {"include_usage": True} try: stream = await self._client.chat.completions.create(**request_kwargs) except Exception as exc: diff --git a/deeptutor/services/provider_registry.py b/deeptutor/services/provider_registry.py index 93d54406a..0a804c16c 100644 --- a/deeptutor/services/provider_registry.py +++ b/deeptutor/services/provider_registry.py @@ -42,6 +42,7 @@ class ProviderSpec: strip_model_prefix: bool = False supports_max_completion_tokens: bool = False supports_prompt_caching: bool = False + supports_stream_options: bool = True model_overrides: tuple[tuple[str, dict[str, Any]], ...] = () is_oauth: bool = False is_direct: bool = False @@ -389,6 +390,18 @@ def canonical_provider_name(name: str | None) -> str | None: default_api_base="http://localhost:8000/v3", ), # === Auxiliary ========================================================== + ProviderSpec( + name="nvidia_nim", + keywords=("nvidia_nim", "nvidia-nim", "nim"), + env_key="NVIDIA_NIM_API_KEY", + display_name="NVIDIA NIM", + backend="openai_compat", + is_gateway=True, + detect_by_key_prefix="nvapi-", + detect_by_base_keyword="api.nvidia.com", + default_api_base="https://integrate.api.nvidia.com/v1", + supports_stream_options=False, + ), ProviderSpec( name="groq", keywords=("groq",),