diff --git a/deeptutor/services/llm/provider_core/openai_compat_provider.py b/deeptutor/services/llm/provider_core/openai_compat_provider.py
index 4400170df..a62ace3c7 100644
--- a/deeptutor/services/llm/provider_core/openai_compat_provider.py
+++ b/deeptutor/services/llm/provider_core/openai_compat_provider.py
@@ -819,7 +819,8 @@ async def _timed_stream():
             self._record_responses_failure(model, reasoning_effort)
 
         request_kwargs["stream"] = True
-        request_kwargs["stream_options"] = {"include_usage": True}
+        if self._spec is None or self._spec.supports_stream_options:
+            request_kwargs["stream_options"] = {"include_usage": True}
         try:
             stream = await self._client.chat.completions.create(**request_kwargs)
         except Exception as exc:
diff --git a/deeptutor/services/provider_registry.py b/deeptutor/services/provider_registry.py
index 93d54406a..0a804c16c 100644
--- a/deeptutor/services/provider_registry.py
+++ b/deeptutor/services/provider_registry.py
@@ -42,6 +42,7 @@ class ProviderSpec:
     strip_model_prefix: bool = False
     supports_max_completion_tokens: bool = False
     supports_prompt_caching: bool = False
+    supports_stream_options: bool = True
     model_overrides: tuple[tuple[str, dict[str, Any]], ...] = ()
     is_oauth: bool = False
     is_direct: bool = False
@@ -389,6 +390,18 @@ def canonical_provider_name(name: str | None) -> str | None:
         default_api_base="http://localhost:8000/v3",
     ),
     # === Auxiliary ==========================================================
+    ProviderSpec(
+        name="nvidia_nim",
+        keywords=("nvidia_nim", "nvidia-nim", "nim"),
+        env_key="NVIDIA_NIM_API_KEY",
+        display_name="NVIDIA NIM",
+        backend="openai_compat",
+        is_gateway=True,
+        detect_by_key_prefix="nvapi-",
+        detect_by_base_keyword="api.nvidia.com",
+        default_api_base="https://integrate.api.nvidia.com/v1",
+        supports_stream_options=False,
+    ),
     ProviderSpec(
         name="groq",
         keywords=("groq",),