Merged
Changes from 11 commits
15 changes: 9 additions & 6 deletions docs/thinking.md
@@ -39,15 +39,15 @@ The unified `thinking` setting maps to each provider's native format:

 | Provider | `thinking=True` | `thinking='high'` | Notes |
 |---|---|---|---|
-| Anthropic (Opus 4.6+) | `anthropic_thinking={'type': 'adaptive'}` | + `anthropic_effort='high'` | Adaptive thinking |
-| Anthropic (older) | `anthropic_thinking={'type': 'enabled', 'budget_tokens': 10000}` | `budget_tokens=16384` | Budget-based |
+| Anthropic (Opus 4.6+) | `anthropic_thinking={'type': 'adaptive'}` | `budget_tokens=16384` + `effort='high'` | `True`/`'medium'` → adaptive; other levels → budget |
+| Anthropic (older) | `anthropic_thinking={'type': 'enabled', 'budget_tokens': 10000}` | `budget_tokens=16384` | Budget-based; `'low'` → 2048 tokens |
 | OpenAI | `reasoning_effort='medium'` | `reasoning_effort='high'` | |
 | Google (Gemini 3+) | `include_thoughts=True` | `thinking_level='HIGH'` | |
 | Google (Gemini 2.5) | `include_thoughts=True` | `thinking_budget=24576` | |
-| Groq | `reasoning_format='parsed'` | `reasoning_format='parsed'` | Effort ignored |
+| Groq | `reasoning_format='parsed'` | `reasoning_format='parsed'` | `thinking=False` → `'hidden'` (no true disable) |
 | OpenRouter | `reasoning.effort='medium'` | `reasoning.effort='high'` | Via `extra_body` |
-| Cerebras | *(default)* | *(default)* | Only `thinking=False` → `disable_reasoning` |
-| xAI | `reasoning_effort='high'` | `reasoning_effort='high'` | Only `'low'` and `'high'` supported |
+| Cerebras | `disable_reasoning=False` | `disable_reasoning=False` | `thinking=False` → `disable_reasoning=True` |
+| xAI | `reasoning_effort='high'` | `reasoning_effort='high'` | Only `'low'` and `'high'` |
 | Bedrock (Claude) | `thinking.type='enabled'` | `budget_tokens=16384` | No adaptive support |
 | Bedrock (OpenAI) | `reasoning_effort='medium'` | `reasoning_effort='high'` | |
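As a quick sanity check, the post-PR rows of the table above can be transcribed as plain data (an illustration only; the keys are shorthand provider names, not library identifiers):

```python
# Transcription of the table above: what the unified `thinking` setting
# resolves to per provider after this PR. Shorthand keys, illustrative only.
THINKING_TRUE = {
    'anthropic_opus_4_6': {'type': 'adaptive'},
    'anthropic_older': {'type': 'enabled', 'budget_tokens': 10000},
    'openai': {'reasoning_effort': 'medium'},
    'groq': {'reasoning_format': 'parsed'},
    'cerebras': {'disable_reasoning': False},
    'xai': {'reasoning_effort': 'high'},
}
THINKING_FALSE = {
    'groq': {'reasoning_format': 'hidden'},   # no true disable on Groq
    'cerebras': {'disable_reasoning': True},
}
```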

@@ -246,12 +246,15 @@ To enable thinking, use the [`GroqModelSettings.groq_reasoning_format`][pydantic
 from pydantic_ai import Agent
 from pydantic_ai.models.groq import GroqModel, GroqModelSettings

-model = GroqModel('qwen-qwq-32b')
+model = GroqModel('qwen/qwen3-32b')
 settings = GroqModelSettings(groq_reasoning_format='parsed')
 agent = Agent(model, model_settings=settings)
 ...
 ```

+!!! note
+    Groq does not support truly disabling thinking. When `thinking=False` is set via the unified setting, Pydantic AI sends `reasoning_format='hidden'`, which suppresses reasoning output, but the model may still reason internally.

## OpenRouter

To enable thinking, use the [`OpenRouterModelSettings.openrouter_reasoning`][pydantic_ai.models.openrouter.OpenRouterModelSettings.openrouter_reasoning] [model setting](agent.md#model-run-settings).
15 changes: 7 additions & 8 deletions pydantic_ai_slim/pydantic_ai/models/__init__.py
@@ -14,7 +14,7 @@
from dataclasses import dataclass, field, replace
from datetime import datetime
from functools import cache, cached_property
-from typing import Any, Generic, Literal, TypeVar, get_args, overload
+from typing import Any, Generic, Literal, TypeVar, cast, get_args, overload

import httpx
from typing_extensions import TypeAliasType, TypedDict
@@ -775,15 +775,14 @@ def prepare_request(

         params = self.customize_request_parameters(model_request_parameters)

-        # Resolve unified thinking setting
-        thinking_value = model_settings.get('thinking') if model_settings else None
-        if thinking_value is not None:
+        # Resolve unified thinking setting and strip from model_settings
+        if model_settings and 'thinking' in model_settings:
+            thinking_value = model_settings['thinking']
             if self.profile.supports_thinking or self.profile.thinking_always_enabled:
-                if thinking_value is False and self.profile.thinking_always_enabled:
-                    pass  # Silent ignore: model always thinks, can't disable
-                else:
+                if not (thinking_value is False and self.profile.thinking_always_enabled):
                     params = replace(params, thinking=thinking_value)
             # else: silent ignore for unsupported models
+            stripped = {k: v for k, v in model_settings.items() if k != 'thinking'}
+            model_settings = cast(ModelSettings, stripped) if stripped else None
Comment on lines +779 to +785
Contributor
@DouweM — This is a new behavioral change: after resolving the unified thinking setting, the key is stripped from model_settings via a dict comprehension + cast. The intent is to prevent downstream providers from seeing thinking as an unknown key, which is reasonable.

A couple of things worth considering:

  1. The cast(ModelSettings, stripped) creates a plain dict that satisfies ModelSettings structurally but isn't actually a TypedDict instance. This is fine at runtime but worth being aware of.
  2. This adds a dict comprehension on every prepare_request call when thinking is present. Not a performance concern in practice, but worth a sanity check that this is the pattern you want going forward — vs, say, having providers ignore unknown keys or having thinking be a ModelRequestParameters-only field.

If this approach is approved, the logic looks correct — the no-mutation and no-leakage tests cover it well.
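Point 1 can be seen in a minimal, self-contained sketch (the `ModelSettings` shape here is a reduced stand-in, not the real TypedDict):

```python
from typing import TypedDict, Union, cast

class ModelSettings(TypedDict, total=False):
    # Reduced stand-in for the real TypedDict
    thinking: Union[bool, str]
    temperature: float

settings: ModelSettings = {'thinking': True, 'temperature': 0.5}
stripped = {k: v for k, v in settings.items() if k != 'thinking'}
resolved = cast(ModelSettings, stripped) if stripped else None

# cast() is a no-op at runtime: `resolved` is a plain dict that merely
# satisfies ModelSettings structurally, which is all a TypedDict ever is
# at runtime.
assert resolved == {'temperature': 0.5}
assert type(resolved) is dict
```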

Contributor Author
The approach was discussed in the call (it came from the transcript): it prevents downstream model code from accidentally reading the raw unresolved value.

Collaborator
Makes sense to me.


if builtin_tools := params.builtin_tools:
# Deduplicate builtin tools
8 changes: 4 additions & 4 deletions pydantic_ai_slim/pydantic_ai/models/anthropic.py
@@ -414,7 +414,7 @@ def prepare_request(
)
return super().prepare_request(model_settings, model_request_parameters)

-    def _get_thinking_param(
+    def _translate_thinking(
self,
model_settings: AnthropicModelSettings,
model_request_parameters: ModelRequestParameters,
@@ -426,7 +426,7 @@ def _get_thinking_param(
         if thinking is None or thinking is False:
             return OMIT  # type: ignore[return-value]
         profile = AnthropicModelProfile.from_profile(self.profile)
-        if profile.anthropic_supports_adaptive_thinking:
+        if profile.anthropic_supports_adaptive_thinking and thinking in (True, 'medium'):
Collaborator
@sarth6 Are you sure of this change?

Claude is not:

The current logic only uses adaptive for True and 'medium', but for any specific effort level like 'high', 'low', 'xhigh', it falls back to the deprecated enabled + budget_tokens path. That seems wrong — on adaptive models,
all effort levels should use {type: 'adaptive'} and let output_config.effort control the depth. The _build_output_config already handles mapping the effort level correctly.

❯ Let's discuss the Anthropic change. Now for a value like `high`, we'll set thinking={type=enabled, budget_tokens=...} AND we set the output effort. Is that correct? Check the Ant docs on reasoning, and how budget tokens and
effort are meant to be used

● Let me read the relevant Anthropic model code and check the docs.
  ⎿  Loaded pydantic_ai_slim/pydantic_ai/CLAUDE.md
  ⎿  Loaded pydantic_ai_slim/pydantic_ai/models/CLAUDE.md

  Searched for 1 pattern, read 1 file

● Agent(Check Anthropic docs on thinking)
  ⎿  Done (8 tool uses · 31.1k tokens · 1m 12s)

● This is revealing. So for thinking='high' on an adaptive model (Sonnet 4.6, Opus 4.6), the current PR code does:

  1. _translate_thinking (line 429): 'high' is not in (True, 'medium'), so it falls through to {'type': 'enabled', 'budget_tokens': ...} — the deprecated path for 4.6 models
  2. _build_output_config (line 1282): maps 'high' → effort: 'high' in the output config

  So we're sending both thinking={type: enabled, budget_tokens=...} and output_config={effort: high}. According to the Anthropic docs:

  - For 4.6 models, {type: 'enabled', budget_tokens: N} is deprecated — the recommended approach is {type: 'adaptive'} + output_config: {effort: ...}
  - Effort controls thinking depth on adaptive models; budget_tokens is a separate hard cap that will be removed
  - Adaptive + effort is the intended combo: effort=high means "always thinks, deep reasoning"

  The current logic only uses adaptive for True and 'medium', but for any specific effort level like 'high', 'low', 'xhigh', it falls back to the deprecated enabled + budget_tokens path. That seems wrong — on adaptive models,
  all effort levels should use {type: 'adaptive'} and let output_config.effort control the depth. The _build_output_config already handles mapping the effort level correctly.

  Let me verify what the code looked like on main before this PR.

● Bash(git diff main -- pydantic_ai_slim/pydantic_ai/models/anthropic.py | head -120)
  ⎿  diff --git a/pydantic_ai_slim/pydantic_ai/models/anthropic.py b/pydantic_ai_slim/pydantic_ai/models/anthropic.py
     index 0c12ebbd..6dbca677 100644
     --- a/pydantic_ai_slim/pydantic_ai/models/anthropic.py
     … +37 lines

● So the key behavioral change in this PR is on line 429. On main, it was:

  if profile.anthropic_supports_adaptive_thinking:
      return {'type': 'adaptive'}

  This PR changed it to:

  if profile.anthropic_supports_adaptive_thinking and thinking in (True, 'medium'):
      return {'type': 'adaptive'}

  On main, for thinking='high' on an adaptive model, you'd get {type: 'adaptive'} — and _build_output_config would set effort: 'high'. That's the correct combination per the Anthropic docs.

  With this PR, for thinking='high' on an adaptive model, you get {type: 'enabled', budget_tokens: ...} + effort: 'high' — using the deprecated thinking type.

  The main behavior was actually correct. This PR introduced a regression by narrowing the adaptive check. The intent was probably to ensure specific effort levels get respected, but _build_output_config already handles that
  via the effort mapping — so _translate_thinking should return {type: 'adaptive'} for all truthy thinking values on adaptive models, as main does.

  Want me to revert this specific change (line 429) back to the main behavior?
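The contrast between main and this PR can be condensed into a standalone sketch (budget values are assumed from the docs table; `_build_output_config`'s effort mapping is elided):

```python
# Hypothetical reconstruction of the two behaviors discussed above.
BUDGET_MAP = {True: 10000, 'low': 2048, 'medium': 10000, 'high': 16384}  # assumed values

def translate_thinking(thinking, supports_adaptive, narrow_check):
    """narrow_check=True models this PR; False models main."""
    if thinking is None or thinking is False:
        return None
    if supports_adaptive and (not narrow_check or thinking in (True, 'medium')):
        return {'type': 'adaptive'}
    return {'type': 'enabled', 'budget_tokens': BUDGET_MAP[thinking]}

# main: 'high' on an adaptive model stays adaptive (effort rides separately)
assert translate_thinking('high', True, narrow_check=False) == {'type': 'adaptive'}
# this PR: 'high' falls back to the deprecated enabled+budget path
assert translate_thinking('high', True, narrow_check=True) == {
    'type': 'enabled', 'budget_tokens': 16384
}
```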

             return {'type': 'adaptive'}
         return {'type': 'enabled', 'budget_tokens': ANTHROPIC_THINKING_BUDGET_MAP[thinking]}

@@ -485,7 +485,7 @@ async def _messages_create(
output_config=output_config or OMIT,
betas=sorted(betas) or OMIT,
stream=stream,
-            thinking=self._get_thinking_param(model_settings, model_request_parameters),
+            thinking=self._translate_thinking(model_settings, model_request_parameters),
stop_sequences=model_settings.get('stop_sequences', OMIT),
temperature=model_settings.get('temperature', OMIT),
top_p=model_settings.get('top_p', OMIT),
@@ -573,7 +573,7 @@ async def _messages_count_tokens(
mcp_servers=mcp_servers or OMIT,
betas=sorted(betas) or OMIT,
output_config=output_config or OMIT,
-            thinking=self._get_thinking_param(model_settings, model_request_parameters),
+            thinking=self._translate_thinking(model_settings, model_request_parameters),
timeout=model_settings.get('timeout', NOT_GIVEN),
extra_headers=extra_headers,
extra_body=model_settings.get('extra_body'),
7 changes: 4 additions & 3 deletions pydantic_ai_slim/pydantic_ai/models/bedrock.py
@@ -565,7 +565,7 @@ async def _process_response(self, response: ConverseResponseTypeDef) -> ModelRes
provider_details=provider_details,
)

-    def _get_thinking_fields(
+    def _translate_thinking(
self,
model_settings: BedrockModelSettings,
model_request_parameters: ModelRequestParameters,
@@ -659,13 +659,14 @@ async def _messages_create(
             'bedrock_additional_model_response_fields_paths', None
         ):
             params['additionalModelResponseFieldPaths'] = additional_model_response_fields_paths
-        if additional_model_requests_fields := self._get_thinking_fields(model_settings, model_request_parameters):
-            params['additionalModelRequestFields'] = additional_model_requests_fields
         if prompt_variables := model_settings.get('bedrock_prompt_variables', None):
             params['promptVariables'] = prompt_variables
         if service_tier := model_settings.get('bedrock_service_tier', None):
             params['serviceTier'] = service_tier

+        if additional_model_requests_fields := self._translate_thinking(settings, model_request_parameters):
+            params['additionalModelRequestFields'] = additional_model_requests_fields

try:
if stream:
model_response = await anyio.to_thread.run_sync(
2 changes: 1 addition & 1 deletion pydantic_ai_slim/pydantic_ai/models/cerebras.py
@@ -86,7 +86,7 @@ def __init__(
super().__init__(model_name, provider=provider, profile=profile, settings=settings)

@override
-    def _get_reasoning_effort(
+    def _translate_thinking(
self,
model_settings: OpenAIChatModelSettings,
model_request_parameters: ModelRequestParameters,
4 changes: 2 additions & 2 deletions pydantic_ai_slim/pydantic_ai/models/google.py
@@ -522,7 +522,7 @@ async def _generate_content(
) from e
raise ModelAPIError(model_name=self._model_name, message=str(e)) from e

-    def _get_thinking_config(
+    def _translate_thinking(
self,
model_settings: GoogleModelSettings,
model_request_parameters: ModelRequestParameters,
@@ -615,7 +615,7 @@ async def _build_content_and_config(
frequency_penalty=model_settings.get('frequency_penalty'),
seed=model_settings.get('seed'),
safety_settings=model_settings.get('google_safety_settings'),
-            thinking_config=self._get_thinking_config(model_settings, model_request_parameters),
+            thinking_config=self._translate_thinking(model_settings, model_request_parameters),
labels=model_settings.get('google_labels'),
media_resolution=model_settings.get('google_video_resolution'),
cached_content=model_settings.get('google_cached_content'),
4 changes: 2 additions & 2 deletions pydantic_ai_slim/pydantic_ai/models/groq.py
@@ -242,7 +242,7 @@ async def request_stream(
async with response:
yield await self._process_streamed_response(response, model_request_parameters)

-    def _get_reasoning_format(
+    def _translate_thinking(
self,
model_settings: GroqModelSettings,
model_request_parameters: ModelRequestParameters,
@@ -327,7 +327,7 @@ async def _completions_create(
timeout=model_settings.get('timeout', NOT_GIVEN),
seed=model_settings.get('seed', NOT_GIVEN),
presence_penalty=model_settings.get('presence_penalty', NOT_GIVEN),
-            reasoning_format=self._get_reasoning_format(model_settings, model_request_parameters),
+            reasoning_format=self._translate_thinking(model_settings, model_request_parameters),
frequency_penalty=model_settings.get('frequency_penalty', NOT_GIVEN),
logit_bias=model_settings.get('logit_bias', NOT_GIVEN),
extra_headers=extra_headers,
8 changes: 4 additions & 4 deletions pydantic_ai_slim/pydantic_ai/models/openai.py
@@ -648,7 +648,7 @@ async def request(
model_response = self._process_response(response)
return model_response

-    def _get_reasoning_effort(
+    def _translate_thinking(
Contributor
The rename from descriptive provider-specific names (_get_reasoning_effort, _get_reasoning, _get_thinking_config, _get_reasoning_format) to the uniform _translate_thinking across all providers loses some useful information. For example, this method returns a ReasoningEffort | Omit (a string), while OpenAIResponsesModel._translate_thinking below returns a Reasoning | Omit (a dict), and GoogleModel._translate_thinking returns a ThinkingConfigDict | None. The old names hinted at what each method returned.

Since these are internal methods and never called polymorphically (each provider calls its own), the consistency benefit is modest, while the descriptive names helped readers understand each provider's translation target. This is ultimately a judgment call, but @DouweM's earlier comment on the plan suggests he wasn't strongly in favor of this rename either — worth confirming it's desired.

Contributor Author
Douwe already confirmed he was fine with the rename: #4829 (comment)

Collaborator
I like it better this way, so it's easier to find the equivalent logic when looking in different model implementations.

self,
model_settings: OpenAIChatModelSettings,
model_request_parameters: ModelRequestParameters,
@@ -750,7 +750,7 @@ async def _completions_create(
timeout=model_settings.get('timeout', NOT_GIVEN),
response_format=response_format or OMIT,
seed=model_settings.get('seed', OMIT),
-            reasoning_effort=self._get_reasoning_effort(model_settings, model_request_parameters),
+            reasoning_effort=self._translate_thinking(model_settings, model_request_parameters),
user=model_settings.get('openai_user', OMIT),
web_search_options=web_search_options or OMIT,
service_tier=model_settings.get('openai_service_tier', OMIT),
@@ -1688,7 +1688,7 @@ async def _responses_create(  # noqa: C901
previous_response_id, messages = self._get_previous_response_id_and_new_messages(messages)

instructions, openai_messages = await self._map_messages(messages, model_settings, model_request_parameters)
-        reasoning = self._get_reasoning(model_settings, model_request_parameters)
+        reasoning = self._translate_thinking(model_settings, model_request_parameters)

text: responses.ResponseTextConfigParam | None = None
if model_request_parameters.output_mode == 'native':
@@ -1783,7 +1783,7 @@ async def _responses_create(  # noqa: C901
except APIConnectionError as e:
raise ModelAPIError(model_name=self.model_name, message=e.message) from e

-    def _get_reasoning(
+    def _translate_thinking(
self,
model_settings: OpenAIResponsesModelSettings,
model_request_parameters: ModelRequestParameters,
2 changes: 1 addition & 1 deletion pydantic_ai_slim/pydantic_ai/models/openrouter.py
@@ -603,7 +603,7 @@ def prepare_request(
return new_settings, customized_parameters

@override
-    def _get_reasoning_effort(
+    def _translate_thinking(
self,
model_settings: OpenAIChatModelSettings,
model_request_parameters: ModelRequestParameters,
21 changes: 11 additions & 10 deletions pydantic_ai_slim/pydantic_ai/models/xai.py
@@ -55,6 +55,16 @@
from ..settings import ModelSettings, ThinkingLevel
from ..usage import RequestUsage

+XAI_EFFORT_MAP: dict[ThinkingLevel, Literal['low', 'high']] = {
+    True: 'high',
+    'minimal': 'low',
+    'low': 'low',
+    'medium': 'high',
+    'high': 'high',
+    'xhigh': 'high',
+}
+"""Maps unified thinking values to xAI reasoning_effort. xAI only supports 'low' and 'high'."""
Comment on lines +58 to +66
Contributor
This constant is only used in one place (_create_chat at line 570) and was previously defined inline there. Moving it to module level goes against the project guideline to scope single-use helpers and constants to their usage site to reduce namespace pollution and prevent accidental reuse of implementation details.

I'd suggest keeping the map inline where it's used, as it was before. This also eliminates the need for the TestXaiEffortMap test class, which is essentially testing a constant rather than behavior.

Contributor Author
Sounds good, will adjust — but I'll keep my `Literal['low', 'high']` change, as it's a typing improvement that aligns with the underlying xai sdk.

Collaborator
@DouweM Mar 30, 2026
Constants are great for things like this; I'd actually rather have all the models use constants instead of inline dicts for this.
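For reference, the constant under discussion collapses the six unified levels onto xAI's two supported values (sketched here with an assumed `ThinkingLevel` alias):

```python
from typing import Literal, Union

ThinkingLevel = Union[bool, Literal['minimal', 'low', 'medium', 'high', 'xhigh']]  # assumed alias

XAI_EFFORT_MAP: dict[ThinkingLevel, Literal['low', 'high']] = {
    True: 'high',
    'minimal': 'low',
    'low': 'low',
    'medium': 'high',
    'high': 'high',
    'xhigh': 'high',
}

# Every unified level lands on one of xAI's two supported efforts.
assert set(XAI_EFFORT_MAP.values()) == {'low', 'high'}
assert XAI_EFFORT_MAP['medium'] == 'high'
```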


try:
import xai_sdk.chat as chat_types
from xai_sdk import AsyncClient
@@ -557,16 +567,7 @@ async def _create_chat(
         if 'reasoning_effort' not in xai_settings and model_request_parameters.thinking is not None:
             thinking = model_request_parameters.thinking
             if thinking is not False:
-                # xAI only supports 'low' and 'high'; map others to closest
-                xai_map: dict[ThinkingLevel, str] = {
-                    True: 'high',
-                    'minimal': 'low',
-                    'low': 'low',
-                    'medium': 'high',
-                    'high': 'high',
-                    'xhigh': 'high',
-                }
-                xai_settings['reasoning_effort'] = xai_map[thinking]
+                xai_settings['reasoning_effort'] = XAI_EFFORT_MAP[thinking]

# Populate use_encrypted_content and include based on model settings
include: list[chat_pb2.IncludeOption] = []
43 changes: 42 additions & 1 deletion tests/test_settings.py
@@ -4,7 +4,7 @@

from pydantic_ai import Agent
from pydantic_ai.models import Model
-from pydantic_ai.settings import ModelSettings
+from pydantic_ai.settings import ModelSettings, merge_model_settings

pytestmark = [pytest.mark.anyio, pytest.mark.vcr]

@@ -44,3 +44,44 @@ async def test_stop_settings(allow_model_requests: None, model: Model) -> None:
         assert result.output.endswith('Paris')
     else:
         assert 'Paris' not in result.output
+
+
+class TestMergeModelSettingsThinking:
+    """merge_model_settings with unified thinking fields."""
+
+    def test_merge_thinking_bool_override(self):
+        base: ModelSettings = {'thinking': True}
+        overrides: ModelSettings = {'thinking': False}
+        result = merge_model_settings(base, overrides)
+        assert result is not None
+        assert result.get('thinking') is False
+
+    def test_merge_effort_override(self):
+        base: ModelSettings = {'thinking': 'low'}
+        overrides: ModelSettings = {'thinking': 'high'}
+        result = merge_model_settings(base, overrides)
+        assert result is not None
+        assert result.get('thinking') == 'high'
+
+    def test_merge_preserves_non_thinking_settings(self):
+        base: ModelSettings = {'max_tokens': 1000, 'temperature': 0.5}
+        overrides: ModelSettings = {'thinking': True}
+        result = merge_model_settings(base, overrides)
+        assert result is not None
+        assert result.get('max_tokens') == 1000
+        assert result.get('temperature') == 0.5
+        assert result.get('thinking') is True
+
+    def test_merge_with_none_returns_base(self):
+        base: ModelSettings = {'thinking': True}
+        result = merge_model_settings(base, None)
+        assert result == base
+
+    def test_merge_with_none_base_returns_overrides(self):
+        overrides: ModelSettings = {'thinking': True}
+        result = merge_model_settings(None, overrides)
+        assert result == overrides
+
+    def test_merge_with_both_none(self):
+        result = merge_model_settings(None, None)
+        assert result is None
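The semantics these tests pin down amount to a shallow dict merge in which override keys win and a `None` side falls through; a sketch of that contract (not the library implementation):

```python
def merge_model_settings(base, overrides):
    # Shallow merge: keys in `overrides` win; a falsy side falls through.
    if base and overrides:
        return {**base, **overrides}
    return base or overrides

assert merge_model_settings({'thinking': True}, {'thinking': False}) == {'thinking': False}
assert merge_model_settings({'max_tokens': 1000, 'temperature': 0.5}, {'thinking': True}) == {
    'max_tokens': 1000, 'temperature': 0.5, 'thinking': True
}
assert merge_model_settings({'thinking': True}, None) == {'thinking': True}
assert merge_model_settings(None, None) is None
```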