-
Notifications
You must be signed in to change notification settings - Fork 1.9k
Strip thinking from model_settings after resolution, unify _translate_thinking naming #4829
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 11 commits
3b27945
62f1556
0598d25
24a5db8
5bb1381
1bf00fd
c4b4494
6a55e56
e9b5284
8064089
8e05754
359f034
8ef888d
a9dd96e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -414,7 +414,7 @@ def prepare_request( | |
| ) | ||
| return super().prepare_request(model_settings, model_request_parameters) | ||
|
|
||
| def _get_thinking_param( | ||
| def _translate_thinking( | ||
| self, | ||
| model_settings: AnthropicModelSettings, | ||
| model_request_parameters: ModelRequestParameters, | ||
|
|
@@ -426,7 +426,7 @@ def _get_thinking_param( | |
| if thinking is None or thinking is False: | ||
| return OMIT # type: ignore[return-value] | ||
| profile = AnthropicModelProfile.from_profile(self.profile) | ||
| if profile.anthropic_supports_adaptive_thinking: | ||
| if profile.anthropic_supports_adaptive_thinking and thinking in (True, 'medium'): | ||
|
||
| return {'type': 'adaptive'} | ||
| return {'type': 'enabled', 'budget_tokens': ANTHROPIC_THINKING_BUDGET_MAP[thinking]} | ||
sarth6 marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
|
|
||
|
|
@@ -485,7 +485,7 @@ async def _messages_create( | |
| output_config=output_config or OMIT, | ||
| betas=sorted(betas) or OMIT, | ||
| stream=stream, | ||
| thinking=self._get_thinking_param(model_settings, model_request_parameters), | ||
| thinking=self._translate_thinking(model_settings, model_request_parameters), | ||
| stop_sequences=model_settings.get('stop_sequences', OMIT), | ||
| temperature=model_settings.get('temperature', OMIT), | ||
| top_p=model_settings.get('top_p', OMIT), | ||
|
|
@@ -573,7 +573,7 @@ async def _messages_count_tokens( | |
| mcp_servers=mcp_servers or OMIT, | ||
| betas=sorted(betas) or OMIT, | ||
| output_config=output_config or OMIT, | ||
| thinking=self._get_thinking_param(model_settings, model_request_parameters), | ||
| thinking=self._translate_thinking(model_settings, model_request_parameters), | ||
| timeout=model_settings.get('timeout', NOT_GIVEN), | ||
| extra_headers=extra_headers, | ||
| extra_body=model_settings.get('extra_body'), | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -648,7 +648,7 @@ async def request( | |
| model_response = self._process_response(response) | ||
| return model_response | ||
|
|
||
| def _get_reasoning_effort( | ||
| def _translate_thinking( | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The rename from descriptive provider-specific names (`_get_thinking_param`, `_get_reasoning_effort`, `_get_reasoning`) to a single `_translate_thinking` trades specificity for cross-provider consistency. Since these are internal methods and never called polymorphically (each provider calls its own), the consistency benefit is modest, while the descriptive names helped readers understand each provider's translation target. This is ultimately a judgment call, but @DouweM's earlier comment on the plan suggests he wasn't strongly in favor of this rename either — worth confirming it's desired.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Douwe already confirmed he was fine with the rename: #4829 (comment)
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I like it better this way, so it's easier to find the equivalent logic when looking in different model implementations. |
||
| self, | ||
| model_settings: OpenAIChatModelSettings, | ||
| model_request_parameters: ModelRequestParameters, | ||
|
|
@@ -750,7 +750,7 @@ async def _completions_create( | |
| timeout=model_settings.get('timeout', NOT_GIVEN), | ||
| response_format=response_format or OMIT, | ||
| seed=model_settings.get('seed', OMIT), | ||
| reasoning_effort=self._get_reasoning_effort(model_settings, model_request_parameters), | ||
| reasoning_effort=self._translate_thinking(model_settings, model_request_parameters), | ||
| user=model_settings.get('openai_user', OMIT), | ||
| web_search_options=web_search_options or OMIT, | ||
| service_tier=model_settings.get('openai_service_tier', OMIT), | ||
|
|
@@ -1688,7 +1688,7 @@ async def _responses_create( # noqa: C901 | |
| previous_response_id, messages = self._get_previous_response_id_and_new_messages(messages) | ||
|
|
||
| instructions, openai_messages = await self._map_messages(messages, model_settings, model_request_parameters) | ||
| reasoning = self._get_reasoning(model_settings, model_request_parameters) | ||
| reasoning = self._translate_thinking(model_settings, model_request_parameters) | ||
|
|
||
| text: responses.ResponseTextConfigParam | None = None | ||
| if model_request_parameters.output_mode == 'native': | ||
|
|
@@ -1783,7 +1783,7 @@ async def _responses_create( # noqa: C901 | |
| except APIConnectionError as e: | ||
| raise ModelAPIError(model_name=self.model_name, message=e.message) from e | ||
|
|
||
| def _get_reasoning( | ||
| def _translate_thinking( | ||
| self, | ||
| model_settings: OpenAIResponsesModelSettings, | ||
| model_request_parameters: ModelRequestParameters, | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -55,6 +55,16 @@ | |
| from ..settings import ModelSettings, ThinkingLevel | ||
| from ..usage import RequestUsage | ||
|
|
||
| XAI_EFFORT_MAP: dict[ThinkingLevel, Literal['low', 'high']] = { | ||
| True: 'high', | ||
| 'minimal': 'low', | ||
| 'low': 'low', | ||
| 'medium': 'high', | ||
| 'high': 'high', | ||
| 'xhigh': 'high', | ||
| } | ||
| """Maps unified thinking values to xAI reasoning_effort. xAI only supports 'low' and 'high'.""" | ||
|
Comment on lines
+58
to
+66
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This constant is only used in one place (`_create_chat`). I'd suggest keeping the map inline where it's used, as it was before. This also eliminates the need for the module-level `Literal` import.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Sounds good, will adjust — but I'll keep my constant.
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Constants are great for things like this; I'd actually rather have all the models use constants instead of inline dicts for this. |
||
|
|
||
| try: | ||
| import xai_sdk.chat as chat_types | ||
| from xai_sdk import AsyncClient | ||
|
|
@@ -557,16 +567,7 @@ async def _create_chat( | |
| if 'reasoning_effort' not in xai_settings and model_request_parameters.thinking is not None: | ||
| thinking = model_request_parameters.thinking | ||
| if thinking is not False: | ||
| # xAI only supports 'low' and 'high'; map others to closest | ||
| xai_map: dict[ThinkingLevel, str] = { | ||
| True: 'high', | ||
| 'minimal': 'low', | ||
| 'low': 'low', | ||
| 'medium': 'high', | ||
| 'high': 'high', | ||
| 'xhigh': 'high', | ||
| } | ||
| xai_settings['reasoning_effort'] = xai_map[thinking] | ||
| xai_settings['reasoning_effort'] = XAI_EFFORT_MAP[thinking] | ||
|
|
||
| # Populate use_encrypted_content and include based on model settings | ||
| include: list[chat_pb2.IncludeOption] = [] | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@DouweM — This is a new behavioral change: after resolving the unified `thinking` setting, the key is stripped from `model_settings` via a dict comprehension + `cast`. The intent is to prevent downstream providers from seeing `thinking` as an unknown key, which is reasonable. A couple of things worth considering:
1. `cast(ModelSettings, stripped)` creates a plain `dict` that satisfies `ModelSettings` structurally but isn't actually a `TypedDict` instance. This is fine at runtime but worth being aware of.
2. A new dict is built on every `prepare_request` call when `thinking` is present. Not a performance concern in practice, but worth a sanity check that this is the pattern you want going forward — vs, say, having providers ignore unknown keys or having `thinking` be a `ModelRequestParameters`-only field.

If this approach is approved, the logic looks correct — the no-mutation and no-leakage tests cover it well.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The approach was discussed in the call (it came from the transcript) — it prevents downstream model code from accidentally reading the raw, unresolved value.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Makes sense to me.