Skip to content

Commit 6880a93

Browse files
committed
[owl] Support disabling reasoning for GPT-5.1 (#845)
Backend - owl (API server): support disabling reasoning for GPT-5.1; fix tool call step formatting by adding double newlines.
1 parent 5ef49cc commit 6880a93

File tree

2 files changed

+30
-29
lines changed
  • clients/python/src/jamaibase/types
  • services/api/src/owl/utils

2 files changed

+30
-29
lines changed

clients/python/src/jamaibase/types/lm.py

Lines changed: 13 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1136,29 +1136,19 @@ class ChatRequestBase(BaseModel):
11361136
),
11371137
examples=[{}],
11381138
)
1139-
reasoning_effort: Literal["disable", "minimal", "low", "medium", "high"] | None = Field(
1140-
"minimal",
1141-
description=(
1142-
"Constrains effort on reasoning for reasoning models. "
1143-
"Currently supported values are `disable`, `minimal`, `low`, `medium`, and `high`. "
1144-
"Reducing reasoning effort can result in faster responses and fewer tokens used on reasoning in a response. "
1145-
"For non-OpenAI models, `low` ~ 1024 tokens, `medium` ~ 2048 tokens, `high` ~ 4096 tokens. "
1146-
"Note that this parameter will be ignored when using models that do not support it, "
1147-
"such as non-reasoning models."
1148-
),
1149-
examples=["low"],
1150-
)
1151-
reasoning_effort: Literal["disable", "minimal", "low", "medium", "high"] | None = Field(
1152-
None,
1153-
description=(
1154-
"Constrains effort on reasoning for reasoning models. "
1155-
"Currently supported values are `disable`, `minimal`, `low`, `medium`, and `high`. "
1156-
"Reducing reasoning effort can result in faster responses and fewer tokens used on reasoning in a response. "
1157-
"For non-OpenAI models, `low` ~ 1024 tokens, `medium` ~ 4096 tokens, `high` ~ 8192 tokens. "
1158-
"Note that this parameter will be ignored when using models that do not support it, "
1159-
"such as non-reasoning models."
1160-
),
1161-
examples=["low"],
1139+
reasoning_effort: Literal["disable", "minimal", "none", "low", "medium", "high"] | None = (
1140+
Field(
1141+
None,
1142+
description=(
1143+
"Constrains effort on reasoning for reasoning models. "
1144+
'Currently supported values are `None`, "disable", "none", "minimal", "low", "medium", and "high". '
1145+
"Reducing reasoning effort can result in faster responses and fewer tokens used on reasoning in a response. "
1146+
"For non-OpenAI models, `low` ~ 1024 tokens, `medium` ~ 4096 tokens, `high` ~ 8192 tokens. "
1147+
"Note that this parameter will be ignored when using models that do not support it, "
1148+
"such as non-reasoning models."
1149+
),
1150+
examples=["low"],
1151+
)
11621152
)
11631153
thinking_budget: int | None = Field(
11641154
None,

services/api/src/owl/utils/lm.py

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -519,7 +519,7 @@ def _prepare_hyperparams(
519519
return
520520
# Disable reasoning if requested
521521
if (
522-
reasoning_effort in ("disable", "minimal")
522+
reasoning_effort in ("disable", "minimal", "none")
523523
or thinking_budget == 0
524524
or (reasoning_effort is None and thinking_budget is None)
525525
):
@@ -540,7 +540,14 @@ def _prepare_hyperparams(
540540
hyperparams["thinking"] = {"type": "disabled"}
541541
return
542542
elif ctx.inference_provider == CloudProvider.OPENAI:
543-
if "gpt-5" in ctx.routing_id:
543+
if "gpt-5.1" in ctx.routing_id:
544+
# gpt-5.1: Supported values are: 'none', 'low', 'medium', and 'high'.
545+
hyperparams["reasoning"] = {
546+
"effort": "none",
547+
"summary": reasoning_summary,
548+
}
549+
return
550+
elif "gpt-5" in ctx.routing_id:
544551
hyperparams["reasoning"] = {
545552
"effort": "minimal",
546553
"summary": reasoning_summary,
@@ -668,20 +675,22 @@ async def _openai_responses_stream(
668675
and hasattr(chunk.item.action, "query")
669676
and chunk.item.action.query
670677
):
678+
yield self._stream_delta(Delta(role="assistant", reasoning_content="\n\n"))
671679
yield self._stream_delta(
672680
Delta(
673681
role="assistant",
674-
reasoning_content=f'\n\nSearched the web for "{chunk.item.action.query}".',
682+
reasoning_content=f'Searched the web for "{chunk.item.action.query}".',
675683
)
676684
)
677685
yield self._stream_delta(Delta(role="assistant", reasoning_content="\n\n"))
678686
elif isinstance(chunk.item, ResponseCodeInterpreterToolCall):
679687
usage_stats["code_interpreter_calls"] += 1
680688
code_snippet = chunk.item.code
689+
yield self._stream_delta(Delta(role="assistant", reasoning_content="\n\n"))
681690
yield self._stream_delta(
682691
Delta(
683692
role="assistant",
684-
reasoning_content=f"\n\nRan Python code:\n\n```python\n{code_snippet}\n```",
693+
reasoning_content=f"Ran Python code:\n\n```python\n{code_snippet}\n```",
685694
)
686695
)
687696
yield self._stream_delta(Delta(role="assistant", reasoning_content="\n\n"))
@@ -744,11 +753,13 @@ async def _openai_responses(
744753
elif isinstance(item, ResponseFunctionWebSearch) and item.status == "completed":
745754
usage_stats["web_search_calls"] += 1
746755
if item.action and hasattr(item.action, "query") and item.action.query:
747-
reasoning_parts.append(f'Searched the web for "{item.action.query}".')
756+
reasoning_parts.append(f'\n\nSearched the web for "{item.action.query}".\n\n')
748757
elif isinstance(item, ResponseCodeInterpreterToolCall) and item.status == "completed":
749758
usage_stats["code_interpreter_calls"] += 1
750759
code_snippet = item.code
751-
reasoning_parts.append(f"Ran Python code:\n\n```python\n{code_snippet}\n```")
760+
reasoning_parts.append(
761+
f"\n\nRan Python code:\n\n```python\n{code_snippet}\n```\n\n"
762+
)
752763
elif isinstance(item, ResponseOutputMessage) and item.status == "completed":
753764
text_content = item.content[0].text if item.content else ""
754765
result_parts.append(text_content)

0 commit comments

Comments
 (0)