Skip to content

Commit f4a48c3

Browse files
feat(anthropic): support output_config.effort and budget-free adaptive thinking (#17)
* feat(anthropic): support output_config.effort and budget-free adaptive thinking Add output_config.effort to the messages request and consume params.ReasoningEffort so callers can drive reasoning depth with effort strings instead of token budgets. Adaptive thinking no longer requires budget_tokens, and resolveMaxTokens falls back to a fixed reasoning ceiling when reasoning is active without an explicit budget. Co-authored-by: Cursor <cursoragent@cursor.com> * fix: normalize OpenAI reasoning max effort * fix: satisfy lint for thinking disabled type --------- Co-authored-by: Cursor <cursoragent@cursor.com>
1 parent eedffe3 commit f4a48c3

10 files changed

Lines changed: 320 additions & 29 deletions

File tree

provider/anthropic/messages/messages.go

Lines changed: 39 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,18 @@ const (
1717
defaultBaseURL = "https://api.anthropic.com/v1"
1818
defaultAnthropicVer = "2023-06-01"
1919
defaultMaxTokens = 4096
20+
// defaultReasoningMaxTokens is the fallback output cap when reasoning is
21+
// active without an explicit budget (adaptive / output_config.effort). The
22+
// plain 4096 default would truncate reasoning + answer; modern Claude models
23+
// support far larger outputs.
24+
defaultReasoningMaxTokens = 32000
2025

2126
// Content block types for Anthropic API
2227
blockTypeText = "text"
2328
blockTypeThinking = "thinking"
2429
blockTypeToolUse = "tool_use"
30+
31+
thinkingTypeDisabled = "disabled"
2532
)
2633

2734
// ThinkingConfig controls extended thinking for Anthropic models.
@@ -238,13 +245,23 @@ func (p *Provider) buildRequest(params *sdk.GenerateParams) *messagesRequest {
238245
req.ToolChoice = convertToolChoice(params.ToolChoice)
239246
}
240247

241-
if p.thinking != nil && p.thinking.Type != "" && p.thinking.Type != "disabled" {
248+
if p.thinking != nil && p.thinking.Type != "" && p.thinking.Type != thinkingTypeDisabled {
242249
req.Thinking = &anthropicThinking{
243250
Type: p.thinking.Type,
244251
BudgetTokens: p.thinking.BudgetTokens,
245252
}
246253
}
247254

255+
// Reasoning effort is carried via output_config.effort. On modern Claude
256+
// models (>= 4.6) this is the supported control; budget_tokens is deprecated
257+
// (4.6) or rejected (4.7+). The caller is responsible for only sending an
258+
// effort the target model accepts; errors surface as-is.
259+
if params.ReasoningEffort != nil {
260+
if effort := strings.TrimSpace(*params.ReasoningEffort); effort != "" {
261+
req.OutputConfig = &anthropicOutputConfig{Effort: effort}
262+
}
263+
}
264+
248265
return req
249266
}
250267

@@ -254,13 +271,33 @@ func resolveMaxTokens(params *sdk.GenerateParams, thinking *ThinkingConfig) *int
254271
}
255272

256273
maxTokens := defaultMaxTokens
257-
if thinking != nil && thinking.Type != "" && thinking.Type != "disabled" && thinking.BudgetTokens > 0 {
274+
switch {
275+
case thinking != nil && thinking.Type != "" && thinking.Type != thinkingTypeDisabled && thinking.BudgetTokens > 0:
276+
// Explicit budget thinking: reserve room for the thinking budget on top
277+
// of the answer budget.
258278
maxTokens += thinking.BudgetTokens
279+
case reasoningActive(params, thinking):
280+
// Effort-based or adaptive thinking carries no explicit budget, but the
281+
// model still needs generous headroom (reasoning + answer). The low 4096
282+
// default would truncate; use a reasoning-aware default instead.
283+
maxTokens = defaultReasoningMaxTokens
259284
}
260285

261286
return &maxTokens
262287
}
263288

289+
// reasoningActive reports whether the request enables reasoning without an
290+
// explicit token budget (adaptive thinking and/or output_config.effort).
291+
func reasoningActive(params *sdk.GenerateParams, thinking *ThinkingConfig) bool {
292+
if thinking != nil && thinking.Type != "" && thinking.Type != thinkingTypeDisabled {
293+
return true
294+
}
295+
if params.ReasoningEffort != nil && strings.TrimSpace(*params.ReasoningEffort) != "" {
296+
return true
297+
}
298+
return false
299+
}
300+
264301
func convertTools(tools []sdk.Tool) []anthropicTool {
265302
out := make([]anthropicTool, 0, len(tools))
266303
for _, t := range tools {

provider/anthropic/messages/messages_test.go

Lines changed: 105 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,106 @@ func TestDoGenerate_DefaultMaxTokens_ThinkingBudgetReserveAnswerBudget(t *testin
179179
}
180180
}
181181

182+
func TestDoGenerate_ReasoningEffort_OutputConfig(t *testing.T) {
183+
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
184+
var body struct {
185+
MaxTokens int `json:"max_tokens"`
186+
Thinking *struct {
187+
Type string `json:"type"`
188+
} `json:"thinking"`
189+
OutputConfig *struct {
190+
Effort string `json:"effort"`
191+
} `json:"output_config"`
192+
}
193+
if err := json.NewDecoder(r.Body).Decode(&body); err != nil {
194+
t.Fatalf("decode request: %v", err)
195+
}
196+
if body.OutputConfig == nil || body.OutputConfig.Effort != "high" {
197+
t.Fatalf("output_config.effort: got %+v, want high", body.OutputConfig)
198+
}
199+
if body.Thinking != nil {
200+
t.Fatalf("thinking should be absent without WithThinking, got %+v", body.Thinking)
201+
}
202+
if body.MaxTokens != 32000 {
203+
t.Fatalf("max_tokens: got %d, want 32000 (reasoning default)", body.MaxTokens)
204+
}
205+
w.Header().Set("Content-Type", "application/json")
206+
json.NewEncoder(w).Encode(map[string]any{
207+
"id": "msg_effort", "type": "message", "model": "claude-opus-4-8", "role": "assistant",
208+
"content": []map[string]any{{"type": "text", "text": "OK"}},
209+
"stop_reason": "end_turn",
210+
"usage": map[string]any{"input_tokens": 5, "output_tokens": 2},
211+
})
212+
}))
213+
defer srv.Close()
214+
215+
p := messages.New(messages.WithAPIKey("test-key"), messages.WithBaseURL(srv.URL))
216+
effort := "high"
217+
result, err := p.DoGenerate(context.Background(), sdk.GenerateParams{
218+
Model: &sdk.Model{ID: "claude-opus-4-8"},
219+
Messages: []sdk.Message{sdk.UserMessage("Hi")},
220+
ReasoningEffort: &effort,
221+
})
222+
if err != nil {
223+
t.Fatalf("DoGenerate failed: %v", err)
224+
}
225+
if result.Text != "OK" {
226+
t.Errorf("text: got %q", result.Text)
227+
}
228+
}
229+
230+
func TestDoGenerate_AdaptiveThinking_EffortNoBudget(t *testing.T) {
231+
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
232+
var body struct {
233+
MaxTokens int `json:"max_tokens"`
234+
Thinking struct {
235+
Type string `json:"type"`
236+
BudgetTokens int `json:"budget_tokens"`
237+
} `json:"thinking"`
238+
OutputConfig struct {
239+
Effort string `json:"effort"`
240+
} `json:"output_config"`
241+
}
242+
if err := json.NewDecoder(r.Body).Decode(&body); err != nil {
243+
t.Fatalf("decode request: %v", err)
244+
}
245+
if body.Thinking.Type != "adaptive" {
246+
t.Fatalf("thinking.type: got %q, want adaptive", body.Thinking.Type)
247+
}
248+
if body.Thinking.BudgetTokens != 0 {
249+
t.Fatalf("budget_tokens must be omitted for adaptive, got %d", body.Thinking.BudgetTokens)
250+
}
251+
if body.OutputConfig.Effort != "xhigh" {
252+
t.Fatalf("output_config.effort: got %q, want xhigh", body.OutputConfig.Effort)
253+
}
254+
if body.MaxTokens != 32000 {
255+
t.Fatalf("max_tokens: got %d, want 32000 (reasoning default)", body.MaxTokens)
256+
}
257+
w.Header().Set("Content-Type", "application/json")
258+
json.NewEncoder(w).Encode(map[string]any{
259+
"id": "msg_adaptive", "type": "message", "model": "claude-opus-4-8", "role": "assistant",
260+
"content": []map[string]any{{"type": "text", "text": "OK"}},
261+
"stop_reason": "end_turn",
262+
"usage": map[string]any{"input_tokens": 5, "output_tokens": 2},
263+
})
264+
}))
265+
defer srv.Close()
266+
267+
p := messages.New(
268+
messages.WithAPIKey("test-key"),
269+
messages.WithBaseURL(srv.URL),
270+
messages.WithThinking(messages.ThinkingConfig{Type: "adaptive"}),
271+
)
272+
effort := "xhigh"
273+
if _, err := p.DoGenerate(context.Background(), sdk.GenerateParams{
274+
Model: &sdk.Model{ID: "claude-opus-4-8"},
275+
Messages: []sdk.Message{sdk.UserMessage("Hi")},
276+
ReasoningEffort: &effort,
277+
}); err != nil {
278+
t.Fatalf("DoGenerate failed: %v", err)
279+
}
280+
}
281+
182282
func TestDoGenerate_SystemMessage(t *testing.T) {
183283
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
184284
var body struct {
@@ -1068,7 +1168,7 @@ func TestDoGenerate_CacheControl_Tools(t *testing.T) {
10681168

10691169
p := messages.New(messages.WithAPIKey("k"), messages.WithBaseURL(srv.URL))
10701170
_, err := p.DoGenerate(context.Background(), sdk.GenerateParams{
1071-
Model: &sdk.Model{ID: "claude-sonnet-4-20250514"},
1171+
Model: &sdk.Model{ID: "claude-sonnet-4-20250514"},
10721172
Messages: []sdk.Message{sdk.UserMessage("Hi")},
10731173
Tools: []sdk.Tool{
10741174
{Name: "search", Description: "Search the web", Parameters: map[string]any{"type": "object"}},
@@ -1093,10 +1193,10 @@ func TestDoGenerate_CacheControl_DetailedUsage(t *testing.T) {
10931193
"content": []map[string]any{{"type": "text", "text": "OK"}},
10941194
"stop_reason": "end_turn",
10951195
"usage": map[string]any{
1096-
"input_tokens": 10,
1097-
"output_tokens": 5,
1098-
"cache_creation_input_tokens": 556,
1099-
"cache_read_input_tokens": 200,
1196+
"input_tokens": 10,
1197+
"output_tokens": 5,
1198+
"cache_creation_input_tokens": 556,
1199+
"cache_read_input_tokens": 200,
11001200
"cache_creation": map[string]any{
11011201
"ephemeral_5m_input_tokens": 456,
11021202
"ephemeral_1h_input_tokens": 100,

provider/anthropic/messages/types.go

Lines changed: 27 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -3,25 +3,33 @@ package messages
33
// --- Request types ---
44

55
type messagesRequest struct {
6-
Model string `json:"model"`
7-
MaxTokens *int `json:"max_tokens,omitempty"`
8-
System []contentBlock `json:"system,omitempty"`
9-
Messages []anthropicMessage `json:"messages"`
10-
Tools []anthropicTool `json:"tools,omitempty"`
11-
ToolChoice *anthropicToolChoice `json:"tool_choice,omitempty"`
12-
Temperature *float64 `json:"temperature,omitempty"`
13-
TopP *float64 `json:"top_p,omitempty"`
14-
TopK *int `json:"top_k,omitempty"`
15-
StopSequences []string `json:"stop_sequences,omitempty"`
16-
Stream bool `json:"stream,omitempty"`
17-
Thinking *anthropicThinking `json:"thinking,omitempty"`
6+
Model string `json:"model"`
7+
MaxTokens *int `json:"max_tokens,omitempty"`
8+
System []contentBlock `json:"system,omitempty"`
9+
Messages []anthropicMessage `json:"messages"`
10+
Tools []anthropicTool `json:"tools,omitempty"`
11+
ToolChoice *anthropicToolChoice `json:"tool_choice,omitempty"`
12+
Temperature *float64 `json:"temperature,omitempty"`
13+
TopP *float64 `json:"top_p,omitempty"`
14+
TopK *int `json:"top_k,omitempty"`
15+
StopSequences []string `json:"stop_sequences,omitempty"`
16+
Stream bool `json:"stream,omitempty"`
17+
Thinking *anthropicThinking `json:"thinking,omitempty"`
18+
OutputConfig *anthropicOutputConfig `json:"output_config,omitempty"`
1819
}
1920

2021
// anthropicThinking is the "thinking" object of a messages request. It is
// serialized only when messagesRequest.Thinking is non-nil (that field is
// tagged omitempty).
type anthropicThinking struct {
2122
Type string `json:"type"`
2223
BudgetTokens int `json:"budget_tokens,omitempty"` // dropped from the JSON when zero
2324
}
2425

26+
// anthropicOutputConfig maps to Anthropic's output_config object. Effort is the
27+
// reasoning-effort tier ("low"/"medium"/"high"/"xhigh"/"max"); it controls total
28+
// token output and supersedes thinking.budget_tokens on modern Claude models.
29+
type anthropicOutputConfig struct {
30+
Effort string `json:"effort,omitempty"` // dropped from the JSON when empty
31+
}
32+
2533
type anthropicMessage struct {
2634
Role string `json:"role"`
2735
Content []contentBlock `json:"content"`
@@ -85,14 +93,14 @@ type anthropicToolChoice struct {
8593
// --- Response types ---
8694

8795
type messagesResponse struct {
88-
ID string `json:"id"`
89-
Type string `json:"type"`
90-
Model string `json:"model"`
91-
Role string `json:"role"`
96+
ID string `json:"id"`
97+
Type string `json:"type"`
98+
Model string `json:"model"`
99+
Role string `json:"role"`
92100
Content []responseBlock `json:"content"`
93-
StopReason string `json:"stop_reason"`
94-
StopSequence string `json:"stop_sequence"`
95-
Usage messagesUsage `json:"usage"`
101+
StopReason string `json:"stop_reason"`
102+
StopSequence string `json:"stop_sequence"`
103+
Usage messagesUsage `json:"usage"`
96104
}
97105

98106
type responseBlock struct {

provider/openai/codex/codex.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import (
99
"time"
1010

1111
"github.com/memohai/twilight-ai/internal/utils"
12+
openaiutil "github.com/memohai/twilight-ai/provider/openai"
1213
"github.com/memohai/twilight-ai/sdk"
1314
)
1415

@@ -388,7 +389,7 @@ func (p *Provider) buildRequest(params *sdk.GenerateParams) *codexRequest {
388389
}
389390

390391
if params.ReasoningEffort != nil && *params.ReasoningEffort != "" {
391-
req.Reasoning = &codexReasoning{Effort: *params.ReasoningEffort}
392+
req.Reasoning = &codexReasoning{Effort: openaiutil.NormalizeReasoningEffort(*params.ReasoningEffort)}
392393
}
393394
return req
394395
}

provider/openai/codex/codex_test.go

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,45 @@ func TestCodexDoGenerate_RequestShapeAndStream(t *testing.T) {
9696
}
9797
}
9898

99+
func TestCodexDoGenerate_MapsMaxReasoningEffortToXHigh(t *testing.T) {
100+
var body struct {
101+
Reasoning *struct {
102+
Effort string `json:"effort"`
103+
} `json:"reasoning"`
104+
}
105+
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
106+
if err := json.NewDecoder(r.Body).Decode(&body); err != nil {
107+
t.Fatalf("decode body: %v", err)
108+
}
109+
w.Header().Set("Content-Type", "text/event-stream")
110+
_, _ = w.Write([]byte("event: response.created\n"))
111+
_, _ = w.Write([]byte("data: {\"response\":{\"id\":\"resp_123\",\"created_at\":1700000000,\"model\":\"gpt-5.2\"}}\n\n"))
112+
_, _ = w.Write([]byte("event: response.output_item.added\n"))
113+
_, _ = w.Write([]byte("data: {\"output_index\":0,\"item\":{\"type\":\"message\",\"id\":\"msg_1\"}}\n\n"))
114+
_, _ = w.Write([]byte("event: response.output_text.delta\n"))
115+
_, _ = w.Write([]byte("data: {\"item_id\":\"msg_1\",\"delta\":\"ok\"}\n\n"))
116+
_, _ = w.Write([]byte("event: response.output_item.done\n"))
117+
_, _ = w.Write([]byte("data: {\"output_index\":0,\"item\":{\"type\":\"message\",\"id\":\"msg_1\"}}\n\n"))
118+
_, _ = w.Write([]byte("event: response.completed\n"))
119+
_, _ = w.Write([]byte("data: {\"response\":{\"usage\":{\"input_tokens\":1,\"output_tokens\":1}}}\n\n"))
120+
}))
121+
defer srv.Close()
122+
123+
p := codex.New(codex.WithAccessToken("token-123"), codex.WithBaseURL(srv.URL))
124+
effort := "max"
125+
_, err := p.DoGenerate(context.Background(), sdk.GenerateParams{
126+
Model: p.ChatModel("gpt-5.2"),
127+
Messages: []sdk.Message{sdk.UserMessage("hi")},
128+
ReasoningEffort: &effort,
129+
})
130+
if err != nil {
131+
t.Fatalf("DoGenerate: %v", err)
132+
}
133+
if body.Reasoning == nil || body.Reasoning.Effort != "xhigh" {
134+
t.Fatalf("reasoning.effort: got %#v, want xhigh", body.Reasoning)
135+
}
136+
}
137+
99138
func TestCodexListModels(t *testing.T) {
100139
p := codex.New(codex.WithAccessToken("token-123"), codex.WithAccountID("acct_123"))
101140
models, err := p.ListModels(context.Background())

provider/openai/completions/completions.go

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import (
1010
"time"
1111

1212
"github.com/memohai/twilight-ai/internal/utils"
13+
openaiutil "github.com/memohai/twilight-ai/provider/openai"
1314
"github.com/memohai/twilight-ai/sdk"
1415
)
1516

@@ -222,7 +223,7 @@ func (p *Provider) buildRequest(params *sdk.GenerateParams) *chatRequest {
222223
FrequencyPenalty: params.FrequencyPenalty,
223224
PresencePenalty: params.PresencePenalty,
224225
Seed: params.Seed,
225-
ReasoningEffort: params.ReasoningEffort,
226+
ReasoningEffort: normalizeReasoningEffort(params.ReasoningEffort),
226227
}
227228
if len(params.StopSequences) > 0 {
228229
req.Stop = params.StopSequences
@@ -241,6 +242,14 @@ func (p *Provider) buildRequest(params *sdk.GenerateParams) *chatRequest {
241242
return req
242243
}
243244

245+
func normalizeReasoningEffort(effort *string) *string {
246+
if effort == nil {
247+
return nil
248+
}
249+
normalized := openaiutil.NormalizeReasoningEffort(*effort)
250+
return &normalized
251+
}
252+
244253
func (p *Provider) applyChatCompletionsCompat(req *chatRequest) {
245254
switch p.compat {
246255
case chatCompletionsCompatDeepSeek:

0 commit comments

Comments
 (0)