memohai
diff --git a/‎provider/anthropic/messages/messages.go‎
Lines changed: 39 additions & 2 deletions b/‎provider/anthropic/messages/messages.go‎
Lines changed: 39 additions & 2 deletions
diff --git a/‎provider/anthropic/messages/messages_test.go‎
Lines changed: 105 additions & 5 deletions b/‎provider/anthropic/messages/messages_test.go‎
Lines changed: 105 additions & 5 deletions
diff --git a/‎provider/anthropic/messages/types.go‎
Lines changed: 27 additions & 19 deletions b/‎provider/anthropic/messages/types.go‎
Lines changed: 27 additions & 19 deletions
diff --git a/‎provider/openai/codex/codex.go‎
Lines changed: 2 additions & 1 deletion b/‎provider/openai/codex/codex.go‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎provider/openai/codex/codex_test.go‎
Lines changed: 39 additions & 0 deletions b/‎provider/openai/codex/codex_test.go‎
Lines changed: 39 additions & 0 deletions
diff --git a/‎provider/openai/completions/completions.go‎
Lines changed: 10 additions & 1 deletion b/‎provider/openai/completions/completions.go‎
Lines changed: 10 additions & 1 deletion
@@ -17,11 +17,18 @@ const (
 	defaultBaseURL      = "https://api.anthropic.com/v1"
 	defaultAnthropicVer = "2023-06-01"
 	defaultMaxTokens    = 4096
+	// defaultReasoningMaxTokens is the fallback output cap when reasoning is
+	// active without an explicit budget (adaptive / output_config.effort). The
+	// plain 4096 default would truncate reasoning + answer; modern Claude models
+	// support far larger outputs.
+	defaultReasoningMaxTokens = 32000
 
 	// Content block types for Anthropic API
 	blockTypeText     = "text"
 	blockTypeThinking = "thinking"
 	blockTypeToolUse  = "tool_use"
+
+	thinkingTypeDisabled = "disabled"
 )
 
 // ThinkingConfig controls extended thinking for Anthropic models.
@@ -238,13 +245,23 @@ func (p *Provider) buildRequest(params *sdk.GenerateParams) *messagesRequest {
 		req.ToolChoice = convertToolChoice(params.ToolChoice)
 	}
 
-	if p.thinking != nil && p.thinking.Type != "" && p.thinking.Type != "disabled" {
+	if p.thinking != nil && p.thinking.Type != "" && p.thinking.Type != thinkingTypeDisabled {
 		req.Thinking = &anthropicThinking{
 			Type:         p.thinking.Type,
 			BudgetTokens: p.thinking.BudgetTokens,
 		}
 	}
 
+	// Reasoning effort is carried via output_config.effort. On modern Claude
+	// models (>= 4.6) this is the supported control; budget_tokens is deprecated
+	// (4.6) or rejected (4.7+). The caller is responsible for only sending an
+	// effort the target model accepts; errors surface as-is.
+	if params.ReasoningEffort != nil {
+		if effort := strings.TrimSpace(*params.ReasoningEffort); effort != "" {
+			req.OutputConfig = &anthropicOutputConfig{Effort: effort}
+		}
+	}
+
 	return req
 }
 
@@ -254,13 +271,33 @@ func resolveMaxTokens(params *sdk.GenerateParams, thinking *ThinkingConfig) *int
 	}
 
 	maxTokens := defaultMaxTokens
-	if thinking != nil && thinking.Type != "" && thinking.Type != "disabled" && thinking.BudgetTokens > 0 {
+	switch {
+	case thinking != nil && thinking.Type != "" && thinking.Type != thinkingTypeDisabled && thinking.BudgetTokens > 0:
+		// Explicit budget thinking: reserve room for the thinking budget on top
+		// of the answer budget.
 		maxTokens += thinking.BudgetTokens
+	case reasoningActive(params, thinking):
+		// Effort-based or adaptive thinking carries no explicit budget, but the
+		// model still needs generous headroom (reasoning + answer). The low 4096
+		// default would truncate; use a reasoning-aware default instead.
+		maxTokens = defaultReasoningMaxTokens
 	}
 
 	return &maxTokens
 }
 
+// reasoningActive reports whether reasoning is switched on for this request:
+// either thinking has a non-empty, non-disabled type, or params carries a
+// non-blank ReasoningEffort. It never inspects thinking.BudgetTokens itself.
+func reasoningActive(params *sdk.GenerateParams, thinking *ThinkingConfig) bool {
+	thinkingOn := thinking != nil && thinking.Type != "" && thinking.Type != thinkingTypeDisabled
+	if thinkingOn {
+		return true
+	}
+	effort := params.ReasoningEffort
+	return effort != nil && strings.TrimSpace(*effort) != ""
+}
+
 func convertTools(tools []sdk.Tool) []anthropicTool {
 	out := make([]anthropicTool, 0, len(tools))
 	for _, t := range tools {
 
@@ -179,6 +179,106 @@ func TestDoGenerate_DefaultMaxTokens_ThinkingBudgetReserveAnswerBudget(t *testin
 	}
 }
 
+// TestDoGenerate_ReasoningEffort_OutputConfig verifies that a ReasoningEffort on
+// the params is sent as output_config.effort, that no thinking object is sent
+// when WithThinking is not used, and that max_tokens falls back to 32000.
+func TestDoGenerate_ReasoningEffort_OutputConfig(t *testing.T) {
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		var body struct {
+			MaxTokens int `json:"max_tokens"`
+			Thinking  *struct {
+				Type string `json:"type"`
+			} `json:"thinking"`
+			OutputConfig *struct {
+				Effort string `json:"effort"`
+			} `json:"output_config"`
+		}
+		// This handler runs on a server goroutine, not the test goroutine, so
+		// FailNow-style helpers (t.Fatalf) must not be used here. Failures are
+		// recorded with t.Errorf instead.
+		if err := json.NewDecoder(r.Body).Decode(&body); err != nil {
+			t.Errorf("decode request: %v", err)
+			http.Error(w, "bad request body", http.StatusBadRequest)
+			return
+		}
+		if body.OutputConfig == nil || body.OutputConfig.Effort != "high" {
+			t.Errorf("output_config.effort: got %+v, want high", body.OutputConfig)
+		}
+		if body.Thinking != nil {
+			t.Errorf("thinking should be absent without WithThinking, got %+v", body.Thinking)
+		}
+		if body.MaxTokens != 32000 {
+			t.Errorf("max_tokens: got %d, want 32000 (reasoning default)", body.MaxTokens)
+		}
+		w.Header().Set("Content-Type", "application/json")
+		if err := json.NewEncoder(w).Encode(map[string]any{
+			"id": "msg_effort", "type": "message", "model": "claude-opus-4-8", "role": "assistant",
+			"content":     []map[string]any{{"type": "text", "text": "OK"}},
+			"stop_reason": "end_turn",
+			"usage":       map[string]any{"input_tokens": 5, "output_tokens": 2},
+		}); err != nil {
+			t.Errorf("encode response: %v", err)
+		}
+	}))
+	defer srv.Close()
+
+	p := messages.New(messages.WithAPIKey("test-key"), messages.WithBaseURL(srv.URL))
+	effort := "high"
+	result, err := p.DoGenerate(context.Background(), sdk.GenerateParams{
+		Model:           &sdk.Model{ID: "claude-opus-4-8"},
+		Messages:        []sdk.Message{sdk.UserMessage("Hi")},
+		ReasoningEffort: &effort,
+	})
+	if err != nil {
+		t.Fatalf("DoGenerate failed: %v", err)
+	}
+	if result.Text != "OK" {
+		t.Errorf("text: got %q", result.Text)
+	}
+}
+
+// TestDoGenerate_AdaptiveThinking_EffortNoBudget verifies that adaptive thinking
+// combined with a ReasoningEffort sends thinking.type "adaptive" with no
+// budget_tokens key, output_config.effort set to the effort, and max_tokens 32000.
+func TestDoGenerate_AdaptiveThinking_EffortNoBudget(t *testing.T) {
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		var body struct {
+			MaxTokens int `json:"max_tokens"`
+			Thinking  struct {
+				Type string `json:"type"`
+				// Pointer so an omitted key (nil) is distinguishable from an
+				// explicit "budget_tokens": 0.
+				BudgetTokens *int `json:"budget_tokens"`
+			} `json:"thinking"`
+			OutputConfig struct {
+				Effort string `json:"effort"`
+			} `json:"output_config"`
+		}
+		// This handler runs on a server goroutine, not the test goroutine, so
+		// FailNow-style helpers (t.Fatalf) must not be used here. Failures are
+		// recorded with t.Errorf instead.
+		if err := json.NewDecoder(r.Body).Decode(&body); err != nil {
+			t.Errorf("decode request: %v", err)
+			http.Error(w, "bad request body", http.StatusBadRequest)
+			return
+		}
+		if body.Thinking.Type != "adaptive" {
+			t.Errorf("thinking.type: got %q, want adaptive", body.Thinking.Type)
+		}
+		if body.Thinking.BudgetTokens != nil {
+			t.Errorf("budget_tokens must be omitted for adaptive, got %d", *body.Thinking.BudgetTokens)
+		}
+		if body.OutputConfig.Effort != "xhigh" {
+			t.Errorf("output_config.effort: got %q, want xhigh", body.OutputConfig.Effort)
+		}
+		if body.MaxTokens != 32000 {
+			t.Errorf("max_tokens: got %d, want 32000 (reasoning default)", body.MaxTokens)
+		}
+		w.Header().Set("Content-Type", "application/json")
+		if err := json.NewEncoder(w).Encode(map[string]any{
+			"id": "msg_adaptive", "type": "message", "model": "claude-opus-4-8", "role": "assistant",
+			"content":     []map[string]any{{"type": "text", "text": "OK"}},
+			"stop_reason": "end_turn",
+			"usage":       map[string]any{"input_tokens": 5, "output_tokens": 2},
+		}); err != nil {
+			t.Errorf("encode response: %v", err)
+		}
+	}))
+	defer srv.Close()
+
+	p := messages.New(
+		messages.WithAPIKey("test-key"),
+		messages.WithBaseURL(srv.URL),
+		messages.WithThinking(messages.ThinkingConfig{Type: "adaptive"}),
+	)
+	effort := "xhigh"
+	if _, err := p.DoGenerate(context.Background(), sdk.GenerateParams{
+		Model:           &sdk.Model{ID: "claude-opus-4-8"},
+		Messages:        []sdk.Message{sdk.UserMessage("Hi")},
+		ReasoningEffort: &effort,
+	}); err != nil {
+		t.Fatalf("DoGenerate failed: %v", err)
+	}
+}
+
 func TestDoGenerate_SystemMessage(t *testing.T) {
 	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
 		var body struct {
@@ -1068,7 +1168,7 @@ func TestDoGenerate_CacheControl_Tools(t *testing.T) {
 
 	p := messages.New(messages.WithAPIKey("k"), messages.WithBaseURL(srv.URL))
 	_, err := p.DoGenerate(context.Background(), sdk.GenerateParams{
-		Model: &sdk.Model{ID: "claude-sonnet-4-20250514"},
+		Model:    &sdk.Model{ID: "claude-sonnet-4-20250514"},
 		Messages: []sdk.Message{sdk.UserMessage("Hi")},
 		Tools: []sdk.Tool{
 			{Name: "search", Description: "Search the web", Parameters: map[string]any{"type": "object"}},
@@ -1093,10 +1193,10 @@ func TestDoGenerate_CacheControl_DetailedUsage(t *testing.T) {
 			"content":     []map[string]any{{"type": "text", "text": "OK"}},
 			"stop_reason": "end_turn",
 			"usage": map[string]any{
-				"input_tokens":                  10,
-				"output_tokens":                 5,
-				"cache_creation_input_tokens":   556,
-				"cache_read_input_tokens":       200,
+				"input_tokens":                10,
+				"output_tokens":               5,
+				"cache_creation_input_tokens": 556,
+				"cache_read_input_tokens":     200,
 				"cache_creation": map[string]any{
 					"ephemeral_5m_input_tokens": 456,
 					"ephemeral_1h_input_tokens": 100,
 
@@ -3,25 +3,33 @@ package messages
 // --- Request types ---
 
 type messagesRequest struct {
-	Model         string               `json:"model"`
-	MaxTokens     *int                 `json:"max_tokens,omitempty"`
-	System        []contentBlock       `json:"system,omitempty"`
-	Messages      []anthropicMessage   `json:"messages"`
-	Tools         []anthropicTool      `json:"tools,omitempty"`
-	ToolChoice    *anthropicToolChoice `json:"tool_choice,omitempty"`
-	Temperature   *float64             `json:"temperature,omitempty"`
-	TopP          *float64             `json:"top_p,omitempty"`
-	TopK          *int                 `json:"top_k,omitempty"`
-	StopSequences []string             `json:"stop_sequences,omitempty"`
-	Stream        bool                 `json:"stream,omitempty"`
-	Thinking      *anthropicThinking   `json:"thinking,omitempty"`
+	Model         string                 `json:"model"`
+	MaxTokens     *int                   `json:"max_tokens,omitempty"`
+	System        []contentBlock         `json:"system,omitempty"`
+	Messages      []anthropicMessage     `json:"messages"`
+	Tools         []anthropicTool        `json:"tools,omitempty"`
+	ToolChoice    *anthropicToolChoice   `json:"tool_choice,omitempty"`
+	Temperature   *float64               `json:"temperature,omitempty"`
+	TopP          *float64               `json:"top_p,omitempty"`
+	TopK          *int                   `json:"top_k,omitempty"`
+	StopSequences []string               `json:"stop_sequences,omitempty"`
+	Stream        bool                   `json:"stream,omitempty"`
+	Thinking      *anthropicThinking     `json:"thinking,omitempty"`
+	OutputConfig  *anthropicOutputConfig `json:"output_config,omitempty"`
 }
 
 // anthropicThinking is the JSON shape of the request's "thinking" field.
 // BudgetTokens is tagged omitempty, so a zero budget is left out of the
 // serialized request.
 type anthropicThinking struct {
 	Type         string `json:"type"`
 	BudgetTokens int    `json:"budget_tokens,omitempty"`
 }
 
+// anthropicOutputConfig mirrors Anthropic's output_config request object. Effort
+// is the reasoning-effort tier ("low"/"medium"/"high"/"xhigh"/"max"), omitted from
+// the JSON when empty; on modern Claude models it supersedes thinking.budget_tokens.
+type anthropicOutputConfig struct {
+	Effort string `json:"effort,omitempty"`
+}
+
 type anthropicMessage struct {
 	Role    string         `json:"role"`
 	Content []contentBlock `json:"content"`
@@ -85,14 +93,14 @@ type anthropicToolChoice struct {
 // --- Response types ---
 
 type messagesResponse struct {
-	ID           string        `json:"id"`
-	Type         string        `json:"type"`
-	Model        string        `json:"model"`
-	Role         string        `json:"role"`
+	ID           string          `json:"id"`
+	Type         string          `json:"type"`
+	Model        string          `json:"model"`
+	Role         string          `json:"role"`
 	Content      []responseBlock `json:"content"`
-	StopReason   string        `json:"stop_reason"`
-	StopSequence string        `json:"stop_sequence"`
-	Usage        messagesUsage `json:"usage"`
+	StopReason   string          `json:"stop_reason"`
+	StopSequence string          `json:"stop_sequence"`
+	Usage        messagesUsage   `json:"usage"`
 }
 
 type responseBlock struct {
 
@@ -9,6 +9,7 @@ import (
 	"time"
 
 	"github.com/memohai/twilight-ai/internal/utils"
+	openaiutil "github.com/memohai/twilight-ai/provider/openai"
 	"github.com/memohai/twilight-ai/sdk"
 )
 
@@ -388,7 +389,7 @@ func (p *Provider) buildRequest(params *sdk.GenerateParams) *codexRequest {
 	}
 
 	if params.ReasoningEffort != nil && *params.ReasoningEffort != "" {
-		req.Reasoning = &codexReasoning{Effort: *params.ReasoningEffort}
+		req.Reasoning = &codexReasoning{Effort: openaiutil.NormalizeReasoningEffort(*params.ReasoningEffort)}
 	}
 	return req
 }
 
@@ -96,6 +96,45 @@ func TestCodexDoGenerate_RequestShapeAndStream(t *testing.T) {
 	}
 }
 
+// TestCodexDoGenerate_MapsMaxReasoningEffortToXHigh verifies that a
+// ReasoningEffort of "max" reaches the server as reasoning.effort "xhigh".
+func TestCodexDoGenerate_MapsMaxReasoningEffortToXHigh(t *testing.T) {
+	var body struct {
+		Reasoning *struct {
+			Effort string `json:"effort"`
+		} `json:"reasoning"`
+	}
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		// This handler runs on a server goroutine, not the test goroutine, so
+		// FailNow-style helpers (t.Fatalf) must not be used here. The failure is
+		// recorded with t.Errorf and the request is rejected.
+		if err := json.NewDecoder(r.Body).Decode(&body); err != nil {
+			t.Errorf("decode body: %v", err)
+			http.Error(w, "bad request body", http.StatusBadRequest)
+			return
+		}
+		w.Header().Set("Content-Type", "text/event-stream")
+		_, _ = w.Write([]byte("event: response.created\n"))
+		_, _ = w.Write([]byte("data: {\"response\":{\"id\":\"resp_123\",\"created_at\":1700000000,\"model\":\"gpt-5.2\"}}\n\n"))
+		_, _ = w.Write([]byte("event: response.output_item.added\n"))
+		_, _ = w.Write([]byte("data: {\"output_index\":0,\"item\":{\"type\":\"message\",\"id\":\"msg_1\"}}\n\n"))
+		_, _ = w.Write([]byte("event: response.output_text.delta\n"))
+		_, _ = w.Write([]byte("data: {\"item_id\":\"msg_1\",\"delta\":\"ok\"}\n\n"))
+		_, _ = w.Write([]byte("event: response.output_item.done\n"))
+		_, _ = w.Write([]byte("data: {\"output_index\":0,\"item\":{\"type\":\"message\",\"id\":\"msg_1\"}}\n\n"))
+		_, _ = w.Write([]byte("event: response.completed\n"))
+		_, _ = w.Write([]byte("data: {\"response\":{\"usage\":{\"input_tokens\":1,\"output_tokens\":1}}}\n\n"))
+	}))
+	defer srv.Close()
+
+	p := codex.New(codex.WithAccessToken("token-123"), codex.WithBaseURL(srv.URL))
+	effort := "max"
+	_, err := p.DoGenerate(context.Background(), sdk.GenerateParams{
+		Model:           p.ChatModel("gpt-5.2"),
+		Messages:        []sdk.Message{sdk.UserMessage("hi")},
+		ReasoningEffort: &effort,
+	})
+	if err != nil {
+		t.Fatalf("DoGenerate: %v", err)
+	}
+	if body.Reasoning == nil || body.Reasoning.Effort != "xhigh" {
+		t.Fatalf("reasoning.effort: got %#v, want xhigh", body.Reasoning)
+	}
+}
+
 func TestCodexListModels(t *testing.T) {
 	p := codex.New(codex.WithAccessToken("token-123"), codex.WithAccountID("acct_123"))
 	models, err := p.ListModels(context.Background())
 
@@ -10,6 +10,7 @@ import (
 	"time"
 
 	"github.com/memohai/twilight-ai/internal/utils"
+	openaiutil "github.com/memohai/twilight-ai/provider/openai"
 	"github.com/memohai/twilight-ai/sdk"
 )
 
@@ -222,7 +223,7 @@ func (p *Provider) buildRequest(params *sdk.GenerateParams) *chatRequest {
 		FrequencyPenalty:    params.FrequencyPenalty,
 		PresencePenalty:     params.PresencePenalty,
 		Seed:                params.Seed,
-		ReasoningEffort:     params.ReasoningEffort,
+		ReasoningEffort:     normalizeReasoningEffort(params.ReasoningEffort),
 	}
 	if len(params.StopSequences) > 0 {
 		req.Stop = params.StopSequences
@@ -241,6 +242,14 @@ func (p *Provider) buildRequest(params *sdk.GenerateParams) *chatRequest {
 	return req
 }
 
+// normalizeReasoningEffort passes an optional reasoning-effort value through
+// openaiutil.NormalizeReasoningEffort. A nil input stays nil; otherwise the
+// result is a pointer to a new string, so the caller's value is not modified.
+func normalizeReasoningEffort(effort *string) *string {
+	if effort != nil {
+		out := openaiutil.NormalizeReasoningEffort(*effort)
+		return &out
+	}
+	return nil
+}
+
 func (p *Provider) applyChatCompletionsCompat(req *chatRequest) {
 	switch p.compat {
 	case chatCompletionsCompatDeepSeek:
Original file line number	Diff line number	Diff line change
`@@ -9,6 +9,7 @@ import (`
`9`	`9`	`"time"`
`10`	`10`
`11`	`11`	`"github.com/memohai/twilight-ai/internal/utils"`
	`12`	`+ openaiutil "github.com/memohai/twilight-ai/provider/openai"`
`12`	`13`	`"github.com/memohai/twilight-ai/sdk"`
`13`	`14`	`)`
`14`	`15`
`@@ -388,7 +389,7 @@ func (p Provider) buildRequest(params sdk.GenerateParams) *codexRequest {`
`388`	`389`	`}`
`389`	`390`
`390`	`391`	`if params.ReasoningEffort != nil && *params.ReasoningEffort != "" {`
`391`		`- req.Reasoning = &codexReasoning{Effort: *params.ReasoningEffort}`
	`392`	`+ req.Reasoning = &codexReasoning{Effort: openaiutil.NormalizeReasoningEffort(*params.ReasoningEffort)}`
`392`	`393`	`}`
`393`	`394`	`return req`
`394`	`395`	`}`