
Commit 0b7d92e

Rework thinking budget: opt-in by default, adaptive thinking, effort levels
Thinking was unconditionally enabled for all models with provider-specific defaults (e.g. 'medium' for OpenAI, 8192 tokens for Anthropic). This meant every model paid the latency and cost of thinking even when the user never asked for it. This commit makes thinking opt-in: it is only enabled when the user sets thinking_budget in their YAML config, with one exception: thinking-only models (OpenAI o-series) still get a default of 'medium' since they cannot function without it.

New features:

- Adaptive thinking for Anthropic (thinking_budget: adaptive). Uses thinking.type=adaptive, which lets the model decide when and how much to think. Recommended for Claude 4.6 models.
- Effort-level strings for Anthropic (thinking_budget: low/medium/high/max). Translated to adaptive thinking + output_config.effort in the API. Previously these strings were silently ignored because the Anthropic client only checked for token-based budgets.
- Effort-level strings for Bedrock Claude. Mapped to token budgets via EffortTokens(), since the Bedrock API does not support adaptive thinking natively.

Bug fixes:

- The Anthropic and Bedrock clients silently ignored string effort levels (minimal/low/medium/high). A config with thinking_budget: high produced no thinking at all because the code only checked .Tokens > 0.
- applyOverrides and applyProviderDefaults used shallow struct copies that shared the underlying ProviderOpts map. Disabling thinking via /think deleted interleaved_thinking from the original config's map. Introduced cloneModelConfig() to deep-copy the map.
- /think on a Gemini 2.0 model (which does not support thinking) returned a 'medium' budget that caused API errors. The default case now returns nil for unknown/older Gemini models.

Code quality:

- Extracted resolveProviderType() to replace three copies of the same provider-type resolution logic.
- Extracted ensureInterleavedThinking() to replace four copies of the same ProviderOpts write pattern.
- Separated setThinkingDefaults (used by the /think toggle, generous) from applyModelDefaults (used at config load, conservative).
- Removed the empty applyGoogleDefaults; merged applyAnthropicDefaults and applyBedrockDefaults into shared helpers.
- Consolidated test files from 8+ test functions into compact table-driven tests with a unified assertion pattern.
- Moved ThinkingBudget method tests (IsDisabled, IsAdaptive, EffortTokens) to pkg/config/latest, where the type lives.

Schema and examples updated to document adaptive, max, and effort levels.

Assisted-By: docker-agent
1 parent d871092 commit 0b7d92e
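The ProviderOpts bug fixed above comes from Go's struct copy semantics: copying a struct copies the map header, not the map contents, so a "copy" still aliases the original map. The following self-contained sketch reproduces the bug and the cloneModelConfig fix; the trimmed-down ModelConfig here is illustrative, not the project's real struct.

```go
package main

import "fmt"

// ModelConfig is a stand-in for the real config struct; only ProviderOpts
// matters for demonstrating the aliasing bug.
type ModelConfig struct {
	Provider     string
	ProviderOpts map[string]any
}

// cloneModelConfig deep-copies the config, including the ProviderOpts map,
// so later mutations do not leak back into the original.
func cloneModelConfig(c *ModelConfig) *ModelConfig {
	clone := *c // copies scalar fields, but the map is still shared here
	if c.ProviderOpts != nil {
		clone.ProviderOpts = make(map[string]any, len(c.ProviderOpts))
		for k, v := range c.ProviderOpts {
			clone.ProviderOpts[k] = v
		}
	}
	return &clone
}

func main() {
	orig := &ModelConfig{
		Provider:     "anthropic",
		ProviderOpts: map[string]any{"interleaved_thinking": true},
	}

	// Buggy pattern: a plain struct copy shares the underlying map, so
	// deleting from the copy mutates the original config too.
	shallow := *orig
	delete(shallow.ProviderOpts, "interleaved_thinking")
	_, survives := orig.ProviderOpts["interleaved_thinking"]
	fmt.Println("key survives after shallow-copy delete:", survives) // false

	// Fixed pattern: the deep copy isolates the map.
	orig.ProviderOpts["interleaved_thinking"] = true
	deep := cloneModelConfig(orig)
	delete(deep.ProviderOpts, "interleaved_thinking")
	_, survives = orig.ProviderOpts["interleaved_thinking"]
	fmt.Println("key survives after deep-copy delete:", survives) // true
}
```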

File tree

11 files changed: +688 -1351 lines changed


agent-schema.json

Lines changed: 7 additions & 3 deletions
@@ -535,7 +535,7 @@
         "description": "Whether to track usage"
       },
       "thinking_budget": {
-        "description": "Controls reasoning effort/budget. Use 'none' or 0 to disable thinking. OpenAI: string levels ('minimal','low','medium','high'), default 'medium'. Anthropic: integer token budget (1024-32768), default 8192. Amazon Bedrock (Claude): same as Anthropic. Google Gemini 2.5: integer token budget (-1 for dynamic, 0 to disable, 24576 max), default -1. Google Gemini 3: string levels ('minimal' Flash only,'low','medium','high'), default 'high' for Pro, 'medium' for Flash.",
+        "description": "Controls reasoning effort/budget. Use 'none' or 0 to disable thinking. OpenAI: string levels ('minimal','low','medium','high'). Anthropic: integer token budget (1024-32768), 'adaptive' (lets the model decide), or effort levels ('low','medium','high','max') which use adaptive thinking with the given effort. Amazon Bedrock (Claude): integer token budget or effort levels ('low','medium','high') mapped to token budgets. Google Gemini 2.5: integer token budget (-1 for dynamic, 0 to disable, 24576 max). Google Gemini 3: string levels ('minimal' Flash only,'low','medium','high'). Thinking is only enabled when explicitly configured.",
         "oneOf": [
           {
             "type": "string",
@@ -544,9 +544,11 @@
               "minimal",
               "low",
               "medium",
-              "high"
+              "high",
+              "max",
+              "adaptive"
             ],
-            "description": "Reasoning effort level (OpenAI, Gemini 3). Use 'none' to disable thinking."
+            "description": "Reasoning effort level. 'adaptive'/'max' are Anthropic-specific. Use 'none' to disable thinking."
           },
           {
             "type": "integer",
@@ -562,6 +564,8 @@
             "low",
             "medium",
             "high",
+            "max",
+            "adaptive",
             -1,
             1024,
             8192,

examples/thinking_budget.yaml

Lines changed: 13 additions & 3 deletions
@@ -6,7 +6,7 @@
 agents:
   root:
     model: gpt-5-mini-min # <- try with gpt-5-mini-high
-    # model: claude-4-5-sonnet-min # <- try with claude-4-5-sonnet-high
+    # model: claude-4-5-sonnet-min # <- try with claude-4-5-sonnet-high or claude-opus-4-6-adaptive
     # model: gemini-2-5-flash-dynamic-thinking # <- try with -no-thinking, -low or -high variants
     description: a helpful assistant that thinks
     instruction: you are a helpful assistant who can also use tools, but only if you need to
@@ -29,15 +29,25 @@ models:
   claude-4-5-sonnet-min:
     provider: anthropic
     model: claude-sonnet-4-5-20250929
-    thinking_budget: 1024 # <- tokens, 1024 is the minimum
+    thinking_budget: 1024 # <- explicit token budget (1024-32768) for older models

   claude-4-5-sonnet-high:
     provider: anthropic
     model: claude-sonnet-4-5-20250929
-    thinking_budget: 32768 # <- tokens, 32768 is the Anthropic suggested maximum without batching
+    thinking_budget: 32768 # <- explicit token budget (32768 is the Anthropic suggested maximum)
     provider_opts:
       interleaved_thinking: true # <- enables interleaved thinking, aka tool calling during model reasoning

+  claude-opus-4-6-adaptive:
+    provider: anthropic
+    model: claude-opus-4-6
+    thinking_budget: adaptive # <- lets the model decide when and how much to think (recommended for 4.6)
+
+  claude-opus-4-6-low:
+    provider: anthropic
+    model: claude-opus-4-6
+    thinking_budget: low # <- adaptive thinking with low effort: "low", "medium", "high", "max"
+
   gemini-2-5-flash-dynamic-thinking:
     provider: google
     model: gemini-2.5-flash

pkg/config/latest/types.go

Lines changed: 42 additions & 2 deletions
@@ -397,7 +397,10 @@ type ModelConfig struct {
 	TrackUsage *bool `json:"track_usage,omitempty"`
 	// ThinkingBudget controls reasoning effort/budget:
 	// - For OpenAI: accepts string levels "minimal", "low", "medium", "high"
-	// - For Anthropic: accepts integer token budget (1024-32000)
+	// - For Anthropic: accepts integer token budget (1024-32000), "adaptive",
+	//   or string levels "low", "medium", "high", "max" (uses adaptive thinking with effort)
+	// - For Bedrock Claude: accepts integer token budget or string levels
+	//   "minimal", "low", "medium", "high" (mapped to token budgets via EffortTokens)
 	// - For other providers: may be ignored
 	ThinkingBudget *ThinkingBudget `json:"thinking_budget,omitempty"`
 	// Routing defines rules for routing requests to different models.
@@ -670,6 +673,7 @@ func (d DeferConfig) MarshalYAML() (any, error) {
 // ThinkingBudget represents reasoning budget configuration.
 // It accepts either a string effort level or an integer token budget:
 // - String: "minimal", "low", "medium", "high" (for OpenAI)
+// - String: "adaptive" (for Anthropic models that support adaptive thinking)
 // - Integer: token count (for Anthropic, range 1024-32768)
 type ThinkingBudget struct {
 	// Effort stores string-based reasoning effort levels
@@ -717,14 +721,50 @@ func (t ThinkingBudget) MarshalYAML() (any, error) {
 // NOT disabled when:
 // - Tokens > 0 or Tokens == -1 (explicit token budget)
 // - Effort is a real level like "medium" or "high"
+// - Effort is "adaptive"
 func (t *ThinkingBudget) IsDisabled() bool {
 	if t == nil {
 		return false
 	}
 	if t.Tokens == 0 && t.Effort == "" {
 		return true
 	}
-	return t.Effort == "none"
+	return strings.EqualFold(t.Effort, "none")
+}
+
+// IsAdaptive returns true if the thinking budget is set to adaptive mode.
+// Adaptive thinking lets the model decide how much thinking to do.
+func (t *ThinkingBudget) IsAdaptive() bool {
+	if t == nil {
+		return false
+	}
+	return strings.EqualFold(t.Effort, "adaptive")
+}
+
+// EffortTokens maps a string effort level to a token budget for providers
+// that only support token-based thinking (e.g. Bedrock Claude).
+//
+// The Anthropic direct API uses adaptive thinking + output_config.effort
+// for string levels instead; see anthropicEffort in the anthropic package.
+//
+// Returns (tokens, true) when a mapping exists, or (0, false) when
+// the budget uses an explicit token count or an unrecognised effort string.
+func (t *ThinkingBudget) EffortTokens() (int, bool) {
+	if t == nil || t.Effort == "" {
+		return 0, false
+	}
+	switch strings.ToLower(strings.TrimSpace(t.Effort)) {
+	case "minimal":
+		return 1024, true
+	case "low":
+		return 2048, true
+	case "medium":
+		return 8192, true
+	case "high":
+		return 16384, true
+	default:
+		return 0, false
+	}
 }

 // MarshalJSON implements custom marshaling to output simple string or int format

pkg/config/latest/types_test.go

Lines changed: 71 additions & 0 deletions
@@ -121,6 +121,77 @@ func TestThinkingBudget_MarshalUnmarshal_Zero(t *testing.T) {
 	require.Equal(t, "thinking_budget: 0\n", string(output))
 }

+func TestThinkingBudget_IsDisabled(t *testing.T) {
+	t.Parallel()
+
+	for _, tt := range []struct {
+		name string
+		b    *ThinkingBudget
+		want bool
+	}{
+		{"nil", nil, false},
+		{"zero tokens", &ThinkingBudget{Tokens: 0}, true},
+		{"none effort", &ThinkingBudget{Effort: "none"}, true},
+		{"positive tokens", &ThinkingBudget{Tokens: 8192}, false},
+		{"medium effort", &ThinkingBudget{Effort: "medium"}, false},
+		{"adaptive effort", &ThinkingBudget{Effort: "adaptive"}, false},
+		{"negative tokens (dynamic)", &ThinkingBudget{Tokens: -1}, false},
+	} {
+		t.Run(tt.name, func(t *testing.T) {
+			t.Parallel()
+			require.Equal(t, tt.want, tt.b.IsDisabled())
+		})
+	}
+}
+
+func TestThinkingBudget_IsAdaptive(t *testing.T) {
+	t.Parallel()
+
+	for _, tt := range []struct {
+		name string
+		b    *ThinkingBudget
+		want bool
+	}{
+		{"nil", nil, false},
+		{"adaptive", &ThinkingBudget{Effort: "adaptive"}, true},
+		{"medium", &ThinkingBudget{Effort: "medium"}, false},
+		{"tokens", &ThinkingBudget{Tokens: 8192}, false},
+	} {
+		t.Run(tt.name, func(t *testing.T) {
+			t.Parallel()
+			require.Equal(t, tt.want, tt.b.IsAdaptive())
+		})
+	}
+}
+
+func TestThinkingBudget_EffortTokens(t *testing.T) {
+	t.Parallel()
+
+	for _, tt := range []struct {
+		name       string
+		b          *ThinkingBudget
+		wantTokens int
+		wantOK     bool
+	}{
+		{"nil", nil, 0, false},
+		{"minimal", &ThinkingBudget{Effort: "minimal"}, 1024, true},
+		{"low", &ThinkingBudget{Effort: "low"}, 2048, true},
+		{"medium", &ThinkingBudget{Effort: "medium"}, 8192, true},
+		{"high", &ThinkingBudget{Effort: "high"}, 16384, true},
+		{"adaptive", &ThinkingBudget{Effort: "adaptive"}, 0, false},
+		{"none", &ThinkingBudget{Effort: "none"}, 0, false},
+		{"explicit tokens", &ThinkingBudget{Tokens: 4096}, 0, false},
+		{"empty effort", &ThinkingBudget{}, 0, false},
+	} {
+		t.Run(tt.name, func(t *testing.T) {
+			t.Parallel()
+			tokens, ok := tt.b.EffortTokens()
+			require.Equal(t, tt.wantOK, ok)
+			require.Equal(t, tt.wantTokens, tokens)
+		})
+	}
+}
+
 func TestAgents_UnmarshalYAML_RejectsUnknownFields(t *testing.T) {
 	t.Parallel()

pkg/model/provider/anthropic/beta_client.go

Lines changed: 31 additions & 13 deletions
@@ -95,20 +95,38 @@ func (c *Client) createBetaStream(
 	// For interleaved thinking to make sense, we use a default of 16384 tokens for the thinking budget
 	thinkingEnabled := c.ModelOptions.Thinking() == nil || *c.ModelOptions.Thinking()
 	if thinkingEnabled {
-		thinkingTokens := int64(16384)
-		if c.ModelConfig.ThinkingBudget != nil {
-			thinkingTokens = int64(c.ModelConfig.ThinkingBudget.Tokens)
+		if c.ModelConfig.ThinkingBudget != nil && c.ModelConfig.ThinkingBudget.IsAdaptive() {
+			// Adaptive thinking: let the model decide how much thinking to do
+			adaptive := anthropic.NewBetaThinkingConfigAdaptiveParam()
+			params.Thinking = anthropic.BetaThinkingConfigParamUnion{
+				OfAdaptive: &adaptive,
+			}
+			slog.Debug("Anthropic Beta API using adaptive thinking")
+		} else if effort, ok := anthropicEffort(c.ModelConfig.ThinkingBudget); ok {
+			// Effort level: use adaptive thinking + output_config.effort
+			adaptive := anthropic.NewBetaThinkingConfigAdaptiveParam()
+			params.Thinking = anthropic.BetaThinkingConfigParamUnion{
+				OfAdaptive: &adaptive,
+			}
+			params.OutputConfig.Effort = anthropic.BetaOutputConfigEffort(effort)
+			slog.Debug("Anthropic Beta API using adaptive thinking with effort",
+				"effort", effort)
 		} else {
-			slog.Info("Anthropic Beta API using default thinking_budget with interleaved thinking", "budget_tokens", thinkingTokens)
-		}
-		switch {
-		case thinkingTokens >= 1024 && thinkingTokens < maxTokens:
-			params.Thinking = anthropic.BetaThinkingConfigParamOfEnabled(thinkingTokens)
-			slog.Debug("Anthropic Beta API using thinking_budget with interleaved thinking", "budget_tokens", thinkingTokens)
-		case thinkingTokens >= maxTokens:
-			slog.Warn("Anthropic Beta API thinking_budget must be less than max_tokens, ignoring", "tokens", thinkingTokens, "max_tokens", maxTokens)
-		default:
-			slog.Warn("Anthropic Beta API thinking_budget below minimum (1024), ignoring", "tokens", thinkingTokens)
+			thinkingTokens := int64(16384)
+			if c.ModelConfig.ThinkingBudget != nil {
+				thinkingTokens = int64(c.ModelConfig.ThinkingBudget.Tokens)
+			} else {
+				slog.Info("Anthropic Beta API using default thinking_budget with interleaved thinking", "budget_tokens", thinkingTokens)
+			}
+			switch {
+			case thinkingTokens >= 1024 && thinkingTokens < maxTokens:
+				params.Thinking = anthropic.BetaThinkingConfigParamOfEnabled(thinkingTokens)
+				slog.Debug("Anthropic Beta API using thinking_budget with interleaved thinking", "budget_tokens", thinkingTokens)
+			case thinkingTokens >= maxTokens:
+				slog.Warn("Anthropic Beta API thinking_budget must be less than max_tokens, ignoring", "tokens", thinkingTokens, "max_tokens", maxTokens)
+			default:
+				slog.Warn("Anthropic Beta API thinking_budget below minimum (1024), ignoring", "tokens", thinkingTokens)
+			}
 		}
 	} else {
 		slog.Debug("Anthropic Beta API: Thinking disabled via /think command")

pkg/model/provider/anthropic/client.go

Lines changed: 54 additions & 2 deletions
@@ -50,12 +50,23 @@ func (c *Client) getResponseTrailer() http.Header {
 // adjustMaxTokensForThinking checks if max_tokens needs adjustment for thinking_budget.
 // Anthropic's max_tokens represents the combined budget for thinking + output tokens.
 // Returns the adjusted maxTokens value and an error if user-set max_tokens is too low.
+//
+// This only applies to fixed token budgets. Adaptive thinking and effort-based
+// budgets don't need adjustment since the model manages its own thinking allocation.
 func (c *Client) adjustMaxTokensForThinking(maxTokens int64) (int64, error) {
-	if c.ModelConfig.ThinkingBudget == nil || c.ModelConfig.ThinkingBudget.Tokens <= 0 {
+	if c.ModelConfig.ThinkingBudget == nil || c.ModelConfig.ThinkingBudget.IsAdaptive() {
+		return maxTokens, nil
+	}
+	// Effort-based budgets use adaptive thinking, so no token adjustment is needed.
+	if _, ok := anthropicEffort(c.ModelConfig.ThinkingBudget); ok {
 		return maxTokens, nil
 	}

 	thinkingTokens := int64(c.ModelConfig.ThinkingBudget.Tokens)
+	if thinkingTokens <= 0 {
+		return maxTokens, nil
+	}
+
 	minRequired := thinkingTokens + 1024 // configured thinking budget + minimum output buffer

 	if maxTokens <= thinkingTokens {
@@ -297,7 +308,25 @@ func (c *Client) CreateChatCompletionStream(

 	// Apply thinking budget first, as it affects whether we can set temperature
 	thinkingEnabled := false
-	if c.ModelConfig.ThinkingBudget != nil && c.ModelConfig.ThinkingBudget.Tokens > 0 {
+	if c.ModelConfig.ThinkingBudget != nil && c.ModelConfig.ThinkingBudget.IsAdaptive() {
+		// Adaptive thinking: let the model decide how much thinking to do
+		adaptive := anthropic.NewThinkingConfigAdaptiveParam()
+		params.Thinking = anthropic.ThinkingConfigParamUnion{
+			OfAdaptive: &adaptive,
+		}
+		thinkingEnabled = true
+		slog.Debug("Anthropic API using adaptive thinking (standard messages)")
+	} else if effort, ok := anthropicEffort(c.ModelConfig.ThinkingBudget); ok {
+		// Effort level: use adaptive thinking + output_config.effort
+		adaptive := anthropic.NewThinkingConfigAdaptiveParam()
+		params.Thinking = anthropic.ThinkingConfigParamUnion{
+			OfAdaptive: &adaptive,
+		}
+		params.OutputConfig.Effort = anthropic.OutputConfigEffort(effort)
+		thinkingEnabled = true
+		slog.Debug("Anthropic API using adaptive thinking with effort",
+			"effort", effort)
+	} else if c.ModelConfig.ThinkingBudget != nil && c.ModelConfig.ThinkingBudget.Tokens > 0 {
 		thinkingTokens := int64(c.ModelConfig.ThinkingBudget.Tokens)
 		switch {
 		case thinkingTokens >= 1024 && thinkingTokens < maxTokens:
@@ -895,6 +924,29 @@ func differenceIDs(a, b map[string]struct{}) []string {
 	return missing
 }

+// anthropicEffort maps a ThinkingBudget effort string to an Anthropic API
+// effort level ("low", "medium", "high", "max"). Returns ("", false) when
+// the budget uses token counts, adaptive mode, or an unrecognised string.
+func anthropicEffort(b *latest.ThinkingBudget) (string, bool) {
+	if b == nil {
+		return "", false
+	}
+	switch strings.ToLower(strings.TrimSpace(b.Effort)) {
+	case "low":
+		return "low", true
+	case "minimal": // "minimal" is not in the Anthropic API; map to closest
+		return "low", true
+	case "medium":
+		return "medium", true
+	case "high":
+		return "high", true
+	case "max":
+		return "max", true
+	default:
+		return "", false
+	}
+}
+
 // anthropicContextLimit returns a reasonable default context window for Anthropic models.
 // We default to 200k tokens, which is what 3.5-4.5 models support; adjust as needed over time.
 func anthropicContextLimit(model string) int64 {

pkg/model/provider/bedrock/client.go

Lines changed: 16 additions & 10 deletions
@@ -275,16 +275,23 @@ func (c *Client) buildInferenceConfig() *types.InferenceConfiguration {
 	return cfg
 }

+// resolveThinkingTokens returns the effective token budget for thinking.
+// It handles both explicit token counts and effort-level strings.
+// Returns 0 if no valid thinking budget is configured.
+func (c *Client) resolveThinkingTokens() int {
+	if c.ModelConfig.ThinkingBudget == nil {
+		return 0
+	}
+	if tokens, ok := c.ModelConfig.ThinkingBudget.EffortTokens(); ok {
+		return tokens
+	}
+	return c.ModelConfig.ThinkingBudget.Tokens
+}
+
 // isThinkingEnabled mirrors the validation in buildAdditionalModelRequestFields
 // to determine if thinking params will affect inference config (temp/topP constraints).
 func (c *Client) isThinkingEnabled() bool {
-	if c.ModelConfig.ThinkingBudget == nil || c.ModelConfig.ThinkingBudget.Tokens <= 0 {
-		return false
-	}
-
-	tokens := c.ModelConfig.ThinkingBudget.Tokens
-
-	// Check minimum (Claude requires at least 1024 tokens for thinking)
+	tokens := c.resolveThinkingTokens()
 	if tokens < 1024 {
 		return false
 	}
@@ -310,12 +317,11 @@ func (c *Client) promptCachingEnabled() bool {

 // buildAdditionalModelRequestFields configures Claude's extended thinking (reasoning) mode.
 func (c *Client) buildAdditionalModelRequestFields() document.Interface {
-	if c.ModelConfig.ThinkingBudget == nil || c.ModelConfig.ThinkingBudget.Tokens <= 0 {
+	tokens := c.resolveThinkingTokens()
+	if tokens <= 0 {
 		return nil
 	}

-	tokens := c.ModelConfig.ThinkingBudget.Tokens
-
 	// Validate minimum (Claude requires at least 1024 tokens for thinking)
 	if tokens < 1024 {
 		slog.Warn("Bedrock thinking_budget below minimum (1024), ignoring",
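The Bedrock resolution order above (effort string first, then the explicit token count, with adaptive mode yielding no fixed budget) can be exercised in isolation. This standalone sketch re-implements ThinkingBudget, EffortTokens, and a simplified resolveTokens outside the package; names mirror the diff, but the code is illustrative rather than the project's source.

```go
package main

import (
	"fmt"
	"strings"
)

// ThinkingBudget mirrors the config type: either a string effort level or a
// token count (JSON tags and other fields omitted).
type ThinkingBudget struct {
	Effort string
	Tokens int
}

func (t *ThinkingBudget) IsAdaptive() bool {
	return t != nil && strings.EqualFold(t.Effort, "adaptive")
}

// EffortTokens maps effort strings to token budgets, as in the diff above.
func (t *ThinkingBudget) EffortTokens() (int, bool) {
	if t == nil || t.Effort == "" {
		return 0, false
	}
	switch strings.ToLower(strings.TrimSpace(t.Effort)) {
	case "minimal":
		return 1024, true
	case "low":
		return 2048, true
	case "medium":
		return 8192, true
	case "high":
		return 16384, true
	default:
		return 0, false
	}
}

// resolveTokens is a simplified stand-in for the client's resolveThinkingTokens:
// effort strings win over explicit tokens, and adaptive has no fixed budget.
func resolveTokens(b *ThinkingBudget) int {
	if b == nil || b.IsAdaptive() {
		return 0
	}
	if tokens, ok := b.EffortTokens(); ok {
		return tokens
	}
	return b.Tokens
}

func main() {
	fmt.Println(resolveTokens(&ThinkingBudget{Effort: "high"}))     // 16384
	fmt.Println(resolveTokens(&ThinkingBudget{Tokens: 4096}))       // 4096
	fmt.Println(resolveTokens(&ThinkingBudget{Effort: "adaptive"})) // 0
}
```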
