kagent-dev
diff --git a/‎go/adk/pkg/agent/agent.go‎
Lines changed: 2 additions & 0 deletions b/‎go/adk/pkg/agent/agent.go‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎go/adk/pkg/models/bedrock.go‎
Lines changed: 56 additions & 2 deletions b/‎go/adk/pkg/models/bedrock.go‎
Lines changed: 56 additions & 2 deletions
diff --git a/‎go/adk/pkg/models/bedrock_test.go‎
Lines changed: 78 additions & 4 deletions b/‎go/adk/pkg/models/bedrock_test.go‎
Lines changed: 78 additions & 4 deletions
diff --git a/‎go/api/adk/types.go‎
Lines changed: 9 additions & 0 deletions b/‎go/api/adk/types.go‎
Lines changed: 9 additions & 0 deletions
diff --git a/‎go/api/config/crd/bases/kagent.dev_modelconfigs.yaml‎
Lines changed: 38 additions & 0 deletions b/‎go/api/config/crd/bases/kagent.dev_modelconfigs.yaml‎
Lines changed: 38 additions & 0 deletions
diff --git a/‎go/api/v1alpha2/modelconfig_types.go‎
Lines changed: 36 additions & 0 deletions b/‎go/api/v1alpha2/modelconfig_types.go‎
Lines changed: 36 additions & 0 deletions
diff --git a/‎go/core/internal/controller/translator/agent/adk_api_translator.go‎
Lines changed: 2 additions & 0 deletions b/‎go/core/internal/controller/translator/agent/adk_api_translator.go‎
Lines changed: 2 additions & 0 deletions
@@ -304,6 +304,8 @@ func CreateLLM(ctx context.Context, m adk.Model, log logr.Logger) (adkmodel.LLM,
 			Model:                        modelName,
 			Region:                       region,
 			AdditionalModelRequestFields: m.AdditionalModelRequestFields,
+			PromptCaching:                m.PromptCaching,
+			CacheTTL:                     m.CacheTTL,
 		}
 		return models.NewBedrockModelWithLogger(ctx, cfg, log)
 
 
@@ -77,6 +77,33 @@ type BedrockConfig struct {
 	Temperature                  *float64
 	TopP                         *float64
 	AdditionalModelRequestFields map[string]any
+	// PromptCaching, when true, appends a default CachePoint block at the
+	// end of the Converse request's system content array and the end of
+	// the toolConfig.tools array. Bedrock caches the prefix up to and
+	// including those markers across requests in the same region; the cached
+	// prefix is billed at a reduced rate. The marker is silently ignored by
+	// Bedrock for models that do not support prompt caching.
+	PromptCaching bool
+	// CacheTTL selects the cache retention window when PromptCaching is on.
+	// "" or "5m" uses Bedrock's default 5-minute cache (broadest model
+	// support); "1h" opts into extended-TTL caching. See bedrockCachePointBlock.
+	CacheTTL string
+}
+
+// bedrockCachePointBlock builds a Converse CachePoint marker honoring the
+// configured cache TTL.
+//
+// An empty or "5m" ttl leaves the SDK Ttl field unset: Bedrock then applies its
+// standard 5-minute sliding cache, which is supported by every prompt-caching
+// model. Only "1h" sets the Ttl explicitly, opting into extended-TTL caching —
+// supported on fewer models and billed at a higher cache-write rate, so it is
+// not a free upgrade over 5m.
+func bedrockCachePointBlock(cacheTTL string) types.CachePointBlock {
+	block := types.CachePointBlock{Type: types.CachePointTypeDefault}
+	if cacheTTL == string(types.CacheTTLOneHour) {
+		block.Ttl = types.CacheTTLOneHour
+	}
+	return block
 }
 
 // BedrockModel implements model.LLM for Amazon Bedrock using the Converse API.
@@ -151,7 +178,7 @@ func (m *BedrockModel) GenerateContent(ctx context.Context, req *model.LLMReques
 		var toolConfig *types.ToolConfiguration
 		nameMap := make(map[string]string)
 		if req.Config != nil && len(req.Config.Tools) > 0 {
-			tools, nm := convertGenaiToolsToBedrock(req.Config.Tools)
+			tools, nm := convertGenaiToolsToBedrock(req.Config.Tools, m.Config.PromptCaching, m.Config.CacheTTL)
 			nameMap = nm
 			if len(tools) > 0 {
 				toolConfig = &types.ToolConfiguration{
@@ -182,6 +209,16 @@ func (m *BedrockModel) GenerateContent(ctx context.Context, req *model.LLMReques
 				Value: systemInstruction,
 			})
 		}
+		// If prompt caching is enabled, mark the end of the system content
+		// as a cache breakpoint. Bedrock caches everything up to and including
+		// this point for the configured CacheTTL (5 minutes by default); later
+		// requests with the same prefix hit the cache. Skipped for an empty
+		// system prompt — a marker with nothing before it caches nothing.
+		if m.Config.PromptCaching && len(systemPrompt) > 0 {
+			systemPrompt = append(systemPrompt, &types.SystemContentBlockMemberCachePoint{
+				Value: bedrockCachePointBlock(m.Config.CacheTTL),
+			})
+		}
 
 		additionalFields := m.buildAdditionalModelRequestFields()
 
@@ -654,7 +691,13 @@ func convertGenaiContentsToBedrockMessages(contents []*genai.Content, nameMap ma
 // It sanitizes tool names to satisfy Bedrock's [a-zA-Z0-9_-]+ constraint and
 // returns the original->sanitized name mapping so callers can apply it to
 // conversation history and reverse it when restoring names from responses.
-func convertGenaiToolsToBedrock(tools []*genai.Tool) ([]types.Tool, map[string]string) {
+//
+// When promptCaching is true, a CachePoint marker is appended after the
+// last tool spec — Bedrock then caches the entire (typically large) tool
+// definitions array, billing the prefix at a reduced rate on cache hits. The
+// cacheTTL argument selects the retention window for that marker (see
+// bedrockCachePointBlock).
+func convertGenaiToolsToBedrock(tools []*genai.Tool, promptCaching bool, cacheTTL string) ([]types.Tool, map[string]string) {
 	if len(tools) == 0 {
 		return nil, nil
 	}
@@ -711,6 +754,17 @@ func convertGenaiToolsToBedrock(tools []*genai.Tool) ([]types.Tool, map[string]s
 		}
 	}
 
+	// If prompt caching is enabled, append a CachePoint at the END of the
+	// tool list. Bedrock caches the entire tool definitions array up to
+	// this marker; this is usually the biggest single chunk of static
+	// prefix in an agent conversation and benefits most from caching.
+	// Skipped when there are no tools — a cache marker by itself is a no-op.
+	if promptCaching && len(bedrockTools) > 0 {
+		bedrockTools = append(bedrockTools, &types.ToolMemberCachePoint{
+			Value: bedrockCachePointBlock(cacheTTL),
+		})
+	}
+
 	return bedrockTools, nameMap
 }
 
 
@@ -198,7 +198,7 @@ func TestConvertGenaiToolsToBedrock(t *testing.T) {
 			},
 		}}}}
 
-		bt1, nm1 := convertGenaiToolsToBedrock(tools)
+		bt1, nm1 := convertGenaiToolsToBedrock(tools, false, "")
 		schema := extractSchema(t, bt1, nm1)
 
 		props := schema["properties"].(map[string]any)
@@ -226,7 +226,7 @@ func TestConvertGenaiToolsToBedrock(t *testing.T) {
 			},
 		}}}}
 
-		bt2, nm2 := convertGenaiToolsToBedrock(tools)
+		bt2, nm2 := convertGenaiToolsToBedrock(tools, false, "")
 		schema := extractSchema(t, bt2, nm2)
 		props, ok := schema["properties"].(map[string]any)
 		if !ok || len(props) == 0 {
@@ -247,7 +247,7 @@ func TestConvertGenaiToolsToBedrock(t *testing.T) {
 			ParametersJsonSchema: s,
 		}}}}
 
-		bt3, nm3 := convertGenaiToolsToBedrock(tools)
+		bt3, nm3 := convertGenaiToolsToBedrock(tools, false, "")
 		schema := extractSchema(t, bt3, nm3)
 		props, ok := schema["properties"].(map[string]any)
 		if !ok || len(props) == 0 {
@@ -402,7 +402,7 @@ func TestConvertGenaiToolsToBedrockSanitizesNames(t *testing.T) {
 		{Name: "filesystem:read_file", Description: "Read a file"},
 	}}}
 
-	bedrockTools, nameMap := convertGenaiToolsToBedrock(tools)
+	bedrockTools, nameMap := convertGenaiToolsToBedrock(tools, false, "")
 	if len(bedrockTools) != 2 {
 		t.Fatalf("expected 2 tools, got %d", len(bedrockTools))
 	}
@@ -672,3 +672,77 @@ func TestBuildInferenceConfig(t *testing.T) {
 		})
 	}
 }
+
+func TestConvertGenaiToolsToBedrockPromptCaching(t *testing.T) {
+	tools := []*genai.Tool{{FunctionDeclarations: []*genai.FunctionDeclaration{
+		{Name: "get_weather", Description: "lookup weather"},
+		{Name: "list_pods", Description: "list pods"},
+	}}}
+
+	t.Run("disabled: no cache marker appended", func(t *testing.T) {
+		out, _ := convertGenaiToolsToBedrock(tools, false, "")
+		if len(out) != 2 {
+			t.Fatalf("expected 2 tools, got %d", len(out))
+		}
+		for i, tool := range out {
+			if _, ok := tool.(*types.ToolMemberCachePoint); ok {
+				t.Fatalf("did not expect a CachePoint at index %d when caching disabled", i)
+			}
+		}
+	})
+
+	t.Run("enabled: cache marker appended at the END of the tool list", func(t *testing.T) {
+		out, _ := convertGenaiToolsToBedrock(tools, true, "")
+		if len(out) != 3 {
+			t.Fatalf("expected 3 entries (2 tools + 1 CachePoint), got %d", len(out))
+		}
+		// The first two must remain ToolSpec entries (order preserved).
+		for i := range 2 {
+			if _, ok := out[i].(*types.ToolMemberToolSpec); !ok {
+				t.Fatalf("entry %d: expected ToolMemberToolSpec, got %T", i, out[i])
+			}
+		}
+		// The trailing entry must be a CachePoint with type=default.
+		cp, ok := out[2].(*types.ToolMemberCachePoint)
+		if !ok {
+			t.Fatalf("trailing entry: expected ToolMemberCachePoint, got %T", out[2])
+		}
+		if cp.Value.Type != types.CachePointTypeDefault {
+			t.Errorf("expected CachePointType=default, got %v", cp.Value.Type)
+		}
+		// Default (empty) TTL must leave Ttl unset so Bedrock applies its
+		// standard 5-minute cache (broadest model support).
+		if cp.Value.Ttl != "" {
+			t.Errorf("expected unset Ttl for default cache, got %q", cp.Value.Ttl)
+		}
+	})
+
+	t.Run(`cacheTTL "5m": Ttl left unset (default 5-minute cache)`, func(t *testing.T) {
+		out, _ := convertGenaiToolsToBedrock(tools, true, "5m")
+		cp, ok := out[len(out)-1].(*types.ToolMemberCachePoint)
+		if !ok {
+			t.Fatalf("trailing entry: expected ToolMemberCachePoint, got %T", out[len(out)-1])
+		}
+		if cp.Value.Ttl != "" {
+			t.Errorf("expected unset Ttl for 5m, got %q", cp.Value.Ttl)
+		}
+	})
+
+	t.Run(`cacheTTL "1h": Ttl set to extended-TTL caching`, func(t *testing.T) {
+		out, _ := convertGenaiToolsToBedrock(tools, true, "1h")
+		cp, ok := out[len(out)-1].(*types.ToolMemberCachePoint)
+		if !ok {
+			t.Fatalf("trailing entry: expected ToolMemberCachePoint, got %T", out[len(out)-1])
+		}
+		if cp.Value.Ttl != types.CacheTTLOneHour {
+			t.Errorf("expected Ttl=%q, got %q", types.CacheTTLOneHour, cp.Value.Ttl)
+		}
+	})
+
+	t.Run("enabled but no tools: no cache marker (skipped)", func(t *testing.T) {
+		out, _ := convertGenaiToolsToBedrock(nil, true, "")
+		if len(out) != 0 {
+			t.Fatalf("expected empty slice for no tools, got %d entries", len(out))
+		}
+	})
+}
@@ -251,6 +251,15 @@ type Bedrock struct {
 	// additionalModelRequestFields in the Converse API. Use this for provider-specific
 	// options outside the standard InferenceConfiguration block.
 	AdditionalModelRequestFields map[string]any `json:"additional_model_request_fields,omitempty"`
+	// PromptCaching enables Bedrock prompt caching by appending a CachePoint
+	// block to the end of the system content array and the end of the
+	// toolConfig.tools array in the Converse request. See the
+	// v1alpha2.BedrockConfig CRD doc for context.
+	PromptCaching bool `json:"prompt_caching,omitempty"`
+	// CacheTTL selects the cache retention window when PromptCaching is on:
+	// "5m" (default) or "1h". See the v1alpha2.BedrockConfig CRD doc for the
+	// cost/compatibility trade-offs of "1h".
+	CacheTTL string `json:"cache_ttl,omitempty"`
 }
 
 func (b *Bedrock) MarshalJSON() ([]byte, error) {
 
@@ -483,6 +483,44 @@ spec:
                       Claude extended thinking or top_k. Values are forwarded as-is to the API.
                       Example: {"top_k": 5, "thinking": {"type": "enabled", "budget_tokens": 16000}}
                     x-kubernetes-preserve-unknown-fields: true
+                  cacheTTL:
+                    default: 5m
+                    description: |-
+                      CacheTTL controls how long Bedrock retains a cached prefix when
+                      PromptCaching is enabled. Only meaningful when PromptCaching is true.
+
+                        - "5m" (default): Bedrock's standard 5-minute sliding cache. Each cache
+                          hit refreshes the window. Supported by all prompt-caching models.
+                        - "1h": extended-TTL caching, useful for tasks whose Converse calls are
+                          spaced more than 5 minutes apart.
+
+                      NOTE: "1h" is NOT strictly better than "5m". Extended-TTL cache writes are
+                      billed at a higher per-token rate than 5-minute writes, and 1h is supported
+                      on a narrower set of models. Only choose "1h" when calls are spaced far
+                      enough apart that a 5-minute cache would expire between them; otherwise the
+                      higher write cost is wasted. See the AWS docs linked under PromptCaching.
+                    enum:
+                    - 5m
+                    - 1h
+                    type: string
+                  promptCaching:
+                    default: false
+                    description: |-
+                      PromptCaching enables Bedrock prompt caching by appending a CachePoint
+                      block at the end of the Converse request's `system` content array and
+                      the end of the `toolConfig.tools` array. Bedrock will cache the prefix up to and
+                      including those cache points across requests in the same region for
+                      the CacheTTL window (5 minutes by default), billing the cached portion at a
+                      reduced rate on cache hits.
+
+                      Recommended for tool-using agents that make many Converse calls per
+                      task with a stable system prompt and tool set — the per-call input
+                      token count can drop by 70-90% on hit. Has no effect on models that
+                      don't support caching; the marker is ignored by Bedrock for those.
+
+                      See https://docs.aws.amazon.com/bedrock/latest/userguide/prompt-caching.html
+                      for the current list of supported models and minimum prefix sizes.
+                    type: boolean
                   region:
                     description: AWS region where the Bedrock model is available (e.g.,
                       us-east-1, us-west-2)
 
@@ -256,6 +256,42 @@ type BedrockConfig struct {
 	// +optional
 	// +kubebuilder:pruning:PreserveUnknownFields
 	AdditionalModelRequestFields *apiextensionsv1.JSON `json:"additionalModelRequestFields,omitempty"`
+
+	// PromptCaching enables Bedrock prompt caching by appending a CachePoint
+	// block at the end of the Converse request's `system` content array and
+	// the end of the `toolConfig.tools` array. Bedrock will cache the prefix up to and
+	// including those cache points across requests in the same region for
+	// the CacheTTL window (5 minutes by default), billing the cached portion at a
+	// reduced rate on cache hits.
+	//
+	// Recommended for tool-using agents that make many Converse calls per
+	// task with a stable system prompt and tool set — the per-call input
+	// token count can drop by 70-90% on hit. Has no effect on models that
+	// don't support caching; the marker is ignored by Bedrock for those.
+	//
+	// See https://docs.aws.amazon.com/bedrock/latest/userguide/prompt-caching.html
+	// for the current list of supported models and minimum prefix sizes.
+	// +optional
+	// +kubebuilder:default=false
+	PromptCaching bool `json:"promptCaching,omitempty"`
+
+	// CacheTTL controls how long Bedrock retains a cached prefix when
+	// PromptCaching is enabled. Only meaningful when PromptCaching is true.
+	//
+	//   - "5m" (default): Bedrock's standard 5-minute sliding cache. Each cache
+	//     hit refreshes the window. Supported by all prompt-caching models.
+	//   - "1h": extended-TTL caching, useful for tasks whose Converse calls are
+	//     spaced more than 5 minutes apart.
+	//
+	// NOTE: "1h" is NOT strictly better than "5m". Extended-TTL cache writes are
+	// billed at a higher per-token rate than 5-minute writes, and 1h is supported
+	// on a narrower set of models. Only choose "1h" when calls are spaced far
+	// enough apart that a 5-minute cache would expire between them; otherwise the
+	// higher write cost is wasted. See the AWS docs linked under PromptCaching.
+	// +optional
+	// +kubebuilder:validation:Enum="5m";"1h"
+	// +kubebuilder:default="5m"
+	CacheTTL string `json:"cacheTTL,omitempty"`
 }
 
 // SAPAICoreConfig contains SAP AI Core-specific configuration options.
 
@@ -806,6 +806,8 @@ func (a *adkApiTranslator) translateModel(ctx context.Context, namespace, modelC
 			},
 			Region:                       model.Spec.Bedrock.Region,
 			AdditionalModelRequestFields: additionalFields,
+			PromptCaching:                model.Spec.Bedrock.PromptCaching,
+			CacheTTL:                     model.Spec.Bedrock.CacheTTL,
 		}
 
 		// Populate TLS fields in BaseModel
Original file line number	Diff line number	Diff line change
`@@ -304,6 +304,8 @@ func CreateLLM(ctx context.Context, m adk.Model, log logr.Logger) (adkmodel.LLM,`
`304`	`304`	`Model: modelName,`
`305`	`305`	`Region: region,`
`306`	`306`	`AdditionalModelRequestFields: m.AdditionalModelRequestFields,`
	`307`	`+ PromptCaching: m.PromptCaching,`
	`308`	`+ CacheTTL: m.CacheTTL,`
`307`	`309`	`}`
`308`	`310`	`return models.NewBedrockModelWithLogger(ctx, cfg, log)`
`309`	`311`
Original file line number	Diff line number	Diff line change
`@@ -806,6 +806,8 @@ func (a *adkApiTranslator) translateModel(ctx context.Context, namespace, modelC`
`806`	`806`	`},`
`807`	`807`	`Region: model.Spec.Bedrock.Region,`
`808`	`808`	`AdditionalModelRequestFields: additionalFields,`
	`809`	`+ PromptCaching: model.Spec.Bedrock.PromptCaching,`
	`810`	`+ CacheTTL: model.Spec.Bedrock.CacheTTL,`
`809`	`811`	`}`
`810`	`812`
`811`	`813`	`// Populate TLS fields in BaseModel`