Skip to content

Commit c91089d

Browse files
authored
Merge branch 'main' into fix/skills-privileged-pss
2 parents 8a35f7f + b9aae28 commit c91089d

15 files changed

Lines changed: 1207 additions & 305 deletions

File tree

go/adk/pkg/agent/agent.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -304,6 +304,8 @@ func CreateLLM(ctx context.Context, m adk.Model, log logr.Logger) (adkmodel.LLM,
304304
Model: modelName,
305305
Region: region,
306306
AdditionalModelRequestFields: m.AdditionalModelRequestFields,
307+
PromptCaching: m.PromptCaching,
308+
CacheTTL: m.CacheTTL,
307309
}
308310
return models.NewBedrockModelWithLogger(ctx, cfg, log)
309311

go/adk/pkg/models/bedrock.go

Lines changed: 56 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,33 @@ type BedrockConfig struct {
7777
Temperature *float64
7878
TopP *float64
7979
AdditionalModelRequestFields map[string]any
80+
// PromptCaching, when true, appends a default CachePoint block at the
81+
// end of the Converse request's system content array and the end of
82+
// the toolConfig.tools array. Bedrock caches up to and including those markers
83+
// across requests in the same region; cached prefix is billed at a
84+
// reduced rate. The marker is silently ignored by Bedrock for models
85+
// that do not support prompt caching.
86+
PromptCaching bool
87+
// CacheTTL selects the cache retention window when PromptCaching is on.
88+
// "" or "5m" uses Bedrock's default 5-minute cache (broadest model
89+
// support); "1h" opts into extended-TTL caching. See bedrockCachePointBlock.
90+
CacheTTL string
91+
}
92+
93+
// bedrockCachePointBlock builds a Converse CachePoint marker honoring the
94+
// configured cache TTL.
95+
//
96+
// Any ttl other than "1h" (including "" and "5m") leaves the SDK Ttl field unset: Bedrock then applies its
97+
// standard 5-minute sliding cache, which is supported by every prompt-caching
98+
// model. Only "1h" sets the Ttl explicitly, opting into extended-TTL caching —
99+
// supported on fewer models and billed at a higher cache-write rate, so it is
100+
// not a free upgrade over 5m.
101+
func bedrockCachePointBlock(cacheTTL string) types.CachePointBlock {
102+
block := types.CachePointBlock{Type: types.CachePointTypeDefault}
103+
if cacheTTL == string(types.CacheTTLOneHour) {
104+
block.Ttl = types.CacheTTLOneHour
105+
}
106+
return block
80107
}
81108

82109
// BedrockModel implements model.LLM for Amazon Bedrock using the Converse API.
@@ -151,7 +178,7 @@ func (m *BedrockModel) GenerateContent(ctx context.Context, req *model.LLMReques
151178
var toolConfig *types.ToolConfiguration
152179
nameMap := make(map[string]string)
153180
if req.Config != nil && len(req.Config.Tools) > 0 {
154-
tools, nm := convertGenaiToolsToBedrock(req.Config.Tools)
181+
tools, nm := convertGenaiToolsToBedrock(req.Config.Tools, m.Config.PromptCaching, m.Config.CacheTTL)
155182
nameMap = nm
156183
if len(tools) > 0 {
157184
toolConfig = &types.ToolConfiguration{
@@ -182,6 +209,16 @@ func (m *BedrockModel) GenerateContent(ctx context.Context, req *model.LLMReques
182209
Value: systemInstruction,
183210
})
184211
}
212+
// If prompt caching is enabled, mark the end of the system content
213+
// as a cache breakpoint. Bedrock caches everything up to and including
214+
// this point for the configured CacheTTL (5 minutes by default); subsequent requests with the same prefix
215+
// hit the cache. Skipped for empty systems — caching nothing is a no-op
216+
// that wastes a marker.
217+
if m.Config.PromptCaching && len(systemPrompt) > 0 {
218+
systemPrompt = append(systemPrompt, &types.SystemContentBlockMemberCachePoint{
219+
Value: bedrockCachePointBlock(m.Config.CacheTTL),
220+
})
221+
}
185222

186223
additionalFields := m.buildAdditionalModelRequestFields()
187224

@@ -654,7 +691,13 @@ func convertGenaiContentsToBedrockMessages(contents []*genai.Content, nameMap ma
654691
// It sanitizes tool names to satisfy Bedrock's [a-zA-Z0-9_-]+ constraint and
655692
// returns the original->sanitized name mapping so callers can apply it to
656693
// conversation history and reverse it when restoring names from responses.
657-
func convertGenaiToolsToBedrock(tools []*genai.Tool) ([]types.Tool, map[string]string) {
694+
//
695+
// When promptCaching is true, a CachePoint marker is appended after the
696+
// last tool spec — Bedrock then caches the entire (typically large) tool
697+
// definitions array, billing the prefix at a reduced rate on cache hits. The
698+
// cacheTTL argument selects the retention window for that marker (see
699+
// bedrockCachePointBlock).
700+
func convertGenaiToolsToBedrock(tools []*genai.Tool, promptCaching bool, cacheTTL string) ([]types.Tool, map[string]string) {
658701
if len(tools) == 0 {
659702
return nil, nil
660703
}
@@ -711,6 +754,17 @@ func convertGenaiToolsToBedrock(tools []*genai.Tool) ([]types.Tool, map[string]s
711754
}
712755
}
713756

757+
// If prompt caching is enabled, append a CachePoint at the END of the
758+
// tool list. Bedrock caches the entire tool definitions array up to
759+
// this marker; this is usually the biggest single chunk of static
760+
// prefix in an agent conversation and benefits most from caching.
761+
// Skipped when there are no tools — a cache marker by itself is a no-op.
762+
if promptCaching && len(bedrockTools) > 0 {
763+
bedrockTools = append(bedrockTools, &types.ToolMemberCachePoint{
764+
Value: bedrockCachePointBlock(cacheTTL),
765+
})
766+
}
767+
714768
return bedrockTools, nameMap
715769
}
716770

go/adk/pkg/models/bedrock_test.go

Lines changed: 78 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -198,7 +198,7 @@ func TestConvertGenaiToolsToBedrock(t *testing.T) {
198198
},
199199
}}}}
200200

201-
bt1, nm1 := convertGenaiToolsToBedrock(tools)
201+
bt1, nm1 := convertGenaiToolsToBedrock(tools, false, "")
202202
schema := extractSchema(t, bt1, nm1)
203203

204204
props := schema["properties"].(map[string]any)
@@ -226,7 +226,7 @@ func TestConvertGenaiToolsToBedrock(t *testing.T) {
226226
},
227227
}}}}
228228

229-
bt2, nm2 := convertGenaiToolsToBedrock(tools)
229+
bt2, nm2 := convertGenaiToolsToBedrock(tools, false, "")
230230
schema := extractSchema(t, bt2, nm2)
231231
props, ok := schema["properties"].(map[string]any)
232232
if !ok || len(props) == 0 {
@@ -247,7 +247,7 @@ func TestConvertGenaiToolsToBedrock(t *testing.T) {
247247
ParametersJsonSchema: s,
248248
}}}}
249249

250-
bt3, nm3 := convertGenaiToolsToBedrock(tools)
250+
bt3, nm3 := convertGenaiToolsToBedrock(tools, false, "")
251251
schema := extractSchema(t, bt3, nm3)
252252
props, ok := schema["properties"].(map[string]any)
253253
if !ok || len(props) == 0 {
@@ -402,7 +402,7 @@ func TestConvertGenaiToolsToBedrockSanitizesNames(t *testing.T) {
402402
{Name: "filesystem:read_file", Description: "Read a file"},
403403
}}}
404404

405-
bedrockTools, nameMap := convertGenaiToolsToBedrock(tools)
405+
bedrockTools, nameMap := convertGenaiToolsToBedrock(tools, false, "")
406406
if len(bedrockTools) != 2 {
407407
t.Fatalf("expected 2 tools, got %d", len(bedrockTools))
408408
}
@@ -672,3 +672,77 @@ func TestBuildInferenceConfig(t *testing.T) {
672672
})
673673
}
674674
}
675+
676+
func TestConvertGenaiToolsToBedrockPromptCaching(t *testing.T) {
677+
tools := []*genai.Tool{{FunctionDeclarations: []*genai.FunctionDeclaration{
678+
{Name: "get_weather", Description: "lookup weather"},
679+
{Name: "list_pods", Description: "list pods"},
680+
}}}
681+
682+
t.Run("disabled: no cache marker appended", func(t *testing.T) {
683+
out, _ := convertGenaiToolsToBedrock(tools, false, "")
684+
if len(out) != 2 {
685+
t.Fatalf("expected 2 tools, got %d", len(out))
686+
}
687+
for i, tool := range out {
688+
if _, ok := tool.(*types.ToolMemberCachePoint); ok {
689+
t.Fatalf("did not expect a CachePoint at index %d when caching disabled", i)
690+
}
691+
}
692+
})
693+
694+
t.Run("enabled: cache marker appended at the END of the tool list", func(t *testing.T) {
695+
out, _ := convertGenaiToolsToBedrock(tools, true, "")
696+
if len(out) != 3 {
697+
t.Fatalf("expected 3 entries (2 tools + 1 CachePoint), got %d", len(out))
698+
}
699+
// The first two must remain ToolSpec entries (order preserved).
700+
for i := range 2 {
701+
if _, ok := out[i].(*types.ToolMemberToolSpec); !ok {
702+
t.Fatalf("entry %d: expected ToolMemberToolSpec, got %T", i, out[i])
703+
}
704+
}
705+
// The trailing entry must be a CachePoint with type=default.
706+
cp, ok := out[2].(*types.ToolMemberCachePoint)
707+
if !ok {
708+
t.Fatalf("trailing entry: expected ToolMemberCachePoint, got %T", out[2])
709+
}
710+
if cp.Value.Type != types.CachePointTypeDefault {
711+
t.Errorf("expected CachePointType=default, got %v", cp.Value.Type)
712+
}
713+
// Default (empty) TTL must leave Ttl unset so Bedrock applies its
714+
// standard 5-minute cache (broadest model support).
715+
if cp.Value.Ttl != "" {
716+
t.Errorf("expected unset Ttl for default cache, got %q", cp.Value.Ttl)
717+
}
718+
})
719+
720+
t.Run(`cacheTTL "5m": Ttl left unset (default 5-minute cache)`, func(t *testing.T) {
721+
out, _ := convertGenaiToolsToBedrock(tools, true, "5m")
722+
cp, ok := out[len(out)-1].(*types.ToolMemberCachePoint)
723+
if !ok {
724+
t.Fatalf("trailing entry: expected ToolMemberCachePoint, got %T", out[len(out)-1])
725+
}
726+
if cp.Value.Ttl != "" {
727+
t.Errorf("expected unset Ttl for 5m, got %q", cp.Value.Ttl)
728+
}
729+
})
730+
731+
t.Run(`cacheTTL "1h": Ttl set to extended-TTL caching`, func(t *testing.T) {
732+
out, _ := convertGenaiToolsToBedrock(tools, true, "1h")
733+
cp, ok := out[len(out)-1].(*types.ToolMemberCachePoint)
734+
if !ok {
735+
t.Fatalf("trailing entry: expected ToolMemberCachePoint, got %T", out[len(out)-1])
736+
}
737+
if cp.Value.Ttl != types.CacheTTLOneHour {
738+
t.Errorf("expected Ttl=%q, got %q", types.CacheTTLOneHour, cp.Value.Ttl)
739+
}
740+
})
741+
742+
t.Run("enabled but no tools: no cache marker (skipped)", func(t *testing.T) {
743+
out, _ := convertGenaiToolsToBedrock(nil, true, "")
744+
if len(out) != 0 {
745+
t.Fatalf("expected empty slice for no tools, got %d entries", len(out))
746+
}
747+
})
748+
}

go/api/adk/types.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -251,6 +251,15 @@ type Bedrock struct {
251251
// additionalModelRequestFields in the Converse API. Use this for provider-specific
252252
// options outside the standard InferenceConfiguration block.
253253
AdditionalModelRequestFields map[string]any `json:"additional_model_request_fields,omitempty"`
254+
// PromptCaching enables Bedrock prompt caching by appending a CachePoint
255+
// block to the end of the system content array and the end of the
256+
// toolConfig.tools array in the Converse request. See the
257+
// v1alpha2.BedrockConfig CRD doc for context.
258+
PromptCaching bool `json:"prompt_caching,omitempty"`
259+
// CacheTTL selects the cache retention window when PromptCaching is on:
260+
// "5m" (default) or "1h". See the v1alpha2.BedrockConfig CRD doc for the
261+
// cost/compatibility trade-offs of "1h".
262+
CacheTTL string `json:"cache_ttl,omitempty"`
254263
}
255264

256265
func (b *Bedrock) MarshalJSON() ([]byte, error) {

go/api/config/crd/bases/kagent.dev_modelconfigs.yaml

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -483,6 +483,44 @@ spec:
483483
Claude extended thinking or top_k. Values are forwarded as-is to the API.
484484
Example: {"top_k": 5, "thinking": {"type": "enabled", "budget_tokens": 16000}}
485485
x-kubernetes-preserve-unknown-fields: true
486+
cacheTTL:
487+
default: 5m
488+
description: |-
489+
CacheTTL controls how long Bedrock retains a cached prefix when
490+
PromptCaching is enabled. Only meaningful when PromptCaching is true.
491+
492+
- "5m" (default): Bedrock's standard 5-minute sliding cache. Each cache
493+
hit refreshes the window. Supported by all prompt-caching models.
494+
- "1h": extended-TTL caching, useful for tasks whose Converse calls are
495+
spaced more than 5 minutes apart.
496+
497+
NOTE: "1h" is NOT strictly better than "5m". Extended-TTL cache writes are
498+
billed at a higher per-token rate than 5-minute writes, and 1h is supported
499+
on a narrower set of models. Only choose "1h" when calls are spaced far
500+
enough apart that a 5-minute cache would expire between them; otherwise the
501+
higher write cost is wasted. See https://docs.aws.amazon.com/bedrock/latest/userguide/prompt-caching.html
502+
enum:
503+
- 5m
504+
- 1h
505+
type: string
506+
promptCaching:
507+
default: false
508+
description: |-
509+
PromptCaching enables Bedrock prompt caching by appending a CachePoint
510+
block at the end of the Converse request's `system` content array and
511+
the end of the `toolConfig.tools` array. Bedrock will cache the prefix up to and
512+
including those cache points across requests in the same region for
513+
the CacheTTL window (by default 5 minutes, refreshed on each cache hit), billing the cached portion at a
514+
reduced rate on cache hits.
515+
516+
Recommended for tool-using agents that make many Converse calls per
517+
task with a stable system prompt and tool set — the per-call input
518+
token count can drop by 70-90% on hit. Has no effect on models that
519+
don't support caching; the marker is ignored by Bedrock for those.
520+
521+
See https://docs.aws.amazon.com/bedrock/latest/userguide/prompt-caching.html
522+
for the current list of supported models and minimum prefix sizes.
523+
type: boolean
486524
region:
487525
description: AWS region where the Bedrock model is available (e.g.,
488526
us-east-1, us-west-2)

go/api/v1alpha2/modelconfig_types.go

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -256,6 +256,42 @@ type BedrockConfig struct {
256256
// +optional
257257
// +kubebuilder:pruning:PreserveUnknownFields
258258
AdditionalModelRequestFields *apiextensionsv1.JSON `json:"additionalModelRequestFields,omitempty"`
259+
260+
// PromptCaching enables Bedrock prompt caching by appending a CachePoint
261+
// block at the end of the Converse request's `system` content array and
262+
// the end of the `toolConfig.tools` array. Bedrock will cache the prefix up to and
263+
// including those cache points across requests in the same region for
264+
// the CacheTTL window (by default 5 minutes, refreshed on each cache hit), billing the cached portion at a
265+
// reduced rate on cache hits.
266+
//
267+
// Recommended for tool-using agents that make many Converse calls per
268+
// task with a stable system prompt and tool set — the per-call input
269+
// token count can drop by 70-90% on hit. Has no effect on models that
270+
// don't support caching; the marker is ignored by Bedrock for those.
271+
//
272+
// See https://docs.aws.amazon.com/bedrock/latest/userguide/prompt-caching.html
273+
// for the current list of supported models and minimum prefix sizes.
274+
// +optional
275+
// +kubebuilder:default=false
276+
PromptCaching bool `json:"promptCaching,omitempty"`
277+
278+
// CacheTTL controls how long Bedrock retains a cached prefix when
279+
// PromptCaching is enabled. Only meaningful when PromptCaching is true.
280+
//
281+
// - "5m" (default): Bedrock's standard 5-minute sliding cache. Each cache
282+
// hit refreshes the window. Supported by all prompt-caching models.
283+
// - "1h": extended-TTL caching, useful for tasks whose Converse calls are
284+
// spaced more than 5 minutes apart.
285+
//
286+
// NOTE: "1h" is NOT strictly better than "5m". Extended-TTL cache writes are
287+
// billed at a higher per-token rate than 5-minute writes, and 1h is supported
288+
// on a narrower set of models. Only choose "1h" when calls are spaced far
289+
// enough apart that a 5-minute cache would expire between them; otherwise the
290+
// higher write cost is wasted. See https://docs.aws.amazon.com/bedrock/latest/userguide/prompt-caching.html
291+
// +optional
292+
// +kubebuilder:validation:Enum="5m";"1h"
293+
// +kubebuilder:default="5m"
294+
CacheTTL string `json:"cacheTTL,omitempty"`
259295
}
260296

261297
// SAPAICoreConfig contains SAP AI Core-specific configuration options.

go/core/internal/controller/translator/agent/adk_api_translator.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -806,6 +806,8 @@ func (a *adkApiTranslator) translateModel(ctx context.Context, namespace, modelC
806806
},
807807
Region: model.Spec.Bedrock.Region,
808808
AdditionalModelRequestFields: additionalFields,
809+
PromptCaching: model.Spec.Bedrock.PromptCaching,
810+
CacheTTL: model.Spec.Bedrock.CacheTTL,
809811
}
810812

811813
// Populate TLS fields in BaseModel

0 commit comments

Comments
 (0)