Skip to content

Commit d241a81

Browse files
authored
Merge branch 'main' into epd-sidecar
2 parents e06754f + ed55c9c commit d241a81

File tree

3 files changed, with 10 additions and 16 deletions.

go.mod

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@ require (
  8 |   8 |   	github.com/google/uuid v1.6.0
  9 |   9 |   	github.com/hashicorp/golang-lru/v2 v2.0.7
 10 |  10 |   	github.com/jellydator/ttlcache/v3 v3.4.0
 11 |     | - 	github.com/llm-d/llm-d-kv-cache v0.5.1-0.20260218092800-cbfd0b78c70b
    |  11 | + 	github.com/llm-d/llm-d-kv-cache v0.5.1-rc2
 12 |  12 |   	github.com/onsi/ginkgo/v2 v2.28.1
 13 |  13 |   	github.com/onsi/gomega v1.39.1
 14 |  14 |   	github.com/openai/openai-go v1.12.0

go.sum

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -211,6 +211,8 @@ github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0
211 | 211 |   github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw=
212 | 212 |   github.com/llm-d/llm-d-kv-cache v0.5.1-0.20260218092800-cbfd0b78c70b h1:246D1wR0JnYe6rz7E5RRmyHqM9L8a1S1qnXgnmo4GEg=
213 | 213 |   github.com/llm-d/llm-d-kv-cache v0.5.1-0.20260218092800-cbfd0b78c70b/go.mod h1:/Llnuds5IN8xyc4y2ikyXeLR6XDdU5g2Ff2zwVB12Wc=
    | 214 | + github.com/llm-d/llm-d-kv-cache v0.5.1-rc2 h1:WQ0zd3nkgjQ5vxYHAExzTI2urdUim1I275Hn8dukcdA=
    | 215 | + github.com/llm-d/llm-d-kv-cache v0.5.1-rc2/go.mod h1:y78v3jkKXoSD5MKFnkuyNUZ0XliGfW6CwZjD4UClwOE=
214 | 216 |   github.com/maruel/natural v1.1.1 h1:Hja7XhhmvEFhcByqDoHz9QZbkWey+COd9xWfCfn1ioo=
215 | 217 |   github.com/maruel/natural v1.1.1/go.mod h1:v+Rfd79xlw1AgVBjbO0BEQmptqb5HvL/k9GRHB7ZKEg=
216 | 218 |   github.com/mfridman/tparse v0.18.0 h1:wh6dzOKaIwkUGyKgOntDW4liXSo37qg5AXbIhkMV3vE=

pkg/plugins/scorer/precise_prefix_cache_test.go

Lines changed: 7 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -234,20 +234,13 @@ func TestPrefixCacheTracking_Score(t *testing.T) {
234 | 234 |   })
235 | 235 |   require.NoError(t, err)
236 | 236 |
237 |     | - // render the chat template
238 |     | - renderReq := &preprocessing.ApplyChatTemplateRequest{
    | 237 | + // render the chat template and tokenize
    | 238 | + renderReq := &preprocessing.RenderChatRequest{
239 | 239 |   	Key:          tokenizerCacheKey,
240 |     | - 	Conversation: [][]preprocessing.Conversation{conversations},
    | 240 | + 	Conversation: conversations,
241 | 241 |   	ChatTemplate: req.ChatCompletions.ChatTemplate,
242 | 242 |   }
243 |     | - rendered, err := processor.ApplyChatTemplate(t.Context(), renderReq)
244 |     | - require.NoError(t, err)
245 |     | -
246 |     | - // tokenize rendered prompt
247 |     | - testTokenizer, err := tokenization.NewCachedLocalTokenizer(t.Context(), model, localTokenizerConfig)
248 |     | - require.NoError(t, err)
249 |     | -
250 |     | - tokens, _, err := testTokenizer.Encode(rendered, model, false)
    | 243 | + tokens, _, err := processor.RenderChat(t.Context(), renderReq)
251 | 244 |   require.NoError(t, err)
252 | 245 |
253 | 246 |   tokenProcessor := kvblock.NewChunkedTokenDatabase(kvblock.DefaultTokenProcessorConfig())
@@ -539,10 +532,9 @@ func TestPrefixCacheTracking_Score(t *testing.T) {
539 | 532 |
540 | 533 |   kvcacheConfig, err := kvcache.NewDefaultConfig()
541 | 534 |   kvcacheConfig.TokenizersPoolConfig = &tokenization.Config{
542 |     | - 	ModelName:             "test-model",
543 |     | - 	WorkersCount:          1,
544 |     | - 	MinPrefixOverlapRatio: 0.8,
545 |     | - 	LocalTokenizerConfig:  &localTokenizerConfig,
    | 535 | + 	ModelName:            "test-model",
    | 536 | + 	WorkersCount:         1,
    | 537 | + 	LocalTokenizerConfig: &localTokenizerConfig,
546 | 538 |   }
547 | 539 |   require.NoError(t, err)
548 | 540 |

0 commit comments

Comments
 (0)