Skip to content

Commit 91a8393

Browse files
committed
fix: config should use new precise-prefix-cache-scorer
- we renamed prefix-cache-scorer to precise-prefix-cache-scorer in 0.3.0; configs need to migrate from the old plugin to the new one, following its spec. - rename plugin name - remove parameters.autoTune, parameters.mode: cache_tracking and lruCapacityPerServer - move hashBlockSize (now tokenProcessorConfig.blockSize) and maxPrefixBlocksToMatch (now under kvBlockIndexConfig) under indexerConfig - for configs using food-review, keep the old prefix-cache-scorer Signed-off-by: Wen Zhou <wenzhou@redhat.com>
1 parent 4884fc2 commit 91a8393

File tree

6 files changed

+45
-37
lines changed

6 files changed

+45
-37
lines changed

deploy/config/epp-config.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
apiVersion: inference.networking.x-k8s.io/v1alpha1
44
kind: EndpointPickerConfig
55
plugins:
6-
- type: prefix-cache-scorer
6+
- type: precise-prefix-cache-scorer
77
- type: decode-filter
88
- type: max-score-picker
99
- type: single-profile-handler
@@ -12,5 +12,5 @@ schedulingProfiles:
1212
plugins:
1313
- pluginRef: decode-filter
1414
- pluginRef: max-score-picker
15-
- pluginRef: prefix-cache-scorer
15+
- pluginRef: precise-prefix-cache-scorer
1616
weight: 2

deploy/config/pd-epp-config.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ apiVersion: inference.networking.x-k8s.io/v1alpha1
33
kind: EndpointPickerConfig
44
plugins:
55
- type: prefill-header-handler
6-
- type: prefix-cache-scorer
6+
- type: precise-prefix-cache-scorer
77
- type: prefill-filter
88
- type: decode-filter
99
- type: max-score-picker
@@ -13,11 +13,11 @@ schedulingProfiles:
1313
plugins:
1414
- pluginRef: prefill-filter
1515
- pluginRef: max-score-picker
16-
- pluginRef: prefix-cache-scorer
16+
- pluginRef: precise-prefix-cache-scorer
1717
weight: 2
1818
- name: decode
1919
plugins:
2020
- pluginRef: decode-filter
2121
- pluginRef: max-score-picker
22-
- pluginRef: prefix-cache-scorer
22+
- pluginRef: precise-prefix-cache-scorer
2323
weight: 2

deploy/config/sim-epp-kvcache-config.yaml

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,13 @@
33
apiVersion: inference.networking.x-k8s.io/v1alpha1
44
kind: EndpointPickerConfig
55
plugins:
6-
- type: prefix-cache-scorer
6+
- type: precise-prefix-cache-scorer
77
parameters:
8-
mode: cache_tracking
98
kvEventsConfig:
109
zmqEndpoint: tcp://0.0.0.0:5557
1110
indexerConfig:
1211
prefixStoreConfig:
13-
blockSize: 16
12+
blockSize: 16
1413
tokenProcessorConfig:
1514
blockSize: 16 # must match vLLM block size if not default (16)
1615
hashSeed: "42" # must match PYTHONHASHSEED in vLLM pods
@@ -28,5 +27,5 @@ schedulingProfiles:
2827
plugins:
2928
- pluginRef: decode-filter
3029
- pluginRef: max-score-picker
31-
- pluginRef: prefix-cache-scorer
30+
- pluginRef: precise-prefix-cache-scorer
3231
weight: 10

deploy/config/sim-epp-no-hit-lru.yaml

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,13 @@
33
apiVersion: inference.networking.x-k8s.io/v1alpha1
44
kind: EndpointPickerConfig
55
plugins:
6-
- type: prefix-cache-scorer
6+
- type: precise-prefix-cache-scorer
77
parameters:
8-
hashBlockSize: 5
9-
maxPrefixBlocksToMatch: 256
10-
lruCapacityPerServer: 31250
8+
indexerConfig:
9+
tokenProcessorConfig:
10+
blockSize: 5
11+
kvBlockIndexConfig:
12+
maxPrefixBlocksToMatch: 256
1113
- type: no-hit-lru-scorer
1214
parameters:
1315
lruSize: 2048
@@ -19,7 +21,7 @@ schedulingProfiles:
1921
plugins:
2022
- pluginRef: decode-filter
2123
- pluginRef: max-score-picker
22-
- pluginRef: prefix-cache-scorer
24+
- pluginRef: precise-prefix-cache-scorer
2325
weight: 2
2426
- pluginRef: no-hit-lru-scorer
2527
weight: 1

docs/architecture.md

Lines changed: 22 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -161,11 +161,13 @@ A complete configuration might look like this:
161161
apiVersion: inference.networking.x-k8s.io/v1alpha1
162162
kind: EndpointPickerConfig
163163
plugins:
164-
- type: prefix-cache-scorer
164+
- type: precise-prefix-cache-scorer
165165
parameters:
166-
hashBlockSize: 5
167-
maxPrefixBlocksToMatch: 256
168-
lruCapacityPerServer: 31250
166+
indexerConfig:
167+
tokenProcessorConfig:
168+
blockSize: 5
169+
kvBlockIndexConfig:
170+
maxPrefixBlocksToMatch: 256
169171
- type: decode-filter
170172
- type: max-score-picker
171173
- type: single-profile-handler
@@ -174,7 +176,7 @@ schedulingProfiles:
174176
plugins:
175177
- pluginRef: decode-filter
176178
- pluginRef: max-score-picker
177-
- pluginRef: prefix-cache-scorer
179+
- pluginRef: precise-prefix-cache-scorer
178180
weight: 50
179181
```
180182
@@ -434,11 +436,13 @@ Example configuration:
434436

435437
```yaml
436438
plugins:
437-
- type: prefix-cache-scorer
439+
- type: precise-prefix-cache-scorer
438440
parameters:
439-
hashBlockSize: 5
440-
maxPrefixBlocksToMatch: 256
441-
lruCapacityPerServer: 31250
441+
indexerConfig:
442+
tokenProcessorConfig:
443+
blockSize: 5
444+
kvBlockIndexConfig:
445+
maxPrefixBlocksToMatch: 256
442446
- type: no-hit-lru-scorer
443447
parameters:
444448
lruSize: 2048
@@ -450,7 +454,7 @@ schedulingProfiles:
450454
plugins:
451455
- pluginRef: decode-filter
452456
- pluginRef: max-score-picker
453-
- pluginRef: prefix-cache-scorer
457+
- pluginRef: precise-prefix-cache-scorer
454458
weight: 2
455459
- pluginRef: no-hit-lru-scorer
456460
weight: 1
@@ -471,11 +475,13 @@ apiVersion: inference.networking.x-k8s.io/v1alpha1
471475
kind: EndpointPickerConfig
472476
plugins:
473477
- type: prefill-header-handler
474-
- type: prefix-cache-scorer
478+
- type: precise-prefix-cache-scorer
475479
parameters:
476-
hashBlockSize: 5
477-
maxPrefixBlocksToMatch: 256
478-
lruCapacityPerServer: 31250
480+
indexerConfig:
481+
tokenProcessorConfig:
482+
blockSize: 5
483+
kvBlockIndexConfig:
484+
maxPrefixBlocksToMatch: 256
479485
- type: prefill-filter
480486
- type: decode-filter
481487
- type: max-score-picker
@@ -488,13 +494,13 @@ schedulingProfiles:
488494
plugins:
489495
- pluginRef: prefill-filter
490496
- pluginRef: max-score-picker
491-
- pluginRef: prefix-cache-scorer
497+
- pluginRef: precise-prefix-cache-scorer
492498
weight: 50
493499
- name: decode
494500
plugins:
495501
- pluginRef: decode-filter
496502
- pluginRef: max-score-picker
497-
- pluginRef: prefix-cache-scorer
503+
- pluginRef: precise-prefix-cache-scorer
498504
weight: 50
499505
```
500506

docs/disagg_pd.md

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -168,12 +168,13 @@ plugins:
168168
parameters:
169169
label: "role"
170170
validValues: ["decode"]
171-
- type: prefix-cache-scorer
171+
- type: precise-prefix-cache-scorer
172172
parameters:
173-
autoTune: false
174-
blockSize: 5
175-
maxPrefixBlocksToMatch: 256
176-
lruCapacityPerServer: 31250
173+
indexerConfig:
174+
tokenProcessorConfig:
175+
blockSize: 5
176+
kvBlockIndexConfig:
177+
maxPrefixBlocksToMatch: 256
177178
- type: max-score-picker
178179
- type: prefill-header-handler
179180
- type: pd-profile-handler
@@ -186,12 +187,12 @@ schedulingProfiles:
186187
plugins:
187188
- pluginRef: "prefill-pods"
188189
- pluginRef: "max-score-picker"
189-
- pluginRef: "prefix-cache-scorer"
190+
- pluginRef: "precise-prefix-cache-scorer"
190191
- name: decode
191192
plugins:
192193
- pluginRef: "decode-pods"
193194
- pluginRef: "max-score-picker"
194-
- pluginRef: "prefix-cache-scorer"
195+
- pluginRef: "precise-prefix-cache-scorer"
195196
```
196197

197198
---

0 commit comments

Comments
 (0)