Skip to content

Commit ed5a307

Browse files
committed
fix precise-prefix-cache
Signed-off-by: threcc <trecchiu@redhat.com>
1 parent b720300 commit ed5a307

File tree

2 files changed: 7 additions and 13 deletions

tests/model_serving/model_server/llmd/llmd_configs/config_precise_prefix_cache.py

Lines changed: 5 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@ class PrecisePrefixCacheConfig(QwenHfConfig):
1616
model_name = "Qwen/Qwen2.5-7B-Instruct"
1717
replicas = 2
1818
block_size = 64
19-
hash_algo = "sha256_cbor"
19+
hash_algo = "sha256"
2020
hash_seed = "42"
2121
enable_auth = True
2222

@@ -59,19 +59,16 @@ def _scheduler_config(cls):
5959
{
6060
"type": "precise-prefix-cache-scorer",
6161
"parameters": {
62+
"tokenProcessorConfig": {
63+
"blockSize": cls.block_size,
64+
"hashSeed": cls.hash_seed,
65+
},
6266
"kvEventsConfig": {"zmqEndpoint": "tcp://*:5557", "topicFilter": "kv"},
6367
"indexerConfig": {
64-
"tokenProcessorConfig": {
65-
"blockSize": cls.block_size,
66-
"hashSeed": cls.hash_seed,
67-
},
6868
"kvBlockIndexConfig": {
6969
"enableMetrics": True,
7070
"metricsLoggingInterval": 60000000000,
7171
},
72-
"tokenizersPoolConfig": {
73-
"hf": {"tokenizersCacheDir": "/mnt/tokenizers"},
74-
},
7572
},
7673
},
7774
},
@@ -101,8 +98,6 @@ def _scheduler_container(cls):
10198
{"name": "metrics", "containerPort": 9090, "protocol": "TCP"},
10299
{"name": "zmq", "containerPort": 5557, "protocol": "TCP"},
103100
],
104-
"env": [{"name": "HF_HOME", "value": "/mnt/tokenizers"}],
105-
"volumeMounts": [{"name": "tokenizers", "mountPath": "/mnt/tokenizers", "readOnly": False}],
106101
"args": [
107102
"--v=4",
108103
"--pool-name",
@@ -133,7 +128,6 @@ def router_config(cls):
133128
return {
134129
"scheduler": {
135130
"template": {
136-
"volumes": [{"name": "tokenizers", "emptyDir": {}}],
137131
"containers": [cls._scheduler_container()],
138132
}
139133
},

tests/model_serving/model_server/llmd/utils.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -342,10 +342,10 @@ def get_scheduler_decision_logs(
342342

343343
# Get all logs from the scheduler pod
344344
# Note: The router-scheduler container is the default/main container
345-
raw_logs = router_scheduler_pod.log()
345+
raw_logs = router_scheduler_pod.log(container="main")
346346

347347
# Target decision message
348-
target_decision_msg = "Selecting pods from candidates sorted by max score"
348+
target_decision_msg = "Selecting endpoints from candidates sorted by max score"
349349

350350
# Filtering logs
351351
filtered_logs = "\n".join(line for line in raw_logs.splitlines() if target_decision_msg in line)

0 commit comments

Comments
 (0)