Skip to content

Commit eb84bb9

Browse files
committed
update: revert back some config to keep using prefix-cache-scorer
- revert file renaming
Signed-off-by: Wen Zhou <wenzhou@redhat.com>
1 parent 7aa5bc3 commit eb84bb9

File tree

5 files changed

+41
-36
lines changed

5 files changed

+41
-36
lines changed

deploy/config/epp-config.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
apiVersion: inference.networking.x-k8s.io/v1alpha1
44
kind: EndpointPickerConfig
55
plugins:
6-
- type: precise-prefix-cache-scorer
6+
- type: prefix-cache-scorer
77
- type: decode-filter
88
- type: max-score-picker
99
- type: single-profile-handler
@@ -12,5 +12,5 @@ schedulingProfiles:
1212
plugins:
1313
- pluginRef: decode-filter
1414
- pluginRef: max-score-picker
15-
- pluginRef: precise-prefix-cache-scorer
15+
- pluginRef: prefix-cache-scorer
1616
weight: 2

deploy/config/sim-epp-config.yaml

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,11 @@
33
apiVersion: inference.networking.x-k8s.io/v1alpha1
44
kind: EndpointPickerConfig
55
plugins:
6-
- type: precise-prefix-cache-scorer
6+
- type: prefix-cache-scorer
77
parameters:
8-
indexerConfig:
9-
tokenProcessorConfig:
10-
blockSize: 5
11-
kvBlockIndexConfig:
12-
maxPrefixBlocksToMatch: 256
8+
hashBlockSize: 5
9+
maxPrefixBlocksToMatch: 256
10+
lruCapacityPerServer: 31250
1311
- type: decode-filter
1412
- type: max-score-picker
1513
- type: single-profile-handler
@@ -18,5 +16,5 @@ schedulingProfiles:
1816
plugins:
1917
- pluginRef: decode-filter
2018
- pluginRef: max-score-picker
21-
- pluginRef: precise-prefix-cache-scorer
19+
- pluginRef: prefix-cache-scorer
2220
weight: 2

deploy/config/sim-pd-epp-config.yaml

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,11 @@ apiVersion: inference.networking.x-k8s.io/v1alpha1
44
kind: EndpointPickerConfig
55
plugins:
66
- type: prefill-header-handler
7-
- type: precise-prefix-cache-scorer
7+
- type: prefix-cache-scorer
88
parameters:
9-
indexerConfig:
10-
tokenProcessorConfig:
11-
blockSize: 5
12-
kvBlockIndexConfig:
13-
maxPrefixBlocksToMatch: 256
9+
hashBlockSize: 5
10+
maxPrefixBlocksToMatch: 256
11+
lruCapacityPerServer: 31250
1412
- type: prefill-filter
1513
- type: decode-filter
1614
- type: max-score-picker
@@ -24,11 +22,11 @@ schedulingProfiles:
2422
plugins:
2523
- pluginRef: prefill-filter
2624
- pluginRef: max-score-picker
27-
- pluginRef: precise-prefix-cache-scorer
25+
- pluginRef: prefix-cache-scorer
2826
weight: 2
2927
- name: decode
3028
plugins:
3129
- pluginRef: decode-filter
3230
- pluginRef: max-score-picker
33-
- pluginRef: precise-prefix-cache-scorer
31+
- pluginRef: prefix-cache-scorer
3432
weight: 2

scripts/kind-dev-env.sh

Lines changed: 28 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -74,32 +74,41 @@ export VLLM_REPLICA_COUNT_D="${VLLM_REPLICA_COUNT_D:-2}"
7474
# Data Parallel size
7575
export VLLM_DATA_PARALLEL_SIZE="${VLLM_DATA_PARALLEL_SIZE:-1}"
7676

77-
PRIMARY_PORT="0"
78-
if [ "${PD_ENABLED}" != "\"true\"" ] && [ ${VLLM_DATA_PARALLEL_SIZE} -eq 1 ]; then
79-
if [ "${KV_CACHE_ENABLED}" != "true" ]; then
80-
DEFAULT_EPP_CONFIG="deploy/config/sim-epp-config.yaml"
81-
else
82-
DEFAULT_EPP_CONFIG="deploy/config/sim-epp-kvcache-config.yaml"
83-
fi
84-
else
85-
if [ "${KV_CACHE_ENABLED}" != "true" ]; then
86-
if [ "${PD_ENABLED}" == "\"true\"" ]; then
87-
DEFAULT_EPP_CONFIG="deploy/config/sim-pd-epp-config.yaml"
88-
if [ ${VLLM_DATA_PARALLEL_SIZE} -ne 1 ]; then
89-
PRIMARY_PORT="8000"
90-
fi
91-
else
92-
DEFAULT_EPP_CONFIG="deploy/config/sim-dp-epp-config.yaml"
93-
fi
94-
else
77+
# Validate configuration constraints
78+
if [ "${KV_CACHE_ENABLED}" == "true" ]; then
79+
# KV cache requires simple mode: no PD and DP size must be 1
80+
if [ "${PD_ENABLED}" == "\"true\"" ] || [ ${VLLM_DATA_PARALLEL_SIZE} -ne 1 ]; then
9581
echo "Invalid configuration: PD_ENABLED=true and KV_CACHE_ENABLED=true is not supported"
9682
exit 1
9783
fi
9884
fi
9985

100-
export EPP_CONFIG="${EPP_CONFIG:-${DEFAULT_EPP_CONFIG}}"
86+
# Set PRIMARY_PORT based on PD mode with data parallelism
87+
if [ "${PD_ENABLED}" == "\"true\"" ] && [ ${VLLM_DATA_PARALLEL_SIZE} -ne 1 ]; then
88+
PRIMARY_PORT="8000"
89+
else
90+
PRIMARY_PORT="0"
91+
fi
10192
export PRIMARY_PORT
10293

94+
# Determine EPP config file based on feature flags
95+
if [ "${KV_CACHE_ENABLED}" == "true" ]; then
96+
# KV cache mode (simple mode only)
97+
DEFAULT_EPP_CONFIG="deploy/config/sim-epp-kvcache-config.yaml"
98+
elif [ "${PD_ENABLED}" == "\"true\"" ]; then
99+
# Prefill-Decode mode
100+
DEFAULT_EPP_CONFIG="deploy/config/sim-pd-epp-config.yaml"
101+
elif [ ${VLLM_DATA_PARALLEL_SIZE} -ne 1 ]; then
102+
# Data Parallel mode (only needed for Istio pre-1.28.1)
103+
# Not actually used in kind (docker.io/istio/pilot:1.28.1) when run via "make env-dev-kind"
104+
DEFAULT_EPP_CONFIG="deploy/config/dp-epp-config.yaml"
105+
else
106+
# Simple mode
107+
DEFAULT_EPP_CONFIG="deploy/config/sim-epp-config.yaml"
108+
fi
109+
110+
export EPP_CONFIG="${EPP_CONFIG:-${DEFAULT_EPP_CONFIG}}"
111+
103112
# ------------------------------------------------------------------------------
104113
# Setup & Requirement Checks
105114
# ------------------------------------------------------------------------------

0 commit comments

Comments
 (0)