forked from opendatahub-io/opendatahub-tests
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathconstants.py
More file actions
44 lines (42 loc) · 1.27 KB
/
constants.py
File metadata and controls
44 lines (42 loc) · 1.27 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
# Liveness probe used for single-node configurations.
# The keys follow the Kubernetes container-probe field names, so the dict can
# be dropped into a container spec as-is.
LLMD_LIVENESS_PROBE = {
    # HTTPS GET against /health on port 8000.
    "httpGet": {
        "path": "/health",
        "port": 8000,
        "scheme": "HTTPS",
    },
    # Timing / retry settings, all in the units the probe fields define.
    "initialDelaySeconds": 120,
    "periodSeconds": 30,
    "timeoutSeconds": 30,
    "failureThreshold": 5,
}
# Prefix-cache parameters common to vLLM and the llm-d scheduler.
# They are defined once here so every consumer in this module reads the same
# values.
PREFIX_CACHE_BLOCK_SIZE: int = 64  # block size used for prefix-cache hashing
PREFIX_CACHE_HASH_ALGO: str = "sha256"  # hash algorithm name
PREFIX_CACHE_HASH_SEED: str = "42"  # hash seed; deliberately a string, not an int
# Router scheduler configuration for single-node deployments that use the
# estimated prefix cache.
#
# The document is an EndpointPickerConfig with two parts:
#   * "plugins" declares a single prefix-cache-scorer whose token processor is
#     configured from the PREFIX_CACHE_* constants defined in this module.
#   * "schedulingProfiles" declares one profile, "default", that refers to
#     that scorer via pluginRef and gives it a weight of 5.0.
ROUTER_SCHEDULER_CONFIG_ESTIMATED_PREFIX_CACHE = {
    "apiVersion": "inference.networking.x-k8s.io/v1alpha1",
    "kind": "EndpointPickerConfig",
    "plugins": [
        {
            "type": "prefix-cache-scorer",
            "parameters": {
                "indexerConfig": {
                    # Hashing parameters come from the shared constants rather
                    # than being repeated as literals here.
                    "tokenProcessorConfig": {
                        "blockSize": PREFIX_CACHE_BLOCK_SIZE,
                        "hashAlgo": PREFIX_CACHE_HASH_ALGO,
                        "hashSeed": PREFIX_CACHE_HASH_SEED,
                    },
                },
            },
        },
    ],
    "schedulingProfiles": [
        {
            "name": "default",
            "plugins": [
                # pluginRef repeats the "type" string of the plugin declared
                # above.
                {"pluginRef": "prefix-cache-scorer", "weight": 5.0},
            ],
        },
    ],
}