Skip to content

Commit d046d75

Browse files
committed
read configuration of prefix aware scorer from environment variables
Signed-off-by: Maya Barnea <mayab@il.ibm.com>
1 parent 60dd83d commit d046d75

File tree

3 files changed

+27
-19
lines changed

3 files changed

+27
-19
lines changed

docs/architecture.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ These components are maintained in the `llm-d-inference-scheduler` repository an
8181
| Scorer | Description | Env Vars |
8282
|------------------|--------------------------------------------|----------|
8383
| Session-aware | Prefers pods from same session | `ENABLE_SESSION_AWARE_SCORER`, `SESSION_AWARE_SCORER_WEIGHT`, `PREFILL_ENABLE_SESSION_AWARE_SCORER`, `PREFILL_SESSION_AWARE_SCORER_WEIGHT` |
84-
| Prefix-aware | Matches prompt prefix | `ENABLE_PREFIX_AWARE_SCORER`, `PREFIX_AWARE_SCORER_WEIGHT`, `PREFILL_ENABLE_PREFIX_AWARE_SCORER`, `PREFILL_PREFIX_AWARE_SCORER_WEIGHT`, `PREFIX_SCORER_BLOCK_SIZE`|
84+
| Prefix-aware | Matches prompt prefix | `ENABLE_PREFIX_AWARE_SCORER`, `PREFIX_AWARE_SCORER_WEIGHT`, `PREFILL_ENABLE_PREFIX_AWARE_SCORER`, `PREFILL_PREFIX_AWARE_SCORER_WEIGHT`, `PREFIX_SCORER_BLOCK_SIZE`, `PREFIX_SCORER_MAX_CACHE_SIZE`, `PREFIX_SCORER_MAX_BLOCK_CACHE_SIZE` |
8585
| KVCache-aware | Optimizes for KV reuse | `ENABLE_KVCACHE_AWARE_SCORER`, `KVCACHE_INDEXER_REDIS_ADDR`, `PREFILL_ENABLE_KVCACHE_AWARE_SCORER`, `PREFILL_KVCACHE_INDEXER_REDIS_ADDR`, `HF_TOKEN` |
8686
| Load-aware | Avoids busy pods | `ENABLE_LOAD_AWARE_SCORER`, `LOAD_AWARE_SCORER_WEIGHT`, `PREFILL_ENABLE_LOAD_AWARE_SCORER`, `PREFILL_LOAD_AWARE_SCORER_WEIGHT` |
8787

pkg/config/config.go

Lines changed: 22 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -52,8 +52,17 @@ const (
5252
pdPromptLenThresholdEnvKey = "PD_PROMPT_LEN_THRESHOLD"
5353
pdPromptLenThresholdDefault = 100
5454

55-
prefixScorerBlockSizeEnvKey = "PREFIX_SCORER_BLOCK_SIZE"
56-
prefixScorerBlockSizeDefault = 256
55+
prefixMaxCacheSizeKey = "PREFIX_SCORER_MAX_CACHE_SIZE"
56+
// DefaultPrefixMaxCacheSize sets the maximum number of blocks the LRU cache can store.
57+
DefaultPrefixMaxCacheSize = 500000
58+
59+
prefixScorerBlockSizeEnvKey = "PREFIX_SCORER_BLOCK_SIZE"
60+
// DefaultPrefixBlockSize defines how many runes each block contains in the prefix cache.
61+
DefaultPrefixBlockSize = 256
62+
63+
prefixMaxBlockCacheSizeKey = "PREFIX_SCORER_MAX_BLOCK_CACHE_SIZE"
64+
// DefaultPrefixMaxBlockCacheSize sets the maximum number of pods a block can store.
65+
DefaultPrefixMaxBlockCacheSize = 100
5766
)
5867

5968
// Config contains scheduler configuration; currently configuration is loaded from environment variables
@@ -62,9 +71,11 @@ type Config struct {
6271
DecodeSchedulerPlugins map[string]int
6372
PrefillSchedulerPlugins map[string]int
6473

65-
PDEnabled bool
66-
PDThreshold int
67-
PrefixBlockSize int
74+
PDEnabled bool
75+
PDThreshold int
76+
PrefixBlockSize int
77+
PrefixCacheSize int
78+
PrefixBlockCacheSize int
6879
}
6980

7081
// NewConfig creates a new instance of Config
@@ -75,7 +86,9 @@ func NewConfig(logger logr.Logger) *Config {
7586
PrefillSchedulerPlugins: map[string]int{},
7687
PDEnabled: false,
7788
PDThreshold: math.MaxInt,
78-
PrefixBlockSize: prefixScorerBlockSizeDefault,
89+
PrefixBlockSize: DefaultPrefixBlockSize,
90+
PrefixCacheSize: DefaultPrefixMaxCacheSize,
91+
PrefixBlockCacheSize: DefaultPrefixMaxBlockCacheSize,
7992
}
8093
}
8194

@@ -95,7 +108,9 @@ func (c *Config) LoadConfig() {
95108

96109
c.PDEnabled = env.GetEnvString(pdEnabledEnvKey, "false", c.logger) == "true"
97110
c.PDThreshold = env.GetEnvInt(pdPromptLenThresholdEnvKey, pdPromptLenThresholdDefault, c.logger)
98-
c.PrefixBlockSize = env.GetEnvInt(prefixScorerBlockSizeEnvKey, prefixScorerBlockSizeDefault, c.logger)
111+
c.PrefixBlockSize = env.GetEnvInt(prefixScorerBlockSizeEnvKey, DefaultPrefixBlockSize, c.logger)
112+
c.PrefixCacheSize = env.GetEnvInt(prefixMaxCacheSizeKey, DefaultPrefixMaxCacheSize, c.logger)
113+
c.PrefixBlockCacheSize = env.GetEnvInt(prefixMaxBlockCacheSizeKey, DefaultPrefixMaxBlockCacheSize, c.logger)
99114
}
100115

101116
func (c *Config) loadPluginInfo(plugins map[string]int, prefill bool, pluginNames ...string) {

pkg/scheduling/plugins/scorer/prefix_store.go

Lines changed: 4 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -10,15 +10,8 @@ import (
1010

1111
"github.com/cespare/xxhash/v2"
1212
lru "github.com/hashicorp/golang-lru/v2"
13-
)
1413

15-
const (
16-
// defaultMaxCacheSize sets the maximum number of blocks the LRU cache can store.
17-
defaultMaxCacheSize = 500000
18-
// defaultBlockSize defines how many runes each block contains in the prefix cache.
19-
defaultBlockSize = 256
20-
// defaultMaxBlockCacheSize sets the maximum number of pods a block can store.
21-
defaultMaxBlockCacheSize = 100
14+
"github.com/llm-d/llm-d-inference-scheduler/pkg/config"
2215
)
2316

2417
// PrefixStoreConfig contains initialization configuration for PrefixStore.
@@ -35,9 +28,9 @@ type PrefixStoreConfig struct {
3528
// configuration.
3629
func DefaultPrefixStoreConfig() *PrefixStoreConfig {
3730
return &PrefixStoreConfig{
38-
CacheSize: defaultMaxCacheSize,
39-
BlockSize: defaultBlockSize,
40-
BlockCacheSize: defaultMaxBlockCacheSize,
31+
CacheSize: config.DefaultPrefixMaxCacheSize,
32+
BlockSize: config.DefaultPrefixBlockSize,
33+
BlockCacheSize: config.DefaultPrefixMaxBlockCacheSize,
4134
}
4235
}
4336

0 commit comments

Comments
 (0)