@@ -52,8 +52,17 @@ const (
5252 pdPromptLenThresholdEnvKey = "PD_PROMPT_LEN_THRESHOLD"
5353 pdPromptLenThresholdDefault = 100
5454
55- prefixScorerBlockSizeEnvKey = "PREFIX_SCORER_BLOCK_SIZE"
56- prefixScorerBlockSizeDefault = 256
55+ prefixMaxCacheSizeKey = "PREFIX_SCORER_MAX_CACHE_SIZE"
56+ // DefaultPrefixMaxCacheSize sets the maximum number of blocks the LRU cache can store.
57+ DefaultPrefixMaxCacheSize = 500000
58+
59+ prefixScorerBlockSizeEnvKey = "PREFIX_SCORER_BLOCK_SIZE"
60+ // DefaultPrefixBlockSize defines how many runes each block contains in the prefix cache.
61+ DefaultPrefixBlockSize = 256
62+
63+ prefixMaxBlockCacheSizeKey = "PREFIX_SCORER_MAX_BLOCK_CACHE_SIZE"
64+ // DefaultPrefixMaxBlockCacheSize sets the maximum number of pods a block can store.
65+ DefaultPrefixMaxBlockCacheSize = 100
5766)
5867
5968// Config contains scheduler configuration; currently, configuration is loaded from environment variables
@@ -62,9 +71,11 @@ type Config struct {
6271 DecodeSchedulerPlugins map [string ]int
6372 PrefillSchedulerPlugins map [string ]int
6473
65- PDEnabled bool
66- PDThreshold int
67- PrefixBlockSize int
74+ PDEnabled bool
75+ PDThreshold int
76+ PrefixBlockSize int
77+ PrefixCacheSize int
78+ PrefixBlockCacheSize int
6879}
6980
7081// NewConfig creates a new instance of Config
@@ -75,7 +86,9 @@ func NewConfig(logger logr.Logger) *Config {
7586 PrefillSchedulerPlugins : map [string ]int {},
7687 PDEnabled : false ,
7788 PDThreshold : math .MaxInt ,
78- PrefixBlockSize : prefixScorerBlockSizeDefault ,
89+ PrefixBlockSize : DefaultPrefixBlockSize ,
90+ PrefixCacheSize : DefaultPrefixMaxCacheSize ,
91+ PrefixBlockCacheSize : DefaultPrefixMaxBlockCacheSize ,
7992 }
8093}
8194
@@ -95,7 +108,9 @@ func (c *Config) LoadConfig() {
95108
96109 c .PDEnabled = env .GetEnvString (pdEnabledEnvKey , "false" , c .logger ) == "true"
97110 c .PDThreshold = env .GetEnvInt (pdPromptLenThresholdEnvKey , pdPromptLenThresholdDefault , c .logger )
98- c .PrefixBlockSize = env .GetEnvInt (prefixScorerBlockSizeEnvKey , prefixScorerBlockSizeDefault , c .logger )
111+ c .PrefixBlockSize = env .GetEnvInt (prefixScorerBlockSizeEnvKey , DefaultPrefixBlockSize , c .logger )
112+ c .PrefixCacheSize = env .GetEnvInt (prefixMaxCacheSizeKey , DefaultPrefixMaxCacheSize , c .logger )
113+ c .PrefixBlockCacheSize = env .GetEnvInt (prefixMaxBlockCacheSizeKey , DefaultPrefixMaxBlockCacheSize , c .logger )
99114}
100115
101116func (c * Config ) loadPluginInfo (plugins map [string ]int , prefill bool , pluginNames ... string ) {
0 commit comments