@@ -44,17 +44,18 @@ const (
4444 // HeartbeatInterval is the interval for updating the provider heartbeat
4545 HeartbeatInterval = 1 * time .Minute
4646
47- // LLMDSchedulerImage is the llm-d Router Endpoint Picker (formerly known
48- // as the llm-d Inference Scheduler) image used as the EPP for all llm-d
49- // ModelDeployments. The image source is github.com/llm-d/llm-d-router .
50- LLMDSchedulerImage = "ghcr.io/llm-d/llm-d-router-endpoint-picker:0.0.3 "
47+ // LLMDSchedulerImage is the llm-d Inference Scheduler image used as the
48+ // EPP for all llm-d ModelDeployments. Source:
49+ // github.com/llm-d/llm-d-inference-scheduler.
50+ LLMDSchedulerImage = "ghcr.io/llm-d/llm-d-inference-scheduler:v0.6.0 "
5151
5252 // LLMDSchedulerDefaultConfig is the default EndpointPickerConfig shipped
5353 // with the llm-d provider. It mirrors deploy/config/epp-config.yaml from
54- // the upstream llm-d-router repository: a heuristic prefix-cache scorer
55- // combined with a decode filter and max-score picker. It does NOT require
56- // any special vLLM flags (--kv-events-config / precise prefix cache).
57- LLMDSchedulerDefaultConfig = `apiVersion: llm-d.ai/v1alpha1
54+ // llm-d-inference-scheduler: a heuristic prefix-cache scorer
55+ // combined with a decode filter and max-score picker. It does NOT
56+ // require any special vLLM flags (--kv-events-config / precise prefix
57+ // cache).
58+ LLMDSchedulerDefaultConfig = `apiVersion: inference.networking.x-k8s.io/v1alpha1
5859kind: EndpointPickerConfig
5960plugins:
6061- type: prefix-cache-scorer
0 commit comments