llm-d · nirrozenbaum · Aug 6, 2025 · Aug 6, 2025
diff --git a/docs/architecture.md b/docs/architecture.md
@@ -8,7 +8,7 @@
 
 The design enables:
 
-- Support for **multiple base models** and **LoRA adapters** within a shared cluster [Not supported in
+- Support for **multiple base models** within a shared cluster [Not supported in
 Phase1]
 - Efficient routing based on **KV cache locality**, **prefix**, **session affinity**, **load**, and
 **model metadata**

diff --git a/pkg/plugins/scorer/load_aware_scorer.go b/pkg/plugins/scorer/load_aware_scorer.go
@@ -74,8 +74,8 @@ func (s *LoadAwareScorer) WithName(name string) *LoadAwareScorer {
 // that can be processed in the given pod immediately.
 // Pod with empty waiting requests queue is scored with 0.5
 // Pod with requests in the queue will get score between 0.5 and 0.
-// Score 0 will get pod with number of requests in the queue equal to the threshold used in load-based filter (QueueingThresholdLoRA)
-// In future pods with additional capacity will get score higher than 0.5
+// A pod whose number of queued requests equals the threshold used in the load-based filter gets a score of 0.
+// In the future, pods with additional capacity will get a score higher than 0.5.
 func (s *LoadAwareScorer) Score(_ context.Context, _ *types.CycleState, _ *types.LLMRequest, pods []types.Pod) map[types.Pod]float64 {
 	scoredPods := make(map[types.Pod]float64)