Skip to content

Commit 8d46579

Browse files
committed
Added changes to fetch dp properties from kvevents for DP-aware routing
Signed-off-by: satyamg1620 <Satyam.Gupta.3@ibm.com>
1 parent 729df62 commit 8d46579

File tree

21 files changed

+303
-82
lines changed

21 files changed

+303
-82
lines changed

api/indexerpb/indexer.pb.go

Lines changed: 23 additions & 9 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

api/indexerpb/indexer.proto

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,4 +40,8 @@ message GetPodScoresResponse {
4040
message PodScore {
4141
string pod = 1;
4242
double score = 2;
43+
// Data parallel rank for this pod. -1 (or absent) means non-DP deployment.
44+
// When present, the pod field still contains the base pod identifier (e.g., "pod-1"),
45+
// and the rank is provided separately for structured routing decisions.
46+
optional int32 data_parallel_rank = 3;
4347
}

api/indexerpb/indexer_grpc.pb.go

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

examples/kv_cache_index/main.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,7 @@ func runPrompts(ctx context.Context, kvCacheIndexer *kvcache.Indexer) error {
151151
requestKeys := engineKeys
152152

153153
if err := kvCacheIndexer.KVBlockIndex().Add(ctx, engineKeys, requestKeys,
154-
[]kvblock.PodEntry{{PodIdentifier: "pod1", DeviceTier: "gpu"}}); err != nil {
154+
[]kvblock.PodEntry{{PodIdentifier: "pod1", DeviceTier: "gpu", DataParallelRank: kvblock.NoDataParallelRank}}); err != nil {
155155
return err
156156
}
157157

examples/kv_cache_index_service/server/server.go

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@ package main
1919
import (
2020
"context"
2121
"fmt"
22+
"strconv"
23+
"strings"
2224

2325
indexerpb "github.com/llm-d/llm-d-kv-cache/api/indexerpb"
2426
"github.com/llm-d/llm-d-kv-cache/examples/testdata"
@@ -53,9 +55,9 @@ func (s *IndexerService) AddSampleDataToIndexer(ctx context.Context) error {
5355

5456
// Sample pod entries simulating different pods with different device tiers
5557
podEntries := []kvblock.PodEntry{
56-
{PodIdentifier: "pod-1", DeviceTier: "gpu"},
57-
{PodIdentifier: "pod-2", DeviceTier: "gpu"},
58-
{PodIdentifier: "pod-3", DeviceTier: "cpu"},
58+
{PodIdentifier: "pod-1", DeviceTier: "gpu", DataParallelRank: kvblock.NoDataParallelRank},
59+
{PodIdentifier: "pod-2", DeviceTier: "gpu", DataParallelRank: kvblock.NoDataParallelRank},
60+
{PodIdentifier: "pod-3", DeviceTier: "cpu", DataParallelRank: kvblock.NoDataParallelRank},
5961
}
6062

6163
// For this example, requestKeys are identical to engineKeys (sampleKeys)
@@ -80,13 +82,23 @@ func (s *IndexerService) GetPodScores(ctx context.Context,
8082
return nil, fmt.Errorf("failed to get pod scores: %w", err)
8183
}
8284

83-
// Convert map[string]int to []*indexerpb.PodScore
85+
// Convert map[string]float64 to []*indexerpb.PodScore
86+
// Scoring keys are "pod-1" (non-DP) or "pod-1@dp0" (DP-aware)
8487
scores := make([]*indexerpb.PodScore, 0, len(podScores))
85-
for pod, score := range podScores {
86-
scores = append(scores, &indexerpb.PodScore{
87-
Pod: pod,
88+
for scoringKey, score := range podScores {
89+
ps := &indexerpb.PodScore{
8890
Score: score,
89-
})
91+
}
92+
if idx := strings.LastIndex(scoringKey, "@dp"); idx >= 0 {
93+
ps.Pod = scoringKey[:idx]
94+
if rank, err := strconv.ParseInt(scoringKey[idx+3:], 10, 32); err == nil {
95+
r := int32(rank)
96+
ps.DataParallelRank = &r
97+
}
98+
} else {
99+
ps.Pod = scoringKey
100+
}
101+
scores = append(scores, ps)
90102
}
91103

92104
return &indexerpb.GetPodScoresResponse{

examples/valkey_example/main.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -134,8 +134,8 @@ func demonstrateValkeyOperations(ctx context.Context, indexer *kvcache.Indexer)
134134
prompt := testdata.Prompt
135135

136136
podEntries := []kvblock.PodEntry{
137-
{PodIdentifier: "demo-pod-1", DeviceTier: "gpu"},
138-
{PodIdentifier: "demo-pod-2", DeviceTier: "gpu"},
137+
{PodIdentifier: "demo-pod-1", DeviceTier: "gpu", DataParallelRank: kvblock.NoDataParallelRank},
138+
{PodIdentifier: "demo-pod-2", DeviceTier: "gpu", DataParallelRank: kvblock.NoDataParallelRank},
139139
}
140140

141141
logger.Info("Processing testdata prompt", "model", modelName, "promptLength", len(prompt))

pkg/kvcache/kvblock/cost_aware_memory.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,7 @@ func (c *CostPodCache) CalculateByteSize(keyStr string) int64 {
150150
totalBytes += int64(len(entry.PodIdentifier)) // PodIdentifier string content
151151
totalBytes += int64(len(entry.DeviceTier)) // DeviceTier string content
152152
totalBytes += 32 // string headers (16 bytes each for 2 strings)
153+
totalBytes += 8 // DataParallelRank int field
153154
totalBytes += 8 // struct padding/alignment
154155
return true
155156
})

pkg/kvcache/kvblock/cost_aware_memory_test.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ func TestCostAwareIndexSize(t *testing.T) {
4747
// first key
4848
engineKey1 := BlockHash(32490241)
4949
requestKey1 := BlockHash(18986637)
50-
entry1 := PodEntry{PodIdentifier: "pod1", DeviceTier: "gpu"}
50+
entry1 := PodEntry{PodIdentifier: "pod1", DeviceTier: "gpu", DataParallelRank: NoDataParallelRank}
5151

5252
costPodCache := &CostPodCache{}
5353
costPodCache.Add(entry1)
@@ -66,13 +66,13 @@ func TestCostAwareIndexSize(t *testing.T) {
6666
// Add second key
6767
engineKey2 := BlockHash(48712468)
6868
requestKey2 := BlockHash(87654321)
69-
err = index.Add(ctx, []BlockHash{engineKey2}, []BlockHash{requestKey2}, []PodEntry{{PodIdentifier: "pod2", DeviceTier: "gpu"}})
69+
err = index.Add(ctx, []BlockHash{engineKey2}, []BlockHash{requestKey2}, []PodEntry{{PodIdentifier: "pod2", DeviceTier: "gpu", DataParallelRank: NoDataParallelRank}})
7070
require.NoError(t, err)
7171

7272
// Add third key - should evict the first one due to LRU
7373
engineKey3 := BlockHash(96187092)
7474
requestKey3 := BlockHash(56789012)
75-
err = index.Add(ctx, []BlockHash{engineKey3}, []BlockHash{requestKey3}, []PodEntry{{PodIdentifier: "pod3", DeviceTier: "cpu"}})
75+
err = index.Add(ctx, []BlockHash{engineKey3}, []BlockHash{requestKey3}, []PodEntry{{PodIdentifier: "pod3", DeviceTier: "cpu", DataParallelRank: NoDataParallelRank}})
7676
require.NoError(t, err)
7777

7878
// Lookup should only return the last two keys
@@ -82,7 +82,7 @@ func TestCostAwareIndexSize(t *testing.T) {
8282
assert.Len(t, podsPerKey, 1) // Only requestKey3 should be present
8383
assert.Len(t, podsPerKey[requestKey3], 1)
8484

85-
assert.Contains(t, podsPerKey[requestKey3], PodEntry{PodIdentifier: "pod3", DeviceTier: "cpu"})
85+
assert.Contains(t, podsPerKey[requestKey3], PodEntry{PodIdentifier: "pod3", DeviceTier: "cpu", DataParallelRank: NoDataParallelRank})
8686
}
8787

8888
func TestSizeHumanize(t *testing.T) {

pkg/kvcache/kvblock/in_memory_test.go

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -57,19 +57,19 @@ func TestInMemoryIndexSize(t *testing.T) {
5757
// Add first key
5858
engineKey1 := BlockHash(72735753)
5959
requestKey1 := BlockHash(79215516)
60-
err = index.Add(ctx, []BlockHash{engineKey1}, []BlockHash{requestKey1}, []PodEntry{{PodIdentifier: "pod1", DeviceTier: "gpu"}})
60+
err = index.Add(ctx, []BlockHash{engineKey1}, []BlockHash{requestKey1}, []PodEntry{{PodIdentifier: "pod1", DeviceTier: "gpu", DataParallelRank: NoDataParallelRank}})
6161
require.NoError(t, err)
6262

6363
// Add second key
6464
engineKey2 := BlockHash(41341092)
6565
requestKey2 := BlockHash(12871930)
66-
err = index.Add(ctx, []BlockHash{engineKey2}, []BlockHash{requestKey2}, []PodEntry{{PodIdentifier: "pod2", DeviceTier: "gpu"}})
66+
err = index.Add(ctx, []BlockHash{engineKey2}, []BlockHash{requestKey2}, []PodEntry{{PodIdentifier: "pod2", DeviceTier: "gpu", DataParallelRank: NoDataParallelRank}})
6767
require.NoError(t, err)
6868

6969
// Add third key - should evict the first one due to LRU
7070
engineKey3 := BlockHash(34012886)
7171
requestKey3 := BlockHash(69914638)
72-
err = index.Add(ctx, []BlockHash{engineKey3}, []BlockHash{requestKey3}, []PodEntry{{PodIdentifier: "pod3", DeviceTier: "cpu"}})
72+
err = index.Add(ctx, []BlockHash{engineKey3}, []BlockHash{requestKey3}, []PodEntry{{PodIdentifier: "pod3", DeviceTier: "cpu", DataParallelRank: NoDataParallelRank}})
7373
require.NoError(t, err)
7474

7575
// Lookup should only return the last two keys
@@ -79,8 +79,8 @@ func TestInMemoryIndexSize(t *testing.T) {
7979
assert.Len(t, podsPerKey, 2) // Only key2 and key3 should be present
8080
assert.Len(t, podsPerKey[requestKey2], 1)
8181
assert.Len(t, podsPerKey[requestKey3], 1)
82-
assert.Contains(t, podsPerKey[requestKey2], PodEntry{PodIdentifier: "pod2", DeviceTier: "gpu"})
83-
assert.Contains(t, podsPerKey[requestKey3], PodEntry{PodIdentifier: "pod3", DeviceTier: "cpu"})
82+
assert.Contains(t, podsPerKey[requestKey2], PodEntry{PodIdentifier: "pod2", DeviceTier: "gpu", DataParallelRank: NoDataParallelRank})
83+
assert.Contains(t, podsPerKey[requestKey3], PodEntry{PodIdentifier: "pod3", DeviceTier: "cpu", DataParallelRank: NoDataParallelRank})
8484
}
8585

8686
func TestInMemoryIndexPodCacheSize(t *testing.T) {
@@ -99,9 +99,9 @@ func TestInMemoryIndexPodCacheSize(t *testing.T) {
9999
engineKey := BlockHash(28409753)
100100
requestKey := BlockHash(51374550)
101101
pods := []PodEntry{
102-
{PodIdentifier: "pod1", DeviceTier: "gpu"},
103-
{PodIdentifier: "pod2", DeviceTier: "gpu"},
104-
{PodIdentifier: "pod3", DeviceTier: "cpu"}, // This should evict pod1 due to LRU
102+
{PodIdentifier: "pod1", DeviceTier: "gpu", DataParallelRank: NoDataParallelRank},
103+
{PodIdentifier: "pod2", DeviceTier: "gpu", DataParallelRank: NoDataParallelRank},
104+
{PodIdentifier: "pod3", DeviceTier: "cpu", DataParallelRank: NoDataParallelRank}, // This should evict pod1 due to LRU
105105
}
106106

107107
err = index.Add(ctx, []BlockHash{engineKey}, []BlockHash{requestKey}, pods)
@@ -112,8 +112,8 @@ func TestInMemoryIndexPodCacheSize(t *testing.T) {
112112
require.NoError(t, err)
113113
assert.Len(t, podsPerKey, 1)
114114
assert.Len(t, podsPerKey[requestKey], 2, "Should only have 2 pods due to PodCacheSize limit")
115-
assert.Contains(t, podsPerKey[requestKey], PodEntry{PodIdentifier: "pod2", DeviceTier: "gpu"})
116-
assert.Contains(t, podsPerKey[requestKey], PodEntry{PodIdentifier: "pod3", DeviceTier: "cpu"})
115+
assert.Contains(t, podsPerKey[requestKey], PodEntry{PodIdentifier: "pod2", DeviceTier: "gpu", DataParallelRank: NoDataParallelRank})
116+
assert.Contains(t, podsPerKey[requestKey], PodEntry{PodIdentifier: "pod3", DeviceTier: "cpu", DataParallelRank: NoDataParallelRank})
117117
}
118118

119119
// TestSpeculativeAnnotation tests that speculative and confirmed PodEntries

pkg/kvcache/kvblock/index.go

Lines changed: 92 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,12 @@ import (
2626
"k8s.io/apimachinery/pkg/util/sets"
2727
)
2828

29+
const (
	// NoDataParallelRank indicates that no data parallel rank is set.
	// This is the default value for non-DP deployments; code compares
	// PodEntry.DataParallelRank against this sentinel (-1) to decide
	// whether a "@dpN" suffix is included in the string form.
	NoDataParallelRank = -1
)
34+
2935
// IndexConfig holds the configuration for the KV-block index.
3036
// It may configure several backends such as listed within the struct.
3137
// If multiple backends are configured, only the first one will be used.
@@ -172,13 +178,98 @@ type PodEntry struct {
172178
DeviceTier string
173179
// Speculative indicates the entry was added predictively before a KV event confirmed it.
174180
Speculative bool
181+
// DataParallelRank is the data parallel rank of the pod.
182+
// A value of NoDataParallelRank (-1) indicates no DP rank is set (non-DP deployment).
183+
DataParallelRank int
184+
}
185+
186+
// NewPodEntry creates a PodEntry, converting a *int DP rank to the int sentinel form.
187+
// A nil dpRank is stored as NoDataParallelRank (-1).
188+
func NewPodEntry(podIdentifier, deviceTier string, dpRank *int) PodEntry {
189+
rank := NoDataParallelRank
190+
if dpRank != nil {
191+
rank = *dpRank
192+
}
193+
return PodEntry{
194+
PodIdentifier: podIdentifier,
195+
DeviceTier: deviceTier,
196+
DataParallelRank: rank,
197+
}
175198
}
176199

177200
// String returns a string representation of the PodEntry.
201+
// Format: "pod@tier" (no DP rank) or "pod@tier@dpN" (with DP rank).
178202
func (e *PodEntry) String() string {
179203
suffix := ""
180204
if e.Speculative {
181205
suffix = "[speculative]"
182206
}
183-
return fmt.Sprintf("%s@%s%s", e.PodIdentifier, e.DeviceTier, suffix)
207+
208+
if e.DataParallelRank == NoDataParallelRank {
209+
return fmt.Sprintf("%s@%s%s", e.PodIdentifier, e.DeviceTier, suffix)
210+
}
211+
return fmt.Sprintf("%s@%s%s@dp%s", e.PodIdentifier, e.DeviceTier, suffix, strconv.Itoa(e.DataParallelRank))
212+
}
213+
214+
// ParsePodEntry parses a PodEntry from its string representation.
215+
// It handles both "pod@tier" and "pod@tier@dpN" formats.
216+
func ParsePodEntry(s string) (PodEntry, error) {
217+
// Try 3-part format first: "pod@tier@dpN"
218+
parts := splitPodEntryString(s)
219+
switch len(parts) {
220+
case 3:
221+
dpStr := parts[2]
222+
if len(dpStr) < 3 || dpStr[:2] != "dp" {
223+
return PodEntry{}, fmt.Errorf("invalid dp rank format: %s", dpStr)
224+
}
225+
rank, err := strconv.Atoi(dpStr[2:])
226+
if err != nil {
227+
return PodEntry{}, fmt.Errorf("invalid dp rank number: %s", dpStr)
228+
}
229+
return PodEntry{
230+
PodIdentifier: parts[0],
231+
DeviceTier: parts[1],
232+
DataParallelRank: rank,
233+
}, nil
234+
case 2:
235+
return PodEntry{
236+
PodIdentifier: parts[0],
237+
DeviceTier: parts[1],
238+
DataParallelRank: NoDataParallelRank,
239+
}, nil
240+
default:
241+
return PodEntry{}, fmt.Errorf("invalid pod entry format: %s", s)
242+
}
243+
}
244+
245+
// splitPodEntryString splits a PodEntry string into its components.
246+
// It splits from the right to handle pod identifiers that may contain '@'.
247+
func splitPodEntryString(s string) []string {
248+
// Check for dp suffix (3-part format)
249+
lastAt := lastIndexByte(s, '@')
250+
if lastAt < 0 {
251+
return []string{s}
252+
}
253+
suffix := s[lastAt+1:]
254+
if len(suffix) >= 3 && suffix[:2] == "dp" {
255+
if _, err := strconv.Atoi(suffix[2:]); err == nil {
256+
// This is "something@dpN" — find the tier separator
257+
rest := s[:lastAt]
258+
secondLastAt := lastIndexByte(rest, '@')
259+
if secondLastAt >= 0 {
260+
return []string{rest[:secondLastAt], rest[secondLastAt+1:], suffix}
261+
}
262+
}
263+
}
264+
// 2-part format: "pod@tier"
265+
return []string{s[:lastAt], s[lastAt+1:]}
266+
}
267+
268+
// lastIndexByte reports the index of the last occurrence of c in s,
// or -1 if c is absent. (Equivalent to strings.LastIndexByte; kept local
// so this file does not need the strings import.)
func lastIndexByte(s string, c byte) int {
	for i := len(s); i > 0; i-- {
		if s[i-1] == c {
			return i - 1
		}
	}
	return -1
}

0 commit comments

Comments
 (0)