Skip to content

Commit 73fad94

Browse files
authored
[Feat] Valkey vector store implementation (#1671)
* valkey vector store implementation Signed-off-by: Daria Korenieva <daric2612@gmail.com> * formatting Signed-off-by: Daria Korenieva <daric2612@gmail.com> * improvements Signed-off-by: Daria Korenieva <daric2612@gmail.com> * fixes Signed-off-by: Daria Korenieva <daric2612@gmail.com> * fixes Signed-off-by: Daria Korenieva <daric2612@gmail.com> * fixes Signed-off-by: Daria Korenieva <daric2612@gmail.com> * fixes Signed-off-by: Daria Korenieva <daric2612@gmail.com> * chore: update go.sum with valkey-glide checksums Signed-off-by: Daria Korenieva <daric2612@gmail.com> * chore: sync go.onnx.mod valkey-glide to v2.3.0 Signed-off-by: Daria Korenieva <daric2612@gmail.com> * remove unused config Signed-off-by: Daria Korenieva <daric2612@gmail.com> * add config back Signed-off-by: Daria Korenieva <daric2612@gmail.com> * address comments Signed-off-by: Daria Korenieva <daric2612@gmail.com> --------- Signed-off-by: Daria Korenieva <daric2612@gmail.com>
1 parent 6e91324 commit 73fad94

File tree

18 files changed

+1827
-89
lines changed

18 files changed

+1827
-89
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -208,3 +208,6 @@ src/training/cache_embeddings/aws/vllm-inventory-*.ini
208208
grafana/
209209
grafana-data/
210210
prometheus-data/
211+
212+
# Local dev makefile (not committed)
213+
Makefile.local

config/config.yaml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1534,6 +1534,16 @@ global:
15341534
embedding_model: all-MiniLM-L6-v2
15351535
request_timeout_seconds: 30
15361536
search_type: hybrid
1537+
valkey:
1538+
host: valkey
1539+
port: 6379
1540+
database: 0
1541+
password: valkey-secret
1542+
connect_timeout: 5
1543+
collection_prefix: "vsr_vs_"
1544+
metric_type: COSINE
1545+
index_m: 16
1546+
index_ef_construction: 200
15371547

15381548
integrations:
15391549
tools:
Lines changed: 157 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,157 @@
1+
package main
2+
3+
import (
4+
"context"
5+
"fmt"
6+
"log"
7+
"time"
8+
9+
candle_binding "github.com/vllm-project/semantic-router/candle-binding"
10+
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/vectorstore"
11+
)
12+
13+
// doc is one sample text chunk together with the identifiers it is stored
// under in the demo.
type doc struct {
	id       string // chunk identifier
	fileID   string // identifier of the file the chunk belongs to
	filename string // display name of that file
	content  string // text that gets embedded and indexed
}
16+
17+
var sampleDocs = []doc{
18+
// Europe
19+
{"c1", "f1", "france.txt", "The capital of France is Paris. It is known for the Eiffel Tower."},
20+
{"c2", "f1", "france.txt", "France is a country in Western Europe with a rich cultural heritage."},
21+
{"c3", "f2", "germany.txt", "Berlin is the capital of Germany. It is famous for the Brandenburg Gate."},
22+
{"c4", "f2", "germany.txt", "Germany is the largest economy in Europe and a leader in engineering."},
23+
// Asia
24+
{"c5", "f3", "japan.txt", "Tokyo is the capital of Japan. It is one of the most populous cities in the world."},
25+
{"c6", "f3", "japan.txt", "Japan is an island nation in East Asia known for its technology and cuisine."},
26+
{"c7", "f4", "india.txt", "New Delhi is the capital of India. Mumbai is the most populated city in India."},
27+
{"c8", "f4", "india.txt", "India is the most populous country in the world with over 1.4 billion people."},
28+
{"c9", "f5", "china.txt", "Beijing is the capital of China. Shanghai is the largest city by population."},
29+
{"c10", "f5", "china.txt", "China has the second largest economy in the world and a long history of innovation."},
30+
}
31+
32+
func main() {
33+
fmt.Println("Valkey Vector Store Backend Example")
34+
fmt.Println("====================================")
35+
36+
ctx := context.Background()
37+
backend := initBackend()
38+
defer backend.Close()
39+
40+
storeID := fmt.Sprintf("demo_%d", time.Now().UnixNano())
41+
createCollection(ctx, backend, storeID)
42+
defer cleanupCollection(ctx, backend, storeID)
43+
44+
embedAndInsert(ctx, backend, storeID)
45+
time.Sleep(500 * time.Millisecond)
46+
runSearches(ctx, backend, storeID)
47+
runFilteredSearch(ctx, backend, storeID)
48+
49+
fmt.Println("\n✓ Example completed successfully!")
50+
}
51+
52+
func initBackend() *vectorstore.ValkeyBackend {
53+
fmt.Println("\n1. Initializing embedding model...")
54+
if err := candle_binding.InitModel("sentence-transformers/all-MiniLM-L6-v2", true); err != nil {
55+
log.Fatalf("Failed to initialize embedding model: %v", err)
56+
}
57+
fmt.Println("✓ Embedding model initialized")
58+
59+
fmt.Println("\n2. Connecting to Valkey...")
60+
backend, err := vectorstore.NewValkeyBackend(vectorstore.ValkeyBackendConfig{
61+
Host: "localhost",
62+
Port: 6379,
63+
CollectionPrefix: "example_vs_",
64+
MetricType: "COSINE",
65+
ConnectTimeout: 5,
66+
})
67+
if err != nil {
68+
log.Fatalf("Failed to connect to Valkey: %v", err)
69+
}
70+
fmt.Println("✓ Connected to Valkey")
71+
return backend
72+
}
73+
74+
func createCollection(ctx context.Context, backend *vectorstore.ValkeyBackend, storeID string) {
75+
dimension := 384
76+
fmt.Printf("\n3. Creating collection %q (dimension=%d)...\n", storeID, dimension)
77+
if err := backend.CreateCollection(ctx, storeID, dimension); err != nil {
78+
log.Fatalf("Failed to create collection: %v", err)
79+
}
80+
fmt.Println("✓ Collection created")
81+
}
82+
83+
func cleanupCollection(ctx context.Context, backend *vectorstore.ValkeyBackend, storeID string) {
84+
fmt.Printf("\n7. Cleaning up collection %q...\n", storeID)
85+
if err := backend.DeleteCollection(ctx, storeID); err != nil {
86+
log.Printf("Warning: cleanup failed: %v", err)
87+
} else {
88+
fmt.Println("✓ Collection deleted")
89+
}
90+
}
91+
92+
func embedAndInsert(ctx context.Context, backend *vectorstore.ValkeyBackend, storeID string) {
93+
fmt.Println("\n4. Embedding and inserting documents...")
94+
chunks := make([]vectorstore.EmbeddedChunk, 0, len(sampleDocs))
95+
for i, d := range sampleDocs {
96+
embedding, err := candle_binding.GetEmbedding(d.content, 0)
97+
if err != nil {
98+
log.Fatalf("Failed to embed document %d: %v", i, err)
99+
}
100+
chunks = append(chunks, vectorstore.EmbeddedChunk{
101+
ID: d.id, FileID: d.fileID, Filename: d.filename,
102+
Content: d.content, Embedding: embedding,
103+
ChunkIndex: i, VectorStoreID: storeID,
104+
})
105+
}
106+
if err := backend.InsertChunks(ctx, storeID, chunks); err != nil {
107+
log.Fatalf("Failed to insert chunks: %v", err)
108+
}
109+
fmt.Printf("✓ Inserted %d chunks\n", len(chunks))
110+
}
111+
112+
func runSearches(ctx context.Context, backend *vectorstore.ValkeyBackend, storeID string) {
113+
fmt.Println("\n5. Searching for similar documents (threshold=0.80)...")
114+
for _, query := range []string{
115+
"What is the capital of France?",
116+
"Tell me about German engineering",
117+
"Most populated city in Asia",
118+
} {
119+
fmt.Printf("\n Query: %q\n", query)
120+
qEmb, err := candle_binding.GetEmbedding(query, 0)
121+
if err != nil {
122+
log.Fatalf("Failed to embed query: %v", err)
123+
}
124+
results, err := backend.Search(ctx, storeID, qEmb, 3, 0.80, nil)
125+
if err != nil {
126+
log.Fatalf("Search failed: %v", err)
127+
}
128+
if len(results) == 0 {
129+
fmt.Println(" (no results above threshold)")
130+
}
131+
for rank, r := range results {
132+
fmt.Printf(" #%d [%.4f] %s: %s\n", rank+1, r.Score, r.Filename, truncate(r.Content, 70))
133+
}
134+
}
135+
}
136+
137+
func runFilteredSearch(ctx context.Context, backend *vectorstore.ValkeyBackend, storeID string) {
138+
fmt.Println("\n6. Searching with file_id filter (only germany.txt)...")
139+
qEmb, err := candle_binding.GetEmbedding("capital city", 0)
140+
if err != nil {
141+
log.Fatalf("Failed to embed query: %v", err)
142+
}
143+
results, err := backend.Search(ctx, storeID, qEmb, 5, 0.0, map[string]interface{}{"file_id": "f2"})
144+
if err != nil {
145+
log.Fatalf("Filtered search failed: %v", err)
146+
}
147+
for rank, r := range results {
148+
fmt.Printf(" #%d [%.4f] %s: %s\n", rank+1, r.Score, r.Filename, truncate(r.Content, 70))
149+
}
150+
}
151+
152+
// truncate shortens s to at most maxLen characters for display.
//
// Strings that already fit are returned unchanged. Longer strings are cut and
// suffixed with "..." so that the result is exactly maxLen characters long.
// Length is counted in runes rather than bytes, so a multi-byte UTF-8
// character is never split in half. When maxLen is too small to hold any text
// plus the ellipsis (maxLen <= 3), the string is cut without an ellipsis, and
// a non-positive maxLen yields "" — previously these cases panicked with a
// negative slice bound.
func truncate(s string, maxLen int) string {
	const ellipsis = "..."

	// Fast path: byte length bounds rune count, so this needs no conversion.
	if len(s) <= maxLen {
		return s
	}
	if maxLen <= 0 {
		return ""
	}

	runes := []rune(s)
	if len(runes) <= maxLen {
		return s
	}
	if maxLen <= len(ellipsis) {
		// No room for text plus "..."; a hard cut is the most useful output.
		return string(runes[:maxLen])
	}
	return string(runes[:maxLen-len(ellipsis)]) + ellipsis
}
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
# Valkey Vector Store — Example Configuration
2+
#
3+
# Inline the `vector_store` block below into your canonical config.yaml under
4+
# `global.stores`. All field names match the ValkeyVectorStoreConfig schema
5+
# (flat, no sub-sections).
6+
#
7+
# Prerequisites:
8+
# • Valkey running with the valkey-search module enabled.
9+
# • The valkey-bundle image ships the module out of the box:
10+
# docker run -d --name valkey -p 6379:6379 valkey/valkey-bundle:latest
11+
#
12+
# Note: CI and local dev targets use valkey-bundle:unstable because
13+
# valkey-search 1.2.0-rc3+ (text search support) is only in that tag for now.
14+
15+
global:
16+
stores:
17+
vector_store:
18+
enabled: true
19+
backend_type: valkey
20+
valkey:
21+
# Valkey server hostname.
22+
host: "localhost"
23+
24+
# Valkey server port.
25+
port: 6379
26+
27+
# Database index (0–15).
28+
database: 0
29+
30+
# Password for Valkey AUTH (omit or leave empty for no auth).
31+
# password: ""
32+
33+
# Connection timeout in seconds.
34+
connect_timeout: 10
35+
36+
# Prefix applied to all hash keys and index names created by this backend.
37+
collection_prefix: "vsr_vs_"
38+
39+
# Distance metric used by the HNSW index.
40+
# Options: COSINE (default), L2, IP
41+
metric_type: COSINE
42+
43+
# HNSW graph connectivity parameter (M). Higher = better recall, more memory.
44+
index_m: 16
45+
46+
# HNSW build-time search scope (efConstruction). Higher = better index
47+
# quality at the cost of slower indexing.
48+
index_ef_construction: 200
49+
50+
# ---------------------------------------------------------------------------
51+
# Environment-specific overrides (inline into config.yaml as needed)
52+
# ---------------------------------------------------------------------------
53+
#
54+
# Local development:
55+
# valkey:
56+
# host: "localhost"
57+
# port: 6379
58+
# connect_timeout: 5
59+
#
60+
# Production (with auth):
61+
# valkey:
62+
# host: "valkey.production.svc.cluster.local"
63+
# port: 6379
64+
# password: "${VALKEY_PASSWORD}"
65+
# connect_timeout: 10
66+
# metric_type: COSINE
67+
# index_m: 16
68+
# index_ef_construction: 200
69+
#
70+
# Kubernetes (in-cluster):
71+
# valkey:
72+
# host: "valkey-service.valkey-system.svc.cluster.local"
73+
# port: 6379
74+
# connect_timeout: 10

src/semantic-router/cmd/runtime_bootstrap.go

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -574,6 +574,21 @@ func buildVectorStoreBackendConfigs(cfg *config.RouterConfig) vectorstore.Backen
574574
SearchType: lsCfg.SearchType,
575575
},
576576
}
577+
case "valkey":
578+
vCfg := cfg.VectorStore.Valkey
579+
return vectorstore.BackendConfigs{
580+
Valkey: vectorstore.ValkeyBackendConfig{
581+
Host: vCfg.Host,
582+
Port: vCfg.Port,
583+
Password: vCfg.Password,
584+
Database: vCfg.Database,
585+
CollectionPrefix: vCfg.CollectionPrefix,
586+
MetricType: vCfg.MetricType,
587+
IndexM: vCfg.IndexM,
588+
IndexEf: vCfg.IndexEfConstruction,
589+
ConnectTimeout: vCfg.ConnectTimeout,
590+
},
591+
}
577592
default:
578593
return vectorstore.BackendConfigs{}
579594
}

src/semantic-router/go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ require (
3333
github.com/stretchr/testify v1.11.1
3434
github.com/tidwall/gjson v1.18.0
3535
github.com/tidwall/sjson v1.2.5
36-
github.com/valkey-io/valkey-glide/go/v2 v2.2.7
36+
github.com/valkey-io/valkey-glide/go/v2 v2.3.0
3737
github.com/vllm-project/semantic-router/candle-binding v0.0.0-00010101000000-000000000000
3838
github.com/vllm-project/semantic-router/ml-binding v0.0.0-00010101000000-000000000000
3939
github.com/vllm-project/semantic-router/nlp-binding v0.0.0-00010101000000-000000000000

src/semantic-router/go.onnx.mod

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,9 @@ require (
3131
github.com/prometheus/client_model v0.6.2
3232
github.com/redis/go-redis/v9 v9.17.2
3333
github.com/stretchr/testify v1.11.1
34-
github.com/valkey-io/valkey-glide/go/v2 v2.2.7
34+
github.com/tidwall/gjson v1.18.0
35+
github.com/tidwall/sjson v1.2.5
36+
github.com/valkey-io/valkey-glide/go/v2 v2.3.0
3537
github.com/vllm-project/semantic-router/candle-binding v0.0.0-00010101000000-000000000000
3638
github.com/vllm-project/semantic-router/ml-binding v0.0.0-00010101000000-000000000000
3739
github.com/vllm-project/semantic-router/nlp-binding v0.0.0-00010101000000-000000000000
@@ -44,6 +46,7 @@ require (
4446
golang.org/x/net v0.43.0
4547
golang.org/x/sys v0.37.0
4648
google.golang.org/grpc v1.75.0
49+
google.golang.org/protobuf v1.36.9
4750
gopkg.in/yaml.v2 v2.4.0
4851
gopkg.in/yaml.v3 v3.0.1
4952
k8s.io/apimachinery v0.34.2
@@ -100,10 +103,8 @@ require (
100103
github.com/rogpeppe/go-internal v1.13.1 // indirect
101104
github.com/spf13/cast v1.7.1 // indirect
102105
github.com/spf13/pflag v1.0.6 // indirect
103-
github.com/tidwall/gjson v1.18.0 // indirect
104106
github.com/tidwall/match v1.1.1 // indirect
105107
github.com/tidwall/pretty v1.2.1 // indirect
106-
github.com/tidwall/sjson v1.2.5 // indirect
107108
github.com/wk8/go-ordered-map/v2 v2.1.8 // indirect
108109
github.com/x448/float16 v0.8.4 // indirect
109110
github.com/yosida95/uritemplate/v3 v3.0.2 // indirect
@@ -124,7 +125,6 @@ require (
124125
gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect
125126
google.golang.org/genproto/googleapis/api v0.0.0-20250929231259-57b25ae835d4 // indirect
126127
google.golang.org/genproto/googleapis/rpc v0.0.0-20250922171735-9219d122eba9 // indirect
127-
google.golang.org/protobuf v1.36.9 // indirect
128128
gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect
129129
gopkg.in/inf.v0 v0.9.1 // indirect
130130
k8s.io/api v0.34.2 // indirect

src/semantic-router/go.sum

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -350,8 +350,8 @@ github.com/ugorji/go v1.1.7/go.mod h1:kZn38zHttfInRq0xu/PH0az30d+z6vm202qpg1oXVM
350350
github.com/ugorji/go/codec v0.0.0-20181204163529-d75b2dcb6bc8/go.mod h1:VFNgLljTbGfSG7qAOspJ7OScBnGdDN/yBr0sguwnwf0=
351351
github.com/ugorji/go/codec v1.1.7/go.mod h1:Ax+UKWsSmolVDwsd+7N3ZtXu+yMGCf907BLYF3GoBXY=
352352
github.com/urfave/negroni v1.0.0/go.mod h1:Meg73S6kFm/4PpbYdq35yYWoCZ9mS/YSx+lKnmiohz4=
353-
github.com/valkey-io/valkey-glide/go/v2 v2.2.7 h1:xOl37intKSQ1pty1tE4a+kQ5GWrX0Fk0OmYpfo2eVTk=
354-
github.com/valkey-io/valkey-glide/go/v2 v2.2.7/go.mod h1:LK5zmODJa5xnxZndarh1trntExb3GVGJXz4GwDCagho=
353+
github.com/valkey-io/valkey-glide/go/v2 v2.3.0 h1:oer4fOteJYJv3MY3T2t9RIdZHuZzuJYcKyFOyZksFNM=
354+
github.com/valkey-io/valkey-glide/go/v2 v2.3.0/go.mod h1:LK5zmODJa5xnxZndarh1trntExb3GVGJXz4GwDCagho=
355355
github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc=
356356
github.com/valyala/fasthttp v1.6.0/go.mod h1:FstJa9V+Pj9vQ7OJie2qMHdwemEDaDiSdBnvPM1Su9w=
357357
github.com/valyala/fasttemplate v1.0.1/go.mod h1:UQGH1tvbgY+Nz5t2n7tXsz52dQxojPUpymEIMZ47gx8=

src/semantic-router/pkg/config/reference_config_global_test.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,7 @@ func assertReferenceConfigVectorStoreCoverage(t testingT, vectorStore map[string
128128
assertMapCoversStructFields(t, mustMapAt(t, vectorStore, "memory"), reflect.TypeOf(VectorStoreMemoryConfig{}), "global.stores.vector_store.memory")
129129
assertMapCoversStructFields(t, mustMapAt(t, vectorStore, "llama_stack"), reflect.TypeOf(LlamaStackVectorStoreConfig{}), "global.stores.vector_store.llama_stack")
130130
assertMapCoversStructFields(t, mustMapAt(t, vectorStore, "milvus"), reflect.TypeOf(MilvusConfig{}), "global.stores.vector_store.milvus")
131+
assertMapCoversStructFields(t, mustMapAt(t, vectorStore, "valkey"), reflect.TypeOf(ValkeyVectorStoreConfig{}), "global.stores.vector_store.valkey")
131132
}
132133

133134
func assertReferenceConfigIntegrationGlobalCoverage(t testingT, integrations map[string]interface{}) {

0 commit comments

Comments
 (0)