A Go implementation of semantic caching for LLM responses using Valkey vector search.
- Semantic similarity matching using embeddings
- Configurable similarity threshold
- TTL-based cache expiration
- HTTP REST API
- Minimal dependencies (only the go-redis client)
cd ../../../deployment/docker
docker-compose up -d valkey-stack

go mod download
go run main.go

export VALKEY_HOST=localhost
export VALKEY_PORT=6379
export SIMILARITY_THRESHOLD=0.92
export CACHE_TTL_SECONDS=86400
export PORT=8000

# First query - cache miss
curl -X POST http://localhost:8000/query \
-H "Content-Type: application/json" \
-d '{"query": "What is machine learning?"}'
# Check stats
curl http://localhost:8000/cache/stats

{
  "query": "What is machine learning?",
  "skip_cache": false
}

Health check endpoint.
Get cache statistics.
Clear all cache entries.
.
├── main.go # Complete implementation
├── go.mod # Go module file
└── README.md # This file
| Variable | Default | Description |
|---|---|---|
| VALKEY_HOST | localhost | Valkey server host |
| VALKEY_PORT | 6379 | Valkey server port |
| SIMILARITY_THRESHOLD | 0.92 | Cache hit threshold |
| CACHE_TTL_SECONDS | 86400 | Cache TTL (24 hours) |
| PORT | 8000 | API server port |
package main
import (
	"context"
	"fmt"
)
func main() {
config := Config{
ValkeyHost: "localhost",
ValkeyPort: 6379,
SimilarityThreshold: 0.92,
CacheTTLSeconds: 86400,
EmbeddingDimensions: 1536,
}
cache := NewSemanticCache(config)
ctx := context.Background()
if err := cache.Initialize(ctx); err != nil {
panic(err)
}
// Lookup
embedding := generateEmbedding("What is AI?")
result, _ := cache.Lookup(ctx, embedding)
if result.Hit {
fmt.Println("Cache hit!", result.Response)
} else {
// Store
cache.Store(ctx, "What is AI?", "AI is...", "gpt-4", embedding)
}
}This example uses mock embeddings for demonstration. In production, integrate with an embedding API like OpenAI:
import "github.com/sashabaranov/go-openai"
// getEmbedding returns the embedding vector for text using OpenAI's
// small embedding model. It wraps API errors with context and guards
// against an empty response body before indexing into it.
func getEmbedding(client *openai.Client, text string) ([]float32, error) {
	resp, err := client.CreateEmbeddings(context.Background(), openai.EmbeddingRequest{
		Input: []string{text},
		Model: openai.SmallEmbedding3,
	})
	if err != nil {
		return nil, fmt.Errorf("creating embedding: %w", err)
	}
	// The API returns one Data element per input; never assume it is non-empty.
	if len(resp.Data) == 0 {
		return nil, fmt.Errorf("embedding response contained no data")
	}
	return resp.Data[0].Embedding, nil
}