src/semantic-router/pkg/cache/cache.go (4 changes: 3 additions & 1 deletion)

@@ -3,6 +3,8 @@ package cache
 import (
 	"encoding/json"
 	"fmt"
+
+	"github.com/vllm-project/semantic-router/src/semantic-router/pkg/consts"
 )
 
 // ChatMessage represents a message in the OpenAI chat format with role and content
@@ -27,7 +29,7 @@ func ExtractQueryFromOpenAIRequest(requestBody []byte) (string, string, error) {
 	// Find user messages in the conversation
 	var userMessages []string
 	for _, msg := range req.Messages {
-		if msg.Role == "user" {
+		if msg.Role == consts.USER {
 			userMessages = append(userMessages, msg.Content)
 		}
 	}
src/semantic-router/pkg/classification/mcp_classifier.go (2 changes: 1 addition & 1 deletion)

@@ -12,7 +12,7 @@ import (
 	candle_binding "github.com/vllm-project/semantic-router/candle-binding"
 	"github.com/vllm-project/semantic-router/src/semantic-router/pkg/config"
 	mcpclient "github.com/vllm-project/semantic-router/src/semantic-router/pkg/mcp"
-	api "github.com/vllm-project/semantic-router/src/semantic-router/pkg/mcp/api"
+	"github.com/vllm-project/semantic-router/src/semantic-router/pkg/mcp/api"
 	"github.com/vllm-project/semantic-router/src/semantic-router/pkg/observability/logging"
 	"github.com/vllm-project/semantic-router/src/semantic-router/pkg/observability/metrics"
 	"github.com/vllm-project/semantic-router/src/semantic-router/pkg/utils/entropy"
src/semantic-router/pkg/consts/consts.go (13 changes: 13 additions & 0 deletions)

@@ -3,3 +3,16 @@ package consts
 // UnknownLabel is a canonical fallback label value used across the codebase
 // when a more specific value (e.g., model, category, reason) is not available.
 const UnknownLabel = "unknown"
+
+// Decision engine strategies.
+const (
+	PriorityStrategy   = "priority"
+	ConfidenceStrategy = "confidence"
+)
+
+// LLM message types
+const (
+	USER      = "user"
+	ASSISTANT = "assistant"
+	SYSTEM    = "system"
+)
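
Aside: the remaining files in this diff consume these constants in place of the previous string literals ("user", "assistant", "priority"). A minimal usage sketch — the `role` and `strategy` values are hypothetical stand-ins, and only the import path is taken from the diff:

```go
package main

import (
	"fmt"

	"github.com/vllm-project/semantic-router/src/semantic-router/pkg/consts"
)

func main() {
	// Role comparison, as in cache.ExtractQueryFromOpenAIRequest.
	role := "user" // stand-in for msg.Role
	if role == consts.USER {
		fmt.Println("found a user message")
	}

	// Strategy defaulting, as in decision.NewDecisionEngine and the k8s converter.
	strategy := "" // stand-in for an unset config value
	if strategy == "" {
		strategy = consts.PriorityStrategy
	}
	fmt.Println(strategy) // "priority"
}
```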
src/semantic-router/pkg/decision/engine.go (3 changes: 2 additions & 1 deletion)

@@ -21,6 +21,7 @@ import (
 	"sort"
 
 	"github.com/vllm-project/semantic-router/src/semantic-router/pkg/config"
+	"github.com/vllm-project/semantic-router/src/semantic-router/pkg/consts"
 )
 
 // DecisionEngine evaluates routing decisions based on rule combinations
@@ -41,7 +42,7 @@ func NewDecisionEngine(
 	strategy string,
 ) *DecisionEngine {
 	if strategy == "" {
-		strategy = "priority" // default strategy
+		strategy = consts.PriorityStrategy // default strategy
 	}
 	return &DecisionEngine{
 		keywordRules: keywordRules,
src/semantic-router/pkg/k8s/converter.go (3 changes: 2 additions & 1 deletion)

@@ -23,6 +23,7 @@ import (
 
 	"github.com/vllm-project/semantic-router/src/semantic-router/pkg/apis/vllm.ai/v1alpha1"
 	"github.com/vllm-project/semantic-router/src/semantic-router/pkg/config"
+	"github.com/vllm-project/semantic-router/src/semantic-router/pkg/consts"
 )
 
 // CRDConverter converts Kubernetes CRDs to internal configuration structures
@@ -88,7 +89,7 @@ func (c *CRDConverter) ConvertIntelligentRoute(route *v1alpha1.IntelligentRoute)
 		EmbeddingRules: make([]config.EmbeddingRule, 0),
 		Categories:     make([]config.Category, 0),
 		Decisions:      make([]config.Decision, 0),
-		Strategy:       "priority", // Always use priority strategy
+		Strategy:       consts.PriorityStrategy, // Always use priority strategy
 	}
 
 	// Convert keyword signals
src/semantic-router/pkg/utils/http/response.go (101 changes: 56 additions & 45 deletions)

@@ -11,42 +11,46 @@ import (
 	typev3 "github.com/envoyproxy/go-control-plane/envoy/type/v3"
 	"github.com/openai/openai-go"
 
+	"github.com/vllm-project/semantic-router/src/semantic-router/pkg/consts"
 	"github.com/vllm-project/semantic-router/src/semantic-router/pkg/headers"
 	"github.com/vllm-project/semantic-router/src/semantic-router/pkg/observability/logging"
 	"github.com/vllm-project/semantic-router/src/semantic-router/pkg/observability/metrics"
 )
 
 // CreatePIIViolationResponse creates an HTTP response for PII policy violations
 func CreatePIIViolationResponse(model string, deniedPII []string, isStreaming bool, decisionName string) *ext_proc.ProcessingResponse {
+
 	// Record PII violation metrics
 	metrics.RecordPIIViolations(model, deniedPII)
 
 	// Join denied PII types for header
 	deniedPIIStr := strings.Join(deniedPII, ",")
 
 	// Create OpenAI-compatible response format for PII violations
-	unixTimeStep := time.Now().Unix()
-	var responseBody []byte
-	var contentType string
+	var (
+		unixTimeStep = time.Now().Unix()
+		responseBody []byte
+		contentType  string
+	)
 
 	if isStreaming {
 		// For streaming responses, use SSE format
 		contentType = "text/event-stream"
 
 		// Create streaming chunk with security violation message
-		streamChunk := map[string]interface{}{
-			"id":      fmt.Sprintf("chatcmpl-pii-violation-%d", unixTimeStep),
-			"object":  "chat.completion.chunk",
-			"created": unixTimeStep,
-			"model":   model,
-			"choices": []map[string]interface{}{
+		streamChunk := openai.ChatCompletion{
+			ID:      fmt.Sprintf("chatcmpl-pii-violation-%d", unixTimeStep),
+			Object:  "chat.completion",
+			Created: unixTimeStep,
+			Model:   model,
+			Choices: []openai.ChatCompletionChoice{
 				{
-					"index": 0,
-					"delta": map[string]interface{}{
-						"role":    "assistant",
-						"content": fmt.Sprintf("I cannot process this request as it contains personally identifiable information (%v) that is not allowed for the '%s' model according to the configured privacy policy. Please remove any sensitive information and try again.", deniedPII, model),
+					Index: 0,
+					Message: openai.ChatCompletionMessage{
+						Role:    consts.ASSISTANT,
+						Content: fmt.Sprintf("I cannot process this request as it contains personally identifiable information (%v) that is not allowed for the '%s' model according to the configured privacy policy. Please remove any sensitive information and try again.", deniedPII, model),
 					},
-					"finish_reason": "content_filter",
+					FinishReason: "content_filter",
 				},
 			},
 		}
@@ -71,7 +75,7 @@ func CreatePIIViolationResponse(model string, deniedPII []string, isStreaming bo
 			{
 				Index: 0,
 				Message: openai.ChatCompletionMessage{
-					Role:    "assistant",
+					Role:    consts.ASSISTANT,
 					Content: fmt.Sprintf("I cannot process this request as it contains personally identifiable information (%v) that is not allowed for the '%s' model according to the configured privacy policy. Please remove any sensitive information and try again.", deniedPII, model),
 				},
 				FinishReason: "content_filter",
@@ -137,29 +141,32 @@ func CreatePIIViolationResponse(model string, deniedPII []string, isStreaming bo
 
 // CreateJailbreakViolationResponse creates an HTTP response for jailbreak detection violations
 func CreateJailbreakViolationResponse(jailbreakType string, confidence float32, isStreaming bool) *ext_proc.ProcessingResponse {
+
 	// Create OpenAI-compatible response format for jailbreak violations
-	unixTimeStep := time.Now().Unix()
-	var responseBody []byte
-	var contentType string
+	var (
+		unixTimeStep = time.Now().Unix()
+		responseBody []byte
+		contentType  string
+	)
 
 	if isStreaming {
 		// For streaming responses, use SSE format
 		contentType = "text/event-stream"
 
 		// Create streaming chunk with security violation message
-		streamChunk := map[string]interface{}{
-			"id":      fmt.Sprintf("chatcmpl-jailbreak-blocked-%d", unixTimeStep),
-			"object":  "chat.completion.chunk",
-			"created": unixTimeStep,
-			"model":   "security-filter",
-			"choices": []map[string]interface{}{
+		streamChunk := openai.ChatCompletion{
+			ID:      fmt.Sprintf("jailbreakcmpl-pii-violation-%d", unixTimeStep),
+			Object:  "chat.completion",
+			Created: unixTimeStep,
+			Model:   "security-filter",
+			Choices: []openai.ChatCompletionChoice{
 				{
-					"index": 0,
-					"delta": map[string]interface{}{
-						"role":    "assistant",
-						"content": fmt.Sprintf("I cannot process this request as it appears to contain a potential jailbreak attempt (type: %s, confidence: %.3f). Please rephrase your request in a way that complies with our usage policies.", jailbreakType, confidence),
+					Index: 0,
+					Message: openai.ChatCompletionMessage{
+						Role:    consts.ASSISTANT,
+						Content: fmt.Sprintf("I cannot process this request as it appears to contain a potential jailbreak attempt (type: %s, confidence: %.3f). Please rephrase your request in a way that complies with our usage policies.", jailbreakType, confidence),
 					},
-					"finish_reason": "content_filter",
+					FinishReason: "content_filter",
 				},
 			},
 		}
@@ -184,7 +191,7 @@ func CreateJailbreakViolationResponse(jailbreakType string, confidence float32,
 			{
 				Index: 0,
 				Message: openai.ChatCompletionMessage{
-					Role:    "assistant",
+					Role:    consts.ASSISTANT,
 					Content: fmt.Sprintf("I cannot process this request as it appears to contain a potential jailbreak attempt (type: %s, confidence: %.3f). Please rephrase your request in a way that complies with our usage policies.", jailbreakType, confidence),
 				},
 				FinishReason: "content_filter",
@@ -250,8 +257,11 @@
 
 // CreateCacheHitResponse creates an immediate response from cache
 func CreateCacheHitResponse(cachedResponse []byte, isStreaming bool) *ext_proc.ProcessingResponse {
-	var responseBody []byte
-	var contentType string
+
+	var (
+		responseBody []byte
+		contentType  string
+	)
 
 	if isStreaming {
 		// For streaming responses, convert cached JSON to SSE format
@@ -264,25 +274,26 @@ func CreateCacheHitResponse(cachedResponse []byte, isStreaming bool) *ext_proc.P
 			responseBody = []byte("data: {\"error\": \"Failed to convert cached response\"}\n\ndata: [DONE]\n\n")
 		} else {
 			// Convert chat.completion to chat.completion.chunk format
-			streamChunk := map[string]interface{}{
-				"id":      cachedCompletion.ID,
-				"object":  "chat.completion.chunk",
-				"created": cachedCompletion.Created,
-				"model":   cachedCompletion.Model,
-				"choices": []map[string]interface{}{},
+			streamChunk := openai.ChatCompletion{
+				ID:      cachedCompletion.ID,
+				Object:  "chat.completion.chunk",
+				Created: cachedCompletion.Created,
+				Model:   cachedCompletion.Model,
+				Choices: []openai.ChatCompletionChoice{},
 			}
 
 			// Convert choices from message format to delta format
 			for _, choice := range cachedCompletion.Choices {
-				streamChoice := map[string]interface{}{
-					"index": choice.Index,
-					"delta": map[string]interface{}{
-						"role":    choice.Message.Role,
-						"content": choice.Message.Content,
+				streamChoice := &openai.ChatCompletionChoice{
+					Index: choice.Index,
+					Message: openai.ChatCompletionMessage{
+						Role:    consts.ASSISTANT,
+						Content: choice.Message.Content, // Content will be in delta
 					},
-					"finish_reason": choice.FinishReason,
+					FinishReason: choice.FinishReason,
 				}
-				streamChunk["choices"] = append(streamChunk["choices"].([]map[string]interface{}), streamChoice)
+
+				streamChunk.Choices = append(streamChunk.Choices, *streamChoice)
 			}
 
 			chunkJSON, err := json.Marshal(streamChunk)
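
A usage sketch of the cache-hit path (all values hypothetical, field set as shown in the diff); `isStreaming = true` exercises the chat.completion to chat.completion.chunk conversion above:

```go
package main

import (
	"encoding/json"
	"fmt"

	"github.com/openai/openai-go"
	httputil "github.com/vllm-project/semantic-router/src/semantic-router/pkg/utils/http"
)

func main() {
	// Hypothetical cached chat.completion body, as the semantic cache would store it.
	cached, _ := json.Marshal(openai.ChatCompletion{
		ID:      "chatcmpl-cached-123",
		Object:  "chat.completion",
		Created: 1700000000,
		Model:   "example-model",
		Choices: []openai.ChatCompletionChoice{{
			Index:        0,
			Message:      openai.ChatCompletionMessage{Role: "assistant", Content: "cached answer"},
			FinishReason: "stop",
		}},
	})

	// Returns an immediate ext_proc response; streaming converts each choice
	// into chunk form and frames the body as SSE.
	resp := httputil.CreateCacheHitResponse(cached, true)
	fmt.Println(resp != nil) // true
}
```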