Merged
2 changes: 1 addition & 1 deletion deploy/helm/semantic-router/values.yaml
@@ -3,10 +3,10 @@
# Declare variables to be passed into your templates.

# Global settings
global:

Check warning on line 6 in deploy/helm/semantic-router/values.yaml (GitHub Actions / Run Validation Script): 6:1 [document-start] missing document start "---"
# -- Namespace for all resources (if not specified, uses Release.Namespace)
namespace: ""
# -- Optional registry prefix applied to all images (e.g., mirror in China such as registry.cn-hangzhou.aliyuncs.com)

Check failure on line 9 in deploy/helm/semantic-router/values.yaml (GitHub Actions / Run Validation Script): 9:81 [line-length] line too long (119 > 80 characters)
imageRegistry: ""

# -- Number of replicas for the deployment
@@ -49,7 +49,7 @@

# Pod security context
podSecurityContext: {}
# fsGroup: 2000

Check warning on line 52 in deploy/helm/semantic-router/values.yaml (GitHub Actions / Run Validation Script): 52:3 [comments-indentation] comment not indented like content

# Container security context
securityContext:
@@ -102,7 +102,7 @@
className: ""
# -- Ingress annotations
annotations: {}
# kubernetes.io/ingress.class: nginx

Check warning on line 105 in deploy/helm/semantic-router/values.yaml (GitHub Actions / Run Validation Script): 105:5 [comments-indentation] comment not indented like content
# kubernetes.io/tls-acme: "true"
# -- Ingress hosts configuration
hosts:
@@ -135,7 +135,7 @@
# -- Init container image
image:
repository: ghcr.io/vllm-project/semantic-router/model-downloader
# Leave empty to default to the chart AppVersion; override with a pinned tag if desired

Check failure on line 138 in deploy/helm/semantic-router/values.yaml (GitHub Actions / Run Validation Script): 138:81 [line-length] line too long (91 > 80 characters)
tag: ""
pullPolicy: IfNotPresent
# -- Resource limits for init container
@@ -168,13 +168,13 @@
- name: lora_intent_classifier_bert-base-uncased_model
repo: LLM-Semantic-Router/lora_intent_classifier_bert-base-uncased_model
- name: category_classifier_modernbert-base_model
repo: LLM-Semantic-Router/category_classifier_modernbert-base_model
repo: LLM-Semantic-Router/category_classifier_modernbert-base_trained_model

Check failure on line 171 in deploy/helm/semantic-router/values.yaml (GitHub Actions / Run Validation Script): 171:81 [line-length] line too long (81 > 80 characters)
- name: pii_classifier_modernbert-base_model
repo: LLM-Semantic-Router/pii_classifier_modernbert-base_model
- name: jailbreak_classifier_modernbert-base_model
repo: LLM-Semantic-Router/jailbreak_classifier_modernbert-base_model
- name: pii_classifier_modernbert-base_presidio_token_model
repo: LLM-Semantic-Router/pii_classifier_modernbert-base_presidio_token_model

Check failure on line 177 in deploy/helm/semantic-router/values.yaml (GitHub Actions / Run Validation Script): 177:81 [line-length] line too long (83 > 80 characters)
# LoRA PII detector (for auto-detection feature)
- name: lora_pii_detector_bert-base-uncased_model
repo: LLM-Semantic-Router/lora_pii_detector_bert-base-uncased_model
@@ -253,7 +253,7 @@
size: 10Gi
# -- Annotations for PVC
annotations: {}
# -- Existing claim name (if provided, will use existing PVC instead of creating new one)

Check failure on line 256 in deploy/helm/semantic-router/values.yaml (GitHub Actions / Run Validation Script): 256:81 [line-length] line too long (91 > 80 characters)
existingClaim: ""

# Application configuration
@@ -288,7 +288,7 @@
model_id: "models/jailbreak_classifier_modernbert-base_model"
threshold: 0.7
use_cpu: true
jailbreak_mapping_path: "models/jailbreak_classifier_modernbert-base_model/jailbreak_type_mapping.json"

Check failure on line 291 in deploy/helm/semantic-router/values.yaml (GitHub Actions / Run Validation Script): 291:81 [line-length] line too long (107 > 80 characters)

# Classifier configuration
classifier:
@@ -297,13 +297,13 @@
use_modernbert: false # Use LoRA intent classifier with auto-detection
threshold: 0.6
use_cpu: true
category_mapping_path: "models/lora_intent_classifier_bert-base-uncased_model/category_mapping.json"

Check failure on line 300 in deploy/helm/semantic-router/values.yaml (GitHub Actions / Run Validation Script): 300:81 [line-length] line too long (106 > 80 characters)
pii_model:
model_id: "models/pii_classifier_modernbert-base_presidio_token_model"
use_modernbert: true
threshold: 0.7
use_cpu: true
pii_mapping_path: "models/pii_classifier_modernbert-base_presidio_token_model/pii_type_mapping.json"

Check failure on line 306 in deploy/helm/semantic-router/values.yaml (GitHub Actions / Run Validation Script): 306:81 [line-length] line too long (106 > 80 characters)

# Reasoning families
reasoning_families:
@@ -334,7 +334,7 @@
detailed_goroutine_tracking: true
high_resolution_timing: false
sample_rate: 1.0
duration_buckets: [0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30]

Check failure on line 337 in deploy/helm/semantic-router/values.yaml (GitHub Actions / Run Validation Script): 337:81 [line-length] line too long (94 > 80 characters)
size_buckets: [1, 2, 5, 10, 20, 50, 100, 200]

# Observability configuration
@@ -372,7 +372,7 @@
enum: ["celsius", "fahrenheit"]
description: "Temperature unit"
required: ["location"]
description: "Get current weather information, temperature, conditions, forecast for any location, city, or place. Check weather today, now, current conditions, temperature, rain, sun, cloudy, hot, cold, storm, snow"

Check failure on line 375 in deploy/helm/semantic-router/values.yaml (GitHub Actions / Run Validation Script): 375:81 [line-length] line too long (220 > 80 characters)
category: "weather"
tags: ["weather", "temperature", "forecast", "climate"]
- tool:
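These chart values are consumed by the chart in deploy/helm/semantic-router. For reference, a minimal override sketch is shown below; the global.* key path is taken from the hunks above, while the file name and registry value are illustrative and should be checked against the full values.yaml before use:

    # override-values.yaml (illustrative sketch, not part of this PR)
    global:
      namespace: ""                                        # empty falls back to Release.Namespace
      imageRegistry: "registry.cn-hangzhou.aliyuncs.com"   # optional mirror prefix for all images

It would be applied through the usual Helm flow, for example: helm upgrade --install semantic-router deploy/helm/semantic-router -f override-values.yaml.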
13 changes: 5 additions & 8 deletions deploy/kubernetes/aibrix/semantic-router-values/values.yaml
@@ -123,8 +123,7 @@ config:
- type: "pii"
configuration:
enabled: true
pii_types_allowed:
- "ORGANIZATION" # Allow - scientific terms like "photosynthesis" falsely detected as ORG
pii_types_allowed: []
- type: "system_prompt"
configuration:
enabled: true
@@ -190,8 +189,7 @@ config:
- type: "pii"
configuration:
enabled: true
pii_types_allowed:
- "GPE" # Allow - country/city names in general knowledge questions
pii_types_allowed: []
- type: "semantic-cache"
configuration:
enabled: true
@@ -433,11 +431,11 @@ config:
# Classifier configuration
classifier:
category_model:
model_id: "models/lora_intent_classifier_bert-base-uncased_model"
use_modernbert: false # Use LoRA intent classifier with auto-detection
model_id: "models/category_classifier_modernbert-base_model"
use_modernbert: true
threshold: 0.6
use_cpu: true
category_mapping_path: "models/lora_intent_classifier_bert-base-uncased_model/category_mapping.json"
category_mapping_path: "models/category_classifier_modernbert-base_model/category_mapping.json"
pii_model:
# Support both traditional (modernbert) and LoRA-based PII detection
# When model_type is "auto", the system will auto-detect LoRA configuration
@@ -560,4 +558,3 @@
service_name: "vllm-semantic-router"
service_version: "v0.1.0"
deployment_environment: "development"
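The pii plugin hunks above drop the previous allow-list (ORGANIZATION and GPE were exempted to work around false positives) in favor of an empty list. If specific entity types ever need to be exempted again, the same field takes a list of type names, as in the sketch below; whether an empty list blocks every detected type is an assumption to confirm against the router's plugin documentation:

    - type: "pii"
      configuration:
        enabled: true
        pii_types_allowed:       # illustrative; this PR ships []
          - "ORGANIZATION"       # e.g. tolerate ORG false positives on scientific terms
          - "GPE"                # e.g. tolerate country/city names in general questions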

17 changes: 7 additions & 10 deletions e2e/profiles/ai-gateway/values.yaml
@@ -142,8 +142,7 @@ config:
- type: "pii"
configuration:
enabled: true
pii_types_allowed:
- "ORGANIZATION" # Allow - scientific terms like "photosynthesis" falsely detected as ORG
pii_types_allowed: []
- type: "system_prompt"
configuration:
enabled: true
@@ -446,12 +445,11 @@ config:
- type: "pii"
configuration:
enabled: true
pii_types_allowed:
- "GPE" # Allow - country/city names in general knowledge questions
pii_types_allowed: []
- type: "semantic-cache"
configuration:
enabled: true
similarity_threshold: 0.75
similarity_threshold: 0.95 # High threshold to avoid false cache hits during testing
- type: "system_prompt"
configuration:
enabled: true
@@ -472,7 +470,7 @@
semantic_cache:
enabled: true
backend_type: "memory" # Options: "memory", "milvus", or "hybrid"
similarity_threshold: 0.8
similarity_threshold: 0.95 # High threshold during testing to avoid false cache hits
max_entries: 1000 # Only applies to memory backend
ttl_seconds: 3600
eviction_policy: "fifo"
@@ -509,11 +507,11 @@
# Classifier configuration
classifier:
category_model:
model_id: "models/lora_intent_classifier_bert-base-uncased_model"
use_modernbert: false # Use LoRA intent classifier with auto-detection
model_id: "models/category_classifier_modernbert-base_model"
use_modernbert: true
threshold: 0.6
use_cpu: true
category_mapping_path: "models/lora_intent_classifier_bert-base-uncased_model/category_mapping.json"
category_mapping_path: "models/category_classifier_modernbert-base_model/category_mapping.json"
pii_model:
# Support both traditional (modernbert) and LoRA-based PII detection
# When model_type is "auto", the system will auto-detect LoRA configuration
@@ -646,4 +644,3 @@
service_name: "vllm-semantic-router"
service_version: "v0.1.0"
deployment_environment: "development"

14 changes: 4 additions & 10 deletions e2e/profiles/dynamic-config/profile.go
@@ -108,20 +108,14 @@ func (p *Profile) GetTestCases() []string {
"chat-completions-request",
"chat-completions-stress-request",

// Classification and routing tests
"domain-classify",

// Feature tests
"semantic-cache",
"pii-detection",
"jailbreak-detection",

// Signal-Decision engine tests
"decision-priority-selection", // Priority-based routing
// Signal-Decision engine tests (CRD-specific)
// These tests validate the CRD-based routing approach:
"decision-priority-selection", // Priority-based routing between signals
"plugin-chain-execution", // Plugin ordering and blocking
"rule-condition-logic", // AND/OR operators
"decision-fallback-behavior", // Fallback to default
"plugin-config-variations", // Plugin configuration testing
"rule-condition-logic", // AND/OR operators in signal conditions
"embedding-signal-routing", // EmbeddingSignal-based semantic similarity routing

// Load tests
9 changes: 4 additions & 5 deletions e2e/profiles/dynamic-config/values.yaml
@@ -47,14 +47,14 @@ config:

classifier:
category_model:
model_id: "models/lora_intent_classifier_bert-base-uncased_model"
use_modernbert: false # Use LoRA intent classifier with auto-detection
model_id: "models/category_classifier_modernbert-base_model"
use_modernbert: true
threshold: 0.6
use_cpu: true
category_mapping_path: "models/lora_intent_classifier_bert-base-uncased_model/category_mapping.json"
category_mapping_path: "models/category_classifier_modernbert-base_model/category_mapping.json"
pii_model:
model_id: "models/lora_pii_detector_bert-base-uncased_model"
use_modernbert: false # Use LoRA PII model with auto-detection
use_modernbert: false # Use LoRA PII model
threshold: 0.9
use_cpu: true
pii_mapping_path: "models/pii_classifier_modernbert-base_presidio_token_model/pii_type_mapping.json"
@@ -154,4 +154,3 @@
service_name: "vllm-semantic-router"
service_version: "v0.1.0"
deployment_environment: "development"
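Across the profiles in this PR, the category classifier moves from the LoRA BERT intent model to the ModernBERT category model. The invariant implied by the repeated pattern in these hunks (not stated in separate documentation) is that model_id, use_modernbert, and category_mapping_path change together:

    # ModernBERT variant (what this PR standardizes on)
    category_model:
      model_id: "models/category_classifier_modernbert-base_model"
      use_modernbert: true
      threshold: 0.6
      use_cpu: true
      category_mapping_path: "models/category_classifier_modernbert-base_model/category_mapping.json"

    # LoRA BERT variant (the configuration being replaced)
    category_model:
      model_id: "models/lora_intent_classifier_bert-base-uncased_model"
      use_modernbert: false
      threshold: 0.6
      use_cpu: true
      category_mapping_path: "models/lora_intent_classifier_bert-base-uncased_model/category_mapping.json"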

85 changes: 69 additions & 16 deletions e2e/profiles/llm-d/values.yaml
@@ -8,10 +8,11 @@ config:
# Using LoRA models for better performance with auto-detection
classifier:
category_model:
model_id: models/lora_intent_classifier_bert-base-uncased_model
model_id: "models/category_classifier_modernbert-base_model"
use_modernbert: true
threshold: 0.6
use_modernbert: false
category_mapping_path: models/lora_intent_classifier_bert-base-uncased_model/category_mapping.json
use_cpu: true
category_mapping_path: "models/category_classifier_modernbert-base_model/category_mapping.json"
pii_model:
model_id: ""
threshold: 1.0
@@ -49,8 +50,8 @@
description: "General knowledge and miscellaneous topics"

decisions:
# High priority for math - use specialized model
- name: math_route
# Decision names match E2E test expectations (*_decision format)
- name: math_decision
priority: 100
rules:
operator: OR
@@ -61,8 +62,7 @@
- model: phi4-mini
use_reasoning: false

# High priority for computer science - use llama3
- name: cs_route
- name: computer_science_decision
priority: 100
rules:
operator: OR
@@ -73,23 +73,40 @@
- model: llama3-8b
use_reasoning: false

# Medium priority routes for other technical domains
- name: science_route
- name: physics_decision
priority: 50
rules:
operator: OR
conditions:
- type: domain
name: physics
modelRefs:
- model: llama3-8b
use_reasoning: false

- name: chemistry_decision
priority: 50
rules:
operator: OR
conditions:
- type: domain
name: chemistry
modelRefs:
- model: llama3-8b
use_reasoning: false

- name: biology_decision
priority: 50
rules:
operator: OR
conditions:
- type: domain
name: biology
modelRefs:
- model: llama3-8b
use_reasoning: false

- name: engineering_route
- name: engineering_decision
priority: 50
rules:
operator: OR
@@ -100,7 +117,7 @@
- model: llama3-8b
use_reasoning: false

- name: health_route
- name: health_decision
priority: 50
rules:
operator: OR
@@ -111,39 +128,74 @@
- model: llama3-8b
use_reasoning: false

# Social sciences and humanities
- name: social_sciences_route
- name: psychology_decision
priority: 40
rules:
operator: OR
conditions:
- type: domain
name: psychology
modelRefs:
- model: llama3-8b
use_reasoning: false

- name: economics_decision
priority: 40
rules:
operator: OR
conditions:
- type: domain
name: economics
modelRefs:
- model: llama3-8b
use_reasoning: false

- name: business_decision
priority: 40
rules:
operator: OR
conditions:
- type: domain
name: business
modelRefs:
- model: llama3-8b
use_reasoning: false

- name: humanities_route
- name: history_decision
priority: 40
rules:
operator: OR
conditions:
- type: domain
name: history
modelRefs:
- model: llama3-8b
use_reasoning: false

- name: philosophy_decision
priority: 40
rules:
operator: OR
conditions:
- type: domain
name: philosophy
modelRefs:
- model: llama3-8b
use_reasoning: false

- name: law_decision
priority: 40
rules:
operator: OR
conditions:
- type: domain
name: law
modelRefs:
- model: llama3-8b
use_reasoning: false

# Default fallback route with lowest priority
- name: default_route
# Default fallback
- name: other_decision
priority: 1
rules:
operator: OR
@@ -153,6 +205,7 @@
modelRefs:
- model: llama3-8b
use_reasoning: false

semantic_cache:
enabled: false
prompt_guard:
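All of the renamed decisions above share one shape, so extending coverage to another domain means appending one more entry of the same form. The sketch below uses a hypothetical geography domain purely to illustrate the pattern; any real entry must reference a category the classifier actually emits and a model defined elsewhere in the values file:

    - name: geography_decision     # <domain>_decision, matching the E2E naming convention
      priority: 50                 # same tier as the other technical domains
      rules:
        operator: OR
        conditions:
          - type: domain
            name: geography        # hypothetical; must be a real classifier category
      modelRefs:
        - model: llama3-8b
          use_reasoning: false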
6 changes: 3 additions & 3 deletions e2e/profiles/production-stack/values.yaml
@@ -27,11 +27,11 @@ config:
# Using LoRA models for better performance with auto-detection
classifier:
category_model:
model_id: models/lora_intent_classifier_bert-base-uncased_model
use_modernbert: false
model_id: models/category_classifier_modernbert-base_model
use_modernbert: true
threshold: 0.6
use_cpu: true
category_mapping_path: models/lora_intent_classifier_bert-base-uncased_model/category_mapping.json
category_mapping_path: models/category_classifier_modernbert-base_model/category_mapping.json
pii_model:
# Required for pii-detection test
model_id: models/lora_pii_detector_bert-base-uncased_model
4 changes: 2 additions & 2 deletions e2e/profiles/routing-strategies/values.yaml
@@ -46,8 +46,8 @@ config:
use_cpu: true
category_mapping_path: "models/category_classifier_modernbert-base_model/category_mapping.json"
pii_model:
model_id: "models/pii_classifier_modernbert-base_presidio_token_model"
use_modernbert: true
model_id: "models/lora_pii_detector_bert-base-uncased_model"
use_modernbert: false # Use LoRA PII model
threshold: 0.7
use_cpu: true
pii_mapping_path: "models/pii_classifier_modernbert-base_presidio_token_model/pii_type_mapping.json"