vllm-project
diff --git a/deploy/helm/semantic-router/values.yaml b/deploy/helm/semantic-router/values.yaml
Lines changed: 1 addition & 1 deletion
diff --git a/deploy/kubernetes/aibrix/semantic-router-values/values.yaml b/deploy/kubernetes/aibrix/semantic-router-values/values.yaml
Lines changed: 5 additions & 8 deletions
diff --git a/e2e/profiles/ai-gateway/values.yaml b/e2e/profiles/ai-gateway/values.yaml
Lines changed: 7 additions & 10 deletions
diff --git a/e2e/profiles/dynamic-config/profile.go b/e2e/profiles/dynamic-config/profile.go
Lines changed: 4 additions & 10 deletions
diff --git a/e2e/profiles/dynamic-config/values.yaml b/e2e/profiles/dynamic-config/values.yaml
Lines changed: 4 additions & 5 deletions
diff --git a/e2e/profiles/llm-d/values.yaml b/e2e/profiles/llm-d/values.yaml
Lines changed: 69 additions & 17 deletions
diff --git a/e2e/profiles/production-stack/values.yaml b/e2e/profiles/production-stack/values.yaml
Lines changed: 3 additions & 4 deletions
diff --git a/e2e/profiles/routing-strategies/values.yaml b/e2e/profiles/routing-strategies/values.yaml
Lines changed: 2 additions & 2 deletions
@@ -168,7 +168,7 @@ initContainer:
     - name: lora_intent_classifier_bert-base-uncased_model
       repo: LLM-Semantic-Router/lora_intent_classifier_bert-base-uncased_model
     - name: category_classifier_modernbert-base_model
-      repo: LLM-Semantic-Router/category_classifier_modernbert-base_model
+      repo: LLM-Semantic-Router/category_classifier_modernbert-base_trained_model
     - name: pii_classifier_modernbert-base_model
       repo: LLM-Semantic-Router/pii_classifier_modernbert-base_model
     - name: jailbreak_classifier_modernbert-base_model
 
@@ -123,8 +123,7 @@ config:
         - type: "pii"
           configuration:
             enabled: true
-            pii_types_allowed:
-              - "ORGANIZATION"  # Allow - scientific terms like "photosynthesis" falsely detected as ORG
+            pii_types_allowed: []
         - type: "system_prompt"
           configuration:
             enabled: true
@@ -190,8 +189,7 @@ config:
         - type: "pii"
           configuration:
             enabled: true
-            pii_types_allowed:
-              - "GPE"  # Allow - country/city names in general knowledge questions
+            pii_types_allowed: []
         - type: "semantic-cache"
           configuration:
             enabled: true
@@ -433,11 +431,11 @@ config:
   # Classifier configuration
   classifier:
     category_model:
-      model_id: "models/lora_intent_classifier_bert-base-uncased_model"
-      use_modernbert: false  # Use LoRA intent classifier with auto-detection
+      model_id: "models/category_classifier_modernbert-base_model"
+      use_modernbert: true
       threshold: 0.6
       use_cpu: true
-      category_mapping_path: "models/lora_intent_classifier_bert-base-uncased_model/category_mapping.json"
+      category_mapping_path: "models/category_classifier_modernbert-base_model/category_mapping.json"
     pii_model:
       # Support both traditional (modernbert) and LoRA-based PII detection
       # When model_type is "auto", the system will auto-detect LoRA configuration
@@ -560,4 +558,3 @@ config:
         service_name: "vllm-semantic-router"
         service_version: "v0.1.0"
         deployment_environment: "development"
-
@@ -142,8 +142,7 @@ config:
         - type: "pii"
           configuration:
             enabled: true
-            pii_types_allowed:
-              - "ORGANIZATION"  # Allow - scientific terms like "photosynthesis" falsely detected as ORG
+            pii_types_allowed: []
         - type: "system_prompt"
           configuration:
             enabled: true
@@ -446,12 +445,11 @@ config:
         - type: "pii"
           configuration:
             enabled: true
-            pii_types_allowed:
-              - "GPE"  # Allow - country/city names in general knowledge questions
+            pii_types_allowed: []
         - type: "semantic-cache"
           configuration:
             enabled: true
-            similarity_threshold: 0.75
+            similarity_threshold: 0.95  # High threshold to avoid false cache hits during testing
         - type: "system_prompt"
           configuration:
             enabled: true
@@ -472,7 +470,7 @@ config:
   semantic_cache:
     enabled: true
     backend_type: "memory"  # Options: "memory", "milvus", or "hybrid"
-    similarity_threshold: 0.8
+    similarity_threshold: 0.95  # High threshold during testing to avoid false cache hits
     max_entries: 1000  # Only applies to memory backend
     ttl_seconds: 3600
     eviction_policy: "fifo"
@@ -509,11 +507,11 @@ config:
   # Classifier configuration
   classifier:
     category_model:
-      model_id: "models/lora_intent_classifier_bert-base-uncased_model"
-      use_modernbert: false  # Use LoRA intent classifier with auto-detection
+      model_id: "models/category_classifier_modernbert-base_model"
+      use_modernbert: true
       threshold: 0.6
       use_cpu: true
-      category_mapping_path: "models/lora_intent_classifier_bert-base-uncased_model/category_mapping.json"
+      category_mapping_path: "models/category_classifier_modernbert-base_model/category_mapping.json"
     pii_model:
       # Support both traditional (modernbert) and LoRA-based PII detection
       # When model_type is "auto", the system will auto-detect LoRA configuration
@@ -646,4 +644,3 @@ config:
         service_name: "vllm-semantic-router"
         service_version: "v0.1.0"
         deployment_environment: "development"
-
@@ -108,20 +108,14 @@ func (p *Profile) GetTestCases() []string {
 		"chat-completions-request",
 		"chat-completions-stress-request",
 
-		// Classification and routing tests
-		"domain-classify",
-
 		// Feature tests
 		"semantic-cache",
-		"pii-detection",
-		"jailbreak-detection",
 
-		// Signal-Decision engine tests
-		"decision-priority-selection", // Priority-based routing
+		// Signal-Decision engine tests (CRD-specific)
+		// These tests validate the CRD-based routing approach:
+		"decision-priority-selection", // Priority-based routing between signals
 		"plugin-chain-execution",      // Plugin ordering and blocking
-		"rule-condition-logic",        // AND/OR operators
-		"decision-fallback-behavior",  // Fallback to default
-		"plugin-config-variations",    // Plugin configuration testing
+		"rule-condition-logic",        // AND/OR operators in signal conditions
 		"embedding-signal-routing",    // EmbeddingSignal-based semantic similarity routing
 
 		// Load tests
 
@@ -47,14 +47,14 @@ config:
 
   classifier:
     category_model:
-      model_id: "models/lora_intent_classifier_bert-base-uncased_model"
-      use_modernbert: false  # Use LoRA intent classifier with auto-detection
+      model_id: "models/category_classifier_modernbert-base_model"
+      use_modernbert: true
       threshold: 0.6
       use_cpu: true
-      category_mapping_path: "models/lora_intent_classifier_bert-base-uncased_model/category_mapping.json"
+      category_mapping_path: "models/category_classifier_modernbert-base_model/category_mapping.json"
     pii_model:
       model_id: "models/lora_pii_detector_bert-base-uncased_model"
-      use_modernbert: false  # Use LoRA PII model with auto-detection
+      use_modernbert: false  # Use LoRA PII model
       threshold: 0.9
       use_cpu: true
       pii_mapping_path: "models/pii_classifier_modernbert-base_presidio_token_model/pii_type_mapping.json"
@@ -154,4 +154,3 @@ resources:
         service_name: "vllm-semantic-router"
         service_version: "v0.1.0"
         deployment_environment: "development"
-
@@ -8,10 +8,11 @@ config:
-  # Using LoRA models for better performance with auto-detection
+  # Category classifier uses the ModernBERT model (use_modernbert: true)
   classifier:
     category_model:
-      model_id: models/lora_intent_classifier_bert-base-uncased_model
+      model_id: "models/category_classifier_modernbert-base_model"
+      use_modernbert: true
       threshold: 0.6
-      use_modernbert: false
-      category_mapping_path: models/lora_intent_classifier_bert-base-uncased_model/category_mapping.json
+      use_cpu: true
+      category_mapping_path: "models/category_classifier_modernbert-base_model/category_mapping.json"
     pii_model:
       model_id: ""
       threshold: 1.0
@@ -49,8 +50,8 @@ config:
       description: "General knowledge and miscellaneous topics"
 
   decisions:
-    # High priority for math - use specialized model
-    - name: math_route
+    # Decision names match E2E test expectations (*_decision format)
+    - name: math_decision
       priority: 100
       rules:
         operator: OR
@@ -61,8 +62,7 @@ config:
         - model: phi4-mini
           use_reasoning: false
 
-    # High priority for computer science - use llama3
-    - name: cs_route
+    - name: computer_science_decision
       priority: 100
       rules:
         operator: OR
@@ -73,23 +73,40 @@ config:
         - model: llama3-8b
           use_reasoning: false
 
-    # Medium priority routes for other technical domains
-    - name: science_route
+    - name: physics_decision
       priority: 50
       rules:
         operator: OR
         conditions:
           - type: domain
             name: physics
+      modelRefs:
+        - model: llama3-8b
+          use_reasoning: false
+
+    - name: chemistry_decision
+      priority: 50
+      rules:
+        operator: OR
+        conditions:
           - type: domain
             name: chemistry
+      modelRefs:
+        - model: llama3-8b
+          use_reasoning: false
+
+    - name: biology_decision
+      priority: 50
+      rules:
+        operator: OR
+        conditions:
           - type: domain
             name: biology
       modelRefs:
         - model: llama3-8b
           use_reasoning: false
 
-    - name: engineering_route
+    - name: engineering_decision
       priority: 50
       rules:
         operator: OR
@@ -100,7 +117,7 @@ config:
         - model: llama3-8b
           use_reasoning: false
 
-    - name: health_route
+    - name: health_decision
       priority: 50
       rules:
         operator: OR
@@ -111,39 +128,74 @@ config:
         - model: llama3-8b
           use_reasoning: false
 
-    # Social sciences and humanities
-    - name: social_sciences_route
+    - name: psychology_decision
       priority: 40
       rules:
         operator: OR
         conditions:
           - type: domain
             name: psychology
+      modelRefs:
+        - model: llama3-8b
+          use_reasoning: false
+
+    - name: economics_decision
+      priority: 40
+      rules:
+        operator: OR
+        conditions:
           - type: domain
             name: economics
+      modelRefs:
+        - model: llama3-8b
+          use_reasoning: false
+
+    - name: business_decision
+      priority: 40
+      rules:
+        operator: OR
+        conditions:
           - type: domain
             name: business
       modelRefs:
         - model: llama3-8b
           use_reasoning: false
 
-    - name: humanities_route
+    - name: history_decision
       priority: 40
       rules:
         operator: OR
         conditions:
           - type: domain
             name: history
+      modelRefs:
+        - model: llama3-8b
+          use_reasoning: false
+
+    - name: philosophy_decision
+      priority: 40
+      rules:
+        operator: OR
+        conditions:
           - type: domain
             name: philosophy
+      modelRefs:
+        - model: llama3-8b
+          use_reasoning: false
+
+    - name: law_decision
+      priority: 40
+      rules:
+        operator: OR
+        conditions:
           - type: domain
             name: law
       modelRefs:
         - model: llama3-8b
           use_reasoning: false
 
-    # Default fallback route with lowest priority
-    - name: default_route
+    # Default fallback
+    - name: other_decision
       priority: 1
       rules:
         operator: OR
@@ -153,6 +205,7 @@ config:
       modelRefs:
         - model: llama3-8b
           use_reasoning: false
+
   semantic_cache:
     enabled: false
   prompt_guard:
@@ -164,6 +217,5 @@ config:
     threshold: 0.6
     use_cpu: true
 
-# Keep consistent with the default chart: initContainer, model downloads, and PVC use chart defaults
 image:
   pullPolicy: IfNotPresent
@@ -27,11 +27,11 @@ config:
-  # Using LoRA models for better performance with auto-detection
+  # Category classifier uses ModernBERT; the PII model below still uses the LoRA detector
   classifier:
     category_model:
-      model_id: models/lora_intent_classifier_bert-base-uncased_model
-      use_modernbert: false
+      model_id: models/category_classifier_modernbert-base_model
+      use_modernbert: true
       threshold: 0.6
       use_cpu: true
-      category_mapping_path: models/lora_intent_classifier_bert-base-uncased_model/category_mapping.json
+      category_mapping_path: models/category_classifier_modernbert-base_model/category_mapping.json
     pii_model:
       # Required for pii-detection test
       model_id: models/lora_pii_detector_bert-base-uncased_model
@@ -398,6 +398,5 @@ config:
     tracing:
       enabled: false
 
-# Keep consistent with the default chart: initContainer, model downloads, and PVC use chart defaults
 image:
   pullPolicy: IfNotPresent
@@ -46,8 +46,8 @@ config:
       use_cpu: true
       category_mapping_path: "models/category_classifier_modernbert-base_model/category_mapping.json"
     pii_model:
-      model_id: "models/pii_classifier_modernbert-base_presidio_token_model"
-      use_modernbert: true
+      model_id: "models/lora_pii_detector_bert-base-uncased_model"
+      use_modernbert: false  # Use LoRA PII model
       threshold: 0.7
       use_cpu: true
       pii_mapping_path: "models/pii_classifier_modernbert-base_presidio_token_model/pii_type_mapping.json"