Merged
2 changes: 1 addition & 1 deletion deploy/helm/semantic-router/values.yaml
@@ -3,10 +3,10 @@
# Declare variables to be passed into your templates.

# Global settings
global:

Check warning on line 6 in deploy/helm/semantic-router/values.yaml (GitHub Actions / Run Validation Script): 6:1 [document-start] missing document start "---"
# -- Namespace for all resources (if not specified, uses Release.Namespace)
namespace: ""
# -- Optional registry prefix applied to all images (e.g., mirror in China such as registry.cn-hangzhou.aliyuncs.com)

Check failure on line 9 in deploy/helm/semantic-router/values.yaml (GitHub Actions / Run Validation Script): 9:81 [line-length] line too long (119 > 80 characters)
imageRegistry: ""

# -- Number of replicas for the deployment
@@ -49,7 +49,7 @@

# Pod security context
podSecurityContext: {}
# fsGroup: 2000

Check warning on line 52 in deploy/helm/semantic-router/values.yaml (GitHub Actions / Run Validation Script): 52:3 [comments-indentation] comment not indented like content

# Container security context
securityContext:
@@ -102,7 +102,7 @@
className: ""
# -- Ingress annotations
annotations: {}
# kubernetes.io/ingress.class: nginx

Check warning on line 105 in deploy/helm/semantic-router/values.yaml (GitHub Actions / Run Validation Script): 105:5 [comments-indentation] comment not indented like content
# kubernetes.io/tls-acme: "true"
# -- Ingress hosts configuration
hosts:
@@ -135,7 +135,7 @@
# -- Init container image
image:
repository: ghcr.io/vllm-project/semantic-router/model-downloader
# Leave empty to default to the chart AppVersion; override with a pinned tag if desired

Check failure on line 138 in deploy/helm/semantic-router/values.yaml (GitHub Actions / Run Validation Script): 138:81 [line-length] line too long (91 > 80 characters)
tag: ""
pullPolicy: IfNotPresent
# -- Resource limits for init container
@@ -168,13 +168,13 @@
- name: lora_intent_classifier_bert-base-uncased_model
repo: LLM-Semantic-Router/lora_intent_classifier_bert-base-uncased_model
- name: category_classifier_modernbert-base_model
repo: LLM-Semantic-Router/category_classifier_modernbert-base_model
repo: LLM-Semantic-Router/category_classifier_modernbert-base_trained_model

Check failure on line 171 in deploy/helm/semantic-router/values.yaml (GitHub Actions / Run Validation Script): 171:81 [line-length] line too long (81 > 80 characters)
- name: pii_classifier_modernbert-base_model
repo: LLM-Semantic-Router/pii_classifier_modernbert-base_model
- name: jailbreak_classifier_modernbert-base_model
repo: LLM-Semantic-Router/jailbreak_classifier_modernbert-base_model
- name: pii_classifier_modernbert-base_presidio_token_model
repo: LLM-Semantic-Router/pii_classifier_modernbert-base_presidio_token_model

Check failure on line 177 in deploy/helm/semantic-router/values.yaml (GitHub Actions / Run Validation Script): 177:81 [line-length] line too long (83 > 80 characters)
# LoRA PII detector (for auto-detection feature)
- name: lora_pii_detector_bert-base-uncased_model
repo: LLM-Semantic-Router/lora_pii_detector_bert-base-uncased_model
@@ -253,7 +253,7 @@
size: 10Gi
# -- Annotations for PVC
annotations: {}
# -- Existing claim name (if provided, will use existing PVC instead of creating new one)

Check failure on line 256 in deploy/helm/semantic-router/values.yaml (GitHub Actions / Run Validation Script): 256:81 [line-length] line too long (91 > 80 characters)
existingClaim: ""

# Application configuration
@@ -288,7 +288,7 @@
model_id: "models/jailbreak_classifier_modernbert-base_model"
threshold: 0.7
use_cpu: true
jailbreak_mapping_path: "models/jailbreak_classifier_modernbert-base_model/jailbreak_type_mapping.json"

Check failure on line 291 in deploy/helm/semantic-router/values.yaml (GitHub Actions / Run Validation Script): 291:81 [line-length] line too long (107 > 80 characters)

# Classifier configuration
classifier:
@@ -297,13 +297,13 @@
use_modernbert: false # Use LoRA intent classifier with auto-detection
threshold: 0.6
use_cpu: true
category_mapping_path: "models/lora_intent_classifier_bert-base-uncased_model/category_mapping.json"

Check failure on line 300 in deploy/helm/semantic-router/values.yaml (GitHub Actions / Run Validation Script): 300:81 [line-length] line too long (106 > 80 characters)
pii_model:
model_id: "models/pii_classifier_modernbert-base_presidio_token_model"
use_modernbert: true
threshold: 0.7
use_cpu: true
pii_mapping_path: "models/pii_classifier_modernbert-base_presidio_token_model/pii_type_mapping.json"

Check failure on line 306 in deploy/helm/semantic-router/values.yaml (GitHub Actions / Run Validation Script): 306:81 [line-length] line too long (106 > 80 characters)

# Reasoning families
reasoning_families:
@@ -334,7 +334,7 @@
detailed_goroutine_tracking: true
high_resolution_timing: false
sample_rate: 1.0
duration_buckets: [0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30]

Check failure on line 337 in deploy/helm/semantic-router/values.yaml (GitHub Actions / Run Validation Script): 337:81 [line-length] line too long (94 > 80 characters)
size_buckets: [1, 2, 5, 10, 20, 50, 100, 200]

# Observability configuration
@@ -372,7 +372,7 @@
enum: ["celsius", "fahrenheit"]
description: "Temperature unit"
required: ["location"]
description: "Get current weather information, temperature, conditions, forecast for any location, city, or place. Check weather today, now, current conditions, temperature, rain, sun, cloudy, hot, cold, storm, snow"

Check failure on line 375 in deploy/helm/semantic-router/values.yaml (GitHub Actions / Run Validation Script): 375:81 [line-length] line too long (220 > 80 characters)
category: "weather"
tags: ["weather", "temperature", "forecast", "climate"]
- tool:
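These chart values are consumed by the chart in deploy/helm/semantic-router. For reference, a minimal override sketch is shown below; the global.* key path is taken from the hunks above, while the file name and registry value are illustrative and should be checked against the full values.yaml before use:

    # override-values.yaml (illustrative sketch, not part of this PR)
    global:
      namespace: ""                                        # empty falls back to Release.Namespace
      imageRegistry: "registry.cn-hangzhou.aliyuncs.com"   # optional mirror prefix for all images

It would be applied through the usual Helm flow, for example: helm upgrade --install semantic-router deploy/helm/semantic-router -f override-values.yaml.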
13 changes: 5 additions & 8 deletions deploy/kubernetes/aibrix/semantic-router-values/values.yaml
@@ -123,8 +123,7 @@ config:
- type: "pii"
configuration:
enabled: true
pii_types_allowed:
- "ORGANIZATION" # Allow - scientific terms like "photosynthesis" falsely detected as ORG
pii_types_allowed: []
- type: "system_prompt"
configuration:
enabled: true
@@ -190,8 +189,7 @@ config:
- type: "pii"
configuration:
enabled: true
pii_types_allowed:
- "GPE" # Allow - country/city names in general knowledge questions
pii_types_allowed: []
- type: "semantic-cache"
configuration:
enabled: true
@@ -433,11 +431,11 @@ config:
# Classifier configuration
classifier:
category_model:
model_id: "models/lora_intent_classifier_bert-base-uncased_model"
use_modernbert: false # Use LoRA intent classifier with auto-detection
model_id: "models/category_classifier_modernbert-base_model"
use_modernbert: true
threshold: 0.6
use_cpu: true
category_mapping_path: "models/lora_intent_classifier_bert-base-uncased_model/category_mapping.json"
category_mapping_path: "models/category_classifier_modernbert-base_model/category_mapping.json"
pii_model:
# Support both traditional (modernbert) and LoRA-based PII detection
# When model_type is "auto", the system will auto-detect LoRA configuration
@@ -560,4 +558,3 @@
service_name: "vllm-semantic-router"
service_version: "v0.1.0"
deployment_environment: "development"
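The pii plugin hunks above drop the previous allow-list (ORGANIZATION and GPE were exempted to work around false positives) in favor of an empty list. If specific entity types ever need to be exempted again, the same field takes a list of type names, as in the sketch below; whether an empty list blocks every detected type is an assumption to confirm against the router's plugin documentation:

    - type: "pii"
      configuration:
        enabled: true
        pii_types_allowed:       # illustrative; this PR ships []
          - "ORGANIZATION"       # e.g. tolerate ORG false positives on scientific terms
          - "GPE"                # e.g. tolerate country/city names in general questions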

17 changes: 7 additions & 10 deletions e2e/profiles/ai-gateway/values.yaml
@@ -142,8 +142,7 @@ config:
- type: "pii"
configuration:
enabled: true
pii_types_allowed:
- "ORGANIZATION" # Allow - scientific terms like "photosynthesis" falsely detected as ORG
pii_types_allowed: []
- type: "system_prompt"
configuration:
enabled: true
@@ -446,12 +445,11 @@ config:
- type: "pii"
configuration:
enabled: true
pii_types_allowed:
- "GPE" # Allow - country/city names in general knowledge questions
pii_types_allowed: []
- type: "semantic-cache"
configuration:
enabled: true
similarity_threshold: 0.75
similarity_threshold: 0.95 # High threshold to avoid false cache hits during testing
- type: "system_prompt"
configuration:
enabled: true
@@ -472,7 +470,7 @@
semantic_cache:
enabled: true
backend_type: "memory" # Options: "memory", "milvus", or "hybrid"
similarity_threshold: 0.8
similarity_threshold: 0.95 # High threshold during testing to avoid false cache hits
max_entries: 1000 # Only applies to memory backend
ttl_seconds: 3600
eviction_policy: "fifo"
@@ -509,11 +507,11 @@
# Classifier configuration
classifier:
category_model:
model_id: "models/lora_intent_classifier_bert-base-uncased_model"
use_modernbert: false # Use LoRA intent classifier with auto-detection
model_id: "models/category_classifier_modernbert-base_model"
use_modernbert: true
threshold: 0.6
use_cpu: true
category_mapping_path: "models/lora_intent_classifier_bert-base-uncased_model/category_mapping.json"
category_mapping_path: "models/category_classifier_modernbert-base_model/category_mapping.json"
pii_model:
# Support both traditional (modernbert) and LoRA-based PII detection
# When model_type is "auto", the system will auto-detect LoRA configuration
@@ -646,4 +644,3 @@
service_name: "vllm-semantic-router"
service_version: "v0.1.0"
deployment_environment: "development"

14 changes: 4 additions & 10 deletions e2e/profiles/dynamic-config/profile.go
@@ -108,20 +108,14 @@ func (p *Profile) GetTestCases() []string {
"chat-completions-request",
"chat-completions-stress-request",

// Classification and routing tests
"domain-classify",

// Feature tests
"semantic-cache",
"pii-detection",
"jailbreak-detection",

// Signal-Decision engine tests
"decision-priority-selection", // Priority-based routing
// Signal-Decision engine tests (CRD-specific)
// These tests validate the CRD-based routing approach:
"decision-priority-selection", // Priority-based routing between signals
"plugin-chain-execution", // Plugin ordering and blocking
"rule-condition-logic", // AND/OR operators
"decision-fallback-behavior", // Fallback to default
"plugin-config-variations", // Plugin configuration testing
"rule-condition-logic", // AND/OR operators in signal conditions
"embedding-signal-routing", // EmbeddingSignal-based semantic similarity routing

// Load tests
9 changes: 4 additions & 5 deletions e2e/profiles/dynamic-config/values.yaml
@@ -47,14 +47,14 @@ config:

classifier:
category_model:
model_id: "models/lora_intent_classifier_bert-base-uncased_model"
use_modernbert: false # Use LoRA intent classifier with auto-detection
model_id: "models/category_classifier_modernbert-base_model"
use_modernbert: true
threshold: 0.6
use_cpu: true
category_mapping_path: "models/lora_intent_classifier_bert-base-uncased_model/category_mapping.json"
category_mapping_path: "models/category_classifier_modernbert-base_model/category_mapping.json"
pii_model:
model_id: "models/lora_pii_detector_bert-base-uncased_model"
use_modernbert: false # Use LoRA PII model with auto-detection
use_modernbert: false # Use LoRA PII model
threshold: 0.9
use_cpu: true
pii_mapping_path: "models/pii_classifier_modernbert-base_presidio_token_model/pii_type_mapping.json"
@@ -154,4 +154,3 @@
service_name: "vllm-semantic-router"
service_version: "v0.1.0"
deployment_environment: "development"
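Across the profiles in this PR, the category classifier moves from the LoRA BERT intent model to the ModernBERT category model. The invariant implied by the repeated pattern in these hunks (not stated in separate documentation) is that model_id, use_modernbert, and category_mapping_path change together:

    # ModernBERT variant (what this PR standardizes on)
    category_model:
      model_id: "models/category_classifier_modernbert-base_model"
      use_modernbert: true
      threshold: 0.6
      use_cpu: true
      category_mapping_path: "models/category_classifier_modernbert-base_model/category_mapping.json"

    # LoRA BERT variant (the configuration being replaced)
    category_model:
      model_id: "models/lora_intent_classifier_bert-base-uncased_model"
      use_modernbert: false
      threshold: 0.6
      use_cpu: true
      category_mapping_path: "models/lora_intent_classifier_bert-base-uncased_model/category_mapping.json"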

85 changes: 69 additions & 16 deletions e2e/profiles/llm-d/values.yaml
@@ -8,10 +8,11 @@ config:
# Using LoRA models for better performance with auto-detection
classifier:
category_model:
model_id: models/lora_intent_classifier_bert-base-uncased_model
model_id: "models/category_classifier_modernbert-base_model"
use_modernbert: true
threshold: 0.6
use_modernbert: false
category_mapping_path: models/lora_intent_classifier_bert-base-uncased_model/category_mapping.json
use_cpu: true
category_mapping_path: "models/category_classifier_modernbert-base_model/category_mapping.json"
pii_model:
model_id: ""
threshold: 1.0
@@ -49,8 +50,8 @@
description: "General knowledge and miscellaneous topics"

decisions:
# High priority for math - use specialized model
- name: math_route
# Decision names match E2E test expectations (*_decision format)
- name: math_decision
priority: 100
rules:
operator: OR
@@ -61,8 +62,7 @@
- model: phi4-mini
use_reasoning: false

# High priority for computer science - use llama3
- name: cs_route
- name: computer_science_decision
priority: 100
rules:
operator: OR
@@ -73,23 +73,40 @@
- model: llama3-8b
use_reasoning: false

# Medium priority routes for other technical domains
- name: science_route
- name: physics_decision
priority: 50
rules:
operator: OR
conditions:
- type: domain
name: physics
modelRefs:
- model: llama3-8b
use_reasoning: false

- name: chemistry_decision
priority: 50
rules:
operator: OR
conditions:
- type: domain
name: chemistry
modelRefs:
- model: llama3-8b
use_reasoning: false

- name: biology_decision
priority: 50
rules:
operator: OR
conditions:
- type: domain
name: biology
modelRefs:
- model: llama3-8b
use_reasoning: false

- name: engineering_route
- name: engineering_decision
priority: 50
rules:
operator: OR
@@ -100,7 +117,7 @@
- model: llama3-8b
use_reasoning: false

- name: health_route
- name: health_decision
priority: 50
rules:
operator: OR
@@ -111,39 +128,74 @@
- model: llama3-8b
use_reasoning: false

# Social sciences and humanities
- name: social_sciences_route
- name: psychology_decision
priority: 40
rules:
operator: OR
conditions:
- type: domain
name: psychology
modelRefs:
- model: llama3-8b
use_reasoning: false

- name: economics_decision
priority: 40
rules:
operator: OR
conditions:
- type: domain
name: economics
modelRefs:
- model: llama3-8b
use_reasoning: false

- name: business_decision
priority: 40
rules:
operator: OR
conditions:
- type: domain
name: business
modelRefs:
- model: llama3-8b
use_reasoning: false

- name: humanities_route
- name: history_decision
priority: 40
rules:
operator: OR
conditions:
- type: domain
name: history
modelRefs:
- model: llama3-8b
use_reasoning: false

- name: philosophy_decision
priority: 40
rules:
operator: OR
conditions:
- type: domain
name: philosophy
modelRefs:
- model: llama3-8b
use_reasoning: false

- name: law_decision
priority: 40
rules:
operator: OR
conditions:
- type: domain
name: law
modelRefs:
- model: llama3-8b
use_reasoning: false

# Default fallback route with lowest priority
- name: default_route
# Default fallback
- name: other_decision
priority: 1
rules:
operator: OR
@@ -153,6 +205,7 @@
modelRefs:
- model: llama3-8b
use_reasoning: false

semantic_cache:
enabled: false
prompt_guard:
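All of the renamed decisions above share one shape, so extending coverage to another domain means appending one more entry of the same form. The sketch below uses a hypothetical geography domain purely to illustrate the pattern; any real entry must reference a category the classifier actually emits and a model defined elsewhere in the values file:

    - name: geography_decision     # <domain>_decision, matching the E2E naming convention
      priority: 50                 # same tier as the other technical domains
      rules:
        operator: OR
        conditions:
          - type: domain
            name: geography        # hypothetical; must be a real classifier category
      modelRefs:
        - model: llama3-8b
          use_reasoning: false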
6 changes: 3 additions & 3 deletions e2e/profiles/production-stack/values.yaml
@@ -27,11 +27,11 @@ config:
# Using LoRA models for better performance with auto-detection
classifier:
category_model:
model_id: models/lora_intent_classifier_bert-base-uncased_model
use_modernbert: false
model_id: models/category_classifier_modernbert-base_model
use_modernbert: true
threshold: 0.6
use_cpu: true
category_mapping_path: models/lora_intent_classifier_bert-base-uncased_model/category_mapping.json
category_mapping_path: models/category_classifier_modernbert-base_model/category_mapping.json
pii_model:
# Required for pii-detection test
model_id: models/lora_pii_detector_bert-base-uncased_model
4 changes: 2 additions & 2 deletions e2e/profiles/routing-strategies/values.yaml
@@ -46,8 +46,8 @@ config:
use_cpu: true
category_mapping_path: "models/category_classifier_modernbert-base_model/category_mapping.json"
pii_model:
model_id: "models/pii_classifier_modernbert-base_presidio_token_model"
use_modernbert: true
model_id: "models/lora_pii_detector_bert-base-uncased_model"
use_modernbert: false # Use LoRA PII model
threshold: 0.7
use_cpu: true
pii_mapping_path: "models/pii_classifier_modernbert-base_presidio_token_model/pii_type_mapping.json"