Skip to content

Commit 77588d8

Browse files
author
Yehudit Kerido
committed
Fix Domain Classifier Returning Empty or Wrong Classifications
Signed-off-by: Yehudit Kerido <[email protected]>
1 parent 7d7ac5d commit 77588d8

File tree

22 files changed

+1676
-259
lines changed

22 files changed

+1676
-259
lines changed

deploy/helm/semantic-router/values.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -168,7 +168,7 @@ initContainer:
168168
- name: lora_intent_classifier_bert-base-uncased_model
169169
repo: LLM-Semantic-Router/lora_intent_classifier_bert-base-uncased_model
170170
- name: category_classifier_modernbert-base_model
171-
repo: LLM-Semantic-Router/category_classifier_modernbert-base_model
171+
repo: LLM-Semantic-Router/category_classifier_modernbert-base_trained_model
172172
- name: pii_classifier_modernbert-base_model
173173
repo: LLM-Semantic-Router/pii_classifier_modernbert-base_model
174174
- name: jailbreak_classifier_modernbert-base_model

deploy/kubernetes/aibrix/semantic-router-values/values.yaml

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -123,8 +123,7 @@ config:
123123
- type: "pii"
124124
configuration:
125125
enabled: true
126-
pii_types_allowed:
127-
- "ORGANIZATION" # Allow - scientific terms like "photosynthesis" falsely detected as ORG
126+
pii_types_allowed: []
128127
- type: "system_prompt"
129128
configuration:
130129
enabled: true
@@ -190,8 +189,7 @@ config:
190189
- type: "pii"
191190
configuration:
192191
enabled: true
193-
pii_types_allowed:
194-
- "GPE" # Allow - country/city names in general knowledge questions
192+
pii_types_allowed: []
195193
- type: "semantic-cache"
196194
configuration:
197195
enabled: true
@@ -433,11 +431,11 @@ config:
433431
# Classifier configuration
434432
classifier:
435433
category_model:
436-
model_id: "models/lora_intent_classifier_bert-base-uncased_model"
437-
use_modernbert: false # Use LoRA intent classifier with auto-detection
434+
model_id: "models/category_classifier_modernbert-base_model"
435+
use_modernbert: true
438436
threshold: 0.6
439437
use_cpu: true
440-
category_mapping_path: "models/lora_intent_classifier_bert-base-uncased_model/category_mapping.json"
438+
category_mapping_path: "models/category_classifier_modernbert-base_model/category_mapping.json"
441439
pii_model:
442440
# Support both traditional (modernbert) and LoRA-based PII detection
443441
# When model_type is "auto", the system will auto-detect LoRA configuration
@@ -560,4 +558,3 @@ config:
560558
service_name: "vllm-semantic-router"
561559
service_version: "v0.1.0"
562560
deployment_environment: "development"
563-

e2e/profiles/ai-gateway/values.yaml

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -142,8 +142,7 @@ config:
142142
- type: "pii"
143143
configuration:
144144
enabled: true
145-
pii_types_allowed:
146-
- "ORGANIZATION" # Allow - scientific terms like "photosynthesis" falsely detected as ORG
145+
pii_types_allowed: []
147146
- type: "system_prompt"
148147
configuration:
149148
enabled: true
@@ -446,12 +445,11 @@ config:
446445
- type: "pii"
447446
configuration:
448447
enabled: true
449-
pii_types_allowed:
450-
- "GPE" # Allow - country/city names in general knowledge questions
448+
pii_types_allowed: []
451449
- type: "semantic-cache"
452450
configuration:
453451
enabled: true
454-
similarity_threshold: 0.75
452+
similarity_threshold: 0.95 # High threshold to avoid false cache hits during testing
455453
- type: "system_prompt"
456454
configuration:
457455
enabled: true
@@ -472,7 +470,7 @@ config:
472470
semantic_cache:
473471
enabled: true
474472
backend_type: "memory" # Options: "memory", "milvus", or "hybrid"
475-
similarity_threshold: 0.8
473+
similarity_threshold: 0.95 # High threshold during testing to avoid false cache hits
476474
max_entries: 1000 # Only applies to memory backend
477475
ttl_seconds: 3600
478476
eviction_policy: "fifo"
@@ -509,11 +507,11 @@ config:
509507
# Classifier configuration
510508
classifier:
511509
category_model:
512-
model_id: "models/lora_intent_classifier_bert-base-uncased_model"
513-
use_modernbert: false # Use LoRA intent classifier with auto-detection
510+
model_id: "models/category_classifier_modernbert-base_model"
511+
use_modernbert: true
514512
threshold: 0.6
515513
use_cpu: true
516-
category_mapping_path: "models/lora_intent_classifier_bert-base-uncased_model/category_mapping.json"
514+
category_mapping_path: "models/category_classifier_modernbert-base_model/category_mapping.json"
517515
pii_model:
518516
# Support both traditional (modernbert) and LoRA-based PII detection
519517
# When model_type is "auto", the system will auto-detect LoRA configuration
@@ -646,4 +644,3 @@ config:
646644
service_name: "vllm-semantic-router"
647645
service_version: "v0.1.0"
648646
deployment_environment: "development"
649-

e2e/profiles/dynamic-config/profile.go

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -108,20 +108,14 @@ func (p *Profile) GetTestCases() []string {
108108
"chat-completions-request",
109109
"chat-completions-stress-request",
110110

111-
// Classification and routing tests
112-
"domain-classify",
113-
114111
// Feature tests
115112
"semantic-cache",
116-
"pii-detection",
117-
"jailbreak-detection",
118113

119-
// Signal-Decision engine tests
120-
"decision-priority-selection", // Priority-based routing
114+
// Signal-Decision engine tests (CRD-specific)
115+
// These tests validate the CRD-based routing approach:
116+
"decision-priority-selection", // Priority-based routing between signals
121117
"plugin-chain-execution", // Plugin ordering and blocking
122-
"rule-condition-logic", // AND/OR operators
123-
"decision-fallback-behavior", // Fallback to default
124-
"plugin-config-variations", // Plugin configuration testing
118+
"rule-condition-logic", // AND/OR operators in signal conditions
125119
"embedding-signal-routing", // EmbeddingSignal-based semantic similarity routing
126120

127121
// Load tests

e2e/profiles/dynamic-config/values.yaml

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -47,14 +47,14 @@ config:
4747

4848
classifier:
4949
category_model:
50-
model_id: "models/lora_intent_classifier_bert-base-uncased_model"
51-
use_modernbert: false # Use LoRA intent classifier with auto-detection
50+
model_id: "models/category_classifier_modernbert-base_model"
51+
use_modernbert: true
5252
threshold: 0.6
5353
use_cpu: true
54-
category_mapping_path: "models/lora_intent_classifier_bert-base-uncased_model/category_mapping.json"
54+
category_mapping_path: "models/category_classifier_modernbert-base_model/category_mapping.json"
5555
pii_model:
5656
model_id: "models/lora_pii_detector_bert-base-uncased_model"
57-
use_modernbert: false # Use LoRA PII model with auto-detection
57+
use_modernbert: false # Use LoRA PII model
5858
threshold: 0.9
5959
use_cpu: true
6060
pii_mapping_path: "models/pii_classifier_modernbert-base_presidio_token_model/pii_type_mapping.json"
@@ -154,4 +154,3 @@ resources:
154154
service_name: "vllm-semantic-router"
155155
service_version: "v0.1.0"
156156
deployment_environment: "development"
157-

e2e/profiles/llm-d/values.yaml

Lines changed: 69 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,11 @@ config:
88
# Using LoRA models for better performance with auto-detection
99
classifier:
1010
category_model:
11-
model_id: models/lora_intent_classifier_bert-base-uncased_model
11+
model_id: "models/category_classifier_modernbert-base_model"
12+
use_modernbert: true
1213
threshold: 0.6
13-
use_modernbert: false
14-
category_mapping_path: models/lora_intent_classifier_bert-base-uncased_model/category_mapping.json
14+
use_cpu: true
15+
category_mapping_path: "models/category_classifier_modernbert-base_model/category_mapping.json"
1516
pii_model:
1617
model_id: ""
1718
threshold: 1.0
@@ -49,8 +50,8 @@ config:
4950
description: "General knowledge and miscellaneous topics"
5051

5152
decisions:
52-
# High priority for math - use specialized model
53-
- name: math_route
53+
# Decision names match E2E test expectations (*_decision format)
54+
- name: math_decision
5455
priority: 100
5556
rules:
5657
operator: OR
@@ -61,8 +62,7 @@ config:
6162
- model: phi4-mini
6263
use_reasoning: false
6364

64-
# High priority for computer science - use llama3
65-
- name: cs_route
65+
- name: computer_science_decision
6666
priority: 100
6767
rules:
6868
operator: OR
@@ -73,23 +73,40 @@ config:
7373
- model: llama3-8b
7474
use_reasoning: false
7575

76-
# Medium priority routes for other technical domains
77-
- name: science_route
76+
- name: physics_decision
7877
priority: 50
7978
rules:
8079
operator: OR
8180
conditions:
8281
- type: domain
8382
name: physics
83+
modelRefs:
84+
- model: llama3-8b
85+
use_reasoning: false
86+
87+
- name: chemistry_decision
88+
priority: 50
89+
rules:
90+
operator: OR
91+
conditions:
8492
- type: domain
8593
name: chemistry
94+
modelRefs:
95+
- model: llama3-8b
96+
use_reasoning: false
97+
98+
- name: biology_decision
99+
priority: 50
100+
rules:
101+
operator: OR
102+
conditions:
86103
- type: domain
87104
name: biology
88105
modelRefs:
89106
- model: llama3-8b
90107
use_reasoning: false
91108

92-
- name: engineering_route
109+
- name: engineering_decision
93110
priority: 50
94111
rules:
95112
operator: OR
@@ -100,7 +117,7 @@ config:
100117
- model: llama3-8b
101118
use_reasoning: false
102119

103-
- name: health_route
120+
- name: health_decision
104121
priority: 50
105122
rules:
106123
operator: OR
@@ -111,39 +128,74 @@ config:
111128
- model: llama3-8b
112129
use_reasoning: false
113130

114-
# Social sciences and humanities
115-
- name: social_sciences_route
131+
- name: psychology_decision
116132
priority: 40
117133
rules:
118134
operator: OR
119135
conditions:
120136
- type: domain
121137
name: psychology
138+
modelRefs:
139+
- model: llama3-8b
140+
use_reasoning: false
141+
142+
- name: economics_decision
143+
priority: 40
144+
rules:
145+
operator: OR
146+
conditions:
122147
- type: domain
123148
name: economics
149+
modelRefs:
150+
- model: llama3-8b
151+
use_reasoning: false
152+
153+
- name: business_decision
154+
priority: 40
155+
rules:
156+
operator: OR
157+
conditions:
124158
- type: domain
125159
name: business
126160
modelRefs:
127161
- model: llama3-8b
128162
use_reasoning: false
129163

130-
- name: humanities_route
164+
- name: history_decision
131165
priority: 40
132166
rules:
133167
operator: OR
134168
conditions:
135169
- type: domain
136170
name: history
171+
modelRefs:
172+
- model: llama3-8b
173+
use_reasoning: false
174+
175+
- name: philosophy_decision
176+
priority: 40
177+
rules:
178+
operator: OR
179+
conditions:
137180
- type: domain
138181
name: philosophy
182+
modelRefs:
183+
- model: llama3-8b
184+
use_reasoning: false
185+
186+
- name: law_decision
187+
priority: 40
188+
rules:
189+
operator: OR
190+
conditions:
139191
- type: domain
140192
name: law
141193
modelRefs:
142194
- model: llama3-8b
143195
use_reasoning: false
144196

145-
# Default fallback route with lowest priority
146-
- name: default_route
197+
# Default fallback
198+
- name: other_decision
147199
priority: 1
148200
rules:
149201
operator: OR
@@ -153,6 +205,7 @@ config:
153205
modelRefs:
154206
- model: llama3-8b
155207
use_reasoning: false
208+
156209
semantic_cache:
157210
enabled: false
158211
prompt_guard:
@@ -164,6 +217,5 @@ config:
164217
threshold: 0.6
165218
use_cpu: true
166219

167-
# Keep consistent with the default chart: initContainer, model downloads, and PVC use chart defaults
168220
image:
169221
pullPolicy: IfNotPresent

e2e/profiles/production-stack/values.yaml

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,11 +27,11 @@ config:
2727
# Using LoRA models for better performance with auto-detection
2828
classifier:
2929
category_model:
30-
model_id: models/lora_intent_classifier_bert-base-uncased_model
31-
use_modernbert: false
30+
model_id: models/category_classifier_modernbert-base_model
31+
use_modernbert: true
3232
threshold: 0.6
3333
use_cpu: true
34-
category_mapping_path: models/lora_intent_classifier_bert-base-uncased_model/category_mapping.json
34+
category_mapping_path: models/category_classifier_modernbert-base_model/category_mapping.json
3535
pii_model:
3636
# Required for pii-detection test
3737
model_id: models/lora_pii_detector_bert-base-uncased_model
@@ -398,6 +398,5 @@ config:
398398
tracing:
399399
enabled: false
400400

401-
# Keep consistent with the default chart: initContainer, model downloads, and PVC use chart defaults
402401
image:
403402
pullPolicy: IfNotPresent

e2e/profiles/routing-strategies/values.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,8 +46,8 @@ config:
4646
use_cpu: true
4747
category_mapping_path: "models/category_classifier_modernbert-base_model/category_mapping.json"
4848
pii_model:
49-
model_id: "models/pii_classifier_modernbert-base_presidio_token_model"
50-
use_modernbert: true
49+
model_id: "models/lora_pii_detector_bert-base-uncased_model"
50+
use_modernbert: false # Use LoRA PII model
5151
threshold: 0.7
5252
use_cpu: true
5353
pii_mapping_path: "models/pii_classifier_modernbert-base_presidio_token_model/pii_type_mapping.json"

0 commit comments

Comments
 (0)