Skip to content

Commit b452605

Browse files
committed
fix: reliable priority extraction and simplified intent prompt
Priority extraction fix:
- Add deterministic keyword-based priority enforcement because the LLM
  (qwen2.5:7b) unreliably inferred priorities from use case type rather than
  from explicit user statements. Priorities now only reflect what the user
  actually said. The SLO profiles already handle use-case-appropriate
  latency/throughput targets.

Prompt simplification (for smaller LLMs):
- Remove experience_class, complexity_priority, and additional_context from
  the LLM prompt schema. experience_class is still inferred deterministically
  from use_case in post-processing; complexity_priority and additional_context
  were never consumed downstream.

use_case normalization improvements (follow-up to PR #180):
- Lowercase use_case before alias/fuzzy lookup so mixed-case LLM responses
  like "Text_Summarization" are handled correctly.
- Add logger.warning when an unrecognized use_case cannot be resolved by the
  alias map or a fuzzy match, aiding production debugging.
- Remove stale complexity_priority from test helper _base_intent().
- Add 4 unit tests for case-insensitive normalization.

Assisted-by: Claude <noreply@anthropic.com>
Signed-off-by: Andre Fredette <afredette@redhat.com>
1 parent cb1a6fa commit b452605

4 files changed

Lines changed: 138 additions & 46 deletions

File tree

src/planner/intent_extraction/extractor.py

Lines changed: 105 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@ def extract_intent(
111111
logger.info(f"[EXTRACTED INTENT] {extracted}")
112112

113113
# Validate and parse into Pydantic model
114-
intent = self._parse_extracted_intent(extracted)
114+
intent = self._parse_extracted_intent(extracted, user_message)
115115
logger.info(f"Extracted intent: use_case={intent.use_case}, users={intent.user_count}")
116116

117117
return intent
@@ -120,12 +120,13 @@ def extract_intent(
120120
logger.error(f"Failed to extract intent: {e}")
121121
raise ValueError(f"Intent extraction failed: {e}") from e
122122

123-
def _parse_extracted_intent(self, raw_data: dict) -> DeploymentIntent:
123+
def _parse_extracted_intent(self, raw_data: dict, user_message: str = "") -> DeploymentIntent:
124124
"""
125125
Parse and validate raw LLM output into DeploymentIntent.
126126
127127
Args:
128128
raw_data: Raw dict from LLM
129+
user_message: Original user message for priority validation
129130
130131
Returns:
131132
Validated DeploymentIntent
@@ -134,20 +135,21 @@ def _parse_extracted_intent(self, raw_data: dict) -> DeploymentIntent:
134135
ValueError: If data is invalid
135136
"""
136137
# Handle common LLM mistakes
137-
cleaned_data = self._clean_llm_output(raw_data)
138+
cleaned_data = self._clean_llm_output(raw_data, user_message)
138139

139140
try:
140141
return DeploymentIntent(**cleaned_data)
141142
except Exception as e:
142143
logger.error(f"Failed to parse intent from: {cleaned_data}")
143144
raise ValueError(f"Invalid intent data: {e}") from e
144145

145-
def _clean_llm_output(self, data: dict) -> dict:
146+
def _clean_llm_output(self, data: dict, user_message: str = "") -> dict:
146147
"""
147148
Clean common LLM output mistakes.
148149
149150
Args:
150151
data: Raw LLM output
152+
user_message: Original user message for priority validation
151153
152154
Returns:
153155
Cleaned data dict
@@ -161,7 +163,8 @@ def _clean_llm_output(self, data: dict) -> dict:
161163
cleaned["use_case"] = cleaned["use_case"].split("|")[0].strip()
162164

163165
# Normalize hallucinated use_case values
164-
use_case = cleaned.get("use_case", "")
166+
use_case = cleaned.get("use_case", "").lower()
167+
cleaned["use_case"] = use_case
165168
valid_use_cases = list(get_args(DeploymentIntent.model_fields["use_case"].annotation))
166169
if use_case not in valid_use_cases:
167170
mapped = _USE_CASE_ALIASES.get(use_case)
@@ -173,6 +176,10 @@ def _clean_llm_output(self, data: dict) -> dict:
173176
if close:
174177
logger.info("Fuzzy-matched use_case '%s' -> '%s'", use_case, close[0])
175178
cleaned["use_case"] = close[0]
179+
else:
180+
logger.warning(
181+
"Unrecognized use_case '%s' — no alias or fuzzy match found", use_case
182+
)
176183

177184
# Infer experience_class if not provided
178185
if "experience_class" not in cleaned or not cleaned.get("experience_class"):
@@ -266,7 +273,6 @@ def _clean_llm_output(self, data: dict) -> dict:
266273
"accuracy_priority",
267274
"cost_priority",
268275
"latency_priority",
269-
"complexity_priority",
270276
]:
271277
if priority_field in cleaned:
272278
# Normalize to lowercase and validate
@@ -282,12 +288,104 @@ def _clean_llm_output(self, data: dict) -> dict:
282288
# Field not provided by LLM, default to medium
283289
cleaned[priority_field] = "medium"
284290

291+
# Enforce explicit-only priority extraction.
292+
# Priorities must only reflect explicit user statements, not be inferred
293+
# from the use case type. The SLO profile already handles
294+
# use-case-appropriate latency/throughput targets.
295+
if user_message:
296+
msg_lower = user_message.lower()
297+
self._enforce_explicit_priority(
298+
cleaned,
299+
"accuracy_priority",
300+
msg_lower,
301+
high_keywords=[
302+
"accuracy",
303+
"quality",
304+
"best model",
305+
"top quality",
306+
"most accurate",
307+
"accuracy is critical",
308+
"accuracy is paramount",
309+
"accuracy matters",
310+
],
311+
low_keywords=["good enough", "accuracy less important", "accuracy doesn't matter"],
312+
)
313+
self._enforce_explicit_priority(
314+
cleaned,
315+
"cost_priority",
316+
msg_lower,
317+
high_keywords=[
318+
"cost-effective",
319+
"cost effective",
320+
"budget",
321+
"cost-sensitive",
322+
"cost sensitive",
323+
"minimize cost",
324+
"cheap",
325+
"affordable",
326+
"budget is tight",
327+
],
328+
low_keywords=[
329+
"cost doesn't matter",
330+
"cost is not",
331+
"budget is unlimited",
332+
"money is no object",
333+
],
334+
)
335+
self._enforce_explicit_priority(
336+
cleaned,
337+
"latency_priority",
338+
msg_lower,
339+
high_keywords=[
340+
"low latency",
341+
"fast response",
342+
"speed is important",
343+
"latency is critical",
344+
"latency is important",
345+
"real-time",
346+
"instant response",
347+
],
348+
low_keywords=[
349+
"latency less important",
350+
"latency doesn't matter",
351+
"async",
352+
"latency is not",
353+
],
354+
)
285355
# Remove any unexpected fields that aren't in the schema
286356
valid_fields = DeploymentIntent.model_fields.keys()
287357
cleaned = {k: v for k, v in cleaned.items() if k in valid_fields}
288358

289359
return cleaned
290360

361+
@staticmethod
362+
def _enforce_explicit_priority(
363+
cleaned: dict,
364+
field: str,
365+
msg_lower: str,
366+
high_keywords: list[str],
367+
low_keywords: list[str],
368+
) -> None:
369+
"""Reset a priority field to 'medium' unless the user message contains
370+
explicit keywords that justify 'high' or 'low'."""
371+
has_high = any(kw in msg_lower for kw in high_keywords)
372+
has_low = any(kw in msg_lower for kw in low_keywords)
373+
374+
if has_high:
375+
if cleaned.get(field) != "high":
376+
logger.info(f"Overriding {field} to 'high' based on explicit user keywords")
377+
cleaned[field] = "high"
378+
elif has_low:
379+
if cleaned.get(field) != "low":
380+
logger.info(f"Overriding {field} to 'low' based on explicit user keywords")
381+
cleaned[field] = "low"
382+
elif cleaned.get(field) != "medium":
383+
logger.info(
384+
f"Resetting {field} from '{cleaned.get(field)}' to 'medium' "
385+
f"(no explicit user statement found)"
386+
)
387+
cleaned[field] = "medium"
388+
291389
def infer_missing_fields(self, intent: DeploymentIntent) -> DeploymentIntent:
292390
"""
293391
Infer missing optional fields based on available information.
@@ -302,11 +400,7 @@ def infer_missing_fields(self, intent: DeploymentIntent) -> DeploymentIntent:
302400
if intent.domain_specialization == ["general"]:
303401
if intent.use_case in ["code_generation_detailed", "code_completion"]:
304402
intent.domain_specialization = ["general", "code"]
305-
elif intent.use_case == "translation" or (
306-
"multilingual" in intent.additional_context.lower()
307-
if intent.additional_context
308-
else False
309-
):
403+
elif intent.use_case == "translation":
310404
intent.domain_specialization = ["general", "multilingual"]
311405

312406
return intent

src/planner/llm/prompts.py

Lines changed: 13 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -4,18 +4,17 @@
44
Expected JSON schema:
55
{
66
"use_case": "chatbot_conversational|code_completion|code_generation_detailed|translation|content_generation|summarization_short|document_analysis_rag|long_document_summarization|research_legal_analysis",
7-
"experience_class": "instant|conversational|interactive|deferred|batch",
87
"user_count": <integer>,
98
"domain_specialization": ["general"|"code"|"multilingual"|"enterprise"],
109
"preferred_gpu_types": ["<list of GPU types if mentioned, empty list if not specified>"],
1110
"preferred_models": ["<list of model IDs in HuggingFace format if mentioned, empty list if not specified>"],
1211
"accuracy_priority": "low|medium|high",
1312
"cost_priority": "low|medium|high",
14-
"latency_priority": "low|medium|high",
15-
"complexity_priority": "low|medium|high",
16-
"additional_context": "<any other relevant details mentioned>"
13+
"latency_priority": "low|medium|high"
1714
}
1815
16+
All priority fields default to "medium". Set to "high" or "low" only when the user explicitly mentions that priority.
17+
1918
Use case descriptions:
2019
- chatbot_conversational: Real-time conversational chatbots (short prompts, short responses)
2120
- code_completion: Fast code completion/autocomplete (short prompts, short completions)
@@ -28,13 +27,6 @@
2827
- research_legal_analysis: Research/legal document analysis (very long prompts, detailed analysis)
2928
3029
CRITICAL: The use_case value MUST be exactly one of the 9 values listed above. Do not invent variations like "text_summarization" or "chatbot" — use the exact strings shown.
31-
32-
Experience class guidance:
33-
- instant: Sub-200ms response time - code completion, autocomplete
34-
- conversational: Real-time user interaction - chatbots, interactive tools
35-
- interactive: User waiting but tolerates slight delay - RAG Q&A, content generation
36-
- deferred: Quality over speed - long summarization, detailed analysis
37-
- batch: Background/async processing - research, legal analysis
3830
"""
3931

4032

@@ -82,6 +74,16 @@ def build_intent_extraction_prompt(
8274
- "chatbot" or "customer service" or "conversational" → use_case: chatbot_conversational
8375
- "content" or "marketing" or "blog" or "content generation" → use_case: content_generation
8476
- "summarize document" or "summarization" → use_case: summarization_short or long_document_summarization
77+
- "legal" or "regulatory" or "legal analysis" or "legal document" → use_case: research_legal_analysis (NOT document_analysis_rag)
78+
- "research" or "research analysis" → use_case: research_legal_analysis
79+
- "code generation" or "generate code" or "implementing features" or "full code" → use_case: code_generation_detailed (NOT chatbot_conversational, NOT code_completion)
80+
- "code completion" or "autocomplete" or "IDE completion" → use_case: code_completion (NOT code_generation_detailed)
81+
82+
Domain specialization rules:
83+
- If use_case is code_completion or code_generation_detailed → domain_specialization MUST include "code"
84+
- If the request mentions "translation" or "multilingual" or multiple languages → include "multilingual"
85+
- If the request mentions "enterprise" or "knowledge base" → include "enterprise"
86+
- Default to ["general"] only when none of the above apply
8587
8688
GPU extraction examples (canonical names: L4, A100-40, A100-80, H100, H200, B200):
8789
- "running on h200" or "h200" or "H200" → preferred_gpu_types: ["H200"]
@@ -98,22 +100,6 @@ def build_intent_extraction_prompt(
98100
- "run mistral small" → preferred_models: ["mistralai/Mistral-Small-24B-Instruct-2501"]
99101
- No model mentioned → preferred_models: []
100102
101-
Priority extraction (for scoring weights - use "medium" as baseline, adjust based on context):
102-
- accuracy_priority: "high" if user mentions accuracy matters, quality is important, accuracy is critical, best model, or top quality. "low" if user says good enough or accuracy less important.
103-
- cost_priority: "high" if user EXPLICITLY says cost-effective, cost-sensitive, budget constrained, minimize cost, cost is important, or budget is tight. "low" ONLY if user EXPLICITLY says "cost doesn't matter" or "budget is unlimited" or "money is no object". Default to "medium" if not mentioned. DO NOT infer from GPU choice.
104-
- latency_priority: "high" if user mentions low latency needed, fast response critical, speed is important, real-time performance required, or instant responses needed. "low" if user says latency less important or async/batch is acceptable. Default to "medium" if not mentioned.
105-
- complexity_priority: "high" if user wants simple deployment, easy setup. "low" if they're okay with complex setups.
106-
IMPORTANT - Priority Extraction Rules (FOLLOW STRICTLY):
107-
- Only extract priorities from EXPLICIT user statements about priorities, not from hardware choices or use case type
108-
- Hardware preference (H100, L4, etc.) does NOT imply cost_priority - user may have GPUs available or budget allocated
109-
- Use case type does NOT imply latency_priority - default to "medium" unless user explicitly mentions speed/latency concerns
110-
- When in doubt, use "medium" - only deviate when user EXPLICITLY states a priority
111-
Examples:
112-
- "chatbot with H100" → cost_priority: "medium" (H100 doesn't mean cost doesn't matter)
113-
- "low latency is important" → latency_priority: "high" (explicit statement)
114-
- "cost-effective solution" → cost_priority: "high" (explicit statement)
115-
- "chatbot for 300 users, low latency important, H100 gpus" → latency_priority: "high", cost_priority: "medium" (only latency explicitly mentioned)
116-
117103
{INTENT_EXTRACTION_SCHEMA}
118104
"""
119105
return prompt

src/planner/shared/schemas/intent.py

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -54,13 +54,6 @@ class DeploymentIntent(BaseModel):
5454
latency_priority: Literal["low", "medium", "high"] = Field(
5555
default="medium", description="Latency importance"
5656
)
57-
complexity_priority: Literal["low", "medium", "high"] = Field(
58-
default="medium", description="Preference for simpler deployments"
59-
)
60-
61-
additional_context: str | None = Field(
62-
None, description="Any other relevant details from conversation"
63-
)
6457

6558

6659
class ConversationMessage(BaseModel):

tests/unit/test_intent_extractor.py

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@ def _base_intent(**overrides) -> dict:
2121
"accuracy_priority": "medium",
2222
"cost_priority": "medium",
2323
"latency_priority": "medium",
24-
"complexity_priority": "medium",
2524
}
2625
data.update(overrides)
2726
return data
@@ -100,6 +99,26 @@ def test_clean_llm_output_fuzzy_matches_close_typos(extractor, typo, expected):
10099
assert cleaned["use_case"] == expected
101100

102101

102+
# --- Case-insensitive normalization ---
103+
104+
105+
@pytest.mark.unit
106+
@pytest.mark.parametrize(
107+
"mixed_case, expected",
108+
[
109+
("Text_Summarization", "summarization_short"),
110+
("CHATBOT", "chatbot_conversational"),
111+
("Code_Completion", "code_completion"),
112+
("DOCUMENT_ANALYSIS_RAG", "document_analysis_rag"),
113+
],
114+
)
115+
def test_clean_llm_output_handles_case_insensitive(extractor, mixed_case, expected):
116+
"""Mixed-case use_case values are lowercased before alias/fuzzy matching."""
117+
raw = _base_intent(use_case=mixed_case)
118+
cleaned = extractor._clean_llm_output(raw)
119+
assert cleaned["use_case"] == expected
120+
121+
103122
# --- Garbage values are NOT matched ---
104123

105124

0 commit comments

Comments (0)