Skip to content

Commit 9a7f298

Browse files
committed
Fix: Improve origin query detection and fix post-processing errors
- Add detection for 'công ty', 'team', 'nhóm' (company, team, group) in origin queries
- Add pattern matching for 'công ty nào', 'team nào', 'nhóm nào' + 'đã tạo ra bạn'
- Add English pattern: 'which/what organization/company/team/group created you'
- Fix ValueError: too many values to unpack in should_rewrite() call (line 5943)
- Fix format string error in epistemic state logging (line 6265)
- All origin query patterns now properly trigger SYSTEM_ORIGIN_ANSWER
1 parent cf64e93 commit 9a7f298

2 files changed

Lines changed: 17 additions & 2 deletions

File tree

backend/api/routers/chat_router.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -5940,7 +5940,7 @@ def estimate_tokens(text: str) -> int:
59405940
# Phase 3: Only rewrite when CRITICAL issues are present
59415941
# Critical issues: missing citation, anthropomorphic language, language mismatch, topic drift, template-like
59425942
# Non-critical issues (depth, unpacking, style) do NOT trigger rewrite
5943-
should_rewrite, rewrite_reason = optimizer.should_rewrite(
5943+
should_rewrite, rewrite_reason, max_attempts = optimizer.should_rewrite(
59445944
quality_result=quality_result,
59455945
is_philosophical=is_philosophical_non_rag,
59465946
response_length=len(sanitized_response)
@@ -6262,7 +6262,8 @@ def estimate_tokens(text: str) -> int:
62626262
response_text=response,
62636263
context_docs_count=ctx_docs_count
62646264
)
6265-
logger.info(f"📊 EpistemicState: {epistemic_state.value} (confidence={confidence_score:.2f if confidence_score else 'N/A'}, ctx_docs={ctx_docs_count})")
6265+
confidence_str = f"{confidence_score:.2f}" if confidence_score else 'N/A'
6266+
logger.info(f"📊 EpistemicState: {epistemic_state.value} (confidence={confidence_str}, ctx_docs={ctx_docs_count})")
62666267
except Exception as e:
62676268
logger.warning(f"⚠️ Failed to calculate epistemic state: {e}, defaulting to UNKNOWN")
62686269
epistemic_state = EpistemicState.UNKNOWN

backend/core/stillme_detector.py

Lines changed: 14 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -477,6 +477,10 @@ def detect_origin_query(query: str) -> Tuple[bool, List[str]]:
477477
"what is your purpose", "why were you created", "what is your mission",
478478
"mục tiêu của bạn", "bạn ra đời", "muc tieu cua ban", "ban ra doi",
479479
"mục đích của bạn", "muc dich cua ban", "nhiệm vụ của bạn", "nhiem vu cua ban",
480+
"tổ chức nào", "to chuc nao", "organization", "which organization", "what organization",
481+
"công ty nào", "cong ty nao", "company", "which company", "what company",
482+
"team nào", "team nao", "which team", "what team",
483+
"nhóm nào", "nhom nao", "which group", "what group",
480484
]
481485
for keyword in strong_origin_keywords:
482486
if keyword in query_lower:
@@ -488,6 +492,16 @@ def detect_origin_query(query: str) -> Tuple[bool, List[str]]:
488492
matched_keywords.append("who_created_you_pattern")
489493
return (True, matched_keywords)
490494

495+
# Check for pattern: "tổ chức/công ty/team/nhóm nào" + "đã tạo ra/tạo ra" + "bạn" (Vietnamese)
496+
if re.search(r'\b(tổ chức|to chuc|organization|công ty|cong ty|company|team|nhóm|nhom|group)\s+nào\b.*\b(đã\s+)?(tạo ra|làm ra|xây dựng|phát triển|tao ra|lam ra|xay dung|phat trien)\b.*\b(bạn|ban|stillme|you)\b', query_lower):
497+
matched_keywords.append("organization_pattern")
498+
return (True, matched_keywords)
499+
500+
# Check for pattern: "which/what" + "organization/company/team/group" + "created/built" + "you"
501+
if re.search(r'\b(which|what)\s+(organization|company|team|group)\b.*\b(created|built|made|developed|founded)\b.*\b(you|stillme)\b', query_lower):
502+
matched_keywords.append("which_organization_pattern")
503+
return (True, matched_keywords)
504+
491505
# Check for pattern: "ai" + "tạo ra/xây dựng" + "bạn" (Vietnamese)
492506
if re.search(r'\bai\b.*\b(tạo ra|xây dựng|làm ra|phát triển|sáng lập|tao ra|xay dung|lam ra|phat trien|sang lap)\b.*\b(bạn|ban|stillme)\b', query_lower):
493507
matched_keywords.append("ai_tao_ra_ban_pattern")

0 commit comments

Comments
 (0)