Skip to content

Commit 1ac1fb0

Browse files
committed
fix: Mark foundational knowledge clearly in RAG context
- Add [foundational knowledge] prefix for CRITICAL_FOUNDATION documents
- Use 'source: CRITICAL_FOUNDATION' format to match prompt builder expectations
- Helps LLM recognize and prioritize foundational knowledge over general training data
- Fixes issue where LLM ignores RAG context for StillMe self-tracking questions
1 parent f50698c commit 1ac1fb0

1 file changed

Lines changed: 18 additions & 2 deletions

File tree

stillme_core/rag/rag_retrieval.py

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -846,16 +846,32 @@ def build_prompt_context(self, context: Dict[str, Any], max_context_tokens: int
846846
logger.warning(f"Stopped adding knowledge docs at {i}/{len(context['knowledge_docs'])} due to token limit")
847847
break
848848

849-
source = doc.get("metadata", {}).get("source", "Unknown")
849+
metadata = doc.get("metadata", {})
850+
source = metadata.get("source", "Unknown")
850851
content = doc.get("content", "")
851852

853+
# CRITICAL: Mark foundational knowledge clearly for LLM to recognize
854+
# Prompt builder expects [foundational knowledge] or source: CRITICAL_FOUNDATION
855+
is_foundational = (
856+
source == "CRITICAL_FOUNDATION" or
857+
metadata.get("foundational") == "stillme" or
858+
metadata.get("type") == "foundational" or
859+
"CRITICAL_FOUNDATION" in str(metadata.get("tags", "")) or
860+
"foundational:stillme" in str(metadata.get("tags", ""))
861+
)
862+
852863
# Allocate tokens per document (distribute remaining tokens)
853864
# Reserve some tokens for formatting
854865
doc_max_tokens = remaining_tokens // max(1, len(context["knowledge_docs"]) - i + 1)
855866
doc_max_tokens = min(doc_max_tokens, 2000) # Cap each doc at 2000 tokens
856867

857868
truncated_content = self._truncate_text_by_tokens(content, doc_max_tokens)
858-
doc_text = f"{i}. {truncated_content} (Source: {source})"
869+
870+
# Format with clear foundational knowledge marker
871+
if is_foundational:
872+
doc_text = f"{i}. [foundational knowledge] {truncated_content} (source: CRITICAL_FOUNDATION)"
873+
else:
874+
doc_text = f"{i}. {truncated_content} (Source: {source})"
859875

860876
doc_tokens = self._estimate_tokens(doc_text)
861877
remaining_tokens -= doc_tokens

0 commit comments

Comments
 (0)