Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 41 additions & 19 deletions lightrag/operate.py
Original file line number Diff line number Diff line change
Expand Up @@ -3186,7 +3186,12 @@ async def kg_query(
)

# Build system prompt
sys_prompt_temp = system_prompt if system_prompt else PROMPTS["rag_response"]
if system_prompt:
sys_prompt_temp = system_prompt
elif not query_param.include_references:
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Badge Add include_references to kg_query cache key

This branch makes kg_query generate a different prompt when include_references is false, but the query cache key still omits that flag in the subsequent compute_args_hash(...) call. With enable_llm_cache=true, a response cached for one setting can be served to the other (e.g., cached cited answer returned when citations are disabled), violating the API contract for include_references.

Useful? React with 👍 / 👎.

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Already fixed in f956920 — added include_references to the compute_args_hash call for kg_query.

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Already fixed in f956920 — added include_references to the compute_args_hash call for kg_query.

sys_prompt_temp = PROMPTS["rag_response_no_ref"]
else:
sys_prompt_temp = PROMPTS["rag_response"]
sys_prompt = sys_prompt_temp.format(
response_type=response_type,
user_prompt=user_prompt,
Expand Down Expand Up @@ -4011,11 +4016,18 @@ async def _build_context_str(
)

# Get the system prompt template from PROMPTS or global_config
sys_prompt_template = global_config.get(
"system_prompt_template", PROMPTS["rag_response"]
)
custom_sys_prompt = global_config.get("system_prompt_template")
if custom_sys_prompt:
sys_prompt_template = custom_sys_prompt
elif not query_param.include_references:
sys_prompt_template = PROMPTS["rag_response_no_ref"]
else:
sys_prompt_template = PROMPTS["rag_response"]

kg_context_template = PROMPTS["kg_query_context"]
if not query_param.include_references:
kg_context_template = PROMPTS["kg_query_context_no_ref"]
else:
kg_context_template = PROMPTS["kg_query_context"]
user_prompt = query_param.user_prompt if query_param.user_prompt else ""
response_type = (
query_param.response_type
Expand Down Expand Up @@ -4087,11 +4099,14 @@ async def _build_context_str(
text_units_str = "\n".join(
json.dumps(text_unit, ensure_ascii=False) for text_unit in chunks_context
)
reference_list_str = "\n".join(
f"[{ref['reference_id']}] {ref['file_path']}"
for ref in reference_list
if ref["reference_id"]
)
if query_param.include_references:
reference_list_str = "\n".join(
f"[{ref['reference_id']}] {ref['file_path']}"
for ref in reference_list
if ref["reference_id"]
)
else:
reference_list_str = ""

logger.info(
f"Final context: {len(entities_context)} entities, {len(relations_context)} relations, {len(chunks_context)} chunks"
Expand Down Expand Up @@ -4938,9 +4953,12 @@ async def naive_query(
)

# Use the provided system prompt or default
sys_prompt_template = (
system_prompt if system_prompt else PROMPTS["naive_rag_response"]
)
if system_prompt:
sys_prompt_template = system_prompt
elif not query_param.include_references:
Comment thread
danielaskdd marked this conversation as resolved.
sys_prompt_template = PROMPTS["naive_rag_response_no_ref"]
else:
sys_prompt_template = PROMPTS["naive_rag_response"]

# Create a preliminary system prompt with empty content_data to calculate overhead
pre_sys_prompt = sys_prompt_template.format(
Expand Down Expand Up @@ -5012,13 +5030,17 @@ async def naive_query(
text_units_str = "\n".join(
json.dumps(text_unit, ensure_ascii=False) for text_unit in chunks_context
)
reference_list_str = "\n".join(
f"[{ref['reference_id']}] {ref['file_path']}"
for ref in reference_list
if ref["reference_id"]
)
if query_param.include_references:
reference_list_str = "\n".join(
f"[{ref['reference_id']}] {ref['file_path']}"
for ref in reference_list
if ref["reference_id"]
)
naive_context_template = PROMPTS["naive_query_context"]
else:
reference_list_str = ""
naive_context_template = PROMPTS["naive_query_context_no_ref"]

naive_context_template = PROMPTS["naive_query_context"]
context_content = naive_context_template.format(
text_chunks_str=text_units_str,
reference_list_str=reference_list_str,
Expand Down
100 changes: 100 additions & 0 deletions lightrag/prompt.py
Original file line number Diff line number Diff line change
Expand Up @@ -324,6 +324,76 @@
6. Additional Instructions: {user_prompt}


---Context---

{content_data}
"""

# Citation-free variant of PROMPTS["rag_response"], selected by kg_query when
# query_param.include_references is False (see operate.py). It drops the
# reference/citation instructions entirely and instead explicitly forbids a
# references section (Instruction 3, last bullet).
# Placeholders filled by operate.py: {response_type}, {user_prompt}, {context_data}.
# NOTE(review): this template uses {context_data} while the sibling naive
# templates use {content_data} — confirm the format() call in kg_query passes
# the matching keyword.
PROMPTS["rag_response_no_ref"] = """---Role---

You are an expert AI assistant specializing in synthesizing information from a provided knowledge base. Your primary function is to answer user queries accurately by ONLY using the information within the provided **Context**.

---Goal---

Generate a comprehensive, well-structured answer to the user query.
The answer must integrate relevant facts from the Knowledge Graph and Document Chunks found in the **Context**.
Consider the conversation history if provided to maintain conversational flow and avoid repeating information.

---Instructions---

1. Step-by-Step Instruction:
- Carefully determine the user's query intent in the context of the conversation history to fully understand the user's information need.
- Scrutinize both `Knowledge Graph Data` and `Document Chunks` in the **Context**. Identify and extract all pieces of information that are directly relevant to answering the user query.
- Weave the extracted facts into a coherent and logical response. Your own knowledge must ONLY be used to formulate fluent sentences and connect ideas, NOT to introduce any external information.

2. Content & Grounding:
- Strictly adhere to the provided context from the **Context**; DO NOT invent, assume, or infer any information not explicitly stated.
- If the answer cannot be found in the **Context**, state that you do not have enough information to answer. Do not attempt to guess.

3. Formatting & Language:
- The response MUST be in the same language as the user query.
- The response MUST utilize Markdown formatting for enhanced clarity and structure (e.g., headings, bold text, bullet points).
- The response should be presented in {response_type}.
- Do not include a references or citations section in the response.

4. Additional Instructions: {user_prompt}


---Context---

{context_data}
"""

PROMPTS["naive_rag_response_no_ref"] = """---Role---

You are an expert AI assistant specializing in synthesizing information from a provided knowledge base. Your primary function is to answer user queries accurately by ONLY using the information within the provided **Context**.

---Goal---

Generate a comprehensive, well-structured answer to the user query.
The answer must integrate relevant facts from the Document Chunks found in the **Context**.
Consider the conversation history if provided to maintain conversational flow and avoid repeating information.

---Instructions---

1. Step-by-Step Instruction:
- Carefully determine the user's query intent in the context of the conversation history to fully understand the user's information need.
- Scrutinize `Document Chunks` in the **Context**. Identify and extract all pieces of information that are directly relevant to answering the user query.
- Weave the extracted facts into a coherent and logical response. Your own knowledge must ONLY be used to formulate fluent sentences and connect ideas, NOT to introduce any external information.

2. Content & Grounding:
- Strictly adhere to the provided context from the **Context**; DO NOT invent, assume, or infer any information not explicitly stated.
- If the answer cannot be found in the **Context**, state that you do not have enough information to answer. Do not attempt to guess.

3. Formatting & Language:
- The response MUST be in the same language as the user query.
- The response MUST utilize Markdown formatting for enhanced clarity and structure (e.g., headings, bold text, bullet points).
- The response should be presented in {response_type}.
- Do not include a references or citations section in the response.

4. Additional Instructions: {user_prompt}


---Context---

{content_data}
Expand Down Expand Up @@ -371,6 +441,36 @@

"""

# Citation-free variant of PROMPTS["kg_query_context"]: identical structure
# (entities, relationships, document chunks as JSON code fences) but with the
# "Reference Document List" section and its {reference_list_str} placeholder
# removed. Selected in _build_context_str when include_references is False.
# Placeholders: {entities_str}, {relations_str}, {text_chunks_str}.
PROMPTS["kg_query_context_no_ref"] = """
Knowledge Graph Data (Entity):

```json
{entities_str}
```

Knowledge Graph Data (Relationship):

```json
{relations_str}
```

Document Chunks:

```json
{text_chunks_str}
```

"""

# Citation-free variant of PROMPTS["naive_query_context"]: document chunks
# only, with the reference-list section and {reference_list_str} placeholder
# removed. Selected in naive_query when include_references is False.
# Placeholder: {text_chunks_str}.
PROMPTS["naive_query_context_no_ref"] = """
Document Chunks:

```json
{text_chunks_str}
```

"""

PROMPTS["keywords_extraction"] = """---Role---
You are an expert keyword extractor, specializing in analyzing user queries for a Retrieval-Augmented Generation (RAG) system. Your purpose is to identify both high-level and low-level keywords in the user's query that will be used for effective document retrieval.

Expand Down
73 changes: 73 additions & 0 deletions tests/test_include_references_prompt.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
"""Tests that include_references=False removes reference instructions from LLM prompts."""

import pytest

from lightrag.prompt import PROMPTS

pytestmark = pytest.mark.offline


class TestRagResponsePromptVariants:
    """Check that rag_response keeps its citation machinery and that the
    rag_response_no_ref variant strips it while retaining the required
    format placeholders."""

    # Markers that identify the citation/reference instructions in a template.
    _REF_MARKERS = (
        "References Section Format",
        "reference_id",
        "Reference Document List",
    )

    def test_rag_response_contains_reference_instructions(self):
        template = PROMPTS["rag_response"]
        for marker in self._REF_MARKERS:
            assert marker in template

    def test_rag_response_no_ref_omits_reference_instructions(self):
        template = PROMPTS["rag_response_no_ref"]
        for marker in self._REF_MARKERS:
            assert marker not in template

    def test_rag_response_no_ref_has_required_placeholders(self):
        template = PROMPTS["rag_response_no_ref"]
        # The no-ref variant must still be format()-able by kg_query.
        for placeholder in ("{response_type}", "{user_prompt}", "{context_data}"):
            assert placeholder in template


class TestNaiveRagResponsePromptVariants:
    """Check that naive_rag_response keeps its citation instructions and that
    naive_rag_response_no_ref strips them while keeping the placeholders
    naive_query needs for format()."""

    # Markers that identify the citation/reference instructions in a template.
    _REF_MARKERS = ("References Section Format", "reference_id")

    def test_naive_rag_response_contains_reference_instructions(self):
        template = PROMPTS["naive_rag_response"]
        for marker in self._REF_MARKERS:
            assert marker in template

    def test_naive_rag_response_no_ref_omits_reference_instructions(self):
        template = PROMPTS["naive_rag_response_no_ref"]
        for marker in self._REF_MARKERS:
            assert marker not in template

    def test_naive_rag_response_no_ref_has_required_placeholders(self):
        template = PROMPTS["naive_rag_response_no_ref"]
        for placeholder in ("{response_type}", "{user_prompt}", "{content_data}"):
            assert placeholder in template


class TestContextTemplateVariants:
    """Check the paired context templates: the standard ones carry the
    reference-list section, the *_no_ref ones omit it."""

    @staticmethod
    def _check_reference_section(template, expected):
        # Both the human-readable heading and the format placeholder must be
        # present (expected=True) or absent (expected=False) together.
        assert ("Reference Document List" in template) is expected
        assert ("{reference_list_str}" in template) is expected

    def test_kg_context_contains_reference_list(self):
        self._check_reference_section(PROMPTS["kg_query_context"], True)

    def test_kg_context_no_ref_omits_reference_list(self):
        self._check_reference_section(PROMPTS["kg_query_context_no_ref"], False)

    def test_naive_context_contains_reference_list(self):
        self._check_reference_section(PROMPTS["naive_query_context"], True)

    def test_naive_context_no_ref_omits_reference_list(self):
        self._check_reference_section(PROMPTS["naive_query_context_no_ref"], False)
Loading