Skip to content

Commit 70c5a9a

Browse files
committed
skip LLM summary when entity descriptions haven't changed
Check incoming descriptions against what's already on the node. If nothing new was added, reuse the existing summary instead of calling the LLM again. Saves a lot of time on re-ingestion.
1 parent 595629c commit 70c5a9a

File tree

2 files changed

+169
-0
lines changed

2 files changed

+169
-0
lines changed

lightrag/operate.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1770,6 +1770,31 @@ async def _merge_nodes_then_upsert(
17701770
)
17711771
description_list = [fallback_description]
17721772

1773+
# Skip LLM summary if no new descriptions were added (re-ingestion optimisation)
1774+
if already_node and already_description:
1775+
existing_descriptions = set(
1776+
d.strip()
1777+
for d in already_description
1778+
if d.strip()
1779+
)
1780+
incoming_descriptions = set(
1781+
d.strip()
1782+
for d in sorted_descriptions
1783+
if d.strip()
1784+
)
1785+
if incoming_descriptions and incoming_descriptions.issubset(existing_descriptions):
1786+
logger.debug(
1787+
f"Entity '{entity_name}': no new descriptions, skipping LLM summary"
1788+
)
1789+
node_data = dict(already_node)
1790+
node_data["source_id"] = source_id
1791+
node_data["file_path"] = GRAPH_FIELD_SEP.join(
1792+
list(dict.fromkeys(already_file_paths + [dp.get("file_path") for dp in nodes_data if dp.get("file_path")]))
1793+
)
1794+
await knowledge_graph_inst.upsert_node(entity_name, node_data=node_data)
1795+
node_data["entity_name"] = entity_name
1796+
return node_data
1797+
17731798
# Check for cancellation before LLM summary
17741799
if pipeline_status is not None and pipeline_status_lock is not None:
17751800
async with pipeline_status_lock:
Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,144 @@
1+
# test_skip_unchanged_summary.py
2+
3+
import pytest
4+
from unittest.mock import AsyncMock, patch
5+
from lightrag.utils import GRAPH_FIELD_SEP
6+
7+
8+
def _make_global_config(**overrides):
9+
cfg = {
10+
"source_ids_limit_method": "FIFO",
11+
"max_source_ids_per_entity": 100,
12+
"max_file_paths": 50,
13+
"file_path_more_placeholder": "more",
14+
}
15+
cfg.update(overrides)
16+
return cfg
17+
18+
19+
@pytest.fixture
def mock_kg_with_existing_person():
    """Graph-storage double whose ``get_node`` returns a stored PERSON node.

    The stored node's description is two sentences joined by
    ``GRAPH_FIELD_SEP``. ``upsert_node`` is an awaitable mock so tests can
    inspect what was written back.
    """
    stored_node = {
        "entity_type": "PERSON",
        "source_id": "doc1",
        "file_path": "file1.pdf",
        "description": f"Alice is a person{GRAPH_FIELD_SEP}Alice works at Acme",
    }

    graph = AsyncMock()
    graph.get_node = AsyncMock(return_value=stored_node)
    graph.upsert_node = AsyncMock()
    return graph
30+
31+
32+
@pytest.mark.asyncio
async def test_skip_summary_when_no_new_descriptions(mock_kg_with_existing_person):
    """Incoming descriptions already on the node must not trigger the summary call."""
    from lightrag.operate import _merge_nodes_then_upsert

    graph = mock_kg_with_existing_person

    # Both incoming descriptions are already part of the fixture node's description.
    repeated_nodes = [
        {
            "entity_type": "PERSON",
            "source_id": "doc1",
            "file_path": "file1.pdf",
            "description": text,
        }
        for text in ("Alice is a person", "Alice works at Acme")
    ]

    with patch("lightrag.operate._handle_entity_relation_summary") as summary_spy:
        await _merge_nodes_then_upsert(
            entity_name="Alice",
            nodes_data=repeated_nodes,
            knowledge_graph_inst=graph,
            entity_vdb=None,
            global_config=_make_global_config(),
        )
        summary_spy.assert_not_called()

    # The node is still written back exactly once.
    graph.upsert_node.assert_called_once()
62+
63+
64+
@pytest.mark.asyncio
async def test_calls_summary_when_new_descriptions(mock_kg_with_existing_person):
    """A description not yet on the node must go through the summary helper."""
    from lightrag.operate import _merge_nodes_then_upsert

    # This sentence is not part of the fixture node's stored description.
    unseen_node = {
        "entity_type": "PERSON",
        "source_id": "doc2",
        "file_path": "file2.pdf",
        "description": "Alice is the CEO of Acme",
    }

    with patch("lightrag.operate._handle_entity_relation_summary") as summary_spy:
        summary_spy.return_value = ("Alice is a person and CEO of Acme", True)
        await _merge_nodes_then_upsert(
            entity_name="Alice",
            nodes_data=[unseen_node],
            knowledge_graph_inst=mock_kg_with_existing_person,
            entity_vdb=None,
            global_config=_make_global_config(),
        )
        summary_spy.assert_called_once()
87+
88+
89+
@pytest.mark.asyncio
async def test_calls_summary_for_new_entity():
    """With no stored node (``get_node`` returns ``None``) the summary helper is called."""
    from lightrag.operate import _merge_nodes_then_upsert

    empty_graph = AsyncMock()
    empty_graph.get_node = AsyncMock(return_value=None)
    empty_graph.upsert_node = AsyncMock()

    first_sighting = {
        "entity_type": "ORG",
        "source_id": "doc1",
        "file_path": "file1.pdf",
        "description": "Acme Corp makes widgets",
    }

    with patch("lightrag.operate._handle_entity_relation_summary") as summary_spy:
        summary_spy.return_value = ("Acme Corp makes widgets", True)
        await _merge_nodes_then_upsert(
            entity_name="Acme",
            nodes_data=[first_sighting],
            knowledge_graph_inst=empty_graph,
            entity_vdb=None,
            global_config=_make_global_config(),
        )
        summary_spy.assert_called_once()
116+
117+
118+
@pytest.mark.asyncio
async def test_skip_preserves_existing_description(mock_kg_with_existing_person):
    """When nothing new is ingested, the node is written back with its stored description."""
    from lightrag.operate import _merge_nodes_then_upsert

    existing_desc = f"Alice is a person{GRAPH_FIELD_SEP}Alice works at Acme"

    # Only one of the two stored sentences is re-ingested; nothing new is added.
    nodes_data = [
        {
            "entity_type": "PERSON",
            "source_id": "doc1",
            "file_path": "file1.pdf",
            "description": "Alice is a person",
        },
    ]

    with patch("lightrag.operate._handle_entity_relation_summary"):
        await _merge_nodes_then_upsert(
            entity_name="Alice",
            nodes_data=nodes_data,
            knowledge_graph_inst=mock_kg_with_existing_person,
            entity_vdb=None,
            global_config=_make_global_config(),
        )

    upsert_mock = mock_kg_with_existing_person.upsert_node
    # Assert the write happened first: call_args is None on an uncalled mock,
    # which would otherwise surface as an unhelpful AttributeError below.
    upsert_mock.assert_called_once()

    recorded = upsert_mock.call_args
    # call_args[1] is the same dict as call_args.kwargs, so a positional
    # node_data has to be read from .args (second argument after entity_name).
    if "node_data" in recorded.kwargs:
        upserted_data = recorded.kwargs["node_data"]
    else:
        upserted_data = recorded.args[1]

    assert upserted_data["description"] == existing_desc

0 commit comments

Comments
 (0)