Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 59 additions & 1 deletion lightrag/api/routers/document_routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@

from lightrag import LightRAG
from lightrag.base import DeletionResult, DocProcessingStatus, DocStatus
from lightrag.operate import rebuild_knowledge_from_chunks
from lightrag.utils import (
generate_track_id,
compute_mdhash_id,
Expand Down Expand Up @@ -1853,6 +1854,9 @@ async def background_delete_documents(
total_docs = len(doc_ids)
successful_deletions = []
failed_deletions = []
# Aggregate rebuild targets across all deletions so we only rebuild once
all_entities_to_rebuild: dict[str, list] = {}
all_relationships_to_rebuild: dict[tuple, list] = {}

# Double-check pipeline status before proceeding
async with pipeline_status_lock:
Expand Down Expand Up @@ -1905,13 +1909,31 @@ async def background_delete_documents(
file_path = "#"
try:
result = await rag.adelete_by_doc_id(
doc_id, delete_llm_cache=delete_llm_cache
doc_id,
delete_llm_cache=delete_llm_cache,
skip_rebuild=True,
)
file_path = (
getattr(result, "file_path", "-") if "result" in locals() else "-"
)
if result.status == "success":
successful_deletions.append(doc_id)
# Collect deferred rebuild targets
if result.entities_to_rebuild:
all_entities_to_rebuild.update(result.entities_to_rebuild)
if result.relationships_to_rebuild:
all_relationships_to_rebuild.update(
result.relationships_to_rebuild
)
# Remove completely deleted entities and relationships from rebuild targets
if getattr(result, "deleted_entities", None):
for entity in result.deleted_entities:
all_entities_to_rebuild.pop(entity, None)
if getattr(result, "deleted_relationships", None):
for relation in result.deleted_relationships:
# Relationships are treated as undirected by the rebuild logic, so drop the key in both tuple orders
all_relationships_to_rebuild.pop(relation, None)
all_relationships_to_rebuild.pop((relation[1], relation[0]), None)
success_msg = (
f"Document deleted {i}/{total_docs}: {doc_id}[{file_path}]"
)
Expand Down Expand Up @@ -2048,6 +2070,42 @@ async def background_delete_documents(
pipeline_status["latest_message"] = error_msg
pipeline_status["history_messages"].append(error_msg)

# Single deferred rebuild for all affected entities/relations
if all_entities_to_rebuild or all_relationships_to_rebuild:
from dataclasses import asdict

rebuild_msg = (
f"Rebuilding knowledge graph: {len(all_entities_to_rebuild)} entities, "
f"{len(all_relationships_to_rebuild)} relations"
)
logger.info(rebuild_msg)
async with pipeline_status_lock:
pipeline_status["latest_message"] = rebuild_msg
pipeline_status["history_messages"].append(rebuild_msg)
try:
await rebuild_knowledge_from_chunks(
entities_to_rebuild=all_entities_to_rebuild,
relationships_to_rebuild=all_relationships_to_rebuild,
knowledge_graph_inst=rag.chunk_entity_relation_graph,
entities_vdb=rag.entities_vdb,
relationships_vdb=rag.relationships_vdb,
text_chunks_storage=rag.text_chunks,
llm_response_cache=rag.llm_response_cache,
global_config=asdict(rag),
pipeline_status=pipeline_status,
pipeline_status_lock=pipeline_status_lock,
entity_chunks_storage=rag.entity_chunks,
relation_chunks_storage=rag.relation_chunks,
)
await rag._insert_done()
except Exception as rebuild_err:
rebuild_error_msg = f"Failed to rebuild knowledge graph after batch deletion: {rebuild_err}"
logger.error(rebuild_error_msg)
logger.error(traceback.format_exc())
async with pipeline_status_lock:
pipeline_status["latest_message"] = rebuild_error_msg
pipeline_status["history_messages"].append(rebuild_error_msg)

except Exception as e:
error_msg = f"Critical error during batch deletion: {str(e)}"
logger.error(error_msg)
Expand Down
6 changes: 6 additions & 0 deletions lightrag/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -840,6 +840,12 @@ class DeletionResult:
message: str
status_code: int = 200
file_path: str | None = None
# Populated when skip_rebuild=True so the caller can do a single deferred rebuild
entities_to_rebuild: Dict[str, list] | None = None
relationships_to_rebuild: Dict[Any, list] | None = None
# Track fully deleted entities/relationships that should NOT be rebuilt
deleted_entities: list[str] | None = None
deleted_relationships: list[tuple[str, str]] | None = None


# Unified Query Result Data Structures for Reference List Support
Expand Down
65 changes: 46 additions & 19 deletions lightrag/lightrag.py
Original file line number Diff line number Diff line change
Expand Up @@ -3044,7 +3044,10 @@ async def aget_docs_by_ids(
return found_statuses

async def adelete_by_doc_id(
self, doc_id: str, delete_llm_cache: bool = False
self,
doc_id: str,
delete_llm_cache: bool = False,
skip_rebuild: bool = False,
) -> DeletionResult:
"""Delete a document and all its related data, including chunks, graph elements.

Expand Down Expand Up @@ -3077,6 +3080,10 @@ async def adelete_by_doc_id(
doc_id (str): The unique identifier of the document to be deleted.
delete_llm_cache (bool): Whether to delete cached LLM extraction results
associated with the document. Defaults to False.
skip_rebuild (bool): When True, skip the per-document KG rebuild step.
The caller is responsible for performing a single deferred rebuild
using the entities/relationships returned in the DeletionResult.
Used by batch deletion to avoid N redundant rebuilds. Defaults to False.

Returns:
DeletionResult: An object containing the outcome of the deletion process.
Expand All @@ -3085,6 +3092,8 @@ async def adelete_by_doc_id(
- `message` (str): A summary of the operation's result.
- `status_code` (int): HTTP status code (e.g., 200, 404, 403, 500).
- `file_path` (str | None): The file path of the deleted document, if available.
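- `entities_to_rebuild` (dict | None): Populated when skip_rebuild=True.
- `relationships_to_rebuild` (dict | None): Populated when skip_rebuild=True.
- `deleted_entities` (list | None): Populated when skip_rebuild=True.
- `deleted_relationships` (list | None): Populated when skip_rebuild=True.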
"""
# Get pipeline status shared data and lock for validation
pipeline_status = await get_namespace_data(
Expand Down Expand Up @@ -3645,26 +3654,38 @@ async def adelete_by_doc_id(
await self._insert_done()

# 8. Rebuild entities and relationships from remaining chunks
# When skip_rebuild is set (batch deletion), we hand the targets back
# to the caller so it can do one combined rebuild at the end.
if entities_to_rebuild or relationships_to_rebuild:
try:
await rebuild_knowledge_from_chunks(
entities_to_rebuild=entities_to_rebuild,
relationships_to_rebuild=relationships_to_rebuild,
knowledge_graph_inst=self.chunk_entity_relation_graph,
entities_vdb=self.entities_vdb,
relationships_vdb=self.relationships_vdb,
text_chunks_storage=self.text_chunks,
llm_response_cache=self.llm_response_cache,
global_config=asdict(self),
pipeline_status=pipeline_status,
pipeline_status_lock=pipeline_status_lock,
entity_chunks_storage=self.entity_chunks,
relation_chunks_storage=self.relation_chunks,
if skip_rebuild:
logger.info(
"Skipping per-doc rebuild (skip_rebuild=True), "
"%d entities / %d relations deferred",
len(entities_to_rebuild),
len(relationships_to_rebuild),
)
else:
try:
await rebuild_knowledge_from_chunks(
entities_to_rebuild=entities_to_rebuild,
relationships_to_rebuild=relationships_to_rebuild,
knowledge_graph_inst=self.chunk_entity_relation_graph,
entities_vdb=self.entities_vdb,
relationships_vdb=self.relationships_vdb,
text_chunks_storage=self.text_chunks,
llm_response_cache=self.llm_response_cache,
global_config=asdict(self),
pipeline_status=pipeline_status,
pipeline_status_lock=pipeline_status_lock,
entity_chunks_storage=self.entity_chunks,
relation_chunks_storage=self.relation_chunks,
)

except Exception as e:
logger.error(f"Failed to rebuild knowledge from chunks: {e}")
raise Exception(f"Failed to rebuild knowledge graph: {e}") from e
except Exception as e:
logger.error(f"Failed to rebuild knowledge from chunks: {e}")
raise Exception(
f"Failed to rebuild knowledge graph: {e}"
) from e

# 9. Delete from full_entities and full_relations storage
try:
Expand Down Expand Up @@ -3701,13 +3722,19 @@ async def adelete_by_doc_id(
pipeline_status["latest_message"] = log_message
pipeline_status["history_messages"].append(log_message)

return DeletionResult(
result = DeletionResult(
status="success",
doc_id=doc_id,
message=log_message,
status_code=200,
file_path=file_path,
)
if skip_rebuild:
result.entities_to_rebuild = entities_to_rebuild
result.relationships_to_rebuild = relationships_to_rebuild
result.deleted_entities = list(entities_to_delete)
result.deleted_relationships = list(relationships_to_delete)
return result

except Exception as e:
original_exception = e
Expand Down
Loading