77import logging
88from pathlib import Path
99
10+ import cognee
11+ from cognee import SearchType
12+
1013logger = logging .getLogger (__name__ )
1114
1215
async def ingest_document(
    file_path: str,
    dataset_name: str,
    document_id: str = None,
) -> dict:
    """
    Ingest a document into the knowledge graph.

    Calls cognee.add() to ingest the file, then cognee.cognify() on the
    dataset, and finally collects structured data from the processed
    results via _extract_structured_data().

    Args:
        file_path: Location of the file to ingest; passed to cognee.add().
        dataset_name: Dataset the file is added to and that is cognified.
        document_id: Optional caller-supplied identifier. It is not used
            for processing, only echoed back in the success result.

    Returns:
        On success, a dict with "status": "success", "document_id",
        "dataset_name", plus the keys returned by
        _extract_structured_data(). On failure, a dict with
        "status": "error" and "error" set to the exception message.
        Exceptions are reported through the return value, not raised.
    """
    try:
        await cognee.add(file_path, dataset_name)
        await cognee.cognify([dataset_name])
        structured_data = await _extract_structured_data(dataset_name)

        return {
            "status": "success",
            "document_id": document_id,
            "dataset_name": dataset_name,
            **structured_data,
        }

    except Exception as e:
        # The caller only receives str(e); log here so the traceback is
        # not lost when the failure is converted into an error dict.
        logger.exception(
            "Ingest failed for %s (dataset %s)", file_path, dataset_name
        )
        return {
            "status": "error",
            "error": str(e),
        }
47+
48+
async def _extract_structured_data(dataset_name: str) -> dict:
    """
    Query Cognee for structured data after cognify() has run.

    Runs two searches, both using the dataset name as the query text:
    SearchType.SUMMARIES and SearchType.CHUNKS.

    Args:
        dataset_name: Passed verbatim as query_text to both searches.

    Returns:
        A dict with:
          "summary": str() of the first summary result, or "" when the
              summary search returned nothing.
          "entities": list built by concatenating the `entities`
              attribute of every chunk result that has one.
          "raw_chunks_count": number of chunk results.
    """
    summary_results = await cognee.search(
        query_type=SearchType.SUMMARIES,
        query_text=dataset_name,
    )

    chunk_results = await cognee.search(
        query_type=SearchType.CHUNKS,
        query_text=dataset_name,
    )

    # Treat a None result like "no results" so the iteration and len()
    # below cannot fail with TypeError.
    if chunk_results is None:
        chunk_results = []

    summary = summary_results[0] if summary_results else ""

    entities = []
    for chunk in chunk_results:
        # A chunk may lack the attribute or carry it as None; skip both.
        chunk_entities = getattr(chunk, "entities", None)
        if chunk_entities is not None:
            entities.extend(chunk_entities)

    return {
        "summary": str(summary),
        "entities": entities,
        "raw_chunks_count": len(chunk_results),
    }
80+
1581
async def ingest_document_background(path: Path, dataset_name: str) -> None:
    """
    For FastAPI BackgroundTasks. Allows ingest_document to run in the
    background for large files.

    Failures are logged rather than raised, and the file at `path` is
    removed afterwards whether or not ingestion succeeded.

    Args:
        path: File to ingest; deleted once ingestion has finished.
        dataset_name: Dataset name forwarded to ingest_document().
    """
    try:
        result = await ingest_document(str(path), dataset_name)
        # ingest_document reports failures through its return value
        # instead of raising, so the status must be checked explicitly
        # or failed ingests would pass unnoticed.
        if result.get("status") == "error":
            logger.error(
                "Background ingest failed for %s: %s",
                path,
                result.get("error"),
            )
    except Exception:
        logger.error("Background ingest failed for %s", path, exc_info=True)
    finally:
        try:
            path.unlink(missing_ok=True)
        except Exception:
            # Cleanup stays best-effort, but leave a trace so leftover
            # files can be diagnosed.
            logger.warning(
                "Could not remove %s after background ingest",
                path,
                exc_info=True,
            )
0 commit comments