@@ -116,7 +116,6 @@ async def _handle_entity_relation_summary(
116
116
use_llm_func : callable = global_config ["llm_model_func" ]
117
117
tokenizer : Tokenizer = global_config ["tokenizer" ]
118
118
llm_max_tokens = global_config ["llm_model_max_token_size" ]
119
- tiktoken_model_name = global_config ["tiktoken_model_name" ]
120
119
summary_max_tokens = global_config ["summary_to_max_tokens" ]
121
120
122
121
language = global_config ["addon_params" ].get (
@@ -842,7 +841,6 @@ async def kg_query(
842
841
relationships_vdb ,
843
842
text_chunks_db ,
844
843
query_param ,
845
- global_config ,
846
844
)
847
845
848
846
if query_param .only_need_context :
@@ -1114,7 +1112,6 @@ async def get_kg_context():
1114
1112
relationships_vdb ,
1115
1113
text_chunks_db ,
1116
1114
query_param ,
1117
- global_config ,
1118
1115
)
1119
1116
1120
1117
return context
@@ -1269,7 +1266,6 @@ async def _build_query_context(
1269
1266
relationships_vdb : BaseVectorStorage ,
1270
1267
text_chunks_db : BaseKVStorage ,
1271
1268
query_param : QueryParam ,
1272
- global_config : dict [str , str ],
1273
1269
):
1274
1270
logger .info (f"Process { os .getpid ()} buidling query context..." )
1275
1271
if query_param .mode == "local" :
@@ -1279,7 +1275,6 @@ async def _build_query_context(
1279
1275
entities_vdb ,
1280
1276
text_chunks_db ,
1281
1277
query_param ,
1282
- global_config ,
1283
1278
)
1284
1279
elif query_param .mode == "global" :
1285
1280
entities_context , relations_context , text_units_context = await _get_edge_data (
@@ -1288,7 +1283,6 @@ async def _build_query_context(
1288
1283
relationships_vdb ,
1289
1284
text_chunks_db ,
1290
1285
query_param ,
1291
- global_config ,
1292
1286
)
1293
1287
else : # hybrid mode
1294
1288
ll_data = await _get_node_data (
@@ -1297,15 +1291,13 @@ async def _build_query_context(
1297
1291
entities_vdb ,
1298
1292
text_chunks_db ,
1299
1293
query_param ,
1300
- global_config ,
1301
1294
)
1302
1295
hl_data = await _get_edge_data (
1303
1296
hl_keywords ,
1304
1297
knowledge_graph_inst ,
1305
1298
relationships_vdb ,
1306
1299
text_chunks_db ,
1307
1300
query_param ,
1308
- global_config ,
1309
1301
)
1310
1302
1311
1303
(
@@ -1352,7 +1344,6 @@ async def _get_node_data(
1352
1344
entities_vdb : BaseVectorStorage ,
1353
1345
text_chunks_db : BaseKVStorage ,
1354
1346
query_param : QueryParam ,
1355
- global_config : dict [str , str ],
1356
1347
):
1357
1348
# get similar entities
1358
1349
logger .info (
@@ -1389,13 +1380,13 @@ async def _get_node_data(
1389
1380
] # what is this text_chunks_db doing. dont remember it in airvx. check the diagram.
1390
1381
# get entitytext chunk
1391
1382
use_text_units = await _find_most_related_text_unit_from_entities (
1392
- node_datas , query_param , text_chunks_db , knowledge_graph_inst , global_config
1383
+ node_datas , query_param , text_chunks_db , knowledge_graph_inst ,
1393
1384
)
1394
1385
use_relations = await _find_most_related_edges_from_entities (
1395
- node_datas , query_param , knowledge_graph_inst , global_config
1386
+ node_datas , query_param , knowledge_graph_inst ,
1396
1387
)
1397
1388
1398
- tokenizer : Tokenizer = global_config [ "tokenizer" ]
1389
+ tokenizer : Tokenizer = text_chunks_db . global_config . get ( "tokenizer" )
1399
1390
len_node_datas = len (node_datas )
1400
1391
node_datas = truncate_list_by_token_size (
1401
1392
node_datas ,
@@ -1495,7 +1486,6 @@ async def _find_most_related_text_unit_from_entities(
1495
1486
query_param : QueryParam ,
1496
1487
text_chunks_db : BaseKVStorage ,
1497
1488
knowledge_graph_inst : BaseGraphStorage ,
1498
- global_config : dict [str , str ],
1499
1489
):
1500
1490
text_units = [
1501
1491
split_string_by_multi_markers (dp ["source_id" ], [GRAPH_FIELD_SEP ])
@@ -1577,7 +1567,7 @@ async def _find_most_related_text_unit_from_entities(
1577
1567
logger .warning ("No valid text units found" )
1578
1568
return []
1579
1569
1580
- tokenizer : Tokenizer = global_config [ "tokenizer" ]
1570
+ tokenizer : Tokenizer = text_chunks_db . global_config . get ( "tokenizer" )
1581
1571
all_text_units = sorted (
1582
1572
all_text_units , key = lambda x : (x ["order" ], - x ["relation_counts" ])
1583
1573
)
@@ -1600,7 +1590,6 @@ async def _find_most_related_edges_from_entities(
1600
1590
node_datas : list [dict ],
1601
1591
query_param : QueryParam ,
1602
1592
knowledge_graph_inst : BaseGraphStorage ,
1603
- global_config : dict [str , str ],
1604
1593
):
1605
1594
node_names = [dp ["entity_name" ] for dp in node_datas ]
1606
1595
batch_edges_dict = await knowledge_graph_inst .get_nodes_edges_batch (node_names )
@@ -1640,7 +1629,7 @@ async def _find_most_related_edges_from_entities(
1640
1629
}
1641
1630
all_edges_data .append (combined )
1642
1631
1643
- tokenizer : Tokenizer = global_config [ "tokenizer" ]
1632
+ tokenizer : Tokenizer = knowledge_graph_inst . global_config . get ( "tokenizer" )
1644
1633
all_edges_data = sorted (
1645
1634
all_edges_data , key = lambda x : (x ["rank" ], x ["weight" ]), reverse = True
1646
1635
)
@@ -1664,7 +1653,6 @@ async def _get_edge_data(
1664
1653
relationships_vdb : BaseVectorStorage ,
1665
1654
text_chunks_db : BaseKVStorage ,
1666
1655
query_param : QueryParam ,
1667
- global_config : dict [str , str ],
1668
1656
):
1669
1657
logger .info (
1670
1658
f"Query edges: { keywords } , top_k: { query_param .top_k } , cosine: { relationships_vdb .cosine_better_than_threshold } "
@@ -1705,7 +1693,7 @@ async def _get_edge_data(
1705
1693
}
1706
1694
edge_datas .append (combined )
1707
1695
1708
- tokenizer : Tokenizer = global_config [ "tokenizer" ]
1696
+ tokenizer : Tokenizer = text_chunks_db . global_config . get ( "tokenizer" )
1709
1697
edge_datas = sorted (
1710
1698
edge_datas , key = lambda x : (x ["rank" ], x ["weight" ]), reverse = True
1711
1699
)
@@ -1717,10 +1705,10 @@ async def _get_edge_data(
1717
1705
)
1718
1706
use_entities , use_text_units = await asyncio .gather (
1719
1707
_find_most_related_entities_from_relationships (
1720
- edge_datas , query_param , knowledge_graph_inst , global_config
1708
+ edge_datas , query_param , knowledge_graph_inst ,
1721
1709
),
1722
1710
_find_related_text_unit_from_relationships (
1723
- edge_datas , query_param , text_chunks_db , knowledge_graph_inst , global_config
1711
+ edge_datas , query_param , text_chunks_db , knowledge_graph_inst ,
1724
1712
),
1725
1713
)
1726
1714
logger .info (
@@ -1800,7 +1788,6 @@ async def _find_most_related_entities_from_relationships(
1800
1788
edge_datas : list [dict ],
1801
1789
query_param : QueryParam ,
1802
1790
knowledge_graph_inst : BaseGraphStorage ,
1803
- global_config : dict [str , str ],
1804
1791
):
1805
1792
entity_names = []
1806
1793
seen = set ()
@@ -1831,7 +1818,7 @@ async def _find_most_related_entities_from_relationships(
1831
1818
combined = {** node , "entity_name" : entity_name , "rank" : degree }
1832
1819
node_datas .append (combined )
1833
1820
1834
- tokenizer : Tokenizer = global_config [ "tokenizer" ]
1821
+ tokenizer : Tokenizer = knowledge_graph_inst . global_config . get ( "tokenizer" )
1835
1822
len_node_datas = len (node_datas )
1836
1823
node_datas = truncate_list_by_token_size (
1837
1824
node_datas ,
@@ -1851,7 +1838,6 @@ async def _find_related_text_unit_from_relationships(
1851
1838
query_param : QueryParam ,
1852
1839
text_chunks_db : BaseKVStorage ,
1853
1840
knowledge_graph_inst : BaseGraphStorage ,
1854
- global_config : dict [str , str ],
1855
1841
):
1856
1842
text_units = [
1857
1843
split_string_by_multi_markers (dp ["source_id" ], [GRAPH_FIELD_SEP ])
@@ -1893,7 +1879,7 @@ async def fetch_chunk_data(c_id, index):
1893
1879
logger .warning ("No valid text chunks after filtering" )
1894
1880
return []
1895
1881
1896
- tokenizer : Tokenizer = global_config [ "tokenizer" ]
1882
+ tokenizer : Tokenizer = text_chunks_db . global_config . get ( "tokenizer" )
1897
1883
truncated_text_units = truncate_list_by_token_size (
1898
1884
valid_text_units ,
1899
1885
key = lambda x : x ["data" ]["content" ],
@@ -2130,7 +2116,6 @@ async def kg_query_with_keywords(
2130
2116
relationships_vdb ,
2131
2117
text_chunks_db ,
2132
2118
query_param ,
2133
- global_config ,
2134
2119
)
2135
2120
if not context :
2136
2121
return PROMPTS ["fail_response" ]
0 commit comments