Skip to content

Commit 6382ab6

Browse files
feat: support faster batch search and storage (#136)
1 parent 84e3068 commit 6382ab6

File tree

5 files changed

+348
-103
lines changed

5 files changed

+348
-103
lines changed

nano_graphrag/_op.py

Lines changed: 11 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -483,27 +483,29 @@ async def _pack_single_community_describe(
483483
)
484484
node_fields = ["id", "entity", "type", "description", "degree"]
485485
edge_fields = ["id", "source", "target", "description", "rank"]
486+
node_degrees = await knwoledge_graph_inst.node_degrees_batch(nodes_in_order)
486487
nodes_list_data = [
487488
[
488489
i,
489490
node_name,
490491
node_data.get("entity_type", "UNKNOWN"),
491492
node_data.get("description", "UNKNOWN"),
492-
await knwoledge_graph_inst.node_degree(node_name),
493+
node_degrees[i],
493494
]
494495
for i, (node_name, node_data) in enumerate(zip(nodes_in_order, nodes_data))
495496
]
496497
nodes_list_data = sorted(nodes_list_data, key=lambda x: x[-1], reverse=True)
497498
nodes_may_truncate_list_data = truncate_list_by_token_size(
498499
nodes_list_data, key=lambda x: x[3], max_token_size=max_token_size // 2
499500
)
501+
edge_degrees = await knwoledge_graph_inst.edge_degrees_batch(edges_in_order)
500502
edges_list_data = [
501503
[
502504
i,
503505
edge_name[0],
504506
edge_name[1],
505507
edge_data.get("description", "UNKNOWN"),
506-
await knwoledge_graph_inst.edge_degree(*edge_name),
508+
edge_degrees[i]
507509
]
508510
for i, (edge_name, edge_data) in enumerate(zip(edges_in_order, edges_data))
509511
]
@@ -730,18 +732,14 @@ async def _find_most_related_text_unit_from_entities(
730732
split_string_by_multi_markers(dp["source_id"], [GRAPH_FIELD_SEP])
731733
for dp in node_datas
732734
]
733-
edges = await asyncio.gather(
734-
*[knowledge_graph_inst.get_node_edges(dp["entity_name"]) for dp in node_datas]
735-
)
735+
edges = await knowledge_graph_inst.get_nodes_edges_batch([dp["entity_name"] for dp in node_datas])
736736
all_one_hop_nodes = set()
737737
for this_edges in edges:
738738
if not this_edges:
739739
continue
740740
all_one_hop_nodes.update([e[1] for e in this_edges])
741741
all_one_hop_nodes = list(all_one_hop_nodes)
742-
all_one_hop_nodes_data = await asyncio.gather(
743-
*[knowledge_graph_inst.get_node(e) for e in all_one_hop_nodes]
744-
)
742+
all_one_hop_nodes_data = await knowledge_graph_inst.get_nodes_batch(all_one_hop_nodes)
745743
all_one_hop_text_units_lookup = {
746744
k: set(split_string_by_multi_markers(v["source_id"], [GRAPH_FIELD_SEP]))
747745
for k, v in zip(all_one_hop_nodes, all_one_hop_nodes_data)
@@ -786,9 +784,7 @@ async def _find_most_related_edges_from_entities(
786784
query_param: QueryParam,
787785
knowledge_graph_inst: BaseGraphStorage,
788786
):
789-
all_related_edges = await asyncio.gather(
790-
*[knowledge_graph_inst.get_node_edges(dp["entity_name"]) for dp in node_datas]
791-
)
787+
all_related_edges = await knowledge_graph_inst.get_nodes_edges_batch([dp["entity_name"] for dp in node_datas])
792788

793789
all_edges = []
794790
seen = set()
@@ -800,12 +796,8 @@ async def _find_most_related_edges_from_entities(
800796
seen.add(sorted_edge)
801797
all_edges.append(sorted_edge)
802798

803-
all_edges_pack = await asyncio.gather(
804-
*[knowledge_graph_inst.get_edge(e[0], e[1]) for e in all_edges]
805-
)
806-
all_edges_degree = await asyncio.gather(
807-
*[knowledge_graph_inst.edge_degree(e[0], e[1]) for e in all_edges]
808-
)
799+
all_edges_pack = await knowledge_graph_inst.get_edges_batch(all_edges)
800+
all_edges_degree = await knowledge_graph_inst.edge_degrees_batch(all_edges)
809801
all_edges_data = [
810802
{"src_tgt": k, "rank": d, **v}
811803
for k, v, d in zip(all_edges, all_edges_pack, all_edges_degree)
@@ -833,14 +825,10 @@ async def _build_local_query_context(
833825
results = await entities_vdb.query(query, top_k=query_param.top_k)
834826
if not len(results):
835827
return None
836-
node_datas = await asyncio.gather(
837-
*[knowledge_graph_inst.get_node(r["entity_name"]) for r in results]
838-
)
828+
node_datas = await knowledge_graph_inst.get_nodes_batch([r["entity_name"] for r in results])
839829
if not all([n is not None for n in node_datas]):
840830
logger.warning("Some nodes are missing, maybe the storage is damaged")
841-
node_degrees = await asyncio.gather(
842-
*[knowledge_graph_inst.node_degree(r["entity_name"]) for r in results]
843-
)
831+
node_degrees = await knowledge_graph_inst.node_degrees_batch([r["entity_name"] for r in results])
844832
node_datas = [
845833
{**n, "entity_name": k["entity_name"], "rank": d}
846834
for k, n, d in zip(results, node_datas, node_degrees)

0 commit comments

Comments
 (0)