Skip to content

Commit 3ff9dee

Browse files
committed
fix: take global_config from storage class
1 parent fa742eb commit 3ff9dee

File tree

2 files changed

+11
-26
lines changed

2 files changed

+11
-26
lines changed

lightrag/lightrag.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from dataclasses import asdict, dataclass, field
88
from datetime import datetime
99
from functools import partial
10-
from typing import Any, AsyncIterator, Callable, Iterator, cast, final, Literal
10+
from typing import Any, AsyncIterator, Callable, Iterator, cast, final, Literal, Optional, List, Dict
1111

1212
from lightrag.kg import (
1313
STORAGES,

lightrag/operate.py

+10-25
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,6 @@ async def _handle_entity_relation_summary(
116116
use_llm_func: callable = global_config["llm_model_func"]
117117
tokenizer: Tokenizer = global_config["tokenizer"]
118118
llm_max_tokens = global_config["llm_model_max_token_size"]
119-
tiktoken_model_name = global_config["tiktoken_model_name"]
120119
summary_max_tokens = global_config["summary_to_max_tokens"]
121120

122121
language = global_config["addon_params"].get(
@@ -842,7 +841,6 @@ async def kg_query(
842841
relationships_vdb,
843842
text_chunks_db,
844843
query_param,
845-
global_config,
846844
)
847845

848846
if query_param.only_need_context:
@@ -1114,7 +1112,6 @@ async def get_kg_context():
11141112
relationships_vdb,
11151113
text_chunks_db,
11161114
query_param,
1117-
global_config,
11181115
)
11191116

11201117
return context
@@ -1269,7 +1266,6 @@ async def _build_query_context(
12691266
relationships_vdb: BaseVectorStorage,
12701267
text_chunks_db: BaseKVStorage,
12711268
query_param: QueryParam,
1272-
global_config: dict[str, str],
12731269
):
12741270
logger.info(f"Process {os.getpid()} buidling query context...")
12751271
if query_param.mode == "local":
@@ -1279,7 +1275,6 @@ async def _build_query_context(
12791275
entities_vdb,
12801276
text_chunks_db,
12811277
query_param,
1282-
global_config,
12831278
)
12841279
elif query_param.mode == "global":
12851280
entities_context, relations_context, text_units_context = await _get_edge_data(
@@ -1288,7 +1283,6 @@ async def _build_query_context(
12881283
relationships_vdb,
12891284
text_chunks_db,
12901285
query_param,
1291-
global_config,
12921286
)
12931287
else: # hybrid mode
12941288
ll_data = await _get_node_data(
@@ -1297,15 +1291,13 @@ async def _build_query_context(
12971291
entities_vdb,
12981292
text_chunks_db,
12991293
query_param,
1300-
global_config,
13011294
)
13021295
hl_data = await _get_edge_data(
13031296
hl_keywords,
13041297
knowledge_graph_inst,
13051298
relationships_vdb,
13061299
text_chunks_db,
13071300
query_param,
1308-
global_config,
13091301
)
13101302

13111303
(
@@ -1352,7 +1344,6 @@ async def _get_node_data(
13521344
entities_vdb: BaseVectorStorage,
13531345
text_chunks_db: BaseKVStorage,
13541346
query_param: QueryParam,
1355-
global_config: dict[str, str],
13561347
):
13571348
# get similar entities
13581349
logger.info(
@@ -1389,13 +1380,13 @@ async def _get_node_data(
13891380
] # what is this text_chunks_db doing. dont remember it in airvx. check the diagram.
13901381
# get entitytext chunk
13911382
use_text_units = await _find_most_related_text_unit_from_entities(
1392-
node_datas, query_param, text_chunks_db, knowledge_graph_inst, global_config
1383+
node_datas, query_param, text_chunks_db, knowledge_graph_inst,
13931384
)
13941385
use_relations = await _find_most_related_edges_from_entities(
1395-
node_datas, query_param, knowledge_graph_inst, global_config
1386+
node_datas, query_param, knowledge_graph_inst,
13961387
)
13971388

1398-
tokenizer: Tokenizer = global_config["tokenizer"]
1389+
tokenizer: Tokenizer = text_chunks_db.global_config.get("tokenizer")
13991390
len_node_datas = len(node_datas)
14001391
node_datas = truncate_list_by_token_size(
14011392
node_datas,
@@ -1495,7 +1486,6 @@ async def _find_most_related_text_unit_from_entities(
14951486
query_param: QueryParam,
14961487
text_chunks_db: BaseKVStorage,
14971488
knowledge_graph_inst: BaseGraphStorage,
1498-
global_config: dict[str, str],
14991489
):
15001490
text_units = [
15011491
split_string_by_multi_markers(dp["source_id"], [GRAPH_FIELD_SEP])
@@ -1577,7 +1567,7 @@ async def _find_most_related_text_unit_from_entities(
15771567
logger.warning("No valid text units found")
15781568
return []
15791569

1580-
tokenizer: Tokenizer = global_config["tokenizer"]
1570+
tokenizer: Tokenizer = text_chunks_db.global_config.get("tokenizer")
15811571
all_text_units = sorted(
15821572
all_text_units, key=lambda x: (x["order"], -x["relation_counts"])
15831573
)
@@ -1600,7 +1590,6 @@ async def _find_most_related_edges_from_entities(
16001590
node_datas: list[dict],
16011591
query_param: QueryParam,
16021592
knowledge_graph_inst: BaseGraphStorage,
1603-
global_config: dict[str, str],
16041593
):
16051594
node_names = [dp["entity_name"] for dp in node_datas]
16061595
batch_edges_dict = await knowledge_graph_inst.get_nodes_edges_batch(node_names)
@@ -1640,7 +1629,7 @@ async def _find_most_related_edges_from_entities(
16401629
}
16411630
all_edges_data.append(combined)
16421631

1643-
tokenizer: Tokenizer = global_config["tokenizer"]
1632+
tokenizer: Tokenizer = knowledge_graph_inst.global_config.get("tokenizer")
16441633
all_edges_data = sorted(
16451634
all_edges_data, key=lambda x: (x["rank"], x["weight"]), reverse=True
16461635
)
@@ -1664,7 +1653,6 @@ async def _get_edge_data(
16641653
relationships_vdb: BaseVectorStorage,
16651654
text_chunks_db: BaseKVStorage,
16661655
query_param: QueryParam,
1667-
global_config: dict[str, str],
16681656
):
16691657
logger.info(
16701658
f"Query edges: {keywords}, top_k: {query_param.top_k}, cosine: {relationships_vdb.cosine_better_than_threshold}"
@@ -1705,7 +1693,7 @@ async def _get_edge_data(
17051693
}
17061694
edge_datas.append(combined)
17071695

1708-
tokenizer: Tokenizer = global_config["tokenizer"]
1696+
tokenizer: Tokenizer = text_chunks_db.global_config.get("tokenizer")
17091697
edge_datas = sorted(
17101698
edge_datas, key=lambda x: (x["rank"], x["weight"]), reverse=True
17111699
)
@@ -1717,10 +1705,10 @@ async def _get_edge_data(
17171705
)
17181706
use_entities, use_text_units = await asyncio.gather(
17191707
_find_most_related_entities_from_relationships(
1720-
edge_datas, query_param, knowledge_graph_inst, global_config
1708+
edge_datas, query_param, knowledge_graph_inst,
17211709
),
17221710
_find_related_text_unit_from_relationships(
1723-
edge_datas, query_param, text_chunks_db, knowledge_graph_inst, global_config
1711+
edge_datas, query_param, text_chunks_db, knowledge_graph_inst,
17241712
),
17251713
)
17261714
logger.info(
@@ -1800,7 +1788,6 @@ async def _find_most_related_entities_from_relationships(
18001788
edge_datas: list[dict],
18011789
query_param: QueryParam,
18021790
knowledge_graph_inst: BaseGraphStorage,
1803-
global_config: dict[str, str],
18041791
):
18051792
entity_names = []
18061793
seen = set()
@@ -1831,7 +1818,7 @@ async def _find_most_related_entities_from_relationships(
18311818
combined = {**node, "entity_name": entity_name, "rank": degree}
18321819
node_datas.append(combined)
18331820

1834-
tokenizer: Tokenizer = global_config["tokenizer"]
1821+
tokenizer: Tokenizer = knowledge_graph_inst.global_config.get("tokenizer")
18351822
len_node_datas = len(node_datas)
18361823
node_datas = truncate_list_by_token_size(
18371824
node_datas,
@@ -1851,7 +1838,6 @@ async def _find_related_text_unit_from_relationships(
18511838
query_param: QueryParam,
18521839
text_chunks_db: BaseKVStorage,
18531840
knowledge_graph_inst: BaseGraphStorage,
1854-
global_config: dict[str, str],
18551841
):
18561842
text_units = [
18571843
split_string_by_multi_markers(dp["source_id"], [GRAPH_FIELD_SEP])
@@ -1893,7 +1879,7 @@ async def fetch_chunk_data(c_id, index):
18931879
logger.warning("No valid text chunks after filtering")
18941880
return []
18951881

1896-
tokenizer: Tokenizer = global_config["tokenizer"]
1882+
tokenizer: Tokenizer = text_chunks_db.global_config.get("tokenizer")
18971883
truncated_text_units = truncate_list_by_token_size(
18981884
valid_text_units,
18991885
key=lambda x: x["data"]["content"],
@@ -2130,7 +2116,6 @@ async def kg_query_with_keywords(
21302116
relationships_vdb,
21312117
text_chunks_db,
21322118
query_param,
2133-
global_config,
21342119
)
21352120
if not context:
21362121
return PROMPTS["fail_response"]

0 commit comments

Comments (0)