Skip to content

Commit 14f9256

Browse files
committed
Fix conflicts
Signed-off-by: Jin Hai <haijin.chn@gmail.com>
2 parents f4e0b38 + 4bbbf92 commit 14f9256

File tree

5 files changed

+77
-11
lines changed

5 files changed

+77
-11
lines changed

api/apps/connector_app.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -40,9 +40,9 @@ def set_connector():
4040
"source": req["source"],
4141
"input_type": InputType.POLL,
4242
"config": req["config"],
43-
"refresh_freq": int(req["refresh_freq"]),
44-
"prune_freq": int(req["prune_freq"]),
45-
"timeout_secs": int(req["timeout_secs"]),
43+
"refresh_freq": int(req.get("refresh_freq", 30)),
44+
"prune_freq": int(req.get("prune_freq", 720)),
45+
"timeout_secs": int(req.get("timeout_secs", 60*29)),
4646
"status": TaskStatus.SCHEDULE
4747
}
4848
conn["status"] = TaskStatus.SCHEDULE

api/apps/kb_app.py

Lines changed: 24 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@
2121
from flask_login import login_required, current_user
2222
import numpy as np
2323

24+
25+
from api.db.services.connector_service import Connector2KbService
2426
from api.db.services.llm_service import LLMBundle
2527
from api.db.services.document_service import DocumentService, queue_raptor_o_graphrag_tasks
2628
from api.db.services.file2document_service import File2DocumentService
@@ -146,6 +148,8 @@ def detail():
146148
return get_data_error_result(
147149
message="Can't find this knowledgebase!")
148150
kb["size"] = DocumentService.get_total_size_by_kb_id(kb_id=kb["id"],keywords="", run_status=[], types=[])
151+
kb["connectors"] = Connector2KbService.list_connectors(kb_id)
152+
149153
for key in ["graphrag_task_finish_at", "raptor_task_finish_at", "mindmap_task_finish_at"]:
150154
if finish_at := kb.get(key):
151155
kb[key] = finish_at.strftime("%Y-%m-%d %H:%M:%S")
@@ -719,26 +723,30 @@ def delete_kb_task():
719723
if not pipeline_task_type or pipeline_task_type not in [PipelineTaskType.GRAPH_RAG, PipelineTaskType.RAPTOR, PipelineTaskType.MINDMAP]:
720724
return get_error_data_result(message="Invalid task type")
721725

726+
def cancel_task(task_id):
727+
REDIS_CONN.set(f"{task_id}-cancel", "x")
728+
722729
match pipeline_task_type:
723730
case PipelineTaskType.GRAPH_RAG:
724-
settings.docStoreConn.delete({"knowledge_graph_kwd": ["graph", "subgraph", "entity", "relation"]}, search.index_name(kb.tenant_id), kb_id)
725731
kb_task_id_field = "graphrag_task_id"
726732
task_id = kb.graphrag_task_id
727733
kb_task_finish_at = "graphrag_task_finish_at"
734+
cancel_task(task_id)
735+
settings.docStoreConn.delete({"knowledge_graph_kwd": ["graph", "subgraph", "entity", "relation"]}, search.index_name(kb.tenant_id), kb_id)
728736
case PipelineTaskType.RAPTOR:
729737
kb_task_id_field = "raptor_task_id"
730738
task_id = kb.raptor_task_id
731739
kb_task_finish_at = "raptor_task_finish_at"
740+
cancel_task(task_id)
741+
settings.docStoreConn.delete({"raptor_kwd": ["raptor"]}, search.index_name(kb.tenant_id), kb_id)
732742
case PipelineTaskType.MINDMAP:
733743
kb_task_id_field = "mindmap_task_id"
734744
task_id = kb.mindmap_task_id
735745
kb_task_finish_at = "mindmap_task_finish_at"
746+
cancel_task(task_id)
736747
case _:
737748
return get_error_data_result(message="Internal Error: Invalid task type")
738749

739-
def cancel_task(task_id):
740-
REDIS_CONN.set(f"{task_id}-cancel", "x")
741-
cancel_task(task_id)
742750

743751
ok = KnowledgebaseService.update_by_id(kb_id, {kb_task_id_field: "", kb_task_finish_at: None})
744752
if not ok:
@@ -883,3 +891,15 @@ def sample_random_chunks_with_vectors(
883891
if summary["avg_cos_sim"] > 0.99:
884892
return get_json_result(data={"summary": summary, "results": results})
885893
return get_json_result(code=RetCode.NOT_EFFECTIVE, message="failed", data={"summary": summary, "results": results})
894+
895+
896+
@manager.route("/<kb_id>/link", methods=["POST"]) # noqa: F821
897+
@validate_request("connector_ids")
898+
@login_required
899+
def link_connector(kb_id):
900+
req = request.json
901+
errors = Connector2KbService.link_connectors(kb_id, req["connector_ids"], current_user.id)
902+
if errors:
903+
return get_json_result(data=False, message=errors, code=RetCode.SERVER_ERROR)
904+
return get_json_result(data=True)
905+

api/db/services/connector_service.py

Lines changed: 44 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,9 +58,9 @@ def list(cls, tenant_id):
5858
cls.model.source,
5959
cls.model.status
6060
]
61-
return cls.model.select(*fields).where(
61+
return list(cls.model.select(*fields).where(
6262
cls.model.tenant_id == tenant_id
63-
).dicts()
63+
).dicts())
6464

6565

6666
class SyncLogsService(CommonService):
@@ -219,3 +219,45 @@ def link_kb(cls, conn_id:str, kb_ids: list[str], tenant_id:str):
219219
errs.append(err)
220220
return "\n".join(errs)
221221

222+
@classmethod
223+
def link_connectors(cls, kb_id:str, connector_ids: list[str], tenant_id:str):
224+
arr = cls.query(kb_id=kb_id)
225+
old_conn_ids = [a.connector_id for a in arr]
226+
for conn_id in connector_ids:
227+
if conn_id in old_conn_ids:
228+
continue
229+
cls.save(**{
230+
"id": get_uuid(),
231+
"connector_id": conn_id,
232+
"kb_id": kb_id
233+
})
234+
SyncLogsService.schedule(conn_id, kb_id, reindex=True)
235+
236+
errs = []
237+
for conn_id in old_conn_ids:
238+
if conn_id in connector_ids:
239+
continue
240+
cls.filter_delete([cls.model.kb_id==kb_id, cls.model.connector_id==conn_id])
241+
e, conn = ConnectorService.get_by_id(conn_id)
242+
SyncLogsService.filter_update([SyncLogs.connector_id==conn_id, SyncLogs.kb_id==kb_id, SyncLogs.status==TaskStatus.SCHEDULE], {"status": TaskStatus.CANCEL})
243+
docs = DocumentService.query(source_type=f"{conn.source}/{conn.id}")
244+
err = FileService.delete_docs([d.id for d in docs], tenant_id)
245+
if err:
246+
errs.append(err)
247+
return "\n".join(errs)
248+
249+
@classmethod
250+
def list_connectors(cls, kb_id):
251+
fields = [
252+
Connector.id,
253+
Connector.source,
254+
Connector.name,
255+
Connector.status
256+
]
257+
return list(cls.model.select(*fields)\
258+
.join(Connector, on=(cls.model.connector_id==Connector.id))\
259+
.where(
260+
cls.model.kb_id==kb_id
261+
).dicts()
262+
)
263+

common/base64_image.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,10 @@ async def image2id(d: dict, storage_put_func: partial, objname:str, bucket:str="
3030
from io import BytesIO
3131
import trio
3232
from rag.svr.task_executor import minio_limiter
33-
if not d.get("image"):
33+
if "image" not in d:
34+
return
35+
if not d["image"]:
36+
del d["image"]
3437
return
3538

3639
with BytesIO() as output_buffer:

rag/svr/task_executor.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -701,7 +701,8 @@ async def run_raptor_for_kb(row, kb_parser_config, chat_mdl, embd_mdl, vector_si
701701
"doc_id": fake_doc_id,
702702
"kb_id": [str(row["kb_id"])],
703703
"docnm_kwd": row["name"],
704-
"title_tks": rag_tokenizer.tokenize(row["name"])
704+
"title_tks": rag_tokenizer.tokenize(row["name"]),
705+
"raptor_kwd": "raptor"
705706
}
706707
if row["pagerank"]:
707708
doc[PAGERANK_FLD] = int(row["pagerank"])

0 commit comments

Comments
 (0)