@@ -54,7 +54,6 @@ def resume(cls, connector_id, status):
5454 SyncLogsService .update_by_id (task ["id" ], task )
5555 ConnectorService .update_by_id (connector_id , {"status" : status })
5656
57-
5857 @classmethod
5958 def list (cls , tenant_id ):
6059 fields = [
@@ -67,6 +66,15 @@ def list(cls, tenant_id):
6766 cls .model .tenant_id == tenant_id
6867 ).dicts ())
6968
@classmethod
def rebuild(cls, kb_id: str, connector_id: str, tenant_id: str):
    """Reset what a connector has synced into one knowledge base.

    Removes the sync-log rows recorded for the (connector, knowledge base)
    pair, then deletes the documents whose ``source_type`` is
    ``"<connector source>/<connector id>"`` and that belong to ``kb_id``.

    Args:
        kb_id: Knowledge base whose connector data is being rebuilt.
        connector_id: Connector whose sync logs and documents are removed.
        tenant_id: Passed through to ``FileService.delete_docs``.

    Returns:
        ``None`` when ``get_by_id`` reports that the connector was not
        found; otherwise whatever ``FileService.delete_docs`` returns.
    """
    found, conn = cls.get_by_id(connector_id)
    if not found:
        return None

    SyncLogsService.filter_delete([
        SyncLogs.connector_id == connector_id,
        SyncLogs.kb_id == kb_id,
    ])

    # Restrict the lookup to this knowledge base. Filtering on source_type
    # alone also matches documents the same connector imported into other
    # knowledge bases, and a rebuild of one KB must not delete those.
    # NOTE(review): assumes the Document model exposes a ``kb_id`` field,
    # as SyncLogs does -- confirm against the model definition.
    docs = DocumentService.query(
        source_type=f"{conn.source}/{conn.id}",
        kb_id=kb_id,
    )
    return FileService.delete_docs([doc.id for doc in docs], tenant_id)
77+
7078
7179class SyncLogsService (CommonService ):
7280 model = SyncLogs
@@ -91,6 +99,7 @@ def list_sync_tasks(cls, connector_id=None, page_number=None, items_per_page=15)
9199 Connector .timeout_secs ,
92100 Knowledgebase .name .alias ("kb_name" ),
93101 Knowledgebase .avatar .alias ("kb_avatar" ),
102+ Connector2Kb .auto_parse ,
94103 cls .model .from_beginning .alias ("reindex" ),
95104 cls .model .status
96105 ]
@@ -179,7 +188,7 @@ def increase_docs(cls, id, min_update, max_update, doc_num, err_msg="", error_co
179188 .where (cls .model .id == id ).execute ()
180189
181190 @classmethod
182- def duplicate_and_parse (cls , kb , docs , tenant_id , src ):
191+ def duplicate_and_parse (cls , kb , docs , tenant_id , src , auto_parse = True ):
183192 if not docs :
184193 return None
185194
@@ -191,14 +200,17 @@ def read(self) -> bytes:
191200 return self .blob
192201
193202 errs = []
194- files = [FileObj (filename = d ["semantic_identifier" ]+ f". { d ['extension' ]} " , blob = d ["blob" ]) for d in docs ]
203+ files = [FileObj (filename = d ["semantic_identifier" ]+ ( f" { d ['extension' ]} " if d [ "semantic_identifier" ][:: - 1 ]. find ( d [ 'extension' ][:: - 1 ]) < 0 else "" ) , blob = d ["blob" ]) for d in docs ]
195204 doc_ids = []
196205 err , doc_blob_pairs = FileService .upload_document (kb , files , tenant_id , src )
197206 errs .extend (err )
207+
198208 kb_table_num_map = {}
199209 for doc , _ in doc_blob_pairs :
200- DocumentService .run (tenant_id , doc , kb_table_num_map )
201210 doc_ids .append (doc ["id" ])
211+ if not auto_parse or auto_parse == "0" :
212+ continue
213+ DocumentService .run (tenant_id , doc , kb_table_num_map )
202214
203215 return errs , doc_ids
204216
@@ -213,33 +225,6 @@ def get_latest_task(cls, connector_id, kb_id):
213225class Connector2KbService (CommonService ):
214226 model = Connector2Kb
215227
216- @classmethod
217- def link_kb (cls , conn_id :str , kb_ids : list [str ], tenant_id :str ):
218- arr = cls .query (connector_id = conn_id )
219- old_kb_ids = [a .kb_id for a in arr ]
220- for kb_id in kb_ids :
221- if kb_id in old_kb_ids :
222- continue
223- cls .save (** {
224- "id" : get_uuid (),
225- "connector_id" : conn_id ,
226- "kb_id" : kb_id
227- })
228- SyncLogsService .schedule (conn_id , kb_id , reindex = True )
229-
230- errs = []
231- e , conn = ConnectorService .get_by_id (conn_id )
232- for kb_id in old_kb_ids :
233- if kb_id in kb_ids :
234- continue
235- cls .filter_delete ([cls .model .kb_id == kb_id , cls .model .connector_id == conn_id ])
236- SyncLogsService .filter_update ([SyncLogs .connector_id == conn_id , SyncLogs .kb_id == kb_id , SyncLogs .status == TaskStatus .SCHEDULE ], {"status" : TaskStatus .CANCEL })
237- docs = DocumentService .query (source_type = f"{ conn .source } /{ conn .id } " )
238- err = FileService .delete_docs ([d .id for d in docs ], tenant_id )
239- if err :
240- errs .append (err )
241- return "\n " .join (errs )
242-
243228 @classmethod
244229 def link_connectors (cls , kb_id :str , connector_ids : list [str ], tenant_id :str ):
245230 arr = cls .query (kb_id = kb_id )
@@ -260,11 +245,15 @@ def link_connectors(cls, kb_id:str, connector_ids: list[str], tenant_id:str):
260245 continue
261246 cls .filter_delete ([cls .model .kb_id == kb_id , cls .model .connector_id == conn_id ])
262247 e , conn = ConnectorService .get_by_id (conn_id )
263- SyncLogsService .filter_update ([SyncLogs .connector_id == conn_id , SyncLogs .kb_id == kb_id , SyncLogs .status == TaskStatus .SCHEDULE ], {"status" : TaskStatus .CANCEL })
264- docs = DocumentService .query (source_type = f"{ conn .source } /{ conn .id } " )
265- err = FileService .delete_docs ([d .id for d in docs ], tenant_id )
266- if err :
267- errs .append (err )
248+ if not e :
249+ continue
250+ #SyncLogsService.filter_delete([SyncLogs.connector_id==conn_id, SyncLogs.kb_id==kb_id])
251+ # Do not delete docs while unlinking.
252+ SyncLogsService .filter_update ([SyncLogs .connector_id == conn_id , SyncLogs .kb_id == kb_id , SyncLogs .status .in_ ([TaskStatus .SCHEDULE , TaskStatus .RUNNING ])], {"status" : TaskStatus .CANCEL })
253+ #docs = DocumentService.query(source_type=f"{conn.source}/{conn.id}")
254+ #err = FileService.delete_docs([d.id for d in docs], tenant_id)
255+ #if err:
256+ # errs.append(err)
268257 return "\n " .join (errs )
269258
270259 @classmethod
@@ -282,3 +271,5 @@ def list_connectors(cls, kb_id):
282271 ).dicts ()
283272 )
284273
274+
275+
0 commit comments