@@ -14,7 +14,6 @@ import (
1414 "slices"
1515 "sort"
1616 "strings"
17- "sync"
1817 "time"
1918
2019 "github.com/Tencent/WeKnora/internal/application/service/retriever"
@@ -271,9 +270,11 @@ func (s *knowledgeService) CreateKnowledgeFromURL(ctx context.Context,
271270 // Check if URL already exists in the knowledge base
272271 tenantID := ctx .Value (types .TenantIDContextKey ).(uint )
273272 logger .Infof (ctx , "Checking if URL exists, tenant ID: %d" , tenantID )
273+ fileHash := calculateStr (url )
274274 exists , existingKnowledge , err := s .repo .CheckKnowledgeExists (ctx , tenantID , kbID , & types.KnowledgeCheckParams {
275- Type : "url" ,
276- URL : url ,
275+ Type : "url" ,
276+ URL : url ,
277+ FileHash : fileHash ,
277278 })
278279 if err != nil {
279280 logger .Errorf (ctx , "Failed to check knowledge existence: %v" , err )
@@ -306,6 +307,7 @@ func (s *knowledgeService) CreateKnowledgeFromURL(ctx context.Context,
306307 KnowledgeBaseID : kbID ,
307308 Type : "url" ,
308309 Source : url ,
310+ FileHash : fileHash ,
309311 ParseStatus : "pending" ,
310312 EnableStatus : "disabled" ,
311313 CreatedAt : time .Now (),
@@ -1375,49 +1377,47 @@ func (s *knowledgeService) CloneKnowledgeBase(ctx context.Context, srcID, dstID
13751377 logger .Infof (ctx , "Knowledge after update to add: %d, delete: %d" , len (addKnowledge ), len (delKnowledge ))
13761378
13771379 batch := 10
1378- wg := sync.WaitGroup {}
1379- errCh := make (chan error , len (delKnowledge ))
1380+ g , gctx := errgroup .WithContext (ctx )
13801381 for ids := range slices .Chunk (delKnowledge , batch ) {
1381- wg .Add (1 )
1382- go func (ids []string ) {
1383- defer wg .Done ()
1384- if err := s .DeleteKnowledgeList (ctx , ids ); err != nil {
1385- errCh <- fmt .Errorf ("delete knowledge %v: %w" , ids , err )
1382+ ids = ids
1383+ g .Go (func () error {
1384+ err := s .DeleteKnowledgeList (gctx , ids )
1385+ if err != nil {
1386+ logger .Errorf (gctx , "delete partial knowledge %v: %w" , ids , err )
1387+ return err
13861388 }
1387- }(ids )
1389+ return nil
1390+ })
13881391 }
1389- wg .Wait ()
1390- close (errCh )
1391- for err := range errCh {
1392- if err != nil {
1393- return err
1394- }
1392+ err = g .Wait ()
1393+ if err != nil {
1394+ logger .Errorf (ctx , "delete total knowledge %d: %v" , len (delKnowledge ), err )
1395+ return err
13951396 }
13961397
1397- wg = sync. WaitGroup {}
1398- errCh = make ( chan error , len ( addKnowledge ) + len ( delKnowledge ) )
1399- for ids := range slices . Chunk ( addKnowledge , batch ) {
1400- wg . Add ( 1 )
1401- go func ( ids [] string ) {
1402- defer wg . Done ()
1403- for _ , kID := range ids {
1404- srcKn , err := s . repo . GetKnowledgeByID ( ctx , srcKB . TenantID , kID )
1405- if err != nil {
1406- errCh <- fmt . Errorf ( "get knowledge %s: %w" , kID , err )
1407- continue
1408- }
1409- if err := s . cloneKnowledge ( ctx , srcKn , dstKB ); err != nil {
1410- errCh <- fmt .Errorf ("move knowledge %s: %w" , kID , err )
1411- }
1398+ // Derive a fresh group context from ctx: the previous gctx is canceled once its g.Wait returns and must not be reused
1399+ g , gctx = errgroup . WithContext ( ctx )
1400+ g . SetLimit ( batch )
1401+ for _ , knowledge := range addKnowledge {
1402+ knowledge = knowledge
1403+ g . Go ( func () error {
1404+ srcKn , err := s . repo . GetKnowledgeByID ( gctx , srcKB . TenantID , knowledge )
1405+ if err != nil {
1406+ logger . Errorf ( gctx , "get knowledge %s: %w" , knowledge , err )
1407+ return err
1408+ }
1409+ err = s . cloneKnowledge ( gctx , srcKn , dstKB )
1410+ if err != nil {
1411+ logger .Errorf (gctx , "clone knowledge %s: %w" , knowledge , err )
1412+ return err
14121413 }
1413- }(ids )
1414+ return nil
1415+ })
14141416 }
1415- wg .Wait ()
1416- close (errCh )
1417- for err := range errCh {
1418- if err != nil {
1419- return err
1420- }
1417+ err = g .Wait ()
1418+ if err != nil {
1419+ logger .Errorf (ctx , "add total knowledge %d: %v" , len (addKnowledge ), err )
1420+ return err
14211421 }
14221422 return nil
14231423}
0 commit comments