@@ -53,31 +53,31 @@ func (r *Recall) Search(ctx context.Context, query string) (resp []*types.Search
5353 err = errors .WithMessagef (err , "text2postingslists error" )
5454 return
5555 }
56-
57- // 倒排库搜索
58- res , err := r .searchDoc (ctx , splitQuery )
56+ // TODO: 改成并发执行的多路召回
57+ // 倒排库召回
58+ textRes , err := r .SearchText (ctx , splitQuery )
5959 if err != nil {
60- err = errors .WithMessage (err , "searchDoc error" )
60+ err = errors .WithMessage (err , "SearchText error" )
6161 return
6262 }
6363
64- // 向量库搜索
65- vRes , err := r .SearchVector (ctx , splitQuery )
64+ // 向量库召回
65+ vectorRes , err := r .SearchVector (ctx , splitQuery )
6666 if err != nil {
6767 err = errors .WithMessage (err , "searchVector error" )
6868 return
6969 }
7070
71- resp , _ = r .Multiplex (ctx , query , res , vRes )
71+ resp , _ = r .Multiplex (ctx , query , textRes , vectorRes )
7272 return
7373}
7474
7575// Multiplex 多路融合排序
76- func (r * Recall ) Multiplex (ctx context.Context , query string , iRes , vRes []int64 ) (resp []* types.SearchItem , err error ) {
76+ func (r * Recall ) Multiplex (ctx context.Context , query string , tRes , vRes []int64 ) (resp []* types.SearchItem , err error ) {
7777 // 融合去重
78- iRes = append (iRes , vRes ... )
79- iRes = lo .Uniq (iRes )
80- recallData , _ := dao .NewInputDataDao (ctx ).ListInputDataByDocIds (iRes )
78+ tRes = append (tRes , vRes ... )
79+ tRes = lo .Uniq (tRes )
80+ recallData , _ := dao .NewInputDataDao (ctx ).ListInputDataByDocIds (tRes )
8181 searchItems := make ([]* types.SearchItem , 0 )
8282
8383 // 处理
@@ -105,7 +105,7 @@ func (r *Recall) Multiplex(ctx context.Context, query string, iRes, vRes []int64
105105 return
106106}
107107
108- // SearchVector 搜索向量
108+ // SearchVector 向量召回
109109func (r * Recall ) SearchVector (ctx context.Context , queries []string ) (docIds []int64 , err error ) {
110110 // rpc 调用python接口 获取
111111 req := & pb.SearchVectorRequest {Query : queries }
@@ -124,6 +124,7 @@ func (r *Recall) SearchVector(ctx context.Context, queries []string) (docIds []i
124124// SearchQueryWord 入口词语联想
125125func (r * Recall ) SearchQueryWord (query string ) (resp []string , err error ) {
126126 dictTreeList := make ([]string , 0 , 1e3 )
127+ // TODO: 改成并发的读取
127128 for _ , trieDb := range storage .GlobalTrieDB {
128129 trie , errx := trieDb .GetTrieTreeDict ()
129130 if errx != nil {
@@ -138,7 +139,8 @@ func (r *Recall) SearchQueryWord(query string) (resp []string, err error) {
138139 return
139140}
140141
141- func (r * Recall ) searchDoc (ctx context.Context , tokens []string ) (recalls []int64 , err error ) {
142+ // SearchText 文本召回
143+ func (r * Recall ) SearchText (ctx context.Context , tokens []string ) (recalls []int64 , err error ) {
142144 recalls = make ([]int64 , 0 )
143145 for _ , token := range tokens {
144146 docIds , errx := redis .GetInvertedIndexTokenDocIds (ctx , token )
@@ -151,7 +153,7 @@ func (r *Recall) searchDoc(ctx context.Context, tokens []string) (recalls []int6
151153 err = errors .WithMessage (err , "fetchPostingsByToken error" )
152154 continue
153155 } else {
154- // 如果缓存存在,就直接读缓存,不用担心实时性问题,缓存10分钟清空一次,这延迟是能接受到
156+ // 如果缓存存在,就直接读缓存,不用担心实时性问题,缓存10分钟清空一次,这延迟能接受
155157 for _ , v := range postingsList {
156158 if v != nil && v .DocIds != nil {
157159 docIds .AddMany (v .DocIds .ToArray ())
@@ -168,17 +170,6 @@ func (r *Recall) searchDoc(ctx context.Context, tokens []string) (recalls []int6
168170 }
169171 }
170172
171- // 排序打分
172- // iDao := dao.NewInputDataDao(ctx)
173- // for _, p := range allPostingsList {
174- // if p == nil || p.DocIds == nil || p.DocIds.IsEmpty() {
175- // continue
176- // }
177- // recallData, _ := iDao.ListInputDataByDocIds(p.DocIds.ToArray())
178- // searchItems := ranking.CalculateScoreBm25(p.Term, recallData)
179- // recalls = append(recalls, searchItems...)
180- // }
181-
182173 log .LogrusObj .Infof ("recalls size:%v" , len (recalls ))
183174
184175 return
0 commit comments