Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions configs/milvus.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1480,3 +1480,5 @@ function:
url: # Your voyageai rerank url, Default is the official rerank url
analyzer:
local_resource_path: /var/lib/milvus/analyzer
concurrency_per_cpu_core: 8 # The concurrency per cpu core for analyzer, pipeline not included
runner_concurrency: 8 # The concurrency for each function runner to tokenize text
1 change: 1 addition & 0 deletions internal/util/analyzer/interfaces/interfaces.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ type TokenStream interface {
Destroy()
}

//go:generate mockery --name=Analyzer --with-expecter --inpackage --filename=mock_analyzer.go --structname=MockAnalyzer
type Analyzer interface {
NewTokenStream(text string) TokenStream
Clone() (Analyzer, error)
Expand Down
169 changes: 169 additions & 0 deletions internal/util/analyzer/interfaces/mock_analyzer.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

110 changes: 81 additions & 29 deletions internal/util/function/bm25_function.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,21 +19,82 @@
package function

import (
"context"
"fmt"
"sync"

"github.com/cockroachdb/errors"
"github.com/samber/lo"
"go.uber.org/zap"

"github.com/milvus-io/milvus-proto/go-api/v2/milvuspb"
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
"github.com/milvus-io/milvus/internal/util/analyzer"
"github.com/milvus-io/milvus/pkg/v2/config"
"github.com/milvus-io/milvus/pkg/v2/log"
"github.com/milvus-io/milvus/pkg/v2/util/conc"
"github.com/milvus-io/milvus/pkg/v2/util/hardware"
"github.com/milvus-io/milvus/pkg/v2/util/merr"
"github.com/milvus-io/milvus/pkg/v2/util/paramtable"
"github.com/milvus-io/milvus/pkg/v2/util/typeutil"
)

const analyzerParams = "analyzer_params"

var (
analyzerPool *conc.Pool[struct{}]
analyzerPoolInitOnce sync.Once
)

func getAnalyzerPoolSize() int {
cpuNum := hardware.GetCPUNum()
poolSize := int(float64(cpuNum) * paramtable.Get().FunctionCfg.AnalyzerConcurrencyPerCPUCore.GetAsFloat())
if poolSize <= 0 {
log.Warn("analyzer pool size is not positive, set to cpu num", zap.Int("cpuNum", cpuNum))
poolSize = cpuNum
}
return poolSize
}

func initAnalyzerPool() {
analyzerPool = conc.NewPool[struct{}](getAnalyzerPoolSize())

pt := paramtable.Get()
pt.Watch(pt.FunctionCfg.AnalyzerConcurrencyPerCPUCore.Key, config.NewHandler("function.analyzer.concurrency", ResizeAnalyzerPool))
}

func resizeAnalyzerPool(pool *conc.Pool[struct{}], newSize int) {
log := log.Ctx(context.Background()).With(zap.Int("newSize", newSize))

if newSize <= 0 {
log.Warn("cannot set analyzer pool size to non-positive value")
return
}

if err := pool.Resize(newSize); err != nil {
log.Warn("failed to resize analyzer pool", zap.Error(err))
return
}
log.Info("analyzer pool resize successfully")
}

func ResizeAnalyzerPool(evt *config.Event) {
if !evt.HasUpdated {
return
}

resizeAnalyzerPool(getOrCreateAnalyzerPool(), getAnalyzerPoolSize())
}

func getOrCreateAnalyzerPool() *conc.Pool[struct{}] {
analyzerPoolInitOnce.Do(initAnalyzerPool)
return analyzerPool
}

func getAnalyzerRunnerConcurrency() int {
return paramtable.Get().FunctionCfg.GetAnalyzerRunnerConcurrency()
}

type Analyzer interface {
BatchAnalyze(withDetail bool, withHash bool, inputs ...any) ([][]*milvuspb.AnalyzerToken, error)
GetInputFields() []*schemapb.FieldSchema
Expand All @@ -50,7 +111,6 @@ type BM25FunctionRunner struct {
schema *schemapb.FunctionSchema
outputField *schemapb.FieldSchema
inputField *schemapb.FieldSchema
concurrency int
}

func getAnalyzerParams(field *schemapb.FieldSchema) string {
Expand All @@ -70,9 +130,8 @@ func NewAnalyzerRunner(field *schemapb.FieldSchema) (Analyzer, error) {
}

return &BM25FunctionRunner{
inputField: field,
tokenizer: tokenizer,
concurrency: 8,
inputField: field,
tokenizer: tokenizer,
}, nil
}

Expand Down Expand Up @@ -112,7 +171,6 @@ func NewBM25FunctionRunner(coll *schemapb.CollectionSchema, schema *schemapb.Fun
inputField: inputField,
outputField: outputField,
tokenizer: tokenizer,
concurrency: 8,
}, nil
}

Expand Down Expand Up @@ -166,12 +224,13 @@ func (v *BM25FunctionRunner) BatchRun(inputs ...any) ([]any, error) {
rowNum := len(text)
embedData := make([]map[uint32]float32, rowNum)
wg := sync.WaitGroup{}
concurrency := getAnalyzerRunnerConcurrency()

errCh := make(chan error, v.concurrency)
for i, j := 0, 0; i < v.concurrency && j < rowNum; i++ {
errCh := make(chan error, concurrency)
for i, j := 0, 0; i < concurrency && j < rowNum; i++ {
start := j
end := start + rowNum/v.concurrency
if i < rowNum%v.concurrency {
end := start + rowNum/concurrency
if i < rowNum%concurrency {
end += 1
}
wg.Add(1)
Expand Down Expand Up @@ -247,33 +306,26 @@ func (v *BM25FunctionRunner) BatchAnalyze(withDetail bool, withHash bool, inputs

rowNum := len(text)
result := make([][]*milvuspb.AnalyzerToken, rowNum)
wg := sync.WaitGroup{}
pool := getOrCreateAnalyzerPool()
concurrency := getAnalyzerRunnerConcurrency()
futures := make([]*conc.Future[struct{}], 0, concurrency)

errCh := make(chan error, v.concurrency)
for i, j := 0, 0; i < v.concurrency && j < rowNum; i++ {
for i, j := 0, 0; i < concurrency && j < rowNum; i++ {
start := j
end := start + rowNum/v.concurrency
if i < rowNum%v.concurrency {
end := start + rowNum/concurrency
if i < rowNum%concurrency {
end += 1
}
wg.Add(1)
go func() {
defer wg.Done()
err := v.analyze(text[start:end], result[start:end], withDetail, withHash)
if err != nil {
errCh <- err
return
}
}()
future := pool.Submit(func() (struct{}, error) {
return struct{}{}, v.analyze(text[start:end], result[start:end], withDetail, withHash)
})
futures = append(futures, future)
j = end
}

wg.Wait()
close(errCh)
for err := range errCh {
if err != nil {
return nil, err
}
err := conc.AwaitAll(futures...)
if err != nil {
return nil, err
}
return result, nil
}
Expand Down
Loading
Loading