Skip to content

fix: analyzer memory leak because function runner is not closed #41839

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jun 5, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions internal/datanode/importv2/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -379,6 +379,8 @@
continue
}

defer runner.Close()

Check warning on line 383 in internal/datanode/importv2/util.go

View check run for this annotation

Codecov / codecov/patch

internal/datanode/importv2/util.go#L382-L383

Added lines #L382 - L383 were not covered by tests
inputFieldIDs := lo.Map(runner.GetInputFields(), func(field *schemapb.FieldSchema, _ int) int64 { return field.GetFieldID() })
inputDatas := make([]any, 0, len(inputFieldIDs))
for _, inputFieldID := range inputFieldIDs {
Expand Down
6 changes: 6 additions & 0 deletions internal/flushcommon/pipeline/flow_graph_embedding_node.go
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,12 @@ func (eNode *embeddingNode) Operate(in []Msg) []Msg {
return []Msg{fgMsg}
}

// Close shuts the embedding node down by invoking Close on each of the
// function runners it holds.
func (eNode *embeddingNode) Close() {
	runners := eNode.functionRunners
	for _, r := range runners {
		r.Close()
	}
}

func BuildSparseFieldData(array *schemapb.SparseFloatArray) storage.FieldData {
return &storage.SparseFloatVectorFieldData{
SparseFloatArray: schemapb.SparseFloatArray{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ func TestEmbeddingNode_BM25_Operator(t *testing.T) {
t.Run("normal case", func(t *testing.T) {
node, err := newEmbeddingNode("test-channel", metaCache)
assert.NoError(t, err)
defer node.Close()

var output []Msg
assert.NotPanics(t, func() {
Expand Down Expand Up @@ -114,6 +115,7 @@ func TestEmbeddingNode_BM25_Operator(t *testing.T) {
t.Run("with close msg", func(t *testing.T) {
node, err := newEmbeddingNode("test-channel", metaCache)
assert.NoError(t, err)
defer node.Close()

var output []Msg

Expand All @@ -131,6 +133,7 @@ func TestEmbeddingNode_BM25_Operator(t *testing.T) {
t.Run("prepare insert failed", func(t *testing.T) {
node, err := newEmbeddingNode("test-channel", metaCache)
assert.NoError(t, err)
defer node.Close()

assert.Panics(t, func() {
node.Operate([]Msg{
Expand All @@ -152,6 +155,7 @@ func TestEmbeddingNode_BM25_Operator(t *testing.T) {
t.Run("embedding failed", func(t *testing.T) {
node, err := newEmbeddingNode("test-channel", metaCache)
assert.NoError(t, err)
defer node.Close()

node.functionRunners[0].GetSchema().Type = 0
assert.Panics(t, func() {
Expand Down
6 changes: 6 additions & 0 deletions internal/querynodev2/pipeline/embedding_node.go
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,12 @@ func (eNode *embeddingNode) Operate(in Msg) Msg {
return nodeMsg
}

// Close walks the node's function runners and calls Close on every one of
// them.
func (eNode *embeddingNode) Close() {
	held := eNode.functionRunners
	for _, fr := range held {
		fr.Close()
	}
}

func getEmbeddingFieldDatas(datas []*schemapb.FieldData, fieldIDs ...int64) ([]any, error) {
result := []any{}
for _, fieldID := range fieldIDs {
Expand Down
10 changes: 9 additions & 1 deletion internal/querynodev2/pipeline/embedding_node_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -140,8 +140,9 @@ func (suite *EmbeddingNodeSuite) TestCreateEmbeddingNode() {
collSchema := proto.Clone(suite.collectionSchema).(*schemapb.CollectionSchema)
collection := segments.NewCollectionWithoutSegcoreForTest(suite.collectionID, collSchema)
suite.colManager.EXPECT().Get(suite.collectionID).Return(collection).Once()
_, err := newEmbeddingNode(suite.collectionID, suite.channel, suite.manager, 128)
node, err := newEmbeddingNode(suite.collectionID, suite.channel, suite.manager, 128)
suite.NoError(err)
defer node.Close()
})
}

Expand All @@ -151,6 +152,7 @@ func (suite *EmbeddingNodeSuite) TestOperator() {
suite.colManager.EXPECT().Get(suite.collectionID).Return(collection).Once()
node, err := newEmbeddingNode(suite.collectionID, suite.channel, suite.manager, 128)
suite.NoError(err)
defer node.Close()

suite.colManager.EXPECT().Get(suite.collectionID).Return(nil).Once()
suite.Panics(func() {
Expand All @@ -163,6 +165,7 @@ func (suite *EmbeddingNodeSuite) TestOperator() {
suite.colManager.EXPECT().Get(suite.collectionID).Return(collection).Times(2)
node, err := newEmbeddingNode(suite.collectionID, suite.channel, suite.manager, 128)
suite.NoError(err)
defer node.Close()

suite.Panics(func() {
node.Operate(&insertNodeMsg{
Expand Down Expand Up @@ -192,6 +195,7 @@ func (suite *EmbeddingNodeSuite) TestOperator() {
suite.colManager.EXPECT().Get(suite.collectionID).Return(collection).Times(2)
node, err := newEmbeddingNode(suite.collectionID, suite.channel, suite.manager, 128)
suite.NoError(err)
defer node.Close()

suite.NotPanics(func() {
output := node.Operate(&insertNodeMsg{
Expand All @@ -213,6 +217,7 @@ func (suite *EmbeddingNodeSuite) TestAddInsertData() {
suite.colManager.EXPECT().Get(suite.collectionID).Return(collection).Once()
node, err := newEmbeddingNode(suite.collectionID, suite.channel, suite.manager, 128)
suite.NoError(err)
defer node.Close()

// transferring the insert msg fails because row-based data does not support sparse vectors
insertDatas := make(map[int64]*delegator.InsertData)
Expand All @@ -237,6 +242,7 @@ func (suite *EmbeddingNodeSuite) TestAddInsertData() {
suite.colManager.EXPECT().Get(suite.collectionID).Return(collection).Once()
node, err := newEmbeddingNode(suite.collectionID, suite.channel, suite.manager, 128)
suite.NoError(err)
defer node.Close()

insertDatas := make(map[int64]*delegator.InsertData)
err = node.addInsertData(insertDatas, suite.msgs[0], collection)
Expand All @@ -250,6 +256,7 @@ func (suite *EmbeddingNodeSuite) TestBM25Embedding() {
suite.colManager.EXPECT().Get(suite.collectionID).Return(collection).Once()
node, err := newEmbeddingNode(suite.collectionID, suite.channel, suite.manager, 128)
suite.NoError(err)
defer node.Close()

runner := function.NewMockFunctionRunner(suite.T())
runner.EXPECT().BatchRun(mock.Anything).Return(nil, errors.New("mock error"))
Expand All @@ -265,6 +272,7 @@ func (suite *EmbeddingNodeSuite) TestBM25Embedding() {
suite.colManager.EXPECT().Get(suite.collectionID).Return(collection).Once()
node, err := newEmbeddingNode(suite.collectionID, suite.channel, suite.manager, 128)
suite.NoError(err)
defer node.Close()

runner := function.NewMockFunctionRunner(suite.T())
runner.EXPECT().BatchRun(mock.Anything).Return([]interface{}{1}, nil)
Expand Down
4 changes: 4 additions & 0 deletions internal/util/function/bm25_function.go
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,10 @@ func (v *BM25FunctionRunner) GetInputFields() []*schemapb.FieldSchema {
return []*schemapb.FieldSchema{v.inputField}
}

// Close tears down the runner by calling Destroy on its tokenizer.
// NOTE(review): presumably the runner must not be used after Close and Close
// must not be called twice — confirm against the tokenizer implementation.
func (v *BM25FunctionRunner) Close() {
v.tokenizer.Destroy()
}

func buildSparseFloatArray(mapdata []map[uint32]float32) *schemapb.SparseFloatArray {
dim := int64(0)
bytes := lo.Map(mapdata, func(sparseMap map[uint32]float32, _ int) []byte {
Expand Down
2 changes: 2 additions & 0 deletions internal/util/function/function.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ type FunctionRunner interface {
GetSchema() *schemapb.FunctionSchema
GetOutputFields() []*schemapb.FieldSchema
GetInputFields() []*schemapb.FieldSchema

Close()
}

func NewFunctionRunner(coll *schemapb.CollectionSchema, schema *schemapb.FunctionSchema) (FunctionRunner, error) {
Expand Down
40 changes: 36 additions & 4 deletions internal/util/function/mock_function.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions internal/util/function/multi_analyzer_bm25_function.go
Original file line number Diff line number Diff line change
Expand Up @@ -325,3 +325,9 @@
// GetInputFields returns the runner's inputFields slice as-is; no copy is
// made, so the caller shares the underlying slice with the runner.
func (v *MultiAnalyzerBM25FunctionRunner) GetInputFields() []*schemapb.FieldSchema {
return v.inputFields
}

// Close calls Destroy on every analyzer held in v.analyzers.
// NOTE(review): presumably the runner must not be used after Close — confirm
// against the analyzer implementation.
func (v *MultiAnalyzerBM25FunctionRunner) Close() {
for _, analyzer := range v.analyzers {
analyzer.Destroy()
}

Check warning on line 332 in internal/util/function/multi_analyzer_bm25_function.go

View check run for this annotation

Codecov / codecov/patch

internal/util/function/multi_analyzer_bm25_function.go#L329-L332

Added lines #L329 - L332 were not covered by tests
}
5 changes: 5 additions & 0 deletions internal/util/pipeline/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ type Node interface {
Name() string
MaxQueueLength() int32
Operate(in Msg) Msg

Close()
}

type nodeCtx struct {
Expand Down Expand Up @@ -56,6 +58,9 @@ func (node *BaseNode) MaxQueueLength() int32 {
return node.maxQueueLength
}

// Close is a no-op: the body is empty, so calling it has no effect.
// NOTE(review): presumably provided so that types embedding BaseNode get a
// Close method without having to define their own — confirm.
func (node *BaseNode) Close() {
}

func NewBaseNode(name string, maxQueryLength int32) *BaseNode {
return &BaseNode{
name: name,
Expand Down
1 change: 1 addition & 0 deletions internal/util/pipeline/pipeline.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ func (p *pipeline) Start() error {

func (p *pipeline) Close() {
for _, node := range p.nodes {
node.node.Close()
if node.Checker != nil {
node.Checker.Close()
}
Expand Down
Loading