@@ -71,12 +71,6 @@ func (m *Mongo) GetOrSplitChunks(ctx context.Context, pool *destination.WriterPo
7171 logger .Infof ("Total expected count for stream %s: %d" , stream .ID (), recordCount )
7272 pool .AddRecordsToSync (recordCount )
7373
74- // build filter
75- filter , err := buildFilter (stream )
76- if err != nil {
77- return nil , fmt .Errorf ("failed to parse filter during chunk splitting: %s" , err )
78- }
79-
8074 // check for _id type
8175 isObjID , err := isObjectID (ctx , collection )
8276 if err != nil {
@@ -88,7 +82,7 @@ func (m *Mongo) GetOrSplitChunks(ctx context.Context, pool *destination.WriterPo
8882 var retryErr error
8983 var chunksArray []types.Chunk
9084 err = abstract .RetryOnBackoff (m .config .RetryCount , 1 * time .Minute , func () error {
91- chunksArray , retryErr = m .splitChunks (ctx , collection , stream , filter , isObjID , storageSize )
85+ chunksArray , retryErr = m .splitChunks (ctx , collection , stream , isObjID , storageSize )
9286 return retryErr
9387 })
9488 if err != nil {
@@ -97,11 +91,11 @@ func (m *Mongo) GetOrSplitChunks(ctx context.Context, pool *destination.WriterPo
9791 return types .NewSet (chunksArray ... ), nil
9892}
9993
100- func (m * Mongo ) splitChunks (ctx context.Context , collection * mongo.Collection , stream types.StreamInterface , filter bson. D , isObjID bool , storageSize float64 ) ([]types.Chunk , error ) {
94+ func (m * Mongo ) splitChunks (ctx context.Context , collection * mongo.Collection , stream types.StreamInterface , isObjID bool , storageSize float64 ) ([]types.Chunk , error ) {
10195 splitVectorStrategy := func () ([]types.Chunk , error ) {
10296 getID := func (order int ) (primitive.ObjectID , error ) {
10397 var doc bson.M
104- err := collection .FindOne (ctx , filter , options .FindOne ().SetSort (bson.D {{Key : "_id" , Value : order }})).Decode (& doc )
98+ err := collection .FindOne (ctx , bson. D {} , options .FindOne ().SetSort (bson.D {{Key : "_id" , Value : order }})).Decode (& doc )
10599 if err == mongo .ErrNoDocuments {
106100 return primitive .NilObjectID , nil
107101 }
@@ -131,9 +125,6 @@ func (m *Mongo) splitChunks(ctx context.Context, collection *mongo.Collection, s
131125 {Key : "maxChunkSize" , Value : 1024 },
132126 }
133127
134- if len (filter ) > 0 {
135- cmd = append (cmd , bson.E {Key : "filter" , Value : filter })
136- }
137128 if err := collection .Database ().RunCommand (ctx , cmd ).Decode (& result ); err != nil {
138129 return nil , fmt .Errorf ("failed to run splitVector command: %s" , err )
139130 }
@@ -169,19 +160,14 @@ func (m *Mongo) splitChunks(ctx context.Context, collection *mongo.Collection, s
169160 bucketAutoStrategy := func (storageSize float64 ) ([]types.Chunk , error ) {
170161 logger .Infof ("using bucket auto strategy for stream: %s" , stream .ID ())
171162 // Use $bucketAuto for chunking
172- pipeline := mongo.Pipeline {}
173- if len (filter ) > 0 {
174- pipeline = append (pipeline , bson.D {{Key : "$match" , Value : filter }})
175- }
176-
177163 numberOfBuckets := int (math .Ceil (storageSize / float64 (constants .EffectiveParquetSize )))
178- pipeline = append ( pipeline ,
179- bson. D {{Key : "$sort" , Value : bson.D {{Key : "_id" , Value : 1 }}}},
180- bson. D {{Key : "$bucketAuto" , Value : bson.D {
164+ pipeline := mongo. Pipeline {
165+ {{Key : "$sort" , Value : bson.D {{Key : "_id" , Value : 1 }}}},
166+ {{Key : "$bucketAuto" , Value : bson.D {
181167 {Key : "groupBy" , Value : "$_id" },
182168 {Key : "buckets" , Value : numberOfBuckets },
183169 }}},
184- )
170+ }
185171
186172 cursor , err := collection .Aggregate (ctx , pipeline )
187173 if err != nil {
@@ -227,7 +213,7 @@ func (m *Mongo) splitChunks(ctx context.Context, collection *mongo.Collection, s
227213
228214 timestampStrategy := func () ([]types.Chunk , error ) {
229215 // Time-based strategy implementation
230- first , last , err := m .fetchExtremes (ctx , collection , filter )
216+ first , last , err := m .fetchExtremes (ctx , collection )
231217 if err != nil {
232218 return nil , err
233219 }
@@ -307,12 +293,12 @@ func (m *Mongo) totalCountAndStorageSizeInCollection(ctx context.Context, collec
307293 return count , storageSize , nil
308294}
309295
310- func (m * Mongo ) fetchExtremes (ctx context.Context , collection * mongo.Collection , filter bson. D ) (time.Time , time.Time , error ) {
296+ func (m * Mongo ) fetchExtremes (ctx context.Context , collection * mongo.Collection ) (time.Time , time.Time , error ) {
311297 extreme := func (sortby int ) (time.Time , error ) {
312298 // Find the document at one end of the collection in _id order
313299 var result bson.M
314300 // Sort by _id in the direction given by sortby and take the first document of that ordering
315- err := collection .FindOne (ctx , filter , options .FindOne ().SetSort (bson.D {{Key : "_id" , Value : sortby }})).Decode (& result )
301+ err := collection .FindOne (ctx , bson. D {} , options .FindOne ().SetSort (bson.D {{Key : "_id" , Value : sortby }})).Decode (& result )
316302 if err != nil {
317303 return time.Time {}, err
318304 }
0 commit comments