Skip to content

Commit 464c2e5

Browse files
authored
[BUG]: Handle version file creation with empty file paths (#4552)
## Description of changes - Improvements & Bug fixes - It can happen that compaction flushes to sysdb with empty file paths. This happens when there are no changes to the segments after applying the logs. - The sysdb handles this by keeping the previous segment file paths - However versioning missed handling this case in the version file. This led to creation of versions with empty file paths in the version file. - Consequently, after GC deletes all old versions and only leaves this version with empty file paths, there is no data on S3. - New functionality ## Test plan - Added a local test with mock s3 ## Documentation Changes None
1 parent 33ce9f5 commit 464c2e5

9 files changed

+497
-56
lines changed

go/pkg/sysdb/coordinator/coordinator.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,7 @@ func (s *Coordinator) GetCollectionSize(ctx context.Context, collectionID types.
131131
}
132132

133133
func (s *Coordinator) GetCollectionWithSegments(ctx context.Context, collectionID types.UniqueID) (*model.Collection, []*model.Segment, error) {
134-
return s.catalog.GetCollectionWithSegments(ctx, collectionID)
134+
return s.catalog.GetCollectionWithSegments(ctx, collectionID, false)
135135
}
136136

137137
func (s *Coordinator) CheckCollection(ctx context.Context, collectionID types.UniqueID) (bool, error) {

go/pkg/sysdb/coordinator/coordinator_test.go

Lines changed: 109 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -407,7 +407,15 @@ func (suite *APIsTestSuite) TestCreateGetDeleteCollections() {
407407
sort.Slice(results, func(i, j int) bool {
408408
return results[i].Name < results[j].Name
409409
})
410-
suite.Equal(suite.sampleCollections, results)
410+
suite.Len(results, len(suite.sampleCollections))
411+
for i, collection := range results {
412+
suite.Equal(suite.sampleCollections[i].ID, collection.ID)
413+
suite.Equal(suite.sampleCollections[i].Name, collection.Name)
414+
suite.Equal(suite.sampleCollections[i].TenantID, collection.TenantID)
415+
suite.Equal(suite.sampleCollections[i].DatabaseName, collection.DatabaseName)
416+
suite.Equal(suite.sampleCollections[i].Dimension, collection.Dimension)
417+
suite.Equal(suite.sampleCollections[i].Metadata, collection.Metadata)
418+
}
411419

412420
// Duplicate create fails
413421
_, _, err = suite.coordinator.CreateCollection(ctx, &model.CreateCollection{
@@ -448,14 +456,26 @@ func (suite *APIsTestSuite) TestCreateGetDeleteCollections() {
448456
for _, collection := range suite.sampleCollections {
449457
result, err := suite.coordinator.GetCollections(ctx, types.NilUniqueID(), &collection.Name, suite.tenantName, suite.databaseName, nil, nil)
450458
suite.NoError(err)
451-
suite.Equal([]*model.Collection{collection}, result)
459+
suite.Equal(len(result), 1)
460+
suite.Equal(collection.ID, result[0].ID)
461+
suite.Equal(collection.Name, result[0].Name)
462+
suite.Equal(collection.TenantID, result[0].TenantID)
463+
suite.Equal(collection.DatabaseName, result[0].DatabaseName)
464+
suite.Equal(collection.Dimension, result[0].Dimension)
465+
suite.Equal(collection.Metadata, result[0].Metadata)
452466
}
453467

454468
// Find by id
455469
for _, collection := range suite.sampleCollections {
456470
result, err := suite.coordinator.GetCollections(ctx, collection.ID, nil, suite.tenantName, suite.databaseName, nil, nil)
457471
suite.NoError(err)
458-
suite.Equal([]*model.Collection{collection}, result)
472+
suite.Equal(len(result), 1)
473+
suite.Equal(collection.ID, result[0].ID)
474+
suite.Equal(collection.Name, result[0].Name)
475+
suite.Equal(collection.TenantID, result[0].TenantID)
476+
suite.Equal(collection.DatabaseName, result[0].DatabaseName)
477+
suite.Equal(collection.Dimension, result[0].Dimension)
478+
suite.Equal(collection.Metadata, result[0].Metadata)
459479
}
460480

461481
// Delete
@@ -470,10 +490,18 @@ func (suite *APIsTestSuite) TestCreateGetDeleteCollections() {
470490

471491
results, err = suite.coordinator.GetCollections(ctx, types.NilUniqueID(), nil, suite.tenantName, suite.databaseName, nil, nil)
472492
suite.NoError(err)
493+
result_ids := make([]types.UniqueID, len(results))
494+
for i, result := range results {
495+
result_ids[i] = result.ID
496+
}
473497

474-
suite.NotContains(results, c1)
498+
sample_ids := make([]types.UniqueID, len(suite.sampleCollections))
499+
for i, collection := range suite.sampleCollections {
500+
sample_ids[i] = collection.ID
501+
}
502+
suite.NotContains(result_ids, c1.ID)
475503
suite.Len(results, len(suite.sampleCollections)-1)
476-
suite.ElementsMatch(results, suite.sampleCollections[1:])
504+
suite.ElementsMatch(result_ids, sample_ids[1:])
477505
byIDResult, err := suite.coordinator.GetCollections(ctx, c1.ID, nil, suite.tenantName, suite.databaseName, nil, nil)
478506
suite.NoError(err)
479507
suite.Empty(byIDResult)
@@ -632,40 +660,88 @@ func (suite *APIsTestSuite) TestUpdateCollections() {
632660
coll.Name = "new_name"
633661
result, err := suite.coordinator.UpdateCollection(ctx, &model.UpdateCollection{ID: coll.ID, Name: &coll.Name})
634662
suite.NoError(err)
635-
suite.Equal(coll, result)
663+
suite.Equal(coll.ID, result.ID)
664+
suite.Equal(coll.Name, result.Name)
665+
suite.Equal(coll.TenantID, result.TenantID)
666+
suite.Equal(coll.DatabaseName, result.DatabaseName)
667+
suite.Equal(coll.Dimension, result.Dimension)
668+
suite.Equal(coll.Metadata, result.Metadata)
669+
636670
resultList, err := suite.coordinator.GetCollections(ctx, types.NilUniqueID(), &coll.Name, suite.tenantName, suite.databaseName, nil, nil)
637671
suite.NoError(err)
638-
suite.Equal([]*model.Collection{coll}, resultList)
672+
suite.Equal(len(resultList), 1)
673+
suite.Equal(coll.ID, resultList[0].ID)
674+
suite.Equal(coll.Name, resultList[0].Name)
675+
suite.Equal(coll.TenantID, resultList[0].TenantID)
676+
suite.Equal(coll.DatabaseName, resultList[0].DatabaseName)
677+
suite.Equal(coll.Dimension, resultList[0].Dimension)
678+
suite.Equal(coll.Metadata, resultList[0].Metadata)
639679

640680
// Update dimension
641681
newDimension := int32(128)
642682
coll.Dimension = &newDimension
643683
result, err = suite.coordinator.UpdateCollection(ctx, &model.UpdateCollection{ID: coll.ID, Dimension: coll.Dimension})
644684
suite.NoError(err)
645-
suite.Equal(coll, result)
685+
suite.Equal(coll.ID, result.ID)
686+
suite.Equal(coll.Name, result.Name)
687+
suite.Equal(coll.TenantID, result.TenantID)
688+
suite.Equal(coll.DatabaseName, result.DatabaseName)
689+
suite.Equal(coll.Dimension, result.Dimension)
690+
suite.Equal(coll.Metadata, result.Metadata)
691+
646692
resultList, err = suite.coordinator.GetCollections(ctx, coll.ID, nil, suite.tenantName, suite.databaseName, nil, nil)
647693
suite.NoError(err)
648-
suite.Equal([]*model.Collection{coll}, resultList)
694+
suite.Equal(len(resultList), 1)
695+
suite.Equal(coll.ID, resultList[0].ID)
696+
suite.Equal(coll.Name, resultList[0].Name)
697+
suite.Equal(coll.TenantID, resultList[0].TenantID)
698+
suite.Equal(coll.DatabaseName, resultList[0].DatabaseName)
699+
suite.Equal(coll.Dimension, resultList[0].Dimension)
700+
suite.Equal(coll.Metadata, resultList[0].Metadata)
649701

650702
// Reset the metadata
651703
newMetadata := model.NewCollectionMetadata[model.CollectionMetadataValueType]()
652704
newMetadata.Add("test_str2", &model.CollectionMetadataValueStringType{Value: "str2"})
653705
coll.Metadata = newMetadata
654706
result, err = suite.coordinator.UpdateCollection(ctx, &model.UpdateCollection{ID: coll.ID, Metadata: coll.Metadata})
655707
suite.NoError(err)
656-
suite.Equal(coll, result)
708+
suite.Equal(coll.ID, result.ID)
709+
suite.Equal(coll.Name, result.Name)
710+
suite.Equal(coll.TenantID, result.TenantID)
711+
suite.Equal(coll.DatabaseName, result.DatabaseName)
712+
suite.Equal(coll.Dimension, result.Dimension)
713+
suite.Equal(coll.Metadata, result.Metadata)
714+
657715
resultList, err = suite.coordinator.GetCollections(ctx, coll.ID, nil, suite.tenantName, suite.databaseName, nil, nil)
658716
suite.NoError(err)
659-
suite.Equal([]*model.Collection{coll}, resultList)
717+
suite.Equal(len(resultList), 1)
718+
suite.Equal(coll.ID, resultList[0].ID)
719+
suite.Equal(coll.Name, resultList[0].Name)
720+
suite.Equal(coll.TenantID, resultList[0].TenantID)
721+
suite.Equal(coll.DatabaseName, resultList[0].DatabaseName)
722+
suite.Equal(coll.Dimension, resultList[0].Dimension)
723+
suite.Equal(coll.Metadata, resultList[0].Metadata)
660724

661725
// Delete all metadata keys
662726
coll.Metadata = nil
663727
result, err = suite.coordinator.UpdateCollection(ctx, &model.UpdateCollection{ID: coll.ID, Metadata: coll.Metadata, ResetMetadata: true})
664728
suite.NoError(err)
665-
suite.Equal(coll, result)
729+
suite.Equal(coll.ID, result.ID)
730+
suite.Equal(coll.Name, result.Name)
731+
suite.Equal(coll.TenantID, result.TenantID)
732+
suite.Equal(coll.DatabaseName, result.DatabaseName)
733+
suite.Equal(coll.Dimension, result.Dimension)
734+
suite.Equal(coll.Metadata, result.Metadata)
735+
666736
resultList, err = suite.coordinator.GetCollections(ctx, coll.ID, nil, suite.tenantName, suite.databaseName, nil, nil)
667737
suite.NoError(err)
668-
suite.Equal([]*model.Collection{coll}, resultList)
738+
suite.Equal(len(resultList), 1)
739+
suite.Equal(coll.ID, resultList[0].ID)
740+
suite.Equal(coll.Name, resultList[0].Name)
741+
suite.Equal(coll.TenantID, resultList[0].TenantID)
742+
suite.Equal(coll.DatabaseName, resultList[0].DatabaseName)
743+
suite.Equal(coll.Dimension, resultList[0].Dimension)
744+
suite.Equal(coll.Metadata, resultList[0].Metadata)
669745
}
670746

671747
func (suite *APIsTestSuite) TestGetOrCreateCollectionsTwice() {
@@ -794,7 +870,14 @@ func (suite *APIsTestSuite) TestGetMultipleWithDatabase() {
794870
sort.Slice(result, func(i, j int) bool {
795871
return result[i].Name < result[j].Name
796872
})
797-
suite.Equal(suite.sampleCollections, result)
873+
for index, collection := range result {
874+
suite.Equal(suite.sampleCollections[index].ID, collection.ID)
875+
suite.Equal(suite.sampleCollections[index].Name, collection.Name)
876+
suite.Equal(suite.sampleCollections[index].TenantID, collection.TenantID)
877+
suite.Equal(suite.sampleCollections[index].DatabaseName, collection.DatabaseName)
878+
suite.Equal(suite.sampleCollections[index].Dimension, collection.Dimension)
879+
suite.Equal(suite.sampleCollections[index].Metadata, collection.Metadata)
880+
}
798881

799882
result, err = suite.coordinator.GetCollections(ctx, types.NilUniqueID(), nil, suite.tenantName, suite.databaseName, nil, nil)
800883
suite.NoError(err)
@@ -876,15 +959,25 @@ func (suite *APIsTestSuite) TestCreateDatabaseWithTenants() {
876959
result, err := suite.coordinator.GetCollections(ctx, types.NilUniqueID(), nil, newTenantName, newDatabaseName, nil, nil)
877960
suite.NoError(err)
878961
suite.Len(result, 1)
879-
suite.Equal(expected[0], result[0])
962+
suite.Equal(expected[0].ID, result[0].ID)
963+
suite.Equal(expected[0].Name, result[0].Name)
964+
suite.Equal(expected[0].TenantID, result[0].TenantID)
965+
suite.Equal(expected[0].DatabaseName, result[0].DatabaseName)
966+
suite.Equal(expected[0].Dimension, result[0].Dimension)
967+
suite.Equal(expected[0].Metadata, result[0].Metadata)
880968

881969
expected = []*model.Collection{suite.sampleCollections[1]}
882970
expected[0].TenantID = suite.tenantName
883971
expected[0].DatabaseName = newDatabaseName
884972
result, err = suite.coordinator.GetCollections(ctx, types.NilUniqueID(), nil, suite.tenantName, newDatabaseName, nil, nil)
885973
suite.NoError(err)
886974
suite.Len(result, 1)
887-
suite.Equal(expected[0], result[0])
975+
suite.Equal(expected[0].ID, result[0].ID)
976+
suite.Equal(expected[0].Name, result[0].Name)
977+
suite.Equal(expected[0].TenantID, result[0].TenantID)
978+
suite.Equal(expected[0].DatabaseName, result[0].DatabaseName)
979+
suite.Equal(expected[0].Dimension, result[0].Dimension)
980+
suite.Equal(expected[0].Metadata, result[0].Metadata)
888981

889982
// A new tenant DOES NOT have a default database. This does not error, instead 0
890983
// results are returned

go/pkg/sysdb/coordinator/model/collection.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
package model
22

33
import (
4+
"time"
5+
46
"github.com/chroma-core/chroma/go/pkg/types"
57
)
68

@@ -21,6 +23,10 @@ type Collection struct {
2123
TotalRecordsPostCompaction uint64
2224
SizeBytesPostCompaction uint64 // Note: This represents the size of the records off the log
2325
LastCompactionTimeSecs uint64
26+
IsDeleted bool
27+
VersionFileName string
28+
CreatedAt time.Time
29+
DatabaseId types.UniqueID
2430
}
2531

2632
type CollectionToGc struct {

go/pkg/sysdb/coordinator/model_db_convert.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,10 @@ func convertCollectionToModel(collectionAndMetadataList []*dbmodel.CollectionAnd
3333
LastCompactionTimeSecs: collectionAndMetadata.Collection.LastCompactionTimeSecs,
3434
RootCollectionID: rootCollectionID,
3535
LineageFileName: collectionAndMetadata.Collection.LineageFileName,
36+
IsDeleted: collectionAndMetadata.Collection.IsDeleted,
37+
VersionFileName: collectionAndMetadata.Collection.VersionFileName,
38+
CreatedAt: collectionAndMetadata.Collection.CreatedAt,
39+
DatabaseId: types.MustParse(collectionAndMetadata.Collection.DatabaseID),
3640
}
3741
collection.Metadata = convertCollectionMetadataToModel(collectionAndMetadata.CollectionMetadata)
3842
collections = append(collections, collection)

go/pkg/sysdb/coordinator/model_db_convert_test.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,7 @@ func TestConvertCollectionToModel(t *testing.T) {
138138
collectionTotalRecordsPostCompaction := uint64(100)
139139
collectionSizeBytesPostCompaction := uint64(500000)
140140
collectionLastCompactionTimeSecs := uint64(1741037006)
141+
dbId := types.NewUniqueID()
141142
collectionAndMetadata := &dbmodel.CollectionAndMetadata{
142143
Collection: &dbmodel.Collection{
143144
ID: collectionID.String(),
@@ -147,6 +148,7 @@ func TestConvertCollectionToModel(t *testing.T) {
147148
TotalRecordsPostCompaction: collectionTotalRecordsPostCompaction,
148149
SizeBytesPostCompaction: collectionSizeBytesPostCompaction,
149150
LastCompactionTimeSecs: collectionLastCompactionTimeSecs,
151+
DatabaseID: dbId.String(),
150152
},
151153
CollectionMetadata: []*dbmodel.CollectionMetadata{},
152154
}
@@ -161,4 +163,5 @@ func TestConvertCollectionToModel(t *testing.T) {
161163
assert.Equal(t, collectionSizeBytesPostCompaction, modelCollections[0].SizeBytesPostCompaction)
162164
assert.Equal(t, collectionLastCompactionTimeSecs, modelCollections[0].LastCompactionTimeSecs)
163165
assert.Nil(t, modelCollections[0].Metadata)
166+
assert.Equal(t, dbId, modelCollections[0].DatabaseId)
164167
}

0 commit comments

Comments
 (0)