Draft

29 commits
8f5900b: Update the BlobStore.Get method to return the total object size. Not … (macneale4, Sep 17, 2025)
171072b: Add support for archive conjoin in blobstore persister (macneale4, Aug 28, 2025)
3e33cf2: WIP (macneale4, Sep 2, 2025)
d5cc26a: WIP - will conflict (macneale4, Sep 17, 2025)
5280bb0: WIP - stuck (macneale4, Sep 17, 2025)
d40055b: Update archive-inspect to show the split offset (macneale4, Sep 18, 2025)
2731c2e: Shove the split offset into the tableFile struct (macneale4, Oct 7, 2025)
c720aef: WIP - checkpoint. push directy to gcs works (macneale4, Sep 18, 2025)
876f9ea: Add the split_offset field to TableFileDetails (macneale4, Sep 19, 2025)
8246ad0: URL query parameters for split_offset. Not backwards compatible yet (macneale4, Sep 19, 2025)
de7aeba: Update test interfaces (macneale4, Sep 19, 2025)
dc2f91b: Add split offset to the TableFileInfo proto spec (macneale4, Oct 6, 2025)
18e9fcb: Make the split_offset query string arg optional (macneale4, Oct 6, 2025)
ad1cf17: More clarity in gDoc (macneale4, Oct 7, 2025)
4059466: more fixes to blobstore file resolution (macneale4, Oct 8, 2025)
f84a15c: Create the *.darc.records file when it doesn't exist (macneale4, Oct 8, 2025)
1ca2727: [ga-format-pr] Run go/utils/repofmt/format_repo.sh and go/Godeps/upda… (macneale4, Oct 9, 2025)
cb605d3: Update go/store/chunks/tablefilestore.go (macneale4, Oct 10, 2025)
b71dc75: Cope with splitOffset being unset (macneale4, Oct 10, 2025)
d201688: mem table WriteChunks to return splitOffset (macneale4, Oct 10, 2025)
4e5610d: AutoGC and Archive as defaults. Untested (macneale4, Oct 2, 2025)
eb59609: Fix yaml config defaults test (macneale4, Oct 3, 2025)
17745cc: WIP (macneale4, Oct 3, 2025)
dbb92f4: Another test update (macneale4, Oct 3, 2025)
0ca6544: WIP (macneale4, Oct 6, 2025)
118d324: rollback archive.bats change for sql-server (macneale4, Oct 10, 2025)
1c5d2e2: Update Archive tests to account for new defaults (macneale4, Oct 10, 2025)
4465a77: Insert more data for garbage collection test. Archives make differenc… (macneale4, Oct 10, 2025)
4001bf3: return approximate size from GenericTableWriter.AddChunk (macneale4, Oct 13, 2025)
2 changes: 1 addition & 1 deletion go/cmd/dolt/cli/arg_parser_helpers.go
@@ -335,7 +335,7 @@ func CreateGCArgParser() *argparser.ArgParser {
ap := argparser.NewArgParserWithMaxArgs("gc", 0)
ap.SupportsFlag(ShallowFlag, "s", "perform a fast, but incomplete garbage collection pass")
ap.SupportsFlag(FullFlag, "f", "perform a full garbage collection, including the old generation")
- ap.SupportsInt(ArchiveLevelParam, "", "archive compression level", "Specify the archive compression level garbage collection results. Default is 0. Max is 1")
+ ap.SupportsInt(ArchiveLevelParam, "", "archive compression level", "Specify the archive compression level for garbage collection results. Default is 1; disable with 0.")
return ap
}

3 changes: 1 addition & 2 deletions go/cmd/dolt/commands/admin/archive_inspect.go
@@ -112,12 +112,11 @@ func (cmd ArchiveInspectCmd) Exec(ctx context.Context, commandStr string, args [
cli.Printf("File size: %d bytes\n", inspector.FileSize())
cli.Printf("Format version: %d\n", inspector.FormatVersion())
cli.Printf("File signature: %s\n", inspector.FileSignature())
- cli.Println()

cli.Printf("Chunk count: %d\n", inspector.ChunkCount())
cli.Printf("Byte span count: %d\n", inspector.ByteSpanCount())
cli.Printf("Index size: %d bytes\n", inspector.IndexSize())
cli.Printf("Metadata size: %d bytes\n", inspector.MetadataSize())
cli.Printf("Split offset: %d bytes\n", inspector.SplitOffset())

// Display metadata if present
if inspector.MetadataSize() > 0 {
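
The new Split offset line is a byte position within the archive, so it reads best next to the file size. A minimal sketch of a derived statistic using the same inspector accessors (the percentage line is an illustrative addition, not part of this PR):

	split := inspector.SplitOffset()
	size := inspector.FileSize()
	if size > 0 {
		// Illustrative: report where the split falls relative to the whole file.
		cli.Printf("Split offset: %d bytes (%.1f%% of file)\n", split, 100*float64(split)/float64(size))
	}
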
3 changes: 2 additions & 1 deletion go/cmd/dolt/commands/engine/sqlengine.go
@@ -226,11 +226,12 @@ func NewSqlEngine(
// XXX: We force session aware safepoint controller if auto_gc is on.
dprocedures.UseSessionAwareSafepointController = true
sql.SystemVariables.AssignValues(map[string]interface{}{
+ // NM4 - not sure about this one.
dsess.DoltAutoGCEnabled: int8(1),
})
} else {
sql.SystemVariables.AssignValues(map[string]interface{}{
- dsess.DoltAutoGCEnabled: int8(0),
+ dsess.DoltAutoGCEnabled: int8(1),
})
}

2 changes: 1 addition & 1 deletion go/cmd/dolt/commands/gc.go
@@ -127,7 +127,7 @@ func (cmd GarbageCollectionCmd) constructDoltGCQuery(apr *argparser.ArgParseResu
extraFlag = "--full"
}

- archiveLevel := chunks.NoArchive
+ archiveLevel := chunks.SimpleArchive
if apr.Contains(cli.ArchiveLevelParam) {
lvl, ok := apr.GetInt(cli.ArchiveLevelParam)
if !ok {
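
Here the default flips from chunks.NoArchive to chunks.SimpleArchive while an explicit archive-level flag still overrides it, matching the updated help text in arg_parser_helpers.go. A condensed sketch of the resolution logic, with the truncated error handling filled in illustratively (the type conversion and error wording are assumptions, not the exact code):

	// Default to SimpleArchive; an explicit archive level flag wins.
	archiveLevel := chunks.SimpleArchive
	if apr.Contains(cli.ArchiveLevelParam) {
		lvl, ok := apr.GetInt(cli.ArchiveLevelParam)
		if !ok {
			return "", errors.New("archive level must be an integer")
		}
		// Level 0 restores the old behavior (plain table files, no archive).
		archiveLevel = chunks.GCArchiveLevel(lvl) // hypothetical conversion
	}
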
4 changes: 2 additions & 2 deletions go/cmd/dolt/commands/sqlserver/server_test.go
@@ -640,8 +640,8 @@ func TestGenerateYamlConfig(t *testing.T) {
# dolt_transaction_commit: false
# event_scheduler: "OFF"
# auto_gc_behavior:
- # enable: false
- # archive_level: 0
+ # enable: true
+ # archive_level: 1

listener:
# host: localhost
2 changes: 1 addition & 1 deletion go/cmd/dolt/commands/utils.go
@@ -266,7 +266,7 @@ func newLateBindingEngine(
// to include auto-gc log lines.
nullLgr := logrus.New()
nullLgr.SetOutput(io.Discard)
- config.AutoGCController = sqle.NewAutoGCController(chunks.NoArchive, nullLgr)
+ config.AutoGCController = sqle.NewAutoGCController(chunks.SimpleArchive, nullLgr)
}

se, err := engine.NewSqlEngine(
26 changes: 22 additions & 4 deletions go/gen/proto/dolt/services/remotesapi/v1alpha1/chunkstore.pb.go

Some generated files are not rendered by default.

@@ -64,7 +64,7 @@ func createUninitializedEnv() *env.DoltEnv {
func TestGetDotDotRevisions(t *testing.T) {
ctx := context.Background()
dEnv := createUninitializedEnv()
- err := dEnv.InitRepo(context.Background(), types.Format_Default, "Bill Billerson", "[email protected]", env.DefaultInitBranch)
+ err := dEnv.InitRepo(context.Background(), types.Format_DOLT, "Bill Billerson", "[email protected]", env.DefaultInitBranch)
require.NoError(t, err)

cs, err := doltdb.NewCommitSpec(env.DefaultInitBranch)
1 change: 1 addition & 0 deletions go/libraries/doltcore/remotesrv/grpc.go
@@ -425,6 +425,7 @@ func (rs *RemoteChunkStore) getUploadUrl(md metadata.MD, repoPath string, tfd *r
fileID := hash.New(tfd.Id).String() + tfd.Suffix
params := url.Values{}
params.Add("num_chunks", strconv.Itoa(int(tfd.NumChunks)))
params.Add("split_offset", strconv.Itoa(int(tfd.SplitOffset)))
params.Add("content_length", strconv.Itoa(int(tfd.ContentLength)))
params.Add("content_hash", base64.RawURLEncoding.EncodeToString(tfd.ContentHash))
return &url.URL{
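
With split_offset added, the signed upload URL carries everything the HTTP handler needs to validate and store the file. The resulting query string looks like this (values are made up for illustration):

	num_chunks=1024&split_offset=524288&content_length=1048576&content_hash=<base64url hash>
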
26 changes: 20 additions & 6 deletions go/libraries/doltcore/remotesrv/http.go
@@ -140,21 +140,22 @@ func (fh filehandler) ServeHTTP(respWr http.ResponseWriter, req *http.Request) {
respWr.WriteHeader(http.StatusBadRequest)
return
}
- num_chunks, err := strconv.Atoi(ncs)
+ numChunks, err := strconv.Atoi(ncs)
if err != nil {
logger = logger.WithField("status", http.StatusBadRequest)
logger.WithError(err).Warn("bad request: num_chunks parameter did not parse")
respWr.WriteHeader(http.StatusBadRequest)
return
}

cls := q.Get("content_length")
if cls == "" {
logger = logger.WithField("status", http.StatusBadRequest)
logger.Warn("bad request: content_length parameter not provided")
respWr.WriteHeader(http.StatusBadRequest)
return
}
- content_length, err := strconv.Atoi(cls)
+ contentLength, err := strconv.Atoi(cls)
if err != nil {
logger = logger.WithField("status", http.StatusBadRequest)
logger.WithError(err).Warn("bad request: content_length parameter did not parse")
@@ -168,15 +169,28 @@ func (fh filehandler) ServeHTTP(respWr http.ResponseWriter, req *http.Request) {
respWr.WriteHeader(http.StatusBadRequest)
return
}
- content_hash, err := base64.RawURLEncoding.DecodeString(chs)
+ contentHash, err := base64.RawURLEncoding.DecodeString(chs)
if err != nil {
logger = logger.WithField("status", http.StatusBadRequest)
logger.WithError(err).Warn("bad request: content_hash parameter did not parse")
respWr.WriteHeader(http.StatusBadRequest)
return
}

- logger, statusCode = writeTableFile(req.Context(), logger, fh.dbCache, filepath, file, num_chunks, content_hash, uint64(content_length), req.Body)
+ // splitOffset is not required to allow for backwards compatibility with older clients.
+ splitOffset := uint64(0)
+ splitQstr := q.Get("split_offset")
+ if splitQstr != "" {
+ 	splitOffset, err = strconv.ParseUint(splitQstr, 10, 64)
+ 	if err != nil {
+ 		logger = logger.WithField("status", http.StatusBadRequest)
+ 		logger.WithError(err).Warn("bad request: split_offset parameter did not parse")
+ 		respWr.WriteHeader(http.StatusBadRequest)
+ 		return
+ 	}
+ }
+
+ logger, statusCode = writeTableFile(req.Context(), logger, fh.dbCache, filepath, file, splitOffset, numChunks, contentHash, uint64(contentLength), req.Body)
}

if statusCode != -1 {
@@ -286,7 +300,7 @@ func (u *uploadreader) Close() error {
return nil
}

- func writeTableFile(ctx context.Context, logger *logrus.Entry, dbCache DBCache, path, fileId string, numChunks int, contentHash []byte, contentLength uint64, body io.ReadCloser) (*logrus.Entry, int) {
+ func writeTableFile(ctx context.Context, logger *logrus.Entry, dbCache DBCache, path, fileId string, splitOffset uint64, numChunks int, contentHash []byte, contentLength uint64, body io.ReadCloser) (*logrus.Entry, int) {
if !validateFileName(fileId) {
logger = logger.WithField("status", http.StatusBadRequest)
logger.Warnf("%s is not a valid hash", fileId)
@@ -300,7 +314,7 @@ func writeTableFile(ctx context.Context, logger *logrus.Entry, dbCache DBCache,
return logger, http.StatusInternalServerError
}

- err = cs.WriteTableFile(ctx, fileId, numChunks, contentHash, func() (io.ReadCloser, uint64, error) {
+ err = cs.WriteTableFile(ctx, fileId, splitOffset, numChunks, contentHash, func() (io.ReadCloser, uint64, error) {
reader := body
size := contentLength
return &uploadreader{
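
Note the asymmetry in the handler above: num_chunks and content_length are required, while split_offset is optional and defaults to zero so that older clients, which never send it, keep working. A minimal sketch of that pattern for any optional numeric query parameter (the helper is hypothetical, not part of this PR; imports assumed: net/url, strconv):

	// optionalUint parses an optional numeric query parameter, returning
	// defaultVal when the parameter is absent. A malformed value is still an
	// error, since it signals a bad request rather than an old client.
	func optionalUint(q url.Values, key string, defaultVal uint64) (uint64, error) {
		s := q.Get(key)
		if s == "" {
			return defaultVal, nil
		}
		return strconv.ParseUint(s, 10, 64)
	}
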
17 changes: 12 additions & 5 deletions go/libraries/doltcore/remotestorage/chunk_store.go
@@ -915,19 +915,21 @@ func (dcs *DoltChunkStore) uploadChunks(ctx context.Context, hashToChunk map[has
chnks = append(chnks, ch)
}

+ hashToSplitOffset := make(map[hash.Hash]uint64)
hashToCount := make(map[hash.Hash]int)
hashToData := make(map[hash.Hash][]byte)
hashToContentHash := make(map[hash.Hash][]byte)

// structuring so this can be done as multiple files in the future.
{
- name, data, err := nbs.WriteChunks(chnks)
+ name, data, splitOffset, err := nbs.WriteChunks(chnks)

if err != nil {
return map[hash.Hash]int{}, err
}

h := hash.Parse(name)
+ hashToSplitOffset[h] = splitOffset
hashToData[h] = data
hashToCount[h] = len(chnks)

@@ -938,7 +940,7 @@ func (dcs *DoltChunkStore) uploadChunks(ctx context.Context, hashToChunk map[has
for h, contentHash := range hashToContentHash {
// Tables created on this path are always starting from memory tables and ending up as noms table files.
// As a result, the suffix is always empty.
- err := dcs.uploadTableFileWithRetries(ctx, h, "", uint64(hashToCount[h]), contentHash, func() (io.ReadCloser, uint64, error) {
+ err := dcs.uploadTableFileWithRetries(ctx, h, "", hashToSplitOffset[h], uint64(hashToCount[h]), contentHash, func() (io.ReadCloser, uint64, error) {
data := hashToData[h]
return io.NopCloser(bytes.NewReader(data)), uint64(len(data)), nil
})
@@ -950,7 +952,7 @@ func (dcs *DoltChunkStore) uploadChunks(ctx context.Context, hashToChunk map[has
return hashToCount, nil
}

- func (dcs *DoltChunkStore) uploadTableFileWithRetries(ctx context.Context, tableFileId hash.Hash, suffix string, numChunks uint64, tableFileContentHash []byte, getContent func() (io.ReadCloser, uint64, error)) error {
+ func (dcs *DoltChunkStore) uploadTableFileWithRetries(ctx context.Context, tableFileId hash.Hash, suffix string, splitOffset uint64, numChunks uint64, tableFileContentHash []byte, getContent func() (io.ReadCloser, uint64, error)) error {
op := func() error {
body, contentLength, err := getContent()
if err != nil {
@@ -963,6 +965,7 @@ func (dcs *DoltChunkStore) uploadTableFileWithRetries(ctx context.Context, table
ContentHash: tableFileContentHash,
NumChunks: numChunks,
Suffix: suffix,
+ SplitOffset: splitOffset,
}

dcs.logf("getting upload location for file %s", tableFileId.String())
@@ -1066,15 +1069,15 @@ func (dcs *DoltChunkStore) SupportedOperations() chunks.TableFileStoreOps {
}

// WriteTableFile reads a table file from the provided reader and writes it to the chunk store.
- func (dcs *DoltChunkStore) WriteTableFile(ctx context.Context, fileId string, numChunks int, contentHash []byte, getRd func() (io.ReadCloser, uint64, error)) error {
+ func (dcs *DoltChunkStore) WriteTableFile(ctx context.Context, fileId string, splitOffset uint64, numChunks int, contentHash []byte, getRd func() (io.ReadCloser, uint64, error)) error {
suffix := ""
if strings.HasSuffix(fileId, nbs.ArchiveFileSuffix) {
suffix = nbs.ArchiveFileSuffix
fileId = strings.TrimSuffix(fileId, nbs.ArchiveFileSuffix)
}

fileIdBytes := hash.Parse(fileId)
- return dcs.uploadTableFileWithRetries(ctx, fileIdBytes, suffix, uint64(numChunks), contentHash, getRd)
+ return dcs.uploadTableFileWithRetries(ctx, fileIdBytes, suffix, splitOffset, uint64(numChunks), contentHash, getRd)
}

// AddTableFilesToManifest adds table files to the manifest
@@ -1195,6 +1198,10 @@ func (drtf DoltRemoteTableFile) NumChunks() int {
return int(drtf.info.NumChunks)
}

+ func (drtf DoltRemoteTableFile) SplitOffset() uint64 {
+ 	return drtf.info.SplitOffset
+ }

var ErrRemoteTableFileGet = errors.New("HTTP GET for remote table file failed")

func sanitizeSignedUrl(url string) string {
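
Condensed, the client-side flow is: nbs.WriteChunks reports where the serialized table file splits, the offset is tracked per file hash, and it rides along in the upload request next to the existing metadata. A sketch of the request population, following the field names visible above (the Id value is an assumption; the other fields appear in this diff):

	tfd := &remotesapi.TableFileDetails{
		Id:            tableFileId[:], // assumed encoding; not shown in this diff
		ContentHash:   tableFileContentHash,
		NumChunks:     numChunks,
		Suffix:        suffix,
		SplitOffset:   splitOffset, // new in this PR
	}

On the read side, DoltRemoteTableFile.SplitOffset() surfaces the same value from TableFileInfo, so both directions of a sync agree on where a file splits.
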
4 changes: 2 additions & 2 deletions go/libraries/doltcore/servercfg/serverconfig.go
@@ -59,7 +59,7 @@
DefaultLogLevel = LogLevel_Info
DefaultLogFormat = LogFormat_Text
DefaultAutoCommit = true
- DefaultAutoGCBehaviorEnable = false
+ DefaultAutoGCBehaviorEnable = true
DefaultDoltTransactionCommit = false
DefaultMaxConnections = 1000
DefaultMaxWaitConnections = 50
@@ -75,7 +75,7 @@
DefaultMySQLUnixSocketFilePath = "/tmp/mysql.sock"
DefaultMaxLoggedQueryLen = 0
DefaultEncodeLoggedQuery = false
- DefaultCompressionLevel = 0
+ DefaultCompressionLevel = 1
)

func ptr[T any](t T) *T {
4 changes: 2 additions & 2 deletions go/libraries/doltcore/servercfg/yaml_config.go
@@ -1042,14 +1042,14 @@ type AutoGCBehaviorYAMLConfig struct {

func (a *AutoGCBehaviorYAMLConfig) Enable() bool {
if a.Enable_ == nil {
- return false
+ return true
}
return *a.Enable_
}

func (a *AutoGCBehaviorYAMLConfig) ArchiveLevel() int {
if a.ArchiveLevel_ == nil {
- return 0
+ return 1
}
return *a.ArchiveLevel_
}
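
Both getters now treat an omitted YAML field as on, which is how an existing config file with no auto_gc_behavior stanza picks up the new defaults. The nil-check-then-dereference pattern generalizes; a minimal sketch (the generic helper is hypothetical, not part of this PR):

	// withDefault returns the pointed-to value when the YAML field was
	// present, and def when the field was omitted from the config file.
	func withDefault[T any](p *T, def T) T {
		if p == nil {
			return def
		}
		return *p
	}

With it, Enable() reduces to: return withDefault(a.Enable_, true).
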
4 changes: 2 additions & 2 deletions go/libraries/doltcore/servercfg/yaml_config_test.go
@@ -35,8 +35,8 @@ behavior:
disable_client_multi_statements: false
event_scheduler: ON
auto_gc_behavior:
- enable: false
- archive_level: 0
+ enable: true
+ archive_level: 1

listener:
host: localhost