Skip to content

Commit e325545

Browse files
committed
feat: update FindPriorBackups to use backup index
This patch teaches `FindPriorBackups` (now renamed to the more apt `FindAllIncrementals`) to use the backup index introduced in cockroachdb#150384. Additional changes are still required to fully resolve the related tickets — namely, updating the logic so that it no longer needs to load ALL manifests of a chain into memory, only the manifests that are actually needed. Epic: CRDB-47942 Informs: cockroachdb#150327, cockroachdb#150329, cockroachdb#150330, cockroachdb#150331
1 parent 056a760 commit e325545

File tree

9 files changed

+475
-153
lines changed

9 files changed

+475
-153
lines changed

pkg/backup/backupdest/backup_destination.go

Lines changed: 47 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@ import (
2121
"github.com/cockroachdb/cockroach/pkg/backup/backuputils"
2222
"github.com/cockroachdb/cockroach/pkg/cloud"
2323
"github.com/cockroachdb/cockroach/pkg/cloud/cloudpb"
24-
"github.com/cockroachdb/cockroach/pkg/clusterversion"
2524
"github.com/cockroachdb/cockroach/pkg/jobs/jobspb"
2625
"github.com/cockroachdb/cockroach/pkg/roachpb"
2726
"github.com/cockroachdb/cockroach/pkg/security/username"
@@ -220,7 +219,12 @@ func ResolveDest(
220219
}
221220
defer incrementalStore.Close()
222221

223-
priors, err := FindPriorBackups(ctx, incrementalStore, OmitManifest)
222+
rootStore, err := makeCloudStorage(ctx, collectionURI, user)
223+
if err != nil {
224+
return ResolvedDestination{}, err
225+
}
226+
defer rootStore.Close()
227+
priors, err := FindAllIncrementals(ctx, incrementalStore, rootStore, chosenSuffix, OmitManifest)
224228
if err != nil {
225229
return ResolvedDestination{}, errors.Wrap(err, "adjusting backup destination to append new layer to existing backup")
226230
}
@@ -237,41 +241,40 @@ func ResolveDest(
237241
}
238242
prevBackupURIs = append([]string{plannedBackupDefaultURI}, prevBackupURIs...)
239243

240-
// Within the chosenSuffix dir, differentiate incremental backups with partName.
241-
partName := endTime.GoTime().Format(backupbase.DateBasedIncFolderName)
242-
if execCfg.Settings.Version.IsActive(ctx, clusterversion.V25_2) {
243-
if startTime.IsEmpty() {
244-
baseEncryptionOptions, err := backupencryption.GetEncryptionFromBase(
245-
ctx, user, execCfg.DistSQLSrv.ExternalStorageFromURI, prevBackupURIs[0],
246-
encryption, kmsEnv,
247-
)
248-
if err != nil {
249-
return ResolvedDestination{}, err
250-
}
244+
// If startTime is not already set, we will find it via the previous backup
245+
// manifest.
246+
if startTime.IsEmpty() {
247+
baseEncryptionOptions, err := backupencryption.GetEncryptionFromBase(
248+
ctx, user, execCfg.DistSQLSrv.ExternalStorageFromURI, prevBackupURIs[0],
249+
encryption, kmsEnv,
250+
)
251+
if err != nil {
252+
return ResolvedDestination{}, err
253+
}
251254

252-
// TODO (kev-cao): Once we have completed the backup directory index work, we
253-
// can remove the need to read an entire backup manifest just to fetch the
254-
// start time. We can instead read the metadata protobuf.
255-
mem := execCfg.RootMemoryMonitor.MakeBoundAccount()
256-
defer mem.Close(ctx)
257-
precedingBackupManifest, size, err := backupinfo.ReadBackupManifestFromURI(
258-
ctx, &mem, prevBackupURIs[len(prevBackupURIs)-1], user,
259-
execCfg.DistSQLSrv.ExternalStorageFromURI, baseEncryptionOptions, kmsEnv,
260-
)
261-
if err != nil {
262-
return ResolvedDestination{}, err
263-
}
264-
if err := mem.Grow(ctx, size); err != nil {
265-
return ResolvedDestination{}, err
266-
}
267-
defer mem.Shrink(ctx, size)
268-
startTime = precedingBackupManifest.EndTime
269-
if startTime.IsEmpty() {
270-
return ResolvedDestination{}, errors.Errorf("empty end time in prior backup manifest")
271-
}
255+
// TODO (kev-cao): Once we have completed the backup directory index work, we
256+
// can remove the need to read an entire backup manifest just to fetch the
257+
// start time. We can instead read the metadata protobuf.
258+
mem := execCfg.RootMemoryMonitor.MakeBoundAccount()
259+
defer mem.Close(ctx)
260+
precedingBackupManifest, size, err := backupinfo.ReadBackupManifestFromURI(
261+
ctx, &mem, prevBackupURIs[len(prevBackupURIs)-1], user,
262+
execCfg.DistSQLSrv.ExternalStorageFromURI, baseEncryptionOptions, kmsEnv,
263+
)
264+
if err != nil {
265+
return ResolvedDestination{}, err
266+
}
267+
if err := mem.Grow(ctx, size); err != nil {
268+
return ResolvedDestination{}, err
269+
}
270+
defer mem.Shrink(ctx, size)
271+
startTime = precedingBackupManifest.EndTime
272+
if startTime.IsEmpty() {
273+
return ResolvedDestination{}, errors.Errorf("empty end time in prior backup manifest")
272274
}
273-
partName = partName + "-" + startTime.GoTime().Format(backupbase.DateBasedIncFolderNameSuffix)
274275
}
276+
277+
partName := ConstructDateBasedIncrementalFolderName(startTime.GoTime(), endTime.GoTime())
275278
defaultIncrementalsURI, urisByLocalityKV, err := GetURIsByLocalityKV(fullyResolvedIncrementalsLocation, partName)
276279
if err != nil {
277280
return ResolvedDestination{}, err
@@ -540,9 +543,11 @@ func ListFullBackupsInCollection(
540543
func ResolveBackupManifests(
541544
ctx context.Context,
542545
mem *mon.BoundAccount,
546+
defaultCollectionURI string,
543547
baseStores []cloud.ExternalStorage,
544548
incStores []cloud.ExternalStorage,
545549
mkStore cloud.ExternalStorageFromURIFactory,
550+
resolvedSubdir string,
546551
fullyResolvedBaseDirectory []string,
547552
fullyResolvedIncrementalsDirectory []string,
548553
endTime hlc.Timestamp,
@@ -577,7 +582,14 @@ func ResolveBackupManifests(
577582

578583
var incrementalBackups []string
579584
if len(incStores) > 0 {
580-
incrementalBackups, err = FindPriorBackups(ctx, incStores[0], includeManifest)
585+
rootStore, err := mkStore(ctx, defaultCollectionURI, user)
586+
if err != nil {
587+
return nil, nil, nil, 0, err
588+
}
589+
defer rootStore.Close()
590+
incrementalBackups, err = FindAllIncrementals(
591+
ctx, incStores[0], rootStore, resolvedSubdir, includeManifest,
592+
)
581593
if err != nil {
582594
return nil, nil, nil, 0, err
583595
}

pkg/backup/backupdest/backup_destination_test.go

Lines changed: 2 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,7 @@ func TestBackupRestoreResolveDestination(t *testing.T) {
111111
// - BACKUP INTO full1 IN collection, incremental_location = inc_storage_path @ 9
112112
// - BACKUP INTO full1 IN collection, incremental_location = inc_storage_path @ 9:30
113113
// - BACKUP INTO LATEST IN collection, incremental_location = inc_storage_path @ 10
114+
// - BACKUP INTO collection (full 3) @ 10:30
114115
t.Run("collection", func(t *testing.T) {
115116
collectionLoc := fmt.Sprintf("nodelocal://1/%s?AUTH=implicit", t.Name())
116117
// Note that this default is NOT arbitrary, but rather hard-coded as
@@ -134,9 +135,6 @@ func TestBackupRestoreResolveDestination(t *testing.T) {
134135
inc6Time := inc5Time.Add(time.Minute * 30)
135136
inc7Time := inc6Time.Add(time.Minute * 30)
136137
full3Time := inc7Time.Add(time.Minute * 30)
137-
inc8Time := full3Time.Add(time.Minute * 30)
138-
inc9Time := inc8Time.Add(time.Minute * 30)
139-
full4Time := inc9Time.Add(time.Minute * 30)
140138

141139
// firstBackupChain is maintained throughout the tests as the history of
142140
// backups that were taken based on the initial full backup.
@@ -155,6 +153,7 @@ func TestBackupRestoreResolveDestination(t *testing.T) {
155153
testCollectionBackup := func(t *testing.T, backupTime time.Time,
156154
expectedDefault, expectedSuffix, expectedIncDir string, expectedPrevBackups []string,
157155
appendToLatest bool, subdir string, incrementalTo []string) {
156+
t.Helper()
158157

159158
endTime := hlc.Timestamp{WallTime: backupTime.UnixNano()}
160159

@@ -335,14 +334,11 @@ func TestBackupRestoreResolveDestination(t *testing.T) {
335334
true /* intoLatest */, expectedSubdir, customIncrementalTo)
336335
writeManifest(t, expectedDefault, inc7Time)
337336
}
338-
339337
// A new full backup: BACKUP INTO collection
340-
var backup3Location string
341338
{
342339
expectedSuffix := "/2020/12/25-103000.00"
343340
expectedIncDir := ""
344341
expectedDefault := fmt.Sprintf("nodelocal://1/%s%s?AUTH=implicit", t.Name(), expectedSuffix)
345-
backup3Location = expectedDefault
346342

347343
testCollectionBackup(t, full3Time,
348344
expectedDefault, expectedSuffix, expectedIncDir, []string(nil),
@@ -351,47 +347,6 @@ func TestBackupRestoreResolveDestination(t *testing.T) {
351347
// We also wrote a new full backup, so let's update the latest.
352348
writeLatest(t, collectionLoc, expectedSuffix)
353349
}
354-
355-
// A remote incremental into the third full backup: BACKUP INTO LATEST
356-
// IN collection, BUT with a trick. Write a (fake) incremental backup
357-
// to the old directory, to be sure that subsequent incremental backups
358-
// go there as well (and not the newer incrementals/ subdir.)
359-
{
360-
expectedSuffix := "/2020/12/25-103000.00"
361-
expectedIncDir := "/20201225/110000.00-20201225-103000.00"
362-
363-
// Writes the (fake) incremental backup.
364-
oldStyleDefault := fmt.Sprintf("nodelocal://1/%s%s%s?AUTH=implicit",
365-
t.Name(),
366-
expectedSuffix, expectedIncDir)
367-
writeManifest(t, oldStyleDefault, inc8Time)
368-
369-
expectedSuffix = "/2020/12/25-103000.00"
370-
expectedIncDir = "/20201225/113000.00-20201225-110000.00"
371-
expectedSubdir := expectedSuffix
372-
expectedDefault := fmt.Sprintf("nodelocal://1/%s%s%s?AUTH=implicit",
373-
t.Name(),
374-
expectedSuffix, expectedIncDir)
375-
376-
testCollectionBackup(t, inc9Time,
377-
expectedDefault, expectedSuffix, expectedIncDir, []string{backup3Location, oldStyleDefault},
378-
true /* intoLatest */, expectedSubdir, noIncrementalStorage)
379-
writeManifest(t, expectedDefault, inc9Time)
380-
}
381-
382-
// A new full backup: BACKUP INTO collection
383-
{
384-
expectedSuffix := "/2020/12/25-120000.00"
385-
expectedIncDir := ""
386-
expectedDefault := fmt.Sprintf("nodelocal://1/%s%s?AUTH=implicit", t.Name(), expectedSuffix)
387-
388-
testCollectionBackup(t, full4Time,
389-
expectedDefault, expectedSuffix, expectedIncDir, []string(nil),
390-
false /* intoLatest */, noExplicitSubDir, noIncrementalStorage)
391-
writeManifest(t, expectedDefault, full4Time)
392-
// We also wrote a new full backup, so let's update the latest.
393-
writeLatest(t, collectionLoc, expectedSuffix)
394-
}
395350
})
396351
})
397352
}

pkg/backup/backupdest/backup_index.go

Lines changed: 88 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ import (
1111
"fmt"
1212
"path"
1313
"strings"
14+
"time"
1415

1516
"github.com/cockroachdb/cockroach/pkg/backup/backupbase"
1617
"github.com/cockroachdb/cockroach/pkg/backup/backuppb"
@@ -120,10 +121,9 @@ func WriteBackupIndexMetadata(
120121
// can remove these checks in v26.2+.
121122
func IndexExists(ctx context.Context, store cloud.ExternalStorage, subdir string) (bool, error) {
122123
var indexExists bool
123-
indexSubdir := path.Join(backupbase.BackupIndexDirectoryPath, flattenSubdirForIndex(subdir))
124124
if err := store.List(
125125
ctx,
126-
indexSubdir,
126+
indexSubdir(subdir),
127127
"/",
128128
func(file string) error {
129129
indexExists = true
@@ -135,6 +135,83 @@ func IndexExists(ctx context.Context, store cloud.ExternalStorage, subdir string
135135
return indexExists, nil
136136
}
137137

138+
// ListIndexes lists all the index files for a backup chain rooted by the full
139+
// backup indicated by the subdir. The store should be rooted at the default
140+
// collection URI (the one that contains the `index/` directory). It returns
141+
// the basenames of the listed index files. It assumes that the subdir is
142+
// resolved and not `LATEST`.
143+
//
144+
// The indexes are returned in reverse chronological order, i.e. the most recent
145+
// index is first in the list.
146+
func ListIndexes(
147+
ctx context.Context, store cloud.ExternalStorage, subdir string,
148+
) ([]string, error) {
149+
var indexBasenames []string
150+
if err := store.List(
151+
ctx,
152+
indexSubdir(subdir),
153+
"",
154+
func(file string) error {
155+
indexBasenames = append(indexBasenames, path.Base(file))
156+
return nil
157+
},
158+
); err != nil {
159+
return nil, errors.Wrapf(err, "listing indexes in %s", subdir)
160+
}
161+
return indexBasenames, nil
162+
}
163+
164+
// ParseIndexFilename parses the start and end timestamps from the index
165+
// filename.
166+
//
167+
// Note: The timestamps are only millisecond-precise and so do not represent the
168+
// exact nano-specific times in the corresponding backup manifest.
169+
func parseIndexFilename(basename string) (start time.Time, end time.Time, err error) {
170+
invalidFmtErr := errors.Newf("invalid index filename format: %s", basename)
171+
172+
if !strings.HasSuffix(basename, "_metadata.pb") {
173+
return time.Time{}, time.Time{}, invalidFmtErr
174+
}
175+
parts := strings.Split(basename, "_")
176+
if len(parts) != 4 {
177+
return time.Time{}, time.Time{}, invalidFmtErr
178+
}
179+
180+
if parts[1] != "0" {
181+
start, err = time.Parse(backupbase.BackupIndexFilenameTimestampFormat, parts[1])
182+
if err != nil {
183+
return time.Time{}, time.Time{}, errors.Join(invalidFmtErr, err)
184+
}
185+
}
186+
end, err = time.Parse(backupbase.BackupIndexFilenameTimestampFormat, parts[2])
187+
if err != nil {
188+
return time.Time{}, time.Time{}, errors.Join(invalidFmtErr, err)
189+
}
190+
191+
return start, end, nil
192+
}
193+
194+
// ParseBackupFilePathFromIndexFileName parses the path to a backup given the
195+
// basename of its index file and its subdirectory. For full backups, the
196+
// returned path is relative to the root of the backup. For incremental
197+
// backups, the path is relative to the incremental storage location. It expects
198+
// that subdir is resolved and not `LATEST`.
199+
//
200+
// Note: While the path is stored in the index file, we can take a shortcut here
201+
// and derive it from the filename solely because backup paths are
202+
// millisecond-precise and so are the timestamps encoded in the filename.
203+
func parseBackupFilePathFromIndexFileName(subdir, basename string) (string, error) {
204+
start, end, err := parseIndexFilename(basename)
205+
if err != nil {
206+
return "", err
207+
}
208+
if start.IsZero() {
209+
return subdir, nil
210+
}
211+
212+
return ConstructDateBasedIncrementalFolderName(start, end), nil
213+
}
214+
138215
// shouldWriteIndex determines if a backup index file should be written for a
139216
// given backup. The rule is:
140217
// 1. An index should only be written on a v25.4+ cluster.
@@ -172,8 +249,7 @@ func getBackupIndexFilePath(subdir string, startTime, endTime hlc.Timestamp) (st
172249
return "", errors.AssertionFailedf("expected subdir to be resolved and not be 'LATEST'")
173250
}
174251
return backuputils.JoinURLPath(
175-
backupbase.BackupIndexDirectoryPath,
176-
flattenSubdirForIndex(subdir),
252+
indexSubdir(subdir),
177253
getBackupIndexFileName(startTime, endTime),
178254
), nil
179255
}
@@ -194,13 +270,15 @@ func getBackupIndexFileName(startTime, endTime hlc.Timestamp) string {
194270
)
195271
}
196272

197-
// flattenSubdirForIndex flattens a full backup subdirectory to be used in the
198-
// index. It assumes subdir is not `LATEST` and has been resolved.
199-
// We flatten the subdir so that when listing from the index, we can list with
200-
// the `index/` prefix and delimit on `/`.
201-
func flattenSubdirForIndex(subdir string) string {
202-
return strings.ReplaceAll(
273+
// indexSubdir returns the path to the index directory for a given full backup
274+
// subdir relative to the root of the default collection URI. It assumes that
275+
// subdir is not `LATEST` and has been resolved.
276+
func indexSubdir(subdir string) string {
277+
// We flatten the subdir so that when listing from the index, we can list with
278+
// the `index/` prefix and delimit on `/`.
279+
flattenedSubdir := strings.ReplaceAll(
203280
strings.TrimPrefix(subdir, "/"),
204281
"/", "-",
205282
)
283+
return path.Join(backupbase.BackupIndexDirectoryPath, flattenedSubdir)
206284
}

0 commit comments

Comments
 (0)