@@ -2,12 +2,14 @@ package main
22
33import (
44 "context"
5+ "errors"
56 "fmt"
67 "log"
78 "os"
89 "path/filepath"
910 "time"
1011
12+ "github.com/aws/smithy-go"
1113 "github.com/fly-apps/fly-etcd/internal/flyetcd"
1214)
1315
@@ -74,37 +76,57 @@ func maybeBackup(ctx context.Context, cli *flyetcd.Client, s3Client *flyetcd.S3C
7476 isLeader , err := cli .IsLeader (ctx , machineID )
7577 if err != nil {
7678 log .Printf ("[error] Failed to check leader status: %v" , err )
77- // If we can not determine leadership, default to checking again in backupInterval
7879 return backupInterval
7980 }
8081
82+ // Get last backup time
8183 lastTime , err := s3Client .LastBackupTaken (ctx )
8284 if err != nil {
83- log .Printf ("[error] Failed to get last backup time: %v" , err )
84- return - 1
85+ if isNotFoundErr (err ) {
86+ if isLeader {
87+ doBackup (ctx , cli , s3Client )
88+ return backupInterval
89+ }
90+ // Schedule a re-check one minute from now. We will never boot as a leader, so this provides
91+ // a small window for leadership to settle after a deploy.
92+ lastTime = time .Now ().Add (- backupInterval + time .Minute )
93+ } else {
94+ log .Printf ("[error] Failed to get last backup time: %v" , err )
95+ return backupInterval
96+ }
8597 }
8698
87- // Calculate the interval, regardless of whether or not we are the leader.
88- // This is to accommodate deploys where the booting instance will never be the leader.
89- interval := time .Until (lastTime .Add (backupInterval ))
90- if interval > 0 {
91- log .Printf ("[info] Next backup will be performed in %v" , interval )
92- return interval
99+ // Calculate the interval regardless of whether or not we are the leader.
100+ // This is to accommodate deploys when the booting instance will never be the leader.
101+ nextBackupTime := lastTime .Add (backupInterval )
102+ timeUntilNext := time .Until (nextBackupTime )
103+ if timeUntilNext > 0 {
104+ log .Printf ("[info] Next backup is scheduled in %v" , timeUntilNext )
105+ return timeUntilNext
93106 }
94107
95108 if ! isLeader {
96109 return backupInterval
97110 }
98111
112+ doBackup (ctx , cli , s3Client )
113+
114+ return backupInterval
115+ }
116+
117+ func isNotFoundErr (err error ) bool {
118+ var apiErr smithy.APIError
119+ return errors .As (err , & apiErr ) && apiErr .ErrorCode () == "NotFound"
120+ }
121+
122+ func doBackup (ctx context.Context , cli * flyetcd.Client , s3Client * flyetcd.S3Client ) {
99123 log .Printf ("[info] Performing backup..." )
100124 if err := performBackup (ctx , cli , s3Client ); err != nil {
101125 log .Printf ("[warn] Backup failed: %v" , err )
102126 backupSuccess .Set (0 )
103127 } else {
104128 backupSuccess .Set (1 )
105129 }
106-
107- return backupInterval
108130}
109131
110132func performBackup (parentCtx context.Context , cli * flyetcd.Client , s3Client * flyetcd.S3Client ) error {
@@ -146,7 +168,7 @@ func performBackup(parentCtx context.Context, cli *flyetcd.Client, s3Client *fly
146168 return fmt .Errorf ("failed to upload backup: %w" , err )
147169 }
148170
149- log .Printf ("[info] Backup successful ( %0.2f MB), version : %s" , float64 (fi .Size ())/ (1024 * 1024 ), version )
171+ log .Printf ("[info] Backup successful. Size: %0.2f MiB, Version : %s" , float64 (fi .Size ())/ (1024 * 1024 ), version )
150172
151173 return nil
152174}
0 commit comments