@@ -9,9 +9,13 @@ package cluster
9
9
import (
10
10
"fmt"
11
11
"os"
12
+ "sync"
12
13
14
+ "github.com/dustin/go-humanize"
15
+ "github.com/shirou/gopsutil/disk"
13
16
"github.com/signal18/replication-manager/config"
14
17
"github.com/signal18/replication-manager/utils/archiver"
18
+ "github.com/signal18/replication-manager/utils/dbhelper"
15
19
"github.com/signal18/replication-manager/utils/state"
16
20
"github.com/sirupsen/logrus"
17
21
)
@@ -196,3 +200,107 @@ func (cluster *Cluster) ResticResetQueue() error {
196
200
197
201
return nil
198
202
}
203
+
204
+ func (cluster * Cluster ) CheckBackupFreeSpace (backtype string , backup bool ) error {
205
+ var isWarning bool
206
+ bcksrv := cluster .GetBackupServer ()
207
+ if bcksrv == nil {
208
+ bcksrv = cluster .master
209
+ }
210
+
211
+ parentDir := cluster .Conf .WorkingDir + "/" + config .ConstStreamingSubDir + "/" + cluster .Name
212
+ diskstat , err := disk .Usage (parentDir )
213
+ if err != nil {
214
+ cluster .LogModulePrintf (cluster .Conf .Verbose , config .ConstLogModTask , config .LvlErr , "Error getting disk usage: %s" , err )
215
+ return err
216
+ }
217
+
218
+ cluster .DiskStatManager .UpdateStat (parentDir , diskstat )
219
+ if diskstat .UsedPercent > float64 (cluster .Conf .BackupDiskTresholdCrit ) {
220
+ cluster .SetState ("WARN0140" , state.State {ErrType : "WARNING" , ErrDesc : fmt .Sprintf (cluster .GetErrorList ()["WARN0140" ], diskstat .Path , diskstat .UsedPercent , cluster .Conf .BackupDiskTresholdCrit ), ErrFrom : "JOB" , ServerUrl : bcksrv .URL })
221
+ return fmt .Errorf ("Disk usage is over %d%% on %s. Used: %s" , cluster .Conf .BackupDiskTresholdCrit , diskstat .Path , humanize .Bytes (diskstat .Used ))
222
+ } else if diskstat .UsedPercent > float64 (cluster .Conf .BackupDiskTresholdWarn ) {
223
+ isWarning = true
224
+ cluster .SetState ("WARN0139" , state.State {ErrType : "WARNING" , ErrDesc : fmt .Sprintf (cluster .GetErrorList ()["WARN0139" ], diskstat .Path , diskstat .UsedPercent , cluster .Conf .BackupDiskTresholdWarn ), ErrFrom : "JOB" , ServerUrl : bcksrv .URL })
225
+ }
226
+
227
+ // Estimate size if disk usage is over treshold and estimate size is enabled. For binlog we will always estimate size to 2GB
228
+ if (isWarning && cluster .Conf .BackupEstimateSize ) || backtype == "binlog" {
229
+ free := diskstat .Free
230
+ required := uint64 (0 )
231
+
232
+ switch backtype {
233
+ case "logical" , "physical" :
234
+ _ , prev := bcksrv .GetLatestMeta (backtype )
235
+ if prev != nil && prev .Completed {
236
+ required = uint64 (prev .Size * int64 (100 + cluster .Conf .BackupGrowthPercentage ) / 100 )
237
+
238
+ // If not keep until valid, we need to add the size of the previous backup to the free space
239
+ if ! cluster .Conf .BackupKeepUntilValid {
240
+ free = free + uint64 (prev .Size )
241
+ }
242
+
243
+ } else {
244
+ cluster .LogModulePrintf (cluster .Conf .Verbose , config .ConstLogModTask , config .LvlInfo , "No previous backup found for %s. Estimating backup size." , bcksrv .URL )
245
+ estimatedSize , err := dbhelper .GetBackupSizeEstimation (bcksrv .Conn , bcksrv .DBVersion )
246
+ if err != nil {
247
+ return fmt .Errorf ("Error estimating backup size: %s" , err )
248
+ }
249
+
250
+ required = estimatedSize * uint64 (cluster .Conf .BackupEstimateSizePercentage ) / 100
251
+ }
252
+ case "binlog" :
253
+ // Max binlog size per file is 1GB, additional 1GB for unexpected growth
254
+ required = 2 * 1024 * 1024 * 1024
255
+ case "restic" :
256
+ // Restic backup size is not known until the backup is done
257
+ }
258
+
259
+ if free < required {
260
+ if backtype == "logical" {
261
+ cluster .SetState ("WARN0141" , state.State {ErrType : "WARNING" , ErrDesc : fmt .Sprintf (cluster .GetErrorList ()["WARN0139" ], cluster .Conf .BackupLogicalType , bcksrv .URL , diskstat .Path , humanize .Bytes (diskstat .Free ), humanize .Bytes (required )), ErrFrom : "JOB" , ServerUrl : bcksrv .URL })
262
+ } else if backtype == "physical" {
263
+ cluster .SetState ("WARN0142" , state.State {ErrType : "WARNING" , ErrDesc : fmt .Sprintf (cluster .GetErrorList ()["WARN0140" ], cluster .Conf .BackupPhysicalType , bcksrv .URL , diskstat .Path , humanize .Bytes (diskstat .Free ), humanize .Bytes (required )), ErrFrom : "JOB" , ServerUrl : bcksrv .URL })
264
+ } else if backtype == "binlog" {
265
+ cluster .SetState ("WARN0143" , state.State {ErrType : "WARNING" , ErrDesc : fmt .Sprintf (cluster .GetErrorList ()["WARN0141" ], bcksrv .URL , diskstat .Path , humanize .Bytes (diskstat .Free ), humanize .Bytes (required )), ErrFrom : "JOB" , ServerUrl : bcksrv .URL })
266
+ }
267
+
268
+ return fmt .Errorf ("Not enough free space on %s for backup. Free: %s" , diskstat .Path , humanize .Bytes (diskstat .Free ))
269
+ }
270
+
271
+ if backup {
272
+ cluster .LogModulePrintf (cluster .Conf .Verbose , config .ConstLogModTask , config .LvlInfo , "Free space is enough on %s: %s. Required: %s" , diskstat .Path , humanize .Bytes (diskstat .Free ), humanize .Bytes (required ))
273
+ }
274
+ }
275
+
276
+ return nil
277
+ }
278
+
279
+ func (cluster * Cluster ) CheckAllBackupFreeSpace () {
280
+ if ! cluster .Conf .BackupCheckFreeSpace {
281
+ return
282
+ }
283
+
284
+ // Check based on treshold
285
+ wg := sync.WaitGroup {}
286
+ wg .Add (1 )
287
+ go func () {
288
+ cluster .CheckBackupFreeSpace ("logical" , false )
289
+ wg .Done ()
290
+ }()
291
+
292
+ // if estimate size is enabled, check the free space for physical and binlog backups too
293
+ if cluster .Conf .BackupEstimateSize {
294
+ wg .Add (2 )
295
+ go func () {
296
+ cluster .CheckBackupFreeSpace ("physical" , false )
297
+ wg .Done ()
298
+ }()
299
+ go func () {
300
+ cluster .CheckBackupFreeSpace ("binlog" , false )
301
+ wg .Done ()
302
+ }()
303
+ }
304
+
305
+ wg .Wait ()
306
+ }
0 commit comments