Skip to content

Commit 86576b4

Browse files
nixpanic and mergify[bot]
authored and committed
rbd: include a delay and check for syncing status after resyncing
It may take some time for the RBD-mirror daemon to start syncing the image. After the resync operation is executed, the status of the resync is checked with a small delay, so that subsequent resync calls do not restart the resync in quick succession.

Signed-off-by: Niels de Vos <ndevos@ibm.com>
1 parent b0994a5 commit 86576b4

File tree

1 file changed

+44
-2
lines changed

1 file changed

+44
-2
lines changed

internal/rbd/mirror.go

Lines changed: 44 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ package rbd
1818
import (
1919
"context"
2020
"encoding/json"
21+
"errors"
2122
"fmt"
2223
"strings"
2324
"time"
@@ -209,8 +210,12 @@ func (rm *rbdMirror) Demote(_ context.Context) error {
209210
return nil
210211
}
211212

212-
// Resync resync image to correct the split-brain.
213-
func (rm *rbdMirror) Resync(_ context.Context) error {
213+
// Resync resyncs the image to correct the split-brain. It may take some time for
214+
// the RBD-mirror daemon to start syncing the image. After the resync operation
215+
// is executed, the status of the resync is checked with a small delay to
216+
// prevent subsequent resync calls from re-starting the resync quickly after
217+
// each other.
218+
func (rm *rbdMirror) Resync(ctx context.Context) error {
214219
image, err := rm.open()
215220
if err != nil {
216221
return fmt.Errorf("failed to open image %q with error: %w", rm, err)
@@ -221,6 +226,43 @@ func (rm *rbdMirror) Resync(_ context.Context) error {
221226
return fmt.Errorf("failed to resync image %q with error: %w", rm, err)
222227
}
223228

229+
// delay until the state is syncing, or until 1+2+4+8+16 seconds passed
230+
delay := 1 * time.Second
231+
for {
232+
time.Sleep(delay)
233+
234+
sts, dErr := rm.GetGlobalMirroringStatus(ctx)
235+
if dErr != nil {
236+
// the image gets recreated after issuing resync
237+
if errors.Is(dErr, rbderrors.ErrImageNotFound) {
238+
continue
239+
}
240+
log.ErrorLog(ctx, dErr.Error())
241+
242+
return dErr
243+
}
244+
245+
localStatus, dErr := sts.GetLocalSiteStatus()
246+
if dErr != nil {
247+
log.ErrorLog(ctx, dErr.Error())
248+
249+
return fmt.Errorf("failed to get local status: %w", dErr)
250+
}
251+
252+
syncInfo, dErr := localStatus.GetLastSyncInfo(ctx)
253+
if dErr != nil {
254+
return fmt.Errorf("failed to get last sync info: %w", dErr)
255+
}
256+
if syncInfo.IsSyncing() {
257+
return nil
258+
}
259+
260+
delay = 2 * delay
261+
if delay > 30 {
262+
break
263+
}
264+
}
265+
224266
// If we issued a resync, return a non-final error as image needs to be recreated
225267
// locally. Caller retries till RBD syncs an initial version of the image to
226268
// report its status in the resync request.

0 commit comments

Comments
 (0)