@@ -33,12 +33,15 @@ const MAX_RPC_ATTEMPTS = 3;
3333
// Sync failure auto-disable configuration

// Failure count at which incrementSyncFailures() auto-disables sync.
const SYNC_FAILURE_THRESHOLD = 3;

// Recovery attempt count at which scheduleNextRecoveryCheck() stops scheduling further checks.
const MAX_RECOVERY_ATTEMPTS = 10;

// Delay before each successive recovery check; once the list is exhausted the
// last entry keeps being used. Frozen so the shared schedule cannot be mutated
// by accident.
const RECOVERY_BACKOFF_SCHEDULE = Object.freeze([
    5 * 60 * 1000,      // 5 minutes
    15 * 60 * 1000,     // 15 minutes
    60 * 60 * 1000,     // 1 hour
    6 * 60 * 60 * 1000  // 6 hours (max)
]);

// Stagger recovery checks to avoid thundering herd (random jitter up to 2 minutes)
const RECOVERY_JITTER_MAX = 2 * 60 * 1000;
4245
4346module . exports = ( sequelize , DataTypes ) => {
4447 class Explorer extends Model {
@@ -317,6 +320,7 @@ module.exports = (sequelize, DataTypes) => {
317320 syncFailedAttempts : 0 ,
318321 syncDisabledAt : null ,
319322 syncDisabledReason : null ,
323+ recoveryAttempts : 0 ,
320324 nextRecoveryCheckAt : null
321325 } ) ;
322326 return this ;
@@ -332,82 +336,76 @@ module.exports = (sequelize, DataTypes) => {
332336
333337 /**
334338 * Increments the sync failure counter and auto-disables if threshold reached.
339+ * Uses atomic increment to avoid race conditions.
335340 * @param {string } [reason='rpc_unreachable'] - Reason for the failure
336341 * @returns {Promise<{disabled: boolean, attempts: number}> } Result with disable status
337342 */
338343 async incrementSyncFailures ( reason = 'rpc_unreachable' ) {
339- const newCount = ( this . syncFailedAttempts || 0 ) + 1 ;
340- await this . update ( { syncFailedAttempts : newCount } ) ;
344+ // Use atomic increment to avoid race conditions
345+ await this . increment ( 'syncFailedAttempts' ) ;
346+ await this . reload ( ) ;
341347
342- if ( newCount >= SYNC_FAILURE_THRESHOLD ) {
348+ if ( this . syncFailedAttempts >= SYNC_FAILURE_THRESHOLD ) {
343349 await this . autoDisableSync ( reason ) ;
344- return { disabled : true , attempts : newCount } ;
350+ return { disabled : true , attempts : this . syncFailedAttempts } ;
345351 }
346- return { disabled : false , attempts : newCount } ;
352+ return { disabled : false , attempts : this . syncFailedAttempts } ;
347353 }
348354
349355 /**
350356 * Auto-disables sync and schedules first recovery check.
357+ * Adds random jitter to avoid thundering herd when many explorers are disabled at once.
351358 * @param {string } reason - Reason for disabling (e.g., 'rpc_unreachable')
352359 * @returns {Promise<Explorer> } Updated explorer
353360 */
354361 async autoDisableSync ( reason ) {
355- const nextCheck = new Date ( Date . now ( ) + RECOVERY_BACKOFF_SCHEDULE [ 0 ] ) ;
362+ // Add random jitter to stagger recovery checks
363+ const jitter = Math . floor ( Math . random ( ) * RECOVERY_JITTER_MAX ) ;
364+ const nextCheck = new Date ( Date . now ( ) + RECOVERY_BACKOFF_SCHEDULE [ 0 ] + jitter ) ;
356365 await this . update ( {
357366 shouldSync : false ,
358367 syncDisabledAt : new Date ( ) ,
359368 syncDisabledReason : reason ,
369+ recoveryAttempts : 0 ,
360370 nextRecoveryCheckAt : nextCheck
361371 } ) ;
362372 return this ;
363373 }
364374
365- /**
366- * Resets all sync failure tracking state.
367- * @returns {Promise<Explorer> } Updated explorer
368- */
369- async resetSyncState ( ) {
370- await this . update ( {
371- syncFailedAttempts : 0 ,
372- syncDisabledAt : null ,
373- syncDisabledReason : null ,
374- nextRecoveryCheckAt : null
375- } ) ;
376- return this ;
377- }
378-
379375 /**
380376 * Schedules the next recovery check using exponential backoff.
377+ * Increments recovery attempts and returns null if max attempts reached.
381378 * Backoff schedule: 5m -> 15m -> 1h -> 6h (max)
382- * @returns {Promise<Explorer > } Updated explorer
379+ * @returns {Promise<{scheduled: boolean, attempts: number, maxReached: boolean} > } Result
383380 */
384381 async scheduleNextRecoveryCheck ( ) {
385382 if ( ! this . syncDisabledAt ) {
386- return this ;
383+ return { scheduled : false , attempts : 0 , maxReached : false } ;
387384 }
388385
389- const timeSinceDisabled = Date . now ( ) - new Date ( this . syncDisabledAt ) . getTime ( ) ;
390- let cumulativeTime = 0 ;
391- let backoffIndex = 0 ;
386+ const newAttempts = ( this . recoveryAttempts || 0 ) + 1 ;
392387
393- // Find which backoff interval we should use based on time since disabled
394- for ( let i = 0 ; i < RECOVERY_BACKOFF_SCHEDULE . length ; i ++ ) {
395- cumulativeTime += RECOVERY_BACKOFF_SCHEDULE [ i ] ;
396- if ( timeSinceDisabled < cumulativeTime ) {
397- backoffIndex = i ;
398- break ;
399- }
400- backoffIndex = i ;
388+ // Check if max recovery attempts reached
389+ if ( newAttempts >= MAX_RECOVERY_ATTEMPTS ) {
390+ await this . update ( {
391+ recoveryAttempts : newAttempts ,
392+ nextRecoveryCheckAt : null ,
393+ syncDisabledReason : 'max_recovery_attempts_reached'
394+ } ) ;
395+ return { scheduled : false , attempts : newAttempts , maxReached : true } ;
401396 }
402397
403- // Cap at max backoff (last element)
404- if ( backoffIndex >= RECOVERY_BACKOFF_SCHEDULE . length ) {
405- backoffIndex = RECOVERY_BACKOFF_SCHEDULE . length - 1 ;
406- }
398+ // Use recovery attempts as index, capped at max backoff
399+ const backoffIndex = Math . min ( newAttempts - 1 , RECOVERY_BACKOFF_SCHEDULE . length - 1 ) ;
400+ // Add random jitter to stagger recovery checks
401+ const jitter = Math . floor ( Math . random ( ) * RECOVERY_JITTER_MAX ) ;
402+ const nextCheck = new Date ( Date . now ( ) + RECOVERY_BACKOFF_SCHEDULE [ backoffIndex ] + jitter ) ;
407403
408- const nextCheck = new Date ( Date . now ( ) + RECOVERY_BACKOFF_SCHEDULE [ backoffIndex ] ) ;
409- await this . update ( { nextRecoveryCheckAt : nextCheck } ) ;
410- return this ;
404+ await this . update ( {
405+ recoveryAttempts : newAttempts ,
406+ nextRecoveryCheckAt : nextCheck
407+ } ) ;
408+ return { scheduled : true , attempts : newAttempts , maxReached : false } ;
411409 }
412410
413411 /**
@@ -420,6 +418,7 @@ module.exports = (sequelize, DataTypes) => {
420418 syncFailedAttempts : 0 ,
421419 syncDisabledAt : null ,
422420 syncDisabledReason : null ,
421+ recoveryAttempts : 0 ,
423422 nextRecoveryCheckAt : null
424423 } ) ;
425424 return this ;
@@ -806,6 +805,7 @@ module.exports = (sequelize, DataTypes) => {
806805 syncFailedAttempts : DataTypes . INTEGER ,
807806 syncDisabledAt : DataTypes . DATE ,
808807 syncDisabledReason : DataTypes . STRING ,
808+ recoveryAttempts : DataTypes . INTEGER ,
809809 nextRecoveryCheckAt : DataTypes . DATE
810810 } , {
811811 hooks : {
0 commit comments