@@ -150,6 +150,18 @@ const AUDIT_RATE_LIMIT_REDIS_URL =
150150
151151const auditRedis = createRedisClient ( AUDIT_RATE_LIMIT_REDIS_URL ) ;
152152
// When a security report stays in PENDING status for too long (e.g. because
// workers previously failed due to infrastructure issues), we want to
// best-effort re-enqueue a scan job on the next download. These thresholds
// control how old a PENDING report must be before we attempt a retry, and how
// often we allow such retries per package/version via a simple Redis lock.

/**
 * Parses a millisecond duration taken from an environment variable.
 *
 * @param raw        The raw environment value (may be undefined).
 * @param fallbackMs Value used when `raw` is unset, blank, or not a finite
 *                   number (e.g. "5m"), so a typo cannot produce NaN.
 * @returns An integer number of milliseconds. Fractional input is truncated
 *          toward zero because the value is later passed to Redis `PX`.
 *          Zero and negative values are returned unchanged so that callers
 *          can keep treating "not > 0" as an explicit opt-out.
 */
function parsePendingRetryMs(
  raw: string | undefined,
  fallbackMs: number
): number {
  if (raw === undefined || raw.trim() === '') {
    return fallbackMs;
  }
  const parsed = Number(raw);
  if (!Number.isFinite(parsed)) {
    return fallbackMs;
  }
  return Math.trunc(parsed);
}

// Minimum age of a PENDING report before a retry is attempted (default 5 min).
const PENDING_RETRY_AFTER_MS = parsePendingRetryMs(
  process.env.SECURITY_PENDING_RETRY_AFTER_MS,
  5 * 60 * 1000
);
// TTL of the per-package/version retry lock (default 60 s).
const PENDING_RETRY_LOCK_MS = parsePendingRetryMs(
  process.env.SECURITY_PENDING_RETRY_LOCK_MS,
  60_000
);
164+
153165interface EffectiveAuditConfig {
154166 windowSeconds : number ;
155167 // Max new audits allowed per base window (24h by default)
@@ -689,7 +701,55 @@ app.post(
689701 } ) ;
690702 }
691703 } else if ( existing . status === 'PENDING' ) {
692- // Already enqueued previously; we just wait below.
704+ // This version already has a PENDING report. In normal operation that
705+ // means a scan job is in-flight or recently completed. However, if the
706+ // report has been stuck in PENDING for a long time (for example due to
707+ // past infrastructure issues), we attempt a best-effort re-enqueue of
708+ // the scan job, guarded by a small Redis lock to avoid hammering the
709+ // queue on every download.
710+ if ( PENDING_RETRY_AFTER_MS > 0 ) {
711+ const ageMs = now . getTime ( ) - existing . updated_at . getTime ( ) ;
712+ if ( ageMs >= PENDING_RETRY_AFTER_MS ) {
713+ const lockTtlMs = PENDING_RETRY_LOCK_MS > 0 ? PENDING_RETRY_LOCK_MS : 60_000 ;
714+ const lockKey = `scan:retry:${ name } :${ normalizedVersion } ` ;
715+
716+ try {
717+ // Basic Redis lock using NX + PX. Only one process will receive
718+ // an 'OK' reply within the TTL window and perform the retry.
719+ const resSet = await ( auditRedis as any ) . set (
720+ lockKey ,
721+ '1' ,
722+ 'PX' ,
723+ lockTtlMs ,
724+ 'NX'
725+ ) ;
726+
727+ if ( resSet === 'OK' ) {
728+ // eslint-disable-next-line no-console
729+ console . log ( 'Re-enqueueing stale PENDING scan job' , {
730+ packageName : name ,
731+ version : normalizedVersion ,
732+ ageMs
733+ } ) ;
734+
735+ await enqueueScanJob ( {
736+ packageName : name ,
737+ version : normalizedVersion ,
738+ orgId : orgId ?? null ,
739+ policyId : policy . id
740+ } ) ;
741+ }
742+ } catch ( err ) {
743+ // eslint-disable-next-line no-console
744+ console . error ( 'Failed to re-enqueue stale PENDING scan job' , {
745+ packageName : name ,
746+ version : normalizedVersion ,
747+ error :
748+ err instanceof Error ? err . message : String ( err )
749+ } ) ;
750+ }
751+ }
752+ }
693753 } else {
694754 // Existing final status (APPROVED / BLOCKED / ERROR etc.); no need to
695755 // enqueue another scan here. We still run the blocking wait below so we
0 commit comments