chore: wip

devthejo · devthejo · commit dd71b328d811 · 2025-12-04T18:46:22.000+01:00
diff --git a/services/api/src/index.ts b/services/api/src/index.ts
@@ -150,6 +150,18 @@ const AUDIT_RATE_LIMIT_REDIS_URL =
 
 const auditRedis = createRedisClient(AUDIT_RATE_LIMIT_REDIS_URL);
 
+// When a security report stays in PENDING too long (e.g. because workers
+// previously failed due to infrastructure issues), the next download makes a
+// best-effort attempt to re-enqueue its scan job. Both knobs are milliseconds:
+// PENDING_RETRY_AFTER_MS is the minimum report age before a retry (default 5
+// min); PENDING_RETRY_LOCK_MS is the per-package/version Redis lock TTL (60 s).
+const PENDING_RETRY_AFTER_MS = Number(
+  process.env.SECURITY_PENDING_RETRY_AFTER_MS || 5 * 60 * 1000
+);
+const PENDING_RETRY_LOCK_MS = Number(
+  process.env.SECURITY_PENDING_RETRY_LOCK_MS || 60_000
+);
+
 interface EffectiveAuditConfig {
   windowSeconds: number;
   // Max new audits allowed per base window (24h by default)
@@ -689,7 +701,55 @@ app.post(
           });
         }
       } else if (existing.status === 'PENDING') {
-        // Already enqueued previously; we just wait below.
+        // This version already has a PENDING report. In normal operation that
+        // means a scan job is in-flight or recently completed. However, if the
+        // report has been stuck in PENDING for a long time (for example due to
+        // past infrastructure issues), we attempt a best-effort re-enqueue of
+        // the scan job, guarded by a small Redis lock to avoid hammering the
+        // queue on every download.
+        if (PENDING_RETRY_AFTER_MS > 0) {
+          const ageMs = now.getTime() - existing.updated_at.getTime();
+          if (ageMs >= PENDING_RETRY_AFTER_MS) {
+            const lockTtlMs = PENDING_RETRY_LOCK_MS > 0 ? PENDING_RETRY_LOCK_MS : 60_000;
+            const lockKey = `scan:retry:${name}:${normalizedVersion}`;
+
+            try {
+              // Basic Redis lock using NX + PX. Only one process will receive
+              // an 'OK' reply within the TTL window and perform the retry.
+              const resSet = await (auditRedis as any).set(
+                lockKey,
+                '1',
+                'PX',
+                lockTtlMs,
+                'NX'
+              );
+
+              if (resSet === 'OK') {
+                // eslint-disable-next-line no-console
+                console.log('Re-enqueueing stale PENDING scan job', {
+                  packageName: name,
+                  version: normalizedVersion,
+                  ageMs
+                });
+
+                await enqueueScanJob({
+                  packageName: name,
+                  version: normalizedVersion,
+                  orgId: orgId ?? null,
+                  policyId: policy.id
+                });
+              }
+            } catch (err) {
+              // eslint-disable-next-line no-console
+              console.error('Failed to re-enqueue stale PENDING scan job', {
+                packageName: name,
+                version: normalizedVersion,
+                error:
+                  err instanceof Error ? err.message : String(err)
+              });
+            }
+          }
+        }
       } else {
         // Existing final status (APPROVED / BLOCKED / ERROR etc.); no need to
         // enqueue another scan here. We still run the blocking wait below so we