@@ -256,9 +256,20 @@ jobs:
256256 exit 0
257257 fi
258258 for JOB_ID in $FAILED_JOBS; do
259- # Download logs (may be ZIP or plain text depending on GitHub API)
259+ # First check job metadata for runner communication errors
260+ # This catches "The operation was canceled" which appears in annotations, not logs
261+ JOB_INFO=$(gh api "/repos/${{ github.repository }}/actions/jobs/${JOB_ID}" 2>/dev/null || true)
262+ if echo "$JOB_INFO" | grep -qiE "operation was canceled|runner.*lost|lost communication"; then
263+ echo "Detected: Runner lost communication or operation canceled (job $JOB_ID)"
264+ SPOT_TERMINATION=true
265+ break
266+ fi
267+
268+ # Try to download logs - if we can't, likely infrastructure failure
260269 if ! gh api "/repos/${{ github.repository }}/actions/jobs/${JOB_ID}/logs" > job_log.zip 2>/dev/null; then
261- continue
270+ echo "Detected: Cannot download logs, likely infrastructure failure (job $JOB_ID)"
271+ SPOT_TERMINATION=true
272+ break
262273 fi
263274 # Try to unzip if it's a ZIP file, otherwise use as-is
264275 if file job_log.zip | grep -q "Zip archive"; then
@@ -424,9 +435,20 @@ jobs:
424435 exit 0
425436 fi
426437 for JOB_ID in $FAILED_JOBS; do
427- # Download logs (may be ZIP or plain text depending on GitHub API)
438+ # First check job metadata for runner communication errors
439+ # This catches "The operation was canceled" which appears in annotations, not logs
440+ JOB_INFO=$(gh api "/repos/${{ github.repository }}/actions/jobs/${JOB_ID}" 2>/dev/null || true)
441+ if echo "$JOB_INFO" | grep -qiE "operation was canceled|runner.*lost|lost communication"; then
442+ echo "Detected: Runner lost communication or operation canceled (job $JOB_ID)"
443+ SPOT_TERMINATION=true
444+ break
445+ fi
446+
447+ # Try to download logs - if we can't, likely infrastructure failure
428448 if ! gh api "/repos/${{ github.repository }}/actions/jobs/${JOB_ID}/logs" > job_log.zip 2>/dev/null; then
429- continue
449+ echo "Detected: Cannot download logs, likely infrastructure failure (job $JOB_ID)"
450+ SPOT_TERMINATION=true
451+ break
430452 fi
431453 # Try to unzip if it's a ZIP file, otherwise use as-is
432454 if file job_log.zip | grep -q "Zip archive"; then
@@ -576,9 +598,20 @@ jobs:
576598 exit 0
577599 fi
578600 for JOB_ID in $FAILED_JOBS; do
579- # Download logs (may be ZIP or plain text depending on GitHub API)
601+ # First check job metadata for runner communication errors
602+ # This catches "The operation was canceled" which appears in annotations, not logs
603+ JOB_INFO=$(gh api "/repos/${{ github.repository }}/actions/jobs/${JOB_ID}" 2>/dev/null || true)
604+ if echo "$JOB_INFO" | grep -qiE "operation was canceled|runner.*lost|lost communication"; then
605+ echo "Detected: Runner lost communication or operation canceled (job $JOB_ID)"
606+ SPOT_TERMINATION=true
607+ break
608+ fi
609+
610+ # Try to download logs - if we can't, likely infrastructure failure
580611 if ! gh api "/repos/${{ github.repository }}/actions/jobs/${JOB_ID}/logs" > job_log.zip 2>/dev/null; then
581- continue
612+ echo "Detected: Cannot download logs, likely infrastructure failure (job $JOB_ID)"
613+ SPOT_TERMINATION=true
614+ break
582615 fi
583616 # Try to unzip if it's a ZIP file, otherwise use as-is
584617 if file job_log.zip | grep -q "Zip archive"; then
0 commit comments