@@ -490,7 +490,7 @@ func (r *reconciler) syncPendingJob(ctx context.Context, pj *prowv1.ProwJob) (*r
490490 r .log .WithField ("name" , pj .ObjectMeta .Name ).Debug ("Delete Pod." )
491491 return nil , ctrlruntimeclient .IgnoreNotFound (client .Delete (ctx , pod ))
492492 }
493- } else if pod . Status . Reason == Terminated {
493+ } else if isPodTerminated ( pod ) {
494494 // Pod was terminated.
495495 if pj .Spec .ErrorOnTermination {
496496 // ErrorOnTermination is enabled, complete the PJ and mark it as
@@ -701,6 +701,33 @@ func (r *reconciler) syncPendingJob(ctx context.Context, pj *prowv1.ProwJob) (*r
701701 return nil , nil
702702}
703703
704+ func isPodTerminated (pod * corev1.Pod ) bool {
705+ // If there was a Graceful node shutdown, the Pod's status will have a
706+ // reason set to "Terminated":
707+ // https://kubernetes.io/docs/concepts/architecture/nodes/#graceful-node-shutdown
708+ if pod .Status .Reason == Terminated {
709+ return true
710+ }
711+
712+ for _ , condition := range pod .Status .Conditions {
713+ // If the node does no longer exist and the pod gets garbage collected,
714+ // this condition will be set:
715+ // https://kubernetes.io/docs/concepts/workloads/pods/disruptions/#pod-disruption-conditions
716+ if condition .Reason == "DeletionByPodGC" {
717+ return true
718+ }
719+
720+ // On GCP, before a new spot instance is started, the old pods are garbage
721+ // collected (if they have not been already by the Kubernetes PodGC):
722+ // https://github.com/kubernetes/cloud-provider-gcp/blob/25e5dcc715781316bc5e39f8b17c0d5b313453f7/cmd/gcp-controller-manager/node_csr_approver.go#L1035-L1058
723+ if condition .Reason == "DeletionByGCPControllerManager" {
724+ return true
725+ }
726+ }
727+
728+ return false
729+ }
730+
704731// syncTriggeredJob syncs jobs that do not yet have an associated test workload running
705732func (r * reconciler ) syncTriggeredJob (ctx context.Context , pj * prowv1.ProwJob ) (* reconcile.Result , error ) {
706733 prevPJ := pj .DeepCopy ()
0 commit comments