Skip to content

Commit af47a2a

Browse files
authored
Surface pod container status for debugging (#42)
1 parent f657bf1 commit af47a2a

File tree

1 file changed

+21
-16
lines changed

1 file changed

+21
-16
lines changed

packages/k8s/src/k8s/index.ts

Lines changed: 21 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -480,10 +480,12 @@ export async function waitForPodPhases(
480480
maxTimeSeconds = DEFAULT_WAIT_FOR_POD_TIME_SECONDS
481481
): Promise<void> {
482482
const backOffManager = new BackOffManager(maxTimeSeconds)
483+
let podStatus: k8s.V1PodStatus | undefined = undefined
483484
let phase: PodPhase = PodPhase.UNKNOWN
484485
try {
485486
while (true) {
486-
phase = await getPodPhase(podName)
487+
podStatus = await getPodStatus(podName)
488+
phase = getPodPhaseFromStatus(podStatus)
487489
if (awaitingPhases.has(phase)) {
488490
return
489491
}
@@ -496,7 +498,13 @@ export async function waitForPodPhases(
496498
await backOffManager.backOff()
497499
}
498500
} catch (error) {
499-
throw new Error(`Pod ${podName} is unhealthy with phase status ${phase}`)
501+
throw new Error(
502+
`Pod ${podName} is unhealthy with phase status ${phase}. Pod message is ${
503+
podStatus?.message
504+
} and pod's container statuses are ${JSON.stringify(
505+
podStatus?.containerStatuses || ''
506+
)}`
507+
)
500508
}
501509
}
502510

@@ -519,22 +527,19 @@ export function getPrepareJobTimeoutSeconds(): number {
519527
return timeoutSeconds
520528
}
521529

522-
async function getPodPhase(podName: string): Promise<PodPhase> {
523-
const podPhaseLookup = new Set<string>([
524-
PodPhase.PENDING,
525-
PodPhase.RUNNING,
526-
PodPhase.SUCCEEDED,
527-
PodPhase.FAILED,
528-
PodPhase.UNKNOWN
529-
])
530-
const pod = await k8sApi.readNamespacedPod({
531-
name: podName,
532-
namespace: namespace()
533-
})
534-
if (!pod.status?.phase || !podPhaseLookup.has(pod.status.phase)) {
530+
const podPhaseLookup = new Set<string>([
531+
PodPhase.PENDING,
532+
PodPhase.RUNNING,
533+
PodPhase.SUCCEEDED,
534+
PodPhase.FAILED,
535+
PodPhase.UNKNOWN
536+
])
537+
538+
function getPodPhaseFromStatus(podStatus?: k8s.V1PodStatus): PodPhase {
539+
if (!podStatus || !podStatus.phase || !podPhaseLookup.has(podStatus.phase)) {
535540
return PodPhase.UNKNOWN
536541
}
537-
return pod.status?.phase as PodPhase
542+
return podStatus.phase as PodPhase
538543
}
539544

540545
async function isJobSucceeded(jobName: string): Promise<boolean> {

0 commit comments

Comments (0)