Skip to content

Commit 491c036

Browse files
authored
Fix panic with Job resource diffing against an unreachable cluster (#3024)
### Proposed changes This PR ensures that we do not make a k8s API request during the provider's diff if there is an unreachable cluster. This currently occurs when the Pulumi program contains a Job resource with the `replaceUnready` annotation set to true. A panic would occur if we attempt to make the API call since our clients are nil. #### Testing done: 1. Created a repro test case that fails with a panic (https://github.com/pulumi/pulumi-kubernetes/actions/runs/9228447658/job/25392833842?pr=3024) 2. Added logic to prevent the panic, and the test passes subsequently without intervention (https://github.com/pulumi/pulumi-kubernetes/actions/runs/9228685506/job/25393667599?pr=3024) 3. Manual validation to ensure panic isn't triggered. ### Related issues (optional) Fixes: #3022
1 parent b6f6eff commit 491c036

File tree

5 files changed

+95
-2
lines changed

5 files changed

+95
-2
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
## Unreleased
22

33
- Update to pulumi-java v0.12.0 #3025 (https://github.com/pulumi/pulumi-kubernetes/pull/3025)
4+
- Fixed a panic that occurs when diffing Job resources containing `replaceUnready` annotations against an unreachable cluster. (https://github.com/pulumi/pulumi-kubernetes/pull/3024)
45

56
## 4.12.0 (May 21, 2024)
67

provider/pkg/provider/provider.go

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1743,8 +1743,12 @@ func (k *kubeProvider) Diff(ctx context.Context, req *pulumirpc.DiffRequest) (*p
17431743
}
17441744

17451745
if metadata.ReplaceUnready(newInputs) {
1746-
switch newInputs.GetKind() {
1747-
case "Job":
1746+
switch {
1747+
case k.clusterUnreachable:
1748+
// Check if the cluster is unreachable. If it is, we can't check the status of the resource; otherwise
1749+
// a panic occurs due to the client being nil.
1750+
_ = k.host.Log(ctx, diag.Warning, urn, "Cluster is unreachable, skipping replaceUnready check")
1751+
case newInputs.GetKind() == "Job":
17481752
// Fetch current Job status and check point-in-time readiness. Errors are ignored.
17491753
if live, err := k.readLiveObject(oldLive); err == nil {
17501754
jobChecker := checkjob.NewJobChecker()
@@ -2677,6 +2681,7 @@ func (k *kubeProvider) gvkFromURN(urn resource.URN) (schema.GroupVersionKind, er
26772681

26782682
func (k *kubeProvider) readLiveObject(obj *unstructured.Unstructured) (*unstructured.Unstructured, error) {
26792683
contract.Assertf(obj.GetName() != "", "expected object name to be nonempty: %v", obj)
2684+
contract.Assertf(k.clientSet != nil, "expected Kubernetes client-set to be non-nil")
26802685
rc, err := k.clientSet.ResourceClientForObject(obj)
26812686
if err != nil {
26822687
return nil, err
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
name: job-unreachable
2+
runtime: yaml
3+
resources:
4+
provider:
5+
type: pulumi:providers:kubernetes
6+
job:
7+
type: kubernetes:batch/v1:Job
8+
properties:
9+
metadata:
10+
name: test-job-unreachable
11+
annotations:
12+
pulumi.com/replaceUnready: "true"
13+
spec:
14+
template:
15+
metadata:
16+
name: test-job-unreachable
17+
spec:
18+
containers:
19+
- name: test-job-unreachable-container
20+
image: busybox
21+
# This command will cause the container to exit with a non-zero status code, and fail the job.
22+
command:
23+
- sh
24+
- -c
25+
- exit 1
26+
restartPolicy: Never
27+
options:
28+
provider: ${provider}
29+
customTimeouts:
30+
create: 15s
31+
update: 15s
32+
delete: 15s
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
name: job-unreachable
2+
runtime: yaml
3+
resources:
4+
provider:
5+
type: pulumi:providers:kubernetes
6+
properties:
7+
kubeconfig: "fake-kubeconfig-data"
8+
job:
9+
type: kubernetes:batch/v1:Job
10+
properties:
11+
metadata:
12+
name: test-job-unreachable
13+
annotations:
14+
pulumi.com/replaceUnready: "true"
15+
spec:
16+
template:
17+
metadata:
18+
name: test-job-unreachable
19+
spec:
20+
containers:
21+
- name: test-job-unreachable-container
22+
image: busybox
23+
# This command will cause the container to exit with a non-zero status code, and fail the job.
24+
command:
25+
- sh
26+
- -c
27+
- exit 1
28+
restartPolicy: Never
29+
options:
30+
provider: ${provider}
31+
customTimeouts:
32+
create: 15s
33+
update: 15s
34+
delete: 15s

tests/sdk/java/yamlv2_test.go

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"testing"
55

66
"github.com/pulumi/providertest/pulumitest"
7+
"github.com/stretchr/testify/assert"
78
)
89

910
// TestYamlV2 deploys a complex stack using yaml/v2 package.
@@ -21,3 +22,23 @@ func TestYamlV2(t *testing.T) {
2122
test.Preview()
2223
test.Up()
2324
}
25+
26+
// TestJobUnreachable ensures that a panic does not occur when diffing Job resources against an unreachable API server.
27+
// https://github.com/pulumi/pulumi-kubernetes/issues/3022
28+
func TestJobUnreachable(t *testing.T) {
29+
test := pulumitest.NewPulumiTest(t, "testdata/job-unreachable")
30+
t.Logf("into %s", test.Source())
31+
t.Cleanup(func() {
32+
test.Destroy()
33+
})
34+
test.Preview()
35+
36+
// Create the job, but expect it to fail as the job is meant to fail.
37+
_, err := test.CurrentStack().Up(test.Context())
38+
assert.ErrorContains(t, err, `but the Kubernetes API server reported that it failed to fully initialize or become live`)
39+
40+
// Re-run the Pulumi program with a malformed kubeconfig to simulate an unreachable API server.
41+
// This should not panic, and preview should succeed.
42+
test.UpdateSource("testdata/job-unreachable/step2")
43+
test.Preview()
44+
}

0 commit comments

Comments
 (0)