Skip to content

Commit 886d4f4

Browse files
Retry tests on network errors, improve test docs (#64)
1 parent 9b0da08 commit 886d4f4

File tree

7 files changed

+122
-31
lines changed

7 files changed

+122
-31
lines changed

.github/workflows/test-go.yaml

+27-1
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,9 @@ name: Run code tests
22

33
on:
44
push:
5+
paths:
6+
- '.github/workflows/**'
7+
- 'src/**'
58
workflow_call:
69
secrets:
710
DNSIMPLE_API_TOKEN:
@@ -37,11 +40,17 @@ jobs:
3740
env:
3841
DNSIMPLE_API_TOKEN: ${{ secrets.DNSIMPLE_API_TOKEN }}
3942
DNSIMPLE_ZONE_NAME: ${{ secrets.DNSIMPLE_ZONE_NAME }}
43+
shell: 'script -q -e -c "bash {0}"'
44+
timeout-minutes: 15
4045
run: |
4146
export TEST_ASSET_KUBE_APISERVER=${{ steps.kubebuilder.outputs.BIN_DIR }}/kube-apiserver
4247
export TEST_ASSET_ETCD=${{ steps.kubebuilder.outputs.BIN_DIR }}/etcd
4348
export TEST_ASSET_KUBECTL=${{ steps.kubebuilder.outputs.BIN_DIR }}/kubectl
4449
export TEST_ZONE_NAME="${DNSIMPLE_ZONE_NAME}." # add trailing dot
50+
51+
YLW='\033[1;33m'
52+
NC='\033[0m'
53+
4554
echo """apiVersion: v1
4655
kind: Secret
4756
metadata:
@@ -51,4 +60,21 @@ jobs:
5160
token: $DNSIMPLE_API_TOKEN
5261
""" > testdata/dnsimple-token.yaml
5362
cd src
54-
go test -v .
63+
64+
# Occasionally, transient network errors can make tests fail
65+
attempt=0
66+
max_attempts=3
67+
test_exit_code=0
68+
while [ $attempt -lt $max_attempts ]; do
69+
attempt=$((attempt+1))
70+
output=$(go test -v . 2>&1 | tee /dev/tty)
71+
test_exit_code=$?
72+
73+
if echo "$output" | grep -q -e "Temporary failure in name resolution" -e "connection reset by peer" -e "i/o timeout"; then
74+
echo -e "${YLW}Detected transient network error. Retrying... ($attempt/$max_attempts)${NC}"
75+
else
76+
break
77+
fi
78+
done
79+
80+
exit $test_exit_code

.github/workflows/test-kubernetes.yaml

+45-14
Original file line numberDiff line numberDiff line change
@@ -12,26 +12,31 @@ on:
1212
jobs:
1313
test:
1414
runs-on: ubuntu-latest
15+
strategy:
16+
max-parallel: 3
17+
matrix:
18+
# Always quote versions so YAML does not parse them as floats and drop the trailing zero (1.30 -> 1.3)
19+
# https://kubernetes.io/releases
20+
k8s-version: ["1.30", "1.31", "1.32"]
21+
# https://cert-manager.io/docs/releases/ (Always include the patch version)
22+
cm-version: ["1.16.0", "1.17.0"]
1523

1624
steps:
1725
- uses: actions/checkout@v4
1826

19-
2027
- name: Start minikube
2128
uses: medyagh/setup-minikube@master
2229
with:
23-
kubernetes-version: 1.31.3
24-
30+
kubernetes-version: ${{ matrix.k8s-version }}
2531

2632
- name: Install cert-manager, patch upstream dns servers, wait for readiness
2733
run: |
28-
echo "Target cert-manager version: ${{ vars.TARGET_CERT_MANAGER_VERSION }}"
29-
kubectl apply -f https://github.com/cert-manager/cert-manager/releases/download/${{ vars.TARGET_CERT_MANAGER_VERSION }}/cert-manager.yaml
34+
echo "Target cert-manager version: ${{ matrix.cm-version }}"
35+
kubectl apply -f https://github.com/cert-manager/cert-manager/releases/download/v${{ matrix.cm-version }}/cert-manager.yaml
3036
# Patch cert-manager to use DNSimple's nameservers for faster propagation-checks
3137
kubectl patch deployment cert-manager -n cert-manager --type='json' -p='[{"op": "add", "path": "/spec/template/spec/containers/0/args/-", "value": "--dns01-recursive-nameservers=ns1.dnsimple.com:53"}]'
3238
kubectl wait --for=condition=available --timeout=600s deployment/cert-manager-webhook -n cert-manager
3339
34-
3540
- name: Install cert-manager-webhook-dnsimple, wait for readiness
3641
env:
3742
DNSIMPLE_API_TOKEN: ${{ secrets.DNSIMPLE_API_TOKEN }}
@@ -48,7 +53,7 @@ jobs:
4853
4954
helm -n cert-manager list
5055
51-
max_wait_time_seconds=600
56+
max_wait_time_seconds=800
5257
sleep_between_iterations=10
5358
5459
start=$(date +%s)
@@ -57,7 +62,7 @@ jobs:
5762
echo ""
5863
echo "Awaiting succesful deployment for max ${max_wait_time_seconds} seconds or until $(date --date="@$end")"
5964
while [ $(date +%s) -le $end ]; do
60-
echo "[i] New iteration at $(date +%s)"
65+
echo "[i] New iteration at $(date)"
6166
kubectl -n cert-manager get po
6267
6368
if [ $(kubectl -n cert-manager get po | grep Crash | wc -l) -gt 0 ]; then
@@ -101,11 +106,10 @@ jobs:
101106
""" > certificate.yaml
102107
kubectl apply -f certificate.yaml
103108
104-
105109
- name: Assert that the DNS record was created
106110
env:
107111
DNSIMPLE_ZONE_NAME: ${{ secrets.DNSIMPLE_ZONE_NAME }}
108-
timeout-minutes: 10
112+
timeout-minutes: 10
109113
run: |
110114
while true; do
111115
if nslookup -type=TXT _acme-challenge.gh-action-test.$DNSIMPLE_ZONE_NAME ns1.dnsimple.com; then
@@ -114,9 +118,36 @@ jobs:
114118
sleep 30
115119
done
116120
117-
121+
# This step can time out, but it timing out doesn't necessarily mean that the webhook is not working.
122+
# Timeouts mainly happen due to the environment of the runner and/or parallelism, thus such occurrences will simply be dismissed as warnings.
118123
- name: Check the certificate status
119124
run: |
120-
kubectl wait --for=condition=ready --timeout=600s certificate/dnsimple-test
121-
# this should not be necessary since the certificate is usually ready once the DNS record is propagated
122-
kubectl get certificate dnsimple-test -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}' | grep True
125+
max_wait_time_seconds=300
126+
end=$(( $(date +%s) + $max_wait_time_seconds ))
127+
start=$(date +%s)
128+
129+
sleep 5
130+
while [ $(date +%s) -le $end ]; do
131+
OUT_CRT=$(kubectl get certificate/dnsimple-test -o jsonpath='{.status.conditions}')
132+
OUT_CRQ=$(kubectl get CertificateRequest -o json)
133+
134+
echo "Certificate:"
135+
echo "$OUT_CRT"
136+
137+
echo "CertificateRequest:"
138+
echo "$OUT_CRQ" | jq .items[0].status.conditions
139+
140+
if [ $(echo "$OUT_CRT" | grep -iE "Failed|Denied" | wc -l) -gt 0 ]; then
141+
echo "::Error title=Certificate resource errored::The certificate ressource has an error"
142+
exit 1
143+
fi
144+
145+
if [ $(kubectl get certificate dnsimple-test -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}') == "True" ]; then
146+
echo "Certificate is ready after $(( $(date +%s) - $start )) seconds"
147+
exit 0
148+
fi
149+
sleep 20
150+
echo -e "\n[i] New iteration at $(date)"
151+
done
152+
153+
echo "::warning title=Certificate timed out::Have timed out waiting for certificate"

.github/workflows/workflow_full-test-suite.yaml

+5-3
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,14 @@
11
name: Run full test suite
22

33
on:
4-
push:
5-
branches:
6-
- master
4+
# To prevent this time-intensive suite from running redundantly, it will only run on PRs.
5+
# If a PR is merged, it also creates a push and thus this workflow unnecessarily runs again.
76
pull_request:
87
branches:
98
- master
9+
paths:
10+
- '.github/workflows/**'
11+
- 'src/**'
1012

1113
jobs:
1214
code-test:

Makefile

+3-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
GO ?= $(shell which go)
22
OS ?= $(shell $(GO) env GOOS)
33
ARCH ?= $(shell $(GO) env GOARCH)
4-
KUBE_VERSION=1.25.0
4+
5+
# Available versions: https://storage.googleapis.com/kubebuilder-tools
6+
KUBE_VERSION=$(shell curl -s https://storage.googleapis.com/kubebuilder-tools | grep -oP 'kubebuilder-tools-\K[0-9]+\.[0-9]+\.[0-9]+' | sort -V | tail -n 1 || echo "1.30.0")
57

68
# required by go tests
79
export TEST_ASSET_ETCD=../_test/kubebuilder/etcd

README.md

+24-9
Original file line numberDiff line numberDiff line change
@@ -75,26 +75,34 @@ The Helm chart accepts the following values:
7575
All cert-manager webhooks have to pass the DNS01 provider conformance testing suite.
7676
7777
### Pull requests
78-
Prerequisites for PRs are implemented as GitHub-actions. All tests should pass before a PR is merged:
79-
- the `cert-manager` conformance suite is run with provided kubebuilder fixtures
80-
- a custom test suite running on a working k8s cluster (using `minikube`) is executed as well
78+
Prerequisites for PRs are implemented as GitHub-actions. All tests should pass before a PR is merged:
79+
- The `cert-manager` conformance suite is run with provided kubebuilder fixtures
80+
- A custom test suite running on a working k8s cluster (using `minikube`) is executed as well
8181
8282
### Local testing
8383
#### Test suite
84-
You can also run tests locally, as specified in the `Makefile`:
84+
Tests can be run locally according to the `Makefile`:
8585
86-
1. Set-up `testdata/` according to its [README][3].
87-
- `dnsimple-token.yaml` should be filled with a valid token (for either the sandbox or production environment)
88-
- `dnsimple.env` should contain the remaining environment variables (non sensitive)
89-
2. Execute the test suite:
86+
1. Set up `testdata/` according to its [README][3]
87+
- `dnsimple-token.yaml` should be filled with a valid token (for either the sandbox or production environment)
88+
89+
2. Set env var `TEST_ZONE_NAME`, adding a trailing dot
90+
- `export TEST_ZONE_NAME="<zone>."`
91+
92+
3. Execute the test suite:
9093
```bash
9194
make test
9295
```
96+
97+
> [!NOTE]
98+
> Kubebuilder will always use the latest version available.
99+
93100
#### In-cluster testing
94101
1. Install cert-manager:
95102
```bash
96103
kubectl apply -f https://github.com/cert-manager/cert-manager/releases/download/v1.14.3/cert-manager.yaml
97104
```
105+
98106
2. Install the webhook:
99107
```bash
100108
helm install cert-manager-webhook-dnsimple \
@@ -103,6 +111,7 @@ You can also run tests locally, as specified in the `Makefile`:
103111
--set clusterIssuer.staging.enabled=true \
104112
./charts/cert-manager-webhook-dnsimple
105113
```
114+
106115
3. Test away... You can create a sample certificate to ensure the webhook is working correctly:
107116
```bash
108117
kubectl apply -f - <<<EOF
@@ -120,10 +129,16 @@ You can also run tests locally, as specified in the `Makefile`:
120129
EOF
121130
```
122131
132+
#### GitHub Actions
133+
Each PR is vetted against a full test suite that tests changes against multiple versions of both Kubernetes and Cert-Manager using a matrix strategy.
134+
Generally, the tested k8s versions are the [last 3 supported minor versions](https://kubernetes.io/releases/).
135+
Cert-Manager is tested using the [last 2 supported versions](https://cert-manager.io/docs/releases/).
136+
123137
124138
## Releases
125139
### Docker images
126-
Every push to `master` or on a pull-request triggers the upload of a new docker image to the GitHub Container Registry (this is configured through github actions). These images should **not considered stable** and are tagged with `commit-<hash>`. **We recommend using a specific version tag for production deployments instead.**
140+
Every push to `master` and every pull request triggers the upload of a new Docker image to the GitHub Container Registry (this is configured through GitHub Actions).
141+
These images should **not be considered stable** and are tagged with `commit-<hash>`. **We recommend using a specific version tag for production deployments instead.**
127142
128143
Tagged images are considered stable; these are the ones referenced by the default Helm values.
129144

src/main_test.go

+16-2
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,36 @@
11
package main
22

33
import (
4+
"fmt"
45
"os"
6+
"strings"
57
"testing"
68

79
dns "github.com/cert-manager/cert-manager/test/acme"
10+
"sigs.k8s.io/controller-runtime/pkg/log"
11+
"sigs.k8s.io/controller-runtime/pkg/log/zap"
812
)
913

10-
var (
11-
zone = os.Getenv("TEST_ZONE_NAME")
14+
const (
1215
testdata_dir = "../testdata"
1316
)
1417

18+
var (
19+
zone = os.Getenv("TEST_ZONE_NAME")
20+
)
21+
1522
func TestRunsSuite(t *testing.T) {
23+
log.SetLogger(zap.New(zap.UseFlagOptions(&zap.Options{})))
24+
1625
// The manifest path should contain a file named config.json that is a
1726
// snippet of valid configuration that should be included on the
1827
// ChallengeRequest passed as part of the test cases.
1928

29+
// Ensure trailing dot
30+
if !strings.HasSuffix(zone, ".") {
31+
zone = fmt.Sprintf("%s.", zone)
32+
}
33+
2034
fixture := dns.NewFixture(&dnsimpleDNSProviderSolver{},
2135
dns.SetResolvedZone(zone),
2236
dns.SetAllowAmbientCredentials(false),

testdata/README.md

+2-1
Original file line numberDiff line numberDiff line change
@@ -6,4 +6,5 @@ Copy the `dnsimple-token.yaml.example` example file removing the `.example` suff
66
$ cp dnsimple-token.yaml{.example,}
77
```
88

9-
Replace the placeholders for the API token in `dnsimple-token.yaml`. The API token can be generated in your DNSimple account settings in the automation tab.
9+
Replace the placeholders for the API token in `dnsimple-token.yaml`.
10+
The API token can be generated in your DNSimple account settings in the automation tab.

0 commit comments

Comments
 (0)