Skip to content

Commit f00468f

Browse files
authored
fix: add ready check, error log, and manual test (#87)
Signed-off-by: matttrach <matt.trachier@suse.com>
1 parent 8c1226f commit f00468f

9 files changed

Lines changed: 190 additions & 14 deletions

File tree

.github/workflows/manual.yaml

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
# Manually triggered workflow (workflow_dispatch) that runs the single
# TestOneBasic test through run_tests.sh inside the repository's nix dev shell.
name: manual

on: workflow_dispatch

env:
  AWS_REGION: us-west-2
  AWS_ROLE: arn:aws:iam::270074865685:role/terraform-module-ci-test
  GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
  ACME_SERVER_URL: https://acme-v02.api.letsencrypt.org/directory

# NOTE(review): write-all grants every token scope; narrow this to the scopes
# the job actually needs once they are known.
permissions: write-all

jobs:
  test_TestOneBasic:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          token: ${{secrets.GITHUB_TOKEN}}
          fetch-depth: 0
      # Assume the CI role and expose the temporary credentials as step outputs
      # so the test step can pass them into the nix shell explicitly.
      - id: aws-creds
        uses: aws-actions/configure-aws-credentials@v4
        with:
          role-to-assume: ${{env.AWS_ROLE}}
          role-session-name: ${{github.run_id}}
          aws-region: ${{env.AWS_REGION}}
          role-duration-seconds: 7200 # 2 hours
          output-credentials: true
      - name: install-nix
        run: |
          curl -L https://nixos.org/nix/install | sh
          source /home/runner/.nix-profile/etc/profile.d/nix.sh
          nix --version
          which nix
      - name: run_tests
        # --ignore-environment drops every variable that is not listed with
        # --keep, so each variable set in `env:` below must also be kept here.
        # ACME_SERVER_URL and RANCHER_INSECURE were previously missing and
        # therefore never reached the test process.
        shell: '/home/runner/.nix-profile/bin/nix develop --ignore-environment --extra-experimental-features nix-command --extra-experimental-features flakes --keep HOME --keep SSH_AUTH_SOCK --keep IDENTIFIER --keep GITHUB_TOKEN --keep GITHUB_OWNER --keep ZONE --keep AWS_ROLE --keep AWS_REGION --keep AWS_DEFAULT_REGION --keep AWS_ACCESS_KEY_ID --keep AWS_SECRET_ACCESS_KEY --keep AWS_SESSION_TOKEN --keep UPDATECLI_GPGTOKEN --keep UPDATECLI_GITHUB_TOKEN --keep UPDATECLI_GITHUB_ACTOR --keep GPG_SIGNING_KEY --keep NIX_SSL_CERT_FILE --keep NIX_ENV_LOADED --keep TERM --keep ACME_SERVER_URL --keep RANCHER_INSECURE --command bash -e {0}'
        env:
          AWS_ACCESS_KEY_ID: ${{ steps.aws-creds.outputs.aws-access-key-id }}
          AWS_SECRET_ACCESS_KEY: ${{ steps.aws-creds.outputs.aws-secret-access-key }}
          AWS_SESSION_TOKEN: ${{ steps.aws-creds.outputs.aws-session-token }}
          GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
          GITHUB_OWNER: rancher
          IDENTIFIER: ${{github.run_id}}
          ZONE: ${{secrets.ZONE}}
          ACME_SERVER_URL: https://acme-v02.api.letsencrypt.org/directory
          RANCHER_INSECURE: false
        run: |
          ./run_tests.sh -t TestOneBasic

.github/workflows/release.yaml

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -224,13 +224,57 @@ jobs:
224224
run: |
225225
./run_tests.sh -t TestDownstreamProd
226226
227+
# Removes leftover resources tagged with this run's identifier after all test
# jobs have finished, by calling run_tests.sh in cleanup-only mode (-c).
test_Cleanup:
  needs:
    - release
    - test_TestOneBasic
    - test_TestProdBasic
    - test_TestDownstreamBasic
    - test_TestDownstreamProd
  # Without a status-check function GitHub implies success(), which would skip
  # cleanup whenever a test job fails -- exactly when leftovers are most likely.
  # always() keeps the job running after failures; the release_pr check still
  # limits it to runs where a release PR was created.
  if: always() && needs.release.outputs.release_pr
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v4
      with:
        token: ${{secrets.GITHUB_TOKEN}}
        fetch-depth: 0
    - id: aws-creds
      uses: aws-actions/configure-aws-credentials@v4
      with:
        role-to-assume: ${{env.AWS_ROLE}}
        role-session-name: ${{github.run_id}}
        aws-region: ${{env.AWS_REGION}}
        role-duration-seconds: 7200 # 2 hours
        output-credentials: true
    - name: install-nix
      run: |
        curl -L https://nixos.org/nix/install | sh
        source /home/runner/.nix-profile/etc/profile.d/nix.sh
        nix --version
        which nix
    - name: cleanup
      shell: '/home/runner/.nix-profile/bin/nix develop --ignore-environment --extra-experimental-features nix-command --extra-experimental-features flakes --keep HOME --keep SSH_AUTH_SOCK --keep IDENTIFIER --keep GITHUB_TOKEN --keep GITHUB_OWNER --keep ZONE --keep AWS_ROLE --keep AWS_REGION --keep AWS_DEFAULT_REGION --keep AWS_ACCESS_KEY_ID --keep AWS_SECRET_ACCESS_KEY --keep AWS_SESSION_TOKEN --keep UPDATECLI_GPGTOKEN --keep UPDATECLI_GITHUB_TOKEN --keep UPDATECLI_GITHUB_ACTOR --keep GPG_SIGNING_KEY --keep NIX_SSL_CERT_FILE --keep NIX_ENV_LOADED --keep TERM --command bash -e {0}'
      env:
        AWS_ACCESS_KEY_ID: ${{ steps.aws-creds.outputs.aws-access-key-id }}
        AWS_SECRET_ACCESS_KEY: ${{ steps.aws-creds.outputs.aws-secret-access-key }}
        AWS_SESSION_TOKEN: ${{ steps.aws-creds.outputs.aws-session-token }}
        GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
        GITHUB_OWNER: rancher
        IDENTIFIER: ${{github.run_id}}
        ZONE: ${{secrets.ZONE}}
        ACME_SERVER_URL: https://acme-v02.api.letsencrypt.org/directory
        RANCHER_INSECURE: false
      run: |
        ./run_tests.sh -c "$IDENTIFIER"
269+
227270
report:
228271
needs:
229272
- release
230273
- test_TestOneBasic
231274
- test_TestProdBasic
232275
- test_TestDownstreamBasic
233276
- test_TestDownstreamProd
277+
- test_Cleanup
234278
if: success() && needs.release.outputs.release_pr #Ensure the test jobs succeeded, and that a release PR was created.
235279
runs-on: ubuntu-latest
236280
steps:

modules/rancher_bootstrap/rancher/main.tf

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -150,10 +150,10 @@ resource "helm_release" "rancher" {
150150
chart = "${path.root}/rancher-${local.rancher_version}.tgz" # "${local.rancher_helm_repository}/${local.rancher_channel}/rancher-${local.rancher_version}.tgz"
151151
namespace = "cattle-system"
152152
create_namespace = false
153-
wait = true
154-
wait_for_jobs = true
153+
wait = false
154+
wait_for_jobs = false
155155
force_update = true
156-
timeout = 2400 # 40m
156+
timeout = 1800 # 30m
157157

158158
set {
159159
name = "hostname"

modules/rancher_bootstrap/rancher_externalTLS/main.tf

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -67,10 +67,10 @@ resource "helm_release" "rancher" {
6767
chart = "${path.root}/rancher-${local.rancher_version}.tgz" #"${local.rancher_helm_repository}/${local.rancher_channel}/rancher-${local.rancher_version}.tgz"
6868
namespace = "cattle-system"
6969
create_namespace = false
70-
wait = true
71-
wait_for_jobs = true
70+
wait = false
71+
wait_for_jobs = false
7272
force_update = true
73-
timeout = 2400 # 40m
73+
timeout = 1800 # 30m
7474

7575
set {
7676
name = "hostname"

run_tests.sh

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,21 +3,28 @@
33
rerun_failed=false
44
specific_test=""
55
specific_package=""
6+
cleanup_id=""
67

7-
while getopts ":r:t:p:" opt; do
8+
while getopts ":r:t:p:c:" opt; do
89
case $opt in
910
r) rerun_failed=true ;;
1011
t) specific_test="$OPTARG" ;;
1112
p) specific_package="$OPTARG" ;;
13+
c) cleanup_id="$OPTARG" ;;
1214
\?) cat <<EOT >&2 && exit 1 ;;
1315
Invalid option -$OPTARG, valid options are
1416
-r to re-run failed tests
1517
-t to specify a specific test (eg. TestBase)
1618
-p to specify a specific test package (eg. base)
19+
-c to run clean up only with the given id (eg. abc123)
1720
EOT
1821
esac
1922
done
2023

24+
if [ -n "$cleanup_id" ]; then
25+
export IDENTIFIER="$cleanup_id"
26+
fi
27+
2128
run_tests() {
2229
local rerun=$1
2330
REPO_ROOT="$(git rev-parse --show-toplevel)"
@@ -99,13 +106,15 @@ if [ -z "$GITHUB_TOKEN" ]; then echo "GITHUB_TOKEN isn't set"; else echo "GITHUB
99106
if [ -z "$GITHUB_OWNER" ]; then echo "GITHUB_OWNER isn't set"; else echo "GITHUB_OWNER is set"; fi
100107
if [ -z "$ZONE" ]; then echo "ZONE isn't set"; else echo "ZONE is set"; fi
101108

102-
# Run tests initially
103-
run_tests false
109+
if [ -z "$cleanup_id" ]; then
110+
# Run tests initially
111+
run_tests false
104112

105-
# Check if we need to rerun failed tests
106-
if [ "$rerun_failed" = true ] && [ -f "/tmp/${IDENTIFIER}_failed_tests.txt" ]; then
107-
echo "Rerunning failed tests..."
108-
run_tests true
113+
# Check if we need to rerun failed tests
114+
if [ "$rerun_failed" = true ] && [ -f "/tmp/${IDENTIFIER}_failed_tests.txt" ]; then
115+
echo "Rerunning failed tests..."
116+
run_tests true
117+
fi
109118
fi
110119

111120
echo "Clearing leftovers with Id $IDENTIFIER in $AWS_REGION..."

test/scripts/getLogs.sh

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
#!/bin/bash

# Prints the result of `kubectl get <args>`; a failing kubectl call is ignored
# so the remaining diagnostics are still collected.
show() {
  kubectl get "$@" || true
}

show nodes
show all -A

# Three pod listings, 10 seconds apart (no wait before the first one) --
# presumably so pod state changes are visible in the log.
for pause in 0 10 10; do
  sleep "$pause"
  show pods -A
done

test/scripts/readyNodes.sh

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
#!/bin/bash
2+
set -x
23

34
JSONPATH="'{range .items[*]}
45
{.metadata.name}{\"\\t\"} \
@@ -46,6 +47,13 @@ while notReady; do
4647
fi
4748
done
4849

50+
echo "Nodes are ready..."
51+
52+
echo "nodes..."
4953
kubectl get nodes || true
54+
echo "all..."
5055
kubectl get all -A || true
56+
echo "pods..."
57+
kubectl get pods -A || true
58+
5159
exit 0

test/tests/one/one_test.go

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,12 +95,19 @@ func TestOneBasic(t *testing.T) {
9595

9696
_, err = terraform.InitAndApplyE(t, terraformOptions)
9797
if err != nil {
98+
t.Log("Test failed, tearing down...")
99+
util.GetErrorLogs(t, testDir + "/kubeconfig")
98100
util.Teardown(t, testDir, terraformOptions, keyPair)
99101
os.Remove(exampleDir + ".terraform.lock.hcl")
100102
sshAgent.Stop()
101103
t.Fatalf("Error creating cluster: %s", err)
102104
}
103-
t.Log("Test passed, tearing down...")
105+
util.CheckReady(t, testDir + "/kubeconfig")
106+
if t.Failed() {
107+
t.Log("Test failed...")
108+
} else {
109+
t.Log("Test passed...")
110+
}
104111
util.Teardown(t, testDir, terraformOptions, keyPair)
105112
os.Remove(exampleDir + ".terraform.lock.hcl")
106113
sshAgent.Stop()

test/tests/util.go

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ import (
1616
aws "github.com/gruntwork-io/terratest/modules/aws"
1717
g "github.com/gruntwork-io/terratest/modules/git"
1818
"github.com/gruntwork-io/terratest/modules/random"
19+
"github.com/gruntwork-io/terratest/modules/shell"
1920
"github.com/gruntwork-io/terratest/modules/terraform"
2021
"golang.org/x/oauth2"
2122
)
@@ -417,3 +418,52 @@ func Teardown(t *testing.T, directory string, options *terraform.Options, keyPai
417418
}
418419
aws.DeleteEC2KeyPair(t, keyPair)
419420
}
421+
422+
func GetErrorLogs(t *testing.T, kubeconfigPath string) {
423+
repoRoot, err := filepath.Abs(g.GetRepoRoot(t))
424+
if err != nil {
425+
t.Logf("Error getting git root directory: %v", err)
426+
}
427+
script, err := os.ReadFile(repoRoot + "/test/scripts/getLogs.sh")
428+
if err != nil {
429+
t.Logf("Error reading script: %v", err)
430+
}
431+
errorLogsScript := shell.Command{
432+
Command: "bash",
433+
Args: []string{"-c", string(script)},
434+
Env: map[string]string{
435+
"KUBECONFIG": kubeconfigPath,
436+
},
437+
}
438+
out, err := shell.RunCommandAndGetOutputE(t, errorLogsScript)
439+
if err != nil {
440+
t.Logf("Error running script: %s", err)
441+
}
442+
t.Logf("Log script output: %s", out)
443+
}
444+
445+
func CheckReady(t *testing.T, kubeconfigPath string) {
446+
repoRoot, err := filepath.Abs(g.GetRepoRoot(t))
447+
if err != nil {
448+
t.Logf("Error getting git root directory: %v", err)
449+
t.Fail()
450+
}
451+
script, err := os.ReadFile(repoRoot + "/test/scripts/readyNodes.sh")
452+
if err != nil {
453+
t.Logf("Error reading script: %v", err)
454+
t.Fail()
455+
}
456+
readyScript := shell.Command{
457+
Command: "bash",
458+
Args: []string{"-c", string(script)},
459+
Env: map[string]string{
460+
"KUBECONFIG": kubeconfigPath,
461+
},
462+
}
463+
out, err := shell.RunCommandAndGetOutputE(t, readyScript)
464+
if err != nil {
465+
t.Logf("Error running script: %s", err)
466+
t.Fail()
467+
}
468+
t.Logf("Ready script output: %s", out)
469+
}

0 commit comments

Comments
 (0)