Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/release.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -231,7 +231,7 @@ jobs:
- test_TestProdBasic
- test_TestDownstreamBasic
- test_TestDownstreamProd
if: needs.release.outputs.release_pr
if: always() && needs.release.outputs.release_pr
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
Expand Down
32 changes: 24 additions & 8 deletions modules/rancher_bootstrap/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -86,30 +86,46 @@ resource "terraform_data" "create" {
MAX=2
EXITCODE=1
ATTEMPTS=0
E=1
E1=0
while [ $EXITCODE -gt 0 ] && [ $ATTEMPTS -lt $MAX ]; do
E=$EXITCODE
A=0
while [ $E -gt 0 ] && [ $A -lt $MAX ]; do
timeout 3600 terraform apply -var-file="inputs.tfvars" -auto-approve -state="${local.deploy_path}/tfstate"
timeout 1h terraform apply -var-file="inputs.tfvars" -auto-approve -state="${local.deploy_path}/tfstate"
E=$?
if [ $E -eq 124 ]; then echo "Apply timed out after 1 hour"; fi
A=$((A+1))
done
# don't destroy if the last attempt fails
if [ $E -gt 0 ] && [ $ATTEMPTS != $((MAX-1)) ]; then
A1=0
E1=$EXITCODE
while [ $E1 -gt 0 ] && [ $A1 -lt $MAX ]; do
timeout 3600 terraform destroy -var-file="inputs.tfvars" -auto-approve -state="${local.deploy_path}/tfstate"
timeout 1h terraform destroy -var-file="inputs.tfvars" -auto-approve -state="${local.deploy_path}/tfstate"
E1=$?
if [ $E1 -eq 124 ]; then echo "Apply timed out after 1 hour"; fi
A1=$((A1+1))
done
fi
EXITCODE=$((E+E1))
if [ $E -gt 0 ]; then
echo "apply failed..."
fi
if [ $E1 -gt 0 ]; then
echo "destroy failed..."
fi
if [ $E -gt 0 ] || [ $E1 -gt 0 ]; then
EXITCODE=1
else
EXITCODE=0
fi
ATTEMPTS=$((ATTEMPTS+1))
echo "wait 30 seconds between attempts..."
sleep 30
if [ $EXITCODE -gt 0 ] && [ $ATTEMPTS -lt $MAX ]; then
echo "wait 30 seconds between attempts..."
sleep 30
fi
done

if [ $ATTEMPTS -eq $MAX ]; then echo "max attempts reached..."; fi
if [ $EXITCODE -ne 0 ]; then echo "failure, exit code $EXITCODE..."; fi
if [ $EXITCODE -eq 0 ]; then echo "success..."; fi
exit $EXITCODE
EOT
}
Expand Down
28 changes: 20 additions & 8 deletions modules/rancher_bootstrap/rancher/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -150,10 +150,10 @@ resource "helm_release" "rancher" {
chart = "${path.root}/rancher-${local.rancher_version}.tgz" # "${local.rancher_helm_repository}/${local.rancher_channel}/rancher-${local.rancher_version}.tgz"
namespace = "cattle-system"
create_namespace = false
wait = true
wait_for_jobs = true
wait = false
wait_for_jobs = false
force_update = true
timeout = 3600 # 60m
timeout = 1800 # 30m

set {
name = "hostname"
Expand Down Expand Up @@ -205,22 +205,32 @@ resource "helm_release" "rancher" {
}
}

resource "time_sleep" "settle_after_rancher" {
resource "terraform_data" "wait_for_rancher" {
depends_on = [
time_sleep.settle_before_rancher,
kubernetes_manifest.issuer,
terraform_data.wait_for_nginx,
terraform_data.build_chart,
helm_release.rancher,
]
create_duration = "120s"
provisioner "local-exec" {
command = <<-EOT
cd ${abspath(path.root)} || true
chmod +x ${abspath(path.module)}/runningPods.sh
echo "using kubeconfig located at $KUBECONFIG"
${abspath(path.module)}/runningPods.sh
EOT
}
}


resource "terraform_data" "get_public_cert_info" {
depends_on = [
time_sleep.settle_before_rancher,
kubernetes_manifest.issuer,
terraform_data.wait_for_nginx,
terraform_data.build_chart,
helm_release.rancher,
time_sleep.settle_after_rancher,
terraform_data.wait_for_rancher,
]
provisioner "local-exec" {
command = <<-EOT
Expand Down Expand Up @@ -268,8 +278,10 @@ resource "rancher2_bootstrap" "admin" {
depends_on = [
time_sleep.settle_before_rancher,
kubernetes_manifest.issuer,
terraform_data.wait_for_nginx,
terraform_data.build_chart,
helm_release.rancher,
time_sleep.settle_after_rancher,
terraform_data.wait_for_rancher,
terraform_data.get_public_cert_info,
]
password = random_password.password.result
Expand Down
67 changes: 67 additions & 0 deletions modules/rancher_bootstrap/rancher/runningPods.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
#!/bin/bash
set -x

# JSONPath template handed to `kubectl get pods -A -o jsonpath=...` by notReady.
# For each item it emits the pod name, namespace and status phase, separated by
# tabs, followed by a newline.
# NOTE(review): the value deliberately (or accidentally) contains literal single
# quotes inside the double quotes; notReady strips "'" from the output, which
# suggests they end up in kubectl's output - confirm before removing them.
JSONPATH="'{range .items[*]}
{.metadata.name}{\"\\t\"} \
{.metadata.namespace}{\"\\t\"} \
{.status.phase}{\"\\n\"} \
{end}'"

# notReady reports whether the cluster still has unsettled pods.
#
# Returns 0 (shell "true") when at least one listed pod line does not contain
# "Running" or "Succeeded", or when the pod list could not be retrieved.
# Returns 1 when every listed pod line contains "Running" or "Succeeded".
#
# Reads:  JSONPATH (output template for kubectl).
# Writes: PODS, NOT_READY (globals, as in the rest of this script).
notReady() {
  # A failed kubectl call produces no output, which would otherwise be
  # indistinguishable from "nothing is unready". Treat it as not ready so the
  # caller keeps polling instead of passing on an unreachable cluster.
  if ! PODS=$(kubectl get pods -A -o jsonpath="$JSONPATH"); then
    return 0
  fi

  # Drop settled pods, then strip whitespace and the literal single quotes
  # carried in by JSONPATH so only text from unsettled pods remains.
  NOT_READY=$(echo "$PODS" | grep -v "Running" | grep -v "Succeeded" | tr -d "\t\n '" || true)

  if [ -n "$NOT_READY" ]; then
    # Some pods aren't running
    return 0
  fi
  # All pods are running
  return 1
}

# readyWait polls notReady until it reports that nothing is unsettled.
#
# Returns 0 as soon as notReady returns non-zero (all pods settled).
# Returns 1 once MAX polls have been spent while notReady still returns 0.
#
# With the values below MAX is (10 * 60) / 30 = 20 polls, 30 seconds apart.
# Note that TIMEOUT_MINUTES, despite its name, holds the timeout in seconds.
readyWait() {
  INTERVAL=30 # seconds between polls
  TIMEOUT=10  # overall budget, in minutes
  TIMEOUT_MINUTES=$((TIMEOUT * 60))
  MAX=$((TIMEOUT_MINUTES / INTERVAL))
  ATTEMPTS=0

  until ! notReady; do
    # Budget exhausted: give up and signal failure to the caller.
    if [ "$ATTEMPTS" -ge "$MAX" ]; then
      return 1
    fi
    sleep "$INTERVAL"
    ATTEMPTS=$((ATTEMPTS + 1))
  done
  return 0
}

# Main flow: require several consecutive successful readiness waits, then dump
# cluster state for the logs and exit with the pass/fail result.

SUCCESSES=0
SUCCESSES_NEEDED=3 # require three successes to make sure everything is settled

# Check the counter before polling: once enough successes are recorded there
# is no reason to call readyWait again (it can block for its full timeout and
# its result would not change the outcome).
while [ "$SUCCESSES" -lt "$SUCCESSES_NEEDED" ] && readyWait; do
  SUCCESSES=$((SUCCESSES + 1))
  echo "succeeeded $SUCCESSES times..."
  sleep 30
done

if [ "$SUCCESSES" -eq "$SUCCESSES_NEEDED" ]; then
  echo "$SUCCESSES_NEEDED successes reached, passed..."
  EXITCODE=0
else
  echo "$SUCCESSES_NEEDED successes not reached, failed..."
  EXITCODE=1
fi

# Diagnostic output only; "|| true" keeps a kubectl failure here from masking
# the readiness result computed above.
echo "nodes..."
kubectl get nodes || true

echo "all..."
kubectl get all -A || true

echo "pods..."
kubectl get pods -A || true

exit $EXITCODE
21 changes: 14 additions & 7 deletions modules/rancher_bootstrap/rancher_externalTLS/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,8 @@ resource "helm_release" "rancher" {
chart = "${path.root}/rancher-${local.rancher_version}.tgz" #"${local.rancher_helm_repository}/${local.rancher_channel}/rancher-${local.rancher_version}.tgz"
namespace = "cattle-system"
create_namespace = false
wait = true
wait_for_jobs = true
wait = false
wait_for_jobs = false
force_update = true
timeout = 1800 # 30m

Expand Down Expand Up @@ -106,12 +106,19 @@ resource "helm_release" "rancher" {
}
}

resource "time_sleep" "settle_after_rancher" {
resource "terraform_data" "wait_for_rancher" {
depends_on = [
time_sleep.settle_before_rancher,
helm_release.rancher,
]
create_duration = "120s"
provisioner "local-exec" {
command = <<-EOT
cd ${abspath(path.root)} || true
chmod +x ${abspath(path.module)}/runningPods.sh
echo "using kubeconfig located at $KUBECONFIG"
${abspath(path.module)}/runningPods.sh
EOT
}
}

resource "random_password" "password" {
Expand All @@ -125,7 +132,7 @@ resource "terraform_data" "get_public_cert_info" {
random_password.password,
time_sleep.settle_before_rancher,
helm_release.rancher,
time_sleep.settle_after_rancher,
terraform_data.wait_for_rancher,
]
provisioner "local-exec" {
command = <<-EOT
Expand All @@ -148,7 +155,7 @@ resource "terraform_data" "get_ping" {
random_password.password,
time_sleep.settle_before_rancher,
helm_release.rancher,
time_sleep.settle_after_rancher,
terraform_data.wait_for_rancher,
terraform_data.get_public_cert_info,
]
provisioner "local-exec" {
Expand Down Expand Up @@ -181,7 +188,7 @@ resource "rancher2_bootstrap" "admin" {
random_password.password,
time_sleep.settle_before_rancher,
helm_release.rancher,
time_sleep.settle_after_rancher,
terraform_data.wait_for_rancher,
terraform_data.get_public_cert_info,
terraform_data.get_ping,
]
Expand Down
67 changes: 67 additions & 0 deletions modules/rancher_bootstrap/rancher_externalTLS/runningPods.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
#!/bin/bash
set -x

# JSONPath template handed to `kubectl get pods -A -o jsonpath=...` by notReady.
# For each item it emits the pod name, namespace and status phase, separated by
# tabs, followed by a newline.
# NOTE(review): the value deliberately (or accidentally) contains literal single
# quotes inside the double quotes; notReady strips "'" from the output, which
# suggests they end up in kubectl's output - confirm before removing them.
JSONPATH="'{range .items[*]}
{.metadata.name}{\"\\t\"} \
{.metadata.namespace}{\"\\t\"} \
{.status.phase}{\"\\n\"} \
{end}'"

# notReady reports whether the cluster still has unsettled pods.
#
# Returns 0 (shell "true") when at least one listed pod line does not contain
# "Running" or "Succeeded", or when the pod list could not be retrieved.
# Returns 1 when every listed pod line contains "Running" or "Succeeded".
#
# Reads:  JSONPATH (output template for kubectl).
# Writes: PODS, NOT_READY (globals, as in the rest of this script).
notReady() {
  # A failed kubectl call produces no output, which would otherwise be
  # indistinguishable from "nothing is unready". Treat it as not ready so the
  # caller keeps polling instead of passing on an unreachable cluster.
  if ! PODS=$(kubectl get pods -A -o jsonpath="$JSONPATH"); then
    return 0
  fi

  # Drop settled pods, then strip whitespace and the literal single quotes
  # carried in by JSONPATH so only text from unsettled pods remains.
  NOT_READY=$(echo "$PODS" | grep -v "Running" | grep -v "Succeeded" | tr -d "\t\n '" || true)

  if [ -n "$NOT_READY" ]; then
    # Some pods aren't running
    return 0
  fi
  # All pods are running
  return 1
}

# readyWait polls notReady until it reports that nothing is unsettled.
#
# Returns 0 as soon as notReady returns non-zero (all pods settled).
# Returns 1 once MAX polls have been spent while notReady still returns 0.
#
# With the values below MAX is (10 * 60) / 30 = 20 polls, 30 seconds apart.
# Note that TIMEOUT_MINUTES, despite its name, holds the timeout in seconds.
readyWait() {
  INTERVAL=30 # seconds between polls
  TIMEOUT=10  # overall budget, in minutes
  TIMEOUT_MINUTES=$((TIMEOUT * 60))
  MAX=$((TIMEOUT_MINUTES / INTERVAL))
  ATTEMPTS=0

  until ! notReady; do
    # Budget exhausted: give up and signal failure to the caller.
    if [ "$ATTEMPTS" -ge "$MAX" ]; then
      return 1
    fi
    sleep "$INTERVAL"
    ATTEMPTS=$((ATTEMPTS + 1))
  done
  return 0
}

# Main flow: require several consecutive successful readiness waits, then dump
# cluster state for the logs and exit with the pass/fail result.

SUCCESSES=0
SUCCESSES_NEEDED=3 # require three successes to make sure everything is settled

# Check the counter before polling: once enough successes are recorded there
# is no reason to call readyWait again (it can block for its full timeout and
# its result would not change the outcome).
while [ "$SUCCESSES" -lt "$SUCCESSES_NEEDED" ] && readyWait; do
  SUCCESSES=$((SUCCESSES + 1))
  echo "succeeeded $SUCCESSES times..."
  sleep 30
done

if [ "$SUCCESSES" -eq "$SUCCESSES_NEEDED" ]; then
  echo "$SUCCESSES_NEEDED successes reached, passed..."
  EXITCODE=0
else
  echo "$SUCCESSES_NEEDED successes not reached, failed..."
  EXITCODE=1
fi

# Diagnostic output only; "|| true" keeps a kubectl failure here from masking
# the readiness result computed above.
echo "nodes..."
kubectl get nodes || true

echo "all..."
kubectl get all -A || true

echo "pods..."
kubectl get pods -A || true

exit $EXITCODE
10 changes: 8 additions & 2 deletions test/scripts/runningPods.sh
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,13 @@ while readyWait && [ "$SUCCESSES" -lt "$SUCCESSES_NEEDED" ]; do
sleep 30
done

echo "Pods are ready..."
if [ "$SUCCESSES" -eq "$SUCCESSES_NEEDED" ]; then
echo "$SUCCESSES_NEEDED reached, passed.."
EXITCODE=0
else
echo "$SUCCESSES_NEEDED not reached, failed.."
EXITCODE=1
fi

echo "nodes..."
kubectl get nodes || true
Expand All @@ -58,4 +64,4 @@ kubectl get all -A || true
echo "pods..."
kubectl get pods -A || true

exit 0
exit $EXITCODE
25 changes: 20 additions & 5 deletions test/tests/downstream/downstream_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -108,20 +108,27 @@ func TestDownstreamBasic(t *testing.T) {

_, err = terraform.InitAndApplyE(t, terraformOptions)
if err != nil {
t.Log("Test failed, tearing down...")
util.GetErrorLogs(t, testDir + "/kubeconfig")
util.Teardown(t, testDir, terraformOptions, keyPair)
os.Remove(exampleDir + ".terraform.lock.hcl")
sshAgent.Stop()
t.Fatalf("Error creating cluster: %s", err)
}
t.Log("Test passed, tearing down...")
util.CheckReady(t, testDir + "/kubeconfig")
util.CheckRunning(t, testDir + "/kubeconfig")
if t.Failed() {
t.Log("Test failed...")
} else {
t.Log("Test passed...")
}
util.Teardown(t, testDir, terraformOptions, keyPair)
os.Remove(exampleDir + ".terraform.lock.hcl")
os.Remove(exampleDir + "/.terraform.lock.hcl")
sshAgent.Stop()
}




func TestDownstreamProd(t *testing.T) {
t.Parallel()
id := util.GetId()
Expand Down Expand Up @@ -214,13 +221,21 @@ func TestDownstreamProd(t *testing.T) {

_, err = terraform.InitAndApplyE(t, terraformOptions)
if err != nil {
t.Log("Test failed, tearing down...")
util.GetErrorLogs(t, testDir + "/kubeconfig")
util.Teardown(t, testDir, terraformOptions, keyPair)
os.Remove(exampleDir + ".terraform.lock.hcl")
sshAgent.Stop()
t.Fatalf("Error creating cluster: %s", err)
}
t.Log("Test passed, tearing down...")
util.CheckReady(t, testDir + "/kubeconfig")
util.CheckRunning(t, testDir + "/kubeconfig")
if t.Failed() {
t.Log("Test failed...")
} else {
t.Log("Test passed...")
}
util.Teardown(t, testDir, terraformOptions, keyPair)
os.Remove(exampleDir + ".terraform.lock.hcl")
os.Remove(exampleDir + "/.terraform.lock.hcl")
sshAgent.Stop()
}
Loading