From 84c0d56b6b8b4c1e4657a360cb0eb3b7b84bd394 Mon Sep 17 00:00:00 2001 From: matttrach Date: Thu, 22 May 2025 16:53:58 -0500 Subject: [PATCH 1/4] fix: name space imports should not change project Signed-off-by: matttrach (cherry picked from commit 73050d60a13b736a4dd1c465fd6b1557765eb442) --- .github/workflows/pull_request.yaml | 157 ++++++++ .gitignore | 6 + examples/deploy_rke2/README.md | 53 +++ examples/deploy_rke2/main.tf | 145 ++++++++ .../downstream/addKeyToAmazonConfig.sh | 58 +++ .../deploy_rke2/modules/downstream/main.tf | 283 ++++++++++++++ .../modules/downstream/variables.tf | 133 +++++++ .../modules/downstream/versions.tf | 17 + examples/{base => deploy_rke2}/outputs.tf | 0 examples/deploy_rke2/variables.tf | 92 +++++ examples/{base => deploy_rke2}/versions.tf | 0 examples/deploy_rke2_multiple_pools/README.md | 56 +++ examples/deploy_rke2_multiple_pools/main.tf | 144 ++++++++ .../downstream/addKeyToAmazonConfig.sh | 58 +++ .../modules/downstream/main.tf | 346 ++++++++++++++++++ .../modules/downstream/variables.tf | 139 +++++++ .../modules/downstream/versions.tf | 17 + .../deploy_rke2_multiple_pools/outputs.tf | 22 ++ .../variables.tf | 12 +- .../versions.tf | 2 +- examples/downstream/README.md | 6 - .../downstream/addKeyToMachineTemplate.sh | 55 --- examples/downstream/main.tf | 225 ------------ examples/one/README.md | 40 ++ examples/{base => one}/main.tf | 12 +- examples/{downstream => one}/outputs.tf | 4 + examples/{base => one}/variables.tf | 2 +- examples/one/versions.tf | 53 +++ examples/prod/README.md | 42 +++ examples/prod/main.tf | 180 +++++++++ examples/prod/outputs.tf | 22 ++ examples/prod/variables.tf | 54 +++ examples/prod/versions.tf | 53 +++ examples/state_import/main.tf | 227 ++++++++++++ examples/state_import/modules/deploy/main.tf | 224 ++++++++++++ .../state_import/modules/deploy/outputs.tf | 14 + .../state_import/modules/deploy/variables.tf | 94 +++++ .../state_import/modules/deploy/versions.tf | 9 + 
.../downstream/addKeyToAmazonConfig.sh | 58 +++ .../state_import/modules/downstream/main.tf | 303 +++++++++++++++ .../modules/downstream/outputs.tf | 23 ++ .../modules/downstream/variables.tf | 160 ++++++++ .../modules/downstream/versions.tf | 17 + .../modules/import/cloud-config.tftpl | 17 + .../state_import/modules/import/main.tftpl | 74 ++++ .../state_import/modules/import/outputs.tf | 3 + .../state_import/modules/import/variables.tf | 24 ++ .../state_import/modules/import/versions.tf | 9 + examples/state_import/variables.tf | 102 ++++++ examples/state_import/versions.tf | 53 +++ flake.lock | 6 +- scripts/run_tests.sh | 89 +++-- test/downstream/downstream_test.go | 311 ++++++++++++++-- test/go.mod | 2 +- test/{base/base_test.go => one/one_test.go} | 54 ++- test/prod/prod_test.go | 121 ++++++ test/scripts/getLogs.sh | 10 + test/scripts/readyNodes.sh | 15 +- test/scripts/runningPods.sh | 67 ++++ test/util.go | 159 ++++++-- 60 files changed, 4298 insertions(+), 435 deletions(-) create mode 100644 examples/deploy_rke2/README.md create mode 100644 examples/deploy_rke2/main.tf create mode 100755 examples/deploy_rke2/modules/downstream/addKeyToAmazonConfig.sh create mode 100644 examples/deploy_rke2/modules/downstream/main.tf create mode 100644 examples/deploy_rke2/modules/downstream/variables.tf create mode 100644 examples/deploy_rke2/modules/downstream/versions.tf rename examples/{base => deploy_rke2}/outputs.tf (100%) create mode 100644 examples/deploy_rke2/variables.tf rename examples/{base => deploy_rke2}/versions.tf (100%) create mode 100644 examples/deploy_rke2_multiple_pools/README.md create mode 100644 examples/deploy_rke2_multiple_pools/main.tf create mode 100755 examples/deploy_rke2_multiple_pools/modules/downstream/addKeyToAmazonConfig.sh create mode 100644 examples/deploy_rke2_multiple_pools/modules/downstream/main.tf create mode 100644 examples/deploy_rke2_multiple_pools/modules/downstream/variables.tf create mode 100644 
examples/deploy_rke2_multiple_pools/modules/downstream/versions.tf create mode 100644 examples/deploy_rke2_multiple_pools/outputs.tf rename examples/{downstream => deploy_rke2_multiple_pools}/variables.tf (100%) rename examples/{downstream => deploy_rke2_multiple_pools}/versions.tf (97%) delete mode 100644 examples/downstream/README.md delete mode 100755 examples/downstream/addKeyToMachineTemplate.sh delete mode 100644 examples/downstream/main.tf create mode 100644 examples/one/README.md rename examples/{base => one}/main.tf (93%) rename examples/{downstream => one}/outputs.tf (79%) rename examples/{base => one}/variables.tf (98%) create mode 100644 examples/one/versions.tf create mode 100644 examples/prod/README.md create mode 100644 examples/prod/main.tf create mode 100644 examples/prod/outputs.tf create mode 100644 examples/prod/variables.tf create mode 100644 examples/prod/versions.tf create mode 100644 examples/state_import/main.tf create mode 100644 examples/state_import/modules/deploy/main.tf create mode 100644 examples/state_import/modules/deploy/outputs.tf create mode 100644 examples/state_import/modules/deploy/variables.tf create mode 100644 examples/state_import/modules/deploy/versions.tf create mode 100755 examples/state_import/modules/downstream/addKeyToAmazonConfig.sh create mode 100644 examples/state_import/modules/downstream/main.tf create mode 100644 examples/state_import/modules/downstream/outputs.tf create mode 100644 examples/state_import/modules/downstream/variables.tf create mode 100644 examples/state_import/modules/downstream/versions.tf create mode 100644 examples/state_import/modules/import/cloud-config.tftpl create mode 100644 examples/state_import/modules/import/main.tftpl create mode 100644 examples/state_import/modules/import/outputs.tf create mode 100644 examples/state_import/modules/import/variables.tf create mode 100644 examples/state_import/modules/import/versions.tf create mode 100644 examples/state_import/variables.tf create mode 
100644 examples/state_import/versions.tf rename test/{base/base_test.go => one/one_test.go} (73%) create mode 100644 test/prod/prod_test.go create mode 100755 test/scripts/getLogs.sh create mode 100755 test/scripts/runningPods.sh diff --git a/.github/workflows/pull_request.yaml b/.github/workflows/pull_request.yaml index ca5a9d2dc..8ab77001d 100644 --- a/.github/workflows/pull_request.yaml +++ b/.github/workflows/pull_request.yaml @@ -14,3 +14,160 @@ jobs: VERSION: ${{ github.ref_name }} run: | make build-rancher + + terraform: + name: 'Terraform' + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + - name: install-nix + run: | + curl -L https://nixos.org/nix/install | sh + source /home/runner/.nix-profile/etc/profile.d/nix.sh + nix --version + which nix + - name: lint terraform + shell: /home/runner/.nix-profile/bin/nix develop --ignore-environment --extra-experimental-features nix-command --extra-experimental-features flakes --keep HOME --keep SSH_AUTH_SOCK --keep GITHUB_TOKEN --keep AWS_ROLE --keep AWS_REGION --keep AWS_DEFAULT_REGION --keep AWS_ACCESS_KEY_ID --keep AWS_SECRET_ACCESS_KEY --keep AWS_SESSION_TOKEN --keep UPDATECLI_GPGTOKEN --keep UPDATECLI_GITHUB_TOKEN --keep UPDATECLI_GITHUB_ACTOR --keep GPG_SIGNING_KEY --keep NIX_SSL_CERT_FILE --keep NIX_ENV_LOADED --keep TERM --command bash -e {0} + run: | + terraform fmt -check -recursive + tflint --recursive + + actionlint: + name: 'Lint Workflows' + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + - name: install-nix + run: | + curl -L https://nixos.org/nix/install | sh + source /home/runner/.nix-profile/etc/profile.d/nix.sh + nix --version + which nix + - name: action lint + shell: /home/runner/.nix-profile/bin/nix develop --ignore-environment --extra-experimental-features nix-command --extra-experimental-features flakes --keep HOME --keep SSH_AUTH_SOCK --keep GITHUB_TOKEN --keep AWS_ROLE --keep AWS_REGION --keep 
AWS_DEFAULT_REGION --keep AWS_ACCESS_KEY_ID --keep AWS_SECRET_ACCESS_KEY --keep AWS_SESSION_TOKEN --keep UPDATECLI_GPGTOKEN --keep UPDATECLI_GITHUB_TOKEN --keep UPDATECLI_GITHUB_ACTOR --keep GPG_SIGNING_KEY --keep NIX_SSL_CERT_FILE --keep NIX_ENV_LOADED --keep TERM --command bash -e {0} + run: actionlint + + shellcheck: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + - name: install-nix + run: | + curl -L https://nixos.org/nix/install | sh + source /home/runner/.nix-profile/etc/profile.d/nix.sh + nix --version + which nix + - name: shell check + shell: /home/runner/.nix-profile/bin/nix develop --ignore-environment --extra-experimental-features nix-command --extra-experimental-features flakes --keep HOME --keep SSH_AUTH_SOCK --keep GITHUB_TOKEN --keep AWS_ROLE --keep AWS_REGION --keep AWS_DEFAULT_REGION --keep AWS_ACCESS_KEY_ID --keep AWS_SECRET_ACCESS_KEY --keep AWS_SESSION_TOKEN --keep UPDATECLI_GPGTOKEN --keep UPDATECLI_GITHUB_TOKEN --keep UPDATECLI_GITHUB_ACTOR --keep GPG_SIGNING_KEY --keep NIX_SSL_CERT_FILE --keep NIX_ENV_LOADED --keep TERM --command bash -e {0} + run: | + while read -r file; do + echo "checking $file..." + shellcheck -x "$file" + done <<<"$(grep -Rl -e '^#!' 
| grep -v '.terraform'| grep -v '.git')" + + validate-commit-message: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 # fetch all history so that we can validate the commit messages + - name: install-nix + run: | + curl -L https://nixos.org/nix/install | sh + source /home/runner/.nix-profile/etc/profile.d/nix.sh + nix --version + which nix + - name: Check commit message + shell: /home/runner/.nix-profile/bin/nix develop --ignore-environment --extra-experimental-features nix-command --extra-experimental-features flakes --keep HOME --keep SSH_AUTH_SOCK --keep GITHUB_TOKEN --keep AWS_ROLE --keep AWS_REGION --keep AWS_DEFAULT_REGION --keep AWS_ACCESS_KEY_ID --keep AWS_SECRET_ACCESS_KEY --keep AWS_SESSION_TOKEN --keep UPDATECLI_GPGTOKEN --keep UPDATECLI_GITHUB_TOKEN --keep UPDATECLI_GITHUB_ACTOR --keep GPG_SIGNING_KEY --keep NIX_SSL_CERT_FILE --keep NIX_ENV_LOADED --keep TERM --command bash -e {0} + run: | + set -e + # Check commit messages + # This step enforces https://www.conventionalcommits.org/en/v1.0.0/ + # This format enables automatic generation of changelogs and versioning + filter() { + COMMIT="$1" + output="$(echo "$COMMIT" | grep -e '^fix: ' -e '^feature: ' -e '^feat: ' -e '^refactor!: ' -e '^feature!: ' -e '^feat!: ' -e '^chore(main): ' || true)" # empty when no prefix matches; '|| true' keeps a non-match from aborting under 'bash -e' + echo "$output" + } + prefix_check() { + message="$1" + if [ "" == "$(filter "$message")" ]; then # filter printed nothing, so no required prefix matched + echo "...Commit message does not start with the required prefix. + Please use one of the following prefixes: "fix:", "feature:", "feat:", "refactor!:", "feature!:", or "feat!:". + This enables release-please to automatically determine the type of release (major, minor, patch) based on the commit message. + $message" + exit 1 + else + echo "...Commit message starts with the required prefix." + fi + } + empty_check() { + message="$1" + if [ "" == "$message" ]; then + echo "...Empty commit message." + exit 1 + else + echo "...Commit message isnt empty." 
+ fi + } + length_check() { + message="$1" + if [ "$(wc -m <<<"$message")" -gt 50 ]; then # count includes the newline the here-string appends + echo "...Commit message subject line should be less than 50 characters, found $(wc -m <<<"$message")." + exit 1 + else + echo "...Commit message subject line is less than 50 characters." + fi + } + spell_check() { + message="$1" + WORDS="$(aspell list <<<"$message")" + if [ "" != "$WORDS" ]; then + echo "...Commit message contains spelling errors on: ^$WORDS\$" + echo "...Also try updating the PR title." + exit 1 + else + echo "...Commit message doesnt contain spelling errors." + fi + } + + # Fetch the commit messages + + COMMIT_MESSAGES="$(gh pr view ${{github.event.number}} --json commits | jq -r '.commits[].messageHeadline')" + echo "Commit messages found: " + echo "$COMMIT_MESSAGES" + + while read -r message; do + echo "checking message ^$message\$" + prefix_check "$message" + empty_check "$message" + length_check "$message" + spell_check "$message" + echo "message ^$message\$ passed all checks" + done <<<"$COMMIT_MESSAGES" + + gitleaks: + name: 'Scan for Secrets' + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + - name: install-nix + run: | + curl -L https://nixos.org/nix/install | sh + source /home/runner/.nix-profile/etc/profile.d/nix.sh + nix --version + which nix + - name: Check for secrets + shell: /home/runner/.nix-profile/bin/nix develop --ignore-environment --extra-experimental-features nix-command --extra-experimental-features flakes --keep HOME --keep SSH_AUTH_SOCK --keep GITHUB_TOKEN --keep AWS_ROLE --keep AWS_REGION --keep AWS_DEFAULT_REGION --keep AWS_ACCESS_KEY_ID --keep AWS_SECRET_ACCESS_KEY --keep AWS_SESSION_TOKEN --keep UPDATECLI_GPGTOKEN --keep UPDATECLI_GITHUB_TOKEN --keep UPDATECLI_GITHUB_ACTOR --keep GPG_SIGNING_KEY --keep NIX_SSL_CERT_FILE --keep NIX_ENV_LOADED --keep TERM --command bash -e {0} + run: | + gitleaks detect --no-banner -v --no-git + gitleaks detect --no-banner -v + continue-on-error: true diff 
--git a/.gitignore b/.gitignore index ba467e36a..8bfc3a422 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,9 @@ test/data .terraform.lock.hcl */.terraform.lock.hcl .terraform +*/*.test +examples/**/rke2 +terraform.tfstate +*.tfstate.* +tf-* +*.env diff --git a/examples/deploy_rke2/README.md b/examples/deploy_rke2/README.md new file mode 100644 index 000000000..8e4d475e7 --- /dev/null +++ b/examples/deploy_rke2/README.md @@ -0,0 +1,53 @@ +# Deploy RKE2 + +This example module configures Rancher to deploy a downstream RKE2 cluster. + +## Deploy Rancher + +This module starts by using the rancher-aws module to deploy Rancher on AWS. + +## Downstream + +This module has a local module that provides a logical separation for deploying a downstream cluster using the rancher2_cluster_v2 resource. + +## Machine Config Patch + +There is a local exec that runs kubectl to patch the Amazonec2Config objects. +The AWS access key id and secret access key attributes are obfuscated and unable to be set directly in favor of the Amazonec2Credential object. +The Amazonec2Credential object doesn't support a session token making it impossible to use temporary credentials. +Our CI must use temporary AWS credentials supplied via OIDC, so this may be something that you eliminate from the example in your implementation. +We bypass the Amazonec2Credential object by manually patching the Amazonec2Config objects with the aws_access_key_id and aws_secret_access_key. +The AWS session token isn't obfuscated and is available as an argument when creating a rancher2_machine_config_v2 resource. + +## Dependencies + +The Flake.nix in the root of the module explains all of the dependencies for the development of the module, it also includes the dependencies to run it. +You can see the list on lines 50-80, but a more specific list is below (with explanations). 
+- bash -> Bourne Again shell with Linux core utils +- git -> required by Terraform +- curl -> required by Terraform as well as dependent modules (when downloading RKE2 for install) +- openssh -> required by Terraform and used in dependent modules to connect to servers for initial configuration +- ssh-agent -> used for connecting to remote servers for initial configuration; you need to have the key you send into the module loaded in your agent +- gh -> the GitHub CLI tool, used to find releases when downloading RKE2 for install +- jq -> JSON parsing tool, used in dependent modules to parse submodule outputs +- openssl -> required by Terraform and used in dependent modules to verify TLS certificates +- kubectl -> used in local exec to patch Kubernetes objects +- awscli2 -> the AWS CLI tool, used in some dependent modules in some use cases (dualstack) +- tfswitch -> handy for installing Terraform +- yq -> YAML parsing tool, used in dependent modules to parse kubectl outputs +- go -> necessary to run tests + +## Environment Variables + +I like to set my AWS credentials in environment variables: +- AWS_ROLE -> role to assume when using OIDC +- AWS_REGION -> AWS region to deploy to, make sure there are multiple availability zones when needing HA +- AWS_DEFAULT_REGION -> same as region +- AWS_ACCESS_KEY_ID -> access key, this will make it into the state, please secure it properly +- AWS_SECRET_ACCESS_KEY -> secret key, this will make it into the state, please secure it properly +- AWS_SESSION_TOKEN -> used with temporary AWS credentials, this will make it into the state, please secure it properly +- TF_VAR_aws_access_key_id -> access key, this will make it into the state, please secure it properly +- TF_VAR_aws_secret_access_key -> secret key, this will make it into the state, please secure it properly +- TF_VAR_aws_session_token -> used with temporary AWS credentials, this will make it into the state, please secure it properly +- TF_VAR_aws_region -> AWS region to deploy 
to, make sure there are multiple availability zones when needing HA + diff --git a/examples/deploy_rke2/main.tf b/examples/deploy_rke2/main.tf new file mode 100644 index 000000000..ffdc09c81 --- /dev/null +++ b/examples/deploy_rke2/main.tf @@ -0,0 +1,145 @@ +provider "aws" { + default_tags { + tags = { + Id = local.identifier + Owner = local.owner + } + } + region = local.aws_region +} + +provider "acme" { + server_url = "${local.acme_server_url}/directory" +} + +provider "github" {} +provider "kubernetes" {} # make sure you set the env variable KUBE_CONFIG_PATH to local_file_path (file_path variable) +provider "helm" {} # make sure you set the env variable KUBE_CONFIG_PATH to local_file_path (file_path variable) + +provider "rancher2" { + api_url = "https://${local.domain}.${local.zone}" + token_key = module.rancher.admin_token + timeout = "300s" +} + +locals { + identifier = var.identifier + example = "downstream" + project_name = "tf-${substr(md5(join("-", [local.example, local.identifier])), 0, 5)}" + username = local.project_name + domain = local.project_name + zone = var.zone + key_name = var.key_name + key = var.key + owner = var.owner + rke2_version = var.rke2_version + local_file_path = var.file_path + runner_ip = chomp(data.http.myip.response_body) # "runner" is the server running Terraform + rancher_version = var.rancher_version + cert_manager_version = "1.16.3" #"1.13.1" + os = "sle-micro-61" + aws_access_key_id = var.aws_access_key_id + aws_secret_access_key = var.aws_secret_access_key + aws_region = var.aws_region + aws_session_token = var.aws_session_token + aws_instance_type = "m5.large" + node_count = 3 + email = (var.email != "" ? 
var.email : "${local.identifier}@${local.zone}") + acme_server_url = "https://acme-v02.api.letsencrypt.org" +} + +data "http" "myip" { + url = "https://ipinfo.io/ip" +} + +module "rancher" { + source = "rancher/aws/rancher2" + version = "1.2.2" + # project + identifier = local.identifier + owner = local.owner + project_name = local.project_name + domain = local.domain + zone = local.zone + skip_project_cert_generation = true + # access + key_name = local.key_name + key = local.key + username = local.username + admin_ip = local.runner_ip + # rke2 + rke2_version = local.rke2_version + local_file_path = local.local_file_path + install_method = "rpm" # rpm only for now, need to figure out local helm chart installs otherwise + cni = "canal" + node_configuration = { + "rancher" = { + type = "all-in-one" + size = "large" + os = local.os + indirect_access = true + initial = true + } + } + # rancher + rancher_version = local.rancher_version + cert_manager_version = local.cert_manager_version + configure_cert_manager = true + cert_manager_configuration = { + aws_access_key_id = local.aws_access_key_id + aws_secret_access_key = local.aws_secret_access_key + aws_session_token = local.aws_session_token + aws_region = local.aws_region + acme_email = local.email + acme_server_url = local.acme_server_url + } +} + +module "rke2_image" { + source = "rancher/server/aws" + version = "v1.4.0" + server_use_strategy = "skip" + image_use_strategy = "find" + image_type = local.os # this is not required to match Rancher, it just seemed easier in this example +} + +# you can add this one multiple times, or use a loop to deploy multiple clusters +module "downstream" { + depends_on = [ + module.rancher, + module.rke2_image, + ] + source = "./modules/downstream" + # general + name = "${local.project_name}-aio" + identifier = local.identifier + owner = local.owner + # aws access + aws_access_key_id = local.aws_access_key_id + aws_secret_access_key = local.aws_secret_access_key + 
aws_session_token = trimspace(chomp(local.aws_session_token)) + aws_region = local.aws_region + aws_region_letter = replace( + module.rancher.subnets[keys(module.rancher.subnets)[0]].availability_zone, + local.aws_region, + "" + ) + # aws project info + vpc_id = module.rancher.vpc.id + security_group_id = module.rancher.security_group.id + load_balancer_security_groups = module.rancher.load_balancer_security_groups + subnet_id = module.rancher.subnets[keys(module.rancher.subnets)[0]].id + # node info + aws_instance_type = local.aws_instance_type + ami_id = module.rke2_image.image.id + ami_ssh_user = module.rke2_image.image.user + ami_admin_group = module.rke2_image.image.admin_group + node_count = local.node_count + direct_node_access = { + runner_ip = local.runner_ip + ssh_access_key = local.key + ssh_access_user = local.project_name + } + # rke2 info + rke2_version = local.rke2_version +} diff --git a/examples/deploy_rke2/modules/downstream/addKeyToAmazonConfig.sh b/examples/deploy_rke2/modules/downstream/addKeyToAmazonConfig.sh new file mode 100755 index 000000000..37245ab37 --- /dev/null +++ b/examples/deploy_rke2/modules/downstream/addKeyToAmazonConfig.sh @@ -0,0 +1,58 @@ +#!/bin/sh + +ACCESS_KEY="$1" +SECRET_KEY="$2" + +if [ -z "$ACCESS_KEY" ]; then echo "need access key to proceed..."; exit 1; fi +if [ -z "$SECRET_KEY" ]; then echo "need secret key to proceed..."; exit 1; fi + +NAMESPACE="fleet-default" +NAMES=$(kubectl get Amazonec2Config -n "$NAMESPACE" -o jsonpath='{.items[*].metadata.name}') + +if [ -z "$NAMES" ]; then echo "names not found..."; exit 1; fi + +cat <<EOT > patch.yaml # heredoc (ends at EOT) writes the merge patch; the shell expands both key variables +accessKey: '$ACCESS_KEY' +secretKey: '$SECRET_KEY' +EOT + +for name in $NAMES; do + + kubectl patch Amazonec2Config "$name" -n "$NAMESPACE" --type merge --patch-file patch.yaml + RESULT=$? 
+ if [ $RESULT -ne 0 ]; then + echo "error: $RESULT" + exit $RESULT + fi + + KEY="$(kubectl get Amazonec2Config "$name" -n "$NAMESPACE" -o json | jq -r '.accessKey')" + + if [ -z "$KEY" ] || [ "null" = "$KEY" ]; then + echo "error: key not found on object" + exit 1 + else + if [ "$KEY" != "$ACCESS_KEY" ]; then + echo "error: key not replaced properly" + exit 1 + fi + echo "key replaced properly" + fi + + SECRET="$(kubectl get Amazonec2Config "$name" -n "$NAMESPACE" -o json | jq -r '.secretKey')" + + if [ -z "$SECRET" ] || [ "null" = "$SECRET" ]; then + echo "error: secret not found on object" + exit 1 + else + if [ "$SECRET" != "$SECRET_KEY" ]; then + echo "error: secret not replaced properly" + exit 1 + fi + echo "secret replaced properly" + fi + + echo "Amazonec2Config $name in namespace $NAMESPACE updated." + +done + +rm -f patch.yaml diff --git a/examples/deploy_rke2/modules/downstream/main.tf b/examples/deploy_rke2/modules/downstream/main.tf new file mode 100644 index 000000000..e6f929ee3 --- /dev/null +++ b/examples/deploy_rke2/modules/downstream/main.tf @@ -0,0 +1,283 @@ + +locals { + # general + identifier = var.identifier + owner = var.owner + cluster_name = var.name + # aws access + aws_access_key_id = var.aws_access_key_id + aws_secret_access_key = var.aws_secret_access_key + aws_session_token = var.aws_session_token + aws_region = var.aws_region + aws_region_letter = var.aws_region_letter + # networking info + vpc_id = var.vpc_id + subnet_id = var.subnet_id + security_group_id = var.security_group_id + lbsg = sort(var.load_balancer_security_groups) + load_balancer_security_group_id = [ + for i in range(length(local.lbsg)) : + local.lbsg[i] if local.lbsg[i] != local.security_group_id + # load balancers only have 2 security groups, the project and its own + # this eliminates the project security group to just return the load balancer's security group + ][0] + downstream_security_group_name = "${local.cluster_name}-sgroup" + # node info + aws_instance_type 
= var.aws_instance_type + ami_id = var.ami_id + ami_ssh_user = var.ami_ssh_user + node_count = var.node_count + node_ips = { for i in range(local.node_count) : tostring(i) => data.aws_instances.rke2_instance_nodes.public_ips[i] } + node_id = "${local.cluster_name}-nodes" + ami_admin_group = (var.ami_admin_group != "" ? var.ami_admin_group : "tty") + runner_ip = (var.direct_node_access != null ? var.direct_node_access.runner_ip : "10.1.1.1") # the IP running Terraform + ssh_access_key = (var.direct_node_access != null ? var.direct_node_access.ssh_access_key : "fake123abc") + ssh_access_user = (var.direct_node_access != null ? var.direct_node_access.ssh_access_user : "fake") + # rke2 info + rke2_version = var.rke2_version +} + +resource "aws_security_group" "downstream_cluster" { + description = "Access to downstream cluster" + name = local.downstream_security_group_name + vpc_id = local.vpc_id + tags = { + Name = local.downstream_security_group_name + } + lifecycle { + ignore_changes = [ + ingress, + egress, + ] + } +} +# this allows servers attached to the project security group to accept connections initiated by the downstream cluster +resource "aws_vpc_security_group_ingress_rule" "downstream_ingress_rancher" { + depends_on = [ + aws_security_group.downstream_cluster, + ] + referenced_security_group_id = aws_security_group.downstream_cluster.id + security_group_id = local.security_group_id + ip_protocol = "-1" +} +# this allows the load balancer to accept connections initiated by the downstream cluster +resource "aws_vpc_security_group_ingress_rule" "downstream_ingress_loadbalancer" { + depends_on = [ + aws_security_group.downstream_cluster, + ] + referenced_security_group_id = aws_security_group.downstream_cluster.id + security_group_id = local.load_balancer_security_group_id + ip_protocol = "-1" +} + +# this allows the downstream cluster to reach out to any public ipv4 address +resource "aws_vpc_security_group_egress_rule" "downstream_egress_ipv4" { + 
depends_on = [ + aws_security_group.downstream_cluster, + ] + ip_protocol = "-1" + cidr_ipv4 = "0.0.0.0/0" + security_group_id = aws_security_group.downstream_cluster.id +} +# this allows the downstream cluster to reach out to any public ipv6 address +resource "aws_vpc_security_group_egress_rule" "downstream_egress_ipv6" { + depends_on = [ + aws_security_group.downstream_cluster, + ] + ip_protocol = "-1" + cidr_ipv6 = "::/0" + security_group_id = aws_security_group.downstream_cluster.id +} +# this allows the downstream cluster to reach out to any server attached to the project security group +resource "aws_vpc_security_group_egress_rule" "downstream_egress_project_link" { + depends_on = [ + aws_security_group.downstream_cluster, + ] + referenced_security_group_id = local.security_group_id + security_group_id = aws_security_group.downstream_cluster.id + ip_protocol = "-1" +} +# this allows nodes to talk to each other +resource "aws_vpc_security_group_ingress_rule" "downstream_ingress_internal_ipv4" { + depends_on = [ + aws_security_group.downstream_cluster, + ] + ip_protocol = "-1" + cidr_ipv4 = "10.0.0.0/16" + security_group_id = aws_security_group.downstream_cluster.id +} +resource "rancher2_machine_config_v2" "all_in_one" { + depends_on = [ + aws_security_group.downstream_cluster, + aws_vpc_security_group_ingress_rule.downstream_ingress_rancher, + aws_vpc_security_group_egress_rule.downstream_egress_ipv4, + aws_vpc_security_group_egress_rule.downstream_egress_ipv6, + aws_vpc_security_group_egress_rule.downstream_egress_project_link, + ] + generate_name = local.cluster_name + amazonec2_config { + ami = local.ami_id + region = local.aws_region + security_group = [local.downstream_security_group_name] + subnet_id = local.subnet_id + vpc_id = local.vpc_id + zone = local.aws_region_letter + session_token = local.aws_session_token + instance_type = local.aws_instance_type + ssh_user = local.ami_ssh_user + tags = join(",", ["Id", local.identifier, "Owner", local.owner, 
"NodeId", local.node_id]) + userdata = <<-EOT + #cloud-config + + merge_how: + - name: list + settings: [replace] + - name: dict + settings: [replace] + + users: + - name: ${local.ssh_access_user} + gecos: ${local.ssh_access_user} + sudo: ALL=(ALL) NOPASSWD:ALL + groups: users, ${local.ami_admin_group} + lock_passwd: true + ssh_authorized_keys: + - ${local.ssh_access_key} + homedir: /home/${local.ssh_access_user} + EOT + } +} +resource "terraform_data" "patch_machine_configs" { + depends_on = [ + aws_security_group.downstream_cluster, + aws_vpc_security_group_ingress_rule.downstream_ingress_rancher, + aws_vpc_security_group_egress_rule.downstream_egress_ipv4, + aws_vpc_security_group_egress_rule.downstream_egress_ipv6, + aws_vpc_security_group_egress_rule.downstream_egress_project_link, + rancher2_machine_config_v2.all_in_one, + ] + triggers_replace = { + core_config = rancher2_machine_config_v2.all_in_one.id + } + provisioner "local-exec" { + command = <<-EOT + ${path.module}/addKeyToAmazonConfig.sh "${local.aws_access_key_id}" "${local.aws_secret_access_key}" + EOT + } +} + +resource "rancher2_cluster_v2" "rke2_cluster" { + depends_on = [ + aws_security_group.downstream_cluster, + aws_vpc_security_group_ingress_rule.downstream_ingress_rancher, + aws_vpc_security_group_egress_rule.downstream_egress_ipv4, + aws_vpc_security_group_egress_rule.downstream_egress_ipv6, + aws_vpc_security_group_egress_rule.downstream_egress_project_link, + rancher2_machine_config_v2.all_in_one, + terraform_data.patch_machine_configs, + ] + name = local.cluster_name + kubernetes_version = local.rke2_version + enable_network_policy = true + rke_config { + machine_pools { + name = local.cluster_name + control_plane_role = true + etcd_role = true + worker_role = true + quantity = local.node_count + machine_config { + kind = rancher2_machine_config_v2.all_in_one.kind + name = rancher2_machine_config_v2.all_in_one.name + } + } + } + timeouts { + create = "120m" + } +} + +resource "time_sleep" 
"wait_for_nodes" { + depends_on = [ + aws_security_group.downstream_cluster, + aws_vpc_security_group_ingress_rule.downstream_ingress_rancher, + aws_vpc_security_group_egress_rule.downstream_egress_ipv4, + aws_vpc_security_group_egress_rule.downstream_egress_ipv6, + aws_vpc_security_group_egress_rule.downstream_egress_project_link, + rancher2_machine_config_v2.all_in_one, + terraform_data.patch_machine_configs, + ] + create_duration = "120s" +} + +data "aws_instances" "rke2_instance_nodes" { + depends_on = [ + aws_security_group.downstream_cluster, + aws_vpc_security_group_ingress_rule.downstream_ingress_rancher, + aws_vpc_security_group_egress_rule.downstream_egress_ipv4, + aws_vpc_security_group_egress_rule.downstream_egress_ipv6, + aws_vpc_security_group_egress_rule.downstream_egress_project_link, + rancher2_machine_config_v2.all_in_one, + terraform_data.patch_machine_configs, + time_sleep.wait_for_nodes, + ] + filter { + name = "tag:NodeId" + values = [local.node_id] + } +} + +# this allows the load balancer to accept connections initiated by the downstream cluster's public ip addresses +# this weird in-flight grab of the nodes and manipulating the security groups is not good, +# but the only way to allow ingress when the downstream cluster has public IPs +# FYI: security group references only work with private IPs +resource "aws_vpc_security_group_ingress_rule" "downstream_public_ingress_loadbalancer" { + depends_on = [ + aws_security_group.downstream_cluster, + aws_vpc_security_group_ingress_rule.downstream_ingress_rancher, + aws_vpc_security_group_egress_rule.downstream_egress_ipv4, + aws_vpc_security_group_egress_rule.downstream_egress_ipv6, + aws_vpc_security_group_egress_rule.downstream_egress_project_link, + rancher2_machine_config_v2.all_in_one, + terraform_data.patch_machine_configs, + time_sleep.wait_for_nodes, + data.aws_instances.rke2_instance_nodes, + ] + for_each = local.node_ips + security_group_id = local.load_balancer_security_group_id + 
ip_protocol = "-1" + cidr_ipv4 = "${each.value}/32" +} + +resource "aws_vpc_security_group_ingress_rule" "downstream_public_ingress_runner" { + depends_on = [ + aws_security_group.downstream_cluster, + aws_vpc_security_group_ingress_rule.downstream_ingress_rancher, + aws_vpc_security_group_egress_rule.downstream_egress_ipv4, + aws_vpc_security_group_egress_rule.downstream_egress_ipv6, + aws_vpc_security_group_egress_rule.downstream_egress_project_link, + rancher2_machine_config_v2.all_in_one, + terraform_data.patch_machine_configs, + time_sleep.wait_for_nodes, + data.aws_instances.rke2_instance_nodes, + ] + security_group_id = aws_security_group.downstream_cluster.id + ip_protocol = "tcp" + from_port = 22 + to_port = 22 + cidr_ipv4 = "${local.runner_ip}/32" +} + +resource "rancher2_cluster_sync" "sync" { + depends_on = [ + aws_security_group.downstream_cluster, + aws_vpc_security_group_ingress_rule.downstream_ingress_rancher, + aws_vpc_security_group_egress_rule.downstream_egress_ipv4, + aws_vpc_security_group_egress_rule.downstream_egress_ipv6, + aws_vpc_security_group_egress_rule.downstream_egress_project_link, + rancher2_machine_config_v2.all_in_one, + terraform_data.patch_machine_configs, + rancher2_cluster_v2.rke2_cluster, + ] + cluster_id = rancher2_cluster_v2.rke2_cluster.cluster_v1_id +} diff --git a/examples/deploy_rke2/modules/downstream/variables.tf b/examples/deploy_rke2/modules/downstream/variables.tf new file mode 100644 index 000000000..2a302cffd --- /dev/null +++ b/examples/deploy_rke2/modules/downstream/variables.tf @@ -0,0 +1,133 @@ + +variable "name" { + type = string + description = <<-EOT + The name to give the new downstream cluster. + EOT +} +variable "identifier" { + type = string + description = <<-EOT + A unique string that identifies the project as a whole. + EOT +} +variable "owner" { + type = string + description = <<-EOT + A string to identify the "owner" of the resource, this is added as a tag to all resources. 
+ This is important both for finding resources in the future and removing them in case of problems. + EOT +} +variable "aws_access_key_id" { + type = string + description = <<-EOT + The AWS access key to use to deploy the machines. + We suggest using temporary credentials. + EOT +} +variable "aws_secret_access_key" { + type = string + description = <<-EOT + The AWS secret key to use to deploy the machines. + We suggest using temporary credentials. + EOT +} +variable "aws_session_token" { + type = string + description = <<-EOT + The AWS session token to use to deploy the machines. + While this is not required we suggest using it. + EOT + default = "" +} +variable "aws_region" { + type = string + description = <<-EOT + The AWS region name to deploy to. + EOT +} +variable "aws_region_letter" { + type = string + description = <<-EOT + The letter of the availability zone from the full region name. + Eg. in AZ "us-west-2a", this would be "a", in "us-west-2b" this would be "b". + EOT +} +variable "vpc_id" { + type = string + description = <<-EOT + The id of the VPC where the cluster will be deployed. + Currently this expects the same VPC as the Rancher cluster. + EOT +} +variable "security_group_id" { + type = string + description = <<-EOT + The id of the security group to add the downstream cluster's security group to. + EOT +} + +variable "load_balancer_security_groups" { + type = list(any) + description = <<-EOT + The security groups for the load balancer. + EOT +} +variable "subnet_id" { + type = string + description = <<-EOT + The id of the subnet to deploy the cluster to. + Currently we only support deploying to the same subnet as the Rancher cluster. + EOT +} +variable "aws_instance_type" { + type = string + description = <<-EOT + The AWS instance type to deploy. + EOT +} +variable "ami_id" { + type = string + description = <<-EOT + The AWS id for the OS image to deploy the cluster on. 
+ EOT +} +variable "ami_ssh_user" { + type = string + description = <<-EOT + The ssh user that is default for the image, the deployment will replace this user. + EOT +} +variable "ami_admin_group" { + type = string + description = <<-EOT + The group name for the root/admin user. + Usually "wheel" or "admin". + EOT +} +variable "node_count" { + type = number + description = <<-EOT + The number of all in one nodes to deploy. + EOT +} +variable "direct_node_access" { + type = object({ + runner_ip = string + ssh_access_key = string + ssh_access_user = string + }) + description = <<-EOT + This object has the information to give the computer running Terraform (runner) the ability to directly access the nodes provisioned by Rancher. + When left in the default state no access will be given. + This requires the downstream cluster to have public IP addresses. + EOT + default = null +} +variable "rke2_version" { + type = string + description = <<-EOT + The RKE2 version to deploy. + This module only supports RKE2. 
+ EOT +} diff --git a/examples/deploy_rke2/modules/downstream/versions.tf b/examples/deploy_rke2/modules/downstream/versions.tf new file mode 100644 index 000000000..2539bf8db --- /dev/null +++ b/examples/deploy_rke2/modules/downstream/versions.tf @@ -0,0 +1,17 @@ +terraform { + required_version = ">= 1.5.0" + required_providers { + aws = { + source = "hashicorp/aws" + version = ">= 5.11" + } + rancher2 = { + source = "rancher/rancher2" + version = ">= 5.0.0" + } + time = { + source = "hashicorp/time" + version = ">= 0.12.0" + } + } +} diff --git a/examples/base/outputs.tf b/examples/deploy_rke2/outputs.tf similarity index 100% rename from examples/base/outputs.tf rename to examples/deploy_rke2/outputs.tf diff --git a/examples/deploy_rke2/variables.tf b/examples/deploy_rke2/variables.tf new file mode 100644 index 000000000..fb979975b --- /dev/null +++ b/examples/deploy_rke2/variables.tf @@ -0,0 +1,92 @@ +variable "key_name" { + type = string + description = <<-EOT + The name of an AWS key pair to use for SSH access to the instance. + This key should already be added to your ssh agent for server authentication. + EOT +} +variable "key" { + type = string + description = <<-EOT + The contents of an AWS key pair's public key to use for SSH access to the instance. + EOT +} +variable "identifier" { + type = string + description = <<-EOT + A unique identifier for the project, this helps when generating names for infrastructure items. + EOT +} +variable "owner" { + type = string + description = <<-EOT + The owner of the project, this helps when generating names for infrastructure items. + EOT +} +variable "zone" { + type = string + description = <<-EOT + The Route53 DNS zone to deploy the cluster into. + This is used to generate the DNS name for the cluster. + The zone must already exist. + EOT +} +variable "rke2_version" { + type = string + description = <<-EOT + The version of rke2 to install on the nodes. + eg. 
v1.30.2+rke2r1 + EOT +} +variable "rancher_version" { + type = string + description = <<-EOT + The version of rancher to install on the rke2 cluster. + EOT + default = "2.9.2" +} +variable "file_path" { + type = string + description = <<-EOT + The path to the file containing the rke2 install script. + EOT + default = "./rke2" +} +variable "aws_access_key_id" { + type = string + description = <<-EOT + AWS access key ID. + EOT + sensitive = true +} +variable "aws_secret_access_key" { + type = string + description = <<-EOT + AWS secret key for EC2 services. + EOT + sensitive = true +} +variable "aws_session_token" { + type = string + description = <<-EOT + AWS session token for EC2 services. + If left empty the AWS provider will assume you are using permanent AWS credentials. + EOT + sensitive = true + default = "" +} +variable "aws_region" { + type = string + description = <<-EOT + AWS region for EC2 services. + EOT + sensitive = true +} +variable "email" { + type = string + description = <<-EOT + Email used for TLS certificate registration. + If left blank this will default to the identifier and zone joined as "identifier@zone". + EOT + default = "" +} diff --git a/examples/base/versions.tf b/examples/deploy_rke2/versions.tf similarity index 100% rename from examples/base/versions.tf rename to examples/deploy_rke2/versions.tf diff --git a/examples/deploy_rke2_multiple_pools/README.md b/examples/deploy_rke2_multiple_pools/README.md new file mode 100644 index 000000000..94aeb3f64 --- /dev/null +++ b/examples/deploy_rke2_multiple_pools/README.md @@ -0,0 +1,56 @@ +# Deploy RKE2 with Multiple Machine Pools + +This example module configures Rancher to deploy a downstream RKE2 cluster with two different roles: worker and control plane. + +## Deploy Rancher + +This module starts by using the rancher-aws module to deploy Rancher on AWS. +The Rancher cluster is a single node. + +## Downstream + +This module has a local module that provides a logical separation for deploying a downstream cluster using the rancher2_cluster_v2 resource. 
+ +## Machine Config Patch + +There is a local exec that runs kubectl to patch the Amazonec2Config objects. +The AWS access key id and secret access key attributes are unable to be set directly with the provider in favor of the Amazonec2Credential object. +The Amazonec2Credential object doesn't support session tokens, making it impossible to use temporary credentials. +Our CI must use temporary AWS credentials supplied via OIDC, so this may be something that you eliminate from the example in your implementation. +We bypass the Amazonec2Credential object by manually patching the Amazonec2Config objects with the aws_access_key_id and aws_secret_access_key. +The AWS session token isn't obfuscated and is available as an argument when creating a rancher2_machine_config_v2 resource. + +## Dependencies + +The `flake.nix` file in the root of the module explains all of the dependencies for the development of the module, it also includes the dependencies to run it. +You can see the list on lines 50-80, but a more specific list is below (with explanations). 
+ +- bash -> Bourne-again shell with linux core utils facilitates CLI actions +- tfswitch -> handy for installing Terraform at specific versions +- git -> required by Terraform +- curl -> required by Terraform as well as dependent modules (when downloading RKE2 for install) +- openssh -> required by Terraform and used in dependent modules to connect to servers for initial configuration +- openssl -> required by Terraform and used in dependent modules to verify TLS certificates +- ssh-agent -> used for connecting to remote server for initial configuration, you need to have the key you send into the module loaded in your agent +- gh -> the github cli tool, used to find releases when downloading RKE2 for install +- jq -> json parsing tool, used in dependent modules to parse submodule outputs +- kubectl -> used in local exec to patch kubernetes objects +- awscli2 -> the aws cli tool, used in some dependent modules in some use cases (dualstack) +- yq -> yaml parsing tool, used in dependent modules to parse kubectl outputs +- go -> necessary to run tests + +## Environment Variables + +I like to set my AWS credentials in environment variables: + +- AWS_ROLE -> role to assume when using OIDC +- AWS_REGION -> AWS region to deploy to, make sure there are multiple availability zones when needing HA +- AWS_DEFAULT_REGION -> same as region +- AWS_ACCESS_KEY_ID -> access key, this will make it into the state, please secure it properly +- AWS_SECRET_ACCESS_KEY -> secret key, this will make it into the state, please secure it properly +- AWS_SESSION_TOKEN -> used with temporary AWS credentials, this will make it into the state, please secure it properly +- TF_VAR_aws_access_key_id -> access key, this will make it into the state, please secure it properly +- TF_VAR_aws_secret_access_key -> secret key, this will make it into the state, please secure it properly +- TF_VAR_aws_session_token -> used with temporary AWS credentials, this will make it into the state, please secure it 
properly +- TF_VAR_aws_region -> AWS region to deploy to, make sure there are multiple availability zones when needing HA + diff --git a/examples/deploy_rke2_multiple_pools/main.tf b/examples/deploy_rke2_multiple_pools/main.tf new file mode 100644 index 000000000..55d9f453a --- /dev/null +++ b/examples/deploy_rke2_multiple_pools/main.tf @@ -0,0 +1,144 @@ +provider "aws" { + default_tags { + tags = { + Id = local.identifier + Owner = local.owner + } + } + region = local.aws_region +} + +provider "acme" { + server_url = "${local.acme_server_url}/directory" +} + +provider "github" {} +provider "kubernetes" {} # make sure you set the env variable KUBE_CONFIG_PATH to local_file_path (file_path variable) +provider "helm" {} # make sure you set the env variable KUBE_CONFIG_PATH to local_file_path (file_path variable) + +provider "rancher2" { + api_url = "https://${local.domain}.${local.zone}" + token_key = module.rancher.admin_token + timeout = "3000s" +} + +locals { + identifier = var.identifier + example = "basic" + project_name = "tf-${substr(md5(join("-", [local.example, local.identifier])), 0, 5)}" + username = local.project_name + domain = local.project_name + zone = var.zone + key_name = var.key_name + key = var.key + owner = var.owner + rke2_version = var.rke2_version + local_file_path = var.file_path + runner_ip = chomp(data.http.myip.response_body) # "runner" is the server running Terraform + rancher_version = var.rancher_version + cert_manager_version = "1.16.3" #"1.13.1" + os = "sle-micro-61" + acme_server_url = "https://acme-v02.api.letsencrypt.org" + aws_access_key_id = var.aws_access_key_id + aws_secret_access_key = var.aws_secret_access_key + aws_region = var.aws_region + aws_session_token = var.aws_session_token + email = (var.email != "" ? 
var.email : "${local.identifier}@${local.zone}") +} + +data "http" "myip" { + url = "https://ipinfo.io/ip" +} + +module "rancher" { + source = "rancher/aws/rancher2" + version = "1.2.2" + # project + identifier = local.identifier + owner = local.owner + project_name = local.project_name + domain = local.domain + zone = local.zone + skip_project_cert_generation = true + # access + key_name = local.key_name + key = local.key + username = local.username + admin_ip = local.runner_ip + # rke2 + rke2_version = local.rke2_version + local_file_path = local.local_file_path + install_method = "rpm" # rpm only for now, need to figure out local helm chart installs otherwise + cni = "canal" + node_configuration = { + "rancher" = { + type = "all-in-one" + size = "large" + os = local.os + indirect_access = true + initial = true + } + } + # rancher + rancher_version = local.rancher_version + cert_manager_version = local.cert_manager_version + configure_cert_manager = true + cert_manager_configuration = { + aws_access_key_id = local.aws_access_key_id + aws_secret_access_key = local.aws_secret_access_key + aws_region = local.aws_region + aws_session_token = local.aws_session_token + acme_email = local.email + acme_server_url = local.acme_server_url + } +} + +module "rke2_image" { + source = "rancher/server/aws" + version = "v1.4.0" + server_use_strategy = "skip" + image_use_strategy = "find" + image_type = local.os +} + +# you can add this one multiple times, or use a loop to deploy multiple clusters +module "downstream" { + depends_on = [ + module.rancher, + module.rke2_image, + ] + source = "./modules/downstream" + # general + name = "${local.project_name}-mp" # multi-pool + identifier = local.identifier + owner = local.owner + # aws access + aws_access_key_id = local.aws_access_key_id + aws_secret_access_key = local.aws_secret_access_key + aws_session_token = trimspace(chomp(local.aws_session_token)) + aws_region = local.aws_region + aws_region_letter = replace( + 
module.rancher.subnets[keys(module.rancher.subnets)[0]].availability_zone, + local.aws_region, + "" + ) + # aws project info + vpc_id = module.rancher.vpc.id + security_group_id = module.rancher.security_group.id + load_balancer_security_groups = module.rancher.load_balancer_security_groups + subnet_id = module.rancher.subnets[keys(module.rancher.subnets)[0]].id + # node info + aws_instance_type = "m5.large" + ami_id = module.rke2_image.image.id + ami_ssh_user = module.rke2_image.image.user + ami_admin_group = module.rke2_image.image.admin_group + worker_node_count = 2 + control_plane_node_count = 2 + direct_node_access = { + runner_ip = local.runner_ip + ssh_access_key = local.key + ssh_access_user = local.project_name + } + # rke2 info + rke2_version = local.rke2_version +} diff --git a/examples/deploy_rke2_multiple_pools/modules/downstream/addKeyToAmazonConfig.sh b/examples/deploy_rke2_multiple_pools/modules/downstream/addKeyToAmazonConfig.sh new file mode 100755 index 000000000..37245ab37 --- /dev/null +++ b/examples/deploy_rke2_multiple_pools/modules/downstream/addKeyToAmazonConfig.sh @@ -0,0 +1,58 @@ +#!/bin/sh + +ACCESS_KEY="$1" +SECRET_KEY="$2" + +if [ -z "$ACCESS_KEY" ]; then echo "need access key to proceed..."; exit 1; fi +if [ -z "$SECRET_KEY" ]; then echo "need secret key to proceed..."; exit 1; fi + +NAMESPACE="fleet-default" +NAMES=$(kubectl get Amazonec2Config -n "$NAMESPACE" -o jsonpath='{.items[*].metadata.name}') + +if [ -z "$NAMES" ]; then echo "names not found..."; exit 1; fi + +cat <<EOT > patch.yaml +accessKey: '$ACCESS_KEY' +secretKey: '$SECRET_KEY' +EOT + +for name in $NAMES; do + + kubectl patch Amazonec2Config "$name" -n "$NAMESPACE" --type merge --patch-file patch.yaml + RESULT=$? 
+ if [ $RESULT -ne 0 ]; then + echo "error: $RESULT" + exit $RESULT + fi + + KEY="$(kubectl get Amazonec2Config "$name" -n "$NAMESPACE" -o json | jq -r '.accessKey')" + + if [ -z "$KEY" ] || [ "null" = "$KEY" ]; then + echo "error: key not found on object" + exit 1 + else + if [ "$KEY" != "$ACCESS_KEY" ]; then + echo "error: key not replaced properly" + exit 1 + fi + echo "key replaced properly" + fi + + SECRET="$(kubectl get Amazonec2Config "$name" -n "$NAMESPACE" -o json | jq -r '.secretKey')" + + if [ -z "$SECRET" ] || [ "null" = "$SECRET" ]; then + echo "error: secret not found on object" + exit 1 + else + if [ "$SECRET" != "$SECRET_KEY" ]; then + echo "error: secret not replaced properly" + exit 1 + fi + echo "secret replaced properly" + fi + + echo "Amazonec2Config $name in namespace $NAMESPACE updated." + +done + +rm -f patch.yaml diff --git a/examples/deploy_rke2_multiple_pools/modules/downstream/main.tf b/examples/deploy_rke2_multiple_pools/modules/downstream/main.tf new file mode 100644 index 000000000..dffc5187a --- /dev/null +++ b/examples/deploy_rke2_multiple_pools/modules/downstream/main.tf @@ -0,0 +1,346 @@ + +locals { + # general + identifier = var.identifier + owner = var.owner + cluster_name = var.name + # aws access + aws_access_key_id = var.aws_access_key_id + aws_secret_access_key = var.aws_secret_access_key + aws_session_token = var.aws_session_token + aws_region = var.aws_region + aws_region_letter = var.aws_region_letter + # networking info + vpc_id = var.vpc_id + subnet_id = var.subnet_id + security_group_id = var.security_group_id + lbsg = sort(var.load_balancer_security_groups) + load_balancer_security_group_id = [ + for i in range(length(local.lbsg)) : + local.lbsg[i] if local.lbsg[i] != local.security_group_id + # load balancers only have 2 security groups, the project and its own + # this eliminates the project security group to just return the load balancer's security group + ][0] + downstream_security_group_name = 
"${local.cluster_name}-sgroup" + # node info + aws_instance_type = var.aws_instance_type + ami_id = var.ami_id + ami_ssh_user = var.ami_ssh_user + wrknc = var.worker_node_count + cpnc = var.control_plane_node_count + node_count = (local.wrknc + local.cpnc) + node_ips = { for i in range(local.node_count) : tostring(i) => data.aws_instances.rke2_instance_nodes.public_ips[i] } + node_id = "${local.cluster_name}-nodes" + ami_admin_group = (var.ami_admin_group != "" ? var.ami_admin_group : "tty") + runner_ip = (var.direct_node_access != null ? var.direct_node_access.runner_ip : "10.1.1.1") # the IP running Terraform + ssh_access_key = (var.direct_node_access != null ? var.direct_node_access.ssh_access_key : "fake123abc") + ssh_access_user = (var.direct_node_access != null ? var.direct_node_access.ssh_access_user : "fake") + # rke2 info + rke2_version = var.rke2_version +} + +resource "aws_security_group" "downstream_cluster" { + description = "Access to downstream cluster" + name = local.downstream_security_group_name + vpc_id = local.vpc_id + tags = { + Name = local.downstream_security_group_name + } + lifecycle { + ignore_changes = [ + ingress, + egress, + ] + } +} +# this allows servers attached to the project security group to accept connections initiated by the downstream cluster +resource "aws_vpc_security_group_ingress_rule" "downstream_ingress_rancher" { + depends_on = [ + aws_security_group.downstream_cluster, + ] + referenced_security_group_id = aws_security_group.downstream_cluster.id + security_group_id = local.security_group_id + ip_protocol = "-1" +} +# this allows the load balancer to accept connections initiated by the downstream cluster +resource "aws_vpc_security_group_ingress_rule" "downstream_ingress_loadbalancer" { + depends_on = [ + aws_security_group.downstream_cluster, + ] + referenced_security_group_id = aws_security_group.downstream_cluster.id + security_group_id = local.load_balancer_security_group_id + ip_protocol = "-1" +} + +# this allows 
the downstream cluster to reach out to any public ipv4 address +resource "aws_vpc_security_group_egress_rule" "downstream_egress_ipv4" { + depends_on = [ + aws_security_group.downstream_cluster, + ] + ip_protocol = "-1" + cidr_ipv4 = "0.0.0.0/0" + security_group_id = aws_security_group.downstream_cluster.id +} +# this allows the downstream cluster to reach out to any public ipv6 address +resource "aws_vpc_security_group_egress_rule" "downstream_egress_ipv6" { + depends_on = [ + aws_security_group.downstream_cluster, + ] + ip_protocol = "-1" + cidr_ipv6 = "::/0" + security_group_id = aws_security_group.downstream_cluster.id +} +# this allows the downstream cluster to reach out to any server attached to the project security group +resource "aws_vpc_security_group_egress_rule" "downstream_egress_project_link" { + depends_on = [ + aws_security_group.downstream_cluster, + ] + referenced_security_group_id = local.security_group_id + security_group_id = aws_security_group.downstream_cluster.id + ip_protocol = "-1" +} +# this allows nodes to talk to each other +resource "aws_vpc_security_group_ingress_rule" "downstream_ingress_internal_ipv4" { + depends_on = [ + aws_security_group.downstream_cluster, + ] + ip_protocol = "-1" + cidr_ipv4 = "10.0.0.0/16" + security_group_id = aws_security_group.downstream_cluster.id +} +resource "rancher2_machine_config_v2" "core" { + depends_on = [ + aws_security_group.downstream_cluster, + aws_vpc_security_group_ingress_rule.downstream_ingress_rancher, + aws_vpc_security_group_egress_rule.downstream_egress_ipv4, + aws_vpc_security_group_egress_rule.downstream_egress_ipv6, + aws_vpc_security_group_egress_rule.downstream_egress_project_link, + ] + generate_name = "core-${local.cluster_name}" + amazonec2_config { + ami = local.ami_id + region = local.aws_region + security_group = [local.downstream_security_group_name] + subnet_id = local.subnet_id + vpc_id = local.vpc_id + zone = local.aws_region_letter + session_token = 
local.aws_session_token + instance_type = local.aws_instance_type + ssh_user = local.ami_ssh_user + tags = join(",", ["Id", local.identifier, "Owner", local.owner, "NodeId", local.node_id]) + userdata = <<-EOT + #cloud-config + + merge_how: + - name: list + settings: [replace] + - name: dict + settings: [replace] + + users: + - name: ${local.ssh_access_user} + gecos: ${local.ssh_access_user} + sudo: ALL=(ALL) NOPASSWD:ALL + groups: users, ${local.ami_admin_group} + lock_passwd: true + ssh_authorized_keys: + - ${local.ssh_access_key} + homedir: /home/${local.ssh_access_user} + EOT + } +} +resource "rancher2_machine_config_v2" "worker" { + depends_on = [ + aws_security_group.downstream_cluster, + aws_vpc_security_group_ingress_rule.downstream_ingress_rancher, + aws_vpc_security_group_egress_rule.downstream_egress_ipv4, + aws_vpc_security_group_egress_rule.downstream_egress_ipv6, + aws_vpc_security_group_egress_rule.downstream_egress_project_link, + ] + generate_name = "worker-${local.cluster_name}" + amazonec2_config { + ami = local.ami_id + region = local.aws_region + security_group = [local.downstream_security_group_name] + subnet_id = local.subnet_id + vpc_id = local.vpc_id + zone = local.aws_region_letter + session_token = local.aws_session_token + instance_type = local.aws_instance_type + ssh_user = local.ami_ssh_user + tags = join(",", ["Id", local.identifier, "Owner", local.owner, "NodeId", local.node_id]) + userdata = <<-EOT + #cloud-config + + merge_how: + - name: list + settings: [replace] + - name: dict + settings: [replace] + + users: + - name: ${local.ssh_access_user} + gecos: ${local.ssh_access_user} + sudo: ALL=(ALL) NOPASSWD:ALL + groups: users, ${local.ami_admin_group} + lock_passwd: true + ssh_authorized_keys: + - ${local.ssh_access_key} + homedir: /home/${local.ssh_access_user} + EOT + } +} + +resource "terraform_data" "patch_machine_configs" { + depends_on = [ + aws_security_group.downstream_cluster, + 
aws_vpc_security_group_ingress_rule.downstream_ingress_rancher, + aws_vpc_security_group_egress_rule.downstream_egress_ipv4, + aws_vpc_security_group_egress_rule.downstream_egress_ipv6, + aws_vpc_security_group_egress_rule.downstream_egress_project_link, + rancher2_machine_config_v2.core, + rancher2_machine_config_v2.worker, + ] + triggers_replace = { + core_config = rancher2_machine_config_v2.core.id + worker_config = rancher2_machine_config_v2.worker.id + } + provisioner "local-exec" { + command = <<-EOT + ${path.module}/addKeyToAmazonConfig.sh "${local.aws_access_key_id}" "${local.aws_secret_access_key}" + EOT + } +} + +resource "rancher2_cluster_v2" "rke2_cluster" { + depends_on = [ + aws_security_group.downstream_cluster, + aws_vpc_security_group_ingress_rule.downstream_ingress_rancher, + aws_vpc_security_group_egress_rule.downstream_egress_ipv4, + aws_vpc_security_group_egress_rule.downstream_egress_ipv6, + aws_vpc_security_group_egress_rule.downstream_egress_project_link, + rancher2_machine_config_v2.core, + rancher2_machine_config_v2.worker, + terraform_data.patch_machine_configs, + ] + name = local.cluster_name + kubernetes_version = local.rke2_version + enable_network_policy = true + rke_config { + machine_pools { + name = "${local.cluster_name}-cp" + control_plane_role = true + etcd_role = true + worker_role = false + quantity = local.cpnc + machine_config { + kind = rancher2_machine_config_v2.core.kind + name = rancher2_machine_config_v2.core.name + } + } + machine_pools { + name = "${local.cluster_name}-wk" + control_plane_role = false + etcd_role = false + worker_role = true + quantity = local.wrknc + machine_config { + kind = rancher2_machine_config_v2.worker.kind + name = rancher2_machine_config_v2.worker.name + } + } + } + timeouts { + create = "120m" + } +} + +resource "time_sleep" "wait_for_nodes" { + depends_on = [ + aws_security_group.downstream_cluster, + aws_vpc_security_group_ingress_rule.downstream_ingress_rancher, + 
aws_vpc_security_group_egress_rule.downstream_egress_ipv4, + aws_vpc_security_group_egress_rule.downstream_egress_ipv6, + aws_vpc_security_group_egress_rule.downstream_egress_project_link, + rancher2_machine_config_v2.core, + rancher2_machine_config_v2.worker, + terraform_data.patch_machine_configs, + ] + create_duration = "120s" +} + +data "aws_instances" "rke2_instance_nodes" { + depends_on = [ + aws_security_group.downstream_cluster, + aws_vpc_security_group_ingress_rule.downstream_ingress_rancher, + aws_vpc_security_group_egress_rule.downstream_egress_ipv4, + aws_vpc_security_group_egress_rule.downstream_egress_ipv6, + aws_vpc_security_group_egress_rule.downstream_egress_project_link, + rancher2_machine_config_v2.core, + rancher2_machine_config_v2.worker, + terraform_data.patch_machine_configs, + time_sleep.wait_for_nodes, + ] + filter { + name = "tag:NodeId" + values = [local.node_id] + } +} + +# this allows the load balancer to accept connections initiated by the downstream cluster's public ip addresses +# this weird in-flight grab of the nodes and manipulating the security groups is not good, +# but the only way to allow ingress when the downstream cluster has public IPs +# FYI: security group references only work with private IPs +resource "aws_vpc_security_group_ingress_rule" "downstream_public_ingress_loadbalancer" { + depends_on = [ + aws_security_group.downstream_cluster, + aws_vpc_security_group_ingress_rule.downstream_ingress_rancher, + aws_vpc_security_group_egress_rule.downstream_egress_ipv4, + aws_vpc_security_group_egress_rule.downstream_egress_ipv6, + aws_vpc_security_group_egress_rule.downstream_egress_project_link, + rancher2_machine_config_v2.core, + rancher2_machine_config_v2.worker, + terraform_data.patch_machine_configs, + time_sleep.wait_for_nodes, + data.aws_instances.rke2_instance_nodes, + ] + for_each = local.node_ips + security_group_id = local.load_balancer_security_group_id + ip_protocol = "-1" + cidr_ipv4 = "${each.value}/32" +} + 
+resource "aws_vpc_security_group_ingress_rule" "downstream_public_ingress_runner" { + depends_on = [ + aws_security_group.downstream_cluster, + aws_vpc_security_group_ingress_rule.downstream_ingress_rancher, + aws_vpc_security_group_egress_rule.downstream_egress_ipv4, + aws_vpc_security_group_egress_rule.downstream_egress_ipv6, + aws_vpc_security_group_egress_rule.downstream_egress_project_link, + rancher2_machine_config_v2.core, + rancher2_machine_config_v2.worker, + terraform_data.patch_machine_configs, + time_sleep.wait_for_nodes, + data.aws_instances.rke2_instance_nodes, + ] + security_group_id = aws_security_group.downstream_cluster.id + ip_protocol = "tcp" + from_port = 22 + to_port = 22 + cidr_ipv4 = "${local.runner_ip}/32" +} + +resource "rancher2_cluster_sync" "sync" { + depends_on = [ + aws_security_group.downstream_cluster, + aws_vpc_security_group_ingress_rule.downstream_ingress_rancher, + aws_vpc_security_group_egress_rule.downstream_egress_ipv4, + aws_vpc_security_group_egress_rule.downstream_egress_ipv6, + aws_vpc_security_group_egress_rule.downstream_egress_project_link, + rancher2_machine_config_v2.core, + rancher2_machine_config_v2.worker, + terraform_data.patch_machine_configs, + rancher2_cluster_v2.rke2_cluster, + ] + cluster_id = rancher2_cluster_v2.rke2_cluster.cluster_v1_id +} diff --git a/examples/deploy_rke2_multiple_pools/modules/downstream/variables.tf b/examples/deploy_rke2_multiple_pools/modules/downstream/variables.tf new file mode 100644 index 000000000..5d730ee1e --- /dev/null +++ b/examples/deploy_rke2_multiple_pools/modules/downstream/variables.tf @@ -0,0 +1,139 @@ + +variable "name" { + type = string + description = <<-EOT + The name to give the new downstream cluster. + EOT +} +variable "identifier" { + type = string + description = <<-EOT + A unique string that identifies the project as a whole. 
+ EOT +} +variable "owner" { + type = string + description = <<-EOT + A string to identify the "owner" of the resource, this is added as a tag to all resources. + This is important both for finding resources in the future and removing them in case of problems. + EOT +} +variable "aws_access_key_id" { + type = string + description = <<-EOT + The AWS access key to use to deploy the machines. + We suggest using temporary credentials. + EOT +} +variable "aws_secret_access_key" { + type = string + description = <<-EOT + The AWS secret key to use to deploy the machines. + We suggest using temporary credentials. + EOT +} +variable "aws_session_token" { + type = string + description = <<-EOT + The AWS session token to use to deploy the machines. + While this is not required we suggest using it. + EOT + default = "" +} +variable "aws_region" { + type = string + description = <<-EOT + The AWS region name to deploy to. + EOT +} +variable "aws_region_letter" { + type = string + description = <<-EOT + The letter of the availability zone from the full region name. + Eg. in AZ "us-west-2a", this would be "a", in "us-west-2b" this would be "b". + EOT +} +variable "vpc_id" { + type = string + description = <<-EOT + The id of the VPC where the cluster will be deployed. + Currently this expects the same VPC as the Rancher cluster. + EOT +} +variable "security_group_id" { + type = string + description = <<-EOT + The id of the security group to add the downstream cluster's security group to. + EOT +} + +variable "load_balancer_security_groups" { + type = list(any) + description = <<-EOT + The security groups for the load balancer. + EOT +} +variable "subnet_id" { + type = string + description = <<-EOT + The id of the subnet to deploy the cluster to. + Currently we only support deploying to the same subnet as the Rancher cluster. + EOT +} +variable "aws_instance_type" { + type = string + description = <<-EOT + The AWS instance type to deploy. 
+ EOT +} +variable "ami_id" { + type = string + description = <<-EOT + The AWS id for the OS image to deploy the cluster on. + EOT +} +variable "ami_ssh_user" { + type = string + description = <<-EOT + The ssh user that is default for the image, the deployment will replace this user. + EOT +} +variable "ami_admin_group" { + type = string + description = <<-EOT + The group name for the root/admin user. + Usually "wheel" or "admin". + EOT +} +variable "control_plane_node_count" { + type = number + description = <<-EOT + The number of control plane nodes to deploy. + EOT +} +variable "worker_node_count" { + type = number + description = <<-EOT + The number of worker nodes to deploy. + EOT +} +variable "direct_node_access" { + type = object({ + runner_ip = string + ssh_access_key = string + ssh_access_user = string + }) + description = <<-EOT + This object has the information to give the computer running Terraform (runner) the ability to directly access the nodes provisioned by Rancher. + When left in the default state no access will be given. + This requires the downstream cluster to have public IP addresses. + EOT + default = null +} +variable "rke2_version" { + type = string + description = <<-EOT + The RKE2 version to deploy. + This module only supports RKE2. 
+ EOT +} diff --git a/examples/deploy_rke2_multiple_pools/modules/downstream/versions.tf b/examples/deploy_rke2_multiple_pools/modules/downstream/versions.tf new file mode 100644 index 000000000..2539bf8db --- /dev/null +++ b/examples/deploy_rke2_multiple_pools/modules/downstream/versions.tf @@ -0,0 +1,17 @@ +terraform { + required_version = ">= 1.5.0" + required_providers { + aws = { + source = "hashicorp/aws" + version = ">= 5.11" + } + rancher2 = { + source = "rancher/rancher2" + version = ">= 5.0.0" + } + time = { + source = "hashicorp/time" + version = ">= 0.12.0" + } + } +} diff --git a/examples/deploy_rke2_multiple_pools/outputs.tf b/examples/deploy_rke2_multiple_pools/outputs.tf new file mode 100644 index 000000000..772a494c9 --- /dev/null +++ b/examples/deploy_rke2_multiple_pools/outputs.tf @@ -0,0 +1,22 @@ +output "kubeconfig" { + value = module.rancher.kubeconfig + description = <<-EOT + The kubeconfig for the server. + EOT + sensitive = true +} +output "address" { + value = module.rancher.address +} +# output "admin_token" { +# value = module.rancher.admin_token +# sensitive = true +# } +output "admin_password" { + value = module.rancher.admin_password + sensitive = true +} + +# output "rke2_cluster_subnet" { +# value = module.rke2_cluster_access.subnets[keys(module.rke2_cluster_access.subnets)[0]] +# } diff --git a/examples/downstream/variables.tf b/examples/deploy_rke2_multiple_pools/variables.tf similarity index 100% rename from examples/downstream/variables.tf rename to examples/deploy_rke2_multiple_pools/variables.tf index c9fb55bd9..612abf457 100644 --- a/examples/downstream/variables.tf +++ b/examples/deploy_rke2_multiple_pools/variables.tf @@ -67,21 +67,21 @@ variable "aws_secret_access_key" { EOT sensitive = true } -variable "aws_session_token" { +variable "aws_region" { type = string description = <<-EOT - AWS session token for EC2 services. - If left empty the AWS provider will assume you are using permanent AWS credentials. 
+ AWS region EC2 services. EOT sensitive = true - default = "" } -variable "aws_region" { +variable "aws_session_token" { type = string description = <<-EOT - AWS region EC2 services. + AWS session token for EC2 services. + If left empty the AWS provider will assume you are using permanent AWS credentials. EOT sensitive = true + default = "" } variable "email" { type = string diff --git a/examples/downstream/versions.tf b/examples/deploy_rke2_multiple_pools/versions.tf similarity index 97% rename from examples/downstream/versions.tf rename to examples/deploy_rke2_multiple_pools/versions.tf index 30e236d49..d3d093a17 100644 --- a/examples/downstream/versions.tf +++ b/examples/deploy_rke2_multiple_pools/versions.tf @@ -23,7 +23,7 @@ terraform { } null = { source = "hashicorp/null" - version = ">=3" + version = ">= 3" } tls = { source = "hashicorp/tls" diff --git a/examples/downstream/README.md b/examples/downstream/README.md deleted file mode 100644 index cd3360ed4..000000000 --- a/examples/downstream/README.md +++ /dev/null @@ -1,6 +0,0 @@ -# Deploy RKE2 Example - -This basic example shows a normal path to deploying Rancher with a downstream RKE2 with IAC. - -Because this is considered a normal deployment path it is useful when troubleshooting user issues that come up from time to time. -However, due to the security concerns and the way we authenticate our automation it won't be automatically tested. 
diff --git a/examples/downstream/addKeyToMachineTemplate.sh b/examples/downstream/addKeyToMachineTemplate.sh deleted file mode 100755 index c3b52b416..000000000 --- a/examples/downstream/addKeyToMachineTemplate.sh +++ /dev/null @@ -1,55 +0,0 @@ -#!/bin/sh - -ACCESS_KEY="$1" -SECRET_KEY="$2" - -if [ -z "$ACCESS_KEY" ]; then echo "need access key to proceed..."; exit 1; fi -if [ -z "$SECRET_KEY" ]; then echo "need secret key to proceed..."; exit 1; fi - -NAME=$(kubectl get Amazonec2Config -n fleet-default -o jsonpath='{.items[0].metadata.name}') -NAMESPACE=$(kubectl get Amazonec2Config -n fleet-default -o jsonpath='{.items[0].metadata.namespace}') - -if [ -z "$NAME" ]; then echo "name not found..."; exit 1; fi -if [ -z "$NAMESPACE" ]; then echo "namespace not found..."; exit 1; fi - -cat < patch.yaml -accessKey: '$ACCESS_KEY' -secretKey: '$SECRET_KEY' -EOT - -kubectl patch Amazonec2Config "$NAME" -n "$NAMESPACE" --type merge --patch-file patch.yaml -RESULT=$? -if [ $RESULT -ne 0 ]; then - echo "error: $RESULT" - exit $RESULT -fi - -KEY="$(kubectl get Amazonec2Config -n fleet-default -o json | jq -r '.items[].accessKey')" - -if [ -z "$KEY" ] || [ "null" = "$KEY" ]; then - echo "error: key not found on object" - exit 1 -else - if [ "$KEY" != "$ACCESS_KEY" ]; then - echo "error: key not replaced properly" - exit 1 - fi - echo "key replaced properly" -fi - -SECRET="$(kubectl get Amazonec2Config -n fleet-default -o json | jq -r '.items[].secretKey')" - -if [ -z "$SECRET" ] || [ "null" = "$SECRET" ]; then - echo "error: secret not found on object" - exit 1 -else - if [ "$SECRET" != "$SECRET_KEY" ]; then - echo "error: secret not replaced properly" - exit 1 - fi - echo "secret replaced properly" -fi - -echo "Amazonec2Config $NAME in namespace $NAMESPACE updated." 
- -rm -f patch.yaml diff --git a/examples/downstream/main.tf b/examples/downstream/main.tf deleted file mode 100644 index 88461620a..000000000 --- a/examples/downstream/main.tf +++ /dev/null @@ -1,225 +0,0 @@ -provider "aws" { - default_tags { - tags = { - Id = local.identifier - Owner = local.owner - } - } - region = local.aws_region -} - -provider "acme" { - server_url = "${local.acme_server_url}/directory" -} - -provider "github" {} -provider "kubernetes" {} # make sure you set the env variable KUBE_CONFIG_PATH to local_file_path (file_path variable) -provider "helm" {} # make sure you set the env variable KUBE_CONFIG_PATH to local_file_path (file_path variable) - -provider "rancher2" { - alias = "authenticate" - bootstrap = true - api_url = "https://${local.domain}.${local.zone}" - timeout = "300s" -} - -resource "rancher2_bootstrap" "authenticate" { - provider = rancher2.authenticate - initial_password = module.rancher.admin_password - password = module.rancher.admin_password - token_update = true - token_ttl = 86400 # 24 hours -} - -provider "rancher2" { - alias = "default" - api_url = "https://${local.domain}.${local.zone}" - token_key = rancher2_bootstrap.authenticate.token - timeout = "300s" -} - -locals { - identifier = var.identifier - example = "downstream" - project_name = "tf-${substr(md5(join("-", [local.example, local.identifier])), 0, 5)}" - username = local.project_name - domain = local.project_name - zone = var.zone - key_name = var.key_name - key = var.key - owner = var.owner - rke2_version = var.rke2_version - local_file_path = var.file_path - runner_ip = chomp(data.http.myip.response_body) # "runner" is the server running Terraform - rancher_version = var.rancher_version - cert_manager_version = "1.16.3" #"1.13.1" - os = "sle-micro-61" - aws_access_key_id = var.aws_access_key_id - aws_secret_access_key = var.aws_secret_access_key - aws_session_token = var.aws_session_token - aws_region = var.aws_region - email = (var.email != "" ? 
var.email : "${local.identifier}@${local.zone}") - acme_server_url = "https://acme-v02.api.letsencrypt.org" - private_ip = replace(module.rancher.private_endpoint, "http://", "") -} - -data "http" "myip" { - url = "https://ipinfo.io/ip" -} - -module "rancher" { - source = "rancher/aws/rancher2" - version = "v1.2.0" - # project - identifier = local.identifier - owner = local.owner - project_name = local.project_name - domain = local.domain - zone = local.zone - skip_project_cert_generation = true - # access - key_name = local.key_name - key = local.key - username = local.username - admin_ip = local.runner_ip - # rke2 - rke2_version = local.rke2_version - local_file_path = local.local_file_path - install_method = "rpm" # rpm only for now, need to figure out local helm chart installs otherwise - cni = "canal" - node_configuration = { - "rancher" = { - type = "all-in-one" - size = "large" - os = local.os - indirect_access = true - initial = true - } - } - # rancher - rancher_version = local.rancher_version - cert_manager_version = local.cert_manager_version - configure_cert_manager = true - cert_manager_configuration = { - aws_access_key_id = local.aws_access_key_id - aws_secret_access_key = local.aws_secret_access_key - aws_session_token = local.aws_session_token - aws_region = local.aws_region - acme_email = local.email - acme_server_url = local.acme_server_url - } -} - -module "rke2_image" { - source = "rancher/server/aws" - version = "v1.4.0" - server_use_strategy = "skip" - image_use_strategy = "find" - image_type = local.os -} - -# this adds the private (10.) 
IP to the domain -# the private IP communicates to the agents where to find Rancher -resource "aws_route53_record" "modified" { - depends_on = [ - module.rancher, - ] - zone_id = module.rancher.domain_object.zone_id - name = module.rancher.domain_object.name - type = module.rancher.domain_object.type - ttl = 30 - records = concat([local.private_ip], tolist(module.rancher.domain_object.records)) - allow_overwrite = true -} - -resource "rancher2_machine_config_v2" "aio" { - depends_on = [ - rancher2_bootstrap.authenticate, - module.rancher, - aws_route53_record.modified, - ] - provider = rancher2.default - generate_name = "all-in-one-config" - amazonec2_config { - ami = module.rke2_image.image.id - region = local.aws_region - security_group = [module.rancher.security_group.name] - subnet_id = module.rancher.subnets[keys(module.rancher.subnets)[0]].id - vpc_id = module.rancher.vpc.id - zone = replace( # it is looking for just the last letter of the availability zone, eg. for us-west-2a it just wants 'a' - module.rancher.subnets[keys(module.rancher.subnets)[0]].availability_zone, - local.aws_region, - "" - ) - session_token = trimspace(chomp(local.aws_session_token)) - instance_type = "m5.large" - ssh_user = "ec2-user" - userdata = <<-EOT - #cloud-config - bootcmd: - - echo ${local.private_ip} ${local.domain}.${local.zone} >> /etc/hosts - EOT - tags = join(",", ["Id", local.identifier, "Owner", local.owner]) - } -} - -resource "terraform_data" "patch_machine_config" { - depends_on = [ - module.rancher, - aws_route53_record.modified, - rancher2_machine_config_v2.aio, - ] - triggers_replace = { - config = rancher2_machine_config_v2.aio.id - } - provisioner "local-exec" { - command = <<-EOT - ${path.module}/addKeyToMachineTemplate.sh "${local.aws_access_key_id}" "${local.aws_secret_access_key}" - EOT - } -} - -resource "rancher2_cluster_v2" "rke2_cluster" { - depends_on = [ - rancher2_bootstrap.authenticate, - module.rancher, - aws_route53_record.modified, - 
rancher2_machine_config_v2.aio, - terraform_data.patch_machine_config, - ] - provider = rancher2.default - name = "${local.project_name}-s1-cluster" - kubernetes_version = local.rke2_version - enable_network_policy = true - rke_config { - machine_pools { - name = "mp1" - control_plane_role = true - etcd_role = true - worker_role = true - quantity = 1 - machine_config { - kind = rancher2_machine_config_v2.aio.kind - name = rancher2_machine_config_v2.aio.name - } - } - rotate_certificates { - generation = 1 - } - } - timeouts { - create = "120m" # 2 hours - } -} - -resource "rancher2_cluster_sync" "sync" { - depends_on = [ - rancher2_bootstrap.authenticate, - module.rancher, - aws_route53_record.modified, - rancher2_machine_config_v2.aio, - rancher2_cluster_v2.rke2_cluster, - ] - provider = rancher2.default - cluster_id = rancher2_cluster_v2.rke2_cluster.cluster_v1_id -} diff --git a/examples/one/README.md b/examples/one/README.md new file mode 100644 index 000000000..c72ff4877 --- /dev/null +++ b/examples/one/README.md @@ -0,0 +1,40 @@ +# Single Server All in One Rancher Deployment + +This module deploys a single Rancher server with all Kubernetes roles. + +This shows the most basic use case for the provider; it functions as a good start for configuring your Rancher deployment. + +## Dependencies + +The `flake.nix` file in the root of the module explains all of the dependencies for the development of the module, it also includes the dependencies to run it. +You can see the list on lines 50-80, but a more specific list is below (with explanations).
+ +- bash -> born again shell with linux core utils facilitates CLI actions +- tfswitch -> handy for installing Terraform at specific versions +- git -> required by Terraform +- curl -> required by Terraform as well as dependent modules (when downloading RKE2 for install) +- openssh -> required by Terraform and used in dependent modules to connect to servers for initial configuration +- openssl -> required by Terraform and used in dependent modules to verify TLS certificates +- ssh-agent -> used for connecting to remote server for initial configuration, you need to have the key you send into the module loaded in your agent +- gh -> the github cli tool, used to find releases when downloading RKE2 for install +- jq -> json parsing tool, used in dependent modules to parse submodule outputs +- kubectl -> used in local exec to patch kubernetes objects +- awscli2 -> the aws cli tool, used in some dependent modules in some use cases (dual-stack) +- yq -> yaml parsing tool, used in dependent modules to parse kubectl outputs +- go -> necessary to run tests + +## Environment Variables + +I like to set my AWS credentials in environment variables: + +- AWS_ROLE -> role to assume when using OIDC +- AWS_REGION -> AWS region to deploy to, make sure there are multiple availability zones when needing HA +- AWS_DEFAULT_REGION -> same as region +- AWS_ACCESS_KEY_ID -> access key, this will make it into the state, please secure it properly +- AWS_SECRET_ACCESS_KEY -> secret key, this will make it into the state, please secure it properly +- AWS_SESSION_TOKEN -> used with temporary AWS credentials, this will make it into the state, please secure it properly +- TF_VAR_aws_access_key_id -> access key, this will make it into the state, please secure it properly +- TF_VAR_aws_secret_access_key -> secret key, this will make it into the state, please secure it properly +- TF_VAR_aws_session_token -> used with temporary AWS credentials, this will make it into the state, please secure it
properly +- TF_VAR_aws_region -> AWS region to deploy to, make sure there are multiple availability zones when needing HA + diff --git a/examples/base/main.tf b/examples/one/main.tf similarity index 93% rename from examples/base/main.tf rename to examples/one/main.tf index e2d844887..e8196bb54 100644 --- a/examples/base/main.tf +++ b/examples/one/main.tf @@ -42,7 +42,7 @@ provider "rancher2" { locals { identifier = var.identifier - example = "base" + example = "basic" project_name = "tf-${substr(md5(join("-", [local.example, local.identifier])), 0, 5)}" username = local.project_name domain = local.project_name @@ -65,7 +65,7 @@ data "http" "myip" { module "rancher" { source = "rancher/aws/rancher2" - version = "v1.2.0" + version = "1.2.2" # project identifier = local.identifier owner = local.owner @@ -96,13 +96,11 @@ module "rancher" { rancher_version = local.rancher_version } -# Create a new rancher2 Token -resource "rancher2_token" "test" { +data "rancher2_cluster" "local" { depends_on = [ module.rancher, rancher2_bootstrap.authenticate, ] - provider = rancher2.default - description = "test token" - ttl = 1200 + provider = rancher2.default + name = "local" } diff --git a/examples/downstream/outputs.tf b/examples/one/outputs.tf similarity index 79% rename from examples/downstream/outputs.tf rename to examples/one/outputs.tf index d9eca66d9..8e218ec41 100644 --- a/examples/downstream/outputs.tf +++ b/examples/one/outputs.tf @@ -16,3 +16,7 @@ output "admin_password" { value = module.rancher.admin_password sensitive = true } +output "cluster_data" { + value = jsonencode(data.rancher2_cluster.local) + sensitive = true +} diff --git a/examples/base/variables.tf b/examples/one/variables.tf similarity index 98% rename from examples/base/variables.tf rename to examples/one/variables.tf index 42979501e..3d7bc84c4 100644 --- a/examples/base/variables.tf +++ b/examples/one/variables.tf @@ -43,7 +43,7 @@ variable "rancher_version" { description = <<-EOT The version of rancher 
to install on the rke2 cluster. EOT - default = "2.10.4" + default = "2.9.1" } variable "file_path" { type = string diff --git a/examples/one/versions.tf b/examples/one/versions.tf new file mode 100644 index 000000000..d3d093a17 --- /dev/null +++ b/examples/one/versions.tf @@ -0,0 +1,53 @@ +terraform { + required_version = ">= 1.5.0" + required_providers { + local = { + source = "hashicorp/local" + version = ">= 2.4" + } + random = { + source = "hashicorp/random" + version = ">= 3.5.1" + } + github = { + source = "integrations/github" + version = ">= 5.44" + } + aws = { + source = "hashicorp/aws" + version = ">= 5.11" + } + http = { + source = "hashicorp/http" + version = ">= 3.4" + } + null = { + source = "hashicorp/null" + version = ">= 3" + } + tls = { + source = "hashicorp/tls" + version = ">= 4.0" + } + acme = { + source = "vancluever/acme" + version = ">= 2.0" + } + cloudinit = { + source = "hashicorp/cloudinit" + version = ">= 2.3.3" + } + helm = { + source = "hashicorp/helm" + version = ">= 2.14" + } + rancher2 = { + source = "rancher/rancher2" + version = ">= 5.0.0" + } + kubernetes = { + source = "hashicorp/kubernetes" + version = ">= 2.31.0" + } + } +} diff --git a/examples/prod/README.md b/examples/prod/README.md new file mode 100644 index 000000000..001be3669 --- /dev/null +++ b/examples/prod/README.md @@ -0,0 +1,42 @@ +# Production Rancher Cluster on AWS + +This module deploys what I would consider a production worthy Rancher cluster, + it splits each Kubernetes role across three nodes which get deployed in different AWS availability zones. +This means there are 3 nodes for the Kubernetes API, 3 nodes for Etcd, and 3 nodes for an HA Rancher deployment. +This is the general recommendations for a production HA RKE2 cluster + and allows for the most flexibility for architecture changes in the future. 
+ +## Dependencies + +The `flake.nix` file in the root of the module explains all of the dependencies for the development of the module, it also includes the dependencies to run it. +You can see the list on lines 50-80, but a more specific list is below (with explanations). + +- bash -> born again shell with linux core utils facilitates CLI actions +- tfswitch -> handy for installing Terraform at specific versions +- git -> required by Terraform +- curl -> required by Terraform as well as dependent modules (when downloading RKE2 for install) +- openssh -> required by Terraform and used in dependent modules to connect to servers for initial configuration +- openssl -> required by Terraform and used in dependent modules to verify TLS certificates +- ssh-agent -> used for connecting to remote server for initial configuration, you need to have the key you send into the module loaded in your agent +- gh -> the github cli tool, used to find releases when downloading RKE2 for install +- jq -> json parsing tool, used in dependent modules to parse submodule outputs +- kubectl -> used in local exec to patch kubernetes objects +- awscli2 -> the aws cli tool, used in some dependent modules in some use cases (dual-stack) +- yq -> yaml parsing tool, used in dependent modules to parse kubectl outputs +- go -> necessary to run tests + +## Environment Variables + +I like to set my AWS credentials in environment variables: + +- AWS_ROLE -> role to assume when using OIDC +- AWS_REGION -> AWS region to deploy to, make sure there are multiple availability zones when needing HA +- AWS_DEFAULT_REGION -> same as region +- AWS_ACCESS_KEY_ID -> access key, this will make it into the state, please secure it properly +- AWS_SECRET_ACCESS_KEY -> secret key, this will make it into the state, please secure it properly +- AWS_SESSION_TOKEN -> used with temporary AWS credentials, this will make it into the state, please secure it properly +- TF_VAR_aws_access_key_id -> access key, this will make 
it into the state, please secure it properly +- TF_VAR_aws_secret_access_key -> secret key, this will make it into the state, please secure it properly +- TF_VAR_aws_session_token -> used with temporary AWS credentials, this will make it into the state, please secure it properly +- TF_VAR_aws_region -> AWS region to deploy to, make sure there are multiple availability zones when needing HA + diff --git a/examples/prod/main.tf b/examples/prod/main.tf new file mode 100644 index 000000000..93a5b2d06 --- /dev/null +++ b/examples/prod/main.tf @@ -0,0 +1,180 @@ +provider "aws" { + default_tags { + tags = { + Id = local.identifier + Owner = local.owner + } + } +} + +provider "acme" { + server_url = "https://acme-v02.api.letsencrypt.org/directory" +} + +provider "github" {} +provider "kubernetes" {} # make sure you set the env variable KUBE_CONFIG_PATH to local_file_path (file_path variable) +provider "helm" {} # make sure you set the env variable KUBE_CONFIG_PATH to local_file_path (file_path variable) + +provider "rancher2" { + alias = "default" + api_url = "https://${local.domain}.${local.zone}" + token_key = module.this.admin_token + timeout = "300s" +} + +provider "rancher2" { + alias = "authenticate" + bootstrap = true + api_url = "https://${local.domain}.${local.zone}" + timeout = "300s" +} + +resource "rancher2_bootstrap" "authenticate" { + depends_on = [ + module.this, + ] + provider = rancher2.authenticate + initial_password = module.this.admin_password + password = module.this.admin_password + token_update = true + token_ttl = 7200 # 2 hours +} + + +locals { + identifier = var.identifier + example = "prod" + project_name = "tf-${substr(md5(join("-", [local.example, local.identifier])), 0, 5)}" + username = local.project_name + domain = local.project_name + zone = var.zone + key_name = var.key_name + key = var.key + owner = var.owner + rke2_version = var.rke2_version + local_file_path = var.file_path + os = "sle-micro-61" + runner_ip = 
chomp(data.http.myip.response_body) # "runner" is the server running Terraform + rancher_version = var.rancher_version + cert_manager_version = "1.16.3" #"1.13.1" + node_configuration = { + "initial" = { + type = "database" + size = "xl" + os = local.os + indirect_access = false + initial = true # this will set the first server as the initial node + } + "db2" = { + type = "database" + size = "xl" + os = local.os + indirect_access = false + initial = false + } + "db3" = { + type = "database" + size = "xl" + os = local.os + indirect_access = false + initial = false + } + "api1" = { + type = "api" + size = "xl" + os = local.os + indirect_access = true + initial = false + } + "api2" = { + type = "api" + size = "xl" + os = local.os + indirect_access = true + initial = false + } + "api3" = { + type = "api" + size = "xl" + os = local.os + indirect_access = true + initial = false + } + "wrk1" = { + type = "worker" + size = "xxl" + os = local.os + indirect_access = true + initial = false + } + "wrk2" = { + type = "worker" + size = "xxl" + os = local.os + indirect_access = true + initial = false + } + "wrk3" = { + type = "worker" + size = "xxl" + os = local.os + indirect_access = true + initial = false + }, + } +} + +data "http" "myip" { + url = "https://ipinfo.io/ip" +} + +module "this" { + source = "rancher/aws/rancher2" + version = "1.2.2" + identifier = local.identifier + owner = local.owner + project_name = local.project_name + domain = local.domain + zone = local.zone + key_name = local.key_name + key = local.key + username = local.username + admin_ip = local.runner_ip + rke2_version = local.rke2_version + local_file_path = local.local_file_path + install_method = "rpm" # rpm only for now, need to figure out local helm chart installs otherwise + cni = "canal" + node_configuration = local.node_configuration + cert_manager_version = local.cert_manager_version + rancher_version = local.rancher_version +} + +# this will fail if the default self signed cert is found
+resource "terraform_data" "get_cert_info" { + depends_on = [ + module.this, + ] + provisioner "local-exec" { + command = <<-EOT + CERT="$(echo | openssl s_client -showcerts -servername ${local.domain}.${local.zone} -connect ${local.domain}.${local.zone}:443 2>/dev/null | openssl x509 -inform pem -noout -text)" + echo "$CERT" + FAKE="$(echo "$CERT" | grep 'Kubernetes Ingress Controller Fake Certificate')" + if [ -z "$FAKE" ]; then + echo "cert is not fake" + exit 0 + else + echo "cert is fake" + exit 1 + fi + EOT + } +} + +data "rancher2_cluster" "local" { + depends_on = [ + module.this, + rancher2_bootstrap.authenticate, + ] + provider = rancher2.default + name = "local" +} diff --git a/examples/prod/outputs.tf b/examples/prod/outputs.tf new file mode 100644 index 000000000..ff1955e42 --- /dev/null +++ b/examples/prod/outputs.tf @@ -0,0 +1,22 @@ +output "kubeconfig" { + value = module.this.kubeconfig + description = <<-EOT + The kubeconfig for the server. + EOT + sensitive = true +} +output "address" { + value = module.this.address +} +output "admin_token" { + value = module.this.admin_token + sensitive = true +} +output "admin_password" { + value = module.this.admin_password + sensitive = true +} +output "cluster_data" { + value = jsonencode(data.rancher2_cluster.local) + sensitive = true +} diff --git a/examples/prod/variables.tf b/examples/prod/variables.tf new file mode 100644 index 000000000..40ab1e365 --- /dev/null +++ b/examples/prod/variables.tf @@ -0,0 +1,54 @@ +variable "identifier" { + type = string + description = <<-EOT + A unique identifier for the project, this helps when generating names for infrastructure items. + EOT +} +variable "owner" { + type = string + description = <<-EOT + The owner of the project, this helps when generating names for infrastructure items. + EOT +} +variable "key_name" { + type = string + description = <<-EOT + The name of an AWS key pair to use for SSH access to the instance.
+ This key should already be added to your ssh agent for server authentication. + EOT +} +variable "key" { + type = string + description = <<-EOT + The contents of an AWS key pair to use for SSH access to the instance. + This is necessary for installing rke2 on the nodes and will be removed after installation. + EOT +} +variable "zone" { + type = string + description = <<-EOT + The Route53 DNS zone to deploy the cluster into. + This is used to generate the DNS name for the cluster. + The zone must already exist. + EOT +} +variable "rke2_version" { + type = string + description = <<-EOT + The version of rke2 to install on the nodes. + EOT +} +variable "rancher_version" { + type = string + description = <<-EOT + The version of rancher to install on the rke2 cluster. + EOT + default = "2.9.1" +} +variable "file_path" { + type = string + description = <<-EOT + The path to the file containing the rke2 install script. + EOT + default = "./rke2" +} diff --git a/examples/prod/versions.tf b/examples/prod/versions.tf new file mode 100644 index 000000000..d3d093a17 --- /dev/null +++ b/examples/prod/versions.tf @@ -0,0 +1,53 @@ +terraform { + required_version = ">= 1.5.0" + required_providers { + local = { + source = "hashicorp/local" + version = ">= 2.4" + } + random = { + source = "hashicorp/random" + version = ">= 3.5.1" + } + github = { + source = "integrations/github" + version = ">= 5.44" + } + aws = { + source = "hashicorp/aws" + version = ">= 5.11" + } + http = { + source = "hashicorp/http" + version = ">= 3.4" + } + null = { + source = "hashicorp/null" + version = ">= 3" + } + tls = { + source = "hashicorp/tls" + version = ">= 4.0" + } + acme = { + source = "vancluever/acme" + version = ">= 2.0" + } + cloudinit = { + source = "hashicorp/cloudinit" + version = ">= 2.3.3" + } + helm = { + source = "hashicorp/helm" + version = ">= 2.14" + } + rancher2 = { + source = "rancher/rancher2" + version = ">= 5.0.0" + } + kubernetes = { + source = "hashicorp/kubernetes" + version 
= ">= 2.31.0" + } + } +} diff --git a/examples/state_import/main.tf b/examples/state_import/main.tf new file mode 100644 index 000000000..f39ffc955 --- /dev/null +++ b/examples/state_import/main.tf @@ -0,0 +1,227 @@ +provider "aws" { + default_tags { + tags = { + Id = local.identifier + Owner = local.owner + } + } +} + +provider "acme" { + server_url = "${local.acme_server_url}/directory" +} + +provider "github" {} +provider "kubernetes" {} # make sure you set the env variable KUBE_CONFIG_PATH to local_file_path (file_path variable) +provider "helm" {} # make sure you set the env variable KUBE_CONFIG_PATH to local_file_path (file_path variable) + +provider "rancher2" { + api_url = "https://${local.domain}.${local.zone}" + token_key = module.rancher.admin_token + timeout = "300s" +} + +locals { + identifier = var.identifier + example = "state_import_downstream" + project_name = "tf-${substr(md5(join("-", [local.example, local.identifier])), 0, 5)}" + tf_data_dir = abspath(var.data_path != null ? var.data_path : path.root) + username = local.project_name + domain = local.project_name + zone = var.zone + key_name = var.key_name + key = var.key + owner = var.owner + rke2_version = var.rke2_version + local_file_path = abspath(var.file_path) + runner_ip = chomp(data.http.myip.response_body) # "runner" is the server running Terraform + rancher_version = var.rancher_version + cert_manager_version = "1.16.3" #"1.13.1" + os = "sle-micro-61" + aws_access_key_id = var.aws_access_key_id + aws_secret_access_key = var.aws_secret_access_key + aws_region = var.aws_region + aws_session_token = var.aws_session_token + aws_instance_type = "m5.large" + node_count = 3 + email = (var.email != "" ? 
var.email : "${local.identifier}@${local.zone}") + acme_server_url = "https://acme-v02.api.letsencrypt.org" + cluster_name = "tf-all-in-one-config" + project_id = data.rancher2_cluster.downstream_cluster.default_project_id + # tflint-ignore: terraform_unused_declarations + fail_project_id = (strcontains(local.project_id, ":") != true ? one([local.project_id, "project_id_malformed"]) : false) +} + +data "http" "myip" { + url = "https://ipinfo.io/ip" +} + +module "rancher" { + source = "rancher/aws/rancher2" + version = "1.2.2" + # project + identifier = local.identifier + owner = local.owner + project_name = local.project_name + domain = local.domain + zone = local.zone + skip_project_cert_generation = true + # access + key_name = local.key_name + key = local.key + username = local.username + admin_ip = local.runner_ip + # rke2 + rke2_version = local.rke2_version + local_file_path = local.local_file_path + install_method = "rpm" # rpm only for now, need to figure out local helm chart installs otherwise + cni = "canal" + node_configuration = { + "rancher" = { + type = "all-in-one" + size = "large" + os = local.os + indirect_access = true + initial = true + } + } + # rancher + rancher_version = local.rancher_version + cert_manager_version = local.cert_manager_version + configure_cert_manager = true + cert_manager_configuration = { + aws_access_key_id = local.aws_access_key_id + aws_secret_access_key = local.aws_secret_access_key + aws_session_token = local.aws_session_token + aws_region = local.aws_region + acme_email = local.email + acme_server_url = local.acme_server_url + } +} + +module "rke2_image" { + source = "rancher/server/aws" + version = "v1.4.0" + server_use_strategy = "skip" + image_use_strategy = "find" + image_type = local.os # this is not required to match Rancher, it just seemed easier in this example +} + +module "downstream_cluster" { + source = "./modules/downstream" + # general + name = local.cluster_name + identifier = local.identifier + owner = 
local.owner + domain = local.domain + zone = local.zone + kubeconfig_path = "${local.local_file_path}/kubeconfig" + # aws access + aws_access_key_id = local.aws_access_key_id + aws_secret_access_key = local.aws_secret_access_key + aws_session_token = trimspace(chomp(local.aws_session_token)) + aws_region = local.aws_region + aws_region_letter = replace( + module.rancher.subnets[keys(module.rancher.subnets)[0]].availability_zone, + local.aws_region, + "" + ) + # aws project info + vpc_id = module.rancher.vpc.id + security_group_id = module.rancher.security_group.id + load_balancer_security_groups = module.rancher.load_balancer_security_groups + subnet_id = module.rancher.subnets[keys(module.rancher.subnets)[0]].id + # node info + aws_instance_type = local.aws_instance_type + ami_id = module.rke2_image.image.id + ami_ssh_user = module.rke2_image.image.user + ami_admin_group = module.rke2_image.image.admin_group + node_count = local.node_count + direct_node_access = { + runner_ip = local.runner_ip + ssh_access_key = chomp(local.key) + ssh_access_user = local.project_name + } + # rke2 info + rke2_version = local.rke2_version + # rancher info + rancher_token = module.rancher.admin_token +} + +data "rancher2_cluster" "downstream_cluster" { + depends_on = [ + module.rancher, + module.rke2_image, + module.downstream_cluster, + ] + name = local.cluster_name +} + +resource "rancher2_namespace" "test" { + depends_on = [ + module.rancher, + module.rke2_image, + module.downstream_cluster, + data.rancher2_cluster.downstream_cluster, + ] + name = "test" + project_id = data.rancher2_cluster.downstream_cluster.default_project_id + description = "testing namespace" + resource_quota { + limit { + limits_cpu = "100m" + limits_memory = "100Mi" + requests_storage = "1Gi" + } + } + container_resource_limit { + limits_cpu = "20m" + limits_memory = "20Mi" + requests_cpu = "1m" + requests_memory = "1Mi" + } +} + +resource "local_file" "import_main" { + depends_on = [ + module.rancher, + 
+ module.rke2_image, + module.downstream_cluster, + data.rancher2_cluster.downstream_cluster, + rancher2_namespace.test, + ] + filename = "${local.tf_data_dir}/tf-rancher-imported/main.tf" + content = templatefile("${path.module}/modules/import/main.tftpl", { + cluster_id = module.downstream_cluster.cluster_id + namespace_id = "${local.project_id}.${rancher2_namespace.test.id}" + }) +} + +module "import" { + source = "./modules/deploy" + depends_on = [ + module.downstream_cluster, + local_file.import_main, + data.rancher2_cluster.downstream_cluster, + rancher2_namespace.test, + ] + deploy_path = "${local.tf_data_dir}/tf-rancher-imported" + data_path = local.tf_data_dir + template_files = [ + "${abspath(path.module)}/modules/import/cloud-config.tftpl", + "${abspath(path.module)}/modules/import/variables.tf", + "${abspath(path.module)}/modules/import/versions.tf", + ] + inputs = <<-EOT + cluster_name = "${local.cluster_name}" + rke2_version = "${local.rke2_version}" + node_count = "${local.node_count}" + rancher_key = "${module.rancher.admin_token}" + domain = "${local.domain}" + zone = "${local.zone}" + machine_config_kind = "${module.downstream_cluster.machine_config_kind}" + machine_config_name = "${module.downstream_cluster.machine_config_name}" + EOT + skip_destroy = true // this is for testing purposes, it prevents an issue where the imported resources destroy the API objects and the main resources error out on destroy (not found) + init = false // this is for testing purposes, it allows us to use dev overrides in the terraformrc to use the locally built binary rather than the registry provider +} diff --git a/examples/state_import/modules/deploy/main.tf b/examples/state_import/modules/deploy/main.tf new file mode 100644 index 000000000..197e61e4c --- /dev/null +++ b/examples/state_import/modules/deploy/main.tf @@ -0,0 +1,224 @@ +# There are many ways to orchestrate Terraform configurations with the goal of breaking it down +# I am using Terraform resources to 
orchestrate Terraform +# I felt this was the best way to accomplish the goal without incurring additional dependencies + +locals { + inputs = var.inputs + inputs_hash = md5(local.inputs) + template_path = var.template_path + template_files = var.template_files + # tflint-ignore: terraform_unused_declarations + fail_no_template = ((local.template_path == null && local.template_files == null) ? one([local.template_path, "missing_template"]) : false) + # tflint-ignore: terraform_unused_declarations + fail_too_much_template = ((local.template_path != null && local.template_files != null) ? one([local.template_path, "template_path_or_template_files"]) : false) + # template_file_list should be a list of absolute file paths (not relative) + template_file_list = ( + local.template_path != null ? + [ + for i in range(length(fileset(local.template_path, "**"))) : + join("/", [local.template_path, tolist(fileset(local.template_path, "**"))[i]]) + ] + : local.template_files + ) + template_files_hash = md5(join("-", [for f in local.template_file_list : filemd5(f)])) + template_file_list_basenames = [for f in local.template_file_list : basename(f)] + outputs = contains(local.template_file_list_basenames, "outputs.tf") + + deploy_path = chomp(var.deploy_path) + environment_variables = var.environment_variables + export_contents = ( + local.environment_variables != null ? + join(";", [for k, v in local.environment_variables : "export ${k}=${v}"]) + : "" + ) + export_hash = md5(local.export_contents) + attempts = var.attempts + interval = var.interval + timeout = var.timeout + init = var.init + init_script = (local.init ? "terraform init -upgrade" : "") + tf_data_dir = abspath(var.data_path != null ? var.data_path : path.root) + skip_destroy = (var.skip_destroy ? 
"true" : "") +} + +resource "terraform_data" "path" { + triggers_replace = { + inputs = local.inputs_hash + files = local.template_files_hash + env = local.export_hash + } + provisioner "local-exec" { + command = <<-EOT + install -d ${local.deploy_path} + EOT + } +} + +# keep the deploy config up to date with the template +resource "local_file" "copy_template" { + depends_on = [ + terraform_data.path + ] + for_each = toset(local.template_file_list) + content = file(each.key) + filename = "${local.deploy_path}/${basename(each.key)}" +} + +resource "local_file" "inputs" { + depends_on = [ + terraform_data.path, + local_file.copy_template, + ] + content = local.inputs + filename = "${local.deploy_path}/inputs.tfvars" +} + +resource "terraform_data" "destroy" { + depends_on = [ + terraform_data.path, + local_file.copy_template, + local_file.inputs, + ] + triggers_replace = { + inputs = local.inputs_hash + files = local.template_files_hash + env = local.export_hash + ec = local.export_contents + dp = local.deploy_path + to = local.timeout + dd = local.tf_data_dir + sd = local.skip_destroy + } + provisioner "local-exec" { + when = destroy + command = <<-EOT + ${self.triggers_replace.ec} + cd ${self.triggers_replace.dp} + export TF_DATA_DIR="${self.triggers_replace.dd}" + if [ -z "${self.triggers_replace.sd}" ]; then + timeout -k 1m ${self.triggers_replace.to} terraform destroy -var-file="${self.triggers_replace.dp}/inputs.tfvars" -auto-approve -state="${self.triggers_replace.dp}/tfstate" || true + else + echo "Not destroying deployed module, it will no longer be managed here." 
+ fi + EOT + } +} + +resource "terraform_data" "create" { + depends_on = [ + terraform_data.path, + local_file.copy_template, + local_file.inputs, + terraform_data.destroy, + ] + triggers_replace = { + inputs = local.inputs_hash + files = local.template_files_hash + env = local.export_hash + } + provisioner "local-exec" { + command = <<-EOT + ${local.export_contents} + cd ${local.deploy_path} + export TF_DATA_DIR="${local.tf_data_dir}" + + ${local.init_script} + + MAX=${local.attempts} + EXITCODE=1 + ATTEMPTS=0 + E=1 + E1=0 + while [ $EXITCODE -gt 0 ] && [ $ATTEMPTS -lt $MAX ]; do + A=0; E1=0 + while [ $E -gt 0 ] && [ $A -lt $MAX ]; do + timeout -k 1m ${local.timeout} terraform apply -var-file="${local.deploy_path}/inputs.tfvars" -auto-approve -state="${local.deploy_path}/tfstate" + E=$? + if [ $E -eq 124 ]; then echo "Apply timed out after ${local.timeout}"; fi + A=$((A+1)) + done + # don't destroy if the last attempt fails; E1 is armed to 1 below so the destroy loop actually runs, and is reset to 0 at the top of each attempt so a stale destroy failure cannot mask a later successful apply + if [ $E -gt 0 ] && [ $ATTEMPTS != $((MAX-1)) ]; then + A1=0; E1=1 + while [ $E1 -gt 0 ] && [ $A1 -lt $MAX ]; do + timeout -k 1m ${local.timeout} terraform destroy -var-file="${local.deploy_path}/inputs.tfvars" -auto-approve -state="${local.deploy_path}/tfstate" + E1=$? + if [ $E1 -eq 124 ]; then echo "Destroy timed out after ${local.timeout}"; fi + A1=$((A1+1)) + done + fi + if [ $E -gt 0 ]; then + echo "apply failed..." + fi + if [ $E1 -gt 0 ]; then + echo "destroy failed..." + fi + if [ $E -gt 0 ] || [ $E1 -gt 0 ]; then + EXITCODE=1 + else + EXITCODE=0 + fi + ATTEMPTS=$((ATTEMPTS+1)) + if [ $EXITCODE -gt 0 ] && [ $ATTEMPTS -lt $MAX ]; then + echo "wait ${local.interval} seconds between attempts..." 
+ sleep ${local.interval} + fi + done + if [ $ATTEMPTS -eq $MAX ]; then echo "max attempts reached..."; fi + if [ $EXITCODE -ne 0 ]; then echo "failure, exit code $EXITCODE..."; fi + if [ $EXITCODE -eq 0 ]; then + echo "success..."; + terraform output -json -state="${local.deploy_path}/tfstate" > ${local.deploy_path}/outputs.json + fi + exit $EXITCODE + EOT + } +} + +data "local_file" "outputs" { + count = (local.outputs ? 1 : 0) + depends_on = [ + terraform_data.path, + local_file.copy_template, + local_file.inputs, + terraform_data.create, + ] + filename = "${local.deploy_path}/outputs.json" +} + +data "local_file" "state" { + depends_on = [ + terraform_data.path, + local_file.copy_template, + local_file.inputs, + terraform_data.create, + ] + filename = "${local.deploy_path}/tfstate" +} + +# this ensures that the parent state can hold the deployed modules outputs +# when you plan on a new server you will see the need to deploy the outputs file if it doesn't exist +resource "local_file" "outputs" { + count = (local.outputs ? 
1 : 0) + depends_on = [ + terraform_data.path, + local_file.copy_template, + local_file.inputs, + terraform_data.create, + ] + content = data.local_file.outputs[0].content + filename = "${local.deploy_path}/outputs.json" +} + +# this ensures that the parent state can hold the deployed modules state +# when you plan on a new server you will see the need to deploy the state file if it doesn't exist +resource "local_file" "state_backup" { + depends_on = [ + terraform_data.path, + local_file.copy_template, + local_file.inputs, + terraform_data.create, + ] + content = data.local_file.state.content + filename = "${local.deploy_path}/tfstate.backup" +} diff --git a/examples/state_import/modules/deploy/outputs.tf b/examples/state_import/modules/deploy/outputs.tf new file mode 100644 index 000000000..96a097729 --- /dev/null +++ b/examples/state_import/modules/deploy/outputs.tf @@ -0,0 +1,14 @@ +output "outputs_json" { + value = try(local_file.outputs[0].content, "") +} + +output "output" { + value = try({ + for i in range(length(keys(jsondecode(local_file.outputs[0].content)))) : + keys(jsondecode(local_file.outputs[0].content))[i] => jsondecode(local_file.outputs[0].content)[keys(jsondecode(local_file.outputs[0].content))[i]].value + }, "") +} + +output "state" { + value = data.local_file.state +} diff --git a/examples/state_import/modules/deploy/variables.tf b/examples/state_import/modules/deploy/variables.tf new file mode 100644 index 000000000..ccee27c23 --- /dev/null +++ b/examples/state_import/modules/deploy/variables.tf @@ -0,0 +1,94 @@ +variable "inputs" { + type = string + description = <<-EOT + Contents of an inputs.tfvars file to save in the deployment path. + EOT + default = "" +} +variable "template_path" { + type = string + description = <<-EOT + Path to the module to deploy. + These files will be copied to the deploy path, not used directly. + This is optional, but one of template_path or template_files must be specified. 
+ Only one of template_path or template_files can be specified. + EOT + default = null +} +variable "template_files" { + type = list(any) + description = <<-EOT + List of file paths that will be copied to the deploy path. + This is optional, but one of template_path or template_files must be specified. + Only one of template_path or template_files can be specified. + EOT + default = null +} +variable "deploy_path" { + type = string + description = <<-EOT + Path to preform deployment in, this will be Terraform's working directory. + EOT +} +variable "data_path" { + type = string + description = <<-EOT + Should match your TF_DATA_DIR environment variable. + This directory is used to stage all of the various files for your implementation. + If left null, this will match "path.root". + This should be a full path, not relative. + EOT + default = null +} +variable "environment_variables" { + type = map(any) + description = <<-EOT + Map of environment variables to set before running Terraform. + Key is the name and Value is the value of the variable. + We export this before running Terraform, eg. "export KEY_1=VARIABLE_1;export KEY_2=VARIABLE_2". + EOT + default = null +} +variable "attempts" { + type = number + description = <<-EOT + Number of attempts to deploy module. + Each time Terraform apply is run we check for a successful exit code, + if the exit code !=0 then we try again, up to the value set in this argument. + EOT + default = 3 +} +variable "interval" { + type = number + description = <<-EOT + A number of seconds to sleep between Terraform apply or destroy attempts. + EOT + default = 30 +} +variable "timeout" { + type = string + description = <<-EOT + A (linux coreutils) timeout DURATION string. + This will be used to kill the Terraform run in case there is an endless loop. + If this DURATION is reached a single TERM will be sent, then KILL 1 minute later. 
+ EOT + default = "45m" +} +variable "init" { + type = bool + description = <<-EOT + Set to false to prevent running Terraform init. + This is helpful when testing a local bin version of the provider. + EOT + default = true +} +variable "skip_destroy" { + type = bool + description = <<-EOT + Set to true to ignore calls to destroy the deployed substate. + State and deploy path will still exist, this essentially divorces the parent from the child. + This only effects specifically calls to destroy the deploy module, not taint or recreate. + Be careful as this can leave objects in your API unmanaged by IAC. + EOT + default = false +} diff --git a/examples/state_import/modules/deploy/versions.tf b/examples/state_import/modules/deploy/versions.tf new file mode 100644 index 000000000..4988e385f --- /dev/null +++ b/examples/state_import/modules/deploy/versions.tf @@ -0,0 +1,9 @@ +terraform { + required_version = ">= 1.5.0" + required_providers { + local = { + source = "hashicorp/local" + version = ">= 2.5.2" + } + } +} diff --git a/examples/state_import/modules/downstream/addKeyToAmazonConfig.sh b/examples/state_import/modules/downstream/addKeyToAmazonConfig.sh new file mode 100755 index 000000000..37245ab37 --- /dev/null +++ b/examples/state_import/modules/downstream/addKeyToAmazonConfig.sh @@ -0,0 +1,58 @@ +#!/bin/sh + +ACCESS_KEY="$1" +SECRET_KEY="$2" + +if [ -z "$ACCESS_KEY" ]; then echo "need access key to proceed..."; exit 1; fi +if [ -z "$SECRET_KEY" ]; then echo "need secret key to proceed..."; exit 1; fi + +NAMESPACE="fleet-default" +NAMES=$(kubectl get Amazonec2Config -n "$NAMESPACE" -o jsonpath='{.items[*].metadata.name}') + +if [ -z "$NAMES" ]; then echo "names not found..."; exit 1; fi + +cat <<EOT > patch.yaml +accessKey: '$ACCESS_KEY' +secretKey: '$SECRET_KEY' +EOT + +for name in $NAMES; do + + kubectl patch Amazonec2Config "$name" -n "$NAMESPACE" --type merge --patch-file patch.yaml + RESULT=$? 
+ if [ $RESULT -ne 0 ]; then + echo "error: $RESULT" + exit $RESULT + fi + + KEY="$(kubectl get Amazonec2Config "$name" -n "$NAMESPACE" -o json | jq -r '.accessKey')" + + if [ -z "$KEY" ] || [ "null" = "$KEY" ]; then + echo "error: key not found on object" + exit 1 + else + if [ "$KEY" != "$ACCESS_KEY" ]; then + echo "error: key not replaced properly" + exit 1 + fi + echo "key replaced properly" + fi + + SECRET="$(kubectl get Amazonec2Config "$name" -n "$NAMESPACE" -o json | jq -r '.secretKey')" + + if [ -z "$SECRET" ] || [ "null" = "$SECRET" ]; then + echo "error: secret not found on object" + exit 1 + else + if [ "$SECRET" != "$SECRET_KEY" ]; then + echo "error: secret not replaced properly" + exit 1 + fi + echo "secret replaced properly" + fi + + echo "Amazonec2Config $name in namespace $NAMESPACE updated." + +done + +rm -f patch.yaml diff --git a/examples/state_import/modules/downstream/main.tf b/examples/state_import/modules/downstream/main.tf new file mode 100644 index 000000000..627b6be32 --- /dev/null +++ b/examples/state_import/modules/downstream/main.tf @@ -0,0 +1,303 @@ + +provider "rancher2" { + api_url = "https://${local.domain}.${local.zone}" + token_key = local.rancher_token + timeout = "300s" +} + +provider "aws" { + default_tags { + tags = { + Id = local.identifier + Owner = local.owner + } + } +} + +locals { + # general + identifier = var.identifier + owner = var.owner + cluster_name = var.name + zone = var.zone + domain = var.domain + kubeconfig_path = var.kubeconfig_path + # aws access + aws_access_key_id = var.aws_access_key_id + aws_secret_access_key = var.aws_secret_access_key + aws_session_token = var.aws_session_token + aws_region = var.aws_region + aws_region_letter = var.aws_region_letter + # networking info + vpc_id = var.vpc_id + subnet_id = var.subnet_id + security_group_id = var.security_group_id + lbsg = sort(var.load_balancer_security_groups) + load_balancer_security_group_id = [ + for i in range(length(local.lbsg)) : + 
local.lbsg[i] if local.lbsg[i] != local.security_group_id + # load balancers only have 2 security groups, the project and its own + # this eliminates the project security group to just return the load balancer's security group + ][0] + downstream_security_group_name = "${local.cluster_name}-sgroup" + # node info + aws_instance_type = var.aws_instance_type + ami_id = var.ami_id + ami_ssh_user = var.ami_ssh_user + node_count = var.node_count + node_ips = { for i in range(local.node_count) : tostring(i) => data.aws_instances.rke2_instance_nodes.public_ips[i] } + node_id = "${local.cluster_name}-nodes" + ami_admin_group = (var.ami_admin_group != "" ? var.ami_admin_group : "tty") + runner_ip = (var.direct_node_access != null ? var.direct_node_access.runner_ip : "10.1.1.1") # the IP running Terraform + ssh_access_key = (var.direct_node_access != null ? var.direct_node_access.ssh_access_key : "fake123abc") + ssh_access_user = (var.direct_node_access != null ? var.direct_node_access.ssh_access_user : "fake") + # rke2 info + rke2_version = var.rke2_version + rancher_token = var.rancher_token +} + +resource "aws_security_group" "downstream_cluster" { + description = "Access to downstream cluster" + name = local.downstream_security_group_name + vpc_id = local.vpc_id + tags = { + Name = local.downstream_security_group_name + } + lifecycle { + ignore_changes = [ + ingress, + egress, + ] + } +} +# this allows servers attached to the project security group to accept connections initiated by the downstream cluster +resource "aws_vpc_security_group_ingress_rule" "downstream_ingress_rancher" { + depends_on = [ + aws_security_group.downstream_cluster, + ] + referenced_security_group_id = aws_security_group.downstream_cluster.id + security_group_id = local.security_group_id + ip_protocol = "-1" +} +# this allows the load balancer to accept connections initiated by the downstream cluster +resource "aws_vpc_security_group_ingress_rule" "downstream_ingress_loadbalancer" { + depends_on = 
[ + aws_security_group.downstream_cluster, + ] + referenced_security_group_id = aws_security_group.downstream_cluster.id + security_group_id = local.load_balancer_security_group_id + ip_protocol = "-1" +} + +# this allows the downstream cluster to reach out to any public ipv4 address +resource "aws_vpc_security_group_egress_rule" "downstream_egress_ipv4" { + depends_on = [ + aws_security_group.downstream_cluster, + ] + ip_protocol = "-1" + cidr_ipv4 = "0.0.0.0/0" + security_group_id = aws_security_group.downstream_cluster.id +} +# this allows the downstream cluster to reach out to any public ipv6 address +resource "aws_vpc_security_group_egress_rule" "downstream_egress_ipv6" { + depends_on = [ + aws_security_group.downstream_cluster, + ] + ip_protocol = "-1" + cidr_ipv6 = "::/0" + security_group_id = aws_security_group.downstream_cluster.id +} +# this allows the downstream cluster to reach out to any server attached to the project security group +resource "aws_vpc_security_group_egress_rule" "downstream_egress_project_link" { + depends_on = [ + aws_security_group.downstream_cluster, + ] + referenced_security_group_id = local.security_group_id + security_group_id = aws_security_group.downstream_cluster.id + ip_protocol = "-1" +} +# this allows nodes to talk to each other +resource "aws_vpc_security_group_ingress_rule" "downstream_ingress_internal_ipv4" { + depends_on = [ + aws_security_group.downstream_cluster, + ] + ip_protocol = "-1" + cidr_ipv4 = "10.0.0.0/16" + security_group_id = aws_security_group.downstream_cluster.id +} +resource "rancher2_machine_config_v2" "all_in_one" { + depends_on = [ + aws_security_group.downstream_cluster, + aws_vpc_security_group_ingress_rule.downstream_ingress_rancher, + aws_vpc_security_group_egress_rule.downstream_egress_ipv4, + aws_vpc_security_group_egress_rule.downstream_egress_ipv6, + aws_vpc_security_group_egress_rule.downstream_egress_project_link, + ] + generate_name = local.cluster_name + amazonec2_config { + ami = 
local.ami_id + region = local.aws_region + security_group = [local.downstream_security_group_name] + subnet_id = local.subnet_id + vpc_id = local.vpc_id + zone = local.aws_region_letter + session_token = local.aws_session_token + instance_type = local.aws_instance_type + ssh_user = local.ami_ssh_user + tags = join(",", ["Id", local.identifier, "Owner", local.owner, "NodeId", local.node_id]) + userdata = <<-EOT + #cloud-config + + merge_how: + - name: list + settings: [replace] + - name: dict + settings: [replace] + + users: + - name: ${local.ssh_access_user} + gecos: ${local.ssh_access_user} + sudo: ALL=(ALL) NOPASSWD:ALL + groups: users, ${local.ami_admin_group} + lock_passwd: true + ssh_authorized_keys: + - ${local.ssh_access_key} + homedir: /home/${local.ssh_access_user} + EOT + } +} +resource "terraform_data" "patch_machine_configs" { + depends_on = [ + aws_security_group.downstream_cluster, + aws_vpc_security_group_ingress_rule.downstream_ingress_rancher, + aws_vpc_security_group_egress_rule.downstream_egress_ipv4, + aws_vpc_security_group_egress_rule.downstream_egress_ipv6, + aws_vpc_security_group_egress_rule.downstream_egress_project_link, + rancher2_machine_config_v2.all_in_one, + ] + triggers_replace = { + core_config = rancher2_machine_config_v2.all_in_one.id + } + provisioner "local-exec" { + command = <<-EOT + export KUBECONFIG=${local.kubeconfig_path}; + ${path.module}/addKeyToAmazonConfig.sh "${local.aws_access_key_id}" "${local.aws_secret_access_key}" + EOT + } +} + +resource "rancher2_cluster_v2" "rke2_cluster" { + depends_on = [ + aws_security_group.downstream_cluster, + aws_vpc_security_group_ingress_rule.downstream_ingress_rancher, + aws_vpc_security_group_egress_rule.downstream_egress_ipv4, + aws_vpc_security_group_egress_rule.downstream_egress_ipv6, + aws_vpc_security_group_egress_rule.downstream_egress_project_link, + rancher2_machine_config_v2.all_in_one, + terraform_data.patch_machine_configs, + ] + name = local.cluster_name + 
kubernetes_version = local.rke2_version + enable_network_policy = true + rke_config { + machine_pools { + name = local.cluster_name + control_plane_role = true + etcd_role = true + worker_role = true + quantity = local.node_count + machine_config { + kind = rancher2_machine_config_v2.all_in_one.kind + name = rancher2_machine_config_v2.all_in_one.name + } + } + } + timeouts { + create = "120m" + } +} + +resource "time_sleep" "wait_for_nodes" { + depends_on = [ + aws_security_group.downstream_cluster, + aws_vpc_security_group_ingress_rule.downstream_ingress_rancher, + aws_vpc_security_group_egress_rule.downstream_egress_ipv4, + aws_vpc_security_group_egress_rule.downstream_egress_ipv6, + aws_vpc_security_group_egress_rule.downstream_egress_project_link, + rancher2_machine_config_v2.all_in_one, + terraform_data.patch_machine_configs, + ] + create_duration = "120s" +} + +data "aws_instances" "rke2_instance_nodes" { + depends_on = [ + aws_security_group.downstream_cluster, + aws_vpc_security_group_ingress_rule.downstream_ingress_rancher, + aws_vpc_security_group_egress_rule.downstream_egress_ipv4, + aws_vpc_security_group_egress_rule.downstream_egress_ipv6, + aws_vpc_security_group_egress_rule.downstream_egress_project_link, + rancher2_machine_config_v2.all_in_one, + terraform_data.patch_machine_configs, + time_sleep.wait_for_nodes, + ] + filter { + name = "tag:NodeId" + values = [local.node_id] + } +} + +# this allows the load balancer to accept connections initiated by the downstream cluster's public ip addresses +# this weird in-flight grab of the nodes and manipulating the security groups is not good, +# but the only way to allow ingress when the downstream cluster has public IPs +# FYI: security group references only work with private IPs +resource "aws_vpc_security_group_ingress_rule" "downstream_public_ingress_loadbalancer" { + depends_on = [ + aws_security_group.downstream_cluster, + aws_vpc_security_group_ingress_rule.downstream_ingress_rancher, + 
aws_vpc_security_group_egress_rule.downstream_egress_ipv4, + aws_vpc_security_group_egress_rule.downstream_egress_ipv6, + aws_vpc_security_group_egress_rule.downstream_egress_project_link, + rancher2_machine_config_v2.all_in_one, + terraform_data.patch_machine_configs, + time_sleep.wait_for_nodes, + data.aws_instances.rke2_instance_nodes, + ] + for_each = local.node_ips + security_group_id = local.load_balancer_security_group_id + ip_protocol = "-1" + cidr_ipv4 = "${each.value}/32" +} + +resource "aws_vpc_security_group_ingress_rule" "downstream_public_ingress_runner" { + depends_on = [ + aws_security_group.downstream_cluster, + aws_vpc_security_group_ingress_rule.downstream_ingress_rancher, + aws_vpc_security_group_egress_rule.downstream_egress_ipv4, + aws_vpc_security_group_egress_rule.downstream_egress_ipv6, + aws_vpc_security_group_egress_rule.downstream_egress_project_link, + rancher2_machine_config_v2.all_in_one, + terraform_data.patch_machine_configs, + time_sleep.wait_for_nodes, + data.aws_instances.rke2_instance_nodes, + ] + security_group_id = aws_security_group.downstream_cluster.id + ip_protocol = "tcp" + from_port = 22 + to_port = 22 + cidr_ipv4 = "${local.runner_ip}/32" +} + +resource "rancher2_cluster_sync" "sync" { + depends_on = [ + aws_security_group.downstream_cluster, + aws_vpc_security_group_ingress_rule.downstream_ingress_rancher, + aws_vpc_security_group_egress_rule.downstream_egress_ipv4, + aws_vpc_security_group_egress_rule.downstream_egress_ipv6, + aws_vpc_security_group_egress_rule.downstream_egress_project_link, + rancher2_machine_config_v2.all_in_one, + terraform_data.patch_machine_configs, + rancher2_cluster_v2.rke2_cluster, + ] + cluster_id = rancher2_cluster_v2.rke2_cluster.cluster_v1_id +} diff --git a/examples/state_import/modules/downstream/outputs.tf b/examples/state_import/modules/downstream/outputs.tf new file mode 100644 index 000000000..dbb0a1e56 --- /dev/null +++ b/examples/state_import/modules/downstream/outputs.tf @@ -0,0 
+1,23 @@ +output "cluster_id" { + value = rancher2_cluster_v2.rke2_cluster.id +} + +output "cluster_v1_id" { + value = rancher2_cluster_v2.rke2_cluster.cluster_v1_id +} + +output "machine_config_kind" { + value = rancher2_machine_config_v2.all_in_one.kind +} + +output "machine_config_name" { + value = rancher2_machine_config_v2.all_in_one.name +} + +output "downstream_security_group" { + value = local.downstream_security_group_name +} + +output "node_id" { + value = local.node_id +} diff --git a/examples/state_import/modules/downstream/variables.tf b/examples/state_import/modules/downstream/variables.tf new file mode 100644 index 000000000..165100ea2 --- /dev/null +++ b/examples/state_import/modules/downstream/variables.tf @@ -0,0 +1,160 @@ + +variable "name" { + type = string + description = <<-EOT + The name to give the new downstream cluster. + EOT +} +variable "identifier" { + type = string + description = <<-EOT + A unique string that identifies the project as a whole. + EOT +} +variable "owner" { + type = string + description = <<-EOT + A string to identify the "owner" of the resource, this is added as a tag to all resources. + This is important both for finding resources in the future and removing them in case of problems. + EOT +} +variable "zone" { + type = string + description = <<-EOT + The DNS domain zone to add the new Rancher cluster to. + eg. "example.com" in "rancher.example.com" + EOT +} +variable "domain" { + type = string + description = <<-EOT + The DNS domain name to add the new Rancher cluster as in the zone. + eg. "rancher" in "rancher.example.com" + EOT +} +variable "aws_access_key_id" { + type = string + description = <<-EOT + The AWS access key to use to deploy the machines. + We suggest using temporary credentials. + EOT +} +variable "aws_secret_access_key" { + type = string + description = <<-EOT + The AWS secret key to use to deploy the machines. + We suggest using temporary credentials. 
+ EOT +} +variable "aws_session_token" { + type = string + description = <<-EOT + The AWS session token to use to deploy the machines. + While this is not required we suggest using it. + EOT + default = "" +} +variable "aws_region" { + type = string + description = <<-EOT + The AWS region name to deploy to. + EOT +} +variable "aws_region_letter" { + type = string + description = <<-EOT + The letter of the availability zone from the full region name. + Eg. in AZ "us-west-2a", this would be "a", in "us-west-2b" this would be "b". + EOT +} +variable "vpc_id" { + type = string + description = <<-EOT + The id of the VPC where the cluster will be deployed. + Currently this expects the same VPC as the Rancher cluster. + EOT +} +variable "security_group_id" { + type = string + description = <<-EOT + The id of the security group to add the downstream cluster's security group to. + EOT +} + +variable "load_balancer_security_groups" { + type = list(any) + description = <<-EOT + The security groups for the load balancer. + EOT +} +variable "subnet_id" { + type = string + description = <<-EOT + The id of the subnet to deploy the cluster to. + Currently we only support deploying to the same subnet as the Rancher cluster. + EOT +} +variable "aws_instance_type" { + type = string + description = <<-EOT + The AWS instance type to deploy. + EOT +} +variable "ami_id" { + type = string + description = <<-EOT + The AWS id for the OS image to deploy the cluster on. + EOT +} +variable "ami_ssh_user" { + type = string + description = <<-EOT + The ssh user that is default for the image, the deployment will replace this user. + EOT +} +variable "ami_admin_group" { + type = string + description = <<-EOT + The group name for the root/admin user. + Usually "wheel" or "admin". + EOT +} +variable "node_count" { + type = number + description = <<-EOT + The number of all in one nodes to deploy. 
+ EOT +} +variable "direct_node_access" { + type = object({ + runner_ip = string + ssh_access_key = string + ssh_access_user = string + }) + description = <<-EOT + This object has the information to give the computer running Terraform (runner) the ability to directly access the nodes provisioned by Rancher. + When left in the default state no access will be given. + This requires the downstream cluster to have public IP addresses. + EOT + default = null +} +variable "rke2_version" { + type = string + description = <<-EOT + The RKE2 version to deploy. + This module only supports RKE2. + EOT +} +variable "rancher_token" { + type = string + description = <<-EOT + The Rancher authentication token to use. + EOT +} +variable "kubeconfig_path" { + type = string + description = <<-EOT + The path to the cluster's kubeconfig file. + This is used to patch the machine config to allow for temporary credentials. + EOT +} diff --git a/examples/state_import/modules/downstream/versions.tf b/examples/state_import/modules/downstream/versions.tf new file mode 100644 index 000000000..2539bf8db --- /dev/null +++ b/examples/state_import/modules/downstream/versions.tf @@ -0,0 +1,17 @@ +terraform { + required_version = ">= 1.5.0" + required_providers { + aws = { + source = "hashicorp/aws" + version = ">= 5.11" + } + rancher2 = { + source = "rancher/rancher2" + version = ">= 5.0.0" + } + time = { + source = "hashicorp/time" + version = ">= 0.12.0" + } + } +} diff --git a/examples/state_import/modules/import/cloud-config.tftpl b/examples/state_import/modules/import/cloud-config.tftpl new file mode 100644 index 000000000..89bd824de --- /dev/null +++ b/examples/state_import/modules/import/cloud-config.tftpl @@ -0,0 +1,17 @@ +#cloud-config + +merge_how: + - name: list + settings: [replace] + - name: dict + settings: [replace] + +users: + - name: ${ssh_access_user} + gecos: ${ssh_access_user} + sudo: ALL=(ALL) NOPASSWD:ALL + groups: users, ${ami_admin_group} + lock_passwd: true + 
ssh_authorized_keys: + - ${ssh_access_key} + homedir: /home/${ssh_access_user} diff --git a/examples/state_import/modules/import/main.tftpl b/examples/state_import/modules/import/main.tftpl new file mode 100644 index 000000000..1cee2c8cc --- /dev/null +++ b/examples/state_import/modules/import/main.tftpl @@ -0,0 +1,74 @@ + +provider "rancher2" { + api_url = "https://$${local.domain}.$${local.zone}" + token_key = local.rancher_key + timeout = "300s" +} + +locals { + zone = var.zone + domain = var.domain + rancher_key = var.rancher_key + rke2_version = var.rke2_version + cluster_name = var.cluster_name + node_count = var.node_count + machine_config_name = var.machine_config_name + machine_config_kind = var.machine_config_kind +} + +import { + to = rancher2_cluster_v2.rke2_cluster + id = "${cluster_id}" +} +resource "rancher2_cluster_v2" "rke2_cluster" { + name = local.cluster_name + kubernetes_version = local.rke2_version + enable_network_policy = true + rke_config { + machine_pools { + name = local.cluster_name + control_plane_role = true + etcd_role = true + worker_role = true + quantity = local.node_count + machine_config { + kind = local.machine_config_kind + name = local.machine_config_name + } + } + } + timeouts { + create = "120m" + } +} + +data "rancher2_cluster" "downstream_cluster" { + name = local.cluster_name +} + +import { + to = rancher2_namespace.imported + id = "${namespace_id}" +} +resource "rancher2_namespace" "imported" { + depends_on = [ + data.rancher2_cluster.downstream_cluster, + ] + name = "test" + project_id = data.rancher2_cluster.downstream_cluster.default_project_id + description = "testing namespace" + wait_for_cluster = false + resource_quota { + limit { + limits_cpu = "100m" + limits_memory = "100Mi" + requests_storage = "1Gi" + } + } + container_resource_limit { + limits_cpu = "20m" + limits_memory = "20Mi" + requests_cpu = "1m" + requests_memory = "1Mi" + } +} diff --git a/examples/state_import/modules/import/outputs.tf 
b/examples/state_import/modules/import/outputs.tf new file mode 100644 index 000000000..40b8b9ebc --- /dev/null +++ b/examples/state_import/modules/import/outputs.tf @@ -0,0 +1,3 @@ +output "project_id" { + value = data.rancher2_cluster.downstream_cluster.default_project_id +} diff --git a/examples/state_import/modules/import/variables.tf b/examples/state_import/modules/import/variables.tf new file mode 100644 index 000000000..f80ed03a5 --- /dev/null +++ b/examples/state_import/modules/import/variables.tf @@ -0,0 +1,24 @@ +variable "cluster_name" { + type = string +} +variable "rke2_version" { + type = string +} +variable "node_count" { + type = number +} +variable "rancher_key" { + type = string +} +variable "domain" { + type = string +} +variable "zone" { + type = string +} +variable "machine_config_kind" { + type = string +} +variable "machine_config_name" { + type = string +} diff --git a/examples/state_import/modules/import/versions.tf b/examples/state_import/modules/import/versions.tf new file mode 100644 index 000000000..8d5dbe837 --- /dev/null +++ b/examples/state_import/modules/import/versions.tf @@ -0,0 +1,9 @@ +terraform { + required_version = ">= 1.5.0" + required_providers { + rancher2 = { + source = "rancher/rancher2" + version = ">= 6.0.0" + } + } +} diff --git a/examples/state_import/variables.tf b/examples/state_import/variables.tf new file mode 100644 index 000000000..eb4716168 --- /dev/null +++ b/examples/state_import/variables.tf @@ -0,0 +1,102 @@ +variable "key_name" { + type = string + description = <<-EOT + The name of an AWS key pair to use for SSH access to the instance. + This key should already be added to your ssh agent for server authentication. + EOT +} +variable "key" { + type = string + description = <<-EOT + The contents of an AWS key pair's public key to use for SSH access to the instance. 
+ EOT +} +variable "identifier" { + type = string + description = <<-EOT + A unique identifier for the project, this helps when generating names for infrastructure items. + EOT +} +variable "owner" { + type = string + description = <<-EOT + The owner of the project, this helps when generating names for infrastructure items. + EOT +} +variable "zone" { + type = string + description = <<-EOT + The Route53 DNS zone to deploy the cluster into. + This is used to generate the DNS name for the cluster. + The zone must already exist. + EOT +} +variable "rke2_version" { + type = string + description = <<-EOT + The version of rke2 to install on the nodes. + eg. v1.30.2+rke2r1 + EOT +} +variable "rancher_version" { + type = string + description = <<-EOT + The version of rancher to install on the rke2 cluster. + EOT + default = "2.9.2" +} +variable "file_path" { + type = string + description = <<-EOT + The path to the file containing the rke2 install script. + EOT + default = "./rke2" +} +variable "aws_access_key_id" { + type = string + description = <<-EOT + AWS access key ID. + EOT + sensitive = true +} +variable "aws_secret_access_key" { + type = string + description = <<-EOT + AWS secret key for EC2 services. + EOT + sensitive = true +} +variable "aws_session_token" { + type = string + description = <<-EOT + AWS session token for EC2 services. + If left empty the AWS provider will assume you are using permanent AWS credentials. + EOT + sensitive = true + default = "" +} +variable "aws_region" { + type = string + description = <<-EOT + AWS region for EC2 services. + EOT + sensitive = true +} +variable "email" { + type = string + description = <<-EOT + Email used for TLS certification registration. + If left blank this will be @. + EOT + default = "" +} +variable "data_path" { + type = string + description = <<-EOT + Should match your TF_DATA_DIR environment variable. + This directory is used to stage all of the various files for your implementation.
+ If left null, this will match "path.root". + This should be a full file path, not relative. + EOT + default = null +} diff --git a/examples/state_import/versions.tf b/examples/state_import/versions.tf new file mode 100644 index 000000000..d3d093a17 --- /dev/null +++ b/examples/state_import/versions.tf @@ -0,0 +1,53 @@ +terraform { + required_version = ">= 1.5.0" + required_providers { + local = { + source = "hashicorp/local" + version = ">= 2.4" + } + random = { + source = "hashicorp/random" + version = ">= 3.5.1" + } + github = { + source = "integrations/github" + version = ">= 5.44" + } + aws = { + source = "hashicorp/aws" + version = ">= 5.11" + } + http = { + source = "hashicorp/http" + version = ">= 3.4" + } + null = { + source = "hashicorp/null" + version = ">= 3" + } + tls = { + source = "hashicorp/tls" + version = ">= 4.0" + } + acme = { + source = "vancluever/acme" + version = ">= 2.0" + } + cloudinit = { + source = "hashicorp/cloudinit" + version = ">= 2.3.3" + } + helm = { + source = "hashicorp/helm" + version = ">= 2.14" + } + rancher2 = { + source = "rancher/rancher2" + version = ">= 5.0.0" + } + kubernetes = { + source = "hashicorp/kubernetes" + version = ">= 2.31.0" + } + } +} diff --git a/flake.lock b/flake.lock index 5d42af72d..e412b031e 100644 --- a/flake.lock +++ b/flake.lock @@ -20,11 +20,11 @@ }, "nixpkgs": { "locked": { - "lastModified": 1744868846, - "narHash": "sha256-5RJTdUHDmj12Qsv7XOhuospjAjATNiTMElplWnJE9Hs=", + "lastModified": 1747728033, + "narHash": "sha256-NnXFQu7g4LnvPIPfJmBuZF7LFy/fey2g2+LCzjQhTUk=", "owner": "NixOS", "repo": "nixpkgs", - "rev": "ebe4301cbd8f81c4f8d3244b3632338bbeb6d49c", + "rev": "2f9173bde1d3fbf1ad26ff6d52f952f9e9da52ea", "type": "github" }, "original": { diff --git a/scripts/run_tests.sh b/scripts/run_tests.sh index cc5ffd78b..5865c4e43 100755 --- a/scripts/run_tests.sh +++ b/scripts/run_tests.sh @@ -13,23 +13,29 @@ rerun_failed=false specific_test="" specific_package="" skip_build=false +cleanup_id="" -while 
getopts ":r:t:p:s:" opt; do +while getopts ":r:t:p:s:c:" opt; do case $opt in r) rerun_failed=true ;; t) specific_test="$OPTARG" ;; p) specific_package="$OPTARG" ;; s) skip_build=true ;; + c) cleanup_id="$OPTARG" ;; \?) cat <&2 && exit 1 ;; Invalid option -$OPTARG, valid options are - -r to re run failed tests + -r to re-run failed tests, only used when running multiple tests -t to specify a specific test (eg. TestBase) -p to specify a specific test package (eg. base) -s to skip building the provider binary and test using a released version + -c to run clean up only with the given id (eg. abc123) EOT esac done +if [ -n "$cleanup_id" ]; then + export IDENTIFIER="$cleanup_id" +fi run_tests() { local rerun=$1 @@ -40,25 +46,20 @@ run_tests() { if [ "false" = "$SKIP_BUILD" ]; then install -d "$REPO_ROOT/.terraform" || true touch "$REPO_ROOT/.terraform/terraformrc" || true + export TF_CLI_CONFIG_FILE="$REPO_ROOT/.terraform/terraformrc" cat < "$REPO_ROOT/.terraform/terraformrc" provider_installation { dev_overrides { "rancher/rancher2" = "$REPO_ROOT/bin" } + direct { + exclude = [] + } } EOF fi - # Find the tests directory - TEST_DIR="" - if [ -d "test" ]; then - TEST_DIR="test" - elif [ -d "test/tests" ]; then - TEST_DIR="test/tests" - else - echo "Error: Unable to find tests directory" >&2 - exit 1 - fi + TEST_DIR="$REPO_ROOT/test" echo "" > "/tmp/${IDENTIFIER}_test.log" rm -f "/tmp/${IDENTIFIER}_failed_tests.txt" @@ -97,14 +98,14 @@ EOF if [ -n "$specific_package" ]; then package_pattern="$specific_package" else - package_pattern="..." + package_pattern="/..." fi # shellcheck disable=SC2086 gotestsum \ --format=standard-verbose \ --jsonfile "/tmp/${IDENTIFIER}_test.log" \ --post-run-command "sh /tmp/${IDENTIFIER}_test-processor" \ - --packages "$REPO_ROOT/$TEST_DIR/$package_pattern" \ + --packages "$TEST_DIR$package_pattern" \ -- \ -parallel=2 \ -count=1 \ @@ -125,27 +126,42 @@ if [ -z "$IDENTIFIER" ]; then export IDENTIFIER fi echo "id is: $IDENTIFIER..." 
+ if [ -z "$GITHUB_TOKEN" ]; then echo "GITHUB_TOKEN isn't set"; else echo "GITHUB_TOKEN is set"; fi if [ -z "$GITHUB_OWNER" ]; then echo "GITHUB_OWNER isn't set"; else echo "GITHUB_OWNER is set"; fi if [ -z "$ZONE" ]; then echo "ZONE isn't set"; else echo "ZONE is set"; fi echo 'if tmp directory is missing, try restarting dev environment' -if [ "false" = "$skip_build" ]; then - echo 'building...' - $REPO_ROOT/scripts/gobuild.sh - export SKIP_BUILD="false" -else - echo "skipping build..." - export SKIP_BUILD="true" -fi +if [ -z "$cleanup_id" ]; then + if [ "false" = "$skip_build" ]; then + echo 'building...' + $REPO_ROOT/scripts/gobuild.sh + export SKIP_BUILD="false" + else + echo "skipping build..." + export SKIP_BUILD="true" + fi -# Run tests initially -run_tests false + # Test if tests can compile + echo "checking tests for compile errors..." + cd "$REPO_ROOT/test" + go mod tidy || { C=$?; echo "failed to tidy, exit code $C"; exit $C; } # capture $? right after the failure; inside 'if !' it is always 0 + + for file in $(find $REPO_ROOT/test -name '*.go'); do + echo "found $file"; + go test -c "$file" -o "$file.test" || { C=$?; echo "failed to compile $file, exit code $C"; exit $C; } + rm -rf "$file.test" + done + echo "compile checks passed..." -# Check if we need to rerun failed tests -if [ "$rerun_failed" = true ] && [ -f "/tmp/${IDENTIFIER}_failed_tests.txt" ]; then - echo "Rerunning failed tests..." - run_tests true + # Run tests initially + run_tests false + + # Check if we need to rerun failed tests + if [ "$rerun_failed" = true ] && [ -f "/tmp/${IDENTIFIER}_failed_tests.txt" ]; then + echo "Rerunning failed tests..." + run_tests true + fi fi echo "Clearing leftovers with Id $IDENTIFIER in $AWS_REGION..."
@@ -166,8 +182,8 @@ if [ -n "$IDENTIFIER" ]; then attempts=0 # shellcheck disable=SC2143 - while [ -n "$(leftovers -d --iaas=aws --aws-region="$AWS_REGION" --type="ec2-key-pair" --filter="tf-$IDENTIFIER" | grep -v 'AccessDenied')" ] && [ $attempts -lt 3 ]; do - leftovers --iaas=aws --aws-region="$AWS_REGION" --type="ec2-key-pair" --filter="tf-$IDENTIFIER" --no-confirm | grep -v 'AccessDenied' || true + while [ -n "$(leftovers -d --iaas=aws --aws-region="$AWS_REGION" --type="ec2-key-pair" --filter="terraform-ci-$IDENTIFIER" | grep -v 'AccessDenied')" ] && [ $attempts -lt 3 ]; do + leftovers --iaas=aws --aws-region="$AWS_REGION" --type="ec2-key-pair" --filter="terraform-ci-$IDENTIFIER" --no-confirm | grep -v 'AccessDenied' || true sleep 10 attempts=$((attempts + 1)) done @@ -177,6 +193,19 @@ if [ -n "$IDENTIFIER" ]; then fi fi +# attempts=0 +# # shellcheck disable=SC2143 +# while [ -n "$(leftovers -d --iaas=aws --aws-region="$AWS_REGION" --type="elbv2-target-group" --filter="tf-" | grep -v 'AccessDenied')" ] && [ $attempts -lt 3 ]; do +# leftovers --iaas=aws --aws-region="$AWS_REGION" --type="elbv2-target-group" --filter="tf-" --no-confirm | grep -v 'AccessDenied' || true +# sleep 10 +# attempts=$((attempts + 1)) +# done + +# if [ $attempts -eq 3 ]; then +# echo "Warning: Failed to clear all EC2 key pairs after 3 attempts." +# fi +# fi + if [ -f "/tmp/${IDENTIFIER}_failed_tests.txt" ]; then echo "done, test failed" EXIT=1 diff --git a/test/downstream/downstream_test.go b/test/downstream/downstream_test.go index 0d7ce6a37..1d29fc4a7 100644 --- a/test/downstream/downstream_test.go +++ b/test/downstream/downstream_test.go @@ -12,18 +12,16 @@ import ( util "github.com/rancher/terraform-provider-rancher2/test" ) -// This test makes sure we can configure Rancher to deploy a downstream rke2 cluster on AWS. 
-func TestDownstream(t *testing.T) { +func TestDownstreamBasic(t *testing.T) { t.Parallel() id := util.GetId() region := util.GetRegion() accessKey := util.GetAwsAccessKey() secretKey := util.GetAwsSecretKey() sessionToken := util.GetAwsSessionToken() - directory := "downstream" + directory := "deploy_rke2" owner := "terraform-ci@suse.com" util.SetAcmeServer() - build := util.GetBuild() repoRoot, err := filepath.Abs(g.GetRepoRoot(t)) if err != nil { @@ -43,11 +41,16 @@ func TestDownstream(t *testing.T) { os.RemoveAll(testDir) t.Fatalf("Error creating test key pair: %s", err) } + err = os.WriteFile(testDir+"/id_rsa", []byte(keyPair.KeyPair.PrivateKey), 0600) + if err != nil { + os.RemoveAll(testDir) + t.Fatalf("Error creating test key pair: %s", err) + } sshAgent := ssh.SshAgentWithKeyPair(t, keyPair.KeyPair) t.Logf("Key %s created and added to agent", keyPair.Name) // use oldest RKE2, remember it releases much more than Rancher - _, _, rke2Version, err := util.GetReleases("rancher", "rke2") + _, _, rke2Version, err := util.GetRke2Releases() if err != nil { os.RemoveAll(testDir) aws.DeleteEC2KeyPair(t, keyPair) @@ -55,8 +58,12 @@ func TestDownstream(t *testing.T) { t.Fatalf("Error getting Rke2 release version: %s", err) } - // use latest Rancher, due to community patch issue - rancherVersion, _, _, err := util.GetReleases("rancher", "rancher") + rancherVersion := os.Getenv("RANCHER_VERSION") + if rancherVersion == "" { + // use stable version if not specified + // using stable prevents problems where the Rancher provider hasn't released to fit the latest Rancher + _, rancherVersion, _, err = util.GetRancherReleases() + } if err != nil { os.RemoveAll(testDir) aws.DeleteEC2KeyPair(t, keyPair) @@ -68,31 +75,31 @@ func TestDownstream(t *testing.T) { TerraformDir: exampleDir, // Variables to pass to our Terraform code using -var options Vars: map[string]interface{}{ - "identifier": id, - "owner": owner, - "key_name": keyPair.Name, - "key": keyPair.KeyPair.PublicKey, - 
"zone": os.Getenv("ZONE"), - "rke2_version": rke2Version, - "rancher_version": rancherVersion, - "file_path": testDir, - "aws_region": region, + "identifier": id, + "owner": owner, + "key_name": keyPair.Name, + "key": keyPair.KeyPair.PublicKey, + "zone": os.Getenv("ZONE"), + "rke2_version": rke2Version, + "rancher_version": rancherVersion, + "file_path": testDir, + "aws_access_key_id": accessKey, + "aws_secret_access_key": secretKey, + "aws_session_token": sessionToken, + "aws_region": region, }, // Environment variables to set when running Terraform EnvVars: map[string]string{ - "AWS_DEFAULT_REGION": region, - "AWS_REGION": region, - "AWS_ACCESS_KEY_ID": accessKey, - "AWS_SECRET_ACCESS_KEY": secretKey, - "AWS_SESSION_TOKEN": sessionToken, - // Adding AWS and kubeconfig as environment variables so that they are not displayed in the log + "AWS_DEFAULT_REGION": region, + "AWS_REGION": region, + "TF_DATA_DIR": testDir, + "TF_IN_AUTOMATION": "1", + "KUBECONFIG": testDir + "/kubeconfig", + "KUBE_CONFIG_PATH": testDir, + // Adding these as environment variables so that they are not displayed in the log "TF_VAR_aws_access_key_id": accessKey, "TF_VAR_aws_secret_access_key": secretKey, "TF_VAR_aws_session_token": sessionToken, - "KUBECONFIG": testDir + "/kubeconfig", - "KUBE_CONFIG_PATH": testDir, - "TF_DATA_DIR": testDir, - "TF_IN_AUTOMATION": "1", "TF_CLI_ARGS_plan": "-no-color -state=" + testDir + "/tfstate", "TF_CLI_ARGS_apply": "-no-color -state=" + testDir + "/tfstate", "TF_CLI_ARGS_destroy": "-no-color -state=" + testDir + "/tfstate", @@ -104,32 +111,264 @@ func TestDownstream(t *testing.T) { Upgrade: true, }) - _, err = terraform.InitE(t, terraformOptions) + _, err = terraform.InitAndApplyE(t, terraformOptions) if err != nil { + t.Log("Test failed, tearing down...") + util.GetErrorLogs(t, testDir+"/kubeconfig") util.Teardown(t, testDir, terraformOptions, keyPair) os.Remove(exampleDir + ".terraform.lock.hcl") sshAgent.Stop() t.Fatalf("Error creating cluster: %s", 
err) } - - // after initializing the other providers override the rancher provider with the built binary - if build { - t.Log("using the prebuilt rancher provider...") - terraformOptions.EnvVars["TF_CLI_CONFIG_FILE"] = repoRoot + "/.terraform/terraformrc" + util.CheckReady(t, testDir+"/kubeconfig") + util.CheckRunning(t, testDir+"/kubeconfig") + if t.Failed() { + t.Log("Test failed...") } else { - t.Log("not using the prebuilt rancher provider...") + t.Log("Test passed...") } + util.Teardown(t, testDir, terraformOptions, keyPair) + os.Remove(exampleDir + "/.terraform.lock.hcl") + sshAgent.Stop() +} - _, err = terraform.ApplyE(t, terraformOptions) +func TestDownstreamRoles(t *testing.T) { + t.Parallel() + id := util.GetId() + region := util.GetRegion() + accessKey := util.GetAwsAccessKey() + secretKey := util.GetAwsSecretKey() + sessionToken := util.GetAwsSessionToken() + directory := "deploy_rke2_multiple_pools" + owner := "terraform-ci@suse.com" + util.SetAcmeServer() + + repoRoot, err := filepath.Abs(g.GetRepoRoot(t)) if err != nil { + t.Fatalf("Error getting git root directory: %v", err) + } + + exampleDir := repoRoot + "/examples/" + directory + testDir := repoRoot + "/test/data/" + id + + err = util.CreateTestDirectories(t, id) + if err != nil { + os.RemoveAll(testDir) + t.Fatalf("Error creating test data directories: %s", err) + } + keyPair, err := util.CreateKeypair(t, region, owner, id) + if err != nil { + os.RemoveAll(testDir) + t.Fatalf("Error creating test key pair: %s", err) + } + + err = os.WriteFile(testDir+"/id_rsa", []byte(keyPair.KeyPair.PrivateKey), 0600) + if err != nil { + os.RemoveAll(testDir) + t.Fatalf("Error creating test key pair: %s", err) + } + + sshAgent := ssh.SshAgentWithKeyPair(t, keyPair.KeyPair) + t.Logf("Key %s created and added to agent", keyPair.Name) + + // use oldest RKE2, remember it releases much more than Rancher + _, _, rke2Version, err := util.GetRke2Releases() + if err != nil { + os.RemoveAll(testDir) + 
aws.DeleteEC2KeyPair(t, keyPair) + sshAgent.Stop() + t.Fatalf("Error getting Rke2 release version: %s", err) + } + + rancherVersion := os.Getenv("RANCHER_VERSION") + if rancherVersion == "" { + // use stable version if not specified + // using stable prevents problems where the Rancher provider hasn't released to fit the latest Rancher + _, rancherVersion, _, err = util.GetRancherReleases() + } + if err != nil { + os.RemoveAll(testDir) + aws.DeleteEC2KeyPair(t, keyPair) + sshAgent.Stop() + t.Fatalf("Error getting Rancher release version: %s", err) + } + + terraformOptions := terraform.WithDefaultRetryableErrors(t, &terraform.Options{ + TerraformDir: exampleDir, + // Variables to pass to our Terraform code using -var options + Vars: map[string]interface{}{ + "identifier": id, + "owner": owner, + "key_name": keyPair.Name, + "key": keyPair.KeyPair.PublicKey, + "zone": os.Getenv("ZONE"), + "rke2_version": rke2Version, + "rancher_version": rancherVersion, + "file_path": testDir, + "aws_access_key_id": accessKey, + "aws_secret_access_key": secretKey, + "aws_session_token": sessionToken, + "aws_region": region, + }, + // Environment variables to set when running Terraform + EnvVars: map[string]string{ + "AWS_DEFAULT_REGION": region, + "AWS_REGION": region, + "TF_DATA_DIR": testDir, + "TF_IN_AUTOMATION": "1", + "KUBECONFIG": testDir + "/kubeconfig", + "KUBE_CONFIG_PATH": testDir, + "TF_CLI_ARGS_plan": "-no-color -state=" + testDir + "/tfstate", + "TF_CLI_ARGS_apply": "-no-color -state=" + testDir + "/tfstate", + "TF_CLI_ARGS_destroy": "-no-color -state=" + testDir + "/tfstate", + "TF_CLI_ARGS_output": "-no-color -state=" + testDir + "/tfstate", + }, + RetryableTerraformErrors: util.GetRetryableTerraformErrors(), + NoColor: true, + SshAgent: sshAgent, + Upgrade: true, + }) + + _, err = terraform.InitAndApplyE(t, terraformOptions) + if err != nil { + t.Log("Test failed, tearing down...") + util.GetErrorLogs(t, testDir+"/kubeconfig") util.Teardown(t, testDir, 
terraformOptions, keyPair) - os.Remove(exampleDir + "/.terraform.lock.hcl") + os.Remove(exampleDir + "/.terraform.lock.hcl") // the lock file lives inside the example directory sshAgent.Stop() t.Fatalf("Error creating cluster: %s", err) } + util.CheckReady(t, testDir+"/kubeconfig") + util.CheckRunning(t, testDir+"/kubeconfig") + if t.Failed() { + t.Log("Test failed...") + } else { + t.Log("Test passed...") + } + util.Teardown(t, testDir, terraformOptions, keyPair) + os.Remove(exampleDir + "/.terraform.lock.hcl") + sshAgent.Stop() +} + +func TestDownstreamImport(t *testing.T) { + t.Parallel() + id := util.GetId() + region := util.GetRegion() + accessKey := util.GetAwsAccessKey() + secretKey := util.GetAwsSecretKey() + sessionToken := util.GetAwsSessionToken() + directory := "state_import" + owner := "terraform-ci@suse.com" + util.SetAcmeServer() + + repoRoot, err := filepath.Abs(g.GetRepoRoot(t)) + if err != nil { + t.Fatalf("Error getting git root directory: %v", err) + } - t.Log("Test passed, tearing down...") + exampleDir := repoRoot + "/examples/" + directory + testDir := repoRoot + "/test/data/" + id + + err = util.CreateTestDirectories(t, id) + if err != nil { + os.RemoveAll(testDir) + t.Fatalf("Error creating test data directories: %s", err) + } + keyPair, err := util.CreateKeypair(t, region, owner, id) + if err != nil { + os.RemoveAll(testDir) + t.Fatalf("Error creating test key pair: %s", err) + } + err = os.WriteFile(testDir+"/id_rsa", []byte(keyPair.KeyPair.PrivateKey), 0600) + if err != nil { + os.RemoveAll(testDir) + t.Fatalf("Error creating test key pair: %s", err) + } + sshAgent := ssh.SshAgentWithKeyPair(t, keyPair.KeyPair) + t.Logf("Key %s created and added to agent", keyPair.Name) + + // use oldest RKE2, remember it releases much more than Rancher + _, _, rke2Version, err := util.GetRke2Releases() + if err != nil { + os.RemoveAll(testDir) + aws.DeleteEC2KeyPair(t, keyPair) + sshAgent.Stop() + t.Fatalf("Error getting Rke2 release version: %s", err) + } + + rancherVersion :=
os.Getenv("RANCHER_VERSION") + if rancherVersion == "" { + // use stable version if not specified + // using stable prevents problems where the Rancher provider hasn't released to fit the latest Rancher + _, rancherVersion, _, err = util.GetRancherReleases() + } + if err != nil { + os.RemoveAll(testDir) + aws.DeleteEC2KeyPair(t, keyPair) + sshAgent.Stop() + t.Fatalf("Error getting Rancher release version: %s", err) + } + + terraformOptions := terraform.WithDefaultRetryableErrors(t, &terraform.Options{ + TerraformDir: exampleDir, + // Variables to pass to our Terraform code using -var options + Vars: map[string]interface{}{ + "identifier": id, + "owner": owner, + "key_name": keyPair.Name, + "key": keyPair.KeyPair.PublicKey, + "zone": os.Getenv("ZONE"), + "rke2_version": rke2Version, + "rancher_version": rancherVersion, + "file_path": testDir, + "aws_access_key_id": accessKey, + "aws_secret_access_key": secretKey, + "aws_session_token": sessionToken, + "aws_region": region, + "data_path": testDir, + }, + // Environment variables to set when running Terraform + EnvVars: map[string]string{ + "AWS_DEFAULT_REGION": region, + "AWS_REGION": region, + "TF_DATA_DIR": testDir, + "TF_IN_AUTOMATION": "1", + "KUBECONFIG": testDir + "/kubeconfig", + "KUBE_CONFIG_PATH": testDir, + // Adding these as environment variables so that they are not displayed in the log + "TF_VAR_aws_access_key_id": accessKey, + "TF_VAR_aws_secret_access_key": secretKey, + "TF_VAR_aws_session_token": sessionToken, + "TF_CLI_ARGS_init": "-upgrade", + "TF_CLI_ARGS_plan": "-no-color -state=" + testDir + "/tfstate", + "TF_CLI_ARGS_apply": "-no-color -state=" + testDir + "/tfstate", + "TF_CLI_ARGS_destroy": "-no-color -state=" + testDir + "/tfstate", + "TF_CLI_ARGS_output": "-no-color -state=" + testDir + "/tfstate", + }, + RetryableTerraformErrors: util.GetRetryableTerraformErrors(), + NoColor: true, + SshAgent: sshAgent, + Upgrade: true, + }) + + _, err = terraform.InitAndApplyE(t, terraformOptions) + if err 
!= nil { + t.Log("Test failed, tearing down...") + util.GetErrorLogs(t, testDir+"/kubeconfig") + util.Teardown(t, testDir, terraformOptions, keyPair) + os.Remove(exampleDir + "/.terraform.lock.hcl") // the lock file lives inside the example directory + sshAgent.Stop() + t.Fatalf("Error creating cluster: %s", err) + } + util.CheckReady(t, testDir+"/kubeconfig") + util.CheckRunning(t, testDir+"/kubeconfig") + if t.Failed() { + t.Log("Test failed...") + } else { + t.Log("Test passed...") + } util.Teardown(t, testDir, terraformOptions, keyPair) os.Remove(exampleDir + "/.terraform.lock.hcl") + os.Remove(exampleDir + "/TF_DATA_DIR.env") sshAgent.Stop() } diff --git a/test/go.mod b/test/go.mod index b90e20338..82465b60c 100644 --- a/test/go.mod +++ b/test/go.mod @@ -1,6 +1,6 @@ module github.com/rancher/terraform-provider-rancher2/test -go 1.23.4 +go 1.24.2 require ( github.com/aws/aws-sdk-go v1.55.5 diff --git a/test/base/base_test.go b/test/one/one_test.go similarity index 73% rename from test/base/base_test.go rename to test/one/one_test.go index b68302047..1723a9105 100644 --- a/test/base/base_test.go +++ b/test/one/one_test.go @@ -1,4 +1,4 @@ -package base +package one import ( "os" @@ -12,15 +12,13 @@ import ( util "github.com/rancher/terraform-provider-rancher2/test" ) -// This test makes sure we can deploy Rancher on AWS -func TestBase(t *testing.T) { +func TestOneBasic(t *testing.T) { t.Parallel() id := util.GetId() region := util.GetRegion() - directory := "base" + directory := "one" owner := "terraform-ci@suse.com" util.SetAcmeServer() - build := util.GetBuild() repoRoot, err := filepath.Abs(g.GetRepoRoot(t)) if err != nil { @@ -40,11 +38,17 @@ func TestBase(t *testing.T) { os.RemoveAll(testDir) t.Fatalf("Error creating test key pair: %s", err) } + + err = os.WriteFile(testDir+"/id_rsa", []byte(keyPair.KeyPair.PrivateKey), 0600) + if err != nil { + os.RemoveAll(testDir) + t.Fatalf("Error creating test key pair: %s", err) + } sshAgent := ssh.SshAgentWithKeyPair(t, keyPair.KeyPair) t.Logf("Key %s created and added
to agent", keyPair.Name) // use oldest RKE2, remember it releases much more than Rancher - _, _, rke2Version, err := util.GetReleases("rancher", "rke2") + _, _, rke2Version, err := util.GetRke2Releases() if err != nil { os.RemoveAll(testDir) aws.DeleteEC2KeyPair(t, keyPair) @@ -52,8 +56,12 @@ func TestBase(t *testing.T) { t.Fatalf("Error getting Rke2 release version: %s", err) } - // use latest Rancher, due to community patch issue - rancherVersion, _, _, err := util.GetReleases("rancher", "rancher") + rancherVersion := os.Getenv("RANCHER_VERSION") + if rancherVersion == "" { + // use stable version if not specified + // using stable prevents problems where the Rancher provider hasn't released to fit the latest Rancher + _, rancherVersion, _, err = util.GetRancherReleases() + } if err != nil { os.RemoveAll(testDir) aws.DeleteEC2KeyPair(t, keyPair) @@ -78,11 +86,8 @@ func TestBase(t *testing.T) { EnvVars: map[string]string{ "AWS_DEFAULT_REGION": region, "AWS_REGION": region, - "KUBECONFIG": testDir + "/kubeconfig", - "KUBE_CONFIG_PATH": testDir, - "WORKSPACE": repoRoot, - "TF_IN_AUTOMATION": "1", "TF_DATA_DIR": testDir, + "TF_IN_AUTOMATION": "1", "TF_CLI_ARGS_plan": "-no-color -state=" + testDir + "/tfstate", "TF_CLI_ARGS_apply": "-no-color -state=" + testDir + "/tfstate", "TF_CLI_ARGS_destroy": "-no-color -state=" + testDir + "/tfstate", @@ -94,31 +99,22 @@ func TestBase(t *testing.T) { Upgrade: true, }) - _, err = terraform.InitE(t, terraformOptions) + _, err = terraform.InitAndApplyE(t, terraformOptions) if err != nil { + t.Log("Test failed, tearing down...") + util.GetErrorLogs(t, testDir+"/kubeconfig") util.Teardown(t, testDir, terraformOptions, keyPair) os.Remove(exampleDir + ".terraform.lock.hcl") sshAgent.Stop() t.Fatalf("Error creating cluster: %s", err) } - - // after initializing the other providers override the rancher provider with the built binary - if build { - t.Log("using the prebuilt rancher provider...") - 
terraformOptions.EnvVars["TF_CLI_CONFIG_FILE"] = repoRoot + "/.terraform/terraformrc" + util.CheckReady(t, testDir+"/kubeconfig") + util.CheckRunning(t, testDir+"/kubeconfig") + if t.Failed() { + t.Log("Test failed...") } else { - t.Log("not using the prebuilt rancher provider...") - } - - _, err = terraform.ApplyE(t, terraformOptions) - if err != nil { - util.Teardown(t, testDir, terraformOptions, keyPair) - os.Remove(exampleDir + "/.terraform.lock.hcl") - sshAgent.Stop() - t.Fatalf("Error creating cluster: %s", err) + t.Log("Test passed...") } - - t.Log("Test passed, tearing down...") util.Teardown(t, testDir, terraformOptions, keyPair) os.Remove(exampleDir + "/.terraform.lock.hcl") sshAgent.Stop() diff --git a/test/prod/prod_test.go b/test/prod/prod_test.go new file mode 100644 index 000000000..b7285104f --- /dev/null +++ b/test/prod/prod_test.go @@ -0,0 +1,121 @@ +package prod + +import ( + "os" + "path/filepath" + "testing" + + aws "github.com/gruntwork-io/terratest/modules/aws" + g "github.com/gruntwork-io/terratest/modules/git" + "github.com/gruntwork-io/terratest/modules/ssh" + "github.com/gruntwork-io/terratest/modules/terraform" + util "github.com/rancher/terraform-provider-rancher2/test" +) + +func TestProdBasic(t *testing.T) { + t.Parallel() + id := util.GetId() + region := util.GetRegion() + directory := "prod" + owner := "terraform-ci@suse.com" + util.SetAcmeServer() + + repoRoot, err := filepath.Abs(g.GetRepoRoot(t)) + if err != nil { + t.Fatalf("Error getting git root directory: %v", err) + } + + exampleDir := repoRoot + "/examples/" + directory + testDir := repoRoot + "/test/data/" + id + + err = util.CreateTestDirectories(t, id) + if err != nil { + os.RemoveAll(testDir) + t.Fatalf("Error creating test data directories: %s", err) + } + keyPair, err := util.CreateKeypair(t, region, owner, id) + if err != nil { + os.RemoveAll(testDir) + t.Fatalf("Error creating test key pair: %s", err) + } + + err = os.WriteFile(testDir+"/id_rsa", 
[]byte(keyPair.KeyPair.PrivateKey), 0600) + if err != nil { + os.RemoveAll(testDir) + t.Fatalf("Error creating test key pair: %s", err) + } + + sshAgent := ssh.SshAgentWithKeyPair(t, keyPair.KeyPair) + t.Logf("Key %s created and added to agent", keyPair.Name) + + // use oldest RKE2, remember it releases much more than Rancher + _, _, rke2Version, err := util.GetRke2Releases() + if err != nil { + os.RemoveAll(testDir) + aws.DeleteEC2KeyPair(t, keyPair) + sshAgent.Stop() + t.Fatalf("Error getting Rke2 release version: %s", err) + } + + rancherVersion := os.Getenv("RANCHER_VERSION") + if rancherVersion == "" { + // use stable version if not specified + // using stable prevents problems where the Rancher provider hasn't released to fit the latest Rancher + _, rancherVersion, _, err = util.GetRancherReleases() + } + if err != nil { + os.RemoveAll(testDir) + aws.DeleteEC2KeyPair(t, keyPair) + sshAgent.Stop() + t.Fatalf("Error getting Rancher release version: %s", err) + } + + terraformOptions := terraform.WithDefaultRetryableErrors(t, &terraform.Options{ + TerraformDir: exampleDir, + // Variables to pass to our Terraform code using -var options + Vars: map[string]interface{}{ + "identifier": id, + "owner": owner, + "key_name": keyPair.Name, + "key": keyPair.KeyPair.PublicKey, + "zone": os.Getenv("ZONE"), + "rke2_version": rke2Version, + "rancher_version": rancherVersion, + "file_path": testDir, + }, + // Environment variables to set when running Terraform + EnvVars: map[string]string{ + "AWS_DEFAULT_REGION": region, + "AWS_REGION": region, + "TF_DATA_DIR": testDir, + "TF_IN_AUTOMATION": "1", + "TF_CLI_ARGS_plan": "-no-color -state=" + testDir + "/tfstate", + "TF_CLI_ARGS_apply": "-no-color -state=" + testDir + "/tfstate", + "TF_CLI_ARGS_destroy": "-no-color -state=" + testDir + "/tfstate", + "TF_CLI_ARGS_output": "-no-color -state=" + testDir + "/tfstate", + }, + RetryableTerraformErrors: util.GetRetryableTerraformErrors(), + NoColor: true, + SshAgent: sshAgent, + 
Upgrade: true, + }) + _, err = terraform.InitAndApplyE(t, terraformOptions) + if err != nil { + t.Log("Test failed, tearing down...") + util.GetErrorLogs(t, testDir+"/kubeconfig") + util.Teardown(t, testDir, terraformOptions, keyPair) + os.Remove(exampleDir + "/.terraform.lock.hcl") // the lock file lives inside the example directory + sshAgent.Stop() + t.Fatalf("Error creating cluster: %s", err) + } + util.CheckReady(t, testDir+"/kubeconfig") + util.CheckRunning(t, testDir+"/kubeconfig") + if t.Failed() { + t.Log("Test failed...") + } else { + t.Log("Test passed...") + } + util.Teardown(t, testDir, terraformOptions, keyPair) + os.Remove(exampleDir + "/.terraform.lock.hcl") + sshAgent.Stop() +} diff --git a/test/scripts/getLogs.sh b/test/scripts/getLogs.sh new file mode 100755 index 000000000..beb42c71b --- /dev/null +++ b/test/scripts/getLogs.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +kubectl get nodes || true +kubectl get all -A || true + +kubectl get pods -A || true +sleep 10 +kubectl get pods -A || true +sleep 10 +kubectl get pods -A || true diff --git a/test/scripts/readyNodes.sh b/test/scripts/readyNodes.sh index 8f8607068..2bd5dba87 100755 --- a/test/scripts/readyNodes.sh +++ b/test/scripts/readyNodes.sh @@ -1,4 +1,5 @@ #!/bin/bash +set -x JSONPATH="'{range .items[*]} {.metadata.name}{\"\\t\"} \ @@ -15,7 +16,6 @@ notReady() { # master-node Ready # worker-node Ready MemoryPressure # worker-node2 EtcVoter Ready - # worker-node3 # shellcheck disable=SC2060,SC2140 NOT_READY="$(echo "$NODES" | grep -v "Ready" | tr -d ["\t","\n"," ","'"] || true)" if [ -n "$NOT_READY" ]; then @@ -31,12 +31,12 @@ TIMEOUT=5 # 5 minutes TIMEOUT_MINUTES=$((TIMEOUT * 60)) INTERVAL=10 # 10 seconds MAX=$((TIMEOUT_MINUTES / INTERVAL)) -INDEX=0 +ATTEMPTS=0 while notReady; do - if [[ $INDEX -lt $MAX ]]; then + if [[ $ATTEMPTS -lt $MAX ]]; then echo "Waiting for nodes to be ready..." - INDEX=$((INDEX + 1)) + ATTEMPTS=$((ATTEMPTS + 1)) sleep $INTERVAL; else echo "Timeout reached. Nodes are not ready..."
@@ -46,6 +46,13 @@ while notReady; do fi done +echo "Nodes are ready..." + +echo "nodes..." kubectl get nodes || true +echo "all..." kubectl get all -A || true +echo "pods..." +kubectl get pods -A || true + exit 0 diff --git a/test/scripts/runningPods.sh b/test/scripts/runningPods.sh new file mode 100755 index 000000000..8c8d742cb --- /dev/null +++ b/test/scripts/runningPods.sh @@ -0,0 +1,67 @@ +#!/bin/bash +set -x + +JSONPATH="'{range .items[*]} + {.metadata.name}{\"\\t\"} \ + {.metadata.namespace}{\"\\t\"} \ + {.status.phase}{\"\\n\"} \ +{end}'" + +notReady() { + PODS=$(kubectl get pods -A -o jsonpath="$JSONPATH") + # shellcheck disable=SC2060,SC2140 + NOT_READY=$(echo "$PODS" | grep -v "Running" | grep -v "Succeeded" | tr -d ["\t","\n"," ","'"] || true) + if [ -n "$NOT_READY" ]; then + # Some pods aren't running + return 0 + else + # All pods are running + return 1 + fi +} + +readyWait() { + TIMEOUT=10 # 10 minutes + TIMEOUT_MINUTES=$((TIMEOUT * 60)) + INTERVAL=30 # 30 seconds + MAX=$((TIMEOUT_MINUTES / INTERVAL)) + ATTEMPTS=0 + + while notReady; do + if [ "$ATTEMPTS" -lt "$MAX" ]; then + ATTEMPTS=$((ATTEMPTS + 1)) + sleep "$INTERVAL"; + else + return 1 + fi + done + return 0 +} + +SUCCESSES=0 +SUCCESSES_NEEDED=3 # require three successes to make sure everything is settled + +while [ "$SUCCESSES" -lt "$SUCCESSES_NEEDED" ] && readyWait; do # test the count first so no extra poll runs once enough successes are recorded + SUCCESSES=$((SUCCESSES + 1)) + echo "succeeeded $SUCCESSES times..." + sleep 30 +done + +if [ "$SUCCESSES" -eq "$SUCCESSES_NEEDED" ]; then + echo "$SUCCESSES_NEEDED successes reached, passed.." + EXITCODE=0 +else + echo "$SUCCESSES_NEEDED successes not reached, failed.." + EXITCODE=1 +fi + +echo "nodes..." +kubectl get nodes || true + +echo "all..." +kubectl get all -A || true + +echo "pods..." 
+kubectl get pods -A || true + +exit $EXITCODE diff --git a/test/util.go b/test/util.go index de54c99c3..3c7557d0a 100644 --- a/test/util.go +++ b/test/util.go @@ -16,17 +16,18 @@ import ( aws "github.com/gruntwork-io/terratest/modules/aws" g "github.com/gruntwork-io/terratest/modules/git" "github.com/gruntwork-io/terratest/modules/random" + "github.com/gruntwork-io/terratest/modules/shell" "github.com/gruntwork-io/terratest/modules/terraform" "golang.org/x/oauth2" ) -func GetReleases(owner string, repo string) (string, string, string, error) { - releases, err := getReleases(owner, repo) +func GetRancherReleases() (string, string, string, error) { + releases, err := getReleases("rancher", "rancher") if err != nil { return "", "", "", err } - filterPrerelease(&releases) // this removes release candidates and pending releases - filterAssetsExist(&releases) + filterPrerelease(&releases) + filterPrimeOnly(&releases) versions := getVersionsFromReleases(&releases) if len(versions) == 0 { return "", "", "", errors.New("no eligible versions found") @@ -47,7 +48,33 @@ func GetReleases(owner string, repo string) (string, string, string, error) { return latest, stable, lts, nil } -func getReleases(owner string, repo string) ([]*github.RepositoryRelease, error) { +func GetRke2Releases() (string, string, string, error) { + releases, err := getReleases("rancher", "rke2") + if err != nil { + return "", "", "", err + } + filterPrerelease(&releases) + versions := getVersionsFromReleases(&releases) + if len(versions) == 0 { + return "", "", "", errors.New("no eligible versions found") + } + zeroPadVersionNumbers(&versions) + sortVersions(&versions) + filterDuplicateMinors(&versions) + removeZeroPadding(&versions) + latest := versions[0] + stable := latest + lts := stable + if len(versions) > 1 { + stable = versions[1] + } + if len(versions) > 2 { + lts = versions[2] + } + return latest, stable, lts, nil +} + +func getReleases(org string, repo string) ([]*github.RepositoryRelease, 
error) { githubToken := os.Getenv("GITHUB_TOKEN") if githubToken == "" { fmt.Println("GITHUB_TOKEN environment variable not set") @@ -62,31 +89,33 @@ func getReleases(owner string, repo string) ([]*github.RepositoryRelease, error) client := github.NewClient(tokenClient) var releases []*github.RepositoryRelease - releases, _, err := client.Repositories.ListReleases(context.Background(), owner, repo, &github.ListOptions{}) + releases, _, err := client.Repositories.ListReleases(context.Background(), org, repo, &github.ListOptions{}) if err != nil { return nil, err } + return releases, nil } -func filterPrerelease(r *[]*github.RepositoryRelease) { +func filterPrimeOnly(r *[]*github.RepositoryRelease) { var fr []*github.RepositoryRelease releases := *r for i := 0; i < len(releases); i++ { - if !releases[i].GetPrerelease() { + if len(releases[i].Assets) > 2 { // source zip and tar are always there + // prime only releases won't have artifacts + // so we only add releases with more than 2 artifacts fr = append(fr, releases[i]) } } *r = fr } -func filterAssetsExist(r *[]*github.RepositoryRelease) { +// this effectively removes release candidates as well as pending releases +func filterPrerelease(r *[]*github.RepositoryRelease) { var fr []*github.RepositoryRelease releases := *r for i := 0; i < len(releases); i++ { - if len(releases[i].Assets) > 2 { // source zip and tar are always there - // prime only releases won't have additional artifacts - // so we only want releases with more than 2 artifacts + if !releases[i].GetPrerelease() { fr = append(fr, releases[i]) } } @@ -277,16 +306,14 @@ func CreateKeypair(t *testing.T, region string, owner string, id string) (*aws.E func GetRetryableTerraformErrors() map[string]string { retryableTerraformErrors := map[string]string{ // The reason is unknown, but eventually these succeed after a few retries. 
- ".*unable to verify signature.*": "Failed due to transient network error.", - ".*unable to verify checksum.*": "Failed due to transient network error.", - ".*no provider exists with the given name.*": "Failed due to transient network error.", - ".*registry service is unreachable.*": "Failed due to transient network error.", - ".*connection reset by peer.*": "Failed due to transient network error.", - ".*TLS handshake timeout.*": "Failed due to transient network error.", - ".*Error: disassociating EC2 EIP.*does not exist.*": "Failed to delete EIP because interface is already gone", - ".*context deadline exceeded.*": "Failed due to kubernetes timeout, retrying.", - ".*http2: client connection lost.*": "Failed due to transient network error.", - ".*iam.amazonaws.com: no such host.*": "Failed due to transient AWS issue.", + ".*unable to verify signature.*": "Failed due to transient network error.", + ".*unable to verify checksum.*": "Failed due to transient network error.", + ".*no provider exists with the given name.*": "Failed due to transient network error.", + ".*registry service is unreachable.*": "Failed due to transient network error.", + ".*connection reset by peer.*": "Failed due to transient network error.", + ".*TLS handshake timeout.*": "Failed due to transient network error.", + ".*context deadline exceeded.*": "Failed due to kubernetes timeout, retrying.", + ".*http2: client connection lost.*": "Failed due to transient network error.", } return retryableTerraformErrors } @@ -348,8 +375,7 @@ func CreateTestDirectories(t *testing.T, id string) error { paths := []string{ filepath.Join(fwd, "test/data"), filepath.Join(fwd, "test/data", id), - filepath.Join(fwd, "test/data", id, "providers"), - filepath.Join(fwd, "test/data", id, "plugins"), + filepath.Join(fwd, "test/data", id, "data"), } for _, path := range paths { err = os.Mkdir(path, 0755) @@ -369,12 +395,12 @@ func Teardown(t *testing.T, directory string, options *terraform.Options, keyPai } } if 
directoryExists { - _, err2 := terraform.DestroyE(t, options) - if err2 != nil { - // don't fail the test if destroying the cluster fails - t.Logf("Error destroying cluster: %s", err2) + _, err := terraform.DestroyE(t, options) + if err != nil { + t.Logf("Failed to destroy: %v", err) } - err := os.RemoveAll(directory) + + err = os.RemoveAll(directory) if err != nil { t.Logf("Failed to delete test data directory: %v", err) } @@ -382,6 +408,77 @@ func Teardown(t *testing.T, directory string, options *terraform.Options, keyPai aws.DeleteEC2KeyPair(t, keyPair) } -func GetBuild() bool { - return os.Getenv("SKIP_BUILD") != "true" +func GetErrorLogs(t *testing.T, kubeconfigPath string) { + repoRoot, err := filepath.Abs(g.GetRepoRoot(t)) + if err != nil { + t.Logf("Error getting git root directory: %v", err) + } + script, err := os.ReadFile(repoRoot + "/test/scripts/getLogs.sh") + if err != nil { + t.Logf("Error reading script: %v", err) + } + errorLogsScript := shell.Command{ + Command: "bash", + Args: []string{"-c", string(script)}, + Env: map[string]string{ + "KUBECONFIG": kubeconfigPath, + }, + } + out, err := shell.RunCommandAndGetOutputE(t, errorLogsScript) + if err != nil { + t.Logf("Error running script: %s", err) + } + t.Logf("Log script output: %s", out) +} + +func CheckReady(t *testing.T, kubeconfigPath string) { + repoRoot, err := filepath.Abs(g.GetRepoRoot(t)) + if err != nil { + t.Logf("Error getting git root directory: %v", err) + t.Fail() + } + script, err := os.ReadFile(repoRoot + "/test/scripts/readyNodes.sh") + if err != nil { + t.Logf("Error reading script: %v", err) + t.Fail() + } + readyScript := shell.Command{ + Command: "bash", + Args: []string{"-c", string(script)}, + Env: map[string]string{ + "KUBECONFIG": kubeconfigPath, + }, + } + out, err := shell.RunCommandAndGetOutputE(t, readyScript) + if err != nil { + t.Logf("Error running script: %s", err) + t.Fail() + } + t.Logf("Ready script output: %s", out) +} + +func CheckRunning(t *testing.T, 
kubeconfigPath string) { + repoRoot, err := filepath.Abs(g.GetRepoRoot(t)) + if err != nil { + t.Logf("Error getting git root directory: %v", err) + t.Fail() + } + script, err := os.ReadFile(repoRoot + "/test/scripts/runningPods.sh") + if err != nil { + t.Logf("Error reading script: %v", err) + t.Fail() + } + readyScript := shell.Command{ + Command: "bash", + Args: []string{"-c", string(script)}, + Env: map[string]string{ + "KUBECONFIG": kubeconfigPath, + }, + } + out, err := shell.RunCommandAndGetOutputE(t, readyScript) + if err != nil { + t.Logf("Error running script: %s", err) + t.Fail() + } + t.Logf("Ready script output: %s", out) } From 98ecfcd1988d16e7af2036932b279a9372356ddc Mon Sep 17 00:00:00 2001 From: matttrach Date: Tue, 27 May 2025 17:19:52 -0500 Subject: [PATCH 2/4] fix: error importing name space with mismatch Signed-off-by: matttrach --- .github/workflows/pull_request.yaml | 10 +++++--- .github/workflows/release.yaml | 4 +-- examples/state_import/main.tf | 19 ++++++++++++-- .../state_import/modules/import/variables.tf | 12 +++++++++ examples/state_import/variables.tf | 9 +++++++ rancher2/import_rancher2_namespace.go | 25 ++++++++----------- rancher2/util.go | 4 +++ scripts/run_tests.sh | 12 +++++---- test/downstream/downstream_test.go | 14 +++++++++++ 9 files changed, 81 insertions(+), 28 deletions(-) diff --git a/.github/workflows/pull_request.yaml b/.github/workflows/pull_request.yaml index 8ab77001d..d416d476f 100644 --- a/.github/workflows/pull_request.yaml +++ b/.github/workflows/pull_request.yaml @@ -66,10 +66,12 @@ jobs: - name: shell check shell: /home/runner/.nix-profile/bin/nix develop --ignore-environment --extra-experimental-features nix-command --extra-experimental-features flakes --keep HOME --keep SSH_AUTH_SOCK --keep GITHUB_TOKEN --keep AWS_ROLE --keep AWS_REGION --keep AWS_DEFAULT_REGION --keep AWS_ACCESS_KEY_ID --keep AWS_SECRET_ACCESS_KEY --keep AWS_SESSION_TOKEN --keep UPDATECLI_GPGTOKEN --keep UPDATECLI_GITHUB_TOKEN --keep 
UPDATECLI_GITHUB_ACTOR --keep GPG_SIGNING_KEY --keep NIX_SSL_CERT_FILE --keep NIX_ENV_LOADED --keep TERM --command bash -e {0} run: | - while read -r file; do - echo "checking $file..." - shellcheck -x "$file" - done <<<"$(grep -Rl -e '^#!' | grep -v '.terraform'| grep -v '.git')" + # while read -r file; do + # echo "checking $file..." + # shellcheck -x "$file" + # done <<<"$(grep -Rl -e '^#!' | grep -v '.terraform'| grep -v '.git')" + # in the future run this on every script, but first we need to eliminate unused scripts + shellcheck -x "./scripts/run_tests.sh" validate-commit-message: runs-on: ubuntu-latest diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 154519d11..edc0ac977 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -44,8 +44,8 @@ jobs: echo '${{ env.GPG_PASSPHRASE }}' | gpg --detach-sig --pinentry-mode loopback --passphrase-fd 0 --output "${SHASUM_FILE}.sig" --sign "${SHASUM_FILE}" echo "Validating signature..." - gpg --verify "${SHASUM_FILE}.sig" "${SHASUM_FILE}" - if [ $? -eq 0 ]; then + + if gpg --verify "${SHASUM_FILE}.sig" "${SHASUM_FILE}"; then echo "Signature is valid..." else echo "Signature verification failed!" diff --git a/examples/state_import/main.tf b/examples/state_import/main.tf index f39ffc955..e30bbeefa 100644 --- a/examples/state_import/main.tf +++ b/examples/state_import/main.tf @@ -47,7 +47,8 @@ locals { email = (var.email != "" ? var.email : "${local.identifier}@${local.zone}") acme_server_url = "https://acme-v02.api.letsencrypt.org" cluster_name = "tf-all-in-one-config" - project_id = data.rancher2_cluster.downstream_cluster.default_project_id + project_mismatch = var.project_mismatch # if this is true, then the import should fail + project_id = (local.project_mismatch ? 
rancher2_project.test.id : data.rancher2_cluster.downstream_cluster.default_project_id) # tflint-ignore: terraform_unused_declarations fail_project_id = (strcontains(local.project_id, ":") != true ? one([local.project_id, "project_id_malformed"]) : false) } @@ -182,6 +183,18 @@ resource "rancher2_namespace" "test" { } } +resource "rancher2_project" "test" { + depends_on = [ + module.rancher, + module.rke2_image, + module.downstream_cluster, + data.rancher2_cluster.downstream_cluster, + rancher2_namespace.test, + ] + name = "test" + cluster_id = data.rancher2_cluster.downstream_cluster.id +} + resource "local_file" "import_main" { depends_on = [ module.rancher, @@ -189,11 +202,12 @@ resource "local_file" "import_main" { module.downstream_cluster, data.rancher2_cluster.downstream_cluster, rancher2_namespace.test, + rancher2_project.test, ] filename = "${local.tf_data_dir}/tf-rancher-imported/main.tf" content = templatefile("${path.module}/modules/import/main.tftpl", { cluster_id = module.downstream_cluster.cluster_id - namespace_id = "${local.project_id}.${rancher2_namespace.test.id}" + namespace_id = join(".", [local.project_id, rancher2_namespace.test.id]) }) } @@ -221,6 +235,7 @@ module "import" { zone = "${local.zone}" machine_config_kind = "${module.downstream_cluster.machine_config_kind}" machine_config_name = "${module.downstream_cluster.machine_config_name}" + project_mismatch = "${local.project_mismatch}" EOT skip_destroy = true // this is for testing purposes, it prevents an issue where the imported resources destroy the API objects and the main resources error out on destroy (not found) init = false // this is for testing purposes, it allow us to use dev overrides in the terraformrc to use the locally built binary rather than the registry provider diff --git a/examples/state_import/modules/import/variables.tf b/examples/state_import/modules/import/variables.tf index f80ed03a5..f28869238 100644 --- a/examples/state_import/modules/import/variables.tf +++ 
b/examples/state_import/modules/import/variables.tf @@ -1,24 +1,36 @@ +# tflint-ignore: terraform_unused_declarations variable "cluster_name" { type = string } +# tflint-ignore: terraform_unused_declarations variable "rke2_version" { type = string } +# tflint-ignore: terraform_unused_declarations variable "node_count" { type = number } +# tflint-ignore: terraform_unused_declarations variable "rancher_key" { type = string } +# tflint-ignore: terraform_unused_declarations variable "domain" { type = string } +# tflint-ignore: terraform_unused_declarations variable "zone" { type = string } +# tflint-ignore: terraform_unused_declarations variable "machine_config_kind" { type = string } +# tflint-ignore: terraform_unused_declarations variable "machine_config_name" { type = string } +# tflint-ignore: terraform_unused_declarations +variable "project_mismatch" { + type = string +} diff --git a/examples/state_import/variables.tf b/examples/state_import/variables.tf index eb4716168..afee9700b 100644 --- a/examples/state_import/variables.tf +++ b/examples/state_import/variables.tf @@ -100,3 +100,12 @@ variable "data_path" { EOT default = null } +variable "project_mismatch" { + type = string + description = <<-EOT + This is used for testing, it ensures that namespace imports can't alter the namespace. + In previous versions it was possible to move the namespace between projects as you import it, this is no longer possible. + Setting this to true should cause the import module to fail. 
+ EOT + default = false +} diff --git a/rancher2/import_rancher2_namespace.go b/rancher2/import_rancher2_namespace.go index a59b77beb..43b3f7b8e 100644 --- a/rancher2/import_rancher2_namespace.go +++ b/rancher2/import_rancher2_namespace.go @@ -1,19 +1,25 @@ package rancher2 import ( + "fmt" "log" "github.com/hashicorp/terraform-plugin-sdk/helper/schema" - clusterClient "github.com/rancher/rancher/pkg/client/generated/cluster/v3" ) func resourceRancher2NamespaceImport(d *schema.ResourceData, meta interface{}) ([]*schema.ResourceData, error) { + // in this context the clusterID is rancher2_cluster_v2.cluster_v1_id or rancher2_cluster.id + // the input is expected to be "project_id.namespace_id" + // project_id is a concatenation of "cluster_id:project_id" or data.rancher2_cluster.downstream_cluster.default_project_id + log.Printf("[INFO] Splitting given id %s", d.Id()) projectID, resourceID := splitID(d.Id()) - + log.Printf("[INFO] Splitting project %s", projectID) clusterID, projectID := splitProjectID(projectID) + log.Printf("[INFO] Using cluster id %s", clusterID) client, err := meta.(*Config).ClusterClient(clusterID) if err != nil { + log.Printf("[INFO] Problem getting cluster client for cluster with id \"%s\"", clusterID) return []*schema.ResourceData{}, err } @@ -21,19 +27,8 @@ func resourceRancher2NamespaceImport(d *schema.ResourceData, meta interface{}) ( if err != nil { return []*schema.ResourceData{}, err } - - d.Set("project_id", clusterID) - if projectID != "" { - log.Printf("[INFO] Moving Namespace ID %s to project %s", d.Id(), projectID) - nsMove := &clusterClient.NamespaceMove{ - ProjectID: projectID, - } - - err = client.Namespace.ActionMove(ns, nsMove) - if err != nil { - return []*schema.ResourceData{}, err - } - d.Set("project_id", projectID) + if ns.ProjectID != projectID { + return []*schema.ResourceData{}, fmt.Errorf("[ERROR] Project ID \"%s\" in import object doesn't match resource to import (\"%s\").", projectID, ns.ProjectID) } err = 
flattenNamespace(d, ns) diff --git a/rancher2/util.go b/rancher2/util.go index 1106ce9a6..d823508f7 100644 --- a/rancher2/util.go +++ b/rancher2/util.go @@ -395,6 +395,8 @@ func splitBySep(data, sep string) []string { return strings.Split(data, sep) } +// eg. "abc123.def456"(id) returns "abc123"(clusterID), "def456"(resourceID) +// eg. "abc123"(id) returns ""(clusterID), "abc123"(resourceID) func splitID(id string) (clusterID, resourceID string) { separator := "." @@ -437,6 +439,8 @@ func splitProjectIDPart(id string) (projectID string) { return "" } +// eg. "abc123:def456"(id) would return "abc123"(clusterID),"abc123:def456"(projectID) +// eg. "abc123"(id) would return "abc123"(clusterID),""(projectID) func splitProjectID(id string) (clusterID, projectID string) { id = strings.TrimSuffix(id, clusterProjectIDSeparator) diff --git a/scripts/run_tests.sh b/scripts/run_tests.sh index 5865c4e43..5e8a2ee45 100755 --- a/scripts/run_tests.sh +++ b/scripts/run_tests.sh @@ -4,7 +4,7 @@ get_git_root() { git rev-parse --show-toplevel } -if get_git_root; then cd "$(get_git_root)"; else echo "expects to be run from within the terraform provider git repo"; exit 1; fi +if get_git_root; then cd "$(get_git_root)" || exit; else echo "expects to be run from within the terraform provider git repo"; exit 1; fi export REPO_ROOT=$PWD IDENTIFIER="" @@ -135,8 +135,9 @@ echo 'if tmp directory is missing, try restarting dev environment' if [ -z "$cleanup_id" ]; then if [ "false" = "$skip_build" ]; then echo 'building...' - $REPO_ROOT/scripts/gobuild.sh + "$REPO_ROOT/scripts/gobuild.sh" || { C=$?; echo "failed to compile provider, exit code $C"; exit $C; } # read $? directly; after "if ! cmd" it is always 0 export SKIP_BUILD="false" + echo "provider successfully compiles..." else echo "skipping build..." export SKIP_BUILD="true" @@ -144,14 +145,15 @@ if [ -z "$cleanup_id" ]; then # Test if tests can compile echo "checking tests for compile errors..." - cd "$REPO_ROOT/test" + cd "$REPO_ROOT/test" || exit if ! 
go mod tidy; then C=$?; echo "failed to tidy, exit code $C"; exit $C; fi - for file in $(find $REPO_ROOT/test -name '*.go'); do + while read -r file; do echo "found $file"; if ! go test -c "$file" -o "$file.test"; then C=$?; echo "failed to compile $file, exit code $C"; exit $C; fi rm -rf "$file.test" - done + done <<<"$(find "$REPO_ROOT/test" -name '*.go')" + echo "compile checks passed..." # Run tests initially diff --git a/test/downstream/downstream_test.go b/test/downstream/downstream_test.go index 1d29fc4a7..491e26e54 100644 --- a/test/downstream/downstream_test.go +++ b/test/downstream/downstream_test.go @@ -351,7 +351,19 @@ func TestDownstreamImport(t *testing.T) { Upgrade: true, }) + terraformOptions.Vars["project_mismatch"] = "true" _, err = terraform.InitAndApplyE(t, terraformOptions) + if err == nil { // the mismatched import is expected to fail; success here is the test failure + t.Log("Test failed, tearing down...") + util.GetErrorLogs(t, testDir+"/kubeconfig") + util.Teardown(t, testDir, terraformOptions, keyPair) + os.Remove(exampleDir + "/.terraform.lock.hcl") + sshAgent.Stop() + t.Fatal("Expected apply to fail on project mismatch, but it succeeded") + } + + terraformOptions.Vars["project_mismatch"] = "false" + _, err = terraform.ApplyE(t, terraformOptions) if err != nil { t.Log("Test failed, tearing down...") util.GetErrorLogs(t, testDir+"/kubeconfig") @@ -362,11 +374,13 @@ func TestDownstreamImport(t *testing.T) { } util.CheckReady(t, testDir+"/kubeconfig") util.CheckRunning(t, testDir+"/kubeconfig") + if t.Failed() { t.Log("Test failed...") } else { t.Log("Test passed...") } + util.Teardown(t, testDir, terraformOptions, keyPair) os.Remove(exampleDir + "/.terraform.lock.hcl") os.Remove(exampleDir + "/TF_DATA_DIR.env") From 8c06906beff74edd11437437b7558f5bbd9b3f98 Mon Sep 17 00:00:00 2001 From: matttrach Date: Tue, 27 May 2025 17:26:24 -0500 Subject: [PATCH 3/4] fix: add environment variables for CI Signed-off-by: matttrach --- .github/workflows/pull_request.yaml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git 
a/.github/workflows/pull_request.yaml b/.github/workflows/pull_request.yaml index d416d476f..d04de0064 100644 --- a/.github/workflows/pull_request.yaml +++ b/.github/workflows/pull_request.yaml @@ -3,6 +3,11 @@ name: Pull Request on: pull_request: +env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GITHUB_OWNER: ${{ github.repository_owner }} + jobs: build: runs-on: ubuntu-latest @@ -147,8 +152,8 @@ jobs: while read -r message; do echo "checking message ^$message\$" - prefix_check "$message" empty_check "$message" + prefix_check "$message" length_check "$message" spell_check "$message" echo "message ^$message\$ passed all checks" From 2f6dfcbebb86057abbf6aac60850eedb00b93ea6 Mon Sep 17 00:00:00 2001 From: matttrach Date: Tue, 27 May 2025 18:27:48 -0500 Subject: [PATCH 4/4] fix: update docs Signed-off-by: matttrach --- docs/resources/namespace.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/resources/namespace.md b/docs/resources/namespace.md index f0338291e..c6a4506dc 100644 --- a/docs/resources/namespace.md +++ b/docs/resources/namespace.md @@ -134,8 +134,8 @@ Namespaces can be imported using the namespace ID in the format `<project_id>.<namespace_id>` -`<project_id>` is in the format `<cluster_id>:<id>`, but <id> part is optional: +`<project_id>` is in the format `<cluster_id>:<id>`; this is the output from `data.rancher2_cluster.downstream_cluster.default_project_id` -- If full project_id is provided, `<project_id>=<cluster_id>:<id>`, the namespace'll be assigned to corresponding cluster project once it's imported. -- If `<id>` part is omitted `<project_id>=<cluster_id>`, the namespace'll not be assigned to any project. To move it into a project, `<project_id>=<cluster_id>:<id>` needs to be updated in tf file. Namespace movement is only supported inside same `cluster_id`. +The `<project_id>` must match the actual resource; importing will error on project mismatch. +In past versions you could modify the namespace while importing it; this functionality is no longer available.