From 16d046e3545c9cc3abe284e6512eb7c34bc8713e Mon Sep 17 00:00:00 2001 From: matttrach Date: Wed, 25 Jun 2025 20:29:19 -0500 Subject: [PATCH] fix!: rewrite the deployment system Signed-off-by: matttrach --- .github/workflows/validate.yaml | 2 +- .gitignore | 2 + aspell_custom.txt | 4 + examples/{deploy_rke2 => downstream}/main.tf | 0 .../downstream/addKeyToAmazonConfig.sh | 0 .../modules/downstream/main.tf | 0 .../modules/downstream/variables.tf | 0 .../modules/downstream/versions.tf | 0 .../{deploy_rke2 => downstream}/outputs.tf | 0 .../{deploy_rke2 => downstream}/variables.tf | 0 .../{deploy_rke2 => downstream}/versions.tf | 4 +- .../main.tf | 0 .../downstream/addKeyToAmazonConfig.sh | 0 .../modules/downstream/main.tf | 0 .../modules/downstream/variables.tf | 0 .../modules/downstream/versions.tf | 0 .../outputs.tf | 0 .../variables.tf | 0 .../versions.tf | 4 +- examples/one/versions.tf | 4 +- examples/prod/README.md | 36 ++ examples/prod/versions.tf | 4 +- examples/three/main.tf | 119 ++++ examples/three/outputs.tf | 22 + examples/three/variables.tf | 54 ++ examples/three/versions.tf | 53 ++ flake.lock | 6 +- flake.nix | 2 + main.tf | 55 +- modules/cluster/main.tf | 560 ++++++++---------- modules/cluster/main.tf.tftpl | 135 ----- modules/cluster/node_outputs | 13 - modules/cluster/node_template/main.tf | 148 +++++ modules/cluster/node_template/outputs.tf | 36 ++ modules/cluster/node_template/variables.tf | 330 +++++++++++ modules/cluster/node_template/versions.tf | 30 + modules/cluster/outputs.tf | 32 +- modules/cluster/variables.tf | 13 +- modules/cluster/versions.tf | 2 +- modules/deploy/main.tf | 221 +++++++ modules/deploy/outputs.tf | 11 + modules/deploy/variables.tf | 94 +++ modules/deploy/versions.tf | 9 + .../install_cert_manager/configured/main.tf | 15 +- .../configured/versions.tf | 2 +- modules/install_cert_manager/main.tf | 81 +-- .../install_cert_manager/unconfigured/main.tf | 24 +- .../unconfigured/versions.tf | 4 +- 
modules/install_cert_manager/variables.tf | 22 +- modules/install_cert_manager/versions.tf | 4 +- modules/persist_file/archive.sh | 25 + modules/persist_file/main.tf | 53 ++ modules/persist_file/make_holders.sh | 34 ++ modules/persist_file/outputs.tf | 7 + modules/persist_file/read_file.sh | 14 + modules/persist_file/variables.tf | 26 + modules/persist_file/versions.tf | 13 + modules/rancher_bootstrap/main.tf | 137 +---- modules/rancher_bootstrap/outputs.tf | 9 +- modules/rancher_bootstrap/rancher/main.tf | 158 +++-- .../rancher_bootstrap/rancher/variables.tf | 58 +- modules/rancher_bootstrap/rancher/versions.tf | 2 +- .../rancher_externalTLS/main.tf | 113 ++-- .../rancher_externalTLS/variables.tf | 54 +- .../rancher_externalTLS/versions.tf | 2 +- modules/rancher_bootstrap/variables.tf | 49 +- modules/rancher_bootstrap/versions.tf | 4 +- run_tests.sh | 3 +- test/tests/downstream/downstream_test.go | 6 +- test/tests/one/one_test.go | 2 +- test/tests/prod/prod_test.go | 2 +- test/tests/three/three_test.go | 121 ++++ variables.tf | 50 +- versions.tf | 4 +- 74 files changed, 2120 insertions(+), 983 deletions(-) create mode 100644 aspell_custom.txt rename examples/{deploy_rke2 => downstream}/main.tf (100%) rename examples/{deploy_rke2 => downstream}/modules/downstream/addKeyToAmazonConfig.sh (100%) rename examples/{deploy_rke2 => downstream}/modules/downstream/main.tf (100%) rename examples/{deploy_rke2 => downstream}/modules/downstream/variables.tf (100%) rename examples/{deploy_rke2 => downstream}/modules/downstream/versions.tf (100%) rename examples/{deploy_rke2 => downstream}/outputs.tf (100%) rename examples/{deploy_rke2 => downstream}/variables.tf (100%) rename examples/{deploy_rke2 => downstream}/versions.tf (95%) rename examples/{deploy_rke2_multiple_pools => downstream_splitrole}/main.tf (100%) rename examples/{deploy_rke2_multiple_pools => downstream_splitrole}/modules/downstream/addKeyToAmazonConfig.sh (100%) rename examples/{deploy_rke2_multiple_pools => 
downstream_splitrole}/modules/downstream/main.tf (100%) rename examples/{deploy_rke2_multiple_pools => downstream_splitrole}/modules/downstream/variables.tf (100%) rename examples/{deploy_rke2_multiple_pools => downstream_splitrole}/modules/downstream/versions.tf (100%) rename examples/{deploy_rke2_multiple_pools => downstream_splitrole}/outputs.tf (100%) rename examples/{deploy_rke2_multiple_pools => downstream_splitrole}/variables.tf (100%) rename examples/{deploy_rke2_multiple_pools => downstream_splitrole}/versions.tf (95%) create mode 100644 examples/prod/README.md create mode 100644 examples/three/main.tf create mode 100644 examples/three/outputs.tf create mode 100644 examples/three/variables.tf create mode 100644 examples/three/versions.tf delete mode 100644 modules/cluster/main.tf.tftpl delete mode 100644 modules/cluster/node_outputs create mode 100644 modules/cluster/node_template/main.tf create mode 100644 modules/cluster/node_template/outputs.tf create mode 100644 modules/cluster/node_template/variables.tf create mode 100644 modules/cluster/node_template/versions.tf create mode 100644 modules/deploy/main.tf create mode 100644 modules/deploy/outputs.tf create mode 100644 modules/deploy/variables.tf create mode 100644 modules/deploy/versions.tf create mode 100755 modules/persist_file/archive.sh create mode 100644 modules/persist_file/main.tf create mode 100755 modules/persist_file/make_holders.sh create mode 100644 modules/persist_file/outputs.tf create mode 100755 modules/persist_file/read_file.sh create mode 100644 modules/persist_file/variables.tf create mode 100644 modules/persist_file/versions.tf create mode 100644 test/tests/three/three_test.go diff --git a/.github/workflows/validate.yaml b/.github/workflows/validate.yaml index 9945e29..20cc754 100644 --- a/.github/workflows/validate.yaml +++ b/.github/workflows/validate.yaml @@ -87,7 +87,7 @@ jobs: # This format enables automatic generation of changelogs and versioning filter() { COMMIT="$1" - 
ouput="$(echo "$COMMIT" | grep -e '^fix: ' -e '^feature: ' -e '^feat: ' -e 'refactor!: ' -e 'feature!: ' -e 'feat!: ' -e '^chore(main): ')" + output="$(echo "$COMMIT" | grep -e '^fix: ' -e '^feature: ' -e '^feat: ' -e 'refactor!: ' -e 'feature!: ' -e 'feat!: ' -e '^chore(main): ')" echo "$output" } prefix_check() { diff --git a/.gitignore b/.gitignore index 0cd490b..4456694 100644 --- a/.gitignore +++ b/.gitignore @@ -29,3 +29,5 @@ examples/basic/rancher_bootstrap .terraform.lock.hcl run.sh *.test +static_files.txt +* copy diff --git a/aspell_custom.txt b/aspell_custom.txt new file mode 100644 index 0000000..100429b --- /dev/null +++ b/aspell_custom.txt @@ -0,0 +1,4 @@ +kubernetes +config +git +variablize diff --git a/examples/deploy_rke2/main.tf b/examples/downstream/main.tf similarity index 100% rename from examples/deploy_rke2/main.tf rename to examples/downstream/main.tf diff --git a/examples/deploy_rke2/modules/downstream/addKeyToAmazonConfig.sh b/examples/downstream/modules/downstream/addKeyToAmazonConfig.sh similarity index 100% rename from examples/deploy_rke2/modules/downstream/addKeyToAmazonConfig.sh rename to examples/downstream/modules/downstream/addKeyToAmazonConfig.sh diff --git a/examples/deploy_rke2/modules/downstream/main.tf b/examples/downstream/modules/downstream/main.tf similarity index 100% rename from examples/deploy_rke2/modules/downstream/main.tf rename to examples/downstream/modules/downstream/main.tf diff --git a/examples/deploy_rke2/modules/downstream/variables.tf b/examples/downstream/modules/downstream/variables.tf similarity index 100% rename from examples/deploy_rke2/modules/downstream/variables.tf rename to examples/downstream/modules/downstream/variables.tf diff --git a/examples/deploy_rke2/modules/downstream/versions.tf b/examples/downstream/modules/downstream/versions.tf similarity index 100% rename from examples/deploy_rke2/modules/downstream/versions.tf rename to examples/downstream/modules/downstream/versions.tf diff --git 
a/examples/deploy_rke2/outputs.tf b/examples/downstream/outputs.tf similarity index 100% rename from examples/deploy_rke2/outputs.tf rename to examples/downstream/outputs.tf diff --git a/examples/deploy_rke2/variables.tf b/examples/downstream/variables.tf similarity index 100% rename from examples/deploy_rke2/variables.tf rename to examples/downstream/variables.tf diff --git a/examples/deploy_rke2/versions.tf b/examples/downstream/versions.tf similarity index 95% rename from examples/deploy_rke2/versions.tf rename to examples/downstream/versions.tf index d3d093a..98af46d 100644 --- a/examples/deploy_rke2/versions.tf +++ b/examples/downstream/versions.tf @@ -3,7 +3,7 @@ terraform { required_providers { local = { source = "hashicorp/local" - version = ">= 2.4" + version = ">= 2.5" } random = { source = "hashicorp/random" @@ -39,7 +39,7 @@ terraform { } helm = { source = "hashicorp/helm" - version = ">= 2.14" + version = "2.14" } rancher2 = { source = "rancher/rancher2" diff --git a/examples/deploy_rke2_multiple_pools/main.tf b/examples/downstream_splitrole/main.tf similarity index 100% rename from examples/deploy_rke2_multiple_pools/main.tf rename to examples/downstream_splitrole/main.tf diff --git a/examples/deploy_rke2_multiple_pools/modules/downstream/addKeyToAmazonConfig.sh b/examples/downstream_splitrole/modules/downstream/addKeyToAmazonConfig.sh similarity index 100% rename from examples/deploy_rke2_multiple_pools/modules/downstream/addKeyToAmazonConfig.sh rename to examples/downstream_splitrole/modules/downstream/addKeyToAmazonConfig.sh diff --git a/examples/deploy_rke2_multiple_pools/modules/downstream/main.tf b/examples/downstream_splitrole/modules/downstream/main.tf similarity index 100% rename from examples/deploy_rke2_multiple_pools/modules/downstream/main.tf rename to examples/downstream_splitrole/modules/downstream/main.tf diff --git a/examples/deploy_rke2_multiple_pools/modules/downstream/variables.tf 
b/examples/downstream_splitrole/modules/downstream/variables.tf similarity index 100% rename from examples/deploy_rke2_multiple_pools/modules/downstream/variables.tf rename to examples/downstream_splitrole/modules/downstream/variables.tf diff --git a/examples/deploy_rke2_multiple_pools/modules/downstream/versions.tf b/examples/downstream_splitrole/modules/downstream/versions.tf similarity index 100% rename from examples/deploy_rke2_multiple_pools/modules/downstream/versions.tf rename to examples/downstream_splitrole/modules/downstream/versions.tf diff --git a/examples/deploy_rke2_multiple_pools/outputs.tf b/examples/downstream_splitrole/outputs.tf similarity index 100% rename from examples/deploy_rke2_multiple_pools/outputs.tf rename to examples/downstream_splitrole/outputs.tf diff --git a/examples/deploy_rke2_multiple_pools/variables.tf b/examples/downstream_splitrole/variables.tf similarity index 100% rename from examples/deploy_rke2_multiple_pools/variables.tf rename to examples/downstream_splitrole/variables.tf diff --git a/examples/deploy_rke2_multiple_pools/versions.tf b/examples/downstream_splitrole/versions.tf similarity index 95% rename from examples/deploy_rke2_multiple_pools/versions.tf rename to examples/downstream_splitrole/versions.tf index d3d093a..98af46d 100644 --- a/examples/deploy_rke2_multiple_pools/versions.tf +++ b/examples/downstream_splitrole/versions.tf @@ -3,7 +3,7 @@ terraform { required_providers { local = { source = "hashicorp/local" - version = ">= 2.4" + version = ">= 2.5" } random = { source = "hashicorp/random" @@ -39,7 +39,7 @@ terraform { } helm = { source = "hashicorp/helm" - version = ">= 2.14" + version = "2.14" } rancher2 = { source = "rancher/rancher2" diff --git a/examples/one/versions.tf b/examples/one/versions.tf index d3d093a..98af46d 100644 --- a/examples/one/versions.tf +++ b/examples/one/versions.tf @@ -3,7 +3,7 @@ terraform { required_providers { local = { source = "hashicorp/local" - version = ">= 2.4" + version = 
">= 2.5" } random = { source = "hashicorp/random" @@ -39,7 +39,7 @@ terraform { } helm = { source = "hashicorp/helm" - version = ">= 2.14" + version = "2.14" } rancher2 = { source = "rancher/rancher2" diff --git a/examples/prod/README.md b/examples/prod/README.md new file mode 100644 index 0000000..b68b3a9 --- /dev/null +++ b/examples/prod/README.md @@ -0,0 +1,36 @@ +# Production + +This example shows what the RKE2 team considers an ideal production infrastructure configuration with Rancher deployed. + +## Note + +While this is what we consider the ideal technical configuration, we are not working with the constraints that many users have. + +- Not every team is worried about scaling their cluster +- Not every team can afford to deploy 9 nodes +- Not every team is worried about availability + +The name of this example is "production" because it is what we believe will cause the least amount of trouble for the user in the long run. +After years of troubleshooting RKE2 deployments we feel that most problems that users encounter can be avoided with this configuration. + +You know the constraints or goals of your team better than we do, so while this example is titled "production" +it isn't meant to be a judgement of the other examples. Do what works best for your team; there is no +"one size fits all" infrastructure configuration. + +# Split Role + +This configuration includes three node roles: `database`, `API`, and `worker`. +Each role is considered critical to scaling your cluster: + +- the `database` role is RKE2 focused on etcd +- the `API` role is RKE2 focused on the Kubernetes API components +- the `worker` role is RKE2 focused on user workloads + +# Scaling + +As the number of total nodes increases, you should scale your `database` nodes accordingly. + - monitor disk pressure +As the number of requests to Kubernetes increases you should scale your `API` nodes. 
+ - monitor network traffic, both ingress from outside of your cluster and between nodes +As the amount of user workloads increases you should increase the `worker` nodes. + - monitor CPU and RAM usage of your workers diff --git a/examples/prod/versions.tf b/examples/prod/versions.tf index d3d093a..98af46d 100644 --- a/examples/prod/versions.tf +++ b/examples/prod/versions.tf @@ -3,7 +3,7 @@ terraform { required_providers { local = { source = "hashicorp/local" - version = ">= 2.4" + version = ">= 2.5" } random = { source = "hashicorp/random" @@ -39,7 +39,7 @@ terraform { } helm = { source = "hashicorp/helm" - version = ">= 2.14" + version = "2.14" } rancher2 = { source = "rancher/rancher2" diff --git a/examples/three/main.tf b/examples/three/main.tf new file mode 100644 index 0000000..54e0727 --- /dev/null +++ b/examples/three/main.tf @@ -0,0 +1,119 @@ +provider "aws" { + default_tags { + tags = { + Id = local.identifier + Owner = local.owner + } + } +} + +provider "acme" { + server_url = "${local.acme_server_url}/directory" +} + +provider "github" {} +provider "kubernetes" {} # make sure you set the env variable KUBE_CONFIG_PATH to local_file_path (file_path variable) +provider "helm" {} # make sure you set the env variable KUBE_CONFIG_PATH to local_file_path (file_path variable) + +provider "rancher2" { + alias = "authenticate" + bootstrap = true + api_url = "https://${local.domain}.${local.zone}" + timeout = "300s" +} + +resource "rancher2_bootstrap" "authenticate" { + depends_on = [ + module.rancher, + ] + provider = rancher2.authenticate + initial_password = module.rancher.admin_password + password = module.rancher.admin_password + token_update = true + token_ttl = 7200 # 2 hours +} + +provider "rancher2" { + alias = "default" + api_url = "https://${local.domain}.${local.zone}" + token_key = rancher2_bootstrap.authenticate.token + timeout = "300s" +} + +locals { + identifier = var.identifier + example = "basic" + project_name = "tf-${substr(md5(join("-", 
[local.example, local.identifier])), 0, 5)}" + username = local.project_name + domain = local.project_name + zone = var.zone + key_name = var.key_name + key = var.key + acme_server_url = "https://acme-v02.api.letsencrypt.org" + owner = var.owner + rke2_version = var.rke2_version + local_file_path = var.file_path + runner_ip = chomp(data.http.myip.response_body) # "runner" is the server running Terraform + rancher_version = var.rancher_version + cert_manager_version = "1.16.3" # "1.13.1" + os = "sle-micro-61" +} + +data "http" "myip" { + url = "https://ipinfo.io/ip" +} + +module "rancher" { + source = "../../" + # project + identifier = local.identifier + owner = local.owner + project_name = local.project_name + domain = local.domain + zone = local.zone + # access + key_name = local.key_name + key = local.key + username = local.username + admin_ip = local.runner_ip + # rke2 + rke2_version = local.rke2_version + local_file_path = local.local_file_path + install_method = "rpm" # rpm only for now, need to figure out local helm chart installs otherwise + cni = "canal" + node_configuration = { + "rancherA" = { + type = "all-in-one" + size = "xxl" + os = local.os + indirect_access = true + initial = true + } + "rancherB" = { + type = "all-in-one" + size = "xxl" + os = local.os + indirect_access = true + initial = false + } + "rancherC" = { + type = "all-in-one" + size = "xxl" + os = local.os + indirect_access = true + initial = false + } + } + # rancher + cert_manager_version = local.cert_manager_version + rancher_version = local.rancher_version +} + +data "rancher2_cluster" "local" { + depends_on = [ + module.rancher, + rancher2_bootstrap.authenticate, + ] + provider = rancher2.default + name = "local" +} diff --git a/examples/three/outputs.tf b/examples/three/outputs.tf new file mode 100644 index 0000000..8e218ec --- /dev/null +++ b/examples/three/outputs.tf @@ -0,0 +1,22 @@ +output "kubeconfig" { + value = module.rancher.kubeconfig + description = <<-EOT + The 
kubeconfig for the server. + EOT + sensitive = true +} +output "address" { + value = module.rancher.address +} +output "admin_token" { + value = module.rancher.admin_token + sensitive = true +} +output "admin_password" { + value = module.rancher.admin_password + sensitive = true +} +output "cluster_data" { + value = jsonencode(data.rancher2_cluster.local) + sensitive = true +} diff --git a/examples/three/variables.tf b/examples/three/variables.tf new file mode 100644 index 0000000..3d7bc84 --- /dev/null +++ b/examples/three/variables.tf @@ -0,0 +1,54 @@ +variable "key_name" { + type = string + description = <<-EOT + The name of an AWS key pair to use for SSH access to the instance. + This key should already be added to your ssh agent for server authentication. + EOT +} +variable "key" { + type = string + description = <<-EOT + The contents of an AWS key pair to use for SSH access to the instance. + This is necessary for installing rke2 on the nodes and will be removed after installation. + EOT +} +variable "identifier" { + type = string + description = <<-EOT + A unique identifier for the project; this helps when generating names for infrastructure items. + EOT +} +variable "owner" { + type = string + description = <<-EOT + The owner of the project; this helps when generating names for infrastructure items. + EOT +} +variable "zone" { + type = string + description = <<-EOT + The Route53 DNS zone to deploy the cluster into. + This is used to generate the DNS name for the cluster. + The zone must already exist. + EOT +} +variable "rke2_version" { + type = string + description = <<-EOT + The version of rke2 to install on the nodes. + EOT +} +variable "rancher_version" { + type = string + description = <<-EOT + The version of rancher to install on the rke2 cluster. + EOT + default = "2.9.1" +} +variable "file_path" { + type = string + description = <<-EOT + The path to the file containing the rke2 install script. 
+ EOT + default = "./rke2" +} diff --git a/examples/three/versions.tf b/examples/three/versions.tf new file mode 100644 index 0000000..98af46d --- /dev/null +++ b/examples/three/versions.tf @@ -0,0 +1,53 @@ +terraform { + required_version = ">= 1.5.0" + required_providers { + local = { + source = "hashicorp/local" + version = ">= 2.5" + } + random = { + source = "hashicorp/random" + version = ">= 3.5.1" + } + github = { + source = "integrations/github" + version = ">= 5.44" + } + aws = { + source = "hashicorp/aws" + version = ">= 5.11" + } + http = { + source = "hashicorp/http" + version = ">= 3.4" + } + null = { + source = "hashicorp/null" + version = ">= 3" + } + tls = { + source = "hashicorp/tls" + version = ">= 4.0" + } + acme = { + source = "vancluever/acme" + version = ">= 2.0" + } + cloudinit = { + source = "hashicorp/cloudinit" + version = ">= 2.3.3" + } + helm = { + source = "hashicorp/helm" + version = "2.14" + } + rancher2 = { + source = "rancher/rancher2" + version = ">= 5.0.0" + } + kubernetes = { + source = "hashicorp/kubernetes" + version = ">= 2.31.0" + } + } +} diff --git a/flake.lock b/flake.lock index 123b37c..963cb0f 100644 --- a/flake.lock +++ b/flake.lock @@ -20,11 +20,11 @@ }, "nixpkgs": { "locked": { - "lastModified": 1746576598, - "narHash": "sha256-FshoQvr6Aor5SnORVvh/ZdJ1Sa2U4ZrIMwKBX5k2wu0=", + "lastModified": 1750811787, + "narHash": "sha256-rD/978c35JXz6JLAzciTIOCMenPumF6zrQOj4rVZeHE=", "owner": "NixOS", "repo": "nixpkgs", - "rev": "b3582c75c7f21ce0b429898980eddbbf05c68e55", + "rev": "992f916556fcfaa94451ebc7fc6e396134bbf5b1", "type": "github" }, "original": { diff --git a/flake.nix b/flake.nix index 4fb3ed0..d6172c0 100644 --- a/flake.nix +++ b/flake.nix @@ -77,6 +77,7 @@ updatecli vim which + xz yq-go ]; }; @@ -88,6 +89,7 @@ devShells.default = pkgs.mkShell { buildInputs = [ devShellPackage ]; shellHook = '' + while read word; do echo -e "*$word\n#" | aspell -a >/dev/null; done < aspell_custom.txt homebin=$HOME/bin; install -d 
$homebin; tfswitch -b $homebin/terraform 1.5.7 &>/dev/null; diff --git a/main.tf b/main.tf index 72416fe..ad79e45 100644 --- a/main.tf +++ b/main.tf @@ -6,7 +6,13 @@ locals { domain = var.domain zone = var.zone fqdn = join(".", [local.domain, local.zone]) - skip_cert = var.skip_project_cert_generation + # tflint-ignore: terraform_unused_declarations + fqdn_validate = (can(regex( + "^(?:https?://)?[[:alpha:]](?:[[:alnum:]\\p{Pd}]{1,63}\\.)+[[:alnum:]\\p{Pd}]{1,62}[[:alnum:]](?::[[:digit:]]{1,5})?$", + local.fqdn + )) ? false : one([local.fqdn, "The_fqdn_must_be_a_fully_qualified_domain_name"])) # used like this we can validate local variables + + skip_cert = var.skip_project_cert_generation # access key_name = var.key_name key = var.key @@ -14,27 +20,22 @@ locals { admin_ip = var.admin_ip # rke2 rke2_version = var.rke2_version - local_file_path = ( - var.local_file_path != "" ? (var.local_file_path == path.root ? "${abspath(path.root)}/rke2" : abspath(var.local_file_path)) : - "${abspath(path.root)}/rke2" + local_file_path = abspath( + var.local_file_path != "" ? (var.local_file_path == path.root ? 
"${path.root}/rke2" : var.local_file_path) : + "${path.root}/rke2" ) install_method = var.install_method cni = var.cni node_configuration = var.node_configuration # rancher - cert_manager_version = var.cert_manager_version - rancher_version = var.rancher_version - ip_family = "ipv4" - ingress_controller = "nginx" + cert_manager_version = var.cert_manager_version + rancher_version = var.rancher_version + ip_family = "ipv4" + # ingress_controller = "nginx" bootstrap_rancher = var.bootstrap_rancher install_cert_manager = var.install_cert_manager configure_cert_manager = var.configure_cert_manager cert_manager_config = var.cert_manager_configuration - - # remote state files - install_cert_manager_backend = var.install_cert_manager_backend - rancher_bootstrap_backend = var.rancher_bootstrap_backend - # the cluster submodule uses the main state } data "aws_route53_zone" "zone" { @@ -58,7 +59,7 @@ module "cluster" { cni = local.cni node_configuration = local.node_configuration ip_family = local.ip_family - ingress_controller = local.ingress_controller + # ingress_controller = local.ingress_controller skip_cert_creation = local.skip_cert } @@ -77,7 +78,6 @@ module "install_cert_manager" { cert_manager_version = local.cert_manager_version configure_cert_manager = local.configure_cert_manager cert_manager_configuration = local.cert_manager_config - backend_file = local.install_cert_manager_backend } module "rancher_bootstrap" { @@ -85,18 +85,15 @@ module "rancher_bootstrap" { module.cluster, module.install_cert_manager, ] - count = (local.bootstrap_rancher ? 
1 : 0) - source = "./modules/rancher_bootstrap" - path = local.local_file_path - project_domain = local.fqdn - zone = local.zone - zone_id = data.aws_route53_zone.zone.zone_id - region = local.cert_manager_config.aws_region - email = local.cert_manager_config.acme_email - acme_server_url = local.cert_manager_config.acme_server_url - rancher_version = local.rancher_version - cert_manager_version = local.cert_manager_version - externalTLS = (local.configure_cert_manager ? false : true) - cert_manager_configuration = local.cert_manager_config - backend_file = local.rancher_bootstrap_backend + count = (local.bootstrap_rancher ? 1 : 0) + source = "./modules/rancher_bootstrap" + path = local.local_file_path + project_domain = local.fqdn + zone_id = data.aws_route53_zone.zone.zone_id + region = local.cert_manager_config.aws_region + email = local.cert_manager_config.acme_email + acme_server_url = local.cert_manager_config.acme_server_url + rancher_version = local.rancher_version + cert_manager_version = local.cert_manager_version + externalTLS = (local.configure_cert_manager ? 
false : true) } diff --git a/modules/cluster/main.tf b/modules/cluster/main.tf index 98d88fa..6582d64 100644 --- a/modules/cluster/main.tf +++ b/modules/cluster/main.tf @@ -1,13 +1,14 @@ locals { # project - identifier = var.identifier # this is a random unique string that can be used to identify resources in the cloud provider - owner = var.owner - project_name = var.project_name - domain = var.domain - zone = var.zone # DNS zone - ip_family = var.ip_family - skip_cert = var.skip_cert_creation + identifier = var.identifier # this is a random unique string that can be used to identify resources in the cloud provider + owner = var.owner + project_name = var.project_name + domain = var.domain + zone = var.zone # DNS zone + ip_family = var.ip_family + skip_cert = var.skip_cert_creation + acme_server_url = var.acme_server_url # access ssh_key_name = var.key_name @@ -15,18 +16,24 @@ locals { username = var.username runner_ip = var.runner_ip + # server + node_configuration = var.node_configuration + #rke2 rke2_version = var.rke2_version local_file_path = ( - var.file_path != "" ? (var.file_path == path.root ? "${abspath(path.root)}/rke2" : abspath(var.file_path)) : - "${abspath(path.root)}/rke2" + var.file_path != "" ? (var.file_path == path.root ? "${path.root}/rke2" : var.file_path) : + "${path.root}/rke2" ) - install_method = var.install_method - download = (local.install_method == "tar" ? "download" : "skip") - cni = var.cni - # tflint-ignore: terraform_unused_declarations - ingress_controller = var.ingress_controller # not currently in use, TODO: add traefik functionality + # # tflint-ignore: terraform_unused_declarations + # local_file_path_validate = (can(regex( + # "^\\.", + # local.local_file_path + # )) ? false : one([local.local_file_path, "local_file_path_must_be_relative"])) # used like this we can validate local variables + install_method = var.install_method + download = (local.install_method == "tar" ? 
"download" : "skip") + cni = var.cni cni_file = (local.cni == "cilium" ? "${path.root}/cilium.yaml" : (local.cni == "calico" ? "${path.root}/calico.yaml" : "")) cni_config = (local.cni_file != "" ? file(local.cni_file) : "") api_config = <<-EOT @@ -51,149 +58,31 @@ locals { ${local.cni_config} EOT - ino = module.initial[keys(local.initial_node)[0]] - subnets = local.ino.project_subnets - node_configuration = var.node_configuration - full_node_configs = { for key, node in local.node_configuration : - key => { - name = substr("${local.project_name}-${md5(key)}", 0, 25) - domain = substr("${local.project_name}-${md5(key)}", 0, 25) - indirect_access = (node.indirect_access ? "enable" : "skip") - file_path = "${local.local_file_path}/${substr("${local.project_name}-${md5(key)}", 0, 25)}/data" - path = "${local.local_file_path}/${substr("${local.project_name}-${md5(key)}", 0, 25)}" - config = ( - strcontains(node.type, "all-in-one") ? local.all_in_one_config : - strcontains(node.type, "control-plane") ? local.control_plane_config : - strcontains(node.type, "api") ? local.api_config : - strcontains(node.type, "database") ? local.database_config : - "" # worker nodes don't need additional config - ) - config_strategy = "merge" - role = node.type - type = (strcontains(node.type, "worker") ? "agent" : "server") - size = node.size - image = node.os - prep_script = ( - strcontains(node.os, "sles") ? templatefile("${path.module}/suse_prep.sh", { - install_method = local.install_method, - ip_family = local.ip_family, - image = node.os, - }) : - strcontains(node.os, "rhel") ? templatefile("${path.module}/rhel_prep.sh", { - install_method = local.install_method, - ip_family = local.ip_family, - image = node.os, - }) : - strcontains(node.os, "ubuntu") ? templatefile("${path.module}/ubuntu_prep.sh", { - install_method = local.install_method, - ip_family = local.ip_family, - image = node.os, - }) : - (strcontains(node.os, "sle-micro-60") || strcontains(node.os, "sle-micro-61")) ? 
templatefile("${path.module}/slem60_61_prep.sh", { - install_method = local.install_method, - ip_family = local.ip_family, - image = node.os, - }) : - "" - ) - start_prep_script = ( - # (strcontains(node.os, "sle-micro-60") || strcontains(node.os, "sle-micro-61")) ? file("${path.module}/slem60_61_start_prep.sh") : - "" - ) - initial = node.initial - workfolder = strcontains(node.os, "cis") ? "/var/tmp" : "/home/${local.username}" - cloudinit_strategy = (node.os == "sle-micro-55" || node.os == "cis-rhel-8") ? "skip" : "default" - # CIS images are not supported on IPv6 only deployments due to kernel modifications with how AWS IPv6 works (dhcpv6) - # tflint-ignore: terraform_unused_declarations - fail_cis_ipv6 = ((node.os == "rhel-8-cis" && local.ip_family == "ipv6") ? one([local.ip_family, "cis_ipv6_incompatible"]) : false) - # Ubuntu images do not support rpm install method - # tflint-ignore: terraform_unused_declarations - fail_ubuntu_rpm = ((strcontains(node.os, "ubuntu") && local.install_method == "rpm") ? 
one([local.install_method, "ubuntu_rpm_incompatible"]) : false) - } + ino = module.deploy_initial_node[keys(local.initial_node)[0]] + subnets = local.ino.output.project_subnets + all_nodes = { + for k, v in local.node_configuration : + k => merge( + v, + { + deploy_path = "${local.local_file_path}/tf-nodes/${substr("${local.project_name}-${md5(k)}", 0, 25)}" + }, + ) } + initial_node = { for k, v in local.all_nodes : k => v if v.initial == true } + additional_nodes = { for k, v in local.all_nodes : k => v if v.initial != true } + target_groups = { kubectl = substr(lower("${local.project_name}-kubectl"), 0, 32) application-secure = substr(lower("${local.project_name}-application-secure"), 0, 32) application-insecure = substr(lower("${local.project_name}-application-insecure"), 0, 32) } - initial_node = { for k, v in local.full_node_configs : k => v if v.initial == true } - additional_nodes = { - for k, v in local.full_node_configs : - k => merge( - v, - tomap({ az = local.subnets[keys(local.subnets)[index(keys(local.full_node_configs), k) % length(local.subnets)]].availability_zone }), - tomap({ subnet = local.subnets[keys(local.subnets)[index(keys(local.full_node_configs), k) % length(local.subnets)]].tags.Name }) - ) - if v.initial != true - } -} - -data "aws_availability_zones" "available" { - state = "available" -} -module "initial" { - depends_on = [ - data.aws_availability_zones.available, - ] - source = "rancher/rke2/aws" - version = "1.2.6" - for_each = local.initial_node - project_use_strategy = "create" - project_vpc_use_strategy = "create" - project_vpc_name = "${local.project_name}-vpc" - project_vpc_zones = data.aws_availability_zones.available.names - project_vpc_type = local.ip_family - project_vpc_public = local.ip_family == "ipv6" ? 
false : true # ipv6 addresses assigned by AWS are always public - project_subnet_use_strategy = "create" - project_subnet_names = [for z in data.aws_availability_zones.available.names : "${local.project_name}-subnet-${z}"] - project_security_group_use_strategy = "create" - project_security_group_name = "${local.project_name}-sg" - project_security_group_type = "egress" # in the future we should allow this to be variable, but we need to figure out airgap first - project_load_balancer_use_strategy = "create" - project_load_balancer_name = "${local.project_name}-lb" - project_load_balancer_access_cidrs = { - "kubectl" = { - port = "6443" - protocol = "tcp" - ip_family = (local.ip_family == "ipv6" ? "ipv6" : "ipv4") - cidrs = (local.ip_family == "ipv6" ? ["${local.runner_ip}/128"] : ["${local.runner_ip}/32"]) - target_name = local.target_groups.kubectl - } - "application-secure" = { - port = "443" - protocol = "tcp" - ip_family = (local.ip_family == "ipv6" ? "ipv6" : "ipv4") - cidrs = (local.ip_family == "ipv6" ? ["${local.runner_ip}/128"] : ["${local.runner_ip}/32"]) - target_name = local.target_groups.application-secure - } - "application-insecure" = { - port = "80" - protocol = "tcp" - ip_family = (local.ip_family == "ipv6" ? "ipv6" : "ipv4") - cidrs = (local.ip_family == "ipv6" ? ["${local.runner_ip}/128"] : ["${local.runner_ip}/32"]) - target_name = local.target_groups.application-insecure - } - } - project_domain_use_strategy = "create" - project_domain = local.domain - project_domain_zone = local.zone - project_domain_cert_use_strategy = (local.skip_cert ? 
"skip" : "create") - server_use_strategy = "create" - server_name = each.value.name - server_type = each.value.size - server_availability_zone = data.aws_availability_zones.available.names[0] - server_image_use_strategy = "find" - server_image_type = each.value.image - server_ip_family = local.ip_family - server_cloudinit_use_strategy = each.value.cloudinit_strategy - server_indirect_access_use_strategy = each.value.indirect_access - server_load_balancer_target_groups = values(local.target_groups) - server_direct_access_use_strategy = "ssh" # configure the servers for direct ssh access - #https://ranchermanager.docs.rancher.com/getting-started/installation-and-upgrade/installation-requirements/port-requirements#rancher-aws-ec2-security-group + # remember these are external access objects, internal access is enabled by default + # https://ranchermanager.docs.rancher.com/getting-started/installation-and-upgrade/installation-requirements/port-requirements#rancher-aws-ec2-security-group server_access_addresses = { # you must include ssh access here to enable setup ssh = { - port = 22 # allow runner access on ssh port + port = 22 # allow access on ssh port protocol = "tcp" ip_family = (local.ip_family == "ipv6" ? "ipv6" : "ipv4") cidrs = (local.ip_family == "ipv6" ? ["${local.runner_ip}/128"] : ["${local.runner_ip}/32"]) @@ -217,194 +106,223 @@ module "initial" { cidrs = (local.ip_family == "ipv6" ? ["${local.runner_ip}/128"] : ["${local.runner_ip}/32"]) } } - #server_load_balancer_target_groups = values(local.target_groups) first node is an etcd node, not an API node, we use the internal LB for that - server_user = { - user = local.username - aws_keypair_use_strategy = "select" - ssh_key_name = local.ssh_key_name - public_ssh_key = local.ssh_key - user_workfolder = each.value.workfolder - timeout = 10 + project_load_balancer_access_cidrs = { + "kubectl" = { + port = "6443" + protocol = "tcp" + ip_family = (local.ip_family == "ipv6" ? 
"ipv6" : "ipv4") + cidrs = (local.ip_family == "ipv6" ? ["${local.runner_ip}/128"] : ["${local.runner_ip}/32"]) + target_name = local.target_groups.kubectl + } + "application-secure" = { + port = "443" + protocol = "tcp" + ip_family = (local.ip_family == "ipv6" ? "ipv6" : "ipv4") + cidrs = (local.ip_family == "ipv6" ? ["${local.runner_ip}/128"] : ["${local.runner_ip}/32"]) + target_name = local.target_groups.application-secure + } + "application-insecure" = { + port = "80" + protocol = "tcp" + ip_family = (local.ip_family == "ipv6" ? "ipv6" : "ipv4") + cidrs = (local.ip_family == "ipv6" ? ["${local.runner_ip}/128"] : ["${local.runner_ip}/32"]) + target_name = local.target_groups.application-insecure + } } - server_add_domain = false - server_domain_name = each.value.domain - server_domain_zone = local.zone - server_add_eip = false - install_use_strategy = local.install_method - local_file_use_strategy = local.download - local_file_path = each.value.file_path - install_rke2_version = local.rke2_version - install_rpm_channel = "stable" - install_remote_file_path = "${each.value.workfolder}/rke2" - install_role = each.value.type - install_start = true - install_prep_script = each.value.prep_script - install_start_prep_script = each.value.start_prep_script - install_start_timeout = 10 - config_use_strategy = each.value.config_strategy - config_join_strategy = "skip" - config_default_name = "50-default-config.yaml" - config_supplied_name = "51-config.yaml" - config_supplied_content = each.value.config - retrieve_kubeconfig = true + project_subnet_names = [for z in data.aws_availability_zones.available.names : "${local.project_name}-subnet-${z}"] } -# There are many ways to orchestrate Terraform configurations with the goal of breaking it down -# In this example I am using Terraform resources to orchestrate Terraform -# I felt this was the best way to accomplish the goal without incurring additional dependencies -# The configuration we are orchestrating isn't hard coded, 
we will be generating the config from a templatefile -# see "local_file.cp_main" -resource "terraform_data" "path" { - depends_on = [ - module.initial, - ] - for_each = local.additional_nodes - triggers_replace = { - initial_token = local.ino.join_token - initial_url = local.ino.join_url - } - provisioner "local-exec" { - command = <<-EOT - install -d ${each.value.path} - cp ${abspath(path.module)}/variables.tf ${each.value.path} - cp ${abspath(path.module)}/versions.tf ${each.value.path} - EOT - } -} -resource "local_file" "main" { - depends_on = [ - module.initial, - terraform_data.path, - ] - for_each = local.additional_nodes - content = templatefile( - "${abspath(path.module)}/main.tf.tftpl", - { - project_security_group_name = local.ino.project_security_group.name - project_subnets = jsonencode(local.ino.project_subnets) - join_url = local.ino.join_url - join_token = local.ino.join_token - cluster_cidr = jsonencode(local.ino.cluster_cidr) - service_cidr = jsonencode(local.ino.service_cidr) - server_info = jsonencode(each.value) - role = each.value.role # worker, control-plane, database, all-in-one, etc - target_groups = jsonencode(local.target_groups) - } - ) - filename = "${each.value.path}/main.tf" +data "aws_availability_zones" "available" { + state = "available" } -resource "local_file" "inputs" { + +module "deploy_initial_node" { + source = "../deploy" depends_on = [ - module.initial, - terraform_data.path, - local_file.main, + data.aws_availability_zones.available, ] - for_each = local.additional_nodes - content = <<-EOT - identifier = "${local.identifier}" - owner = "${local.owner}" - project_name = "${local.project_name}" - domain = "${local.domain}" - zone = "${local.zone}" - key_name = "${local.ssh_key_name}" - key = "${local.ssh_key}" - username = "${local.username}" - runner_ip = "${local.runner_ip}" - rke2_version = "${local.rke2_version}" - file_path = "${each.value.file_path}" - install_method = "${local.install_method}" - cni = "${local.cni}" - 
ip_family = "${local.ip_family}" - ingress_controller = "${local.ingress_controller}" + for_each = local.initial_node + deploy_path = each.value.deploy_path + data_path = each.value.deploy_path + template_path = "${path.module}/node_template" + inputs = <<-EOT + identifier = "${local.identifier}" + owner = "${local.owner}" + acme_server_url = "${local.acme_server_url}" + project_use_strategy = "create" + project_name = "${local.project_name}" + project_vpc_use_strategy = "create" + project_vpc_type = "${local.ip_family}" + project_vpc_zones = "${base64encode(jsonencode(data.aws_availability_zones.available.names))}" + project_vpc_public = "${local.ip_family == "ipv6" ? "false" : "true"}" # ipv6 addresses assigned by AWS are always public + project_subnet_use_strategy = "create" + project_subnet_names = "${base64encode(jsonencode(local.project_subnet_names))}" + project_security_group_use_strategy = "create" + project_security_group_type = "egress" # in the future we should allow this to be variable, but we need to figure out airgap first + project_load_balancer_use_strategy = "create" + project_load_balancer_access_cidrs = "${base64encode(jsonencode(local.project_load_balancer_access_cidrs))}" + project_domain_use_strategy = "create" + project_domain = "${local.domain}" + project_domain_zone = "${local.zone}" + project_domain_cert_use_strategy = "${(local.skip_cert ? "skip" : "create")}" + server_name = "${substr("${local.project_name}-${md5(each.key)}", 0, 25)}" + server_type = "${each.value.size}" + server_ip_family = "${local.ip_family}" + server_availability_zone = "${data.aws_availability_zones.available.names[0]}" + server_image_type = "${each.value.os}" + server_cloudinit_use_strategy = "${(each.value.os == "sle-micro-55" || each.value.os == "cis-rhel-8") ? "skip" : "default"}" + server_indirect_access_use_strategy = "${(each.value.indirect_access ? 
"enable" : "skip")}" + server_load_balancer_target_groups = "${base64encode(jsonencode(values(local.target_groups)))}" + server_access_addresses = "${base64encode(jsonencode(local.server_access_addresses))}" + server_user = "${base64encode(jsonencode({ + user = local.username + aws_keypair_use_strategy = "select" + ssh_key_name = local.ssh_key_name + public_ssh_key = local.ssh_key + user_workfolder = strcontains(each.value.os, "cis") ? "/var/tmp" : "/home/${local.username}" + timeout = 10 +}))}" + server_domain_name = "${substr("${local.project_name}-${md5(each.key)}", 0, 25)}" + server_domain_zone = "${local.zone}" + install_use_strategy = "${local.install_method}" + local_file_use_strategy = "${local.download}" + local_file_path = "${each.value.deploy_path}/configs" + install_rke2_version = "${local.rke2_version}" + install_remote_file_path = "${join("/", [(strcontains(each.value.os, "cis") ? "/var/tmp" : "/home/${local.username}"), "rke2"])}" + install_prep_script = "${base64encode(( +strcontains(each.value.os, "sles") ? templatefile("${path.module}/suse_prep.sh", { + install_method = local.install_method, + ip_family = local.ip_family, + image = each.value.os, +}) : +strcontains(each.value.os, "rhel") ? templatefile("${path.module}/rhel_prep.sh", { + install_method = local.install_method, + ip_family = local.ip_family, + image = each.value.os, +}) : +strcontains(each.value.os, "ubuntu") ? templatefile("${path.module}/ubuntu_prep.sh", { + install_method = local.install_method, + ip_family = local.ip_family, + image = each.value.os, +}) : +(strcontains(each.value.os, "sle-micro-60") || strcontains(each.value.os, "sle-micro-61")) ? templatefile("${path.module}/slem60_61_prep.sh", { + install_method = local.install_method, + ip_family = local.ip_family, + image = each.value.os, +}) : +"" +))}" + install_role = "${(strcontains(each.value.type, "worker") ? 
"agent" : "server")}" + config_supplied_content = "${base64encode(( +strcontains(each.value.type, "all-in-one") ? local.all_in_one_config : +strcontains(each.value.type, "control-plane") ? local.control_plane_config : +strcontains(each.value.type, "api") ? local.api_config : +strcontains(each.value.type, "database") ? local.database_config : +"" # worker nodes don't need additional config +))}" + config_supplied_name = "51-config.yaml" + config_join_strategy = "skip" + retrieve_kubeconfig = "true" EOT - filename = "${each.value.path}/inputs.tfvars" } -resource "terraform_data" "create" { +# There are many ways to orchestrate Terraform configurations with the goal of breaking it down +# In this example I am using Terraform resources to orchestrate Terraform +# I felt this was the best way to accomplish the goal without incurring additional dependencies +module "deploy_additional_nodes" { + source = "../deploy" depends_on = [ - module.initial, - terraform_data.path, - local_file.main, - local_file.inputs, + data.aws_availability_zones.available, + module.deploy_initial_node, ] - for_each = local.additional_nodes - triggers_replace = { - initial = local.ino.join_url - path = each.value.path - } - provisioner "local-exec" { - command = <<-EOT - cd ${self.triggers_replace.path} - TF_DATA_DIR="${self.triggers_replace.path}" - terraform init -upgrade=true - EXITCODE=1 - ATTEMPTS=0 - MAX=3 - while [ $EXITCODE -gt 0 ] && [ $ATTEMPTS -lt $MAX ]; do - echo "Starting attempt $((ATTEMPTS + 1))..." - timeout 1h terraform apply -var-file="inputs.tfvars" -auto-approve -state="${self.triggers_replace.path}/tfstate" - EXITCODE=$? - if [ $EXITCODE -eq 124 ]; then echo "Apply timed out after 1 hour"; fi - ATTEMPTS=$((ATTEMPTS + 1)) - echo "Exit code $EXITCODE..." - if [ $EXITCODE -gt 0 ] && [ $ATTEMPTS -lt $MAX ]; then - echo "wait 30 seconds between attempts..." - sleep 30 - fi - done - exit $EXITCODE - EOT - } - provisioner "local-exec" { - # warning! 
this is only triggered on destroy, not refresh/taint - when = destroy - command = <<-EOT - set -x - cd ${self.triggers_replace.path} - TF_DATA_DIR="${self.triggers_replace.path}" - EXITCODE=1 - ATTEMPTS=0 - MAX=3 - while [ $EXITCODE -gt 0 ] && [ $ATTEMPTS -lt $MAX ]; do - echo "Starting attempt $((ATTEMPTS + 1))..." - timeout 1h terraform destroy -var-file="inputs.tfvars" -no-color -auto-approve -state="${self.triggers_replace.path}/tfstate" - EXITCODE=$? - if [ $EXITCODE -eq 124 ]; then echo "Apply timed out after 1 hour"; fi - ATTEMPTS=$((ATTEMPTS + 1)) - echo "Exit code $EXITCODE..." - if [ $EXITCODE -gt 0 ] && [ $ATTEMPTS -lt $MAX ]; then - echo "wait 30 seconds between attempts..." - sleep 30 - fi - done - exit $EXITCODE - EOT - } + for_each = local.additional_nodes + deploy_path = each.value.deploy_path + data_path = each.value.deploy_path + template_path = "${path.module}/node_template" + inputs = <<-EOT + identifier = "${local.identifier}" + owner = "${local.owner}" + acme_server_url = "${local.acme_server_url}" + project_use_strategy = "skip" + project_domain = "${local.domain}" + project_domain_zone = "${local.zone}" + project_security_group_name = "${local.ino.output.project_security_group.name}" + server_name = "${substr("${local.project_name}-${md5(each.key)}", 0, 25)}" + server_type = "${each.value.size}" + server_ip_family = "${local.ip_family}" + # the availability zone of the subnet with index matching the modulo of the index of the current key and the total number of subnets + # so current key index % length of subnets = the index of the subnet that we will get the availability zone of + # ex1. key index = 1, subnets length = 3; subnet[1].availability_zone, subnet[1].tags.Name + # ex2.
key index = 5, subnets length = 3; subnet[2].availability_zone, subnet[2].tags.Name + # this creates round robin distribution of nodes across availability zones + server_availability_zone = "${local.subnets[keys(local.subnets)[index(keys(local.additional_nodes), each.key) % length(local.subnets)]].availability_zone}" + server_subnet_name = "${local.subnets[keys(local.subnets)[index(keys(local.additional_nodes), each.key) % length(local.subnets)]].tags.Name}" + server_security_group_name = "${local.ino.output.project_security_group.name}" + server_image_type = "${each.value.os}" + server_cloudinit_use_strategy = "${(each.value.os == "sle-micro-55" || each.value.os == "cis-rhel-8") ? "skip" : "default"}" + server_indirect_access_use_strategy = "${(each.value.indirect_access ? "enable" : "skip")}" + server_load_balancer_target_groups = "${base64encode(jsonencode(values(local.target_groups)))}" + server_access_addresses = "${base64encode(jsonencode(local.server_access_addresses))}" + server_user = "${base64encode(jsonencode({ + user = local.username + aws_keypair_use_strategy = "select" + ssh_key_name = local.ssh_key_name + public_ssh_key = local.ssh_key + user_workfolder = strcontains(each.value.os, "cis") ? "/var/tmp" : "/home/${local.username}" + timeout = 10 +}))}" + server_domain_name = "${substr("${local.project_name}-${md5(each.key)}", 0, 25)}" + server_domain_zone = "${local.zone}" + install_use_strategy = "${local.install_method}" + local_file_use_strategy = "${local.download}" + local_file_path = "${each.value.deploy_path}/configs" + install_rke2_version = "${local.rke2_version}" + install_remote_file_path = "${join("/", [(strcontains(each.value.os, "cis") ? "/var/tmp" : "/home/${local.username}"), "rke2"])}" + install_prep_script = "${base64encode(( +strcontains(each.value.os, "sles") ? 
templatefile("${path.module}/suse_prep.sh", { + install_method = local.install_method, + ip_family = local.ip_family, + image = each.value.os, +}) : +strcontains(each.value.os, "rhel") ? templatefile("${path.module}/rhel_prep.sh", { + install_method = local.install_method, + ip_family = local.ip_family, + image = each.value.os, +}) : +strcontains(each.value.os, "ubuntu") ? templatefile("${path.module}/ubuntu_prep.sh", { + install_method = local.install_method, + ip_family = local.ip_family, + image = each.value.os, +}) : +(strcontains(each.value.os, "sle-micro-60") || strcontains(each.value.os, "sle-micro-61")) ? templatefile("${path.module}/slem60_61_prep.sh", { + install_method = local.install_method, + ip_family = local.ip_family, + image = each.value.os, +}) : +"" +))}" + install_role = "${(strcontains(each.value.type, "worker") ? "agent" : "server")}" + config_supplied_content = "${base64encode(( +strcontains(each.value.type, "all-in-one") ? local.all_in_one_config : +strcontains(each.value.type, "control-plane") ? local.control_plane_config : +strcontains(each.value.type, "api") ? local.api_config : +strcontains(each.value.type, "database") ? 
local.database_config : +"" # worker nodes don't need additional config +))}" + config_supplied_name = "51-config.yaml" + config_join_strategy = "join" + config_join_url = "${local.ino.output.join_url}" + config_join_token = "${local.ino.output.join_token}" + config_cluster_cidr = "${base64encode(jsonencode(local.ino.output.cluster_cidr))}" + config_service_cidr = "${base64encode(jsonencode(local.ino.output.service_cidr))}" + EOT } resource "local_file" "kubeconfig" { depends_on = [ - module.initial, - terraform_data.path, - local_file.main, - local_file.inputs, - terraform_data.create, + module.deploy_initial_node, + module.deploy_additional_nodes, ] - content = local.ino.kubeconfig + content = local.ino.output.kubeconfig filename = "${local.local_file_path}/kubeconfig" } -# commented for performance, leaving to show how you can export the state if necessary -# data "terraform_remote_state" "additional_node_states" { -# depends_on = [ -# module.initial, -# terraform_data.path, -# local_file.main, -# local_file.inputs, -# terraform_data.create, -# ] -# for_each = local.additional_nodes -# backend = "local" -# config = { -# path = "${each.value.path}/tfstate" -# } -# } diff --git a/modules/cluster/main.tf.tftpl b/modules/cluster/main.tf.tftpl deleted file mode 100644 index 1435bd5..0000000 --- a/modules/cluster/main.tf.tftpl +++ /dev/null @@ -1,135 +0,0 @@ -# control plane terraform templatefile - -provider "aws" { - default_tags { - tags = { - Id = local.identifier - Owner = local.owner - } - } -} - -locals { - # tflint-ignore: terraform_unused_declarations - ingress_controller = var.ingress_controller # not currently in use, TODO: add traefik functionality - identifier = var.identifier # this is a random unique string that can be used to identify resources in the cloud provider - owner = var.owner - project_name = var.project_name - username = var.username - ip_family = var.ip_family - runner_ip = var.runner_ip - ssh_key = var.key - ssh_key_name = var.key_name - 
domain = var.domain - zone = var.zone # DNS zone - - rke2_version = var.rke2_version - install_method = var.install_method - download = (local.install_method == "tar" ? "download" : "skip") - - server_info = jsondecode(<<-EOT - ${server_info} - EOT - ) - install_prep_script = local.server_info.prep_script - local_file_path = local.server_info.file_path - workfolder = local.server_info.workfolder - cloudinit_strategy = local.server_info.cloudinit_strategy - config = local.server_info.config - - target_groups = jsondecode(<<-EOT - ${target_groups} - EOT - ) - cluster_cidr = jsondecode(<<-EOT - ${cluster_cidr} - EOT - ) - service_cidr = jsondecode(<<-EOT - ${service_cidr} - EOT - ) -} - -data "http" "myip" { - url = "https://ipinfo.io/ip" -} - -module "${role}_node" { - source = "rancher/rke2/aws" - version = "1.2.6" - project_use_strategy = "skip" - project_domain = local.domain - project_domain_zone = local.zone - server_use_strategy = "create" - server_name = local.server_info.name - server_type = local.server_info.size - server_security_group_name = "${project_security_group_name}" # should always match project security group - server_availability_zone = local.server_info.az - server_image_use_strategy = "find" - server_image_type = local.server_info.image - server_ip_family = local.ip_family - server_cloudinit_use_strategy = local.server_info.cloudinit_strategy - server_indirect_access_use_strategy = local.server_info.indirect_access - server_load_balancer_target_groups = values(local.target_groups) - server_subnet_name = local.server_info.subnet - server_direct_access_use_strategy = "ssh" # configure the servers for direct ssh access - # remember these are external access objects, internal access is enabled by default - server_access_addresses = { # you must include ssh access here to enable setup - ssh = { - port = 22 # allow access on ssh port - protocol = "tcp" - ip_family = (local.ip_family == "ipv6" ? "ipv6" : "ipv4") - cidrs = (local.ip_family == "ipv6" ? 
["$${local.runner_ip}/128"] : ["$${local.runner_ip}/32"]) - } - api = { - port = 6443 # allow runner IP access to API - protocol = "tcp" - ip_family = (local.ip_family == "ipv6" ? "ipv6" : "ipv4") - cidrs = (local.ip_family == "ipv6" ? ["$${local.runner_ip}/128"] : ["$${local.runner_ip}/32"]) - } - application-secure = { - port = 443 # allow runner IP access to https - protocol = "tcp" - ip_family = (local.ip_family == "ipv6" ? "ipv6" : "ipv4") - cidrs = (local.ip_family == "ipv6" ? ["$${local.runner_ip}/128"] : ["$${local.runner_ip}/32"]) - } - application-insecure = { - port = 80 # allow runner IP access to http - protocol = "tcp" - ip_family = (local.ip_family == "ipv6" ? "ipv6" : "ipv4") - cidrs = (local.ip_family == "ipv6" ? ["$${local.runner_ip}/128"] : ["$${local.runner_ip}/32"]) - } - } - server_user = { - user = local.username - aws_keypair_use_strategy = "select" - ssh_key_name = local.ssh_key_name - public_ssh_key = local.ssh_key - user_workfolder = local.server_info.workfolder - timeout = 10 - } - server_add_domain = false - server_domain_name = local.server_info.domain - server_domain_zone = local.zone - server_add_eip = false - install_use_strategy = local.install_method - local_file_use_strategy = local.download - local_file_path = local.server_info.file_path - install_rke2_version = local.rke2_version - install_rpm_channel = "stable" - install_remote_file_path = "$${local.server_info.workfolder}/rke2" - install_role = local.server_info.type - install_start = true - install_prep_script = local.server_info.prep_script - install_start_timeout = 10 - config_use_strategy = local.server_info.config_strategy - config_join_strategy = "join" - config_join_url = "${join_url}" - config_join_token = "${join_token}" - config_cluster_cidr = local.cluster_cidr - config_service_cidr = local.service_cidr - config_supplied_content = local.server_info.config - config_supplied_name = "51-config.yaml" - retrieve_kubeconfig = false # use initial -} diff --git 
a/modules/cluster/node_outputs b/modules/cluster/node_outputs deleted file mode 100644 index f787953..0000000 --- a/modules/cluster/node_outputs +++ /dev/null @@ -1,13 +0,0 @@ -output "kubeconfig" { - value = module.initial[0].kubeconfig - description = "Kubernetes config file contents for the cluster." - sensitive = true -} -output "api" { - value = yamldecode(module.initial[0].kubeconfig).clusters[0].cluster.server - description = "Address to use to connect to the cluster's API service." - sensitive = true -} -output "cert" { - value = module.initial[0].project_domain_tls_certificate -} diff --git a/modules/cluster/node_template/main.tf b/modules/cluster/node_template/main.tf new file mode 100644 index 0000000..d8b13dd --- /dev/null +++ b/modules/cluster/node_template/main.tf @@ -0,0 +1,148 @@ +provider "aws" { + default_tags { + tags = { + Id = local.identifier + Owner = local.owner + } + } +} + +provider "acme" { + server_url = local.acme_server_url +} + +locals { + identifier = var.identifier + owner = var.owner + acme_server_url = var.acme_server_url + project_use_strategy = var.project_use_strategy + project_name = var.project_name + project_admin_cidrs = (var.project_admin_cidrs != "[]" ? jsondecode(base64decode(var.project_admin_cidrs)) : []) + project_vpc_use_strategy = var.project_vpc_use_strategy + project_vpc_name = var.project_vpc_name + project_vpc_type = var.project_vpc_type + project_vpc_zones = (var.project_vpc_zones != "[]" ? jsondecode(base64decode(var.project_vpc_zones)) : []) + project_vpc_public = (var.project_vpc_public == "true" ? true : false) + project_subnet_use_strategy = var.project_subnet_use_strategy + project_subnet_names = (var.project_subnet_names != "[]" ? 
jsondecode(base64decode(var.project_subnet_names)) : []) # list + project_security_group_use_strategy = var.project_security_group_use_strategy + project_security_group_name = var.project_security_group_name + project_security_group_type = var.project_security_group_type + project_load_balancer_use_strategy = var.project_load_balancer_use_strategy + project_load_balancer_name = var.project_load_balancer_name + project_load_balancer_access_cidrs = (var.project_load_balancer_access_cidrs != null ? jsondecode(base64decode(var.project_load_balancer_access_cidrs)) : null) # object + project_domain_use_strategy = var.project_domain_use_strategy + project_domain = var.project_domain + project_domain_zone = var.project_domain_zone + project_domain_cert_use_strategy = var.project_domain_cert_use_strategy + server_use_strategy = var.server_use_strategy + server_name = var.server_name + server_type = var.server_type + server_ip_family = var.server_ip_family + server_private_ip = var.server_private_ip + server_availability_zone = var.server_availability_zone + server_subnet_name = var.server_subnet_name + server_security_group_name = var.server_security_group_name + server_image_use_strategy = var.server_image_use_strategy + server_image_type = var.server_image_type + server_cloudinit_use_strategy = var.server_cloudinit_use_strategy + server_cloudinit_content = var.server_cloudinit_content + server_indirect_access_use_strategy = var.server_indirect_access_use_strategy + server_load_balancer_target_groups = (var.server_load_balancer_target_groups != "[]" ? jsondecode(base64decode(var.server_load_balancer_target_groups)) : []) # list + server_direct_access_use_strategy = var.server_direct_access_use_strategy + server_access_addresses = (var.server_access_addresses != null ? jsondecode(base64decode(var.server_access_addresses)) : null) # object + server_user = (var.server_user != null ? 
jsondecode(base64decode(var.server_user)) : null) # object + server_add_domain = var.server_add_domain + server_domain_name = var.server_domain_name + server_domain_zone = var.server_domain_zone + server_add_eip = var.server_add_eip + install_use_strategy = var.install_use_strategy + local_file_use_strategy = var.local_file_use_strategy + local_file_path = var.local_file_path + install_rke2_version = var.install_rke2_version + install_rpm_channel = var.install_rpm_channel + install_remote_file_path = var.install_remote_file_path + install_prep_script = (var.install_prep_script != "" ? base64decode(var.install_prep_script) : "") + install_start_prep_script = (var.install_start_prep_script != "" ? base64decode(var.install_start_prep_script) : "") + install_role = var.install_role + install_start = var.install_start + install_start_timeout = var.install_start_timeout + config_use_strategy = var.config_use_strategy + config_default_name = var.config_default_name + config_supplied_content = (var.config_supplied_content != "" ? base64decode(var.config_supplied_content) : "") + config_supplied_name = var.config_supplied_name + config_join_strategy = var.config_join_strategy + config_join_url = var.config_join_url + config_join_token = var.config_join_token + config_cluster_cidr = (var.config_cluster_cidr != "[]" ? jsondecode(base64decode(var.config_cluster_cidr)) : []) # list + config_service_cidr = (var.config_service_cidr != "[]" ? jsondecode(base64decode(var.config_service_cidr)) : []) # list + retrieve_kubeconfig = (var.retrieve_kubeconfig == "true" ? 
true : false) # bool +} + +module "node" { + source = "rancher/rke2/aws" + version = "1.2.6" + project_use_strategy = local.project_use_strategy + project_name = local.project_name + project_admin_cidrs = local.project_admin_cidrs + project_vpc_use_strategy = local.project_vpc_use_strategy + project_vpc_name = local.project_vpc_name + project_vpc_type = local.project_vpc_type + project_vpc_zones = local.project_vpc_zones + project_vpc_public = local.project_vpc_public + project_subnet_use_strategy = local.project_subnet_use_strategy + project_subnet_names = local.project_subnet_names + project_security_group_use_strategy = local.project_security_group_use_strategy + project_security_group_name = local.project_security_group_name + project_security_group_type = local.project_security_group_type + project_load_balancer_use_strategy = local.project_load_balancer_use_strategy + project_load_balancer_name = local.project_load_balancer_name + project_load_balancer_access_cidrs = local.project_load_balancer_access_cidrs + project_domain_use_strategy = local.project_domain_use_strategy + project_domain = local.project_domain + project_domain_zone = local.project_domain_zone + project_domain_cert_use_strategy = local.project_domain_cert_use_strategy + server_use_strategy = local.server_use_strategy + server_name = local.server_name + server_type = local.server_type + server_ip_family = local.server_ip_family + server_private_ip = local.server_private_ip + server_availability_zone = local.server_availability_zone + server_subnet_name = local.server_subnet_name + server_security_group_name = local.server_security_group_name # should always match project security group + server_image_use_strategy = local.server_image_use_strategy + server_image_type = local.server_image_type + server_cloudinit_use_strategy = local.server_cloudinit_use_strategy + server_cloudinit_content = local.server_cloudinit_content + server_indirect_access_use_strategy = 
local.server_indirect_access_use_strategy + server_load_balancer_target_groups = local.server_load_balancer_target_groups + server_direct_access_use_strategy = local.server_direct_access_use_strategy + server_access_addresses = local.server_access_addresses + server_user = local.server_user + server_add_domain = local.server_add_domain + server_domain_name = local.server_domain_name + server_domain_zone = local.server_domain_zone + server_add_eip = local.server_add_eip + install_use_strategy = local.install_use_strategy + local_file_use_strategy = local.local_file_use_strategy + local_file_path = local.local_file_path + install_rke2_version = local.install_rke2_version + install_rpm_channel = local.install_rpm_channel + install_remote_file_path = local.install_remote_file_path + install_prep_script = local.install_prep_script + install_start_prep_script = local.install_start_prep_script + install_role = local.install_role + install_start = local.install_start + install_start_timeout = local.install_start_timeout + config_use_strategy = local.config_use_strategy + config_default_name = local.config_default_name + config_supplied_content = local.config_supplied_content + config_supplied_name = local.config_supplied_name + config_join_strategy = local.config_join_strategy + config_join_url = local.config_join_url + config_join_token = local.config_join_token + config_cluster_cidr = local.config_cluster_cidr + config_service_cidr = local.config_service_cidr + retrieve_kubeconfig = local.retrieve_kubeconfig +} + diff --git a/modules/cluster/node_template/outputs.tf b/modules/cluster/node_template/outputs.tf new file mode 100644 index 0000000..83dbe50 --- /dev/null +++ b/modules/cluster/node_template/outputs.tf @@ -0,0 +1,36 @@ +output "kubeconfig" { + value = module.node.kubeconfig + sensitive = true +} +output "join_url" { + value = module.node.join_url +} +output "join_token" { + value = module.node.join_token + sensitive = true +} +output "cluster_cidr" { + value = 
module.node.cluster_cidr +} +output "service_cidr" { + value = module.node.service_cidr +} +output "project_subnets" { + value = module.node.project_subnets +} +output "project_security_group" { + value = module.node.project_security_group +} +output "project_domain_tls_certificate" { + value = module.node.project_domain_tls_certificate +} +output "project_vpc" { + value = module.node.project_vpc +} +output "project_domain_object" { + value = module.node.project_domain_object +} +output "project_load_balancer" { + value = module.node.project_load_balancer +} + diff --git a/modules/cluster/node_template/variables.tf b/modules/cluster/node_template/variables.tf new file mode 100644 index 0000000..c4bf830 --- /dev/null +++ b/modules/cluster/node_template/variables.tf @@ -0,0 +1,330 @@ +variable "identifier" { + type = string +} +variable "owner" { + type = string +} +variable "acme_server_url" { + type = string +} +variable "project_use_strategy" { + type = string + default = "skip" +} +variable "project_name" { + type = string + default = "" +} +variable "project_admin_cidrs" { + type = string + description = <<-EOT + Base64 encoded Json encoded list. + EOT + default = "[]" +} +variable "project_vpc_use_strategy" { + type = string + default = "skip" +} +variable "project_vpc_name" { + type = string + default = "" +} +variable "project_vpc_type" { + type = string + default = "ipv4" +} +variable "project_vpc_zones" { + type = string + description = <<-EOT + Base64 encoded Json encoded list. + EOT + default = "[]" +} +variable "project_vpc_public" { + type = string + description = <<-EOT + If specified must be "true" or "false" only. + EOT + default = "false" +} +variable "project_subnet_use_strategy" { + type = string + default = "skip" +} +variable "project_subnet_names" { + type = string + description = <<-EOT + Base64 encoded Json encoded list. 
+ EOT + default = "[]" +} +variable "project_security_group_use_strategy" { + type = string + default = "skip" +} +variable "project_security_group_name" { + type = string + default = "" +} +variable "project_security_group_type" { + type = string + default = "project" +} +variable "project_load_balancer_use_strategy" { + type = string + default = "skip" +} +variable "project_load_balancer_name" { + type = string + default = "" +} +variable "project_load_balancer_access_cidrs" { + type = string + description = <<-EOT + Base64 encoded Json encoded object. + example: + { + test = { + port = 443 + ip_family = "ipv4" + cidrs = ["1.1.1.1/32"] + protocol = "tcp" + target_name = "test" + } + } + EOT + default = null +} +variable "project_domain_use_strategy" { + type = string + default = "skip" +} +variable "project_domain" { + type = string + default = "" +} +variable "project_domain_zone" { + type = string + default = "" +} +variable "project_domain_cert_use_strategy" { + type = string + default = "skip" +} +variable "server_use_strategy" { + type = string + default = "create" +} +variable "server_name" { + type = string +} +variable "server_type" { + type = string +} +variable "server_ip_family" { + type = string +} +variable "server_private_ip" { + type = string + default = "" +} +variable "server_availability_zone" { + type = string + default = "" +} +variable "server_subnet_name" { + type = string + default = "" +} +variable "server_security_group_name" { + type = string + default = "" +} +variable "server_image_use_strategy" { + type = string + default = "find" +} +variable "server_image_type" { + type = string + default = "sle-micro-61" +} +variable "server_cloudinit_use_strategy" { + type = string + default = "skip" +} +variable "server_cloudinit_content" { + type = string + default = "" +} +variable "server_indirect_access_use_strategy" { + type = string + default = "enable" +} +variable "server_load_balancer_target_groups" { + type = string + description = 
<<-EOT + Base64 encoded Json encoded list. + EOT + default = "[]" +} +variable "server_direct_access_use_strategy" { + type = string + default = "ssh" +} +variable "server_access_addresses" { + type = string + description = <<-EOT + Base64 encoded Json encoded object. + Example: + { + workstation = { + port = 443, + ip_family = "ipv4", + cidrs = ["100.1.1.1/32"], + protocol = "tcp" + } + ci = { + port = 443 + ip_family = "ipv4", + cidrs = ["50.1.1.1/32"], + protocol = "tcp" + } + } + EOT + default = null +} +variable "server_user" { + type = string + description = <<-EOT + Base64 encoded Json encoded object. + Example: + { + user = "myuser" + aws_keypair_use_strategy = "select" + ssh_key_name = "abc123" + public_ssh_key = "abc123 aabbccdd11223344" + user_workfolder = "/var/tmp" + timeout = 10 + } + EOT + default = null +} +variable "server_add_domain" { + type = string + default = "false" +} +variable "server_domain_name" { + type = string + default = "" +} +variable "server_domain_zone" { + type = string + default = "" +} +variable "server_add_eip" { + type = string + default = "false" +} +variable "install_use_strategy" { + type = string + default = "rpm" +} +variable "local_file_use_strategy" { + type = string + default = "download" +} +variable "local_file_path" { + type = string + default = "" +} +variable "install_rke2_version" { + type = string +} +variable "install_rpm_channel" { + type = string + default = "stable" +} +variable "install_remote_file_path" { + type = string + default = "" +} +variable "install_prep_script" { + type = string + description = <<-EOT + Base64 encoded string. + EOT + default = "" +} +variable "install_start_prep_script" { + type = string + description = <<-EOT + Base64 encoded string. 
+ EOT + default = "" +} +variable "install_role" { + type = string + default = "server" +} +variable "install_start" { + type = string + default = "true" +} +variable "install_start_timeout" { + type = string + default = "10" +} +variable "config_use_strategy" { + type = string + default = "merge" +} +variable "config_default_name" { + type = string + default = "50-default-config.yaml" +} +variable "config_supplied_content" { + type = string + description = <<-EOT + Base64 encoded string. + EOT + default = "" +} +variable "config_supplied_name" { + type = string + default = "51-rke2-config.yaml" +} +variable "config_join_strategy" { + type = string + default = "skip" +} +variable "config_join_url" { + type = string + default = "" +} +variable "config_join_token" { + type = string + default = "" +} +variable "config_cluster_cidr" { + type = string + description = <<-EOT + Base64 encoded Json encoded list. + EOT + default = "[]" +} +variable "config_service_cidr" { + type = string + description = <<-EOT + Base64 encoded Json encoded list. + EOT + default = "[]" +} +variable "retrieve_kubeconfig" { + type = string + description = <<-EOT + If specified, must be "true" or "false". 
+ EOT + default = "false" +} + diff --git a/modules/cluster/node_template/versions.tf b/modules/cluster/node_template/versions.tf new file mode 100644 index 0000000..b5ed9df --- /dev/null +++ b/modules/cluster/node_template/versions.tf @@ -0,0 +1,30 @@ +terraform { + required_version = ">= 1.5.0" + required_providers { + local = { + source = "hashicorp/local" + version = ">= 2.5" + } + random = { + source = "hashicorp/random" + version = ">= 3.5.1" + } + aws = { + source = "hashicorp/aws" + version = ">= 5.11" + } + http = { + source = "hashicorp/http" + version = ">= 3.4" + } + acme = { + source = "vancluever/acme" + version = ">= 2.0" + } + github = { + source = "integrations/github" + version = ">= 6.3" + } + } +} + diff --git a/modules/cluster/outputs.tf b/modules/cluster/outputs.tf index 039ae14..ec0e307 100644 --- a/modules/cluster/outputs.tf +++ b/modules/cluster/outputs.tf @@ -1,42 +1,42 @@ output "kubeconfig" { - value = local.ino.kubeconfig + value = local.ino.output.kubeconfig description = "Kubernetes config file contents for the cluster." sensitive = true } output "api" { - value = yamldecode(local.ino.kubeconfig).clusters[0].cluster.server + value = yamldecode(local.ino.output.kubeconfig).clusters[0].cluster.server description = "Address to use to connect to the cluster's API service." sensitive = true } output "cert" { - value = local.ino.project_domain_tls_certificate + value = local.ino.output.project_domain_tls_certificate description = "Information about the certificate that was generated with the domain" sensitive = true } -# commented for performance, leaving to show how you can export the state if necessary -# output "additional_node_states" { -# value = data.terraform_remote_state.additional_node_states -# description = "The states for the orchestrated modules which produce nodes." 
-# sensitive = true -# } output "vpc" { - value = module.initial[keys(local.initial_node)[0]].project_vpc + value = local.ino.output.project_vpc } output "subnets" { - value = module.initial[keys(local.initial_node)[0]].project_subnets + value = local.ino.output.project_subnets } output "join_url" { - value = local.ino.join_url + value = local.ino.output.join_url } output "initial_node_private_ip" { - value = replace(replace(local.ino.join_url, ":9345", ""), "https://", "") + value = replace(replace(local.ino.output.join_url, ":9345", ""), "https://", "") } output "project_domain_object" { - value = module.initial[keys(local.initial_node)[0]].project_domain_object + value = local.ino.output.project_domain_object } output "project_security_group" { - value = module.initial[keys(local.initial_node)[0]].project_security_group + value = local.ino.output.project_security_group } output "load_balancer_security_groups" { - value = module.initial[keys(local.initial_node)[0]].project_load_balancer.security_groups + value = local.ino.output.project_load_balancer.security_groups } +# commented for performance, leaving to show how you can export the state if necessary +# output "additional_node_states" { +# value = data.terraform_remote_state.additional_node_states +# description = "The states for the orchestrated modules which produce nodes." +# sensitive = true +# } diff --git a/modules/cluster/variables.tf b/modules/cluster/variables.tf index c200677..c74dccb 100644 --- a/modules/cluster/variables.tf +++ b/modules/cluster/variables.tf @@ -112,12 +112,17 @@ variable "ip_family" { type = string description = "The IP family to use. Must be 'ipv4', 'ipv6', or 'dualstack'." } -variable "ingress_controller" { - type = string - description = "The ingress controller to use. Must be 'nginx' or 'traefik'. Currently only supports 'nginx'." -} +# variable "ingress_controller" { +# type = string +# description = "The ingress controller to use. Must be 'nginx' or 'traefik'. 
Currently only supports 'nginx'." +# } variable "skip_cert_creation" { type = bool description = "Skip the generation of a certificate, useful when configuring cert manager." default = false } +variable "acme_server_url" { + type = string + description = "Server URL to make ACME requests to." + default = "https://acme-v02.api.letsencrypt.org/directory" +} diff --git a/modules/cluster/versions.tf b/modules/cluster/versions.tf index 58c9cd8..e065e62 100644 --- a/modules/cluster/versions.tf +++ b/modules/cluster/versions.tf @@ -3,7 +3,7 @@ terraform { required_providers { local = { source = "hashicorp/local" - version = ">= 2.4" + version = ">= 2.5" } random = { source = "hashicorp/random" diff --git a/modules/deploy/main.tf b/modules/deploy/main.tf new file mode 100644 index 0000000..fac12cc --- /dev/null +++ b/modules/deploy/main.tf @@ -0,0 +1,221 @@ +# There are many ways to orchestrate Terraform configurations with the goal of breaking it down +# I am using Terraform resources to orchestrate Terraform +# I felt this was the best way to accomplish the goal without incurring additional dependencies + +locals { + inputs = var.inputs + inputs_hash = md5(local.inputs) + template_path = var.template_path + template_files = var.template_files + # tflint-ignore: terraform_unused_declarations + fail_no_template = ((local.template_path == null && length(local.template_files) == 0) ? one([local.template_path, "missing_template"]) : false) + # tflint-ignore: terraform_unused_declarations + fail_too_much_template = ((local.template_path != null && length(local.template_files) > 0) ? one([local.template_path, "template_path_or_template_files"]) : false) + template_file_list = ( + local.template_path != null ? 
+ [ + for i in range(length(fileset(local.template_path, "**"))) : + join("/", [local.template_path, tolist(fileset(local.template_path, "**"))[i]]) + ] + : local.template_files + ) + template_file_map = { for file in local.template_file_list : basename(file) => file } + template_files_hash = md5(join("-", local.template_file_list)) + deploy_path = chomp(var.deploy_path) + + environment_variables = var.environment_variables + export_contents = ( + local.environment_variables != null ? + join(";", [for k, v in local.environment_variables : "export ${k}=${v}"]) + : "" + ) + export_hash = md5(local.export_contents) + attempts = var.attempts + interval = var.interval + timeout = var.timeout + init = var.init + init_script = (local.init ? "terraform init -upgrade" : "") + tf_data_dir = var.data_path != null ? var.data_path : path.root + skip_destroy = (var.skip_destroy ? "true" : "") +} + +module "persist_template" { + source = "../persist_file" + depends_on = [ + ] + for_each = local.template_file_map + path = "${local.deploy_path}/${each.key}" + contents = file(each.value) + recreate = filemd5(each.value) +} + +module "persist_inputs" { + source = "../persist_file" + depends_on = [ + ] + path = "${local.deploy_path}/inputs.tfvars" + contents = local.inputs + recreate = md5(local.inputs) +} + +resource "terraform_data" "destroy" { + depends_on = [ + module.persist_template, + module.persist_inputs, + ] + triggers_replace = { + inputs = local.inputs_hash + files = local.template_files_hash + env = local.export_hash + ec = local.export_contents + dp = local.deploy_path + to = local.timeout + dd = local.tf_data_dir + sd = local.skip_destroy + } + provisioner "local-exec" { + when = destroy + command = <<-EOT + ${self.triggers_replace.ec} + cd ${self.triggers_replace.dp} + export TF_DATA_DIR="${self.triggers_replace.dd}" + if [ -z "${self.triggers_replace.sd}" ]; then + timeout -k 1m ${self.triggers_replace.to} terraform init -upgrade + timeout -k 1m 
${self.triggers_replace.to} terraform destroy -var-file="${self.triggers_replace.dp}/inputs.tfvars" -auto-approve -state="${self.triggers_replace.dp}/tfstate" || true + else + echo "Not destroying deployed module, it will no longer be managed here." + fi + EOT + } +} + +resource "terraform_data" "create" { + depends_on = [ + module.persist_template, + module.persist_inputs, + terraform_data.destroy, + ] + triggers_replace = { + inputs = local.inputs_hash + files = local.template_files_hash + env = local.export_hash + } + provisioner "local-exec" { + command = <<-EOT + ${local.export_contents} + cd ${local.deploy_path} + export TF_DATA_DIR="${local.tf_data_dir}" + + ${local.init_script} + + MAX=${local.attempts} + EXITCODE=1 + ATTEMPTS=0 + E=1 + E1=0 + while [ $EXITCODE -gt 0 ] && [ $ATTEMPTS -lt $MAX ]; do + A=0 + while [ $E -gt 0 ] && [ $A -lt $MAX ]; do + timeout -k 1m ${local.timeout} terraform apply -var-file="${local.deploy_path}/inputs.tfvars" -auto-approve -state="${local.deploy_path}/tfstate" + E=$? + if [ $E -eq 124 ]; then echo "Apply timed out after ${local.timeout}"; fi + A=$((A+1)) + done + # don't destroy if the last attempt fails + if [ $E -gt 0 ] && [ $ATTEMPTS != $((MAX-1)) ]; then + A1=0 + while [ $E1 -gt 0 ] && [ $A1 -lt $MAX ]; do + timeout -k 1m ${local.timeout} terraform destroy -var-file="${local.deploy_path}/inputs.tfvars" -auto-approve -state="${local.deploy_path}/tfstate" + E1=$? + if [ $E1 -eq 124 ]; then echo "Apply timed out after ${local.timeout}"; fi + A1=$((A1+1)) + done + fi + if [ $E -gt 0 ]; then + echo "apply failed..." + fi + if [ $E1 -gt 0 ]; then + echo "destroy failed..." + fi + if [ $E -gt 0 ] || [ $E1 -gt 0 ]; then + EXITCODE=1 + else + EXITCODE=0 + fi + ATTEMPTS=$((ATTEMPTS+1)) + if [ $EXITCODE -gt 0 ] && [ $ATTEMPTS -lt $MAX ]; then + echo "wait ${local.interval} seconds between attempts..." 
+ sleep ${local.interval} + fi + done + if [ $ATTEMPTS -eq $MAX ]; then echo "max attempts reached..."; fi + if [ $EXITCODE -ne 0 ]; then echo "failure, exit code $EXITCODE..."; fi + if [ $EXITCODE -eq 0 ]; then + echo "success..."; + terraform output -json -state="${local.deploy_path}/tfstate" > ${local.deploy_path}/outputs.json + fi + exit $EXITCODE + EOT + } +} + +module "persist_state" { + depends_on = [ + module.persist_template, + module.persist_inputs, + terraform_data.destroy, + terraform_data.create, + ] + source = "../persist_file" + path = "${local.deploy_path}/tfstate" + sourcefile = "${local.deploy_path}/tfstate" + recreate = terraform_data.create.id +} + +module "persist_outputs" { + depends_on = [ + module.persist_template, + module.persist_inputs, + terraform_data.destroy, + terraform_data.create, + ] + source = "../persist_file" + path = "${local.deploy_path}/outputs.json" + sourcefile = "${local.deploy_path}/outputs.json" + recreate = terraform_data.create.id +} + +resource "terraform_data" "destroy_end" { + depends_on = [ + module.persist_template, + module.persist_inputs, + terraform_data.destroy, + terraform_data.create, + module.persist_state, + module.persist_outputs, + ] + triggers_replace = { + inputs = local.inputs_hash + files = local.template_files_hash + env = local.export_hash + ec = local.export_contents + dp = local.deploy_path + to = local.timeout + dd = local.tf_data_dir + sd = local.skip_destroy + } + provisioner "local-exec" { + when = destroy + command = <<-EOT + ${self.triggers_replace.ec} + cd ${self.triggers_replace.dp} + export TF_DATA_DIR="${self.triggers_replace.dd}" + if [ -z "${self.triggers_replace.sd}" ]; then + timeout -k 1m ${self.triggers_replace.to} terraform init -upgrade + timeout -k 1m ${self.triggers_replace.to} terraform destroy -var-file="${self.triggers_replace.dp}/inputs.tfvars" -auto-approve -state="${self.triggers_replace.dp}/tfstate" || true + else + echo "Not destroying deployed module, it will no 
longer be managed here." + fi + EOT + } +} diff --git a/modules/deploy/outputs.tf b/modules/deploy/outputs.tf new file mode 100644 index 0000000..faad33b --- /dev/null +++ b/modules/deploy/outputs.tf @@ -0,0 +1,11 @@ +output "output" { + value = { for k, v in jsondecode(module.persist_outputs.contents) : k => v.value } +} + +# output "raw_output" { +# value = module.persist_outputs.contents +# } + +# output "state" { +# value = module.persist_state.contents +# } diff --git a/modules/deploy/variables.tf b/modules/deploy/variables.tf new file mode 100644 index 0000000..bf2bf2b --- /dev/null +++ b/modules/deploy/variables.tf @@ -0,0 +1,94 @@ +variable "inputs" { + type = string + description = <<-EOT + Contents of an inputs.tfvars file to save in the deployment path. + EOT + default = "" +} +variable "template_path" { + type = string + description = <<-EOT + Path to the module to deploy. + These files will be copied to the deploy path, not used directly. + This is optional, but one of template_path or template_files must be specified. + Only one of template_path or template_files can be specified. + EOT + default = null +} +variable "template_files" { + type = list(any) + description = <<-EOT + List of file paths that will be copied to the deploy path. + This is optional, but one of template_path or template_files must be specified. + Only one of template_path or template_files can be specified. + EOT + default = [] +} +variable "deploy_path" { + type = string + description = <<-EOT + Path to preform deployment in, this will be Terraform's working directory. + EOT +} +variable "data_path" { + type = string + description = <<-EOT + Should match your TF_DATA_DIR environment variable. + This directory is used to stage all of the various files for your implementation. + If left null, this will match "path.root". + This should be a full path, not relative. 
+ EOT + default = null +} +variable "environment_variables" { + type = map(any) + description = <<-EOT + Map of environment variables to set before running Terraform. + Key is the name and Value is the value of the variable. + We export this before running Terraform, eg. "export KEY_1=VARIABLE_1;export KEY_2=VARIABLE_2". + EOT + default = null +} +variable "attempts" { + type = number + description = <<-EOT + Number of attempts to deploy module. + Each time Terraform apply is run we check for a successful exit code, + if the exit code !=0 then we try again, up to the value set in this argument. + EOT + default = 3 +} +variable "interval" { + type = number + description = <<-EOT + A number of seconds to sleep between Terraform apply or destroy attempts. + EOT + default = 30 +} +variable "timeout" { + type = string + description = <<-EOT + A (linux coreutils) timeout DURATION string. + This will be used to kill the Terraform run in case there is an endless loop. + If this DURATION is reached a single TERM will be sent, then KILL 1 minute later. + EOT + default = "45m" +} +variable "init" { + type = bool + description = <<-EOT + Set to false to prevent running Terraform init. + This is helpful when testing a local bin version of the provider. + EOT + default = true +} +variable "skip_destroy" { + type = bool + description = <<-EOT + Set to true to ignore calls to destroy the deployed substate. + State and deploy path will still exist, this essentially divorces the parent from the child. + This only effects specifically calls to destroy the deploy module, not taint or recreate. + Be careful as this can leave objects in your API unmanaged by IAC. 
+ EOT + default = false +} diff --git a/modules/deploy/versions.tf b/modules/deploy/versions.tf new file mode 100644 index 0000000..824ea62 --- /dev/null +++ b/modules/deploy/versions.tf @@ -0,0 +1,9 @@ +terraform { + required_version = ">= 1.5.0" + required_providers { + filesystem = { + source = "sethvargo/filesystem" + version = "1.0.0" + } + } +} diff --git a/modules/install_cert_manager/configured/main.tf b/modules/install_cert_manager/configured/main.tf index 3e7678d..c31d7d6 100644 --- a/modules/install_cert_manager/configured/main.tf +++ b/modules/install_cert_manager/configured/main.tf @@ -9,17 +9,15 @@ resource "time_sleep" "settle_before_cert_manager" { create_duration = "30s" } -resource "kubernetes_namespace" "cert_manager" { +# uses kubectl to idempotently create cert-manager namespace +resource "terraform_data" "cert_manager" { depends_on = [ time_sleep.settle_before_cert_manager, ] - metadata { - name = "cert-manager" - } - lifecycle { - ignore_changes = [ - metadata, - ] + provisioner "local-exec" { + command = <<-EOT + kubectl get namespace cert-manager || kubectl create namespace cert-manager + EOT } provisioner "local-exec" { command = <<-EOT @@ -42,6 +40,7 @@ resource "kubernetes_namespace" "cert_manager" { resource "helm_release" "cert_manager_configured" { depends_on = [ time_sleep.settle_before_cert_manager, + terraform_data.cert_manager, ] name = "cert-manager" repository = "https://charts.jetstack.io" diff --git a/modules/install_cert_manager/configured/versions.tf b/modules/install_cert_manager/configured/versions.tf index e601fef..0ff3c60 100644 --- a/modules/install_cert_manager/configured/versions.tf +++ b/modules/install_cert_manager/configured/versions.tf @@ -3,7 +3,7 @@ terraform { required_providers { helm = { source = "hashicorp/helm" - version = ">= 2.15" + version = "2.14" } kubernetes = { source = "hashicorp/kubernetes" diff --git a/modules/install_cert_manager/main.tf b/modules/install_cert_manager/main.tf index
0f0c12c..b42bb5a 100644 --- a/modules/install_cert_manager/main.tf +++ b/modules/install_cert_manager/main.tf @@ -12,50 +12,23 @@ locals { cert_manager_version = var.cert_manager_version configure_cert_manager = var.configure_cert_manager cert_manager_configured = (local.configure_cert_manager ? "configured" : "unconfigured") - cert_manager_path = "${abspath(path.module)}/${local.cert_manager_configured}" + cert_manager_path = "${path.module}/${local.cert_manager_configured}" cert_manager_config = var.cert_manager_configuration - deploy_path = "${abspath(local.path)}/install_cert_manager" - backend_file = var.backend_file + deploy_path = "${local.path}/install_cert_manager" } -resource "terraform_data" "path" { - triggers_replace = { - main_contents = md5(file("${local.cert_manager_path}/main.tf")) - variables_contents = md5(file("${local.cert_manager_path}/variables.tf")) - versions_contents = md5(file("${local.cert_manager_path}/versions.tf")) - backend_contents = (local.backend_file == "" ? "" : md5(file(local.backend_file))) - } - provisioner "local-exec" { - command = <<-EOT - install -d ${local.deploy_path} - install -d ${local.deploy_path}/.terraform - cp --remove-destination ${local.cert_manager_path}/* ${local.deploy_path} - cp --remove-destination "${abspath(path.root)}/.terraform.lock.hcl" ${local.deploy_path} - if [ -f "${local.backend_file}" ]; then - cp --remove-destination ${local.backend_file} ${local.deploy_path} - fi - if [ -z "$TF_DATA_DIR" ]; then - echo "copying terraform data from default location..." - cp -rf --remove-destination "${abspath(path.root)}/.terraform" ${local.deploy_path} - else - echo "copying terraform data from $TF_DATA_DIR..." 
- cp -rf --remove-destination "$TF_DATA_DIR/modules" ${local.deploy_path}/.terraform - cp -rf --remove-destination "$TF_DATA_DIR/providers" ${local.deploy_path}/.terraform - fi - EOT - } -} - -resource "local_file" "inputs" { +module "deploy_cert_manager" { + source = "../deploy" depends_on = [ - terraform_data.path, ] - lifecycle { - replace_triggered_by = [ - terraform_data.path.id, - ] + deploy_path = local.deploy_path + data_path = local.deploy_path + template_path = local.cert_manager_path + environment_variables = { + KUBE_CONFIG_PATH = "${abspath(local.path)}/kubeconfig" + KUBECONFIG = "${abspath(local.path)}/kubeconfig" } - content = <<-EOT + inputs = <<-EOT project_domain = "${local.rancher_domain}" zone = "${local.zone}" zone_id = "${local.zone_id}" @@ -70,36 +43,4 @@ resource "local_file" "inputs" { aws_secret_access_key = "${local.cert_manager_config.aws_secret_access_key}" } EOT - filename = "${local.deploy_path}/inputs.tfvars" -} - -# this is a one way operation, there is no destroy or update -resource "terraform_data" "create" { - depends_on = [ - terraform_data.path, - local_file.inputs, - ] - triggers_replace = { - path_data = terraform_data.path.id - inputs_data = local_file.inputs.id - } - provisioner "local-exec" { - command = <<-EOT - cd ${local.deploy_path} - export KUBECONFIG=${abspath(local.path)}/kubeconfig - export KUBE_CONFIG_PATH=${abspath(local.path)}/kubeconfig - - EXITCODE=1 - ATTEMPTS=0 - MAX=1 - while [ $EXITCODE -gt 0 ] && [ $ATTEMPTS -lt $MAX ]; do - timeout 3600 terraform apply -var-file="inputs.tfvars" -auto-approve -state="${local.deploy_path}/tfstate" - EXITCODE=$? - ATTEMPTS=$((ATTEMPTS+1)) - echo "waiting 30 seconds between attempts..." 
- sleep 30 - done - exit $EXITCODE - EOT - } } diff --git a/modules/install_cert_manager/unconfigured/main.tf b/modules/install_cert_manager/unconfigured/main.tf index fe5309b..ede1aca 100644 --- a/modules/install_cert_manager/unconfigured/main.tf +++ b/modules/install_cert_manager/unconfigured/main.tf @@ -15,17 +15,16 @@ data "aws_secretsmanager_secret_version" "project_cert_key" { resource "time_sleep" "settle_before_cert_manager" { create_duration = "30s" } -resource "kubernetes_namespace" "cattle_system" { + +# uses kubectl to idempotently create cattle-system namespace +resource "terraform_data" "cattle-system" { depends_on = [ time_sleep.settle_before_cert_manager, ] - metadata { - name = "cattle-system" - } - lifecycle { - ignore_changes = [ - metadata, - ] + provisioner "local-exec" { + command = <<-EOT + kubectl get namespace cattle-system || kubectl create namespace cattle-system + EOT } provisioner "local-exec" { command = <<-EOT @@ -43,10 +42,11 @@ resource "kubernetes_namespace" "cattle_system" { when = destroy } } + resource "kubernetes_secret" "tls_rancher_ingress" { depends_on = [ time_sleep.settle_before_cert_manager, - kubernetes_namespace.cattle_system, + terraform_data.cattle-system, ] metadata { name = "tls-rancher-ingress" @@ -67,10 +67,11 @@ resource "kubernetes_secret" "tls_rancher_ingress" { ] } } + resource "kubernetes_secret" "tls_rancher_ca" { depends_on = [ time_sleep.settle_before_cert_manager, - kubernetes_namespace.cattle_system, + terraform_data.cattle-system, kubernetes_secret.tls_rancher_ingress, ] metadata { @@ -87,11 +88,12 @@ resource "kubernetes_secret" "tls_rancher_ca" { ] } } + # https://github.com/cert-manager/cert-manager/blob/master/deploy/charts/cert-manager/values.yaml resource "helm_release" "cert_manager_unconfigured" { depends_on = [ time_sleep.settle_before_cert_manager, - kubernetes_namespace.cattle_system, + terraform_data.cattle-system, kubernetes_secret.tls_rancher_ingress, kubernetes_secret.tls_rancher_ca, ]
diff --git a/modules/install_cert_manager/unconfigured/versions.tf b/modules/install_cert_manager/unconfigured/versions.tf index 78dc697..ce06c24 100644 --- a/modules/install_cert_manager/unconfigured/versions.tf +++ b/modules/install_cert_manager/unconfigured/versions.tf @@ -3,7 +3,7 @@ terraform { required_providers { helm = { source = "hashicorp/helm" - version = ">= 2.14" + version = "2.14" } aws = { source = "hashicorp/aws" @@ -15,7 +15,7 @@ terraform { } local = { source = "hashicorp/local" - version = ">= 2.5.2" + version = ">= 2.5" } time = { source = "hashicorp/time" diff --git a/modules/install_cert_manager/variables.tf b/modules/install_cert_manager/variables.tf index b9043be..097dd91 100644 --- a/modules/install_cert_manager/variables.tf +++ b/modules/install_cert_manager/variables.tf @@ -75,14 +75,14 @@ variable "cert_manager_configuration" { } sensitive = true } -variable "backend_file" { - type = string - description = <<-EOT - Path to a .tfbackend file. - This allows the user to pass a backend file. - The backend file will be added to the terraform run and will allow state data to be saved remotely. - Please note that this is a separate state file, and this backend should be independent of the main module's state and any other submodules' states. - See https://developer.hashicorp.com/terraform/language/backend#file for more information. - EOT - default = "" -} +# variable "backend_file" { +# type = string +# description = <<-EOT +# Path to a .tfbackend file. +# This allows the user to pass a backend file. +# The backend file will be added to the terraform run and will allow state data to be saved remotely. +# Please note that this is a separate state file, and this backend should be independent of the main module's state and any other submodules' states. +# See https://developer.hashicorp.com/terraform/language/backend#file for more information. 
+# EOT +# default = "" +# } diff --git a/modules/install_cert_manager/versions.tf b/modules/install_cert_manager/versions.tf index 7934149..859ef02 100644 --- a/modules/install_cert_manager/versions.tf +++ b/modules/install_cert_manager/versions.tf @@ -3,7 +3,7 @@ terraform { required_providers { helm = { source = "hashicorp/helm" - version = ">= 2.14" + version = "2.14" } kubernetes = { source = "hashicorp/kubernetes" @@ -11,7 +11,7 @@ terraform { } local = { source = "hashicorp/local" - version = ">= 2.5.2" + version = ">= 2.5" } time = { source = "hashicorp/time" diff --git a/modules/persist_file/archive.sh b/modules/persist_file/archive.sh new file mode 100755 index 0000000..25aa818 --- /dev/null +++ b/modules/persist_file/archive.sh @@ -0,0 +1,25 @@ +#!/bin/bash +set -e + + +# This script can compress text into smaller text or decompress that text back to its original form +# this requires: xz, openssl, jq, bash, and core linux utils (echo, redirection, pipe) +JSON_INPUT="$(jq -r '.')" +COMPRESS="$(jq -r '.compress' <<<"$JSON_INPUT")" +DECOMPRESS="$(jq -r '.decompress' <<<"$JSON_INPUT")" +DATA="$(jq -r '.contents' <<<"$JSON_INPUT")" + +if [ -n "$COMPRESS" ] && [ "null" != "$COMPRESS" ]; then + ENCODED_OUTPUT="$(printf "%s" "$DATA" | xz -c -9 -e -T0 | openssl base64 -A -)" +fi + +if [ -n "$DECOMPRESS" ] && [ "null" != "$DECOMPRESS" ]; then + ENCODED_OUTPUT="$(echo -n "$DATA" | openssl base64 -d -A | xz -dc | openssl base64 -A -)" +fi + +if [ -z "$ENCODED_OUTPUT" ]; then + echo "output is empty" >&2 + exit 1 +fi + +jq -n --arg data "$ENCODED_OUTPUT" '{"data": $data}' diff --git a/modules/persist_file/main.tf b/modules/persist_file/main.tf new file mode 100644 index 0000000..d9b2199 --- /dev/null +++ b/modules/persist_file/main.tf @@ -0,0 +1,53 @@ +locals { + full_path = abspath(var.path) # where to place the file + contents = var.contents # the contents to persist + sourcefile = var.sourcefile # the sourcefile to persist + recreate = var.recreate # when this 
changes update the persisted data to match contents + + # tflint-ignore: terraform_unused_declarations + fail_no_source = ((local.contents == "" && local.sourcefile == "") ? one([local.contents, "missing_something_to_persist"]) : false) + + data = (local.contents != "" ? local.contents : data.external.read_file.result.data) +} + +resource "terraform_data" "recreate" { + input = local.recreate +} + +data "external" "read_file" { + depends_on = [ + terraform_data.recreate, + ] + program = ["bash", "${path.module}/read_file.sh"] + query = { + filepath = local.sourcefile + } +} + +resource "terraform_data" "snapshot" { + depends_on = [ + data.external.read_file, + terraform_data.recreate, + ] + input = local.data + # we want this data to persist even if the input data changes + # the point of this is so that we control when the data is updated ie. when the snapshot is saved/updated + lifecycle { + ignore_changes = [ + input, + ] + } + triggers_replace = [ + terraform_data.recreate.output, + ] +} + +resource "local_file" "file" { + depends_on = [ + data.external.read_file, + terraform_data.recreate, + terraform_data.snapshot, + ] + filename = local.full_path + content = terraform_data.snapshot.output +} diff --git a/modules/persist_file/make_holders.sh b/modules/persist_file/make_holders.sh new file mode 100755 index 0000000..3de24dd --- /dev/null +++ b/modules/persist_file/make_holders.sh @@ -0,0 +1,34 @@ +#!/bin/bash +set -e +INPUTS="$(jq -r '.')" +FILENAMEFILE="$(jq -r '.filename_file' <<<"$INPUTS")" +NEWFILE="$(jq -r '.filename' <<<"$INPUTS")" + +if [ -z "$FILENAMEFILE" ]; then + echo "filename_file required" >&2 + exit 1 +fi + +if [ -z "$NEWFILE" ]; then + echo "filename required" >&2 + exit 1 +fi + +install -d "$(dirname "$FILENAMEFILE")" +touch "$FILENAMEFILE" + +# grep returns 1 if the pattern isn't found, so we need to ignore the "failure" here +NEW="$(grep -l "$NEWFILE" "$FILENAMEFILE" || true)" + +if [ -z "$NEW" ]; then + echo "$NEWFILE" >> "$FILENAMEFILE" 
+fi + +while read -r FILEPATH; do + if [ -z "$FILEPATH" ]; then continue; fi # ignore empty lines + DIRECTORY="$(dirname "$FILEPATH")" + install -d "$DIRECTORY" + touch "$FILEPATH" +done < "$FILENAMEFILE" + +jq -n '{"outcome": "success"}' diff --git a/modules/persist_file/outputs.tf b/modules/persist_file/outputs.tf new file mode 100644 index 0000000..0c697ae --- /dev/null +++ b/modules/persist_file/outputs.tf @@ -0,0 +1,7 @@ +# output "encoded_contents" { +# value = base64encode(filesystem_file_writer.file.contents) +# } + +output "contents" { + value = local_file.file.content #filesystem_file_writer.file.contents +} diff --git a/modules/persist_file/read_file.sh b/modules/persist_file/read_file.sh new file mode 100755 index 0000000..a647f53 --- /dev/null +++ b/modules/persist_file/read_file.sh @@ -0,0 +1,14 @@ +#!/bin/bash +set -e + +JSON_INPUT="$(jq -r '.')" +FILEPATH="$(jq -r '.filepath' <<<"$JSON_INPUT")" + +DATA="" +if [ -n "$FILEPATH" ]; then + if [ -f "$FILEPATH" ]; then + DATA="$(cat "$FILEPATH")" + fi +fi + +jq -n --arg data "$DATA" '{"data": $data}' diff --git a/modules/persist_file/variables.tf b/modules/persist_file/variables.tf new file mode 100644 index 0000000..02da55e --- /dev/null +++ b/modules/persist_file/variables.tf @@ -0,0 +1,26 @@ +variable "path" { + type = string + description = <<-EOT + The path to save the contents to. + EOT +} +variable "recreate" { + type = string + description = <<-EOT + When this string changes, update the file snapshot. + EOT +} +variable "contents" { + type = string + description = <<-EOT + The contents to persist, one of "contents" or "sourcefile" must be given. + EOT + default = "" +} +variable "sourcefile" { + type = string + description = <<-EOT + A file to persist, one of "contents" or "sourcefile" must be given. 
+ EOT + default = "" +} diff --git a/modules/persist_file/versions.tf b/modules/persist_file/versions.tf new file mode 100644 index 0000000..e57796b --- /dev/null +++ b/modules/persist_file/versions.tf @@ -0,0 +1,13 @@ +terraform { + required_version = ">= 1.5.0" + required_providers { + external = { + source = "hashicorp/external" + version = ">= 2.3" + } + local = { + source = "hashicorp/local" + version = ">= 2.5.3" + } + } +} diff --git a/modules/rancher_bootstrap/main.tf b/modules/rancher_bootstrap/main.tf index 7033474..9c48db1 100644 --- a/modules/rancher_bootstrap/main.tf +++ b/modules/rancher_bootstrap/main.tf @@ -3,142 +3,37 @@ # I felt this was the best way to accomplish the goal without incurring additional dependencies locals { - rancher_domain = var.project_domain - zone = var.zone + project_domain = var.project_domain zone_id = var.zone_id region = var.region email = var.email acme_server_url = var.acme_server_url rancher_version = replace(var.rancher_version, "v", "") # don't include the v cert_manager_version = var.cert_manager_version - cert_manager_config = var.cert_manager_configuration - externalTLS = var.externalTLS path = var.path - rancher_path = (local.externalTLS ? "${abspath(path.module)}/rancher_externalTLS" : "${abspath(path.module)}/rancher") - deploy_path = "${abspath(local.path)}/rancher_bootstrap" - backend_file = var.backend_file -} - -resource "terraform_data" "path" { - triggers_replace = { - main_contents = md5(file("${local.rancher_path}/main.tf")) - variables_contents = md5(file("${local.rancher_path}/variables.tf")) - versions_contents = md5(file("${local.rancher_path}/versions.tf")) - outputs_contents = md5(file("${local.rancher_path}/outputs.tf")) - backend_contents = (local.backend_file == "" ? 
"" : md5(file(local.backend_file))) - } - provisioner "local-exec" { - command = <<-EOT - install -d ${local.deploy_path} - install -d ${local.deploy_path}/.terraform - cp --remove-destination ${local.rancher_path}/* ${local.deploy_path} - cp --remove-destination "${abspath(path.root)}/.terraform.lock.hcl" ${local.deploy_path} - if [ -f "${local.backend_file}" ]; then - cp --remove-destination ${local.backend_file} ${local.deploy_path} - fi - if [ -z "$TF_DATA_DIR" ]; then - echo "copying terraform data from default location..." - cp -rf --remove-destination "${abspath(path.root)}/.terraform" ${local.deploy_path} - else - echo "copying terraform data from $TF_DATA_DIR..." - cp -rf --remove-destination "$TF_DATA_DIR/modules" ${local.deploy_path}/.terraform - cp -rf --remove-destination "$TF_DATA_DIR/providers" ${local.deploy_path}/.terraform - fi - EOT - } + externalTLS = var.externalTLS + rancher_path = (local.externalTLS ? "${path.module}/rancher_externalTLS" : "${path.module}/rancher") + deploy_path = "${local.path}/rancher_bootstrap" } -resource "local_file" "inputs" { +module "deploy_rancher" { + source = "../deploy" depends_on = [ - terraform_data.path, ] - content = <<-EOT - project_domain = "${local.rancher_domain}" - zone = "${local.zone}" + deploy_path = local.deploy_path + data_path = local.deploy_path + template_path = local.rancher_path + environment_variables = { + KUBECONFIG = "${local.path}/kubeconfig" + KUBE_CONFIG_PATH = "${local.path}/kubeconfig" + } + inputs = <<-EOT + project_domain = "${local.project_domain}" zone_id = "${local.zone_id}" region = "${local.region}" email = "${local.email}" - acme_server_url = "${local.acme_server_url}" rancher_version = "${local.rancher_version}" cert_manager_version = "${local.cert_manager_version}" - cert_manager_configuration = { - aws_region = "${local.cert_manager_config.aws_region}" - aws_access_key_id = "${local.cert_manager_config.aws_access_key_id}" - aws_secret_access_key = 
"${local.cert_manager_config.aws_secret_access_key}" - aws_session_token = "${local.cert_manager_config.aws_session_token}" - } - path = "${local.deploy_path}" + acme_server_url = "${local.acme_server_url}" EOT - filename = "${local.deploy_path}/inputs.tfvars" -} - -resource "terraform_data" "create" { - depends_on = [ - terraform_data.path, - local_file.inputs, - ] - provisioner "local-exec" { - command = <<-EOT - export KUBECONFIG=${abspath(local.path)}/kubeconfig - export KUBE_CONFIG_PATH=${abspath(local.path)}/kubeconfig - cd ${local.deploy_path} - - MAX=2 - EXITCODE=1 - ATTEMPTS=0 - E=1 - E1=0 - while [ $EXITCODE -gt 0 ] && [ $ATTEMPTS -lt $MAX ]; do - A=0 - while [ $E -gt 0 ] && [ $A -lt $MAX ]; do - timeout 1h terraform apply -var-file="inputs.tfvars" -auto-approve -state="${local.deploy_path}/tfstate" - E=$? - if [ $E -eq 124 ]; then echo "Apply timed out after 1 hour"; fi - A=$((A+1)) - done - # don't destroy if the last attempt fails - if [ $E -gt 0 ] && [ $ATTEMPTS != $((MAX-1)) ]; then - A1=0 - while [ $E1 -gt 0 ] && [ $A1 -lt $MAX ]; do - timeout 1h terraform destroy -var-file="inputs.tfvars" -auto-approve -state="${local.deploy_path}/tfstate" - E1=$? - if [ $E1 -eq 124 ]; then echo "Apply timed out after 1 hour"; fi - A1=$((A1+1)) - done - fi - if [ $E -gt 0 ]; then - echo "apply failed..." - fi - if [ $E1 -gt 0 ]; then - echo "destroy failed..." - fi - if [ $E -gt 0 ] || [ $E1 -gt 0 ]; then - EXITCODE=1 - else - EXITCODE=0 - fi - ATTEMPTS=$((ATTEMPTS+1)) - if [ $EXITCODE -gt 0 ] && [ $ATTEMPTS -lt $MAX ]; then - echo "wait 30 seconds between attempts..." 
- sleep 30 - fi - done - if [ $ATTEMPTS -eq $MAX ]; then echo "max attempts reached..."; fi - if [ $EXITCODE -ne 0 ]; then echo "failure, exit code $EXITCODE..."; fi - if [ $EXITCODE -eq 0 ]; then echo "success..."; fi - exit $EXITCODE - EOT - } -} - -data "terraform_remote_state" "rancher_bootstrap_state" { - depends_on = [ - terraform_data.path, - local_file.inputs, - terraform_data.create, - ] - backend = "local" - config = { - path = "${local.deploy_path}/tfstate" - } } diff --git a/modules/rancher_bootstrap/outputs.tf b/modules/rancher_bootstrap/outputs.tf index 8dd7dc4..a271fa4 100644 --- a/modules/rancher_bootstrap/outputs.tf +++ b/modules/rancher_bootstrap/outputs.tf @@ -1,14 +1,9 @@ output "admin_token" { - value = data.terraform_remote_state.rancher_bootstrap_state.outputs.admin_token + value = module.deploy_rancher.output.admin_token sensitive = true } output "admin_password" { - value = data.terraform_remote_state.rancher_bootstrap_state.outputs.admin_password - sensitive = true -} - -output "rancher_bootstrap_state_location" { - value = "${local.deploy_path}/tfstate" + value = module.deploy_rancher.output.admin_password sensitive = true } diff --git a/modules/rancher_bootstrap/rancher/main.tf b/modules/rancher_bootstrap/rancher/main.tf index b020f6e..c4f2c9a 100644 --- a/modules/rancher_bootstrap/rancher/main.tf +++ b/modules/rancher_bootstrap/rancher/main.tf @@ -4,11 +4,37 @@ provider "rancher2" { } locals { - rancher_domain = var.project_domain + rancher_domain = var.project_domain + rancher_helm_repo = var.rancher_helm_repo + rancher_helm_channel = var.rancher_helm_channel + rancher_version = replace(var.rancher_version, "v", "") # don't include the v + helm_chart_use_strategy = var.rancher_helm_chart_use_strategy + rancher_helm_chart_values = var.rancher_helm_chart_values + default_hc_values = { + "hostname" = local.rancher_domain + "replicas" = "1" + "bootstrapPassword" = "admin" + "ingress.enabled" = "true" + "ingress.tls.source" = "letsEncrypt" 
+ "tls" = "ingress" + "letsEncrypt.ingress.class" = "nginx" + "letsEncrypt.environment" = "production" + "letsEncrypt.email" = local.email + "certmanager.version" = local.cert_manager_version + "agentTLSMode" = "system-store" + "ingress.extraAnnotations.cert-manager\\.io\\/issuer" = "rancher" + } + helm_chart_values = coalesce( # using coalesce like this essentially gives us a switch function + (local.helm_chart_use_strategy == "merge" ? + merge(local.default_hc_values, local.rancher_helm_chart_values) : null), + (local.helm_chart_use_strategy == "default" ? + local.default_hc_values : null), + (local.helm_chart_use_strategy == "provide" ? + local.rancher_helm_chart_values : null) + ) # WARNING! Some config is necessary, if the result is an empty string the coalesce will fail zone_id = var.zone_id region = var.region email = var.email - rancher_version = replace(var.rancher_version, "v", "") # don't include the v cert_manager_version = replace(var.cert_manager_version, "v", "") # don't include the v acme_server = var.acme_server_url } @@ -17,17 +43,15 @@ resource "time_sleep" "settle_before_rancher" { create_duration = "30s" } -resource "kubernetes_namespace" "cattle-system" { +# uses kubectl to idempotently create cattle-system namespace +resource "terraform_data" "cattle-system" { depends_on = [ time_sleep.settle_before_rancher, ] - metadata { - name = "cattle-system" - } - lifecycle { - ignore_changes = [ - metadata, - ] + provisioner "local-exec" { + command = <<-EOT + kubectl get namespace cattle-system || kubectl create namespace cattle-system + EOT } provisioner "local-exec" { command = <<-EOT @@ -49,7 +73,7 @@ resource "kubernetes_namespace" "cattle-system" { resource "kubernetes_manifest" "issuer" { depends_on = [ time_sleep.settle_before_rancher, - kubernetes_namespace.cattle-system, + terraform_data.cattle-system, ] manifest = { apiVersion = "cert-manager.io/v1" @@ -96,6 +120,7 @@ resource "kubernetes_manifest" "issuer" { resource "terraform_data" 
"wait_for_nginx" { depends_on = [ time_sleep.settle_before_rancher, + terraform_data.cattle-system, kubernetes_manifest.issuer, ] provisioner "local-exec" { @@ -116,38 +141,39 @@ resource "terraform_data" "wait_for_nginx" { } } -# WARNING! This adds git, yq, and helm to the dependency list! -resource "terraform_data" "build_chart" { - depends_on = [ - time_sleep.settle_before_rancher, - ] - provisioner "local-exec" { - command = <<-EOT - cd ${abspath(path.root)} || true - if [ -d chart ]; then - rm -rf chart - fi - mkdir chart - cd chart || exit 1 - ${abspath(path.module)}/build_chart.sh "${local.rancher_version}" - cd ${abspath(path.root)} || true - mv chart/rancher-${local.rancher_version}.tgz . - rm -rf chart - ls ${abspath(path.root)}/rancher-${local.rancher_version}.tgz - EOT - } -} +# # WARNING! This adds git, yq, and helm to the dependency list! +# resource "terraform_data" "build_chart" { +# depends_on = [ +# time_sleep.settle_before_rancher, +# ] +# provisioner "local-exec" { +# command = <<-EOT +# cd ${abspath(path.root)} || true +# if [ -d chart ]; then +# rm -rf chart +# fi +# mkdir chart +# cd chart || exit 1 +# ${abspath(path.module)}/build_chart.sh "${local.rancher_version}" +# cd ${abspath(path.root)} || true +# mv chart/rancher-${local.rancher_version}.tgz . 
+# rm -rf chart +# ls ${abspath(path.root)}/rancher-${local.rancher_version}.tgz +# EOT +# } +# } # https://github.com/rancher/rancher/blob/main/chart/values.yaml resource "helm_release" "rancher" { depends_on = [ time_sleep.settle_before_rancher, + terraform_data.cattle-system, kubernetes_manifest.issuer, terraform_data.wait_for_nginx, - terraform_data.build_chart, + # terraform_data.build_chart, ] name = "rancher" - chart = "${path.root}/rancher-${local.rancher_version}.tgz" # "${local.rancher_helm_repository}/${local.rancher_channel}/rancher-${local.rancher_version}.tgz" + chart = "${local.rancher_helm_repo}/${local.rancher_helm_channel}/rancher-${local.rancher_version}.tgz" # "${path.root}/rancher-${local.rancher_version}.tgz" namespace = "cattle-system" create_namespace = false wait = false @@ -155,63 +181,23 @@ resource "helm_release" "rancher" { force_update = true timeout = 1800 # 30m - set { - name = "hostname" - value = local.rancher_domain - } - set { - name = "replicas" - value = "1" - } - set { - name = "bootstrapPassword" - value = "admin" - } - set { - name = "ingress.enabled" - value = "true" - } - set { - name = "ingress.tls.source" - value = "letsEncrypt" - } - set { - name = "tls" - value = "ingress" - } - set { - name = "letsEncrypt.ingress.class" - value = "nginx" - } - set { - name = "letsEncrypt.environment" - value = "production" - } - set { - name = "letsEncrypt.email" - value = local.email - } - set { - name = "certmanager.version" - value = local.cert_manager_version - } - set { - name = "ingress.extraAnnotations.cert-manager\\.io\\/issuer" - value = "rancher" - } - set { - name = "agentTLSMode" - value = "system-store" + dynamic "set" { + for_each = local.helm_chart_values + content { + name = set.key + value = set.value + } } } resource "terraform_data" "wait_for_rancher" { depends_on = [ time_sleep.settle_before_rancher, + terraform_data.cattle-system, kubernetes_manifest.issuer, terraform_data.wait_for_nginx, - 
terraform_data.build_chart, helm_release.rancher, + # terraform_data.build_chart, ] provisioner "local-exec" { command = <<-EOT @@ -227,11 +213,12 @@ resource "terraform_data" "wait_for_rancher" { resource "terraform_data" "get_public_cert_info" { depends_on = [ time_sleep.settle_before_rancher, + terraform_data.cattle-system, kubernetes_manifest.issuer, terraform_data.wait_for_nginx, - terraform_data.build_chart, helm_release.rancher, terraform_data.wait_for_rancher, + # terraform_data.build_chart, ] provisioner "local-exec" { command = <<-EOT @@ -261,6 +248,8 @@ resource "terraform_data" "get_public_cert_info" { echo "$E" timeout 3600 kubectl get order -A timeout 3600 kubectl get challenge -A + timeout 3600 kubectl get CertificateRequest -A + timeout 3600 kubectl get Certificate -A timeout 3600 kubectl describe order -n cattle-system timeout 3600 kubectl describe challenge -n cattle-system exit 1 @@ -278,12 +267,13 @@ resource "random_password" "password" { resource "rancher2_bootstrap" "admin" { depends_on = [ time_sleep.settle_before_rancher, + terraform_data.cattle-system, kubernetes_manifest.issuer, terraform_data.wait_for_nginx, - terraform_data.build_chart, helm_release.rancher, terraform_data.wait_for_rancher, terraform_data.get_public_cert_info, + # terraform_data.build_chart, ] password = random_password.password.result } diff --git a/modules/rancher_bootstrap/rancher/variables.tf b/modules/rancher_bootstrap/rancher/variables.tf index b3191e3..6670339 100644 --- a/modules/rancher_bootstrap/rancher/variables.tf +++ b/modules/rancher_bootstrap/rancher/variables.tf @@ -3,6 +3,62 @@ variable "project_domain" { description = <<-EOT The project domain. An fqdn, eg. "test.example.com". EOT + validation { + condition = can(regex( + "^(?:https?://)?[[:alpha:]](?:[[:alnum:]\\p{Pd}]{1,63}\\.)+[[:alnum:]\\p{Pd}]{1,62}[[:alnum:]](?::[[:digit:]]{1,5})?$", + var.project_domain + )) + error_message = "Must be a fully qualified domain name." 
+ } +} +variable "rancher_helm_repo" { + type = string + description = <<-EOT + The Helm repository to retrieve charts from. + EOT + default = "https://releases.rancher.com/server-charts" +} +variable "rancher_helm_channel" { + type = string + description = <<-EOT + The Helm repository channel retrieve charts from. + Can be "latest" or "stable", defaults to "stable". + EOT + default = "stable" +} +variable "rancher_helm_chart_use_strategy" { + type = string + description = <<-EOT + The strategy to use for Rancher's Helm chart values. + Options include: "default", "merge", or "provide". + Default will tell the module to use our suggested default configuration. + Merge will merge our default suggestions with your supplied configuration, anything you supply will override the default. + Provide will ignore our default suggestions and use the configuration provided in the rancher_helm_chart_values argument. + EOT + default = "default" +} +variable "rancher_helm_chart_values" { + type = map(any) + description = <<-EOT + A key/value map of Helm arguments to pass to the Rancher helm chart. + This will be ignored if the rancher_helm_chart_use_strategy argument is set to "default". + eg. + { + "hostname" = "test.example.com" + "replicas" = "1" + "bootstrapPassword" = "password" + "ingress.enabled" = "true" + "ingress.tls.source" = "letsEncrypt" + "tls" = "ingress" + "letsEncrypt.ingress.class" = "nginx" + "letsEncrypt.environment" = "production" + "letsEncrypt.email" = "test@example.com" + "certmanager.version" = "1.13.1" + "agentTLSMode" = "system-store" + "ingress.extraAnnotations.cert-manager\\.io\\/issuer" = "rancher" + } + EOT + default = {} } variable "zone_id" { type = string @@ -29,7 +85,7 @@ variable "rancher_version" { description = <<-EOT The version of rancher to install. 
EOT - default = "2.9.2" + default = "2.11.2" } variable "cert_manager_version" { type = string diff --git a/modules/rancher_bootstrap/rancher/versions.tf b/modules/rancher_bootstrap/rancher/versions.tf index d6aa034..e251155 100644 --- a/modules/rancher_bootstrap/rancher/versions.tf +++ b/modules/rancher_bootstrap/rancher/versions.tf @@ -3,7 +3,7 @@ terraform { required_providers { helm = { source = "hashicorp/helm" - version = ">= 2.14" + version = "2.14" } rancher2 = { source = "rancher/rancher2" diff --git a/modules/rancher_bootstrap/rancher_externalTLS/main.tf b/modules/rancher_bootstrap/rancher_externalTLS/main.tf index 9b1b7bb..694a49a 100644 --- a/modules/rancher_bootstrap/rancher_externalTLS/main.tf +++ b/modules/rancher_bootstrap/rancher_externalTLS/main.tf @@ -4,8 +4,30 @@ provider "rancher2" { } locals { - rancher_domain = var.project_domain - rancher_version = replace(var.rancher_version, "v", "") # don't include the v + rancher_domain = var.project_domain + rancher_helm_repo = var.rancher_helm_repo + rancher_helm_channel = var.rancher_helm_channel + rancher_version = replace(var.rancher_version, "v", "") # don't include the v + helm_chart_use_strategy = var.rancher_helm_chart_use_strategy + rancher_helm_chart_values = var.rancher_helm_chart_values + default_hc_values = { + "hostname" = local.rancher_domain + "replicas" = "1" + "bootstrapPassword" = "admin" + "ingress.enabled" = "true" + "ingress.tls.source" = "secret" + "ingress.tls.secretName" = "tls-rancher-ingress" + "privateCA" = "true" + "agentTLSMode" = "system-store" + } + helm_chart_values = coalesce( # using coalesce like this essentially gives us a switch function + (local.helm_chart_use_strategy == "merge" ? + merge(local.default_hc_values, local.rancher_helm_chart_values) : null), + (local.helm_chart_use_strategy == "default" ? + local.default_hc_values : null), + (local.helm_chart_use_strategy == "provide" ? + local.rancher_helm_chart_values : null) + ) # WARNING! 
Some config is necessary, if the result is an empty string the coalesce will fail } resource "time_sleep" "settle_before_rancher" { @@ -34,37 +56,37 @@ resource "terraform_data" "wait_for_nginx" { } } -# WARNING! This adds git, yq, and helm to the dependency list! -resource "terraform_data" "build_chart" { - depends_on = [ - time_sleep.settle_before_rancher, - ] - provisioner "local-exec" { - command = <<-EOT - cd ${abspath(path.root)} || true - if [ -d chart ]; then - rm -rf chart - fi - mkdir chart - cd chart || exit 1 - ${abspath(path.module)}/build_chart.sh "${local.rancher_version}" - cd ${abspath(path.root)} || true - mv chart/rancher-${local.rancher_version}.tgz . - rm -rf chart - ls ${abspath(path.root)}/rancher-${local.rancher_version}.tgz - EOT - } -} +# # WARNING! This adds git, yq, and helm to the dependency list! +# resource "terraform_data" "build_chart" { +# depends_on = [ +# time_sleep.settle_before_rancher, +# ] +# provisioner "local-exec" { +# command = <<-EOT +# cd ${abspath(path.root)} || true +# if [ -d chart ]; then +# rm -rf chart +# fi +# mkdir chart +# cd chart || exit 1 +# ${abspath(path.module)}/build_chart.sh "${local.rancher_version}" +# cd ${abspath(path.root)} || true +# mv chart/rancher-${local.rancher_version}.tgz . 
+# rm -rf chart +# ls ${abspath(path.root)}/rancher-${local.rancher_version}.tgz +# EOT +# } +# } # https://github.com/rancher/rancher/blob/main/chart/values.yaml resource "helm_release" "rancher" { depends_on = [ time_sleep.settle_before_rancher, terraform_data.wait_for_nginx, - terraform_data.build_chart, + # terraform_data.build_chart, ] name = "rancher" - chart = "${path.root}/rancher-${local.rancher_version}.tgz" #"${local.rancher_helm_repository}/${local.rancher_channel}/rancher-${local.rancher_version}.tgz" + chart = "${local.rancher_helm_repo}/${local.rancher_helm_channel}/rancher-${local.rancher_version}.tgz" # "${path.root}/rancher-${local.rancher_version}.tgz" namespace = "cattle-system" create_namespace = false wait = false @@ -72,43 +94,19 @@ resource "helm_release" "rancher" { force_update = true timeout = 1800 # 30m - set { - name = "hostname" - value = local.rancher_domain - } - set { - name = "replicas" - value = "2" # this should be variable on number of nodes deployed - } - set { - name = "bootstrapPassword" - value = "admin" - } - set { - name = "ingress.enabled" - value = "true" - } - set { - name = "ingress.tls.source" - value = "secret" - } - set { - name = "ingress.tls.secretName" - value = "tls-rancher-ingress" - } - set { - name = "privateCA" - value = "true" - } - set { - name = "agentTLSMode" - value = "system-store" + dynamic "set" { + for_each = local.helm_chart_values + content { + name = set.key + value = set.value + } } } resource "terraform_data" "wait_for_rancher" { depends_on = [ time_sleep.settle_before_rancher, + terraform_data.wait_for_nginx, helm_release.rancher, ] provisioner "local-exec" { @@ -131,6 +129,7 @@ resource "terraform_data" "get_public_cert_info" { depends_on = [ random_password.password, time_sleep.settle_before_rancher, + terraform_data.wait_for_nginx, helm_release.rancher, terraform_data.wait_for_rancher, ] @@ -154,6 +153,7 @@ resource "terraform_data" "get_ping" { depends_on = [ random_password.password, 
time_sleep.settle_before_rancher, + terraform_data.wait_for_nginx, helm_release.rancher, terraform_data.wait_for_rancher, terraform_data.get_public_cert_info, @@ -187,6 +187,7 @@ resource "rancher2_bootstrap" "admin" { depends_on = [ random_password.password, time_sleep.settle_before_rancher, + terraform_data.wait_for_nginx, helm_release.rancher, terraform_data.wait_for_rancher, terraform_data.get_public_cert_info, diff --git a/modules/rancher_bootstrap/rancher_externalTLS/variables.tf b/modules/rancher_bootstrap/rancher_externalTLS/variables.tf index 3762e37..47c7fba 100644 --- a/modules/rancher_bootstrap/rancher_externalTLS/variables.tf +++ b/modules/rancher_bootstrap/rancher_externalTLS/variables.tf @@ -3,11 +3,63 @@ variable "project_domain" { description = <<-EOT The project domain. An fqdn, eg. "test.example.com". EOT + validation { + condition = can(regex( + "^(?:https?://)?[[:alpha:]](?:[[:alnum:]\\p{Pd}]{1,63}\\.)+[[:alnum:]\\p{Pd}]{1,62}[[:alnum:]](?::[[:digit:]]{1,5})?$", + var.project_domain + )) + error_message = "Must be a fully qualified domain name." + } } variable "rancher_version" { type = string description = <<-EOT The version of rancher to install. EOT - default = "2.9.2" + default = "2.11.2" +} +variable "rancher_helm_repo" { + type = string + description = <<-EOT + The Helm repository to retrieve charts from. + EOT + default = "https://releases.rancher.com/server-charts" +} +variable "rancher_helm_channel" { + type = string + description = <<-EOT + The Helm repository channel retrieve charts from. + Can be "latest" or "stable", defaults to "stable". + EOT + default = "stable" +} +variable "rancher_helm_chart_use_strategy" { + type = string + description = <<-EOT + The strategy to use for Rancher's Helm chart values. + Options include: "default", "merge", or "provide". + Default will tell the module to use our suggested default configuration. 
+ Merge will merge our default suggestions with your supplied configuration, anything you supply will override the default. + Provide will ignore our default suggestions and use the configuration provided in the rancher_helm_chart_values argument. + EOT + default = "default" +} +variable "rancher_helm_chart_values" { + type = map(any) + description = <<-EOT + A key/value map of Helm arguments to pass to the Rancher helm chart. + This will be ignored if the rancher_helm_chart_use_strategy argument is set to "default". + eg. + { + "hostname" = local.rancher_domain + "replicas" = "1" + "bootstrapPassword" = "admin" + "ingress.enabled" = "true" + "ingress.tls.source" = "secret" + "ingress.tls.secretName" = "tls-rancher-ingress" + "privateCA" = "true" + "agentTLSMode" = "system-store" + } + EOT + default = {} } diff --git a/modules/rancher_bootstrap/rancher_externalTLS/versions.tf b/modules/rancher_bootstrap/rancher_externalTLS/versions.tf index c73ce64..19cf779 100644 --- a/modules/rancher_bootstrap/rancher_externalTLS/versions.tf +++ b/modules/rancher_bootstrap/rancher_externalTLS/versions.tf @@ -3,7 +3,7 @@ terraform { required_providers { helm = { source = "hashicorp/helm" - version = ">= 2.14" + version = "2.14" } rancher2 = { source = "rancher/rancher2" diff --git a/modules/rancher_bootstrap/variables.tf b/modules/rancher_bootstrap/variables.tf index 397d95a..9866481 100644 --- a/modules/rancher_bootstrap/variables.tf +++ b/modules/rancher_bootstrap/variables.tf @@ -3,20 +3,20 @@ variable "project_domain" { description = <<-EOT The project domain. An fqdn, eg. "test.example.com". EOT -} -variable "zone" { - type = string - description = <<-EOT - The zone within the domain. - eg. 
if the domain is "test.example.com", then this should be "example.com" - EOT + validation { + condition = can(regex( + "^(?:https?://)?[[:alpha:]](?:[[:alnum:]\\p{Pd}]{1,63}\\.)+[[:alnum:]\\p{Pd}]{1,62}[[:alnum:]](?::[[:digit:]]{1,5})?$", + var.project_domain + )) + error_message = "Must be a fully qualified domain name." + } } variable "zone_id" { type = string description = <<-EOT The ID of the zone within the domain. eg. if the domain is "test.example.com", then the zone should be "example.com" - The ID of that zone. + The AWS ID of that zone. EOT } variable "region" { @@ -43,7 +43,7 @@ variable "rancher_version" { description = <<-EOT The version of rancher to install. EOT - default = "2.8.4" + default = "2.11.2" } variable "cert_manager_version" { type = string @@ -66,34 +66,3 @@ variable "path" { The local file path to stage files for the deployment. EOT } -variable "cert_manager_configuration" { - type = object({ - aws_region = string - aws_session_token = string - aws_access_key_id = string - aws_secret_access_key = string - }) - description = <<-EOT - The AWS access key information necessary to configure cert-manager. - These will be added as environment variables to configure Cert Manager Ambient Credentials. - https://cert-manager.io/docs/configuration/acme/dns01/route53/#ambient-credentials - EOT - default = { - aws_region = "" - aws_session_token = "" - aws_access_key_id = "" - aws_secret_access_key = "" - } - sensitive = true -} -variable "backend_file" { - type = string - description = <<-EOT - Path to a .tfbackend file. - This allows the user to pass a backend file. - The backend file will be added to the terraform run and will allow state data to be saved remotely. - Please note that this is a separate state file, and this backend should be independent of the main module's state and any other submodules' states. - See https://developer.hashicorp.com/terraform/language/backend#file for more information. 
- EOT - default = "" -} diff --git a/modules/rancher_bootstrap/versions.tf b/modules/rancher_bootstrap/versions.tf index 335c64c..b7799cc 100644 --- a/modules/rancher_bootstrap/versions.tf +++ b/modules/rancher_bootstrap/versions.tf @@ -3,11 +3,11 @@ terraform { required_providers { local = { source = "hashicorp/local" - version = ">= 2.5.2" + version = ">= 2.5" } helm = { source = "hashicorp/helm" - version = ">= 2.14" + version = "2.14" } rancher2 = { source = "rancher/rancher2" diff --git a/run_tests.sh b/run_tests.sh index 27aaa37..541828a 100755 --- a/run_tests.sh +++ b/run_tests.sh @@ -131,16 +131,17 @@ if [ -z "$cleanup_id" ]; then # Run tests initially run_tests false + sleep 60 # Check if we need to rerun failed tests if [ "$rerun_failed" = true ] && [ -f "/tmp/${IDENTIFIER}_failed_tests.txt" ]; then echo "Rerunning failed tests..." run_tests true + sleep 60 fi fi echo "Clearing leftovers with Id $IDENTIFIER in $AWS_REGION..." -sleep 60 if [ -n "$IDENTIFIER" ]; then attempts=0 diff --git a/test/tests/downstream/downstream_test.go b/test/tests/downstream/downstream_test.go index 002194b..ba34a8d 100644 --- a/test/tests/downstream/downstream_test.go +++ b/test/tests/downstream/downstream_test.go @@ -19,7 +19,7 @@ func TestDownstreamBasic(t *testing.T) { accessKey := util.GetAwsAccessKey() secretKey := util.GetAwsSecretKey() sessionToken := util.GetAwsSessionToken() - directory := "deploy_rke2" + directory := "downstream" owner := "terraform-ci@suse.com" util.SetAcmeServer() @@ -132,14 +132,14 @@ func TestDownstreamBasic(t *testing.T) { sshAgent.Stop() } -func TestDownstreamRoles(t *testing.T) { +func TestDownstreamSplitrole(t *testing.T) { t.Parallel() id := util.GetId() region := util.GetRegion() accessKey := util.GetAwsAccessKey() secretKey := util.GetAwsSecretKey() sessionToken := util.GetAwsSessionToken() - directory := "deploy_rke2_multiple_pools" + directory := "downstream_splitrole" owner := "terraform-ci@suse.com" util.SetAcmeServer() diff --git 
a/test/tests/one/one_test.go b/test/tests/one/one_test.go index e3e8ce3..e830db7 100644 --- a/test/tests/one/one_test.go +++ b/test/tests/one/one_test.go @@ -12,7 +12,7 @@ import ( util "github.com/rancher/terraform-rancher2-aws/test/tests" ) -func TestOneBasic(t *testing.T) { +func TestOne(t *testing.T) { t.Parallel() id := util.GetId() region := util.GetRegion() diff --git a/test/tests/prod/prod_test.go b/test/tests/prod/prod_test.go index 8e96406..c86f113 100644 --- a/test/tests/prod/prod_test.go +++ b/test/tests/prod/prod_test.go @@ -12,7 +12,7 @@ import ( util "github.com/rancher/terraform-rancher2-aws/test/tests" ) -func TestProdBasic(t *testing.T) { +func TestProd(t *testing.T) { t.Parallel() id := util.GetId() region := util.GetRegion() diff --git a/test/tests/three/three_test.go b/test/tests/three/three_test.go new file mode 100644 index 0000000..0451cb0 --- /dev/null +++ b/test/tests/three/three_test.go @@ -0,0 +1,121 @@ +package one + +import ( + "os" + "path/filepath" + "testing" + + aws "github.com/gruntwork-io/terratest/modules/aws" + g "github.com/gruntwork-io/terratest/modules/git" + "github.com/gruntwork-io/terratest/modules/ssh" + "github.com/gruntwork-io/terratest/modules/terraform" + util "github.com/rancher/terraform-rancher2-aws/test/tests" +) + +func TestThree(t *testing.T) { + t.Parallel() + id := util.GetId() + region := util.GetRegion() + directory := "three" + owner := "terraform-ci@suse.com" + util.SetAcmeServer() + + repoRoot, err := filepath.Abs(g.GetRepoRoot(t)) + if err != nil { + t.Fatalf("Error getting git root directory: %v", err) + } + + exampleDir := repoRoot + "/examples/" + directory + testDir := repoRoot + "/test/tests/data/" + id + + err = util.CreateTestDirectories(t, id) + if err != nil { + os.RemoveAll(testDir) + t.Fatalf("Error creating test data directories: %s", err) + } + keyPair, err := util.CreateKeypair(t, region, owner, id) + if err != nil { + os.RemoveAll(testDir) + t.Fatalf("Error creating test key pair: %s", 
err) + } + + err = os.WriteFile(testDir+"/id_rsa", []byte(keyPair.KeyPair.PrivateKey), 0600) + if err != nil { + os.RemoveAll(testDir) + t.Fatalf("Error creating test key pair: %s", err) + } + sshAgent := ssh.SshAgentWithKeyPair(t, keyPair.KeyPair) + t.Logf("Key %s created and added to agent", keyPair.Name) + + // use oldest RKE2, remember it releases much more than Rancher + _, _, rke2Version, err := util.GetRke2Releases() + if err != nil { + os.RemoveAll(testDir) + aws.DeleteEC2KeyPair(t, keyPair) + sshAgent.Stop() + t.Fatalf("Error getting Rke2 release version: %s", err) + } + + rancherVersion := os.Getenv("RANCHER_VERSION") + if rancherVersion == "" { + // use stable version if not specified + // using stable prevents problems where the Rancher provider hasn't released to fit the latest Rancher + _, rancherVersion, _, err = util.GetRancherReleases() + } + if err != nil { + os.RemoveAll(testDir) + aws.DeleteEC2KeyPair(t, keyPair) + sshAgent.Stop() + t.Fatalf("Error getting Rancher release version: %s", err) + } + + terraformOptions := terraform.WithDefaultRetryableErrors(t, &terraform.Options{ + TerraformDir: exampleDir, + // Variables to pass to our Terraform code using -var options + Vars: map[string]interface{}{ + "identifier": id, + "owner": owner, + "key_name": keyPair.Name, + "key": keyPair.KeyPair.PublicKey, + "zone": os.Getenv("ZONE"), + "rke2_version": rke2Version, + "rancher_version": rancherVersion, + "file_path": testDir, + }, + // Environment variables to set when running Terraform + EnvVars: map[string]string{ + "AWS_DEFAULT_REGION": region, + "AWS_REGION": region, + "TF_DATA_DIR": testDir, + "TF_IN_AUTOMATION": "1", + "TF_CLI_ARGS_plan": "-no-color -state=" + testDir + "/tfstate", + "TF_CLI_ARGS_apply": "-no-color -state=" + testDir + "/tfstate", + "TF_CLI_ARGS_destroy": "-no-color -state=" + testDir + "/tfstate", + "TF_CLI_ARGS_output": "-no-color -state=" + testDir + "/tfstate", + }, + RetryableTerraformErrors: 
util.GetRetryableTerraformErrors(), + NoColor: true, + SshAgent: sshAgent, + Upgrade: true, + }) + + _, err = terraform.InitAndApplyE(t, terraformOptions) + if err != nil { + t.Log("Test failed, tearing down...") + util.GetErrorLogs(t, testDir+"/kubeconfig") + util.Teardown(t, testDir, terraformOptions, keyPair) + os.Remove(exampleDir + "/.terraform.lock.hcl") + sshAgent.Stop() + t.Fatalf("Error creating cluster: %s", err) + } + util.CheckReady(t, testDir+"/kubeconfig") + util.CheckRunning(t, testDir+"/kubeconfig") + if t.Failed() { + t.Log("Test failed...") + } else { + t.Log("Test passed...") + } + util.Teardown(t, testDir, terraformOptions, keyPair) + os.Remove(exampleDir + "/.terraform.lock.hcl") + sshAgent.Stop() +} diff --git a/variables.tf b/variables.tf index ddb81ef..920911a 100644 --- a/variables.tf +++ b/variables.tf @@ -150,7 +150,7 @@ variable "cert_manager_version" { description = <<-EOT The version of cert-manager to install. EOT - default = "v1.13.1" + default = "v1.18.1" # "v1.13.1" } variable "rancher_version" { type = string @@ -209,27 +209,27 @@ variable "cert_manager_configuration" { } sensitive = true } -variable "install_cert_manager_backend" { - type = string - description = <<-EOT - Path to a .tfbackend file. - This allows the user to pass a backend file to the install_cert_manager submodule. - The backend file will be added to the submodule's terraform run and will allow that module's state data to be saved remotely. - Please note that this is a separate state file, and this backend should be independent of the main module's state and any other submodules' states. - See https://developer.hashicorp.com/terraform/language/backend#file for more information. - The default is to use a local state file. - EOT - default = "" -} -variable "rancher_bootstrap_backend" { - type = string - description = <<-EOT - Path to a .tfbackend file. - This allows the user to pass a backend file to the rancher_bootstrap submodule.
- The backend file will be added to the submodule's terraform run and will allow that module's state data to be saved remotely. - Please note that this is a separate state file, and this backend should be independent of the main module's state and any other submodules' states. - See https://developer.hashicorp.com/terraform/language/backend#file for more information. - The default is to use a local state file. - EOT - default = "" -} +# variable "install_cert_manager_backend" { +# type = string +# description = <<-EOT +# Path to a .tfbackend file. +# This allows the user to pass a backend file to the install_cert_manager submodule. +# The backend file will be added to the submodule's terraform run and will allow that module's state data to be saved remotely. +# Please note that this is a separate state file, and this backend should be independent of the main module's state and any other submodules' states. +# See https://developer.hashicorp.com/terraform/language/backend#file for more information. +# The default is to use a local state file. +# EOT +# default = "" +# } +# variable "rancher_bootstrap_backend" { +# type = string +# description = <<-EOT +# Path to a .tfbackend file. +# This allows the user to pass a backend file to the rancher_bootstrap submodule. +# The backend file will be added to the submodule's terraform run and will allow that module's state data to be saved remotely. +# Please note that this is a separate state file, and this backend should be independent of the main module's state and any other submodules' states. +# See https://developer.hashicorp.com/terraform/language/backend#file for more information. +# The default is to use a local state file. 
+# EOT +# default = "" +# } diff --git a/versions.tf b/versions.tf index 7263b81..59242b2 100644 --- a/versions.tf +++ b/versions.tf @@ -3,7 +3,7 @@ terraform { required_providers { local = { source = "hashicorp/local" - version = ">= 2.4" + version = ">= 2.5" } random = { source = "hashicorp/random" @@ -39,7 +39,7 @@ terraform { } helm = { source = "hashicorp/helm" - version = ">= 2.14" + version = "2.14" } rancher2 = { source = "rancher/rancher2"