diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md new file mode 100644 index 00000000..0ff7d014 --- /dev/null +++ b/.github/pull_request_template.md @@ -0,0 +1,41 @@ +## What type of PR is this? (check all applicable) + +- [ ] 🍕 Feature +- [ ] 🐛 Bug Fix +- [ ] 📝 Documentation +- [ ] 🧑‍💻 Refactor +- [ ] ✅ Test +- [ ] 🤖 Build or CI +- [ ] ❓ Other (please specify) + +## Related Issue + +Example: Fixes #123 + +## Describe this PR + +A brief description of how this solves the issue. + +## Screenshots + +Please provide screenshots of the change. + +## Alternative Approaches Considered + +Did you attempt any other approaches that are not documented in code? + +## Review Guide + +Notes for the reviewer. How to test this change? + +## Checklist before requesting a review + +- 📖 Read the HOT Contributing Guide: +- 📖 Read the HOT Code of Conduct: +- 👷‍♀️ Create small PRs. In most cases, this will be possible. +- ✅ Provide tests for your changes. +- 📝 Use descriptive commit messages. +- 📗 Update any related documentation and include any relevant screenshots. +- 🔠 Does this PR introduce or change any environment variables? If so, make sure to specify this change in the description. + +## [optional] What gif best describes this PR or how it makes you feel? 
\ No newline at end of file diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 6cf0209a..1da0d01e 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -1,4 +1,4 @@ -name: Deploy Terraform +name: Deploy Changes on: push: branches: @@ -35,12 +35,30 @@ jobs: role-to-assume: ${{ secrets.AWS_OIDC_ROLE }} - name: Provision TF uses: op5dev/tf-via-pr@v13 + env: + TF_VAR_cluster_ci_access_role_arn: ${{ secrets.AWS_OIDC_ROLE }} + TF_VAR_cluster_admin_access_role_arns: ${{ secrets.CLUSTER_ADMIN_ACCESS_ROLE_ARNS }} with: + # command: 'apply' command: ${{ github.event_name == 'push' && 'apply' || 'plan' }} tool: tofu working-directory: terraform validate: true format: true arg-var-file: ${{ env.VAR_FILE }} - arg-var: cluster_ci_access_role_arn=${{ secrets.AWS_OIDC_ROLE }} - label-pr: false \ No newline at end of file + - name: Get TF Outputs + run: | + echo "S3_BACKUP_ROLE=$(tofu -chdir=terraform output -var-file=vars/production.tfvars s3_backup_role)" >> $GITHUB_ENV + echo "CLUSTER_NAME=$(tofu -chdir=terraform output -var-file=vars/production.tfvars cluster_name)" >> $GITHUB_ENV + - name: Pull kubeconfig + run: | + aws eks update-kubeconfig --name ${{ env.CLUSTER_NAME }} + - name: Apply manifests + run: | + kubectl apply -f kubernetes/manifests/ ${{ github.event_name == 'pull_request' && '--dry-run=client' || '' }} + - name: Deploy eoAPI Chart + uses: helmfile/helmfile-action@v2.0.4 + with: + # Only mutate the cluster on push; pull requests get a read-only diff (mirrors the plan/apply split above). + helmfile-args: ${{ github.event_name == 'push' && 'apply' || 'diff' }} + helmfile-workdirectory: kubernetes/helm diff --git a/Makefile b/Makefile index b75256cb..bdaac639 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ AWS_PROFILE ?= default -CLUSTER_NAME = $(shell tofu -chdir=terraform output cluster_name) -S3_BACKUP_ROLE = $(shell tofu -chdir=terraform output s3_backup_role) +CLUSTER_NAME = $(shell tofu -chdir=terraform output -var-file=vars/local.tfvars cluster_name) +S3_BACKUP_ROLE = $(shell tofu -chdir=terraform 
output -var-file=vars/local.tfvars s3_backup_role) PGO_CHART_VERSION = 5.7.4 EOAPI_CHART_VERSION = 0.7.1 @@ -34,4 +34,4 @@ init-eoapi: ## deploy-eoapi: Upgrade or install eoAPI release deploy-eoapi: helm repo list | grep "eoapi" >/dev/null 2>&1 || { echo "Not initialized, run 'make init-eoapi' before retrying"; exit 1; } - helm upgrade --install --namespace eoapi --create-namespace eoapi eoapi/eoapi --version $(EOAPI_CHART_VERSION) -f kubernetes/helm/eoapi.yaml --set previousVersion=$(EOAPI_CHART_VERSION) --set postgrescluster.metadata.annotations.eks.amazonaws.com/role-arn=$(S3_BACKUP_ROLE) \ No newline at end of file + helm upgrade --install --namespace eoapi --create-namespace eoapi eoapi/eoapi --version $(EOAPI_CHART_VERSION) -f kubernetes/helm/eoapi-values.yaml --set previousVersion=$(EOAPI_CHART_VERSION) --set postgrescluster.metadata.annotations."eks\.amazonaws\.com/role-arn"=$(S3_BACKUP_ROLE) diff --git a/README.md b/README.md index f385592d..acaa273a 100644 --- a/README.md +++ b/README.md @@ -11,9 +11,30 @@ See the [inital proposal](docs/proposal.md) for more background. #### Required Tools -[AWS CLI](https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html) -[OpenTofu](https://opentofu.org/docs/intro/install/) -[kubectl](https://kubernetes.io/docs/tasks/tools/) -[Helm](https://helm.sh/docs/intro/install/) +- [AWS CLI](https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html) +- [OpenTofu](https://opentofu.org/docs/intro/install/) +- [kubectl](https://kubernetes.io/docs/tasks/tools/) +- [Helm](https://helm.sh/docs/intro/install/) -// TODO 🚧 \ No newline at end of file + +### Areas for Further (Initial) Development + +#### Variable Management + +- Duplication exists between TF inputs, CI workflows, and local scripts. +- A tool like https://github.com/helmfile/helmfile may help with sourcing variables by environment. + - A basic version has been added to deploy revision deltas, further templating would be required. 
+- As more HOT applications + services are moved to the cluster, this will only grow. + +#### Deployment + +- Provisioning is currently done in the same workflow (TF, K8s, Helm), mostly as a byproduct of the initial development phase. Can be further refined. +- GitOps tools like ArgoCD are [under consideration](https://github.com/hotosm/k8s-infra/issues/14) +- Flux [Tofu controller](https://github.com/flux-iac/tofu-controller) may be an analog for base infrastructure (further investigation required). + +#### Bridging TF and Kubernetes + +- TF-managed information often needs to be referenced on the cluster + - ex: PostgresCluster CRD requires the role ARN authorized for backups. Role and bucket are created in TF. +- Global cluster resources are provisioned through TF, but an argument can be made for their management by K8s. +- Ideal solution enables cluster resources to reference, mount, inject, etc. TF-managed information with minimal developer intervention. diff --git a/kubernetes/README.md b/kubernetes/README.md new file mode 100644 index 00000000..b950de5f --- /dev/null +++ b/kubernetes/README.md @@ -0,0 +1,151 @@ +# Cluster Applications + +See [initial migration outline](../docs/proposal.md) for main HOT OSM applications. + +Relevant Docs: +- [kubectl] +- [Helm] + +## Global + +### ClusterIssuer + +Issue TLS certificates for the cluster via [cert-manager]. See also [eoAPI TLS section](#transport-layer-security-tls). + +Install: +```sh +# ** See helm/eoapi-values.yaml for initial setup ** +$ kubectl apply -f kubernetes/manifests/cluster-issuer.yaml +``` + +## eoAPI + +Open source Earth Observation (EO) backend supporting Open Aerial Map (OAM). 
+ +Site: https://eoapi.dev/ +Chart: https://github.com/developmentseed/eoapi-k8s + +Install: +```sh +$ helm upgrade --install --set disable_check_for_upgrades=true pgo oci://registry.developers.crunchydata.com/crunchydata/pgo --version $PGO_CHART_VERSION +$ helm repo add eoapi https://devseed.com/eoapi-k8s/ +$ helm upgrade --install --namespace eoapi --create-namespace eoapi eoapi/eoapi \ + --version $EOAPI_CHART_VERSION \ + -f kubernetes/helm/eoapi-values.yaml \ + --set previousVersion=$EOAPI_CHART_VERSION \ + --set postgrescluster.metadata.annotations."eks\.amazonaws\.com/role-arn"=$S3_BACKUP_ROLE +``` + +#### helmfile + +A basic [helmfile] has been added for GitHub Actions, but it's recommended to use it outside of CI workflows as well to maintain consistency. + +```sh +$ helmfile apply +``` + +Provided the values match, a similar workflow can be achieved with the Makefile commands if the additional install isn't desired. + +### Configuration + +See [eoAPI chart docs]. The following sections provide a basic outline of overlays, customizations, and considerations specific to HOT's initial implementation. + +#### Transport Layer Security (TLS) + +See [cert-manager docs] and [eoAPI guidance on cert-manager setup]. + +- Requires a domain controlled by HOT +- Issuer manifests and chart settings have been made available to provision certificates using [ingress annotations] and Let's Encrypt/[ACME] +- Recommend going through staging issuer first to avoid hitting rate limits + +#### Backups + +Enabled with default settings; see the [PostgresOperator docs] for further customization. + +Uses an [OIDC auth setup] to access S3, which requires propagating TF-managed information to K8s. + +> [!NOTE] +> Further development to bridge and/or reorganize TF and K8s-provisioned resources may remove the need to set a `role-arn` annotation on each release. 
+ +#### Monitoring / Observability / Autoscaling + +The eoAPI support chart adds Prometheus and Grafana tooling to enable systems analysis, visualization, and custom metrics for autoscaling. + +- [eoAPI support chart setup]: in-depth walkthrough +- [eoAPI chart configuration]: set HPA behavior for services +- [eoAPI support chart dependencies]: explore further customization, provider documentation + +_Currently set to install once TLS is enabled in eoAPI._ + +## Tips + Commands + +### Setup + +#### Local Context + +```sh +$ aws eks update-kubeconfig --name <cluster-name> +``` + +### Debugging + +CLI manual will be most helpful: +```sh +$ kubectl --help +``` + +#### Examples + +Basic cluster overview: +```sh +$ kubectl get pod,svc,deploy -A +``` + +Shell into default container on pod: +```sh +$ kubectl -n <namespace> exec -it <pod> -- bash +# $ +``` + +Inspect ingress details: +```sh +$ kubectl -n <namespace> describe ingress/<ingress-name> +``` + +Redirect pod log output to file: +```sh +$ kubectl -n <namespace> logs <pod> --all-containers=true >> file.log +``` + +[kubectl]: + https://kubernetes.io/docs/reference/kubectl/ +[Helm]: + https://helm.sh/docs/ +[Let's Encrypt]: + https://letsencrypt.org/ +[cert-manager]: + https://cert-manager.io/ +[cert-manager docs]: + https://cert-manager.io/docs/configuration/ +[helmfile]: + https://github.com/helmfile/helmfile +[eoAPI chart docs]: + https://github.com/developmentseed/eoapi-k8s/tree/975a26639fa3b8be7d3338220d6ea9c4470d8d15/docs +[iframing]: + https://developmentseed.slack.com/archives/C08B8L61QTT/p1747740182369159?thread_ts=1747314980.658339&cid=C08B8L61QTT +[eoAPI guidance on cert-manager setup]: + https://github.com/developmentseed/eoapi-k8s/blob/main/docs/unified-ingress.md#setting-up-tls-with-cert-manager +[ingress annotations]: + https://cert-manager.io/docs/usage/ingress/ +[ACME]: + https://cert-manager.io/docs/configuration/acme/ +[PostgresOperator docs]: + https://access.crunchydata.com/documentation/postgres-operator/latest/tutorials/backups-disaster-recovery/backups +[OIDC auth 
setup]: + https://access.crunchydata.com/documentation/postgres-operator/latest/tutorials/backups-disaster-recovery/backups#using-an-aws-integrated-identity-provider-and-role +[eoAPI support chart setup]: + https://github.com/developmentseed/eoapi-k8s/blob/975a26639fa3b8be7d3338220d6ea9c4470d8d15/docs/autoscaling.md +[eoAPI chart configuration]: + https://github.com/developmentseed/eoapi-k8s/blob/975a26639fa3b8be7d3338220d6ea9c4470d8d15/docs/configuration.md +[eoAPI support chart dependencies]: + https://github.com/developmentseed/eoapi-k8s/blob/975a26639fa3b8be7d3338220d6ea9c4470d8d15/helm-chart/eoapi-support/Chart.yaml \ No newline at end of file diff --git a/kubernetes/helm/eoapi-support-values.yaml b/kubernetes/helm/eoapi-support-values.yaml new file mode 100644 index 00000000..f64f7259 --- /dev/null +++ b/kubernetes/helm/eoapi-support-values.yaml @@ -0,0 +1,58 @@ +prometheus-adapter: + prometheus: + url: http://eoapi-support-prometheus-server.eoapi-support.svc.cluster.local + +prometheus: + server: + service: + type: ClusterIP + annotations: { } + ingress: + annotations: + nginx.ingress.kubernetes.io/auth-type: basic + nginx.ingress.kubernetes.io/auth-secret: eoapi-support-prometheus + nginx.ingress.kubernetes.io/auth-realm: 'Authentication Required' + nginx.ingress.kubernetes.io/enable-cors: "true" + nginx.ingress.kubernetes.io/enable-access-log: "true" + cert-manager.io/cluster-issuer: "letsencrypt-prod" + enabled: true + ingressClassName: nginx + hosts: + - metrics.k8s-prod.hotosm.org + tls: + - secretName: prometheus-server-tls + hosts: + - metrics.k8s-prod.hotosm.org + persistentVolume: + storageClass: gp2 + +grafana: + service: + type: ClusterIP + annotations: { } + ingress: + annotations: + nginx.ingress.kubernetes.io/enable-cors: "true" + nginx.ingress.kubernetes.io/enable-access-log: "true" + cert-manager.io/cluster-issuer: "letsencrypt-prod" + enabled: true + ingressClassName: nginx + hosts: + - dashboard.k8s-prod.hotosm.org + tls: + - secretName: 
grafana-tls + hosts: + - dashboard.k8s-prod.hotosm.org + datasources: + datasources.yaml: + datasources: + - name: prometheus + orgId: 1 + type: prometheus + url: http://eoapi-support-prometheus-server.eoapi-support.svc.cluster.local + access: proxy + jsonData: + timeInterval: "5s" + isDefault: true + editable: true + version: 1 # This number should be increased when changes are made to update the datasource \ No newline at end of file diff --git a/kubernetes/helm/eoapi-values.yaml b/kubernetes/helm/eoapi-values.yaml new file mode 100644 index 00000000..257b2d7c --- /dev/null +++ b/kubernetes/helm/eoapi-values.yaml @@ -0,0 +1,70 @@ +ingress: + annotations: + # increase the max body size to 100MB + nginx.ingress.kubernetes.io/proxy-body-size: "100m" + nginx.ingress.kubernetes.io/enable-cors: "true" + nginx.ingress.kubernetes.io/enable-access-log: "true" + cert-manager.io/cluster-issuer: "letsencrypt-prod" + host: "oam-eoapi-prod.imagery-services.k8s-prod.hotosm.org" + tls: + enabled: true + secretName: eoapi-tls + +vector: + # The vector service that provides OGC Features API and vector tiles isn't + # needed currently, so it is disabled to save on some cluster resources. + enabled: false + +stac: + image: + # From https://github.com/hotosm/OpenAerialMap/tree/main/backend/stac-api + name: ghcr.io/hotosm/openaerialmap/stac-api + tag: main + command: + - "uvicorn" + - "app.main:app" + - "--host=$(HOST)" + - "--port=$(PORT)" + settings: + envVars: + ############## + # uvicorn + ############## + HOST: "0.0.0.0" + PORT: "8080" + # https://www.uvicorn.org/settings/#production + WEB_CONCURRENCY: "5" + ############## + # stac-fastapi-pgstac + ############## + ENABLE_TRANSACTIONS_EXTENSIONS: "false" + ############## + # stac-fastapi + ############## + STAC_FASTAPI_TITLE: "STAC FastAPI for OpenAerialMap" + STAC_FASTAPI_DESCRIPTION: "STAC FastAPI deployment for the OpenAerialMap and open imagery catalogs." 
+ +postgrescluster: + backupsEnabled: true + s3: + bucket: "hotosm-pgstac-backup" + endpoint: "s3.us-east-1.amazonaws.com" + region: "us-east-1" + keyType: "web-id" + instances: + - name: eoapi + replicas: 1 + dataVolumeClaimSpec: + storageClassName: "gp2" + accessModes: + - "ReadWriteOnce" + resources: + requests: + storage: "100Gi" + cpu: "1024m" + memory: "3048Mi" + +pgstacBootstrap: + image: + name: ghcr.io/stac-utils/pgstac-pypgstac + tag: v0.9.6 diff --git a/kubernetes/helm/eoapi.yaml b/kubernetes/helm/eoapi.yaml deleted file mode 100644 index 784c4827..00000000 --- a/kubernetes/helm/eoapi.yaml +++ /dev/null @@ -1,37 +0,0 @@ -ingress: - annotations: - # increase the max body size to 100MB - nginx.ingress.kubernetes.io/proxy-body-size: "100m" - nginx.ingress.kubernetes.io/enable-cors: "true" - nginx.ingress.kubernetes.io/enable-access-log: "true" - - -postgrescluster: - # # TODO: bridge alternatives for TF output to CRD annotations - # metadata: - # annotations: - # eks.amazonaws.com/role-arn: "" - backupsEnabled: true - s3: - bucket: "pgstac-backup" - endpoint: "s3.us-east-1.amazonaws.com" - region: "us-east-1" - keyType: "web-id" - instances: - - name: eoapi - replicas: 1 - dataVolumeClaimSpec: - # TODO: gp3 SC - storageClassName: "gp2" - accessModes: - - "ReadWriteOnce" - resources: - requests: - storage: "10Gi" - cpu: "1024m" - memory: "3048Mi" - -pgstacBootstrap: - image: - name: ghcr.io/stac-utils/pgstac-pypgstac - tag: v0.9.6 diff --git a/kubernetes/helm/helmfile.yaml.gotmpl b/kubernetes/helm/helmfile.yaml.gotmpl new file mode 100644 index 00000000..f627ece4 --- /dev/null +++ b/kubernetes/helm/helmfile.yaml.gotmpl @@ -0,0 +1,37 @@ +releases: + - name: eoapi + namespace: eoapi + chart: eoapi/eoapi + version: 0.7.1 + needs: [default/pgo] + values: + - eoapi-values.yaml + - postgrescluster: + metadata: + annotations: + eks.amazonaws.com/role-arn: {{ env "S3_BACKUP_ROLE" }} + set: + - name: previousVersion + value: 0.7.1 + + - name: pgo + namespace: default 
+ chart: oci://registry.developers.crunchydata.com/crunchydata/pgo + version: 5.7.4 + set: + - name: disable_check_for_upgrades + value: true + + - name: eoapi-support + namespace: eoapi-support + chart: eoapi/eoapi-support + version: 0.1.7 + needs: [eoapi/eoapi] + installed: {{ readFile "eoapi-values.yaml" | fromYaml | get "ingress.tls.enabled" false }} + values: + - eoapi-support-values.yaml + + +repositories: + - name: eoapi + url: https://devseed.com/eoapi-k8s/ \ No newline at end of file diff --git a/kubernetes/manifests/cluster-issuer-staging.yaml b/kubernetes/manifests/cluster-issuer-staging.yaml new file mode 100644 index 00000000..208e1de1 --- /dev/null +++ b/kubernetes/manifests/cluster-issuer-staging.yaml @@ -0,0 +1,14 @@ +apiVersion: cert-manager.io/v1 +kind: ClusterIssuer +metadata: + name: letsencrypt-staging +spec: + acme: + server: https://acme-staging-v02.api.letsencrypt.org/directory + email: sysadmin@hotosm.org + privateKeySecretRef: + name: letsencrypt-staging + solvers: + - http01: + ingress: + ingressClassName: nginx \ No newline at end of file diff --git a/kubernetes/manifests/cluster-issuer.yaml b/kubernetes/manifests/cluster-issuer.yaml new file mode 100644 index 00000000..a00ea1c1 --- /dev/null +++ b/kubernetes/manifests/cluster-issuer.yaml @@ -0,0 +1,14 @@ +apiVersion: cert-manager.io/v1 +kind: ClusterIssuer +metadata: + name: letsencrypt-prod +spec: + acme: + server: https://acme-v02.api.letsencrypt.org/directory + email: sysadmin@hotosm.org + privateKeySecretRef: + name: letsencrypt-prod + solvers: + - http01: + ingress: + ingressClassName: nginx \ No newline at end of file diff --git a/kubernetes/manifests/sync-maxar.yaml b/kubernetes/manifests/sync-maxar.yaml new file mode 100644 index 00000000..c02f6865 --- /dev/null +++ b/kubernetes/manifests/sync-maxar.yaml @@ -0,0 +1,70 @@ +apiVersion: batch/v1 +kind: CronJob +metadata: + name: stac-ingest-maxar + namespace: eoapi + labels: + app: stac-ingest-maxar +spec: + schedule: "0 0 31 2 *" 
+ # schedule: "0 0 * * *" + jobTemplate: + spec: + template: + metadata: + labels: + app: stac-ingest-maxar + spec: + restartPolicy: Never + containers: + - name: stac-ingest-maxar + image: ghcr.io/hotosm/openaerialmap/stac-ingester:main + resources: + limits: + cpu: "768m" + memory: "4096Mi" + requests: + cpu: "256m" + memory: "3072Mi" + command: + - "/bin/sh" + - "-c" + args: + - | + # Exit immediately if a command exits with a non-zero status + set -e + + # Database connection configured through standard PG* environment variables + # Environment variables are already set by the container + + echo "Beginning sync" + hotosm sync-maxar --uploaded-since 86700 + + echo "Job complete" + env: + - name: PGUSER + valueFrom: + secretKeyRef: + key: user + name: eoapi-pguser-eoapi + - name: PGPORT + valueFrom: + secretKeyRef: + key: port + name: eoapi-pguser-eoapi + - name: PGHOST + valueFrom: + secretKeyRef: + key: host + name: eoapi-pguser-eoapi + - name: PGPASSWORD + valueFrom: + secretKeyRef: + key: password + name: eoapi-pguser-eoapi + - name: PGDATABASE + valueFrom: + secretKeyRef: + key: dbname + name: eoapi-pguser-eoapi + backoffLimit: 2 diff --git a/kubernetes/manifests/sync-oam.yaml b/kubernetes/manifests/sync-oam.yaml new file mode 100644 index 00000000..f0bc1850 --- /dev/null +++ b/kubernetes/manifests/sync-oam.yaml @@ -0,0 +1,70 @@ +apiVersion: batch/v1 +kind: CronJob +metadata: + name: stac-ingest-oam + namespace: eoapi + labels: + app: stac-ingest-oam +spec: + schedule: "0 0 31 2 *" + # schedule: "*/30 * * * *" + jobTemplate: + spec: + template: + metadata: + labels: + app: stac-ingest-oam + spec: + restartPolicy: Never + containers: + - name: stac-ingest-oam + image: ghcr.io/hotosm/openaerialmap/stac-ingester:main + resources: + limits: + cpu: "768m" + memory: "4096Mi" + requests: + cpu: "256m" + memory: "3072Mi" + command: + - "/bin/sh" + - "-c" + args: + - | + # Exit immediately if a command exits with a non-zero status + set -e + + # Database connection 
configured through standard PG* environment variables + # Environment variables are already set by the container + + echo "Beginning sync" + hotosm sync-oam --uploaded-since 2100 + + echo "Job complete" + env: + - name: PGUSER + valueFrom: + secretKeyRef: + key: user + name: eoapi-pguser-eoapi + - name: PGPORT + valueFrom: + secretKeyRef: + key: port + name: eoapi-pguser-eoapi + - name: PGHOST + valueFrom: + secretKeyRef: + key: host + name: eoapi-pguser-eoapi + - name: PGPASSWORD + valueFrom: + secretKeyRef: + key: password + name: eoapi-pguser-eoapi + - name: PGDATABASE + valueFrom: + secretKeyRef: + key: dbname + name: eoapi-pguser-eoapi + backoffLimit: 3 diff --git a/terraform/README.md b/terraform/README.md index 01b5f5bf..ea265438 100644 --- a/terraform/README.md +++ b/terraform/README.md @@ -1,3 +1,111 @@ -// TODO 🚧 +# Cluster Infrastructure -https://github.com/developmentseed/eoapi-k8s-terraform \ No newline at end of file +See [original proposal](../docs/proposal.md) for background. + +Defines AWS infrastructure for an EKS cluster managed via OpenTofu. The initial setup is based on an [eoAPI-compatible build]. + +Resource Overview (AWS): +- Control Plane (EKS v1.32) +- Node Group (EC2 Amazon Linux + ASG) +- Dedicated VPC +- Shared cluster resources (ingress, autoscaler, certificate manager) +- Block storage (EBS) +- Bucket provisioning (S3) +- State locking remote backend (S3 + Dynamo) + - ([S3-only locking expected next release](https://github.com/opentofu/opentofu/issues/599)) + +Relevant Docs: +- [AWS EKS] +- [OpenTofu] + + +## Note + +Reconfigure the backend before running any `tofu` commands outside of GitHub Actions to avoid colocating local state with live state. + +```tf +# ./versions.tf + +terraform { + # Change backend or use different buckets/tables + backend "s3" { } +} +``` + +OpenTofu supports the use of [variables in backend configuration](https://opentofu.org/docs/language/settings/backends/configuration/#variables-and-locals). 
The provided `local.tfvars` file intentionally omits state resources to prompt the user if new values aren't defined in an s3 backend. + + +## Tips + Commands + +### Setup + +#### AWS Auth + +OpenTofu needs to connect with AWS for most all operations. + +Make sure the AWS CLI is [installed](https://docs.aws.amazon.com/cli/v1/userguide/cli-chap-install.html) with a profile configured to access the target deployment account/region. Setup details vary based on organization policies, principal types, authentication methods, etc. + +- Non-default AWS credentials are typically set per session: + ```sh + $ export AWS_PROFILE= + $ tofu plan #... + $ tofu apply #... + $ #... + ``` +- Or per command: + ```sh + $ AWS_PROFILE= tofu plan #... + $ AWS_PROFILE= tofu apply #... + $ AWS_PROFILE= #... + ``` +- See [credential precedence] for other sourcing options. + +#### Working Directory + +OpenTofu operations reference the current working directory. + +- Either switch to the (root) module _before_ running `tofu` commands: + ```sh + $ cd terraform + $ tofu plan + ``` +- Or reference the correct directory _in_ `tofu` commands: + ```sh + $ tofu -chdir=terraform plan + ``` + +### Basic Provision + Teardown + +```sh +$ tofu init +# ... +# OpenTofu has been successfully initialized! +``` +```sh +$ tofu validate +# Success! The configuration is valid. +``` +```sh +$ tofu plan +# ... +# Plan: X to add, Y to change, Z to destroy. +``` +```sh +$ tofu apply +# ... +# Apply complete! Resources: X added, Y changed, Z destroyed. +``` +```sh +$ tofu destroy +# ... +# Destroy complete! Resources: X destroyed. 
+``` + +[AWS EKS]: + https://docs.aws.amazon.com/eks/ +[OpenTofu]: + https://opentofu.org/docs/ +[eoAPI-compatible build]: + https://github.com/developmentseed/eoapi-k8s-terraform +[credential precedence]: + https://docs.aws.amazon.com/cli/v1/userguide/cli-chap-configure.html \ No newline at end of file diff --git a/terraform/main.tf b/terraform/main.tf index c1ea9f20..894e43e6 100644 --- a/terraform/main.tf +++ b/terraform/main.tf @@ -12,7 +12,7 @@ data "aws_availability_zones" "available" { locals { cluster_prefix = "hotosm-${var.environment}" - cluster_admins = concat(var.cluster_admin_access_role_arns, [var.cluster_ci_access_role_arn]) + cluster_admins = concat([var.cluster_ci_access_role_arn], var.cluster_admin_access_role_arns) azs = slice(sort(data.aws_availability_zones.available.names), 0, min(4, length(data.aws_availability_zones.available.names))) vpc_cidr = "10.0.0.0/16" } \ No newline at end of file diff --git a/terraform/network.tf b/terraform/network.tf index 60b44f8e..e6bb6f57 100644 --- a/terraform/network.tf +++ b/terraform/network.tf @@ -9,5 +9,5 @@ module "vpc" { azs = local.azs private_subnets = [for k, v in local.azs : cidrsubnet(local.vpc_cidr, 4, k)] - public_subnets = [for k, v in local.azs : cidrsubnet(local.vpc_cidr, 8, k + 48)] + public_subnets = [for k, v in local.azs : cidrsubnet(local.vpc_cidr, 8, k + 64)] } diff --git a/terraform/variables.tf b/terraform/variables.tf index 2e9f04e1..9da33dd3 100644 --- a/terraform/variables.tf +++ b/terraform/variables.tf @@ -13,6 +13,22 @@ variable "region" { EOT } +variable "state_bucket" { + type = string + default = "" + description = <<-EOT + S3 bucket for remote state backend + EOT +} + +variable "lock_table" { + type = string + default = "" + description = <<-EOT + Dynamo table to use for consistency checks (when using an s3 backend) + EOT +} + variable "tags" { type = map(string) default = {} diff --git a/terraform/vars/local.tfvars b/terraform/vars/local.tfvars new file mode 100644 index 
00000000..20741356 --- /dev/null +++ b/terraform/vars/local.tfvars @@ -0,0 +1,2 @@ +environment = "develop" +region = "us-east-1" diff --git a/terraform/vars/production.tfvars b/terraform/vars/production.tfvars index cdee0beb..bec14d08 100644 --- a/terraform/vars/production.tfvars +++ b/terraform/vars/production.tfvars @@ -1,7 +1,9 @@ environment = "production" region = "us-east-1" +state_bucket = "hotosm-terraform" +lock_table = "k8s-infra" instance_type = "t3.xlarge" -bucket_names = ["pgstac-backup", ] +bucket_names = ["hotosm-pgstac-backup", ] tags = { project = "k8s-control" } \ No newline at end of file diff --git a/terraform/versions.tf b/terraform/versions.tf index 46446d8a..14719d14 100644 --- a/terraform/versions.tf +++ b/terraform/versions.tf @@ -1,9 +1,9 @@ terraform { backend "s3" { region = var.region - bucket = "hotosm-terraform" + bucket = var.state_bucket key = "${var.environment}/k8s-infra/terraform.tfstate" - dynamodb_table = "k8s-infra" + dynamodb_table = var.lock_table } required_providers {