# Workflow file: .github/workflows/aws_rosa_hcp_single_region_daily_cleanup.yml
# Tests - Daily Cleanup - AWS OpenShift ROSA HCP Single Region
---
name: Tests - Daily Cleanup - AWS OpenShift ROSA HCP Single Region

on:
  workflow_dispatch:
    inputs:
      max_age_hours_cluster:
        description: Maximum age of clusters in hours
        required: true
        default: '12'
  pull_request:
    paths:
      - .github/workflows/aws_rosa_hcp_single_region_daily_cleanup.yml
      - .tool-versions
      - aws/openshift/rosa-hcp-single-region/**
      - '!aws/openshift/rosa-hcp-single-region/test/golden/**'
      - .github/actions/aws-generic-terraform-cleanup/**
      - .github/actions/aws-configure-cli/**
  schedule:
    - cron: '0 4 * * *' # At 04:00 everyday.

# limit to a single execution per actor of this workflow
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  # we don't cancel the previous run, so it can finish it and let clusters in a proper state
  cancel-in-progress: false

env:
  IS_SCHEDULE: ${{ (contains(github.head_ref, 'schedules/') || github.event_name == 'schedule') && 'true' || 'false' }}
  MAX_AGE_HOURS_CLUSTER: ${{ github.event.inputs.max_age_hours_cluster || '12' }}
  # please keep those variables synced with aws_rosa_hcp_tests.yml
  AWS_PROFILE: infex
  S3_BACKEND_BUCKET: tests-ra-aws-rosa-hcp-tf-state-eu-central-1
  S3_BUCKET_REGION: eu-central-1
  AWS_REGION: eu-west-2
  S3_BACKEND_BUCKET_PREFIX: aws/openshift/rosa-hcp-single-region/ # keep it synced with the name of the module for simplicity
jobs:
  # Decides whether the rest of the workflow should run (label-based skip).
  triage:
    runs-on: ubuntu-latest
    outputs:
      should_skip: ${{ steps.skip_check.outputs.should_skip }}
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
      - name: Check labels
        id: skip_check
        uses: ./.github/actions/internal-triage-skip

  # Tears down aged ROSA HCP clusters via the generic terraform cleanup action,
  # with one retry pass and a Slack notification on scheduled-run failure.
  cleanup-clusters:
    needs:
      - triage
    if: needs.triage.outputs.should_skip == 'false'
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
        with:
          ref: ${{ github.ref }}
          fetch-depth: 0

      - name: Install asdf tools with cache
        uses: camunda/infraex-common-config/./.github/actions/asdf-install-tooling@193a21e1e56c9a65517a822224ac3b4ffa4d6ae4 # 1.5.9

      - name: Use repo .tool-version as global version
        run: cp .tool-versions ~/.tool-versions

      - name: Set current target branch
        id: target-branch
        run: |
          set -euo pipefail
          TARGET_BRANCH=$(cat .target-branch)
          echo "TARGET_BRANCH=$TARGET_BRANCH" | tee -a "$GITHUB_OUTPUT"

      - name: Import Secrets
        id: secrets
        uses: hashicorp/vault-action@4c06c5ccf5c0761b6029f56cfb1dcf5565918a3b # v3
        with:
          url: ${{ secrets.VAULT_ADDR }}
          method: approle
          roleId: ${{ secrets.VAULT_ROLE_ID }}
          secretId: ${{ secrets.VAULT_SECRET_ID }}
          exportEnv: false
          secrets: |
            secret/data/products/infrastructure-experience/ci/common RH_OPENSHIFT_TOKEN;

      - name: Configure AWS CLI
        uses: ./.github/actions/aws-configure-cli
        with:
          vault-addr: ${{ secrets.VAULT_ADDR }}
          vault-role-id: ${{ secrets.VAULT_ROLE_ID }}
          vault-secret-id: ${{ secrets.VAULT_SECRET_ID }}
          aws-profile: ${{ env.AWS_PROFILE }}
          aws-region: ${{ env.AWS_REGION }}

      - name: Delete clusters
        id: delete_clusters
        continue-on-error: true
        timeout-minutes: 125
        uses: ./.github/actions/aws-generic-terraform-cleanup
        env:
          RHCS_TOKEN: ${{ steps.secrets.outputs.RH_OPENSHIFT_TOKEN }}
        with:
          tf-bucket: ${{ env.S3_BACKEND_BUCKET }}
          tf-bucket-region: ${{ env.S3_BUCKET_REGION }}
          max-age-hours: ${{ env.MAX_AGE_HOURS_CLUSTER }}
          tf-bucket-key-prefix: ${{ env.S3_BACKEND_BUCKET_PREFIX }}${{ steps.target-branch.outputs.TARGET_BRANCH }}/
          openshift: 'true'
          delete-ghost-rosa-clusters: 'true'
          modules-order: vpn,cluster

      # The previous step has a continue-on-error set to true in case of schedule run.
      # This means that the workflow is not marked as failed, but the step is.
      # We can't use the `if: failure()` condition here, as the overall job is succeeding.
      # Instead, we check the outcome of the previous step and if it failed, we retry the deletion.
      # If the retry fails, then the slack notification will be triggered as normally.
      # There are cases where the deletion of resources fails due to dependencies.
      - name: Retry delete clusters
        id: retry_delete_clusters
        if: steps.delete_clusters.outcome == 'failure'
        timeout-minutes: 125
        uses: ./.github/actions/aws-generic-terraform-cleanup
        env:
          RHCS_TOKEN: ${{ steps.secrets.outputs.RH_OPENSHIFT_TOKEN }}
        with:
          tf-bucket: ${{ env.S3_BACKEND_BUCKET }}
          tf-bucket-region: ${{ env.S3_BUCKET_REGION }}
          max-age-hours: 0 # the previous step alters the age and resets it to 0
          tf-bucket-key-prefix: ${{ env.S3_BACKEND_BUCKET_PREFIX }}${{ steps.target-branch.outputs.TARGET_BRANCH }}/
          modules-order: vpn,cluster
          openshift: 'true'
          delete-ghost-rosa-clusters: 'true'

      - name: Notify in Slack in case of failure
        id: slack-notification
        if: ${{ failure() && env.IS_SCHEDULE == 'true' && steps.retry_delete_clusters.outcome == 'failure' }}
        uses: camunda/infraex-common-config/.github/actions/report-failure-on-slack@193a21e1e56c9a65517a822224ac3b4ffa4d6ae4 # 1.5.9
        with:
          vault_addr: ${{ secrets.VAULT_ADDR }}
          vault_role_id: ${{ secrets.VAULT_ROLE_ID }}
          vault_secret_id: ${{ secrets.VAULT_SECRET_ID }}