IBM Cloud Hot Cluster Setup #16
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| name: IBM Cloud Hot Cluster Setup | |
| on: | |
| workflow_dispatch: | |
| inputs: | |
| infrastructure_type: | |
| description: 'Infrastructure type: classic, vpc, or ipi (ipi = diagnostics only)' | |
| required: true | |
| default: 'classic' | |
| type: choice | |
| options: | |
| - classic | |
| - vpc | |
| - ipi | |
| cluster_name: | |
| description: 'Cluster name' | |
| required: true | |
| default: 'kubevirt-plugin-ci' | |
| type: string | |
| zone: | |
| description: 'Zone (classic: wdc04, fra02; vpc: us-south-1, eu-de-1)' | |
| required: true | |
| default: 'us-south-1' | |
| type: string | |
| openshift_version: | |
| description: 'OpenShift version' | |
| required: true | |
| default: '4.20_openshift' | |
| type: string | |
| worker_flavor: | |
| description: 'Worker node flavor (classic bare metal: mb4c.4x32; vpc: bx2.8x32, cx2.4x8)' | |
| required: true | |
| default: 'bx2.8x32' | |
| type: string | |
| worker_count: | |
| description: 'Number of worker nodes (at least 2 so ingress is happy)' | |
| required: true | |
| default: '2' | |
| type: string | |
| kvm_emulation: | |
| description: 'KVM emulation (true for vpc/shared, false for bare metal)' | |
| required: true | |
| default: true | |
| type: boolean | |
| cos_instance_crn: | |
| description: 'COS instance CRN for VPC internal registry (required for vpc, ignored for classic)' | |
| required: false | |
| default: '' | |
| type: string | |
| permissions: | |
| contents: read | |
| env: | |
| CLUSTER_NAME: ${{ inputs.cluster_name || 'kubevirt-plugin-ci' }} | |
| INFRASTRUCTURE_TYPE: ${{ inputs.infrastructure_type || 'classic' }} | |
| jobs: | |
| provision-cluster: | |
| name: Provision OpenShift Cluster | |
| runs-on: ubuntu-latest | |
| timeout-minutes: 360 | |
| steps: | |
| - name: Checkout | |
| uses: actions/checkout@v5 | |
| - name: Setup IBM Cloud CLI | |
| uses: IBM/actions-ibmcloud-cli@v1 | |
| with: | |
| api_key: ${{ secrets.IC_KEY }} | |
| region: eu-de | |
| group: cnv-ui | |
| plugins: kubernetes-service, container-registry, vpc-infrastructure | |
| - name: Log IBM Cloud IAM diagnostics | |
| id: iam_diagnostics | |
| continue-on-error: true | |
| env: | |
| WORKER_ZONE: ${{ inputs.zone }} | |
| INFRASTRUCTURE_TYPE: ${{ inputs.infrastructure_type || 'classic' }} | |
| run: bash ./ci-scripts/log-ibmcloud-iam-diagnostics.sh | |
| - name: Upload IAM diagnostics log | |
| if: always() && steps.iam_diagnostics.outcome != 'skipped' | |
| continue-on-error: true | |
| uses: actions/upload-artifact@v6 | |
| with: | |
| name: ibmcloud-iam-diagnostics-${{ github.run_id }} | |
| path: ${{ runner.temp }}/ibmcloud-iam-diagnostics.txt | |
| retention-days: 14 | |
| if-no-files-found: warn | |
| - name: Check for existing cluster | |
| if: inputs.infrastructure_type != 'ipi' | |
| id: check_cluster | |
| run: | | |
| if ibmcloud oc cluster get --cluster "${CLUSTER_NAME}" &>/dev/null; then | |
| echo "Cluster '${CLUSTER_NAME}' already exists" | |
| echo "exists=true" >> "$GITHUB_OUTPUT" | |
| else | |
| echo "Cluster '${CLUSTER_NAME}' does not exist, will create" | |
| echo "exists=false" >> "$GITHUB_OUTPUT" | |
| fi | |
| # ────────────────────────────────────────────────────────────────────── | |
| # Classic infrastructure path | |
| # ────────────────────────────────────────────────────────────────────── | |
| - name: Verify zone and flavor (classic) | |
| if: steps.check_cluster.outputs.exists == 'false' && inputs.infrastructure_type == 'classic' | |
| env: | |
| ZONE: ${{ inputs.zone }} | |
| FLAVOR: ${{ inputs.worker_flavor }} | |
| run: | | |
| echo "Fetching classic infrastructure locations and flavors..." | |
| LOCATIONS_JSON=$( | |
| ibmcloud oc locations --provider classic --show-flavors --output json |\ | |
| jq '[.[] | select(.kind=="dc")]' | |
| ) | |
| echo "Checking zone '${ZONE}' exists..." | |
| ZONE_EXISTS=$( | |
| echo "${LOCATIONS_JSON}" |\ | |
| jq -r --arg z "${ZONE}" \ | |
| '[.[] | select(.id == $z)] | length' | |
| ) | |
| if [[ "${ZONE_EXISTS}" -ne 1 ]]; then | |
| echo "ERROR: Zone '${ZONE}' not found in classic infrastructure locations." | |
| echo "" | |
| echo "Available zones:" | |
| echo "${LOCATIONS_JSON}" | jq -r '.[].id' | sort | |
| exit 1 | |
| fi | |
| echo "Zone '${ZONE}' exists" | |
| echo "Checking flavor '${FLAVOR}' is available in zone '${ZONE}'..." | |
| FLAVOR_EXISTS=$( | |
| echo "${LOCATIONS_JSON}" |\ | |
| jq -r --arg z "${ZONE}" --arg f "${FLAVOR}" \ | |
| '.[] | select(.id == $z) | .flavors | split(",") | any(index($f))' | |
| ) | |
| if [[ "${FLAVOR_EXISTS}" == "false" ]]; then | |
| echo "ERROR: Flavor '${FLAVOR}' is not available in zone '${ZONE}'." | |
| echo "" | |
| echo "Available flavors in '${ZONE}':" | |
| echo "${LOCATIONS_JSON}" | jq -r --arg z "${ZONE}" '.[] | select(.id == $z) | .flavors[]? | .id' | sort | |
| exit 2 | |
| fi | |
| echo "Flavor '${FLAVOR}' is available in zone '${ZONE}'" | |
| - name: Create ROKS cluster (classic) | |
| if: steps.check_cluster.outputs.exists == 'false' && inputs.infrastructure_type == 'classic' | |
| env: | |
| ZONE: ${{ inputs.zone }} | |
| run: | | |
| echo "Looking up existing VLANs in zone '${ZONE}'..." | |
| VLAN_JSON=$(ibmcloud oc vlan ls --zone "${ZONE}" --output json 2>/dev/null || echo "[]") | |
| PRIVATE_VLAN=$(echo "${VLAN_JSON}" | jq -r '[.[] | select(.type == "private")] | first | .id // empty') | |
| PUBLIC_VLAN=$(echo "${VLAN_JSON}" | jq -r '[.[] | select(.type == "public")] | first | .id // empty') | |
| if [[ -n "${PRIVATE_VLAN}" ]]; then | |
| echo "Reusing existing private VLAN: ${PRIVATE_VLAN}" | |
| echo "Reusing existing public VLAN: ${PUBLIC_VLAN:-'(none)'}" | |
| else | |
| echo "No existing VLANs in zone, new VLANs will be created" | |
| fi | |
| echo "Creating cluster '${CLUSTER_NAME}' with ${{ inputs.worker_count }}x ${{ inputs.worker_flavor }} workers in zone ${ZONE}..." | |
| ibmcloud oc cluster create classic \ | |
| --name "${CLUSTER_NAME}" \ | |
| --version "${{ inputs.openshift_version }}" \ | |
| --flavor "${{ inputs.worker_flavor }}" \ | |
| --workers "${{ inputs.worker_count }}" \ | |
| --zone "${ZONE}" \ | |
| --private-vlan "${PRIVATE_VLAN}" \ | |
| --public-vlan "${PUBLIC_VLAN}" | |
| # ────────────────────────────────────────────────────────────────────── | |
| # VPC Gen2 infrastructure path | |
| # ────────────────────────────────────────────────────────────────────── | |
| - name: Provision VPC resources | |
| if: steps.check_cluster.outputs.exists == 'false' && inputs.infrastructure_type == 'vpc' | |
| id: vpc_resources | |
| env: | |
| ZONE: ${{ inputs.zone }} | |
| VPC_NAME: ${{ inputs.cluster_name || 'kubevirt-plugin-ci' }}-vpc | |
| run: | | |
| echo "=== VPC Gen2 provisioning ===" | |
| # Derive region from zone (e.g. us-south-1 -> us-south) | |
| VPC_REGION="${ZONE%-*}" | |
| echo "VPC region: ${VPC_REGION}, zone: ${ZONE}" | |
| # Target the VPC region | |
| ibmcloud target -r "${VPC_REGION}" | |
| # Create or reuse VPC | |
| VPC_ID=$(ibmcloud is vpcs --output json 2>/dev/null | jq -r --arg n "${VPC_NAME}" '.[] | select(.name == $n) | .id // empty') | |
| if [[ -z "${VPC_ID}" ]]; then | |
| echo "Creating VPC '${VPC_NAME}'..." | |
| VPC_ID=$(ibmcloud is vpc-create "${VPC_NAME}" --output json | jq -r '.id') | |
| echo "Created VPC: ${VPC_ID}" | |
| else | |
| echo "Reusing existing VPC '${VPC_NAME}': ${VPC_ID}" | |
| fi | |
| # Create or reuse subnet | |
| SUBNET_NAME="${VPC_NAME}-subnet-${ZONE}" | |
| SUBNET_ID=$(ibmcloud is subnets --output json 2>/dev/null | jq -r --arg n "${SUBNET_NAME}" '.[] | select(.name == $n) | .id // empty') | |
| if [[ -z "${SUBNET_ID}" ]]; then | |
| echo "Creating subnet '${SUBNET_NAME}' in zone '${ZONE}'..." | |
| SUBNET_ID=$(ibmcloud is subnet-create "${SUBNET_NAME}" "${VPC_ID}" --zone "${ZONE}" --ipv4-address-count 256 --output json | jq -r '.id') | |
| echo "Created subnet: ${SUBNET_ID}" | |
| else | |
| echo "Reusing existing subnet '${SUBNET_NAME}': ${SUBNET_ID}" | |
| fi | |
| # Create or reuse public gateway (required for console/OperatorHub access) | |
| GW_NAME="${VPC_NAME}-gw-${ZONE}" | |
| GW_ID=$(ibmcloud is public-gateways --output json 2>/dev/null | jq -r --arg n "${GW_NAME}" '.[] | select(.name == $n) | .id // empty') | |
| if [[ -z "${GW_ID}" ]]; then | |
| echo "Creating public gateway '${GW_NAME}'..." | |
| GW_ID=$(ibmcloud is public-gateway-create "${GW_NAME}" "${VPC_ID}" "${ZONE}" --output json | jq -r '.id') | |
| echo "Created public gateway: ${GW_ID}" | |
| else | |
| echo "Reusing existing public gateway '${GW_NAME}': ${GW_ID}" | |
| fi | |
| # Attach public gateway to subnet (idempotent — no-op if already attached) | |
| echo "Attaching public gateway to subnet..." | |
| ibmcloud is subnet-update "${SUBNET_ID}" --pgw "${GW_ID}" 2>/dev/null || true | |
| echo "vpc_id=${VPC_ID}" >> "$GITHUB_OUTPUT" | |
| echo "subnet_id=${SUBNET_ID}" >> "$GITHUB_OUTPUT" | |
| echo "vpc_region=${VPC_REGION}" >> "$GITHUB_OUTPUT" | |
| - name: Create ROKS cluster (vpc) | |
| if: steps.check_cluster.outputs.exists == 'false' && inputs.infrastructure_type == 'vpc' | |
| env: | |
| ZONE: ${{ inputs.zone }} | |
| VPC_ID: ${{ steps.vpc_resources.outputs.vpc_id }} | |
| SUBNET_ID: ${{ steps.vpc_resources.outputs.subnet_id }} | |
| COS_CRN: ${{ inputs.cos_instance_crn }} | |
| COS_INSTANCE_NAME: ${{ inputs.cluster_name || 'kubevirt-plugin-ci' }}-cos | |
| run: | | |
| if [[ -z "${COS_CRN}" ]]; then | |
| echo "No COS CRN provided — looking for existing COS instance '${COS_INSTANCE_NAME}'..." | |
| COS_CRN=$(ibmcloud resource service-instances --service-name cloud-object-storage --output json 2>/dev/null \ | |
| | jq -r --arg n "${COS_INSTANCE_NAME}" '.[] | select(.name == $n) | .crn // empty' || true) | |
| if [[ -z "${COS_CRN}" ]]; then | |
| echo "Creating COS instance '${COS_INSTANCE_NAME}'..." | |
| ibmcloud resource service-instance-create "${COS_INSTANCE_NAME}" cloud-object-storage \ | |
| 744bfc56-d12c-4866-88d5-dac9139e0e5d global \ | |
| -d premium-global-deployment | |
| COS_CRN=$(ibmcloud resource service-instances --service-name cloud-object-storage --output json \ | |
| | jq -r --arg n "${COS_INSTANCE_NAME}" '.[] | select(.name == $n) | .crn') | |
| echo "Created COS instance: ${COS_CRN}" | |
| else | |
| echo "Reusing existing COS instance: ${COS_CRN}" | |
| fi | |
| fi | |
| echo "Creating VPC cluster '${CLUSTER_NAME}' with ${{ inputs.worker_count }}x ${{ inputs.worker_flavor }} workers in zone ${ZONE}..." | |
| ibmcloud oc cluster create vpc-gen2 \ | |
| --name "${CLUSTER_NAME}" \ | |
| --version "${{ inputs.openshift_version }}" \ | |
| --flavor "${{ inputs.worker_flavor }}" \ | |
| --workers "${{ inputs.worker_count }}" \ | |
| --zone "${ZONE}" \ | |
| --vpc-id "${VPC_ID}" \ | |
| --subnet-id "${SUBNET_ID}" \ | |
| --cos-instance "${COS_CRN}" \ | |
| --disable-outbound-traffic-protection | |
| # ────────────────────────────────────────────────────────────────────── | |
| # Common steps (both classic and VPC converge here; skipped for ipi) | |
| # ────────────────────────────────────────────────────────────────────── | |
| - name: Wait for cluster to be ready to use | |
| if: inputs.infrastructure_type != 'ipi' | |
| run: | | |
| ./ci-scripts/check-roks-cluster-state.sh | |
| - name: Install oc client from cluster version | |
| if: inputs.infrastructure_type != 'ipi' | |
| run: | | |
| CLUSTER_JSON="$(ibmcloud oc cluster get --cluster "${CLUSTER_NAME}" --output json)" | |
| export CLUSTER_JSON | |
| bash ./ci-scripts/install-oc-client.sh | |
| - name: Configure kubeconfig | |
| if: inputs.infrastructure_type != 'ipi' | |
| run: | | |
| ibmcloud oc cluster config --cluster "${CLUSTER_NAME}" --admin | |
| oc cluster-info | |
| oc get nodes -o wide | |
| - name: Install HCO | |
| if: inputs.infrastructure_type != 'ipi' | |
| env: | |
| KVM_EMULATION: ${{ inputs.kvm_emulation }} | |
| run: | | |
| ./ci-scripts/install-hco.sh | |
| - name: Verify ARC secrets | |
| if: inputs.infrastructure_type != 'ipi' | |
| run: | | |
| HAS_APP=$([ -n "${{ secrets.ARC_GITHUB_APP_ID }}" ] && [ -n "${{ secrets.ARC_GITHUB_APP_INSTALL_ID }}" ] && [ -n "${{ secrets.ARC_GITHUB_APP_PRIVATE_KEY }}" ] && echo "yes" || echo "no") | |
| HAS_PAT=$([ -n "${{ secrets.ARC_GITHUB_PAT }}" ] && echo "yes" || echo "no") | |
| if [[ "$HAS_APP" != "yes" && "$HAS_PAT" != "yes" ]]; then | |
| echo "::error::ARC authentication secrets are missing or empty." | |
| echo "Configure either:" | |
| echo " - ARC_GITHUB_APP_ID, ARC_GITHUB_APP_INSTALL_ID, ARC_GITHUB_APP_PRIVATE_KEY (GitHub App), or" | |
| echo " - ARC_GITHUB_PAT (Personal Access Token)" | |
| echo "in Settings → Secrets and variables → Actions for this repository (or its organization)." | |
| exit 1 | |
| fi | |
| echo "ARC secrets are present." | |
| - name: Build ARC runner image | |
| if: inputs.infrastructure_type != 'ipi' | |
| id: build_runner | |
| env: | |
| OC_VERSION: '4.20' | |
| run: | | |
| IMAGE_REF=$(./ci-scripts/images/setup-arc-runner-image.sh | grep '^IMAGE_REF=' | cut -d= -f2-) | |
| echo "image_ref=${IMAGE_REF}" >> "$GITHUB_OUTPUT" | |
| - name: Install ARC | |
| if: inputs.infrastructure_type != 'ipi' | |
| env: | |
| ARC_CONFIG_URL: 'https://github.com/${{ github.repository }}' | |
| ARC_APP_ID: ${{ secrets.ARC_GITHUB_APP_ID }} | |
| ARC_APP_INSTALL_ID: ${{ secrets.ARC_GITHUB_APP_INSTALL_ID }} | |
| ARC_APP_PRIVATE_KEY: ${{ secrets.ARC_GITHUB_APP_PRIVATE_KEY }} | |
| ARC_PAT: ${{ secrets.ARC_GITHUB_PAT }} | |
| ARC_RUNNER_IMAGE: ${{ steps.build_runner.outputs.image_ref }} | |
| ARC_VERSION: '0.14.0' | |
| run: | | |
| ./ci-scripts/arc/install-arc-controller.sh | |
| ./ci-scripts/arc/install-runner-scale-set.sh | |
| - name: Install CI environment controller | |
| if: inputs.infrastructure_type != 'ipi' | |
| run: | | |
| ./ci-scripts/ci-env/install-ci-env-controller.sh | |
| - name: Verify cluster health | |
| if: inputs.infrastructure_type != 'ipi' | |
| env: | |
| GITHUB_REPOSITORY: ${{ github.repository }} | |
| run: | | |
| ./ci-scripts/check-cluster-health.sh | |
| - name: Setup summary | |
| if: always() | |
| run: | | |
| echo "## Hot Cluster Setup Summary" >> "$GITHUB_STEP_SUMMARY" | |
| echo "" >> "$GITHUB_STEP_SUMMARY" | |
| echo "| Parameter | Value |" >> "$GITHUB_STEP_SUMMARY" | |
| echo "|-----------|-------|" >> "$GITHUB_STEP_SUMMARY" | |
| echo "| Infrastructure | \`${{ inputs.infrastructure_type || 'classic' }}\` |" >> "$GITHUB_STEP_SUMMARY" | |
| echo "| Cluster | \`${CLUSTER_NAME}\` |" >> "$GITHUB_STEP_SUMMARY" | |
| echo "| Zone | \`${{ inputs.zone }}\` |" >> "$GITHUB_STEP_SUMMARY" | |
| echo "| OpenShift | \`${{ inputs.openshift_version }}\` |" >> "$GITHUB_STEP_SUMMARY" | |
| echo "| Worker Flavor | \`${{ inputs.worker_flavor }}\` |" >> "$GITHUB_STEP_SUMMARY" | |
| echo "| Workers | \`${{ inputs.worker_count }}\` |" >> "$GITHUB_STEP_SUMMARY" | |
| echo "| KVM Emulation | \`${{ inputs.kvm_emulation }}\` |" >> "$GITHUB_STEP_SUMMARY" | |
| echo "" >> "$GITHUB_STEP_SUMMARY" | |
| echo "### IAM diagnostics" >> "$GITHUB_STEP_SUMMARY" | |
| echo "" >> "$GITHUB_STEP_SUMMARY" | |
| if [[ "${{ inputs.infrastructure_type || 'classic' }}" == "classic" ]]; then | |
| echo "If cluster create failed with **E73e6**, expand the **Log IBM Cloud IAM diagnostics** step log, open the run **Summary** tab, or download the \`ibmcloud-iam-diagnostics\` artifact." >> "$GITHUB_STEP_SUMMARY" | |
| else | |
| echo "If cluster create failed, check VPC Infrastructure permissions in the **Log IBM Cloud IAM diagnostics** step." >> "$GITHUB_STEP_SUMMARY" | |
| fi | |
| echo "" >> "$GITHUB_STEP_SUMMARY" | |
| if oc cluster-info &>/dev/null; then | |
| echo "Cluster is **healthy** and ready for CI." >> "$GITHUB_STEP_SUMMARY" | |
| else | |
| echo "Cluster setup **may have issues**. Check the logs." >> "$GITHUB_STEP_SUMMARY" | |
| fi |