[DO NOT MERGE] Setup minimal deployment #923
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Copyright 2024 Canonical Ltd.
# See LICENSE file for licensing details.
name: Mimic UAT Tests

on:
  workflow_dispatch:
  pull_request:
    paths-ignore:
      - "docs/**"
      - "**.md"

jobs:
  integration-tests:
    # NOTE(review): the matrix defines `backend`, not `bundle-backend`; the
    # original name used the wrong key and rendered an empty segment.
    name: "UATs (MicroK8s:${{ matrix.k8s-version }}): spark-${{ matrix.spark-version }} | ${{ matrix.backend }} | ${{ matrix.storage-backend }} | juju ${{ matrix.juju.agent }}"
    runs-on: ["self-hosted-linux-amd64-jammy-xlarge"]
    timeout-minutes: 150
    strategy:
      fail-fast: false
      matrix:
        spark-version:
          - "3.4.4"
        backend:
          - terraform
        storage-backend:
          - s3
          # - azure_storage
        juju:
          - snap_channel: "3.6/stable"
            agent: "3.6.21"
        k8s-version:
          # - "1.28"
          - "1.30"
          # - "1.32"
          # - "1.34"
    steps:
      - name: Checkout repo
        uses: actions/checkout@v6

      # Debug-only step: starts a detached tmate session unconditionally.
      # Re-enable the failure() guard before merging ([DO NOT MERGE] PR).
      - name: Setup tmate session
        # if: ${{ failure() }}
        uses: canonical/action-tmate@main
        with:
          detached: true

      - id: setup-python
        name: Setup Python
        uses: actions/setup-python@v5.0.0
        with:
          python-version: "3.10"
          architecture: x64

      # Bootstraps MicroK8s and a Juju controller on the self-hosted runner.
      - name: Setup operator environment
        uses: charmed-kubernetes/actions-operator@main
        with:
          juju-channel: ${{ matrix.juju.snap_channel }}
          provider: microk8s
          channel: "${{ matrix.k8s-version }}-strict/stable"
          # bootstrap-options: "--agent-version ${{ matrix.juju.agent }}"
          bootstrap-options: '--logging-config="<root>=DEBUG;juju.worker.undertaker=TRACE;juju.kubernetes.provider=TRACE"'
          microk8s-group: snap_microk8s
          microk8s-addons: "hostpath-storage rbac dns metallb:10.64.140.43-10.64.140.49"

      # - name: Patch juju installation
      #   run: |
      #     sudo snap install yq
      #     CONTROLLER_NAME=$(juju controllers --format yaml | yq .current-controller)
      #     juju destroy-controller $CONTROLLER_NAME --destroy-storage --no-prompt --destroy-all-models
      #     sudo snap refresh juju --revision 33784
      #     sudo -i -u ubuntu juju bootstrap microk8s $CONTROLLER_NAME --agent-version "${{ matrix.juju.agent }}"

      - name: Install tox & poetry
        run: |
          pip install tox
          pip install poetry

      # Stands up a throwaway S3-compatible endpoint (MicroCeph RGW inside an
      # LXD container) and writes its URL/credentials to .env for the tests.
      - name: Setup spark object storage
        id: spark-object-storage
        if: ${{ contains(matrix.storage-backend, 's3') }}
        shell: bash
        env:
          CLOUD_INIT_FILE: |
            #cloud-config
            package_upgrade: true
            snap:
              commands:
                0: snap install microceph
                1: sudo microceph cluster bootstrap
                2: sudo microceph disk add loop,1G,3
                3: sudo microceph enable rgw
                4: sudo microceph.radosgw-admin user create --uid test --display-name test --access-key=foo --secret-key=bar
        run: |
          echo -e "$CLOUD_INIT_FILE" > microceph_rgw.yaml
          lxc init ubuntu:jammy ceph -c limits.cpu=4 -c limits.memory=2GB -d root,size=5GB
          lxc config set ceph cloud-init.user-data - < microceph_rgw.yaml
          lxc start ceph
          # Wait until the container's default user exists, i.e. cloud-init
          # has started, before blocking on its completion.
          while ! lxc exec ceph -- id -u ubuntu &>/dev/null; do sleep 0.5; done
          lxc exec ceph -- cloud-init status --wait
          echo -e "S3_SERVER_URL=http://$(lxc list --format json | yq '.[] | select(.name == "ceph") .state.network.eth0.addresses.[] | select(.family == "inet") .address'):80/\nS3_ACCESS_KEY=foo\nS3_SECRET_KEY=bar" > .env
          lxc list
          cat .env

      # Exposes a pytest mark expression: scheduled runs include unstable
      # tests, all other triggers skip them.
      - name: Select tests
        id: select-tests
        run: |
          if [ "${{ github.event_name }}" == "schedule" ]
          then
            echo Running unstable and stable tests
            echo "mark_expression=" >> $GITHUB_OUTPUT
          else
            echo Skipping unstable tests
            echo "mark_expression=not unstable" >> $GITHUB_OUTPUT
          fi

      - id: setup-terraform
        name: Install terraform if needed
        run: |
          if ! [ -x "$(command -v terraform)" ]; then
            echo "Installing terraform from snap"
            sudo snap install terraform --classic
          fi

      - id: tests-integration
        name: Run Integration Tests
        timeout-minutes: 30
        env:
          AZURE_STORAGE_ACCOUNT: ${{ secrets.AZURE_STORAGE_ACCOUNT }}
          AZURE_STORAGE_KEY: ${{ secrets.AZURE_STORAGE_KEY }}
        run: |
          UUID=$(uuidgen)
          echo "Using UUID: ${UUID}"
          cd python
          # Run simple tests to make sure backend works and to clean up if needed
          # NOTE(review): the matrix key is `spark-version`, not `version`; the
          # original expression expanded to an empty string.
          tox run -e integration-object-storage -- --spark-version ${{ matrix.spark-version }} --storage-backend ${{ matrix.storage-backend }} --uuid $UUID
          # Sometimes deleting resources on the clouds may take some time
          sleep 60
          # The first tests deploy the bundle
          tox run -e integration-bundle -- --backend ${{ matrix.backend }} --spark-version ${{ matrix.spark-version }} --storage-backend ${{ matrix.storage-backend }} --model test-uat --cos-model cos --uuid $UUID
          # Next we only use --no-deploy flag
          # tox run -e integration-kyuubi -- --backend ${{ matrix.backend }} --spark-version ${{ matrix.spark-version }} --storage-backend ${{ matrix.storage-backend }} --no-deploy --keep-models --model test-uat --cos-model cos --uuid $UUID
          # tox run -e integration-sparkjob -- --backend ${{ matrix.backend }} --spark-version ${{ matrix.spark-version }} --storage-backend ${{ matrix.storage-backend }} --no-deploy --keep-models --model test-uat --cos-model cos --uuid $UUID
          # We re-run the last tests to make sure that the tests are idempotent
          # tox run -e integration-kyuubi -- --backend ${{ matrix.backend }} --spark-version ${{ matrix.spark-version }} --storage-backend ${{ matrix.storage-backend }} --no-deploy --keep-models --model test-uat --cos-model cos --uuid $UUID
          # tox run -e integration-sparkjob -- --backend ${{ matrix.backend }} --spark-version ${{ matrix.spark-version }} --storage-backend ${{ matrix.storage-backend }} --no-deploy --model test-uat --cos-model cos --uuid $UUID
          # # Run backup / restore tests separately without --no-deploy and --keep-models
          # tox run -e integration-backup-restore -- --backend ${{ matrix.backend }} --spark-version ${{ matrix.spark-version }} --storage-backend ${{ matrix.storage-backend }} --model test-uat --cos-model cos --uuid $UUID

      - id: collect-logs
        name: Collect logs if job failed
        shell: bash
        if: ${{ failure() }}
        run: |
          juju-crashdump --model test-uat
          # NOTE(review): this workflow declares no `inputs`, so the original
          # `inputs.cos-model` guard was always empty and the COS crashdump was
          # never collected. The tests deploy into model `cos` (--cos-model
          # above); collect it best-effort in case deployment never got there.
          juju-crashdump --model cos || true

      - id: debug-info
        name: Debug info
        shell: bash
        if: ${{ failure() }}
        run: |
          printf '\nDisk usage after tests\n'
          df --human-readable
          printf '\nJuju status(es)\n'
          juju status -m test-uat
          printf '\nCos status\n'
          juju status -m cos
          printf '\nK8s pods\n'
          kubectl get pods -n test-uat -o wide
          printf '\nCos pods\n'
          kubectl get pods -n cos -o wide
          printf '\nDebug logs\n'
          juju debug-log -m test-uat --replay -l INFO | tail -n 500
          printf '\nCos logs\n'
          juju debug-log -m cos --replay -l INFO | tail -n 500