name: Minimum Dependency Version Tests on AWS EC2 (Reusable)

# Runs the Newton test suite against the lowest compatible versions of its
# direct PyPI dependencies, i.e. the version floors declared in pyproject.toml.
# The workflow can be called from another workflow (workflow_call) or started
# on its own (workflow_dispatch); scheduled_nightly.yml dispatches it through
# the workflow_dispatch API.

on:
  workflow_dispatch:
  workflow_call:
    secrets:
      GH_PERSONAL_ACCESS_TOKEN:
        required: true
      CODECOV_TOKEN:
        required: true

# Workflow-wide settings shared by the start/stop jobs (EC2 launch parameters,
# IAM role) and by the test job (HOME).
env:
  AWS_REGION: us-east-2
  AWS_INSTANCE_TYPE: g7e.2xlarge
  AWS_VOLUME_SIZE: 92
  AWS_VOLUME_TYPE: gp3
  AWS_SECURITY_GROUP_IDS: sg-07807c44e7f2a368a
  AWS_ROLE_ARN: arn:aws:iam::968945269301:role/newton-physics-newton-ec2-github-runner-role
  AWS_ROLE_DURATION: 3600
  HOME: /actions-runner

jobs:
| 27 | + start-runner: |
| 28 | + name: Start self-hosted EC2 runner |
| 29 | + if: github.repository == 'newton-physics/newton' |
| 30 | + runs-on: ubuntu-latest |
| 31 | + permissions: |
| 32 | + id-token: write |
| 33 | + contents: read |
| 34 | + outputs: |
| 35 | + label: ${{ steps.start-ec2-runner.outputs.label }} |
| 36 | + ec2-instance-id: ${{ steps.start-ec2-runner.outputs.ec2-instance-id }} |
| 37 | + steps: |
| 38 | + - name: Harden the runner (Audit all outbound calls) |
| 39 | + uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0 |
| 40 | + with: |
| 41 | + egress-policy: audit |
| 42 | + |
| 43 | + - name: Configure AWS credentials |
| 44 | + uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7 # v6.0.0 |
| 45 | + with: |
| 46 | + aws-region: ${{ env.AWS_REGION }} |
| 47 | + role-to-assume: ${{ env.AWS_ROLE_ARN }} |
| 48 | + role-duration-seconds: ${{ env.AWS_ROLE_DURATION }} |
| 49 | + |
| 50 | + - name: Get the latest AWS Deep Learning Base GPU AMI |
| 51 | + run: | |
| 52 | + echo "Finding the latest AWS Deep Learning Base GPU AMI..." |
| 53 | + LATEST_AMI_ID=$(aws ec2 describe-images --region ${{ env.AWS_REGION }} \ |
| 54 | + --owners amazon \ |
| 55 | + --filters 'Name=name,Values=Deep Learning Base AMI with Single CUDA (Ubuntu 22.04) ????????' 'Name=state,Values=available' \ |
| 56 | + --query 'reverse(sort_by(Images, &CreationDate))[:1].ImageId' \ |
| 57 | + --output text) |
| 58 | + if [[ -z "$LATEST_AMI_ID" ]]; then |
| 59 | + echo "❌ No AMI ID found. Exiting." |
| 60 | + exit 1 |
| 61 | + fi |
| 62 | + echo "Latest AMI ID found: $LATEST_AMI_ID" |
| 63 | + echo "LATEST_AMI_ID=$LATEST_AMI_ID" >> "$GITHUB_ENV" |
| 64 | +
|
| 65 | + - name: Start EC2 runner |
| 66 | + id: start-ec2-runner |
| 67 | + uses: machulav/ec2-github-runner@a00f575a87f3a96ec6de9413d16eeb828a3cc0a8 # v2.5.2 |
| 68 | + with: |
| 69 | + mode: start |
| 70 | + github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }} |
| 71 | + ec2-instance-type: ${{ env.AWS_INSTANCE_TYPE }} |
| 72 | + ec2-volume-size: ${{ env.AWS_VOLUME_SIZE }} |
| 73 | + ec2-volume-type: ${{ env.AWS_VOLUME_TYPE }} |
| 74 | + availability-zones-config: > |
| 75 | + [ |
| 76 | + {"imageId": "${{ env.LATEST_AMI_ID }}", "subnetId": "subnet-051b9d2e71acf8047", "securityGroupId": "${{ env.AWS_SECURITY_GROUP_IDS }}"}, |
| 77 | + {"imageId": "${{ env.LATEST_AMI_ID }}", "subnetId": "subnet-0c98bd06abe8ee5eb", "securityGroupId": "${{ env.AWS_SECURITY_GROUP_IDS }}"} |
| 78 | + ] |
| 79 | + pre-runner-script: | |
| 80 | + if [ -d /opt/dlami/nvme ]; then |
| 81 | + mkdir -p /opt/dlami/nvme/actions-runner/_work |
| 82 | + mkdir -p /opt/dlami/nvme/actions-runner/.local |
| 83 | + mkdir -p /opt/dlami/nvme/actions-runner/.cache |
| 84 | + ln -s /opt/dlami/nvme/actions-runner/_work /actions-runner/_work |
| 85 | + ln -s /opt/dlami/nvme/actions-runner/.local /actions-runner/.local |
| 86 | + ln -s /opt/dlami/nvme/actions-runner/.cache /actions-runner/.cache |
| 87 | + fi |
| 88 | + aws-resource-tags: > |
| 89 | + [ |
| 90 | + {"Key": "Name", "Value": "ec2-github-runner"}, |
| 91 | + {"Key": "created-by", "Value": "github-actions-newton-role"}, |
| 92 | + {"Key": "GitHub-Repository", "Value": "${{ github.repository }}"} |
| 93 | + ] |
| 94 | +
|
| 95 | + minimum-deps-tests: |
| 96 | + name: Run Tests with Minimum Dependency Versions |
| 97 | + needs: start-runner |
| 98 | + if: ${{ !cancelled() && needs.start-runner.result == 'success' }} |
| 99 | + runs-on: ${{ needs.start-runner.outputs.label }} |
| 100 | + timeout-minutes: 60 |
| 101 | + permissions: |
| 102 | + contents: read |
| 103 | + env: |
| 104 | + PYTHONFAULTHANDLER: "1" |
| 105 | + steps: |
| 106 | + - name: Harden the runner (Audit all outbound calls) |
| 107 | + uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0 |
| 108 | + with: |
| 109 | + egress-policy: audit |
| 110 | + |
| 111 | + - name: Checkout repository |
| 112 | + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1 |
| 113 | + |
| 114 | + - name: Install uv |
| 115 | + uses: astral-sh/setup-uv@37802adc94f370d6bfd71619e3f0bf239e1f3b78 # v7.6.0 |
| 116 | + with: |
| 117 | + version: "0.11.0" |
| 118 | + |
| 119 | + - name: Set up Python |
| 120 | + run: uv python install |
| 121 | + |
| 122 | + - name: Resolve minimum dependency versions |
| 123 | + run: | |
| 124 | + uv lock --resolution lowest-direct |
| 125 | + echo "Resolved dependency versions:" |
| 126 | + uv tree --depth 1 |
| 127 | +
|
| 128 | + - name: Run Tests |
| 129 | + run: uv run --extra dev -m newton.tests --junit-report-xml rspec.xml |
| 130 | + |
| 131 | + - name: Test Summary |
| 132 | + if: ${{ !cancelled() }} |
| 133 | + uses: test-summary/action@31493c76ec9e7aa675f1585d3ed6f1da69269a86 # v2.4 |
| 134 | + with: |
| 135 | + paths: "rspec.xml" |
| 136 | + show: "fail" |
| 137 | + |
| 138 | + - name: Upload test results to Codecov |
| 139 | + if: ${{ !cancelled() }} |
| 140 | + continue-on-error: true |
| 141 | + uses: codecov/codecov-action@1af58845a975a7985b0beb0cbe6fbbb71a41dbad # v5.5.3 |
| 142 | + with: |
| 143 | + disable_search: true |
| 144 | + files: ./rspec.xml |
| 145 | + flags: minimum-deps-nightly |
| 146 | + report_type: test_results |
| 147 | + token: ${{ secrets.CODECOV_TOKEN }} |
| 148 | + |
| 149 | + - name: Re-run instructions |
| 150 | + if: failure() |
| 151 | + run: | |
| 152 | + echo "::error::DO NOT use 'Re-run failed jobs' - the EC2 runner no longer exists and your job will be queued forever." |
| 153 | + echo "::error::USE 'Re-run all jobs' instead to start a fresh EC2 runner." |
| 154 | + cat >> "$GITHUB_STEP_SUMMARY" << 'EOF' |
| 155 | + ## ⚠️ How to Re-run This Workflow |
| 156 | +
|
| 157 | + This workflow uses **ephemeral EC2 runners** that are terminated after each run. |
| 158 | +
|
| 159 | + | | Option | Result | |
| 160 | + |---|--------|--------| |
| 161 | + | ❌ | **Re-run failed jobs** | Runner no longer exists → job queued forever | |
| 162 | + | ✅ | **Re-run all jobs** | Starts new EC2 runner → tests re-run | |
| 163 | + EOF |
| 164 | +
|
| 165 | + stop-runner: |
| 166 | + name: Stop self-hosted EC2 runner |
| 167 | + runs-on: ubuntu-latest |
| 168 | + permissions: |
| 169 | + id-token: write |
| 170 | + contents: read |
| 171 | + needs: |
| 172 | + - start-runner |
| 173 | + - minimum-deps-tests |
| 174 | + if: always() && needs.start-runner.result != 'skipped' && github.repository == 'newton-physics/newton' |
| 175 | + steps: |
| 176 | + - name: Harden the runner (Audit all outbound calls) |
| 177 | + uses: step-security/harden-runner@fa2e9d605c4eeb9fcad4c99c224cee0c6c7f3594 # v2.16.0 |
| 178 | + with: |
| 179 | + egress-policy: audit |
| 180 | + |
| 181 | + - name: Configure AWS credentials |
| 182 | + uses: aws-actions/configure-aws-credentials@8df5847569e6427dd6c4fb1cf565c83acfa8afa7 # v6.0.0 |
| 183 | + with: |
| 184 | + aws-region: ${{ env.AWS_REGION }} |
| 185 | + role-to-assume: ${{ env.AWS_ROLE_ARN }} |
| 186 | + role-duration-seconds: ${{ env.AWS_ROLE_DURATION }} |
| 187 | + |
| 188 | + - name: Stop EC2 runner |
| 189 | + uses: machulav/ec2-github-runner@a00f575a87f3a96ec6de9413d16eeb828a3cc0a8 # v2.5.2 |
| 190 | + with: |
| 191 | + mode: stop |
| 192 | + github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }} |
| 193 | + label: ${{ needs.start-runner.outputs.label }} |
| 194 | + ec2-instance-id: ${{ needs.start-runner.outputs.ec2-instance-id }} |
# 0 commit comments