-
Notifications
You must be signed in to change notification settings - Fork 1
259 lines (239 loc) · 11.5 KB
/
scenario-ci.yml
File metadata and controls
259 lines (239 loc) · 11.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
name: Scenario CI (reusable)
# Reusable workflow (trigger: workflow_call) invoked by per-scenario caller
# workflows. In order, the single `smoke` job:
#   1. resolves the scenario's CloudFormation template (committed file, or
#      fetched from the blueprints bucket when no local file exists),
#   2. acquires an ephemeral ISB lease -- or, when `reuse_account_id` is set,
#      skips acquire/release and uses that already-leased pool account,
#   3. deploys the template into the leased pool account,
#   4. runs the scenario's Playwright smoke spec,
#   5. captures CloudFormation events, uploads artefacts, deletes the stack,
#   6. releases the lease.
# The capture, upload, delete and release steps are gated on `if: always()`
# so a failed deploy or smoke run still cleans up and returns the account to
# the pool.
#
# The `smoke` job runs in the `smoke-test-deploy` environment, which gates
# access to the hub OIDC role (CODEOWNERS approval + branch policy on that
# environment).
# Trigger: this workflow is only ever started by another workflow via
# `uses:`. Every input is a string; only `scenario` is mandatory.
on:
  workflow_call:
    inputs:
      # Selects the scenario directory; also used to build the stack name
      # and the path of the smoke spec.
      scenario:
        description: Scenario name (matches cloudformation/scenarios/<name>/).
        required: true
        type: string
      # Empty (the default) means "resolve the template automatically".
      template_path:
        description: Path to the CloudFormation template to deploy.
        required: false
        type: string
        default: ''
      lease_template:
        description: ISB lease template to acquire.
        required: false
        type: string
        default: empty-sandbox
      ci_lease_email:
        # Folded scalar: the lines below join with single spaces into the
        # same one-line description, just without the over-long source line.
        description: >-
          Email associated with the lease. Must be an existing ISB user
          (ISB API returns 500 if user not found). Created via
          `isb create-user --firstname=NDX --lastname=CI-Bot
          --email=ci-bot@ndx-try.local --preapproved`.
        required: false
        type: string
        default: ci-bot@ndx-try.local
      # Debug escape hatch: a non-empty value switches the job to reuse mode
      # (no lease acquire, no lease release).
      reuse_account_id:
        description: |
          DEBUG: skip acquire/release; use this already-leased pool account.
          Lease the account locally first (`isb assign` then look at
          assigned account_id) and paste here. Workflow will assume
          CIDeployRole directly. Lease is NOT released — call `isb terminate`
          when done. Used during CI iteration to avoid burning pool accounts.
        required: false
        type: string
        default: ''
# Workflow-level GITHUB_TOKEN scopes.
permissions:
  # Lets the job request an OIDC token; the credential steps assume the hub
  # role by role ARN only, with no static access keys.
  id-token: write
  # Read-only repository access for the checkout step.
  contents: read
jobs:
  # Single job: resolve template -> lease (or reuse) a pool account -> deploy
  # -> smoke test -> capture diagnostics -> delete stack -> release lease.
  smoke:
    runs-on: ubuntu-latest
    timeout-minutes: 90
    # smoke-test-deploy gates PR-triggered access to the hub OIDC role.
    # CODEOWNERS approval + branch policy on this environment is the
    # human-in-the-loop sign-off for spending a pool account on a lease.
    environment: smoke-test-deploy
    steps:
      - uses: actions/checkout@v6
      - uses: actions/setup-node@v6
        with:
          node-version: '22'
          cache: 'npm'
      - run: npm ci
      - name: Install Playwright browsers
        run: npx playwright install --with-deps chromium

      # Output: steps.paths.outputs.template_path -- the file the deploy
      # step hands to `aws cloudformation deploy`.
      - name: Resolve template path
        id: paths
        env:
          SCENARIO: ${{ inputs.scenario }}
          OVERRIDE: ${{ inputs.template_path }}
          BLUEPRINTS_BUCKET: ndx-try-isb-blueprints-568672915267
        run: |
          # -e is stated explicitly so this script matches the deploy step.
          set -euo pipefail
          if [ -n "$OVERRIDE" ]; then
            path="$OVERRIDE"
          else
            # Hand-authored YAML scenarios have template.yaml committed.
            # CDK-synth and SAM-style scenarios get their built template
            # uploaded to the hub blueprints bucket by deploy-blueprints.yml.
            # When the local file is missing, fetch from there so CI works
            # against the same template real lease deploys use.
            local_path="cloudformation/scenarios/${SCENARIO}/template.yaml"
            if [ -f "$local_path" ]; then
              path="$local_path"
            else
              echo "::notice::No local $local_path — fetching from s3://${BLUEPRINTS_BUCKET}/scenarios/${SCENARIO}/template.yaml"
              # This is an unauthenticated HTTPS GET: no AWS credentials are
              # configured yet at this point in the job, so it relies on the
              # object being publicly readable (the templateUrl used by the
              # StackSet is public). The CI-lease role doesn't have S3 read
              # on the blueprints bucket but the hub OIDC role does, so if
              # the bucket is ever locked down, move this step after the hub
              # credentials step and fetch with those credentials instead.
              path="$local_path"
              mkdir -p "cloudformation/scenarios/${SCENARIO}"
              curl -fsSL "https://${BLUEPRINTS_BUCKET}.s3.us-east-1.amazonaws.com/scenarios/${SCENARIO}/template.yaml" -o "$path" || {
                echo "::error::Template not found locally OR in blueprints bucket for ${SCENARIO}"
                exit 1
              }
            fi
          fi
          echo "template_path=$path" >> "$GITHUB_OUTPUT"
          echo "Using template: $path ($(wc -c <"$path") bytes)"

      # Assume the CI-lease OIDC role in the hub. This identity can read
      # the ISB JWT secret and assume CIDeployRole in any pool account.
      - name: Assume hub CI-lease role
        uses: aws-actions/configure-aws-credentials@v6
        id: hub-creds
        with:
          role-to-assume: arn:aws:iam::568672915267:role/isb-hub-github-actions-ci-lease
          role-session-name: scenario-ci-${{ github.run_id }}
          aws-region: us-west-2
          role-duration-seconds: 21600

      # Normal mode only. Later steps read steps.lease.outputs.account_id
      # and steps.lease.outputs.lease_id, which ci_lease.py is expected to
      # write -- that script is outside this file.
      - name: Acquire ISB lease
        id: lease
        if: inputs.reuse_account_id == ''
        env:
          # Inputs are passed through the environment rather than spliced
          # into the script text, so their content is never parsed as shell.
          INPUT_LEASE_TEMPLATE: ${{ inputs.lease_template }}
          INPUT_CI_LEASE_EMAIL: ${{ inputs.ci_lease_email }}
        run: |
          python3 scripts/isb/ci_lease.py acquire \
            --template "$INPUT_LEASE_TEMPLATE" \
            --user-email "$INPUT_CI_LEASE_EMAIL"

      # Reuse mode only. Publishes the caller-supplied account id under the
      # same output name (account_id) the acquire step would have used.
      - name: Use reused lease account
        id: reuse
        if: inputs.reuse_account_id != ''
        env:
          INPUT_REUSE_ACCOUNT_ID: ${{ inputs.reuse_account_id }}
        run: |
          set -euo pipefail
          # The value ends up in $GITHUB_OUTPUT, a role ARN and a bucket
          # name, so reject anything that is not a 12-digit account id
          # (stray whitespace, newlines, pasted ARNs, ...).
          if ! [[ "$INPUT_REUSE_ACCOUNT_ID" =~ ^[0-9]{12}$ ]]; then
            echo "::error::reuse_account_id must be a 12-digit AWS account id"
            exit 1
          fi
          echo "::notice::Reusing already-leased account ${INPUT_REUSE_ACCOUNT_ID} — acquire+release skipped"
          echo "account_id=${INPUT_REUSE_ACCOUNT_ID}" >> "$GITHUB_OUTPUT"

      # Now assume the in-lease CIDeployRole. role-chaining=true tells
      # configure-aws-credentials to sigv4-sign from the already-loaded
      # hub creds (sts:AssumeRole) instead of trying OIDC against the
      # leased account (which has no OIDC provider).
      - name: Assume CIDeployRole in the leased account
        uses: aws-actions/configure-aws-credentials@v6
        id: lease-creds
        with:
          # Exactly one of the two steps above ran, so exactly one of these
          # outputs is non-empty.
          role-to-assume: arn:aws:iam::${{ steps.lease.outputs.account_id || steps.reuse.outputs.account_id }}:role/InnovationSandbox-ndx-CIDeployRole
          role-session-name: scenario-ci-deploy-${{ github.run_id }}
          aws-region: us-east-1
          # Chained assumes (sigv4 from already-assumed creds) are capped
          # at 1h by AWS regardless of MaxSessionDuration on the target
          # role. Asking for more returns ValidationError. Workflow job
          # timeout is 90m so any deploy needing >1h would already need
          # other plumbing.
          role-duration-seconds: 3600
          role-chaining: true
          # The default session-tag attempt requires sts:TagSession in the
          # target role's trust policy, which CIDeployRole doesn't grant.
          role-skip-session-tagging: true

      # Output: steps.deploy.outputs.stack_name (set before the deploy
      # itself, see below).
      - name: Deploy scenario stack
        id: deploy
        env:
          SCENARIO: ${{ inputs.scenario }}
          TEMPLATE: ${{ steps.paths.outputs.template_path }}
          ACCOUNT_ID: ${{ steps.lease.outputs.account_id || steps.reuse.outputs.account_id }}
        run: |
          set -euo pipefail
          STACK_NAME="ndx-try-${SCENARIO}"
          # Emit the stack name to GITHUB_OUTPUT FIRST so the Capture
          # CFN events step (gated on stack_name) runs even if deploy
          # fails — without the events we can't debug the failure.
          echo "stack_name=$STACK_NAME" >> "$GITHUB_OUTPUT"
          # CFN requires templates >51KB to be staged in S3. Several
          # scenarios (council-chatbot, simply-readable, ai-contact-centre)
          # exceed this. Create a per-account staging bucket on demand —
          # the leased pool account is wiped between leases so we can't
          # rely on a pre-existing bucket. Both bucket calls are
          # best-effort: output is discarded and failures are ignored
          # (|| true), so an already-existing bucket (reuse mode) is not
          # fatal. A genuine failure surfaces in the deploy command below.
          BUCKET="ndx-try-ci-cfn-stage-${ACCOUNT_ID}"
          aws s3api create-bucket \
            --bucket "$BUCKET" \
            --region us-east-1 \
            >/dev/null 2>&1 || true
          aws s3api put-bucket-encryption \
            --bucket "$BUCKET" \
            --server-side-encryption-configuration '{"Rules":[{"ApplyServerSideEncryptionByDefault":{"SSEAlgorithm":"AES256"}}]}' \
            >/dev/null 2>&1 || true
          # GITHUB_RUN_ID is the runner-provided equivalent of
          # github.run_id; using it keeps expressions out of the script.
          aws cloudformation deploy \
            --stack-name "$STACK_NAME" \
            --template-file "$TEMPLATE" \
            --s3-bucket "$BUCKET" \
            --capabilities CAPABILITY_IAM CAPABILITY_NAMED_IAM CAPABILITY_AUTO_EXPAND \
            --no-fail-on-empty-changeset \
            --tags Project=ndx-try Scenario="$SCENARIO" RunId="${GITHUB_RUN_ID}"

      - name: Run scenario smoke spec
        env:
          # Passed via env so the scenario name is never parsed as shell.
          INPUT_SCENARIO: ${{ inputs.scenario }}
          SMOKE_STACK_NAME: ${{ steps.deploy.outputs.stack_name }}
          SMOKE_AWS_REGION: us-east-1
          # Skips the local eleventy webServer that playwright.config.ts
          # otherwise starts for the desktop/mobile projects.
          PLAYWRIGHT_SUITE: smoke
          CI: 'true'
        run: |
          npx playwright test --project=smoke \
            "cloudformation/scenarios/${INPUT_SCENARIO}/smoke.ts"

      # Captured BEFORE teardown so live-stack state survives a failure.
      # Both calls are best-effort; stderr is written into the same file
      # as stdout, so a failed call leaves the error text in the artefact.
      - name: Capture CFN events
        if: always() && steps.deploy.outputs.stack_name != ''
        env:
          STACK: ${{ steps.deploy.outputs.stack_name }}
        run: |
          mkdir -p artefacts
          aws cloudformation describe-stack-events --stack-name "$STACK" \
            > "artefacts/${STACK}-events.json" 2>&1 || true
          aws cloudformation describe-stacks --stack-name "$STACK" \
            > "artefacts/${STACK}-stack.json" 2>&1 || true

      - name: Upload artefacts
        if: always()
        uses: actions/upload-artifact@v7
        with:
          name: scenario-${{ inputs.scenario }}-${{ github.run_id }}
          path: |
            artefacts/
            playwright-report/
            test-results/
          retention-days: 14

      # Delete the scenario stack so leased accounts don't accumulate
      # VPCs, ELBs, etc. across runs. In acquire mode (no reuse), this is
      # belt-and-braces because ISB nuke wipes the account anyway. In
      # reuse mode (same lease used for multiple scenarios), this is
      # required to stay under per-account quotas (5 VPCs/region by default).
      # Wait synchronously so the next scenario doesn't race the cleanup,
      # but cap at 20 minutes so a stuck delete doesn't block everything.
      - name: Delete scenario stack
        if: always() && steps.deploy.outputs.stack_name != ''
        env:
          STACK: ${{ steps.deploy.outputs.stack_name }}
        run: |
          aws cloudformation delete-stack --stack-name "$STACK" || true
          echo "::notice::Waiting up to 20m for $STACK to delete"
          timeout 1200 aws cloudformation wait stack-delete-complete --stack-name "$STACK" \
            || echo "::warning::$STACK delete didn't finish in 20m; ISB nuke will catch it at lease end"

      # Switch back to the hub-side identity to call the ISB API for release.
      # The lease-creds identity is only valid for ops inside the pool account;
      # the release POST goes to the hub API. Skipped in reuse mode, where
      # the lease step never ran and lease_id is therefore empty.
      - name: Re-assume hub CI-lease role for release
        uses: aws-actions/configure-aws-credentials@v6
        if: always() && steps.lease.outputs.lease_id != ''
        with:
          role-to-assume: arn:aws:iam::568672915267:role/isb-hub-github-actions-ci-lease
          role-session-name: scenario-ci-release-${{ github.run_id }}
          aws-region: us-west-2
          role-duration-seconds: 3600

      # Best-effort: a failed release only logs a warning, so it never
      # changes the job result.
      - name: Release ISB lease
        if: always() && steps.lease.outputs.lease_id != ''
        env:
          ACQUIRED_LEASE_ID: ${{ steps.lease.outputs.lease_id }}
          INPUT_CI_LEASE_EMAIL: ${{ inputs.ci_lease_email }}
        run: |
          python3 scripts/isb/ci_lease.py release \
            --lease-id "$ACQUIRED_LEASE_ID" \
            --user-email "$INPUT_CI_LEASE_EMAIL" \
            || echo "::warning::Lease release failed (lease_id=${ACQUIRED_LEASE_ID}); janitor will recover it"