Skip to content

Commit 5fbdec3

Browse files
committed
smoke: AC4.2/4.3 assertion-bar fixture + AC3.10 override-emergency flow
- tests/smoke/fixtures/assertion-bar.ts: 17 AssertionBarRow entries (one per scenario, including the all-demo umbrella) indexing what each spec asserts and the historical regression that motivated it. Smoke specs remain the source of truth; this is the reviewer-facing index.
- .github/workflows/smoke.yml: the scope job emits `override=true` when the PR carries the `smoke-override-emergency` label; the smoke job's `if:` skips when the override is active, so the gate clears. CODEOWNERS approval is enforced by repo branch-protection (out of band).
- .github/workflows/smoke-override-followup.yml: an hourly cron opens a `smoke-override-followup` issue 48h after the merge so the underlying regression doesn't get forgotten. Idempotent on PR number.
1 parent 892b132 commit 5fbdec3

3 files changed

Lines changed: 219 additions & 1 deletion

File tree

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
name: Smoke Override Follow-up

# AC3.10: when a PR is merged with the `smoke-override-emergency` label, the
# smoke gate is bypassed (see smoke.yml's `scope` job). Once 48 hours have
# passed we open a `smoke-override-followup` issue so the underlying
# regression isn't forgotten. Runs hourly, so the issue normally appears
# within an hour of the 48h mark.

on:
  schedule:
    - cron: '0 * * * *'
  workflow_dispatch:

permissions:
  contents: read
  issues: write
  pull-requests: read

jobs:
  followup:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v6
      - name: Open follow-up issues for ~48h-old overrides
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          set -euo pipefail
          # Select override PRs merged at least 48h ago. Scheduled runs can be
          # delayed or dropped, and a run can fail part-way, so we keep
          # retrying for GRACE_HOURS after the 48h mark instead of relying on
          # a single narrow window. Every run in that period re-selects the
          # same PRs; the title check below is what makes repeats a no-op.
          GRACE_HOURS=24
          NOW_TS=$(date -u +%s)
          WINDOW_OPEN=$((NOW_TS - (48 + GRACE_HOURS) * 3600))
          WINDOW_CLOSE=$((NOW_TS - 48 * 3600))

          # Fetch the existing follow-up issues once. Dedup relies on the
          # issue keeping both this label and the title prefix used below.
          existing_issues=$(gh issue list --state all --label smoke-override-followup --limit 500 --json number,title)

          # Title is emitted last so a tab inside a PR title cannot shift the
          # other fields.
          gh pr list --state merged --label smoke-override-emergency --json number,title,mergedAt,url \
            --limit 50 \
            | jq -r --argjson open "$WINDOW_OPEN" --argjson close "$WINDOW_CLOSE" '
                .[]
                | (.mergedAt | fromdate) as $merged
                | select($merged >= $open and $merged <= $close)
                | "\(.number)\t\(.url)\t\(.mergedAt)\t\(.title)"
              ' \
            | while IFS=$'\t' read -r num url merged_at title; do
                [ -z "$num" ] && continue
                # Skip if a follow-up issue for this PR already exists. Match
                # the exact title prefix (including the trailing " (") so that
                # PR #12 is not confused with PR #123 or with digits that
                # happen to appear in another PR's title.
                prefix="Follow-up: smoke-override-emergency on PR #$num ("
                existing=$(jq -r --arg prefix "$prefix" \
                  'first(.[] | select(.title | startswith($prefix)) | .number)' <<<"$existing_issues")
                if [ -n "$existing" ]; then
                  echo "PR #$num already has follow-up issue #$existing — skipping"
                  continue
                fi
                gh issue create \
                  --title "Follow-up: smoke-override-emergency on PR #$num ($title)" \
                  --label smoke-override-followup \
                  --body "PR #$num merged with the smoke-override-emergency label, bypassing the smoke gate. 48h have elapsed — please record the underlying regression's resolution (smoke now green / scenario re-quarantined / etc.) and close this issue.

          PR: $url
          Merged: $merged_at"
              done

.github/workflows/smoke.yml

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ jobs:
4444
outputs:
4545
mode: ${{ steps.decide.outputs.mode }}
4646
scenarios: ${{ steps.decide.outputs.scenarios }}
47+
override: ${{ steps.override.outputs.active }}
4748
steps:
4849
- uses: actions/checkout@v6
4950
with:
@@ -53,10 +54,25 @@ jobs:
5354
GITHUB_EVENT_NAME: ${{ github.event_name }}
5455
GITHUB_BASE_REF: ${{ github.base_ref }}
5556
run: ./scripts/smoke-scope.sh
57+
# AC3.10: `smoke-override-emergency` label causes the smoke job to skip,
58+
# making it a pass — so the merge gate clears. CODEOWNERS approval of the
59+
# PR itself is required by repo branch-protection, not by this workflow.
60+
# The 48h follow-up issue is opened by smoke-override-followup.yml.
61+
- id: override
62+
if: github.event_name == 'pull_request'
63+
env:
64+
LABELS: ${{ toJSON(github.event.pull_request.labels.*.name) }}
65+
run: |
66+
if echo "$LABELS" | jq -re '.[]|select(.=="smoke-override-emergency")' >/dev/null; then
67+
echo "active=true" >> "$GITHUB_OUTPUT"
68+
echo "::warning::smoke-override-emergency label present — smoke gate bypassed."
69+
else
70+
echo "active=false" >> "$GITHUB_OUTPUT"
71+
fi
5672
5773
smoke:
5874
needs: scope
59-
if: needs.scope.outputs.mode != 'none'
75+
if: needs.scope.outputs.mode != 'none' && needs.scope.outputs.override != 'true'
6076
runs-on: ubuntu-latest
6177
timeout-minutes: 90
6278
# smoke-test-deploy environment is gated by CODEOWNERS for non-main refs
Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
// Per-scenario assertion bar: what each scenario's smoke spec asserts, and
2+
// which historical regression motivates that assertion. The smoke specs
3+
// themselves are the source of truth; this fixture is the index a reviewer
4+
// or new author scans to understand "why does this scenario check that".
5+
//
6+
// AC4.2: each scenario has exactly one row citing a historical regression.
7+
// AC4.3: 17 rows total — one per scenario (16 deployable + the all-demo
8+
// umbrella, which has its own assertion).
9+
//
10+
// Add a row whenever a new scenario lands. Update the row when a regression
11+
// drives a new assertion into that scenario's smoke spec.
12+
13+
/**
 * One row of the assertion bar: a summary of what a single scenario's smoke
 * spec asserts and the historical regression that motivated it.
 */
export interface AssertionBarRow {
  /** What smoke asserts on the landing target (HTTP status, title match, content probe). */
  readonly landingAssertion: string;

  /** The auth flow smoke exercises, if the scenario has one. */
  readonly loginAssertion: string;

  /** The bug-informed feature flow that is checked beyond a bare 200. */
  readonly featureFlow: string;

  /** Keys of the CFN Outputs that the smoke spec reads. */
  readonly outputsToCheck: ReadonlyArray<string>;

  /** Free-form citation: a commit SHA, PR, incident, or memory entry. */
  readonly historicalRegressionCited: string;
}
25+
26+
/**
 * Reviewer-facing index of the smoke assertion bar, keyed by scenario name.
 *
 * Each entry summarises what that scenario's smoke spec asserts and cites the
 * regression behind it; the smoke specs themselves remain the source of truth.
 *
 * The type arguments are spelled out so every row literal is checked directly
 * against `AssertionBarRow` (a misspelled or extra field is a compile error)
 * rather than having the value type inferred from the literals.
 *
 * Note: a `Map` keeps only the last entry for a repeated key, so a duplicated
 * scenario name would silently drop a row — keep keys unique when adding one.
 */
export const ASSERTION_BAR: ReadonlyMap<string, AssertionBarRow> = new Map<string, AssertionBarRow>([
  ['ai-contact-centre', {
    landingAssertion: 'CompanionUrl HTTP < 500',
    loginAssertion: 'n/a (quota-preserved DUMMY DID path)',
    featureFlow: 'PstnNumber output matches +44 (or US toll-free) E.164 format',
    outputsToCheck: ['AiContactCentreCompanionUrl', 'AiContactCentrePstnNumber'],
    historicalRegressionCited: '+44 number claim from us-east-1 regressed to generic /^\\+\\d{6,}/ — see memory:aws-connect-uk-numbers and ACCEPTABLE_PSTN regex narrowing in ai-contact-centre/smoke.ts',
  }],
  ['all-demo', {
    landingAssertion: 'every Outputs key in template.yaml resolves on the live stack',
    loginAssertion: 'n/a (umbrella)',
    featureFlow: 'safe outputs are non-empty + not "{{resolve:...}}" literal; URL outputs match https://; sensitive outputs have non-zero length',
    outputsToCheck: ['discovered from template at test time'],
    historicalRegressionCited: 'BopsPlanning/Paperless secretsmanager dynamic refs leaked the unresolved {{resolve:...}} literal into CFN Outputs; smoke now asserts no Output is that placeholder',
  }],
  ['bops-planning', {
    landingAssertion: 'landing page does not contain the Rails generic "we\'re sorry, but something went wrong" or fall through to the Applicants tenant',
    loginAssertion: 'Devise admin login at /users/sign_in completes; URL navigates away from "sign_in"',
    featureFlow: 'post-login URL does NOT contain ":8080" (routing.rb single-tenant override would otherwise route Applicants on port 8080)',
    outputsToCheck: ['BopsPlanningUrl', 'BopsPlanningLoginUrl', 'BopsPlanningUsername', 'BopsPlanningPassword'],
    historicalRegressionCited: 'base64-encoded routing.rb override failed to reach the container, leaking the Applicants tenant on the back-office host (port :8080 visible in post-login URL)',
  }],
  ['council-chatbot', {
    landingAssertion: 'public Lambda Function URL POST returns < 500',
    loginAssertion: 'n/a',
    featureFlow: 'POST forces Lambda cold-start + Bedrock invocation (GET would 405 vacuously)',
    outputsToCheck: ['ChatbotURL', 'ChatbotKnowledgeBaseBucket'],
    historicalRegressionCited: 'public Lambda FURL needs both InvokeFunctionUrl AND InvokeFunction+InvokedViaFunctionUrl:true since Oct 2025 — without both we get a 403; see memory:isb_blocks_public_lambda_urls',
  }],
  ['digital-planning-register', {
    landingAssertion: 'landing HTTP < 500; body contains "planning" or "register"',
    loginAssertion: 'n/a (public)',
    featureFlow: 'no "application error" Next.js overlay in body',
    outputsToCheck: ['DigitalPlanningRegisterUrl'],
    historicalRegressionCited: 'Next.js server crashes from missing/invalid council-config render the framework error overlay; smoke catches the overlay text',
  }],
  ['fixmystreet', {
    landingAssertion: 'landing has /FixMyStreet/i title; body does not leak ":9000" absolute URLs (ALB sidecar mis-routing)',
    loginAssertion: 'two-stage email→password flow at /auth; navigates away from "auth" after submit',
    featureFlow: '/reports renders without errors (bin/update-all-reports populated data/all-reports.json); /admin does not redirect to a 2FA setup page (STAGING_FLAGS skip_must_have_2fa holds)',
    outputsToCheck: ['FixMyStreetUrl', 'FixMyStreetAdminUsername', 'FixMyStreetAdminPassword'],
    historicalRegressionCited: 'ALB sidecar regression leaked port :9000 absolute URLs into pages; 2FA-skip flag removal silently broke admin; see memory:fixmystreet-lessons',
  }],
  ['foi-redaction', {
    landingAssertion: 'public CloudFront URL responds < 500',
    loginAssertion: 'n/a (public)',
    featureFlow: 'landing page reachable; redaction backend reachable from front-end',
    outputsToCheck: ['RedactionURL', 'FoiDocumentsBucket'],
    historicalRegressionCited: 'CloudFront → ALB origin auth header rewrite regressed once; smoke landing check catches the resulting 403',
  }],
  ['localgov-drupal', {
    landingAssertion: 'landing has a title; body does not contain "fatal error" or "accessdeniedexception"',
    loginAssertion: 'admin login at /user/login (root URL is /init-status — must strip); password output is JSON-wrapped {password,username} (must parse)',
    featureFlow: '/admin page does not contain "accessdeniedexception" or "module ... could not be enabled"',
    outputsToCheck: ['DrupalUrl', 'DrupalAdminUsername', 'DrupalAdminPassword'],
    historicalRegressionCited: 'ndx_aws_ai module bootstraps Bedrock at cache:bin construction; an AccessDeniedException at that phase tanks the whole site silently — the body probe catches it; see memory:fixmystreet-lessons cousin in localgov-drupal',
  }],
  ['localgov-ims', {
    landingAssertion: 'admin + payment portal URLs return on their respective hostnames',
    loginAssertion: 'admin credentials available; password is NOT the literal "{{resolve:...}}" token',
    featureFlow: 'AdminPassword Lambda-custom-resource returned a real string, not the unresolved Secrets Manager reference',
    outputsToCheck: ['LocalgovImsAdminPortalUrl', 'LocalgovImsPaymentPortalUrl', 'LocalgovImsAdminUsername', 'LocalgovImsAdminPassword'],
    historicalRegressionCited: 'Lambda-custom-resource that resolves the AdminPassword secret regressed once and returned the literal "{{resolve:...}}" token as the password',
  }],
  ['minute', {
    landingAssertion: 'MinuteLoginUrl ?key=... 302s to a clean URL (CF Function consumed the magic-link token); landing has /Minute/i title',
    loginAssertion: 'magic-link cookie flow (not basic-auth — basic-auth broke fetch() and was replaced in 6387441)',
    featureFlow: 'in-page fetch() to /health succeeds (basic-auth would have broken this); ALB /api/* rule does not intercept the frontend middleware passthrough',
    outputsToCheck: ['MinuteUrl', 'MinuteLoginUrl', 'MinuteAuthToken'],
    historicalRegressionCited: 'commit 6387441 replaced basic-auth with magic-link + cookie because browsers (esp. corporate-managed Chromium) suppress the basic-auth dialog and strip URL-embedded credentials',
  }],
  ['paperless-ngx', {
    landingAssertion: 'Angular login form renders (placeholder-labelled Username/Password inputs)',
    loginAssertion: 'admin login completes; selectors match `input[type="text"]` / `input[type="password"]` (Angular form omits `name` attr)',
    featureFlow: '/api/documents/?page=1 returns < 500 (S3 Files mount or Postgres regression would 500)',
    outputsToCheck: ['PaperlessNgxUrl', 'PaperlessNgxAdminUsername', 'PaperlessNgxAdminPassword'],
    historicalRegressionCited: 'S3 Files FS pending-export deadlock surfaced as 500s on /api/documents/; smoke probes the API directly to catch regression of the mount/Postgres state',
  }],
  ['planning-ai', {
    landingAssertion: 'public AnalyzerURL responds < 500',
    loginAssertion: 'n/a (public)',
    featureFlow: 'landing page reachable; analyzer accepts an upload form',
    outputsToCheck: ['AnalyzerURL', 'DocumentsBucket'],
    historicalRegressionCited: 'Bedrock model-id env propagation regressed once and surfaced as a 500 at first form submission; landing probe catches the deploy-time variant',
  }],
  ['planx', {
    landingAssertion: 'landing renders; body does not contain "permission denied for this domain" (domain allowlist regression) or "airbrake" (Airbrake on prod regression)',
    loginAssertion: 'admin login at PlanXLoginUrl; URL navigates away from auth path',
    featureFlow: 'landing reachable post-login; Hasura /v1/version responds < 500 (Caddy-elimination regression would route /hasura paths back to Caddy)',
    outputsToCheck: ['PlanXUrl', 'PlanXLoginUrl', 'PlanXDemoUsername', 'PlanXDemoPassword'],
    historicalRegressionCited: 'env-var assertion regression on prod surfaced as "permission denied for this domain"; Caddy elimination + Hasura native path regression surfaced as /v1/version 502; see memory:planx-scenario-lessons',
  }],
  ['quicksight-dashboard', {
    landingAssertion: 'DashboardUrl is reachable (HEAD) and host matches the embed domain',
    loginAssertion: 'n/a (QuickSight embed)',
    featureFlow: 'DashboardId resolves; data-source backed by the canned data bucket',
    outputsToCheck: ['DashboardUrl', 'DataBucket'],
    historicalRegressionCited: 'QuickSight setup Lambda race created the data source before the seed Lambda populated the bucket, yielding an empty dashboard',
  }],
  ['simply-readable', {
    landingAssertion: 'AppUrl renders the Simply Readable Angular shell',
    loginAssertion: 'Cognito-backed admin login',
    featureFlow: 'AppSync endpoint reachable; translation + readable content buckets exist',
    outputsToCheck: ['AppUrl', 'AdminUsername', 'AdminPassword', 'AppSyncEndpoint'],
    historicalRegressionCited: 'serverAccessLoggingBucket non-empty on teardown blocked stack delete and corrupted next deploy; see memory:simply-readable-lessons',
  }],
  ['smart-car-park', {
    landingAssertion: 'DashboardURL renders the carpark visualisation',
    loginAssertion: 'n/a (public dashboard)',
    featureFlow: 'SensorReadingsTable populated by simulator Lambda',
    outputsToCheck: ['DashboardURL', 'SensorReadingsTable'],
    historicalRegressionCited: 'simulator Lambda IAM regression caused empty SensorReadingsTable while dashboard still loaded; smoke checks output presence',
  }],
  ['text-to-speech', {
    landingAssertion: 'public ConvertURL responds < 500',
    loginAssertion: 'n/a (public)',
    featureFlow: 'audio bucket exists; Polly invocation reachable via the convert endpoint',
    outputsToCheck: ['ConvertURL', 'AudioBucket'],
    historicalRegressionCited: 'Polly voice-id misconfiguration regressed once and surfaced as a 500 at first convert request; landing probe catches the deploy-time variant',
  }],
]);

0 commit comments

Comments
 (0)