---
# File: uptime.yml
# Uptime monitoring for transparency.foxbook.dev / api.foxbook.dev / foxbook.dev.
#
# Runs every 15 minutes via cron. Pings each endpoint with a short timeout.
# On failure: opens (or comments on) a GitHub issue tagged `uptime-incident`.
# On recovery: closes the open incident issue automatically.
#
# GitHub notification settings determine when the maintainer gets paged.
# Default: email on issue open and close.
#
# Silence during planned maintenance (re-enable once the window ends):
#   gh workflow disable uptime.yml
#   gh workflow enable uptime.yml
#
# See docs/OPERATIONS.md § "Uptime monitoring" for the full runbook.
#
# Security: all matrix values and step outputs are passed into `run:` and
# `script:` blocks via `env:` rather than direct `${{ }}` interpolation.
# Matrix values are static config in this workflow (no user-controllable
# path), but env-passing is the defense-in-depth pattern in case a future
# maintainer adds user-derived matrix entries.
name: uptime

# Triggers: a cron schedule firing every 15 minutes (UTC), plus on-demand
# runs started by hand from the Actions tab.
on:
  workflow_dispatch:
  schedule:
    - cron: '*/15 * * * *'

# Scopes granted to the workflow token. `issues: write` is what lets the job
# open, comment on, and close incident issues; repository contents are
# read-only.
permissions:
  contents: read
  issues: write
jobs:
  # Probes every endpoint in the matrix, one job per endpoint. A down endpoint
  # does not fail the job: the probe records its verdict in step outputs, and
  # the later steps turn that verdict into an `uptime-incident` issue (opened
  # or commented on failure, closed on recovery).
  check:
    runs-on: ubuntu-latest
    # A healthy run finishes in seconds. Cap the job so a wedged runner or
    # GitHub API call cannot sit on the default 360-minute job limit.
    timeout-minutes: 5
    strategy:
      # Keep probing the remaining endpoints when one matrix job errors out.
      fail-fast: false
      matrix:
        include:
          - name: transparency
            url: https://transparency.foxbook.dev/root
            expected_status: 200
          - name: api
            url: https://api.foxbook.dev/healthz
            expected_status: 200
          - name: landing
            url: https://foxbook.dev/
            expected_status: 200
    steps:
      # Step outputs: status (HTTP code reported by curl), curl_exit (curl's
      # exit code), outcome (ok | fail), reason (human-readable failure cause,
      # empty when outcome is ok).
      - name: Probe endpoint
        id: probe
        env:
          ENDPOINT_URL: ${{ matrix.url }}
          EXPECTED_STATUS: ${{ matrix.expected_status }}
          ENDPOINT_NAME: ${{ matrix.name }}
        run: |
          # curl is allowed to fail here: its exit code is data for the
          # verdict below, not a reason to abort the step.
          #
          # --retry 2: up to three attempts in total, so a single transient
          #   error (timeout, refused connection, HTTP 408/429/5xx) does not
          #   open an incident on its own. --max-time applies to each attempt,
          #   so the worst case is roughly 3 x 10 s plus 2 x 5 s of delay.
          # -o /dev/null: only the status code is inspected; the response
          #   body is discarded.
          set +e
          status=$(curl -sS -o /dev/null -w "%{http_code}" \
            --max-time 10 --retry 2 --retry-delay 5 --retry-connrefused \
            "$ENDPOINT_URL")
          curl_exit=$?
          set -e

          # Transport failure takes precedence over a status mismatch: when
          # curl itself failed, the reported status code is not meaningful.
          outcome=fail
          if [ "$curl_exit" -ne 0 ]; then
            reason="curl exit $curl_exit (timeout / connect / TLS)"
          elif [ "$status" != "$EXPECTED_STATUS" ]; then
            reason="HTTP $status (expected $EXPECTED_STATUS)"
          else
            outcome=ok
            reason=""
          fi

          {
            echo "status=$status"
            echo "curl_exit=$curl_exit"
            echo "outcome=$outcome"
            echo "reason=$reason"
          } >> "$GITHUB_OUTPUT"
          echo "Probe result: $ENDPOINT_NAME -> $status (curl exit $curl_exit)"

      # On failure: find the open incident for this endpoint by its exact
      # title. Comment on it if it exists, otherwise open a new one.
      - name: Open or update incident issue
        if: steps.probe.outputs.outcome == 'fail'
        uses: actions/github-script@v7
        env:
          ENDPOINT_NAME: ${{ matrix.name }}
          ENDPOINT_URL: ${{ matrix.url }}
          FAIL_REASON: ${{ steps.probe.outputs.reason }}
        with:
          script: |
            const endpoint = process.env.ENDPOINT_NAME;
            const url = process.env.ENDPOINT_URL;
            const reason = process.env.FAIL_REASON;
            // The title is the lookup key shared with the recovery step;
            // changing its format orphans any incident that is already open.
            const title = `uptime-incident: ${endpoint} (${url})`;
            const now = new Date().toISOString();
            const body = `Endpoint: ${url}\nFailure: ${reason}\nDetected at: ${now}\n\nSee \`docs/OPERATIONS.md\` § "Future-you sanity check" for triage.`;
            // Only the first page (up to 50) of open, labelled issues is
            // searched for a matching title.
            const open = await github.rest.issues.listForRepo({
              owner: context.repo.owner,
              repo: context.repo.repo,
              state: 'open',
              labels: 'uptime-incident',
              per_page: 50,
            });
            const existing = open.data.find(i => i.title === title);
            if (existing) {
              // Incident already open: append a heartbeat comment instead
              // of opening a duplicate issue.
              await github.rest.issues.createComment({
                owner: context.repo.owner,
                repo: context.repo.repo,
                issue_number: existing.number,
                body: `Still failing at ${now}. Reason: ${reason}`,
              });
            } else {
              await github.rest.issues.create({
                owner: context.repo.owner,
                repo: context.repo.repo,
                title,
                body,
                labels: ['uptime-incident'],
              });
            }

      # On success: if an incident for this endpoint is still open, leave a
      # recovery comment and close it. No open incident means nothing to do.
      - name: Close recovered incident issue
        if: steps.probe.outputs.outcome == 'ok'
        uses: actions/github-script@v7
        env:
          ENDPOINT_NAME: ${{ matrix.name }}
          ENDPOINT_URL: ${{ matrix.url }}
        with:
          script: |
            const endpoint = process.env.ENDPOINT_NAME;
            const url = process.env.ENDPOINT_URL;
            // Must match the title built by the failure step exactly.
            const title = `uptime-incident: ${endpoint} (${url})`;
            const now = new Date().toISOString();
            const open = await github.rest.issues.listForRepo({
              owner: context.repo.owner,
              repo: context.repo.repo,
              state: 'open',
              labels: 'uptime-incident',
              per_page: 50,
            });
            const existing = open.data.find(i => i.title === title);
            if (existing) {
              // Comment first so the issue's timeline records when the
              // endpoint recovered, then close it.
              await github.rest.issues.createComment({
                owner: context.repo.owner,
                repo: context.repo.repo,
                issue_number: existing.number,
                body: `Recovered at ${now}. Auto-closing.`,
              });
              await github.rest.issues.update({
                owner: context.repo.owner,
                repo: context.repo.repo,
                issue_number: existing.number,
                state: 'closed',
              });
            }