-
Notifications
You must be signed in to change notification settings - Fork 5k
Expand file tree
/
Copy pathclose-failing-ci-prs.sh
More file actions
executable file
·428 lines (366 loc) · 14 KB
/
close-failing-ci-prs.sh
File metadata and controls
executable file
·428 lines (366 loc) · 14 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
#!/usr/bin/env bash
# Copyright IBM Corp. 2014, 2025
# SPDX-License-Identifier: MPL-2.0
# shellcheck disable=SC2086
# Warns and closes PRs that have had failing CI for an extended period.
# Checks CI status directly via the GitHub API (does not depend on labels).
#
# - After 7 days of failing CI: leaves a warning comment
# - After 14 days of failing CI: closes the PR with a polite message
# - PRs with "ci-ignore-failure" label are skipped
set -euo pipefail

# Mutable on purpose: the -l flag switches this to "false" to run live.
DRY_RUN=true

# Policy settings. Declared readonly so that an accidental assignment later in
# the script aborts instead of silently changing when PRs get warned/closed.
readonly WARN_DAYS=7
readonly CLOSE_DAYS=14
# PRs carrying this label are never warned or closed.
readonly IGNORE_LABEL="ci-ignore-failure"
# Hidden HTML comment placed at the top of the comments this script posts and
# searched for when deciding whether a PR has already been warned.
readonly WARNING_MARKER="<!-- ci-failure-warning -->"

# Only these CI checks are considered when deciding whether to warn/close.
# Process workflows (label bots, triage, comment actions, etc.) are excluded
# so they cannot accidentally trigger PR closures.
MONITORED_CHECKS=(
  "detect"                     # breaking-change-detection + static-analysis job name
  "depscheck"                  # Vendor Dependencies Check
  "gencheck"                   # Generation Check
  "golint"                     # GoLang Linting
  "test"                       # Unit Tests + gradually-deprecated job name
  "preview-api-version-linter" # Preview ARM API Version Linter
  "shellcheck"                 # ShellCheck Scripts
  "tflint"                     # Terraform Schema Linting
  "website-lint"               # Website Linting + Validate Examples job name
  "provider-tests"             # Provider Tests
)
readonly MONITORED_CHECKS
# Reports whether a check name is one of the CI checks this script acts on.
# Globals:   MONITORED_CHECKS (read)
# Arguments: $1 - check run name or status context; compared literally
# Returns:   0 (true) if the name exactly equals an entry of MONITORED_CHECKS,
#            1 otherwise.
is_monitored_check() {
  local name=$1
  # Keep the loop variable local so a caller's global of the same name is
  # never overwritten.
  local monitored
  for monitored in "${MONITORED_CHECKS[@]}"; do
    if [[ "$name" == "$monitored" ]]; then
      return 0
    fi
  done
  return 1
}
# Command-line options:
#   -o owner   repository owner (required)
#   -r repo    repository name (required)
#   -t token   API token (optional; falls back to GH_TOKEN / GITHUB_TOKEN)
#   -l         live mode: actually comment on and close PRs
while getopts o:r:t:l flag; do
  case "${flag}" in
    o) owner=${OPTARG} ;;
    r) repo=${OPTARG} ;;
    t) token=${OPTARG} ;;
    l) DRY_RUN=false ;;
    *)
      echo "Usage: $0 -o owner -r repo [-t token] [-l]" >&2
      exit 1
      ;;
  esac
done

# Without this check a missing -o/-r only surfaces as an "unbound variable"
# abort when the API URL is built, which gives the user no hint what to fix.
if [[ -z "${owner:-}" || -z "${repo:-}" ]]; then
  echo "Error: Both -o owner and -r repo are required." >&2
  echo "Usage: $0 -o owner -r repo [-t token] [-l]" >&2
  exit 1
fi

# Use token from env if not provided via flag
token="${token:-${GH_TOKEN:-${GITHUB_TOKEN:-}}}"
if [[ -z "$token" ]]; then
  echo "Error: No token provided. Use -t flag or set GH_TOKEN/GITHUB_TOKEN env var." >&2
  exit 1
fi

API_BASE="https://api.github.com/repos/${owner}/${repo}"

echo "=== Close PRs With Failing CI ==="
echo "Repository: ${owner}/${repo}"
echo "Warn after: ${WARN_DAYS} days of failing CI"
echo "Close after: ${CLOSE_DAYS} days of failing CI"
echo "Ignore label: ${IGNORE_LABEL}"
if [[ "$DRY_RUN" == "true" ]]; then
  echo "Mode: DRY RUN (no changes will be made)"
else
  echo "Mode: LIVE"
fi
echo ""
# Fetch all open PRs, 100 per page, into all_prs (one compact JSON object per
# element).
echo "Fetching open PRs..."
all_prs=()
page=1
while :; do
  prs_json=$(curl -s -L \
    -H "Accept: application/vnd.github+json" \
    -H "Authorization: Bearer $token" \
    -H "X-GitHub-Api-Version: 2022-11-28" \
    "${API_BASE}/pulls?state=open&per_page=100&page=${page}")

  # A successful listing is a JSON array. Anything else (an error object for a
  # bad token or rate limit, or a non-JSON body) would otherwise have a
  # non-zero "length", be pushed into all_prs as garbage, and keep this loop
  # requesting pages forever.
  if ! jq -e 'type == "array"' <<< "$prs_json" > /dev/null 2>&1; then
    api_message=$(jq -r '.message? // empty' <<< "$prs_json" 2>/dev/null || true)
    echo "Error: unexpected response while listing open PRs (page ${page}): ${api_message:-no error message in response}" >&2
    exit 1
  fi

  count=$(jq length <<< "$prs_json")
  if [[ "$count" -eq 0 ]]; then
    break
  fi

  while IFS= read -r pr; do
    all_prs+=("$pr")
  done < <(jq -c '.[]' <<< "$prs_json")

  # A short page is the last page; skip the extra empty request.
  if [[ "$count" -lt 100 ]]; then
    break
  fi
  page=$((page + 1))
done

total_prs=${#all_prs[@]}
echo "Found ${total_prs} open PR(s)"
echo ""
# Check CI status for a commit SHA.
# Globals:   API_BASE, token (read); calls is_monitored_check
# Arguments: $1 - commit SHA to inspect
#            $2 - PR updated_at timestamp, used when no failure time is known
# Outputs "status|timestamp|failed_checks" where:
#   status        = "failure" or "ok"
#   timestamp     = earliest failure time (or PR updated_at as fallback)
#   failed_checks = comma-separated list of failed check/status names
# When nothing monitored is failing the output is exactly "ok||".
check_ci_status() {
  local sha=$1
  local pr_updated_at=$2
  local has_failure=false
  local earliest_failure=""
  local failed_names=""
  local nl=$'\n'
  # Newline-separated "name<TAB>timestamp" records, one per failing status or
  # check run (monitored or not). Each record carries that entry's own
  # timestamp, so two jobs that share a name are both taken into account when
  # looking for the earliest failure.
  local records=""
  local batch

  # Check combined commit status (legacy status API)
  local status_json
  status_json=$(curl -s -L \
    -H "Accept: application/vnd.github+json" \
    -H "Authorization: Bearer $token" \
    -H "X-GitHub-Api-Version: 2022-11-28" \
    "${API_BASE}/commits/${sha}/status")

  # Best effort: a response without a usable .statuses list yields no records.
  records=$(jq -r '
    .statuses[]
    | select(.state == "failure" or .state == "error")
    | [(.context // ""), (.updated_at // "")]
    | join("\t")
  ' <<< "$status_json" 2>/dev/null || true)

  # Check check runs (checks API) with pagination
  local check_page=1
  local check_runs_json
  local page_count
  while :; do
    check_runs_json=$(curl -s -L \
      -H "Accept: application/vnd.github+json" \
      -H "Authorization: Bearer $token" \
      -H "X-GitHub-Api-Version: 2022-11-28" \
      "${API_BASE}/commits/${sha}/check-runs?per_page=100&page=${check_page}")

    page_count=$(jq '.check_runs | length' <<< "$check_runs_json")
    if [[ "$page_count" -eq 0 ]]; then
      break
    fi

    # Failed and cancelled runs both count as failing CI.
    batch=$(jq -r '
      .check_runs[]
      | select(.conclusion == "failure" or .conclusion == "cancelled")
      | [(.name // ""), (.completed_at // "")]
      | join("\t")
    ' <<< "$check_runs_json")
    if [[ -n "$batch" ]]; then
      records="${records}${records:+$nl}${batch}"
    fi

    if [[ "$page_count" -lt 100 ]]; then
      break
    fi
    check_page=$((check_page + 1))
  done

  # Single pass over everything collected: keep monitored checks only, track
  # the smallest timestamp (ISO-8601 UTC strings sort chronologically) and
  # build the name list in the order the API returned the entries.
  local name
  local ts
  while IFS=$'\t' read -r name ts; do
    if [[ -z "$name" ]]; then
      continue
    fi
    if ! is_monitored_check "$name"; then
      continue
    fi
    has_failure=true
    if [[ -n "$ts" ]]; then
      if [[ -z "$earliest_failure" ]] || [[ "$ts" < "$earliest_failure" ]]; then
        earliest_failure="$ts"
      fi
    fi
    failed_names="${failed_names}${failed_names:+,}${name}"
  done <<< "$records"

  if [[ "$has_failure" == "true" ]]; then
    echo "failure|${earliest_failure:-$pr_updated_at}|${failed_names}"
  else
    echo "ok||"
  fi
}
# Map a failed check name to actionable guidance.
# Arguments: $1 - check name; either the short job id (e.g. "golint") or the
#                 workflow display name (e.g. "GoLang Linting")
# Outputs:   one line of markdown advice on stdout; names without a specific
#            entry get a generic pointer to the CI logs.
get_check_guidance() {
  local check_name=$1
  case "$check_name" in
    depscheck|"Vendor Dependencies Check")
      echo "Run \`make depscheck\`. Do not modify files in the \`vendor/\` directory directly - instead update dependencies in \`go.mod\` and run \`go mod vendor\`."
      ;;
    website-lint|"Website Linting")
      echo "Run \`make website-lint\` and \`make document-validate\` locally. Check your documentation files under \`website/\` for formatting issues."
      ;;
    gencheck|"Generation Check")
      echo "Run \`make generate\` to regenerate any auto-generated code, then commit the changes."
      ;;
    golint|"GoLang Linting")
      echo "Run the Go linter locally with \`golangci-lint run ./internal/...\` and fix any reported issues."
      ;;
    tflint|"Terraform Schema Linting")
      echo "Run \`make tflint\` locally and fix any Terraform schema issues in your resource/data source definitions."
      ;;
    detect|"Breaking Schema Changes")
      echo "Your changes contain breaking schema changes. Please review the [breaking changes guide](contributing/topics/guide-breaking-changes.md) and ensure any breaking changes are behind the appropriate feature flag."
      ;;
    test|"Unit Tests")
      echo "Run \`make test\` locally to reproduce and fix the failing unit tests."
      ;;
    "Static Analysis")
      echo "Run \`bash ./scripts/run-static-analysis.sh\` locally and fix any reported issues."
      ;;
    shellcheck|"ShellCheck Scripts")
      echo "Run \`make shellcheck\` to check shell scripts for issues."
      ;;
    "Validate Examples")
      echo "Run \`make validate-examples\` to check that your example configurations are valid."
      ;;
    # The short job id is accepted as well, like every other entry that has
    # one; without it this check could only ever get the generic message.
    preview-api-version-linter|"Preview API Version Linter")
      echo "Check that any API version references are not using preview versions unless explicitly required."
      ;;
    *)
      echo "Check the CI logs for details on this failure."
      ;;
  esac
}
# Build a guidance section from a comma-separated list of failed check names.
# Arguments: $1 - comma-separated check names; may be empty
# Outputs:   an empty line for empty input; otherwise a markdown heading
#            followed by one "- **name**: advice" bullet per distinct name, in
#            first-seen order. Advice text comes from get_check_guidance.
build_guidance() {
  local failed_checks=$1
  if [[ -z "$failed_checks" ]]; then
    echo ""
    return
  fi

  local guidance=$'\n\n**Failing checks and how to fix them:**\n'
  # Names already emitted, each wrapped as "|name|" so a substring test only
  # matches whole names.
  local seen=""
  # All working variables are local so nothing leaks into the caller's scope.
  local -a checks
  local check
  local fix
  IFS=',' read -ra checks <<< "$failed_checks"
  for check in "${checks[@]}"; do
    # Deduplicate with a builtin pattern match; no external pipeline is
    # spawned per item.
    if [[ "$seen" == *"|${check}|"* ]]; then
      continue
    fi
    seen="${seen}|${check}|"
    fix=$(get_check_guidance "$check")
    guidance+=$'\n'"- **${check}**: ${fix}"
  done
  echo "$guidance"
}
# Check if we already left a warning comment (uses a hidden HTML marker).
# Globals:   API_BASE, token, WARNING_MARKER (read)
# Arguments: $1 - PR number
# Outputs:   number of comments on the PR whose body contains WARNING_MARKER
# Returns:   non-zero if a response cannot be processed by jq
has_warning_comment() {
  local pr_number=$1
  local total=0
  local page=1
  local comments_json
  local counts
  local page_size
  local page_hits

  # Walk every page: on a busy PR the marker comment may sit beyond the first
  # 100 comments, and missing it would make the script warn again.
  while :; do
    comments_json=$(curl -s -L \
      -H "Accept: application/vnd.github+json" \
      -H "Authorization: Bearer $token" \
      -H "X-GitHub-Api-Version: 2022-11-28" \
      "${API_BASE}/issues/${pr_number}/comments?per_page=100&page=${page}")

    # The marker is passed with --arg instead of being spliced into the jq
    # program text, and a null body is treated as an empty string.
    # Emits "<comments on this page> <comments containing the marker>".
    counts=$(jq -r --arg marker "$WARNING_MARKER" '
      [length, ([.[] | select((.body // "") | contains($marker))] | length)]
      | map(tostring)
      | join(" ")
    ' <<< "$comments_json") || return

    read -r page_size page_hits <<< "$counts"
    total=$((total + ${page_hits:-0}))

    # A short (or empty) page is the last one.
    if [[ "${page_size:-0}" -lt 100 ]]; then
      break
    fi
    page=$((page + 1))
  done

  echo "$total"
}
# Process each PR
warn_count=0
close_count=0
skip_count=0
failing_count=0

# Days a warned PR has left before it is closed. Derived from the thresholds
# so the comment text stays correct if WARN_DAYS/CLOSE_DAYS are changed.
grace_days=$((CLOSE_DAYS - WARN_DAYS))

# Posts a comment on a PR.
# Arguments: $1 - PR number, $2 - comment body (markdown)
# Returns:   non-zero if the payload cannot be built or the request fails
#            (curl -f turns HTTP errors into a non-zero exit status).
post_pr_comment() {
  local number=$1
  local body=$2
  local payload
  # Use jq to safely build JSON
  payload=$(jq -n --arg body "$body" '{"body": $body}') || return
  curl -s -f -L -X POST \
    -H "Accept: application/vnd.github+json" \
    -H "Authorization: Bearer $token" \
    -H "X-GitHub-Api-Version: 2022-11-28" \
    "${API_BASE}/issues/${number}/comments" \
    -d "$payload" > /dev/null
}

# Closes a PR.
# Arguments: $1 - PR number
# Returns:   non-zero if the request fails (including HTTP errors).
close_pr() {
  local number=$1
  curl -s -f -L -X PATCH \
    -H "Accept: application/vnd.github+json" \
    -H "Authorization: Bearer $token" \
    -H "X-GitHub-Api-Version: 2022-11-28" \
    "${API_BASE}/pulls/${number}" \
    -d '{"state":"closed"}' > /dev/null
}

# The ${arr[@]+...} form keeps bash older than 4.4 from aborting under
# `set -u` when there are no open PRs at all.
for pr in ${all_prs[@]+"${all_prs[@]}"}; do
  draft=$(jq -r '.draft' <<< "$pr")
  # Skip draft PRs
  if [[ "$draft" == "true" ]]; then
    continue
  fi

  pr_number=$(jq -r '.number' <<< "$pr")
  pr_title=$(jq -r '.title' <<< "$pr")
  head_sha=$(jq -r '.head.sha' <<< "$pr")
  updated_at=$(jq -r '.updated_at' <<< "$pr")
  labels=$(jq -r '.labels[].name' <<< "$pr" 2>/dev/null || echo "")
  pr_author=$(jq -r '.user.login' <<< "$pr")

  # Skip PRs with ignore label (whole-line, fixed-string match)
  if grep -qxF -- "$IGNORE_LABEL" <<< "$labels"; then
    continue
  fi

  # Check CI status - returns "status|timestamp|failed_checks"
  ci_result=$(check_ci_status "$head_sha" "$updated_at")
  IFS='|' read -r ci_status ci_failed_since ci_failed_checks <<< "$ci_result"
  if [[ "$ci_status" != "failure" ]]; then
    continue
  fi

  failing_count=$((failing_count + 1))
  echo "PR #${pr_number} \"${pr_title}\""

  if [[ -z "$ci_failed_since" ]]; then
    echo " ↳ CI failing but could not determine since when, skipping"
    skip_count=$((skip_count + 1))
    continue
  fi

  # Calculate days since CI started failing
  # Handle both GNU date (Linux) and BSD date (macOS)
  failed_epoch=$(date -d "$ci_failed_since" +%s 2>/dev/null || date -jf "%Y-%m-%dT%H:%M:%SZ" "$ci_failed_since" +%s 2>/dev/null || date -jf "%Y-%m-%dT%T%z" "$ci_failed_since" +%s)
  now_epoch=$(date -u +%s)
  days_since=$(( (now_epoch - failed_epoch) / 86400 ))
  echo " ↳ CI failing since: ${ci_failed_since} (${days_since} days)"
  if [[ -n "$ci_failed_checks" ]]; then
    echo " ↳ Failed checks: ${ci_failed_checks}"
  fi

  # Build guidance text for the comment
  guidance=$(build_guidance "$ci_failed_checks")

  # Close if past close threshold
  # NOTE(review): a PR already past CLOSE_DAYS is closed even if no warning
  # comment was ever posted on it - confirm this is intended.
  if [[ "$days_since" -ge "$CLOSE_DAYS" ]]; then
    echo " ↳ CI failing for ${days_since} days -> CLOSING"
    if [[ "$DRY_RUN" == "false" ]]; then
      comment_body="${WARNING_MARKER}
Thank you for your contribution @${pr_author}. Unfortunately, we are unable to review or merge this pull request as the CI checks have been failing for more than ${CLOSE_DAYS} days.
Please feel free to reopen this PR once the CI issues have been resolved.${guidance}
Thank you for your understanding!"
      # Comment first and only close when the explanation was posted, so a PR
      # is never closed silently; report success only when both calls worked.
      if post_pr_comment "$pr_number" "$comment_body" && close_pr "$pr_number"; then
        close_count=$((close_count + 1))
        echo " ↳ Closed and commented"
      else
        echo " ↳ ERROR: GitHub API request failed; PR #${pr_number} may not have been closed" >&2
      fi
    else
      close_count=$((close_count + 1))
      echo " ↳ (dry run - would close and comment)"
    fi
  # Warn if past warn threshold and not already warned
  elif [[ "$days_since" -ge "$WARN_DAYS" ]]; then
    existing_warnings=$(has_warning_comment "$pr_number")
    if [[ "$existing_warnings" -gt 0 ]]; then
      echo " ↳ Already warned, waiting for close threshold"
      skip_count=$((skip_count + 1))
      continue
    fi

    echo " ↳ CI failing for ${days_since} days -> WARNING"
    if [[ "$DRY_RUN" == "false" ]]; then
      comment_body="${WARNING_MARKER}
Hi @${pr_author}, we have noticed that the CI on this pull request has been failing for ${WARN_DAYS} days.
If the CI failures are not resolved within the next ${grace_days} days, we will close this pull request.${guidance}
If you need help, please leave a comment and we will do our best to assist. Thank you!"
      if post_pr_comment "$pr_number" "$comment_body"; then
        warn_count=$((warn_count + 1))
        echo " ↳ Warning comment posted"
      else
        echo " ↳ ERROR: GitHub API request failed; warning was not posted on PR #${pr_number}" >&2
      fi
    else
      warn_count=$((warn_count + 1))
      echo " ↳ (dry run - would post warning comment)"
    fi
  else
    echo " ↳ Under threshold (${days_since}/${WARN_DAYS} days), skipping"
    skip_count=$((skip_count + 1))
  fi
done
# Final report: a blank separator line, then one line per counter.
printf '\n'
printf '%s\n' \
  "=== Summary ===" \
  "Total PRs checked: ${total_prs}" \
  "PRs with failing CI: ${failing_count}" \
  "Warned: ${warn_count}" \
  "Closed: ${close_count}" \
  "Skipped: ${skip_count}"
# Remind the operator when nothing was actually changed on GitHub.
case "$DRY_RUN" in
  true) printf '%s\n' "(dry run - no actual changes made)" ;;
esac
printf '%s\n' "Done."