-
Notifications
You must be signed in to change notification settings - Fork 3.7k
147 lines (124 loc) · 4.84 KB
/
report_404_packages.yml
File metadata and controls
147 lines (124 loc) · 4.84 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
# Workflow metadata: display name, triggers, token permissions and run
# serialization for the unreachable-package report.
name: Report unreachable packages

on:
  # Manual trigger from the Actions UI or API.
  workflow_dispatch:
  # Daily at 07:31 (GitHub evaluates cron schedules in UTC).
  schedule:
    - cron: '31 7 * * *'

# The job pushes a branch and opens a pull request, so the token needs
# write access to repository contents and to pull requests.
permissions:
  contents: write
  pull-requests: write

# Runs share one concurrency group; an in-progress run is never cancelled
# by a newer one.
concurrency:
  group: report-404-packages
  cancel-in-progress: false
jobs:
  # Single job: restore the list of already-reported URLs from the cache,
  # decide whether this run should produce a report, run the report tool,
  # open a pull request when it produced records, and refresh the URL list
  # that is saved back to the cache.
  #
  # Expression values are handed to scripts through `env:` (or the default
  # GITHUB_* variables) instead of being interpolated into the script text,
  # so a value can never be parsed as shell code.
  report_404_packages:
    runs-on: ubuntu-latest
    env:
      # Token used by the `gh` CLI in the pull request step.
      GH_TOKEN: ${{ github.token }}
    steps:
      - uses: actions/checkout@v5
        with:
          # Fetch the complete history instead of a shallow clone.
          fetch-depth: 0

      # Always roll the cache, GitHub will evict it after 7 days of inactivity.
      # The key is unique per run, so a restore normally matches through the
      # `restore-keys` prefix and a fresh entry is saved when the job ends.
      - name: Restore reported URLs cache
        id: reported_urls_cache
        uses: actions/cache@v5
        with:
          path: ./reported_urls.txt
          key: reported-urls-cache-${{ github.run_id }}
          restore-keys: |
            reported-urls-cache-

      - name: Require cache for scheduled runs
        env:
          CACHE_HIT: ${{ steps.reported_urls_cache.outputs.cache-hit }}
        run: |
          # cache-hit semantics:
          # true => exact key match
          # false => restore-key match
          # "" => true miss (nothing restored)
          # A manual run may start without a cache; it bootstraps the list.
          if [ "$GITHUB_EVENT_NAME" != "workflow_dispatch" ] && [ "$CACHE_HIT" = "" ]; then
            echo "::error::No reported_urls cache found. Run workflow_dispatch once to bootstrap."
            exit 1
          fi

      # Guarantees the cached path exists even when nothing was restored.
      - name: Ensure reported_urls.txt exists
        run: touch ./reported_urls.txt

      # Sets `run_report` to 'true' or 'false'; the later steps are gated on it.
      - name: Decide run cadence
        id: cadence
        run: |
          # Manual runs always produce a report.
          if [ "$GITHUB_EVENT_NAME" = "workflow_dispatch" ]; then
            echo "run_report=true" >> "$GITHUB_OUTPUT"
            exit 0
          fi
          # Daily schedule, but only report on first Saturday of the month.
          # Sample the clock once so both fields describe the same instant:
          # %u is the ISO weekday (6 = Saturday), %-d the unpadded day of month.
          read -r weekday day_of_month <<< "$(date -u '+%u %-d')"
          if [ "$weekday" -eq 6 ] && [ "$day_of_month" -le 7 ]; then
            echo "run_report=true" >> "$GITHUB_OUTPUT"
          else
            echo "run_report=false" >> "$GITHUB_OUTPUT"
            echo "::notice::Skipping report run: not the first Saturday of the month."
          fi

      - name: Set up Python
        if: steps.cadence.outputs.run_report == 'true'
        uses: actions/setup-python@v6
        with:
          python-version: "3.13"

      - name: Set up uv
        if: steps.cadence.outputs.run_report == 'true'
        uses: astral-sh/setup-uv@v8.1.0

      # Identity for commits made later in this job.
      - name: Configure git
        if: steps.cadence.outputs.run_report == 'true'
        run: |
          git config user.name "thecrawl bot"
          git config user.email "noreply@packagecontrol.io"

      # Captures the tool's stdout in reported_records.txt and exposes
      # `has_results`, which is 'true' only when that file is non-empty.
      # NOTE(review): --commit and --build-pr-message presumably create the
      # commit and write ./pr_title.txt and ./pr_body.md used below; confirm
      # against tools/report_404_packages.
      - name: Run 404 package report
        id: report
        if: steps.cadence.outputs.run_report == 'true'
        run: |
          uv run -m tools.report_404_packages \
            --commit \
            --build-pr-message \
            -z \
            --ignore-file ./reported_urls.txt > ./reported_records.txt
          if [ -s ./reported_records.txt ]; then
            echo "has_results=true" >> "$GITHUB_OUTPUT"
          else
            echo "has_results=false" >> "$GITHUB_OUTPUT"
          fi

      - name: No packages to report
        if: steps.cadence.outputs.run_report == 'true' && steps.report.outputs.has_results != 'true'
        run: echo "No unreachable packages to report."

      # Creates and pushes a branch whose name combines a 12-character hash
      # of the report with the run id and attempt, then exposes it as `name`.
      - name: Prepare branch
        id: branch
        if: steps.cadence.outputs.run_report == 'true' && steps.report.outputs.has_results == 'true'
        run: |
          report_hash="$(sha256sum ./reported_records.txt | awk '{print substr($1,1,12)}')"
          branch_name="bot/report-404-${report_hash}-${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT:-1}"
          git switch -c "$branch_name"
          git push --set-upstream origin "$branch_name"
          echo "name=$branch_name" >> "$GITHUB_OUTPUT"

      # Opens the pull request from the pushed branch against the ref this
      # workflow run was started on.
      - name: Open pull request
        if: steps.cadence.outputs.run_report == 'true' && steps.report.outputs.has_results == 'true'
        env:
          BASE_BRANCH: ${{ github.ref_name }}
          HEAD_BRANCH: ${{ steps.branch.outputs.name }}
        run: |
          gh pr create \
            --base "$BASE_BRANCH" \
            --head "$HEAD_BRANCH" \
            --title "$(cat ./pr_title.txt)" \
            --body-file ./pr_body.md

      # Rebuilds reported_urls.txt, the file the cache step saves at job end.
      - name: Update reported URL list for cache
        if: steps.cadence.outputs.run_report == 'true'
        run: |
          # Append URLs from this run (name\0details\0timestamp records).
          # NOTE(review): relies on the runner's awk splitting fields on NUL
          # (FS='\0'); confirm with the awk implementation on the image.
          awk -v RS='\n' -v FS='\0' 'NF >= 2 && $2 != "" { print $2 }' \
            ./reported_records.txt >> ./reported_urls.txt
          # Keep only URLs still present in workspace.json.
          if [ ! -f ./workspace.json ]; then
            echo "::error::workspace.json missing; cannot prune reported URLs."
            exit 1
          fi
          tmp_file="$(mktemp)"
          while IFS= read -r url; do
            [ -z "$url" ] && continue
            # Fixed-string match on the double-quoted URL.
            if grep -Fq "\"$url\"" ./workspace.json; then
              echo "$url" >> "$tmp_file"
            fi
          done < ./reported_urls.txt
          # De-duplicate and write the pruned list back in sorted order.
          sort -u "$tmp_file" > ./reported_urls.txt
          rm -f "$tmp_file"
          echo "Reported URLs:"
          cat ./reported_urls.txt