Labels #6640
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# WARNING:
# When extending this action, be aware that $GITHUB_TOKEN allows some write
# access to the GitHub API. This means that it should not evaluate user input in
# a way that allows code injection.

name: Labels

on:
  schedule:
    - cron: '07,17,27,37,47,57 * * * *'
  workflow_call:
    inputs:
      headBranch:
        required: true
        type: string
    secrets:
      NIXPKGS_CI_APP_PRIVATE_KEY:
        required: true
  workflow_dispatch:

concurrency:
  # This explicitly avoids using `run_id` for the concurrency key to make sure that only
  # *one* scheduled run can run at a time.
  group: labels-${{ github.workflow }}-${{ github.event_name }}-${{ github.event.pull_request.number }}
  # PR-triggered runs will be cancelled, but scheduled runs will be queued.
  cancel-in-progress: ${{ github.event_name != 'schedule' }}

# These permissions only apply to the fallback token, which is used when no
# GitHub App is configured. This happens when testing in forks without setting
# up that app. Labels will most likely not exist in forks yet, so the issues
# permission is added only here.
permissions:
  issues: write # needed to create *new* labels
  pull-requests: write

defaults:
  run:
    shell: bash
jobs:
  update:
    runs-on: ubuntu-24.04-arm
    # Scheduled runs are skipped for repositories that are not owned by NixOS;
    # all other triggers run regardless of the owner.
    if: github.event_name != 'schedule' || github.repository_owner == 'NixOS'
    steps:
      - name: Install dependencies
        run: npm install @actions/artifact bottleneck

      # Use a GitHub App, because it has much higher rate limits: 12,500 instead of 5,000 req / hour.
      - uses: actions/create-github-app-token@df432ceedc7162793a195dd1713ff69aefc7379e # v2.0.6
        if: vars.NIXPKGS_CI_APP_ID
        id: app-token
        with:
          app-id: ${{ vars.NIXPKGS_CI_APP_ID }}
          private-key: ${{ secrets.NIXPKGS_CI_APP_PRIVATE_KEY }}
          # No issues: write permission here, because labels in Nixpkgs should
          # be created explicitly via the UI with color and description.
          permission-pull-requests: write

      - name: Log current API rate limits
        env:
          GH_TOKEN: ${{ steps.app-token.outputs.token || github.token }}
        run: gh api /rate_limit | jq

      - name: Labels from API data and Eval results
        uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
        with:
          github-token: ${{ steps.app-token.outputs.token || github.token }}
          script: |
const Bottleneck = require('bottleneck')
const path = require('node:path')
const { DefaultArtifactClient } = require('@actions/artifact')
const { readFile } = require('node:fs/promises')

const artifactClient = new DefaultArtifactClient()

// Counters that are reported in the summary notice at the end of a scheduled run.
const stats = {
  prs: 0,
  requests: 0,
  artifacts: 0
}
// Rate-Limiting and Throttling, see for details:
//   https://github.com/octokit/octokit.js/issues/1069#throttling
//   https://docs.github.com/en/rest/using-the-rest-api/best-practices-for-using-the-rest-api
const allLimits = new Bottleneck({
  // Avoid concurrent requests
  maxConcurrent: 1,
  // Will be updated with first `updateReservoir()` call below.
  reservoir: 0
})
// Pause between mutative requests
const writeLimits = new Bottleneck({ minTime: 1000 }).chain(allLimits)

// Route the requests made through `github` via the limiters above and count them.
github.hook.wrap('request', async (request, options) => {
  // Requests to the /rate_limit endpoint do not count against the rate limit.
  if (options.url === '/rate_limit') return request(options)
  // Search requests are in a different resource group, which allows 30 requests / minute.
  // We do less than a handful each run, so not implementing throttling for now.
  if (options.url.startsWith('/search/')) return request(options)

  stats.requests++

  // Mutative requests additionally wait for `writeLimits`, which is chained to `allLimits`.
  const isWrite = ['POST', 'PUT', 'PATCH', 'DELETE'].includes(options.method)
  const limiter = isWrite ? writeLimits : allLimits
  return limiter.schedule(request.bind(null, options))
})
/**
 * Refreshes the request budget of `allLimits` from the current core rate limit.
 *
 * A failed rate limit request is logged via `core.error` and otherwise ignored,
 * so the reservoir is left untouched until a later call succeeds.
 *
 * @returns {Promise<void>}
 */
async function updateReservoir() {
  let response
  try {
    response = await github.rest.rateLimit.get()
  } catch (err) {
    core.error(`Failed updating reservoir:\n${err}`)
    // Keep retrying on failed rate limit requests instead of exiting the script early.
    return
  }
  // Always keep 1000 spare requests for other jobs to do their regular duty.
  // They normally use below 100, so 1000 is *plenty* of room to work with.
  const reservoir = Math.max(0, response.data.resources.core.remaining - 1000)
  core.info(`Updating reservoir to: ${reservoir}`)
  allLimits.updateSettings({ reservoir })
}
// The limiter starts with an empty reservoir, so fill it before anything else runs.
await updateReservoir()
// Update remaining requests every minute to account for other jobs running in parallel.
const reservoirUpdater = setInterval(updateReservoir, 60 * 1000)
/**
 * Computes the managed labels for a single pull request and applies them if they changed.
 *
 * Labels are derived from the pull request itself, its reviews and timeline events and,
 * when a non-expired `comparison` artifact of the last eval run exists, from that artifact.
 * On `pull_request` events the result is only logged, never written.
 *
 * @param {{ number: number, updated_at: string, created_at: string, html_url: string }} item
 *   The pull request to label; only the listed fields are read from it.
 * @returns {Promise<void>}
 * @throws {Error} `Labeling #<number> failed.` with the original error attached as `cause`.
 */
async function handle(item) {
  try {
    // Logs one line for this PR and hands `skip` back, so a call can double as a guard.
    const log = (k, v, skip) => {
      core.info(`#${item.number} - ${k}: ${v}` + (skip ? ' (skipped)' : ''))
      return skip
    }

    log('Last updated at', item.updated_at)
    stats.prs++
    log('URL', item.html_url)

    const pull_number = item.number
    const issue_number = item.number

    // This API request is important for the merge-conflict label, because it triggers the
    // creation of a new test merge commit. This is needed to actually determine the state of a PR.
    const pull_request = (await github.rest.pulls.get({
      ...context.repo,
      pull_number
    })).data

    const run_id = (await github.rest.actions.listWorkflowRuns({
      ...context.repo,
      workflow_id: 'pr.yml',
      event: 'pull_request_target',
      // In pull_request contexts the workflow is still running.
      status: context.payload.pull_request ? undefined : 'success',
      exclude_pull_requests: true,
      head_sha: pull_request.head.sha
    })).data.workflow_runs[0]?.id ??
      // TODO: Remove this after 2025-09-17, at which point all eval.yml artifacts will have expired.
      (await github.rest.actions.listWorkflowRuns({
        ...context.repo,
        // In older PRs, we need eval.yml instead of pr.yml.
        workflow_id: 'eval.yml',
        event: 'pull_request_target',
        status: 'success',
        exclude_pull_requests: true,
        head_sha: pull_request.head.sha
      })).data.workflow_runs[0]?.id

    // Newer PRs might not have run Eval to completion, yet.
    // Older PRs might not have an eval.yml workflow, yet.
    // In either case we continue without fetching an artifact on a best-effort basis.
    log('Last eval run', run_id ?? '<n/a>')

    const artifact = run_id && (await github.rest.actions.listWorkflowRunArtifacts({
      ...context.repo,
      run_id,
      name: 'comparison'
    })).data.artifacts[0]

    // Instead of checking the boolean artifact.expired, we will give us a minute to
    // actually download the artifact in the next step and avoid that race condition.
    // Older PRs, where the workflow run was already eval.yml, but the artifact was not
    // called "comparison", yet, will skip the download.
    const expired = !artifact || new Date(artifact?.expires_at ?? 0) < new Date(new Date().getTime() + 60 * 1000)
    log('Artifact expires at', artifact?.expires_at ?? '<n/a>')
    if (!expired) {
      stats.artifacts++
      await artifactClient.downloadArtifact(artifact.id, {
        findBy: {
          repositoryName: context.repo.repo,
          repositoryOwner: context.repo.owner,
          token: core.getInput('github-token')
        },
        path: path.resolve(pull_number.toString()),
        expectedHash: artifact.digest
      })
    }

    // Create a map (Label -> Boolean) of all currently set labels.
    // Each label is set to True and can be disabled later.
    const before = Object.fromEntries(
      (await github.paginate(github.rest.issues.listLabelsOnIssue, {
        ...context.repo,
        issue_number
      }))
        .map(({ name }) => [name, true])
    )

    // IDs of all users who submitted at least one approving review.
    const approvals = new Set(
      (await github.paginate(github.rest.pulls.listReviews, {
        ...context.repo,
        pull_number
      }))
        .filter(review => review.state === 'APPROVED')
        .map(review => review.user?.id)
    )

    const latest_event_at = new Date(
      (await github.paginate(
        github.rest.issues.listEventsForTimeline,
        {
          ...context.repo,
          issue_number,
          per_page: 100
        }
      ))
        .filter(({ event }) => [
          // These events are hand-picked from:
          //   https://docs.github.com/en/rest/using-the-rest-api/issue-event-types?apiVersion=2022-11-28
          // Each of those causes a PR/issue to *not* be considered as stale anymore.
          // Most of these use created_at.
          'assigned',
          'commented', // uses updated_at, because that could be > created_at
          'committed', // uses committer.date
          'head_ref_force_pushed',
          'milestoned',
          'pinned',
          'ready_for_review',
          'renamed',
          'reopened',
          'review_dismissed',
          'review_requested',
          'reviewed', // uses submitted_at
          'unlocked',
          'unmarked_as_duplicate',
        ].includes(event))
        .map(({ created_at, updated_at, committer, submitted_at }) => new Date(updated_at ?? created_at ?? submitted_at ?? committer.date))
        // Reverse sort by date value. The default sort() sorts by string representation, which is bad for dates.
        .sort((a, b) => b - a)
        // Without any matching event, fall back to the creation date of the PR.
        .at(0) ?? item.created_at
    )

    const stale_at = new Date(new Date().setDate(new Date().getDate() - 180))

    // After creation of a Pull Request, `merge_commit_sha` will be null initially:
    // The very first merge commit will only be calculated after a little while.
    // To avoid labeling the PR as conflicted before that, we wait a few minutes.
    // This is intentionally less than the time that Eval takes, so that the label job
    // running after Eval can indeed label the PR as conflicted if that is the case.
    const merge_commit_sha_valid = new Date() - new Date(pull_request.created_at) > 3 * 60 * 1000

    // Manage most of the labels, without eval results
    const after = Object.assign(
      {},
      before,
      {
        // We intentionally don't use the mergeable or mergeable_state attributes.
        // Those have an intermediate state while the test merge commit is created.
        // This doesn't work well for us, because we might have just triggered another
        // test merge commit creation by requesting the pull request via the API at the
        // start of this function.
        // The attribute merge_commit_sha keeps the old value of null or the hash *until*
        // the new test merge commit has either successfully been created or failed to do so.
        // This essentially means we are updating the merge conflict label in two steps:
        // On the first pass of the day, we just fetch the pull request, which triggers
        // the creation. At this stage, the label is likely not updated, yet.
        // The second pass will then read the result from the first pass and set the label.
        '2.status: merge conflict': merge_commit_sha_valid && !pull_request.merge_commit_sha,
        '2.status: stale': !before['1.severity: security'] && latest_event_at < stale_at,
        '12.approvals: 1': approvals.size === 1,
        '12.approvals: 2': approvals.size === 2,
        '12.approvals: 3+': approvals.size >= 3,
        '12.first-time contribution':
          [ 'NONE', 'FIRST_TIMER', 'FIRST_TIME_CONTRIBUTOR' ].includes(pull_request.author_association),
      }
    )

    // Manage labels based on eval results
    if (!expired) {
      const maintainers = new Set(Object.keys(
        JSON.parse(await readFile(`${pull_number}/maintainers.json`, 'utf-8'))
      ).map(m => Number.parseInt(m, 10)))

      const evalLabels = JSON.parse(await readFile(`${pull_number}/changed-paths.json`, 'utf-8')).labels

      Object.assign(
        after,
        // Ignore `evalLabels` if it's an array.
        // This can happen for older eval runs, before we switched to objects.
        // The old eval labels would have been set by the eval run,
        // so now they'll be present in `before`.
        // TODO: Simplify once old eval results have expired (~2025-10)
        (Array.isArray(evalLabels) ? undefined : evalLabels),
        {
          '12.approved-by: package-maintainer': Array.from(maintainers).some(m => approvals.has(m)),
        }
      )
    }

    // No need for an API request, if all labels are the same.
    const hasChanges = Object.keys(after).some(name => (before[name] ?? false) !== after[name])
    if (log('Has changes', hasChanges, !hasChanges))
      return

    // Skipping labeling on a pull_request event, because we have no privileges.
    const labels = Object.entries(after).filter(([, value]) => value).map(([name]) => name)
    if (log('Set labels', labels, context.eventName === 'pull_request'))
      return

    await github.rest.issues.setLabels({
      ...context.repo,
      issue_number,
      labels
    })
  } catch (cause) {
    throw new Error(`Labeling #${item.number} failed.`, { cause })
  }
}
// Entry point: label the triggering pull request, or, for runs without a pull request
// payload, all recently updated pull requests plus one rotating page of all open ones.
try {
  if (context.payload.pull_request) {
    await handle(context.payload.pull_request)
  } else {
    const workflowData = (await github.rest.actions.listWorkflowRuns({
      ...context.repo,
      workflow_id: 'labels.yml',
      event: 'schedule',
      status: 'success',
      exclude_pull_requests: true,
      per_page: 1
    })).data

    // Go back as far as the last successful run of this workflow to make sure
    // we are not leaving anyone behind on GHA failures.
    // Defaults to go back 1 hour on the first run.
    const cutoff = new Date(workflowData.workflow_runs[0]?.created_at ?? new Date().getTime() - 1 * 60 * 60 * 1000)
    core.info('cutoff timestamp: ' + cutoff.toISOString())

    const updatedItems = await github.paginate(
      github.rest.search.issuesAndPullRequests,
      {
        q: [
          `repo:"${process.env.GITHUB_REPOSITORY}"`,
          'type:pr',
          'is:open',
          `updated:>=${cutoff.toISOString()}`
        ].join(' AND '),
        // TODO: Remove in 2025-10, when it becomes the default.
        advanced_search: true
      }
    )

    // The search endpoint only allows fetching the first 1000 records, but the
    // pull request list endpoint does not support counting the total number
    // of results.
    // Thus, we use /search for counting and /pulls for reading the response.
    const { total_count: total_pulls } = (await github.rest.search.issuesAndPullRequests({
      q: [
        `repo:"${process.env.GITHUB_REPOSITORY}"`,
        'type:pr',
        'is:open'
      ].join(' AND '),
      sort: 'created',
      direction: 'asc',
      // TODO: Remove in 2025-10, when it becomes the default.
      advanced_search: true,
      per_page: 1
    })).data
    const { total_count: total_runs } = workflowData

    // At least one page, so that the modulo below is well-defined without any open PRs.
    const total_pages = Math.max(1, Math.ceil(total_pulls / 100))

    const allPulls = (await github.rest.pulls.list({
      ...context.repo,
      state: 'open',
      sort: 'created',
      direction: 'asc',
      per_page: 100,
      // We iterate through pages of 100 items across scheduled runs. With currently ~7000 open PRs and
      // up to 6*24=144 scheduled runs per day, we hit every PR twice each day.
      // We might not hit every PR on one iteration, because the pages will shift slightly when
      // PRs are closed or merged. We assume this to be OK on the bigger scale, because a PR which was
      // missed once, would have to move through the whole page to be missed again. This is very unlikely,
      // so it should certainly be hit on the next iteration.
      // TODO: Evaluate after a while, whether the above holds still true and potentially implement
      // an overlap between runs.
      // Pages of the REST API are numbered from 1, while the remainder starts at 0. Without
      // the offset the first page would be requested twice per cycle and the last one never.
      page: (total_runs % total_pages) + 1
    })).data

    // Some items might be in both search results, so filtering out duplicates as well.
    // The first occurrence of each PR number wins.
    const seen = new Set()
    const items = [...updatedItems, ...allPulls].filter(({ number }) => {
      if (seen.has(number)) return false
      seen.add(number)
      return true
    })

    // Process all items, even if some of them fail, and report every failure.
    const results = await Promise.allSettled(items.map(handle))
    for (const { status, reason } of results) {
      if (status !== 'rejected') continue
      core.setFailed(`${reason.message}\n${reason.cause?.stack ?? reason.stack}`)
    }

    core.notice(`Processed ${stats.prs} PRs, made ${stats.requests + stats.artifacts} API requests and downloaded ${stats.artifacts} artifacts.`)
  }
} finally {
  // Stop the periodic reservoir refresh, otherwise the timer keeps running after the work is done.
  clearInterval(reservoirUpdater)
}
- name: Log current API rate limits
  env:
    GH_TOKEN: ${{ steps.app-token.outputs.token || github.token }}
  run: gh api /rate_limit | jq

- uses: actions/labeler@8558fd74291d67161a8a78ce36a881fa63b766a9 # v5.0.0
  name: Labels from touched files
  if: |
    github.event_name == 'pull_request_target' &&
    !contains(fromJSON(inputs.headBranch).type, 'development')
  with:
    # Fall back to the workflow token when no app token was created, like the other steps do.
    repo-token: ${{ steps.app-token.outputs.token || github.token }}
    configuration-path: .github/labeler.yml # default
    sync-labels: true

- uses: actions/labeler@8558fd74291d67161a8a78ce36a881fa63b766a9 # v5.0.0
  name: Labels from touched files (no sync)
  if: |
    github.event_name == 'pull_request_target' &&
    !contains(fromJSON(inputs.headBranch).type, 'development')
  with:
    # Fall back to the workflow token when no app token was created, like the other steps do.
    repo-token: ${{ steps.app-token.outputs.token || github.token }}
    configuration-path: .github/labeler-no-sync.yml
    sync-labels: false

- uses: actions/labeler@8558fd74291d67161a8a78ce36a881fa63b766a9 # v5.0.0
  name: Labels from touched files (development branches)
  # Development branches like staging-next, haskell-updates and python-updates get special labels.
  # This is to avoid the mass of labels there, which is mostly useless - and really annoying for
  # the backport labels.
  if: |
    github.event_name == 'pull_request_target' &&
    contains(fromJSON(inputs.headBranch).type, 'development')
  with:
    # Fall back to the workflow token when no app token was created, like the other steps do.
    repo-token: ${{ steps.app-token.outputs.token || github.token }}
    configuration-path: .github/labeler-development-branches.yml
    sync-labels: true

- name: Log current API rate limits
  env:
    GH_TOKEN: ${{ steps.app-token.outputs.token || github.token }}
  run: gh api /rate_limit | jq