55Samples in-progress and queued GitHub-hosted runner jobs for a repo and
66returns a structured snapshot suitable for the health dashboard.
77
8- The Slang org runs on the public-repo 20-concurrent-runner cap, shared
9- across every hosted-runner label (ubuntu-*, macos-*, windows-*, etc.).
10- When usage approaches the cap, gating jobs starve and the merge queue
11- stalls. See shader-slang/slang#11142 for background.
8+ The Slang org's GitHub-hosted-runner concurrency cap is shared across
9+ every hosted-runner label (ubuntu-*, macos-*, windows-*, etc.) and is
10+ set by the org's GitHub plan tier, not by anything we configure. When
11+ usage approaches the cap, gating jobs starve and the merge queue stalls.
12+ See shader-slang/slang#11142 for background.
13+
14+ The cap is queried dynamically from the org's plan (see
15+ `fetch_org_plan_cap`) so that a plan upgrade — e.g. Free (20) -> Team
16+ (60) — is picked up automatically instead of silently reporting against
17+ a stale hard-coded number. `DEFAULT_HOSTED_RUNNER_CAP` is only the
18+ fallback used when that query fails.
1219
1320CLI usage:
1421 python3 ci_hosted_runner_usage.py
15- python3 ci_hosted_runner_usage.py --repo shader-slang/slang --cap 20
22+ python3 ci_hosted_runner_usage.py --repo shader-slang/slang # cap auto-detected
23+ python3 ci_hosted_runner_usage.py --repo shader-slang/slang --cap 60
1624"""
1725
1826import argparse
2230from concurrent .futures import ThreadPoolExecutor , as_completed
2331
2432sys .path .insert (0 , os .path .join (os .path .dirname (os .path .abspath (__file__ )), ".." ))
25- from gh_api import gh_api_list
33+ from gh_api import gh_api , gh_api_list
2634
2735DEFAULT_REPO = "shader-slang/slang"
2836
29- # The Slang org runs on the standard public-repo concurrent-runner cap
30- # of 20 hosted runners shared across all labels. The cap is per-org,
31- # not per-label.
32- DEFAULT_HOSTED_RUNNER_CAP = 20
# Concurrent GitHub-hosted runner limit, keyed by lower-case GitHub plan
# tier name. Each value is the total number of hosted runners one account
# may run simultaneously, summed over every runner label -- the limit
# applies to the account as a whole, not to a single label or repo.
# Source for the numbers: GitHub's published standard limits
# (https://docs.github.com/actions/reference/usage-limits).
# NOTE(review): GitHub revises this table from time to time; re-check the
# values (the Enterprise figure in particular) against the page above.
PLAN_TIER_HOSTED_RUNNER_CAP = {"free": 20, "team": 60, "enterprise": 180}

# Cap reported when the org's plan cannot be queried. It is pinned to the
# Team-tier entry of the map above because the Slang org is on GitHub Team
# (60 concurrent hosted runners).
DEFAULT_HOSTED_RUNNER_CAP = PLAN_TIER_HOSTED_RUNNER_CAP["team"]
3352
3453HOSTED_LABEL_PREFIXES = ("ubuntu-" , "macos-" , "windows-" )
3554
@@ -190,16 +209,87 @@ def summarize(jobs):
190209 }
191210
192211
193- def sample_hosted_runner_usage (repo , cap = DEFAULT_HOSTED_RUNNER_CAP ):
def org_from_repo(repo):
    """Extract the owner (org) part of an `owner/name` repo string.

    Everything before the first `/` is returned, so
    `"shader-slang/slang"` yields `"shader-slang"`. A string without any
    `/` is returned whole, which lets callers pass a bare org name. Falsy
    input (`""` or `None`) is handed back untouched.
    """
    if not repo:
        return repo
    owner, _sep, _name = repo.partition("/")
    return owner
219+
220+
def fetch_org_plan_cap(org):
    """Return the hosted-runner concurrency cap implied by `org`'s plan tier.

    The function requests `orgs/<org>` through `gh_api`, reads the plan
    name from the `plan.name` field of the response, lower-cases it and
    looks it up in `PLAN_TIER_HOSTED_RUNNER_CAP`.

    Args:
        org: Organization login (e.g. `"shader-slang"`). A falsy value
            short-circuits to None without any API call.

    Returns:
        The integer cap for the org's plan tier, or None when it cannot be
        determined: the API call reported an error, the response was not a
        dict, the response carried no usable `plan.name`, or the tier is
        not a key of `PLAN_TIER_HOSTED_RUNNER_CAP`. Callers are expected to
        substitute their own fallback cap for None.

    A malformed payload never raises; in particular a non-string
    `plan.name` is reported as an unrecognized tier instead of failing on
    `.lower()`. NOTE(review): this assumes `gh_api` reports failures via
    its `(data, err)` return value rather than by raising -- confirm.
    """
    if not org:
        return None

    data, err = gh_api(f"orgs/{org}")
    if err or not isinstance(data, dict):
        print(
            f"Warning: could not query plan for org {org}: "
            f"{err or 'unexpected response'}; using fallback cap.",
            file=sys.stderr,
        )
        return None

    plan = data.get("plan")
    tier = plan.get("name") if isinstance(plan, dict) else None
    if not tier:
        # A response without a plan name is treated as the expected case
        # of a token that lacks org visibility (external/fork tokens), so
        # it is deliberately not reported as a warning.
        return None

    # Only a string can name a tier. Anything else maps to no key at all,
    # so it lands in the "unrecognized tier" warning below rather than
    # raising AttributeError on `.lower()`.
    tier_key = tier.lower() if isinstance(tier, str) else None
    cap = PLAN_TIER_HOSTED_RUNNER_CAP.get(tier_key)
    if cap is None:
        print(
            f"Warning: unrecognized GitHub plan tier {tier!r} for org "
            f"{org}; using fallback cap.",
            file=sys.stderr,
        )
        return None
    return cap
262+
263+
def resolve_hosted_runner_cap(repo):
    """Pick the hosted-runner cap that usage for `repo` is reported against.

    The owner part of `repo` is passed to `fetch_org_plan_cap`; a truthy
    result is used as-is. Otherwise (None, i.e. the plan could not be
    resolved) the function returns `DEFAULT_HOSTED_RUNNER_CAP`.

    This lives outside `sample_hosted_runner_usage` so that callers can
    resolve the cap a single time and log the value they ended up with.
    """
    detected_cap = fetch_org_plan_cap(org_from_repo(repo))
    if detected_cap:
        return detected_cap
    return DEFAULT_HOSTED_RUNNER_CAP
274+
275+
276+ def sample_hosted_runner_usage (repo , cap = None ):
194277 """Sample current hosted-runner usage for `repo`.
195278
279+ `cap` is the concurrency cap to report against. When None (the
280+ default), it is auto-detected from the org's plan tier via
281+ `resolve_hosted_runner_cap`; pass an explicit integer to override
282+ (e.g. from the `--cap` CLI flag or a test).
283+
196284 Returns a dict suitable for embedding in the health snapshot:
197285 {
198- "cap": 20 ,
286+ "cap": 60 ,
199287 "in_progress": { total, by_workflow, by_label },
200288 "queued": { total, by_workflow, by_label },
201289 }
202290 """
291+ if cap is None :
292+ cap = resolve_hosted_runner_cap (repo )
203293 in_progress_runs , ip_list_err = fetch_in_progress_runs (repo )
204294 queued_runs , q_list_err = fetch_queued_runs (repo )
205295
@@ -249,7 +339,8 @@ def parse_args():
249339 description = (
250340 "Sample GitHub-hosted runner usage for a repo, broken down "
251341 "by workflow and label. Aimed at detecting impending "
252- "20-runner-cap exhaustion before it stalls the merge queue."
342+ "runner-cap exhaustion before it stalls the merge queue. The "
343+ "cap is auto-detected from the org's GitHub plan tier."
253344 )
254345 )
255346 parser .add_argument (
@@ -260,10 +351,12 @@ def parse_args():
260351 parser .add_argument (
261352 "--cap" ,
262353 type = int ,
263- default = DEFAULT_HOSTED_RUNNER_CAP ,
354+ default = None ,
264355 help = (
265- f"Hosted-runner concurrency cap to report against "
266- f"(default: { DEFAULT_HOSTED_RUNNER_CAP } , the standard public-repo limit)"
356+ "Hosted-runner concurrency cap to report against. Default: "
357+ "auto-detected from the org's GitHub plan tier (Free=20, "
358+ f"Team=60, Enterprise=180; fallback { DEFAULT_HOSTED_RUNNER_CAP } "
359+ "if the plan can't be queried)."
267360 ),
268361 )
269362 parser .add_argument (
0 commit comments