Skip to content

Commit bfa315c

Browse files
authored
Make all GitHub Actions cache service HTTP errors non-fatal, and add scheduled live test (#1306)
HTTP errors from the GitHub Actions cache service are no longer ever fatal, and should always continue gracefully with GitHub Actions Caching temporarily disabled. Previously only network errors and HTTP 429/503 errors failed gracefully. This should mean that if something like #1297 happens again (where we started getting 422 "Unprocessable" errors because of a breaking change to their API), users won't be broken. Also adds a scheduled daily test of the live service (just one platform, morning PT), since the above change would otherwise reduce the timeliness of us finding out about a problem with the production service.
1 parent a2dc284 commit bfa315c

File tree

4 files changed

+203
-48
lines changed

4 files changed

+203
-48
lines changed
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
name: Test real GitHub Actions cache service
2+
3+
on:
4+
schedule:
5+
- cron: '14 7 * * *' # 6:07AM PST / 7:07AM PDT
6+
7+
jobs:
8+
tests:
9+
strategy:
10+
matrix:
11+
include:
12+
- node: 20 # LTS
13+
os: ubuntu-22.04
14+
15+
runs-on: ${{ matrix.os }}
16+
17+
env:
18+
WIREIT_LOGGER: 'quiet-ci'
19+
20+
steps:
21+
- uses: actions/checkout@v4
22+
- uses: actions/setup-node@v4
23+
with:
24+
node-version: ${{ matrix.node }}
25+
cache: npm
26+
27+
# Note we intentionally don't enable Wireit caching for this workflow,
28+
# because we're looking for problems coming from the real production
29+
# GitHub Actions cache service, which is independent of any of our files.
30+
# - uses: google/wireit@setup-github-actions-caching/v2
31+
32+
- run: npm ci
33+
- run: npm run test:cache-github-real

CHANGELOG.md

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,12 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
66
and this project adheres to [Semantic
77
Versioning](https://semver.org/spec/v2.0.0.html).
88

9-
<!-- ## Unreleased -->
9+
## Unreleased
10+
11+
- HTTP errors from the GitHub Actions cache service are no longer ever fatal,
12+
and should always continue gracefully with GitHub Actions caching temporarily
13+
disabled (previously only network errors and HTTP 429/503 errors failed
14+
gracefully).
1015

1116
## [0.14.12] - 2025-04-10
1217

src/caching/github-actions-cache.ts

Lines changed: 53 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ import type {Result} from '../error.js';
2727
import type {InvalidUsage, UnknownErrorThrown} from '../event.js';
2828

2929
/**
30-
* Caches script output to the GitHub Actions caching service.
30+
* Caches script output to the GitHub Actions cache service.
3131
*/
3232
export class GitHubActionsCache implements Cache {
3333
readonly #baseUrl: string;
@@ -504,59 +504,66 @@ ${blockIds.map((blockId) => ` <Uncommitted>${blockId}</Uncommitted>`).join('\n'
504504
}
505505

506506
/**
507-
* If we received an error that indicates something is wrong with the GitHub
508-
* Actions service that is not our fault, log an error and return false.
509-
* Otherwise return true.
507+
* If we received a network or HTTP error from the given HTTP request, set the
508+
* global flag to indicate that GitHub Actions Caching is down (it's a class
509+
* property, but this class should be a global singleton), log an error
510+
* (possibly asynchronously), and return false. If the request was OK, just
511+
* return true.
510512
*/
511513
#maybeHandleServiceDown(
512514
res: Result<http.IncomingMessage, Error>,
513515
script: ScriptReference,
514516
): res is {ok: true; value: http.IncomingMessage} {
517+
const status = res.ok ? res.value.statusCode : null;
518+
if (res.ok && status != null && status >= 200 && status <= 299) {
519+
return true;
520+
}
521+
522+
if (this.#serviceIsDown) {
523+
// Reduce noise; just whatever the first error was is OK.
524+
//
525+
// Note that this cache can be accessed concurrently, so even though we
526+
// stop making HTTP requests after setting this flag, there could be >1
527+
// pending requests out at the same time before the first error is
528+
// detected.
529+
return false;
530+
}
531+
this.#serviceIsDown = true;
532+
515533
if (!res.ok) {
516-
if (!this.#serviceIsDown) {
534+
this.#logger.log({
535+
script,
536+
type: 'info',
537+
detail: 'cache-info',
538+
message:
539+
`Network error from GitHub Actions cache service.` +
540+
` GitHub Actions caching has been temporarily disabled.` +
541+
` Detail:\n\n${res.error}`,
542+
});
543+
} else {
544+
void (async () => {
545+
const body = await readBody(res.value).catch(() => '');
546+
if (this.#serviceIsDown) {
547+
return;
548+
}
549+
const message =
550+
status === 429
551+
? `Hit GitHub Actions cache service rate limit`
552+
: status === 503
553+
? `GitHub Actions cache service is temporarily unavailable`
554+
: `Unexpected HTTP ${status} error from GitHub Actions cache service`;
517555
this.#logger.log({
518556
script,
519557
type: 'info',
520558
detail: 'cache-info',
521559
message:
522-
`Connection error from GitHub Actions service, caching disabled. ` +
523-
'Detail: ' +
524-
('code' in res.error
525-
? `${(res.error as Error & {code: string}).code} `
526-
: '') +
527-
res.error.message,
560+
`${message}.` +
561+
` GitHub Actions caching has been temporarily disabled.` +
562+
` Detail:\n\nHTTP ${status}: ${body}`,
528563
});
529-
}
530-
} else {
531-
switch (res.value.statusCode) {
532-
case /* Too Many Requests */ 429: {
533-
if (!this.#serviceIsDown) {
534-
this.#logger.log({
535-
script,
536-
type: 'info',
537-
detail: 'cache-info',
538-
message: `Hit GitHub Actions cache rate limit, caching disabled.`,
539-
});
540-
}
541-
break;
542-
}
543-
case /* Service Unavailable */ 503: {
544-
if (!this.#serviceIsDown) {
545-
this.#logger.log({
546-
script,
547-
type: 'info',
548-
detail: 'cache-info',
549-
message: `GitHub Actions service is unavailable, caching disabled.`,
550-
});
551-
}
552-
break;
553-
}
554-
default: {
555-
return true;
556-
}
557-
}
564+
})();
558565
}
559-
this.#serviceIsDown = true;
566+
560567
return false;
561568
}
562569

@@ -678,6 +685,11 @@ ${blockIds.map((blockId) => ` <Uncommitted>${blockId}</Uncommitted>`).join('\n'
678685
req.end(bodyBuffer);
679686

680687
const result = await resPromise;
688+
if (result.ok && result.value.statusCode === /* Conflict */ 409) {
689+
// We must have been racing with another concurrent process running the
690+
// same script.
691+
return undefined;
692+
}
681693
if (!this.#maybeHandleServiceDown(result, script)) {
682694
return undefined;
683695
}
@@ -690,10 +702,6 @@ ${blockIds.map((blockId) => ` <Uncommitted>${blockId}</Uncommitted>`).join('\n'
690702
return resData.signed_upload_url;
691703
}
692704

693-
if (response.statusCode === /* Conflict */ 409) {
694-
return undefined;
695-
}
696-
697705
throw new Error(
698706
`GitHub Cache reserve HTTP ${String(
699707
response.statusCode,

src/test/cache-github-fake.test.ts

Lines changed: 111 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -234,9 +234,118 @@ function assertSuccess(exitResult: ExitResult) {
234234
}
235235
}
236236

237-
for (const code of [429, 503, 'ECONNRESET'] as const) {
237+
test(
238+
`gracefully handles 409 conflict on set()`,
239+
rigTest(async ({rig, server}) => {
240+
const cmdA = await rig.newCommand();
241+
const cmdB = await rig.newCommand();
242+
await rig.write({
243+
'package.json': {
244+
scripts: {
245+
a: 'wireit',
246+
b: 'wireit',
247+
},
248+
wireit: {
249+
a: {
250+
command: cmdA.command,
251+
files: ['input'],
252+
output: [],
253+
dependencies: ['b'],
254+
},
255+
b: {
256+
command: cmdB.command,
257+
files: ['input'],
258+
output: [],
259+
},
260+
},
261+
},
262+
input: 'foo',
263+
});
264+
265+
server.forceErrorOnNextRequest('createCacheEntry', 409);
266+
const exec = rig.exec('npm run a');
267+
(await cmdB.nextInvocation()).exit(0);
268+
(await cmdA.nextInvocation()).exit(0);
269+
assertSuccess(await exec.exit);
270+
assert.equal(server.metrics, {
271+
// Both scripts should check for and then try to reserve a cache entry ...
272+
getCacheEntry: 2,
273+
createCacheEntry: 2,
274+
// ... but since the first one will fail to reserve with a 409, only the
275+
// second one will actually finish uploading its entry.
276+
putBlobBlock: 1,
277+
putBlobBlockList: 1,
278+
finalizeCacheEntry: 1,
279+
getBlob: 0,
280+
} satisfies FakeGitHubActionsCacheServerMetrics);
281+
assert.equal(cmdA.numInvocations, 1);
282+
assert.equal(cmdA.numInvocations, 1);
283+
}),
284+
);
285+
286+
const randomInt = (minIncl: number, maxExcl: number) =>
287+
minIncl + Math.floor(Math.random() * (maxExcl - minIncl));
288+
289+
for (const code of [
290+
'ECONNRESET',
291+
(() => {
292+
while (true) {
293+
const status = randomInt(400, 500);
294+
if (status !== /* Conflict has special meaning (see above test) */ 409) {
295+
return status;
296+
}
297+
}
298+
})(),
299+
randomInt(500, 600),
300+
] as const) {
301+
test(
302+
`recovers from ${code} error within get()`,
303+
rigTest(async ({rig, server}) => {
304+
const cmdA = await rig.newCommand();
305+
const cmdB = await rig.newCommand();
306+
await rig.write({
307+
'package.json': {
308+
scripts: {
309+
a: 'wireit',
310+
b: 'wireit',
311+
},
312+
wireit: {
313+
a: {
314+
command: cmdA.command,
315+
files: ['input'],
316+
output: [],
317+
dependencies: ['b'],
318+
},
319+
b: {
320+
command: cmdB.command,
321+
files: ['input'],
322+
output: [],
323+
},
324+
},
325+
},
326+
input: 'foo',
327+
});
328+
329+
server.forceErrorOnNextRequest('getCacheEntry', code);
330+
const exec = rig.exec('npm run a');
331+
(await cmdB.nextInvocation()).exit(0);
332+
(await cmdA.nextInvocation()).exit(0);
333+
assertSuccess(await exec.exit);
334+
assert.equal(server.metrics, {
335+
getCacheEntry: 1,
336+
createCacheEntry: 0,
337+
putBlobBlock: 0,
338+
putBlobBlockList: 0,
339+
finalizeCacheEntry: 0,
340+
getBlob: 0,
341+
} satisfies FakeGitHubActionsCacheServerMetrics);
342+
assert.equal(cmdA.numInvocations, 1);
343+
assert.equal(cmdA.numInvocations, 1);
344+
}),
345+
);
346+
238347
test(
239-
`recovers from ${code} error`,
348+
`recovers from ${code} error within set()`,
240349
rigTest(async ({rig, server}) => {
241350
await rig.write({
242351
'package.json': {

0 commit comments

Comments
 (0)