Skip to content

Commit c4bcde0

Browse files
authored
feat: set a warning message on forbidden pages instead of blocking (#436)
* feat: set a warning message on forbidden pages instead of blocking update * send error to mattermost * fix: delete 403 files * remove color from release
1 parent 55fe3bc commit c4bcde0

7 files changed

Lines changed: 117 additions & 204 deletions

File tree

.github/workflows/release.yml

Lines changed: 31 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ on:
77
workflow_dispatch:
88

99
permissions:
10-
id-token: write # Required for OIDC token generation
10+
id-token: write # Required for OIDC token generation
1111
contents: write
1212

1313
jobs:
@@ -62,6 +62,21 @@ jobs:
6262
- name: Check Refs
6363
run: yarn checkRefs
6464

65+
- name: Check for 403 errors
66+
id: check_403
67+
run: |
68+
if [ -f "./forbidden-urls.json" ]; then
69+
echo "has_403=true" >> $GITHUB_OUTPUT
70+
FORBIDDEN_CONTENT=$(cat ./forbidden-urls.json)
71+
# Escape newlines and other special characters for GitHub Actions
72+
FORBIDDEN_CONTENT="${FORBIDDEN_CONTENT//'%'/'%25'}"
73+
FORBIDDEN_CONTENT="${FORBIDDEN_CONTENT//$'\n'/'%0A'}"
74+
FORBIDDEN_CONTENT="${FORBIDDEN_CONTENT//$'\r'/'%0D'}"
75+
echo "forbidden_urls=$FORBIDDEN_CONTENT" >> $GITHUB_OUTPUT
76+
else
77+
echo "has_403=false" >> $GITHUB_OUTPUT
78+
fi
79+
6580
- name: Get metadata
6681
id: metadata
6782
shell: bash
@@ -115,10 +130,24 @@ jobs:
115130
GITHUB_TOKEN: ${{ steps.token.outputs.token }}
116131
NPM_TOKEN: ${{ secrets.SOCIALGROOVYBOT_NPM_TOKEN }}
117132

133+
- uses: mattermost/action-mattermost-notify@master
134+
if: steps.check_403.outputs.has_403 == 'true'
135+
with:
136+
MATTERMOST_WEBHOOK_URL: ${{ secrets.MATTERMOST_WEBHOOK_URL }}
137+
TEXT: |
138+
⚠️ **Avertissement** : Certaines pages ont retourné une erreur 403 (Forbidden) lors du scraping et ont été ignorées.
139+
140+
```json
141+
${{ steps.check_403.outputs.forbidden_urls }}
142+
```
143+
144+
Le processus de mise à jour a continué normalement.
145+
[Les logs complets sont disponibles ici](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }})
146+
118147
- uses: mattermost/action-mattermost-notify@master
119148
if: failure()
120149
with:
121150
MATTERMOST_WEBHOOK_URL: ${{ secrets.MATTERMOST_WEBHOOK_URL }}
122151
TEXT: |
123-
La mise à jour du dépôt fiches-travail-data a échoué.
152+
La mise à jour du dépôt fiches-travail-data a échoué.
124153
[Les logs sont disponibles ici](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }})

data/fiches-travail.json

Lines changed: 0 additions & 194 deletions
Large diffs are not rendered by default.

local.data.json

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -500,10 +500,6 @@
500500
"id": "article373014",
501501
"url": "https://travail-emploi.gouv.fr/les-titres-professionnels"
502502
},
503-
{
504-
"id": "article373015",
505-
"url": "https://travail-emploi.gouv.fr/le-fonds-national-de-lemploi-formation-fne-formation"
506-
},
507503
{
508504
"id": "article373029",
509505
"url": "https://travail-emploi.gouv.fr/lallocation-de-solidarite-specifique-ass"

src/fetch-data/__tests__/index.test.js

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,17 @@ scrapUrl.mockImplementation((id, url) => {
1515
if (url === "url.fail") {
1616
const error = new Error(`xxx ${url} fail`);
1717
error.url = url;
18-
return Promise.reject(url);
18+
return Promise.reject(error);
19+
}
20+
if (url === "url.forbidden") {
21+
const error = new Error(`HTTP Error: 403 - ${url} - Forbidden`);
22+
error.url = url;
23+
error.isForbidden = true;
24+
return Promise.reject(error);
1925
}
2026
});
2127

22-
test("scrap should throw if some scrapped pages failed", async () => {
28+
test("scrap should throw if some scrapped pages failed with non-403 errors", async () => {
2329
await expect(
2430
scrap([
2531
{ id: "url.a", url: "url.sections" },
@@ -28,6 +34,27 @@ test("scrap should throw if some scrapped pages failed", async () => {
2834
).rejects.toThrowError(/fetching pages fail/);
2935
});
3036

37+
test("scrap should not throw if some scrapped pages failed with 403 errors", async () => {
38+
// Mock console.warn to verify it's called
39+
const consoleWarnSpy = jest
40+
.spyOn(console, "warn")
41+
.mockImplementation(jest.fn);
42+
43+
const result = await scrap([
44+
{ id: "url.a", url: "url.sections" },
45+
{ id: "noid", url: "url.forbidden" },
46+
]);
47+
48+
// Verify that console.warn was called
49+
expect(consoleWarnSpy).toHaveBeenCalled();
50+
// Verify that we still get the successful results
51+
expect(result).toHaveLength(1);
52+
expect(result[0].pubId).toBe("url.a");
53+
54+
// Restore the original console.warn
55+
consoleWarnSpy.mockRestore();
56+
});
57+
3158
test("scrap should throw if some scrapped pages have same id", async () => {
3259
await expect(
3360
scrap([

src/fetch-data/__tests__/scrapUrl.test.js

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,16 @@ got.mockImplementation((url) => {
3131
error.name = "HTTPError";
3232
return Promise.reject(error);
3333
}
34+
if (url.startsWith("url.forbidden")) {
35+
const error = new HTTPError();
36+
error.response = {
37+
statusCode: 403,
38+
};
39+
error.options = { url: { href: "url.forbidden" } };
40+
error.message = "Forbidden";
41+
error.name = "HTTPError";
42+
return Promise.reject(error);
43+
}
3444
if (url.startsWith("url.parse.fail")) {
3545
const error = new ParseError();
3646
error.message = "parse fail";
@@ -89,4 +99,15 @@ describe("scrapUrl", () => {
8999
/Parsing Error/
90100
);
91101
});
102+
103+
test("scrapUrl should set isForbidden property for 403 errors", async () => {
104+
process.env.TOKEN_MT = "TOKEN";
105+
try {
106+
await scrapUrl("id", "url.forbidden");
107+
expect(true).toBe(false); // This will fail the test if no error is thrown
108+
} catch (error) {
109+
expect(error.message).toMatch(/HTTP Error: 403/);
110+
expect(error.isForbidden).toBe(true);
111+
}
112+
});
92113
});

src/fetch-data/index.js

Lines changed: 32 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,40 @@ export async function scrap(urls) {
2222

2323
const failedPromise = results.filter(({ status }) => status === "rejected");
2424

25-
if (failedPromise.length > 0) {
25+
// Separate 403 errors from other errors
26+
const forbiddenErrors = failedPromise.filter(
27+
({ reason }) => reason.isForbidden
28+
);
29+
const otherErrors = failedPromise.filter(({ reason }) => !reason.isForbidden);
30+
31+
// Log 403 errors as warnings and save them to a file for GitHub Actions
32+
if (forbiddenErrors.length > 0) {
33+
const forbiddenUrls = forbiddenErrors.map(({ reason }) => reason.url);
34+
console.warn(
35+
"WARNING: The following pages returned 403 Forbidden and were skipped:",
36+
forbiddenUrls
37+
);
38+
39+
// Write forbidden URLs to a file for GitHub Actions to use
40+
fs.writeFileSync(
41+
path.join(__dirname, "../../forbidden-urls.json"),
42+
JSON.stringify(
43+
{
44+
urls: forbiddenUrls,
45+
timestamp: new Date().toISOString(),
46+
count: forbiddenUrls.length,
47+
},
48+
null,
49+
2
50+
)
51+
);
52+
}
53+
54+
// Only fail if there are non-403 errors
55+
if (otherErrors.length > 0) {
2656
console.error(
2757
"scrap fail",
28-
failedPromise.map(({ reason }) => reason)
58+
otherErrors.map(({ reason }) => reason)
2959
);
3060
throw new Error("Error - fetching pages fail. Some pages are missing");
3161
}

src/fetch-data/scrapUrl.js

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,10 @@ export async function scrapUrl(id, url) {
3434
err = new Error(
3535
`HTTP Error: ${error.response.statusCode} - ${url} - ${error.message}`
3636
);
37+
// Add a property to identify 403 errors specifically
38+
if (error.response.statusCode === 403) {
39+
err.isForbidden = true;
40+
}
3741
} else {
3842
err = new Error(error.message);
3943
}

0 commit comments

Comments
 (0)