Skip to content

Commit 7616fec

Browse files
committed
chore: setup prettier formatter
1 parent f066037 commit 7616fec

File tree

15 files changed

+4867
-2471
lines changed

15 files changed

+4867
-2471
lines changed

browser-extension/README.md

Lines changed: 5 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,19 @@
11
# Extension Navigateur Balance Tes Haters
22

3-
43
Cette extension permet de capturer les commentaires depuis des publications sur les réseaux sociaux.
54

6-
7-
85
# Contributing
96

107
Cette extension utilise [wxt](https://wxt.dev/) + React
118

129
## Installation
1310

14-
* Installer pnpm
15-
* `pnpm install` dans le repertoire extension
11+
- Installer pnpm
12+
- `pnpm install` dans le répertoire extension
1613

1714
## Mode dev (avec chrome)
1815

19-
* Lancer `pnpm dev`
20-
* Charger le répertoire .output/chrome-mv3-dev comme extension non empaqueté (see https://developer.chrome.com/docs/extensions/get-started/tutorial/hello-world#load-unpacked)
21-
22-
23-
A partir de là la plupart des changements sont propagé automatiquement dans l'extension navigateur sans besoin de rafraichir manuellement l'extension.
24-
16+
- Lancer `pnpm dev`
17+
- Charger le répertoire `.output/chrome-mv3-dev` comme extension non empaquetée (voir https://developer.chrome.com/docs/extensions/get-started/tutorial/hello-world#load-unpacked)
2518

19+
À partir de là, la plupart des changements sont propagés automatiquement dans l'extension navigateur, sans besoin de rafraîchir manuellement l'extension.

browser-extension/entrypoints/background/index.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ export default defineBackground(() => {
88

99
async function handleMessages(
1010
message: Message,
11-
sender: Browser.runtime.MessageSender
11+
sender: Browser.runtime.MessageSender,
1212
) {
1313
console.debug("Message received:", message, sender);
1414

Lines changed: 127 additions & 127 deletions
Original file line numberDiff line numberDiff line change
@@ -1,156 +1,156 @@
11
import {
2-
ElementHandle,
3-
Page,
2+
ElementHandle,
3+
Page,
44
} from "puppeteer-core/lib/esm/puppeteer/puppeteer-core-browser.js";
55
import { PuppeteerBaseScraper } from "../puppeteer/puppeteer-base-scraper";
66
import {
7-
type Author,
8-
type Post,
9-
type Comment,
7+
type Author,
8+
type Post,
9+
type Comment,
1010
} from "../../../shared/model/post";
1111
import { parseSocialNetworkUrl } from "@/entrypoints/shared/social-network-url";
1212
import { currentIsoDate } from "../utils/current-iso-date";
1313

1414
//TODO: gérer le scroll et le chargement des commentaires
1515
//TODO: gérer le scraping des réponses aux commentaires
1616
export class InstagramScraper extends PuppeteerBaseScraper {
17-
private INSTAGRAM_URL = "https://www.instagram.com/";
17+
private INSTAGRAM_URL = "https://www.instagram.com/";
1818

19-
extractPostId(url: string): string {
20-
const parsed = parseSocialNetworkUrl(url);
21-
if (!parsed) {
22-
throw new Error("Unexpected");
23-
}
24-
return parsed.postId;
19+
extractPostId(url: string): string {
20+
const parsed = parseSocialNetworkUrl(url);
21+
if (!parsed) {
22+
throw new Error("Unexpected");
2523
}
24+
return parsed.postId;
25+
}
2626

27-
async doScrapTab(tab: Browser.tabs.Tab, page: Page): Promise<Post> {
28-
// //main/div/div/div
29-
const cadre_publication = (await page.$("::-p-xpath(//main/div/div/div)"))!;
30-
const colonne_commentaires = (await cadre_publication.$(
31-
"::-p-xpath(./div[2]/div)"
32-
))!;
27+
async doScrapTab(tab: Browser.tabs.Tab, page: Page): Promise<Post> {
28+
// //main/div/div/div
29+
const cadre_publication = (await page.$("::-p-xpath(//main/div/div/div)"))!;
30+
const colonne_commentaires = (await cadre_publication.$(
31+
"::-p-xpath(./div[2]/div)",
32+
))!;
3333

34-
// //main/div/div/div/./div[2]/div/./div[2]
35-
const zone_defilable = (await colonne_commentaires.$(
36-
"::-p-xpath(./div[2])"
37-
))!;
34+
// //main/div/div/div/./div[2]/div/./div[2]
35+
const zone_defilable = (await colonne_commentaires.$(
36+
"::-p-xpath(./div[2])",
37+
))!;
3838

39-
// //main/div/div/div/./div[2]/div/./div[2]/./div/div[1]/div/div[2]/div/span
40-
const publication = (await zone_defilable.$(
41-
"::-p-xpath(./div/div[1]/div/div[2]/div/span)"
42-
))!;
39+
// //main/div/div/div/./div[2]/div/./div[2]/./div/div[1]/div/div[2]/div/span
40+
const publication = (await zone_defilable.$(
41+
"::-p-xpath(./div/div[1]/div/div[2]/div/span)",
42+
))!;
4343

44-
// //main/div/div/div/./div[2]/div/./div[2]/./div/div[1]/div/div[2]/div/span/./div/div
45-
const entete_publication = (await publication.$("::-p-xpath(./div/div)"))!;
44+
// //main/div/div/div/./div[2]/div/./div[2]/./div/div[1]/div/div[2]/div/span/./div/div
45+
const entete_publication = (await publication.$("::-p-xpath(./div/div)"))!;
4646

47-
// //main/div/div/div/./div[2]/div/./div[2]/./div/div[1]/div/div[2]/div/span/./span[1]
48-
const auteur = await this.get_auteur_from_span(
49-
(await entete_publication.$("::-p-xpath(./span[1])"))!
50-
);
47+
// //main/div/div/div/./div[2]/div/./div[2]/./div/div[1]/div/div[2]/div/span/./span[1]
48+
const auteur = await this.get_auteur_from_span(
49+
(await entete_publication.$("::-p-xpath(./span[1])"))!,
50+
);
5151

52-
// //main/div/div/div/./div[2]/div/./div[2]/./div/div[1]/div/div[2]/div/span/./div/div/.//time
53-
const date_publication = (await entete_publication.$eval(
54-
"::-p-xpath(.//time)",
55-
(node) => node.getAttribute("datetime")
56-
))!;
52+
// //main/div/div/div/./div[2]/div/./div[2]/./div/div[1]/div/div[2]/div/span/./div/div/.//time
53+
const date_publication = (await entete_publication.$eval(
54+
"::-p-xpath(.//time)",
55+
(node) => node.getAttribute("datetime"),
56+
))!;
5757

58-
// //main/div/div/div/./div[2]/div/./div[2]/./div/div[1]/div/div[2]/div/span/./div/span
59-
const texte_publication = (await publication.$eval(
60-
"::-p-xpath(./div/span)",
61-
(node) => node.textContent
62-
))!;
58+
// //main/div/div/div/./div[2]/div/./div[2]/./div/div[1]/div/div[2]/div/span/./div/span
59+
const texte_publication = (await publication.$eval(
60+
"::-p-xpath(./div/span)",
61+
(node) => node.textContent,
62+
))!;
6363

64-
// //main/div/div/div/./div[2]/div/./div[2]/./div/div[3]
65-
const liste_commentaires = (await zone_defilable.$(
66-
"::-p-xpath(./div/div[3])"
67-
))!;
68-
//const commentaires = (await liste_commentaires.$$("::-p-xpath(./div)")).map(e => this.extract_commentaire(e))!;
69-
const commentaires: Comment[] = [];
70-
let div_commentaire = await liste_commentaires.$("::-p-xpath(./div)");
71-
while (div_commentaire) {
72-
await div_commentaire.scrollIntoView();
73-
commentaires.push(await this.extract_commentaire(div_commentaire));
74-
await this.sleep(500);
75-
// gérer un peu mieux le scroll et le temps de chargement des pages de
76-
// commentaires
77-
div_commentaire = await div_commentaire.$(
78-
"::-p-xpath(./following-sibling::*)"
79-
);
80-
// gérer ici le ce scraping des réponses aux commentaires
81-
}
82-
83-
return {
84-
postId: this.extractPostId(tab.url!),
85-
socialNetwork: "INSTAGRAM",
86-
url: tab.url!,
87-
author: auteur,
88-
scrapedAt: new Date().toISOString(),
89-
publishedAt: new Date(date_publication).toISOString(),
90-
textContent: texte_publication,
91-
comments: await Promise.all(commentaires),
92-
};
64+
// //main/div/div/div/./div[2]/div/./div[2]/./div/div[3]
65+
const liste_commentaires = (await zone_defilable.$(
66+
"::-p-xpath(./div/div[3])",
67+
))!;
68+
//const commentaires = (await liste_commentaires.$$("::-p-xpath(./div)")).map(e => this.extract_commentaire(e))!;
69+
const commentaires: Comment[] = [];
70+
let div_commentaire = await liste_commentaires.$("::-p-xpath(./div)");
71+
while (div_commentaire) {
72+
await div_commentaire.scrollIntoView();
73+
commentaires.push(await this.extract_commentaire(div_commentaire));
74+
await this.sleep(500);
75+
// gérer un peu mieux le scroll et le temps de chargement des pages de
76+
// commentaires
77+
div_commentaire = await div_commentaire.$(
78+
"::-p-xpath(./following-sibling::*)",
79+
);
80+
// gérer ici le ce scraping des réponses aux commentaires
9381
}
9482

95-
private async get_auteur_from_span(
96-
span_element: ElementHandle<Element>
97-
): Promise<Author> {
98-
const auteur_elem = (await span_element.$("::-p-xpath(.//a)"))!;
99-
const auteur_href = (await auteur_elem.$eval("::-p-xpath(.)", (node) =>
100-
node.getAttribute("href")
101-
))!;
102-
const auteur_name = (await auteur_elem.$eval(
103-
"::-p-xpath(.//span)",
104-
(node) => node.textContent
105-
))!;
106-
return {
107-
name: auteur_name,
108-
accountHref: this.urlJoin(this.INSTAGRAM_URL, auteur_href),
109-
};
110-
}
83+
return {
84+
postId: this.extractPostId(tab.url!),
85+
socialNetwork: "INSTAGRAM",
86+
url: tab.url!,
87+
author: auteur,
88+
scrapedAt: new Date().toISOString(),
89+
publishedAt: new Date(date_publication).toISOString(),
90+
textContent: texte_publication,
91+
comments: await Promise.all(commentaires),
92+
};
93+
}
11194

112-
private async extract_commentaire(
113-
comment_element: ElementHandle<Element>
114-
): Promise<Comment> {
115-
let base = (await comment_element.$(
116-
"::-p-xpath(./div/div/div[2]/div/div)"
117-
))!;
118-
base = (await base.$("::-p-xpath(.//span[1]/../..)"))!;
119-
const base_0 = (await base.$("::-p-xpath(div[1])"))!;
120-
const base_1 = (await base.$("::-p-xpath(div[2])"))!;
121-
const auteur = await this.get_auteur_from_span(base_0);
122-
let date_commentaire: Date | undefined = undefined;
123-
try {
124-
const date_str = (await base.$eval("::-p-xpath(.//time)", (node) =>
125-
node.getAttribute("datetime")
126-
))!;
127-
date_commentaire = date_str ? new Date(date_str) : undefined;
128-
} catch (_) {
129-
date_commentaire = undefined;
130-
}
95+
private async get_auteur_from_span(
96+
span_element: ElementHandle<Element>,
97+
): Promise<Author> {
98+
const auteur_elem = (await span_element.$("::-p-xpath(.//a)"))!;
99+
const auteur_href = (await auteur_elem.$eval("::-p-xpath(.)", (node) =>
100+
node.getAttribute("href"),
101+
))!;
102+
const auteur_name = (await auteur_elem.$eval(
103+
"::-p-xpath(.//span)",
104+
(node) => node.textContent,
105+
))!;
106+
return {
107+
name: auteur_name,
108+
accountHref: this.urlJoin(this.INSTAGRAM_URL, auteur_href),
109+
};
110+
}
131111

132-
const screenshot = await comment_element.screenshot({ encoding: "base64" });
133-
const screenshotDate = currentIsoDate();
134-
return {
135-
author: auteur,
136-
textContent: await base_1.$eval(
137-
"::-p-xpath(.)",
138-
(node) => node.textContent!
139-
)!,
140-
publishedAt: date_commentaire?.toISOString(),
141-
screenshotData: screenshot,
142-
scrapedAt: screenshotDate,
143-
replies: [],
144-
nbLikes: 0 // Voir https://github.com/dataforgoodfr/14_BalanceTesHaters/issues/4
145-
};
112+
private async extract_commentaire(
113+
comment_element: ElementHandle<Element>,
114+
): Promise<Comment> {
115+
let base = (await comment_element.$(
116+
"::-p-xpath(./div/div/div[2]/div/div)",
117+
))!;
118+
base = (await base.$("::-p-xpath(.//span[1]/../..)"))!;
119+
const base_0 = (await base.$("::-p-xpath(div[1])"))!;
120+
const base_1 = (await base.$("::-p-xpath(div[2])"))!;
121+
const auteur = await this.get_auteur_from_span(base_0);
122+
let date_commentaire: Date | undefined = undefined;
123+
try {
124+
const date_str = (await base.$eval("::-p-xpath(.//time)", (node) =>
125+
node.getAttribute("datetime"),
126+
))!;
127+
date_commentaire = date_str ? new Date(date_str) : undefined;
128+
} catch (_) {
129+
date_commentaire = undefined;
146130
}
147131

148-
private urlJoin(base: string, relative: string): string {
149-
const baseWithoutTrailingSlash = base.replace(/\/+$/, "");
150-
if (relative.startsWith("/")) {
151-
return baseWithoutTrailingSlash + relative;
152-
} else {
153-
return baseWithoutTrailingSlash + "/" + relative;
154-
}
132+
const screenshot = await comment_element.screenshot({ encoding: "base64" });
133+
const screenshotDate = currentIsoDate();
134+
return {
135+
author: auteur,
136+
textContent: await base_1.$eval(
137+
"::-p-xpath(.)",
138+
(node) => node.textContent!,
139+
)!,
140+
publishedAt: date_commentaire?.toISOString(),
141+
screenshotData: screenshot,
142+
scrapedAt: screenshotDate,
143+
replies: [],
144+
nbLikes: 0, // Voir https://github.com/dataforgoodfr/14_BalanceTesHaters/issues/4
145+
};
146+
}
147+
148+
private urlJoin(base: string, relative: string): string {
149+
const baseWithoutTrailingSlash = base.replace(/\/+$/, "");
150+
if (relative.startsWith("/")) {
151+
return baseWithoutTrailingSlash + relative;
152+
} else {
153+
return baseWithoutTrailingSlash + "/" + relative;
155154
}
155+
}
156156
}
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import { ElementHandle } from "puppeteer-core/lib/esm/puppeteer/puppeteer-core-browser.js";
22

33
export async function ariaLabel(
4-
element: ElementHandle
4+
element: ElementHandle,
55
): Promise<string | null> {
66
return await element.evaluate((e) => (e as HTMLElement).ariaLabel);
77
}

browser-extension/entrypoints/background/scraping/puppeteer/selectOrThrow.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ import { innerHtml } from "./innerHtml";
33

44
export async function selectOrThrow(
55
container: ElementHandle,
6-
selector: string
6+
selector: string,
77
): Promise<ElementHandle> {
88
const selectedElement = await container.$(selector)!;
99

browser-extension/entrypoints/background/scraping/scrap-tab.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ import { Post } from "../../shared/model/post";
33
import { createScraper } from "./create-scraper";
44

55
export async function scrapTab(
6-
tab: globalThis.Browser.tabs.Tab
6+
tab: globalThis.Browser.tabs.Tab,
77
): Promise<Post> {
88
if (tab.url === undefined) {
99
throw new Error("Url of tab is undefined");

0 commit comments

Comments
 (0)