-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathextract_article.ts
More file actions
42 lines (38 loc) · 1.34 KB
/
extract_article.ts
File metadata and controls
42 lines (38 loc) · 1.34 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
const { Readability } = require('@mozilla/readability');
const { JSDOM } = require('jsdom');
const fs = require('fs');
// Command line: <url-or-file> [article-output-file] [title-output-file]
const cliArgs = process.argv.slice(2);

// Location of the page to process: a URL or a path to a local file.
const url = cliArgs[0];
// Output paths; a missing or empty argument falls back to the default name.
const ARTICLE_FILE = cliArgs[1] || 'extracted_article.txt';
const ARTICLE_TITLE_FILE = cliArgs[2] || 'extracted_article_title.txt';

// Without an input location there is nothing to do: print usage and stop.
if (url === undefined || url === '') {
  console.error('Provide a URL or local file path as an argument');
  process.exit(1);
}
/**
 * Extracts the readable article from a web page or a local HTML file and
 * writes its plain text to ARTICLE_FILE and its title to ARTICLE_TITLE_FILE.
 *
 * Both output files are opened for writing (created or truncated) before the
 * input is read. Any failure is reported on stderr and ends the process with
 * exit status 1.
 *
 * @param url - An http:// or https:// URL to fetch; anything else is treated
 *   as the path of a local HTML file.
 * @returns A promise that resolves once both files are written and closed.
 */
async function extractContent(url: string): Promise<void> {
  let articleFd: number | undefined;
  let titleFd: number | undefined;
  let failed = false;

  try {
    // Opened inside the try so an unwritable output path is reported like any
    // other failure, and a descriptor opened first is still closed below.
    articleFd = fs.openSync(ARTICLE_FILE, 'w');
    titleFd = fs.openSync(ARTICLE_TITLE_FILE, 'w');

    // Only a real http(s) scheme means "fetch"; a local file whose name merely
    // starts with "http" is read from disk.
    const dom = /^https?:\/\//i.test(url)
      ? await JSDOM.fromURL(url)
      : new JSDOM(fs.readFileSync(url, 'utf8'));

    const article = new Readability(dom.window.document).parse();
    // parse() yields null when no article can be extracted from the document.
    if (article == null) {
      throw new Error('No readable article content could be extracted');
    }

    // Either field may be absent on sparse pages; write an empty file rather
    // than failing on a nullish value.
    fs.writeFileSync(articleFd, article.textContent ?? '', 'utf8');
    fs.writeFileSync(titleFd, article.title ?? '', 'utf8');
  } catch (error: unknown) {
    failed = true;
    if (error instanceof Error) {
      console.error('Error:', error.message);
    } else {
      console.error('An unknown error occurred');
    }
  } finally {
    if (articleFd !== undefined) {
      fs.closeSync(articleFd);
    }
    if (titleFd !== undefined) {
      fs.closeSync(titleFd);
    }
  }

  // Exit only after the descriptors are closed: calling process.exit() from
  // the catch block would terminate before the finally block runs.
  if (failed) {
    process.exit(1);
  }
}
// Entry point. The promise is observed explicitly so that a rejection escaping
// extractContent is reported in the script's usual format and yields exit
// status 1 instead of surfacing as an unhandled rejection.
extractContent(url).catch((error: unknown) => {
  if (error instanceof Error) {
    console.error('Error:', error.message);
  } else {
    console.error('An unknown error occurred');
  }
  process.exit(1);
});