|
1 | 1 | import path from 'path' |
2 | 2 | import fsp from 'fs/promises' |
3 | | -import getPage from './getPage.js' |
| 3 | +import * as cheerio from 'cheerio' |
| 4 | +import getData from './getData.js' |
| 5 | +import getLinks from './getLinks.js' |
| 6 | +import * as prettier from 'prettier' |
4 | 7 |
|
/**
 * Turn a possibly relative directory path into an absolute one.
 *
 * @param {string} dirpath - Directory path; relative paths are resolved
 *   against the current working directory of the process.
 * @returns {string} The absolute, normalized path.
 */
const getAbsolutePath = (dirpath) => {
  const workingDir = process.cwd()
  return path.resolve(workingDir, dirpath)
}
6 | 9 |
|
/**
 * Derive a flat, filesystem-friendly name from a URL.
 *
 * The host name and path are concatenated (protocol, query string and hash
 * are not included) and every character that is not an ASCII letter or digit
 * is replaced with a dash. No file extension is appended.
 *
 * @param {string} url - Absolute URL to derive the name from.
 * @returns {string} The dash-separated name.
 * @throws {TypeError} If `url` cannot be parsed by the `URL` constructor.
 */
const generateName = (url) => {
  const { hostname, pathname } = new URL(url)
  const rawName = `${hostname}${pathname}`
  return rawName.replace(/[^a-zA-Z0-9]/g, '-')
}
11 | 14 |
|
/**
 * Download a page and the resources reported by `getLinks`, then save a
 * local copy whose `<img src>` attributes point at the downloaded files.
 *
 * The page is written to `<outputDir>/<name>.html` and the resources to
 * `<outputDir>/<name>_files/`, where `<name>` comes from `generateName(url)`.
 * The resources directory is created only when there is at least one link.
 *
 * NOTE(review): `getData` and `getLinks` are defined in other modules; this
 * code assumes `getData` resolves with content that `cheerio.load` and
 * `fsp.writeFile` accept, and that `getLinks` returns an array of the raw
 * `src` strings found in the page - confirm against those modules.
 *
 * @param {string} url - Address of the page to download.
 * @param {string} outputDir - Target directory, resolved against the current
 *   working directory. It must already exist.
 * @returns {Promise<string>} Resolves with the absolute path of the saved
 *   HTML file.
 * @throws {TypeError} Synchronously, if `url` is not a valid absolute URL.
 *   Every later failure rejects the returned promise with an `Error` whose
 *   message is `Error: <original message>` and whose `cause` is the original
 *   error.
 */
const downloadPage = (url, outputDir) => {
  const pageName = generateName(url)
  const outputPath = getAbsolutePath(outputDir)
  const filePath = path.join(outputPath, `${pageName}.html`)
  const filesDirName = `${pageName}_files`
  const filesDirPath = path.join(outputPath, filesDirName)

  // Create the resources directory, tolerating one left over from an earlier
  // run for the same URL. Other failures (e.g. a missing output directory)
  // still propagate.
  const ensureFilesDir = () => fsp.mkdir(filesDirPath).catch((err) => {
    if (err.code !== 'EEXIST') {
      throw err
    }
  })

  // Download one resource, store it, and point matching <img> tags at the copy.
  const saveResource = ($, link) => {
    const absoluteLink = new URL(link, url).href
    // generateName turns the extension dot into a dash; restore the last one.
    const imageName = generateName(absoluteLink).replace(/-(?=[a-zA-Z0-9]+$)/, '.')
    const imagePath = path.join(filesDirPath, imageName)
    // This value goes into HTML, so it must use '/' on every platform.
    const localSrc = `${filesDirName}/${imageName}`

    return getData(absoluteLink)
      .then(imgData => fsp.writeFile(imagePath, imgData))
      .then(() => {
        $('img').each((i, tag) => {
          if ($(tag).attr('src') === link) {
            $(tag).attr('src', localSrc)
          }
        })
      })
  }

  return getData(url)
    .then((html) => {
      const $ = cheerio.load(html)
      const links = getLinks(html)

      if (links.length === 0) {
        return $
      }

      return ensureFilesDir()
        .then(() => Promise.all(links.map(link => saveResource($, link))))
        .then(() => $)
    })
    .then($ => prettier.format($.html(), { parser: 'html' }))
    .then(formattedHtml => fsp.writeFile(filePath, formattedHtml))
    .then(() => filePath)
    .catch((err) => {
      // Keep the established message format but preserve the original error.
      throw new Error(`Error: ${err.message}`, { cause: err })
    })
}
|
0 commit comments