Skip to content

Commit e436c1f

Browse files
committed
Add download files logic & refactor
1 parent 77aab78 commit e436c1f

File tree

5 files changed

+54
-52
lines changed

5 files changed

+54
-52
lines changed

src/downloadFiles.js

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
import path from 'path'
2+
import fsp from 'fs/promises'
3+
import getData from './getData.js'
4+
import generateName from './generateName.js'
5+
6+
/**
 * Download the same-host resources referenced by the elements matching
 * `selector` and point those elements at the local copies.
 *
 * Each value of `attr` found on the matched elements is resolved against
 * `url`. Resources whose hostname equals the page's hostname are fetched with
 * `getData`, saved inside `filesDirPath`, and every matching element's `attr`
 * is rewritten to `<basename of filesDirPath>/<file name>`.
 *
 * @param {Function} $ - cheerio instance of the loaded page; mutated in place.
 * @param {string} url - address of the page; base for relative links.
 * @param {string} filesDirPath - directory the resources are written to
 *   (created recursively when at least one link is found).
 * @param {string} selector - selector of the elements to inspect, e.g. 'img'.
 * @param {string} attr - attribute holding the resource link, e.g. 'src'.
 * @returns {Promise} settles once all downloads and rewrites are done;
 *   rejects if the directory, a download or a file write fails.
 */
const downloadFiles = ($, url, filesDirPath, selector, attr) => {
  // De-duplicate: the same resource is often referenced by several tags, and
  // fetching it repeatedly would also race on writing the very same file.
  const links = [...new Set($(selector).map((i, tag) => $(tag).attr(attr)).get())]

  if (links.length === 0) {
    return Promise.resolve()
  }

  return fsp.mkdir(filesDirPath, { recursive: true })
    .then(() => {
      // Loop-invariant values, computed once instead of per link.
      const pageHostname = new URL(url).hostname
      const filesDirName = path.basename(filesDirPath)

      return Promise.all(links.map((link) => {
        let absLink
        try {
          absLink = new URL(link, url)
        }
        catch {
          // A malformed attribute value cannot be fetched; leave the tag as is
          // rather than failing the whole page download.
          return Promise.resolve()
        }

        // Only resources served from the page's own host are downloaded.
        if (absLink.hostname !== pageHostname) {
          return Promise.resolve()
        }

        // With an extension: turn the last '-' of the generated name back into
        // the '.' that precedes the extension. Without one: treat it as HTML.
        const fileName = path.extname(absLink.pathname)
          ? generateName(absLink.href).replace(/-(?=[a-zA-Z0-9]+$)/, '.')
          : generateName(absLink.href) + '.html'
        const filePath = path.join(filesDirPath, fileName)
        // The value lands in HTML, so always use '/' as the separator
        // (path.join would produce '\' on Windows).
        const localLink = path.posix.join(filesDirName, fileName)

        return getData(absLink.href)
          .then(data => fsp.writeFile(filePath, data))
          .then(() => {
            // Rewrite only after the file is on disk.
            $(selector).each((i, tag) => {
              if ($(tag).attr(attr) === link) {
                $(tag).attr(attr, localLink)
              }
            })
          })
      }))
    })
}
36+
37+
export default downloadFiles

src/generateName.js

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
/**
 * Build a file-system friendly name from a URL.
 *
 * The name is the URL's hostname followed by its pathname, with every
 * character that is not an ASCII letter or digit replaced by '-'. Trailing
 * dashes are dropped so that a path ending in '/' (including a bare origin,
 * whose pathname is '/') does not produce names like 'example-com-'.
 * The protocol, query string and fragment are not part of the name.
 *
 * @param {string} url - absolute URL.
 * @returns {string} generated name, without any extension.
 * @throws {TypeError} if `url` cannot be parsed as an absolute URL.
 */
const generateName = (url) => {
  const { hostname, pathname } = new URL(url)
  return `${hostname}${pathname}`
    .replace(/[^a-zA-Z0-9]/g, '-')
    .replace(/-+$/, '')
}
5+
6+
export default generateName

src/getData.js

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,7 @@
11
import axios from 'axios'
2-
import path from 'path'
32

4-
const getData = (url) => {
5-
const imgExtenstions = ['.png', '.jpg', '.jpeg', '.svg']
6-
if (imgExtenstions.includes(path.extname(url).toLowerCase())) {
7-
return axios.get(url, { responseType: 'arraybuffer' }).then(response => response.data)
8-
}
9-
return axios.get(url).then(response => response.data)
10-
}
3+
/**
 * Request `url` through axios with `responseType: 'arraybuffer'` and resolve
 * with the `data` field of the response.
 *
 * @param {string} url - address to request.
 * @returns {Promise} resolves with the response's `data`; rejects when the
 *   axios request rejects.
 */
const getData = (url) => {
  const request = axios.get(url, { responseType: 'arraybuffer' })
  return request.then(({ data }) => data)
}
116

127
export default getData

src/getLinks.js

Lines changed: 0 additions & 10 deletions
This file was deleted.

src/index.js

Lines changed: 8 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1,55 +1,29 @@
11
import path from 'path'
22
import fsp from 'fs/promises'
33
import * as cheerio from 'cheerio'
4-
import getData from './getData.js'
5-
import getLinks from './getLinks.js'
64
import * as prettier from 'prettier'
5+
import getData from './getData.js'
6+
import downloadFiles from './downloadFiles.js'
7+
import generateName from './generateName.js'
78

89
/**
 * Resolve `dirpath` to an absolute path, using the process's current working
 * directory as the base for relative input.
 *
 * @param {string} dirpath - absolute or relative path.
 * @returns {string} absolute path.
 */
const getAbsolutePath = (dirpath) => {
  const workingDir = process.cwd()
  return path.resolve(workingDir, dirpath)
}
910

10-
const generateName = (url) => {
11-
const href = new URL(url)
12-
return (href.hostname + href.pathname).replace(/[^a-zA-Z0-9]/g, '-')
13-
}
14-
1511
const downloadPage = (url, outputDir) => {
1612
const fileName = generateName(url) + '.html'
1713
const filePath = path.join(getAbsolutePath(outputDir), fileName)
1814
const filesDirName = generateName(url) + '_files'
1915
const filesDirPath = path.join(getAbsolutePath(outputDir), filesDirName)
20-
let links
2116
let $
2217

2318
return getData(url)
2419
.then((html) => {
2520
$ = cheerio.load(html)
26-
links = getLinks(html)
27-
28-
if (links.length === 0) {
29-
return null
30-
}
31-
return fsp.mkdir(filesDirPath)
32-
})
33-
.then(() => {
34-
if (links.length === 0) {
35-
return null
36-
}
37-
return Promise.all(links.map((link) => {
38-
const absoluteLink = new URL(link, url).href
39-
const imageName = generateName(absoluteLink).replace(/-(?=[a-zA-Z0-9]+$)/, '.')
40-
const imagePath = path.join(filesDirPath, imageName)
4121

42-
return getData(absoluteLink)
43-
.then(imgData => fsp.writeFile(imagePath, imgData))
44-
.then(() => {
45-
$('img').each((i, tag) => {
46-
if ($(tag).attr('src') === link) {
47-
$(tag).attr('src', path.join(filesDirName, imageName))
48-
}
49-
})
50-
})
51-
}),
52-
)
22+
return Promise.all([
23+
downloadFiles($, url, filesDirPath, 'img', 'src'),
24+
downloadFiles($, url, filesDirPath, 'link', 'href'),
25+
downloadFiles($, url, filesDirPath, 'script', 'src'),
26+
])
5327
})
5428
.then(() => prettier.format($.html(), { parser: 'html' }))
5529
.then(formattedHtml => fsp.writeFile(filePath, formattedHtml))

0 commit comments

Comments
 (0)