@@ -3,9 +3,55 @@ import fsp from 'fs/promises'
33import debug from 'debug'
44import getData from './getData.js'
55import generateName from './generateName.js'
6+ import Listr from 'listr'
67
78const log = debug ( 'page-loader' )
89
/**
 * Builds a Listr task list with one download task per distinct same-host link.
 *
 * Links are resolved against `url`; a link is kept only when the resolved
 * hostname equals the hostname of `url`. Empty attribute values are skipped,
 * external links are skipped and logged.
 *
 * @param {Function} $ - document query function, passed through to `downloadFile`.
 * @param {Iterable<string>} links - raw attribute values collected from the document.
 * @param {string} url - absolute URL of the page; base for resolving `links`.
 * @param {string} filesDirPath - directory the downloaded files are written to.
 * @param {string} selector - selector of the tags the links were taken from.
 * @param {string} attr - name of the attribute that holds the link.
 * @returns {Listr} a task list; nothing is downloaded until `.run()` is called.
 * @throws {TypeError} if `url` or a resolved link is not a valid URL.
 */
const tasks = ($, links, url, filesDirPath, selector, attr) => {
  // Parsed once instead of once per link.
  const pageHostname = new URL(url).hostname

  // downloadFile rewrites every tag whose attribute equals the link, so one
  // task per distinct value is enough; duplicates would only download the
  // resource again and write the same file concurrently.
  const localResources = [...new Set(links)]
    // An empty value is not a resource reference; skip it explicitly.
    .filter((link) => Boolean(link))
    .map((link) => ({ link, absLink: new URL(link, url) }))
    .filter(({ absLink }) => {
      const isLocal = absLink.hostname === pageHostname
      if (!isLocal) {
        log('skip external link %s', absLink.href)
      }
      return isLocal
    })

  const taskList = localResources.map(({ link, absLink }) => ({
    title: absLink.href,
    task: () => downloadFile($, link, absLink, filesDirPath, selector, attr),
  }))

  return new Listr(taskList, {
    // Downloads are independent of each other, so run them in parallel.
    concurrent: true,
    // A failed download must not stop the remaining ones.
    exitOnError: false,
  })
}
27+
/**
 * Downloads one resource, writes it into `filesDirPath` and rewrites the
 * matching tags of the document so they point at the saved copy.
 *
 * @param {Function} $ - document query function, called as `$(selector)` and
 *   `$(tag)` (presumably a cheerio instance — confirm against the caller).
 * @param {string} link - attribute value exactly as it appears in the document.
 * @param {URL} absLink - `link` resolved to an absolute URL.
 * @param {string} filesDirPath - directory the file is written to.
 * @param {string} selector - selector of the tags whose attribute may hold `link`.
 * @param {string} attr - name of the attribute that holds the link.
 * @returns {Promise<void>} resolves after the file is written and the tags are updated;
 *   rejects with `Error('Cant load file <href>')` when any step fails, with the
 *   underlying error available as `cause`.
 */
const downloadFile = ($, link, absLink, filesDirPath, selector, attr) => {
  const generatedName = generateName(absLink.href)
  // If the URL path has an extension, the last "-" that is followed only by
  // alphanumerics up to the end of the generated name becomes "." again;
  // otherwise the resource is saved with an ".html" extension.
  const fileName = path.extname(absLink.pathname)
    ? generatedName.replace(/-(?=[a-zA-Z0-9]+$)/, '.')
    : `${generatedName}.html`
  const filePath = path.join(filesDirPath, fileName)

  log('download file %s', absLink.href)
  return getData(absLink.href)
    .then((data) => {
      log('write to file %s', filePath)
      return fsp.writeFile(filePath, data)
    })
    .then(() => {
      // This value ends up inside the HTML, so it must use "/" on every
      // platform; the platform-specific join would emit "\" on Windows.
      const newLink = path.posix.join(path.basename(filesDirPath), fileName)
      log('change link from %s to %s in HTML', link, newLink)
      $(selector).each((i, tag) => {
        if ($(tag).attr(attr) === link) {
          $(tag).attr(attr, newLink)
        }
      })
    })
    .catch((err) => {
      log('failed to download %s: %s', absLink.href, err.message)
      // Keep the original failure reachable for debugging.
      throw new Error(`Cant load file ${absLink.href}`, { cause: err })
    })
}
54+
955const downloadFiles = ( $ , url , filesDirPath , selector , attr ) => {
1056 const links = $ ( selector ) . map ( ( i , tag ) => $ ( tag ) . attr ( attr ) ) . get ( )
1157
@@ -16,38 +62,11 @@ const downloadFiles = ($, url, filesDirPath, selector, attr) => {
1662
1763 log ( `download %ss` , selector )
1864 return fsp . mkdir ( filesDirPath , { recursive : true } )
19- . then ( ( ) => Promise . all ( links . map ( ( link ) => {
20- const absLink = new URL ( link , url )
21- if ( absLink . hostname !== new URL ( url ) . hostname ) {
22- log ( 'skip external link %s' , absLink . href )
23- return Promise . resolve ( )
24- }
25- const fileName = path . extname ( absLink . pathname )
26- ? generateName ( absLink . href ) . replace ( / - (? = [ a - z A - Z 0 - 9 ] + $ ) / , '.' )
27- : generateName ( absLink . href ) + '.html'
28- const filePath = path . join ( filesDirPath , fileName )
29-
30- log ( 'download file %s' , absLink . href )
31- return getData ( absLink . href )
32- . then ( ( data ) => {
33- log ( 'write to file %s' , filePath )
34- return fsp . writeFile ( filePath , data )
35- } )
36- . then ( ( ) => {
37- const newLink = path . join ( path . basename ( filesDirPath ) , fileName )
38- log ( 'change link from %s to %s in HTML' , link , newLink )
39- $ ( selector ) . each ( ( i , tag ) => {
40- if ( $ ( tag ) . attr ( attr ) === link ) {
41- $ ( tag ) . attr ( attr , newLink )
42- }
43- } )
44- } )
45- . catch ( ( err ) => {
46- log ( 'failed to download %s: %s' , absLink . href , err . message )
47- throw new Error ( `Cant load file ${ absLink . href } ` )
48- } )
49- } ) ,
50- ) )
65+ . then ( ( ) => tasks ( $ , links , url , filesDirPath , selector , attr )
66+ . run ( ) . catch ( ( err ) => {
67+ log ( 'failed to download files: %s' , err . message )
68+ throw err
69+ } ) )
5170}
5271
5372export default downloadFiles
0 commit comments