@@ -2,34 +2,55 @@ import path from 'path'
22import fsp from 'fs/promises'
33import * as cheerio from 'cheerio'
44import * as prettier from 'prettier'
5+ import debug from 'debug'
56import getData from './getData.js'
67import downloadFiles from './downloadFiles.js'
78import generateName from './generateName.js'
89
// Namespaced debug logger; the namespace passed to `debug` is 'page-loader'.
const log = debug('page-loader')
11+
/**
 * Resolve a directory path against the current working directory.
 *
 * @param {string} dirpath - Relative or absolute directory path.
 * @returns {string} The result of `path.resolve(process.cwd(), dirpath)`.
 */
const getAbsolutePath = dirpath => path.resolve(process.cwd(), dirpath)
1013
/**
 * Download the page at `url`, fetch the resources referenced by its
 * `img[src]`, `link[href]` and `script[src]` elements via `downloadFiles`,
 * and write the prettier-formatted HTML into `outputDir`.
 *
 * @param {string} url - Address of the page to download.
 * @param {string} outputDir - Target directory, resolved against the current
 *   working directory. It must be accessible before the download starts.
 * @returns {Promise<string>} Resolves with the path of the written HTML file.
 * @throws {Error} The returned promise rejects with an Error whose message is
 *   `Error: <original message>`; the original error is attached as `cause`.
 */
const downloadPage = (url, outputDir) => {
  log('download page %s to %s', url, outputDir)

  const absOutputDir = getAbsolutePath(outputDir)
  // Both the HTML file and the resources directory share one generated base name.
  const baseName = generateName(url)
  const filePath = path.join(absOutputDir, `${baseName}.html`)
  const filesDirPath = path.join(absOutputDir, `${baseName}_files`)

  // Check the output directory first so a bad path fails before any request is made.
  return fsp.access(absOutputDir)
    .then(() => {
      log('output dir exists')
      return getData(url)
    })
    .then((html) => {
      log('HTML downloaded')
      const $ = cheerio.load(html)

      // The same `$` instance is handed to every downloadFiles call and then
      // passed down the chain for serialization.
      return Promise.all([
        downloadFiles($, url, filesDirPath, 'img', 'src'),
        downloadFiles($, url, filesDirPath, 'link', 'href'),
        downloadFiles($, url, filesDirPath, 'script', 'src'),
      ])
        .then(() => $)
    })
    .then(($) => {
      log('files downloaded, format HTML')
      return prettier.format($.html(), { parser: 'html' })
    })
    .then((formattedHtml) => {
      log('write to file %s', filePath)
      return fsp.writeFile(filePath, formattedHtml)
    })
    .then(() => {
      log('done, output file is %s', filePath)
      return filePath
    })
    .catch((err) => {
      log('error: %s', err.message)
      // Keep the original error (stack, code) reachable through `cause`.
      throw new Error(`Error: ${err.message}`, { cause: err })
    })
}
0 commit comments