@@ -2,7 +2,7 @@ import fs from 'node:fs'
2
2
import { writeFile } from 'node:fs/promises'
3
3
import path from 'node:path'
4
4
import { JSDOM } from 'jsdom'
5
- import puppeteer , { Browser , Page } from 'puppeteer'
5
+ import puppeteer , { Browser } from 'puppeteer'
6
6
7
7
import { batchRequest , syncBatchRequest } from './request'
8
8
import { quickSort } from './sort'
@@ -18,23 +18,22 @@ import {
18
18
} from './utils'
19
19
20
20
import {
21
- IXCrawlBaseConifg ,
22
- IFetchHTMLConfig ,
23
- IFetchDataConfig ,
24
- IFetchFileConfig ,
25
- IStartPollingConfig ,
26
- IFetchBaseConifg ,
27
- IFileInfo ,
28
- IFetchHTML ,
29
- IRequestResItem ,
30
- IRequestConfig ,
31
- IIntervalTime ,
32
- IFetchCommon ,
33
- IFetchCommonArr
34
- } from './types'
35
-
36
- function mergeConfig < T extends IFetchBaseConifg > (
37
- baseConfig : IXCrawlBaseConifg ,
21
+ FetchBaseConifgV1 ,
22
+ FetchDataConfig ,
23
+ FetchFileConfig ,
24
+ FetchHTML ,
25
+ FetchHTMLConfig ,
26
+ FetchResCommonArrV1 ,
27
+ FetchResCommonV1 ,
28
+ FileInfo ,
29
+ IntervalTime ,
30
+ StartPollingConfig
31
+ } from './types/api'
32
+ import { XCrawlBaseConifg } from './types'
33
+ import { RequestConfig , RequestResItem } from './types/request'
34
+
35
+ function mergeConfig < T extends FetchBaseConifgV1 > (
36
+ baseConfig : XCrawlBaseConifg ,
38
37
rawConfig : T
39
38
) : T {
40
39
const newConfig = structuredClone ( rawConfig )
@@ -43,22 +42,22 @@ function mergeConfig<T extends IFetchBaseConifg>(
43
42
const requestConifgArr = isArray ( newConfig . requestConifg )
44
43
? newConfig . requestConifg
45
44
: [ newConfig . requestConifg ]
46
- for ( const requestItem of requestConifgArr ) {
47
- const { url, timeout, proxy } = requestItem
45
+ for ( const requesttem of requestConifgArr ) {
46
+ const { url, timeout, proxy } = requesttem
48
47
49
48
// 1.1.baseUrl
50
49
if ( ! isUndefined ( baseConfig . baseUrl ) ) {
51
- requestItem . url = baseConfig . baseUrl + url
50
+ requesttem . url = baseConfig . baseUrl + url
52
51
}
53
52
54
53
// 1.2.timeout
55
54
if ( isUndefined ( timeout ) ) {
56
- requestItem . timeout = baseConfig . timeout
55
+ requesttem . timeout = baseConfig . timeout
57
56
}
58
57
59
58
// 1.3.proxy
60
59
if ( isUndefined ( proxy ) ) {
61
- requestItem . proxy = baseConfig . proxy
60
+ requesttem . proxy = baseConfig . proxy
62
61
}
63
62
}
64
63
@@ -72,9 +71,9 @@ function mergeConfig<T extends IFetchBaseConifg>(
72
71
73
72
async function useBatchRequestByMode (
74
73
mode : 'async' | 'sync' | undefined ,
75
- requestConifg : IRequestConfig | IRequestConfig [ ] ,
76
- intervalTime : IIntervalTime | undefined ,
77
- callback : ( requestResItem : IRequestResItem ) => void
74
+ requestConifg : RequestConfig | RequestConfig [ ] ,
75
+ intervalTime : IntervalTime | undefined ,
76
+ callback : ( requestRestem : RequestResItem ) => void
78
77
) {
79
78
const requestConfigQueue = isArray ( requestConifg )
80
79
? requestConifg
@@ -87,25 +86,33 @@ async function useBatchRequestByMode(
87
86
}
88
87
}
89
88
90
- export function createFetchHTML ( baseConfig : IXCrawlBaseConifg ) {
91
- // 初始值
89
+ export function createFetchHTML ( baseConfig : XCrawlBaseConifg ) {
92
90
let browser : Browser | null = null
93
- let page : Page | null = null
94
- let useTotal = 0
91
+ let createBrowserState : Promise < void > | null = null
92
+ let callTotal = 0
95
93
96
94
async function fetchHTML (
97
- config : IFetchHTMLConfig ,
98
- callback ?: ( res : IFetchHTML ) => void
99
- ) : Promise < IFetchHTML > {
100
- // 完成初始化
101
- if ( useTotal === 0 ) {
102
- browser = await puppeteer . launch ( )
103
- page = await browser . newPage ( )
104
- await page . setViewport ( { width : 1280 , height : 1024 } )
95
+ config : FetchHTMLConfig ,
96
+ callback ?: ( res : FetchHTML ) => void
97
+ ) : Promise < FetchHTML > {
98
+ // 记录调用次数, 为关闭浏览器
99
+ callTotal ++
100
+
101
+ // 只创建一次浏览器
102
+ if ( callTotal === 1 ) {
103
+ createBrowserState = puppeteer . launch ( ) . then ( ( res ) => {
104
+ browser = res
105
+ } )
106
+ }
107
+
108
+ // 等待浏览器创建完毕
109
+ if ( createBrowserState ) {
110
+ await Promise . all ( [ createBrowserState ] )
111
+ createBrowserState = null
105
112
}
106
113
107
- // 记录调用次数
108
- useTotal ++
114
+ const page = await browser ! . newPage ( )
115
+ await page . setViewport ( { width : 1280 , height : 1024 } )
109
116
110
117
const { requestConifg } = mergeConfig ( baseConfig , {
111
118
requestConifg : isString ( config ) ? { url : config } : config
@@ -127,13 +134,14 @@ export function createFetchHTML(baseConfig: IXCrawlBaseConifg) {
127
134
const content = await page ! . content ( )
128
135
129
136
// 关闭浏览器
130
- if ( -- useTotal === 0 ) {
137
+ if ( -- callTotal === 0 ) {
131
138
await browser ! . close ( )
132
139
}
133
140
134
- const res : IFetchHTML = {
141
+ const res : FetchHTML = {
135
142
httpResponse,
136
143
data : {
144
+ page,
137
145
content,
138
146
jsdom : new JSDOM ( content )
139
147
}
@@ -149,23 +157,23 @@ export function createFetchHTML(baseConfig: IXCrawlBaseConifg) {
149
157
return fetchHTML
150
158
}
151
159
152
- export function createFetchData ( baseConfig : IXCrawlBaseConifg ) {
160
+ export function createFetchData ( baseConfig : XCrawlBaseConifg ) {
153
161
async function fetchData < T = any > (
154
- config : IFetchDataConfig ,
155
- callback ?: ( res : IFetchCommon < T > ) => void
156
- ) : Promise < IFetchCommonArr < T > > {
162
+ config : FetchDataConfig ,
163
+ callback ?: ( res : FetchResCommonV1 < T > ) => void
164
+ ) : Promise < FetchResCommonArrV1 < T > > {
157
165
const { requestConifg, intervalTime } = mergeConfig ( baseConfig , config )
158
166
159
- const container : IFetchCommonArr < T > = [ ]
160
- function handleResItem ( requestResItem : IRequestResItem ) {
161
- const contentType = requestResItem . headers [ 'content-type' ] ?? ''
162
- const rawData = requestResItem . data
167
+ const container : FetchResCommonArrV1 < T > = [ ]
168
+ function handleRestem ( requestRestem : RequestResItem ) {
169
+ const contentType = requestRestem . headers [ 'content-type' ] ?? ''
170
+ const rawData = requestRestem . data
163
171
164
172
const data = contentType . includes ( 'text' )
165
173
? rawData . toString ( )
166
174
: JSON . parse ( rawData . toString ( ) )
167
175
168
- const itemRes = { ...requestResItem , data }
176
+ const itemRes = { ...requestRestem , data }
169
177
170
178
if ( callback ) {
171
179
callback ( itemRes )
@@ -178,7 +186,7 @@ export function createFetchData(baseConfig: IXCrawlBaseConifg) {
178
186
baseConfig . mode ,
179
187
requestConifg ,
180
188
intervalTime ,
181
- handleResItem
189
+ handleRestem
182
190
)
183
191
184
192
const res = quickSort (
@@ -190,26 +198,26 @@ export function createFetchData(baseConfig: IXCrawlBaseConifg) {
190
198
return fetchData
191
199
}
192
200
193
- export function createFetchFile ( baseConfig : IXCrawlBaseConifg ) {
201
+ export function createFetchFile ( baseConfig : XCrawlBaseConifg ) {
194
202
async function fetchFile (
195
- config : IFetchFileConfig ,
196
- callback ?: ( res : IFetchCommon < IFileInfo > ) => void
197
- ) : Promise < IFetchCommonArr < IFileInfo > > {
203
+ config : FetchFileConfig ,
204
+ callback ?: ( res : FetchResCommonV1 < FileInfo > ) => void
205
+ ) : Promise < FetchResCommonArrV1 < FileInfo > > {
198
206
const { requestConifg, intervalTime, fileConfig } = mergeConfig (
199
207
baseConfig ,
200
208
config
201
209
)
202
210
203
- const container : IFetchCommonArr < IFileInfo > = [ ]
211
+ const container : FetchResCommonArrV1 < FileInfo > = [ ]
204
212
const saveFileArr : Promise < void > [ ] = [ ]
205
213
const saveFileErrorArr : { message : string ; valueOf : ( ) => number } [ ] = [ ]
206
214
207
215
if ( ! fs . existsSync ( fileConfig . storeDir ) ) {
208
216
fs . mkdirSync ( fileConfig . storeDir )
209
217
}
210
218
211
- function handleResItem ( requestResItem : IRequestResItem ) {
212
- const { id, headers, data } = requestResItem
219
+ function handleRestem ( requestRestem : RequestResItem ) {
220
+ const { id, headers, data } = requestRestem
213
221
214
222
const mimeType = headers [ 'content-type' ] ?? ''
215
223
const fileExtension = fileConfig . extension ?? mimeType . split ( '/' ) . pop ( )
@@ -219,7 +227,7 @@ export function createFetchFile(baseConfig: IXCrawlBaseConifg) {
219
227
`${ fileName } .${ fileExtension } `
220
228
)
221
229
222
- const saveFileItem = writeFile ( filePath , data )
230
+ const saveFiletem = writeFile ( filePath , data )
223
231
. catch ( ( err ) => {
224
232
const message = `File save error at id ${ id } : ${ err . message } `
225
233
const valueOf = ( ) => id
@@ -232,7 +240,7 @@ export function createFetchFile(baseConfig: IXCrawlBaseConifg) {
232
240
if ( isError ) return
233
241
234
242
const res = {
235
- ...requestResItem ,
243
+ ...requestRestem ,
236
244
data : { fileName, mimeType, size : data . length , filePath }
237
245
}
238
246
@@ -243,14 +251,14 @@ export function createFetchFile(baseConfig: IXCrawlBaseConifg) {
243
251
container . push ( res )
244
252
} )
245
253
246
- saveFileArr . push ( saveFileItem )
254
+ saveFileArr . push ( saveFiletem )
247
255
}
248
256
249
257
await useBatchRequestByMode (
250
258
baseConfig . mode ,
251
259
requestConifg ,
252
260
intervalTime ,
253
- handleResItem
261
+ handleRestem
254
262
)
255
263
256
264
// 等待保存文件任务完成
@@ -280,17 +288,15 @@ export function createFetchFile(baseConfig: IXCrawlBaseConifg) {
280
288
}
281
289
282
290
export function startPolling (
283
- config : IStartPollingConfig ,
291
+ config : StartPollingConfig ,
284
292
callback : ( count : number ) => void
285
293
) {
286
- const { Y , M , d, h, m } = config
294
+ const { d, h, m } = config
287
295
288
- const year = ! isUndefined ( Y ) ? Y * 1000 * 60 * 60 * 24 * 365 : 0
289
- const month = ! isUndefined ( M ) ? M * 1000 * 60 * 60 * 24 * 30 : 0
290
296
const day = ! isUndefined ( d ) ? d * 1000 * 60 * 60 * 24 : 0
291
297
const hour = ! isUndefined ( h ) ? h * 1000 * 60 * 60 : 0
292
298
const minute = ! isUndefined ( m ) ? m * 1000 * 60 : 0
293
- const total = year + month + day + hour + minute
299
+ const total = day + hour + minute
294
300
295
301
let count = 0
296
302
function startCallback ( ) {
0 commit comments