|
1 | 1 | import fs from 'node:fs'
|
2 | 2 | import { writeFile } from 'node:fs/promises'
|
3 | 3 | import path from 'node:path'
|
4 |
| -import puppeteer, { Browser, Protocol } from 'puppeteer' |
| 4 | +import puppeteer, { Browser, HTTPResponse, Page, Protocol } from 'puppeteer' |
5 | 5 |
|
6 | 6 | import { useBatchCrawlHandleByMode } from './batchCrawlHandle'
|
7 | 7 | import { request } from './request'
|
@@ -166,35 +166,43 @@ export function createCrawlPage(baseConfig: LoaderXCrawlBaseConfig) {
|
166 | 166 | }
|
167 | 167 |
|
168 | 168 | async function crawlPageHandle(handleConfig: CrawlBaseConfigV1) {
|
169 |
| - const page = await browser!.newPage() |
170 |
| - await page.setViewport({ width: 1280, height: 1024 }) |
| 169 | + let page: Page | null = null |
| 170 | + let httpResponse: HTTPResponse | null = null |
| 171 | + |
| 172 | + try { |
| 173 | + page = await browser!.newPage() |
| 174 | + await page.setViewport({ width: 1280, height: 1024 }) |
| 175 | + |
| 176 | + if (handleConfig.proxy) { |
| 177 | + await browser!.createIncognitoBrowserContext({ |
| 178 | + proxyServer: handleConfig.proxy |
| 179 | + }) |
| 180 | + } else { |
| 181 | + await browser!.createIncognitoBrowserContext({ |
| 182 | + proxyServer: undefined |
| 183 | + }) |
| 184 | + } |
171 | 185 |
|
172 |
| - if (handleConfig.proxy) { |
173 |
| - await browser!.createIncognitoBrowserContext({ |
174 |
| - proxyServer: handleConfig.proxy |
175 |
| - }) |
176 |
| - } else { |
177 |
| - await browser!.createIncognitoBrowserContext({ |
178 |
| - proxyServer: undefined |
179 |
| - }) |
180 |
| - } |
| 186 | + if (handleConfig.headers) { |
| 187 | + await page.setExtraHTTPHeaders( |
| 188 | + handleConfig.headers as any as Record<string, string> |
| 189 | + ) |
| 190 | + } |
181 | 191 |
|
182 |
| - if (handleConfig.headers) { |
183 |
| - await page.setExtraHTTPHeaders( |
184 |
| - handleConfig.headers as any as Record<string, string> |
185 |
| - ) |
186 |
| - } |
| 192 | + if (handleConfig.cookies) { |
| 193 | + await page.setCookie( |
| 194 | + ...parseCrawlPageCookies(handleConfig.url, handleConfig.cookies) |
| 195 | + ) |
| 196 | + } |
187 | 197 |
|
188 |
| - if (handleConfig.cookies) { |
189 |
| - await page.setCookie( |
190 |
| - ...parseCrawlPageCookies(handleConfig.url, handleConfig.cookies) |
191 |
| - ) |
| 198 | + httpResponse = await page.goto(handleConfig.url, { |
| 199 | + timeout: handleConfig.timeout |
| 200 | + }) |
| 201 | + } catch (error) { |
| 202 | + await page?.close() |
| 203 | + throw error |
192 | 204 | }
|
193 | 205 |
|
194 |
| - const httpResponse = await page.goto(handleConfig.url, { |
195 |
| - timeout: handleConfig.timeout |
196 |
| - }) |
197 |
| - |
198 | 206 | return { httpResponse, browser: browser!, page }
|
199 | 207 | }
|
200 | 208 |
|
|
0 commit comments