
Commit 56c9a0e

Rename the fetchHTML API to fetchPage
1 parent 4f863bb commit 56c9a0e
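For callers, the rename is mechanical: fetchHTML becomes fetchPage, and the shape of the resolved result is unchanged. A minimal before/after sketch based on the README example in this diff; the target URL and the crawler options are placeholders:

```ts
import xCrawl from 'x-crawl'

// Placeholder options, mirroring the README example below
const myXCrawl = xCrawl({ timeout: 10000, intervalTime: { max: 3000, min: 2000 } })

// Before this commit: myXCrawl.fetchHTML('https://www.example.com/').then(...)
// After this commit:
myXCrawl.fetchPage('https://www.example.com/').then((res) => {
  const { jsdom } = res.data // still parsed with JSDOM by default
  console.log(jsdom.window.document.querySelector('title')?.textContent)
})
```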

File tree

16 files changed: +166 -219 lines changed


README.md

+36-39
@@ -16,7 +16,7 @@ x-crawl is a Nodejs multifunctional crawler library.
 
 ## Relationship with puppeteer
 
-The fetchHTML API internally uses the [puppeteer ](https://github.com/puppeteer/puppeteer) library to crawl pages.
+The fetchPage API internally uses the [puppeteer](https://github.com/puppeteer/puppeteer) library to crawl pages.
 
 The following can be done:
 
@@ -34,7 +34,7 @@ The following can be done:
 + [Example](#Example-1)
 + [Mode](#Mode)
 + [IntervalTime](#IntervalTime)
-* [fetchHTML](#fetchHTML)
+* [fetchPage](#fetchPage)
 + [Type](#Type-2)
 + [Example](#Example-2)
 + [About page](#About-page)
@@ -50,19 +50,19 @@ The following can be done:
 - [Types](#Types)
 * [AnyObject](#AnyObject)
 * [Method](#Method)
+* [RequestBaseConfig](#RequestBaseConfig)
 * [RequestConfig](#RequestConfig)
 * [IntervalTime](#IntervalTime)
 * [XCrawlBaseConfig](#XCrawlBaseConfig)
 * [FetchBaseConfigV1](#FetchBaseConfigV1)
-* [FetchBaseConfigV2](#FetchBaseConfigV2)
-* [FetchHTMLConfig](#FetchHTMLConfig )
+* [FetchPageConfig](#FetchPageConfig )
 * [FetchDataConfig](#FetchDataConfig)
 * [FetchFileConfig](#FetchFileConfig)
 * [StartPollingConfig](#StartPollingConfig)
 * [FetchResCommonV1](#FetchResCommonV1)
 * [FetchResCommonArrV1](#FetchResCommonArrV1)
 * [FileInfo](#FileInfo)
-* [FetchHTML](#FetchHTML)
+* [FetchPage](#FetchPage)
 - [More](#More)
 
 ## Install
@@ -90,9 +90,9 @@ const myXCrawl = xCrawl({
 // 3.Set the crawling task
 // Call the startPolling API to start the polling function, and the callback function will be called every other day
 myXCrawl.startPolling({ d: 1 }, () => {
-  // Call fetchHTML API to crawl HTML
-  myXCrawl.fetchHTML('https://www.youtube.com/').then((res) => {
-    const { jsdom } = res.data // By default, the JSDOM library is used to parse HTML
+  // Call fetchPage API to crawl Page
+  myXCrawl.fetchPage('https://www.youtube.com/').then((res) => {
+    const { jsdom } = res.data // By default, the JSDOM library is used to parse Page
 
     // Get the cover image element of the Promoted Video
     const imgEls = jsdom.window.document.querySelectorAll(
@@ -124,7 +124,7 @@ running result:
 <img src="https://raw.githubusercontent.com/coder-hxl/x-crawl/main/assets/en/crawler-result.png" />
 </div>
 
-**Note:** Do not crawl randomly, here is just to demonstrate how to use XCrawl, and control the request frequency within 3000ms to 2000ms.
+**Note:** Do not crawl randomly, here is just to demonstrate how to use x-crawl, and control the request frequency within 3000ms to 2000ms.
 
 ## Core concepts
 
@@ -154,9 +154,9 @@ const myXCrawl = xCrawl({
 })
 ```
 
-Passing **baseConfig** is for **fetchHTML/fetchData/fetchFile** to use these values by default.
+Passing **baseConfig** is for **fetchPage/fetchData/fetchFile** to use these values by default.
 
-**Note:** To avoid repeated creation of instances in subsequent examples, **myXCrawl** here will be the crawler instance in the **fetchHTML/fetchData/fetchFile** example.
+**Note:** To avoid repeated creation of instances in subsequent examples, **myXCrawl** here will be the crawler instance in the **fetchPage/fetchData/fetchFile** example.
 
 #### Mode
 
@@ -176,26 +176,26 @@ The intervalTime option defaults to undefined . If there is a setting value, it
 
 The first request is not to trigger the interval.
 
-### fetchHTML
+### fetchPage
 
-fetchHTML is the method of the above [myXCrawl](https://github.com/coder-hxl/x-crawl#Example-1) instance, usually used to crawl page.
+fetchPage is the method of the above [myXCrawl](https://github.com/coder-hxl/x-crawl#Example-1) instance, usually used to crawl page.
 
 #### Type
 
-- Look at the [FetchHTMLConfig](#FetchHTMLConfig) type
-- Look at the [FetchHTML](#FetchHTML-2) type
+- Look at the [FetchPageConfig](#FetchPageConfig) type
+- Look at the [FetchPage](#FetchPage-2) type
 
 ```ts
-function fetchHTML: (
-  config: FetchHTMLConfig,
-  callback?: (res: FetchHTML) => void
-) => Promise<FetchHTML>
+function fetchPage: (
+  config: FetchPageConfig,
+  callback?: (res: FetchPage) => void
+) => Promise<FetchPage>
 ```
 
 #### Example
 
 ```js
-myXCrawl.fetchHTML('/xxx').then((res) => {
+myXCrawl.fetchPage('/xxx').then((res) => {
   const { jsdom } = res.data
   console.log(jsdom.window.document.querySelector('title')?.textContent)
 })
@@ -296,7 +296,7 @@ function startPolling(
 ```js
 myXCrawl.startPolling({ h: 1, m: 30 }, () => {
   // will be executed every one and a half hours
-  // fetchHTML/fetchData/fetchFile
+  // fetchPage/fetchData/fetchFile
 })
 ```
 
@@ -316,17 +316,24 @@ interface AnyObject extends Object {
 type Method = 'get' | 'GET' | 'delete' | 'DELETE' | 'head' | 'HEAD' | 'options' | 'OPTONS' | 'post' | 'POST' | 'put' | 'PUT' | 'patch' | 'PATCH' | 'purge' | 'PURGE' | 'link' | 'LINK' | 'unlink' | 'UNLINK'
 ```
 
+### RequestBaseConfig
+
+```ts
+interface RequestBaseConfig {
+  url: string
+  timeout?: number
+  proxy?: string
+}
+```
+
 ### RequestConfig
 
 ```ts
-interface RequestConfig {
-  url: string
+interface RequestConfig extends RequestBaseConfig {
   method?: Method
   headers?: AnyObject
   params?: AnyObject
   data?: any
-  timeout?: number
-  proxy?: string
 }
 ```
 
@@ -360,20 +367,10 @@ interface FetchBaseConfigV1 {
 }
 ```
 
-### FetchBaseConfigV2
-
-```ts
-interface FetchBaseConfigV2 {
-  url: string
-  timeout?: number
-  proxy?: string
-}
-```
-
-### FetchHTMLConfig
+### FetchPageConfig
 
 ```ts
-type FetchHTMLConfig = string | FetchBaseConfigV2
+type FetchPageConfig = string | RequestBaseConfig
 ```
 
 ### FetchDataConfig
@@ -432,10 +429,10 @@ interface FileInfo {
 }
 ```
 
-### FetchHTML
+### FetchPage
 
 ```ts
-interface FetchHTML {
+interface FetchPage {
   httpResponse: HTTPResponse | null // The type of HTTPResponse in the puppeteer library
   data: {
     page: Page // The type of Page in the puppeteer library
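The type changes above fold the removed FetchBaseConfigV2 into the new RequestBaseConfig, so FetchPageConfig now accepts either a URL string or a RequestBaseConfig object. A small sketch of both call forms, using only the fields shown in this diff; the URLs and the proxy address are placeholders:

```ts
import xCrawl from 'x-crawl'

const myXCrawl = xCrawl({ timeout: 10000 })

// FetchPageConfig = string | RequestBaseConfig
myXCrawl.fetchPage('https://www.example.com/')

// Object form with the RequestBaseConfig fields url / timeout / proxy
myXCrawl.fetchPage({
  url: 'https://www.example.com/',
  timeout: 5000,
  proxy: 'http://localhost:8080' // placeholder proxy address
})
```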

assets/cn/crawler.png

-9.5 KB

assets/en/crawler.png

31.4 KB

docs/cn.md

+37-41
@@ -16,7 +16,7 @@ x-crawl is a Nodejs multifunctional crawler library.
 
 ## Relationship with puppeteer
 
-The fetchHTML API internally uses the [puppeteer ](https://github.com/puppeteer/puppeteer) library to crawl pages.
+The fetchPage API internally uses the [puppeteer](https://github.com/puppeteer/puppeteer) library to crawl pages.
 
 The following can be done:
 
@@ -34,7 +34,7 @@ The fetchHTML API internally uses the [puppeteer ](https://github.com/puppeteer/puppeteer)
 + [Example](#示例-1)
 + [Mode](#模式)
 + [IntervalTime](#间隔时间)
-* [fetchHTML](#fetchHTML)
+* [fetchPage](#fetchPage)
 + [Type](#类型-2)
 + [Example](#示例-2)
 * [fetchData](#fetchData)
@@ -54,15 +54,14 @@ The fetchHTML API internally uses the [puppeteer ](https://github.com/puppeteer/puppeteer)
 * [IntervalTime](#IntervalTime)
 * [XCrawlBaseConfig](#XCrawlBaseConfig)
 * [FetchBaseConfigV1](#FetchBaseConfigV1)
-* [FetchBaseConfigV2](#FetchBaseConfigV2)
-* [FetchHTMLConfig](#FetchHTMLConfig )
+* [FetchPageConfig](#FetchPageConfig )
 * [FetchDataConfig](#FetchDataConfig)
 * [FetchFileConfig](#FetchFileConfig)
 * [StartPollingConfig](#StartPollingConfig)
 * [FetchResCommonV1](#FetchResCommonV1)
 * [FetchResCommonArrV1](#FetchResCommonArrV1)
 * [FileInfo](#FileInfo)
-* [FetchHTML](#FetchHTML)
+* [FetchPage](#FetchPage)
 - [More](#更多)
 
 ## Install
@@ -83,16 +82,16 @@ import xCrawl from 'x-crawl'
 
 // 2.Create a crawler instance
 const myXCrawl = xCrawl({
-  timeout: 10000, // overtime time
-  intervalTime: { max: 3000, min: 2000 } // control request frequency
+  timeout: 10000, // request timeout
+  intervalTime: { max: 3000, min: 2000 } // control the request frequency
 })
 
 // 3.Set the crawling task
 // Call the startPolling API to start the polling function, and the callback function will be called every other day
 myXCrawl.startPolling({ d: 1 }, () => {
-  // Call fetchHTML API to crawl HTML
-  myXCrawl.fetchHTML('https://www.bilibili.com/guochuang/').then((res) => {
-    const { jsdom } = res.data // By default, the JSDOM library is used to parse HTML
+  // Call fetchPage API to crawl Page
+  myXCrawl.fetchPage('https://www.bilibili.com/guochuang/').then((res) => {
+    const { jsdom } = res.data // By default, the JSDOM library is used to parse Page
 
     // Get the carousel image elements
     const imgEls = jsdom.window.document.querySelectorAll('.carousel-wrapper .chief-recom-item img')
@@ -117,7 +116,7 @@ myXCrawl.startPolling({ d: 1 }, () => {
 <img src="https://raw.githubusercontent.com/coder-hxl/x-crawl/main/assets/cn/crawler-result.png" />
 </div>
 
-**Note:** Do not crawl randomly, here is just to demonstrate how to use XCrawl, and control the request frequency within 3000ms to 2000ms.
+**Note:** Do not crawl randomly, here is just to demonstrate how to use x-crawl, and control the request frequency within 3000ms to 2000ms.
 
 ## Core concepts
 
@@ -147,9 +146,9 @@ const myXCrawl = xCrawl({
 })
 ```
 
-Passing **baseConfig** is for **fetchHTML/fetchData/fetchFile** to use these values by default.
+Passing **baseConfig** is for **fetchPage/fetchData/fetchFile** to use these values by default.
 
-**Note:** To avoid repeated creation of instances in subsequent examples, **myXCrawl** here will be the crawler instance in the **fetchHTML/fetchData/fetchFile** example.
+**Note:** To avoid repeated creation of instances in subsequent examples, **myXCrawl** here will be the crawler instance in the **fetchPage/fetchData/fetchFile** example.
 
 #### Mode
 
@@ -169,26 +168,26 @@ The intervalTime option defaults to undefined . If there is a setting value, it will
 
 The first request is not to trigger the interval.
 
-### fetchHTML
+### fetchPage
 
-fetchHTML is the method of the [myXCrawl](https://github.com/coder-hxl/x-crawl/blob/main/document/cn.md#%E7%A4%BA%E4%BE%8B-1) instance, usually used to crawl page.
+fetchPage is the method of the [myXCrawl](https://github.com/coder-hxl/x-crawl/blob/main/document/cn.md#%E7%A4%BA%E4%BE%8B-1) instance, usually used to crawl page.
 
 #### Type
 
-- Look at the [FetchHTMLConfig](#FetchHTMLConfig) type
-- Look at the [FetchHTML](#FetchHTML-2) type
+- Look at the [FetchPageConfig](#FetchPageConfig) type
+- Look at the [FetchPage](#FetchPage-2) type
 
 ```ts
-function fetchHTML: (
-  config: FetchHTMLConfig,
-  callback?: (res: FetchHTML) => void
-) => Promise<FetchHTML>
+function fetchPage: (
+  config: FetchPageConfig,
+  callback?: (res: FetchPage) => void
+) => Promise<FetchPage>
 ```
 
 #### Example
 
 ```js
-myXCrawl.fetchHTML('/xxx').then((res) => {
+myXCrawl.fetchPage('/xxx').then((res) => {
   const { jsdom } = res.data
   console.log(jsdom.window.document.querySelector('title')?.textContent)
 })
@@ -289,7 +288,7 @@ function startPolling: (
 ```js
 myXCrawl.startPolling({ h: 1, m: 30 }, () => {
   // will be executed every one and a half hours
-  // fetchHTML/fetchData/fetchFile
+  // fetchPage/fetchData/fetchFile
 })
 ```
 
@@ -309,17 +308,24 @@ interface AnyObject extends Object {
 type Method = 'get' | 'GET' | 'delete' | 'DELETE' | 'head' | 'HEAD' | 'options' | 'OPTONS' | 'post' | 'POST' | 'put' | 'PUT' | 'patch' | 'PATCH' | 'purge' | 'PURGE' | 'link' | 'LINK' | 'unlink' | 'UNLINK'
 ```
 
+### RequestBaseConfig
+
+```ts
+interface RequestBaseConfig {
+  url: string
+  timeout?: number
+  proxy?: string
+}
+```
+
 ### RequestConfig
 
 ```ts
-interface RequestConfig {
-  url: string
+interface RequestConfig extends RequestBaseConfig {
   method?: Method
   headers?: AnyObject
   params?: AnyObject
   data?: any
-  timeout?: number
-  proxy?: string
 }
 ```
 
@@ -353,20 +359,10 @@ interface FetchBaseConfigV1 {
 }
 ```
 
-### FetchBaseConfigV2
-
-```ts
-interface FetchBaseConfigV2 {
-  url: string
-  timeout?: number
-  proxy?: string
-}
-```
-
-### FetchHTMLConfig
+### FetchPageConfig
 
 ```ts
-type FetchHTMLConfig = string | FetchBaseConfigV2
+type FetchPageConfig = string | RequestBaseConfig
 ```
 
 ### FetchDataConfig
@@ -425,10 +421,10 @@ interface FileInfo {
 }
 ```
 
-### FetchHTML
+### FetchPage
 
 ```ts
-interface FetchHTML {
+interface FetchPage {
   httpResponse: HTTPResponse | null // The type of HTTPResponse in the puppeteer library
   data: {
     page: Page // The type of Page in the puppeteer library
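Both documents keep the optional callback parameter in the fetchPage signature alongside the returned promise. A minimal sketch of the callback form, assuming the FetchPage result shape shown above (httpResponse from puppeteer plus data.jsdom); the URL is a placeholder:

```ts
import xCrawl from 'x-crawl'

const myXCrawl = xCrawl({ timeout: 10000 })

// fetchPage(config, callback?) also resolves to a Promise<FetchPage>
myXCrawl.fetchPage('https://www.example.com/', (res) => {
  console.log(res.httpResponse?.status()) // HTTPResponse | null from puppeteer
  console.log(res.data.jsdom.window.document.title)
})
```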

package.json

+1-1
@@ -1,7 +1,7 @@
 {
   "private": true,
   "name": "x-crawl",
-  "version": "2.2.1",
+  "version": "2.3.0",
   "author": "coderHXL",
   "description": "XCrawl is a Nodejs multifunctional crawler library.",
   "license": "MIT",
