
Commit 2316bb4

Docs: Update
1 parent ea453fa commit 2316bb4

6 files changed, +64 −41 lines changed

README.md (+21 −12)

````diff
@@ -4,17 +4,21 @@ English | [简体中文](https://github.com/coder-hxl/x-crawl/blob/main/docs/cn.md)
 
 x-crawl is a Nodejs multifunctional crawler library.
 
-## Feature
+## Features
 
-- Crawl HTML, JSON, file resources, etc. with simple configuration.
-- Built-in puppeteer crawls HTML and uses JSDOM library to parse HTML.
+- Crawl pages, JSON, file resources, etc. with simple configuration.
+- The built-in puppeteer crawls the page and parses it with the jsdom library.
 - Support asynchronous/synchronous way to crawl data.
-- Support Promise/Callback way to get the result.
-- Polling function.
+- Support Promise/Callback method to get the result.
+- Polling function, fixed-point crawling.
 - Anthropomorphic request interval.
-- Written in TypeScript, provides generics.
+- Written in TypeScript, providing generics.
 
-## Benefits provided by using puppeteer
+## Relationship with puppeteer
+
+The fetchHTML API internally uses the [puppeteer](https://github.com/puppeteer/puppeteer) library to crawl pages.
+
+The following can be done:
 
 - Generate screenshots and PDFs of pages.
 - Crawl a SPA (Single-Page Application) and generate pre-rendered content (i.e. "SSR" (Server-Side Rendering)).
@@ -33,6 +37,7 @@ x-crawl is a Nodejs multifunctional crawler library.
 * [fetchHTML](#fetchHTML)
   + [Type](#Type-2)
   + [Example](#Example-2)
+  + [About page](#About-page)
 * [fetchData](#fetchData)
   + [Type](#Type-3)
   + [Example](#Example-3)
@@ -173,12 +178,12 @@ The first request is not to trigger the interval.
 
 ### fetchHTML
 
-fetchHTML is the method of the above [myXCrawl](https://github.com/coder-hxl/x-crawl#Example-1) instance, usually used to crawl HTML.
+fetchHTML is the method of the above [myXCrawl](https://github.com/coder-hxl/x-crawl#Example-1) instance, usually used to crawl pages.
 
 #### Type
 
 - Look at the [FetchHTMLConfig](#FetchHTMLConfig) type
-- Look at the [FetchHTML](#FetchHTML) type
+- Look at the [FetchHTML](#FetchHTML-2) type
 
 ```ts
 function fetchHTML: (
@@ -196,6 +201,10 @@ myXCrawl.fetchHTML('/xxx').then((res) => {
 })
 ```
 
+#### About page
+
+Get the page instance from res.data.page, which can perform interactive operations such as events. For specific usage, refer to [page](https://pptr.dev/api/puppeteer.page).
+
 ### fetchData
 
 fetchData is the method of the above [myXCrawl](#Example-1) instance, which is usually used to crawl APIs to obtain JSON data and so on.
@@ -224,7 +233,7 @@ const requestConfig = [
 
 myXCrawl.fetchData({
   requestConfig, // Request configuration, can be RequestConfig | RequestConfig[]
-  intervalTime: { max: 5000, min: 1000 } // The intervalTime passed in when not using myXCrawl
+  intervalTime: { max: 5000, min: 1000 } // The intervalTime passed in when creating myXCrawl is not used
 }).then(res => {
   console.log(res)
 })
@@ -380,7 +389,7 @@ interface FetchDataConfig extends FetchBaseConfigV1 {
 interface FetchFileConfig extends FetchBaseConfigV1 {
   fileConfig: {
     storeDir: string // Store folder
-    extension?: string // filename extension
+    extension?: string // Filename extension
   }
 }
 ```
@@ -409,7 +418,7 @@ interface FetchCommon<T> {
 ### FetchResCommonArrV1
 
 ```ts
-type FetchCommonArr<T> = FetchCommon<T>[]
+type FetchResCommonArrV1<T> = FetchResCommonV1<T>[]
 ```
 
 ### FileInfo
````
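The type renames in this diff (FetchCommon to FetchResCommonV1, FetchCommonArr to FetchResCommonArrV1) can be sketched as a dependency-free snippet. Note the headers field is simplified here to a plain record instead of Node's IncomingHttpHeaders, and the sample values are made up for illustration:

```typescript
// Sketch of the renamed result types from this commit.
interface FetchResCommonV1<T> {
  id: number
  statusCode: number | undefined
  headers: Record<string, string | string[] | undefined> // simplified stand-in
  data: T
}

// An array of results, one entry per request in a batch.
type FetchResCommonArrV1<T> = FetchResCommonV1<T>[]

// Hypothetical batch result, e.g. what fetchData might resolve with
// for two requests (values are illustrative, not real crawl output).
const results: FetchResCommonArrV1<{ name: string }> = [
  { id: 1, statusCode: 200, headers: {}, data: { name: 'a' } },
  { id: 2, statusCode: 200, headers: {}, data: { name: 'b' } }
]

console.log(results.map((res) => res.data.name).join(',')) // → a,b
```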

docs/cn.md (+19 −10)

````diff
@@ -6,15 +6,19 @@ x-crawl is a Nodejs multifunctional crawler library.
 
 ## Features
 
-- Crawl HTML, JSON, file resources, etc. with simple configuration.
-- The built-in puppeteer crawls HTML and parses it with the JSDOM library.
+- Crawl pages, JSON, file resources, etc. with simple configuration.
+- The built-in puppeteer crawls the page and parses it with the jsdom library.
 - Support asynchronous/synchronous way to crawl data.
 - Support Promise/Callback method to get the result.
-- Polling function.
+- Polling function, fixed-point crawling.
 - Anthropomorphic request interval.
 - Written in TypeScript, providing generics.
 
-## Benefits provided by using puppeteer
+## Relationship with puppeteer
+
+The fetchHTML API internally uses the [puppeteer](https://github.com/puppeteer/puppeteer) library to crawl pages.
+
+The following can be done:
 
 - Generate screenshots and PDFs of pages.
 - Crawl SPAs (single-page applications) and generate pre-rendered content (i.e. "SSR" (server-side rendering)).
@@ -36,6 +40,7 @@ x-crawl is a Nodejs multifunctional crawler library.
 * [fetchData](#fetchData)
   + [Type](#类型-3)
   + [Example](#示例-3)
+  + [About page](#关于-page)
 * [fetchFile](#fetchFile)
   + [Type](#类型-4)
   + [Example](#示例-4)
@@ -166,12 +171,12 @@ The intervalTime option defaults to undefined. If a value is set, then before the request
 
 ### fetchHTML
 
-fetchHTML is a method of the [myXCrawl](https://github.com/coder-hxl/x-crawl/blob/main/document/cn.md#%E7%A4%BA%E4%BE%8B-1) instance, usually used to crawl HTML.
+fetchHTML is a method of the [myXCrawl](https://github.com/coder-hxl/x-crawl/blob/main/document/cn.md#%E7%A4%BA%E4%BE%8B-1) instance, usually used to crawl pages.
 
 #### Type
 
 - See the [FetchHTMLConfig](#FetchHTMLConfig) type
-- See the [FetchHTML](#FetchHTML) type
+- See the [FetchHTML](#FetchHTML-2) type
 
 ```ts
 function fetchHTML: (
@@ -189,6 +194,10 @@ myXCrawl.fetchHTML('/xxx').then((res) => {
 })
 ```
 
+#### About page
+
+Get the page instance from res.data.page; it can perform interactive operations such as events. For specific usage, refer to [page](https://pptr.dev/api/puppeteer.page).
+
 ### fetchData
 
 fetchData is a method of the [myXCrawl](#示例-1) instance, usually used to crawl APIs, to obtain JSON data and so on.
@@ -217,7 +226,7 @@ const requestConfig = [
 
 myXCrawl.fetchData({
   requestConfig, // Request configuration, can be RequestConfig | RequestConfig[]
-  intervalTime: { max: 5000, min: 1000 } // The intervalTime passed in when not using myXCrawl
+  intervalTime: { max: 5000, min: 1000 } // The intervalTime passed in when creating myXCrawl is not used
 }).then(res => {
   console.log(res)
 })
@@ -391,7 +400,7 @@ interface StartPollingConfig {
 ### FetchResCommonV1
 
 ```ts
-interface FetchCommon<T> {
+interface FetchResCommonV1<T> {
   id: number
   statusCode: number | undefined
   headers: IncomingHttpHeaders // http type from nodejs
@@ -402,7 +411,7 @@ interface FetchCommon<T> {
 ### FetchResCommonArrV1
 
 ```ts
-type FetchCommonArr<T> = FetchCommon<T>[]
+type FetchResCommonArrV1<T> = FetchResCommonV1<T>[]
 ```
 
 ### FileInfo
@@ -423,7 +432,7 @@ interface FetchHTML {
   httpResponse: HTTPResponse | null // HTTPResponse type from the puppeteer library
   data: {
     page: Page // Page type from the puppeteer library
-    jsdom: JSDOM
+    jsdom: JSDOM // JSDOM type from the jsdom library
   }
 }
 ```
````
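The FetchHTML result shape annotated in this diff can be sketched without installing puppeteer or jsdom by using minimal stand-in types for HTTPResponse, Page, and JSDOM. The real library types are far richer; these placeholders exist only so the snippet runs, and the sample values are hypothetical:

```typescript
// Stand-in placeholder types (NOT the real puppeteer/jsdom types).
type HTTPResponse = { status: () => number }
type Page = { url: () => string }
type JSDOM = { serialize: () => string }

// Shape of the fetchHTML result as documented in the diff above.
interface FetchHTML {
  httpResponse: HTTPResponse | null
  data: {
    page: Page
    jsdom: JSDOM
  }
}

// Hypothetical result: the page handle lives at res.data.page,
// the parsed document at res.data.jsdom.
const res: FetchHTML = {
  httpResponse: { status: () => 200 },
  data: {
    page: { url: () => 'https://example.com' },
    jsdom: { serialize: () => '<!DOCTYPE html><html></html>' }
  }
}

console.log(res.data.page.url()) // → https://example.com
```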

package.json (+1 −1)

```diff
@@ -1,7 +1,7 @@
 {
   "private": true,
   "name": "x-crawl",
-  "version": "2.2.0",
+  "version": "2.2.1",
   "author": "coderHXL",
   "description": "XCrawl is a Nodejs multifunctional crawler library.",
   "license": "MIT",
```

publish/README.md (+21 −12)

Identical changes to the README.md diff above, applied to the copy of the README kept under publish/.

publish/package.json (+1 −1)

```diff
@@ -1,6 +1,6 @@
 {
   "name": "x-crawl",
-  "version": "2.2.0",
+  "version": "2.2.1",
   "author": "coderHXL",
   "description": "XCrawl is a Nodejs multifunctional crawler library.",
   "license": "MIT",
```

src/types/index.ts (+1 −5)

```diff
@@ -9,7 +9,6 @@ import {
   StartPollingConfig,
   IntervalTime
 } from './api'
-import { MapTypeObject } from './common'
 
 export interface XCrawlBaseConfig {
   baseUrl?: string
@@ -19,13 +18,10 @@ export interface XCrawlBaseConfig {
   proxy?: string
 }
 
-interface LoaderXCrawlBaseConfigValue {
+export type LoaderXCrawlBaseConfig = XCrawlBaseConfig & {
   mode: 'async' | 'sync'
 }
 
-export type LoaderXCrawlBaseConfig = XCrawlBaseConfig &
-  MapTypeObject<LoaderXCrawlBaseConfigValue>
-
 export interface XCrawlInstance {
   fetchHTML: (
     config: FetchHTMLConfig,
```
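The simplified LoaderXCrawlBaseConfig is a plain intersection type: it reuses XCrawlBaseConfig but makes mode required. A minimal sketch with a trimmed field list and a hypothetical loadConfig helper; the 'async' default in the helper is an assumption for illustration, not taken from the library:

```typescript
// Trimmed version of the base config (field list abbreviated).
interface XCrawlBaseConfig {
  baseUrl?: string
  timeout?: number
  proxy?: string
  mode?: 'async' | 'sync'
}

// After loading, mode is guaranteed to be present: the intersection
// narrows the optional mode into a required one.
type LoaderXCrawlBaseConfig = XCrawlBaseConfig & {
  mode: 'async' | 'sync'
}

// Hypothetical loader: fills in mode when the caller omitted it.
function loadConfig(base: XCrawlBaseConfig): LoaderXCrawlBaseConfig {
  return { ...base, mode: base.mode ?? 'async' }
}

const loaded = loadConfig({ baseUrl: 'https://example.com' })
console.log(loaded.mode) // → async
```

This removes the MapTypeObject indirection while keeping the same resulting type, which is why the import from './common' could be dropped.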
