Skip to content

Commit b2e9f07

Browse files
committed
order adjustment
1 parent 6e2947d commit b2e9f07

File tree

5 files changed

+130
-118
lines changed

5 files changed

+130
-118
lines changed

README.md

+50-50
Original file line numberDiff line numberDiff line change
@@ -45,9 +45,9 @@ Create a crawler instance via new XCrawl.
4545
class XCrawl {
4646
private readonly baseConfig
4747
constructor(baseConfig?: IXCrawlBaseConifg)
48+
fetchHTML(config: string | IFetchHTMLConfig): Promise<JSDOM>
4849
fetchData<T = any>(config: IFetchDataConfig): Promise<IFetchCommon<T>>
4950
fetchFile(config: IFetchFileConfig): Promise<IFetchCommon<IFileInfo>>
50-
fetchHTML(config: string | IFetchHTMLConfig): Promise<JSDOM>
5151
}
5252
```
5353
@@ -67,6 +67,24 @@ const myXCrawl = new XCrawl({
6767
})
6868
```
6969
70+
### fetchHTML
71+
72+
fetchHTML is a method of the above <a href="#myXCrawl" style="text-decoration: none">myXCrawl</a> instance, usually used to crawl HTML.
73+
74+
- Type
75+
76+
```ts
77+
function fetchHTML(config: string | IFetchHTMLConfig): Promise<JSDOM>
78+
```
79+
80+
- Example
81+
82+
```js
83+
myXCrawl.fetchHTML('/xxx').then((jsdom) => {
84+
console.log(jsdom.window.document.querySelector('title')?.textContent)
85+
})
86+
```
87+
7088
### fetchData
7189
7290
fetchData is a method of the above <a href="#myXCrawl" style="text-decoration: none">myXCrawl</a> instance, usually used to crawl APIs to obtain JSON data and so on.
@@ -123,24 +141,6 @@ myXCrawl.fetchFile({
123141
})
124142
```
125143
126-
### fetchHTML
127-
128-
fetchHTML is the method of the above <a href="#myXCrawl" style="text-decoration: none">myXCrawl</a> instance, usually used to crawl HTML.
129-
130-
- Type
131-
132-
```ts
133-
function fetchHTML(config: string | IFetchHTMLConfig): Promise<JSDOM>
134-
```
135-
136-
- Example
137-
138-
```js
139-
myXCrawl.fetchHTML('/xxx').then((jsdom) => {
140-
console.log(jsdom.window.document.querySelector('title')?.textContent)
141-
})
142-
```
143-
144144
## Types
145145
146146
- IAnyObject
@@ -220,6 +220,12 @@ interface IXCrawlBaseConifg {
220220
}
221221
```
222222
223+
- IFetchHTMLConfig
224+
225+
```ts
226+
interface IFetchHTMLConfig extends IRequestConfig {}
227+
```
228+
223229
- IFetchDataConfig
224230
225231
```ts
@@ -237,12 +243,6 @@ interface IFetchFileConfig extends IFetchBaseConifg {
237243
}
238244
```
239245
240-
- IFetchHTMLConfig
241-
242-
```ts
243-
interface IFetchHTMLConfig extends IRequestConfig {}
244-
```
245-
246246
## More
247247
248248
If you have any **questions** or **needs**, please submit **Issues** at https://github.com/coder-hxl/x-crawl/issues.
@@ -298,9 +298,9 @@ docsXCrawl.fetchHTML('/zh/get-started').then((jsdom) => {
298298
class XCrawl {
299299
private readonly baseConfig
300300
constructor(baseConfig?: IXCrawlBaseConifg)
301+
fetchHTML(config: string | IFetchHTMLConfig): Promise<JSDOM>
301302
fetchData<T = any>(config: IFetchDataConfig): Promise<IFetchCommon<T>>
302303
fetchFile(config: IFetchFileConfig): Promise<IFetchCommon<IFileInfo>>
303-
fetchHTML(config: string | IFetchHTMLConfig): Promise<JSDOM>
304304
}
305305
```
306306

@@ -347,6 +347,24 @@ myXCrawl.fetchData({
347347
})
348348
```
349349

350+
### fetchHTML
351+
352+
fetchHTML 是上面 <a href="#cn-myXCrawl" style="text-decoration: none">myXCrawl</a> 实例的方法,通常用于爬取 HTML。
353+
354+
- 类型
355+
356+
```ts
357+
function fetchHTML(config: string | IFetchHTMLConfig): Promise<JSDOM>
358+
```
359+
360+
- 示例
361+
362+
```js
363+
myXCrawl.fetchHTML('/xxx').then((jsdom) => {
364+
console.log(jsdom.window.document.querySelector('title')?.textContent)
365+
})
366+
```
367+
350368
### fetchFile
351369

352370
fetchFile 是上面 <a href="#cn-myXCrawl" style="text-decoration: none">myXCrawl</a> 实例的方法,通常用于爬取文件,可获取图片、pdf 文件等等。
@@ -376,24 +394,6 @@ myXCrawl.fetchFile({
376394
})
377395
```
378396

379-
### fetchHTML
380-
381-
fetchHTML 是上面 <a href="#cn-myXCrawl" style="text-decoration: none">myXCrawl</a> 实例的方法,通常用于爬取 HTML
382-
383-
- 类型
384-
385-
```ts
386-
function fetchHTML(config: string | IFetchHTMLConfig): Promise<JSDOM>
387-
```
388-
389-
- 示例
390-
391-
```js
392-
myXCrawl.fetchHTML('/xxx').then((jsdom) => {
393-
console.log(jsdom.window.document.querySelector('title')?.textContent)
394-
})
395-
```
396-
397397
## 类型
398398

399399
- IAnyObject
@@ -473,6 +473,12 @@ interface IXCrawlBaseConifg {
473473
}
474474
```
475475

476+
- IFetchHTMLConfig
477+
478+
```ts
479+
interface IFetchHTMLConfig extends IRequestConfig {}
480+
```
481+
476482
- IFetchDataConfig
477483

478484
```ts
@@ -490,12 +496,6 @@ interface IFetchFileConfig extends IFetchBaseConifg {
490496
}
491497
```
492498

493-
- IFetchHTMLConfig
494-
495-
```ts
496-
interface IFetchHTMLConfig extends IRequestConfig {}
497-
```
498-
499499
## 更多
500500

501501
如有 **问题** 或 **需求** ,请在 https://github.com/coder-hxl/x-crawl/issues 中提 **Issues** 。

publish/README.md

+50-38
Original file line numberDiff line numberDiff line change
@@ -45,9 +45,9 @@ Create a crawler instance via new XCrawl.
4545
class XCrawl {
4646
private readonly baseConfig
4747
constructor(baseConfig?: IXCrawlBaseConifg)
48+
fetchHTML(config: string | IFetchHTMLConfig): Promise<JSDOM>
4849
fetchData<T = any>(config: IFetchDataConfig): Promise<IFetchCommon<T>>
4950
fetchFile(config: IFetchFileConfig): Promise<IFetchCommon<IFileInfo>>
50-
fetchHTML(url: string): Promise<JSDOM>
5151
}
5252
```
5353
@@ -67,6 +67,24 @@ const myXCrawl = new XCrawl({
6767
})
6868
```
6969
70+
### fetchHTML
71+
72+
fetchHTML is a method of the above <a href="#myXCrawl" style="text-decoration: none">myXCrawl</a> instance, usually used to crawl HTML.
73+
74+
- Type
75+
76+
```ts
77+
function fetchHTML(config: string | IFetchHTMLConfig): Promise<JSDOM>
78+
```
79+
80+
- Example
81+
82+
```js
83+
myXCrawl.fetchHTML('/xxx').then((jsdom) => {
84+
console.log(jsdom.window.document.querySelector('title')?.textContent)
85+
})
86+
```
87+
7088
### fetchData
7189
7290
fetchData is a method of the above <a href="#myXCrawl" style="text-decoration: none">myXCrawl</a> instance, usually used to crawl APIs to obtain JSON data and so on.
@@ -123,24 +141,6 @@ myXCrawl.fetchFile({
123141
})
124142
```
125143
126-
### fetchHTML
127-
128-
fetchHTML is the method of the above <a href="#myXCrawl" style="text-decoration: none">myXCrawl</a> instance, usually used to crawl HTML.
129-
130-
- Type
131-
132-
```ts
133-
function fetchHTML(url: string): Promise<JSDOM>
134-
```
135-
136-
- Example
137-
138-
```js
139-
myXCrawl.fetchHTML('/xxx').then((jsdom) => {
140-
console.log(jsdom.window.document.querySelector('title')?.textContent)
141-
})
142-
```
143-
144144
## Types
145145
146146
- IAnyObject
@@ -220,6 +220,12 @@ interface IXCrawlBaseConifg {
220220
}
221221
```
222222
223+
- IFetchHTMLConfig
224+
225+
```ts
226+
interface IFetchHTMLConfig extends IRequestConfig {}
227+
```
228+
223229
- IFetchDataConfig
224230
225231
```ts
@@ -292,9 +298,9 @@ docsXCrawl.fetchHTML('/zh/get-started').then((jsdom) => {
292298
class XCrawl {
293299
private readonly baseConfig
294300
constructor(baseConfig?: IXCrawlBaseConifg)
301+
fetchHTML(config: string | IFetchHTMLConfig): Promise<JSDOM>
295302
fetchData<T = any>(config: IFetchDataConfig): Promise<IFetchCommon<T>>
296303
fetchFile(config: IFetchFileConfig): Promise<IFetchCommon<IFileInfo>>
297-
fetchHTML(url: string): Promise<JSDOM>
298304
}
299305
```
300306

@@ -341,6 +347,24 @@ myXCrawl.fetchData({
341347
})
342348
```
343349

350+
### fetchHTML
351+
352+
fetchHTML 是上面 <a href="#cn-myXCrawl" style="text-decoration: none">myXCrawl</a> 实例的方法,通常用于爬取 HTML。
353+
354+
- 类型
355+
356+
```ts
357+
function fetchHTML(config: string | IFetchHTMLConfig): Promise<JSDOM>
358+
```
359+
360+
- 示例
361+
362+
```js
363+
myXCrawl.fetchHTML('/xxx').then((jsdom) => {
364+
console.log(jsdom.window.document.querySelector('title')?.textContent)
365+
})
366+
```
367+
344368
### fetchFile
345369

346370
fetchFile 是上面 <a href="#cn-myXCrawl" style="text-decoration: none">myXCrawl</a> 实例的方法,通常用于爬取文件,可获取图片、pdf 文件等等。
@@ -370,24 +394,6 @@ myXCrawl.fetchFile({
370394
})
371395
```
372396

373-
### fetchHTML
374-
375-
fetchHTML 是上面 <a href="#cn-myXCrawl" style="text-decoration: none">myXCrawl</a> 实例的方法,通常用于爬取 HTML
376-
377-
- 类型
378-
379-
```ts
380-
function fetchHTML(url: string): Promise<JSDOM>
381-
```
382-
383-
- 示例
384-
385-
```js
386-
myXCrawl.fetchHTML('/xxx').then((jsdom) => {
387-
console.log(jsdom.window.document.querySelector('title')?.textContent)
388-
})
389-
```
390-
391397
## 类型
392398

393399
- IAnyObject
@@ -467,6 +473,12 @@ interface IXCrawlBaseConifg {
467473
}
468474
```
469475

476+
- IFetchHTMLConfig
477+
478+
```ts
479+
interface IFetchHTMLConfig extends IRequestConfig {}
480+
```
481+
470482
- IFetchDataConfig
471483

472484
```ts

src/index.ts

+17-17
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,9 @@ import { isArray, isString, isUndefined } from './utils'
77

88
import {
99
IXCrawlBaseConifg,
10+
IFetchHTMLConfig,
1011
IFetchDataConfig,
1112
IFetchFileConfig,
12-
IFetchHTMLConfig,
1313
IFetchBaseConifg,
1414
IFileInfo,
1515
IFetchCommon,
@@ -57,6 +57,22 @@ export default class XCrawl {
5757
this.baseConfig = baseConfig
5858
}
5959

60+
async fetchHTML(config: string | IFetchHTMLConfig): Promise<JSDOM> {
61+
const rawRequestConifg: IFetchHTMLConfig = isString(config)
62+
? { url: config }
63+
: config
64+
65+
const { requestConifg } = mergeConfig(this.baseConfig, {
66+
requestConifg: rawRequestConifg
67+
})
68+
69+
const requestResItem = await request(requestConifg)
70+
71+
const dom = new JSDOM(requestResItem.data)
72+
73+
return dom
74+
}
75+
6076
async fetchData<T = any>(config: IFetchDataConfig): Promise<IFetchCommon<T>> {
6177
const { requestConifg, intervalTime } = mergeConfig(this.baseConfig, config)
6278

@@ -145,20 +161,4 @@ export default class XCrawl {
145161
batchRequest(requestConfigQueue, intervalTime, batchRequestResHandle)
146162
})
147163
}
148-
149-
async fetchHTML(config: string | IFetchHTMLConfig): Promise<JSDOM> {
150-
const rawRequestConifg: IFetchHTMLConfig = isString(config)
151-
? { url: config }
152-
: config
153-
154-
const { requestConifg } = mergeConfig(this.baseConfig, {
155-
requestConifg: rawRequestConifg
156-
})
157-
158-
const requestResItem = await request(requestConifg)
159-
160-
const dom = new JSDOM(requestResItem.data)
161-
162-
return dom
163-
}
164164
}

src/types.ts

+2-2
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,8 @@ export interface IFetchBaseConifg {
6666
intervalTime?: IIntervalTime
6767
}
6868

69+
export interface IFetchHTMLConfig extends IRequestConfig {}
70+
6971
export interface IFetchDataConfig extends IFetchBaseConifg {}
7072

7173
export interface IFetchFileConfig extends IFetchBaseConifg {
@@ -74,8 +76,6 @@ export interface IFetchFileConfig extends IFetchBaseConifg {
7476
}
7577
}
7678

77-
export interface IFetchHTMLConfig extends IRequestConfig {}
78-
7979
export interface IFileInfo {
8080
fileName: string
8181
mimeType: string

0 commit comments

Comments
 (0)