Skip to content

Commit 6bcb172

Browse files
committed
Sorting of error messages and fetchData/fetchFile API results
1 parent f843c9f commit 6bcb172

File tree

7 files changed

+93
-28
lines changed

7 files changed

+93
-28
lines changed

package.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
{
22
"private": true,
33
"name": "x-crawl",
4-
"version": "1.0.0",
4+
"version": "1.0.1",
55
"author": "CoderHxl",
66
"description": "XCrawl is a Nodejs multifunctional crawler library.",
77
"license": "MIT",

publish/package.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "x-crawl",
3-
"version": "1.0.0",
3+
"version": "1.0.1",
44
"author": "CoderHxl",
55
"description": "XCrawl is a Nodejs multifunctional crawler library.",
66
"license": "MIT",

src/index.ts

+9-2
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ import path from 'node:path'
33
import { JSDOM } from 'jsdom'
44

55
import { batchRequest, syncBatchRequest, request } from './request'
6+
import { quickSort } from './sort'
67
import {
78
isArray,
89
isString,
@@ -140,7 +141,10 @@ export default class XCrawl {
140141

141142
await this.useBatchRequestByMode(requestConifg, intervalTime, handleResItem)
142143

143-
return container
144+
const res = quickSort(
145+
container.map((item) => ({ ...item, valueOf: () => item.id }))
146+
)
147+
return res
144148
}
145149

146150
async fetchFile(
@@ -189,8 +193,11 @@ export default class XCrawl {
189193
success
190194
)}, error: ${logError(error)}`
191195
)
196+
const res = quickSort(
197+
container.map((item) => ({ ...item, valueOf: () => item.id }))
198+
)
192199

193-
return container
200+
return res
194201
}
195202

196203
fetchPolling(config: IFetchPollingConfig, callback: (count: number) => void) {

src/request.ts

+15-6
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ import https from 'node:https'
33
import Url, { URL } from 'node:url'
44
import HttpsProxyAgent from 'https-proxy-agent'
55

6+
import { quickSort } from './sort'
67
import {
78
isNumber,
89
isUndefined,
@@ -176,7 +177,9 @@ export async function batchRequest(
176177
let index = 0
177178
let successTotal = 0
178179
let errorTotal = 0
179-
const requestQueue: Promise<undefined | string>[] = []
180+
const requestQueue: Promise<void>[] = []
181+
const errorMessage: { id: number; message: string; valueOf: () => number }[] =
182+
[]
180183
for (const requestConifg of requestConifgs) {
181184
const id = ++index
182185

@@ -190,10 +193,15 @@ export async function batchRequest(
190193
const requestItem = request(requestConifg)
191194
.catch((error: any) => {
192195
errorTotal++
193-
return `Request ${id} is an error: ${error.message}`
196+
197+
const message = `Request ${id} is an error: ${error.message}`
198+
// valueOf 为排序做准备
199+
const valueOf = () => id
200+
201+
errorMessage.push({ id, message, valueOf })
194202
})
195203
.then((requestRes) => {
196-
if (typeof requestRes === 'string') return requestRes
204+
if (!requestRes) return
197205

198206
successTotal++
199207
callback({ id, ...requestRes })
@@ -204,10 +212,11 @@ export async function batchRequest(
204212

205213
log(logSuccess('All requests have been sent!'))
206214

207-
const res = await Promise.all(requestQueue)
215+
// 等待所有请求结束
216+
await Promise.all(requestQueue)
208217

209-
// 打印错误消息
210-
res.forEach((item) => (item ? log(logError(item)) : ''))
218+
// 排序后打印错误消息
219+
quickSort(errorMessage).forEach((item) => log(logError(item.message)))
211220

212221
log(
213222
`requestsTotal: ${logNumber(requestConifgs.length)}, success: ${logSuccess(

src/sort.ts

+48
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
/**
 * Exchanges the elements at indices `i` and `j` of `arr` in place.
 *
 * @param arr - Array to modify.
 * @param i - Index of the first element.
 * @param j - Index of the second element.
 */
function swap(arr: any[], i: number, j: number) {
  const temp = arr[i]
  arr[i] = arr[j]
  arr[j] = temp
}

/**
 * Sorts `arr` in place in ascending order and returns the same array.
 *
 * Elements are compared with the `<` and `>` operators, so objects that
 * expose a numeric `valueOf()` are ordered by that value. The sort is not
 * stable.
 *
 * The pivot is taken from the middle of the range and only the smaller
 * partition is handled recursively, so already-sorted input (the usual case
 * when sorting by sequential ids) no longer degrades to quadratic time or
 * overflows the call stack.
 *
 * @param arr - Array to sort; it is mutated.
 * @returns The same array instance, now sorted.
 */
export function quickSort<T extends any[]>(arr: T): T {
  function sortRange(left: number, right: number): void {
    while (left < right) {
      // 1. Choose the middle element as pivot and park it at the right edge.
      const mid = left + ((right - left) >> 1)
      swap(arr, mid, right)
      const pivot = arr[right]

      // 2. Two-pointer exchange: smaller values to the left, larger to the right.
      let i = left
      let j = right - 1
      while (i <= j) {
        // Bounded by the pivot itself, which sits at index `right`.
        while (arr[i] < pivot) {
          i++
        }

        // Explicit lower bound keeps the scan inside the active range.
        while (j >= left && arr[j] > pivot) {
          j--
        }

        if (i <= j) {
          swap(arr, i, j)
          i++
          j--
        }
      }

      // 3. Move the pivot into its final position.
      swap(arr, i, right)

      // 4. Recurse into the smaller side and keep looping over the larger one,
      //    which bounds the recursion depth at O(log n).
      if (i - left < right - i) {
        sortRange(left, i - 1)
        left = i + 1
      } else {
        sortRange(i + 1, right)
        right = i - 1
      }
    }
  }

  sortRange(0, arr.length - 1)

  return arr
}
47+
48+
// console.log(quickSort([7, 3, 6, 4, 9, 2, 1, 5]))

test/start/index.js

+1-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

test/start/index.ts

+18-17
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ import XCrawl from '../../src'
33

44
const testXCrawl = new XCrawl({
55
timeout: 10000,
6-
intervalTime: { max: 2000, min: 1000 },
6+
intervalTime: { max: 0, min: 0 },
77
mode: 'async'
88
})
99

@@ -17,23 +17,23 @@ const testXCrawl = new XCrawl({
1717
// ]
1818
// })
1919

20-
testXCrawl.fetchPolling({ m: 3 }, () => {
21-
testXCrawl
22-
.fetchHTML('https://www.bilibili.com/guochuang/', (res) => {
23-
console.log('fetchHTML Callback: ', res.statusCode)
24-
})
25-
.then((res) => {
26-
const { jsdom } = res.data
20+
// testXCrawl.fetchPolling({ m: 3 }, () => {
21+
testXCrawl
22+
.fetchHTML('https://www.bilibili.com/guochuang/', (res) => {
23+
console.log('fetchHTML Callback: ', res.statusCode)
24+
})
25+
.then((res) => {
26+
const { jsdom } = res.data
2727

28-
const imgSrc: string[] = []
29-
const recomEls =
30-
jsdom.window.document.querySelectorAll('.chief-recom-item')
31-
recomEls.forEach((item) => imgSrc.push(item.querySelector('img')!.src))
28+
const imgSrc: string[] = []
29+
const recomEls = jsdom.window.document.querySelectorAll('.chief-recom-item')
30+
recomEls.forEach((item) => imgSrc.push(item.querySelector('img')!.src))
3231

33-
const requestConifg = imgSrc.map((src) => ({ url: `https:${src}` }))
34-
requestConifg.pop()
32+
const requestConifg = imgSrc.map((src) => ({ url: `https:${src}` }))
33+
requestConifg.pop()
3534

36-
testXCrawl.fetchFile(
35+
testXCrawl
36+
.fetchFile(
3737
{
3838
requestConifg,
3939
fileConfig: { storeDir: path.resolve(__dirname, './upload') }
@@ -42,5 +42,6 @@ testXCrawl.fetchPolling({ m: 3 }, () => {
4242
console.log(res.id, res.statusCode, res.data.fileName)
4343
}
4444
)
45-
})
46-
})
45+
.then((res) => console.log(res))
46+
})
47+
// })

0 commit comments

Comments
 (0)