-
-
Notifications
You must be signed in to change notification settings - Fork 3.7k
Expand file tree
/
Copy pathindex.mjs
More file actions
105 lines (87 loc) · 2.89 KB
/
Copy pathindex.mjs
File metadata and controls
105 lines (87 loc) · 2.89 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
#!/usr/bin/env node
/**
* @license Copyright (c) 2003-2025, CKSource Holding sp. z o.o. All rights reserved.
* For licensing, see LICENSE.md or https://ckeditor.com/legal/ckeditor-licensing-options
*/
import minimist from 'minimist';
import { DEFAULT_CONCURRENCY, DEFAULT_TIMEOUT, runCrawler, toArray, isUrlValid } from '@ckeditor/ckeditor5-dev-web-crawler';
// Drop the first two `process.argv` entries (Node.js executable and script path),
// so that only the arguments typed by the user are parsed.
const cliArguments = process.argv.slice( 2 );

// Turn the raw CLI arguments into the crawler configuration and start crawling.
runCrawler( parseArguments( cliArguments ) );
/**
 * Parses CLI arguments and prepares configuration for the crawler.
 *
 * The `--docs` and `--manual` flags are mutually exclusive and each of them selects a preset of default values.
 * Options passed explicitly always take precedence over the selected preset. When no preset is selected, only
 * the explicitly passed options are used, which makes `--url` mandatory.
 *
 * @param {Array.<String>} args CLI arguments and options.
 * @returns {Object} options
 * @returns {String} options.url The URL to start crawling.
 * @returns {Number} options.depth Defines how many nested page levels should be examined. Infinity by default.
 * @returns {Array.<String>} options.exclusions An array of non-empty patterns to exclude links. Empty array by default
 * to not exclude anything.
 * @returns {Number} options.timeout Timeout value passed to the crawler. `DEFAULT_TIMEOUT` by default.
 * @returns {Number} options.concurrency Number of concurrent pages (browser tabs) to be used during crawling. By default all
 * links are opened one by one, sequentially (concurrency is 1).
 * @returns {Boolean} options.silent Whether to display status in real-time or only the result at the end of the process.
 * @throws {Error} When both `--docs` and `--manual` are passed, when the URL is missing or invalid, or when `--depth`,
 * `--timeout` or `--concurrency` is not a number.
 */
function parseArguments( args ) {
	const config = {
		string: [
			'url',
			'depth',
			'exclusions',
			'concurrency',
			'timeout'
		],

		boolean: [
			'docs',
			'manual',
			'silent'
		],

		alias: {
			u: 'url',
			d: 'depth',
			e: 'exclusions',
			c: 'concurrency',
			t: 'timeout'
		},

		default: {
			silent: false
		}
	};

	const parsedOptions = minimist( args, config );

	if ( parsedOptions.docs && parsedOptions.manual ) {
		throw new Error( 'Mutually exclusive --docs and --manual arguments.' );
	}

	// Presets are run through the same parser as the user input, so both have exactly the same shape.
	const defaultOptionsForDocs = minimist( [
		'-u', 'http://fake.ckeditor.com:8080/ckeditor5/latest/',
		'-e', '/ckfinder/',
		'-e', '/api/',
		'-e', '/assets/',
		'-c', DEFAULT_CONCURRENCY,
		'-t', DEFAULT_TIMEOUT
	], config );

	const defaultOptionsForManual = minimist( [
		'-u', 'http://localhost:8125/',
		'-d', 1,
		'-c', DEFAULT_CONCURRENCY,
		'-t', DEFAULT_TIMEOUT * 2
	], config );

	let presetOptions = {};

	if ( parsedOptions.docs ) {
		presetOptions = defaultOptionsForDocs;
	} else if ( parsedOptions.manual ) {
		presetOptions = defaultOptionsForManual;
	}

	// The user input is always merged in, also when no preset was requested. Otherwise a plain `--url <address>` call
	// would be rejected as if the URL was never provided.
	const options = Object.assign( {}, presetOptions, parsedOptions );

	if ( !options.url ) {
		throw new Error( 'Missing required --url argument.' );
	}

	if ( !isUrlValid( options.url ) ) {
		throw new Error( 'Provided --url argument is not a valid URL.' );
	}

	return {
		url: options.url,
		depth: parseNumericOption( 'depth', options.depth, Infinity ),
		exclusions: options.exclusions ? toArray( options.exclusions ).filter( exclusion => exclusion.length > 0 ) : [],
		timeout: parseNumericOption( 'timeout', options.timeout, DEFAULT_TIMEOUT ),
		concurrency: parseNumericOption( 'concurrency', options.concurrency, 1 ),
		silent: options.silent
	};
}

/**
 * Converts a raw option value to a number, falling back to the default when the option was not provided.
 *
 * @param {String} name Long name of the option, used in the error message.
 * @param {String|Number|undefined} value Raw value of the option.
 * @param {Number} fallback Value returned when `value` is missing or empty.
 * @returns {Number}
 * @throws {Error} When the value cannot be converted to a number.
 */
function parseNumericOption( name, value, fallback ) {
	if ( !value ) {
		return fallback;
	}

	const numericValue = Number( value );

	// Fail early instead of handing `NaN` over to the crawler.
	if ( Number.isNaN( numericValue ) ) {
		throw new Error( `Provided --${ name } argument is not a valid number.` );
	}

	return numericValue;
}