-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathINPUT_SCHEMA.json
103 lines (103 loc) · 4.64 KB
/
INPUT_SCHEMA.json
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
{
"title": "Schema for batch-cancel-accounts actor",
"type": "object",
"schemaVersion": 1,
"properties": {
"domains": {
"title": "Domains",
"type": "string",
"description": "List of domains to crawl. The domains must be naked, i.e. specified without a protocol and sub-domains (e.g. <code>example.com</code>).",
"prefill": "example.com\niana.org\napify.com",
"default": "",
"editor": "textarea"
},
"domainsFileUrl": {
"title": "Domains file URL",
"type": "string",
"description": "URL of a text file that contains the list of domains to crawl. The domains must be naked, i.e. specified without a protocol and sub-domains (e.g. <code>example.com</code>).<br><br>This field is useful if you have a large number of domains.",
"editor": "textfield"
},
"domainsFileOffset": {
"title": "Domains file offset",
"type": "integer",
"description": "Indicates how many domains from the file should be skipped at the beginning. This is useful if you only want to crawl a portion of the domains.",
"default": 0,
"minimum": 0,
"maximum": 999999999
},
"domainsFileCount": {
"title": "Domains file count",
"type": "integer",
"description": "Indicates how many domains from the file should be crawled, starting from the offset. This is useful if you only want to crawl a portion of the domains. Leave empty to crawl all domains.",
"minimum": 0,
"maximum": 999999999
},
"useChrome": {
"title": "Use Chrome",
"type": "boolean",
"description": "If checked, the actor uses Chrome instead of Puppeteer's Chromium for crawling. This might help to prevent blocking of some pages.",
"default": false
},
"useApifyProxy": {
"title": "Use Apify Proxy",
"type": "boolean",
"description": "If checked, the actor uses Apify Proxy to access the target pages. This might help to prevent blocking of some pages.",
"default": false
},
"maxRequestRetries": {
"title": "Max page retries",
"type": "integer",
"description": "Indicates how many times the crawler should retry loading a page after an error.",
"prefill": 2,
"default": 0,
"minimum": 0,
"maximum": 5
},
"crawlLinkCount": {
"title": "Crawl domain links",
"type": "integer",
"description": "Indicates how many links on the main page that point to the same domain should also be crawled.",
"prefill": 5,
"default": 0,
"minimum": 0,
"maximum": 100
},
"crawlHttpsVersion": {
"title": "Crawl HTTPS version",
"type": "boolean",
"description": "If checked, the actor attempts to crawl the HTTPS version of the website (e.g. <code>https://example.com</code> for domain <code>example.com</code>).",
"default": true
},
"crawlWwwSubdomain": {
"title": "Crawl www. sub-domain",
"type": "boolean",
"description": "If checked, the actor attempts to crawl the <code>www.</code> sub-domain of the website (e.g. <code>http://www.example.com</code> for domain <code>example.com</code>).",
"default": true
},
"saveScreenshot": {
"title": "Save screenshots",
"type": "boolean",
"description": "If checked, the actor stores screenshots of all loaded pages into the key-value store.",
"default": true
},
"saveHtml": {
"title": "Save HTML content",
"type": "boolean",
"description": "If checked, the actor stores HTML content of all loaded pages into the key-value store.",
"default": true
},
"saveText": {
"title": "Save text content",
"type": "boolean",
"description": "If checked, the actor stores text content of all loaded pages into the dataset results.",
"default": true
},
"considerChildFrames": {
"title": "Consider child frames",
"type": "boolean",
"description": "If checked, the actor searches for social handles even in the content of first-level child frames. The <code>page.text</code> field also contains the combined text of the main frame and direct child frames.",
"default": false
}
},
"required": []
}