@@ -45,6 +45,30 @@ class Spider < WebWorker
         accepts Website
         outputs URL

+        param :limit, Integer, desc: 'The maximum number of pages to visit.'
+
+        param :max_depth, Integer, desc: 'The maximum link depth to follow.'
+
+        param :delay, Integer, desc: 'The number of seconds to pause between each request.'
+
+        param :open_timeout, Integer, desc: 'Optional open connection timeout.'
+
+        param :read_timeout, Integer, desc: 'Optional read timeout.'
+
+        param :ssl_timeout, Integer, desc: 'Optional SSL connection timeout.'
+
+        param :continue_timeout, Integer, desc: 'Optional continue timeout.'
+
+        param :keep_alive_timeout, Integer, desc: 'Optional `Keep-Alive` timeout.'
+
+        param :proxy, URI, desc: 'The proxy information to use.'
+
+        param :referer, String, desc: 'The `Referer` URL to send with each request.'
+
+        param :user_agent, String, desc: 'The `User-Agent` string to send with each request.'
+
+        param :robots, Boolean, desc: 'Specifies whether `robots.txt` should be honored.'
+
         #
         # Spiders a website and yields every spidered URL.
         #
@@ -60,7 +84,7 @@ class Spider < WebWorker
         def process(website)
           base_uri = website.to_uri

-          Ronin::Web::Spider.site(base_uri) do |agent|
+          Ronin::Web::Spider.site(base_uri, **agent_kwargs) do |agent|
             agent.every_page do |page|
               if VALID_STATUS_CODES.include?(page.code)
                 yield URL.new(page.url, status: page.code,
@@ -91,6 +115,13 @@ def process(website)
           end
         end

+        #
+        # Returns a Hash of keyword arguments based on the set params.
+        #
+        def agent_kwargs
+          params.slice(:limit, :max_depth, :delay, :open_timeout, :read_timeout, :ssl_timeout, :continue_timeout, :keep_alive_timeout, :proxy, :referer, :user_agent, :robots)
+        end
+
       end
     end
   end
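
For reference, the `**agent_kwargs` change forwards only the spider-related params as keyword arguments to `Ronin::Web::Spider.site`. Below is a minimal, self-contained sketch of that `Hash#slice` plus double-splat pattern in plain Ruby; the `params` hash and `spider` method are hypothetical stand-ins for illustration, not the actual ronin-recon or ronin-web-spider APIs.

```ruby
# Pretend these are the worker's configured params; unrelated keys are dropped.
params = { limit: 100, delay: 1, user_agent: 'Mozilla/5.0', unrelated: 'ignored' }

# Keep only the keys the spider understands, mirroring #agent_kwargs above.
def agent_kwargs(params)
  params.slice(:limit, :max_depth, :delay, :open_timeout, :read_timeout,
               :ssl_timeout, :continue_timeout, :keep_alive_timeout,
               :proxy, :referer, :user_agent, :robots)
end

# Stand-in for a spider entry point such as Ronin::Web::Spider.site.
def spider(base_uri, **kwargs)
  puts "spidering #{base_uri} with #{kwargs.inspect}"
end

# Only :limit, :delay, and :user_agent survive the slice and get splatted in.
spider('https://example.com', **agent_kwargs(params))
```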