@@ -45,6 +45,30 @@ class Spider < WebWorker
4545 accepts Website
4646 outputs URL
4747
# Optional spider settings. Every param declared below is named in
# #agent_kwargs, whose result #process passes to `Ronin::Web::Spider.site`
# as keyword arguments.

# Limits on how much of the website is crawled.
param :limit, Integer, desc: 'The maximum number of pages to visit.'

param :max_depth, Integer, desc: 'The maximum link depth to follow.'

# Pause between requests.
param :delay, Integer, desc: 'The number of seconds to pause between each request.'

# Connection timeouts.
param :open_timeout, Integer, desc: 'Optional open connection timeout.'

param :read_timeout, Integer, desc: 'Optional read timeout.'

param :ssl_timeout, Integer, desc: 'Optional SSL connection timeout.'

param :continue_timeout, Integer, desc: 'Optional continue timeout.'

param :keep_alive_timeout, Integer, desc: 'Optional `Keep-Alive` timeout.'

# Request routing and headers.
param :proxy, URI, desc: 'The proxy information to use.'

param :referer, String, desc: 'The `Referer` URL to send with each request.'

param :user_agent, String, desc: 'The `User-Agent` string to send with each request.'

# `robots.txt` handling.
param :robots, Boolean, desc: 'Specifies whether `robots.txt` should be honored.'

71+
4872 #
4973 # Spiders a website and yields every spidered URL.
5074 #
@@ -60,7 +84,7 @@ class Spider < WebWorker
6084 def process ( website )
6185 base_uri = website . to_uri
6286
63- Ronin ::Web ::Spider . site ( base_uri ) do |agent |
87+ Ronin ::Web ::Spider . site ( base_uri , ** agent_kwargs ) do |agent |
6488 agent . every_page do |page |
6589 if VALID_STATUS_CODES . include? ( page . code )
6690 yield URL . new ( page . url , status : page . code ,
@@ -91,6 +115,13 @@ def process(website)
91115 end
92116 end
93117
#
# Picks the spider-related entries out of `params` so they can be splatted
# into `Ronin::Web::Spider.site` as keyword arguments.
#
# @return [Hash]
#   The result of `params.slice` for the spider option names listed below.
#   NOTE(review): assuming `params` is a plain Hash, option names that are
#   not present in it are simply left out of the result - confirm.
#
def agent_kwargs
  spider_option_names = %i[
    limit
    max_depth
    delay
    open_timeout
    read_timeout
    ssl_timeout
    continue_timeout
    keep_alive_timeout
    proxy
    referer
    user_agent
    robots
  ]

  params.slice(*spider_option_names)
end
124+
94125 end
95126 end
96127 end
0 commit comments