Skip to content

Commit d8b0aef

Browse files
author
mozi
committed
Add more params to web/spider
1 parent 83b2f61 commit d8b0aef

File tree

2 files changed

+69
-1
lines changed

2 files changed

+69
-1
lines changed

Diff for: lib/ronin/recon/builtin/web/spider.rb

+32-1
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,30 @@ class Spider < WebWorker
4545
accepts Website
4646
outputs URL
4747

48+
param :limit, Integer, desc: 'The maximum number of pages to visit.'
49+
50+
param :max_depth, Integer, desc: 'The maximum link depth to follow.'
51+
52+
param :delay, Integer, desc: 'The number of seconds to pause between each request.'
53+
54+
param :open_timeout, Integer, desc: 'Optional open connection timeout.'
55+
56+
param :read_timeout, Integer, desc: 'Optional read timeout.'
57+
58+
param :ssl_timeout, Integer, desc: 'Optional SSL connection timeout.'
59+
60+
param :continue_timeout, Integer, desc: 'Optional continue timeout.'
61+
62+
param :keep_alive_timeout, Integer, desc: 'Optional `Keep-Alive` timeout.'
63+
64+
param :proxy, URI, desc: 'The proxy information to use.'
65+
66+
param :referer, String, desc: 'The `Referer` URL to send with each request.'
67+
68+
param :user_agent, String, desc: 'The `User-Agent` string to send with each request.'
69+
70+
param :robots, Boolean, desc: 'Specifies whether `robots.txt` should be honored.'
71+
4872
#
4973
# Spiders a website and yields every spidered URL.
5074
#
@@ -60,7 +84,7 @@ class Spider < WebWorker
6084
def process(website)
6185
base_uri = website.to_uri
6286

63-
Ronin::Web::Spider.site(base_uri) do |agent|
87+
Ronin::Web::Spider.site(base_uri, **agent_kwargs) do |agent|
6488
agent.every_page do |page|
6589
if VALID_STATUS_CODES.include?(page.code)
6690
yield URL.new(page.url, status: page.code,
@@ -91,6 +115,13 @@ def process(website)
91115
end
92116
end
93117

118+
#
119+
# Returns a Hash of spider agent keyword arguments based on the params
120+
#
121+
def agent_kwargs
122+
params.slice(:limit, :max_depth, :delay, :open_timeout, :read_timeout, :ssl_timeout, :continue_timeout, :keep_alive_timeout, :proxy, :referer, :user_agent, :robots)
123+
end
124+
94125
end
95126
end
96127
end

Diff for: spec/builtin/web/spider_spec.rb

+37
Original file line numberDiff line numberDiff line change
@@ -463,4 +463,41 @@ class AppWithURLsInJavaScript < Sinatra::Base
463463
end
464464
end
465465
end
466+
467+
describe "#agent_kwargs" do
468+
context "when there are params" do
469+
let(:params_hash) do
470+
{
471+
limit: 10,
472+
max_depth: 2,
473+
delay: 5,
474+
open_timeout: 10,
475+
read_timeout: 10,
476+
ssl_timeout: 10,
477+
continue_timeout: 10,
478+
keep_alive_timeout: 10,
479+
proxy: URI('http://proxy.example.com:8080'),
480+
referer: 'http://example.com/',
481+
user_agent: 'Test User Agent',
482+
robots: true
483+
}
484+
end
485+
486+
subject { described_class.new(params: params_hash) }
487+
488+
it "must return a Hash with the params" do
489+
expect(subject.agent_kwargs).to eq(params_hash)
490+
end
491+
end
492+
493+
context "when there are no params" do
494+
let(:params_hash) { {} }
495+
496+
subject { described_class.new(params: params_hash) }
497+
498+
it "must return an empty Hash" do
499+
expect(subject.agent_kwargs).to eq(params_hash)
500+
end
501+
end
502+
end
466503
end

0 commit comments

Comments
 (0)