Let site-crawl crawl, so you don't have to!
npm install -g site-crawl
-u, --url Start URL
-p, --parallel Parallel downloads (default: "5")
-d, --depth Max crawl depth (number or "infinite") (default: "3")
-f, --filetypes Comma-separated extensions (default: ".pdf")
-H, --header <header...> Custom headers (Key:Value)
-P, --password PDF decryption password
-o, --output Output directory (default: ".")
-t, --timeout Request timeout in milliseconds (default: "10000")
-r, --max-retries Maximum retries per failed request (default: "3")
-e, --errors Error log file (default: "errors.txt")
--dynamic Use Puppeteer to render JS-based pages
-h, --help display help for command
site-crawl -u "https://example.com/start" -f .pdf,.docx -d infinite -p 10 --dynamic
- With custom headers
site-crawl -u "https://example.com/private" -H "Authorization:Bearer YOUR_TOKEN" -H "Accept-Language:en-US"
- For JavaScript-based, dynamically served sites
site-crawl -u "https://nitgoa.vercel.app/" -d 3 -p 5 -f .pdf --dynamic