@@ -4,11 +4,28 @@ import (
4
4
"context"
5
5
"encoding/json"
6
6
"fmt"
7
+ "net/http"
7
8
"strings"
8
9
9
10
"github.com/gocolly/colly/v2"
10
11
)
11
12
13
// defUserAgent is the User-Agent string sent with scrape requests; it mirrors
// a desktop Firefox build on Linux.
const defUserAgent = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:136.0) Gecko/20100101 Firefox/136.0"

// head holds the browser-like request headers handed to the colly collector.
//
// It is built as a composite literal rather than in init(), so the value is
// complete at package initialization without any mutation step. Keys are
// spelled in canonical MIME form because a literal bypasses the
// canonicalization that http.Header.Add/Set would otherwise perform.
var head = http.Header{
	"User-Agent":                {defUserAgent},
	"Accept":                    {"text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"},
	"Accept-Language":           {"en-US,en;q=0.5"},
	"Upgrade-Insecure-Requests": {"1"},
	"Sec-Fetch-Site":            {"none"},
	"Sec-Fetch-User":            {"?1"},
	// Only gzip is advertised. When Accept-Encoding is set explicitly,
	// net/http's Transport does not transparently decompress the response,
	// and the standard library ships no brotli or zstd decoder, so offering
	// "br"/"zstd" (as a real browser does) risks receiving a body that cannot
	// be parsed.
	// NOTE(review): assumes the scraping HTTP layer inflates gzip bodies
	// itself — confirm against the colly version in use.
	"Accept-Encoding": {"gzip"},
	"Priority":        {"u=0, i"},
	"Te":              {"trailers"},
}
28
+
12
29
// ScrapeDirs scrapes all DoD website directories and saves them to the Mildew object's Subs field.
13
30
func (mw * Mildew ) ScrapeDirs (ctx context.Context ) error {
14
31
dirStream := make (chan string )
@@ -18,6 +35,9 @@ func (mw *Mildew) ScrapeDirs(ctx context.Context) error {
18
35
// Initialize base colly collector to be used by each directory scraper function
19
36
// TODO tune colly options
20
37
c := colly .NewCollector ()
38
+ c .UserAgent = defUserAgent
39
+ c .Headers = & head
40
+
21
41
var err error
22
42
23
43
err = dirDod (c , dirStream )
0 commit comments