@@ -8,17 +8,22 @@ import (
8
8
"net/http/httputil"
9
9
"net/url"
10
10
"strings"
11
+ "sync"
12
+ "time"
11
13
12
14
"github.com/go-rod/rod"
15
+ "github.com/go-rod/rod/lib/proto"
13
16
"github.com/mileusna/useragent"
14
17
)
15
18
16
19
type Bartender struct {
17
- addr string
18
- target * url.URL
19
- proxy * httputil.ReverseProxy
20
- bypassList map [string ]bool
21
- pool rod.PagePool
20
+ addr string
21
+ target * url.URL
22
+ proxy * httputil.ReverseProxy
23
+ bypassList map [string ]bool
24
+ pool rod.PagePool
25
+ blockRequests []string
26
+ maxWait time.Duration
22
27
}
23
28
24
29
func New (addr , target string , poolSize int ) * Bartender {
@@ -45,14 +50,53 @@ func New(addr, target string, poolSize int) *Bartender {
45
50
useragent .Edge : true ,
46
51
useragent .Vivaldi : true ,
47
52
},
48
- pool : rod .NewPagePool (poolSize ),
53
+ pool : rod .NewPagePool (poolSize ),
54
+ blockRequests : []string {},
55
+ maxWait : 3 * time .Second ,
49
56
}
50
57
}
51
58
52
59
// BypassUserAgentNames replaces the user-agent lookup table with list.
//
// ServeHTTP indexes this map with the name parsed from the request's
// User-Agent header; an entry mapped to true satisfies the same condition as
// a non-GET request.
// NOTE(review): presumably such requests skip headless rendering — confirm
// against the rest of ServeHTTP.
//
// The map is stored as-is, not copied.
func (b *Bartender) BypassUserAgentNames(list map[string]bool) {
	b.bypassList = list
}
55
62
63
+ func (b * Bartender ) BlockRequest (patterns ... string ) {
64
+ b .blockRequests = patterns
65
+ }
66
+
67
+ // MaxWait sets the max wait time for the headless browser to render the page.
68
+ // If the max wait time is reached, bartender will stop waiting for page rendering and
69
+ // immediately return the current html.
70
+ func (b * Bartender ) MaxWait (d time.Duration ) {
71
+ b .maxWait = d
72
+ }
73
+
74
+ func (b * Bartender ) newPage () * rod.Page {
75
+ page := rod .New ().MustConnect ().MustPage ()
76
+
77
+ if len (b .blockRequests ) > 0 {
78
+ router := page .HijackRequests ()
79
+
80
+ for _ , pattern := range b .blockRequests {
81
+ router .MustAdd (pattern , func (ctx * rod.Hijack ) {
82
+ ctx .Response .Fail (proto .NetworkErrorReasonBlockedByClient )
83
+ })
84
+ }
85
+
86
+ go router .Run ()
87
+ }
88
+
89
+ log .Println ("headless browser started:" , page .SessionID )
90
+
91
+ return page
92
+ }
93
+
94
+ func (b * Bartender ) WarnUp () {
95
+ for i := 0 ; i < len (b .pool ); i ++ {
96
+ b .pool .Put (b .pool .Get (b .newPage ))
97
+ }
98
+ }
99
+
56
100
func (b * Bartender ) ServeHTTP (w http.ResponseWriter , r * http.Request ) {
57
101
ua := useragent .Parse (r .Header .Get ("User-Agent" ))
58
102
if r .Method != http .MethodGet || b .bypassList [ua .Name ] {
@@ -80,11 +124,6 @@ func (b *Bartender) RenderPage(w http.ResponseWriter, r *http.Request) bool {
80
124
81
125
log .Println ("headless render:" , u )
82
126
83
- page := b .pool .Get (func () * rod.Page { return rod .New ().MustConnect ().MustPage () })
84
- defer b .pool .Put (page )
85
-
86
- page .MustNavigate (u ).MustWaitStable ()
87
-
88
127
for k , vs := range resHeader {
89
128
if k == "Content-Length" {
90
129
continue
@@ -97,10 +136,34 @@ func (b *Bartender) RenderPage(w http.ResponseWriter, r *http.Request) bool {
97
136
98
137
w .WriteHeader (statusCode )
99
138
100
- _ , err := w .Write ([]byte (page .MustHTML ()))
101
- if err != nil {
102
- panic (err )
103
- }
139
+ page := b .pool .Get (b .newPage )
140
+ defer b .pool .Put (page )
141
+
142
+ page , cancel := page .WithCancel ()
143
+
144
+ once := sync.Once {}
145
+
146
+ go func () {
147
+ time .Sleep (b .maxWait )
148
+ log .Println ("max wait time reached, return current html:" , u )
149
+ once .Do (func () {
150
+ body , _ := page .HTML ()
151
+ _ , _ = w .Write ([]byte (body ))
152
+ cancel ()
153
+ })
154
+ }()
155
+
156
+ _ = page .Context (r .Context ()).Navigate (u )
157
+
158
+ _ = page .WaitStable (time .Second )
159
+
160
+ body , _ := page .HTML ()
161
+
162
+ log .Println ("headless render done:" , u )
163
+
164
+ once .Do (func () {
165
+ _ , _ = w .Write ([]byte (body ))
166
+ })
104
167
105
168
return true
106
169
}
0 commit comments