Skip to content

Commit ea086c2

Browse files
authored
Merge pull request gocolly#826 from Shinku-Chen/retry
fix: retrying a request that redirects to an already-visited URL fails with AlreadyVisitedError (修正Redirect的retry报AlreadyVisitedError错误)
2 parents 3bddea4 + 151ce0a commit ea086c2

File tree

2 files changed

+31
-3
lines changed

2 files changed

+31
-3
lines changed

colly.go

Lines changed: 9 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -209,7 +209,10 @@ var collectorCounter uint32
209209
type key int
210210

211211
// ProxyURLKey is the context key for the request proxy address.
212-
const ProxyURLKey key = iota
212+
const (
213+
ProxyURLKey key = iota
214+
CheckRevisitKey
215+
)
213216

214217
var (
215218
// ErrForbiddenDomain is the error thrown if visiting
@@ -667,7 +670,8 @@ func (c *Collector) scrape(u, method string, depth int, requestData io.Reader, c
667670
}
668671
// note: once 1.13 is minimum supported Go version,
669672
// replace this with http.NewRequestWithContext
670-
req = req.WithContext(c.Context)
673+
req = req.WithContext(context.WithValue(c.Context, CheckRevisitKey, checkRevisit))
674+
671675
if err := c.requestCheck(parsedURL, method, req.GetBody, depth, checkRevisit); err != nil {
672676
return err
673677
}
@@ -1477,7 +1481,9 @@ func (c *Collector) checkRedirectFunc() func(req *http.Request, via []*http.Requ
14771481
return err
14781482
}
14791483
if visited {
1480-
return &AlreadyVisitedError{req.URL}
1484+
if checkRevisit, ok := req.Context().Value(CheckRevisitKey).(bool); !ok || checkRevisit {
1485+
return &AlreadyVisitedError{req.URL}
1486+
}
14811487
}
14821488
err = c.store.Visited(uHash)
14831489
if err != nil {

colly_test.go

Lines changed: 22 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1885,6 +1885,28 @@ func TestCollectorPostRetryUnseekable(t *testing.T) {
18851885
}
18861886
}
18871887

1888+
func TestRedirectErrorRetry(t *testing.T) {
1889+
ts := newTestServer()
1890+
defer ts.Close()
1891+
c := NewCollector()
1892+
c.OnError(func(r *Response, err error) {
1893+
if r.Ctx.Get("notFirst") == "" {
1894+
r.Ctx.Put("notFirst", "first")
1895+
_ = r.Request.Retry()
1896+
return
1897+
}
1898+
if e := (&AlreadyVisitedError{}); errors.As(err, &e) {
1899+
t.Error("loop AlreadyVisitedError")
1900+
}
1901+
1902+
})
1903+
c.OnResponse(func(response *Response) {
1904+
//println(1)
1905+
})
1906+
c.Visit(ts.URL + "/redirected/")
1907+
c.Visit(ts.URL + "/redirect")
1908+
}
1909+
18881910
func TestCheckRequestHeadersFunc(t *testing.T) {
18891911
ts := newTestServer()
18901912
defer ts.Close()

0 commit comments

Comments
 (0)