Skip to content

Commit 2d141b8

Browse files
committed
Retry http requests when crawling
1 parent 95064f1 commit 2d141b8

File tree

4 files changed

+80
-1
lines changed

4 files changed

+80
-1
lines changed

crawler/crawler.go

+2-1
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,7 @@ import (
1313
"strings"
1414
"sync"
1515

16+
"github.com/hashicorp/go-retryablehttp"
1617
"github.com/tschaub/workgroup"
1718
)
1819

@@ -64,7 +65,7 @@ func loadFile(resourcePath string, value any) error {
6465
}
6566

6667
func loadUrl(resourceUrl string, value any) error {
67-
resp, err := http.DefaultClient.Get(resourceUrl)
68+
resp, err := retryablehttp.Get(resourceUrl)
6869
if err != nil {
6970
return err
7071
}

crawler/crawler_test.go

+69
Original file line number | Diff line number | Diff line change
@@ -3,12 +3,16 @@ package crawler_test
33
import (
44
"context"
55
"fmt"
6+
"net/http"
7+
"net/http/httptest"
8+
"strings"
69
"sync"
710
"sync/atomic"
811
"testing"
912

1013
"github.com/planetlabs/go-stac/crawler"
1114
"github.com/stretchr/testify/assert"
15+
"github.com/stretchr/testify/require"
1216
)
1317

1418
func TestCrawler(t *testing.T) {
@@ -31,6 +35,71 @@ func TestCrawler(t *testing.T) {
3135
assert.Equal(t, uint64(3), count)
3236
}
3337

38+
func TestCrawlerHTTP(t *testing.T) {
39+
server := httptest.NewServer(http.FileServer(http.Dir("testdata")))
40+
defer server.Close()
41+
42+
count := uint64(0)
43+
visited := &sync.Map{}
44+
45+
visitor := func(location string, resource crawler.Resource) error {
46+
atomic.AddUint64(&count, 1)
47+
_, loaded := visited.LoadOrStore(location, true)
48+
if loaded {
49+
return fmt.Errorf("already visited %s", location)
50+
}
51+
return nil
52+
}
53+
c := crawler.New(visitor)
54+
55+
err := c.Crawl(context.Background(), server.URL+"/v1.0.0/catalog-with-collection-of-items.json")
56+
assert.NoError(t, err)
57+
58+
assert.Equal(t, uint64(3), count)
59+
60+
_, visitedCatalog := visited.Load(server.URL + "/v1.0.0/catalog-with-collection-of-items.json")
61+
assert.True(t, visitedCatalog)
62+
63+
_, visitedCollection := visited.Load(server.URL + "/v1.0.0/collection-with-items.json")
64+
assert.True(t, visitedCollection)
65+
66+
_, visitedItem := visited.Load(server.URL + "/v1.0.0/item-in-collection.json")
67+
assert.True(t, visitedItem)
68+
}
69+
70+
func TestCrawlerHTTPRetry(t *testing.T) {
71+
72+
tried := false
73+
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
74+
if !tried {
75+
tried = true
76+
w.WriteHeader(http.StatusInternalServerError)
77+
return
78+
}
79+
w.WriteHeader(http.StatusNotImplemented) // stop trying
80+
}))
81+
defer server.Close()
82+
83+
count := uint64(0)
84+
visited := &sync.Map{}
85+
visitor := func(location string, resource crawler.Resource) error {
86+
atomic.AddUint64(&count, 1)
87+
_, loaded := visited.LoadOrStore(location, true)
88+
if loaded {
89+
return fmt.Errorf("already visited %s", location)
90+
}
91+
return nil
92+
}
93+
c := crawler.New(visitor)
94+
95+
err := c.Crawl(context.Background(), server.URL+"/not-found")
96+
require.Error(t, err)
97+
98+
assert.True(t, strings.HasPrefix(err.Error(), "unexpected response"))
99+
assert.Equal(t, uint64(0), count)
100+
assert.True(t, tried)
101+
}
102+
34103
func TestCrawlerSingle(t *testing.T) {
35104
count := uint64(0)
36105
visited := &sync.Map{}

go.mod

+2
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@ require (
66
github.com/dlclark/regexp2 v1.4.0
77
github.com/go-logr/logr v1.2.3
88
github.com/go-logr/zapr v1.2.3
9+
github.com/hashicorp/go-retryablehttp v0.7.1
910
github.com/santhosh-tekuri/jsonschema/v5 v5.0.0
1011
github.com/schollz/progressbar/v3 v3.8.6
1112
github.com/stretchr/testify v1.7.1
@@ -18,6 +19,7 @@ require (
1819
require (
1920
github.com/cpuguy83/go-md2man/v2 v2.0.1 // indirect
2021
github.com/davecgh/go-spew v1.1.1 // indirect
22+
github.com/hashicorp/go-cleanhttp v0.5.1 // indirect
2123
github.com/mattn/go-runewidth v0.0.13 // indirect
2224
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db // indirect
2325
github.com/pmezard/go-difflib v1.0.0 // indirect

go.sum

+7
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,12 @@ github.com/go-logr/logr v1.2.3 h1:2DntVwHkVopvECVRSlL5PSo9eG+cAkDCuckLubN+rq0=
1313
github.com/go-logr/logr v1.2.3/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
1414
github.com/go-logr/zapr v1.2.3 h1:a9vnzlIBPQBBkeaR9IuMUfmVOrQlkoC4YfPoFkX3T7A=
1515
github.com/go-logr/zapr v1.2.3/go.mod h1:eIauM6P8qSvTw5o2ez6UEAfGjQKrxQTl5EoK+Qa2oG4=
16+
github.com/hashicorp/go-cleanhttp v0.5.1 h1:dH3aiDG9Jvb5r5+bYHsikaOUIpcM0xvgMXVoDkXMzJM=
17+
github.com/hashicorp/go-cleanhttp v0.5.1/go.mod h1:JpRdi6/HCYpAwUzNwuwqhbovhLtngrth3wmdIIUrZ80=
18+
github.com/hashicorp/go-hclog v0.9.2 h1:CG6TE5H9/JXsFWJCfoIVpKFIkFe6ysEuHirp4DxCsHI=
19+
github.com/hashicorp/go-hclog v0.9.2/go.mod h1:5CU+agLiy3J7N7QjHK5d05KxGsuXiQLrjA0H7acj2lQ=
20+
github.com/hashicorp/go-retryablehttp v0.7.1 h1:sUiuQAnLlbvmExtFQs72iFW/HXeUn8Z1aJLQ4LJJbTQ=
21+
github.com/hashicorp/go-retryablehttp v0.7.1/go.mod h1:vAew36LZh98gCBJNLH42IQ1ER/9wtLZZ8meHqQvEYWY=
1622
github.com/k0kubun/go-ansi v0.0.0-20180517002512-3bf9e2903213/go.mod h1:vNUNkEQ1e29fT/6vq2aBdFsgNPmy8qMdSay1npru+Sw=
1723
github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI=
1824
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
@@ -36,6 +42,7 @@ github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQD
3642
github.com/schollz/progressbar/v3 v3.8.6 h1:QruMUdzZ1TbEP++S1m73OqRJk20ON11m6Wqv4EoGg8c=
3743
github.com/schollz/progressbar/v3 v3.8.6/go.mod h1:W5IEwbJecncFGBvuEh4A7HT1nZZ6WNIL2i3qbnI0WKY=
3844
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
45+
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
3946
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
4047
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
4148
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=

0 commit comments

Comments
 (0)