Skip to content
This repository was archived by the owner on Jul 16, 2021. It is now read-only.

Commit ae97a0d

Browse files
author
Andres Martinez Gotor
authored
[chart-repo] Store repositories checksum in cache to avoid reprocessing (#637)
* Store repositories checksum in cache to avoid reprocessing
  Signed-off-by: Andres Martinez Gotor <[email protected]>
* Update log message
  Signed-off-by: Andres Martinez Gotor <[email protected]>
1 parent 010bd09 commit ae97a0d

File tree

3 files changed

+112
-7
lines changed

3 files changed

+112
-7
lines changed

cmd/chart-repo/types.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,3 +59,9 @@ type chartFiles struct {
5959
Repo repo
6060
Digest string
6161
}
62+
63+
// repoCheck is the per-repository cache record kept in the repositories
// collection. It remembers the checksum of the last index that was fully
// processed so an unchanged repository can be skipped on the next sync.
type repoCheck struct {
	// ID is the document key; updateLastCheck upserts it with the repo name.
	ID string `bson:"_id"`
	// LastUpdate is the time at which the checksum was last stored.
	LastUpdate time.Time `bson:"last_update"`
	// Checksum is the hex digest of the repository index that was processed.
	Checksum string `bson:"checksum"`
}

cmd/chart-repo/utils.go

Lines changed: 55 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ import (
2020
"archive/tar"
2121
"bytes"
2222
"compress/gzip"
23+
"crypto/sha256"
2324
"crypto/tls"
2425
"crypto/x509"
2526
"errors"
@@ -45,6 +46,7 @@ import (
4546

4647
const (
4748
chartCollection = "charts"
49+
repositoryCollection = "repos"
4850
chartFilesCollection = "files"
4951
defaultTimeoutSeconds = 10
5052
additionalCAFile = "/usr/local/share/ca-certificates/ca.crt"
@@ -62,7 +64,7 @@ type httpClient interface {
6264

6365
var netClient httpClient = &http.Client{}
6466

65-
func parseRepoUrl(repoURL string) (*url.URL, error) {
67+
// parseRepoURL strips surrounding whitespace from repoURL and parses the
// result with url.ParseRequestURI, so only absolute URIs or absolute paths
// are accepted.
func parseRepoURL(repoURL string) (*url.URL, error) {
	trimmed := strings.TrimSpace(repoURL)
	return url.ParseRequestURI(trimmed)
}
@@ -85,14 +87,30 @@ func init() {
8587
// imported into the database as fast as possible. E.g. we want all icons for
8688
// charts before fetching readmes for each chart and version pair.
8789
func syncRepo(dbSession datastore.Session, repoName, repoURL string, authorizationHeader string) error {
88-
url, err := parseRepoUrl(repoURL)
90+
url, err := parseRepoURL(repoURL)
8991
if err != nil {
9092
log.WithFields(log.Fields{"url": repoURL}).WithError(err).Error("failed to parse URL")
9193
return err
9294
}
9395

9496
r := repo{Name: repoName, URL: url.String(), AuthorizationHeader: authorizationHeader}
95-
index, err := fetchRepoIndex(r)
97+
repoBytes, err := fetchRepoIndex(r)
98+
if err != nil {
99+
return err
100+
}
101+
102+
repoChecksum, err := getSha256(repoBytes)
103+
if err != nil {
104+
return err
105+
}
106+
107+
// Check if the repo has been already processed
108+
if repoAlreadyProcessed(dbSession, repoName, repoChecksum) {
109+
log.WithFields(log.Fields{"url": repoURL}).Info("Skipping repository since there are no updates")
110+
return nil
111+
}
112+
113+
index, err := parseRepoIndex(repoBytes)
96114
if err != nil {
97115
return err
98116
}
@@ -148,9 +166,39 @@ func syncRepo(dbSession datastore.Session, repoName, repoURL string, authorizati
148166
// Wait for the worker pools to finish processing
149167
wg.Wait()
150168

169+
// Update cache in the database
170+
if err = updateLastCheck(dbSession, repoName, repoChecksum, time.Now()); err != nil {
171+
return err
172+
}
173+
log.WithFields(log.Fields{"url": repoURL}).Info("Stored repository update in cache")
174+
151175
return nil
152176
}
153177

178+
// getSha256 returns the lowercase hex-encoded SHA-256 digest of src.
//
// The error result is always nil. Hashing an in-memory byte slice cannot
// fail, so the previous io.Copy error branch was unreachable; the second
// return value is kept only so existing callers that check it still compile.
func getSha256(src []byte) (string, error) {
	// %x on the [sha256.Size]byte array prints two hex characters per byte.
	return fmt.Sprintf("%x", sha256.Sum256(src)), nil
}
186+
187+
func repoAlreadyProcessed(dbSession datastore.Session, repoName string, checksum string) bool {
188+
db, closer := dbSession.DB()
189+
defer closer()
190+
lastCheck := &repoCheck{}
191+
err := db.C(repositoryCollection).Find(bson.M{"_id": repoName}).One(lastCheck)
192+
return err == nil && checksum == lastCheck.Checksum
193+
}
194+
195+
func updateLastCheck(dbSession datastore.Session, repoName string, checksum string, now time.Time) error {
196+
db, closer := dbSession.DB()
197+
defer closer()
198+
_, err := db.C(repositoryCollection).UpsertId(repoName, bson.M{"$set": bson.M{"last_update": now, "checksum": checksum}})
199+
return err
200+
}
201+
154202
func deleteRepo(dbSession datastore.Session, repoName string) error {
155203
db, closer := dbSession.DB()
156204
defer closer()
@@ -167,8 +215,8 @@ func deleteRepo(dbSession datastore.Session, repoName string) error {
167215
return err
168216
}
169217

170-
func fetchRepoIndex(r repo) (*helmrepo.IndexFile, error) {
171-
indexURL, err := parseRepoUrl(r.URL)
218+
func fetchRepoIndex(r repo) ([]byte, error) {
219+
indexURL, err := parseRepoURL(r.URL)
172220
if err != nil {
173221
log.WithFields(log.Fields{"url": r.URL}).WithError(err).Error("failed to parse URL")
174222
return nil, err
@@ -203,7 +251,7 @@ func fetchRepoIndex(r repo) (*helmrepo.IndexFile, error) {
203251
if err != nil {
204252
return nil, err
205253
}
206-
return parseRepoIndex(body)
254+
return body, nil
207255
}
208256

209257
func parseRepoIndex(body []byte) (*helmrepo.IndexFile, error) {
@@ -433,7 +481,7 @@ func extractFilesFromTarball(filenames []string, tarf *tar.Reader) (map[string]s
433481

434482
func chartTarballURL(r repo, cv chartVersion) string {
435483
source := cv.URLs[0]
436-
if _, err := parseRepoUrl(source); err != nil {
484+
if _, err := parseRepoURL(source); err != nil {
437485
// If the chart URL is not absolute, join with repo URL. It's fine if the
438486
// URL we build here is invalid as we can catch this error when actually
439487
// making the request

cmd/chart-repo/utils_test.go

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ import (
3333
"path"
3434
"strings"
3535
"testing"
36+
"time"
3637

3738
"github.com/arschles/assert"
3839
"github.com/disintegration/imaging"
@@ -609,7 +610,57 @@ func (h *emptyChartRepoHTTPClient) Do(req *http.Request) (*http.Response, error)
609610
func Test_emptyChartRepo(t *testing.T) {
610611
netClient = &emptyChartRepoHTTPClient{}
611612
m := mock.Mock{}
613+
m.On("One", &repoCheck{}).Return(nil)
612614
dbSession := mockstore.NewMockSession(&m)
613615
err := syncRepo(dbSession, "testRepo", "https://my.examplerepo.com", "")
614616
assert.ExistsErr(t, err, "Failed Request")
615617
}
618+
619+
func Test_getSha256(t *testing.T) {
620+
sha, err := getSha256([]byte("this is a test"))
621+
assert.Equal(t, err, nil, "Unable to get sha")
622+
assert.Equal(t, sha, "2e99758548972a8e8822ad47fa1017ff72f06f3ff6a016851f45c398732bc50c", "Unable to get sha")
623+
}
624+
625+
func Test_repoAlreadyProcessed(t *testing.T) {
626+
tests := []struct {
627+
name string
628+
checksum string
629+
mockedLastCheck repoCheck
630+
processed bool
631+
}{
632+
{"not processed yet", "bar", repoCheck{}, false},
633+
{"already processed", "bar", repoCheck{Checksum: "bar"}, true},
634+
}
635+
636+
for _, tt := range tests {
637+
t.Run(tt.name, func(t *testing.T) {
638+
m := mock.Mock{}
639+
repo := &repoCheck{}
640+
m.On("One", repo).Run(func(args mock.Arguments) {
641+
*args.Get(0).(*repoCheck) = tt.mockedLastCheck
642+
}).Return(nil)
643+
dbSession := mockstore.NewMockSession(&m)
644+
res := repoAlreadyProcessed(dbSession, "", tt.checksum)
645+
if res != tt.processed {
646+
t.Errorf("Expected alreadyProcessed to be %v got %v", tt.processed, res)
647+
}
648+
})
649+
}
650+
}
651+
652+
func Test_updateLastCheck(t *testing.T) {
653+
m := mock.Mock{}
654+
repoName := "foo"
655+
checksum := "bar"
656+
now := time.Now()
657+
m.On("UpsertId", repoName, bson.M{"$set": bson.M{"last_update": now, "checksum": checksum}}).Return(nil)
658+
dbSession := mockstore.NewMockSession(&m)
659+
err := updateLastCheck(dbSession, repoName, checksum, now)
660+
if err != nil {
661+
t.Errorf("Unexpected error %v", err)
662+
}
663+
if len(m.Calls) != 1 {
664+
t.Errorf("Expected one call got %d", len(m.Calls))
665+
}
666+
}

0 commit comments

Comments
 (0)