Skip to content

Commit 983dc7f

Browse files
authored
Cumulative improvements for CI troubleshooting (#2996)
* feat: show more error information in zb output Signed-off-by: Andrei Aaron <[email protected]> * chore(ci): gc stress tests to save logs as artifacts Signed-off-by: Andrei Aaron <[email protected]> * chore: add benchmark results to job summaries Signed-off-by: Andrei Aaron <[email protected]> * fix: count and show zb errors Signed-off-by: Andrei Aaron <[email protected]> * ci: fix the flaky coverage of the redis logger Signed-off-by: Andrei Aaron <[email protected]> --------- Signed-off-by: Andrei Aaron <[email protected]>
1 parent 3893eec commit 983dc7f

File tree

7 files changed

+110
-27
lines changed

7 files changed

+110
-27
lines changed

.github/workflows/benchmark.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,4 +40,6 @@ jobs:
4040
external-data-json-path: ./cache/benchmark-data.json
4141
# Workflow will fail when an alert happens
4242
fail-on-alert: true
43+
# Show data in the job summary
44+
summary-always: true
4345
# Upload the updated cache file for the next job by actions/cache

.github/workflows/cluster.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -254,6 +254,8 @@ jobs:
254254
external-data-json-path: ./cache/benchmark-data.json
255255
# Workflow will fail when an alert happens
256256
fail-on-alert: true
257+
# Show data in the job summary
258+
summary-always: true
257259
# Upload the updated cache file for the next job by actions/cache
258260

259261
minio-redis:
@@ -507,4 +509,6 @@ jobs:
507509
external-data-json-path: ./cache/benchmark-data.json
508510
# Workflow will fail when an alert happens
509511
fail-on-alert: true
512+
# Show data in the job summary
513+
summary-always: true
510514
# Upload the updated cache file for the next job by actions/cache

.github/workflows/gc-stress-test.yaml

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,14 @@ jobs:
3737
sudo rm -rf /tmp/zot
3838
continue-on-error: true
3939

40+
- name: Upload zot logs
41+
uses: actions/upload-artifact@v4
42+
if: always()
43+
with:
44+
name: gc-referrers-bench-local
45+
path: /tmp/gc-referrers-bench-local.log
46+
if-no-files-found: error
47+
4048
- name: Check on failures
4149
if: steps.bench.outcome != 'success'
4250
run: |
@@ -68,6 +76,14 @@ jobs:
6876
sudo rm -rf /tmp/zot
6977
continue-on-error: true
7078

79+
- name: Upload zot logs
80+
uses: actions/upload-artifact@v4
81+
if: always()
82+
with:
83+
name: gc-bench-local
84+
path: /tmp/gc-bench-local.log
85+
if-no-files-found: error
86+
7187
- name: Check on failures
7288
if: steps.bench.outcome != 'success'
7389
run: |
@@ -142,11 +158,20 @@ jobs:
142158
AWS_SECRET_ACCESS_KEY: fake
143159
continue-on-error: true
144160

161+
- name: Upload zot logs
162+
uses: actions/upload-artifact@v4
163+
if: always()
164+
with:
165+
name: gc-referrers-bench-s3
166+
path: /tmp/gc-referrers-bench-s3.log
167+
if-no-files-found: error
168+
145169
- name: Check on failures
146170
if: steps.bench.outcome != 'success'
147171
run: |
148172
cat /tmp/gc-referrers-bench-s3.log
149173
exit 1
174+
150175
- uses: ./.github/actions/teardown-localstack
151176

152177
gc-stress-s3:
@@ -217,9 +242,18 @@ jobs:
217242
AWS_SECRET_ACCESS_KEY: fake
218243
continue-on-error: true
219244

245+
- name: Upload zot logs
246+
uses: actions/upload-artifact@v4
247+
if: always()
248+
with:
249+
name: gc-bench-s3
250+
path: /tmp/gc-bench-s3.log
251+
if-no-files-found: error
252+
220253
- name: Check on failures
221254
if: steps.bench.outcome != 'success'
222255
run: |
223256
cat /tmp/gc-bench-s3.log
224257
exit 1
258+
225259
- uses: ./.github/actions/teardown-localstack

cmd/zb/helper.go

Lines changed: 25 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -119,11 +119,12 @@ func pullAndCollect(url string, repos []string, manifestItem manifestStruct,
119119
func() {
120120
start := time.Now()
121121

122-
var isConnFail, isErr bool
123-
124-
var statusCode int
125-
126-
var latency time.Duration
122+
var (
123+
isConnFail, isErr bool
124+
statusCode int
125+
latency time.Duration
126+
err error
127+
)
127128

128129
defer func() {
129130
// send a stats record
@@ -132,6 +133,7 @@ func pullAndCollect(url string, repos []string, manifestItem manifestStruct,
132133
statusCode: statusCode,
133134
isConnFail: isConnFail,
134135
isErr: isErr,
136+
err: err,
135137
}
136138
}()
137139

@@ -144,8 +146,10 @@ func pullAndCollect(url string, repos []string, manifestItem manifestStruct,
144146
for repo, manifestTag := range manifestHash {
145147
manifestLoc := fmt.Sprintf("%s/v2/%s/manifests/%s", url, repo, manifestTag)
146148

149+
var resp *resty.Response
150+
147151
// check manifest
148-
resp, err := client.R().
152+
resp, err = client.R().
149153
SetHeader("Content-Type", "application/vnd.oci.image.manifest.v1+json").
150154
Head(manifestLoc)
151155

@@ -261,7 +265,7 @@ func pullAndCollect(url string, repos []string, manifestItem manifestStruct,
261265
blobLoc := fmt.Sprintf("%s/v2/%s/blobs/%s", url, repo, blobDigest)
262266

263267
// check blob
264-
resp, err := client.R().Head(blobLoc)
268+
resp, err = client.R().Head(blobLoc)
265269

266270
latency = time.Since(start)
267271

@@ -476,11 +480,12 @@ func pushMonolithAndCollect(workdir, url, trepo string, count int,
476480
func() {
477481
start := time.Now()
478482

479-
var isConnFail, isErr bool
480-
481-
var statusCode int
482-
483-
var latency time.Duration
483+
var (
484+
isConnFail, isErr bool
485+
statusCode int
486+
latency time.Duration
487+
err error
488+
)
484489

485490
defer func() {
486491
// send a stats record
@@ -489,6 +494,7 @@ func pushMonolithAndCollect(workdir, url, trepo string, count int,
489494
statusCode: statusCode,
490495
isConnFail: isConnFail,
491496
isErr: isErr,
497+
err: err,
492498
}
493499
}()
494500

@@ -680,11 +686,12 @@ func pushChunkAndCollect(workdir, url, trepo string, count int,
680686
func() {
681687
start := time.Now()
682688

683-
var isConnFail, isErr bool
684-
685-
var statusCode int
686-
687-
var latency time.Duration
689+
var (
690+
isConnFail, isErr bool
691+
statusCode int
692+
latency time.Duration
693+
err error
694+
)
688695

689696
defer func() {
690697
// send a stats record
@@ -693,6 +700,7 @@ func pushChunkAndCollect(workdir, url, trepo string, count int,
693700
statusCode: statusCode,
694701
isConnFail: isConnFail,
695702
isErr: isErr,
703+
err: err,
696704
}
697705
}()
698706

cmd/zb/perf.go

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,8 @@ type statsSummary struct {
126126
statusHist map[string]int
127127
rps float32
128128
mixedSize, mixedType bool
129-
errors int
129+
errorCount int
130+
errors map[string]int
130131
}
131132

132133
func newStatsSummary(name string) statsSummary {
@@ -147,11 +148,16 @@ type statsRecord struct {
147148
statusCode int
148149
isConnFail bool
149150
isErr bool
151+
err error
150152
}
151153

152154
func updateStats(summary *statsSummary, record statsRecord) {
153155
if record.isConnFail || record.isErr {
154-
summary.errors++
156+
summary.errorCount++
157+
}
158+
159+
if record.err != nil {
160+
summary.errors[record.err.Error()] += 1
155161
}
156162

157163
if summary.min < 0 || record.latency < summary.min {
@@ -208,9 +214,14 @@ func printStats(requests int, summary *statsSummary, outFmt string) {
208214
log.Printf("============\n")
209215
log.Printf("Test name:\t%s", summary.name)
210216
log.Printf("Time taken for tests:\t%v", summary.total)
211-
log.Printf("Complete requests:\t%v", requests-summary.errors)
212-
log.Printf("Failed requests:\t%v", summary.errors)
213217
log.Printf("Requests per second:\t%v", summary.rps)
218+
log.Printf("Complete requests:\t%v", requests-summary.errorCount)
219+
log.Printf("Failed requests:\t%v", summary.errorCount)
220+
221+
for errStr, count := range summary.errors {
222+
log.Printf("Error %s count:\t%d", errStr, count)
223+
}
224+
214225
log.Printf("\n")
215226

216227
if summary.mixedSize {
@@ -306,13 +317,16 @@ func GetCatalog(
306317

307318
var latency time.Duration
308319

320+
var err error
321+
309322
defer func() {
310323
// send a stats record
311324
statsCh <- statsRecord{
312325
latency: latency,
313326
statusCode: statusCode,
314327
isConnFail: isConnFail,
315328
isErr: isErr,
329+
err: err,
316330
}
317331
}()
318332

@@ -749,7 +763,7 @@ func Perf(
749763

750764
printStats(requests, &summary, outFmt)
751765

752-
if summary.errors != 0 && !zbError {
766+
if summary.errorCount != 0 && !zbError {
753767
zbError = true
754768
}
755769
}

pkg/api/config/redis/redis.go

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,16 +16,16 @@ import (
1616

1717
var once sync.Once //nolint: gochecknoglobals // redis.SetLogger modifies an unprotected global variable
1818

19-
type redisLogger struct {
20-
log log.Logger
19+
type RedisLogger struct {
20+
Log log.Logger
2121
}
2222

23-
func (r redisLogger) Printf(ctx context.Context, format string, v ...interface{}) {
24-
r.log.Debug().Msgf(format, v...)
23+
func (r RedisLogger) Printf(ctx context.Context, format string, v ...interface{}) {
24+
r.Log.Debug().Msgf(format, v...)
2525
}
2626

2727
func GetRedisClient(redisConfig map[string]interface{}, log log.Logger) (redis.UniversalClient, error) {
28-
once.Do(func() { redis.SetLogger(redisLogger{log}) }) // call redis.SetLogger only once
28+
once.Do(func() { redis.SetLogger(RedisLogger{log}) }) // call redis.SetLogger only once
2929

3030
// go-redis supports connecting via the redis uri specification (more convenient than parameter parsing)
3131
// Note failover/Sentinel cannot be configured via URL parsing at the moment

pkg/api/config/redis/redis_test.go

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
package rediscfg_test
22

33
import (
4+
"context"
5+
"io"
46
"os"
57
"path"
68
"testing"
@@ -15,6 +17,25 @@ import (
1517
"zotregistry.dev/zot/pkg/log"
1618
)
1719

20+
func TestRedisLogger(t *testing.T) {
21+
Convey("Print using Redis logger", t, func() {
22+
logFile, err := os.CreateTemp(t.TempDir(), "zot-log*.txt")
23+
So(err, ShouldBeNil)
24+
25+
logger := log.NewLogger("debug", logFile.Name())
26+
writers := io.MultiWriter(os.Stdout, logFile)
27+
logger.Logger = logger.Output(writers)
28+
29+
rlog := rediscfg.RedisLogger{logger}
30+
rlog.Printf(context.Background(), "this is a rest string")
31+
32+
content, err := os.ReadFile(logFile.Name())
33+
So(err, ShouldBeNil)
34+
35+
So(string(content), ShouldContainSubstring, "this is a rest string")
36+
})
37+
}
38+
1839
func TestRedisOptions(t *testing.T) {
1940
Convey("Test redis initialization", t, func() {
2041
log := log.NewLogger("debug", "")

0 commit comments

Comments
 (0)