Skip to content

Commit 60effe0

Browse files
DPDV-6415: Adjust stress tests (#99)
* DPDV-6415: Fix SIGSEGV error * Add logging related to locks * Another attempt to fix the SIGSEGV -> it's still crashing * Fix the SIGSEGV * Run stress tests * Fix stress test action * Add checkout action * Artifact name now contains os and go version * Fix stress test GHA * Do not use /usr/bin/time to make it work for all os * Make it easier to run congestion stress test * Send the batch only once * Adjust comments * Fix test to not crash when stats are not ready yet * Little bit more logging * Add more logging * Introduce yet another mutex * Use RWMutex for buffers and run Unsub in a goroutine * Add some changes from 3 months ago * Run go mod tidy * Decrease delay to speed up tests * Increase test timeout * Use just bash time since tests are running on windows as well * Increase buffer size in tests * Add comment and run unsub from goroutine * Wrap Pub methods in RLocks as well. * Do not purge during testing * DPDV-6415: Adjust stress tests --------- Co-authored-by: Martin Rataj <[email protected]>
1 parent 6a9ff2e commit 60effe0

File tree

7 files changed

+176
-35
lines changed

7 files changed

+176
-35
lines changed
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
name: Stress Tests
2+
3+
on:
4+
push:
5+
branches:
6+
- main
7+
pull_request:
8+
branches:
9+
- main
10+
schedule:
11+
- cron: '0 6 * * *'
12+
13+
permissions:
14+
actions: write # Needed for skip-duplicate-jobs job
15+
contents: read
16+
17+
jobs:
18+
# Special job which automatically cancels old runs for the same branch, prevents runs for the
19+
# same file set which has already passed, etc.
20+
pre_job:
21+
name: Skip Duplicate Jobs Pre Job
22+
runs-on: ubuntu-latest
23+
outputs:
24+
should_skip: ${{ steps.skip_check.outputs.should_skip }}
25+
steps:
26+
- id: skip_check
27+
uses: fkirc/skip-duplicate-actions@f75f66ce1886f00957d99748a42c724f4330bdcf # v5.3.1
28+
with:
29+
cancel_others: 'true'
30+
github_token: ${{ github.token }}
31+
32+
stress-tests:
33+
strategy:
34+
# keep in sync with https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/main/.github/workflows/build-and-test.yml#L237
35+
matrix:
36+
go: ['1.21', '1.22']
37+
os: [ubuntu-latest, windows-latest, macos-latest]
38+
runs-on: ${{ matrix.os }}
39+
needs: pre_job
40+
if: ${{ needs.pre_job.outputs.should_skip != 'true' || github.ref_name == 'main' }}
41+
42+
steps:
43+
- uses: actions/checkout@v4
44+
- name: Set up Go
45+
uses: actions/setup-go@v5
46+
with:
47+
go-version: ${{ matrix.go }}
48+
cache: true
49+
- name: Run Stress Test - Each bucket used once
50+
run: |
51+
./scripts/run-stress-test.sh 10000 10000
52+
- name: Run Stress Test - Max congestion
53+
run: |
54+
./scripts/run-stress-test.sh 10000 -1
55+
- uses: actions/upload-artifact@v4
56+
with:
57+
name: logs-${{ matrix.os }}-${{ matrix.go }}
58+
path: examples/stress/*.log

Makefile

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ endif
2424

2525
GO_BUILD_TAGS=""
2626
GOTEST_OPT?= -v -race -timeout 300s -parallel 4 -count=1 --tags=$(GO_BUILD_TAGS)
27-
GOTEST_LONG_RUNNING_OPT?= -v -race -timeout 600s -parallel 4 -count=1 -tags=long_running,$(GO_BUILD_TAGS)
27+
GOTEST_LONG_RUNNING_OPT?= -v -race -timeout 1200s -parallel 4 -count=1 -tags=long_running,$(GO_BUILD_TAGS)
2828
GOTEST_OPT_WITH_COVERAGE = $(GOTEST_OPT) -coverprofile=coverage.txt -covermode=atomic
2929
GOTEST_OPT_WITH_COVERAGE_LONG_RUNNING=$(GOTEST_LONG_RUNNING_OPT) -coverprofile=coverage.txt -covermode=atomic
3030
GOCMD?= go
@@ -49,11 +49,11 @@ test: test-all
4949

5050
.PHONY: test-unit
5151
test-unit:
52-
$(GOTEST) $(GOTEST_OPT) ./...
52+
time $(GOTEST) $(GOTEST_OPT) ./...
5353

5454
.PHONY: test-all
5555
test-all:
56-
$(GOTEST) $(GOTEST_LONG_RUNNING_OPT) ./...
56+
time $(GOTEST) $(GOTEST_LONG_RUNNING_OPT) ./...
5757

5858
.PHONY: test-many-times
5959
test-many-times:

examples/stress/go.mod

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ module client
33
go 1.21
44

55
require (
6-
github.com/scalyr/dataset-go v0.17.0
6+
github.com/scalyr/dataset-go v0.18.0
77
go.uber.org/zap v1.27.0
88
)
99

@@ -17,4 +17,4 @@ require (
1717
golang.org/x/exp v0.0.0-20230626212559-97b1e661b5df // indirect
1818
)
1919

20-
replace github.com/scalyr/dataset-go v0.17.0 => ./../..
20+
replace github.com/scalyr/dataset-go v0.18.0 => ./../..

examples/stress/go.sum

Lines changed: 10 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
1-
github.com/cenkalti/backoff/v4 v4.2.1 h1:y4OZtCnogmCPw98Zjyt5a6+QwPLGkiQsYW5oUqylYbM=
2-
github.com/cenkalti/backoff/v4 v4.2.1/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE=
1+
github.com/cenkalti/backoff/v4 v4.3.0 h1:MyRJ/UdXutAwSAT+s3wNd7MfTIcy71VQueUuFK343L8=
32
github.com/cenkalti/backoff/v4 v4.3.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE=
43
github.com/cskr/pubsub v1.0.2 h1:vlOzMhl6PFn60gRlTQQsIfVwaPB/B/8MziK8FhEPt/0=
54
github.com/cskr/pubsub v1.0.2/go.mod h1:/8MzYXk/NJAz782G8RPkFzXTZVu63VotefPnR9TIRis=
@@ -15,26 +14,19 @@ github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
1514
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
1615
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
1716
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
18-
github.com/scalyr/dataset-go v0.17.0 h1:5YI/VlbLHr4Ui6SegWm0yjZYioypWB68U7nFQBKNOn8=
19-
github.com/scalyr/dataset-go v0.17.0/go.mod h1:ehHlPsZSgFWxOkud1eKwmKd5bLF9LcUFrU01XuCnh+8=
20-
github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
21-
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
22-
go.opentelemetry.io/otel v1.23.1 h1:Za4UzOqJYS+MUczKI320AtqZHZb7EqxO00jAHE0jmQY=
23-
go.opentelemetry.io/otel v1.23.1/go.mod h1:Td0134eafDLcTS4y+zQ26GE8u3dEuRBiBCTUIRHaikA=
24-
go.opentelemetry.io/otel v1.24.0/go.mod h1:W7b9Ozg4nkF5tWI5zsXkaKKDjdVjpD4oAt9Qi/MArHo=
17+
github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
18+
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
19+
go.opentelemetry.io/otel v1.27.0 h1:9BZoF3yMK/O1AafMiQTVu0YDj5Ea4hPhxCs7sGva+cg=
2520
go.opentelemetry.io/otel v1.27.0/go.mod h1:DMpAK8fzYRzs+bi3rS5REupisuqTheUlSZJ1WnZaPAQ=
26-
go.opentelemetry.io/otel/metric v1.23.1 h1:PQJmqJ9u2QaJLBOELl1cxIdPcpbwzbkjfEyelTl2rlo=
27-
go.opentelemetry.io/otel/metric v1.23.1/go.mod h1:mpG2QPlAfnK8yNhNJAxDZruU9Y1/HubbC+KyH8FaCWI=
28-
go.opentelemetry.io/otel/metric v1.24.0/go.mod h1:VYhLe1rFfxuTXLgj4CBiyz+9WYBA8pNGJgDcSFRKBco=
21+
go.opentelemetry.io/otel/metric v1.27.0 h1:hvj3vdEKyeCi4YaYfNjv2NUje8FqKqUY8IlF0FxV/ik=
2922
go.opentelemetry.io/otel/metric v1.27.0/go.mod h1:mVFgmRlhljgBiuk/MP/oKylr4hs85GZAylncepAX/ak=
30-
go.opentelemetry.io/otel/trace v1.23.1 h1:4LrmmEd8AU2rFvU1zegmvqW7+kWarxtNOPyeL6HmYY8=
31-
go.opentelemetry.io/otel/trace v1.23.1/go.mod h1:4IpnpJFwr1mo/6HL8XIPJaE9y0+u1KcVmuW7dwFSVrI=
32-
go.uber.org/goleak v1.2.0 h1:xqgm/S+aQvhWFTtR0XK3Jvg7z8kGV8P4X14IzwN3Eqk=
33-
go.uber.org/goleak v1.2.0/go.mod h1:XJYK+MuIchqpmGmUSAzotztawfKvYLUIgg7guXrwVUo=
23+
go.opentelemetry.io/otel/trace v1.27.0 h1:IqYb813p7cmbHk0a5y6pD5JPakbVfftRXABGt5/Rscw=
24+
go.opentelemetry.io/otel/trace v1.27.0/go.mod h1:6RiD1hkAprV4/q+yd2ln1HG9GoPx39SuvvstaLBl+l4=
25+
go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto=
26+
go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE=
3427
go.uber.org/multierr v1.10.0 h1:S0h4aNzvfcFsC3dRF1jLoaov7oRaKqRGC/pUEJ2yvPQ=
3528
go.uber.org/multierr v1.10.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y=
36-
go.uber.org/zap v1.26.0 h1:sI7k6L95XOKS281NhVKOFCUNIvv9e0w4BF8N3u+tCRo=
37-
go.uber.org/zap v1.26.0/go.mod h1:dtElttAiwGvoJ/vj4IwHBS/gXsEu/pZ50mUIRWuG0so=
29+
go.uber.org/zap v1.27.0 h1:aJMhYGrd5QSmlpLMr2MftRKl7t8J8PTZPA732ud/XR8=
3830
go.uber.org/zap v1.27.0/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E=
3931
golang.org/x/exp v0.0.0-20230626212559-97b1e661b5df h1:UA2aFVmmsIlefxMk29Dp2juaUSth8Pyn3Tq5Y5mJGME=
4032
golang.org/x/exp v0.0.0-20230626212559-97b1e661b5df/go.mod h1:FXUEEKJgO7OQYeo8N01OfiKP8RXMtf6e8aTskBGqWdc=

examples/stress/main.go

Lines changed: 81 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,14 @@ import (
3939
"go.uber.org/zap"
4040
)
4141

42+
const (
43+
MaxLifeTimeMultiplier = 5
44+
PurgeOlderThanMultiplier = 15
45+
)
46+
4247
func main() {
4348
eventsCount := flag.Int("events", 1e5, "number of events")
49+
bucketsCount := flag.Int("buckets", 1e5, "number of buckets")
4450
sleep := flag.Duration("sleep", 10*time.Millisecond, "sleep between sending two events")
4551
logFile := flag.String("log", fmt.Sprintf("log-%s-%d.log", version.Version, time.Now().UnixMilli()), "log file for stats")
4652
logEvery := flag.Duration("log-every", time.Second, "how often log statistics")
@@ -51,8 +57,23 @@ func main() {
5157
logger := zap.Must(zap.NewDevelopment())
5258

5359
// log input parameters
54-
logger.Info("Running stress test with:",
60+
logger.Info("Running stress test - input:",
61+
zap.Int("events", *eventsCount),
62+
zap.Int("buckets", *bucketsCount),
63+
zap.Duration("sleep", *sleep),
64+
zap.String("log", *logFile),
65+
zap.Duration("log-every", *logEvery),
66+
zap.Bool("pprof", *enablePProf),
67+
zap.String("version", version.Version),
68+
)
69+
70+
// -1 selects the max-congestion run; any other non-positive value is treated the same way, because 0 would make the i%*bucketsCount key computation panic
if *bucketsCount <= 0 {
71+
*bucketsCount = PurgeOlderThanMultiplier
72+
}
73+
74+
logger.Info("Running stress test - adjusted:",
5575
zap.Int("events", *eventsCount),
76+
zap.Int("buckets", *bucketsCount),
5677
zap.Duration("sleep", *sleep),
5778
zap.String("log", *logFile),
5879
zap.Duration("log-every", *logEvery),
@@ -61,6 +82,8 @@ func main() {
6182
)
6283

6384
if *enablePProf {
85+
runtime.SetBlockProfileRate(1)
86+
runtime.SetMutexProfileFraction(1)
6487
go func() {
6588
http.ListenAndServe("localhost:8080", nil)
6689
}()
@@ -82,7 +105,11 @@ func main() {
82105
defer server.Close()
83106

84107
cfg := config.NewDefaultDataSetConfig()
85-
bufferCfg, err := cfg.BufferSettings.WithOptions(buffer_config.WithGroupBy([]string{"body.str"}))
108+
bufferCfg, err := cfg.BufferSettings.WithOptions(
109+
buffer_config.WithGroupBy([]string{"body.str"}),
110+
buffer_config.WithMaxLifetime(MaxLifeTimeMultiplier**sleep),
111+
buffer_config.WithPurgeOlderThan(PurgeOlderThanMultiplier**sleep),
112+
)
86113
check(err)
87114
cfgUpdated, err := cfg.WithOptions(
88115
config.WithBufferSettings(*bufferCfg),
@@ -102,9 +129,20 @@ func main() {
102129

103130
go logStats(dataSetClient, &apiCalls, *logFile, *logEvery)
104131

132+
// start sending events
133+
logger.Info(
134+
"STRESS - Start adding events",
135+
)
105136
for i := 0; i < *eventsCount; i++ {
106137
batch := make([]*add_events.EventBundle, 0)
107-
key := fmt.Sprintf("%d", i)
138+
key := fmt.Sprintf("%d", i%*bucketsCount)
139+
140+
logger.Debug(
141+
"STRESS - Creating event",
142+
zap.Int("i", i),
143+
zap.String("key", key),
144+
)
145+
108146
attrs := make(map[string]interface{})
109147
attrs["body.str"] = key
110148
attrs["attributes.p1"] = strings.Repeat("A", rand.Intn(2000))
@@ -135,6 +173,9 @@ func main() {
135173
}
136174

137175
// wait until everything is processed
176+
logger.Info(
177+
"STRESS - Wait for everything to finish",
178+
)
138179
for {
139180
processed := uint64(0)
140181
stats := dataSetClient.Statistics()
@@ -152,6 +193,9 @@ func main() {
152193
}
153194

154195
// wait for extra 1 minute to see how the memory will behave
196+
logger.Info(
197+
"STRESS - Extra sleep at the end",
198+
)
155199
extraSleepFor := 60
156200
for i := 0; i <= extraSleepFor; i++ {
157201
time.Sleep(time.Second)
@@ -175,27 +219,53 @@ func logStats(client *client.DataSetClient, apiCalls *atomic.Uint64, logFile str
175219
f, err := os.Create(logFile)
176220
check(err)
177221

178-
_, err = f.WriteString("i\tTime\tEnqueued\tProcessed\tCalls\tHeapAlloc\tHeapSys\tMallocs\tFrees\tHeapObjects\tVersion\n")
222+
// column order must match the order of the values written for every row (dropped before broken)
_, err = f.WriteString("i\tTime\tEvEnqueued\tEvProcessed\tEvDropped\tEvBroken\tBufEnqueued\tBufProcessed\tBufDropped\tBufBroken\tSesOpened\tSesClosed\tCalls\tHeapAlloc\tHeapSys\tMallocs\tFrees\tHeapObjects\tVersion\n")
179223
check(err)
180224

181225
for i := 0; ; i++ {
182226
var memStats runtime.MemStats
183227
runtime.ReadMemStats(&memStats)
184-
enqueued := uint64(0)
185-
processed := uint64(0)
228+
evEnqueued := uint64(0)
229+
evProcessed := uint64(0)
230+
evDropped := uint64(0)
231+
evBroken := uint64(0)
232+
bufEnqueued := uint64(0)
233+
bufProcessed := uint64(0)
234+
bufDropped := uint64(0)
235+
bufBroken := uint64(0)
236+
sesOpened := uint64(0)
237+
sesClosed := uint64(0)
186238
clientStats := client.Statistics()
187239
if clientStats != nil {
188-
enqueued = clientStats.Events.Enqueued()
189-
processed = clientStats.Events.Processed()
240+
evEnqueued = clientStats.Events.Enqueued()
241+
evProcessed = clientStats.Events.Processed()
242+
evDropped = clientStats.Events.Dropped()
243+
evBroken = clientStats.Events.Broken()
244+
245+
bufEnqueued = clientStats.Buffers.Enqueued()
246+
bufProcessed = clientStats.Buffers.Processed()
247+
bufDropped = clientStats.Buffers.Dropped()
248+
bufBroken = clientStats.Buffers.Broken()
249+
250+
sesOpened = clientStats.Sessions.SessionsOpened()
251+
sesClosed = clientStats.Sessions.SessionsClosed()
190252
}
191253

192254
_, err := f.WriteString(
193255
fmt.Sprintf(
194-
"%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%s\n",
256+
"%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%s\n",
195257
i,
196258
time.Now().Unix(),
197-
enqueued,
198-
processed,
259+
evEnqueued,
260+
evProcessed,
261+
evDropped,
262+
evBroken,
263+
bufEnqueued,
264+
bufProcessed,
265+
bufDropped,
266+
bufBroken,
267+
sesOpened,
268+
sesClosed,
199269
apiCalls.Load(),
200270
memStats.HeapAlloc,
201271
memStats.HeapSys,

scripts/run-stress-test.sh

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
#!/usr/bin/env bash
2+
3+
# change to the stress test directory (resolved relative to this script)
4+
d=$(dirname "$0");
5+
cd "${d}/../examples/stress" || exit 1
6+
7+
# build stress test
8+
rm -rfv stress
9+
go build -o stress
10+
ls -l stress
11+
12+
# number of events; defaults to 10000 (":-" is required because bash cannot assign to positional parameters with ":=")
EVENTS="${1:-10000}"
13+
# number of buckets; defaults to 10000 (":-" is required because bash cannot assign to positional parameters with ":=")
BUCKETS="${2:-10000}"
14+
15+
echo "Run stress test for ${EVENTS} events and ${BUCKETS} buckets"
16+
./stress \
17+
--events="${EVENTS}" \
18+
--buckets="${BUCKETS}" \
19+
--sleep=10ms 2>&1 | tee "out-${EVENTS}-${BUCKETS}.log"

scripts/test-ssl-certificates.sh

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
#!/usr/bin/env bash
22

33
# build example from README.md
4-
cd ../examples/readme;
4+
d=$(dirname "$0");
5+
cd "${d}/../examples/readme" || exit 1
6+
57
pwd;
68

79
# build example

0 commit comments

Comments
 (0)