Skip to content

Commit 636c433

Browse files
committed
add unit test
1 parent f2db045 commit 636c433

File tree

3 files changed

+429
-72
lines changed

3 files changed

+429
-72
lines changed

internal/internal_worker_base.go

+3-1
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ import (
3535
"go.uber.org/cadence/internal/common/debug"
3636
"go.uber.org/cadence/internal/worker"
3737

38+
"github.com/jonboulle/clockwork"
3839
"github.com/uber-go/tally"
3940
"go.uber.org/zap"
4041
"go.uber.org/zap/zapcore"
@@ -175,13 +176,14 @@ func newBaseWorker(options baseWorkerOptions, logger *zap.Logger, metricsScope t
175176

176177
var concurrencyAS *worker.ConcurrencyAutoScaler
177178
if pollerOptions := options.pollerAutoScaler; pollerOptions.Enabled {
178-
concurrencyAS = worker.NewPollerAutoScaler(worker.ConcurrencyAutoScalerInput{
179+
concurrencyAS = worker.NewConcurrencyAutoScaler(worker.ConcurrencyAutoScalerInput{
179180
Concurrency: concurrency,
180181
Cooldown: pollerOptions.Cooldown,
181182
PollerMaxCount: pollerOptions.MaxCount,
182183
PollerMinCount: pollerOptions.MinCount,
183184
Logger: logger,
184185
Scope: metricsScope,
186+
Clock: clockwork.NewRealClock(),
185187
})
186188
}
187189

internal/worker/concurrency_auto_scaler.go

+103-71
Original file line numberDiff line numberDiff line change
@@ -21,111 +21,129 @@
2121
package worker
2222

2323
import (
24-
"context"
2524
"math"
2625
"sync"
2726
"sync/atomic"
2827
"time"
2928

29+
"github.com/jonboulle/clockwork"
3030
"github.com/uber-go/tally"
3131
"go.uber.org/zap"
3232

3333
"go.uber.org/cadence/.gen/go/shared"
3434
)
3535

3636
const (
37-
concurrencyAutoScalerUpdateTick = time.Second
38-
concurrencyAutoScalerObservabilityTick = time.Millisecond * 500
39-
targetPollerWaitTimeInMsLog2 = 4 // 16 ms
40-
numberOfPollsInRollingAverage = 20
37+
defaultAutoScalerUpdateTick = time.Second
38+
// concurrencyAutoScalerObservabilityTick = time.Millisecond * 500
39+
targetPollerWaitTimeInMsLog2 = 4 // 16 ms
40+
numberOfPollsInRollingAverage = 20
41+
42+
autoScalerEventPollerUpdate autoScalerEvent = "update-poller-limit"
43+
autoScalerEventPollerSkipUpdateCooldown = "skip-update-poller-limit-cooldown"
44+
autoScalerEventPollerSkipUpdateNoChange = "skip-update-poller-limit-no-change"
45+
autoScalerEventPollerSkipUpdateNotEnabled = "skip-update-poller-limit-not-enabled"
46+
autoScalerEventMetrics = "metrics"
47+
autoScalerEventEnable = "enable"
48+
autoScalerEventDisable = "disable"
49+
autoScalerEventStart = "start"
50+
autoScalerEventStop = "stop"
51+
autoScalerEventLogMsg string = "concurrency auto scaler event"
52+
testTimeFormat string = "15:04:05"
4153
)
4254

43-
type ConcurrencyAutoScaler struct {
44-
ctx context.Context
45-
cancel context.CancelFunc
46-
wg *sync.WaitGroup
47-
log *zap.Logger
48-
scope tally.Scope
55+
type (
56+
ConcurrencyAutoScaler struct {
57+
shutdownChan chan struct{}
58+
wg sync.WaitGroup
59+
log *zap.Logger
60+
scope tally.Scope
61+
clock clockwork.Clock
4962

50-
concurrency *ConcurrencyLimit
51-
cooldown time.Duration
63+
concurrency *ConcurrencyLimit
64+
cooldown time.Duration
65+
updateTick time.Duration
5266

53-
// enable auto scaler on concurrency or not
54-
enable atomic.Bool
67+
// enable auto scaler on concurrency or not
68+
enable atomic.Bool
5569

56-
// poller
57-
pollerMaxCount int
58-
pollerMinCount int
59-
pollerWaitTimeInMsLog2 *rollingAverage // log2(pollerWaitTimeInMs+1) for smoothing (ideal value is 0)
60-
pollerPermitLastUpdate time.Time
61-
}
70+
// poller
71+
pollerInitCount int
72+
pollerMaxCount int
73+
pollerMinCount int
74+
pollerWaitTimeInMsLog2 *rollingAverage // log2(pollerWaitTimeInMs+1) for smoothing (ideal value is 0)
75+
pollerPermitLastUpdate time.Time
76+
}
6277

63-
type ConcurrencyAutoScalerInput struct {
64-
Concurrency *ConcurrencyLimit
65-
Cooldown time.Duration // cooldown time of update
66-
PollerMaxCount int
67-
PollerMinCount int
68-
Logger *zap.Logger
69-
Scope tally.Scope
70-
}
78+
ConcurrencyAutoScalerInput struct {
79+
Concurrency *ConcurrencyLimit
80+
Cooldown time.Duration // cooldown time of update
81+
Tick time.Duration // frequency of update check
82+
PollerMaxCount int
83+
PollerMinCount int
84+
Logger *zap.Logger
85+
Scope tally.Scope
86+
Clock clockwork.Clock
87+
}
7188

72-
func NewPollerAutoScaler(input ConcurrencyAutoScalerInput) *ConcurrencyAutoScaler {
73-
ctx, cancel := context.WithCancel(context.Background())
89+
autoScalerEvent string
90+
)
7491

92+
func NewConcurrencyAutoScaler(input ConcurrencyAutoScalerInput) *ConcurrencyAutoScaler {
93+
tick := defaultAutoScalerUpdateTick
94+
if input.Tick != 0 {
95+
tick = input.Tick
96+
}
7597
return &ConcurrencyAutoScaler{
76-
ctx: ctx,
77-
cancel: cancel,
78-
wg: &sync.WaitGroup{},
98+
shutdownChan: make(chan struct{}),
7999
concurrency: input.Concurrency,
80100
cooldown: input.Cooldown,
81101
log: input.Logger,
82102
scope: input.Scope,
103+
clock: input.Clock,
104+
updateTick: tick,
83105
enable: atomic.Bool{}, // initial value should be false and is only turned on from auto config hint
106+
pollerInitCount: input.Concurrency.PollerPermit.Quota(),
84107
pollerMaxCount: input.PollerMaxCount,
85108
pollerMinCount: input.PollerMinCount,
86109
pollerWaitTimeInMsLog2: newRollingAverage(numberOfPollsInRollingAverage),
110+
pollerPermitLastUpdate: input.Clock.Now(),
87111
}
88112
}
89113

90114
func (c *ConcurrencyAutoScaler) Start() {
115+
c.logEvent(autoScalerEventStart)
116+
91117
c.wg.Add(1)
92-
go func() { // scaling daemon
118+
119+
go func() {
93120
defer c.wg.Done()
94-
ticker := time.NewTicker(concurrencyAutoScalerUpdateTick)
121+
ticker := c.clock.NewTicker(c.updateTick)
122+
defer ticker.Stop()
95123
for {
96124
select {
97-
case <-c.ctx.Done():
98-
ticker.Stop()
99-
case <-ticker.C:
125+
case <-c.shutdownChan:
126+
return
127+
case <-ticker.Chan():
128+
c.logEvent(autoScalerEventMetrics)
100129
c.updatePollerPermit()
101130
}
102131
}
103132
}()
104-
c.wg.Add(1)
105-
go func() { // observability daemon
106-
defer c.wg.Done()
107-
ticker := time.NewTicker(concurrencyAutoScalerUpdateTick)
108-
for {
109-
select {
110-
case <-c.ctx.Done():
111-
ticker.Stop()
112-
case <-ticker.C:
113-
c.emit()
114-
}
115-
}
116-
}()
117133
}
118134

119135
func (c *ConcurrencyAutoScaler) Stop() {
120-
c.cancel()
136+
close(c.shutdownChan)
121137
c.wg.Wait()
138+
c.logEvent(autoScalerEventStop)
122139
}
123140

124141
// ProcessPollerHint reads the poller response hint and takes actions
125142
// 1. update poller wait time
126143
// 2. enable/disable auto scaler
127144
func (c *ConcurrencyAutoScaler) ProcessPollerHint(hint *shared.AutoConfigHint) {
128145
if hint == nil {
146+
c.log.Warn("auto config hint is nil, this results in no action")
129147
return
130148
}
131149
if hint.PollerWaitTimeInMs != nil {
@@ -134,41 +152,53 @@ func (c *ConcurrencyAutoScaler) ProcessPollerHint(hint *shared.AutoConfigHint) {
134152
}
135153

136154
/*
137-
Atomically compare and switch the auto scaler enable flag. If auto scaler is turned off, reset the concurrency limits.
155+
Atomically compare and switch the auto scaler enable flag. If auto scaler is turned off, IMMEDIATELY reset the concurrency limits.
138156
*/
139157
var shouldEnable bool
140158
if hint.EnableAutoConfig != nil && *hint.EnableAutoConfig {
141159
shouldEnable = true
142160
}
143161
if switched := c.enable.CompareAndSwap(!shouldEnable, shouldEnable); switched {
144162
if shouldEnable {
145-
c.log.Sugar().Infof("auto scaler enabled")
163+
c.logEvent(autoScalerEventEnable)
146164
} else {
147-
c.log.Sugar().Infof("auto scaler disabled")
148-
c.ResetConcurrency()
165+
c.resetConcurrency()
166+
c.logEvent(autoScalerEventDisable)
149167
}
150168
}
151169
}
152170

153-
// ResetConcurrency reset poller quota to the max value. This will be used for gracefully switching the auto scaler off to avoid workers stuck in the wrong state
154-
func (c *ConcurrencyAutoScaler) ResetConcurrency() {
155-
c.concurrency.PollerPermit.SetQuota(c.pollerMaxCount)
171+
// resetConcurrency resets the poller quota to its initial value (the quota observed when the auto scaler was constructed). This will be used for gracefully switching the auto scaler off to avoid workers stuck in the wrong state
172+
func (c *ConcurrencyAutoScaler) resetConcurrency() {
173+
c.concurrency.PollerPermit.SetQuota(c.pollerInitCount)
156174
}
157175

158-
func (c *ConcurrencyAutoScaler) emit() {
176+
func (c *ConcurrencyAutoScaler) logEvent(event autoScalerEvent) {
159177
if c.enable.Load() {
160178
c.scope.Counter("concurrency_auto_scaler.enabled").Inc(1)
161179
} else {
162180
c.scope.Counter("concurrency_auto_scaler.disabled").Inc(1)
163181
}
164-
c.scope.Gauge("poller_in_action").Update(float64(c.concurrency.PollerPermit.Quota() - c.concurrency.PollerPermit.Count()))
182+
c.scope.Gauge("poller_in_action").Update(float64(c.concurrency.PollerPermit.Count()))
165183
c.scope.Gauge("poller_quota").Update(float64(c.concurrency.PollerPermit.Quota()))
166184
c.scope.Gauge("poller_wait_time").Update(math.Exp2(c.pollerWaitTimeInMsLog2.Average()))
185+
c.log.Debug(autoScalerEventLogMsg,
186+
zap.Time("time", c.clock.Now()),
187+
zap.String("event", string(event)),
188+
zap.Bool("enabled", c.enable.Load()),
189+
zap.Int("poller_quota", c.concurrency.PollerPermit.Quota()),
190+
zap.Int("poller_in_action", c.concurrency.PollerPermit.Count()),
191+
)
167192
}
168193

169194
func (c *ConcurrencyAutoScaler) updatePollerPermit() {
170-
updateTime := time.Now()
195+
if !c.enable.Load() { // skip update if auto scaler is disabled
196+
c.logEvent(autoScalerEventPollerSkipUpdateNotEnabled)
197+
return
198+
}
199+
updateTime := c.clock.Now()
171200
if updateTime.Before(c.pollerPermitLastUpdate.Add(c.cooldown)) { // before cooldown
201+
c.logEvent(autoScalerEventPollerSkipUpdateCooldown)
172202
return
173203
}
174204
currentQuota := c.concurrency.PollerPermit.Quota()
@@ -180,19 +210,16 @@ func (c *ConcurrencyAutoScaler) updatePollerPermit() {
180210
newQuota = c.pollerMaxCount
181211
}
182212
if newQuota == currentQuota {
183-
return
184-
}
185-
enabled := c.enable.Load()
186-
c.log.Sugar().With("applied", enabled).Infof("update poller permit: %v -> %v", currentQuota, newQuota)
187-
if !c.enable.Load() {
213+
c.logEvent(autoScalerEventPollerSkipUpdateNoChange)
188214
return
189215
}
190216
c.concurrency.PollerPermit.SetQuota(newQuota)
191217
c.pollerPermitLastUpdate = updateTime
218+
c.logEvent(autoScalerEventPollerUpdate)
192219
}
193220

194221
type rollingAverage struct {
195-
mu sync.Mutex
222+
mu sync.RWMutex
196223
window []float64
197224
index int
198225
sum float64
@@ -210,6 +237,11 @@ func (r *rollingAverage) Add(value float64) {
210237
r.mu.Lock()
211238
defer r.mu.Unlock()
212239

240+
// no-op when the rolling window has zero length
241+
if len(r.window) == 0 {
242+
return
243+
}
244+
213245
// replace the old value with the new value
214246
r.index %= len(r.window)
215247
r.sum += value - r.window[r.index]
@@ -222,8 +254,8 @@ func (r *rollingAverage) Add(value float64) {
222254
}
223255

224256
func (r *rollingAverage) Average() float64 {
225-
r.mu.Lock()
226-
defer r.mu.Unlock()
257+
r.mu.RLock()
258+
defer r.mu.RUnlock()
227259
if r.count == 0 {
228260
return 0
229261
}

0 commit comments

Comments
 (0)