@@ -24,6 +24,7 @@ import (
 	"fmt"
 	"net/http"
 	"reflect"
+	"sync/atomic"
 	"time"

 	"github.com/pkg/errors"
@@ -47,39 +48,24 @@ func (mc *MetricsCache) start() error {
 func (mc *MetricsCache) metricFeed(index int) {

 	go func() {
-		inFlight := 0
-		gotData := false
 		potentialCompletion := false
		var completeChan chan int

 		for {
 			select {
 			case _ = <-mc.stopChan:
 				return
-			case inFlight = <-mc.updatesComplete:
-				// Handle completion notifications from the update loop
-				length := mc.metricQueue.Length()
-				mc.logger.Debug(`Complete update cycle - "in-flight requests"=%d; "metric queue length"=%d\n`, inFlight, length)
-
-				// If data was sent and the queue is empty, mark as completion
-				if length == 0 && gotData {
-					switch len(mc.asyncAppendChan) {
-					case 0:
-						potentialCompletion = true
-						if completeChan != nil {
-							completeChan <- 0
-						}
-					case 1:
-						potentialCompletion = true
-					}
-				}
 			case app := <-mc.asyncAppendChan:
 				newMetrics := 0
 				dataQueued := 0
 				numPushed := 0
+				gotCompletion := false
 			inLoop:
 				for i := 0; i <= mc.cfg.BatchSize; i++ {
-					if app.metric == nil {
+					// Handle completion notifications from the update loop
+					if app.isCompletion {
+						gotCompletion = true
+					} else if app.metric == nil {
 						// Handle update completion requests (metric == nil)
 						completeChan = app.resp
 						if potentialCompletion {
@@ -88,7 +74,6 @@ func (mc *MetricsCache) metricFeed(index int) {
 					} else {
 						potentialCompletion = false
 						// Handle append requests (Add / AddFast)
-						gotData = true
 						metric := app.metric
 						metric.Lock()

@@ -103,7 +88,7 @@ func (mc *MetricsCache) metricFeed(index int) {
 							metric.setState(storeStatePreGet)
 						}
 						if metric.isReady() {
-							metric.setState(storeStateUpdate)
+							metric.setState(storeStateAboutToUpdate)
 						}

 						length := mc.metricQueue.Push(metric)
@@ -124,7 +109,22 @@ func (mc *MetricsCache) metricFeed(index int) {
 				}
 				// Notify the update loop that there are new metrics to process
 				if newMetrics > 0 {
+					atomic.AddInt64(&mc.outstandingUpdates, 1)
 					mc.newUpdates <- newMetrics
+				} else if gotCompletion {
+					inFlight := atomic.LoadInt64(&mc.requestsInFlight)
+					outstanding := atomic.LoadInt64(&mc.outstandingUpdates)
+					if outstanding == 0 && inFlight == 0 {
+						switch len(mc.asyncAppendChan) {
+						case 0:
+							potentialCompletion = true
+							if completeChan != nil {
+								completeChan <- 0
+							}
+						case 1:
+							potentialCompletion = true
+						}
+					}
 				}

 				// If we have too much work, stall the queue for some time
@@ -154,7 +154,7 @@ func (mc *MetricsCache) metricsUpdateLoop(index int) {
 				return
 			case _ = <-mc.newUpdates:
 				// Handle new metric notifications (from metricFeed)
-				for mc.updatesInFlight < mc.cfg.Workers*2 { //&& newMetrics > 0{
+				for mc.updatesInFlight < mc.cfg.Workers*2 {
 					freeSlots := mc.cfg.Workers*2 - mc.updatesInFlight
 					metrics := mc.metricQueue.PopN(freeSlots)
 					for _, metric := range metrics {
@@ -165,9 +165,11 @@ func (mc *MetricsCache) metricsUpdateLoop(index int) {
 					}
 				}

-				if mc.updatesInFlight == 0 {
-					mc.logger.Debug("Complete new update cycle - in-flight %d.\n", mc.updatesInFlight)
-					mc.updatesComplete <- 0
+				outstandingUpdates := atomic.AddInt64(&mc.outstandingUpdates, -1)
+
+				if atomic.LoadInt64(&mc.requestsInFlight) == 0 && outstandingUpdates == 0 {
+					mc.logger.Debug("Return to feed after processing newUpdates")
+					mc.asyncAppendChan <- &asyncAppend{isCompletion: true}
 				}
 			case resp := <-mc.responseChan:
 				// Handle V3IO async responses
@@ -188,6 +190,7 @@ func (mc *MetricsCache) metricsUpdateLoop(index int) {
 					if i < mc.cfg.BatchSize {
 						select {
 						case resp = <-mc.responseChan:
+							atomic.AddInt64(&mc.requestsInFlight, -1)
 						default:
 							break inLoop
 						}
@@ -206,10 +209,12 @@ func (mc *MetricsCache) metricsUpdateLoop(index int) {
 					}
 				}

+				requestsInFlight := atomic.AddInt64(&mc.requestsInFlight, -1)
+
 				// Notify the metric feeder when all in-flight tasks are done
-				if mc.updatesInFlight == 0 {
-					mc.logger.Debug("Return to feed. Metric queue length: %d", mc.metricQueue.Length())
-					mc.updatesComplete <- 0
+				if requestsInFlight == 0 && atomic.LoadInt64(&mc.outstandingUpdates) == 0 {
+					mc.logger.Debug("Return to feed after processing responseChan")
+					mc.asyncAppendChan <- &asyncAppend{isCompletion: true}
 				}
 			}
 		}
@@ -223,45 +228,70 @@ func (mc *MetricsCache) postMetricUpdates(metric *MetricState) {
 	metric.Lock()
 	defer metric.Unlock()
 	var sent bool
-	var err error

-	if metric.getState() == storeStatePreGet {
-		sent, err = metric.store.getChunksState(mc, metric)
-		if err != nil {
-			// Count errors
-			mc.performanceReporter.IncrementCounter("GetChunksStateError", 1)
-
-			mc.logger.ErrorWith("Failed to get item state", "metric", metric.Lset, "err", err)
-			setError(mc, metric, err)
-		} else {
-			metric.setState(storeStateGet)
+	// In case we are in pre get state or our data spreads across multiple partitions, get the new state for the current partition
+	if metric.getState() == storeStatePreGet ||
+		(metric.canSendRequests() && metric.shouldGetState) {
+		sent = mc.sendGetMetricState(metric)
+		if sent {
+			mc.updatesInFlight++
 		}
+	} else if metric.canSendRequests() {
+		sent = mc.writeChunksAndGetState(metric)

-	} else {
-		sent, err = metric.store.writeChunks(mc, metric)
-		if err != nil {
-			// Count errors
-			mc.performanceReporter.IncrementCounter("WriteChunksError", 1)
-
-			mc.logger.ErrorWith("Submit failed", "metric", metric.Lset, "err", err)
-			setError(mc, metric, errors.Wrap(err, "Chunk write submit failed."))
-		} else if sent {
-			metric.setState(storeStateUpdate)
-		}
 		if !sent {
 			if metric.store.samplesQueueLength() == 0 {
 				metric.setState(storeStateReady)
 			} else {
 				if mc.metricQueue.length() > 0 {
+					atomic.AddInt64(&mc.outstandingUpdates, 1)
 					mc.newUpdates <- mc.metricQueue.length()
 				}
 			}
 		}
 	}

+}
+
+func (mc *MetricsCache) sendGetMetricState(metric *MetricState) bool {
+	// If we are already in a get state, discard
+	if metric.getState() == storeStateGet {
+		return false
+	}
+
+	sent, err := metric.store.getChunksState(mc, metric)
+	if err != nil {
+		// Count errors
+		mc.performanceReporter.IncrementCounter("GetChunksStateError", 1)
+
+		mc.logger.ErrorWith("Failed to get item state", "metric", metric.Lset, "err", err)
+		setError(mc, metric, err)
+	} else {
+		metric.setState(storeStateGet)
+	}
+
+	return sent
+}
+
+func (mc *MetricsCache) writeChunksAndGetState(metric *MetricState) bool {
+	sent, err := metric.store.writeChunks(mc, metric)
+	if err != nil {
+		// Count errors
+		mc.performanceReporter.IncrementCounter("WriteChunksError", 1)
+
+		mc.logger.ErrorWith("Submit failed", "metric", metric.Lset, "err", err)
+		setError(mc, metric, errors.Wrap(err, "Chunk write submit failed."))
+	} else if sent {
+		metric.setState(storeStateUpdate)
+	} else if metric.shouldGetState {
+		// In case we didn't write any data and the metric state needs to be updated, update it straight away
+		sent = mc.sendGetMetricState(metric)
+	}
+
 	if sent {
 		mc.updatesInFlight++
 	}
+	return sent
 }

 // Handle DB responses
@@ -337,24 +367,18 @@ func (mc *MetricsCache) handleResponse(metric *MetricState, resp *v3io.Response,
 		metric.setState(storeStateReady)

 	var sent bool
-	var err error
-
-	if canWrite {
-		sent, err = metric.store.writeChunks(mc, metric)
-		if err != nil {
-			// Count errors
-			mc.performanceReporter.IncrementCounter("WriteChunksError", 1)

-			mc.logger.ErrorWith("Submit failed", "metric", metric.Lset, "err", err)
-			setError(mc, metric, errors.Wrap(err, "Chunk write submit failed."))
-		} else if sent {
-			metric.setState(storeStateUpdate)
+	// In case our data spreads across multiple partitions, get the new state for the current partition
+	if metric.shouldGetState {
+		sent = mc.sendGetMetricState(metric)
+		if sent {
 			mc.updatesInFlight++
 		}
-
+	} else if canWrite {
+		sent = mc.writeChunksAndGetState(metric)
 	} else if metric.store.samplesQueueLength() > 0 {
 		mc.metricQueue.Push(metric)
-		metric.setState(storeStateUpdate)
+		metric.setState(storeStateAboutToUpdate)
 	}

 	return sent
@@ -385,6 +409,13 @@ func (mc *MetricsCache) nameUpdateRespLoop() {
 				}

 				resp.Release()
+
+				requestsInFlight := atomic.AddInt64(&mc.requestsInFlight, -1)
+
+				if requestsInFlight == 0 && atomic.LoadInt64(&mc.outstandingUpdates) == 0 {
+					mc.logger.Debug("Return to feed after processing nameUpdateChan")
+					mc.asyncAppendChan <- &asyncAppend{isCompletion: true}
+				}
 			}
 		}
 	}()
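The common thread of these hunks is that the dedicated updatesComplete channel is replaced by two atomic counters, requestsInFlight and outstandingUpdates (presumably declared as int64 fields on MetricsCache elsewhere in this change, alongside the isCompletion field on asyncAppend and the storeStateAboutToUpdate state), plus a sentinel asyncAppend{isCompletion: true} message pushed back onto asyncAppendChan once both counters reach zero. The following stand-alone sketch shows that counter-plus-sentinel pattern in isolation; the work type, channel size, and names are illustrative only and are not part of the PR.

// Illustrative sketch only: an atomic in-flight counter plus a completion
// sentinel sent on the same request channel, instead of a separate
// "updates complete" channel.
package main

import (
	"fmt"
	"sync/atomic"
)

// work loosely mirrors asyncAppend: either a real payload or a completion sentinel.
type work struct {
	isCompletion bool
	payload      int
}

func main() {
	const n = 3
	var inFlight int64

	requests := make(chan work, n+1) // one spare slot for the sentinel

	// Account for all pending work up front, as the feed loop does before
	// handing metrics to the update loop.
	atomic.AddInt64(&inFlight, n)
	for i := 0; i < n; i++ {
		requests <- work{payload: i}
	}

	// Consumer: when the counter drops to zero, re-queue a sentinel on the
	// same channel so the loop that owns the channel can observe completion.
	for w := range requests {
		if w.isCompletion {
			fmt.Println("all in-flight work finished")
			return
		}
		fmt.Println("processed payload", w.payload)
		if atomic.AddInt64(&inFlight, -1) == 0 {
			requests <- work{isCompletion: true}
		}
	}
}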