Skip to content

Commit af4a223

Browse files
authored
Merge pull request #116 from kaleido-io/add_logs
fix: race condition when reading and writing to inflight queue
2 parents 5a5eed6 + da86f21 commit af4a223

File tree

2 files changed

+34
-15
lines changed

2 files changed

+34
-15
lines changed

pkg/txhandler/simple/policyloop.go

+27-12
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,8 @@ func (sth *simpleTransactionHandler) markInflightUpdate() {
103103
}
104104

105105
func (sth *simpleTransactionHandler) updateInflightSet(ctx context.Context) bool {
106+
sth.inflightRWMux.Lock()
107+
defer sth.inflightRWMux.Unlock()
106108

107109
oldInflight := sth.inflight
108110
sth.inflight = make([]*pendingState, 0, len(oldInflight))
@@ -118,6 +120,7 @@ func (sth *simpleTransactionHandler) updateInflightSet(ctx context.Context) bool
118120

119121
// If we are not at maximum, then query if there are more candidates now
120122
spaces := sth.maxInFlight - len(sth.inflight)
123+
log.L(sth.ctx).Tracef("Number of spaces left '%v'", spaces)
121124
if spaces > 0 {
122125
var after string
123126
if len(sth.inflight) > 0 {
@@ -152,7 +155,7 @@ func (sth *simpleTransactionHandler) updateInflightSet(ctx context.Context) bool
152155
}
153156
newLen := len(sth.inflight)
154157
if newLen > 0 {
155-
log.L(ctx).Debugf("Inflight set updated len=%d head-seq=%s tail-seq=%s old-tail=%s", len(sth.inflight), sth.inflight[0].mtx.SequenceID, sth.inflight[newLen-1].mtx.SequenceID, after)
158+
log.L(ctx).Debugf("Inflight set updated with %d additional transactions, length is now %d head-id:%s head-seq=%s tail-id:%s tail-seq=%s old-tail=%s", len(additional), len(sth.inflight), sth.inflight[0].mtx.ID, sth.inflight[0].mtx.SequenceID, sth.inflight[newLen-1].mtx.ID, sth.inflight[newLen-1].mtx.SequenceID, after)
156159
}
157160
}
158161
sth.setTransactionInflightQueueMetrics(ctx)
@@ -161,6 +164,7 @@ func (sth *simpleTransactionHandler) updateInflightSet(ctx context.Context) bool
161164
}
162165

163166
func (sth *simpleTransactionHandler) policyLoopCycle(ctx context.Context, inflightStale bool) {
167+
log.L(ctx).Tracef("policyLoopCycle triggered inflightStatle=%v", inflightStale)
164168

165169
// Process any synchronous commands first - these might not be in our inflight set
166170
sth.processPolicyAPIRequests(ctx)
@@ -170,9 +174,12 @@ func (sth *simpleTransactionHandler) policyLoopCycle(ctx context.Context, inflig
170174
return
171175
}
172176
}
173-
// Go through executing the policy engine against them
174177

178+
sth.inflightRWMux.RLock()
179+
defer sth.inflightRWMux.RUnlock()
180+
// Go through executing the policy engine against them
175181
for _, pending := range sth.inflight {
182+
log.L(ctx).Tracef("Executing policy against tx-id=%v", pending.mtx.ID)
176183
err := sth.execPolicy(ctx, pending, nil)
177184
if err != nil {
178185
log.L(ctx).Errorf("Failed policy cycle transaction=%s operation=%s: %s", pending.mtx.TransactionHash, pending.mtx.ID, err)
@@ -204,13 +211,17 @@ func (sth *simpleTransactionHandler) processPolicyAPIRequests(ctx context.Contex
204211

205212
for _, request := range requests {
206213
var pending *pendingState
214+
215+
sth.inflightRWMux.RLock()
207216
// If this transaction is in-flight, we use that record
208217
for _, inflight := range sth.inflight {
209218
if inflight != nil && inflight.mtx != nil && inflight.mtx.ID == request.txID {
210219
pending = inflight
211220
break
212221
}
213222
}
223+
sth.inflightRWMux.RUnlock()
224+
// If this transaction is in-flight, we use that record
214225
if pending == nil {
215226
mtx, err := sth.getTransactionByID(ctx, request.txID)
216227
if err != nil {
@@ -246,7 +257,9 @@ func (sth *simpleTransactionHandler) processPolicyAPIRequests(ctx context.Contex
246257
func (sth *simpleTransactionHandler) pendingToRunContext(baseCtx context.Context, pending *pendingState, syncRequest *policyEngineAPIRequestType) (ctx *RunContext, err error) {
247258

248259
// Take a snapshot of the pending state under the lock
249-
sth.mux.Lock()
260+
pending.mux.Lock()
261+
defer pending.mux.Unlock()
262+
250263
mtx := pending.mtx
251264
ctx = &RunContext{
252265
Context: baseCtx,
@@ -267,7 +280,6 @@ func (sth *simpleTransactionHandler) pendingToRunContext(baseCtx context.Context
267280
ctx.UpdateType = Update // might change to delete later
268281
ctx.TXUpdates.DeleteRequested = mtx.DeleteRequested
269282
}
270-
sth.mux.Unlock()
271283

272284
// Process any state updates that were queued to us from notifications from the confirmation manager
273285
if receiptNotify != nil {
@@ -279,11 +291,9 @@ func (sth *simpleTransactionHandler) pendingToRunContext(baseCtx context.Context
279291
sth.incTransactionOperationCounter(ctx, pending.mtx.Namespace(ctx), "received_receipt")
280292

281293
// Clear the notification (as long as no other came through)
282-
sth.mux.Lock()
283294
if pending.receiptNotify == receiptNotify {
284295
pending.receiptNotify = nil
285296
}
286-
sth.mux.Unlock()
287297
}
288298

289299
if confirmNotify != nil && ctx.Confirmations != nil {
@@ -297,11 +307,9 @@ func (sth *simpleTransactionHandler) pendingToRunContext(baseCtx context.Context
297307
}
298308

299309
// Clear the notification (as long as no other came through)
300-
sth.mux.Lock()
301310
if pending.confirmNotify == confirmNotify {
302311
pending.confirmNotify = nil
303312
}
304-
sth.mux.Unlock()
305313
}
306314

307315
return ctx, nil
@@ -318,6 +326,7 @@ func (sth *simpleTransactionHandler) execPolicy(baseCtx context.Context, pending
318326
completed := false
319327
switch {
320328
case ctx.Confirmed && ctx.SyncAction != ActionDelete:
329+
log.L(sth.ctx).Tracef("Transaction '%s' confirmed", ctx.TX.ID)
321330
completed = true
322331
ctx.UpdateType = Update
323332
if ctx.Receipt != nil && ctx.Receipt.Success {
@@ -481,44 +490,50 @@ func (sth *simpleTransactionHandler) policyEngineAPIRequest(ctx context.Context,
481490

482491
func (sth *simpleTransactionHandler) HandleTransactionConfirmations(ctx context.Context, txID string, notification *apitypes.ConfirmationsNotification) (err error) {
483492
// Will be picked up on the next policy loop cycle
493+
sth.inflightRWMux.RLock()
484494
var pending *pendingState
485495
for _, p := range sth.inflight {
486496
if p != nil && p.mtx != nil && p.mtx.ID == txID {
487497
pending = p
488498
break
489499
}
490500
}
501+
sth.inflightRWMux.RUnlock()
491502
if pending == nil {
492503
err = i18n.NewError(ctx, tmmsgs.MsgTransactionNotFound, txID)
493504
return
494505
}
495-
sth.mux.Lock()
506+
pending.mux.Lock()
496507
pending.confirmed = notification.Confirmed
497508
pending.confirmNotify = fftypes.Now()
498509
pending.confirmations = notification
510+
pending.mux.Unlock()
499511
log.L(ctx).Infof("Received %d confirmations (resync=%t)", len(notification.Confirmations), notification.NewFork)
500-
sth.mux.Unlock()
501512

502513
sth.markInflightUpdate()
503514
return
504515
}
505516
func (sth *simpleTransactionHandler) HandleTransactionReceiptReceived(ctx context.Context, txID string, receipt *ffcapi.TransactionReceiptResponse) (err error) {
517+
log.L(ctx).Tracef("Handle transaction receipt received %s", txID)
518+
sth.inflightRWMux.RLock()
506519
var pending *pendingState
507520
for _, p := range sth.inflight {
508521
if p != nil && p.mtx != nil && p.mtx.ID == txID {
509522
pending = p
510523
break
511524
}
512525
}
526+
sth.inflightRWMux.RUnlock()
513527
if pending == nil {
514528
err = i18n.NewError(ctx, tmmsgs.MsgTransactionNotFound, txID)
515529
return
516530
}
531+
pending.mux.Lock()
517532
// Will be picked up on the next policy loop cycle - guaranteed to occur before Confirmed
518-
sth.mux.Lock()
519533
pending.receiptNotify = fftypes.Now()
520534
pending.receipt = receipt
521-
sth.mux.Unlock()
535+
pending.mux.Unlock()
536+
// Will be picked up on the next policy loop cycle - guaranteed to occur before Confirmed
522537
sth.markInflightUpdate()
523538
return
524539
}

pkg/txhandler/simple/simple_transaction_handler.go

+7-3
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright © 2023 Kaleido, Inc.
1+
// Copyright © 2024 Kaleido, Inc.
22
//
33
// SPDX-License-Identifier: Apache-2.0
44
//
@@ -176,7 +176,8 @@ type simpleTransactionHandler struct {
176176
policyLoopDone chan struct{}
177177
inflightStale chan bool
178178
inflightUpdate chan bool
179-
mux sync.Mutex
179+
mux sync.RWMutex
180+
inflightRWMux sync.RWMutex
180181
inflight []*pendingState
181182
policyEngineAPIRequests []*policyEngineAPIRequest
182183
maxInFlight int
@@ -195,6 +196,9 @@ type pendingState struct {
195196
confirmNotify *fftypes.FFTime
196197
remove bool
197198
subStatus apitypes.TxSubStatus
199+
// This mutex only works in a slice when the slice contains a pointer to this struct
200+
// appends to a slice copy memory but when storing pointers it does not
201+
mux sync.Mutex
198202
}
199203

200204
type simplePolicyInfo struct {
@@ -344,8 +348,8 @@ func (sth *simpleTransactionHandler) createManagedTx(ctx context.Context, txID s
344348
}
345349

346350
func (sth *simpleTransactionHandler) submitTX(ctx *RunContext) (reason ffcapi.ErrorReason, err error) {
347-
348351
mtx := ctx.TX
352+
349353
mtx.GasPrice, err = sth.getGasPrice(ctx, sth.toolkit.Connector)
350354
if err != nil {
351355
ctx.AddSubStatusAction(apitypes.TxActionRetrieveGasPrice, nil, fftypes.JSONAnyPtr(`{"error":"`+err.Error()+`"}`))

0 commit comments

Comments
 (0)