Skip to content

Commit fb01bca

Browse files
Performance improvement on unconfirmed map (#374)
* check if the producer is closed when send * fix: extract with timeout when the difference is >= timeout * perf: reduce the locks and the average size of unconfirmed messages * test: update test timeout to reduce flakyness * perf: unconfirmed optimization * refactor: move updateStatus as ConfirmationStatus method * refactor: mark as unconfirmed with entityClosed messages that have never been sent to rabbitmq * perf: optimize allocation on handleConfirm * perf: restore simple map for unconfirmed since it is 15% faster * refactor: move to a function the unconfirmed messages due to closed client --------- Signed-off-by: Gabriele Santomaggio <[email protected]> Co-authored-by: Gabriele Santomaggio <[email protected]>
1 parent d52e281 commit fb01bca

File tree

8 files changed

+138
-112
lines changed

8 files changed

+138
-112
lines changed

Makefile

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -32,16 +32,8 @@ $(STATICCHECK):
3232
check: $(STATICCHECK)
3333
$(STATICCHECK) ./pkg/stream
3434

35-
GOMOCK ?= $(GOBIN)/mockgen
36-
GOMOCK_VERSION ?= v1.6.0
37-
$(GOMOCK):
38-
go install github.com/golang/mock/mockgen@$(GOMOCK_VERSION)
39-
40-
.PHONY: gomock
41-
gomock: $(GOMOCK)
42-
4335
NUM_PROCS ?= 2
44-
TEST_TIMEOUT ?= 2m
36+
TEST_TIMEOUT ?= 3m
4537
test: vet fmt check
4638
go run -mod=mod github.com/onsi/ginkgo/v2/ginkgo -r --procs=$(NUM_PROCS) --compilers=$(NUM_PROCS) \
4739
--randomize-all --randomize-suites \

pkg/ha/ha_publisher.go

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,6 @@ type ReliableProducer struct {
6363
producerOptions *stream.ProducerOptions
6464
count int32
6565
confirmMessageHandler ConfirmMessageHandler
66-
channelPublishConfirm stream.ChannelPublishConfirm
6766
mutex *sync.Mutex
6867
mutexStatus *sync.Mutex
6968
status int
@@ -108,8 +107,7 @@ func (p *ReliableProducer) newProducer() error {
108107

109108
return err
110109
}
111-
p.channelPublishConfirm = producer.NotifyPublishConfirmation()
112-
p.handlePublishConfirm(p.channelPublishConfirm)
110+
p.handlePublishConfirm(producer.NotifyPublishConfirmation())
113111
channelNotifyClose := producer.NotifyClose()
114112
p.handleNotifyClose(channelNotifyClose)
115113
p.producer = producer

pkg/stream/blocking_queue.go

Lines changed: 13 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -2,17 +2,17 @@ package stream
22

33
import (
44
"errors"
5-
"github.com/rabbitmq/rabbitmq-stream-go-client/pkg/logs"
65
"sync/atomic"
76
"time"
7+
8+
"github.com/rabbitmq/rabbitmq-stream-go-client/pkg/logs"
89
)
910

1011
var ErrBlockingQueueStopped = errors.New("blocking queue stopped")
1112

1213
type BlockingQueue[T any] struct {
13-
queue chan T
14-
status int32
15-
lastUpdate int64
14+
queue chan T
15+
status int32
1616
}
1717

1818
// NewBlockingQueue initializes a new BlockingQueue with the given capacity
@@ -28,7 +28,6 @@ func (bq *BlockingQueue[T]) Enqueue(item T) error {
2828
if bq.IsStopped() {
2929
return ErrBlockingQueueStopped
3030
}
31-
atomic.StoreInt64(&bq.lastUpdate, time.Now().UnixNano())
3231
bq.queue <- item
3332
return nil
3433
}
@@ -50,26 +49,22 @@ func (bq *BlockingQueue[T]) IsEmpty() bool {
5049
// Stop is different from Close in that it allows the
5150
// existing items to be processed.
5251
// Drain the queue to be sure there are not pending messages
53-
func (bq *BlockingQueue[T]) Stop() {
52+
func (bq *BlockingQueue[T]) Stop() []T {
5453
atomic.StoreInt32(&bq.status, 1)
5554
// drain the queue. To be sure there are not pending messages
56-
// in the queue.
57-
// it does not matter if we lose some messages here
58-
// since there is the unConfirmed map to handle the messages
59-
isActive := true
60-
for isActive {
55+
// in the queue and return to the caller the remaining pending messages
56+
msgInQueue := make([]T, 0, len(bq.queue))
57+
outer:
58+
for {
6159
select {
62-
case <-bq.queue:
63-
// do nothing
60+
case msg := <-bq.queue:
61+
msgInQueue = append(msgInQueue, msg)
6462
case <-time.After(10 * time.Millisecond):
65-
isActive = false
66-
return
67-
default:
68-
isActive = false
69-
return
63+
break outer
7064
}
7165
}
7266
logs.LogDebug("BlockingQueue stopped")
67+
return msgInQueue
7368
}
7469

7570
func (bq *BlockingQueue[T]) Close() {

pkg/stream/constants.go

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -81,9 +81,8 @@ const (
8181
responseCodeNoOffset = uint16(19)
8282

8383
/// responses out of protocol
84-
closeChannel = uint16(60)
85-
connectionCloseError = uint16(61)
86-
timeoutError = uint16(62)
84+
timeoutError = uint16(62)
85+
entityClosed = uint16(63)
8786

8887
///
8988
defaultSocketCallTimeout = 10 * time.Second
@@ -194,6 +193,9 @@ func lookErrorCode(errorCode uint16) error {
194193
return AuthenticationFailureLoopbackError
195194
case timeoutError:
196195
return ConfirmationTimoutError
196+
case entityClosed:
197+
return AlreadyClosed
198+
197199
default:
198200
{
199201
logs.LogWarn("Error not handled %d", errorCode)

pkg/stream/producer.go

Lines changed: 74 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,12 @@ import (
44
"bufio"
55
"bytes"
66
"fmt"
7-
"github.com/rabbitmq/rabbitmq-stream-go-client/pkg/logs"
8-
"github.com/rabbitmq/rabbitmq-stream-go-client/pkg/message"
97
"sync"
108
"sync/atomic"
119
"time"
10+
11+
"github.com/rabbitmq/rabbitmq-stream-go-client/pkg/logs"
12+
"github.com/rabbitmq/rabbitmq-stream-go-client/pkg/message"
1213
)
1314

1415
type ConfirmationStatus struct {
@@ -50,7 +51,17 @@ func (cs *ConfirmationStatus) GetErrorCode() uint16 {
5051
return cs.errorCode
5152
}
5253

54+
func (cs *ConfirmationStatus) updateStatus(errorCode uint16, confirmed bool) {
55+
cs.confirmed = confirmed
56+
if confirmed {
57+
return
58+
}
59+
cs.errorCode = errorCode
60+
cs.err = lookErrorCode(errorCode)
61+
}
62+
5363
type messageSequence struct {
64+
sourceMsg message.StreamMessage
5465
messageBytes []byte
5566
publishingId int64
5667
filterValue string
@@ -193,18 +204,10 @@ func NewProducerOptions() *ProducerOptions {
193204
}
194205
}
195206

196-
func (producer *Producer) GetUnConfirmed() map[int64]*ConfirmationStatus {
197-
return producer.unConfirmed.getAll()
198-
}
199-
200207
func (po *ProducerOptions) isSubEntriesBatching() bool {
201208
return po.SubEntrySize > 1
202209
}
203210

204-
func (producer *Producer) lenUnConfirmed() int {
205-
return producer.unConfirmed.size()
206-
}
207-
208211
// NotifyPublishConfirmation returns a channel that receives the confirmation status of the messages sent by the producer.
209212
func (producer *Producer) NotifyPublishConfirmation() ChannelPublishConfirm {
210213
ch := make(chan []*ConfirmationStatus, 1)
@@ -292,13 +295,17 @@ func (producer *Producer) processPendingSequencesQueue() {
292295
var lastError error
293296

294297
if producer.pendingSequencesQueue.IsStopped() {
298+
// add also the last message to sequenceToSend
299+
// otherwise it will be lost
300+
sequenceToSend = append(sequenceToSend, msg)
295301
break
296302
}
297303
// There is something in the queue. Checks the buffer is still less than the maxFrame
298304
totalBufferToSend += len(msg.messageBytes)
299305
if totalBufferToSend > maxFrame {
300306
// if the totalBufferToSend is greater than the requestedMaxFrameSize
301307
// the producer sends the messages and reset the buffer
308+
producer.unConfirmed.addFromSequences(sequenceToSend, producer.GetID())
302309
lastError = producer.internalBatchSend(sequenceToSend)
303310
sequenceToSend = sequenceToSend[:0]
304311
totalBufferToSend = initBufferPublishSize
@@ -310,6 +317,7 @@ func (producer *Producer) processPendingSequencesQueue() {
310317
// the messages during the checks of the buffer. In this case
311318
if producer.pendingSequencesQueue.IsEmpty() || len(sequenceToSend) >= producer.options.BatchSize {
312319
if len(sequenceToSend) > 0 {
320+
producer.unConfirmed.addFromSequences(sequenceToSend, producer.GetID())
313321
lastError = producer.internalBatchSend(sequenceToSend)
314322
sequenceToSend = sequenceToSend[:0]
315323
totalBufferToSend += initBufferPublishSize
@@ -323,13 +331,36 @@ func (producer *Producer) processPendingSequencesQueue() {
323331
// just in case there are messages in the buffer
324332
// not matter is sent or not the messages will be timed out
325333
if len(sequenceToSend) > 0 {
326-
_ = producer.internalBatchSend(sequenceToSend)
334+
producer.markUnsentAsUnconfirmed(sequenceToSend)
327335
}
328336

329337
}()
330338
logs.LogDebug("producer %d processPendingSequencesQueue closed", producer.id)
331339
}
332340

341+
func (producer *Producer) markUnsentAsUnconfirmed(sequences []*messageSequence) {
342+
if len(sequences) == 0 {
343+
return
344+
}
345+
346+
// Send as unconfirmed the messages in the pendingSequencesQueue,
347+
// that have never been sent,
348+
// with the "entityClosed" error.
349+
confirms := make([]*ConfirmationStatus, 0, len(sequences))
350+
for _, ps := range sequences {
351+
cs := &ConfirmationStatus{
352+
inserted: time.Now(),
353+
message: ps.sourceMsg,
354+
producerID: producer.GetID(),
355+
publishingId: ps.publishingId,
356+
confirmed: false,
357+
}
358+
cs.updateStatus(entityClosed, false)
359+
confirms = append(confirms, cs)
360+
}
361+
producer.sendConfirmationStatus(confirms)
362+
}
363+
333364
func (producer *Producer) assignPublishingID(message message.StreamMessage) int64 {
334365
sequence := message.GetPublishingId()
335366
// in case of sub entry the deduplication is disabled
@@ -349,10 +380,12 @@ func (producer *Producer) fromMessageToMessageSequence(streamMessage message.Str
349380
if producer.options.IsFilterEnabled() {
350381
filterValue = producer.options.Filter.FilterValue(streamMessage)
351382
}
352-
msqSeq := &messageSequence{}
353-
msqSeq.messageBytes = marshalBinary
354-
msqSeq.publishingId = seq
355-
msqSeq.filterValue = filterValue
383+
msqSeq := &messageSequence{
384+
sourceMsg: streamMessage,
385+
messageBytes: marshalBinary,
386+
publishingId: seq,
387+
filterValue: filterValue,
388+
}
356389
return msqSeq, nil
357390
}
358391

@@ -366,9 +399,13 @@ func (producer *Producer) Send(streamMessage message.StreamMessage) error {
366399
if err != nil {
367400
return err
368401
}
369-
producer.unConfirmed.addFromSequence(messageSeq, &streamMessage, producer.GetID())
402+
if producer.getStatus() == closed {
403+
producer.markUnsentAsUnconfirmed([]*messageSequence{messageSeq})
404+
return fmt.Errorf("producer id: %d closed", producer.id)
405+
}
370406

371407
if len(messageSeq.messageBytes) > defaultMaxFrameSize {
408+
producer.unConfirmed.addFromSequences([]*messageSequence{messageSeq}, producer.GetID())
372409
tooLarge := producer.unConfirmed.extractWithError(messageSeq.publishingId, responseCodeFrameTooLarge)
373410
producer.sendConfirmationStatus([]*ConfirmationStatus{tooLarge})
374411
return FrameTooLarge
@@ -377,7 +414,7 @@ func (producer *Producer) Send(streamMessage message.StreamMessage) error {
377414
// se the processPendingSequencesQueue function
378415
err = producer.pendingSequencesQueue.Enqueue(messageSeq)
379416
if err != nil {
380-
return fmt.Errorf("error during enqueue message: %s. Message will be in timed. Producer id: %d ", err, producer.id)
417+
return fmt.Errorf("error during enqueue message: %s pending queue closed. Producer id: %d ", err, producer.id)
381418
}
382419
return nil
383420
}
@@ -389,32 +426,40 @@ func (producer *Producer) Send(streamMessage message.StreamMessage) error {
389426
// returns an error if the message could not be sent for marshal problems or if the buffer is too large
390427
func (producer *Producer) BatchSend(batchMessages []message.StreamMessage) error {
391428
maxFrame := defaultMaxFrameSize
392-
var messagesSequence = make([]*messageSequence, 0)
429+
var messagesSequences = make([]*messageSequence, 0, len(batchMessages))
393430
totalBufferToSend := 0
431+
394432
for _, batchMessage := range batchMessages {
395433
messageSeq, err := producer.fromMessageToMessageSequence(batchMessage)
396434
if err != nil {
397435
return err
398436
}
399-
producer.unConfirmed.addFromSequence(messageSeq, &batchMessage, producer.GetID())
400437

401438
totalBufferToSend += len(messageSeq.messageBytes)
402-
messagesSequence = append(messagesSequence, messageSeq)
439+
messagesSequences = append(messagesSequences, messageSeq)
440+
}
441+
442+
if producer.getStatus() == closed {
443+
producer.markUnsentAsUnconfirmed(messagesSequences)
444+
return fmt.Errorf("producer id: %d closed", producer.id)
445+
}
446+
447+
if len(messagesSequences) > 0 {
448+
producer.unConfirmed.addFromSequences(messagesSequences, producer.GetID())
403449
}
404-
//
405450

406451
if totalBufferToSend+initBufferPublishSize > maxFrame {
407452
// if the totalBufferToSend is greater than the requestedMaxFrameSize
408453
// all the messages are unconfirmed
409454

410-
for _, msg := range messagesSequence {
455+
for _, msg := range messagesSequences {
411456
m := producer.unConfirmed.extractWithError(msg.publishingId, responseCodeFrameTooLarge)
412457
producer.sendConfirmationStatus([]*ConfirmationStatus{m})
413458
}
414459
return FrameTooLarge
415460
}
416461

417-
return producer.internalBatchSend(messagesSequence)
462+
return producer.internalBatchSend(messagesSequences)
418463
}
419464

420465
func (producer *Producer) GetID() uint8 {
@@ -605,7 +650,6 @@ func (producer *Producer) close(reason Event) error {
605650
logs.LogDebug("producer options is nil, the close will be ignored")
606651
return nil
607652
}
608-
_, _ = producer.options.client.coordinator.ExtractProducerById(producer.id)
609653

610654
if !producer.options.client.socket.isOpen() {
611655
return fmt.Errorf("tcp connection is closed")
@@ -616,6 +660,8 @@ func (producer *Producer) close(reason Event) error {
616660
_ = producer.options.client.deletePublisher(producer.id)
617661
}
618662

663+
_, _ = producer.options.client.coordinator.ExtractProducerById(producer.id)
664+
619665
if producer.options.client.coordinator.ProducersCount() == 0 {
620666
_ = producer.options.client.Close()
621667
}
@@ -635,7 +681,8 @@ func (producer *Producer) stopAndWaitPendingSequencesQueue() {
635681

636682
// Stop the pendingSequencesQueue, so the producer can't send messages anymore
637683
// but the producer can still handle the inflight messages
638-
producer.pendingSequencesQueue.Stop()
684+
pendingSequences := producer.pendingSequencesQueue.Stop()
685+
producer.markUnsentAsUnconfirmed(pendingSequences)
639686

640687
// Stop the confirmationTimeoutTicker. It will flush the unconfirmed messages
641688
producer.confirmationTimeoutTicker.Stop()
@@ -657,9 +704,9 @@ func (producer *Producer) waitForInflightMessages() {
657704

658705
tentatives := 0
659706

660-
for (producer.lenUnConfirmed() > 0) && tentatives < 5 {
707+
for (producer.unConfirmed.size() > 0) && tentatives < 5 {
661708
logs.LogInfo("wait inflight messages - unconfirmed len: %d - retry: %d",
662-
producer.lenUnConfirmed(), tentatives)
709+
producer.unConfirmed.size(), tentatives)
663710
producer.flushUnConfirmedMessages()
664711
time.Sleep(time.Duration(500) * time.Millisecond)
665712
tentatives++

0 commit comments

Comments
 (0)