diff --git a/pulsar/consumer_multitopic_test.go b/pulsar/consumer_multitopic_test.go
index cd236ecc2..712efc7d7 100644
--- a/pulsar/consumer_multitopic_test.go
+++ b/pulsar/consumer_multitopic_test.go
@@ -24,12 +24,13 @@ import (
 	"testing"
 	"time"
 
+	"github.com/stretchr/testify/assert"
+
 	"github.com/apache/pulsar-client-go/pulsar/internal"
 	pb "github.com/apache/pulsar-client-go/pulsar/internal/pulsar_proto"
 	"github.com/apache/pulsar-client-go/pulsaradmin"
 	"github.com/apache/pulsar-client-go/pulsaradmin/pkg/admin/config"
 	"github.com/apache/pulsar-client-go/pulsaradmin/pkg/utils"
-	"github.com/stretchr/testify/assert"
 )
 
 func TestMultiTopicConsumerReceive(t *testing.T) {
@@ -366,6 +367,10 @@ func (dummyConnection) IsProxied() bool {
 	return false
 }
 
+func (dummyConnection) Closed() bool {
+	return false
+}
+
 func TestMultiTopicAckIDListTimeout(t *testing.T) {
 	topic := fmt.Sprintf("multiTopicAckIDListTimeout%v", time.Now().UnixNano())
 	assert.NoError(t, createPartitionedTopic(topic, 5))
diff --git a/pulsar/internal/connection.go b/pulsar/internal/connection.go
index 57fc72419..3f622aad5 100644
--- a/pulsar/internal/connection.go
+++ b/pulsar/internal/connection.go
@@ -88,6 +88,7 @@ type Connection interface {
 	Close()
 	WaitForClose() <-chan struct{}
 	IsProxied() bool
+	Closed() bool
 }
 
 type ConsumerHandler interface {
@@ -658,7 +659,7 @@ func (c *connection) SendRequestNoWait(req *pb.BaseCommand) error {
 }
 
 func (c *connection) internalSendRequest(req *request) {
-	if c.closed() {
+	if c.Closed() {
 		c.log.Warnf("internalSendRequest failed for connectionClosed")
 		if req.callback != nil {
 			req.callback(req.cmd, ErrConnectionClosed)
@@ -1064,7 +1065,7 @@ func (c *connection) setStateClosed() {
 	c.state.Store(int32(connectionClosed))
 }
 
-func (c *connection) closed() bool {
+func (c *connection) Closed() bool {
 	return connectionClosed == c.getState()
 }
 
diff --git a/pulsar/internal/connection_pool.go b/pulsar/internal/connection_pool.go
index cd082188b..363c7731e 100644
--- a/pulsar/internal/connection_pool.go
+++ b/pulsar/internal/connection_pool.go
@@ -103,7 +103,7 @@ func (p *connectionPool) GetConnection(logicalAddr *url.URL, physicalAddr *url.U
 		// When the current connection is in a closed state or the broker actively notifies that the
 		// current connection is closed, we need to remove the connection object from the current
 		// connection pool and create a new connection.
-		if conn.closed() {
+		if conn.Closed() {
 			p.log.Debugf("Removed connection from pool key=%s logical_addr=%+v physical_addr=%+v",
 				key, conn.logicalAddr, conn.physicalAddr)
 			delete(p.connections, key)
diff --git a/pulsar/internal/connection_reader.go b/pulsar/internal/connection_reader.go
index c2541f68c..2397f29a1 100644
--- a/pulsar/internal/connection_reader.go
+++ b/pulsar/internal/connection_reader.go
@@ -21,8 +21,9 @@ import (
 	"fmt"
 	"io"
 
-	pb "github.com/apache/pulsar-client-go/pulsar/internal/pulsar_proto"
 	"google.golang.org/protobuf/proto"
+
+	pb "github.com/apache/pulsar-client-go/pulsar/internal/pulsar_proto"
 )
 
 type connectionReader struct {
@@ -41,7 +42,7 @@ func (r *connectionReader) readFromConnection() {
 	for {
 		cmd, headersAndPayload, err := r.readSingleCommand()
 		if err != nil {
-			if !r.cnx.closed() {
+			if !r.cnx.Closed() {
 				r.cnx.log.WithError(err).Infof("Error reading from connection")
 				r.cnx.Close()
 			}
@@ -122,7 +123,7 @@ func (r *connectionReader) readAtLeast(size uint32) error {
 	n, err := io.ReadAtLeast(r.cnx.cnx, r.buffer.WritableSlice(), int(size))
 	if err != nil {
 		// has the connection been closed?
-		if r.cnx.closed() {
+		if r.cnx.Closed() {
 			return errConnectionClosed
 		}
 		r.cnx.Close()
diff --git a/pulsar/producer_partition.go b/pulsar/producer_partition.go
index 448f780cf..bf53a0b76 100755
--- a/pulsar/producer_partition.go
+++ b/pulsar/producer_partition.go
@@ -581,7 +581,8 @@ func (p *partitionProducer) runEventsLoop() {
 			}
 		case connectionClosed := <-p.connectClosedCh:
 			p.log.Info("runEventsLoop will reconnect in producer")
-			p.reconnectToBroker(connectionClosed)
+			// reconnect to broker in a new goroutine so that it won't block the event loop, see issue #1332
+			go p.reconnectToBroker(connectionClosed)
 		case <-p.batchFlushTicker.C:
 			p.internalFlushCurrentBatch()
 		}
@@ -902,7 +903,15 @@ func (p *partitionProducer) writeData(buffer internal.Buffer, sequenceID uint64,
 			sequenceID:   sequenceID,
 			sendRequests: callbacks,
 		})
-		p._getConn().WriteData(buffer)
+
+		// If the connection is closed, stop sending data. Continuing to send data
+		// to a closed connection will cause the buffer to be passed to it, which
+		// prevents further processing.
+		conn := p._getConn()
+		if conn.Closed() {
+			return
+		}
+		conn.WriteData(buffer)
 	}
 }
 
@@ -1737,10 +1746,10 @@ func (i *pendingItem) done(err error) {
 		return
 	}
 	i.isDone = true
-	buffersPool.Put(i.buffer)
 	if i.flushCallback != nil {
 		i.flushCallback(err)
 	}
+	buffersPool.Put(i.buffer)
 }
 
 // _setConn sets the internal connection field of this partition producer atomically.
diff --git a/pulsar/producer_test.go b/pulsar/producer_test.go
index b82792996..32f026d72 100644
--- a/pulsar/producer_test.go
+++ b/pulsar/producer_test.go
@@ -30,6 +30,8 @@ import (
 	"testing"
 	"time"
 
+	"github.com/docker/docker/api/types/container"
+	"github.com/docker/go-connections/nat"
 	"github.com/stretchr/testify/require"
 	"github.com/testcontainers/testcontainers-go"
 	"github.com/testcontainers/testcontainers-go/wait"
@@ -2575,6 +2577,104 @@ func TestProducerKeepReconnectingAndThenCallClose(t *testing.T) {
 	}, 30*time.Second, 1*time.Second)
 }
 
+func TestProducerKeepReconnectingAndThenCallSendAsync(t *testing.T) {
+	req := testcontainers.ContainerRequest{
+		Image:        getPulsarTestImage(),
+		ExposedPorts: []string{"6650/tcp", "8080/tcp"},
+		WaitingFor:   wait.ForExposedPort(),
+		Cmd:          []string{"bin/pulsar", "standalone", "-nfw"},
+		// use fixed port binding so that it can be reconnected after restart
+		HostConfigModifier: func(hostConfig *container.HostConfig) {
+			hostConfig.PortBindings = map[nat.Port][]nat.PortBinding{
+				"6650/tcp": {{HostIP: "0.0.0.0", HostPort: "6650"}},
+				"8080/tcp": {{HostIP: "0.0.0.0", HostPort: "8080"}},
+			}
+		},
+	}
+	c, err := testcontainers.GenericContainer(context.Background(), testcontainers.GenericContainerRequest{
+		ContainerRequest: req,
+		Started:          true,
+	})
+	require.NoError(t, err, "Failed to start the pulsar container")
+	endpoint, err := c.PortEndpoint(context.Background(), "6650", "pulsar")
+	require.NoError(t, err, "Failed to get the pulsar endpoint")
+
+	client, err := NewClient(ClientOptions{
+		URL:               endpoint,
+		ConnectionTimeout: 5 * time.Second,
+		OperationTimeout:  5 * time.Second,
+	})
+	require.NoError(t, err)
+	defer client.Close()
+
+	var testProducer Producer
+	require.Eventually(t, func() bool {
+		testProducer, err = client.CreateProducer(ProducerOptions{
+			Topic:       newTopicName(),
+			Schema:      NewBytesSchema(nil),
+			SendTimeout: 3 * time.Second,
+		})
+		return err == nil
+	}, 30*time.Second, 1*time.Second)
+
+	// send a message
+	errChan := make(chan error)
+	defer close(errChan)
+
+	testProducer.SendAsync(context.Background(), &ProducerMessage{
+		Payload: []byte("test"),
+	}, func(_ MessageID, _ *ProducerMessage, err error) {
+		errChan <- err
+	})
+	// should succeed
+	err = <-errChan
+	require.NoError(t, err)
+
+	// stop pulsar server
+	timeout := 10 * time.Second
+	_ = c.Stop(context.Background(), &timeout)
+
+	// send again
+	testProducer.SendAsync(context.Background(), &ProducerMessage{
+		Payload: []byte("test"),
+	}, func(_ MessageID, _ *ProducerMessage, err error) {
+		errChan <- err
+	})
+	// should get a timeout error
+	err = <-errChan
+	require.True(t, errors.Is(err, ErrSendTimeout))
+
+	oldConn := testProducer.(*producer).producers[0].(*partitionProducer)._getConn()
+	// restart pulsar server
+	err = c.Start(context.Background())
+	require.NoError(t, err)
+	defer c.Terminate(context.Background())
+
+	// wait for reconnection success
+	waitTime := 0
+	for {
+		newConn := testProducer.(*producer).producers[0].(*partitionProducer)._getConn()
+		if oldConn != newConn {
+			break
+		}
+		time.Sleep(5 * time.Second)
+		waitTime += 5
+		if waitTime > 60 {
+			break
+		}
+	}
+
+	// send again
+	testProducer.SendAsync(context.Background(), &ProducerMessage{
+		Payload: []byte("test"),
+	}, func(_ MessageID, _ *ProducerMessage, err error) {
+		errChan <- err
+	})
+	// should succeed
+	err = <-errChan
+	require.NoError(t, err)
+}
+
 func TestSelectConnectionForSameProducer(t *testing.T) {
 	topicName := newTopicName()