Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 33 additions & 13 deletions conn/pool.go
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,18 @@ func (p *Pools) GetAll() []*Pool {
return pool
}

// RemoveAll removes all pool entries.
func (p *Pools) RemoveAll() {
p.Lock()
defer p.Unlock()

for k, pool := range p.all {
glog.Warningf("CONN: Disconnecting from %s\n", k)
delete(p.all, k)
pool.shutdown()
}
}

// RemoveInvalid removes invalid nodes from the list of pools.
func (p *Pools) RemoveInvalid(state *pb.MembershipState) {
// Keeps track of valid IP addresses, assigned to active nodes. We do this
Expand Down Expand Up @@ -241,11 +253,10 @@ func (p *Pool) listenToHeartbeat() error {
}()

threshold := time.Now().Add(10 * time.Second)
ticker := time.NewTicker(time.Second)
defer ticker.Stop()
ticker := time.Tick(time.Second)
for {
select {
case <-ticker.C:
case <-ticker:
// Don't check before at least 10s since start.
if time.Now().Before(threshold) {
continue
Expand Down Expand Up @@ -277,11 +288,19 @@ func (p *Pool) MonitorHealth() {

// We might have lost connection to the destination. In that case, re-dial
// the connection.
reconnect := func() {
// Returns true, if reconnection was successful
reconnect := func() bool {
reconnectionTicker := time.Tick(time.Second)
for {
time.Sleep(time.Second)
select {
case <-p.closer.HasBeenClosed():
glog.Infof("CONN: Returning from MonitorHealth for %s", p.Addr)
return false
case <-reconnectionTicker:
}

if err := p.closer.Ctx().Err(); err != nil {
return
return false
}
ctx, cancel := context.WithTimeout(p.closer.Ctx(), 10*time.Second)
conn, err := grpc.NewClient(p.Addr, p.dialOpts...)
Expand All @@ -298,7 +317,7 @@ func (p *Pool) MonitorHealth() {
}
p.conn = conn
p.Unlock()
return
return true
}
glog.Errorf("CONN: Unable to connect with %s : %s\n", p.Addr, err)
if conn != nil {
Expand All @@ -309,19 +328,20 @@ func (p *Pool) MonitorHealth() {
}
}

ticker := time.Tick(time.Second)
for {
select {
case <-p.closer.HasBeenClosed():
glog.Infof("CONN: Returning from MonitorHealth for %s", p.Addr)
return
default:
err := p.listenToHeartbeat()
if err != nil {
reconnect()
case <-ticker:
}

err := p.listenToHeartbeat()
if err != nil {
if reconnect() {
glog.Infof("CONN: Re-established connection with %s.\n", p.Addr)
}
// Sleep for a bit before retrying.
time.Sleep(echoDuration)
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion dgraph/cmd/zero/raft.go
Original file line number Diff line number Diff line change
Expand Up @@ -879,7 +879,7 @@ func (n *node) Run() {
// snapshot can cause select loop to block while deleting entries, so run
// it in goroutine
readStateCh := make(chan raft.ReadState, 100)
closer := z.NewCloser(5)
closer := z.NewCloser(4)
defer func() {
closer.SignalAndWait()
n.closer.Done()
Expand Down
4 changes: 4 additions & 0 deletions dgraph/cmd/zero/run.go
Original file line number Diff line number Diff line change
Expand Up @@ -346,6 +346,10 @@ func run() {
st.node.closer.SignalAndWait()
// Stop all internal requests.
_ = grpcListener.Close()
// Stop all pools
if pools := conn.GetPools(); pools != nil {
pools.RemoveAll()
}
}()

st.zero.closer.AddRunning(2)
Expand Down
Loading