Skip to content

Commit 0fa47fe

Browse files
committed
Added check to ensure all nodes acknowledge each-other after topology change.
1 parent e948ceb commit 0fa47fe

1 file changed

Lines changed: 64 additions & 0 deletions

File tree

deployment/dockerdeploy/deployer_newaddremove.go

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ import (
1010
"time"
1111

1212
"github.com/Azure/azure-sdk-for-go/sdk/azcore/to"
13+
"github.com/couchbase/gocbcorex/cbhttpx"
14+
"github.com/couchbase/gocbcorex/cbmgmtx"
1315
"github.com/couchbaselabs/cbdinocluster/clusterdef"
1416
"github.com/couchbaselabs/cbdinocluster/utils/clustercontrol"
1517
"github.com/couchbaselabs/cbdinocluster/utils/dinocerts"
@@ -777,6 +779,68 @@ func (d *Deployer) addRemoveNodes(
777779
return nil, errors.Wrap(err, "failed to get post-rebalance cluster info")
778780
}
779781

782+
// Wait until all nodes in the cluster acknowledge each other
783+
d.logger.Info("waiting for all nodes to acknowledge each other")
784+
785+
// First we fetch all the OTPs for nodes we know about in cbdinocluster
786+
thisClusterEx, err := d.getClusterInfoEx(ctx, thisCluster)
787+
if err != nil {
788+
return nil, errors.Wrap(err, "failed to get post-rebalance extended cluster info")
789+
}
790+
791+
allNodeOtps := make([]string, 0, len(thisClusterEx.NodesEx))
792+
for _, node := range thisClusterEx.NodesEx {
793+
allNodeOtps = append(allNodeOtps, node.OTPNode)
794+
}
795+
796+
// Now we check that all nodes acknowledge all other nodes using OTPs to match
797+
for {
798+
allNodesAcked := true
799+
800+
for _, node := range thisClusterEx.NodesEx {
801+
mgmt := &cbmgmtx.Management{
802+
Transport: http.DefaultTransport,
803+
Endpoint: fmt.Sprintf("http://%s:8091", node.IPAddress),
804+
Auth: cbhttpx.BasicAuth{
805+
Username: "Administrator",
806+
Password: "password",
807+
},
808+
}
809+
810+
nodeConfig, err := mgmt.GetClusterConfig(ctx, &cbmgmtx.GetClusterConfigOptions{})
811+
if err != nil {
812+
return nil, errors.Wrap(err, "failed to get cluster config from a node")
813+
}
814+
815+
nodeNodeOtps := make([]string, 0, len(nodeConfig.Nodes))
816+
for _, nodeConfigNode := range nodeConfig.Nodes {
817+
nodeNodeOtps = append(nodeNodeOtps, nodeConfigNode.OTPNode)
818+
}
819+
820+
missingNodes := false
821+
if len(nodeNodeOtps) != len(allNodeOtps) {
822+
missingNodes = true
823+
}
824+
for _, otp := range allNodeOtps {
825+
if !slices.Contains(nodeNodeOtps, otp) {
826+
missingNodes = true
827+
break
828+
}
829+
}
830+
831+
if missingNodes {
832+
allNodesAcked = false
833+
}
834+
}
835+
836+
if allNodesAcked {
837+
break
838+
}
839+
840+
d.logger.Info("not all nodes have acknowledged each other... waiting")
841+
time.Sleep(1 * time.Second)
842+
}
843+
780844
if len(nodesToAdd) > 0 {
781845
// no need to update if DNS is disabled
782846
if thisCluster.DnsName != "" {

0 commit comments

Comments
 (0)