Skip to content

Commit a2e2b26

Browse files
authored
Add getNodeManager helper func to Deployer (#186)
Motivation
----------
CollectLogs and getController previously used Nodes[0] to select a cluster node (a.k.a. node manager), but the nodes list includes all containers (load balancers, rebalanced-out nodes, etc.). If a non-provisioned node was the first node, a GET /pools/default would return a 404 "unknown pool". Adding a helper function that ensures the selected cluster node is viable removes this undesired behavior.

Changes
-------
* Added getNodeManager helper func to Deployer
* Updated getController() to use new helper func
* Updated CollectLogs() func to use new helper func
1 parent 662f020 commit a2e2b26

1 file changed

Lines changed: 26 additions & 17 deletions

File tree

deployment/dockerdeploy/deployer.go

Lines changed: 26 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -459,18 +459,36 @@ func (d *Deployer) DestroyAllResources(ctx context.Context) error {
459459
return d.removeNodes(ctx, nodes)
460460
}
461461

462+
// getNodeManager picks a usable node manager for the given cluster.
//
// It returns an error immediately when clusterInfo.Nodes is empty. Otherwise it
// walks the nodes in order, skips every node for which IsClusterNode() is
// false, and for each remaining node builds a NodeManager pointed at
// http://<node IP>:8091 and calls ListNodeOTPs on its controller. The first
// node for which that call succeeds wins: the NodeManager and the OTP list it
// returned are handed back together, so callers that need the OTPs do not have
// to query again.
//
// If no node succeeds, the error "no responsive cluster nodes found" is
// returned. The same error is returned when every node was skipped by the
// IsClusterNode() filter, i.e. when no request was attempted at all.
//
// NOTE(review): per-node failures are only logged at debug level; the returned
// error does not carry any of them, so callers cannot see why probing failed.
// NOTE(review): clusterInfo is dereferenced without a nil check; the callers
// visible in this diff pass the result of getCluster after checking its error
// -- confirm that getCluster never returns (nil, nil).
func (d *Deployer) getNodeManager(ctx context.Context, clusterInfo *clusterInfo) (*clustercontrol.NodeManager, []string, error) {
463+
if len(clusterInfo.Nodes) == 0 {
464+
return nil, nil, errors.New("cannot get node manager for a cluster with no nodes")
465+
}
466+
467+
for _, node := range clusterInfo.Nodes {
468+
if !node.IsClusterNode() {
469+
continue // not a cluster node: never probed
470+
}
471+
nodeCtrl := &clustercontrol.NodeManager{
472+
Logger: d.logger,
473+
Endpoint: fmt.Sprintf("http://%s:8091", node.IPAddress),
474+
}
475+
nodeOtps, err := nodeCtrl.Controller().ListNodeOTPs(ctx) // doubles as the liveness probe for this node
476+
if err == nil {
477+
return nodeCtrl, nodeOtps, nil
478+
}
479+
d.logger.Debug("failed to connect to node manager, trying next node if available", zap.String("node", node.NodeID), zap.Error(err))
480+
}
481+
return nil, nil, errors.New("no responsive cluster nodes found")
482+
}
483+
462484
// getController resolves clusterID to its cluster info via getCluster and
// returns a node manager for that cluster. A getCluster failure is wrapped
// with "failed to get cluster info".
//
// This span is a rendered diff and shows two implementations of the tail:
//   - lines whose marker ends in "-" are the removed code, which always built
//     the NodeManager from clusterInfo.Nodes[0] without checking that the
//     slice was non-empty;
//   - lines whose marker ends in "+" are the replacement, which delegates the
//     choice of node to getNodeManager.
//
// In the replacement, the node OTP list returned by getNodeManager is
// discarded and its error is returned as-is (not wrapped).
func (d *Deployer) getController(ctx context.Context, clusterID string) (*clustercontrol.NodeManager, error) {
463485
clusterInfo, err := d.getCluster(ctx, clusterID)
464486
if err != nil {
465487
return nil, errors.Wrap(err, "failed to get cluster info")
466488
}
467489

468-
nodeCtrl := &clustercontrol.NodeManager{
469-
Logger: d.logger,
470-
Endpoint: fmt.Sprintf("http://%s:8091", clusterInfo.Nodes[0].IPAddress),
471-
}
472-
473-
return nodeCtrl, nil
490+
nodeCtrl, _, err := d.getNodeManager(ctx, clusterInfo)
491+
return nodeCtrl, err
474492
}
475493

476494
func (d *Deployer) getAgent(ctx context.Context, clusterID string, bucketName string) (*gocbcorex.Agent, error) {
@@ -884,18 +902,9 @@ func (d *Deployer) CollectLogs(ctx context.Context, clusterID string, destPath s
884902
return nil, errors.Wrap(err, "failed to get cluster info")
885903
}
886904

887-
if len(clusterInfo.Nodes) == 0 {
888-
return nil, errors.New("cannot collection logs from a cluster with no nodes")
889-
}
890-
891-
nodeCtrl := clustercontrol.NodeManager{
892-
Logger: d.logger,
893-
Endpoint: fmt.Sprintf("http://%s:8091", clusterInfo.Nodes[0].IPAddress),
894-
}
895-
896-
nodeOtps, err := nodeCtrl.Controller().ListNodeOTPs(ctx)
905+
nodeCtrl, nodeOtps, err := d.getNodeManager(ctx, clusterInfo)
897906
if err != nil {
898-
return nil, errors.Wrap(err, "failed to list nodes")
907+
return nil, errors.Wrap(err, "failed to get node manager")
899908
}
900909

901910
d.logger.Info("beginning log collection", zap.Strings("nodes", nodeOtps))

0 commit comments

Comments
 (0)