Skip to content

Commit 8c6ec0b

Browse files
authored
Add API and CLI commands to promote/demote nodes in the Raft cluster (#996) (#1072)
* add commands to promote and demote raft peers: building on top of the new non-voter feature, this allows controlling the voting status of individual nodes using the CLI.
* document new commands and API endpoints.
* remove DR Token options: this is not supported by OpenBao and probably never will be.
* add fallback if autopilot is disabled: use the raw Raft backend functions to promote or demote a node.
---------
Signed-off-by: Jan Martens <jan@martens.eu.org>
1 parent 04a8a01 commit 8c6ec0b

File tree

9 files changed

+559
-2
lines changed

9 files changed

+559
-2
lines changed

command/commands.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -404,6 +404,16 @@ func initCommands(ui, serverCmdUi cli.Ui, runOpts *RunOptions) map[string]cli.Co
404404
BaseCommand: getBaseCommand(),
405405
}, nil
406406
},
407+
"operator raft promote": func() (cli.Command, error) {
408+
return &OperatorRaftPromoteCommand{
409+
BaseCommand: getBaseCommand(),
410+
}, nil
411+
},
412+
"operator raft demote": func() (cli.Command, error) {
413+
return &OperatorRaftDemoteCommand{
414+
BaseCommand: getBaseCommand(),
415+
}, nil
416+
},
407417
"operator raft snapshot": func() (cli.Command, error) {
408418
return &OperatorRaftSnapshotCommand{
409419
BaseCommand: getBaseCommand(),

command/operator_raft_demote.go

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
// Copyright (c) 2025 OpenBao a Series of LF Projects, LLC
2+
// SPDX-License-Identifier: MPL-2.0
3+
4+
package command
5+
6+
import (
7+
"fmt"
8+
"strings"
9+
10+
"github.com/hashicorp/cli"
11+
"github.com/posener/complete"
12+
)
13+
14+
// Compile-time assertions that *OperatorRaftDemoteCommand satisfies the
// cli.Command and cli.CommandAutocomplete interfaces.
var (
15+
_ cli.Command = (*OperatorRaftDemoteCommand)(nil)
16+
_ cli.CommandAutocomplete = (*OperatorRaftDemoteCommand)(nil)
17+
)
18+
19+
// OperatorRaftDemoteCommand is the CLI command registered as
// "operator raft demote". It embeds *BaseCommand, which supplies the UI,
// flag-set and client helpers used by its methods.
type OperatorRaftDemoteCommand struct {
20+
*BaseCommand
21+
}
22+
23+
func (c *OperatorRaftDemoteCommand) Synopsis() string {
24+
return "Demotes a voter to a permanent non-voter"
25+
}
26+
27+
// Help returns the usage text of the demote command followed by the help
// output of its flag sets, with leading and trailing whitespace trimmed.
func (c *OperatorRaftDemoteCommand) Help() string {
28+
helpText := `
29+
Usage: bao operator raft demote <server_id>
30+
31+
Demotes voter to a permanent non-voter.
32+
33+
$ bao operator raft demote node1
34+
35+
` + c.Flags().Help()
36+
37+
return strings.TrimSpace(helpText)
38+
}
39+
40+
func (c *OperatorRaftDemoteCommand) Flags() *FlagSets {
41+
return c.flagSet(FlagSetHTTP | FlagSetOutputFormat)
42+
}
43+
44+
func (c *OperatorRaftDemoteCommand) AutocompleteArgs() complete.Predictor {
45+
return complete.PredictAnything
46+
}
47+
48+
func (c *OperatorRaftDemoteCommand) AutocompleteFlags() complete.Flags {
49+
return c.Flags().Completions()
50+
}
51+
52+
func (c *OperatorRaftDemoteCommand) Run(args []string) int {
53+
f := c.Flags()
54+
55+
if err := f.Parse(args); err != nil {
56+
c.UI.Error(err.Error())
57+
return 1
58+
}
59+
60+
serverID := ""
61+
62+
args = f.Args()
63+
switch len(args) {
64+
case 1:
65+
serverID = strings.TrimSpace(args[0])
66+
default:
67+
c.UI.Error(fmt.Sprintf("Incorrect arguments (expected 1, got %d)", len(args)))
68+
return 1
69+
}
70+
71+
if len(serverID) == 0 {
72+
c.UI.Error("Server id is required")
73+
return 1
74+
}
75+
76+
client, err := c.Client()
77+
if err != nil {
78+
c.UI.Error(err.Error())
79+
return 2
80+
}
81+
82+
_, err = client.Logical().Write("sys/storage/raft/demote", map[string]interface{}{
83+
"server_id": serverID,
84+
})
85+
if err != nil {
86+
c.UI.Error(fmt.Sprintf("Error promoting server: %s", err))
87+
return 2
88+
}
89+
90+
c.UI.Output("Server demoted successfully!")
91+
92+
return 0
93+
}

command/operator_raft_promote.go

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
// Copyright (c) 2025 OpenBao a Series of LF Projects, LLC
2+
// SPDX-License-Identifier: MPL-2.0
3+
4+
package command
5+
6+
import (
7+
"fmt"
8+
"strings"
9+
10+
"github.com/hashicorp/cli"
11+
"github.com/posener/complete"
12+
)
13+
14+
// Compile-time assertions that *OperatorRaftPromoteCommand satisfies the
// cli.Command and cli.CommandAutocomplete interfaces.
var (
15+
_ cli.Command = (*OperatorRaftPromoteCommand)(nil)
16+
_ cli.CommandAutocomplete = (*OperatorRaftPromoteCommand)(nil)
17+
)
18+
19+
// OperatorRaftPromoteCommand is the CLI command registered as
// "operator raft promote". It embeds *BaseCommand, which supplies the UI,
// flag-set and client helpers used by its methods.
type OperatorRaftPromoteCommand struct {
20+
*BaseCommand
21+
}
22+
23+
func (c *OperatorRaftPromoteCommand) Synopsis() string {
24+
return "Promotes a permanent non-voter to a voter"
25+
}
26+
27+
// Help returns the usage text of the promote command followed by the help
// output of its flag sets, with leading and trailing whitespace trimmed.
func (c *OperatorRaftPromoteCommand) Help() string {
28+
helpText := `
29+
Usage: bao operator raft promote <server_id>
30+
31+
Promotes a permanent non-voter to a voter.
32+
33+
$ bao operator raft promote node1
34+
35+
` + c.Flags().Help()
36+
37+
return strings.TrimSpace(helpText)
38+
}
39+
40+
func (c *OperatorRaftPromoteCommand) Flags() *FlagSets {
41+
return c.flagSet(FlagSetHTTP | FlagSetOutputFormat)
42+
}
43+
44+
func (c *OperatorRaftPromoteCommand) AutocompleteArgs() complete.Predictor {
45+
return complete.PredictAnything
46+
}
47+
48+
func (c *OperatorRaftPromoteCommand) AutocompleteFlags() complete.Flags {
49+
return c.Flags().Completions()
50+
}
51+
52+
func (c *OperatorRaftPromoteCommand) Run(args []string) int {
53+
f := c.Flags()
54+
55+
if err := f.Parse(args); err != nil {
56+
c.UI.Error(err.Error())
57+
return 1
58+
}
59+
60+
serverID := ""
61+
62+
args = f.Args()
63+
switch len(args) {
64+
case 1:
65+
serverID = strings.TrimSpace(args[0])
66+
default:
67+
c.UI.Error(fmt.Sprintf("Incorrect arguments (expected 1, got %d)", len(args)))
68+
return 1
69+
}
70+
71+
if len(serverID) == 0 {
72+
c.UI.Error("Server id is required")
73+
return 1
74+
}
75+
76+
client, err := c.Client()
77+
if err != nil {
78+
c.UI.Error(err.Error())
79+
return 2
80+
}
81+
82+
_, err = client.Logical().Write("sys/storage/raft/promote", map[string]interface{}{
83+
"server_id": serverID,
84+
})
85+
if err != nil {
86+
c.UI.Error(fmt.Sprintf("Error promoting server: %s", err))
87+
return 2
88+
}
89+
90+
c.UI.Output("Server promoted successfully!")
91+
92+
return 0
93+
}

physical/raft/raft.go

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ import (
1414
"os"
1515
"path/filepath"
1616
"strconv"
17+
"strings"
1718
"sync"
1819
"sync/atomic"
1920
"time"
@@ -1232,6 +1233,115 @@ func (b *RaftBackend) RemovePeer(ctx context.Context, peerID string) error {
12321233
return b.autopilot.RemoveServer(raft.ServerID(peerID))
12331234
}
12341235

1236+
// PromotePeer promotes a permanent non-voter to voter
1237+
func (b *RaftBackend) PromotePeer(ctx context.Context, peerID string) error {
1238+
b.l.RLock()
1239+
defer b.l.RUnlock()
1240+
1241+
if err := ctx.Err(); err != nil {
1242+
return err
1243+
}
1244+
1245+
if b.disableAutopilot {
1246+
if b.raft == nil {
1247+
return errors.New("raft storage is not initialized")
1248+
}
1249+
peers, err := b.Peers(ctx)
1250+
if err != nil {
1251+
return fmt.Errorf("failed to get Raft peers: %s", err)
1252+
}
1253+
1254+
found := false
1255+
addr := ""
1256+
for _, peer := range peers {
1257+
if peer.ID == peerID {
1258+
addr = peer.Address
1259+
found = true
1260+
break
1261+
}
1262+
}
1263+
if !found {
1264+
return fmt.Errorf("server %s not found in raft configuration", peerID)
1265+
}
1266+
1267+
future := b.raft.AddVoter(raft.ServerID(peerID), raft.ServerAddress(addr), 0, 0)
1268+
if err := future.Error(); err != nil {
1269+
return fmt.Errorf("failed to promote non-voter to voter: %s", err)
1270+
}
1271+
return nil
1272+
}
1273+
1274+
if b.autopilot == nil {
1275+
return errors.New("raft storage autopilot is not initialized")
1276+
}
1277+
1278+
if !b.delegate.IsNonVoter(raft.ServerID(peerID)) {
1279+
return errors.New("server is not a non-voter")
1280+
}
1281+
1282+
b.logger.Trace("promoting non-voter to voter", "id", peerID)
1283+
return b.delegate.RemoveNonVoter(raft.ServerID(peerID))
1284+
}
1285+
1286+
// DemotePeer demotes a voter to a permanent non-voter
1287+
func (b *RaftBackend) DemotePeer(ctx context.Context, peerID string) error {
1288+
b.l.RLock()
1289+
defer b.l.RUnlock()
1290+
1291+
if err := ctx.Err(); err != nil {
1292+
return err
1293+
}
1294+
1295+
if b.disableAutopilot {
1296+
if b.raft == nil {
1297+
return errors.New("raft storage is not initialized")
1298+
}
1299+
1300+
// refuse to demote current leader to not trigger a leader election
1301+
// when the leader is demoted. This is not necessary if autopilot is enabled,
1302+
// as it will handle this case for us and only demote the leader after a
1303+
// leader election
1304+
if strings.EqualFold(peerID, b.localID) {
1305+
return errors.New("refusing to demote current leader")
1306+
}
1307+
1308+
peers, err := b.Peers(ctx)
1309+
if err != nil {
1310+
return fmt.Errorf("failed to get Raft peers: %s", err)
1311+
}
1312+
1313+
found := false
1314+
for _, peer := range peers {
1315+
if peer.ID == peerID {
1316+
found = true
1317+
break
1318+
}
1319+
}
1320+
1321+
if !found {
1322+
return fmt.Errorf("server %s not found in raft configuration", peerID)
1323+
}
1324+
1325+
future := b.raft.DemoteVoter(raft.ServerID(peerID), 0, 0)
1326+
if err := future.Error(); err != nil {
1327+
return fmt.Errorf("failed to demote voter to non-voter: %s", err)
1328+
}
1329+
return nil
1330+
}
1331+
1332+
if b.autopilot == nil {
1333+
return errors.New("raft storage autopilot is not initialized")
1334+
}
1335+
1336+
b.logger.Trace("demoting voter to non-voter", "id", peerID)
1337+
1338+
if b.delegate.IsNonVoter(raft.ServerID(peerID)) {
1339+
return errors.New("server is already a non-voter")
1340+
}
1341+
1342+
return b.delegate.AddNonVoter(raft.ServerID(peerID))
1343+
}
1344+
12351345
// GetConfigurationOffline is used to read the stale, last known raft
12361346
// configuration to this node. It accesses the last state written into the
12371347
// FSM. When a server is online use GetConfiguration instead.

physical/raft/raft_autopilot_promoter.go

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,9 +43,14 @@ func (_ *CustomPromoter) CalculatePromotionsAndDemotions(c *autopilot.Config, s
4343
minStableDuration := s.ServerStabilizationTime(c)
4444
nonVoters := c.Ext.(map[raft.ServerID]bool)
4545
for id, server := range s.Servers {
46-
// Ignore non-voters
4746
if _, ok := nonVoters[id]; ok {
48-
continue
47+
// If the server is marked as a non-voter, demote it
48+
if server.State == autopilot.RaftVoter {
49+
changes.Demotions = append(changes.Demotions, id)
50+
} else {
51+
// If the server is already a non-voter, skip it
52+
continue
53+
}
4954
}
5055
// If the server is healthy and stable, promote it to a voter
5156
if server.State == autopilot.RaftNonVoter && server.Health.IsStable(now, minStableDuration) {

0 commit comments

Comments
 (0)