Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions internal/app/app.go
Original file line number Diff line number Diff line change
Expand Up @@ -637,6 +637,14 @@ func (app *App) stateManager() appState {
if lightMaintenance {
app.logger.Debugf("cannot perform switchover: blocked by light maintenance mode, skipping iteration")
} else {
if !switchover.InitiatedAt.IsZero() && time.Since(switchover.InitiatedAt) > app.config.SwitchoverTimeout {
app.logger.Errorf("switchover %s => %s timed out after %s", switchover.From, switchover.To, time.Since(switchover.InitiatedAt))
err = app.FailSwitchover(switchover, fmt.Errorf("switchover timed out after %s", time.Since(switchover.InitiatedAt)))
if err != nil {
app.logger.Errorf("failed to report switchover timeout: %s", err)
}
return stateManager
}
err = app.approveSwitchover(switchover, activeNodes, clusterState)
if err != nil {
app.logger.Errorf("cannot perform switchover: %s", err)
Expand Down Expand Up @@ -1429,6 +1437,10 @@ func (app *App) performSwitchover(clusterState map[string]*nodestate.NodeState,
return fmt.Errorf("%s", errMessage)
}
app.logger.Infof("switchover: host %s replication IO thread stopped", host)
ns := app.getNodeState(host)
if broken, _ := ns.IsReplicationPermanentlyBroken(); broken {
return fmt.Errorf("switchover: host %s replication is permanently broken", host)
}
return nil
}, filterOut(activeNodes, []string{oldMaster}))

Expand Down
2 changes: 2 additions & 0 deletions internal/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ type Config struct {
ManagerSwitchover bool `config:"manager_switchover" yaml:"manager_switchover"`
ForceSwitchover bool `config:"force_switchover" yaml:"force_switchover"` // TODO: Remove once we are sure this is the right way to do switchover
ReplicationConvergenceTimeoutSwitchover time.Duration `config:"replication_convergence_timeout_switchover" yaml:"replication_convergence_timeout_switchover"`
SwitchoverTimeout time.Duration `config:"switchover_timeout" yaml:"switchover_timeout"`
DSNSettings string `config:"dsn_settings" yaml:"dsn_settings"`
OptimizationConfig OptimizationConfig `config:"optimization_config" yaml:"optimization_config"`
}
Expand Down Expand Up @@ -204,6 +205,7 @@ func DefaultConfig() (Config, error) {
ShowOnlyGTIDDiff: false,
ManagerSwitchover: false,
ForceSwitchover: false,
SwitchoverTimeout: 30 * time.Minute,
ReplicationConvergenceTimeoutSwitchover: 300 * time.Second,
DSNSettings: "?autocommit=1&sql_log_off=1",
OptimizationConfig: OptimizationConfig{
Expand Down
1 change: 1 addition & 0 deletions tests/images/mysql_jepsen/my.cnf
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ relay_log_recovery = ON
log_bin = mysql-bin-log
relay_log = mysql-relay-log
sync_binlog = 1
sync_relay_log = 1
rpl_semi_sync_master_timeout = 31536000000
rpl_semi_sync_master_wait_for_slave_count = 1
rpl_semi_sync_master_wait_no_slave = ON
Expand Down
Loading