Skip to content

Commit 1b86c58

Browse files
committed
pglookout: support explicit failover priorities
Support explicit prioritization between instances. This can be configured via the ``failover_priorities`` key, and will be consulted when picking the standby that should perform the promotion in cases where multiple nodes have a matching replication position. Previously, and also as the current default, the selection was based on the sorting order of the remote nodes. The configuration option allows some additional flexibility, and supports e.g. topologies where we have more favorable and less desirable standbys in multiple different network locations.
1 parent 60f65b2 commit 1b86c58

File tree

3 files changed

+76
-8
lines changed

3 files changed

+76
-8
lines changed

README.rst

+8
Original file line numberDiff line numberDiff line change
@@ -295,6 +295,14 @@ over_warning_limit_command and to create a warning file.
295295

296296
Shell command to execute in case the node has deemed itself in need of promotion
297297

298+
``failover_priorities`` (default ``{}``)
299+
300+
Define priority of nodes for promotion, in case there are multiple candidates
301+
with the same replication position. This makes it possible to ensure that all pglookout instances
302+
would elect the same standby for promotion, while still allowing for topologies
303+
with e.g. less preferred standbys in secondary network locations. By default,
304+
pglookout uses remote connection ids for the same selection purpose.
305+
298306
``known_gone_nodes`` (default ``[]``)
299307

300308
Lists nodes that are explicitly known to have left the cluster. If the old

pglookout/pglookout.py

+15-8
Original file line numberDiff line numberDiff line change
@@ -643,14 +643,21 @@ def do_failover_decision(self, standby_nodes):
643643
if not known_replication_positions:
644644
self.log.warning("No known replication positions, canceling failover consideration")
645645
return
646-
# If there are multiple nodes with the same replication positions pick the one with the "highest" name
647-
# to make sure pglookouts running on all standbys make the same decision. The rationale for picking
648-
# the "highest" node is that there's no obvious way for pglookout to decide which of the nodes is
649-
# "best" beyond looking at replication positions, but picking the highest id supports environments
650-
# where nodes are assigned identifiers from an incrementing sequence identifiers and where we want to
651-
# promote the latest and greatest node. In static environments node identifiers can be priority
652-
# numbers, with the highest number being the one that should be preferred.
653-
furthest_along_instance = max(known_replication_positions[max(known_replication_positions)])
646+
647+
# Find the instance that is furthest along.
648+
# If there are multiple nodes with the same replication positions, try to identify one to promote either
649+
# via explicit failover priority configuration or pick the one with the "highest" name.
650+
# The rationale of this logic is to ensure all participating pglookouts running on all standbys make
651+
# the same decision. The "highest" name works well in environments where nodes are assigned identifiers
652+
# from an incrementing sequence and where we want to promote the latest and greatest node.
653+
def _priority_or_id(instance):
654+
priority = self.config.get("failover_priorities", {}).get(instance)
655+
if priority is None:
656+
priority = instance
657+
return priority
658+
659+
furthest_along_instances = known_replication_positions[max(known_replication_positions)]
660+
furthest_along_instance = sorted(furthest_along_instances, key=_priority_or_id, reverse=True)[0]
654661
self.log.warning(
655662
"Node that is furthest along is: %r, all replication positions were: %r",
656663
furthest_along_instance,

test/test_lookout.py

+53
Original file line numberDiff line numberDiff line change
@@ -1005,6 +1005,59 @@ def test_standbys_failover_equal_replication_positions(pgl):
10051005
assert pgl.execute_external_command.call_count == 1
10061006

10071007

1008+
def test_standbys_failover_equal_replication_positions_with_priorities(pgl):
    """Verify that ``failover_priorities`` breaks ties between standbys with
    equal replication positions: only the highest-priority standby promotes,
    even when another candidate sorts higher by instance id."""
    now = datetime.datetime.utcnow()
    an_hour_ago = now - datetime.timedelta(seconds=3600)

    # Two healthy standbys at the identical replication position, plus the
    # failed master that has not been seen for an hour.
    standby_states = [
        dict(
            instance="192.168.54.183",
            pg_last_xlog_receive_location="0/70004D8",
            pg_is_in_recovery=True,
            connection=True,
            replication_time_lag=400.435871,
            fetch_time=now,
            db_time=now,
        ),
        dict(
            instance="192.168.57.180",
            pg_last_xlog_receive_location=None,
            pg_is_in_recovery=False,
            connection=False,
            replication_time_lag=0.0,
            fetch_time=an_hour_ago,
            db_time=an_hour_ago,
        ),
        dict(
            instance="192.168.63.4",
            pg_last_xlog_receive_location="0/70004D8",
            pg_is_in_recovery=True,
            connection=True,
            replication_time_lag=401.104655,
            fetch_time=now,
            db_time=now,
        ),
    ]
    for state in standby_states:
        _set_instance_cluster_state(pgl, conn_info="foobar", **state)

    pgl.current_master = "192.168.57.180"

    pgl.config["failover_priorities"] = {
        "192.168.54.183": 1000,
        "192.168.63.4": 0,
    }

    # Highest by instance id, but lower priority: must not promote itself.
    pgl.own_db = "192.168.63.4"
    pgl.check_cluster_state()
    assert pgl.execute_external_command.call_count == 0

    # Lower by instance id, but higher priority: promotes.
    pgl.own_db = "192.168.54.183"
    pgl.check_cluster_state()
    assert pgl.execute_external_command.call_count == 1
1059+
1060+
10081061
def test_node_map_when_only_observer_sees_master(pgl):
10091062
cluster_state = {
10101063
"10.255.255.10": {

0 commit comments

Comments
 (0)