Aiven-Open
diff --git a/‎README.rst
Lines changed: 8 additions & 0 deletions b/‎README.rst
Lines changed: 8 additions & 0 deletions
diff --git a/‎pglookout/pglookout.py
Lines changed: 25 additions & 8 deletions b/‎pglookout/pglookout.py
Lines changed: 25 additions & 8 deletions
diff --git a/‎pyproject.toml
Lines changed: 1 addition & 0 deletions b/‎pyproject.toml
Lines changed: 1 addition & 0 deletions
@@ -295,6 +295,14 @@ over_warning_limit_command and to create a warning file.
 
 Shell command to execute in case the node has deemed itself in need of promotion
 
+``failover_priorities`` (default ``{}``)
+
+Defines the promotion priority of nodes for the case where multiple candidates share
+the same replication position; the candidate with the highest priority value is preferred.
+This ensures that all pglookout instances elect the same standby for promotion, while
+still allowing for topologies with e.g. less preferred standbys in secondary network
+locations.  By default, pglookout uses remote connection ids for the same selection purpose.
+
 ``known_gone_nodes`` (default ``[]``)
 
 Lists nodes that are explicitly known to have left the cluster.  If the old
 
@@ -7,6 +7,7 @@
 This file is under the Apache License, Version 2.0.
 See the file `LICENSE` for details.
 """
+
 from . import logutil, statsd, version
 from .cluster_monitor import ClusterMonitor
 from .common import convert_xlog_location_to_offset, get_iso_timestamp, parse_iso_datetime
@@ -643,19 +644,35 @@ def do_failover_decision(self, standby_nodes):
         if not known_replication_positions:
             self.log.warning("No known replication positions, canceling failover consideration")
             return
-        # If there are multiple nodes with the same replication positions pick the one with the "highest" name
-        # to make sure pglookouts running on all standbys make the same decision.  The rationale for picking
-        # the "highest" node is that there's no obvious way for pglookout to decide which of the nodes is
-        # "best" beyond looking at replication positions, but picking the highest id supports environments
-        # where nodes are assigned identifiers from an incrementing sequence identifiers and where we want to
-        # promote the latest and greatest node.  In static environments node identifiers can be priority
-        # numbers, with the highest number being the one that should be preferred.
-        furthest_along_instance = max(known_replication_positions[max(known_replication_positions)])
+
+        # Find the instance that is furthest along.
+        # If there are multiple nodes with the same replication position, identify the one to promote either
+        # via explicit failover priority configuration or by picking the one with the "highest" name by sort order.
+        # The rationale of this logic is to ensure all participating pglookouts running on all standbys make
+        # the same decision. The "highest" name works well in environments where nodes are assigned identifiers
+        # from an incrementing sequence and where we want to promote the latest and greatest node.
+
+        # First, find the list of instances that share the most recent replication position
+        furthest_along_instances = known_replication_positions[max(known_replication_positions)]
+        # Second, sort them by "instance name"
+        furthest_along_instances = sorted(furthest_along_instances, reverse=True)
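+        # Third, if we have explicit failover priorities, use those for selecting the instance to be promoted.
+        # NOTE(review): max() below treats a missing priority as 0, but the filter's .get() has no default, so
+        # e.g. when none of the candidates has an explicit priority the filtered list is empty and the [0]
+        # lookup that follows raises IndexError -- the filter should probably use .get(instance, 0) as well.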
+        if "failover_priorities" in self.config:
+            highest_priority = max(
+                self.config["failover_priorities"].get(instance, 0) for instance in furthest_along_instances
+            )
+            furthest_along_instances = [
+                instance
+                for instance in furthest_along_instances
+                if self.config["failover_priorities"].get(instance) == highest_priority
+            ]
+        furthest_along_instance = furthest_along_instances[0]
         self.log.warning(
             "Node that is furthest along is: %r, all replication positions were: %r",
             furthest_along_instance,
             sorted(known_replication_positions),
         )
+
         total_observers = len(self.connected_observer_nodes) + len(self.disconnected_observer_nodes)
         # +1 in the calculation comes from the master node
         total_amount_of_nodes = len(standby_nodes) + 1 - len(self.never_promote_these_nodes) + total_observers
 
@@ -47,6 +47,7 @@ exclude = [
     'test/test_lookout.py',
     'test/test_pgutil.py',
     'test/test_webserver.py',
+    'test/utils.py',
     # Other.
     'setup.py',
     'version.py',