BugFix: Switchover (during a Node drain) fails randomly in synchronous mode (zalando#1984)

JBWatenbergScality · web-flow · commit b91b69c736ad · 2022-08-19T15:14:53.000+02:00
* Use getSwitchoverCandidate instead of masterCandidate when trying to migrate the master pod to a replica. Ref: zalando#1983 * Remove unused masterCandidate (replaced by getSwitchoverCandidate). Ref: zalando#1983
diff --git a/pkg/cluster/pod.go b/pkg/cluster/pod.go
@@ -3,7 +3,6 @@ package cluster
 import (
 	"context"
 	"fmt"
-	"math/rand"
 	"sort"
 	"strconv"
 	"time"
@@ -212,42 +211,12 @@ func (c *Cluster) movePodFromEndOfLifeNode(pod *v1.Pod) (*v1.Pod, error) {
 	return newPod, nil
 }
 
-func (c *Cluster) masterCandidate(oldNodeName string) (*v1.Pod, error) {
-
-	// Wait until at least one replica pod will come up
-	if err := c.waitForAnyReplicaLabelReady(); err != nil {
-		c.logger.Warningf("could not find at least one ready replica: %v", err)
-	}
-
-	replicas, err := c.getRolePods(Replica)
-	if err != nil {
-		return nil, fmt.Errorf("could not get replica pods: %v", err)
-	}
-
-	if len(replicas) == 0 {
-		c.logger.Warningf("no available master candidates, migration will cause longer downtime of Postgres cluster")
-		return nil, nil
-	}
-
-	for i, pod := range replicas {
-		// look for replicas running on live nodes. Ignore errors when querying the nodes.
-		if pod.Spec.NodeName != oldNodeName {
-			eol, err := c.podIsEndOfLife(&pod)
-			if err == nil && !eol {
-				return &replicas[i], nil
-			}
-		}
-	}
-	c.logger.Warningf("no available master candidates on live nodes")
-	return &replicas[rand.Intn(len(replicas))], nil
-}
-
 // MigrateMasterPod migrates master pod via failover to a replica
 func (c *Cluster) MigrateMasterPod(podName spec.NamespacedName) error {
 	var (
-		masterCandidatePod *v1.Pod
-		err                error
-		eol                bool
+		masterCandidateName spec.NamespacedName
+		err                 error
+		eol                 bool
 	)
 
 	oldMaster, err := c.KubeClient.Pods(podName.Namespace).Get(context.TODO(), podName.Name, metav1.GetOptions{})
@@ -283,13 +252,19 @@ func (c *Cluster) MigrateMasterPod(podName spec.NamespacedName) error {
 	}
 	// We may not have a cached statefulset if the initial cluster sync has aborted, revert to the spec in that case.
 	if *c.Statefulset.Spec.Replicas > 1 {
-		if masterCandidatePod, err = c.masterCandidate(oldMaster.Spec.NodeName); err != nil {
+		if masterCandidateName, err = c.getSwitchoverCandidate(oldMaster); err != nil {
 			return fmt.Errorf("could not find suitable replica pod as candidate for failover: %v", err)
 		}
 	} else {
 		c.logger.Warningf("migrating single pod cluster %q, this will cause downtime of the Postgres cluster until pod is back", c.clusterName())
 	}
 
+	masterCandidatePod, err := c.KubeClient.Pods(masterCandidateName.Namespace).Get(context.TODO(), masterCandidateName.Name, metav1.GetOptions{})
+
+	if err != nil {
+		return fmt.Errorf("could not get master candidate pod: %v", err)
+	}
+
 	// there are two cases for each postgres cluster that has its master pod on the node to migrate from:
 	// - the cluster has some replicas - migrate one of those if necessary and failover to it
 	// - there are no replicas - just terminate the master and wait until it respawns
@@ -306,7 +281,6 @@ func (c *Cluster) MigrateMasterPod(podName spec.NamespacedName) error {
 		return fmt.Errorf("could not move pod: %v", err)
 	}
 
-	masterCandidateName := util.NameFromMeta(masterCandidatePod.ObjectMeta)
 	err = retryutil.Retry(1*time.Minute, 5*time.Minute,
 		func() (bool, error) {
 			err := c.Switchover(oldMaster, masterCandidateName)