@@ -3,7 +3,6 @@ package cluster
33import (
44 "context"
55 "fmt"
6- "math/rand"
76 "sort"
87 "strconv"
98 "time"
@@ -212,42 +211,12 @@ func (c *Cluster) movePodFromEndOfLifeNode(pod *v1.Pod) (*v1.Pod, error) {
212211 return newPod , nil
213212}
214213
// masterCandidate picks a replica pod to fail over to when the master has to
// leave the node named oldNodeName. (Every line of this function carries a
// `-` marker, i.e. this diff removes it.)
//
// Returns:
//   - (nil, err) when the replica pods cannot be listed;
//   - (nil, nil) when there are no replica pods at all;
//   - the first replica that is not on oldNodeName and is not end-of-life;
//   - otherwise a randomly chosen replica, after logging a warning.
215- func (c * Cluster ) masterCandidate (oldNodeName string ) (* v1.Pod , error ) {
216-
217- // Wait until at least one replica pod comes up; a failure here is only logged and the search continues.
218- if err := c .waitForAnyReplicaLabelReady (); err != nil {
219- c .logger .Warningf ("could not find at least one ready replica: %v" , err )
220- }
221-
222- replicas , err := c .getRolePods (Replica )
223- if err != nil {
224- return nil , fmt .Errorf ("could not get replica pods: %v" , err )
225- }
226-
// No replicas is not treated as an error: the caller receives a nil pod and a nil error.
227- if len (replicas ) == 0 {
228- c .logger .Warningf ("no available master candidates, migration will cause longer downtime of Postgres cluster" )
229- return nil , nil
230- }
231-
232- for i , pod := range replicas {
233- // Look for replicas running on live nodes. Errors from the end-of-life check are ignored: such a pod is simply skipped.
234- if pod .Spec .NodeName != oldNodeName {
235- eol , err := c .podIsEndOfLife (& pod )
236- if err == nil && ! eol {
// Return the address of the slice element, not of the per-iteration copy `pod`.
237- return & replicas [i ], nil
238- }
239- }
240- }
// Fallback: no replica passed the checks above, so pick any replica at random.
241- c .logger .Warningf ("no available master candidates on live nodes" )
242- return & replicas [rand .Intn (len (replicas ))], nil
243- }
244-
245214// MigrateMasterPod migrates master pod via failover to a replica
246215func (c * Cluster ) MigrateMasterPod (podName spec.NamespacedName ) error {
247216 var (
248- masterCandidatePod * v1. Pod
249- err error
250- eol bool
217+ masterCandidateName spec. NamespacedName
218+ err error
219+ eol bool
251220 )
252221
253222 oldMaster , err := c .KubeClient .Pods (podName .Namespace ).Get (context .TODO (), podName .Name , metav1.GetOptions {})
@@ -283,13 +252,19 @@ func (c *Cluster) MigrateMasterPod(podName spec.NamespacedName) error {
283252 }
284253 // We may not have a cached statefulset if the initial cluster sync has aborted, revert to the spec in that case.
285254 if * c .Statefulset .Spec .Replicas > 1 {
286- if masterCandidatePod , err = c .masterCandidate (oldMaster . Spec . NodeName ); err != nil {
255+ if masterCandidateName , err = c .getSwitchoverCandidate (oldMaster ); err != nil {
287256 return fmt .Errorf ("could not find suitable replica pod as candidate for failover: %v" , err )
288257 }
289258 } else {
290259 c .logger .Warningf ("migrating single pod cluster %q, this will cause downtime of the Postgres cluster until pod is back" , c .clusterName ())
291260 }
292261
262+ masterCandidatePod , err := c .KubeClient .Pods (masterCandidateName .Namespace ).Get (context .TODO (), masterCandidateName .Name , metav1.GetOptions {})
263+
264+ if err != nil {
265+ return fmt .Errorf ("could not get master candidate pod: %v" , err )
266+ }
267+
293268 // there are two cases for each postgres cluster that has its master pod on the node to migrate from:
294269 // - the cluster has some replicas - migrate one of those if necessary and failover to it
295270 // - there are no replicas - just terminate the master and wait until it respawns
@@ -306,7 +281,6 @@ func (c *Cluster) MigrateMasterPod(podName spec.NamespacedName) error {
306281 return fmt .Errorf ("could not move pod: %v" , err )
307282 }
308283
309- masterCandidateName := util .NameFromMeta (masterCandidatePod .ObjectMeta )
310284 err = retryutil .Retry (1 * time .Minute , 5 * time .Minute ,
311285 func () (bool , error ) {
312286 err := c .Switchover (oldMaster , masterCandidateName )
0 commit comments