@@ -1032,12 +1032,20 @@ func (c *Cluster) processPodEvent(obj interface{}) error {
10321032 return fmt .Errorf ("could not cast to PodEvent" )
10331033 }
10341034
1035+ // can only take lock when (un)registerPodSubscriber is finished
10351036 c .podSubscribersMu .RLock ()
10361037 subscriber , ok := c .podSubscribers [spec .NamespacedName (event .PodName )]
1037- c .podSubscribersMu .RUnlock ()
10381038 if ok {
1039- subscriber <- event
1039+ select {
1040+ case subscriber <- event :
1041+ default :
1042+ // ending up here when there is no receiver on the channel (i.e. waitForPodLabel finished)
1043+ // avoids blocking channel: https://gobyexample.com/non-blocking-channel-operations
1044+ }
10401045 }
1046+ // hold lock for the time of processing the event to avoid race condition
1047+ // with unregisterPodSubscriber closing the channel (see #1876)
1048+ c .podSubscribersMu .RUnlock ()
10411049
10421050 return nil
10431051}
@@ -1501,49 +1509,23 @@ func (c *Cluster) Switchover(curMaster *v1.Pod, candidate spec.NamespacedName) e
15011509 var err error
15021510 c .logger .Debugf ("switching over from %q to %q" , curMaster .Name , candidate )
15031511 c .eventRecorder .Eventf (c .GetReference (), v1 .EventTypeNormal , "Switchover" , "Switching over from %q to %q" , curMaster .Name , candidate )
1504-
1505- var wg sync.WaitGroup
1506-
1507- podLabelErr := make (chan error )
15081512 stopCh := make (chan struct {})
1509-
1510- wg .Add (1 )
1511-
1512- go func () {
1513- defer wg .Done ()
1514- ch := c .registerPodSubscriber (candidate )
1515- defer c .unregisterPodSubscriber (candidate )
1516-
1517- role := Master
1518-
1519- select {
1520- case <- stopCh :
1521- case podLabelErr <- func () (err2 error ) {
1522- _ , err2 = c .waitForPodLabel (ch , stopCh , & role )
1523- return
1524- }():
1525- }
1526- }()
1513+ ch := c .registerPodSubscriber (candidate )
1514+ defer c .unregisterPodSubscriber (candidate )
1515+ defer close (stopCh )
15271516
15281517 if err = c .patroni .Switchover (curMaster , candidate .Name ); err == nil {
15291518 c .logger .Debugf ("successfully switched over from %q to %q" , curMaster .Name , candidate )
15301519 c .eventRecorder .Eventf (c .GetReference (), v1 .EventTypeNormal , "Switchover" , "Successfully switched over from %q to %q" , curMaster .Name , candidate )
1531- if err = <- podLabelErr ; err != nil {
1520+ _ , err = c .waitForPodLabel (ch , stopCh , nil )
1521+ if err != nil {
15321522 err = fmt .Errorf ("could not get master pod label: %v" , err )
15331523 }
15341524 } else {
15351525 err = fmt .Errorf ("could not switch over from %q to %q: %v" , curMaster .Name , candidate , err )
15361526 c .eventRecorder .Eventf (c .GetReference (), v1 .EventTypeNormal , "Switchover" , "Switchover from %q to %q FAILED: %v" , curMaster .Name , candidate , err )
15371527 }
15381528
1539- // signal the role label waiting goroutine to close the shop and go home
1540- close (stopCh )
1541- // wait until the goroutine terminates, since unregisterPodSubscriber
1542- // must be called before the outer return; otherwise we risk subscribing to the same pod twice.
1543- wg .Wait ()
1544- // close the label waiting channel no sooner than the waiting goroutine terminates.
1545- close (podLabelErr )
1546-
15471529 return err
15481530}
15491531
0 commit comments