@@ -1032,12 +1032,20 @@ func (c *Cluster) processPodEvent(obj interface{}) error {
1032
1032
return fmt .Errorf ("could not cast to PodEvent" )
1033
1033
}
1034
1034
1035
+ // can only take lock when (un)registerPodSubscriber is finished
1035
1036
c .podSubscribersMu .RLock ()
1036
1037
subscriber , ok := c .podSubscribers [spec .NamespacedName (event .PodName )]
1037
- c .podSubscribersMu .RUnlock ()
1038
1038
if ok {
1039
- subscriber <- event
1039
+ select {
1040
+ case subscriber <- event :
1041
+ default :
1042
+ // ending up here when there is no receiver on the channel (i.e. waitForPodLabel finished)
1043
+ // avoids blocking channel: https://gobyexample.com/non-blocking-channel-operations
1044
+ }
1040
1045
}
1046
+ // hold lock for the time of processing the event to avoid race condition
1047
+ // with unregisterPodSubscriber closing the channel (see #1876)
1048
+ c .podSubscribersMu .RUnlock ()
1041
1049
1042
1050
return nil
1043
1051
}
@@ -1501,49 +1509,23 @@ func (c *Cluster) Switchover(curMaster *v1.Pod, candidate spec.NamespacedName) e
1501
1509
var err error
1502
1510
c .logger .Debugf ("switching over from %q to %q" , curMaster .Name , candidate )
1503
1511
c .eventRecorder .Eventf (c .GetReference (), v1 .EventTypeNormal , "Switchover" , "Switching over from %q to %q" , curMaster .Name , candidate )
1504
-
1505
- var wg sync.WaitGroup
1506
-
1507
- podLabelErr := make (chan error )
1508
1512
stopCh := make (chan struct {})
1509
-
1510
- wg .Add (1 )
1511
-
1512
- go func () {
1513
- defer wg .Done ()
1514
- ch := c .registerPodSubscriber (candidate )
1515
- defer c .unregisterPodSubscriber (candidate )
1516
-
1517
- role := Master
1518
-
1519
- select {
1520
- case <- stopCh :
1521
- case podLabelErr <- func () (err2 error ) {
1522
- _ , err2 = c .waitForPodLabel (ch , stopCh , & role )
1523
- return
1524
- }():
1525
- }
1526
- }()
1513
+ ch := c .registerPodSubscriber (candidate )
1514
+ defer c .unregisterPodSubscriber (candidate )
1515
+ defer close (stopCh )
1527
1516
1528
1517
if err = c .patroni .Switchover (curMaster , candidate .Name ); err == nil {
1529
1518
c .logger .Debugf ("successfully switched over from %q to %q" , curMaster .Name , candidate )
1530
1519
c .eventRecorder .Eventf (c .GetReference (), v1 .EventTypeNormal , "Switchover" , "Successfully switched over from %q to %q" , curMaster .Name , candidate )
1531
- if err = <- podLabelErr ; err != nil {
1520
+ _ , err = c .waitForPodLabel (ch , stopCh , nil )
1521
+ if err != nil {
1532
1522
err = fmt .Errorf ("could not get master pod label: %v" , err )
1533
1523
}
1534
1524
} else {
1535
1525
err = fmt .Errorf ("could not switch over from %q to %q: %v" , curMaster .Name , candidate , err )
1536
1526
c .eventRecorder .Eventf (c .GetReference (), v1 .EventTypeNormal , "Switchover" , "Switchover from %q to %q FAILED: %v" , curMaster .Name , candidate , err )
1537
1527
}
1538
1528
1539
- // signal the role label waiting goroutine to close the shop and go home
1540
- close (stopCh )
1541
- // wait until the goroutine terminates, since unregisterPodSubscriber
1542
- // must be called before the outer return; otherwise we risk subscribing to the same pod twice.
1543
- wg .Wait ()
1544
- // close the label waiting channel no sooner than the waiting goroutine terminates.
1545
- close (podLabelErr )
1546
-
1547
1529
return err
1548
1530
}
1549
1531
0 commit comments