
Commit 1eafd68

restart master first in some edge cases (zalando#1655)

* restart master first in some edge cases
* edge case is when desired is lower than effective
* wait after config patch and restart on sync whenever we see pending_restart
* convert options to int to check decrease and add unit test
* minor update to e2e tests
* wait only after restart not every sync
* using spilo 14 e2e images
1 parent d20f511 commit 1eafd68
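
The gist of the change: after patching Patroni/PostgreSQL configuration, the operator used to restart replicas first and the master last. When a bootstrap parameter such as max_connections is decreased, that order is unsafe, so this commit restarts the master first in that case. Below is a minimal, self-contained Go sketch of the decision rule (see checkAndSetGlobalPostgreSQLConfiguration in pkg/cluster/sync.go for the real thing); the helper names here are invented for illustration, and the actual code also falls back to replicas-first whenever other Patroni options changed in the same sync.

package main

import (
	"fmt"
	"strconv"
)

// Parameters whose decrease forces a master-first restart (taken from the commit).
var requireMasterRestartWhenDecreased = []string{
	"max_connections",
	"max_prepared_transactions",
	"max_locks_per_transaction",
	"max_worker_processes",
	"max_wal_senders",
}

func contains(list []string, s string) bool {
	for _, v := range list {
		if v == s {
			return true
		}
	}
	return false
}

// restartMasterFirst returns true only when every changed parameter is a
// numeric decrease of one of the settings above; any other change keeps
// the old replicas-first order.
func restartMasterFirst(effective, desired map[string]string) bool {
	decisions := make([]bool, 0)
	for option, desiredValue := range desired {
		effectiveValue, ok := effective[option]
		if !ok || effectiveValue == desiredValue {
			continue
		}
		decrease := false
		if contains(requireMasterRestartWhenDecreased, option) {
			effNum, err1 := strconv.Atoi(effectiveValue)
			desNum, err2 := strconv.Atoi(desiredValue)
			if err1 == nil && err2 == nil && effNum > desNum {
				decrease = true
			}
		}
		decisions = append(decisions, decrease)
	}
	if len(decisions) == 0 {
		return false
	}
	for _, d := range decisions {
		if !d {
			return false
		}
	}
	return true
}

func main() {
	effective := map[string]string{"max_connections": "100"}
	fmt.Println(restartMasterFirst(effective, map[string]string{"max_connections": "50"}))  // true: decrease
	fmt.Println(restartMasterFirst(effective, map[string]string{"max_connections": "200"})) // false: increase
}

With an effective max_connections of 100, a desired value of 50 picks master-first; 200 keeps the old replicas-first order.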

File tree

7 files changed (+299 -70 lines)


docker/logical-backup/Dockerfile

Lines changed: 1 addition & 0 deletions

@@ -23,6 +23,7 @@ RUN apt-get update \
     && curl --silent https://www.postgresql.org/media/keys/ACCC4CF8.asc | apt-key add - \
     && apt-get update \
     && apt-get install --no-install-recommends -y \
+       postgresql-client-14 \
        postgresql-client-13 \
        postgresql-client-12 \
        postgresql-client-11 \

e2e/Dockerfile

Lines changed: 1 addition & 1 deletion

@@ -16,7 +16,7 @@ RUN apt-get update \
        curl \
        vim \
     && pip3 install --no-cache-dir -r requirements.txt \
-    && curl -LO https://storage.googleapis.com/kubernetes-release/release/v1.18.0/bin/linux/amd64/kubectl \
+    && curl -LO https://storage.googleapis.com/kubernetes-release/release/v1.22.0/bin/linux/amd64/kubectl \
     && chmod +x ./kubectl \
     && mv ./kubectl /usr/local/bin/kubectl \
     && apt-get clean \

e2e/run.sh

Lines changed: 1 addition & 1 deletion

@@ -8,7 +8,7 @@ IFS=$'\n\t'

 readonly cluster_name="postgres-operator-e2e-tests"
 readonly kubeconfig_path="/tmp/kind-config-${cluster_name}"
-readonly spilo_image="registry.opensource.zalan.do/acid/spilo-13-e2e:0.3"
+readonly spilo_image="registry.opensource.zalan.do/acid/spilo-14-e2e:0.1"
 readonly e2e_test_runner_image="registry.opensource.zalan.do/acid/postgres-operator-e2e-tests-runner:0.3"

 export GOPATH=${GOPATH-~/go}

e2e/tests/test_e2e.py

Lines changed: 63 additions & 16 deletions

@@ -11,8 +11,8 @@
 from tests.k8s_api import K8s
 from kubernetes.client.rest import ApiException

-SPILO_CURRENT = "registry.opensource.zalan.do/acid/spilo-13-e2e:0.3"
-SPILO_LAZY = "registry.opensource.zalan.do/acid/spilo-13-e2e:0.4"
+SPILO_CURRENT = "registry.opensource.zalan.do/acid/spilo-14-e2e:0.1"
+SPILO_LAZY = "registry.opensource.zalan.do/acid/spilo-14-e2e:0.2"


 def to_selector(labels):
@@ -85,6 +85,7 @@ def setUpClass(cls):

         # set a single K8s wrapper for all tests
         k8s = cls.k8s = K8s()
+        cluster_label = 'application=spilo,cluster-name=acid-minimal-cluster'

         # remove existing local storage class and create hostpath class
         try:
@@ -150,8 +151,8 @@ def setUpClass(cls):
         result = k8s.create_with_kubectl("manifests/minimal-postgres-manifest.yaml")
         print('stdout: {}, stderr: {}'.format(result.stdout, result.stderr))
         try:
-            k8s.wait_for_pod_start('spilo-role=master')
-            k8s.wait_for_pod_start('spilo-role=replica')
+            k8s.wait_for_pod_start('spilo-role=master,' + cluster_label)
+            k8s.wait_for_pod_start('spilo-role=replica,' + cluster_label)
         except timeout_decorator.TimeoutError:
             print('Operator log: {}'.format(k8s.get_operator_log()))
             raise
@@ -1064,12 +1065,13 @@ def test_patroni_config_update(self):
         via restarting cluster through Patroni's rest api
         '''
         k8s = self.k8s
-        masterPod = k8s.get_cluster_leader_pod()
-        labels = 'application=spilo,cluster-name=acid-minimal-cluster,spilo-role=master'
-        creationTimestamp = masterPod.metadata.creation_timestamp
+        leader = k8s.get_cluster_leader_pod()
+        replica = k8s.get_cluster_replica_pod()
+        masterCreationTimestamp = leader.metadata.creation_timestamp
+        replicaCreationTimestamp = replica.metadata.creation_timestamp
         new_max_connections_value = "50"

-        # adjust max_connection
+        # adjust Postgres config
         pg_patch_config = {
             "spec": {
                 "postgresql": {
@@ -1098,7 +1100,7 @@ def test_patroni_config_update(self):
             self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync")

             def compare_config():
-                effective_config = k8s.patroni_rest(masterPod.metadata.name, "config")
+                effective_config = k8s.patroni_rest(leader.metadata.name, "config")
                 desired_config = pg_patch_config["spec"]["patroni"]
                 desired_parameters = pg_patch_config["spec"]["postgresql"]["parameters"]
                 effective_parameters = effective_config["postgresql"]["parameters"]
@@ -1115,19 +1117,63 @@ def compare_config():
                     "synchronous_mode not updated")
                 return True

+            # check if Patroni config has been updated
             self.eventuallyTrue(compare_config, "Postgres config not applied")

+            # make sure that pods were not recreated
+            leader = k8s.get_cluster_leader_pod()
+            replica = k8s.get_cluster_replica_pod()
+            self.assertEqual(masterCreationTimestamp, leader.metadata.creation_timestamp,
+                             "Master pod creation timestamp is updated")
+            self.assertEqual(replicaCreationTimestamp, replica.metadata.creation_timestamp,
+                             "Master pod creation timestamp is updated")
+
+            # query max_connections setting
             setting_query = """
                SELECT setting
                  FROM pg_settings
                 WHERE name = 'max_connections';
               """
-            self.eventuallyEqual(lambda: self.query_database(masterPod.metadata.name, "postgres", setting_query)[0], new_max_connections_value,
-                                 "New max_connections setting not applied", 10, 5)
+            self.eventuallyEqual(lambda: self.query_database(leader.metadata.name, "postgres", setting_query)[0], new_max_connections_value,
+                                 "New max_connections setting not applied on master", 10, 5)
+            self.eventuallyNotEqual(lambda: self.query_database(replica.metadata.name, "postgres", setting_query)[0], new_max_connections_value,
+                                    "Expected max_connections not to be updated on replica since Postgres was restarted there first", 10, 5)

-            # make sure that pod wasn't recreated
-            self.assertEqual(creationTimestamp, masterPod.metadata.creation_timestamp,
-                             "Master pod creation timestamp is updated")
+            # the next sync should restart the replica because it has pending_restart flag set
+            # force next sync by deleting the operator pod
+            k8s.delete_operator_pod()
+            self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync")
+
+            self.eventuallyEqual(lambda: self.query_database(replica.metadata.name, "postgres", setting_query)[0], new_max_connections_value,
+                                 "New max_connections setting not applied on replica", 10, 5)
+
+            # decrease max_connections again
+            # this time restart will be correct and new value should appear on both instances
+            lower_max_connections_value = "30"
+            pg_patch_max_connections = {
+                "spec": {
+                    "postgresql": {
+                        "parameters": {
+                            "max_connections": lower_max_connections_value
+                        }
+                    }
+                }
+            }
+
+            k8s.api.custom_objects_api.patch_namespaced_custom_object(
+                "acid.zalan.do", "v1", "default", "postgresqls", "acid-minimal-cluster", pg_patch_max_connections)
+
+            self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync")
+
+            # check Patroni config again
+            pg_patch_config["spec"]["postgresql"]["parameters"]["max_connections"] = lower_max_connections_value
+            self.eventuallyTrue(compare_config, "Postgres config not applied")
+
+            # and query max_connections setting again
+            self.eventuallyEqual(lambda: self.query_database(leader.metadata.name, "postgres", setting_query)[0], lower_max_connections_value,
+                                 "Previous max_connections setting not applied on master", 10, 5)
+            self.eventuallyEqual(lambda: self.query_database(replica.metadata.name, "postgres", setting_query)[0], lower_max_connections_value,
+                                 "Previous max_connections setting not applied on replica", 10, 5)

         except timeout_decorator.TimeoutError:
             print('Operator log: {}'.format(k8s.get_operator_log()))
@@ -1554,6 +1600,7 @@ def assert_distributed_pods(self, master_node, replica_nodes, cluster_label):
         Toggle pod anti affinty to distribute pods accross nodes (replica in particular).
         '''
         k8s = self.k8s
+        cluster_label = 'application=spilo,cluster-name=acid-minimal-cluster'
         failover_targets = self.get_failover_targets(master_node, replica_nodes)

         # enable pod anti affintiy in config map which should trigger movement of replica
@@ -1572,8 +1619,8 @@ def assert_distributed_pods(self, master_node, replica_nodes, cluster_label):
             }
         }
         k8s.update_config(patch_disable_antiaffinity, "disable antiaffinity")
-        k8s.wait_for_pod_start('spilo-role=master')
-        k8s.wait_for_pod_start('spilo-role=replica')
+        k8s.wait_for_pod_start('spilo-role=master,' + cluster_label)
+        k8s.wait_for_pod_start('spilo-role=replica,' + cluster_label)
         return True

     def list_databases(self, pod_name):

pkg/cluster/sync.go

Lines changed: 81 additions & 45 deletions

@@ -6,6 +6,7 @@ import (
 	"fmt"
 	"reflect"
 	"regexp"
+	"strconv"
 	"strings"
 	"time"

@@ -20,6 +21,14 @@ import (
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 )

+var requireMasterRestartWhenDecreased = []string{
+	"max_connections",
+	"max_prepared_transactions",
+	"max_locks_per_transaction",
+	"max_worker_processes",
+	"max_wal_senders",
+}
+
 // Sync syncs the cluster, making sure the actual Kubernetes objects correspond to what is defined in the manifest.
 // Unlike the update, sync does not error out if some objects do not exist and takes care of creating them.
func (c *Cluster) Sync(newSpec *acidv1.Postgresql) error {
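
Why these five parameters in particular: PostgreSQL records them in pg_control and, as I read the PostgreSQL docs, requires a hot standby to run with values at least as high as the primary's, otherwise WAL replay on the standby cannot proceed. An increase can therefore keep the usual replicas-first restart order, but a decrease only becomes safe on the replicas once the primary has restarted with the lower value.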
@@ -264,11 +273,9 @@ func (c *Cluster) syncPodDisruptionBudget(isUpdate bool) error {

 func (c *Cluster) syncStatefulSet() error {
 	var (
-		masterPod               *v1.Pod
-		postgresConfig          map[string]interface{}
-		instanceRestartRequired bool
+		restartWait        uint32
+		restartMasterFirst bool
 	)
-
 	podsToRecreate := make([]v1.Pod, 0)
 	switchoverCandidates := make([]spec.NamespacedName, 0)
@@ -402,38 +409,41 @@ func (c *Cluster) syncStatefulSet() error {
 			c.logger.Warningf("could not get Postgres config from pod %s: %v", podName, err)
 			continue
 		}
+		restartWait = patroniConfig.LoopWait

 		// empty config probably means cluster is not fully initialized yet, e.g. restoring from backup
 		// do not attempt a restart
 		if !reflect.DeepEqual(patroniConfig, emptyPatroniConfig) || len(pgParameters) > 0 {
-			instanceRestartRequired, err = c.checkAndSetGlobalPostgreSQLConfiguration(&pod, patroniConfig, pgParameters)
+			restartMasterFirst, err = c.checkAndSetGlobalPostgreSQLConfiguration(&pod, patroniConfig, pgParameters)
 			if err != nil {
 				c.logger.Warningf("could not set PostgreSQL configuration options for pod %s: %v", podName, err)
 				continue
 			}
+			// it could take up to LoopWait to apply the config
+			time.Sleep(time.Duration(restartWait)*time.Second + time.Second*2)
 			break
 		}
 	}

-	// if the config update requires a restart, call Patroni restart for replicas first, then master
-	if instanceRestartRequired {
-		c.logger.Debug("restarting Postgres server within pods")
-		ttl, ok := postgresConfig["ttl"].(int32)
-		if !ok {
-			ttl = 30
-		}
-		for i, pod := range pods {
-			role := PostgresRole(pod.Labels[c.OpConfig.PodRoleLabel])
-			if role == Master {
-				masterPod = &pods[i]
-				continue
-			}
-			c.restartInstance(&pod)
-			time.Sleep(time.Duration(ttl) * time.Second)
+	// restart instances if required
+	remainingPods := make([]*v1.Pod, 0)
+	skipRole := Master
+	if restartMasterFirst {
+		skipRole = Replica
+	}
+	for i, pod := range pods {
+		role := PostgresRole(pod.Labels[c.OpConfig.PodRoleLabel])
+		if role == skipRole {
+			remainingPods = append(remainingPods, &pods[i])
+			continue
 		}
+		c.restartInstance(&pod, restartWait)
+	}

-	if masterPod != nil {
-		c.restartInstance(masterPod)
+	// in most cases only the master should be left to restart
+	if len(remainingPods) > 0 {
+		for _, remainingPod := range remainingPods {
+			c.restartInstance(remainingPod, restartWait)
 		}
 	}

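The loop above partitions pods by role: everything except skipRole is restarted right away, and the rest (usually just the master) is kept for the end. A toy, standalone sketch of that ordering, with simplified pod and role types standing in for the operator's own (the real ones live in pkg/cluster and the Kubernetes API types):

package main

import "fmt"

type Role string

const (
	Master  Role = "master"
	Replica Role = "replica"
)

type Pod struct {
	Name string
	Role Role
}

// restartOrder returns pod names in the order they would be restarted:
// replicas first by default, master first when restartMasterFirst is set.
func restartOrder(pods []Pod, restartMasterFirst bool) []string {
	skip := Master
	if restartMasterFirst {
		skip = Replica
	}
	order := make([]string, 0, len(pods))
	remaining := make([]string, 0)
	for _, p := range pods {
		if p.Role == skip {
			remaining = append(remaining, p.Name)
			continue
		}
		order = append(order, p.Name)
	}
	return append(order, remaining...)
}

func main() {
	pods := []Pod{{"pod-0", Master}, {"pod-1", Replica}, {"pod-2", Replica}}
	fmt.Println(restartOrder(pods, false)) // [pod-1 pod-2 pod-0]
	fmt.Println(restartOrder(pods, true))  // [pod-0 pod-1 pod-2]
}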
@@ -450,19 +460,27 @@ func (c *Cluster) syncStatefulSet() error {
 	return nil
 }

-func (c *Cluster) restartInstance(pod *v1.Pod) {
+func (c *Cluster) restartInstance(pod *v1.Pod, restartWait uint32) {
 	podName := util.NameFromMeta(pod.ObjectMeta)
 	role := PostgresRole(pod.Labels[c.OpConfig.PodRoleLabel])

-	c.eventRecorder.Event(c.GetReference(), v1.EventTypeNormal, "Update", fmt.Sprintf("restarting Postgres server within %s pod %s", role, pod.Name))
-
-	if err := c.patroni.Restart(pod); err != nil {
-		c.logger.Warningf("could not restart Postgres server within %s pod %s: %v", role, podName, err)
+	// if the config update requires a restart, call Patroni restart
+	memberData, err := c.patroni.GetMemberData(pod)
+	if err != nil {
+		c.logger.Debugf("could not get member data of %s pod %s - skipping possible restart attempt: %v", role, podName, err)
 		return
 	}

-	c.logger.Debugf("Postgres server successfuly restarted in %s pod %s", role, podName)
-	c.eventRecorder.Event(c.GetReference(), v1.EventTypeNormal, "Update", fmt.Sprintf("Postgres server restart done for %s pod %s", role, pod.Name))
+	// do restart only when it is pending
+	if memberData.PendingRestart {
+		c.eventRecorder.Event(c.GetReference(), v1.EventTypeNormal, "Update", fmt.Sprintf("restarting Postgres server within %s pod %s", role, pod.Name))
+		if err := c.patroni.Restart(pod); err != nil {
+			c.logger.Warningf("could not restart Postgres server within %s pod %s: %v", role, podName, err)
+			return
+		}
+		time.Sleep(time.Duration(restartWait) * time.Second)
+		c.eventRecorder.Event(c.GetReference(), v1.EventTypeNormal, "Update", fmt.Sprintf("Postgres server restart done for %s pod %s", role, pod.Name))
+	}
 }

// AnnotationsToPropagate get the annotations to update if required
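
restartInstance now consults Patroni's member status and only restarts when a restart is actually pending. As a sketch of what GetMemberData boils down to: Patroni serves member status on its REST API (port 8008 by default), and a pending_restart field shows up in that JSON while a restart is pending. The endpoint and field come from Patroni; the Go wrapper below is illustrative, not the operator's client code.

package main

import (
	"encoding/json"
	"fmt"
	"net/http"
)

// memberData mirrors the fields of interest from Patroni's GET /patroni response.
type memberData struct {
	State          string `json:"state"`
	Role           string `json:"role"`
	PendingRestart bool   `json:"pending_restart"`
}

// pendingRestart asks a Patroni member whether a restart is pending,
// assuming Patroni's default REST port 8008 on the given host.
func pendingRestart(host string) (bool, error) {
	resp, err := http.Get(fmt.Sprintf("http://%s:8008/patroni", host))
	if err != nil {
		return false, err
	}
	defer resp.Body.Close()

	var md memberData
	if err := json.NewDecoder(resp.Body).Decode(&md); err != nil {
		return false, err
	}
	// the field is absent when no restart is pending, which decodes to false
	return md.PendingRestart, nil
}

func main() {
	restart, err := pendingRestart("localhost")
	if err != nil {
		fmt.Println("could not reach Patroni:", err)
		return
	}
	fmt.Println("pending_restart:", restart)
}

The actual restart is then a POST to /restart on the same port, which is presumably what c.patroni.Restart wraps.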
@@ -502,21 +520,10 @@ func (c *Cluster) AnnotationsToPropagate(annotations map[string]string) map[stri
 func (c *Cluster) checkAndSetGlobalPostgreSQLConfiguration(pod *v1.Pod, patroniConfig acidv1.Patroni, effectivePgParameters map[string]string) (bool, error) {
 	configToSet := make(map[string]interface{})
 	parametersToSet := make(map[string]string)
+	restartMaster := make([]bool, 0)
+	requiresMasterRestart := false

-	// compare parameters under postgresql section with c.Spec.Postgresql.Parameters from manifest
-	desiredPgParameters := c.Spec.Parameters
-	for desiredOption, desiredValue := range desiredPgParameters {
-		effectiveValue := effectivePgParameters[desiredOption]
-		if isBootstrapOnlyParameter(desiredOption) && (effectiveValue != desiredValue) {
-			parametersToSet[desiredOption] = desiredValue
-		}
-	}
-
-	if len(parametersToSet) > 0 {
-		configToSet["postgresql"] = map[string]interface{}{constants.PatroniPGParametersParameterName: parametersToSet}
-	}
-
-	// compare other options from config with c.Spec.Patroni from manifest
+	// compare options from config with c.Spec.Patroni from manifest
 	desiredPatroniConfig := c.Spec.Patroni
 	if desiredPatroniConfig.LoopWait > 0 && desiredPatroniConfig.LoopWait != patroniConfig.LoopWait {
 		configToSet["loop_wait"] = desiredPatroniConfig.LoopWait
@@ -554,6 +561,35 @@ func (c *Cluster) checkAndSetGlobalPostgreSQLConfiguration(pod *v1.Pod, patroniC
 		configToSet["slots"] = slotsToSet
 	}

+	// compare parameters under postgresql section with c.Spec.Postgresql.Parameters from manifest
+	desiredPgParameters := c.Spec.Parameters
+	for desiredOption, desiredValue := range desiredPgParameters {
+		effectiveValue := effectivePgParameters[desiredOption]
+		if isBootstrapOnlyParameter(desiredOption) && (effectiveValue != desiredValue) {
+			parametersToSet[desiredOption] = desiredValue
+			if util.SliceContains(requireMasterRestartWhenDecreased, desiredOption) {
+				effectiveValueNum, errConv := strconv.Atoi(effectiveValue)
+				desiredValueNum, errConv2 := strconv.Atoi(desiredValue)
+				if errConv != nil || errConv2 != nil {
+					continue
+				}
+				if effectiveValueNum > desiredValueNum {
+					restartMaster = append(restartMaster, true)
+					continue
+				}
+			}
+			restartMaster = append(restartMaster, false)
+		}
+	}
+
+	if !util.SliceContains(restartMaster, false) && len(configToSet) == 0 {
+		requiresMasterRestart = true
+	}
+
+	if len(parametersToSet) > 0 {
+		configToSet["postgresql"] = map[string]interface{}{constants.PatroniPGParametersParameterName: parametersToSet}
+	}
+
 	if len(configToSet) == 0 {
 		return false, nil
 	}
@@ -569,10 +605,10 @@ func (c *Cluster) checkAndSetGlobalPostgreSQLConfiguration(pod *v1.Pod, patroniC
 	c.logger.Debugf("patching Postgres config via Patroni API on pod %s with following options: %s",
 		podName, configToSetJson)
 	if err = c.patroni.SetConfig(pod, configToSet); err != nil {
-		return true, fmt.Errorf("could not patch postgres parameters with a pod %s: %v", podName, err)
+		return requiresMasterRestart, fmt.Errorf("could not patch postgres parameters with a pod %s: %v", podName, err)
 	}

-	return true, nil
+	return requiresMasterRestart, nil
 }

 func (c *Cluster) syncSecrets() error {
