Skip to content

Commit 636a9a8

Browse files
authored
Support major version upgrade via manifest and global upgrades via min version (zalando#1372)
Support triggering a major version upgrade via the manifest. There are three modes: `off`, `manual`, and `full`. `manual` is what you expect, and `full` will additionally auto-upgrade clusters below a certain version threshold.
1 parent ca968ca commit 636a9a8

File tree

14 files changed

+331
-62
lines changed

14 files changed

+331
-62
lines changed

e2e/tests/test_e2e.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -926,6 +926,33 @@ def test_statefulset_annotation_propagation(self):
926926
self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync")
927927
self.eventuallyTrue(lambda: k8s.check_statefulset_annotations(cluster_label, annotations), "Annotations missing")
928928

929+
@timeout_decorator.timeout(TEST_TIMEOUT_SEC)
@unittest.skip("Skipping this test until fixed")
def test_zaa_test_major_version_upgrade(self):
    """
    Deploy a Postgres 12 cluster, bump the manifest version to 13 and
    verify the operator performs an in-place major version upgrade.

    Named 'zaa' so it runs near the end of the alphabetically ordered
    test suite but before the taint-eviction ('zzz') test.
    """
    k8s = self.k8s
    # create_with_kubectl's return value is not needed; the readiness
    # checks below are what validate the deployment
    k8s.create_with_kubectl("manifests/minimal-postgres-manifest-12.yaml")
    self.eventuallyEqual(lambda: k8s.count_running_pods(labels="application=spilo,cluster-name=acid-upgrade-test"), 2, "No 2 pods running")
    self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync")

    # patch only the version field; the operator reacts to the spec change
    pg_patch_version = {
        "spec": {
            "postgres": {
                "version": "13"
            }
        }
    }
    k8s.api.custom_objects_api.patch_namespaced_custom_object(
        "acid.zalan.do", "v1", "default", "postgresqls", "acid-upgrade-test", pg_patch_version)

    self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync")

    def check_version_13():
        p = k8s.get_patroni_state("acid-upgrade-test-0")
        # NOTE(review): assumes server_version is a string like "130002";
        # Patroni may report an int, in which case str() is needed — TODO confirm
        version = p["server_version"][0:2]
        return version

    # fixed typo: was self.evantuallyEqual, which raises AttributeError
    self.eventuallyEqual(check_version_13, "13", "Version was not upgraded to 13")
929956
@timeout_decorator.timeout(TEST_TIMEOUT_SEC)
930957
@unittest.skip("Skipping this test until fixed")
931958
def test_zzz_taint_based_eviction(self):

manifests/configmap.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@ data:
7474
# logical_backup_s3_secret_access_key: ""
7575
logical_backup_s3_sse: "AES256"
7676
logical_backup_schedule: "30 00 * * *"
77+
major_version_upgrade_mode: "manual"
7778
master_dns_name_format: "{cluster}.{team}.{hostedzone}"
7879
# master_pod_move_timeout: 20m
7980
# max_instances: "-1"
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
# Minimal cluster manifest pinned to Postgres 12.
# Used by the e2e major version upgrade test as the starting version,
# which the test then patches to 13.
apiVersion: "acid.zalan.do/v1"
kind: postgresql
metadata:
  name: acid-upgrade-test
  namespace: default
spec:
  teamId: "acid"
  volume:
    size: 1Gi
  numberOfInstances: 2
  users:
    zalando:  # database owner
    - superuser
    - createdb
    foo_user: []  # role for application foo
  databases:
    foo: zalando  # dbname: owner
  preparedDatabases:
    bar: {}
  postgresql:
    version: "12"

pkg/cluster/cluster.go

Lines changed: 31 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -83,15 +83,16 @@ type Cluster struct {
8383
deleteOptions metav1.DeleteOptions
8484
podEventsQueue *cache.FIFO
8585

86-
teamsAPIClient teams.Interface
87-
oauthTokenGetter OAuthTokenGetter
88-
KubeClient k8sutil.KubernetesClient //TODO: move clients to the better place?
89-
currentProcess Process
90-
processMu sync.RWMutex // protects the current operation for reporting, no need to hold the master mutex
91-
specMu sync.RWMutex // protects the spec for reporting, no need to hold the master mutex
92-
ConnectionPooler map[PostgresRole]*ConnectionPoolerObjects
93-
EBSVolumes map[string]volumes.VolumeProperties
94-
VolumeResizer volumes.VolumeResizer
86+
teamsAPIClient teams.Interface
87+
oauthTokenGetter OAuthTokenGetter
88+
KubeClient k8sutil.KubernetesClient //TODO: move clients to the better place?
89+
currentProcess Process
90+
processMu sync.RWMutex // protects the current operation for reporting, no need to hold the master mutex
91+
specMu sync.RWMutex // protects the spec for reporting, no need to hold the master mutex
92+
ConnectionPooler map[PostgresRole]*ConnectionPoolerObjects
93+
EBSVolumes map[string]volumes.VolumeProperties
94+
VolumeResizer volumes.VolumeResizer
95+
currentMajorVersion int
9596
}
9697

9798
type compareStatefulsetResult struct {
@@ -128,15 +129,16 @@ func New(cfg Config, kubeClient k8sutil.KubernetesClient, pgSpec acidv1.Postgres
128129
Secrets: make(map[types.UID]*v1.Secret),
129130
Services: make(map[PostgresRole]*v1.Service),
130131
Endpoints: make(map[PostgresRole]*v1.Endpoints)},
131-
userSyncStrategy: users.DefaultUserSyncStrategy{PasswordEncryption: passwordEncryption},
132-
deleteOptions: metav1.DeleteOptions{PropagationPolicy: &deletePropagationPolicy},
133-
podEventsQueue: podEventsQueue,
134-
KubeClient: kubeClient,
132+
userSyncStrategy: users.DefaultUserSyncStrategy{PasswordEncryption: passwordEncryption},
133+
deleteOptions: metav1.DeleteOptions{PropagationPolicy: &deletePropagationPolicy},
134+
podEventsQueue: podEventsQueue,
135+
KubeClient: kubeClient,
136+
currentMajorVersion: 0,
135137
}
136138
cluster.logger = logger.WithField("pkg", "cluster").WithField("cluster-name", cluster.clusterName())
137139
cluster.teamsAPIClient = teams.NewTeamsAPI(cfg.OpConfig.TeamsAPIUrl, logger)
138140
cluster.oauthTokenGetter = newSecretOauthTokenGetter(&kubeClient, cfg.OpConfig.OAuthTokenSecretName)
139-
cluster.patroni = patroni.New(cluster.logger)
141+
cluster.patroni = patroni.New(cluster.logger, nil)
140142
cluster.eventRecorder = eventRecorder
141143

142144
cluster.EBSVolumes = make(map[string]volumes.VolumeProperties)
@@ -359,7 +361,7 @@ func (c *Cluster) compareStatefulSetWith(statefulSet *appsv1.StatefulSet) *compa
359361
}
360362
if !reflect.DeepEqual(c.Statefulset.Annotations, statefulSet.Annotations) {
361363
match = false
362-
reasons = append(reasons, "new statefulset's annotations does not match the current one")
364+
reasons = append(reasons, "new statefulset's annotations do not match the current one")
363365
}
364366

365367
needsRollUpdate, reasons = c.compareContainers("initContainers", c.Statefulset.Spec.Template.Spec.InitContainers, statefulSet.Spec.Template.Spec.InitContainers, needsRollUpdate, reasons)
@@ -614,17 +616,14 @@ func (c *Cluster) Update(oldSpec, newSpec *acidv1.Postgresql) error {
614616

615617
logNiceDiff(c.logger, oldSpec, newSpec)
616618

617-
if oldSpec.Spec.PostgresqlParam.PgVersion > newSpec.Spec.PostgresqlParam.PgVersion {
618-
c.logger.Warningf("postgresql version change(%q -> %q) has no effect",
619-
oldSpec.Spec.PostgresqlParam.PgVersion, newSpec.Spec.PostgresqlParam.PgVersion)
620-
c.eventRecorder.Eventf(c.GetReference(), v1.EventTypeWarning, "PostgreSQL", "postgresql version change(%q -> %q) has no effect",
621-
oldSpec.Spec.PostgresqlParam.PgVersion, newSpec.Spec.PostgresqlParam.PgVersion)
622-
// we need that hack to generate statefulset with the old version
623-
newSpec.Spec.PostgresqlParam.PgVersion = oldSpec.Spec.PostgresqlParam.PgVersion
624-
} else if oldSpec.Spec.PostgresqlParam.PgVersion < newSpec.Spec.PostgresqlParam.PgVersion {
625-
c.logger.Infof("postgresql version increased (%q -> %q), major version upgrade can be done manually after StatefulSet Sync",
619+
if IsBiggerPostgresVersion(oldSpec.Spec.PostgresqlParam.PgVersion, c.GetDesiredMajorVersion()) {
620+
c.logger.Infof("postgresql version increased (%s -> %s), depending on config manual upgrade needed",
626621
oldSpec.Spec.PostgresqlParam.PgVersion, newSpec.Spec.PostgresqlParam.PgVersion)
627622
syncStatetfulSet = true
623+
} else {
624+
c.logger.Infof("postgresql major version unchanged or smaller, no changes needed")
625+
// sticking with old version, this will also advance GetDesiredVersion next time.
626+
newSpec.Spec.PostgresqlParam.PgVersion = oldSpec.Spec.PostgresqlParam.PgVersion
628627
}
629628

630629
// Service
@@ -781,6 +780,14 @@ func (c *Cluster) Update(oldSpec, newSpec *acidv1.Postgresql) error {
781780
updateFailed = true
782781
}
783782

783+
if !updateFailed {
784+
// Major version upgrade must only fire after success of earlier operations and should stay last
785+
if err := c.majorVersionUpgrade(); err != nil {
786+
c.logger.Errorf("major version upgrade failed: %v", err)
787+
updateFailed = true
788+
}
789+
}
790+
784791
return nil
785792
}
786793

pkg/cluster/k8sres.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -734,7 +734,7 @@ func (c *Cluster) generateSpiloPodEnvVars(uid types.UID, spiloConfiguration stri
734734
},
735735
}
736736
if c.OpConfig.EnablePgVersionEnvVar {
737-
envVars = append(envVars, v1.EnvVar{Name: "PGVERSION", Value: c.Spec.PgVersion})
737+
envVars = append(envVars, v1.EnvVar{Name: "PGVERSION", Value: c.GetDesiredMajorVersion()})
738738
}
739739
// Spilo expects cluster labels as JSON
740740
if clusterLabels, err := json.Marshal(labels.Set(c.OpConfig.ClusterLabels)); err != nil {

pkg/cluster/majorversionupgrade.go

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
package cluster
2+
3+
import (
4+
"fmt"
5+
6+
"github.com/zalando/postgres-operator/pkg/spec"
7+
v1 "k8s.io/api/core/v1"
8+
)
9+
10+
// VersionMap maps a major version string as used in the manifest to a
// comparable integer in Postgres server_version format (e.g. "12" -> 120000).
// Versions not present in the map compare as 0, i.e. smaller than any known one.
var VersionMap = map[string]int{
	"9.5": 90500,
	"9.6": 90600,
	"10":  100000,
	"11":  110000,
	"12":  120000,
	"13":  130000,
}

// IsBiggerPostgresVersion reports whether newVersion is a strictly higher
// major version than old. Unknown version strings map to 0.
func IsBiggerPostgresVersion(old string, newVersion string) bool {
	// plain map reads suffice; a missing key yields 0
	// (renamed parameter: "new" shadowed the builtin)
	oldN := VersionMap[old]
	newN := VersionMap[newVersion]
	return newN > oldN
}
26+
27+
// GetDesiredMajorVersionAsInt Convert string to comparable integer of PG version
28+
func (c *Cluster) GetDesiredMajorVersionAsInt() int {
29+
return VersionMap[c.GetDesiredMajorVersion()]
30+
}
31+
32+
// GetDesiredMajorVersion returns major version to use, incl. potential auto upgrade
33+
func (c *Cluster) GetDesiredMajorVersion() string {
34+
35+
if c.Config.OpConfig.MajorVersionUpgradeMode == "full" {
36+
// current is 9.5, minimal is 11 allowing 11 to 13 clusters, everything below is upgraded
37+
if IsBiggerPostgresVersion(c.Spec.PgVersion, c.Config.OpConfig.MinimalMajorVersion) {
38+
c.logger.Infof("overwriting configured major version %s to %s", c.Spec.PgVersion, c.Config.OpConfig.TargetMajorVersion)
39+
return c.Config.OpConfig.TargetMajorVersion
40+
}
41+
}
42+
43+
return c.Spec.PgVersion
44+
}
45+
46+
func (c *Cluster) majorVersionUpgrade() error {
47+
48+
if c.OpConfig.MajorVersionUpgradeMode == "off" {
49+
return nil
50+
}
51+
52+
desiredVersion := c.GetDesiredMajorVersionAsInt()
53+
54+
if c.currentMajorVersion >= desiredVersion {
55+
c.logger.Infof("cluster version up to date. current: %d desired: %d", c.currentMajorVersion, desiredVersion)
56+
return nil
57+
}
58+
59+
pods, err := c.listPods()
60+
if err != nil {
61+
return err
62+
}
63+
64+
allRunning := true
65+
66+
var masterPod *v1.Pod
67+
68+
for _, pod := range pods {
69+
ps, _ := c.patroni.GetMemberData(&pod)
70+
71+
if ps.State != "running" {
72+
allRunning = false
73+
c.logger.Infof("identified non running pod, potentially skipping major version upgrade")
74+
}
75+
76+
if ps.Role == "master" {
77+
masterPod = &pod
78+
c.currentMajorVersion = ps.ServerVersion
79+
}
80+
}
81+
82+
numberOfPods := len(pods)
83+
if allRunning && masterPod != nil {
84+
c.logger.Infof("healthy cluster ready to upgrade, current: %d desired: %d", c.currentMajorVersion, desiredVersion)
85+
if c.currentMajorVersion < desiredVersion {
86+
podName := &spec.NamespacedName{Namespace: masterPod.Namespace, Name: masterPod.Name}
87+
c.logger.Infof("triggering major version upgrade on pod %s of %d pods", masterPod.Name, numberOfPods)
88+
upgradeCommand := fmt.Sprintf("/usr/bin/python3 /scripts/inplace_upgrade.py %d 2>&1 | tee last_upgrade.log", numberOfPods)
89+
_, err := c.ExecCommand(podName, "/bin/su", "postgres", "-c", upgradeCommand)
90+
if err != nil {
91+
return err
92+
}
93+
}
94+
}
95+
96+
return nil
97+
}

pkg/cluster/pod.go

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import (
1212

1313
"github.com/zalando/postgres-operator/pkg/spec"
1414
"github.com/zalando/postgres-operator/pkg/util"
15+
"github.com/zalando/postgres-operator/pkg/util/patroni"
1516
"github.com/zalando/postgres-operator/pkg/util/retryutil"
1617
)
1718

@@ -312,14 +313,14 @@ func (c *Cluster) isSafeToRecreatePods(pods *v1.PodList) bool {
312313

313314
for _, pod := range pods.Items {
314315

315-
var state string
316+
var data patroni.MemberData
316317

317318
err := retryutil.Retry(1*time.Second, 5*time.Second,
318319
func() (bool, error) {
319320

320321
var err error
321322

322-
state, err = c.patroni.GetPatroniMemberState(&pod)
323+
data, err = c.patroni.GetMemberData(&pod)
323324

324325
if err != nil {
325326
return false, err
@@ -331,7 +332,7 @@ func (c *Cluster) isSafeToRecreatePods(pods *v1.PodList) bool {
331332
if err != nil {
332333
c.logger.Errorf("failed to get Patroni state for pod: %s", err)
333334
return false
334-
} else if state == "creating replica" {
335+
} else if data.State == "creating replica" {
335336
c.logger.Warningf("cannot re-create replica %s: it is currently being initialized", pod.Name)
336337
return false
337338
}

pkg/cluster/sync.go

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,11 @@ func (c *Cluster) Sync(newSpec *acidv1.Postgresql) error {
118118
return fmt.Errorf("could not sync connection pooler: %v", err)
119119
}
120120

121+
// Major version upgrade must only run after success of all earlier operations, must remain last item in sync
122+
if err := c.majorVersionUpgrade(); err != nil {
123+
c.logger.Errorf("major version upgrade failed: %v", err)
124+
}
125+
121126
return err
122127
}
123128

@@ -471,7 +476,7 @@ func (c *Cluster) syncSecrets() error {
471476
for secretUsername, secretSpec := range secrets {
472477
if secret, err = c.KubeClient.Secrets(secretSpec.Namespace).Create(context.TODO(), secretSpec, metav1.CreateOptions{}); err == nil {
473478
c.Secrets[secret.UID] = secret
474-
c.logger.Debugf("created new secret %q, uid: %q", util.NameFromMeta(secret.ObjectMeta), secret.UID)
479+
c.logger.Debugf("created new secret %s, uid: %s", util.NameFromMeta(secret.ObjectMeta), secret.UID)
475480
continue
476481
}
477482
if k8sutil.ResourceAlreadyExists(err) {
@@ -480,7 +485,7 @@ func (c *Cluster) syncSecrets() error {
480485
return fmt.Errorf("could not get current secret: %v", err)
481486
}
482487
if secretUsername != string(secret.Data["username"]) {
483-
c.logger.Errorf("secret %s does not contain the role %q", secretSpec.Name, secretUsername)
488+
c.logger.Errorf("secret %s does not contain the role %s", secretSpec.Name, secretUsername)
484489
continue
485490
}
486491
c.Secrets[secret.UID] = secret
@@ -499,7 +504,7 @@ func (c *Cluster) syncSecrets() error {
499504
if pwdUser.Password != string(secret.Data["password"]) &&
500505
pwdUser.Origin == spec.RoleOriginInfrastructure {
501506

502-
c.logger.Debugf("updating the secret %q from the infrastructure roles", secretSpec.Name)
507+
c.logger.Debugf("updating the secret %s from the infrastructure roles", secretSpec.Name)
503508
if _, err = c.KubeClient.Secrets(secretSpec.Namespace).Update(context.TODO(), secretSpec, metav1.UpdateOptions{}); err != nil {
504509
return fmt.Errorf("could not update infrastructure role secret for role %q: %v", secretUsername, err)
505510
}
@@ -509,7 +514,7 @@ func (c *Cluster) syncSecrets() error {
509514
userMap[secretUsername] = pwdUser
510515
}
511516
} else {
512-
return fmt.Errorf("could not create secret for user %q: %v", secretUsername, err)
517+
return fmt.Errorf("could not create secret for user %s: %v", secretUsername, err)
513518
}
514519
}
515520

0 commit comments

Comments
 (0)