Skip to content

Commit 70f3ee8

Browse files
authored
skip db sync on failed initUsers during UPDATE (zalando#2083)
* skip db sync on failed initUsers during UPDATE * provide unit test for teams API being unavailable * add test for 404 case
1 parent d55e74e commit 70f3ee8

File tree

4 files changed

+120
-59
lines changed

4 files changed

+120
-59
lines changed

pkg/cluster/cluster.go

Lines changed: 41 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -227,6 +227,10 @@ func (c *Cluster) initUsers() error {
227227
}
228228

229229
if err := c.initHumanUsers(); err != nil {
230+
// remember all cached users in c.pgUsers
231+
for cachedUserName, cachedUser := range c.pgUsersCache {
232+
c.pgUsers[cachedUserName] = cachedUser
233+
}
230234
return fmt.Errorf("could not init human users: %v", err)
231235
}
232236

@@ -748,6 +752,7 @@ func (c *Cluster) compareServices(old, new *v1.Service) (bool, string) {
748752
// for a cluster that had no such job before. In this case a missing job is not an error.
749753
func (c *Cluster) Update(oldSpec, newSpec *acidv1.Postgresql) error {
750754
updateFailed := false
755+
userInitFailed := false
751756
syncStatefulSet := false
752757

753758
c.mu.Lock()
@@ -785,32 +790,39 @@ func (c *Cluster) Update(oldSpec, newSpec *acidv1.Postgresql) error {
785790
}
786791
}
787792

788-
// check if users need to be synced
789-
sameUsers := reflect.DeepEqual(oldSpec.Spec.Users, newSpec.Spec.Users) &&
790-
reflect.DeepEqual(oldSpec.Spec.PreparedDatabases, newSpec.Spec.PreparedDatabases)
791-
sameRotatedUsers := reflect.DeepEqual(oldSpec.Spec.UsersWithSecretRotation, newSpec.Spec.UsersWithSecretRotation) &&
792-
reflect.DeepEqual(oldSpec.Spec.UsersWithInPlaceSecretRotation, newSpec.Spec.UsersWithInPlaceSecretRotation)
793-
794-
// connection pooler needs one system user created, which is done in
795-
// initUsers. Check if it needs to be called.
796-
needConnectionPooler := needMasterConnectionPoolerWorker(&newSpec.Spec) ||
797-
needReplicaConnectionPoolerWorker(&newSpec.Spec)
798-
799-
if !sameUsers || !sameRotatedUsers || needConnectionPooler {
800-
c.logger.Debugf("initialize users")
801-
if err := c.initUsers(); err != nil {
802-
c.logger.Errorf("could not init users: %v", err)
803-
updateFailed = true
804-
}
805-
806-
c.logger.Debugf("syncing secrets")
793+
// Users
794+
func() {
795+
// check if users need to be synced during update
796+
sameUsers := reflect.DeepEqual(oldSpec.Spec.Users, newSpec.Spec.Users) &&
797+
reflect.DeepEqual(oldSpec.Spec.PreparedDatabases, newSpec.Spec.PreparedDatabases)
798+
sameRotatedUsers := reflect.DeepEqual(oldSpec.Spec.UsersWithSecretRotation, newSpec.Spec.UsersWithSecretRotation) &&
799+
reflect.DeepEqual(oldSpec.Spec.UsersWithInPlaceSecretRotation, newSpec.Spec.UsersWithInPlaceSecretRotation)
800+
801+
// connection pooler needs one system user created who is initialized in initUsers
802+
// only when disabled in oldSpec and enabled in newSpec
803+
needPoolerUser := c.needConnectionPoolerUser(&oldSpec.Spec, &newSpec.Spec)
804+
805+
// streams new replication user created who is initialized in initUsers
806+
// only when streams were not specified in oldSpec but in newSpec
807+
needStreamUser := len(oldSpec.Spec.Streams) == 0 && len(newSpec.Spec.Streams) > 0
808+
809+
if !sameUsers || !sameRotatedUsers || needPoolerUser || needStreamUser {
810+
c.logger.Debugf("initialize users")
811+
if err := c.initUsers(); err != nil {
812+
c.logger.Errorf("could not init users - skipping sync of secrets and databases: %v", err)
813+
userInitFailed = true
814+
updateFailed = true
815+
return
816+
}
807817

808-
//TODO: mind the secrets of the deleted/new users
809-
if err := c.syncSecrets(); err != nil {
810-
c.logger.Errorf("could not sync secrets: %v", err)
811-
updateFailed = true
818+
c.logger.Debugf("syncing secrets")
819+
//TODO: mind the secrets of the deleted/new users
820+
if err := c.syncSecrets(); err != nil {
821+
c.logger.Errorf("could not sync secrets: %v", err)
822+
updateFailed = true
823+
}
812824
}
813-
}
825+
}()
814826

815827
// Volume
816828
if c.OpConfig.StorageResizeMode != "off" {
@@ -892,7 +904,7 @@ func (c *Cluster) Update(oldSpec, newSpec *acidv1.Postgresql) error {
892904
}()
893905

894906
// Roles and Databases
895-
if !(c.databaseAccessDisabled() || c.getNumberOfInstances(&c.Spec) <= 0 || c.Spec.StandbyCluster != nil) {
907+
if !userInitFailed && !(c.databaseAccessDisabled() || c.getNumberOfInstances(&c.Spec) <= 0 || c.Spec.StandbyCluster != nil) {
896908
c.logger.Debugf("syncing roles")
897909
if err := c.syncRoles(); err != nil {
898910
c.logger.Errorf("could not sync roles: %v", err)
@@ -920,13 +932,12 @@ func (c *Cluster) Update(oldSpec, newSpec *acidv1.Postgresql) error {
920932
// need to process. In the future we may want to do this more careful and
921933
// check which databases we need to process, but even repeating the whole
922934
// installation process should be good enough.
923-
924935
if _, err := c.syncConnectionPooler(oldSpec, newSpec, c.installLookupFunction); err != nil {
925936
c.logger.Errorf("could not sync connection pooler: %v", err)
926937
updateFailed = true
927938
}
928939

929-
if len(c.Spec.Streams) > 0 {
940+
if len(newSpec.Spec.Streams) > 0 {
930941
if err := c.syncStreams(); err != nil {
931942
c.logger.Errorf("could not sync streams: %v", err)
932943
updateFailed = true
@@ -1094,28 +1105,10 @@ func (c *Cluster) initSystemUsers() {
10941105
Password: util.RandomPassword(constants.PasswordLength),
10951106
}
10961107

1097-
// Connection pooler user is an exception, if requested it's going to be
1098-
// created by operator as a normal pgUser
1108+
// Connection pooler user is an exception
1109+
// if requested it's going to be created by operator
10991110
if needConnectionPooler(&c.Spec) {
1100-
connectionPoolerSpec := c.Spec.ConnectionPooler
1101-
if connectionPoolerSpec == nil {
1102-
connectionPoolerSpec = &acidv1.ConnectionPooler{}
1103-
}
1104-
1105-
// Using superuser as pooler user is not a good idea. First of all it's
1106-
// not going to be synced correctly with the current implementation,
1107-
// and second it's a bad practice.
1108-
username := c.OpConfig.ConnectionPooler.User
1109-
1110-
isSuperUser := connectionPoolerSpec.User == c.OpConfig.SuperUsername
1111-
isProtectedUser := c.shouldAvoidProtectedOrSystemRole(
1112-
connectionPoolerSpec.User, "connection pool role")
1113-
1114-
if !isSuperUser && !isProtectedUser {
1115-
username = util.Coalesce(
1116-
connectionPoolerSpec.User,
1117-
c.OpConfig.ConnectionPooler.User)
1118-
}
1111+
username := c.poolerUser(&c.Spec)
11191112

11201113
// connection pooler application should be able to login with this role
11211114
connectionPoolerUser := spec.PgUser{

pkg/cluster/cluster_test.go

Lines changed: 48 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package cluster
22

33
import (
44
"fmt"
5+
"net/http"
56
"reflect"
67
"strings"
78
"testing"
@@ -222,7 +223,14 @@ type mockTeamsAPIClient struct {
222223
}
223224

224225
func (m *mockTeamsAPIClient) TeamInfo(teamID, token string) (tm *teams.Team, statusCode int, err error) {
225-
return &teams.Team{Members: m.members}, statusCode, nil
226+
if len(m.members) > 0 {
227+
return &teams.Team{Members: m.members}, http.StatusOK, nil
228+
}
229+
230+
// when members are not set handle this as an error for this mock API
231+
// makes it easier to test behavior when teams API is unavailable
232+
return nil, http.StatusInternalServerError,
233+
fmt.Errorf("mocked %d error of mock Teams API for team %q", http.StatusInternalServerError, teamID)
226234
}
227235

228236
func (m *mockTeamsAPIClient) setMembers(members []string) {
@@ -237,32 +245,53 @@ func TestInitHumanUsers(t *testing.T) {
237245

238246
// members of a product team are granted superuser rights for DBs of their team
239247
cl.OpConfig.EnableTeamSuperuser = true
240-
241248
cl.OpConfig.EnableTeamsAPI = true
249+
cl.OpConfig.EnableTeamMemberDeprecation = true
242250
cl.OpConfig.PamRoleName = "zalandos"
243251
cl.Spec.TeamID = "test"
252+
cl.Spec.Users = map[string]acidv1.UserFlags{"bar": []string{}}
244253

245254
tests := []struct {
246255
existingRoles map[string]spec.PgUser
247256
teamRoles []string
248257
result map[string]spec.PgUser
258+
err error
249259
}{
250260
{
251261
existingRoles: map[string]spec.PgUser{"foo": {Name: "foo", Origin: spec.RoleOriginTeamsAPI,
252-
Flags: []string{"NOLOGIN"}}, "bar": {Name: "bar", Flags: []string{"NOLOGIN"}}},
262+
Flags: []string{"LOGIN"}}, "bar": {Name: "bar", Flags: []string{"LOGIN"}}},
253263
teamRoles: []string{"foo"},
254264
result: map[string]spec.PgUser{"foo": {Name: "foo", Origin: spec.RoleOriginTeamsAPI,
255265
MemberOf: []string{cl.OpConfig.PamRoleName}, Flags: []string{"LOGIN", "SUPERUSER"}},
256-
"bar": {Name: "bar", Flags: []string{"NOLOGIN"}}},
266+
"bar": {Name: "bar", Flags: []string{"LOGIN"}}},
267+
err: fmt.Errorf("could not init human users: cannot initialize members for team %q who owns the Postgres cluster: could not get list of team members for team %q: could not get team info for team %q: mocked %d error of mock Teams API for team %q",
268+
cl.Spec.TeamID, cl.Spec.TeamID, cl.Spec.TeamID, http.StatusInternalServerError, cl.Spec.TeamID),
257269
},
258270
{
259271
existingRoles: map[string]spec.PgUser{},
260272
teamRoles: []string{"admin", replicationUserName},
261273
result: map[string]spec.PgUser{},
274+
err: nil,
262275
},
263276
}
264277

265278
for _, tt := range tests {
279+
// set pgUsers so that initUsers sets up pgUsersCache with team roles
280+
cl.pgUsers = tt.existingRoles
281+
282+
// initUsers calls initHumanUsers which should fail
283+
// because no members are set for mocked teams API
284+
if err := cl.initUsers(); err != nil {
285+
// check that at least team roles are remembered in c.pgUsers
286+
if len(cl.pgUsers) < len(tt.teamRoles) {
287+
t.Errorf("%s unexpected size of pgUsers: expected at least %d, got %d", t.Name(), len(tt.teamRoles), len(cl.pgUsers))
288+
}
289+
if err.Error() != tt.err.Error() {
290+
t.Errorf("%s expected error %v, got %v", t.Name(), err, tt.err)
291+
}
292+
}
293+
294+
// set pgUsers again to test initHumanUsers with working teams API
266295
cl.pgUsers = tt.existingRoles
267296
mockTeamsAPI.setMembers(tt.teamRoles)
268297
if err := cl.initHumanUsers(); err != nil {
@@ -288,12 +317,14 @@ type mockTeamsAPIClientMultipleTeams struct {
288317
func (m *mockTeamsAPIClientMultipleTeams) TeamInfo(teamID, token string) (tm *teams.Team, statusCode int, err error) {
289318
for _, team := range m.teams {
290319
if team.teamID == teamID {
291-
return &teams.Team{Members: team.members}, statusCode, nil
320+
return &teams.Team{Members: team.members}, http.StatusOK, nil
292321
}
293322
}
294323

295-
// should not be reached if a slice with teams is populated correctly
296-
return nil, statusCode, nil
324+
// when given teamId is not found in teams return StatusNotFound
325+
// the operator should only return a warning in this case and not error out (#1842)
326+
return nil, http.StatusNotFound,
327+
fmt.Errorf("mocked %d error of mock Teams API for team %q", http.StatusNotFound, teamID)
297328
}
298329

299330
// Test adding members of maintenance teams that get superuser rights for all PG databases
@@ -392,6 +423,16 @@ func TestInitHumanUsersWithSuperuserTeams(t *testing.T) {
392423
"postgres_superuser": userA,
393424
},
394425
},
426+
// case 4: the team does not exist which should not return an error
427+
{
428+
ownerTeam: "acid",
429+
existingRoles: map[string]spec.PgUser{},
430+
superuserTeams: []string{"postgres_superusers"},
431+
teams: []mockTeam{teamA, teamB, teamTest},
432+
result: map[string]spec.PgUser{
433+
"postgres_superuser": userA,
434+
},
435+
},
395436
}
396437

397438
for _, tt := range tests {

pkg/cluster/connection_pooler.go

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,37 @@ func needReplicaConnectionPoolerWorker(spec *acidv1.PostgresSpec) bool {
7575
*spec.EnableReplicaConnectionPooler
7676
}
7777

78+
func (c *Cluster) needConnectionPoolerUser(oldSpec, newSpec *acidv1.PostgresSpec) bool {
79+
// return true if pooler is needed AND was not disabled before OR user name differs
80+
return (needMasterConnectionPoolerWorker(newSpec) || needReplicaConnectionPoolerWorker(newSpec)) &&
81+
((!needMasterConnectionPoolerWorker(oldSpec) &&
82+
!needReplicaConnectionPoolerWorker(oldSpec)) ||
83+
c.poolerUser(oldSpec) != c.poolerUser(newSpec))
84+
}
85+
86+
func (c *Cluster) poolerUser(spec *acidv1.PostgresSpec) string {
87+
connectionPoolerSpec := spec.ConnectionPooler
88+
if connectionPoolerSpec == nil {
89+
connectionPoolerSpec = &acidv1.ConnectionPooler{}
90+
}
91+
// Using superuser as pooler user is not a good idea. First of all it's
92+
// not going to be synced correctly with the current implementation,
93+
// and second it's a bad practice.
94+
username := c.OpConfig.ConnectionPooler.User
95+
96+
isSuperUser := connectionPoolerSpec.User == c.OpConfig.SuperUsername
97+
isProtectedUser := c.shouldAvoidProtectedOrSystemRole(
98+
connectionPoolerSpec.User, "connection pool role")
99+
100+
if !isSuperUser && !isProtectedUser {
101+
username = util.Coalesce(
102+
connectionPoolerSpec.User,
103+
c.OpConfig.ConnectionPooler.User)
104+
}
105+
106+
return username
107+
}
108+
78109
// when listing pooler k8s objects
79110
func (c *Cluster) poolerLabelsSet(addExtraLabels bool) labels.Set {
80111
poolerLabels := c.labelsSet(addExtraLabels)

pkg/cluster/sync.go

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -104,10 +104,6 @@ func (c *Cluster) Sync(newSpec *acidv1.Postgresql) error {
104104
if !(c.databaseAccessDisabled() || c.getNumberOfInstances(&newSpec.Spec) <= 0 || c.Spec.StandbyCluster != nil) {
105105
c.logger.Debug("syncing roles")
106106
if err = c.syncRoles(); err != nil {
107-
// remember all cached users in c.pgUsers
108-
for cachedUserName, cachedUser := range c.pgUsersCache {
109-
c.pgUsers[cachedUserName] = cachedUser
110-
}
111107
c.logger.Errorf("could not sync roles: %v", err)
112108
}
113109
c.logger.Debug("syncing databases")

0 commit comments

Comments
 (0)