@@ -168,12 +168,25 @@ def test_additional_pod_capabilities(self):
168168 "additional_pod_capabilities" : ',' .join (capabilities ),
169169 },
170170 }
171- self .k8s .update_config (patch_capabilities )
172- self .eventuallyEqual (lambda : self .k8s .get_operator_state (), {"0" : "idle" },
173- "Operator does not get in sync" )
174-
175- self .eventuallyEqual (lambda : self .k8s .count_pods_with_container_capabilities (capabilities , cluster_label ),
176- 2 , "Container capabilities not updated" )
171+
172+ # get node and replica (expected target of new master)
173+ _ , replica_nodes = self .k8s .get_pg_nodes (cluster_label )
174+
175+ try :
176+ self .k8s .update_config (patch_capabilities )
177+ self .eventuallyEqual (lambda : self .k8s .get_operator_state (), {"0" : "idle" },
178+ "Operator does not get in sync" )
179+
180+ # changed security context of postrges container should trigger a rolling update
181+ self .k8s .wait_for_pod_failover (replica_nodes , 'spilo-role=master,' + cluster_label )
182+ self .k8s .wait_for_pod_start ('spilo-role=replica,' + cluster_label )
183+
184+ self .eventuallyEqual (lambda : self .k8s .count_pods_with_container_capabilities (capabilities , cluster_label ),
185+ 2 , "Container capabilities not updated" )
186+
187+ except timeout_decorator .TimeoutError :
188+ print ('Operator log: {}' .format (k8s .get_operator_log ()))
189+ raise
177190
178191 @timeout_decorator .timeout (TEST_TIMEOUT_SEC )
179192 def test_additional_teams_and_members (self ):
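The try block added above waits for the rolling update to complete and then counts the pods whose Spilo container carries the requested Linux capabilities. That helper is not part of this diff; the following is a minimal sketch of how such a check could look with the kubernetes Python client, assuming the function name, the client setup, and that the Spilo container is the first container in the pod spec (none of this is confirmed by the diff).

from kubernetes import client, config

def count_pods_with_container_capabilities(capabilities, labels, namespace='default'):
    # hypothetical sketch; the project's real e2e helper may differ
    config.load_kube_config()
    core_v1 = client.CoreV1Api()
    pods = core_v1.list_namespaced_pod(namespace, label_selector=labels).items

    def has_all_caps(pod):
        # assumes the Spilo container is the first container in the pod spec
        ctx = pod.spec.containers[0].security_context
        added = (ctx.capabilities.add if ctx and ctx.capabilities else None) or []
        return all(cap in added for cap in capabilities)

    return len([p for p in pods if has_all_caps(p)])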
@@ -212,7 +225,7 @@ def test_additional_teams_and_members(self):
         # make sure we let one sync pass and the new user being added
         time.sleep(15)
 
-        leader = self.k8s.get_cluster_leader_pod('acid-minimal-cluster')
+        leader = self.k8s.get_cluster_leader_pod()
         user_query = """
             SELECT usename
               FROM pg_catalog.pg_user
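The only change in this hunk drops the explicit cluster name from get_cluster_leader_pod(), which implies the helper now defaults to 'acid-minimal-cluster'. A hypothetical sketch under that assumption (the real helper may resolve the leader differently, for example via the Patroni API):

from kubernetes import client, config

def get_cluster_leader_pod(cluster='acid-minimal-cluster', namespace='default'):
    # hypothetical sketch: select the pod labelled as the Spilo master of the given cluster
    config.load_kube_config()
    core_v1 = client.CoreV1Api()
    labels = 'application=spilo,cluster-name={},spilo-role=master'.format(cluster)
    pods = core_v1.list_namespaced_pod(namespace, label_selector=labels).items
    return pods[0] if pods else None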
@@ -392,7 +405,7 @@ def test_enable_disable_connection_pooler(self):
         # credentials.
         db_list = []
 
-        leader = k8s.get_cluster_leader_pod('acid-minimal-cluster')
+        leader = k8s.get_cluster_leader_pod()
         schemas_query = """
             select schema_name
             from information_schema.schemata
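As in the previous hunk, the leader pod is looked up so that a SQL query can be run against it. The diff does not show how the query is executed; the sketch below assumes a `kubectl exec`-style approach through the kubernetes Python client's stream helper and a container named 'postgres', both of which are assumptions rather than facts taken from this PR.

from kubernetes import client, config
from kubernetes.stream import stream

def query_database(pod_name, db_name, query, namespace='default'):
    # hypothetical sketch: run psql inside the Spilo pod and return one value per line
    config.load_kube_config()
    core_v1 = client.CoreV1Api()
    cmd = ['psql', '-U', 'postgres', '-d', db_name, '-tAc', query]
    out = stream(core_v1.connect_get_namespaced_pod_exec, pod_name, namespace,
                 command=cmd, container='postgres',
                 stderr=True, stdin=False, stdout=True, tty=False)
    # psql -tA prints bare values, one per row
    return [line for line in out.splitlines() if line]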
@@ -611,7 +624,7 @@ def test_lazy_spilo_upgrade(self):
         k8s.update_config(unpatch_lazy_spilo_upgrade, step="patch lazy upgrade")
 
         # at this point operator will complete the normal rolling upgrade
-        # so we additonally test if disabling the lazy upgrade - forcing the normal rolling upgrade - works
+        # so we additionally test if disabling the lazy upgrade - forcing the normal rolling upgrade - works
         self.eventuallyEqual(lambda: k8s.get_effective_pod_image(pod0),
                              conf_image, "Rolling upgrade was not executed",
                              50, 3)
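The assertion above compares the image the running pod actually reports with the configured image, which is how the test distinguishes a completed rolling upgrade from a still-lazy one. A hypothetical sketch of such a lookup with the kubernetes Python client (the real get_effective_pod_image helper may differ):

from kubernetes import client, config

def get_effective_pod_image(pod_name, namespace='default'):
    # hypothetical sketch: with lazy upgrade the statefulset may already reference the new
    # image while the running pod still reports the old one
    config.load_kube_config()
    core_v1 = client.CoreV1Api()
    pods = core_v1.list_namespaced_pod(
        namespace, field_selector='metadata.name=' + pod_name).items
    return pods[0].spec.containers[0].image if pods else None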
@@ -750,12 +763,6 @@ def verify_pod_limits():
 
         self.eventuallyTrue(verify_pod_limits, "Pod limits were not adjusted")
 
-    @classmethod
-    def setUp(cls):
-        # cls.k8s.update_config({}, step="Setup")
-        cls.k8s.patch_statefulset({"meta": {"annotations": {"zalando-postgres-operator-rolling-update-required": False}}})
-        pass
-
     @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
     def test_multi_namespace_support(self):
         '''
@@ -784,6 +791,139 @@ def test_multi_namespace_support(self):
784791 "acid.zalan.do" , "v1" , self .test_namespace , "postgresqls" , "acid-test-cluster" )
785792 time .sleep (5 )
786793
794+ @timeout_decorator .timeout (TEST_TIMEOUT_SEC )
795+ def test_rolling_update_flag (self ):
796+ '''
797+ Add rolling update flag to only the master and see it failing over
798+ '''
799+ k8s = self .k8s
800+ cluster_label = 'application=spilo,cluster-name=acid-minimal-cluster'
801+
802+ # verify we are in good state from potential previous tests
803+ self .eventuallyEqual (lambda : k8s .count_running_pods (), 2 , "No 2 pods running" )
804+
805+ # get node and replica (expected target of new master)
806+ _ , replica_nodes = k8s .get_pg_nodes (cluster_label )
807+
808+ # rolling update annotation
809+ flag = {
810+ "metadata" : {
811+ "annotations" : {
812+ "zalando-postgres-operator-rolling-update-required" : "true" ,
813+ }
814+ }
815+ }
816+
817+ try :
818+ podsList = k8s .api .core_v1 .list_namespaced_pod ('default' , label_selector = cluster_label )
819+ for pod in podsList .items :
820+ # add flag only to the master to make it appear to the operator as a leftover from a rolling update
821+ if pod .metadata .labels .get ('spilo-role' ) == 'master' :
822+ old_creation_timestamp = pod .metadata .creation_timestamp
823+ k8s .patch_pod (flag , pod .metadata .name , pod .metadata .namespace )
824+ else :
825+ # remember replica name to check if operator does a switchover
826+ switchover_target = pod .metadata .name
827+
828+ # do not wait until the next sync
829+ k8s .delete_operator_pod ()
830+
831+ # operator should now recreate the master pod and do a switchover before
832+ k8s .wait_for_pod_failover (replica_nodes , 'spilo-role=master,' + cluster_label )
833+
834+ # check if the former replica is now the new master
835+ leader = k8s .get_cluster_leader_pod ()
836+ self .eventuallyEqual (lambda : leader .metadata .name , switchover_target , "Rolling update flag did not trigger switchover" )
837+
838+ # check that the old master has been recreated
839+ k8s .wait_for_pod_start ('spilo-role=replica,' + cluster_label )
840+ replica = k8s .get_cluster_replica_pod ()
841+ self .assertTrue (replica .metadata .creation_timestamp > old_creation_timestamp , "Old master pod was not recreated" )
842+
843+
844+ except timeout_decorator .TimeoutError :
845+ print ('Operator log: {}' .format (k8s .get_operator_log ()))
846+ raise
847+
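test_rolling_update_flag marks only the master pod with the zalando-postgres-operator-rolling-update-required annotation, so the operator treats it as a leftover from an interrupted rolling update and switches over to the replica before recreating the old master. The patch_pod helper it calls is not shown in this diff; a minimal sketch, assuming a strategic-merge patch via the kubernetes Python client:

from kubernetes import client, config

def patch_pod(body, pod_name, namespace='default'):
    # hypothetical sketch: strategic-merge patch, so only the annotations in `body`
    # are added or updated on the pod
    config.load_kube_config()
    core_v1 = client.CoreV1Api()
    core_v1.patch_namespaced_pod(pod_name, namespace, body)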
+    @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
+    def test_rolling_update_label_timeout(self):
+        '''
+        Simulate the case when a replica does not receive its label in time and the rolling update does not finish
+        '''
+        k8s = self.k8s
+        cluster_label = 'application=spilo,cluster-name=acid-minimal-cluster'
+        flag = "zalando-postgres-operator-rolling-update-required"
+
+        # verify we are in good state from potential previous tests
+        self.eventuallyEqual(lambda: k8s.count_running_pods(), 2, "No 2 pods running")
+
+        # get node and replica (expected target of new master)
+        _, replica_nodes = k8s.get_pg_nodes(cluster_label)
+
+        # rolling update annotation
+        rolling_update_patch = {
+            "metadata": {
+                "annotations": {
+                    flag: "true",
+                }
+            }
+        }
+
+        # make pod_label_wait_timeout so short that the rolling update fails on the first try
+        # temporarily lower the resync interval to reduce waiting for further tests
+        # pods should get healthy in the meantime
+        patch_resync_config = {
+            "data": {
+                "pod_label_wait_timeout": "2s",
+                "resync_period": "20s",
+            }
+        }
+
+        try:
+            # patch both pods for rolling update
+            podList = k8s.api.core_v1.list_namespaced_pod('default', label_selector=cluster_label)
+            for pod in podList.items:
+                k8s.patch_pod(rolling_update_patch, pod.metadata.name, pod.metadata.namespace)
+                if pod.metadata.labels.get('spilo-role') == 'replica':
+                    switchover_target = pod.metadata.name
+
+            # update the config and restart the operator
+            k8s.update_config(patch_resync_config, "update resync interval and pod_label_wait_timeout")
+
+            # operator should now recreate the replica pod first and do a switchover afterwards
+            k8s.wait_for_pod_start('spilo-role=replica,' + cluster_label)
+
+            # pod_label_wait_timeout should have been exceeded, hence the rolling update is continued on the next sync
+            # check if the cluster state is "SyncFailed"
+            self.eventuallyEqual(lambda: k8s.pg_get_status(), "SyncFailed", "Expected SYNC event to fail")
+
+            # wait for the next sync; the replica should be running normally by now and be ready for switchover
+            k8s.wait_for_pod_failover(replica_nodes, 'spilo-role=master,' + cluster_label)
+
+            # check if the former replica is now the new master
+            leader = k8s.get_cluster_leader_pod()
+            self.eventuallyEqual(lambda: leader.metadata.name, switchover_target, "Rolling update flag did not trigger switchover")
+
+            # wait for the old master to get restarted
+            k8s.wait_for_pod_start('spilo-role=replica,' + cluster_label)
+
+            # status should again be "SyncFailed" but turn into "Running" on the next sync
+            time.sleep(10)
+            self.eventuallyEqual(lambda: k8s.pg_get_status(), "Running", "Expected running cluster after two syncs")
+
+            # revert config changes
+            patch_resync_config = {
+                "data": {
+                    "pod_label_wait_timeout": "10m",
+                    "resync_period": "30m",
+                }
+            }
+            k8s.update_config(patch_resync_config, "revert resync interval and pod_label_wait_timeout")
+
+        except timeout_decorator.TimeoutError:
+            print('Operator log: {}'.format(k8s.get_operator_log()))
+            raise
 
     @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
     def test_zz_node_readiness_label(self):
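Both new tests poll pg_get_status() to watch the cluster move from "SyncFailed" back to "Running". That helper is not part of this diff either; below is a hypothetical sketch that reads the status of the postgresql custom object, assuming it exposes a PostgresClusterStatus field (the real helper and status layout may differ).

from kubernetes import client, config

def pg_get_status(name='acid-minimal-cluster', namespace='default'):
    # hypothetical sketch: read the custom resource and return its cluster status string
    config.load_kube_config()
    custom_objects = client.CustomObjectsApi()
    pg = custom_objects.get_namespaced_custom_object(
        'acid.zalan.do', 'v1', namespace, 'postgresqls', name)
    return pg.get('status', {}).get('PostgresClusterStatus')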