
Commit 0f9de79

PWX-37204, PWX-37178: Resolving gaps in KVDB fix (#1542)

* check kvdb nodes only when it is not a fresh install
* correcting UTs
* making requested changes
* correcting UT
1 parent 9854899 commit 0f9de79


2 files changed: 21 additions & 7 deletions


pkg/controller/storagecluster/controller_test.go

Lines changed: 7 additions & 0 deletions
@@ -3886,6 +3886,7 @@ func TestUpdateStorageClusterWithKVDBDown(t *testing.T) {
 	cluster.Spec.Kvdb = &corev1.KvdbSpec{
 		Internal: true,
 	}
+	cluster.Status.Phase = "Online"
 	k8sVersion, _ := version.NewVersion(minSupportedK8sVersion)
 	storageLabels := map[string]string{
 		constants.LabelKeyClusterName: cluster.Name,
@@ -3992,6 +3993,12 @@ func TestUpdateStorageClusterWithKVDBDown(t *testing.T) {
 	require.Contains(t, err.Error(), fmt.Sprintf("couldn't get unavailable numbers: couldn't get list of storage nodes during rolling update of storage cluster %s/%s: %s", cluster.Namespace, cluster.Name, getStorageNodeserr))
 
 	// When GetKvdbMembers returns an error
+	err = testutil.Get(k8sClient, cluster, cluster.Name, cluster.Namespace)
+	require.NoError(t, err)
+	cluster.Status.Phase = "Online"
+	err = k8sClient.Update(context.TODO(), cluster)
+	require.NoError(t, err)
+
 	getKvdbMemberserr := fmt.Errorf("test error 2")
 	driver.EXPECT().GetKVDBMembers(gomock.Any()).Return(nil, getKvdbMemberserr).Times(1)
 	driver.EXPECT().GetStorageNodes(gomock.Any()).Return(storageNodes, nil).AnyTimes()
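
A note on the test tweak above: the fix in update.go only consults KVDB member availability when the cluster is not a fresh install, so the test marks the cluster as already deployed (cluster.Status.Phase = "Online") before exercising the GetKVDBMembers error path. The sketch below shows the kind of phase-based gate the test is targeting; the real helper is util.IsFreshInstall in the operator, and treating an empty status phase as "fresh" is an assumption made here for illustration, not that helper's actual implementation.

package main

import "fmt"

// isFreshInstall is a hypothetical stand-in for util.IsFreshInstall: the test
// sets cluster.Status.Phase = "Online" so that a check along these lines
// returns false and the KVDB availability path runs. Treating an empty phase
// as "fresh" is an assumption for illustration only.
func isFreshInstall(statusPhase string) bool {
	return statusPhase == ""
}

func main() {
	fmt.Println(isFreshInstall(""))       // true  -> KVDB availability check is skipped
	fmt.Println(isFreshInstall("Online")) // false -> KVDB availability check runs
}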

pkg/controller/storagecluster/update.go

Lines changed: 14 additions & 7 deletions
@@ -81,10 +81,15 @@ func (c *Controller) rollingUpdate(cluster *corev1.StorageCluster, hash string)
 	// Need to generalise code for 5 KVDB nodes
 
 	// get the number of kvdb members which are unavailable in case of internal kvdb
-	numUnavailableKvdb, kvdbNodes, err := c.getKVDBNodeAvailability(cluster)
-	if err != nil {
-		return err
+	numUnavailableKvdb := -1
+	var kvdbNodes map[string]bool
+	if !util.IsFreshInstall(cluster) {
+		numUnavailableKvdb, kvdbNodes, err = c.getKVDBNodeAvailability(cluster)
+		if err != nil {
+			return err
+		}
 	}
+
 	logrus.Debugf("Marking old pods for deletion")
 	for _, pod := range oldAvailablePods {
 		if numUnavailable >= maxUnavailable {
@@ -94,10 +99,12 @@ func (c *Controller) rollingUpdate(cluster *corev1.StorageCluster, hash string)
 		}
 
 		// check if pod is running in a node which has internal kvdb running in it
-		if _, isKvdbNode := kvdbNodes[pod.Spec.NodeName]; cluster.Spec.Kvdb != nil && cluster.Spec.Kvdb.Internal && isKvdbNode {
-			// if number of unavailable kvdb nodes is greater than or equal to 1, then dont restart portworx on this node
-			if numUnavailableKvdb > 0 {
-				logrus.Infof("Number of unavaliable KVDB members exceeds 1, temporarily skipping update for this node to prevent KVDB from going out of quorum ")
+		// numUnavailableKvdb will be 0 or more when it is not a fresh install. We want to skip this for fresh install
+		if _, isKvdbNode := kvdbNodes[pod.Spec.NodeName]; numUnavailableKvdb >= 0 && cluster.Spec.Kvdb != nil && cluster.Spec.Kvdb.Internal && isKvdbNode {
+			// if number of unavailable kvdb nodes is greater than or equal to 1, or lesser than 3 entries are present in the kvdb map
+			// then dont restart portworx on this node
+			if numUnavailableKvdb > 0 || len(kvdbNodes) < 3 {
+				logrus.Infof("One or more KVDB members are down, temporarily skipping update for this node to prevent KVDB from going out of quorum ")
 				continue
 			} else {
 				numUnavailableKvdb++
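
The condition added above can be read as a small predicate: a pod on a node running an internal KVDB member is skipped during the rolling update only when the availability check actually ran (numUnavailableKvdb >= 0, i.e. not a fresh install) and the KVDB cluster is already degraded, either because a member is down or because fewer than three members are known. The sentinel value -1 is what lets a fresh install bypass the check without a separate flag. Below is a self-contained restatement of that rule under hypothetical names (shouldSkipKvdbNode is not a function in the operator; the internalKvdb flag stands in for cluster.Spec.Kvdb != nil && cluster.Spec.Kvdb.Internal):

package main

import "fmt"

// shouldSkipKvdbNode restates the skip rule from rollingUpdate: skip updating
// a pod on an internal-KVDB node when the availability check ran
// (numUnavailableKvdb >= 0) and the KVDB cluster is already degraded
// (a member unavailable, or fewer than 3 members present in the map).
func shouldSkipKvdbNode(numUnavailableKvdb int, kvdbNodes map[string]bool, nodeName string, internalKvdb bool) bool {
	_, isKvdbNode := kvdbNodes[nodeName]
	if numUnavailableKvdb < 0 || !internalKvdb || !isKvdbNode {
		return false // fresh install, external KVDB, or pod is not on a KVDB node
	}
	return numUnavailableKvdb > 0 || len(kvdbNodes) < 3
}

func main() {
	kvdbNodes := map[string]bool{"node-1": true, "node-2": true, "node-3": true}
	// Healthy 3-member KVDB: don't skip (the caller then increments numUnavailableKvdb).
	fmt.Println(shouldSkipKvdbNode(0, kvdbNodes, "node-1", true)) // false
	// One member already unavailable: skip to protect quorum.
	fmt.Println(shouldSkipKvdbNode(1, kvdbNodes, "node-2", true)) // true
	// Fresh install (sentinel -1): the check is disabled entirely.
	fmt.Println(shouldSkipKvdbNode(-1, nil, "node-1", true)) // false
}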
