Skip to content

Commit f4fa759

Browse files
fix: Disable syncing taints from nodeclaims to nodes (#8029)
1 parent 2cc08f8 commit f4fa759

14 files changed

+63
-15
lines changed

pkg/providers/amifamily/suite_test.go

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -359,9 +359,9 @@ var _ = Describe("AMIProvider", func() {
359359
},
360360
}
361361
})
362-
It("should priortize the older non-deprecated ami without deprecation time", func() {
362+
It("should prioritize the older non-deprecated ami without deprecation time", func() {
363363
// Here we have two AMIs one which is deprecated and newer and one which is older and non-deprecated without a deprecation time
364-
// List operation will priortize the non-deprecated AMI without the deprecation time
364+
// List operation will prioritize the non-deprecated AMI without the deprecation time
365365
awsEnv.EC2API.DescribeImagesOutput.Set(&ec2.DescribeImagesOutput{
366366
Images: []ec2types.Image{
367367
{
@@ -401,9 +401,9 @@ var _ = Describe("AMIProvider", func() {
401401
),
402402
}))
403403
})
404-
It("should priortize the non-deprecated ami with deprecation time when both have same creation time", func() {
404+
It("should prioritize the non-deprecated ami with deprecation time when both have same creation time", func() {
405405
// Here we have two AMIs one which is deprecated and one which is non-deprecated both with the same creation time
406-
// List operation will priortize the non-deprecated AMI
406+
// List operation will prioritize the non-deprecated AMI
407407
awsEnv.EC2API.DescribeImagesOutput.Set(&ec2.DescribeImagesOutput{
408408
Images: []ec2types.Image{
409409
{
@@ -445,9 +445,9 @@ var _ = Describe("AMIProvider", func() {
445445
),
446446
}))
447447
})
448-
It("should priortize the non-deprecated ami with deprecation time when both have same creation time and different name", func() {
448+
It("should prioritize the non-deprecated ami with deprecation time when both have same creation time and different name", func() {
449449
// Here we have two AMIs one which is deprecated and one which is non-deprecated both with the same creation time but with different names
450-
// List operation will priortize the non-deprecated AMI
450+
// List operation will prioritize the non-deprecated AMI
451451
awsEnv.EC2API.DescribeImagesOutput.Set(&ec2.DescribeImagesOutput{
452452
Images: []ec2types.Image{
453453
{
@@ -489,7 +489,7 @@ var _ = Describe("AMIProvider", func() {
489489
),
490490
}))
491491
})
492-
It("should priortize the newer ami if both are deprecated", func() {
492+
It("should prioritize the newer ami if both are deprecated", func() {
493493
//Both amis are deprecated and have the same deprecation time
494494
awsEnv.EC2API.DescribeImagesOutput.Set(&ec2.DescribeImagesOutput{
495495
Images: []ec2types.Image{

pkg/providers/launchtemplate/launchtemplate.go

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,7 @@ func (p *DefaultProvider) CreateAMIOptions(ctx context.Context, nodeClass *v1.EC
171171
delete(labels, k)
172172
}
173173
}
174+
labels = InjectDoNotSyncTaintsLabel(nodeClass.AMIFamily(), labels)
174175
// Relying on the status rather than an API call means that Karpenter is subject to a race
175176
// condition where EC2NodeClass spec changes haven't propagated to the status once a node
176177
// has launched.
@@ -498,3 +499,16 @@ func (p *DefaultProvider) ResolveClusterCIDR(ctx context.Context) error {
498499
}
499500
return fmt.Errorf("no CIDR found in DescribeCluster response")
500501
}
502+
503+
// InjectDoNotSyncTaintsLabel adds a label for all non-custom AMI families. It is exported just for ease
504+
// of testing.
505+
// This label is to tell karpenter that it should *not* sync taints. This is to work around a race condition.
506+
// By default, this label is not added to the custom AMI family as users may still want their taints synced. Startup
507+
// taints will be racy for custom AMIs if they do not add this label, however.
508+
// https://github.com/kubernetes-sigs/karpenter/issues/1772
509+
func InjectDoNotSyncTaintsLabel(amiFamilyName string, labels map[string]string) map[string]string {
510+
if amiFamilyName != v1.AMIFamilyCustom {
511+
return lo.Assign(labels, map[string]string{karpv1.NodeDoNotSyncTaintsLabelKey: "true"})
512+
}
513+
return labels
514+
}

pkg/providers/launchtemplate/suite_test.go

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,34 @@ var _ = Describe("LaunchTemplate Provider", func() {
196196
Expect(awsEnv.InstanceTypesProvider.UpdateInstanceTypes(ctx)).To(Succeed())
197197
Expect(awsEnv.InstanceTypesProvider.UpdateInstanceTypeOfferings(ctx)).To(Succeed())
198198
})
199+
It("should not add the do not sync taints label to nodes when AMI type is custom", func() {
200+
labels := launchtemplate.InjectDoNotSyncTaintsLabel("Custom", make(map[string]string))
201+
Expect(labels).To(HaveLen(0))
202+
})
203+
204+
It("should add the do not sync taints label to nodes when AMI type is al2", func() {
205+
labels := launchtemplate.InjectDoNotSyncTaintsLabel("AL2", make(map[string]string))
206+
Expect(labels).To(HaveLen(1))
207+
Expect(labels).Should(HaveKeyWithValue(karpv1.NodeDoNotSyncTaintsLabelKey, "true"))
208+
})
209+
210+
It("should add the do not sync taints label to nodes when AMI type is al2023", func() {
211+
labels := launchtemplate.InjectDoNotSyncTaintsLabel("AL2023", make(map[string]string))
212+
Expect(labels).To(HaveLen(1))
213+
Expect(labels).Should(HaveKeyWithValue(karpv1.NodeDoNotSyncTaintsLabelKey, "true"))
214+
})
215+
216+
It("should add the do not sync taints label to nodes when AMI type is br", func() {
217+
labels := launchtemplate.InjectDoNotSyncTaintsLabel("Bottlerocket", make(map[string]string))
218+
Expect(labels).To(HaveLen(1))
219+
Expect(labels).Should(HaveKeyWithValue(karpv1.NodeDoNotSyncTaintsLabelKey, "true"))
220+
})
221+
222+
It("should add the do not sync taints label to nodes when AMI type is windows", func() {
223+
labels := launchtemplate.InjectDoNotSyncTaintsLabel("Windows", make(map[string]string))
224+
Expect(labels).To(HaveLen(1))
225+
Expect(labels).Should(HaveKeyWithValue(karpv1.NodeDoNotSyncTaintsLabelKey, "true"))
226+
})
199227
It("should create unique launch templates for multiple identical nodeClasses", func() {
200228
nodeClass2 := test.EC2NodeClass(v1.EC2NodeClass{
201229
Status: v1.EC2NodeClassStatus{

pkg/providers/launchtemplate/testdata/al2023_mime_userdata_merged.golden

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,6 @@ spec:
4646
- effect: NoExecute
4747
key: karpenter.sh/unregistered
4848
flags:
49-
- --node-labels="karpenter.k8s.aws/ec2nodeclass=%s,karpenter.sh/capacity-type=on-demand,%s=%s,testing/cluster=unspecified"
49+
- --node-labels="karpenter.k8s.aws/ec2nodeclass=%s,karpenter.sh/capacity-type=on-demand,karpenter.sh/do-not-sync-taints=true,%s=%s,testing/cluster=unspecified"
5050

5151
--//--

pkg/providers/launchtemplate/testdata/al2023_shell_userdata_merged.golden

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,6 @@ spec:
3232
- effect: NoExecute
3333
key: karpenter.sh/unregistered
3434
flags:
35-
- --node-labels="karpenter.k8s.aws/ec2nodeclass=%s,karpenter.sh/capacity-type=on-demand,%s=%s,testing/cluster=unspecified"
35+
- --node-labels="karpenter.k8s.aws/ec2nodeclass=%s,karpenter.sh/capacity-type=on-demand,karpenter.sh/do-not-sync-taints=true,%s=%s,testing/cluster=unspecified"
3636

3737
--//--

pkg/providers/launchtemplate/testdata/al2023_userdata_unmerged.golden

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,6 @@ spec:
2727
- effect: NoExecute
2828
key: karpenter.sh/unregistered
2929
flags:
30-
- --node-labels="karpenter.k8s.aws/ec2nodeclass=%s,karpenter.sh/capacity-type=on-demand,%s=%s,testing/cluster=unspecified"
30+
- --node-labels="karpenter.k8s.aws/ec2nodeclass=%s,karpenter.sh/capacity-type=on-demand,karpenter.sh/do-not-sync-taints=true,%s=%s,testing/cluster=unspecified"
3131

3232
--//--

pkg/providers/launchtemplate/testdata/al2023_yaml_userdata_merged.golden

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,6 @@ spec:
4141
- effect: NoExecute
4242
key: karpenter.sh/unregistered
4343
flags:
44-
- --node-labels="karpenter.k8s.aws/ec2nodeclass=%s,karpenter.sh/capacity-type=on-demand,%s=%s,testing/cluster=unspecified"
44+
- --node-labels="karpenter.k8s.aws/ec2nodeclass=%s,karpenter.sh/capacity-type=on-demand,karpenter.sh/do-not-sync-taints=true,%s=%s,testing/cluster=unspecified"
4545

4646
--//--

pkg/providers/launchtemplate/testdata/al2_userdata_merged.golden

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,5 +15,5 @@ exec > >(tee /var/log/user-data.log|logger -t user-data -s 2>/dev/console) 2>&1
1515
/etc/eks/bootstrap.sh 'test-cluster' --apiserver-endpoint 'https://test-cluster' --b64-cluster-ca 'ca-bundle' \
1616
--dns-cluster-ip '10.0.100.10' \
1717
--use-max-pods false \
18-
--kubelet-extra-args '--node-labels="karpenter.k8s.aws/ec2nodeclass=%s,karpenter.sh/capacity-type=on-demand,%s=%s,testing/cluster=unspecified" --register-with-taints="karpenter.sh/unregistered:NoExecute" --max-pods=110'
18+
--kubelet-extra-args '--node-labels="karpenter.k8s.aws/ec2nodeclass=%s,karpenter.sh/capacity-type=on-demand,karpenter.sh/do-not-sync-taints=true,%s=%s,testing/cluster=unspecified" --register-with-taints="karpenter.sh/unregistered:NoExecute" --max-pods=110'
1919
--//--

pkg/providers/launchtemplate/testdata/al2_userdata_unmerged.golden

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,5 +9,5 @@ exec > >(tee /var/log/user-data.log|logger -t user-data -s 2>/dev/console) 2>&1
99
/etc/eks/bootstrap.sh 'test-cluster' --apiserver-endpoint 'https://test-cluster' --b64-cluster-ca 'ca-bundle' \
1010
--dns-cluster-ip '10.0.100.10' \
1111
--use-max-pods false \
12-
--kubelet-extra-args '--node-labels="karpenter.k8s.aws/ec2nodeclass=%s,karpenter.sh/capacity-type=on-demand,%s=%s,testing/cluster=unspecified" --register-with-taints="karpenter.sh/unregistered:NoExecute" --max-pods=110'
12+
--kubelet-extra-args '--node-labels="karpenter.k8s.aws/ec2nodeclass=%s,karpenter.sh/capacity-type=on-demand,karpenter.sh/do-not-sync-taints=true,%s=%s,testing/cluster=unspecified" --register-with-taints="karpenter.sh/unregistered:NoExecute" --max-pods=110'
1313
--//--

pkg/providers/launchtemplate/testdata/br_userdata_merged.golden

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ max-pods = 110
1111
custom-node-label = 'custom'
1212
'karpenter.k8s.aws/ec2nodeclass' = '%s'
1313
'karpenter.sh/capacity-type' = 'on-demand'
14+
'karpenter.sh/do-not-sync-taints' = 'true'
1415
'%s' = '%s'
1516
'testing/cluster' = 'unspecified'
1617

pkg/providers/launchtemplate/testdata/br_userdata_unmerged.golden

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ max-pods = 110
99
[settings.kubernetes.node-labels]
1010
'karpenter.k8s.aws/ec2nodeclass' = '%s'
1111
'karpenter.sh/capacity-type' = 'on-demand'
12+
'karpenter.sh/do-not-sync-taints' = 'true'
1213
'%s' = '%s'
1314
'testing/cluster' = 'unspecified'
1415

pkg/providers/launchtemplate/testdata/windows_userdata_merged.golden

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,5 +2,5 @@
22
Write-Host "Running custom user data script"
33
Write-Host "Finished running custom user data script"
44
[string]$EKSBootstrapScriptFile = "$env:ProgramFiles\Amazon\EKS\Start-EKSBootstrap.ps1"
5-
& $EKSBootstrapScriptFile -EKSClusterName 'test-cluster' -APIServerEndpoint 'https://test-cluster' -Base64ClusterCA 'ca-bundle' -KubeletExtraArgs '--node-labels="karpenter.k8s.aws/ec2nodeclass=%s,karpenter.sh/capacity-type=spot,%s=%s,testing/cluster=unspecified" --register-with-taints="karpenter.sh/unregistered:NoExecute" --max-pods=110' -DNSClusterIP '10.0.100.10'
5+
& $EKSBootstrapScriptFile -EKSClusterName 'test-cluster' -APIServerEndpoint 'https://test-cluster' -Base64ClusterCA 'ca-bundle' -KubeletExtraArgs '--node-labels="karpenter.k8s.aws/ec2nodeclass=%s,karpenter.sh/capacity-type=spot,karpenter.sh/do-not-sync-taints=true,%s=%s,testing/cluster=unspecified" --register-with-taints="karpenter.sh/unregistered:NoExecute" --max-pods=110' -DNSClusterIP '10.0.100.10'
66
</powershell>
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
<powershell>
22
[string]$EKSBootstrapScriptFile = "$env:ProgramFiles\Amazon\EKS\Start-EKSBootstrap.ps1"
3-
& $EKSBootstrapScriptFile -EKSClusterName 'test-cluster' -APIServerEndpoint 'https://test-cluster' -Base64ClusterCA 'ca-bundle' -KubeletExtraArgs '--node-labels="karpenter.k8s.aws/ec2nodeclass=%s,karpenter.sh/capacity-type=spot,%s=%s,testing/cluster=unspecified" --register-with-taints="karpenter.sh/unregistered:NoExecute" --max-pods=110' -DNSClusterIP '10.0.100.10'
3+
& $EKSBootstrapScriptFile -EKSClusterName 'test-cluster' -APIServerEndpoint 'https://test-cluster' -Base64ClusterCA 'ca-bundle' -KubeletExtraArgs '--node-labels="karpenter.k8s.aws/ec2nodeclass=%s,karpenter.sh/capacity-type=spot,karpenter.sh/do-not-sync-taints=true,%s=%s,testing/cluster=unspecified" --register-with-taints="karpenter.sh/unregistered:NoExecute" --max-pods=110' -DNSClusterIP '10.0.100.10'
44
</powershell>

test/suites/ami/suite_test.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -312,6 +312,7 @@ var _ = Describe("AMI", func() {
312312
// Since the node has joined the cluster, we know our bootstrapping was correct.
313313
// Just verify if the UserData contains our custom content too, rather than doing a byte-wise comparison.
314314
Expect(string(actualUserData)).To(ContainSubstring("Running custom user data script"))
315+
Expect(string(actualUserData)).To(ContainSubstring("karpenter.sh/do-not-sync-taints=true"))
315316
})
316317
It("should merge non-MIME UserData contents for AL2 AMIFamily", func() {
317318
content, err := os.ReadFile("testdata/al2_no_mime_userdata_input.sh")
@@ -333,6 +334,7 @@ var _ = Describe("AMI", func() {
333334
// Since the node has joined the cluster, we know our bootstrapping was correct.
334335
// Just verify if the UserData contains our custom content too, rather than doing a byte-wise comparison.
335336
Expect(string(actualUserData)).To(ContainSubstring("Running custom user data script"))
337+
Expect(string(actualUserData)).To(ContainSubstring("karpenter.sh/do-not-sync-taints=true"))
336338
})
337339
It("should merge UserData contents for Bottlerocket AMIFamily", func() {
338340
content, err := os.ReadFile("testdata/br_userdata_input.sh")
@@ -352,6 +354,7 @@ var _ = Describe("AMI", func() {
352354
actualUserData, err := base64.StdEncoding.DecodeString(*getInstanceAttribute(pod.Spec.NodeName, "userData").UserData.Value)
353355
Expect(err).ToNot(HaveOccurred())
354356
Expect(string(actualUserData)).To(ContainSubstring("kube-api-qps = 30"))
357+
Expect(string(actualUserData)).To(ContainSubstring("'karpenter.sh/do-not-sync-taints' = 'true'"))
355358
Expect(string(actualUserData)).To(ContainSubstring("eviction-max-pod-grace-period = 40"))
356359
Expect(string(actualUserData)).To(ContainSubstring("[settings.kubernetes.eviction-soft]\n'memory.available' = '100Mi'"))
357360
Expect(string(actualUserData)).To(ContainSubstring("[settings.kubernetes.eviction-soft-grace-period]\n'memory.available' = '30s'"))
@@ -402,6 +405,7 @@ var _ = Describe("AMI", func() {
402405
Expect(err).ToNot(HaveOccurred())
403406
Expect(string(actualUserData)).To(ContainSubstring("Write-Host \"Running custom user data script\""))
404407
Expect(string(actualUserData)).To(ContainSubstring("[string]$EKSBootstrapScriptFile = \"$env:ProgramFiles\\Amazon\\EKS\\Start-EKSBootstrap.ps1\""))
408+
Expect(string(actualUserData)).To(ContainSubstring("karpenter.sh/do-not-sync-taints=true"))
405409
})
406410
})
407411
})

0 commit comments

Comments
 (0)