Commit a52e75c

Log and run each informer explicitly
Since adding a wait for the cache to sync for Kubernetes endpoints, there appear to be conditions where the code waits indefinitely. The issue can be reproduced by scaling the gateway to zero replicas and back to one: some of the time it passes the sync, other times it gets stuck.

This patch appears to fix the problem. It has been tested in controller and operator mode on an RPi 4 node cluster, and with k3d, with hey running a load test against the nodeinfo function with a minimum of 10 replicas.

Signed-off-by: Alex Ellis (OpenFaaS Ltd) <[email protected]>
1 parent 9b3b1f3 commit a52e75c

4 files changed: 96 additions, 62 deletions

Makefile

Lines changed: 15 additions & 4 deletions
@@ -1,5 +1,6 @@
 .PHONY: build local push namespaces install charts start-kind stop-kind build-buildx render-charts
 TAG?=latest
+OWNER?=openfaas
 export DOCKER_CLI_EXPERIMENTAL=enabled
 
 all: build-docker
@@ -9,15 +10,15 @@ local:
 
 build-docker:
 	docker build \
-	-t ghcr.io/openfaas/faas-netes:$(TAG) .
+	-t ghcr.io/$(OWNER)/faas-netes:$(TAG) .
 
 .PHONY: build-buildx
 build-buildx:
 	@docker buildx create --use --name=multiarch --node=multiarch && \
 	docker buildx build \
 	--output "type=docker,push=false" \
 	--platform linux/amd64 \
-	--tag ghcr.io/openfaas/faas-netes:$(TAG) \
+	--tag ghcr.io/$(OWNER)/faas-netes:$(TAG) \
 	.
 
 .PHONY: build-buildx-all
@@ -26,11 +27,21 @@ build-buildx-all:
 	docker buildx build \
 	--platform linux/amd64,linux/arm/v7,linux/arm64 \
 	--output "type=image,push=false" \
-	--tag ghcr.io/openfaas/faas-netes:$(TAG) \
+	--tag ghcr.io/$(OWNER)/faas-netes:$(TAG) \
+	.
+
+.PHONY: publish-buildx-all
+publish-buildx-all:
+	@echo ghcr.io/$(OWNER)/faas-netes:$(TAG) && \
+	docker buildx create --use --name=multiarch --node=multiarch && \
+	docker buildx build \
+	--platform linux/amd64,linux/arm/v7,linux/arm64 \
+	--push=true \
+	--tag ghcr.io/$(OWNER)/faas-netes:$(TAG) \
 	.
 
 push:
-	docker push ghcr.io/openfaas/faas-netes:$(TAG)
+	docker push ghcr.io/$(OWNER)/faas-netes:$(TAG)
 
 namespaces:
 	kubectl apply -f namespaces.yml
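The new OWNER variable lets a fork build and publish images under its own registry namespace without editing the Makefile, and publish-buildx-all combines the multi-arch build with a push. For example (the owner and tag here are illustrative, not part of the commit), running make publish-buildx-all OWNER=myfork TAG=dev builds for linux/amd64, linux/arm/v7 and linux/arm64 and pushes the result in one step.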

contrib/qemu.sh

Lines changed: 4 additions & 0 deletions
@@ -0,0 +1,4 @@
+#!/bin/bash
+
+# Support multi-arch builds with buildx on a Linux host.
+docker run --rm --privileged multiarch/qemu-user-static --reset -p yes
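This script uses the multiarch/qemu-user-static image to register QEMU's binfmt_misc handlers with the host kernel, which is what lets buildx run the linux/arm/v7 and linux/arm64 stages of build-buildx-all on an amd64 Linux machine. The registration is lost on reboot, so a typical flow is to run contrib/qemu.sh once, then invoke the buildx targets above.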

main.go

Lines changed: 75 additions & 55 deletions
@@ -11,6 +11,7 @@ import (
 
 	clientset "github.com/openfaas/faas-netes/pkg/client/clientset/versioned"
 	informers "github.com/openfaas/faas-netes/pkg/client/informers/externalversions"
+	v1 "github.com/openfaas/faas-netes/pkg/client/informers/externalversions/openfaas/v1"
 	"github.com/openfaas/faas-netes/pkg/config"
 	"github.com/openfaas/faas-netes/pkg/controller"
 	"github.com/openfaas/faas-netes/pkg/handlers"
@@ -22,7 +23,10 @@ import (
 	"github.com/openfaas/faas-provider/logs"
 	"github.com/openfaas/faas-provider/proxy"
 	providertypes "github.com/openfaas/faas-provider/types"
+
 	kubeinformers "k8s.io/client-go/informers"
+	v1apps "k8s.io/client-go/informers/apps/v1"
+	v1core "k8s.io/client-go/informers/core/v1"
 	"k8s.io/client-go/kubernetes"
 	"k8s.io/client-go/tools/cache"
 	"k8s.io/client-go/tools/clientcmd"
@@ -62,6 +66,12 @@ func main() {
 		log.Fatalf("Error building kubeconfig: %s", err.Error())
 	}
 
+	kubeconfigQPS := 100
+	kubeconfigBurst := 250
+
+	clientCmdConfig.QPS = float32(kubeconfigQPS)
+	clientCmdConfig.Burst = kubeconfigBurst
+
 	kubeClient, err := kubernetes.NewForConfig(clientCmdConfig)
 	if err != nil {
 		log.Fatalf("Error building Kubernetes clientset: %s", err.Error())
@@ -118,6 +128,7 @@ func main() {
 	// this is where we need to swap to the faasInformerFactory
 	profileInformerOpt := informers.WithNamespace(config.ProfilesNamespace)
 	profileInformerFactory := informers.NewSharedInformerFactoryWithOptions(faasClient, defaultResync, profileInformerOpt)
+
 	profileLister := profileInformerFactory.Openfaas().V1().Profiles().Lister()
 	factory := k8s.NewFunctionFactory(kubeClient, deployConfig, profileLister)
 
@@ -131,59 +142,85 @@ func main() {
 		faasClient: faasClient,
 	}
 
-	if !operator {
-		log.Println("Starting controller")
-		runController(setup)
-	} else {
+	if operator {
 		log.Println("Starting operator")
 		runOperator(setup, config)
+	} else {
+		log.Println("Starting controller")
+		runController(setup)
 	}
 }
 
-// runController runs the faas-netes imperative controller
-func runController(setup serverSetup) {
-	// pull out the required config and clients fromthe setup, this is largely a
-	// leftover from refactoring the setup to a shared step and keeping the function
-	// signature readable
-	config := setup.config
-	kubeClient := setup.kubeClient
-	factory := setup.functionFactory
+type customInformers struct {
+	EndpointsInformer  v1core.EndpointsInformer
+	DeploymentInformer v1apps.DeploymentInformer
+	FunctionsInformer  v1.FunctionInformer
+}
+
+func startInformers(setup serverSetup, stopCh <-chan struct{}, operator bool) customInformers {
 	kubeInformerFactory := setup.kubeInformerFactory
 	faasInformerFactory := setup.faasInformerFactory
 
-	// set up signals so we handle the first shutdown signal gracefully
-	stopCh := signals.SetupSignalHandler()
+	var functions v1.FunctionInformer
+	if operator {
+		// go faasInformerFactory.Start(stopCh)
+
+		functions = faasInformerFactory.Openfaas().V1().Functions()
+		go functions.Informer().Run(stopCh)
+		if ok := cache.WaitForNamedCacheSync("faas-netes:functions", stopCh, functions.Informer().HasSynced); !ok {
+			log.Fatalf("failed to wait for cache to sync")
+		}
+	}
 
-	endpointsInformer := kubeInformerFactory.Core().V1().Endpoints()
+	// go kubeInformerFactory.Start(stopCh)
 
-	deploymentLister := kubeInformerFactory.Apps().V1().
-		Deployments().Lister()
+	deployments := kubeInformerFactory.Apps().V1().Deployments()
+	go deployments.Informer().Run(stopCh)
+	if ok := cache.WaitForNamedCacheSync("faas-netes:deployments", stopCh, deployments.Informer().HasSynced); !ok {
+		log.Fatalf("failed to wait for cache to sync")
+	}
 
-	go faasInformerFactory.Start(stopCh)
-	go kubeInformerFactory.Start(stopCh)
-	go setup.profileInformerFactory.Start(stopCh)
+	endpoints := kubeInformerFactory.Core().V1().Endpoints()
+	go endpoints.Informer().Run(stopCh)
+	if ok := cache.WaitForNamedCacheSync("faas-netes:endpoints", stopCh, endpoints.Informer().HasSynced); !ok {
+		log.Fatalf("failed to wait for cache to sync")
+	}
 
-	// Any "Wait" calls need to be made, after the informers have been started
-	start := time.Now()
-	glog.Infof("Waiting for cache sync in main")
-	kubeInformerFactory.WaitForCacheSync(stopCh)
-	setup.profileInformerFactory.WaitForCacheSync(stopCh)
+	// go setup.profileInformerFactory.Start(stopCh)
 
-	// Block and wait for the endpoints to become synchronised
-	cache.WaitForCacheSync(stopCh, endpointsInformer.Informer().HasSynced)
+	profileInformerFactory := setup.profileInformerFactory
+	profiles := profileInformerFactory.Openfaas().V1().Profiles()
+	go profiles.Informer().Run(stopCh)
+	if ok := cache.WaitForNamedCacheSync("faas-netes:profiles", stopCh, profiles.Informer().HasSynced); !ok {
+		log.Fatalf("failed to wait for cache to sync")
+	}
 
-	glog.Infof("Cache sync done in: %fs", time.Since(start).Seconds())
-	glog.Infof("Endpoints synced? %v", endpointsInformer.Informer().HasSynced())
+	return customInformers{
+		EndpointsInformer:  endpoints,
+		DeploymentInformer: deployments,
+		FunctionsInformer:  functions,
+	}
+}
 
-	lister := endpointsInformer.Lister()
-	functionLookup := k8s.NewFunctionLookup(config.DefaultFunctionNamespace, lister)
+// runController runs the faas-netes imperative controller
+func runController(setup serverSetup) {
+	config := setup.config
+	kubeClient := setup.kubeClient
+	factory := setup.functionFactory
+
+	// set up signals so we handle the first shutdown signal gracefully
+	stopCh := signals.SetupSignalHandler()
+	operator := false
+	listers := startInformers(setup, stopCh, operator)
+
+	functionLookup := k8s.NewFunctionLookup(config.DefaultFunctionNamespace, listers.EndpointsInformer.Lister())
 
 	bootstrapHandlers := providertypes.FaaSHandlers{
 		FunctionProxy:  proxy.NewHandlerFunc(config.FaaSConfig, functionLookup),
 		DeleteHandler:  handlers.MakeDeleteHandler(config.DefaultFunctionNamespace, kubeClient),
 		DeployHandler:  handlers.MakeDeployHandler(config.DefaultFunctionNamespace, factory),
-		FunctionReader: handlers.MakeFunctionReader(config.DefaultFunctionNamespace, deploymentLister),
-		ReplicaReader:  handlers.MakeReplicaReader(config.DefaultFunctionNamespace, deploymentLister),
+		FunctionReader: handlers.MakeFunctionReader(config.DefaultFunctionNamespace, listers.DeploymentInformer.Lister()),
+		ReplicaReader:  handlers.MakeReplicaReader(config.DefaultFunctionNamespace, listers.DeploymentInformer.Lister()),
 		ReplicaUpdater: handlers.MakeReplicaUpdater(config.DefaultFunctionNamespace, kubeClient),
 		UpdateHandler:  handlers.MakeUpdateHandler(config.DefaultFunctionNamespace, factory),
 		HealthHandler:  handlers.MakeHealthHandler(),
@@ -198,9 +235,6 @@ func runController(setup serverSetup) {
 
 // runOperator runs the CRD Operator
 func runOperator(setup serverSetup, cfg config.BootstrapConfig) {
-	// pull out the required config and clients fromthe setup, this is largely a
-	// leftover from refactoring the setup to a shared step and keeping the function
-	// signature readable
 	kubeClient := setup.kubeClient
 	faasClient := setup.faasClient
 	kubeInformerFactory := setup.kubeInformerFactory
@@ -214,8 +248,10 @@ func runOperator(setup serverSetup, cfg config.BootstrapConfig) {
 	setupLogging()
 	// set up signals so we handle the first shutdown signal gracefully
 	stopCh := signals.SetupSignalHandler()
-	endpointsInformer := kubeInformerFactory.Core().V1().Endpoints()
-	deploymentInformer := kubeInformerFactory.Apps().V1().Deployments()
+	// set up signals so we handle the first shutdown signal gracefully
+
+	operator := true
+	listers := startInformers(setup, stopCh, operator)
 
 	ctrl := controller.NewController(
 		kubeClient,
@@ -225,23 +261,7 @@ func runOperator(setup serverSetup, cfg config.BootstrapConfig) {
 		factory,
 	)
 
-	srv := server.New(faasClient, kubeClient, endpointsInformer, deploymentInformer, cfg.ClusterRole, cfg)
-
-	go faasInformerFactory.Start(stopCh)
-	go kubeInformerFactory.Start(stopCh)
-	go setup.profileInformerFactory.Start(stopCh)
-
-	// Any "Wait" calls need to be made, after the informers have been started
-	start := time.Now()
-	glog.Infof("Waiting for cache sync in main")
-	kubeInformerFactory.WaitForCacheSync(stopCh)
-	setup.profileInformerFactory.WaitForCacheSync(stopCh)
-
-	// Block and wait for the endpoints to become synchronised
-	cache.WaitForCacheSync(stopCh, endpointsInformer.Informer().HasSynced)
-
-	glog.Infof("Cache sync done in: %fs", time.Since(start).Seconds())
-	glog.Infof("Endpoints synced? %v", endpointsInformer.Informer().HasSynced())
+	srv := server.New(faasClient, kubeClient, listers.EndpointsInformer, listers.DeploymentInformer.Lister(), cfg.ClusterRole, cfg)
 
 	go srv.Start()
 	if err := ctrl.Run(1, stopCh); err != nil {
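The substance of the change is the new startInformers helper: rather than starting everything through the shared informer factories and waiting on their aggregate WaitForCacheSync, each informer's Run is launched in its own goroutine and the code then blocks on cache.WaitForNamedCacheSync, which logs the informer's name while waiting and makes a failed sync fatal instead of silent. Below is a minimal, self-contained sketch of the same pattern for a single endpoints informer; the client-go calls are the real API, while the program itself and the "example:endpoints" name are illustrative, not taken from the commit.

package main

import (
	"log"

	"k8s.io/apimachinery/pkg/labels"
	kubeinformers "k8s.io/client-go/informers"
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/rest"
	"k8s.io/client-go/tools/cache"
)

func main() {
	cfg, err := rest.InClusterConfig()
	if err != nil {
		log.Fatalf("Error building kubeconfig: %s", err.Error())
	}

	// Raise the client-side rate limits, mirroring the QPS/Burst values the
	// patch applies to the rest config before building the clientset.
	cfg.QPS = 100
	cfg.Burst = 250

	client, err := kubernetes.NewForConfig(cfg)
	if err != nil {
		log.Fatalf("Error building Kubernetes clientset: %s", err.Error())
	}

	stopCh := make(chan struct{})
	defer close(stopCh)

	factory := kubeinformers.NewSharedInformerFactory(client, 0)

	// Run the informer explicitly in its own goroutine, then block on a
	// *named* cache sync: if the sync hangs, the name appears in the logs,
	// and failure is fatal rather than silent.
	endpoints := factory.Core().V1().Endpoints()
	go endpoints.Informer().Run(stopCh)
	if ok := cache.WaitForNamedCacheSync("example:endpoints", stopCh, endpoints.Informer().HasSynced); !ok {
		log.Fatalf("failed to wait for cache to sync")
	}

	// Only after a successful sync is the lister handed to consumers.
	eps, err := endpoints.Lister().List(labels.Everything())
	if err != nil {
		log.Fatalf("Error listing endpoints: %s", err.Error())
	}
	log.Printf("synced, %d endpoints objects in cache", len(eps))
}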

pkg/server/server.go

Lines changed: 2 additions & 3 deletions
@@ -12,13 +12,13 @@ import (
 	"github.com/openfaas/faas-netes/pkg/k8s"
 	faasnetesk8s "github.com/openfaas/faas-netes/pkg/k8s"
 	bootstrap "github.com/openfaas/faas-provider"
+	v1apps "k8s.io/client-go/listers/apps/v1"
 
 	"github.com/openfaas/faas-provider/logs"
 	"github.com/openfaas/faas-provider/proxy"
 	"github.com/openfaas/faas-provider/types"
 	"github.com/prometheus/client_golang/prometheus/promhttp"
 
-	appsinformer "k8s.io/client-go/informers/apps/v1"
 	coreinformer "k8s.io/client-go/informers/core/v1"
 	"k8s.io/client-go/kubernetes"
 	glog "k8s.io/klog"
@@ -34,7 +34,7 @@ const defaultWriteTimeout = 8
 func New(client clientset.Interface,
 	kube kubernetes.Interface,
 	endpointsInformer coreinformer.EndpointsInformer,
-	deploymentsInformer appsinformer.DeploymentInformer,
+	deploymentLister v1apps.DeploymentLister,
 	clusterRole bool,
 	cfg config.BootstrapConfig) *Server {
 
@@ -51,7 +51,6 @@ func New(client clientset.Interface,
 	lister := endpointsInformer.Lister()
 	functionLookup := k8s.NewFunctionLookup(functionNamespace, lister)
 
-	deploymentLister := deploymentsInformer.Lister()
 	bootstrapConfig := types.FaaSConfig{
 		ReadTimeout:  cfg.FaaSConfig.ReadTimeout,
 		WriteTimeout: cfg.FaaSConfig.WriteTimeout,
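Narrowing server.New from a DeploymentInformer to a DeploymentLister makes the dependency explicit: the server only reads deployments from the already-synced cache, and responsibility for running the informer and waiting for its sync now sits with startInformers in main.go. A small sketch of the idea follows; the countDeployments helper is hypothetical and only illustrates why the lister is the smallest interface that works.

package server

import (
	"k8s.io/apimachinery/pkg/labels"
	v1apps "k8s.io/client-go/listers/apps/v1"
)

// countDeployments is a hypothetical read-only consumer. List and Get
// against the synced cache are all it needs, so it takes the narrow
// DeploymentLister; starting the informer and waiting for its cache to
// sync stays with the caller (startInformers in main.go).
func countDeployments(lister v1apps.DeploymentLister, namespace string) (int, error) {
	deployments, err := lister.Deployments(namespace).List(labels.Everything())
	if err != nil {
		return 0, err
	}
	return len(deployments), nil
}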
