Skip to content

Commit 99de6ad

Browse files
authored
Merge pull request kubernetes-sigs#239 from droot/metrics/reconciles_total
✨ added reconciles_total metric
2 parents b497fd5 + 192a7d8 commit 99de6ad

File tree

3 files changed

+144
-7
lines changed

3 files changed

+144
-7
lines changed

pkg/internal/controller/controller.go

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -216,13 +216,15 @@ func (c *Controller) processNextWorkItem() bool {
216216
c.Queue.AddRateLimited(req)
217217
log.Error(err, "Reconciler error", "controller", c.Name, "request", req)
218218
ctrlmetrics.ReconcileErrors.WithLabelValues(c.Name).Inc()
219-
219+
ctrlmetrics.ReconcileTotal.WithLabelValues(c.Name, "error").Inc()
220220
return false
221221
} else if result.RequeueAfter > 0 {
222222
c.Queue.AddAfter(req, result.RequeueAfter)
223+
ctrlmetrics.ReconcileTotal.WithLabelValues(c.Name, "requeue_after").Inc()
223224
return true
224225
} else if result.Requeue {
225226
c.Queue.AddRateLimited(req)
227+
ctrlmetrics.ReconcileTotal.WithLabelValues(c.Name, "requeue").Inc()
226228
return true
227229
}
228230

@@ -233,6 +235,7 @@ func (c *Controller) processNextWorkItem() bool {
233235
// TODO(directxman12): What does 1 mean? Do we want level constants? Do we want levels at all?
234236
log.V(1).Info("Successfully Reconciled", "controller", c.Name, "request", req)
235237

238+
ctrlmetrics.ReconcileTotal.WithLabelValues(c.Name, "success").Inc()
236239
// Return true, don't take a break
237240
return true
238241
}

pkg/internal/controller/controller_test.go

Lines changed: 127 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -408,14 +408,138 @@ var _ = Describe("controller", func() {
408408
// TODO(community): write this test
409409
})
410410

411+
Context("prometheus metric reconcile_total", func() {
412+
var reconcileTotal dto.Metric
413+
414+
BeforeEach(func() {
415+
ctrlmetrics.ReconcileTotal.Reset()
416+
reconcileTotal.Reset()
417+
})
418+
419+
It("should get updated on successful reconciliation", func(done Done) {
420+
Expect(func() error {
421+
ctrlmetrics.ReconcileTotal.WithLabelValues(ctrl.Name, "success").Write(&reconcileTotal)
422+
if reconcileTotal.GetCounter().GetValue() != 0.0 {
423+
return fmt.Errorf("metric reconcile total not reset")
424+
}
425+
return nil
426+
}()).Should(Succeed())
427+
428+
go func() {
429+
defer GinkgoRecover()
430+
Expect(ctrl.Start(stop)).NotTo(HaveOccurred())
431+
}()
432+
By("Invoking Reconciler which will succeed")
433+
ctrl.Queue.Add(request)
434+
435+
Expect(<-reconciled).To(Equal(request))
436+
Eventually(func() error {
437+
ctrlmetrics.ReconcileTotal.WithLabelValues(ctrl.Name, "success").Write(&reconcileTotal)
438+
if actual := reconcileTotal.GetCounter().GetValue(); actual != 1.0 {
439+
return fmt.Errorf("metric reconcile total expected: %v and got: %v", 1.0, actual)
440+
}
441+
return nil
442+
}, 2.0).Should(Succeed())
443+
444+
close(done)
445+
}, 2.0)
446+
447+
It("should get updated on reconcile errors", func(done Done) {
448+
Expect(func() error {
449+
ctrlmetrics.ReconcileTotal.WithLabelValues(ctrl.Name, "error").Write(&reconcileTotal)
450+
if reconcileTotal.GetCounter().GetValue() != 0.0 {
451+
return fmt.Errorf("metric reconcile total not reset")
452+
}
453+
return nil
454+
}()).Should(Succeed())
455+
456+
fakeReconcile.Err = fmt.Errorf("expected error: reconcile")
457+
go func() {
458+
defer GinkgoRecover()
459+
Expect(ctrl.Start(stop)).NotTo(HaveOccurred())
460+
}()
461+
By("Invoking Reconciler which will give an error")
462+
ctrl.Queue.Add(request)
463+
464+
Expect(<-reconciled).To(Equal(request))
465+
Eventually(func() error {
466+
ctrlmetrics.ReconcileTotal.WithLabelValues(ctrl.Name, "error").Write(&reconcileTotal)
467+
if actual := reconcileTotal.GetCounter().GetValue(); actual != 1.0 {
468+
return fmt.Errorf("metric reconcile total expected: %v and got: %v", 1.0, actual)
469+
}
470+
return nil
471+
}, 2.0).Should(Succeed())
472+
473+
close(done)
474+
}, 2.0)
475+
476+
It("should get updated when reconcile returns with retry enabled", func(done Done) {
477+
Expect(func() error {
478+
ctrlmetrics.ReconcileTotal.WithLabelValues(ctrl.Name, "retry").Write(&reconcileTotal)
479+
if reconcileTotal.GetCounter().GetValue() != 0.0 {
480+
return fmt.Errorf("metric reconcile total not reset")
481+
}
482+
return nil
483+
}()).Should(Succeed())
484+
485+
fakeReconcile.Result.Requeue = true
486+
go func() {
487+
defer GinkgoRecover()
488+
Expect(ctrl.Start(stop)).NotTo(HaveOccurred())
489+
}()
490+
By("Invoking Reconciler which will return result with Requeue enabled")
491+
ctrl.Queue.Add(request)
492+
493+
Expect(<-reconciled).To(Equal(request))
494+
Eventually(func() error {
495+
ctrlmetrics.ReconcileTotal.WithLabelValues(ctrl.Name, "requeue").Write(&reconcileTotal)
496+
if actual := reconcileTotal.GetCounter().GetValue(); actual != 1.0 {
497+
return fmt.Errorf("metric reconcile total expected: %v and got: %v", 1.0, actual)
498+
}
499+
return nil
500+
}, 2.0).Should(Succeed())
501+
502+
close(done)
503+
}, 2.0)
504+
505+
It("should get updated when reconcile returns with retryAfter enabled", func(done Done) {
506+
Expect(func() error {
507+
ctrlmetrics.ReconcileTotal.WithLabelValues(ctrl.Name, "retry_after").Write(&reconcileTotal)
508+
if reconcileTotal.GetCounter().GetValue() != 0.0 {
509+
return fmt.Errorf("metric reconcile total not reset")
510+
}
511+
return nil
512+
}()).Should(Succeed())
513+
514+
fakeReconcile.Result.RequeueAfter = 5 * time.Hour
515+
go func() {
516+
defer GinkgoRecover()
517+
Expect(ctrl.Start(stop)).NotTo(HaveOccurred())
518+
}()
519+
By("Invoking Reconciler which will return result with requeueAfter enabled")
520+
ctrl.Queue.Add(request)
521+
522+
Expect(<-reconciled).To(Equal(request))
523+
Eventually(func() error {
524+
ctrlmetrics.ReconcileTotal.WithLabelValues(ctrl.Name, "requeue_after").Write(&reconcileTotal)
525+
if actual := reconcileTotal.GetCounter().GetValue(); actual != 1.0 {
526+
return fmt.Errorf("metric reconcile total expected: %v and got: %v", 1.0, actual)
527+
}
528+
return nil
529+
}, 2.0).Should(Succeed())
530+
531+
close(done)
532+
}, 2.0)
533+
})
534+
411535
Context("should update prometheus metrics", func() {
412536
It("should requeue a Request if there is an error and continue processing items", func(done Done) {
413537
var queueLength, reconcileErrs dto.Metric
414538
ctrlmetrics.QueueLength.Reset()
415539
Expect(func() error {
416540
ctrlmetrics.QueueLength.WithLabelValues(ctrl.Name).Write(&queueLength)
417541
if queueLength.GetGauge().GetValue() != 0.0 {
418-
return fmt.Errorf("metrics not reset")
542+
return fmt.Errorf("metric queue length not reset")
419543
}
420544
return nil
421545
}()).Should(Succeed())
@@ -424,7 +548,7 @@ var _ = Describe("controller", func() {
424548
Expect(func() error {
425549
ctrlmetrics.ReconcileErrors.WithLabelValues(ctrl.Name).Write(&reconcileErrs)
426550
if reconcileErrs.GetCounter().GetValue() != 0.0 {
427-
return fmt.Errorf("metrics not reset")
551+
return fmt.Errorf("metric reconcile errors not reset")
428552
}
429553
return nil
430554
}()).Should(Succeed())
@@ -444,7 +568,7 @@ var _ = Describe("controller", func() {
444568
Eventually(func() error {
445569
ctrlmetrics.QueueLength.WithLabelValues(ctrl.Name).Write(&queueLength)
446570
if queueLength.GetGauge().GetValue() != 1.0 {
447-
return fmt.Errorf("metrics not updated")
571+
return fmt.Errorf("metric queue length not updated")
448572
}
449573
return nil
450574
}, 2.0).Should(Succeed())

pkg/internal/controller/metrics/metrics.go

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,24 +29,34 @@ var (
2929
Help: "Length of reconcile queue per controller",
3030
}, []string{"controller"})
3131

32+
// ReconcileTotal is a prometheus counter metric which holds the total
33+
// number of reconciliations per controller. It has two labels: the controller label refers
34+
// to the controller name, and the result label refers to the reconcile result, i.e.
35+
// success, error, requeue, requeue_after.
36+
ReconcileTotal = prometheus.NewCounterVec(prometheus.CounterOpts{
37+
Name: "controller_runtime_reconcile_total",
38+
Help: "Total number of reconciliations per controller",
39+
}, []string{"controller", "result"})
40+
3241
// ReconcileErrors is a prometheus counter metric which holds the total
3342
// number of errors from the Reconciler
3443
ReconcileErrors = prometheus.NewCounterVec(prometheus.CounterOpts{
3544
Name: "controller_runtime_reconcile_errors_total",
36-
Help: "Total number of reconcile errors per controller",
45+
Help: "Total number of reconciliation errors per controller",
3746
}, []string{"controller"})
3847

3948
// ReconcileTime is a prometheus metric which keeps track of the duration
40-
// of reconciles
49+
// of reconciliations
4150
ReconcileTime = prometheus.NewHistogramVec(prometheus.HistogramOpts{
4251
Name: "controller_runtime_reconcile_time_seconds",
43-
Help: "Length of time per reconcile per controller",
52+
Help: "Length of time per reconciliation per controller",
4453
}, []string{"controller"})
4554
)
4655

4756
func init() {
4857
metrics.Registry.MustRegister(
4958
QueueLength,
59+
ReconcileTotal,
5060
ReconcileErrors,
5161
ReconcileTime,
5262
// expose process metrics like CPU, Memory, file descriptor usage etc.

0 commit comments

Comments
 (0)