From 392fc6668f5536af71950f7df56fe3f89256a66b Mon Sep 17 00:00:00 2001 From: Jan Chaloupka Date: Fri, 5 Feb 2016 16:27:06 +0100 Subject: [PATCH] Jittering periods of some kubelet's sync loops: - pod_workers: pod syncing - prober workers: container syncing In order to synchronize the current state of Kubernetes's objects (e.g. pods, containers, etc.), periodic sync loops are run. When there are a lot of objects to synchronize, the loops increase communication traffic. At some point, when all the traffic interferes, the CPU usage curve hits the roof, causing 100% CPU utilization. To distribute the traffic in time, some sync loops can jitter their period in each loop and help to flatten the curve. --- pkg/kubelet/pod_workers.go | 13 +++++++++++-- pkg/kubelet/prober/worker.go | 8 +++++++- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/pkg/kubelet/pod_workers.go b/pkg/kubelet/pod_workers.go index 9e1f9efa60..15e7cf668b 100644 --- a/pkg/kubelet/pod_workers.go +++ b/pkg/kubelet/pod_workers.go @@ -28,6 +28,7 @@ import ( "k8s.io/kubernetes/pkg/kubelet/util/queue" "k8s.io/kubernetes/pkg/types" "k8s.io/kubernetes/pkg/util/runtime" + "k8s.io/kubernetes/pkg/util/wait" ) // PodWorkers is an abstract interface for testability. @@ -39,6 +40,14 @@ type PodWorkers interface { type syncPodFnType func(*api.Pod, *api.Pod, *kubecontainer.PodStatus, kubetypes.SyncPodType) error +const ( + // jitter factor for resyncInterval + workerResyncIntervalJitterFactor = 0.5 + + // jitter factor for backOffPeriod + workerBackOffPeriodJitterFactor = 0.5 +) + type podWorkers struct { // Protects all per worker fields. podLock sync.Mutex @@ -209,10 +218,10 @@ func (p *podWorkers) wrapUp(uid types.UID, syncErr error) { switch { case syncErr == nil: // No error; requeue at the regular resync interval. 
- p.workQueue.Enqueue(uid, p.resyncInterval) + p.workQueue.Enqueue(uid, wait.Jitter(p.resyncInterval, workerResyncIntervalJitterFactor)) default: // Error occurred during the sync; back off and then retry. - p.workQueue.Enqueue(uid, p.backOffPeriod) + p.workQueue.Enqueue(uid, wait.Jitter(p.backOffPeriod, workerBackOffPeriodJitterFactor)) } p.checkForUpdates(uid) } diff --git a/pkg/kubelet/prober/worker.go b/pkg/kubelet/prober/worker.go index 60f4c6d674..5e97cb4bee 100644 --- a/pkg/kubelet/prober/worker.go +++ b/pkg/kubelet/prober/worker.go @@ -17,6 +17,7 @@ limitations under the License. package prober import ( + "math/rand" "time" "github.com/golang/glog" @@ -93,7 +94,8 @@ func newWorker( // run periodically probes the container. func (w *worker) run() { - probeTicker := time.NewTicker(time.Duration(w.spec.PeriodSeconds) * time.Second) + probeTickerPeriod := time.Duration(w.spec.PeriodSeconds) * time.Second + probeTicker := time.NewTicker(probeTickerPeriod) defer func() { // Clean up. @@ -105,6 +107,10 @@ func (w *worker) run() { w.probeManager.removeWorker(w.pod.UID, w.container.Name, w.probeType) }() + // If kubelet restarted the probes could be started in rapid succession. + // Let the worker wait for a random portion of tickerPeriod before probing. + time.Sleep(time.Duration(rand.Float64() * float64(probeTickerPeriod))) + probeLoop: for w.doProbe() { // Wait for next probe tick.