From 5161d169baa5716c39be7fc7c9b070b3ec43933a Mon Sep 17 00:00:00 2001
From: Jay Vyas
Date: Mon, 7 Dec 2015 15:02:39 -0500
Subject: [PATCH] Parameterize the HPA tolerance, downscale window, and
 upscale window; add a back-solving unit test that makes the tolerance
 algorithm's expectations visible; log dScale, uScale, and tol at controller
 creation.

---
 .../app/controllermanager.go                  |  6 +-
 pkg/controller/podautoscaler/horizontal.go    | 48 +++++++-----
 .../podautoscaler/horizontal_test.go          | 74 ++++++++++++++++++-
 3 files changed, 104 insertions(+), 24 deletions(-)

diff --git a/cmd/kube-controller-manager/app/controllermanager.go b/cmd/kube-controller-manager/app/controllermanager.go
index 8f9283856d..b36c824846 100644
--- a/cmd/kube-controller-manager/app/controllermanager.go
+++ b/cmd/kube-controller-manager/app/controllermanager.go
@@ -362,7 +362,11 @@ func (s *CMServer) Run(_ []string) error {
 			metrics.DefaultHeapsterService,
 			metrics.DefaultHeapsterPort,
 		)
-		podautoscaler.NewHorizontalController(hpaClient, metricsClient).
+		// TODO: expose tolerance/downscale/upscale as flags; these match the old defaults.
+		tolerance := 0.1
+		downScale := 5 * time.Minute
+		upScale := 3 * time.Minute
+		podautoscaler.NewHorizontalController(hpaClient, metricsClient, tolerance, downScale, upScale).
 			Run(s.HorizontalPodAutoscalerSyncPeriod)
 	}

diff --git a/pkg/controller/podautoscaler/horizontal.go b/pkg/controller/podautoscaler/horizontal.go
index 63219c1c32..29f34b9d1c 100644
--- a/pkg/controller/podautoscaler/horizontal.go
+++ b/pkg/controller/podautoscaler/horizontal.go
@@ -31,30 +31,40 @@ import (
 	"k8s.io/kubernetes/pkg/util"
 )
 
-const (
-	// Usage shoud exceed the tolerance before we start downscale or upscale the pods.
-	// TODO: make it a flag or HPA spec element.
-	tolerance = 0.1
-)
-
 type HorizontalController struct {
-	client        client.Interface
-	metricsClient metrics.MetricsClient
-	eventRecorder record.EventRecorder
+	client                   client.Interface
+	metricsClient            metrics.MetricsClient
+	eventRecorder            record.EventRecorder
+	tolerance                float64
+	downscaleForbiddenWindow time.Duration
+	upscaleForbiddenWindow   time.Duration
 }
 
-var downscaleForbiddenWindow = 5 * time.Minute
-var upscaleForbiddenWindow = 3 * time.Minute
-
-func NewHorizontalController(client client.Interface, metricsClient metrics.MetricsClient) *HorizontalController {
+func NewHorizontalController(client client.Interface, metricsClient metrics.MetricsClient, tol float64, dScale, uScale time.Duration) *HorizontalController {
 	broadcaster := record.NewBroadcaster()
 	broadcaster.StartRecordingToSink(client.Events(""))
 	recorder := broadcaster.NewRecorder(api.EventSource{Component: "horizontal-pod-autoscaler"})
+	// Fall back to the previous hard-coded values when the inputs are out of range.
+	if tol < 0 || tol > 1 {
+		glog.Warningf("Invalid tolerance %v provided; using default 0.1", tol)
+		tol = 0.1
+	}
+	if uScale <= 0 {
+		glog.Warningf("Invalid upscale window %v provided; using default 3m", uScale)
+		uScale = 3 * time.Minute
+	}
+	if dScale <= 0 {
+		glog.Warningf("Invalid downscale window %v provided; using default 5m", dScale)
+		dScale = 5 * time.Minute
+	}
+	glog.V(2).Infof("Created horizontal controller with downscale window %v, upscale window %v, and tolerance %v", dScale, uScale, tol)
 	return &HorizontalController{
-		client:        client,
-		metricsClient: metricsClient,
-		eventRecorder: recorder,
+		client:                   client,
+		metricsClient:            metricsClient,
+		eventRecorder:            recorder,
+		tolerance:                tol,
+		downscaleForbiddenWindow: dScale,
+		upscaleForbiddenWindow:   uScale,
 	}
 }
 
@@ -83,7 +93,7 @@ func (a *HorizontalController) computeReplicasForCPUUtilization(hpa extensions.H
 	}
 	usageRatio := float64(*currentUtilization) / float64(hpa.Spec.CPUUtilization.TargetPercentage)
-	if math.Abs(1.0-usageRatio) > tolerance {
+	if math.Abs(1.0-usageRatio) > a.tolerance {
 		return int(math.Ceil(usageRatio * float64(currentReplicas))), currentUtilization, timestamp, nil
 	} else {
 		return currentReplicas, currentUtilization, timestamp, nil
@@ -125,7 +135,7 @@ func (a *HorizontalController) reconcileAutoscaler(hpa extensions.HorizontalPodA
 	// and there was no rescaling in the last downscaleForbiddenWindow.
 	if desiredReplicas < currentReplicas &&
 		(hpa.Status.LastScaleTime == nil ||
-			hpa.Status.LastScaleTime.Add(downscaleForbiddenWindow).Before(timestamp)) {
+			hpa.Status.LastScaleTime.Add(a.downscaleForbiddenWindow).Before(timestamp)) {
 		rescale = true
 	}
 
@@ -133,7 +143,7 @@ func (a *HorizontalController) reconcileAutoscaler(hpa extensions.HorizontalPodA
 	// and there was no rescaling in the last upscaleForbiddenWindow.
 	if desiredReplicas > currentReplicas &&
 		(hpa.Status.LastScaleTime == nil ||
-			hpa.Status.LastScaleTime.Add(upscaleForbiddenWindow).Before(timestamp)) {
+			hpa.Status.LastScaleTime.Add(a.upscaleForbiddenWindow).Before(timestamp)) {
 		rescale = true
 	}
 }

diff --git a/pkg/controller/podautoscaler/horizontal_test.go b/pkg/controller/podautoscaler/horizontal_test.go
index 03334e7f50..680cea232f 100644
--- a/pkg/controller/podautoscaler/horizontal_test.go
+++ b/pkg/controller/podautoscaler/horizontal_test.go
@@ -20,6 +20,7 @@ import (
 	"encoding/json"
 	"fmt"
 	"io"
+	"math"
 	"testing"
 	"time"
 
@@ -32,9 +33,14 @@ import (
 	"k8s.io/kubernetes/pkg/controller/podautoscaler/metrics"
 	"k8s.io/kubernetes/pkg/runtime"
 
-	heapster "k8s.io/heapster/api/v1/types"
-
+	"github.com/golang/glog"
 	"github.com/stretchr/testify/assert"
+	heapster "k8s.io/heapster/api/v1/types"
+)
+
+// The unit tests must use the controller's tolerance to calibrate their expectations.
+const (
+	tolerance = 0.1
 )
 
 func (w fakeResponseWrapper) DoRaw() ([]byte, error) {
@@ -206,7 +212,7 @@ func (tc *testCase) verifyResults(t *testing.T) {
 func (tc *testCase) runTest(t *testing.T) {
 	testClient := tc.prepareTestClient(t)
 	metricsClient := metrics.NewHeapsterMetricsClient(testClient, metrics.DefaultHeapsterNamespace, metrics.DefaultHeapsterScheme, metrics.DefaultHeapsterService, metrics.DefaultHeapsterPort)
-	hpaController := NewHorizontalController(testClient, metricsClient)
+	hpaController := NewHorizontalController(testClient, metricsClient, tolerance, time.Second, time.Second)
 	err := hpaController.reconcileAutoscalers()
 	assert.Equal(t, nil, err)
 	if tc.verifyEvents {
@@ -360,4 +366,64 @@ func TestEventNotCreated(t *testing.T) {
 	tc.runTest(t)
 }
 
-// TODO: add more tests
+// TestComputedToleranceAlgImplementation is a regression test that back-solves
+// for the smallest CPU target that still breaches the tolerance, then verifies
+// the autoscaler performs the correspondingly small downscale.
+func TestComputedToleranceAlgImplementation(t *testing.T) {
+
+	startPods := 10
+	// 150 mCPU per pod.
+	totalUsedCPUOfAllPods := uint64(startPods * 150)
+	// Each pod starts out asking for 2x what it really needs,
+	// so the used/requested ratio is 50%.
+	totalRequestedCPUOfAllPods := 2 * totalUsedCPUOfAllPods
+	requestedToUsed := float64(totalRequestedCPUOfAllPods) / float64(totalUsedCPUOfAllPods)
+	// Spread the request over the 10 pods; jitter is added below in reportedCPURequests.
+	perPodRequested := int(totalRequestedCPUOfAllPods) / startPods
+
+	// Force a minimal scaling event by satisfying (tolerance < 1 - resourcesUsedRatio).
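+	// Editorial note: worked numbers for the back-solve below, assuming the
+	// setup above (requestedToUsed = 2.0, tolerance = 0.1):
+	//   target             = 1/(2.0*0.9) + 0.01 ~= 0.5656 (a 56% CPU target)
+	//   resourcesUsedRatio = 0.5/0.5656         ~= 0.884
+	// Since 1 - 0.884 = 0.116 > 0.1, the tolerance is breached and the
+	// autoscaler should scale to ceil(0.884*10) = 9 pods.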
+	target := math.Abs(1/(requestedToUsed*(1-tolerance))) + 0.01
+	finalCPUPercentTarget := int(target * 100)
+	resourcesUsedRatio := float64(totalUsedCPUOfAllPods) / (float64(totalRequestedCPUOfAllPods) * target)
+	// The autoscaler compares the usage ratio against the tolerance; compute
+	// the difference it will see.
+	usageRatioToleranceValue := 1 - resourcesUsedRatio
+	// e.g. ceil(0.88 * 10 pods) -> 9 pods, the scaled-down expectation.
+	finalPods := math.Ceil(resourcesUsedRatio * float64(startPods))
+
+	glog.Infof("To breach tolerance %f we will create a utilization ratio difference of %f", tolerance, usageRatioToleranceValue)
+	tc := testCase{
+		minReplicas:     0,
+		maxReplicas:     1000,
+		initialReplicas: startPods,
+		desiredReplicas: int(finalPods),
+		CPUTarget:       finalCPUPercentTarget,
+		reportedLevels: []uint64{
+			totalUsedCPUOfAllPods / 10,
+			totalUsedCPUOfAllPods / 10,
+			totalUsedCPUOfAllPods / 10,
+			totalUsedCPUOfAllPods / 10,
+			totalUsedCPUOfAllPods / 10,
+			totalUsedCPUOfAllPods / 10,
+			totalUsedCPUOfAllPods / 10,
+			totalUsedCPUOfAllPods / 10,
+			totalUsedCPUOfAllPods / 10,
+			totalUsedCPUOfAllPods / 10,
+		},
+		reportedCPURequests: []resource.Quantity{
+			resource.MustParse(fmt.Sprint(perPodRequested+100) + "m"),
+			resource.MustParse(fmt.Sprint(perPodRequested-100) + "m"),
+			resource.MustParse(fmt.Sprint(perPodRequested+10) + "m"),
+			resource.MustParse(fmt.Sprint(perPodRequested-10) + "m"),
+			resource.MustParse(fmt.Sprint(perPodRequested+2) + "m"),
+			resource.MustParse(fmt.Sprint(perPodRequested-2) + "m"),
+			resource.MustParse(fmt.Sprint(perPodRequested+1) + "m"),
+			resource.MustParse(fmt.Sprint(perPodRequested-1) + "m"),
+			resource.MustParse(fmt.Sprint(perPodRequested) + "m"),
+			resource.MustParse(fmt.Sprint(perPodRequested) + "m"),
+		},
+	}
+	tc.runTest(t)
+}
+
+// TODO: add more tests, e.g. enforcement of the upscale/downscale forbidden windows.
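
For reviewers: the sketch below is not part of the patch. It reproduces the
test's back-solve arithmetic with the same constants (tolerance = 0.1, ten
pods requesting 2x their usage), so the expected replica count can be checked
without running the suite. All names are local to the sketch.

package main

import (
	"fmt"
	"math"
)

// Mirrors the constants in TestComputedToleranceAlgImplementation.
func main() {
	const tolerance = 0.1
	startPods := 10.0
	requestedToUsed := 2.0 // each pod requests 2x what it actually uses

	// Smallest target that still breaches the tolerance, plus a 1% margin.
	target := 1/(requestedToUsed*(1-tolerance)) + 0.01
	// Fraction of the requested CPU actually in use at that target.
	resourcesUsedRatio := 1 / (requestedToUsed * target)
	finalPods := math.Ceil(resourcesUsedRatio * startPods)

	fmt.Printf("target=%.4f usedRatio=%.4f finalPods=%.0f\n",
		target, resourcesUsedRatio, finalPods)
	// Expected output: target=0.5656 usedRatio=0.8841 finalPods=9
}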