Parameterization for downscale, upscale, and tolerance with backsolving

Adds a unit test that visibly exercises the tolerance algorithm's expectations, plus logs for the dScale, uScale, and tol values at controller creation.
pull/6/head
Jay Vyas 2015-12-07 15:02:39 -05:00
parent dc4bc2a566
commit 5161d169ba
3 changed files with 104 additions and 24 deletions

View File

@ -362,7 +362,11 @@ func (s *CMServer) Run(_ []string) error {
metrics.DefaultHeapsterService,
metrics.DefaultHeapsterPort,
)
podautoscaler.NewHorizontalController(hpaClient, metricsClient).
// TODO parameterize tolerance/downscale/upscale options.
tolerance := 1.0
downScale := time.Duration(5) * time.Second
upScale := time.Duration(3) * time.Second
podautoscaler.NewHorizontalController(kubeClient, metricsClient, tolerance, downScale, upScale).
Run(s.HorizontalPodAutoscalerSyncPeriod)
}
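
The TODO above calls for exposing these values as options instead of hard-coding them. Below is a minimal sketch of what that could look like with the standard flag package; the flag names and defaults here are hypothetical, not existing kube-controller-manager flags.

package main

import (
	"flag"
	"fmt"
	"time"
)

func main() {
	// Hypothetical flags standing in for the hard-coded values in CMServer.Run above.
	tolerance := flag.Float64("hpa-tolerance", 0.1, "utilization ratio difference that must be exceeded before scaling")
	downScale := flag.Duration("hpa-downscale-window", 5*time.Minute, "minimum time between consecutive downscales")
	upScale := flag.Duration("hpa-upscale-window", 3*time.Minute, "minimum time between consecutive upscales")
	flag.Parse()

	// These parsed values would then be handed to podautoscaler.NewHorizontalController.
	fmt.Printf("tolerance=%v downscale=%v upscale=%v\n", *tolerance, *downScale, *upScale)
}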

View File

@ -31,30 +31,40 @@ import (
"k8s.io/kubernetes/pkg/util"
)
const (
// Usage should exceed the tolerance before we start to downscale or upscale the pods.
// TODO: make it a flag or HPA spec element.
tolerance = 0.1
)
type HorizontalController struct {
client client.Interface
metricsClient metrics.MetricsClient
eventRecorder record.EventRecorder
client client.Interface
metricsClient metrics.MetricsClient
eventRecorder record.EventRecorder
tolerance float64
downscaleForbiddenWindow time.Duration
upscaleForbiddenWindow time.Duration
}
var downscaleForbiddenWindow = 5 * time.Minute
var upscaleForbiddenWindow = 3 * time.Minute
func NewHorizontalController(client client.Interface, metricsClient metrics.MetricsClient) *HorizontalController {
func NewHorizontalController(client client.Interface, metricsClient metrics.MetricsClient, tol float64, dScale, uScale time.Duration) *HorizontalController {
broadcaster := record.NewBroadcaster()
broadcaster.StartRecordingToSink(client.Events(""))
recorder := broadcaster.NewRecorder(api.EventSource{Component: "horizontal-pod-autoscaler"})
if tol < 0 || tol > 1 {
glog.Warningf("Invalid tolerance %v provided, using default.", tol)
tol = .1
}
if uScale == 0*time.Second {
glog.Warningf("Invalid upscale value %v provided, using default.", uScale)
uScale = 3 * time.Minute
}
if dScale == 0*time.Second {
glog.Warningf("Invalid downscale value %v provided, using default.", dScale)
dScale = 5 * time.Minute
}
glog.V(2).Infof("Created Horizontal Controller with downscale %v, upscale %v, and tolerance %v", dScale, uScale, tol)
return &HorizontalController{
client: client,
metricsClient: metricsClient,
eventRecorder: recorder,
client: client,
metricsClient: metricsClient,
eventRecorder: recorder,
tolerance: tol,
downscaleForbiddenWindow: dScale,
upscaleForbiddenWindow: uScale,
}
}
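
The constructor above silently falls back to defaults for out-of-range inputs. Here is a standalone sketch that mirrors those defaulting rules so they can be checked in isolation; normalizeHPAOptions is an illustrative helper, not part of the package.

package main

import (
	"fmt"
	"time"
)

// normalizeHPAOptions mirrors the defaulting in NewHorizontalController:
// a tolerance outside [0, 1] falls back to 0.1, zero windows fall back to 3m/5m.
func normalizeHPAOptions(tol float64, dScale, uScale time.Duration) (float64, time.Duration, time.Duration) {
	if tol < 0 || tol > 1 {
		tol = 0.1
	}
	if uScale == 0 {
		uScale = 3 * time.Minute
	}
	if dScale == 0 {
		dScale = 5 * time.Minute
	}
	return tol, dScale, uScale
}

func main() {
	tol, dScale, uScale := normalizeHPAOptions(-1, 0, 0)
	fmt.Println(tol, dScale, uScale) // 0.1 5m0s 3m0s
}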
@ -83,7 +93,7 @@ func (a *HorizontalController) computeReplicasForCPUUtilization(hpa extensions.H
}
usageRatio := float64(*currentUtilization) / float64(hpa.Spec.CPUUtilization.TargetPercentage)
if math.Abs(1.0-usageRatio) > tolerance {
if math.Abs(1.0-usageRatio) > a.tolerance {
return int(math.Ceil(usageRatio * float64(currentReplicas))), currentUtilization, timestamp, nil
} else {
return currentReplicas, currentUtilization, timestamp, nil
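
To make the tolerance check concrete, here is a self-contained sketch of the same arithmetic; desiredReplicas is an illustrative helper, not part of the package.

package main

import (
	"fmt"
	"math"
)

// desiredReplicas applies the same rule as computeReplicasForCPUUtilization above:
// only rescale when the usage ratio differs from 1.0 by more than the tolerance.
func desiredReplicas(currentUtilization, targetPercentage, currentReplicas int, tolerance float64) int {
	usageRatio := float64(currentUtilization) / float64(targetPercentage)
	if math.Abs(1.0-usageRatio) > tolerance {
		return int(math.Ceil(usageRatio * float64(currentReplicas)))
	}
	return currentReplicas
}

func main() {
	// 55% observed vs. a 50% target is within a 0.1 tolerance: no change.
	fmt.Println(desiredReplicas(55, 50, 10, 0.1)) // 10
	// 70% observed vs. a 50% target breaches it: ceil(1.4 * 10) = 14.
	fmt.Println(desiredReplicas(70, 50, 10, 0.1)) // 14
}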
@ -125,7 +135,7 @@ func (a *HorizontalController) reconcileAutoscaler(hpa extensions.HorizontalPodA
// and there was no rescaling in the last downscaleForbiddenWindow.
if desiredReplicas < currentReplicas &&
(hpa.Status.LastScaleTime == nil ||
hpa.Status.LastScaleTime.Add(downscaleForbiddenWindow).Before(timestamp)) {
hpa.Status.LastScaleTime.Add(a.downscaleForbiddenWindow).Before(timestamp)) {
rescale = true
}
@ -133,7 +143,7 @@ func (a *HorizontalController) reconcileAutoscaler(hpa extensions.HorizontalPodA
// and there was no rescaling in the last upscaleForbiddenWindow.
if desiredReplicas > currentReplicas &&
(hpa.Status.LastScaleTime == nil ||
hpa.Status.LastScaleTime.Add(upscaleForbiddenWindow).Before(timestamp)) {
hpa.Status.LastScaleTime.Add(a.upscaleForbiddenWindow).Before(timestamp)) {
rescale = true
}
}
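
The forbidden-window checks can also be illustrated in isolation. The sketch below assumes the last scale time and the evaluation timestamp are plain time.Time values; downscaleAllowed is an illustrative helper, not the controller's API.

package main

import (
	"fmt"
	"time"
)

// downscaleAllowed mirrors the window check above: a downscale is only allowed
// once the last scale event is older than the downscale forbidden window.
func downscaleAllowed(lastScale *time.Time, window time.Duration, now time.Time) bool {
	return lastScale == nil || lastScale.Add(window).Before(now)
}

func main() {
	now := time.Now()
	recent := now.Add(-2 * time.Minute)
	old := now.Add(-10 * time.Minute)

	fmt.Println(downscaleAllowed(nil, 5*time.Minute, now))     // true: never scaled before
	fmt.Println(downscaleAllowed(&recent, 5*time.Minute, now)) // false: still inside the 5m window
	fmt.Println(downscaleAllowed(&old, 5*time.Minute, now))    // true: window has elapsed
}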

View File

@ -20,6 +20,7 @@ import (
"encoding/json"
"fmt"
"io"
"math"
"testing"
"time"
@ -32,9 +33,14 @@ import (
"k8s.io/kubernetes/pkg/controller/podautoscaler/metrics"
"k8s.io/kubernetes/pkg/runtime"
heapster "k8s.io/heapster/api/v1/types"
glog "github.com/golang/glog"
"github.com/stretchr/testify/assert"
heapster "k8s.io/heapster/api/v1/types"
)
// Unit tests need to be aware of the tolerance in order to calibrate expected scaling results.
const (
tolerance = .1
)
func (w fakeResponseWrapper) DoRaw() ([]byte, error) {
@ -206,7 +212,7 @@ func (tc *testCase) verifyResults(t *testing.T) {
func (tc *testCase) runTest(t *testing.T) {
testClient := tc.prepareTestClient(t)
metricsClient := metrics.NewHeapsterMetricsClient(testClient, metrics.DefaultHeapsterNamespace, metrics.DefaultHeapsterScheme, metrics.DefaultHeapsterService, metrics.DefaultHeapsterPort)
hpaController := NewHorizontalController(testClient, metricsClient)
hpaController := NewHorizontalController(testClient, metricsClient, tolerance, time.Second, time.Second)
err := hpaController.reconcileAutoscalers()
assert.Equal(t, nil, err)
if tc.verifyEvents {
@ -360,4 +366,64 @@ func TestEventNotCreated(t *testing.T) {
tc.runTest(t)
}
// TODO: add more tests
// TestComputedToleranceAlgImplementation is a regression test that
// back-calculates a CPU target percentage such that a small increase in pod
// utilization, calibrated against the tolerance value, just breaches the
// tolerance and forces a minimal downscale.
func TestComputedToleranceAlgImplementation(t *testing.T) {
startPods := 10
// 150 mCPU per pod.
totalUsedCPUOfAllPods := uint64(startPods * 150)
// Each pod starts out asking for 2X what is really needed.
// This means we will have a 50% ratio of used/requested
totalRequestedCPUOfAllPods := 2 * totalUsedCPUOfAllPods
requestedToUsed := float64(totalRequestedCPUOfAllPods) / float64(totalUsedCPUOfAllPods)
// Spread the amount we ask for over 10 pods. We can add some jitter later in reportedLevels.
perPodRequested := int(totalRequestedCPUOfAllPods) / startPods
// Force a minimal scaling event by satisfying (tolerance < 1 - resourcesUsedRatio).
target := math.Abs(1/(requestedToUsed*(1-tolerance))) + .01
finalCpuPercentTarget := int(target * 100)
resourcesUsedRatio := float64(totalUsedCPUOfAllPods) / float64(float64(totalRequestedCPUOfAllPods)*target)
// The autoscaler will compare this against the tolerance, so compute the usage
// ratio difference it will see.
usageRatioToleranceValue := 1 - resourcesUsedRatio
// e.g. ceil(0.88 * 10) -> a scaled-down expectation of 9 pods.
finalPods := math.Ceil(resourcesUsedRatio * float64(startPods))
glog.Infof("To breach tolerance %f we will create a utilization ratio difference of %f", tolerance, usageRatioToleranceValue)
tc := testCase{
minReplicas: 0,
maxReplicas: 1000,
initialReplicas: startPods,
desiredReplicas: int(finalPods),
CPUTarget: finalCpuPercentTarget,
reportedLevels: []uint64{
totalUsedCPUOfAllPods / 10,
totalUsedCPUOfAllPods / 10,
totalUsedCPUOfAllPods / 10,
totalUsedCPUOfAllPods / 10,
totalUsedCPUOfAllPods / 10,
totalUsedCPUOfAllPods / 10,
totalUsedCPUOfAllPods / 10,
totalUsedCPUOfAllPods / 10,
totalUsedCPUOfAllPods / 10,
totalUsedCPUOfAllPods / 10,
},
reportedCPURequests: []resource.Quantity{
resource.MustParse(fmt.Sprint(perPodRequested+100) + "m"),
resource.MustParse(fmt.Sprint(perPodRequested-100) + "m"),
resource.MustParse(fmt.Sprint(perPodRequested+10) + "m"),
resource.MustParse(fmt.Sprint(perPodRequested-10) + "m"),
resource.MustParse(fmt.Sprint(perPodRequested+2) + "m"),
resource.MustParse(fmt.Sprint(perPodRequested-2) + "m"),
resource.MustParse(fmt.Sprint(perPodRequested+1) + "m"),
resource.MustParse(fmt.Sprint(perPodRequested-1) + "m"),
resource.MustParse(fmt.Sprint(perPodRequested) + "m"),
resource.MustParse(fmt.Sprint(perPodRequested) + "m"),
},
}
tc.runTest(t)
}
// TODO: add more tests, e.g., enforcement of upscale/downscale window.
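
For reference, the numbers the test back-solves work out as follows; this is a standalone recomputation for the reader, not part of the test itself.

package main

import (
	"fmt"
	"math"
)

func main() {
	const tolerance = 0.1
	startPods := 10.0
	requestedToUsed := 2.0 // pods request 2x what they actually use

	// target is pushed just past the point where the tolerance is breached.
	target := math.Abs(1/(requestedToUsed*(1-tolerance))) + 0.01 // ~0.5656 -> CPUTarget of 56%
	resourcesUsedRatio := (1 / requestedToUsed) / target         // ~0.884
	usageRatioToleranceValue := 1 - resourcesUsedRatio           // ~0.116 > 0.1, so a rescale happens
	finalPods := math.Ceil(resourcesUsedRatio * startPods)       // ceil(8.84) = 9

	fmt.Println(target, resourcesUsedRatio, usageRatioToleranceValue, finalPods)
}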