k3s/test/e2e/cluster_size_autoscaling.go

/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package e2e

import (
	"fmt"
	"os/exec"
	"time"

	"k8s.io/kubernetes/pkg/api"

	. "github.com/onsi/ginkgo"
	. "github.com/onsi/gomega"
)

// Upper bounds passed to waitForClusterSize by the tests in this file:
// scaleUpTimeout while waiting for the cluster to grow by one node, and
// scaleDownTimeout while waiting for it to shrink back to its initial size.
const (
	scaleUpTimeout   = 20 * time.Minute
	scaleDownTimeout = 30 * time.Minute
)

// [Feature:ClusterSizeAutoscaling]: cluster size autoscaling is experimental
// and requires Google Cloud Monitoring to be enabled, so these tests are not
// run by default.
//
// Every spec follows the same outline: configure the autoscaler to allow one
// extra node, generate load, wait for the cluster to grow, remove the load and
// wait for the cluster to shrink back to its initial size.
//
// These tests take ~20 minutes to run each.
var _ = Describe("Cluster size autoscaling [Feature:ClusterSizeAutoscaling] [Slow]", func() {
	// Cluster dimensions, refreshed before every spec.
	var (
		nodeCount     int
		coresPerNode  int
		memCapacityMb int
	)
	f := NewDefaultFramework("autoscaling")

	BeforeEach(func() {
		SkipUnlessProviderIs("gce")

		schedulable := ListSchedulableNodesOrDie(f.Client)
		nodeCount = len(schedulable.Items)
		Expect(nodeCount).NotTo(BeZero())

		// Per-node figures are read from the first listed node only.
		firstNodeCapacity := schedulable.Items[0].Status.Capacity
		cpuQuantity := firstNodeCapacity[api.ResourceCPU]
		memQuantity := firstNodeCapacity[api.ResourceMemory]
		coresPerNode = int(cpuQuantity.MilliValue() / 1000)
		memCapacityMb = int(memQuantity.Value() / 1024 / 1024)
	})

	AfterEach(func() {
		cleanUpAutoscaler()
	})

	It("Should scale cluster size based on cpu utilization", func() {
		grownSize := nodeCount + 1
		setUpAutoscaler("cpu/node_utilization", 0.4, nodeCount, grownSize)

		// Consume 50% CPU: one single-replica consumer per core, 500 each.
		consumers := createConsumingRCs(f, "cpu-utilization", nodeCount*coresPerNode, 500, 0)
		scaleUpErr := waitForClusterSize(f.Client, grownSize, scaleUpTimeout)
		// The consumers are removed before the scale-up result is asserted,
		// so they are cleaned up even when the wait failed.
		for i := range consumers {
			consumers[i].CleanUp()
		}
		expectNoError(scaleUpErr)

		scaleDownErr := waitForClusterSize(f.Client, nodeCount, scaleDownTimeout)
		expectNoError(scaleDownErr)
	})

	It("Should scale cluster size based on cpu reservation", func() {
		const rcName = "cpu-reservation"
		grownSize := nodeCount + 1
		setUpAutoscaler("cpu/node_reservation", 0.5, nodeCount, grownSize)

		// Reserve 600 millicores for every core in the cluster.
		ReserveCpu(f, rcName, 600*nodeCount*coresPerNode)
		scaleUpErr := waitForClusterSize(f.Client, grownSize, scaleUpTimeout)
		expectNoError(scaleUpErr)

		deleteErr := DeleteRC(f.Client, f.Namespace.Name, rcName)
		expectNoError(deleteErr)
		scaleDownErr := waitForClusterSize(f.Client, nodeCount, scaleDownTimeout)
		expectNoError(scaleDownErr)
	})

	It("Should scale cluster size based on memory utilization", func() {
		grownSize := nodeCount + 1
		setUpAutoscaler("memory/node_utilization", 0.6, nodeCount, grownSize)

		// Consume 60% of total memory capacity, spread over one consumer per core.
		megabytesPerReplica := memCapacityMb * 6 / 10 / coresPerNode
		consumers := createConsumingRCs(f, "mem-utilization", nodeCount*coresPerNode, 0, megabytesPerReplica)
		scaleUpErr := waitForClusterSize(f.Client, grownSize, scaleUpTimeout)
		// The consumers are removed before the scale-up result is asserted,
		// so they are cleaned up even when the wait failed.
		for i := range consumers {
			consumers[i].CleanUp()
		}
		expectNoError(scaleUpErr)

		scaleDownErr := waitForClusterSize(f.Client, nodeCount, scaleDownTimeout)
		expectNoError(scaleDownErr)
	})

	It("Should scale cluster size based on memory reservation", func() {
		const rcName = "memory-reservation"
		grownSize := nodeCount + 1
		setUpAutoscaler("memory/node_reservation", 0.5, nodeCount, grownSize)

		// Reserve 60% of the memory capacity of the whole cluster.
		ReserveMemory(f, rcName, nodeCount*memCapacityMb*6/10)
		scaleUpErr := waitForClusterSize(f.Client, grownSize, scaleUpTimeout)
		expectNoError(scaleUpErr)

		deleteErr := DeleteRC(f.Client, f.Namespace.Name, rcName)
		expectNoError(deleteErr)
		scaleDownErr := waitForClusterSize(f.Client, nodeCount, scaleDownTimeout)
		expectNoError(scaleDownErr)
	})
})

// setUpAutoscaler enables autoscaling on the node instance group named in
// testContext.CloudConfig by running
// `gcloud compute instance-groups managed set-autoscaling`.
//
// metric is appended to the
// custom.cloudmonitoring.googleapis.com/kubernetes.io/ prefix to form the
// custom metric name, target is passed as the GAUGE utilization target, and
// min and max are passed as the minimum and maximum number of replicas.
// A command failure is reported through expectNoError together with the
// command's combined stdout/stderr.
func setUpAutoscaler(metric string, target float64, min, max int) {
	// TODO integrate with kube-up.sh script once it will support autoscaler setup.
	By("Setting up autoscaler to scale based on " + metric)

	cloud := testContext.CloudConfig
	utilizationFlag := "--custom-metric-utilization=metric=custom.cloudmonitoring.googleapis.com/kubernetes.io/" + metric +
		fmt.Sprintf(",utilization-target=%v", target) +
		",utilization-target-type=GAUGE"
	args := []string{
		"compute", "instance-groups", "managed", "set-autoscaling",
		cloud.NodeInstanceGroup,
		"--project=" + cloud.ProjectID,
		"--zone=" + cloud.Zone,
		utilizationFlag,
		fmt.Sprintf("--min-num-replicas=%v", min),
		fmt.Sprintf("--max-num-replicas=%v", max),
	}

	output, err := exec.Command("gcloud", args...).CombinedOutput()
	expectNoError(err, "Output: "+string(output))
}

// createConsumingRCs creates count static resource consumers via
// NewStaticResourceConsumer and returns them in creation order. The consumers
// are named "<name>-1" through "<name>-<count>"; a count of zero or less
// yields a nil slice.
//
// cpuPerReplica and memPerReplica are forwarded unchanged as the consumer's
// initial load figures. The two int64 arguments (cpuPerReplica and
// memPerReplica+100) are presumably the consumer's CPU and memory limits —
// confirm against NewStaticResourceConsumer.
func createConsumingRCs(f *Framework, name string, count, cpuPerReplica, memPerReplica int) []*ResourceConsumer {
	var consumers []*ResourceConsumer

	// Loop-invariant conversions, computed once.
	cpuArg := int64(cpuPerReplica)
	memArg := int64(memPerReplica + 100)

	for idx := 0; idx < count; idx++ {
		// Suffixes are 1-based.
		consumerName := fmt.Sprintf("%s-%d", name, idx+1)
		consumer := NewStaticResourceConsumer(consumerName, 1, cpuPerReplica, memPerReplica, 0, cpuArg, memArg, f)
		consumers = append(consumers, consumer)
	}
	return consumers
}

// cleanUpAutoscaler disables autoscaling on the node instance group named in
// testContext.CloudConfig by running
// `gcloud compute instance-groups managed stop-autoscaling`. A command failure
// is reported through expectNoError together with the command's combined
// stdout/stderr.
func cleanUpAutoscaler() {
	By("Removing autoscaler")

	cloud := testContext.CloudConfig
	args := []string{
		"compute", "instance-groups", "managed", "stop-autoscaling",
		cloud.NodeInstanceGroup,
		"--project=" + cloud.ProjectID,
		"--zone=" + cloud.Zone,
	}

	output, err := exec.Command("gcloud", args...).CombinedOutput()
	expectNoError(err, "Output: "+string(output))
}

// ReserveCpu runs a replication controller named id in the framework's
// namespace, using the pause image, so that its pods together request roughly
// the given number of millicores. Each replica requests 100, and the replica
// count is millicores/100 with integer division, so any remainder below 100
// is dropped. RunRC is given a 10 minute timeout and its error, if any, is
// reported through expectNoError.
func ReserveCpu(f *Framework, id string, millicores int) {
	const cpuRequestPerReplica = 100

	By(fmt.Sprintf("Running RC which reserves %v millicores", millicores))
	rc := RCConfig{
		Client:     f.Client,
		Name:       id,
		Namespace:  f.Namespace.Name,
		Timeout:    10 * time.Minute,
		Image:      "gcr.io/google_containers/pause:2.0",
		Replicas:   millicores / cpuRequestPerReplica,
		CpuRequest: cpuRequestPerReplica,
	}
	runErr := RunRC(rc)
	expectNoError(runErr)
}

// ReserveMemory runs a replication controller named id in the framework's
// namespace, using the pause image, so that its pods together request roughly
// the given number of megabytes. Each replica requests 500*1024*1024 bytes,
// and the replica count is megabytes/500 with integer division, so any
// remainder below 500 is dropped. RunRC is given a 10 minute timeout and its
// error, if any, is reported through expectNoError.
func ReserveMemory(f *Framework, id string, megabytes int) {
	const megabytesPerReplica = 500

	By(fmt.Sprintf("Running RC which reserves %v MB of memory", megabytes))
	rc := RCConfig{
		Client:     f.Client,
		Name:       id,
		Namespace:  f.Namespace.Name,
		Timeout:    10 * time.Minute,
		Image:      "gcr.io/google_containers/pause:2.0",
		Replicas:   megabytes / megabytesPerReplica,
		MemRequest: megabytesPerReplica * 1024 * 1024,
	}
	runErr := RunRC(rc)
	expectNoError(runErr)
}
Added cluster size autoscaling e2e test 2015-07-21 14:15:55 +00:00			`/*`
			`Copyright 2015 The Kubernetes Authors All rights reserved.`

			`Licensed under the Apache License, Version 2.0 (the "License");`
			`you may not use this file except in compliance with the License.`
			`You may obtain a copy of the License at`

			`http://www.apache.org/licenses/LICENSE-2.0`

			`Unless required by applicable law or agreed to in writing, software`
			`distributed under the License is distributed on an "AS IS" BASIS,`
			`WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`See the License for the specific language governing permissions and`
			`limitations under the License.`
			`*/`

			`package e2e`

			`import (`
			`"fmt"`
			`"os/exec"`
			`"time"`

Enabled Autoscaling e2e test for cpu utilization 2015-08-14 09:50:19 +00:00			`"k8s.io/kubernetes/pkg/api"`

Added cluster size autoscaling e2e test 2015-07-21 14:15:55 +00:00			`. "github.com/onsi/ginkgo"`
Enabled Autoscaling e2e test for cpu utilization 2015-08-14 09:50:19 +00:00			`. "github.com/onsi/gomega"`
Added cluster size autoscaling e2e test 2015-07-21 14:15:55 +00:00			`)`

Fixed flaky cluster size autoscaling e2e. 2015-10-07 09:15:58 +00:00			`const (`
			`scaleUpTimeout = 20 * time.Minute`
			`scaleDownTimeout = 30 * time.Minute`
			`)`

Move HPA e2es into the default slow suite, and add [Feature:ClusterSizeAutoscaling] and [Feature:InitialResources] 2016-01-28 16:56:27 +00:00			`// [Feature:ClusterSizeAutoscaling]: Cluster size autoscaling is experimental`
			`// and require Google Cloud Monitoring to be enabled, so these tests are not`
			`// run by default.`
			`//`
			`// These tests take ~20 minutes to run each.`
			`var _ = Describe("Cluster size autoscaling [Feature:ClusterSizeAutoscaling] [Slow]", func() {`
Add an option to pass client's QPS/burst to e2e framework 2016-02-24 15:24:36 +00:00			`f := NewDefaultFramework("autoscaling")`
Enabled Autoscaling e2e test for cpu utilization 2015-08-14 09:50:19 +00:00			`var nodeCount int`
			`var coresPerNode int`
Enabled Autoscaling test that uses mem utilization metric 2015-08-19 13:05:13 +00:00			`var memCapacityMb int`
Added cluster size autoscaling e2e test 2015-07-21 14:15:55 +00:00
			`BeforeEach(func() {`
Enabled Autoscaling e2e test for cpu utilization 2015-08-14 09:50:19 +00:00			`SkipUnlessProviderIs("gce")`

Update e2e tests to list only schedulable nodes, to make them work with master Node registered. 2015-12-10 14:35:58 +00:00			`nodes := ListSchedulableNodesOrDie(f.Client)`
Enabled Autoscaling e2e test for cpu utilization 2015-08-14 09:50:19 +00:00			`nodeCount = len(nodes.Items)`
			`Expect(nodeCount).NotTo(BeZero())`
Enabled Autoscaling test that uses mem utilization metric 2015-08-19 13:05:13 +00:00			`cpu := nodes.Items[0].Status.Capacity[api.ResourceCPU]`
			`mem := nodes.Items[0].Status.Capacity[api.ResourceMemory]`
			`coresPerNode = int((&cpu).MilliValue() / 1000)`
			`memCapacityMb = int((&mem).Value() / 1024 / 1024)`
Added cluster size autoscaling e2e test 2015-07-21 14:15:55 +00:00			`})`

			`AfterEach(func() {`
			`cleanUpAutoscaler()`
			`})`

Cleanup e2e tags - Remove "Suite" from e2e tag names - Move grouping e2e tags to the front of the test, describe or context name - Move Conformance e2e tags to the end of test names (test specific) - Move Skipped e2e tags to the end of names, but to as high a context as applicable 2015-11-12 22:30:06 +00:00			`It("Should scale cluster size based on cpu utilization", func() {`
Fixed flakiness in autoscaling e2e with no scheduling pods 2015-09-29 13:52:30 +00:00			`setUpAutoscaler("cpu/node_utilization", 0.4, nodeCount, nodeCount+1)`
Added cluster size autoscaling e2e test 2015-07-21 14:15:55 +00:00
Fixed flakiness in autoscaling e2e with no scheduling pods 2015-09-29 13:52:30 +00:00			`// Consume 50% CPU`
Fixed flakiness in cluster size autoscaling e2e 2015-10-14 18:45:32 +00:00			`rcs := createConsumingRCs(f, "cpu-utilization", nodeCount*coresPerNode, 500, 0)`
			`err := waitForClusterSize(f.Client, nodeCount+1, scaleUpTimeout)`
			`for _, rc := range rcs {`
			`rc.CleanUp()`
			`}`
			`expectNoError(err)`
Added cluster size autoscaling e2e test 2015-07-21 14:15:55 +00:00
Fixed flaky cluster size autoscaling e2e. 2015-10-07 09:15:58 +00:00			`expectNoError(waitForClusterSize(f.Client, nodeCount, scaleDownTimeout))`
Added cluster size autoscaling e2e test 2015-07-21 14:15:55 +00:00			`})`

Cleanup e2e tags - Remove "Suite" from e2e tag names - Move grouping e2e tags to the front of the test, describe or context name - Move Conformance e2e tags to the end of test names (test specific) - Move Skipped e2e tags to the end of names, but to as high a context as applicable 2015-11-12 22:30:06 +00:00			`It("Should scale cluster size based on cpu reservation", func() {`
Enabled cluster autoscaling based on cpu/mem reservation e2e tests 2015-09-25 09:41:26 +00:00			`setUpAutoscaler("cpu/node_reservation", 0.5, nodeCount, nodeCount+1)`
Added cluster size autoscaling e2e test 2015-07-21 14:15:55 +00:00
Enabled cluster autoscaling based on cpu/mem reservation e2e tests 2015-09-25 09:41:26 +00:00			`ReserveCpu(f, "cpu-reservation", 600*nodeCount*coresPerNode)`
Fixed flaky cluster size autoscaling e2e. 2015-10-07 09:15:58 +00:00			`expectNoError(waitForClusterSize(f.Client, nodeCount+1, scaleUpTimeout))`
Added cluster size autoscaling e2e test 2015-07-21 14:15:55 +00:00
Enabled cluster autoscaling based on cpu/mem reservation e2e tests 2015-09-25 09:41:26 +00:00			`expectNoError(DeleteRC(f.Client, f.Namespace.Name, "cpu-reservation"))`
Fixed flaky cluster size autoscaling e2e. 2015-10-07 09:15:58 +00:00			`expectNoError(waitForClusterSize(f.Client, nodeCount, scaleDownTimeout))`
Added cluster size autoscaling e2e test 2015-07-21 14:15:55 +00:00			`})`

Cleanup e2e tags - Remove "Suite" from e2e tag names - Move grouping e2e tags to the front of the test, describe or context name - Move Conformance e2e tags to the end of test names (test specific) - Move Skipped e2e tags to the end of names, but to as high a context as applicable 2015-11-12 22:30:06 +00:00			`It("Should scale cluster size based on memory utilization", func() {`
Another try to fix flaky autoscaling test. 2015-10-05 19:03:17 +00:00			`setUpAutoscaler("memory/node_utilization", 0.6, nodeCount, nodeCount+1)`
Added cluster size autoscaling e2e test 2015-07-21 14:15:55 +00:00
Use Resource Consumer for tests in autoscaling.go 2015-09-21 14:41:07 +00:00			`// Consume 60% of total memory capacity`
			`megabytesPerReplica := int(memCapacityMb * 6 / 10 / coresPerNode)`
Fixed flakiness in cluster size autoscaling e2e 2015-10-14 18:45:32 +00:00			`rcs := createConsumingRCs(f, "mem-utilization", nodeCount*coresPerNode, 0, megabytesPerReplica)`
			`err := waitForClusterSize(f.Client, nodeCount+1, scaleUpTimeout)`
			`for _, rc := range rcs {`
			`rc.CleanUp()`
			`}`
			`expectNoError(err)`
Added cluster size autoscaling e2e test 2015-07-21 14:15:55 +00:00
Fixed flaky cluster size autoscaling e2e. 2015-10-07 09:15:58 +00:00			`expectNoError(waitForClusterSize(f.Client, nodeCount, scaleDownTimeout))`
Added cluster size autoscaling e2e test 2015-07-21 14:15:55 +00:00			`})`

Cleanup e2e tags - Remove "Suite" from e2e tag names - Move grouping e2e tags to the front of the test, describe or context name - Move Conformance e2e tags to the end of test names (test specific) - Move Skipped e2e tags to the end of names, but to as high a context as applicable 2015-11-12 22:30:06 +00:00			`It("Should scale cluster size based on memory reservation", func() {`
Enabled cluster autoscaling based on cpu/mem reservation e2e tests 2015-09-25 09:41:26 +00:00			`setUpAutoscaler("memory/node_reservation", 0.5, nodeCount, nodeCount+1)`
Added cluster size autoscaling e2e test 2015-07-21 14:15:55 +00:00
Enabled cluster autoscaling based on cpu/mem reservation e2e tests 2015-09-25 09:41:26 +00:00			`ReserveMemory(f, "memory-reservation", nodeCount*memCapacityMb*6/10)`
Fixed flaky cluster size autoscaling e2e. 2015-10-07 09:15:58 +00:00			`expectNoError(waitForClusterSize(f.Client, nodeCount+1, scaleUpTimeout))`
Added cluster size autoscaling e2e test 2015-07-21 14:15:55 +00:00
Enabled cluster autoscaling based on cpu/mem reservation e2e tests 2015-09-25 09:41:26 +00:00			`expectNoError(DeleteRC(f.Client, f.Namespace.Name, "memory-reservation"))`
Fixed flaky cluster size autoscaling e2e. 2015-10-07 09:15:58 +00:00			`expectNoError(waitForClusterSize(f.Client, nodeCount, scaleDownTimeout))`
Added cluster size autoscaling e2e test 2015-07-21 14:15:55 +00:00			`})`
			`})`

Enabled Autoscaling e2e test for cpu utilization 2015-08-14 09:50:19 +00:00			`func setUpAutoscaler(metric string, target float64, min, max int) {`
Added cluster size autoscaling e2e test 2015-07-21 14:15:55 +00:00			`// TODO integrate with kube-up.sh script once it will support autoscaler setup.`
			`By("Setting up autoscaler to scale based on " + metric)`
Migrated Autoscaling e2e test to new gcloud commands 2015-08-17 19:09:08 +00:00			`out, err := exec.Command("gcloud", "compute", "instance-groups", "managed", "set-autoscaling",`
			`testContext.CloudConfig.NodeInstanceGroup,`
Added cluster size autoscaling e2e test 2015-07-21 14:15:55 +00:00			`"--project="+testContext.CloudConfig.ProjectID,`
Migrated Autoscaling e2e test to new gcloud commands 2015-08-17 19:09:08 +00:00			`"--zone="+testContext.CloudConfig.Zone,`
			`"--custom-metric-utilization=metric=custom.cloudmonitoring.googleapis.com/kubernetes.io/"+metric+fmt.Sprintf(",utilization-target=%v", target)+",utilization-target-type=GAUGE",`
Added cluster size autoscaling e2e test 2015-07-21 14:15:55 +00:00			`fmt.Sprintf("--min-num-replicas=%v", min),`
			`fmt.Sprintf("--max-num-replicas=%v", max),`
			`).CombinedOutput()`
Added more logs to Autoscaling e2e test 2015-08-17 14:12:32 +00:00			`expectNoError(err, "Output: "+string(out))`
Added cluster size autoscaling e2e test 2015-07-21 14:15:55 +00:00			`}`

Fixed flakiness in cluster size autoscaling e2e 2015-10-14 18:45:32 +00:00			`func createConsumingRCs(f *Framework, name string, count, cpuPerReplica, memPerReplica int) []*ResourceConsumer {`
			`var res []*ResourceConsumer`
			`for i := 1; i <= count; i++ {`
			`name := fmt.Sprintf("%s-%d", name, i)`
CustomMetric support in autoscaling utils 2016-03-04 14:50:55 +00:00			`res = append(res, NewStaticResourceConsumer(name, 1, cpuPerReplica, memPerReplica, 0, int64(cpuPerReplica), int64(memPerReplica+100), f))`
Fixed flakiness in cluster size autoscaling e2e 2015-10-14 18:45:32 +00:00			`}`
			`return res`
			`}`

Added cluster size autoscaling e2e test 2015-07-21 14:15:55 +00:00			`func cleanUpAutoscaler() {`
			`By("Removing autoscaler")`
Migrated Autoscaling e2e test to new gcloud commands 2015-08-17 19:09:08 +00:00			`out, err := exec.Command("gcloud", "compute", "instance-groups", "managed", "stop-autoscaling",`
			`testContext.CloudConfig.NodeInstanceGroup,`
			`"--project="+testContext.CloudConfig.ProjectID,`
			`"--zone="+testContext.CloudConfig.Zone,`
			`).CombinedOutput()`
Added more logs to Autoscaling e2e test 2015-08-17 14:12:32 +00:00			`expectNoError(err, "Output: "+string(out))`
Added cluster size autoscaling e2e test 2015-07-21 14:15:55 +00:00			`}`

			`func ReserveCpu(f *Framework, id string, millicores int) {`
			`By(fmt.Sprintf("Running RC which reserves %v millicores", millicores))`
			`config := &RCConfig{`
Enabled cluster autoscaling based on cpu/mem reservation e2e tests 2015-09-25 09:41:26 +00:00			`Client: f.Client,`
			`Name: id,`
			`Namespace: f.Namespace.Name,`
			`Timeout: 10 * time.Minute,`
Drop the beta for GCR v2 images. beta.gcr.io is no longer needed to pull through v2. 2015-11-04 23:52:49 +00:00			`Image: "gcr.io/google_containers/pause:2.0",`
Enabled cluster autoscaling based on cpu/mem reservation e2e tests 2015-09-25 09:41:26 +00:00			`Replicas: millicores / 100,`
			`CpuRequest: 100,`
Added cluster size autoscaling e2e test 2015-07-21 14:15:55 +00:00			`}`
			`expectNoError(RunRC(*config))`
			`}`

Enabled cluster autoscaling based on cpu/mem reservation e2e tests 2015-09-25 09:41:26 +00:00			`func ReserveMemory(f *Framework, id string, megabytes int) {`
			`By(fmt.Sprintf("Running RC which reserves %v MB of memory", megabytes))`
Added cluster size autoscaling e2e test 2015-07-21 14:15:55 +00:00			`config := &RCConfig{`
Enabled cluster autoscaling based on cpu/mem reservation e2e tests 2015-09-25 09:41:26 +00:00			`Client: f.Client,`
			`Name: id,`
			`Namespace: f.Namespace.Name,`
			`Timeout: 10 * time.Minute,`
Drop the beta for GCR v2 images. beta.gcr.io is no longer needed to pull through v2. 2015-11-04 23:52:49 +00:00			`Image: "gcr.io/google_containers/pause:2.0",`
Enabled cluster autoscaling based on cpu/mem reservation e2e tests 2015-09-25 09:41:26 +00:00			`Replicas: megabytes / 500,`
			`MemRequest: 500 * 1024 * 1024,`
Added cluster size autoscaling e2e test 2015-07-21 14:15:55 +00:00			`}`
			`expectNoError(RunRC(*config))`
			`}`