mirror of https://github.com/k3s-io/k3s

Multiple MIGs in e2e cluster autoscaling tests.

Implemented support for multiple MIGs in e2e cluster autoscaling tests.

parent 0a6a52b19d
commit 93e5b12a06
@@ -72,6 +72,19 @@ else
    NODE_INSTANCE_GROUP=""
fi

if [[ "${KUBERNETES_PROVIDER}" == "gce" ]]; then
  set_num_migs
  NODE_INSTANCE_GROUP=""
  for ((i=1; i<=${NUM_MIGS}; i++)); do
    if [[ $i == ${NUM_MIGS} ]]; then
      # We are assigning the same mig names as create-nodes function from cluster/gce/util.sh.
      NODE_INSTANCE_GROUP="${NODE_INSTANCE_GROUP}${NODE_INSTANCE_PREFIX}-group"
    else
      NODE_INSTANCE_GROUP="${NODE_INSTANCE_GROUP}${NODE_INSTANCE_PREFIX}-group-${i},"
    fi
  done
fi

if [[ "${KUBERNETES_PROVIDER}" == "gke" ]]; then
  detect-node-instance-group
fi
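Illustrative sketch (not part of the commit): it mirrors the naming scheme the loop above builds, assuming a hypothetical NUM_MIGS of 3 and a hypothetical NODE_INSTANCE_PREFIX of "e2e-test-minion", and shows how the resulting comma-separated value splits back into individual MIG names, as the Go tests below do with strings.Split.

package main

import (
	"fmt"
	"strings"
)

func main() {
	numMIGs := 3                // hypothetical NUM_MIGS
	prefix := "e2e-test-minion" // hypothetical NODE_INSTANCE_PREFIX
	group := ""
	for i := 1; i <= numMIGs; i++ {
		if i == numMIGs {
			// The last MIG keeps the plain "<prefix>-group" name.
			group += prefix + "-group"
		} else {
			group += fmt.Sprintf("%s-group-%d,", prefix, i)
		}
	}
	fmt.Println(group)
	// e2e-test-minion-group-1,e2e-test-minion-group-2,e2e-test-minion-group
	fmt.Println(strings.Split(group, ",")) // per-MIG names the e2e tests iterate over
}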
@@ -18,6 +18,7 @@ package e2e

import (
	"fmt"
	"strings"
	"time"

	"k8s.io/kubernetes/pkg/api"
@@ -37,11 +38,12 @@ const (
	scaleDownTimeout = 15 * time.Minute
)

var _ = framework.KubeDescribe("Cluster size autoscaling scale up [Feature:ClusterSizeAutoscaling] [Slow]", func() {
var _ = framework.KubeDescribe("Cluster size autoscaling [Slow]", func() {
	f := framework.NewDefaultFramework("autoscaling")
	var nodeCount int
	var coresPerNode int
	var memCapacityMb int
	var originalSizes map[string]int

	BeforeEach(func() {
		framework.SkipUnlessProviderIs("gce")
@@ -53,57 +55,54 @@ var _ = framework.KubeDescribe("Cluster size autoscaling scale up [Feature:Clust
		mem := nodes.Items[0].Status.Capacity[api.ResourceMemory]
		coresPerNode = int((&cpu).MilliValue() / 1000)
		memCapacityMb = int((&mem).Value() / 1024 / 1024)

		originalSizes = make(map[string]int)
		sum := 0
		for _, mig := range strings.Split(framework.TestContext.CloudConfig.NodeInstanceGroup, ",") {
			size, err := GroupSize(mig)
			framework.ExpectNoError(err)
			By(fmt.Sprintf("Initial size of %s: %d", mig, size))
			originalSizes[mig] = size
			sum += size
		}
		Expect(nodeCount).Should(Equal(sum))
	})

	It("Should correctly handle pending pods", func() {
		By("Too large pending pod does not increase cluster size")
	It("shouldn't increase cluster size if pending pod is too large [Feature:ClusterSizeAutoscalingScaleUp]", func() {
		ReserveMemory(f, "memory-reservation", 1, memCapacityMb, false)
		// Verify, that cluster size is not changed.
		// TODO: find a better way of verification that the cluster size will remain unchanged.
		// TODO: find a better way of verification that the cluster size will remain unchanged using events.
		time.Sleep(scaleUpTimeout)
		framework.ExpectNoError(framework.WaitForClusterSize(f.Client, nodeCount, scaleUpTimeout))
		framework.ExpectNoError(WaitForClusterSizeFunc(f.Client,
			func(size int) bool { return size <= nodeCount }, scaleDownTimeout))
		framework.ExpectNoError(framework.DeleteRC(f.Client, f.Namespace.Name, "memory-reservation"))
		framework.ExpectNoError(ResizeGroup(int32(nodeCount)))
		framework.ExpectNoError(framework.WaitForClusterSize(f.Client, nodeCount, resizeTimeout))
		framework.ExpectNoError(WaitForClusterSizeFunc(f.Client,
			func(size int) bool { return size <= nodeCount }, scaleDownTimeout))
	})

	By("Small pending pods increase cluster size")
	It("should increase cluster size if pending pods are small [Feature:ClusterSizeAutoscalingScaleUp]", func() {
		ReserveMemory(f, "memory-reservation", 100, nodeCount*memCapacityMb, false)
		// Verify, that cluster size is increased
		framework.ExpectNoError(WaitForClusterSizeFunc(f.Client,
			func(size int) bool { return size >= nodeCount+1 }, scaleUpTimeout))
		framework.ExpectNoError(framework.DeleteRC(f.Client, f.Namespace.Name, "memory-reservation"))
		framework.ExpectNoError(ResizeGroup(int32(nodeCount)))
		framework.ExpectNoError(framework.WaitForClusterSize(f.Client, nodeCount, resizeTimeout))
		restoreSizes(originalSizes)
		framework.ExpectNoError(WaitForClusterSizeFunc(f.Client,
			func(size int) bool { return size <= nodeCount }, scaleDownTimeout))
	})

	By("Handling node port pods")
	It("should increase cluster size if pods are pending due to host port conflict [Feature:ClusterSizeAutoscalingScaleUp]", func() {
		CreateHostPortPods(f, "host-port", nodeCount+2, false)
		framework.ExpectNoError(WaitForClusterSizeFunc(f.Client,
			func(size int) bool { return size >= nodeCount+2 }, scaleUpTimeout))
		framework.ExpectNoError(framework.DeleteRC(f.Client, f.Namespace.Name, "host-port"))
		restoreSizes(originalSizes)
		framework.ExpectNoError(WaitForClusterSizeFunc(f.Client,
			func(size int) bool { return size <= nodeCount }, scaleDownTimeout))

		framework.ExpectNoError(ResizeGroup(int32(nodeCount)))
		framework.ExpectNoError(framework.WaitForClusterSize(f.Client, nodeCount, resizeTimeout))
	})
})

var _ = framework.KubeDescribe("Cluster size autoscaling scale down[Feature:ClusterSizeAutoscalingScaleDown] [Slow]", func() {
	f := framework.NewDefaultFramework("autoscaling")
	var nodeCount int
	var coresPerNode int
	var memCapacityMb int

	BeforeEach(func() {
		framework.SkipUnlessProviderIs("gce")

		nodes := framework.GetReadySchedulableNodesOrDie(f.Client)
		nodeCount = len(nodes.Items)
		Expect(nodeCount).NotTo(BeZero())
		cpu := nodes.Items[0].Status.Capacity[api.ResourceCPU]
		mem := nodes.Items[0].Status.Capacity[api.ResourceMemory]
		coresPerNode = int((&cpu).MilliValue() / 1000)
		memCapacityMb = int((&mem).Value() / 1024 / 1024)
	})

	It("Should correctly handle pending and scale down after deletion", func() {
	It("should correctly handle pending and scale down after deletion [Feature:ClusterSizeAutoscalingScaleDown]", func() {
		By("Small pending pods increase cluster size")
		ReserveMemory(f, "memory-reservation", 100, nodeCount*memCapacityMb, false)
		// Verify, that cluster size is increased
@@ -192,3 +191,16 @@ func WaitForClusterSizeFunc(c *client.Client, sizeFunc func(int) bool, timeout t
	}
	return fmt.Errorf("timeout waiting %v for appropriate cluster size", timeout)
}

func restoreSizes(sizes map[string]int) {
	By(fmt.Sprintf("Restoring initial size of the cluster"))
	for mig, desiredSize := range sizes {
		currentSize, err := GroupSize(mig)
		framework.ExpectNoError(err)
		if desiredSize != currentSize {
			By(fmt.Sprintf("Setting size of %s to %d", mig, desiredSize))
			err = ResizeGroup(mig, int32(desiredSize))
			framework.ExpectNoError(err)
		}
	}
}
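A minimal sketch of the restore pattern above (illustrative only; the group names, sizes, and stubbed helpers are hypothetical): each MIG recorded before the test is resized back only if its current size has drifted.

package main

import "fmt"

// Hypothetical current sizes standing in for GroupSize lookups.
var currentSizes = map[string]int{"e2e-test-minion-group-1": 4, "e2e-test-minion-group": 2}

// resizeGroup is a stub standing in for the real ResizeGroup helper.
func resizeGroup(mig string, size int) {
	fmt.Printf("resizing %s to %d\n", mig, size)
}

func main() {
	// Sizes captured before the test ran (hypothetical values).
	originalSizes := map[string]int{"e2e-test-minion-group-1": 2, "e2e-test-minion-group": 2}
	for mig, desiredSize := range originalSizes {
		if currentSizes[mig] != desiredSize {
			// Only the group that actually changed is touched:
			// here e2e-test-minion-group-1 goes back to 2.
			resizeGroup(mig, desiredSize)
		}
	}
}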
@@ -113,7 +113,7 @@ func RegisterFlags() {
	flag.StringVar(&cloudConfig.Zone, "gce-zone", "", "GCE zone being used, if applicable")
	flag.StringVar(&cloudConfig.ServiceAccount, "gce-service-account", "", "GCE service account to use for GCE API calls, if applicable")
	flag.StringVar(&cloudConfig.Cluster, "gke-cluster", "", "GKE name of cluster being used, if applicable")
	flag.StringVar(&cloudConfig.NodeInstanceGroup, "node-instance-group", "", "Name of the managed instance group for nodes. Valid only for gce, gke or aws")
	flag.StringVar(&cloudConfig.NodeInstanceGroup, "node-instance-group", "", "Name of the managed instance group for nodes. Valid only for gce, gke or aws. If there is more than one group: comma separated list of groups.")
	flag.IntVar(&cloudConfig.NumNodes, "num-nodes", -1, "Number of nodes in the cluster")

	flag.StringVar(&cloudConfig.ClusterTag, "cluster-tag", "", "Tag used to identify resources. Only required if provider is aws.")
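For illustration (the group names here are hypothetical), a cluster built from two MIGs would now be described to the tests with a comma-separated value:

    --node-instance-group=e2e-test-minion-group-1,e2e-test-minion-group

On GCE this value is assembled automatically by the shell change in the first hunk from NUM_MIGS and NODE_INSTANCE_PREFIX.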
@@ -53,7 +53,7 @@ const (
	testPort = 9376
)

func ResizeGroup(size int32) error {
func ResizeGroup(group string, size int32) error {
	if framework.TestContext.ReportDir != "" {
		framework.CoreDump(framework.TestContext.ReportDir)
		defer framework.CoreDump(framework.TestContext.ReportDir)

@@ -62,7 +62,7 @@ func ResizeGroup(size int32) error {
	// TODO: make this hit the compute API directly instead of shelling out to gcloud.
	// TODO: make gce/gke implement InstanceGroups, so we can eliminate the per-provider logic
	output, err := exec.Command("gcloud", "compute", "instance-groups", "managed", "resize",
		framework.TestContext.CloudConfig.NodeInstanceGroup, fmt.Sprintf("--size=%v", size),
		group, fmt.Sprintf("--size=%v", size),
		"--project="+framework.TestContext.CloudConfig.ProjectID, "--zone="+framework.TestContext.CloudConfig.Zone).CombinedOutput()
	if err != nil {
		framework.Logf("Failed to resize node instance group: %v", string(output))
@@ -70,18 +70,18 @@ func ResizeGroup(size int32) error {
		return err
	} else if framework.TestContext.Provider == "aws" {
		client := autoscaling.New(session.New())
		return awscloud.ResizeInstanceGroup(client, framework.TestContext.CloudConfig.NodeInstanceGroup, int(size))
		return awscloud.ResizeInstanceGroup(client, group, int(size))
	} else {
		return fmt.Errorf("Provider does not support InstanceGroups")
	}
}

func groupSize() (int, error) {
func GroupSize(group string) (int, error) {
	if framework.TestContext.Provider == "gce" || framework.TestContext.Provider == "gke" {
		// TODO: make this hit the compute API directly instead of shelling out to gcloud.
		// TODO: make gce/gke implement InstanceGroups, so we can eliminate the per-provider logic
		output, err := exec.Command("gcloud", "compute", "instance-groups", "managed",
			"list-instances", framework.TestContext.CloudConfig.NodeInstanceGroup, "--project="+framework.TestContext.CloudConfig.ProjectID,
			"list-instances", group, "--project="+framework.TestContext.CloudConfig.ProjectID,
			"--zone="+framework.TestContext.CloudConfig.Zone).CombinedOutput()
		if err != nil {
			return -1, err

@@ -90,12 +90,12 @@ func groupSize() (int, error) {
		return len(re.FindAllString(string(output), -1)), nil
	} else if framework.TestContext.Provider == "aws" {
		client := autoscaling.New(session.New())
		instanceGroup, err := awscloud.DescribeInstanceGroup(client, framework.TestContext.CloudConfig.NodeInstanceGroup)
		instanceGroup, err := awscloud.DescribeInstanceGroup(client, group)
		if err != nil {
			return -1, fmt.Errorf("error describing instance group: %v", err)
		}
		if instanceGroup == nil {
			return -1, fmt.Errorf("instance group not found: %s", framework.TestContext.CloudConfig.NodeInstanceGroup)
			return -1, fmt.Errorf("instance group not found: %s", group)
		}
		return instanceGroup.CurrentSize()
	} else {
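Concretely, with the group argument threaded through, the two gcloud calls above resolve to commands of this shape (project, zone, and group name are hypothetical placeholders):

    gcloud compute instance-groups managed resize e2e-test-minion-group-1 --size=3 --project=my-project --zone=us-central1-b
    gcloud compute instance-groups managed list-instances e2e-test-minion-group-1 --project=my-project --zone=us-central1-b

GroupSize counts the instances returned by list-instances to report the current size of a single MIG.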
@@ -103,10 +103,10 @@ func groupSize() (int, error) {
	}
}

func waitForGroupSize(size int32) error {
func WaitForGroupSize(group string, size int32) error {
	timeout := 10 * time.Minute
	for start := time.Now(); time.Since(start) < timeout; time.Sleep(5 * time.Second) {
		currentSize, err := groupSize()
		currentSize, err := GroupSize(group)
		if err != nil {
			framework.Logf("Failed to get node instance group size: %v", err)
			continue
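The truncated loop above follows the usual poll-until-timeout pattern. A self-contained sketch of that pattern (the interval, timeout, and fake size source are illustrative choices, not values taken from the commit):

package main

import (
	"fmt"
	"time"
)

// waitForSize polls getSize until it reports want or the timeout elapses.
func waitForSize(getSize func() (int, error), want int, interval, timeout time.Duration) error {
	for start := time.Now(); time.Since(start) < timeout; time.Sleep(interval) {
		current, err := getSize()
		if err != nil {
			fmt.Printf("failed to get group size: %v\n", err)
			continue
		}
		if current == want {
			return nil
		}
	}
	return fmt.Errorf("timeout waiting %v for group size %d", timeout, want)
}

func main() {
	size := 1
	// Fake size source that grows by one node per poll.
	getSize := func() (int, error) { size++; return size, nil }
	fmt.Println(waitForSize(getSize, 3, 10*time.Millisecond, time.Second)) // <nil>
}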
@@ -347,13 +347,19 @@ var _ = framework.KubeDescribe("Nodes [Disruptive]", func() {
	var c *client.Client
	var ns string
	ignoreLabels := framework.ImagePullerLabels
	var group string

	BeforeEach(func() {
		c = f.Client
		ns = f.Namespace.Name
		systemPods, err := framework.GetPodsInNamespace(c, ns, ignoreLabels)
		Expect(err).NotTo(HaveOccurred())
		systemPodsNo = int32(len(systemPods))

		if strings.Index(framework.TestContext.CloudConfig.NodeInstanceGroup, ",") >= 0 {
			framework.Failf("Test does not support cluster setup with more than one MIG: %s", framework.TestContext.CloudConfig.NodeInstanceGroup)
		} else {
			group = framework.TestContext.CloudConfig.NodeInstanceGroup
		}
	})

	// Slow issue #13323 (8 min)
@@ -373,7 +379,7 @@ var _ = framework.KubeDescribe("Nodes [Disruptive]", func() {
		}

		By("restoring the original node instance group size")
		if err := ResizeGroup(int32(framework.TestContext.CloudConfig.NumNodes)); err != nil {
		if err := ResizeGroup(group, int32(framework.TestContext.CloudConfig.NumNodes)); err != nil {
			framework.Failf("Couldn't restore the original node instance group size: %v", err)
		}
		// In GKE, our current tunneling setup has the potential to hold on to a broken tunnel (from a
@@ -388,7 +394,7 @@ var _ = framework.KubeDescribe("Nodes [Disruptive]", func() {
			By("waiting 5 minutes for all dead tunnels to be dropped")
			time.Sleep(5 * time.Minute)
		}
		if err := waitForGroupSize(int32(framework.TestContext.CloudConfig.NumNodes)); err != nil {
		if err := WaitForGroupSize(group, int32(framework.TestContext.CloudConfig.NumNodes)); err != nil {
			framework.Failf("Couldn't restore the original node instance group size: %v", err)
		}
		if err := framework.WaitForClusterSize(c, framework.TestContext.CloudConfig.NumNodes, 10*time.Minute); err != nil {
@@ -412,9 +418,9 @@ var _ = framework.KubeDescribe("Nodes [Disruptive]", func() {
		Expect(err).NotTo(HaveOccurred())

		By(fmt.Sprintf("decreasing cluster size to %d", replicas-1))
		err = ResizeGroup(replicas - 1)
		err = ResizeGroup(group, replicas-1)
		Expect(err).NotTo(HaveOccurred())
		err = waitForGroupSize(replicas - 1)
		err = WaitForGroupSize(group, replicas-1)
		Expect(err).NotTo(HaveOccurred())
		err = framework.WaitForClusterSize(c, int(replicas-1), 10*time.Minute)
		Expect(err).NotTo(HaveOccurred())
@@ -436,9 +442,9 @@ var _ = framework.KubeDescribe("Nodes [Disruptive]", func() {
		Expect(err).NotTo(HaveOccurred())

		By(fmt.Sprintf("increasing cluster size to %d", replicas+1))
		err = ResizeGroup(replicas + 1)
		err = ResizeGroup(group, replicas+1)
		Expect(err).NotTo(HaveOccurred())
		err = waitForGroupSize(replicas + 1)
		err = WaitForGroupSize(group, replicas+1)
		Expect(err).NotTo(HaveOccurred())
		err = framework.WaitForClusterSize(c, int(replicas+1), 10*time.Minute)
		Expect(err).NotTo(HaveOccurred())