/*
Copyright 2015 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package e2e

import (
    "fmt"
    "os/exec"
    "regexp"
    "time"

    "github.com/GoogleCloudPlatform/kubernetes/pkg/api"
    "github.com/GoogleCloudPlatform/kubernetes/pkg/api/latest"
    "github.com/GoogleCloudPlatform/kubernetes/pkg/client"
    "github.com/GoogleCloudPlatform/kubernetes/pkg/fields"
    "github.com/GoogleCloudPlatform/kubernetes/pkg/labels"
    "github.com/GoogleCloudPlatform/kubernetes/pkg/util"
    "github.com/GoogleCloudPlatform/kubernetes/pkg/util/wait"

    "github.com/GoogleCloudPlatform/kubernetes/pkg/cloudprovider/aws"

    . "github.com/onsi/ginkgo"
    . "github.com/onsi/gomega"
)

const serveHostnameImage = "gcr.io/google_containers/serve_hostname:1.1"

const resizeNodeReadyTimeout = 2 * time.Minute
const resizeNodeNotReadyTimeout = 2 * time.Minute

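// resizeGroup resizes the cluster's node instance group to the given size.
// On GCE/GKE it shells out to gcloud; otherwise it goes through the AWS cloud
// provider's InstanceGroups interface. Other providers are not supported.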
func resizeGroup(size int) error {
    if testContext.Provider == "gce" || testContext.Provider == "gke" {
        // TODO: make this hit the compute API directly instead of shelling out to gcloud.
        // TODO: make gce/gke implement InstanceGroups, so we can eliminate the per-provider logic
        output, err := exec.Command("gcloud", "compute", "instance-groups", "managed", "resize",
            testContext.CloudConfig.NodeInstanceGroup, fmt.Sprintf("--size=%v", size),
            "--project="+testContext.CloudConfig.ProjectID, "--zone="+testContext.CloudConfig.Zone).CombinedOutput()
        if err != nil {
            Logf("Failed to resize node instance group: %v", string(output))
        }
        return err
    } else {
        // Supported by aws
        instanceGroups, ok := testContext.CloudConfig.Provider.(aws_cloud.InstanceGroups)
        if !ok {
            return fmt.Errorf("provider does not support InstanceGroups")
        }
        return instanceGroups.ResizeInstanceGroup(testContext.CloudConfig.NodeInstanceGroup, size)
    }
}

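// groupSize returns the current size of the node instance group: on GCE/GKE it
// counts the RUNNING instances reported by gcloud, on AWS it asks the cloud
// provider's InstanceGroups interface. A size of -1 accompanies any error.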
func groupSize() (int, error) {
    if testContext.Provider == "gce" || testContext.Provider == "gke" {
        // TODO: make this hit the compute API directly instead of shelling out to gcloud.
        // TODO: make gce/gke implement InstanceGroups, so we can eliminate the per-provider logic
        output, err := exec.Command("gcloud", "compute", "instance-groups", "managed",
            "list-instances", testContext.CloudConfig.NodeInstanceGroup, "--project="+testContext.CloudConfig.ProjectID,
            "--zone="+testContext.CloudConfig.Zone).CombinedOutput()
        if err != nil {
            return -1, err
        }
        re := regexp.MustCompile("RUNNING")
        return len(re.FindAllString(string(output), -1)), nil
    } else {
        // Supported by aws
        instanceGroups, ok := testContext.CloudConfig.Provider.(aws_cloud.InstanceGroups)
        if !ok {
            return -1, fmt.Errorf("provider does not support InstanceGroups")
        }
        instanceGroup, err := instanceGroups.DescribeInstanceGroup(testContext.CloudConfig.NodeInstanceGroup)
        if err != nil {
            return -1, fmt.Errorf("error describing instance group: %v", err)
        }
        if instanceGroup == nil {
            return -1, fmt.Errorf("instance group not found: %s", testContext.CloudConfig.NodeInstanceGroup)
        }
        return instanceGroup.CurrentSize()
    }
}

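// waitForGroupSize polls groupSize every 5 seconds for up to 10 minutes and
// returns nil once the node instance group reaches the requested size, or an
// error on timeout.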
func waitForGroupSize(size int) error {
    timeout := 10 * time.Minute
    for start := time.Now(); time.Since(start) < timeout; time.Sleep(5 * time.Second) {
        currentSize, err := groupSize()
        if err != nil {
            Logf("Failed to get node instance group size: %v", err)
            continue
        }
        if currentSize != size {
            Logf("Waiting for node instance group size %d, current size %d", size, currentSize)
            continue
        }
        Logf("Node instance group has reached the desired size %d", size)
        return nil
    }
    return fmt.Errorf("timeout waiting %v for node instance group size to be %d", timeout, size)
}

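// waitForClusterSize polls the API server every 20 seconds for up to 10 minutes
// until the number of Ready nodes equals size. Typical usage elsewhere in this
// file:
//
//    err = waitForClusterSize(c, replicas-1)
//    Expect(err).NotTo(HaveOccurred())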
func waitForClusterSize(c *client.Client, size int) error {
    timeout := 10 * time.Minute
    for start := time.Now(); time.Since(start) < timeout; time.Sleep(20 * time.Second) {
        nodes, err := c.Nodes().List(labels.Everything(), fields.Everything())
        if err != nil {
            Logf("Failed to list nodes: %v", err)
            continue
        }
        // Filter out not-ready nodes.
        filterNodes(nodes, func(node api.Node) bool {
            return isNodeReadySetAsExpected(&node, true)
        })

        if len(nodes.Items) == size {
            Logf("Cluster has reached the desired size %d", size)
            return nil
        }
        Logf("Waiting for cluster size %d, current size %d", size, len(nodes.Items))
    }
    return fmt.Errorf("timeout waiting %v for cluster size to be %d", timeout, size)
}

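// svcByName returns a Service spec named "test-service" that selects pods
// labeled "name": name and forwards port 9376 to the same target port.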
func svcByName(name string) *api.Service {
    return &api.Service{
        ObjectMeta: api.ObjectMeta{
            Name: "test-service",
        },
        Spec: api.ServiceSpec{
            Selector: map[string]string{
                "name": name,
            },
            Ports: []api.ServicePort{{
                Port:       9376,
                TargetPort: util.NewIntOrStringFromInt(9376),
            }},
        },
    }
}

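// newSVCByName creates the service returned by svcByName in namespace ns.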
func newSVCByName(c *client.Client, ns, name string) error {
    _, err := c.Services(ns).Create(svcByName(name))
    return err
}

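// podOnNode returns a pod spec bound directly to nodeName (bypassing the
// scheduler) that runs the given image, exposes port 9376, and never restarts.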
func podOnNode(podName, nodeName string, image string) *api.Pod {
    return &api.Pod{
        ObjectMeta: api.ObjectMeta{
            Name: podName,
            Labels: map[string]string{
                "name": podName,
            },
        },
        Spec: api.PodSpec{
            Containers: []api.Container{
                {
                    Name:  podName,
                    Image: image,
                    Ports: []api.ContainerPort{{ContainerPort: 9376}},
                },
            },
            NodeName:      nodeName,
            RestartPolicy: api.RestartPolicyNever,
        },
    }
}

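// newPodOnNode creates a serve_hostname pod pinned to the given node and logs
// whether the creation succeeded.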
func newPodOnNode(c *client.Client, namespace, podName, nodeName string) error {
    pod, err := c.Pods(namespace).Create(podOnNode(podName, nodeName, serveHostnameImage))
    if err == nil {
        Logf("Created pod %s on node %s", pod.ObjectMeta.Name, nodeName)
    } else {
        Logf("Failed to create pod %s on node %s: %v", podName, nodeName, err)
    }
    return err
}

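// rcByName returns a replication controller spec whose single container
// exposes no ports; see rcByNameContainer for the common fields.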
func rcByName(name string, replicas int, image string, labels map[string]string) *api.ReplicationController {
    return rcByNameContainer(name, replicas, image, labels, api.Container{
        Name:  name,
        Image: image,
    })
}

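// rcByNamePort is like rcByName, but additionally exposes the given container port.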
func rcByNamePort(name string, replicas int, image string, port int, labels map[string]string) *api.ReplicationController {
    return rcByNameContainer(name, replicas, image, labels, api.Container{
        Name:  name,
        Image: image,
        Ports: []api.ContainerPort{{ContainerPort: port}},
    })
}

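// rcByNameContainer returns a replication controller spec that runs the given
// container with the requested number of replicas and a "name": name
// label/selector pair.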
func rcByNameContainer(name string, replicas int, image string, labels map[string]string, c api.Container) *api.ReplicationController {
    // Add "name": name to the labels, overwriting if it exists.
    labels["name"] = name
    return &api.ReplicationController{
        TypeMeta: api.TypeMeta{
            Kind:       "ReplicationController",
            APIVersion: latest.Version,
        },
        ObjectMeta: api.ObjectMeta{
            Name: name,
        },
        Spec: api.ReplicationControllerSpec{
            Replicas: replicas,
            Selector: map[string]string{
                "name": name,
            },
            Template: &api.PodTemplateSpec{
                ObjectMeta: api.ObjectMeta{
                    Labels: labels,
                },
                Spec: api.PodSpec{
                    Containers: []api.Container{c},
                },
            },
        },
    }
}

// newRCByName creates a replication controller whose selector matches the given name.
func newRCByName(c *client.Client, ns, name string, replicas int) (*api.ReplicationController, error) {
    By(fmt.Sprintf("creating replication controller %s", name))
    return c.ReplicationControllers(ns).Create(rcByNamePort(
        name, replicas, serveHostnameImage, 9376, map[string]string{}))
}

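// resizeRC updates the replication controller name in namespace ns to the
// desired number of replicas.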
func resizeRC(c *client.Client, ns, name string, replicas int) error {
    rc, err := c.ReplicationControllers(ns).Get(name)
    if err != nil {
        return err
    }
    rc.Spec.Replicas = replicas
    _, err = c.ReplicationControllers(rc.Namespace).Update(rc)
    return err
}

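// podsCreated waits up to a minute until exactly replicas pods labeled
// "name": name exist in ns (regardless of phase) and returns them.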
func podsCreated(c *client.Client, ns, name string, replicas int) (*api.PodList, error) {
    timeout := time.Minute
    // List the pods, making sure we observe all the replicas.
    label := labels.SelectorFromSet(labels.Set(map[string]string{"name": name}))
    for start := time.Now(); time.Since(start) < timeout; time.Sleep(5 * time.Second) {
        pods, err := c.Pods(ns).List(label, fields.Everything())
        if err != nil {
            return nil, err
        }

        Logf("Pod name %s: Found %d pods out of %d", name, len(pods.Items), replicas)
        if len(pods.Items) == replicas {
            return pods, nil
        }
    }
    return nil, fmt.Errorf("Pod name %s: Gave up waiting %v for %d pods to come up", name, timeout, replicas)
}

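// podsRunning waits for every pod in the list to reach the Running phase and
// returns the errors for those that did not.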
func podsRunning(c *client.Client, pods *api.PodList) []error {
    // Wait for the pods to enter the running state. Waiting loops until the pods
    // are running so non-running pods cause a timeout for this test.
    By("ensuring each pod is running")
    e := []error{}
    for _, pod := range pods.Items {
        // TODO: make waiting parallel.
        err := waitForPodRunningInNamespace(c, pod.Name, pod.Namespace)
        if err != nil {
            e = append(e, err)
        }
    }
    return e
}

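// verifyPods checks that the replication controller named name has brought up
// replicas pods, that they are all running, and that they respond to requests.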
func verifyPods(c *client.Client, ns, name string, wantName bool, replicas int) error {
    pods, err := podsCreated(c, ns, name, replicas)
    if err != nil {
        return err
    }
    e := podsRunning(c, pods)
    if len(e) > 0 {
        return fmt.Errorf("failed to wait for pods running: %v", e)
    }
    err = podsResponding(c, ns, name, wantName, pods)
    if err != nil {
        return fmt.Errorf("failed to wait for pods responding: %v", err)
    }
    return nil
}

// performTemporaryNetworkFailure blocks outgoing network traffic on 'node', then verifies
// that 'podNameToDisappear', which belongs to replication controller 'rcName', really disappeared.
// Finally, it checks that the replication controller recreates the pods on another node and
// that the number of replicas is again equal to 'replicas'.
// At the end (even in case of errors), the network traffic is brought back to normal.
// This function executes commands on a node, so it will work only for some environments.
func performTemporaryNetworkFailure(c *client.Client, ns, rcName string, replicas int, podNameToDisappear string, node *api.Node) {
    Logf("Getting external IP address for %s", node.Name)
    host := ""
    for _, a := range node.Status.Addresses {
        if a.Type == api.NodeExternalIP {
            host = a.Address + ":22"
            break
        }
    }
    if host == "" {
        Failf("Couldn't get the external IP of host %s with addresses %v", node.Name, node.Status.Addresses)
    }
    By(fmt.Sprintf("block network traffic from node %s to the master", node.Name))

    // TODO marekbiskup 2015-06-19 #10085
    // The use of MasterName will cause iptables to do a DNS lookup to
    // resolve the name to an IP address, which will slow down the test
    // and cause it to fail if DNS is absent or broken.
    // Use the IP address instead.

    destination := testContext.CloudConfig.MasterName
    if providerIs("aws") {
        // This is the (internal) IP address used on AWS for the master.
        // TODO: Use the IP address for all clouds?
        // TODO: Avoid hard-coding this.
        destination = "172.20.0.9"
    }

    iptablesRule := fmt.Sprintf("OUTPUT --destination %s --jump DROP", destination)
    defer func() {
        // This code will execute even if setting the iptables rule failed.
        // This is on purpose because we may have an error even if the new rule
        // had been inserted. (Yes, we could look at the exit code and the ssh error
        // separately, but I prefer to stay on the safe side.)

        By(fmt.Sprintf("Unblock network traffic from node %s to the master", node.Name))
        undropCmd := fmt.Sprintf("sudo iptables --delete %s", iptablesRule)
        // The undrop command may fail if the rule has never been created.
        // In that case we just lose 30 seconds, but the cluster is healthy.
        // But if the rule had been created and removing it failed, the node is broken and
        // not coming back. Subsequent tests will run on fewer nodes (and some of them
        // may fail). Manual intervention is required in that case (recreating the
        // cluster solves the problem too).
        err := wait.Poll(time.Millisecond*100, time.Second*30, func() (bool, error) {
            _, _, code, err := SSH(undropCmd, host, testContext.Provider)
            if code == 0 && err == nil {
                return true, nil
            } else {
                Logf("Expected 0 exit code and nil error when running '%s' on %s, got %d and %v",
                    undropCmd, node.Name, code, err)
                return false, nil
            }
        })
        if err != nil {
            Failf("Failed to remove the iptables DROP rule. Manual intervention is "+
                "required on node %s: remove rule %s, if it exists", node.Name, iptablesRule)
        }
    }()

    Logf("Waiting %v to ensure node %s is ready before beginning test...", resizeNodeReadyTimeout, node.Name)
    if !waitForNodeToBe(c, node.Name, true, resizeNodeReadyTimeout) {
        Failf("Node %s did not become ready within %v", node.Name, resizeNodeReadyTimeout)
    }

    // The command below blocks all outgoing network traffic from the node to the master.
    // When multi-master is implemented, this test will have to be improved to block
    // network traffic to all masters.
    // We could also block network traffic from the master(s) to this node,
    // but blocking it one way is sufficient for this test.
    dropCmd := fmt.Sprintf("sudo iptables --insert %s", iptablesRule)
    if _, _, code, err := SSH(dropCmd, host, testContext.Provider); code != 0 || err != nil {
        Failf("Expected 0 exit code and nil error when running %s on %s, got %d and %v",
            dropCmd, node.Name, code, err)
    }

    Logf("Waiting %v for node %s to be not ready after simulated network failure", resizeNodeNotReadyTimeout, node.Name)
    if !waitForNodeToBe(c, node.Name, false, resizeNodeNotReadyTimeout) {
        Failf("Node %s did not become not-ready within %v", node.Name, resizeNodeNotReadyTimeout)
    }

    Logf("Waiting for pod %s to be removed", podNameToDisappear)
    err := waitForRCPodToDisappear(c, ns, rcName, podNameToDisappear)
    Expect(err).NotTo(HaveOccurred())

    By("verifying whether the pod from the unreachable node is recreated")
    err = verifyPods(c, ns, rcName, true, replicas)
    Expect(err).NotTo(HaveOccurred())

    // Network traffic is unblocked in the deferred function above.
}

var _ = Describe("Nodes", func() {
    var c *client.Client
    var ns string

    BeforeEach(func() {
        var err error
        c, err = loadClient()
        expectNoError(err)
        testingNs, err := createTestingNS("resize-nodes", c)
        Expect(err).NotTo(HaveOccurred())
        ns = testingNs.Name
    })

    AfterEach(func() {
        By("checking whether all nodes are healthy")
        if err := allNodesReady(c, time.Minute); err != nil {
            Failf("Not all nodes are ready: %v", err)
        }
        By(fmt.Sprintf("destroying namespace for this suite %s", ns))
        if err := c.Namespaces().Delete(ns); err != nil {
            Failf("Couldn't delete namespace '%s', %v", ns, err)
        }
    })

Describe("Resize", func() {
|
2015-06-22 21:14:54 +00:00
|
|
|
var skipped bool
|
|
|
|
|
2015-06-08 08:52:37 +00:00
|
|
|
BeforeEach(func() {
|
2015-06-22 21:14:54 +00:00
|
|
|
skipped = true
|
|
|
|
SkipUnlessProviderIs("gce", "gke", "aws")
|
|
|
|
SkipUnlessNodeCountIsAtLeast(2)
|
|
|
|
skipped = false
|
2015-06-08 08:52:37 +00:00
|
|
|
})
|
|
|
|
|
|
|
|
AfterEach(func() {
|
2015-06-22 21:14:54 +00:00
|
|
|
if skipped {
|
2015-06-08 08:52:37 +00:00
|
|
|
return
|
|
|
|
}
|
2015-06-22 21:14:54 +00:00
|
|
|
|
2015-06-08 08:52:37 +00:00
|
|
|
By("restoring the original node instance group size")
|
2015-06-17 07:13:26 +00:00
|
|
|
if err := resizeGroup(testContext.CloudConfig.NumNodes); err != nil {
|
2015-06-08 08:52:37 +00:00
|
|
|
Failf("Couldn't restore the original node instance group size: %v", err)
|
|
|
|
}
|
2015-06-17 07:13:26 +00:00
|
|
|
if err := waitForGroupSize(testContext.CloudConfig.NumNodes); err != nil {
|
2015-06-08 08:52:37 +00:00
|
|
|
Failf("Couldn't restore the original node instance group size: %v", err)
|
|
|
|
}
|
|
|
|
if err := waitForClusterSize(c, testContext.CloudConfig.NumNodes); err != nil {
|
|
|
|
Failf("Couldn't restore the original cluster size: %v", err)
|
|
|
|
}
|
|
|
|
})
|
|
|
|
|
2015-06-22 21:14:54 +00:00
|
|
|
It("should be able to delete nodes", func() {
|
2015-06-08 08:52:37 +00:00
|
|
|
// Create a replication controller for a service that serves its hostname.
|
2015-07-06 12:32:56 +00:00
|
|
|
// The source for the Docker container kubernetes/serve_hostname is in contrib/for-demos/serve_hostname
|
2015-06-08 08:52:37 +00:00
|
|
|
name := "my-hostname-delete-node"
|
|
|
|
replicas := testContext.CloudConfig.NumNodes
|
2015-06-17 07:13:26 +00:00
|
|
|
newRCByName(c, ns, name, replicas)
|
|
|
|
err := verifyPods(c, ns, name, true, replicas)
|
2015-06-08 08:52:37 +00:00
|
|
|
Expect(err).NotTo(HaveOccurred())
|
|
|
|
|
|
|
|
By(fmt.Sprintf("decreasing cluster size to %d", replicas-1))
|
2015-06-17 07:13:26 +00:00
|
|
|
err = resizeGroup(replicas - 1)
|
2015-06-08 08:52:37 +00:00
|
|
|
Expect(err).NotTo(HaveOccurred())
|
2015-06-17 07:13:26 +00:00
|
|
|
err = waitForGroupSize(replicas - 1)
|
2015-06-08 08:52:37 +00:00
|
|
|
Expect(err).NotTo(HaveOccurred())
|
|
|
|
err = waitForClusterSize(c, replicas-1)
|
|
|
|
Expect(err).NotTo(HaveOccurred())
|
|
|
|
|
|
|
|
By("verifying whether the pods from the removed node are recreated")
|
2015-06-17 07:13:26 +00:00
|
|
|
err = verifyPods(c, ns, name, true, replicas)
|
2015-06-08 08:52:37 +00:00
|
|
|
Expect(err).NotTo(HaveOccurred())
|
|
|
|
})
|
|
|
|
|
2015-06-22 21:14:54 +00:00
|
|
|
// TODO: Bug here - testName is not correct
|
|
|
|
It("should be able to add nodes", func() {
|
2015-06-08 08:52:37 +00:00
|
|
|
// Create a replication controller for a service that serves its hostname.
|
2015-07-06 12:32:56 +00:00
|
|
|
// The source for the Docker container kubernetes/serve_hostname is in contrib/for-demos/serve_hostname
|
2015-06-08 08:52:37 +00:00
|
|
|
name := "my-hostname-add-node"
|
2015-06-17 07:13:26 +00:00
|
|
|
newSVCByName(c, ns, name)
|
2015-06-08 08:52:37 +00:00
|
|
|
replicas := testContext.CloudConfig.NumNodes
|
2015-06-17 07:13:26 +00:00
|
|
|
newRCByName(c, ns, name, replicas)
|
|
|
|
err := verifyPods(c, ns, name, true, replicas)
|
2015-06-08 08:52:37 +00:00
|
|
|
Expect(err).NotTo(HaveOccurred())
|
|
|
|
|
|
|
|
By(fmt.Sprintf("increasing cluster size to %d", replicas+1))
|
2015-06-17 07:13:26 +00:00
|
|
|
err = resizeGroup(replicas + 1)
|
2015-06-08 08:52:37 +00:00
|
|
|
Expect(err).NotTo(HaveOccurred())
|
2015-06-17 07:13:26 +00:00
|
|
|
err = waitForGroupSize(replicas + 1)
|
2015-06-08 08:52:37 +00:00
|
|
|
Expect(err).NotTo(HaveOccurred())
|
|
|
|
err = waitForClusterSize(c, replicas+1)
|
|
|
|
Expect(err).NotTo(HaveOccurred())
|
|
|
|
|
|
|
|
By(fmt.Sprintf("increasing size of the replication controller to %d and verifying all pods are running", replicas+1))
|
2015-06-17 07:13:26 +00:00
|
|
|
err = resizeRC(c, ns, name, replicas+1)
|
2015-06-08 08:52:37 +00:00
|
|
|
Expect(err).NotTo(HaveOccurred())
|
2015-06-17 07:13:26 +00:00
|
|
|
err = verifyPods(c, ns, name, true, replicas+1)
|
2015-06-08 08:52:37 +00:00
|
|
|
Expect(err).NotTo(HaveOccurred())
|
|
|
|
})
|
2015-04-23 14:28:16 +00:00
|
|
|
})
|
|
|
|
|
2015-06-08 08:52:37 +00:00
|
|
|
Describe("Network", func() {
|
2015-06-22 21:14:54 +00:00
|
|
|
Context("when a minion node becomes unreachable", func() {
|
|
|
|
BeforeEach(func() {
|
|
|
|
SkipUnlessProviderIs("gce", "gke", "aws")
|
|
|
|
SkipUnlessNodeCountIsAtLeast(2)
|
|
|
|
})
|
|
|
|
|
|
|
|
// TODO marekbiskup 2015-06-19 #10085
|
|
|
|
// This test has nothing to do with resizing nodes so it should be moved elsewhere.
|
|
|
|
// Two things are tested here:
|
|
|
|
// 1. pods from a uncontactable nodes are rescheduled
|
|
|
|
// 2. when a node joins the cluster, it can host new pods.
|
|
|
|
// Factor out the cases into two separate tests.
|
|
|
|
It("[replication controller] recreates pods scheduled on the unreachable minion node "+
|
|
|
|
"AND allows scheduling of pods on a minion after it rejoins the cluster", func() {
|
|
|
|
|
|
|
|
// Create a replication controller for a service that serves its hostname.
|
2015-07-06 12:32:56 +00:00
|
|
|
// The source for the Docker container kubernetes/serve_hostname is in contrib/for-demos/serve_hostname
|
2015-06-22 21:14:54 +00:00
|
|
|
name := "my-hostname-net"
|
|
|
|
newSVCByName(c, ns, name)
|
|
|
|
replicas := testContext.CloudConfig.NumNodes
|
|
|
|
newRCByName(c, ns, name, replicas)
|
|
|
|
err := verifyPods(c, ns, name, true, replicas)
|
|
|
|
Expect(err).NotTo(HaveOccurred(), "Each pod should start running and responding")
|
|
|
|
|
|
|
|
By("choose a node with at least one pod - we will block some network traffic on this node")
|
|
|
|
label := labels.SelectorFromSet(labels.Set(map[string]string{"name": name}))
|
|
|
|
pods, err := c.Pods(ns).List(label, fields.Everything()) // list pods after all have been scheduled
|
|
|
|
Expect(err).NotTo(HaveOccurred())
|
|
|
|
nodeName := pods.Items[0].Spec.NodeName
|
2015-06-08 08:52:37 +00:00
|
|
|
|
2015-06-22 21:14:54 +00:00
|
|
|
node, err := c.Nodes().Get(nodeName)
|
|
|
|
Expect(err).NotTo(HaveOccurred())
|
2015-06-08 08:52:37 +00:00
|
|
|
|
2015-06-22 21:14:54 +00:00
|
|
|
By(fmt.Sprintf("block network traffic from node %s", node.Name))
|
|
|
|
performTemporaryNetworkFailure(c, ns, name, replicas, pods.Items[0].Name, node)
|
2015-07-27 02:24:26 +00:00
|
|
|
Logf("Waiting %v for node %s to be ready once temporary network failure ends", resizeNodeReadyTimeout, node.Name)
|
|
|
|
if !waitForNodeToBe(c, node.Name, true, resizeNodeReadyTimeout) {
|
|
|
|
Failf("Node %s did not become ready within %v", node.Name, resizeNodeReadyTimeout)
|
2015-07-06 12:33:46 +00:00
|
|
|
}
|
2015-06-18 11:18:21 +00:00
|
|
|
|
2015-07-03 20:29:14 +00:00
|
|
|
By("verify whether new pods can be created on the re-attached node")
|
2015-06-22 21:14:54 +00:00
|
|
|
// increasing the RC size is not a valid way to test this
|
|
|
|
// since we have no guarantees the pod will be scheduled on our node.
|
|
|
|
additionalPod := "additionalpod"
|
|
|
|
err = newPodOnNode(c, ns, additionalPod, node.Name)
|
2015-06-18 11:18:21 +00:00
|
|
|
Expect(err).NotTo(HaveOccurred())
|
2015-06-22 21:14:54 +00:00
|
|
|
err = verifyPods(c, ns, additionalPod, true, 1)
|
|
|
|
Expect(err).NotTo(HaveOccurred())
|
|
|
|
|
|
|
|
// verify that it is really on the requested node
|
|
|
|
{
|
|
|
|
pod, err := c.Pods(ns).Get(additionalPod)
|
|
|
|
Expect(err).NotTo(HaveOccurred())
|
|
|
|
if pod.Spec.NodeName != node.Name {
|
|
|
|
Logf("Pod %s found on invalid node: %s instead of %s", pod.Spec.NodeName, node.Name)
|
|
|
|
}
|
2015-06-18 11:18:21 +00:00
|
|
|
}
|
2015-06-22 21:14:54 +00:00
|
|
|
})
|
2015-06-08 08:52:37 +00:00
|
|
|
})
|
2015-04-23 14:28:16 +00:00
|
|
|
})
|
|
|
|
})
|