mirror of https://github.com/k3s-io/k3s
Merge pull request #9392 from jszczepkowski/e2e-net
Added e2e test case for network partition.
commit 2bb0fc00e5
@@ -207,7 +207,23 @@ func verifyPodsResponding(c *client.Client, ns, name string, pods *api.PodList)
return wait.Poll(retryInterval, retryTimeout, podResponseChecker{c, ns, label, name, pods}.checkAllResponses)
}

var _ = Describe("ResizeNodes", func() {
func waitForPodsCreatedRunningResponding(c *client.Client, ns, name string, replicas int) error {
pods, err := waitForPodsCreated(c, ns, name, replicas)
if err != nil {
return err
}
e := waitForPodsRunning(c, pods)
if len(e) > 0 {
return fmt.Errorf("Failed to wait for pods running: %v", e)
}
err = verifyPodsResponding(c, ns, name, pods)
if err != nil {
return err
}
return nil
}

var _ = Describe("Nodes", func() {
supportedProviders := []string{"gce"}
var testName string
var c *client.Client

@@ -223,116 +239,173 @@ var _ = Describe("ResizeNodes", func() {
})
AfterEach(func() {
if !providerIs(supportedProviders...) {
return
}
By(fmt.Sprintf("destroying namespace for this suite %s", ns))
if err := c.Namespaces().Delete(ns); err != nil {
Failf("Couldn't delete namespace '%s', %v", ns, err)
}
By("restoring the original node instance group size")
if err := resizeNodeInstanceGroup(testContext.CloudConfig.NumNodes); err != nil {
Failf("Couldn't restore the original node instance group size: %v", err)
}
if err := waitForNodeInstanceGroupSize(testContext.CloudConfig.NumNodes); err != nil {
Failf("Couldn't restore the original node instance group size: %v", err)
}
if err := waitForClusterSize(c, testContext.CloudConfig.NumNodes); err != nil {
Failf("Couldn't restore the original cluster size: %v", err)
}
})

testName = "should be able to delete nodes."
It(testName, func() {
Logf("starting test %s", testName)
Describe("Resize", func() {
BeforeEach(func() {
if !providerIs(supportedProviders...) {
Failf("Nodes.Resize test is only supported for providers %v (not %s). You can avoid this failure by using ginkgo.skip=Nodes.Resize in your environment.",
supportedProviders, testContext.Provider)
}
})

if !providerIs(supportedProviders...) {
By(fmt.Sprintf("Skipping %s test, which is only supported for providers %v (not %s)",
testName, supportedProviders, testContext.Provider))
return
}
AfterEach(func() {
if !providerIs(supportedProviders...) {
return
}
By("restoring the original node instance group size")
if err := resizeNodeInstanceGroup(testContext.CloudConfig.NumNodes); err != nil {
Failf("Couldn't restore the original node instance group size: %v", err)
}
if err := waitForNodeInstanceGroupSize(testContext.CloudConfig.NumNodes); err != nil {
Failf("Couldn't restore the original node instance group size: %v", err)
}
if err := waitForClusterSize(c, testContext.CloudConfig.NumNodes); err != nil {
Failf("Couldn't restore the original cluster size: %v", err)
}
})

if testContext.CloudConfig.NumNodes < 2 {
By(fmt.Sprintf("skipping %s test, which requires at least 2 nodes (not %d)",
testName, testContext.CloudConfig.NumNodes))
return
}
testName = "should be able to delete nodes."
It(testName, func() {
Logf("starting test %s", testName)

// Create a replication controller for a service that serves its hostname.
// The source for the Docker container kubernetes/serve_hostname is in contrib/for-demos/serve_hostname
name := "my-hostname-delete-node-" + string(util.NewUUID())
replicas := testContext.CloudConfig.NumNodes
createServeHostnameReplicationController(c, ns, name, replicas)
pods, err := waitForPodsCreated(c, ns, name, replicas)
Expect(err).NotTo(HaveOccurred())
e := waitForPodsRunning(c, pods)
if len(e) > 0 {
Failf("Failed to wait for pods running: %v", e)
}
err = verifyPodsResponding(c, ns, name, pods)
Expect(err).NotTo(HaveOccurred())
if testContext.CloudConfig.NumNodes < 2 {
Failf("Failing test %s as it requires at least 2 nodes (not %d)", testName, testContext.CloudConfig.NumNodes)
return
}

By(fmt.Sprintf("decreasing cluster size to %d", replicas-1))
|
||||
err = resizeNodeInstanceGroup(replicas - 1)
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
err = waitForNodeInstanceGroupSize(replicas - 1)
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
err = waitForClusterSize(c, replicas-1)
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
// Create a replication controller for a service that serves its hostname.
|
||||
// The source for the Docker containter kubernetes/serve_hostname is in contrib/for-demos/serve_hostname
|
||||
name := "my-hostname-delete-node"
|
||||
replicas := testContext.CloudConfig.NumNodes
|
||||
createServeHostnameReplicationController(c, ns, name, replicas)
|
||||
err := waitForPodsCreatedRunningResponding(c, ns, name, replicas)
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
|
||||
By("verifying whether the pods from the removed node are recreated")
|
||||
pods, err = waitForPodsCreated(c, ns, name, replicas)
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
e = waitForPodsRunning(c, pods)
|
||||
if len(e) > 0 {
|
||||
Failf("Failed to wait for pods running: %v", e)
|
||||
}
|
||||
err = verifyPodsResponding(c, ns, name, pods)
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
By(fmt.Sprintf("decreasing cluster size to %d", replicas-1))
|
||||
err = resizeNodeInstanceGroup(replicas - 1)
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
err = waitForNodeInstanceGroupSize(replicas - 1)
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
err = waitForClusterSize(c, replicas-1)
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
|
||||
By("verifying whether the pods from the removed node are recreated")
|
||||
err = waitForPodsCreatedRunningResponding(c, ns, name, replicas)
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
})
|
||||
|
||||
testName = "should be able to add nodes."
|
||||
It(testName, func() {
|
||||
Logf("starting test %s", testName)
|
||||
|
||||
if testContext.CloudConfig.NumNodes < 2 {
|
||||
Failf("Failing test %s as it requires at lease 2 nodes (not %d)", testName, testContext.CloudConfig.NumNodes)
|
||||
return
|
||||
}
|
||||
|
||||
// Create a replication controller for a service that serves its hostname.
// The source for the Docker container kubernetes/serve_hostname is in contrib/for-demos/serve_hostname
name := "my-hostname-add-node"
createServiceWithNameSelector(c, ns, name)
replicas := testContext.CloudConfig.NumNodes
createServeHostnameReplicationController(c, ns, name, replicas)
err := waitForPodsCreatedRunningResponding(c, ns, name, replicas)
Expect(err).NotTo(HaveOccurred())

By(fmt.Sprintf("increasing cluster size to %d", replicas+1))
err = resizeNodeInstanceGroup(replicas + 1)
Expect(err).NotTo(HaveOccurred())
err = waitForNodeInstanceGroupSize(replicas + 1)
Expect(err).NotTo(HaveOccurred())
err = waitForClusterSize(c, replicas+1)
Expect(err).NotTo(HaveOccurred())

By(fmt.Sprintf("increasing size of the replication controller to %d and verifying all pods are running", replicas+1))
err = resizeReplicationController(c, ns, name, replicas+1)
Expect(err).NotTo(HaveOccurred())
err = waitForPodsCreatedRunningResponding(c, ns, name, replicas+1)
Expect(err).NotTo(HaveOccurred())
})
})

testName = "should be able to add nodes."
It(testName, func() {
Logf("starting test %s", testName)
Describe("Network", func() {
BeforeEach(func() {
if !providerIs(supportedProviders...) {
Failf("Nodes.Network test is only supported for providers %v (not %s). You can avoid this failure by using ginkgo.skip=Nodes.Network in your environment.",
supportedProviders, testContext.Provider)
}
})

if !providerIs(supportedProviders...) {
By(fmt.Sprintf("Skipping %s test, which is only supported for providers %v (not %s)",
testName, supportedProviders, testContext.Provider))
return
}
testName = "should survive network partition."
It(testName, func() {
if testContext.CloudConfig.NumNodes < 2 {
By(fmt.Sprintf("skipping %s test, which requires at least 2 nodes (not %d)",
testName, testContext.CloudConfig.NumNodes))
return
}

// Create a replication controller for a service that serves its hostname.
// The source for the Docker container kubernetes/serve_hostname is in contrib/for-demos/serve_hostname
name := "my-hostname-add-node-" + string(util.NewUUID())
createServiceWithNameSelector(c, ns, name)
replicas := testContext.CloudConfig.NumNodes
createServeHostnameReplicationController(c, ns, name, replicas)
pods, err := waitForPodsCreated(c, ns, name, replicas)
Expect(err).NotTo(HaveOccurred())
e := waitForPodsRunning(c, pods)
if len(e) > 0 {
Failf("Failed to wait for pods running: %v", e)
}
err = verifyPodsResponding(c, ns, name, pods)
Expect(err).NotTo(HaveOccurred())
// Create a replication controller for a service that serves its hostname.
// The source for the Docker container kubernetes/serve_hostname is in contrib/for-demos/serve_hostname
name := "my-hostname-net"
createServiceWithNameSelector(c, ns, name)
replicas := testContext.CloudConfig.NumNodes
createServeHostnameReplicationController(c, ns, name, replicas)
err := waitForPodsCreatedRunningResponding(c, ns, name, replicas)
Expect(err).NotTo(HaveOccurred())

By(fmt.Sprintf("increasing cluster size to %d", replicas+1))
err = resizeNodeInstanceGroup(replicas + 1)
Expect(err).NotTo(HaveOccurred())
err = waitForNodeInstanceGroupSize(replicas + 1)
Expect(err).NotTo(HaveOccurred())
err = waitForClusterSize(c, replicas+1)
Expect(err).NotTo(HaveOccurred())
By("cause network partition on one node")
nodelist, err := c.Nodes().List(labels.Everything(), fields.Everything())
Expect(err).NotTo(HaveOccurred())
node := nodelist.Items[0]
pod, err := waitForRCPodOnNode(c, ns, name, node.Name)
Expect(err).NotTo(HaveOccurred())
Logf("Getting external IP address for %s", name)
host := ""
for _, a := range node.Status.Addresses {
if a.Type == api.NodeExternalIP {
host = a.Address + ":22"
break
}
}
Logf("Setting network partition on %s", node.Name)
dropCmd := fmt.Sprintf("sudo iptables -I OUTPUT 1 -d %s -j DROP", testContext.CloudConfig.MasterName)
if _, _, code, err := SSH(dropCmd, host, testContext.Provider); code != 0 || err != nil {
Failf("Expected 0 exit code and nil error when running %s on %s, got %d and %v",
dropCmd, node, code, err)
}

By(fmt.Sprintf("increasing size of the replication controller to %d and verifying all pods are running", replicas+1))
|
||||
resizeReplicationController(c, ns, name, replicas+1)
|
||||
pods, err = waitForPodsCreated(c, ns, name, replicas+1)
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
e = waitForPodsRunning(c, pods)
|
||||
if len(e) > 0 {
|
||||
Failf("Failed to wait for pods running: %v", e)
|
||||
}
|
||||
err = verifyPodsResponding(c, ns, name, pods)
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
Logf("Waiting for node %s to be not ready", node.Name)
|
||||
waitForNodeToBe(c, node.Name, false, 2*time.Minute)
|
||||
|
||||
Logf("Waiting for pod %s to be removed", pod.Name)
|
||||
waitForRCPodToDisappear(c, ns, name, pod.Name)
|
||||
|
||||
By("verifying whether the pod from the partitioned node is recreated")
|
||||
err = waitForPodsCreatedRunningResponding(c, ns, name, replicas)
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
|
||||
By("remove network partition")
|
||||
undropCmd := "sudo iptables --delete OUTPUT 1"
|
||||
if _, _, code, err := SSH(undropCmd, host, testContext.Provider); code != 0 || err != nil {
|
||||
Failf("Expected 0 exit code and nil error when running %s on %s, got %d and %v",
|
||||
undropCmd, node, code, err)
|
||||
}
|
||||
|
||||
Logf("Waiting for node %s to be ready", node.Name)
|
||||
waitForNodeToBe(c, node.Name, true, 2*time.Minute)
|
||||
|
||||
By("verify wheter new pods can be created on the re-attached node")
|
||||
err = resizeReplicationController(c, ns, name, replicas+1)
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
err = waitForPodsCreatedRunningResponding(c, ns, name, replicas+1)
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
_, err = waitForRCPodOnNode(c, ns, name, node.Name)
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
})
|
||||
})
|
||||
})
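The partition in the test above is induced purely with iptables rules applied to one node over SSH: outbound traffic to the master is dropped, the node goes NotReady and its pods are rescheduled, and deleting the rule heals the partition. A minimal standalone sketch of that technique (not part of this change; runOnNode, the host name, and the master IP are illustrative placeholders, while the real test uses the suite's SSH helper):

package main

import (
	"fmt"
	"log"
	"os/exec"
)

// runOnNode runs a shell command on a remote node via the system ssh client
// and returns its combined output.
func runOnNode(host, cmd string) (string, error) {
	out, err := exec.Command("ssh", host, cmd).CombinedOutput()
	return string(out), err
}

func main() {
	host := "node-1.example.com" // placeholder node address
	master := "10.0.0.2"         // placeholder master IP

	// Create the partition: drop all outbound traffic from the node to the master.
	drop := fmt.Sprintf("sudo iptables -I OUTPUT 1 -d %s -j DROP", master)
	if out, err := runOnNode(host, drop); err != nil {
		log.Fatalf("failed to set partition: %v\n%s", err, out)
	}

	// ... at this point the node is expected to go NotReady and its pods to be recreated elsewhere ...

	// Heal the partition: remove the rule that was inserted at position 1 of OUTPUT.
	if out, err := runOnNode(host, "sudo iptables --delete OUTPUT 1"); err != nil {
		log.Fatalf("failed to remove partition: %v\n%s", err, out)
	}
}
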
@@ -42,6 +42,7 @@ import (
"github.com/GoogleCloudPlatform/kubernetes/pkg/labels"
"github.com/GoogleCloudPlatform/kubernetes/pkg/runtime"
"github.com/GoogleCloudPlatform/kubernetes/pkg/util"
"github.com/GoogleCloudPlatform/kubernetes/pkg/util/wait"
"github.com/GoogleCloudPlatform/kubernetes/pkg/watch"

"code.google.com/p/go-uuid/uuid"

@@ -397,6 +398,54 @@ func waitForPodSuccess(c *client.Client, podName string, contName string) error
return waitForPodSuccessInNamespace(c, podName, contName, api.NamespaceDefault)
}

// waitForRCPodOnNode returns the pod from the given replication controller (described by rcName) which is scheduled on the given node.
// In case of failure or too long waiting time, an error is returned.
func waitForRCPodOnNode(c *client.Client, ns, rcName, node string) (*api.Pod, error) {
label := labels.SelectorFromSet(labels.Set(map[string]string{"name": rcName}))
var p *api.Pod = nil
err := wait.Poll(10*time.Second, 5*time.Minute, func() (bool, error) {
Logf("Waiting for pod %s to appear on node %s", rcName, node)
pods, err := c.Pods(ns).List(label, fields.Everything())
if err != nil {
return false, err
}
for _, pod := range pods.Items {
if pod.Spec.NodeName == node {
Logf("Pod %s found on node %s", pod.Name, node)
p = &pod
return true, nil
}
}
return false, nil
})
return p, err
}

// waitForRCPodToDisappear returns nil if the pod from the given replication controller (described by rcName) no longer exists.
// In case of failure or too long waiting time, an error is returned.
func waitForRCPodToDisappear(c *client.Client, ns, rcName, podName string) error {
label := labels.SelectorFromSet(labels.Set(map[string]string{"name": rcName}))
return wait.Poll(20*time.Second, 5*time.Minute, func() (bool, error) {
Logf("Waiting for pod %s to disappear", podName)
pods, err := c.Pods(ns).List(label, fields.Everything())
if err != nil {
return false, err
}
found := false
for _, pod := range pods.Items {
if pod.Name == podName {
Logf("Pod %s still exists", podName)
found = true
}
}
if !found {
Logf("Pod %s no longer exists", podName)
return true, nil
}
return false, nil
})
}
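Both helpers are built on the same poll-until-done pattern that wait.Poll provides: invoke a condition function at a fixed interval until it reports done, returns an error, or a timeout expires. A self-contained sketch of that pattern (illustrative only; the poll function below is a stand-in, not the library's API):

package main

import (
	"errors"
	"fmt"
	"time"
)

// poll calls cond every interval until cond returns (true, nil), cond
// returns an error, or timeout elapses.
func poll(interval, timeout time.Duration, cond func() (bool, error)) error {
	deadline := time.Now().Add(timeout)
	for {
		done, err := cond()
		if err != nil {
			return err
		}
		if done {
			return nil
		}
		if time.Now().After(deadline) {
			return errors.New("timed out waiting for condition")
		}
		time.Sleep(interval)
	}
}

func main() {
	start := time.Now()
	// The condition becomes true roughly 300ms after start.
	err := poll(100*time.Millisecond, time.Second, func() (bool, error) {
		return time.Since(start) > 300*time.Millisecond, nil
	})
	fmt.Println("poll finished:", err)
}
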
// Context for checking pods' responses by issuing GETs to them and verifying that they answer with the pod name.
type podResponseChecker struct {
c *client.Client