Implement e2e test for node out of disk condition.

The ability to SSH into individual nodes, described by api.Node, and run
commands on them is useful for other e2e tests too. So, move the
issueSSHCommand utility function to the e2e util file.

Also, modify the waitForNodeToBe e2e util function to accept any node
condition. The current implementation only considers the api.NodeReady
condition; this change generalizes it to an arbitrary condition type.
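
As a rough illustration of the resulting helpers (a sketch only: the node
variable, the client c, the command string, and the one-minute timeout are
illustrative, while the function signatures match test/e2e/util.go in this
change):

    // Run a command on a node over SSH using the relocated helper.
    cmd := "touch /tmp/e2e-marker"
    expectNoError(issueSSHCommand(cmd, testContext.Provider, node))

    // Wait on an arbitrary node condition, not just api.NodeReady.
    if !waitForNodeToBe(c, node.Name, api.NodeOutOfDisk, true, time.Minute) {
        Failf("Node %s did not report the OutOfDisk condition within a minute", node.Name)
    }
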
Madhusudan.C.S 2015-10-27 17:05:38 -07:00
parent a10a2f0e2b
commit 706d768e20
7 changed files with 286 additions and 41 deletions


@ -197,6 +197,7 @@ GCE_PARALLEL_SKIP_TESTS=(
"Resource\susage\sof\ssystem\scontainers"
"SchedulerPredicates"
"resource\susage\stracking"
"NodeOutOfDisk"
"${DISRUPTIVE_TESTS[@]}"
)


@ -185,7 +185,7 @@ func ClusterLevelLoggingWithElasticsearch(f *Framework) {
// Previous tests may have cause failures of some nodes. Let's skip
// 'Not Ready' nodes, just in case (there is no need to fail the test).
filterNodes(nodes, func(node api.Node) bool {
return isNodeReadySetAsExpected(&node, true)
return isNodeConditionSetAsExpected(&node, api.NodeReady, true)
})
if len(nodes.Items) < 2 {
Failf("Less than two nodes were found Ready: %d", len(nodes.Items))


@ -145,7 +145,7 @@ var _ = Describe("Networking", func() {
// previous tests may have cause failures of some nodes. Let's skip
// 'Not Ready' nodes, just in case (there is no need to fail the test).
filterNodes(nodes, func(node api.Node) bool {
return isNodeReadySetAsExpected(&node, true)
return isNodeConditionSetAsExpected(&node, api.NodeReady, true)
})
if len(nodes.Items) == 0 {

test/e2e/nodeoutofdisk.go (new file, 245 lines)

@ -0,0 +1,245 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package e2e
import (
"encoding/json"
"fmt"
"time"
cadvisorapi "github.com/google/cadvisor/info/v1"
"k8s.io/kubernetes/pkg/api"
"k8s.io/kubernetes/pkg/api/resource"
client "k8s.io/kubernetes/pkg/client/unversioned"
"k8s.io/kubernetes/pkg/fields"
"k8s.io/kubernetes/pkg/labels"
"k8s.io/kubernetes/pkg/util/wait"
. "github.com/onsi/ginkgo"
. "github.com/onsi/gomega"
)
const (
mb = 1024 * 1024
gb = 1024 * mb
// TODO(madhusudancs): find a way to query kubelet's disk space manager to obtain this value. 256MB
// is the default that is set today. This test might break if the default value changes. This value
// can be configured by setting the "low-diskspace-threshold-mb" flag while starting a kubelet.
// However, kubelets are started as part of the cluster start up, once, before any e2e test is run,
// and remain unchanged until all the tests are run and the cluster is brought down. Changing the
// flag value affects all the e2e tests. So we are hard-coding this value for now.
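// For reference, the kubelet invocation that sets this threshold looks roughly like
// the following (the value shown is the assumed default described above; verify it
// against the kubelet version in use):
//   --low-diskspace-threshold-mb=256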
lowDiskSpaceThreshold uint64 = 256 * mb
nodeOODTimeOut = 1 * time.Minute
numNodeOODPods = 3
)
// Plan:
// 1. Fill disk space on all nodes except one. One node is left out so that we can schedule pods
// on that node. Arbitrarily choose that node to be node with index 0.
// 2. Get the CPU capacity on unfilled node.
// 3. Divide the CPU capacity into one less than the number of pods we want to schedule. We want
// to schedule 3 pods, so divide CPU capacity by 2.
// 4. Request the divided capacity for each pod.
// 5. Observe that 2 of the pods schedule onto the node whose disk is not full, and the remaining
// pod stays pending and does not schedule onto the nodes whose disks are full nor the node
// with the other two pods, since there is not enough free CPU capacity there.
// 6. Recover disk space from one of the nodes whose disk space was previously filled. Arbitrarily
// choose that node to be node with index 1.
// 7. Observe that the pod in pending status schedules on that node.
//
var _ = Describe("NodeOutOfDisk", func() {
var c *client.Client
var unfilledNodeName, recoveredNodeName string
framework := Framework{BaseName: "node-outofdisk"}
BeforeEach(func() {
framework.beforeEach()
c = framework.Client
nodelist, err := listNodes(c, labels.Everything(), fields.Everything())
expectNoError(err, "Error retrieving nodes")
Expect(len(nodelist.Items)).To(BeNumerically(">", 1))
unfilledNodeName = nodelist.Items[0].Name
for _, node := range nodelist.Items[1:] {
fillDiskSpace(c, &node)
}
})
AfterEach(func() {
defer framework.afterEach()
nodelist, err := listNodes(c, labels.Everything(), fields.Everything())
expectNoError(err, "Error retrieving nodes")
Expect(len(nodelist.Items)).ToNot(BeZero())
for _, node := range nodelist.Items {
if unfilledNodeName == node.Name || recoveredNodeName == node.Name {
continue
}
recoverDiskSpace(c, &node)
}
})
It("runs out of disk space", func() {
unfilledNode, err := c.Nodes().Get(unfilledNodeName)
expectNoError(err)
By(fmt.Sprintf("Get CPU capacity on node %s", unfilledNode.Name))
milliCPU := unfilledNode.Status.Capacity.Cpu().MilliValue()
// Per pod CPU should be just enough to fit only (numNodeOODPods - 1) pods on the
// given node. We compute this value by dividing the available CPU capacity on the given
// node by (numNodeOODPods - 1) and subtracting a slack of one fifth of the capacity from it.
podCPU := (milliCPU / (numNodeOODPods - 1)) - (milliCPU / 5)
ns := framework.Namespace.Name
podClient := c.Pods(ns)
By("Creating pods and waiting for all but one pods to be scheduled")
for i := 0; i < numNodeOODPods-1; i++ {
name := fmt.Sprintf("pod-node-outofdisk-%d", i)
createOutOfDiskPod(c, ns, name, podCPU)
expectNoError(framework.WaitForPodRunning(name))
pod, err := podClient.Get(name)
expectNoError(err)
Expect(pod.Spec.NodeName).To(Equal(unfilledNodeName))
}
pendingPodName := fmt.Sprintf("pod-node-outofdisk-%d", numNodeOODPods-1)
createOutOfDiskPod(c, ns, pendingPodName, podCPU)
By(fmt.Sprintf("Finding a failed scheduler event for pod %s", pendingPodName))
wait.Poll(2*time.Second, 5*time.Minute, func() (bool, error) {
schedEvents, err := c.Events(ns).List(
labels.Everything(),
fields.Set{
"involvedObject.kind": "Pod",
"involvedObject.name": pendingPodName,
"involvedObject.namespace": ns,
"source": "scheduler",
"reason": "FailedScheduling",
}.AsSelector())
expectNoError(err)
return len(schedEvents.Items) > 0, nil
})
nodelist, err := listNodes(c, labels.Everything(), fields.Everything())
expectNoError(err, "Error retrieving nodes")
Expect(len(nodelist.Items)).To(BeNumerically(">", 1))
nodeToRecover := nodelist.Items[1]
Expect(nodeToRecover.Name).ToNot(Equal(unfilledNodeName))
By(fmt.Sprintf("Recovering disk space on node %s", nodeToRecover.Name))
recoverDiskSpace(c, &nodeToRecover)
recoveredNodeName = nodeToRecover.Name
By(fmt.Sprintf("Verifying that pod %s schedules on node %s", pendingPodName, recoveredNodeName))
expectNoError(framework.WaitForPodRunning(pendingPodName))
pendingPod, err := podClient.Get(pendingPodName)
expectNoError(err)
Expect(pendingPod.Spec.NodeName).To(Equal(recoveredNodeName))
})
})
// createOutOfDiskPod creates a pod in the given namespace with the requested amount of CPU.
func createOutOfDiskPod(c *client.Client, ns, name string, milliCPU int64) {
podClient := c.Pods(ns)
pod := &api.Pod{
ObjectMeta: api.ObjectMeta{
Name: name,
},
Spec: api.PodSpec{
Containers: []api.Container{
{
Name: "pause",
Image: "beta.gcr.io/google_containers/pause:2.0",
Resources: api.ResourceRequirements{
Requests: api.ResourceList{
// Request enough CPU to fit only two pods on a given node.
api.ResourceCPU: *resource.NewMilliQuantity(milliCPU, resource.DecimalSI),
},
},
},
},
},
}
_, err := podClient.Create(pod)
expectNoError(err)
}
// availSize returns the available disk space on a given node by querying node stats which
// is in turn obtained internally from cadvisor.
func availSize(c *client.Client, node *api.Node) (uint64, error) {
statsResource := fmt.Sprintf("api/v1/proxy/nodes/%s/stats/", node.Name)
Logf("Querying stats for node %s using url %s", node.Name, statsResource)
res, err := c.Get().AbsPath(statsResource).Timeout(timeout).Do().Raw()
if err != nil {
return 0, fmt.Errorf("error querying cAdvisor API: %v", err)
}
ci := cadvisorapi.ContainerInfo{}
err = json.Unmarshal(res, &ci)
if err != nil {
return 0, fmt.Errorf("couldn't unmarshal container info: %v", err)
}
return ci.Stats[len(ci.Stats)-1].Filesystem[0].Available, nil
}
// fillDiskSpace fills the available disk space on a given node by creating a large file. The disk
// space on the node is filled in such a way that the available space after filling the disk is just
// below the lowDiskSpaceThreshold mark.
func fillDiskSpace(c *client.Client, node *api.Node) {
avail, err := availSize(c, node)
expectNoError(err, "Node %s: couldn't obtain available disk size %v", node.Name, err)
fillSize := (avail - lowDiskSpaceThreshold + (100 * mb))
Logf("Node %s: disk space available %d bytes", node.Name, avail)
By(fmt.Sprintf("Node %s: creating a file of size %d bytes to fill the available disk space", node.Name, fillSize))
cmd := fmt.Sprintf("fallocate -l %d test.img", fillSize)
expectNoError(issueSSHCommand(cmd, testContext.Provider, node))
ood := waitForNodeToBe(c, node.Name, api.NodeOutOfDisk, true, nodeOODTimeOut)
Expect(ood).To(BeTrue(), "Node %s did not run out of disk within %v", node.Name, nodeOODTimeOut)
avail, err = availSize(c, node)
expectNoError(err, "Node %s: couldn't obtain available disk size %v", node.Name, err)
Logf("Node %s: disk space available %d bytes", node.Name, avail)
Expect(avail < lowDiskSpaceThreshold).To(BeTrue())
}
// recoverDiskSpace recovers disk space, filled by creating a large file, on a given node.
func recoverDiskSpace(c *client.Client, node *api.Node) {
By(fmt.Sprintf("Recovering disk space on node %s", node.Name))
cmd := "rm -f test.img"
expectNoError(issueSSHCommand(cmd, testContext.Provider, node))
ood := waitForNodeToBe(c, node.Name, api.NodeOutOfDisk, false, nodeOODTimeOut)
Expect(ood).To(BeTrue(), "Node %s's out of disk condition status did not change to false within %v", node.Name, nodeOODTimeOut)
}


@ -149,25 +149,6 @@ func testReboot(c *client.Client, rebootCmd string) {
}
}
func issueSSHCommand(node *api.Node, provider, cmd string) error {
Logf("Getting external IP address for %s", node.Name)
host := ""
for _, a := range node.Status.Addresses {
if a.Type == api.NodeExternalIP {
host = a.Address + ":22"
break
}
}
if host == "" {
return fmt.Errorf("couldn't find external IP address for node %s", node.Name)
}
Logf("Calling %s on %s", cmd, node.Name)
if _, _, code, err := SSH(cmd, host, provider); code != 0 || err != nil {
return fmt.Errorf("when running %s on %s, got %d and %v", cmd, node.Name, code, err)
}
return nil
}
// rebootNode takes node name on provider through the following steps using c:
// - ensures the node is ready
// - ensures all pods on the node are running and ready
@ -222,7 +203,7 @@ func rebootNode(c *client.Client, provider, name, rebootCmd string) bool {
}
// Reboot the node.
if err = issueSSHCommand(node, provider, rebootCmd); err != nil {
if err = issueSSHCommand(rebootCmd, provider, node); err != nil {
Logf("Error while issuing ssh command: %v", err)
return false
}


@ -354,7 +354,7 @@ func performTemporaryNetworkFailure(c *client.Client, ns, rcName string, replica
}()
Logf("Waiting %v to ensure node %s is ready before beginning test...", resizeNodeReadyTimeout, node.Name)
if !waitForNodeToBe(c, node.Name, true, resizeNodeReadyTimeout) {
if !waitForNodeToBe(c, node.Name, api.NodeReady, true, resizeNodeReadyTimeout) {
Failf("Node %s did not become ready within %v", node.Name, resizeNodeReadyTimeout)
}
@ -370,7 +370,7 @@ func performTemporaryNetworkFailure(c *client.Client, ns, rcName string, replica
}
Logf("Waiting %v for node %s to be not ready after simulated network failure", resizeNodeNotReadyTimeout, node.Name)
if !waitForNodeToBe(c, node.Name, false, resizeNodeNotReadyTimeout) {
if !waitForNodeToBe(c, node.Name, api.NodeReady, false, resizeNodeNotReadyTimeout) {
Failf("Node %s did not become not-ready within %v", node.Name, resizeNodeNotReadyTimeout)
}


@ -1953,6 +1953,25 @@ func sshCore(cmd, host, provider string, verbose bool) (string, string, int, err
return stdout, stderr, code, err
}
func issueSSHCommand(cmd, provider string, node *api.Node) error {
Logf("Getting external IP address for %s", node.Name)
host := ""
for _, a := range node.Status.Addresses {
if a.Type == api.NodeExternalIP {
host = a.Address + ":22"
break
}
}
if host == "" {
return fmt.Errorf("couldn't find external IP address for node %s", node.Name)
}
Logf("Calling %s on %s", cmd, node.Name)
if _, _, code, err := SSH(cmd, host, provider); code != 0 || err != nil {
return fmt.Errorf("when running %s on %s, got %d and %v", cmd, node.Name, code, err)
}
return nil
}
// NewHostExecPodSpec returns the pod spec of hostexec pod
func NewHostExecPodSpec(ns, name string) *api.Pod {
pod := &api.Pod{
@ -2055,38 +2074,37 @@ func checkPodsRunningReady(c *client.Client, ns string, podNames []string, timeo
// waitForNodeToBeReady returns whether node name is ready within timeout.
func waitForNodeToBeReady(c *client.Client, name string, timeout time.Duration) bool {
return waitForNodeToBe(c, name, true, timeout)
return waitForNodeToBe(c, name, api.NodeReady, true, timeout)
}
// waitForNodeToBeNotReady returns whether node name is not ready (i.e. the
// readiness condition is anything but ready, e.g false or unknown) within
// timeout.
func waitForNodeToBeNotReady(c *client.Client, name string, timeout time.Duration) bool {
return waitForNodeToBe(c, name, false, timeout)
return waitForNodeToBe(c, name, api.NodeReady, false, timeout)
}
func isNodeReadySetAsExpected(node *api.Node, wantReady bool) bool {
func isNodeConditionSetAsExpected(node *api.Node, conditionType api.NodeConditionType, wantTrue bool) bool {
// Check the node readiness condition (logging all).
for i, cond := range node.Status.Conditions {
Logf("Node %s condition %d/%d: type: %v, status: %v, reason: %q, message: %q, last transition time: %v",
node.Name, i+1, len(node.Status.Conditions), cond.Type, cond.Status,
cond.Reason, cond.Message, cond.LastTransitionTime)
// Ensure that the condition type is readiness and the status
// matches as desired.
if cond.Type == api.NodeReady && (cond.Status == api.ConditionTrue) == wantReady {
Logf("Successfully found node %s readiness to be %t", node.Name, wantReady)
// Ensure that the condition type and the status matches as desired.
if cond.Type == conditionType && (cond.Status == api.ConditionTrue) == wantTrue {
Logf("Successfully found condition %s of node %s to be %t", conditionType, node.Name, wantTrue)
return true
}
}
return false
}
// waitForNodeToBe returns whether node name's readiness state matches wantReady
// within timeout. If wantReady is true, it will ensure the node is ready; if
// it's false, it ensures the node is in any state other than ready (e.g. not
// ready or unknown).
func waitForNodeToBe(c *client.Client, name string, wantReady bool, timeout time.Duration) bool {
Logf("Waiting up to %v for node %s readiness to be %t", timeout, name, wantReady)
// waitForNodeToBe returns whether node "name's" condition state matches wantTrue
// within timeout. If wantTrue is true, it will ensure the node condition status
// is ConditionTrue; if it's false, it ensures the node condition is in any state
// other than ConditionTrue (e.g. not true or unknown).
func waitForNodeToBe(c *client.Client, name string, conditionType api.NodeConditionType, wantTrue bool, timeout time.Duration) bool {
Logf("Waiting up to %v for node %s condition %s to be %t", timeout, name, conditionType, wantTrue)
for start := time.Now(); time.Since(start) < timeout; time.Sleep(poll) {
node, err := c.Nodes().Get(name)
if err != nil {
@ -2094,11 +2112,11 @@ func waitForNodeToBe(c *client.Client, name string, wantReady bool, timeout time
continue
}
if isNodeReadySetAsExpected(node, wantReady) {
if isNodeConditionSetAsExpected(node, conditionType, wantTrue) {
return true
}
}
Logf("Node %s didn't reach desired readiness (%t) within %v", name, wantReady, timeout)
Logf("Node %s didn't reach desired %s condition status (%t) within %v", name, conditionType, wantTrue, timeout)
return false
}
@ -2114,7 +2132,7 @@ func allNodesReady(c *client.Client, timeout time.Duration) error {
return false, err
}
for _, node := range nodes.Items {
if !isNodeReadySetAsExpected(&node, true) {
if !isNodeConditionSetAsExpected(&node, api.NodeReady, true) {
notReady = append(notReady, node)
}
}
@ -2215,7 +2233,7 @@ func waitForClusterSize(c *client.Client, size int, timeout time.Duration) error
// Filter out not-ready nodes.
filterNodes(nodes, func(node api.Node) bool {
return isNodeReadySetAsExpected(&node, true)
return isNodeConditionSetAsExpected(&node, api.NodeReady, true)
})
numReady := len(nodes.Items)