diff --git a/test/e2e/kubelet_perf.go b/test/e2e/kubelet_perf.go
index 9643c75a60..19c2cb1201 100644
--- a/test/e2e/kubelet_perf.go
+++ b/test/e2e/kubelet_perf.go
@@ -18,6 +18,7 @@ package e2e
 
 import (
 	"fmt"
+	"strings"
 	"time"
 
 	"k8s.io/kubernetes/pkg/api/unversioned"
@@ -31,13 +32,18 @@ import (
 
 const (
 	// Interval to poll /stats/container on a node
-	containerStatsPollingPeriod = 10 * time.Second
+	containerStatsPollingPeriod = 3 * time.Second
 	// The monitoring time for one test.
 	monitoringTime = 20 * time.Minute
 	// The periodic reporting period.
 	reportingPeriod = 5 * time.Minute
 )
 
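+// resourceTest specifies the pod density for one resource tracking test and
+// the expected CPU usage limits for each monitored container.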
+type resourceTest struct {
+	podsPerNode int
+	limits      containersCPUSummary
+}
+
 func logPodsOnNodes(c *client.Client, nodeNames []string) {
 	for _, n := range nodeNames {
 		podList, err := GetKubeletPods(c, n)
@@ -49,7 +55,7 @@ func logPodsOnNodes(c *client.Client, nodeNames []string) {
 	}
 }
 
-func runResourceTrackingTest(framework *Framework, podsPerNode int, nodeNames sets.String, resourceMonitor *resourceMonitor) {
+func runResourceTrackingTest(framework *Framework, podsPerNode int, nodeNames sets.String, rm *resourceMonitor, expected containersCPUSummary) {
 	numNodes := nodeNames.Len()
 	totalPods := podsPerNode * numNodes
 	By(fmt.Sprintf("Creating a RC of %d pods and wait until all pods of this RC are running", totalPods))
@@ -65,8 +71,8 @@ func runResourceTrackingTest(framework *Framework, podsPerNode int, nodeNames se
 	})).NotTo(HaveOccurred())
 
 	// Log once and flush the stats.
-	resourceMonitor.LogLatest()
-	resourceMonitor.Reset()
+	rm.LogLatest()
+	rm.Reset()
 
 	By("Start monitoring resource usage")
 	// Periodically dump the cpu summary until the deadline is met.
@@ -76,8 +82,6 @@ func runResourceTrackingTest(framework *Framework, podsPerNode int, nodeNames se
 	// entries if we plan to monitor longer (e.g., 8 hours).
 	deadline := time.Now().Add(monitoringTime)
 	for time.Now().Before(deadline) {
-		Logf("Still running...%v left", deadline.Sub(time.Now()))
-		time.Sleep(reportingPeriod)
 		timeLeft := deadline.Sub(time.Now())
 		Logf("Still running...%v left", timeLeft)
 		if timeLeft < reportingPeriod {
@@ -90,17 +94,54 @@ func runResourceTrackingTest(framework *Framework, podsPerNode int, nodeNames se
 
 	By("Reporting overall resource usage")
 	logPodsOnNodes(framework.Client, nodeNames.List())
-	resourceMonitor.LogCPUSummary()
-	resourceMonitor.LogLatest()
+	rm.LogLatest()
+
+	summary := rm.GetCPUSummary()
+	Logf("%s", rm.FormatCPUSummary(summary))
+	verifyCPULimits(expected, summary)
 
 	By("Deleting the RC")
 	DeleteRC(framework.Client, framework.Namespace.Name, rcName)
 }
 
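+// verifyCPULimits fails the test if, on any node, a monitored container is
+// missing from the summary or its CPU usage exceeds the expected value at
+// any of the given percentiles. A nil expected summary disables the check.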
+func verifyCPULimits(expected containersCPUSummary, actual nodesCPUSummary) {
+	if expected == nil {
+		return
+	}
+	var errList []string
+	for nodeName, perNodeSummary := range actual {
+		var nodeErrs []string
+		for cName, expectedResult := range expected {
+			perContainerSummary, ok := perNodeSummary[cName]
+			if !ok {
+				nodeErrs = append(nodeErrs, fmt.Sprintf("container %q: missing", cName))
+				continue
+			}
+			for p, expectedValue := range expectedResult {
+				actualValue, ok := perContainerSummary[p]
+				if !ok {
+					nodeErrs = append(nodeErrs, fmt.Sprintf("container %q: missing percentile %v", cName, p))
+					continue
+				}
+				if actualValue > expectedValue {
+					nodeErrs = append(nodeErrs, fmt.Sprintf("container %q: expected %.0fth%% usage < %.3f; got %.3f",
+						cName, p*100, expectedValue, actualValue))
+				}
+			}
+		}
+		if len(nodeErrs) > 0 {
+			errList = append(errList, fmt.Sprintf("node %v:\n %s", nodeName, strings.Join(nodeErrs, ", ")))
+		}
+	}
+	if len(errList) > 0 {
+		Failf("CPU usage exceeding limits:\n %s", strings.Join(errList, "\n"))
+	}
+}
+
 var _ = Describe("Kubelet", func() {
 	var nodeNames sets.String
 	framework := NewFramework("kubelet-perf")
-	var resourceMonitor *resourceMonitor
+	var rm *resourceMonitor
 
 	BeforeEach(func() {
 		nodes, err := framework.Client.Nodes().List(unversioned.ListOptions{})
@@ -109,22 +150,35 @@ var _ = Describe("Kubelet", func() {
 		for _, node := range nodes.Items {
 			nodeNames.Insert(node.Name)
 		}
-		resourceMonitor = newResourceMonitor(framework.Client, targetContainers(), containerStatsPollingPeriod)
-		resourceMonitor.Start()
+		rm = newResourceMonitor(framework.Client, targetContainers(), containerStatsPollingPeriod)
+		rm.Start()
 	})
 
 	AfterEach(func() {
-		resourceMonitor.Stop()
+		rm.Stop()
 	})
 
 	Describe("regular resource usage tracking", func() {
-		density := []int{0, 40}
-		for i := range density {
-			podsPerNode := density[i]
+		rTests := []resourceTest{
+			{podsPerNode: 0,
+				limits: containersCPUSummary{
+					"/kubelet":       {0.50: 0.05, 0.95: 0.15},
+					"/docker-daemon": {0.50: 0.03, 0.95: 0.06},
+				},
+			},
+			{podsPerNode: 40,
+				limits: containersCPUSummary{
+					"/kubelet":       {0.50: 0.15, 0.95: 0.35},
+					"/docker-daemon": {0.50: 0.06, 0.95: 0.30},
+				},
+			},
+		}
+		for _, testArg := range rTests {
+			itArg := testArg
+			podsPerNode := itArg.podsPerNode
 			name := fmt.Sprintf(
-				"over %v with %d pods per node", monitoringTime, podsPerNode)
+				"for %d pods per node over %v", podsPerNode, monitoringTime)
 			It(name, func() {
-				runResourceTrackingTest(framework, podsPerNode, nodeNames, resourceMonitor)
+				runResourceTrackingTest(framework, podsPerNode, nodeNames, rm, itArg.limits)
 			})
 		}
 	})
@@ -133,9 +187,9 @@ var _ = Describe("Kubelet", func() {
 		for i := range density {
 			podsPerNode := density[i]
 			name := fmt.Sprintf(
-				"over %v with %d pods per node.", monitoringTime, podsPerNode)
+				"for %d pods per node over %v", podsPerNode, monitoringTime)
 			It(name, func() {
-				runResourceTrackingTest(framework, podsPerNode, nodeNames, resourceMonitor)
+				runResourceTrackingTest(framework, podsPerNode, nodeNames, rm, nil)
 			})
 		}
 	})
diff --git a/test/e2e/kubelet_stats.go b/test/e2e/kubelet_stats.go
index 78182c1a94..0bba0ee90b 100644
--- a/test/e2e/kubelet_stats.go
+++ b/test/e2e/kubelet_stats.go
@@ -692,7 +692,7 @@ func (r *resourceMonitor) Start() {
 	}
 	r.collectors = make(map[string]*resourceCollector, 0)
 	for _, node := range nodes.Items {
-		collector := newResourceCollector(r.client, node.Name, r.containers, pollInterval)
+		collector := newResourceCollector(r.client, node.Name, r.containers, r.pollingInterval)
 		r.collectors[node.Name] = collector
 		collector.Start()
 	}
@@ -716,33 +716,64 @@ func (r *resourceMonitor) LogLatest() {
 	}
 }
 
-func (r *resourceMonitor) LogCPUSummary() {
+// containersCPUSummary is indexed by the container name with each entry a
+// (percentile, value) map.
+type containersCPUSummary map[string]map[float64]float64
+
+// nodesCPUSummary is indexed by the node name with each entry a
+// containersCPUSummary map.
+type nodesCPUSummary map[string]containersCPUSummary
+
+func (r *resourceMonitor) FormatCPUSummary(summary nodesCPUSummary) string {
 	// Example output for a node (the percentiles may differ):
-	// CPU usage of containers on node "e2e-test-yjhong-minion-0vj7":
+	// CPU usage of containers on node "e2e-test-foo-minion-0vj7":
 	// container        5th%  50th% 90th% 95th%
 	// "/"              0.051 0.159 0.387 0.455
 	// "/docker-daemon" 0.000 0.000 0.146 0.166
 	// "/kubelet"       0.036 0.053 0.091 0.154
 	// "/system"        0.001 0.001 0.001 0.002
+	var summaryStrings []string
 	var header []string
 	header = append(header, "container")
 	for _, p := range percentiles {
 		header = append(header, fmt.Sprintf("%.0fth%%", p*100))
 	}
-	for nodeName, collector := range r.collectors {
+	for nodeName, containers := range summary {
 		buf := &bytes.Buffer{}
 		w := tabwriter.NewWriter(buf, 1, 0, 1, ' ', 0)
 		fmt.Fprintf(w, "%s\n", strings.Join(header, "\t"))
 		for _, containerName := range targetContainers() {
-			data := collector.GetBasicCPUStats(containerName)
 			var s []string
 			s = append(s, fmt.Sprintf("%q", containerName))
+			data, ok := containers[containerName]
 			for _, p := range percentiles {
-				s = append(s, fmt.Sprintf("%.3f", data[p]))
+				value := "N/A"
+				if ok {
+					value = fmt.Sprintf("%.3f", data[p])
+				}
+				s = append(s, value)
 			}
 			fmt.Fprintf(w, "%s\n", strings.Join(s, "\t"))
 		}
 		w.Flush()
-		Logf("\nCPU usage of containers on node %q:\n%s", nodeName, buf.String())
+		summaryStrings = append(summaryStrings, fmt.Sprintf("CPU usage of containers on node %q:\n%s", nodeName, buf.String()))
 	}
+	return strings.Join(summaryStrings, "\n")
+}
+
+func (r *resourceMonitor) LogCPUSummary() {
+	summary := r.GetCPUSummary()
+	Logf("%s", r.FormatCPUSummary(summary))
+}
+
+func (r *resourceMonitor) GetCPUSummary() nodesCPUSummary {
+	result := make(nodesCPUSummary)
+	for nodeName, collector := range r.collectors {
+		result[nodeName] = make(containersCPUSummary)
+		for _, containerName := range targetContainers() {
+			data := collector.GetBasicCPUStats(containerName)
+			result[nodeName][containerName] = data
+		}
+	}
+	return result
+}