e2e: set expected cpu usage upper bounds

Fail the test if the actual cpu usage exceeds the expected upper bound.
Yu-Ju Hong 2015-12-08 08:14:48 -08:00
parent c1a0502487
commit 7e8f4d831d
2 changed files with 112 additions and 27 deletions
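
A minimal sketch of the flow this commit sets up, using only names from the diff below; the client c and the expectedLimits map are stand-ins, and pod creation is elided:

rm := newResourceMonitor(c, targetContainers(), containerStatsPollingPeriod)
rm.Start()
defer rm.Stop()
// ... run the pods-per-node workload for monitoringTime ...
summary := rm.GetCPUSummary()            // node -> container -> percentile -> cores
Logf("%s", rm.FormatCPUSummary(summary))
verifyCPULimits(expectedLimits, summary) // calls Failf if any bound is exceeded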


@@ -18,6 +18,7 @@ package e2e
import (
"fmt"
+"strings"
"time"
"k8s.io/kubernetes/pkg/api/unversioned"
@@ -31,13 +32,18 @@ import (
const (
// Interval to poll /stats/container on a node
-containerStatsPollingPeriod = 10 * time.Second
+containerStatsPollingPeriod = 3 * time.Second
// The monitoring time for one test.
monitoringTime = 20 * time.Minute
// The periodic reporting period.
reportingPeriod = 5 * time.Minute
)
+type resourceTest struct {
+podsPerNode int
+limits containersCPUSummary
+}
func logPodsOnNodes(c *client.Client, nodeNames []string) {
for _, n := range nodeNames {
podList, err := GetKubeletPods(c, n)
@@ -49,7 +55,7 @@ func logPodsOnNodes(c *client.Client, nodeNames []string) {
}
}
-func runResourceTrackingTest(framework *Framework, podsPerNode int, nodeNames sets.String, resourceMonitor *resourceMonitor) {
+func runResourceTrackingTest(framework *Framework, podsPerNode int, nodeNames sets.String, rm *resourceMonitor, expected containersCPUSummary) {
numNodes := nodeNames.Len()
totalPods := podsPerNode * numNodes
By(fmt.Sprintf("Creating a RC of %d pods and wait until all pods of this RC are running", totalPods))
@@ -65,8 +71,8 @@ func runResourceTrackingTest(framework *Framework, podsPerNode int, nodeNames se
})).NotTo(HaveOccurred())
// Log once and flush the stats.
-resourceMonitor.LogLatest()
-resourceMonitor.Reset()
+rm.LogLatest()
+rm.Reset()
By("Start monitoring resource usage")
// Periodically dump the cpu summary until the deadline is met.
@@ -76,8 +82,6 @@ func runResourceTrackingTest(framework *Framework, podsPerNode int, nodeNames se
// entries if we plan to monitor longer (e.g., 8 hours).
deadline := time.Now().Add(monitoringTime)
for time.Now().Before(deadline) {
-Logf("Still running...%v left", deadline.Sub(time.Now()))
-time.Sleep(reportingPeriod)
+timeLeft := deadline.Sub(time.Now())
+Logf("Still running...%v left", timeLeft)
+if timeLeft < reportingPeriod {
@@ -90,17 +94,54 @@ func runResourceTrackingTest(framework *Framework, podsPerNode int, nodeNames se
By("Reporting overall resource usage")
logPodsOnNodes(framework.Client, nodeNames.List())
-resourceMonitor.LogCPUSummary()
-resourceMonitor.LogLatest()
+rm.LogLatest()
+summary := rm.GetCPUSummary()
+Logf("%s", rm.FormatCPUSummary(summary))
+verifyCPULimits(expected, summary)
By("Deleting the RC")
DeleteRC(framework.Client, framework.Namespace.Name, rcName)
}
+func verifyCPULimits(expected containersCPUSummary, actual nodesCPUSummary) {
+if expected == nil {
+return
+}
+var errList []string
+for nodeName, perNodeSummary := range actual {
+var nodeErrs []string
+for cName, expectedResult := range expected {
+perContainerSummary, ok := perNodeSummary[cName]
+if !ok {
+nodeErrs = append(nodeErrs, fmt.Sprintf("container %q: missing", cName))
+continue
+}
+for p, expectedValue := range expectedResult {
+actualValue, ok := perContainerSummary[p]
+if !ok {
+nodeErrs = append(nodeErrs, fmt.Sprintf("container %q: missing percentile %v", cName, p))
+continue
+}
+if actualValue > expectedValue {
+nodeErrs = append(nodeErrs, fmt.Sprintf("container %q: expected %.0fth%% usage < %.3f; got %.3f",
+cName, p*100, expectedValue, actualValue))
+}
+}
+}
+if len(nodeErrs) > 0 {
+errList = append(errList, fmt.Sprintf("node %v:\n %s", nodeName, strings.Join(nodeErrs, ", ")))
+}
+}
+if len(errList) > 0 {
+Failf("CPU usage exceeding limits:\n %s", strings.Join(errList, "\n"))
+}
+}
var _ = Describe("Kubelet", func() {
var nodeNames sets.String
framework := NewFramework("kubelet-perf")
-var resourceMonitor *resourceMonitor
+var rm *resourceMonitor
BeforeEach(func() {
nodes, err := framework.Client.Nodes().List(unversioned.ListOptions{})
@@ -109,22 +150,35 @@ var _ = Describe("Kubelet", func() {
for _, node := range nodes.Items {
nodeNames.Insert(node.Name)
}
-resourceMonitor = newResourceMonitor(framework.Client, targetContainers(), containerStatsPollingPeriod)
-resourceMonitor.Start()
+rm = newResourceMonitor(framework.Client, targetContainers(), containerStatsPollingPeriod)
+rm.Start()
})
AfterEach(func() {
-resourceMonitor.Stop()
+rm.Stop()
})
Describe("regular resource usage tracking", func() {
-density := []int{0, 40}
-for i := range density {
-podsPerNode := density[i]
+rTests := []resourceTest{
+{podsPerNode: 0,
+limits: containersCPUSummary{
+"/kubelet": {0.50: 0.05, 0.95: 0.15},
+"/docker-daemon": {0.50: 0.03, 0.95: 0.06},
+},
+},
+{podsPerNode: 40,
+limits: containersCPUSummary{
+"/kubelet": {0.50: 0.15, 0.95: 0.35},
+"/docker-daemon": {0.50: 0.06, 0.95: 0.30},
+},
+},
+}
+for _, testArg := range rTests {
+itArg := testArg
+podsPerNode := itArg.podsPerNode
name := fmt.Sprintf(
-"over %v with %d pods per node", monitoringTime, podsPerNode)
+"for %d pods per node over %v", podsPerNode, monitoringTime)
It(name, func() {
-runResourceTrackingTest(framework, podsPerNode, nodeNames, resourceMonitor)
+runResourceTrackingTest(framework, podsPerNode, nodeNames, rm, itArg.limits)
})
}
})
@@ -133,9 +187,9 @@ var _ = Describe("Kubelet", func() {
for i := range density {
podsPerNode := density[i]
name := fmt.Sprintf(
-"over %v with %d pods per node.", monitoringTime, podsPerNode)
+"for %d pods per node over %v", podsPerNode, monitoringTime)
It(name, func() {
-runResourceTrackingTest(framework, podsPerNode, nodeNames, resourceMonitor)
+runResourceTrackingTest(framework, podsPerNode, nodeNames, rm, nil)
})
}
})
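
To make the new check concrete, a small worked example of what verifyCPULimits enforces; the limits are the zero-pods bounds from the table above, while the node name and measured values are hypothetical:

limits := containersCPUSummary{
    "/kubelet": {0.50: 0.05, 0.95: 0.15},
}
actual := nodesCPUSummary{
    "node-0": {"/kubelet": {0.50: 0.04, 0.95: 0.20}}, // 95th% of 0.20 > 0.15 bound
}
// verifyCPULimits(limits, actual) fails the test with (roughly):
//   CPU usage exceeding limits:
//    node node-0:
//     container "/kubelet": expected 95th% usage < 0.150; got 0.200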


@@ -692,7 +692,7 @@ func (r *resourceMonitor) Start() {
}
r.collectors = make(map[string]*resourceCollector, 0)
for _, node := range nodes.Items {
-collector := newResourceCollector(r.client, node.Name, r.containers, pollInterval)
+collector := newResourceCollector(r.client, node.Name, r.containers, r.pollingInterval)
r.collectors[node.Name] = collector
collector.Start()
}
@@ -716,33 +716,64 @@ func (r *resourceMonitor) LogLatest() {
}
}
-func (r *resourceMonitor) LogCPUSummary() {
+// containersCPUSummary is indexed by the container name with each entry a
+// (percentile, value) map.
+type containersCPUSummary map[string]map[float64]float64
+// nodesCPUSummary is indexed by the node name with each entry a
+// containersCPUSummary map.
+type nodesCPUSummary map[string]containersCPUSummary
+func (r *resourceMonitor) FormatCPUSummary(summary nodesCPUSummary) string {
// Example output for a node (the percentiles may differ):
-// CPU usage of containers on node "e2e-test-yjhong-minion-0vj7":
+// CPU usage of containers on node "e2e-test-foo-minion-0vj7":
// container 5th% 50th% 90th% 95th%
// "/" 0.051 0.159 0.387 0.455
// "/docker-daemon" 0.000 0.000 0.146 0.166
// "/kubelet" 0.036 0.053 0.091 0.154
// "/system" 0.001 0.001 0.001 0.002
+var summaryStrings []string
var header []string
header = append(header, "container")
for _, p := range percentiles {
header = append(header, fmt.Sprintf("%.0fth%%", p*100))
}
-for nodeName, collector := range r.collectors {
+for nodeName, containers := range summary {
buf := &bytes.Buffer{}
w := tabwriter.NewWriter(buf, 1, 0, 1, ' ', 0)
fmt.Fprintf(w, "%s\n", strings.Join(header, "\t"))
for _, containerName := range targetContainers() {
-data := collector.GetBasicCPUStats(containerName)
var s []string
s = append(s, fmt.Sprintf("%q", containerName))
+data, ok := containers[containerName]
for _, p := range percentiles {
-s = append(s, fmt.Sprintf("%.3f", data[p]))
+value := "N/A"
+if ok {
+value = fmt.Sprintf("%.3f", data[p])
+}
+s = append(s, value)
}
fmt.Fprintf(w, "%s\n", strings.Join(s, "\t"))
}
w.Flush()
-Logf("\nCPU usage of containers on node %q:\n%s", nodeName, buf.String())
+summaryStrings = append(summaryStrings, fmt.Sprintf("CPU usage of containers on node %q:\n%s", nodeName, buf.String()))
}
+return strings.Join(summaryStrings, "\n")
}
+func (r *resourceMonitor) LogCPUSummary() {
+summary := r.GetCPUSummary()
+Logf("%s", r.FormatCPUSummary(summary))
+}
+func (r *resourceMonitor) GetCPUSummary() nodesCPUSummary {
+result := make(nodesCPUSummary)
+for nodeName, collector := range r.collectors {
+result[nodeName] = make(containersCPUSummary)
+for _, containerName := range targetContainers() {
+data := collector.GetBasicCPUStats(containerName)
+result[nodeName][containerName] = data
+}
+}
+return result
+}
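
For illustration, the shape of the nodesCPUSummary that GetCPUSummary returns; the node name and numbers here are hypothetical, with values in fractions of a CPU core as produced by GetBasicCPUStats:

summary := nodesCPUSummary{
    "e2e-minion-0": {
        // container name -> percentile -> CPU usage in cores
        "/kubelet":       {0.50: 0.053, 0.95: 0.154},
        "/docker-daemon": {0.50: 0.012, 0.95: 0.166},
    },
}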