Split kubelet e2e resource usage tracking tests

Some tests in this test suite expects --max-pods (i.e. the maximum pod capacity
on kubelet) to be greater than default, which applies only to the GCE test
environment. Split the tests into two sets so that we can better categorize
the tests in the jenkins setup, without making the test itself aware of the
environment.
pull/6/head
Yu-Ju Hong 2015-10-01 10:46:46 -07:00
parent a512111c9d
commit 15cdc45b09
2 changed files with 72 additions and 52 deletions

View File

@ -112,8 +112,14 @@ GKE_REQUIRED_SKIP_TESTS=(
"Shell"
"Daemon\sset"
"Deployment"
"experimental\sresource\susage\stracking" # Expect --max-pods=100
)
# Tests which cannot be run on AWS.
AWS_REQUIRED_SKIP_TESTS=(
"experimental\sresource\susage\stracking" # Expect --max-pods=100
)
# The following tests are known to be flaky, and are thus run only in their own
# -flaky- build variants.
GCE_FLAKY_TESTS=(
@ -301,6 +307,7 @@ case ${JOB_NAME} in
${GCE_PARALLEL_SKIP_TESTS[@]:+${GCE_PARALLEL_SKIP_TESTS[@]}} \
${GCE_FLAKY_TESTS[@]:+${GCE_FLAKY_TESTS[@]}} \
${GCE_PARALLEL_FLAKY_TESTS[@]:+${GCE_PARALLEL_FLAKY_TESTS[@]}} \
${AWS_REQUIRED_SKIP_TESTS[@]:+${AWS_REQUIRED_SKIP_TESTS[@]}} \
)"}
: ${ENABLE_DEPLOYMENTS:=true}
# Override AWS defaults.
@ -439,6 +446,7 @@ case ${JOB_NAME} in
${GKE_REQUIRED_SKIP_TESTS[@]:+${GKE_REQUIRED_SKIP_TESTS[@]}} \
${GCE_DEFAULT_SKIP_TESTS[@]:+${GCE_DEFAULT_SKIP_TESTS[@]}} \
${GCE_FLAKY_TESTS[@]:+${GCE_FLAKY_TESTS[@]}} \
${GCE_SLOW_TESTS[@]:+${GCE_SLOW_TESTS[@]}} \
)"}
;;
@ -455,6 +463,7 @@ case ${JOB_NAME} in
${REBOOT_SKIP_TESTS[@]:+${REBOOT_SKIP_TESTS[@]}} \
${GCE_FLAKY_TESTS[@]:+${GCE_FLAKY_TESTS[@]}} \
${GCE_PARALLEL_SKIP_TESTS[@]:+${GCE_PARALLEL_SKIP_TESTS[@]}} \
${GCE_SLOW_TESTS[@]:+${GCE_SLOW_TESTS[@]}} \
)"}
;;
@ -499,6 +508,7 @@ case ${JOB_NAME} in
${GKE_REQUIRED_SKIP_TESTS[@]:+${GKE_REQUIRED_SKIP_TESTS[@]}} \
${GCE_DEFAULT_SKIP_TESTS[@]:+${GCE_DEFAULT_SKIP_TESTS[@]}} \
${GCE_FLAKY_TESTS[@]:+${GCE_FLAKY_TESTS[@]}} \
${GCE_SLOW_TESTS[@]:+${GCE_SLOW_TESTS[@]}} \
)"}
;;

View File

@ -50,8 +50,55 @@ func logPodsOnNodes(c *client.Client, nodeNames []string) {
}
}
func runResourceTrackingTest(framework *Framework, podsPerNode int, nodeNames sets.String, resourceMonitor *resourceMonitor) {
numNodes := nodeNames.Len()
totalPods := podsPerNode * numNodes
By(fmt.Sprintf("Creating a RC of %d pods and wait until all pods of this RC are running", totalPods))
rcName := fmt.Sprintf("resource%d-%s", totalPods, string(util.NewUUID()))
// TODO: Use a more realistic workload
Expect(RunRC(RCConfig{
Client: framework.Client,
Name: rcName,
Namespace: framework.Namespace.Name,
Image: "gcr.io/google_containers/pause:go",
Replicas: totalPods,
})).NotTo(HaveOccurred())
// Log once and flush the stats.
resourceMonitor.LogLatest()
resourceMonitor.Reset()
By("Start monitoring resource usage")
// Periodically dump the cpu summary until the deadline is met.
// Note that without calling resourceMonitor.Reset(), the stats
// would occupy increasingly more memory. This should be fine
// for the current test duration, but we should reclaim the
// entries if we plan to monitor longer (e.g., 8 hours).
deadline := time.Now().Add(monitoringTime)
for time.Now().Before(deadline) {
Logf("Still running...%v left", deadline.Sub(time.Now()))
time.Sleep(reportingPeriod)
timeLeft := deadline.Sub(time.Now())
Logf("Still running...%v left", timeLeft)
if timeLeft < reportingPeriod {
time.Sleep(timeLeft)
} else {
time.Sleep(reportingPeriod)
}
logPodsOnNodes(framework.Client, nodeNames.List())
}
By("Reporting overall resource usage")
logPodsOnNodes(framework.Client, nodeNames.List())
resourceMonitor.LogCPUSummary()
resourceMonitor.LogLatest()
By("Deleting the RC")
DeleteRC(framework.Client, framework.Namespace.Name, rcName)
}
var _ = Describe("Kubelet", func() {
var numNodes int
var nodeNames sets.String
framework := NewFramework("kubelet-perf")
var resourceMonitor *resourceMonitor
@ -59,7 +106,6 @@ var _ = Describe("Kubelet", func() {
BeforeEach(func() {
nodes, err := framework.Client.Nodes().List(labels.Everything(), fields.Everything())
expectNoError(err)
numNodes = len(nodes.Items)
nodeNames = sets.NewString()
for _, node := range nodes.Items {
nodeNames.Insert(node.Name)
@ -72,61 +118,25 @@ var _ = Describe("Kubelet", func() {
resourceMonitor.Stop()
})
Describe("resource usage tracking", func() {
density := []int{0, 50}
Describe("regular resource usage tracking", func() {
density := []int{0, 35}
for i := range density {
podsPerNode := density[i]
name := fmt.Sprintf(
"over %v with %d pods per node.", monitoringTime, podsPerNode)
It(name, func() {
// Skip this test for GKE.
// TODO: Re-activate this for GKE
SkipIfProviderIs("gke")
totalPods := podsPerNode * numNodes
By(fmt.Sprintf("Creating a RC of %d pods and wait until all pods of this RC are running", totalPods))
rcName := fmt.Sprintf("resource%d-%s", totalPods, string(util.NewUUID()))
// TODO: Use a more realistic workload
Expect(RunRC(RCConfig{
Client: framework.Client,
Name: rcName,
Namespace: framework.Namespace.Name,
Image: "gcr.io/google_containers/pause:go",
Replicas: totalPods,
})).NotTo(HaveOccurred())
// Log once and flush the stats.
resourceMonitor.LogLatest()
resourceMonitor.Reset()
By("Start monitoring resource usage")
// Periodically dump the cpu summary until the deadline is met.
// Note that without calling resourceMonitor.Reset(), the stats
// would occupy increasingly more memory. This should be fine
// for the current test duration, but we should reclaim the
// entries if we plan to monitor longer (e.g., 8 hours).
deadline := time.Now().Add(monitoringTime)
for time.Now().Before(deadline) {
Logf("Still running...%v left", deadline.Sub(time.Now()))
time.Sleep(reportingPeriod)
timeLeft := deadline.Sub(time.Now())
Logf("Still running...%v left", timeLeft)
if timeLeft < reportingPeriod {
time.Sleep(timeLeft)
} else {
time.Sleep(reportingPeriod)
}
logPodsOnNodes(framework.Client, nodeNames.List())
}
By("Reporting overall resource usage")
logPodsOnNodes(framework.Client, nodeNames.List())
resourceMonitor.LogCPUSummary()
resourceMonitor.LogLatest()
By("Deleting the RC")
DeleteRC(framework.Client, framework.Namespace.Name, rcName)
runResourceTrackingTest(framework, podsPerNode, nodeNames, resourceMonitor)
})
}
})
Describe("experimental resource usage tracking", func() {
density := []int{50}
for i := range density {
podsPerNode := density[i]
name := fmt.Sprintf(
"over %v with %d pods per node.", monitoringTime, podsPerNode)
It(name, func() {
runResourceTrackingTest(framework, podsPerNode, nodeNames, resourceMonitor)
})
}
})