mirror of https://github.com/k3s-io/k3s
Split kubelet e2e resource usage tracking tests
Some tests in this suite expect --max-pods (i.e. the maximum pod capacity of the kubelet) to be greater than the default, which currently holds only in the GCE test environment. Split the tests into two sets so that we can categorize them better in the Jenkins setup, without making the tests themselves aware of the environment.
parent a512111c9d
commit 15cdc45b09
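For context, the skip entries below are regular expressions matched against full ginkgo spec names, which ginkgo assembles from the nested Describe/It strings; `\s` stands in for the spaces in those names. A minimal sketch of the matching, with an assumed spec name (the rendered monitoringTime duration is illustrative); GNU grep's `\s` extension stands in here for Go's regexp engine:

# Assumed spec name: Describe("Kubelet") + Describe("experimental resource
# usage tracking") + the It() string; "1h0m0s" is an illustrative duration.
name="Kubelet experimental resource usage tracking over 1h0m0s with 50 pods per node."
echo "${name}" | grep -q "experimental\sresource\susage\stracking" \
  && echo "matched: this spec would be skipped"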
@@ -112,8 +112,14 @@ GKE_REQUIRED_SKIP_TESTS=(
    "Shell"
    "Daemon\sset"
    "Deployment"
    "experimental\sresource\susage\stracking" # Expect --max-pods=100
)

# Tests which cannot be run on AWS.
AWS_REQUIRED_SKIP_TESTS=(
    "experimental\sresource\susage\stracking" # Expect --max-pods=100
)

# The following tests are known to be flaky, and are thus run only in their own
# -flaky- build variants.
GCE_FLAKY_TESTS=(
@@ -301,6 +307,7 @@ case ${JOB_NAME} in
          ${GCE_PARALLEL_SKIP_TESTS[@]:+${GCE_PARALLEL_SKIP_TESTS[@]}} \
          ${GCE_FLAKY_TESTS[@]:+${GCE_FLAKY_TESTS[@]}} \
          ${GCE_PARALLEL_FLAKY_TESTS[@]:+${GCE_PARALLEL_FLAKY_TESTS[@]}} \
          ${AWS_REQUIRED_SKIP_TESTS[@]:+${AWS_REQUIRED_SKIP_TESTS[@]}} \
          )"}
    : ${ENABLE_DEPLOYMENTS:=true}
    # Override AWS defaults.
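The `${ARRAY[@]:+${ARRAY[@]}}` expansions above contribute each non-empty array's patterns to a helper that folds them into a single alternation for ginkgo's skip flag. A minimal sketch of that folding, using a stand-in join function rather than the script's actual helper:

# Stand-in join helper: concatenates its arguments with "|", the
# alternation operator in Go regular expressions.
join_regex() { local IFS="|"; echo "$*"; }

# Hypothetical assembly mirroring the case branch above; the flaky
# entry is a placeholder.
GCE_FLAKY_TESTS=("some\sflaky\stest")
AWS_REQUIRED_SKIP_TESTS=("experimental\sresource\susage\stracking")
GINKGO_TEST_ARGS="--ginkgo.skip=$(join_regex \
    ${GCE_FLAKY_TESTS[@]:+"${GCE_FLAKY_TESTS[@]}"} \
    ${AWS_REQUIRED_SKIP_TESTS[@]:+"${AWS_REQUIRED_SKIP_TESTS[@]}"})"
echo "${GINKGO_TEST_ARGS}"
# --ginkgo.skip=some\sflaky\stest|experimental\sresource\susage\stracking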
@@ -439,6 +446,7 @@ case ${JOB_NAME} in
          ${GKE_REQUIRED_SKIP_TESTS[@]:+${GKE_REQUIRED_SKIP_TESTS[@]}} \
          ${GCE_DEFAULT_SKIP_TESTS[@]:+${GCE_DEFAULT_SKIP_TESTS[@]}} \
          ${GCE_FLAKY_TESTS[@]:+${GCE_FLAKY_TESTS[@]}} \
          ${GCE_SLOW_TESTS[@]:+${GCE_SLOW_TESTS[@]}} \
          )"}
    ;;
@@ -455,6 +463,7 @@ case ${JOB_NAME} in
          ${REBOOT_SKIP_TESTS[@]:+${REBOOT_SKIP_TESTS[@]}} \
          ${GCE_FLAKY_TESTS[@]:+${GCE_FLAKY_TESTS[@]}} \
          ${GCE_PARALLEL_SKIP_TESTS[@]:+${GCE_PARALLEL_SKIP_TESTS[@]}} \
          ${GCE_SLOW_TESTS[@]:+${GCE_SLOW_TESTS[@]}} \
          )"}
    ;;
@@ -499,6 +508,7 @@ case ${JOB_NAME} in
          ${GKE_REQUIRED_SKIP_TESTS[@]:+${GKE_REQUIRED_SKIP_TESTS[@]}} \
          ${GCE_DEFAULT_SKIP_TESTS[@]:+${GCE_DEFAULT_SKIP_TESTS[@]}} \
          ${GCE_FLAKY_TESTS[@]:+${GCE_FLAKY_TESTS[@]}} \
          ${GCE_SLOW_TESTS[@]:+${GCE_SLOW_TESTS[@]}} \
          )"}
    ;;
@@ -50,8 +50,55 @@ func logPodsOnNodes(c *client.Client, nodeNames []string) {
    }
}

func runResourceTrackingTest(framework *Framework, podsPerNode int, nodeNames sets.String, resourceMonitor *resourceMonitor) {
    numNodes := nodeNames.Len()
    totalPods := podsPerNode * numNodes
    By(fmt.Sprintf("Creating a RC of %d pods and wait until all pods of this RC are running", totalPods))
    rcName := fmt.Sprintf("resource%d-%s", totalPods, string(util.NewUUID()))

    // TODO: Use a more realistic workload
    Expect(RunRC(RCConfig{
        Client:    framework.Client,
        Name:      rcName,
        Namespace: framework.Namespace.Name,
        Image:     "gcr.io/google_containers/pause:go",
        Replicas:  totalPods,
    })).NotTo(HaveOccurred())

    // Log once and flush the stats.
    resourceMonitor.LogLatest()
    resourceMonitor.Reset()

    By("Start monitoring resource usage")
    // Periodically dump the cpu summary until the deadline is met.
    // Note that without calling resourceMonitor.Reset(), the stats
    // would occupy increasingly more memory. This should be fine
    // for the current test duration, but we should reclaim the
    // entries if we plan to monitor longer (e.g., 8 hours).
    deadline := time.Now().Add(monitoringTime)
    for time.Now().Before(deadline) {
        timeLeft := deadline.Sub(time.Now())
        Logf("Still running...%v left", timeLeft)
        if timeLeft < reportingPeriod {
            time.Sleep(timeLeft)
        } else {
            time.Sleep(reportingPeriod)
        }
        logPodsOnNodes(framework.Client, nodeNames.List())
    }

    By("Reporting overall resource usage")
    logPodsOnNodes(framework.Client, nodeNames.List())
    resourceMonitor.LogCPUSummary()
    resourceMonitor.LogLatest()

    By("Deleting the RC")
    DeleteRC(framework.Client, framework.Namespace.Name, rcName)
}

var _ = Describe("Kubelet", func() {
    var numNodes int
    var nodeNames sets.String
    framework := NewFramework("kubelet-perf")
    var resourceMonitor *resourceMonitor
@@ -59,7 +106,6 @@ var _ = Describe("Kubelet", func() {
    BeforeEach(func() {
        nodes, err := framework.Client.Nodes().List(labels.Everything(), fields.Everything())
        expectNoError(err)
        numNodes = len(nodes.Items)
        nodeNames = sets.NewString()
        for _, node := range nodes.Items {
            nodeNames.Insert(node.Name)
@@ -72,61 +118,25 @@ var _ = Describe("Kubelet", func() {
        resourceMonitor.Stop()
    })

    Describe("resource usage tracking", func() {
        density := []int{0, 50}
    Describe("regular resource usage tracking", func() {
        density := []int{0, 35}
        for i := range density {
            podsPerNode := density[i]
            name := fmt.Sprintf(
                "over %v with %d pods per node.", monitoringTime, podsPerNode)
            It(name, func() {
                // Skip this test for GKE.
                // TODO: Re-activate this for GKE
                SkipIfProviderIs("gke")

                totalPods := podsPerNode * numNodes
                By(fmt.Sprintf("Creating a RC of %d pods and wait until all pods of this RC are running", totalPods))
                rcName := fmt.Sprintf("resource%d-%s", totalPods, string(util.NewUUID()))

                // TODO: Use a more realistic workload
                Expect(RunRC(RCConfig{
                    Client:    framework.Client,
                    Name:      rcName,
                    Namespace: framework.Namespace.Name,
                    Image:     "gcr.io/google_containers/pause:go",
                    Replicas:  totalPods,
                })).NotTo(HaveOccurred())

                // Log once and flush the stats.
                resourceMonitor.LogLatest()
                resourceMonitor.Reset()

                By("Start monitoring resource usage")
                // Periodically dump the cpu summary until the deadline is met.
                // Note that without calling resourceMonitor.Reset(), the stats
                // would occupy increasingly more memory. This should be fine
                // for the current test duration, but we should reclaim the
                // entries if we plan to monitor longer (e.g., 8 hours).
                deadline := time.Now().Add(monitoringTime)
                for time.Now().Before(deadline) {
                    timeLeft := deadline.Sub(time.Now())
                    Logf("Still running...%v left", timeLeft)
                    if timeLeft < reportingPeriod {
                        time.Sleep(timeLeft)
                    } else {
                        time.Sleep(reportingPeriod)
                    }
                    logPodsOnNodes(framework.Client, nodeNames.List())
                }

                By("Reporting overall resource usage")
                logPodsOnNodes(framework.Client, nodeNames.List())
                resourceMonitor.LogCPUSummary()
                resourceMonitor.LogLatest()

                By("Deleting the RC")
                DeleteRC(framework.Client, framework.Namespace.Name, rcName)
                runResourceTrackingTest(framework, podsPerNode, nodeNames, resourceMonitor)
            })
        }
    })
    Describe("experimental resource usage tracking", func() {
        density := []int{50}
        for i := range density {
            podsPerNode := density[i]
            name := fmt.Sprintf(
                "over %v with %d pods per node.", monitoringTime, podsPerNode)
            It(name, func() {
                runResourceTrackingTest(framework, podsPerNode, nodeNames, resourceMonitor)
            })
        }
    })
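With the suite split by name, an environment that actually provisions kubelets with a raised pod capacity can opt in to just the experimental set, while GKE and AWS skip it through the arrays above. An illustrative invocation; the env var and flag values are assumptions, not taken from this commit:

# Assumed knob for raising the kubelet pod cap on the test cluster.
export KUBELET_TEST_ARGS="--max-pods=100"
# Focus ginkgo on the experimental tracking specs by name.
go run hack/e2e.go -v --test \
    --test_args="--ginkgo.focus=experimental\sresource\susage\stracking"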