mirror of https://github.com/k3s-io/k3s
Merge pull request #30568 from coufon/node_density_and_performance_test
Automatic merge from submit-queue

Add tag [benchmark] to node e2e tests where performance limits are not verified

This PR adds a new tag "[benchmark]" to the density and resource-usage node e2e tests. Performance limits are not verified at the end of benchmark tests.
commit
d989fa9751
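The mechanism this commit introduces is a boolean isVerify flag threaded through the shared helpers (printAndVerifyLatency, printAndVerifyResource): contexts tagged [Benchmark] pass false, so results are only collected and logged, while the regular contexts pass true and still enforce the limits. Below is a minimal, self-contained sketch of that gating pattern; the types and the limit check are simplified stand-ins, not the real e2e framework API.

// Minimal sketch of the isVerify gating pattern used by this commit.
// densityTest and printAndVerifyLatency are simplified stand-ins here,
// not the actual test framework types.
package main

import (
	"fmt"
	"time"
)

type densityTest struct {
	podsNr               int
	podBatchStartupLimit time.Duration
}

// printAndVerifyLatency always logs throughput, but only enforces the limit
// when isVerify is true (i.e. outside [Benchmark] contexts).
func printAndVerifyLatency(batchLag time.Duration, testArg densityTest, isVerify bool) error {
	throughput := float64(testArg.podsNr) / batchLag.Minutes()
	fmt.Printf("Batch creation throughput is %.1f pods/min\n", throughput)

	if isVerify && testArg.podBatchStartupLimit > 0 && batchLag > testArg.podBatchStartupLimit {
		return fmt.Errorf("batch creation startup time %v exceeds limit %v",
			batchLag, testArg.podBatchStartupLimit)
	}
	return nil
}

func main() {
	arg := densityTest{podsNr: 10, podBatchStartupLimit: 5 * time.Minute}

	// Benchmark mode: collect and log the numbers, never fail on limits.
	_ = printAndVerifyLatency(7*time.Minute, arg, false)

	// Regular mode: the same data is checked against the limit.
	if err := printAndVerifyLatency(7*time.Minute, arg, true); err != nil {
		fmt.Println("limit violated:", err)
	}
}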
@@ -49,31 +49,36 @@ const (
 var _ = framework.KubeDescribe("Density [Serial] [Slow]", func() {
 	const (
-		// the data collection time of `resource collector' and the standalone cadvisor
-		// is not synchronizated. Therefore `resource collector' may miss data or
+		// The data collection time of resource collector and the standalone cadvisor
+		// is not synchronizated, so resource collector may miss data or
 		// collect duplicated data
-		monitoringInterval = 500 * time.Millisecond
-		sleepBeforeCreatePods = 30 * time.Second
+		containerStatsPollingPeriod = 500 * time.Millisecond
 	)

 	var (
 		ns       string
 		nodeName string
+		rc       *ResourceCollector
 	)

 	f := framework.NewDefaultFramework("density-test")
-	podType := "density_test_pod"

 	BeforeEach(func() {
 		ns = f.Namespace.Name
 		nodeName = framework.TestContext.NodeName
+		// Start a standalone cadvisor pod using 'createSync', the pod is running when it returns
+		createCadvisorPod(f)
+		// Resource collector monitors fine-grain CPU/memory usage by a standalone Cadvisor with
+		// 1s housingkeeping interval
+		rc = NewResourceCollector(containerStatsPollingPeriod)
 	})

 	AfterEach(func() {
 	})

 	Context("create a batch of pods", func() {
-		// TODO(coufon): add more tests and the values are generous, set more precise limits after benchmark
+		// TODO(coufon): the values are generous, set more precise limits with benchmark data
+		// and add more tests
 		dTests := []densityTest{
 			{
 				podsNr: 10,
@@ -101,102 +106,55 @@ var _ = framework.KubeDescribe("Density [Serial] [Slow]", func() {
 			itArg := testArg
 			It(fmt.Sprintf("latency/resource should be within limit when create %d pods with %v interval",
 				itArg.podsNr, itArg.interval), func() {
-				var (
-					mutex      = &sync.Mutex{}
-					watchTimes = make(map[string]unversioned.Time, 0)
-					stopCh     = make(chan struct{})
-				)
-
-				// create specifications of the test pods
-				pods := newTestPods(itArg.podsNr, ImageRegistry[pauseImage], podType)
-
-				// start a standalone cadvisor pod
-				// it uses `createSync', so the pod is running when it returns
-				createCadvisorPod(f)
-
-				// `resource collector' monitoring fine-grain CPU/memory usage by a standalone Cadvisor with
-				// 1s housingkeeping interval
-				rc := NewResourceCollector(monitoringInterval)
-
-				// the controller watches the change of pod status
-				controller := newInformerWatchPod(f, mutex, watchTimes, podType)
-				go controller.Run(stopCh)
-
-				// Zhou: In test we see kubelet starts while it is busy on something, as a result `syncLoop'
-				// does not response to pod creation immediately. Creating the first pod has a delay around 5s.
-				// The node status has been `ready' so `wait and check node being ready' does not help here.
-				// Now wait here for a grace period to have `syncLoop' be ready
-				time.Sleep(sleepBeforeCreatePods)
-
-				// the density test only monitors the overhead of creating pod
-				// or start earliest and call `rc.Reset()' here to clear the buffer
-				rc.Start()
-
-				By("Creating a batch of pods")
-				// it returns a map[`pod name']`creation time' as the creation timestamps
-				createTimes := createBatchPodWithRateControl(f, pods, itArg.interval)
-
-				By("Waiting for all Pods to be observed by the watch...")
-				// checks every 10s util all pods are running. it times out ater 10min
-				Eventually(func() bool {
-					return len(watchTimes) == itArg.podsNr
-				}, 10*time.Minute, 10*time.Second).Should(BeTrue())
-
-				if len(watchTimes) < itArg.podsNr {
-					framework.Failf("Timeout reached waiting for all Pods to be observed by the watch.")
-				}
-
-				// stop the watching controller, and the resource collector
-				close(stopCh)
-				rc.Stop()
-
-				// data analyis
-				var (
-					firstCreate unversioned.Time
-					lastRunning unversioned.Time
-					init        = true
-					e2eLags     = make([]framework.PodLatencyData, 0)
-				)
-
-				for name, create := range createTimes {
-					watch, ok := watchTimes[name]
-					Expect(ok).To(Equal(true))
-
-					e2eLags = append(e2eLags,
-						framework.PodLatencyData{Name: name, Latency: watch.Time.Sub(create.Time)})
-
-					if !init {
-						if firstCreate.Time.After(create.Time) {
-							firstCreate = create
-						}
-						if lastRunning.Time.Before(watch.Time) {
-							lastRunning = watch
-						}
-					} else {
-						init = false
-						firstCreate, lastRunning = create, watch
-					}
-				}
-
-				sort.Sort(framework.LatencySlice(e2eLags))
-
-				// verify latency
+				batchLag, e2eLags := runDensityBatchTest(f, rc, itArg)
+
 				By("Verifying latency")
-				verifyLatency(lastRunning.Time.Sub(firstCreate.Time), e2eLags, itArg)
+				printAndVerifyLatency(batchLag, e2eLags, itArg, true)

-				// verify resource
 				By("Verifying resource")
-				verifyResource(f, itArg.cpuLimits, itArg.memLimits, rc)
+				printAndVerifyResource(f, rc, itArg.cpuLimits, itArg.memLimits, true)
+			})
+		}
+	})
+
+	Context("create a batch of pods [Benchmark]", func() {
+		dTests := []densityTest{
+			{
+				podsNr:   10,
+				interval: 0 * time.Millisecond,
+			},
+			{
+				podsNr:   35,
+				interval: 0 * time.Millisecond,
+			},
+			{
+				podsNr:   105,
+				interval: 0 * time.Millisecond,
+			},
+		}
+
+		for _, testArg := range dTests {
+			itArg := testArg
+			It(fmt.Sprintf("latency/resource should be within limit when create %d pods with %v interval",
+				itArg.podsNr, itArg.interval), func() {
+
+				batchLag, e2eLags := runDensityBatchTest(f, rc, itArg)
+
+				By("Verifying latency")
+				printAndVerifyLatency(batchLag, e2eLags, itArg, false)
+
+				By("Verifying resource")
+				printAndVerifyResource(f, rc, itArg.cpuLimits, itArg.memLimits, false)
 			})
 		}
 	})

 	Context("create a sequence of pods", func() {
-		// TODO(coufon): add more tests and the values are generous, set more precise limits after benchmark
 		dTests := []densityTest{
 			{
 				podsNr: 10,
-				bgPodsNr: 10,
+				bgPodsNr: 50,
 				cpuLimits: framework.ContainersCPUSummary{
 					stats.SystemContainerKubelet: {0.50: 0.20, 0.95: 0.25},
 					stats.SystemContainerRuntime: {0.50: 0.40, 0.95: 0.60},
@@ -217,34 +175,46 @@ var _ = framework.KubeDescribe("Density [Serial] [Slow]", func() {
 			itArg := testArg
 			It(fmt.Sprintf("latency/resource should be within limit when create %d pods with %d background pods",
 				itArg.podsNr, itArg.bgPodsNr), func() {
-				bgPods := newTestPods(itArg.bgPodsNr, ImageRegistry[pauseImage], "background_pod")
-				testPods := newTestPods(itArg.podsNr, ImageRegistry[pauseImage], podType)
-
-				createCadvisorPod(f)
-				rc := NewResourceCollector(monitoringInterval)
-
-				By("Creating a batch of background pods")
-				// creatBatch is synchronized
-				// all pods are running when it returns
-				f.PodClient().CreateBatch(bgPods)
-
-				time.Sleep(sleepBeforeCreatePods)
-
-				// starting resource monitoring
-				rc.Start()
-
-				// do a sequential creation of pod (back to back)
-				batchlag, e2eLags := createBatchPodSequential(f, testPods)
-
-				rc.Stop()
-
-				// verify latency
+				batchlag, e2eLags := runDensitySeqTest(f, rc, itArg)
+
 				By("Verifying latency")
-				verifyLatency(batchlag, e2eLags, itArg)
+				printAndVerifyLatency(batchlag, e2eLags, itArg, true)

-				// verify resource
 				By("Verifying resource")
-				verifyResource(f, itArg.cpuLimits, itArg.memLimits, rc)
+				printAndVerifyResource(f, rc, itArg.cpuLimits, itArg.memLimits, true)
+			})
+		}
+	})
+
+	Context("create a sequence of pods [Benchmark]", func() {
+		dTests := []densityTest{
+			{
+				podsNr:   10,
+				bgPodsNr: 50,
+			},
+			{
+				podsNr:   30,
+				bgPodsNr: 50,
+			},
+			{
+				podsNr:   50,
+				bgPodsNr: 50,
+			},
+		}
+
+		for _, testArg := range dTests {
+			itArg := testArg
+			It(fmt.Sprintf("latency/resource should be within limit when create %d pods with %d background pods",
+				itArg.podsNr, itArg.bgPodsNr), func() {
+
+				batchlag, e2eLags := runDensitySeqTest(f, rc, itArg)
+
+				By("Verifying latency")
+				printAndVerifyLatency(batchlag, e2eLags, itArg, false)
+
+				By("Verifying resource")
+				printAndVerifyResource(f, rc, itArg.cpuLimits, itArg.memLimits, false)
 			})
 		}
 	})
@@ -257,14 +227,116 @@ type densityTest struct {
 	bgPodsNr int
 	// interval between creating pod (rate control)
 	interval time.Duration
-	// resource bound
+	// performance limits
 	cpuLimits            framework.ContainersCPUSummary
 	memLimits            framework.ResourceUsagePerContainer
 	podStartupLimits     framework.LatencyMetric
 	podBatchStartupLimit time.Duration
 }

-// it creates a batch of pods concurrently, uses one goroutine for each creation.
+// runDensityBatchTest runs the density batch pod creation test
+func runDensityBatchTest(f *framework.Framework, rc *ResourceCollector, testArg densityTest) (time.Duration, []framework.PodLatencyData) {
+	const (
+		podType               = "density_test_pod"
+		sleepBeforeCreatePods = 30 * time.Second
+	)
+	var (
+		mutex      = &sync.Mutex{}
+		watchTimes = make(map[string]unversioned.Time, 0)
+		stopCh     = make(chan struct{})
+	)
+
+	// create test pod data structure
+	pods := newTestPods(testArg.podsNr, ImageRegistry[pauseImage], podType)
+
+	// the controller watches the change of pod status
+	controller := newInformerWatchPod(f, mutex, watchTimes, podType)
+	go controller.Run(stopCh)
+	defer close(stopCh)
+
+	// TODO(coufon): in the test we found kubelet starts while it is busy on something, as a result 'syncLoop'
+	// does not response to pod creation immediately. Creating the first pod has a delay around 5s.
+	// The node status has already been 'ready' so `wait and check node being ready does not help here.
+	// Now wait here for a grace period to let 'syncLoop' be ready
+	time.Sleep(sleepBeforeCreatePods)
+
+	rc.Start()
+	defer rc.Stop()
+
+	By("Creating a batch of pods")
+	// It returns a map['pod name']'creation time' containing the creation timestamps
+	createTimes := createBatchPodWithRateControl(f, pods, testArg.interval)
+
+	By("Waiting for all Pods to be observed by the watch...")
+
+	Eventually(func() bool {
+		return len(watchTimes) == testArg.podsNr
+	}, 10*time.Minute, 10*time.Second).Should(BeTrue())
+
+	if len(watchTimes) < testArg.podsNr {
+		framework.Failf("Timeout reached waiting for all Pods to be observed by the watch.")
+	}
+
+	// Analyze results
+	var (
+		firstCreate unversioned.Time
+		lastRunning unversioned.Time
+		init        = true
+		e2eLags     = make([]framework.PodLatencyData, 0)
+	)
+
+	for name, create := range createTimes {
+		watch, ok := watchTimes[name]
+		Expect(ok).To(Equal(true))
+
+		e2eLags = append(e2eLags,
+			framework.PodLatencyData{Name: name, Latency: watch.Time.Sub(create.Time)})
+
+		if !init {
+			if firstCreate.Time.After(create.Time) {
+				firstCreate = create
+			}
+			if lastRunning.Time.Before(watch.Time) {
+				lastRunning = watch
+			}
+		} else {
+			init = false
+			firstCreate, lastRunning = create, watch
+		}
+	}
+
+	sort.Sort(framework.LatencySlice(e2eLags))
+	batchLag := lastRunning.Time.Sub(firstCreate.Time)
+
+	return batchLag, e2eLags
+}
+
+// runDensitySeqTest runs the density sequential pod creation test
+func runDensitySeqTest(f *framework.Framework, rc *ResourceCollector, testArg densityTest) (time.Duration, []framework.PodLatencyData) {
+	const (
+		podType               = "density_test_pod"
+		sleepBeforeCreatePods = 30 * time.Second
+	)
+	bgPods := newTestPods(testArg.bgPodsNr, ImageRegistry[pauseImage], "background_pod")
+	testPods := newTestPods(testArg.podsNr, ImageRegistry[pauseImage], podType)
+
+	By("Creating a batch of background pods")
+
+	// CreatBatch is synchronized, all pods are running when it returns
+	f.PodClient().CreateBatch(bgPods)
+
+	time.Sleep(sleepBeforeCreatePods)
+
+	rc.Start()
+	defer rc.Stop()
+
+	// create pods sequentially (back-to-back)
+	batchlag, e2eLags := createBatchPodSequential(f, testPods)
+
+	return batchlag, e2eLags
+}
+
+// createBatchPodWithRateControl creates a batch of pods concurrently, uses one goroutine for each creation.
 // between creations there is an interval for throughput control
 func createBatchPodWithRateControl(f *framework.Framework, pods []*api.Pod, interval time.Duration) map[string]unversioned.Time {
 	createTimes := make(map[string]unversioned.Time)
@@ -286,7 +358,7 @@ func checkPodDeleted(f *framework.Framework, podName string) error {
 	return errors.New("Pod Not Deleted")
 }

-// get prometheus metric `pod start latency' from kubelet
+// getPodStartLatency gets prometheus metric 'pod start latency' from kubelet
 func getPodStartLatency(node string) (framework.KubeletLatencyMetrics, error) {
 	latencyMetrics := framework.KubeletLatencyMetrics{}
 	ms, err := metrics.GrabKubeletMetricsWithoutProxy(node)
@@ -367,32 +439,6 @@ func newInformerWatchPod(f *framework.Framework, mutex *sync.Mutex, watchTimes m
 	return controller
 }

-// verifyLatency verifies that whether pod creation latency satisfies the limit.
-func verifyLatency(batchLag time.Duration, e2eLags []framework.PodLatencyData, testArg densityTest) {
-	framework.PrintLatencies(e2eLags, "worst client e2e total latencies")
-
-	// Zhou: do not trust `kubelet' metrics since they are not reset!
-	latencyMetrics, _ := getPodStartLatency(kubeletAddr)
-	framework.Logf("Kubelet Prometheus metrics (not reset):\n%s", framework.PrettyPrintJSON(latencyMetrics))
-
-	// check whether e2e pod startup time is acceptable.
-	podCreateLatency := framework.PodStartupLatency{Latency: framework.ExtractLatencyMetrics(e2eLags)}
-	framework.Logf("Pod create latency: %s", framework.PrettyPrintJSON(podCreateLatency))
-	framework.ExpectNoError(verifyPodStartupLatency(testArg.podStartupLimits, podCreateLatency.Latency))
-
-	// check bactch pod creation latency
-	if testArg.podBatchStartupLimit > 0 {
-		Expect(batchLag <= testArg.podBatchStartupLimit).To(Equal(true), "Batch creation startup time %v exceed limit %v",
-			batchLag, testArg.podBatchStartupLimit)
-	}
-
-	// calculate and log throughput
-	throughputBatch := float64(testArg.podsNr) / batchLag.Minutes()
-	framework.Logf("Batch creation throughput is %.1f pods/min", throughputBatch)
-	throughputSequential := 1.0 / e2eLags[len(e2eLags)-1].Latency.Minutes()
-	framework.Logf("Sequential creation throughput is %.1f pods/min", throughputSequential)
-}
-
 // createBatchPodSequential creats pods back-to-back in sequence.
 func createBatchPodSequential(f *framework.Framework, pods []*api.Pod) (time.Duration, []framework.PodLatencyData) {
 	batchStartTime := unversioned.Now()
@@ -401,9 +447,38 @@ func createBatchPodSequential(f *framework.Framework, pods []*api.Pod) (time.Dur
 		create := unversioned.Now()
 		f.PodClient().CreateSync(pod)
 		e2eLags = append(e2eLags,
-			framework.PodLatencyData{Name: pod.ObjectMeta.Name, Latency: unversioned.Now().Time.Sub(create.Time)})
+			framework.PodLatencyData{Name: pod.Name, Latency: unversioned.Now().Time.Sub(create.Time)})
 	}
 	batchLag := unversioned.Now().Time.Sub(batchStartTime.Time)
 	sort.Sort(framework.LatencySlice(e2eLags))
 	return batchLag, e2eLags
 }

+// printAndVerifyLatency verifies that whether pod creation latency satisfies the limit.
+func printAndVerifyLatency(batchLag time.Duration, e2eLags []framework.PodLatencyData, testArg densityTest, isVerify bool) {
+	framework.PrintLatencies(e2eLags, "worst client e2e total latencies")
+
+	// TODO(coufon): do not trust `kubelet' metrics since they are not reset!
+	latencyMetrics, _ := getPodStartLatency(kubeletAddr)
+	framework.Logf("Kubelet Prometheus metrics (not reset):\n%s", framework.PrettyPrintJSON(latencyMetrics))
+
+	// check whether e2e pod startup time is acceptable.
+	podCreateLatency := framework.PodStartupLatency{Latency: framework.ExtractLatencyMetrics(e2eLags)}
+	framework.Logf("Pod create latency: %s", framework.PrettyPrintJSON(podCreateLatency))
+
+	// calculate and log throughput
+	throughputBatch := float64(testArg.podsNr) / batchLag.Minutes()
+	framework.Logf("Batch creation throughput is %.1f pods/min", throughputBatch)
+	throughputSequential := 1.0 / e2eLags[len(e2eLags)-1].Latency.Minutes()
+	framework.Logf("Sequential creation throughput is %.1f pods/min", throughputSequential)
+
+	if isVerify {
+		framework.ExpectNoError(verifyPodStartupLatency(testArg.podStartupLimits, podCreateLatency.Latency))
+
+		// check bactch pod creation latency
+		if testArg.podBatchStartupLimit > 0 {
+			Expect(batchLag <= testArg.podBatchStartupLimit).To(Equal(true), "Batch creation startup time %v exceed limit %v",
+				batchLag, testArg.podBatchStartupLimit)
+		}
+	}
+}
@@ -461,7 +461,7 @@ func (r *ResourceCollector) GetResourceSeriesWithLabels(labels map[string]string
 	return seriesPerContainer
 }

-// Zhou: code for getting container name of docker, copied from pkg/kubelet/cm/container_manager_linux.go
+// Code for getting container name of docker, copied from pkg/kubelet/cm/container_manager_linux.go
 // since they are not exposed
 const (
 	kubeletProcessName = "kubelet"
@@ -35,12 +35,6 @@ var _ = framework.KubeDescribe("Resource-usage [Serial] [Slow]", func() {
 	const (
 		// Interval to poll /stats/container on a node
 		containerStatsPollingPeriod = 10 * time.Second
-		// The monitoring time for one test.
-		monitoringTime = 10 * time.Minute
-		// The periodic reporting period.
-		reportingPeriod = 5 * time.Minute
-
-		sleepAfterCreatePods = 10 * time.Second
 	)

 	var (
@@ -54,6 +48,12 @@ var _ = framework.KubeDescribe("Resource-usage [Serial] [Slow]", func() {
 	BeforeEach(func() {
 		ns = f.Namespace.Name
 		om = framework.NewRuntimeOperationMonitor(f.Client)
+		// The test collects resource usage from a standalone Cadvisor pod.
+		// The Cadvsior of Kubelet has a housekeeping interval of 10s, which is too long to
+		// show the resource usage spikes. But changing its interval increases the overhead
+		// of kubelet. Hence we use a Cadvisor pod.
+		createCadvisorPod(f)
+		rc = NewResourceCollector(containerStatsPollingPeriod)
 	})

 	AfterEach(func() {
@@ -67,13 +67,11 @@ var _ = framework.KubeDescribe("Resource-usage [Serial] [Slow]", func() {
 	Context("regular resource usage tracking", func() {
 		rTests := []resourceTest{
 			{
-				podsPerNode: 10,
+				pods: 10,
 				cpuLimits: framework.ContainersCPUSummary{
 					stats.SystemContainerKubelet: {0.50: 0.25, 0.95: 0.30},
 					stats.SystemContainerRuntime: {0.50: 0.30, 0.95: 0.40},
 				},
-				// We set the memory limits generously because the distribution
-				// of the addon pods affect the memory usage on each node.
 				memLimits: framework.ResourceUsagePerContainer{
 					stats.SystemContainerKubelet: &framework.ContainerResourceUsage{MemoryRSSInBytes: 100 * 1024 * 1024},
 					stats.SystemContainerRuntime: &framework.ContainerResourceUsage{MemoryRSSInBytes: 400 * 1024 * 1024},
@@ -84,70 +82,95 @@ var _ = framework.KubeDescribe("Resource-usage [Serial] [Slow]", func() {
 		for _, testArg := range rTests {
 			itArg := testArg

-			podsPerNode := itArg.podsPerNode
-			name := fmt.Sprintf("resource tracking for %d pods per node", podsPerNode)
-
-			It(name, func() {
-				// The test collects resource usage from a standalone Cadvisor pod.
-				// The Cadvsior of Kubelet has a housekeeping interval of 10s, which is too long to
-				// show the resource usage spikes. But changing its interval increases the overhead
-				// of kubelet. Hence we use a Cadvisor pod.
-				createCadvisorPod(f)
-				rc = NewResourceCollector(containerStatsPollingPeriod)
-				rc.Start()
-
-				By("Creating a batch of Pods")
-				pods := newTestPods(podsPerNode, ImageRegistry[pauseImage], "test_pod")
-				for _, pod := range pods {
-					f.PodClient().CreateSync(pod)
-				}
-
-				// wait for a while to let the node be steady
-				time.Sleep(sleepAfterCreatePods)
-
-				// Log once and flush the stats.
-				rc.LogLatest()
-				rc.Reset()
-
-				By("Start monitoring resource usage")
-				// Periodically dump the cpu summary until the deadline is met.
-				// Note that without calling framework.ResourceMonitor.Reset(), the stats
-				// would occupy increasingly more memory. This should be fine
-				// for the current test duration, but we should reclaim the
-				// entries if we plan to monitor longer (e.g., 8 hours).
-				deadline := time.Now().Add(monitoringTime)
-				for time.Now().Before(deadline) {
-					timeLeft := deadline.Sub(time.Now())
-					framework.Logf("Still running...%v left", timeLeft)
-					if timeLeft < reportingPeriod {
-						time.Sleep(timeLeft)
-					} else {
-						time.Sleep(reportingPeriod)
-					}
-					logPods(f.Client)
-				}
-
-				rc.Stop()
-
-				By("Reporting overall resource usage")
-				logPods(f.Client)
-
+			It(fmt.Sprintf("resource tracking for %d pods per node", itArg.pods), func() {
+				runResourceUsageTest(f, rc, itArg)
 				// Log and verify resource usage
-				verifyResource(f, itArg.cpuLimits, itArg.memLimits, rc)
+				printAndVerifyResource(f, rc, itArg.cpuLimits, itArg.memLimits, true)
+			})
+		}
+	})
+
+	Context("regular resource usage tracking [Benchmark]", func() {
+		rTests := []resourceTest{
+			{
+				pods: 10,
+			},
+			{
+				pods: 35,
+			},
+			{
+				pods: 105,
+			},
+		}
+
+		for _, testArg := range rTests {
+			itArg := testArg
+
+			It(fmt.Sprintf("resource tracking for %d pods per node", itArg.pods), func() {
+				runResourceUsageTest(f, rc, itArg)
+				// Log and verify resource usage
+				printAndVerifyResource(f, rc, itArg.cpuLimits, itArg.memLimits, true)
 			})
 		}
 	})
 })

 type resourceTest struct {
-	podsPerNode int
+	pods      int
 	cpuLimits framework.ContainersCPUSummary
 	memLimits framework.ResourceUsagePerContainer
 }

-// verifyResource verifies whether resource usage satisfies the limit.
-func verifyResource(f *framework.Framework, cpuLimits framework.ContainersCPUSummary,
-	memLimits framework.ResourceUsagePerContainer, rc *ResourceCollector) {
+// runResourceUsageTest runs the resource usage test
+func runResourceUsageTest(f *framework.Framework, rc *ResourceCollector, testArg resourceTest) {
+	const (
+		// The monitoring time for one test
+		monitoringTime = 10 * time.Minute
+		// The periodic reporting period
+		reportingPeriod = 5 * time.Minute
+		// sleep for an interval here to measure steady data
+		sleepAfterCreatePods = 10 * time.Second
+	)
+
+	rc.Start()
+	defer rc.Stop()
+
+	By("Creating a batch of Pods")
+	pods := newTestPods(testArg.pods, ImageRegistry[pauseImage], "test_pod")
+	f.PodClient().CreateBatch(pods)
+
+	// wait for a while to let the node be steady
+	time.Sleep(sleepAfterCreatePods)
+
+	// Log once and flush the stats.
+	rc.LogLatest()
+	rc.Reset()
+
+	By("Start monitoring resource usage")
+	// Periodically dump the cpu summary until the deadline is met.
+	// Note that without calling framework.ResourceMonitor.Reset(), the stats
+	// would occupy increasingly more memory. This should be fine
+	// for the current test duration, but we should reclaim the
+	// entries if we plan to monitor longer (e.g., 8 hours).
+	deadline := time.Now().Add(monitoringTime)
+	for time.Now().Before(deadline) {
+		timeLeft := deadline.Sub(time.Now())
+		framework.Logf("Still running...%v left", timeLeft)
+		if timeLeft < reportingPeriod {
+			time.Sleep(timeLeft)
+		} else {
+			time.Sleep(reportingPeriod)
+		}
+		logPods(f.Client)
+	}
+
+	By("Reporting overall resource usage")
+	logPods(f.Client)
+}
+
+// printAndVerifyResource prints the resource usage as perf data and verifies whether resource usage satisfies the limit.
+func printAndVerifyResource(f *framework.Framework, rc *ResourceCollector, cpuLimits framework.ContainersCPUSummary,
+	memLimits framework.ResourceUsagePerContainer, isVerify bool) {
 	nodeName := framework.TestContext.NodeName

 	// Obtain memory PerfData
@@ -158,20 +181,22 @@ func verifyResource(f *framework.Framework, cpuLimits framework.ContainersCPUSum
 	usagePerNode := make(framework.ResourceUsagePerNode)
 	usagePerNode[nodeName] = usagePerContainer

-	// Obtain cpu PerfData
+	// Obtain CPU PerfData
 	cpuSummary := rc.GetCPUSummary()
 	framework.Logf("%s", formatCPUSummary(cpuSummary))

 	cpuSummaryPerNode := make(framework.NodesCPUSummary)
 	cpuSummaryPerNode[nodeName] = cpuSummary

-	// Log resource usage
+	// Print resource usage
 	framework.PrintPerfData(framework.ResourceUsageToPerfData(usagePerNode))
 	framework.PrintPerfData(framework.CPUUsageToPerfData(cpuSummaryPerNode))

 	// Verify resource usage
-	verifyMemoryLimits(f.Client, memLimits, usagePerNode)
-	verifyCPULimits(cpuLimits, cpuSummaryPerNode)
+	if isVerify {
+		verifyMemoryLimits(f.Client, memLimits, usagePerNode)
+		verifyCPULimits(cpuLimits, cpuSummaryPerNode)
+	}
 }

 func verifyMemoryLimits(c *client.Client, expected framework.ResourceUsagePerContainer, actual framework.ResourceUsagePerNode) {
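As a closing note on the resource side: the cpuLimits values above are per-container maps from percentile to allowed CPU usage (in cores), and printAndVerifyResource runs the comparison against them only when isVerify is true. Below is a rough sketch of the percentile comparison itself, using a simplified map type rather than the framework's ContainersCPUSummary; the isVerify gating is omitted here and the helper names are stand-ins.

// Rough sketch of percentile-based CPU limit checking against values like
// {0.50: 0.25, 0.95: 0.30}; containersCPUSummary is a simplified stand-in
// for the framework type of the same shape.
package main

import "fmt"

// container name -> percentile -> CPU usage in cores
type containersCPUSummary map[string]map[float64]float64

func verifyCPULimits(limits, actual containersCPUSummary) []string {
	var violations []string
	for container, percentiles := range limits {
		for q, limit := range percentiles {
			usage, ok := actual[container][q]
			if !ok {
				violations = append(violations,
					fmt.Sprintf("%s: missing %.0fth percentile sample", container, q*100))
				continue
			}
			if usage > limit {
				violations = append(violations,
					fmt.Sprintf("%s: %.0fth percentile usage %.2f exceeds limit %.2f",
						container, q*100, usage, limit))
			}
		}
	}
	return violations
}

func main() {
	limits := containersCPUSummary{"kubelet": {0.50: 0.25, 0.95: 0.30}}
	actual := containersCPUSummary{"kubelet": {0.50: 0.20, 0.95: 0.35}}
	for _, v := range verifyCPULimits(limits, actual) {
		fmt.Println(v)
	}
}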