// +build linux

/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package e2e_node
import (
	"fmt"
	"sort"
	"strconv"
	"sync"
	"time"

	"k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/labels"
	"k8s.io/apimachinery/pkg/runtime"
	"k8s.io/apimachinery/pkg/watch"
	"k8s.io/client-go/tools/cache"
	stats "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1"
	kubemetrics "k8s.io/kubernetes/pkg/kubelet/metrics"
	"k8s.io/kubernetes/test/e2e/framework"
	"k8s.io/kubernetes/test/e2e/metrics"

	. "github.com/onsi/ginkgo"
	. "github.com/onsi/gomega"
)
const (
	// kubeletAddr is the host:port of the local kubelet endpoint; it is passed
	// to getPodStartLatency to scrape kubelet Prometheus metrics directly,
	// without going through the API server proxy.
	kubeletAddr = "localhost:10255"
)
var _ = framework . KubeDescribe ( "Density [Serial] [Slow]" , func ( ) {
const (
2016-08-13 03:56:10 +00:00
// The data collection time of resource collector and the standalone cadvisor
// is not synchronizated, so resource collector may miss data or
2016-08-03 18:43:24 +00:00
// collect duplicated data
2016-08-13 03:56:10 +00:00
containerStatsPollingPeriod = 500 * time . Millisecond
2016-08-03 18:43:24 +00:00
)
var (
2016-09-09 00:24:26 +00:00
rc * ResourceCollector
2016-08-03 18:43:24 +00:00
)
f := framework . NewDefaultFramework ( "density-test" )
BeforeEach ( func ( ) {
2016-08-13 03:56:10 +00:00
// Start a standalone cadvisor pod using 'createSync', the pod is running when it returns
2016-08-30 17:08:25 +00:00
f . PodClient ( ) . CreateSync ( getCadvisorPod ( ) )
2016-08-13 03:56:10 +00:00
// Resource collector monitors fine-grain CPU/memory usage by a standalone Cadvisor with
// 1s housingkeeping interval
rc = NewResourceCollector ( containerStatsPollingPeriod )
2016-08-03 18:43:24 +00:00
} )
Context ( "create a batch of pods" , func ( ) {
2016-08-13 03:56:10 +00:00
// TODO(coufon): the values are generous, set more precise limits with benchmark data
// and add more tests
2016-08-10 17:48:32 +00:00
dTests := [ ] densityTest {
2016-08-03 18:43:24 +00:00
{
podsNr : 10 ,
interval : 0 * time . Millisecond ,
cpuLimits : framework . ContainersCPUSummary {
2016-09-02 16:12:40 +00:00
stats . SystemContainerKubelet : { 0.50 : 0.30 , 0.95 : 0.50 } ,
2016-08-03 18:43:24 +00:00
stats . SystemContainerRuntime : { 0.50 : 0.40 , 0.95 : 0.60 } ,
} ,
memLimits : framework . ResourceUsagePerContainer {
stats . SystemContainerKubelet : & framework . ContainerResourceUsage { MemoryRSSInBytes : 100 * 1024 * 1024 } ,
2016-08-30 01:32:16 +00:00
stats . SystemContainerRuntime : & framework . ContainerResourceUsage { MemoryRSSInBytes : 500 * 1024 * 1024 } ,
2016-08-03 18:43:24 +00:00
} ,
// percentile limit of single pod startup latency
podStartupLimits : framework . LatencyMetric {
2016-08-09 23:13:09 +00:00
Perc50 : 16 * time . Second ,
Perc90 : 18 * time . Second ,
2016-08-03 18:43:24 +00:00
Perc99 : 20 * time . Second ,
} ,
// upbound of startup latency of a batch of pods
podBatchStartupLimit : 25 * time . Second ,
} ,
}
2016-08-10 17:48:32 +00:00
for _ , testArg := range dTests {
2016-08-03 18:43:24 +00:00
itArg := testArg
2017-06-09 16:56:44 +00:00
desc := fmt . Sprintf ( "latency/resource should be within limit when create %d pods with %v interval" , itArg . podsNr , itArg . interval )
It ( desc , func ( ) {
2016-08-17 17:38:43 +00:00
itArg . createMethod = "batch"
2017-06-09 16:56:44 +00:00
testInfo := getTestNodeInfo ( f , itArg . getTestName ( ) , desc )
2016-09-09 00:24:26 +00:00
batchLag , e2eLags := runDensityBatchTest ( f , rc , itArg , testInfo , false )
2016-08-13 03:56:10 +00:00
By ( "Verifying latency" )
2016-09-09 00:24:26 +00:00
logAndVerifyLatency ( batchLag , e2eLags , itArg . podStartupLimits , itArg . podBatchStartupLimit , testInfo , true )
2016-08-13 03:56:10 +00:00
By ( "Verifying resource" )
2016-09-09 00:24:26 +00:00
logAndVerifyResource ( f , rc , itArg . cpuLimits , itArg . memLimits , testInfo , true )
2016-08-13 03:56:10 +00:00
} )
}
} )
2016-08-18 16:25:20 +00:00
Context ( "create a batch of pods" , func ( ) {
2016-08-13 03:56:10 +00:00
dTests := [ ] densityTest {
{
podsNr : 10 ,
interval : 0 * time . Millisecond ,
} ,
{
podsNr : 35 ,
interval : 0 * time . Millisecond ,
} ,
{
podsNr : 105 ,
interval : 0 * time . Millisecond ,
} ,
2016-08-19 21:03:21 +00:00
{
podsNr : 10 ,
interval : 100 * time . Millisecond ,
} ,
{
podsNr : 35 ,
interval : 100 * time . Millisecond ,
} ,
{
podsNr : 105 ,
interval : 100 * time . Millisecond ,
} ,
{
podsNr : 10 ,
interval : 300 * time . Millisecond ,
} ,
{
podsNr : 35 ,
interval : 300 * time . Millisecond ,
} ,
{
podsNr : 105 ,
interval : 300 * time . Millisecond ,
} ,
2016-08-13 03:56:10 +00:00
}
for _ , testArg := range dTests {
itArg := testArg
2017-06-09 16:56:44 +00:00
desc := fmt . Sprintf ( "latency/resource should be within limit when create %d pods with %v interval [Benchmark]" , itArg . podsNr , itArg . interval )
It ( desc , func ( ) {
2016-08-17 17:38:43 +00:00
itArg . createMethod = "batch"
2017-06-09 16:56:44 +00:00
testInfo := getTestNodeInfo ( f , itArg . getTestName ( ) , desc )
2016-09-09 00:24:26 +00:00
batchLag , e2eLags := runDensityBatchTest ( f , rc , itArg , testInfo , true )
2016-08-13 03:56:10 +00:00
2016-08-03 18:43:24 +00:00
By ( "Verifying latency" )
2016-09-09 00:24:26 +00:00
logAndVerifyLatency ( batchLag , e2eLags , itArg . podStartupLimits , itArg . podBatchStartupLimit , testInfo , false )
2016-08-03 18:43:24 +00:00
By ( "Verifying resource" )
2016-09-09 00:24:26 +00:00
logAndVerifyResource ( f , rc , itArg . cpuLimits , itArg . memLimits , testInfo , false )
2016-08-03 18:43:24 +00:00
} )
}
} )
2016-09-09 22:57:52 +00:00
Context ( "create a batch of pods with higher API QPS" , func ( ) {
2016-09-07 23:34:47 +00:00
dTests := [ ] densityTest {
{
2016-09-09 22:57:52 +00:00
podsNr : 105 ,
interval : 0 * time . Millisecond ,
APIQPSLimit : 60 ,
2016-09-07 23:34:47 +00:00
} ,
{
2016-09-09 22:57:52 +00:00
podsNr : 105 ,
interval : 100 * time . Millisecond ,
APIQPSLimit : 60 ,
2016-09-07 23:34:47 +00:00
} ,
{
2016-09-09 22:57:52 +00:00
podsNr : 105 ,
interval : 300 * time . Millisecond ,
APIQPSLimit : 60 ,
2016-09-07 23:34:47 +00:00
} ,
}
for _ , testArg := range dTests {
itArg := testArg
2017-06-09 16:56:44 +00:00
desc := fmt . Sprintf ( "latency/resource should be within limit when create %d pods with %v interval (QPS %d) [Benchmark]" , itArg . podsNr , itArg . interval , itArg . APIQPSLimit )
It ( desc , func ( ) {
2016-09-07 23:34:47 +00:00
itArg . createMethod = "batch"
2017-06-09 16:56:44 +00:00
testInfo := getTestNodeInfo ( f , itArg . getTestName ( ) , desc )
2016-09-07 23:34:47 +00:00
// The latency caused by API QPS limit takes a large portion (up to ~33%) of e2e latency.
// It makes the pod startup latency of Kubelet (creation throughput as well) under-estimated.
// Here we set API QPS limit from default 5 to 60 in order to test real Kubelet performance.
// Note that it will cause higher resource usage.
2016-09-09 22:57:52 +00:00
setKubeletAPIQPSLimit ( f , int32 ( itArg . APIQPSLimit ) )
2016-09-09 00:24:26 +00:00
batchLag , e2eLags := runDensityBatchTest ( f , rc , itArg , testInfo , true )
2016-09-07 23:34:47 +00:00
By ( "Verifying latency" )
2016-09-09 00:24:26 +00:00
logAndVerifyLatency ( batchLag , e2eLags , itArg . podStartupLimits , itArg . podBatchStartupLimit , testInfo , false )
2016-09-07 23:34:47 +00:00
By ( "Verifying resource" )
2016-09-09 00:24:26 +00:00
logAndVerifyResource ( f , rc , itArg . cpuLimits , itArg . memLimits , testInfo , false )
2016-09-07 23:34:47 +00:00
} )
}
} )
2016-08-03 18:43:24 +00:00
Context ( "create a sequence of pods" , func ( ) {
2016-08-10 17:48:32 +00:00
dTests := [ ] densityTest {
2016-08-03 18:43:24 +00:00
{
podsNr : 10 ,
2016-08-13 03:56:10 +00:00
bgPodsNr : 50 ,
2016-08-03 18:43:24 +00:00
cpuLimits : framework . ContainersCPUSummary {
2016-09-02 16:12:40 +00:00
stats . SystemContainerKubelet : { 0.50 : 0.30 , 0.95 : 0.50 } ,
2016-08-03 18:43:24 +00:00
stats . SystemContainerRuntime : { 0.50 : 0.40 , 0.95 : 0.60 } ,
} ,
memLimits : framework . ResourceUsagePerContainer {
stats . SystemContainerKubelet : & framework . ContainerResourceUsage { MemoryRSSInBytes : 100 * 1024 * 1024 } ,
2016-08-30 01:32:16 +00:00
stats . SystemContainerRuntime : & framework . ContainerResourceUsage { MemoryRSSInBytes : 500 * 1024 * 1024 } ,
2016-08-03 18:43:24 +00:00
} ,
podStartupLimits : framework . LatencyMetric {
2016-08-30 01:32:16 +00:00
Perc50 : 5000 * time . Millisecond ,
Perc90 : 9000 * time . Millisecond ,
Perc99 : 10000 * time . Millisecond ,
2016-08-03 18:43:24 +00:00
} ,
} ,
}
2016-08-10 17:48:32 +00:00
for _ , testArg := range dTests {
2016-08-03 18:43:24 +00:00
itArg := testArg
2017-06-09 16:56:44 +00:00
desc := fmt . Sprintf ( "latency/resource should be within limit when create %d pods with %d background pods" , itArg . podsNr , itArg . bgPodsNr )
It ( desc , func ( ) {
2016-08-17 17:38:43 +00:00
itArg . createMethod = "sequence"
2017-06-09 16:56:44 +00:00
testInfo := getTestNodeInfo ( f , itArg . getTestName ( ) , desc )
2016-09-09 00:24:26 +00:00
batchlag , e2eLags := runDensitySeqTest ( f , rc , itArg , testInfo )
2016-08-03 18:43:24 +00:00
2016-08-13 03:56:10 +00:00
By ( "Verifying latency" )
2016-09-09 00:24:26 +00:00
logAndVerifyLatency ( batchlag , e2eLags , itArg . podStartupLimits , itArg . podBatchStartupLimit , testInfo , true )
2016-08-03 18:43:24 +00:00
2016-08-13 03:56:10 +00:00
By ( "Verifying resource" )
2016-09-09 00:24:26 +00:00
logAndVerifyResource ( f , rc , itArg . cpuLimits , itArg . memLimits , testInfo , true )
2016-08-13 03:56:10 +00:00
} )
}
} )
2016-08-03 18:43:24 +00:00
2016-08-18 16:25:20 +00:00
Context ( "create a sequence of pods" , func ( ) {
2016-08-13 03:56:10 +00:00
dTests := [ ] densityTest {
{
podsNr : 10 ,
bgPodsNr : 50 ,
} ,
{
podsNr : 30 ,
bgPodsNr : 50 ,
} ,
{
podsNr : 50 ,
bgPodsNr : 50 ,
} ,
}
2016-08-03 18:43:24 +00:00
2016-08-13 03:56:10 +00:00
for _ , testArg := range dTests {
itArg := testArg
2017-06-09 16:56:44 +00:00
desc := fmt . Sprintf ( "latency/resource should be within limit when create %d pods with %d background pods [Benchmark]" , itArg . podsNr , itArg . bgPodsNr )
It ( desc , func ( ) {
2016-08-17 17:38:43 +00:00
itArg . createMethod = "sequence"
2017-06-09 16:56:44 +00:00
testInfo := getTestNodeInfo ( f , itArg . getTestName ( ) , desc )
2016-09-09 00:24:26 +00:00
batchlag , e2eLags := runDensitySeqTest ( f , rc , itArg , testInfo )
2016-08-03 18:43:24 +00:00
By ( "Verifying latency" )
2016-09-09 00:24:26 +00:00
logAndVerifyLatency ( batchlag , e2eLags , itArg . podStartupLimits , itArg . podBatchStartupLimit , testInfo , false )
2016-08-03 18:43:24 +00:00
By ( "Verifying resource" )
2016-09-09 00:24:26 +00:00
logAndVerifyResource ( f , rc , itArg . cpuLimits , itArg . memLimits , testInfo , false )
2016-08-03 18:43:24 +00:00
} )
}
} )
} )
2016-08-10 17:48:32 +00:00
type densityTest struct {
2016-08-03 18:43:24 +00:00
// number of pods
podsNr int
// number of background pods
bgPodsNr int
// interval between creating pod (rate control)
interval time . Duration
2016-08-17 17:38:43 +00:00
// create pods in 'batch' or 'sequence'
createMethod string
2016-09-09 22:57:52 +00:00
// API QPS limit
APIQPSLimit int
2016-08-13 03:56:10 +00:00
// performance limits
2016-08-03 18:43:24 +00:00
cpuLimits framework . ContainersCPUSummary
memLimits framework . ResourceUsagePerContainer
podStartupLimits framework . LatencyMetric
podBatchStartupLimit time . Duration
}
2016-08-17 17:38:43 +00:00
func ( dt * densityTest ) getTestName ( ) string {
2016-09-09 22:57:52 +00:00
// The current default API QPS limit is 5
// TODO(coufon): is there any way to not hard code this?
APIQPSLimit := 5
if dt . APIQPSLimit > 0 {
APIQPSLimit = dt . APIQPSLimit
}
return fmt . Sprintf ( "density_create_%s_%d_%d_%d_%d" , dt . createMethod , dt . podsNr , dt . bgPodsNr ,
dt . interval . Nanoseconds ( ) / 1000000 , APIQPSLimit )
2016-08-17 17:38:43 +00:00
}
2016-08-13 03:56:10 +00:00
// runDensityBatchTest runs the density batch pod creation test
2016-09-09 00:24:26 +00:00
func runDensityBatchTest ( f * framework . Framework , rc * ResourceCollector , testArg densityTest , testInfo map [ string ] string ,
2016-08-17 17:38:43 +00:00
isLogTimeSeries bool ) ( time . Duration , [ ] framework . PodLatencyData ) {
2016-08-13 03:56:10 +00:00
const (
podType = "density_test_pod"
sleepBeforeCreatePods = 30 * time . Second
)
var (
mutex = & sync . Mutex { }
2016-12-03 18:57:26 +00:00
watchTimes = make ( map [ string ] metav1 . Time , 0 )
2016-08-13 03:56:10 +00:00
stopCh = make ( chan struct { } )
)
// create test pod data structure
2017-02-02 16:51:01 +00:00
pods := newTestPods ( testArg . podsNr , true , framework . GetPauseImageNameForHostArch ( ) , podType )
2016-08-13 03:56:10 +00:00
// the controller watches the change of pod status
controller := newInformerWatchPod ( f , mutex , watchTimes , podType )
go controller . Run ( stopCh )
defer close ( stopCh )
// TODO(coufon): in the test we found kubelet starts while it is busy on something, as a result 'syncLoop'
// does not response to pod creation immediately. Creating the first pod has a delay around 5s.
// The node status has already been 'ready' so `wait and check node being ready does not help here.
// Now wait here for a grace period to let 'syncLoop' be ready
time . Sleep ( sleepBeforeCreatePods )
rc . Start ( )
By ( "Creating a batch of pods" )
// It returns a map['pod name']'creation time' containing the creation timestamps
createTimes := createBatchPodWithRateControl ( f , pods , testArg . interval )
By ( "Waiting for all Pods to be observed by the watch..." )
Eventually ( func ( ) bool {
return len ( watchTimes ) == testArg . podsNr
} , 10 * time . Minute , 10 * time . Second ) . Should ( BeTrue ( ) )
if len ( watchTimes ) < testArg . podsNr {
framework . Failf ( "Timeout reached waiting for all Pods to be observed by the watch." )
}
// Analyze results
var (
2016-12-03 18:57:26 +00:00
firstCreate metav1 . Time
lastRunning metav1 . Time
2016-08-13 03:56:10 +00:00
init = true
e2eLags = make ( [ ] framework . PodLatencyData , 0 )
)
for name , create := range createTimes {
watch , ok := watchTimes [ name ]
Expect ( ok ) . To ( Equal ( true ) )
e2eLags = append ( e2eLags ,
framework . PodLatencyData { Name : name , Latency : watch . Time . Sub ( create . Time ) } )
if ! init {
if firstCreate . Time . After ( create . Time ) {
firstCreate = create
}
if lastRunning . Time . Before ( watch . Time ) {
lastRunning = watch
}
} else {
init = false
firstCreate , lastRunning = create , watch
}
}
sort . Sort ( framework . LatencySlice ( e2eLags ) )
batchLag := lastRunning . Time . Sub ( firstCreate . Time )
2017-02-02 16:51:01 +00:00
rc . Stop ( )
2017-02-03 18:33:43 +00:00
deletePodsSync ( f , pods )
2017-02-02 16:51:01 +00:00
2016-08-17 17:38:43 +00:00
// Log time series data.
if isLogTimeSeries {
2016-09-09 00:24:26 +00:00
logDensityTimeSeries ( rc , createTimes , watchTimes , testInfo )
2016-08-17 17:38:43 +00:00
}
// Log throughput data.
2016-09-09 00:24:26 +00:00
logPodCreateThroughput ( batchLag , e2eLags , testArg . podsNr , testInfo )
2016-08-17 17:38:43 +00:00
2017-02-03 18:33:43 +00:00
deletePodsSync ( f , [ ] * v1 . Pod { getCadvisorPod ( ) } )
2016-08-13 03:56:10 +00:00
return batchLag , e2eLags
}
// runDensitySeqTest runs the density sequential pod creation test
2016-09-09 00:24:26 +00:00
func runDensitySeqTest ( f * framework . Framework , rc * ResourceCollector , testArg densityTest , testInfo map [ string ] string ) ( time . Duration , [ ] framework . PodLatencyData ) {
2016-08-13 03:56:10 +00:00
const (
podType = "density_test_pod"
sleepBeforeCreatePods = 30 * time . Second
)
2017-02-02 16:51:01 +00:00
bgPods := newTestPods ( testArg . bgPodsNr , true , framework . GetPauseImageNameForHostArch ( ) , "background_pod" )
testPods := newTestPods ( testArg . podsNr , true , framework . GetPauseImageNameForHostArch ( ) , podType )
2016-08-13 03:56:10 +00:00
By ( "Creating a batch of background pods" )
// CreatBatch is synchronized, all pods are running when it returns
f . PodClient ( ) . CreateBatch ( bgPods )
time . Sleep ( sleepBeforeCreatePods )
rc . Start ( )
2016-08-19 21:21:04 +00:00
// Create pods sequentially (back-to-back). e2eLags have been sorted.
2016-08-13 03:56:10 +00:00
batchlag , e2eLags := createBatchPodSequential ( f , testPods )
2017-02-02 16:51:01 +00:00
rc . Stop ( )
2017-02-03 18:33:43 +00:00
deletePodsSync ( f , append ( bgPods , testPods ... ) )
2017-02-02 16:51:01 +00:00
2016-08-17 17:38:43 +00:00
// Log throughput data.
2016-09-09 00:24:26 +00:00
logPodCreateThroughput ( batchlag , e2eLags , testArg . podsNr , testInfo )
2016-08-17 17:38:43 +00:00
2017-02-03 18:33:43 +00:00
deletePodsSync ( f , [ ] * v1 . Pod { getCadvisorPod ( ) } )
2016-08-13 03:56:10 +00:00
return batchlag , e2eLags
}
// createBatchPodWithRateControl creates a batch of pods concurrently, uses one goroutine for each creation.
2016-08-03 18:43:24 +00:00
// between creations there is an interval for throughput control
2016-12-03 18:57:26 +00:00
func createBatchPodWithRateControl ( f * framework . Framework , pods [ ] * v1 . Pod , interval time . Duration ) map [ string ] metav1 . Time {
createTimes := make ( map [ string ] metav1 . Time )
2016-08-03 18:43:24 +00:00
for _ , pod := range pods {
2016-12-03 18:57:26 +00:00
createTimes [ pod . ObjectMeta . Name ] = metav1 . Now ( )
2016-08-03 18:43:24 +00:00
go f . PodClient ( ) . Create ( pod )
time . Sleep ( interval )
}
return createTimes
}
2016-08-13 03:56:10 +00:00
// getPodStartLatency gets prometheus metric 'pod start latency' from kubelet
2016-08-03 18:43:24 +00:00
func getPodStartLatency ( node string ) ( framework . KubeletLatencyMetrics , error ) {
latencyMetrics := framework . KubeletLatencyMetrics { }
ms , err := metrics . GrabKubeletMetricsWithoutProxy ( node )
Expect ( err ) . NotTo ( HaveOccurred ( ) )
for _ , samples := range ms {
for _ , sample := range samples {
if sample . Metric [ "__name__" ] == kubemetrics . KubeletSubsystem + "_" + kubemetrics . PodStartLatencyKey {
quantile , _ := strconv . ParseFloat ( string ( sample . Metric [ "quantile" ] ) , 64 )
latencyMetrics = append ( latencyMetrics ,
framework . KubeletLatencyMetric {
Quantile : quantile ,
Method : kubemetrics . PodStartLatencyKey ,
Latency : time . Duration ( int ( sample . Value ) ) * time . Microsecond } )
}
}
}
return latencyMetrics , nil
}
2016-08-10 17:48:32 +00:00
// verifyPodStartupLatency verifies whether 50, 90 and 99th percentiles of PodStartupLatency are
2016-08-03 18:43:24 +00:00
// within the threshold.
func verifyPodStartupLatency ( expect , actual framework . LatencyMetric ) error {
if actual . Perc50 > expect . Perc50 {
return fmt . Errorf ( "too high pod startup latency 50th percentile: %v" , actual . Perc50 )
}
if actual . Perc90 > expect . Perc90 {
return fmt . Errorf ( "too high pod startup latency 90th percentile: %v" , actual . Perc90 )
}
2016-11-10 18:26:00 +00:00
if actual . Perc99 > expect . Perc99 {
return fmt . Errorf ( "too high pod startup latency 99th percentile: %v" , actual . Perc99 )
2016-08-03 18:43:24 +00:00
}
return nil
}
2016-08-10 17:48:32 +00:00
// newInformerWatchPod creates an informer to check whether all pods are running.
2017-01-12 13:45:53 +00:00
func newInformerWatchPod ( f * framework . Framework , mutex * sync . Mutex , watchTimes map [ string ] metav1 . Time , podType string ) cache . Controller {
2016-08-03 18:43:24 +00:00
ns := f . Namespace . Name
2016-11-18 20:55:46 +00:00
checkPodRunning := func ( p * v1 . Pod ) {
2016-08-03 18:43:24 +00:00
mutex . Lock ( )
defer mutex . Unlock ( )
defer GinkgoRecover ( )
2016-11-18 20:55:46 +00:00
if p . Status . Phase == v1 . PodRunning {
2016-08-03 18:43:24 +00:00
if _ , found := watchTimes [ p . Name ] ; ! found {
2016-12-03 18:57:26 +00:00
watchTimes [ p . Name ] = metav1 . Now ( )
2016-08-03 18:43:24 +00:00
}
}
}
2016-09-14 18:35:38 +00:00
_ , controller := cache . NewInformer (
2016-08-03 18:43:24 +00:00
& cache . ListWatch {
2017-01-22 03:36:02 +00:00
ListFunc : func ( options metav1 . ListOptions ) ( runtime . Object , error ) {
2016-11-18 20:55:46 +00:00
options . LabelSelector = labels . SelectorFromSet ( labels . Set { "type" : podType } ) . String ( )
2016-10-18 13:00:38 +00:00
obj , err := f . ClientSet . Core ( ) . Pods ( ns ) . List ( options )
return runtime . Object ( obj ) , err
2016-08-03 18:43:24 +00:00
} ,
2017-01-22 03:36:02 +00:00
WatchFunc : func ( options metav1 . ListOptions ) ( watch . Interface , error ) {
2016-11-18 20:55:46 +00:00
options . LabelSelector = labels . SelectorFromSet ( labels . Set { "type" : podType } ) . String ( )
2016-10-18 13:00:38 +00:00
return f . ClientSet . Core ( ) . Pods ( ns ) . Watch ( options )
2016-08-03 18:43:24 +00:00
} ,
} ,
2016-11-18 20:55:46 +00:00
& v1 . Pod { } ,
2016-08-03 18:43:24 +00:00
0 ,
2016-09-14 18:35:38 +00:00
cache . ResourceEventHandlerFuncs {
2016-08-03 18:43:24 +00:00
AddFunc : func ( obj interface { } ) {
2016-11-18 20:55:46 +00:00
p , ok := obj . ( * v1 . Pod )
2016-08-03 18:43:24 +00:00
Expect ( ok ) . To ( Equal ( true ) )
go checkPodRunning ( p )
} ,
UpdateFunc : func ( oldObj , newObj interface { } ) {
2016-11-18 20:55:46 +00:00
p , ok := newObj . ( * v1 . Pod )
2016-08-03 18:43:24 +00:00
Expect ( ok ) . To ( Equal ( true ) )
go checkPodRunning ( p )
} ,
} ,
)
return controller
}
2016-08-13 03:56:10 +00:00
// createBatchPodSequential creats pods back-to-back in sequence.
2016-11-18 20:55:46 +00:00
func createBatchPodSequential ( f * framework . Framework , pods [ ] * v1 . Pod ) ( time . Duration , [ ] framework . PodLatencyData ) {
2016-12-03 18:57:26 +00:00
batchStartTime := metav1 . Now ( )
2016-08-13 03:56:10 +00:00
e2eLags := make ( [ ] framework . PodLatencyData , 0 )
for _ , pod := range pods {
2016-12-03 18:57:26 +00:00
create := metav1 . Now ( )
2016-08-13 03:56:10 +00:00
f . PodClient ( ) . CreateSync ( pod )
e2eLags = append ( e2eLags ,
2016-12-03 18:57:26 +00:00
framework . PodLatencyData { Name : pod . Name , Latency : metav1 . Now ( ) . Time . Sub ( create . Time ) } )
2016-08-13 03:56:10 +00:00
}
2016-12-03 18:57:26 +00:00
batchLag := metav1 . Now ( ) . Time . Sub ( batchStartTime . Time )
2016-08-13 03:56:10 +00:00
sort . Sort ( framework . LatencySlice ( e2eLags ) )
return batchLag , e2eLags
}
2016-08-17 17:38:43 +00:00
// logAndVerifyLatency verifies that whether pod creation latency satisfies the limit.
func logAndVerifyLatency ( batchLag time . Duration , e2eLags [ ] framework . PodLatencyData , podStartupLimits framework . LatencyMetric ,
2016-09-09 00:24:26 +00:00
podBatchStartupLimit time . Duration , testInfo map [ string ] string , isVerify bool ) {
2016-08-03 18:43:24 +00:00
framework . PrintLatencies ( e2eLags , "worst client e2e total latencies" )
2016-08-17 17:38:43 +00:00
// TODO(coufon): do not trust 'kubelet' metrics since they are not reset!
2016-08-03 18:43:24 +00:00
latencyMetrics , _ := getPodStartLatency ( kubeletAddr )
framework . Logf ( "Kubelet Prometheus metrics (not reset):\n%s" , framework . PrettyPrintJSON ( latencyMetrics ) )
podCreateLatency := framework . PodStartupLatency { Latency : framework . ExtractLatencyMetrics ( e2eLags ) }
2016-08-17 17:38:43 +00:00
// log latency perf data
2017-06-09 16:56:44 +00:00
logPerfData ( getLatencyPerfData ( podCreateLatency . Latency , testInfo ) , "latency" )
2016-08-03 18:43:24 +00:00
2016-08-13 03:56:10 +00:00
if isVerify {
2016-08-17 17:38:43 +00:00
// check whether e2e pod startup time is acceptable.
framework . ExpectNoError ( verifyPodStartupLatency ( podStartupLimits , podCreateLatency . Latency ) )
2016-08-13 03:56:10 +00:00
// check bactch pod creation latency
2016-08-17 17:38:43 +00:00
if podBatchStartupLimit > 0 {
Expect ( batchLag <= podBatchStartupLimit ) . To ( Equal ( true ) , "Batch creation startup time %v exceed limit %v" ,
batchLag , podBatchStartupLimit )
2016-08-13 03:56:10 +00:00
}
2016-08-03 18:43:24 +00:00
}
}
2016-08-17 17:38:43 +00:00
// logThroughput calculates and logs pod creation throughput.
2016-09-09 00:24:26 +00:00
func logPodCreateThroughput ( batchLag time . Duration , e2eLags [ ] framework . PodLatencyData , podsNr int , testInfo map [ string ] string ) {
2017-06-09 16:56:44 +00:00
logPerfData ( getThroughputPerfData ( batchLag , e2eLags , podsNr , testInfo ) , "throughput" )
2016-08-17 17:38:43 +00:00
}
2016-09-07 23:34:47 +00:00
// increaseKubeletAPIQPSLimit sets Kubelet API QPS via ConfigMap. Kubelet will restart with the new QPS.
func setKubeletAPIQPSLimit ( f * framework . Framework , newAPIQPS int32 ) {
const restartGap = 40 * time . Second
resp := pollConfigz ( 2 * time . Minute , 5 * time . Second )
kubeCfg , err := decodeConfigz ( resp )
framework . ExpectNoError ( err )
framework . Logf ( "Old QPS limit is: %d\n" , kubeCfg . KubeAPIQPS )
// Set new API QPS limit
kubeCfg . KubeAPIQPS = newAPIQPS
2016-09-09 22:57:52 +00:00
// TODO(coufon): createConfigMap should firstly check whether configmap already exists, if so, use updateConfigMap.
// Calling createConfigMap twice will result in error. It is fine for benchmark test because we only run one test on a new node.
2016-09-07 23:34:47 +00:00
_ , err = createConfigMap ( f , kubeCfg )
framework . ExpectNoError ( err )
// Wait for Kubelet to restart
time . Sleep ( restartGap )
// Check new QPS has been set
resp = pollConfigz ( 2 * time . Minute , 5 * time . Second )
kubeCfg , err = decodeConfigz ( resp )
framework . ExpectNoError ( err )
framework . Logf ( "New QPS limit is: %d\n" , kubeCfg . KubeAPIQPS )
2016-09-09 22:57:52 +00:00
// TODO(coufon): check test result to see if we need to retry here
if kubeCfg . KubeAPIQPS != newAPIQPS {
framework . Failf ( "Fail to set new kubelet API QPS limit." )
}
2016-09-07 23:34:47 +00:00
}