From 48ea20a7950e231a531bb64b41bf34271a0354e6 Mon Sep 17 00:00:00 2001
From: Wojciech Tyczynski
Date: Fri, 5 Feb 2016 16:02:00 +0100
Subject: [PATCH 1/2] Higher QPS limits in load.go test

---
 test/e2e/load.go | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/test/e2e/load.go b/test/e2e/load.go
index d41870dddf..bfa2a8d003 100644
--- a/test/e2e/load.go
+++ b/test/e2e/load.go
@@ -70,7 +70,17 @@ var _ = Describe("Load capacity", func() {
 	framework.NamespaceDeletionTimeout = time.Hour
 
 	BeforeEach(func() {
-		c = framework.Client
+		// Explicitly create a client with higher QPS limits.
+		config, err := loadConfig()
+		Expect(err).NotTo(HaveOccurred())
+		config.QPS = 50
+		config.Burst = 100
+		c, err = client.New(config)
+		Expect(err).NotTo(HaveOccurred())
+		if c.Client.Timeout == 0 {
+			c.Client.Timeout = singleCallTimeout
+		}
+
 		ns = framework.Namespace.Name
 		nodes := ListSchedulableNodesOrDie(c)
 		nodeCount = len(nodes.Items)
@@ -79,7 +89,7 @@ var _ = Describe("Load capacity", func() {
 		// Terminating a namespace (deleting the remaining objects from it - which
 		// generally means events) can affect the current run. Thus we wait for all
 		// terminating namespace to be finally deleted before starting this test.
-		err := checkTestingNSDeletedExcept(c, ns)
+		err = checkTestingNSDeletedExcept(c, ns)
 		expectNoError(err)
 
 		expectNoError(resetMetrics(c))
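Patch 1 exists because the shared framework client is built with the client package's low default client-side rate limits, so a large load test would throttle itself before it ever stressed the apiserver. For reference, here is a compilable sketch of the client construction the new BeforeEach performs, assuming the 1.2-era unversioned client API this code imports; newLoadTestClient is an illustrative name, and the singleCallTimeout value is a stand-in for the constant defined elsewhere in test/e2e/util.go:

package e2e

import (
	"fmt"
	"time"

	client "k8s.io/kubernetes/pkg/client/unversioned"
)

// Stand-in value; in the e2e suite this constant is defined in util.go.
const singleCallTimeout = 30 * time.Second

// newLoadTestClient (illustrative name) does what the patched BeforeEach
// does inline: raise the client-side rate limiter to 50 sustained requests
// per second with bursts of 100, and bound every call so an overloaded
// apiserver cannot hang the test forever.
func newLoadTestClient(config *client.Config) (*client.Client, error) {
	config.QPS = 50
	config.Burst = 100
	c, err := client.New(config)
	if err != nil {
		return nil, fmt.Errorf("error creating client: %v", err)
	}
	if c.Client.Timeout == 0 {
		c.Client.Timeout = singleCallTimeout
	}
	return c, nil
}

Patch 2 below promotes exactly this construction into util.go as loadClientFromConfig, so the QPS/Burst overrides stay in the test while the client plumbing is shared.
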
From a1a6218dc69da594d853bcc35f554e467f56ae95 Mon Sep 17 00:00:00 2001
From: Wojciech Tyczynski
Date: Fri, 5 Feb 2016 16:44:08 +0100
Subject: [PATCH 2/2] Adjust timeouts in load test for larger clusters

---
 test/e2e/load.go | 46 ++++++++++++++++++++++++++--------------------
 test/e2e/util.go | 14 +++++++++-----
 2 files changed, 35 insertions(+), 25 deletions(-)

diff --git a/test/e2e/load.go b/test/e2e/load.go
index bfa2a8d003..6875cef033 100644
--- a/test/e2e/load.go
+++ b/test/e2e/load.go
@@ -56,8 +56,6 @@ var _ = Describe("Load capacity", func() {
 	// Gathers metrics before teardown
 	// TODO add flag that allows to skip cleanup on failure
 	AfterEach(func() {
-		deleteAllRC(configs)
-
 		// Verify latency metrics
 		highLatencyRequests, err := HighLatencyRequests(c)
 		expectNoError(err, "Too many instances metrics above the threshold")
@@ -75,11 +73,8 @@ var _ = Describe("Load capacity", func() {
 		Expect(err).NotTo(HaveOccurred())
 		config.QPS = 50
 		config.Burst = 100
-		c, err = client.New(config)
+		c, err = loadClientFromConfig(config)
 		Expect(err).NotTo(HaveOccurred())
-		if c.Client.Timeout == 0 {
-			c.Client.Timeout = singleCallTimeout
-		}
 
 		ns = framework.Namespace.Name
 		nodes := ListSchedulableNodesOrDie(c)
@@ -117,7 +112,8 @@ var _ = Describe("Load capacity", func() {
 			itArg := testArg

 			It(name, func() {
-				configs = generateRCConfigs(itArg.podsPerNode*nodeCount, itArg.image, itArg.command, c, ns)
+				totalPods := itArg.podsPerNode * nodeCount
+				configs = generateRCConfigs(totalPods, itArg.image, itArg.command, c, ns)
 
 				// Simulate lifetime of RC:
 				// * create with initial size
 				// * scale it
 				// * scale it
 				// * delete it
 				//
 				// This will generate ~5 creations/deletions per second assuming:
-				// - 300 small RCs each 5 pods
-				// - 25 medium RCs each 30 pods
-				// - 3 big RCs each 250 pods
-				createAllRC(configs)
-				// TODO add reseting latency metrics here, once it would be supported.
+				// - X small RCs each 5 pods   [ 5 * X = totalPods / 2 ]
+				// - Y medium RCs each 30 pods [ 30 * Y = totalPods / 4 ]
+				// - Z big RCs each 250 pods   [ 250 * Z = totalPods / 4 ]
+
+				// We would like to spread creating replication controllers over time
+				// to make it possible to create/schedule them in the meantime.
+				// Currently we assume 5 pods/second average throughput.
+				// We may want to revisit it in the future.
+				creatingTime := time.Duration(totalPods/5) * time.Second
+				createAllRC(configs, creatingTime)
+
 				By("============================================================================")
 				scaleAllRC(configs)
 				By("============================================================================")
 				scaleAllRC(configs)
 				By("============================================================================")
+
+				// Cleanup all created replication controllers.
+				// Currently we assume 5 pods/second average deletion throughput.
+				// We may want to revisit it in the future.
+				deletingTime := time.Duration(totalPods/5) * time.Second
+				deleteAllRC(configs, deletingTime)
 			})
 		}
 	})
@@ -186,19 +194,18 @@ func sleepUpTo(d time.Duration) {
 	time.Sleep(time.Duration(rand.Int63n(d.Nanoseconds())))
 }
 
-func createAllRC(configs []*RCConfig) {
+func createAllRC(configs []*RCConfig, creatingTime time.Duration) {
 	var wg sync.WaitGroup
 	wg.Add(len(configs))
 	for _, config := range configs {
-		go createRC(&wg, config)
+		go createRC(&wg, config, creatingTime)
 	}
 	wg.Wait()
 }
 
-func createRC(wg *sync.WaitGroup, config *RCConfig) {
+func createRC(wg *sync.WaitGroup, config *RCConfig, creatingTime time.Duration) {
 	defer GinkgoRecover()
 	defer wg.Done()
 
-	creatingTime := 10 * time.Minute
 	sleepUpTo(creatingTime)
 	expectNoError(RunRC(*config), fmt.Sprintf("creating rc %s", config.Name))
@@ -233,19 +240,18 @@ func scaleRC(wg *sync.WaitGroup, config *RCConfig) {
 	expectNoError(err, fmt.Sprintf("listing pods from rc %v", config.Name))
 }
 
-func deleteAllRC(configs []*RCConfig) {
+func deleteAllRC(configs []*RCConfig, deletingTime time.Duration) {
 	var wg sync.WaitGroup
 	wg.Add(len(configs))
 	for _, config := range configs {
-		go deleteRC(&wg, config)
+		go deleteRC(&wg, config, deletingTime)
 	}
 	wg.Wait()
 }
 
-func deleteRC(wg *sync.WaitGroup, config *RCConfig) {
+func deleteRC(wg *sync.WaitGroup, config *RCConfig, deletingTime time.Duration) {
 	defer GinkgoRecover()
 	defer wg.Done()
 
-	deletingTime := 10 * time.Minute
 	sleepUpTo(deletingTime)
 	expectNoError(DeleteRC(config.Client, config.Namespace, config.Name), fmt.Sprintf("deleting rc %s", config.Name))
diff --git a/test/e2e/util.go b/test/e2e/util.go
index 95b7038ad7..0b11d11801 100644
--- a/test/e2e/util.go
+++ b/test/e2e/util.go
@@ -1115,11 +1115,7 @@ func loadConfig() (*client.Config, error) {
 	}
 }
 
-func loadClient() (*client.Client, error) {
-	config, err := loadConfig()
-	if err != nil {
-		return nil, fmt.Errorf("error creating client: %v", err.Error())
-	}
+func loadClientFromConfig(config *client.Config) (*client.Client, error) {
 	c, err := client.New(config)
 	if err != nil {
 		return nil, fmt.Errorf("error creating client: %v", err.Error())
@@ -1130,6 +1126,14 @@
 	return c, nil
 }
 
+func loadClient() (*client.Client, error) {
+	config, err := loadConfig()
+	if err != nil {
+		return nil, fmt.Errorf("error creating client: %v", err.Error())
+	}
+	return loadClientFromConfig(config)
+}
+
 // randomSuffix provides a random string to append to pods,services,rcs.
 // TODO: Allow service names to have the same form as names
 //       for pods and replication controllers so we don't
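
The end state of the series is that both creation and deletion of the test's replication controllers are staggered over a window sized from the workload: with the assumed average throughput of 5 pods/second, totalPods pods yield a window of totalPods/5 seconds, and each goroutine sleeps a uniformly random fraction of that window before acting. This replaces the fixed 10-minute window, which implied an ever higher request rate as clusters grew. Below is a minimal standalone sketch of the staggering pattern, with illustrative pod and RC counts; only sleepUpTo is taken verbatim from load.go:

package main

import (
	"fmt"
	"math/rand"
	"sync"
	"time"
)

// sleepUpTo mirrors the helper in test/e2e/load.go: sleep for a random
// duration in [0, d).
func sleepUpTo(d time.Duration) {
	time.Sleep(time.Duration(rand.Int63n(d.Nanoseconds())))
}

func main() {
	rand.Seed(time.Now().UnixNano())

	// Illustrative numbers: 10 RCs totalling 150 pods. The patch sizes the
	// window from an assumed 5 pods/second, so totalPods/5 seconds (30s here)
	// instead of a fixed 10 minutes.
	totalPods := 150
	numRCs := 10
	creatingTime := time.Duration(totalPods/5) * time.Second

	var wg sync.WaitGroup
	wg.Add(numRCs)
	for i := 0; i < numRCs; i++ {
		go func(id int) {
			defer wg.Done()
			// Stagger each "creation" uniformly across the window so the
			// apiserver sees a roughly constant request rate instead of a
			// burst of numRCs simultaneous creations.
			sleepUpTo(creatingTime)
			fmt.Printf("creating rc-%d after its staggered delay\n", id)
		}(i)
	}
	wg.Wait()
}

Sizing the deletion window the same way is also why deleteAllRC moves out of AfterEach and into the It body: the staggered cleanup becomes part of the simulated RC lifetime rather than part of teardown.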