Merge pull request #46461 from ncdc/e2e-suite-metrics

Automatic merge from submit-queue

Support grabbing test suite metrics

**What this PR does / why we need it**:
Add support for grabbing metrics that cover the entire test suite's execution.

Update the "interesting" controller-manager metrics to match the
current names for the garbage collector, and add namespace controller
metrics to the list.

If you enable `--gather-suite-metrics-at-teardown`, the metrics are written to a file with a name such as `MetricsForE2ESuite_2017-05-25T20:25:57Z.json` in the `--report-dir`. If you don't specify `--report-dir`, the metrics are written to the test log output instead.
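The timestamp in the file name is just Go's RFC 3339 rendering of the suite's end time (see the `path.Join` call in the diff below). A minimal sketch of the naming scheme, with `/tmp/report` standing in for whatever `--report-dir` is set to:

```go
package main

import (
	"fmt"
	"path"
	"time"
)

func main() {
	// Sketch of the file naming used at suite teardown; "/tmp/report" is a
	// placeholder for the configured --report-dir.
	reportDir := "/tmp/report"
	name := "MetricsForE2ESuite_" + time.Now().Format(time.RFC3339) + ".json"
	// On a UTC machine (as on most CI hosts) this prints something like
	// /tmp/report/MetricsForE2ESuite_2017-05-25T20:25:57Z.json; elsewhere the
	// timestamp carries a zone offset instead of the trailing Z.
	fmt.Println(path.Join(reportDir, name))
}
```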

I'd like to enable this for some of the `pull-*` CI jobs, which will require a separate PR to test-infra.

**Which issue this PR fixes** *(optional, in `fixes #<issue number>(, fixes #<issue_number>, ...)` format, will close that issue when PR gets merged)*: fixes #

**Special notes for your reviewer**:

**Release note**:

```release-note
NONE
```

@kubernetes/sig-testing-pr-reviews @smarterclayton @wojtek-t @gmarek @derekwaynecarr @timothysc
Kubernetes Submit Queue 2017-05-30 16:41:49 -07:00 committed by GitHub
commit 40dcbc4eb3
5 changed files with 61 additions and 6 deletions

```diff
@@ -292,6 +292,7 @@ garbage-collector-enabled
 gather-logs-sizes
 gather-metrics-at-teardown
 gather-resource-usage
+gather-suite-metrics-at-teardown
 gce-multizone
 gce-project
 gce-service-account
```

```diff
@@ -18,6 +18,7 @@ package e2e
 
 import (
 	"fmt"
+	"io/ioutil"
 	"os"
 	"path"
 	"sync"
@@ -39,6 +40,7 @@ import (
 	"k8s.io/kubernetes/pkg/client/clientset_generated/clientset"
 	"k8s.io/kubernetes/pkg/cloudprovider/providers/azure"
 	gcecloud "k8s.io/kubernetes/pkg/cloudprovider/providers/gce"
+	"k8s.io/kubernetes/pkg/metrics"
 	"k8s.io/kubernetes/pkg/util/logs"
 	commontest "k8s.io/kubernetes/test/e2e/common"
 	"k8s.io/kubernetes/test/e2e/framework"
@@ -266,8 +268,45 @@ var _ = ginkgo.SynchronizedAfterSuite(func() {
 	if framework.TestContext.ReportDir != "" {
 		framework.CoreDump(framework.TestContext.ReportDir)
 	}
+	if framework.TestContext.GatherSuiteMetricsAfterTest {
+		if err := gatherTestSuiteMetrics(); err != nil {
+			framework.Logf("Error gathering metrics: %v", err)
+		}
+	}
 })
 
+func gatherTestSuiteMetrics() error {
+	framework.Logf("Gathering metrics")
+	c, err := framework.LoadClientset()
+	if err != nil {
+		return fmt.Errorf("error loading client: %v", err)
+	}
+
+	// Grab metrics for apiserver, scheduler, controller-manager, kubelet (for non-kubemark case).
+	grabber, err := metrics.NewMetricsGrabber(c, !framework.ProviderIs("kubemark"), true, true, true)
+	if err != nil {
+		return fmt.Errorf("failed to create MetricsGrabber: %v", err)
+	}
+
+	received, err := grabber.Grab()
+	if err != nil {
+		return fmt.Errorf("failed to grab metrics: %v", err)
+	}
+
+	metricsForE2E := (*framework.MetricsForE2E)(&received)
+	metricsJson := metricsForE2E.PrintJSON()
+	if framework.TestContext.ReportDir != "" {
+		filePath := path.Join(framework.TestContext.ReportDir, "MetricsForE2ESuite_"+time.Now().Format(time.RFC3339)+".json")
+		if err := ioutil.WriteFile(filePath, []byte(metricsJson), 0644); err != nil {
+			return fmt.Errorf("error writing to %q: %v", filePath, err)
+		}
+	} else {
+		framework.Logf("\n\nTest Suite Metrics:\n%s\n\n", metricsJson)
+	}
+
+	return nil
+}
+
 // TestE2E checks configuration parameters (specified through flags) and then runs
 // E2E tests using the Ginkgo runner.
 // If a "report directory" is specified, one or more JUnit test reports will be
```

```diff
@@ -325,9 +325,8 @@ func (f *Framework) AfterEach() {
 	if TestContext.GatherMetricsAfterTest {
 		By("Gathering metrics")
-		// Grab apiserver metrics and nodes' kubelet metrics (for non-kubemark case).
-		// TODO: enable Scheduler and ControllerManager metrics grabbing when Master's Kubelet will be registered.
-		grabber, err := metrics.NewMetricsGrabber(f.ClientSet, !ProviderIs("kubemark"), false, false, true)
+		// Grab apiserver, scheduler, controller-manager metrics and nodes' kubelet metrics (for non-kubemark case).
+		grabber, err := metrics.NewMetricsGrabber(f.ClientSet, !ProviderIs("kubemark"), true, true, true)
 		if err != nil {
 			Logf("Failed to create MetricsGrabber (skipping metrics gathering): %v", err)
 		} else {
```
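Reading the two call sites together, the boolean arguments to `metrics.NewMetricsGrabber` appear to select, in order, kubelet, scheduler, controller-manager, and apiserver metrics: the old call disabled the middle two (per the TODO), while the new one enables everything. A commented sketch of the call under that inferred ordering; the helper itself is illustrative and not part of the PR:

```go
package framework

import (
	"k8s.io/kubernetes/pkg/metrics"
)

// gatherAllComponentMetrics is an illustrative helper (not in the PR) that
// spells out the inferred argument order of NewMetricsGrabber.
func (f *Framework) gatherAllComponentMetrics() {
	grabber, err := metrics.NewMetricsGrabber(
		f.ClientSet,
		!ProviderIs("kubemark"), // kubelets: node metrics are unavailable on kubemark
		true,                    // scheduler
		true,                    // controller manager
		true,                    // apiserver
	)
	if err != nil {
		Logf("Failed to create MetricsGrabber (skipping metrics gathering): %v", err)
		return
	}
	received, err := grabber.Grab()
	if err != nil {
		Logf("Failed to grab metrics: %v", err)
		return
	}
	Logf("Grabbed metrics: %v", received)
}
```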

```diff
@@ -122,9 +122,23 @@ var InterestingApiServerMetrics = []string{
 }
 
 var InterestingControllerManagerMetrics = []string{
-	"garbage_collector_event_queue_latency",
-	"garbage_collector_dirty_queue_latency",
-	"garbage_collector_orhan_queue_latency",
+	"garbage_collector_attempt_to_delete_queue_latency",
+	"garbage_collector_attempt_to_delete_work_duration",
+	"garbage_collector_attempt_to_orphan_queue_latency",
+	"garbage_collector_attempt_to_orphan_work_duration",
+	"garbage_collector_dirty_processing_latency_microseconds",
+	"garbage_collector_event_processing_latency_microseconds",
+	"garbage_collector_graph_changes_queue_latency",
+	"garbage_collector_graph_changes_work_duration",
+	"garbage_collector_orphan_processing_latency_microseconds",
+	"namespace_queue_latency",
+	"namespace_queue_latency_sum",
+	"namespace_queue_latency_count",
+	"namespace_retries",
+	"namespace_work_duration",
+	"namespace_work_duration_sum",
+	"namespace_work_duration_count",
 }
 
 var InterestingKubeletMetrics = []string{
```
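These lists act as allowlists when the framework summarizes grabbed metrics. A self-contained sketch of that kind of name-based filtering; the `filterInteresting` helper and the sample data are illustrative, not the framework's actual API:

```go
package main

import "fmt"

// interestingControllerManagerMetrics mirrors a slice of the allowlist above.
var interestingControllerManagerMetrics = []string{
	"garbage_collector_attempt_to_delete_queue_latency",
	"namespace_queue_latency",
}

// filterInteresting keeps only the samples whose metric name is on the
// allowlist. Hypothetical helper for illustration; the framework's real
// filtering lives in MetricsForE2E's summary methods.
func filterInteresting(samples map[string]float64, allow []string) map[string]float64 {
	allowed := make(map[string]bool, len(allow))
	for _, name := range allow {
		allowed[name] = true
	}
	out := make(map[string]float64)
	for name, v := range samples {
		if allowed[name] {
			out[name] = v
		}
	}
	return out
}

func main() {
	samples := map[string]float64{
		"garbage_collector_attempt_to_delete_queue_latency": 12.5,
		"uninteresting_metric":                              1.0,
	}
	fmt.Println(filterInteresting(samples, interestingControllerManagerMetrics))
}
```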

```diff
@@ -73,6 +73,7 @@ type TestContextType struct {
 	GatherKubeSystemResourceUsageData string
 	GatherLogsSizes                   bool
 	GatherMetricsAfterTest            bool
+	GatherSuiteMetricsAfterTest       bool
 	// Currently supported values are 'hr' for human-readable and 'json'. It's a comma separated list.
 	OutputPrintType string
 	// NodeSchedulableTimeout is the timeout for waiting for all nodes to be schedulable.
@@ -162,6 +163,7 @@ func RegisterCommonFlags() {
 	flag.StringVar(&TestContext.GatherKubeSystemResourceUsageData, "gather-resource-usage", "false", "If set to 'true' or 'all', the framework will monitor resource usage of all system add-ons in (some) e2e tests; if set to 'master', it will monitor the master node only; if set to 'none' or 'false', monitoring is turned off.")
 	flag.BoolVar(&TestContext.GatherLogsSizes, "gather-logs-sizes", false, "If set to true, the framework will monitor log sizes on all machines running e2e tests.")
 	flag.BoolVar(&TestContext.GatherMetricsAfterTest, "gather-metrics-at-teardown", false, "If set to true, the framework will gather metrics from all components after each test.")
+	flag.BoolVar(&TestContext.GatherSuiteMetricsAfterTest, "gather-suite-metrics-at-teardown", false, "If set to true, the framework will gather metrics from all components after the whole test suite completes.")
 	flag.StringVar(&TestContext.OutputPrintType, "output-print-type", "json", "Format in which summaries should be printed: 'hr' for human readable, 'json' for JSON ones.")
 	flag.BoolVar(&TestContext.DumpLogsOnFailure, "dump-logs-on-failure", true, "If set to true, the test will dump data about the namespace in which it was running.")
 	flag.BoolVar(&TestContext.DisableLogDump, "disable-log-dump", false, "If set to true, logs from master and nodes won't be gathered after the test run.")
```
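The new flag follows the same pattern as its `gather-metrics-at-teardown` sibling: registered into the shared `TestContext` and consulted once at suite teardown. A stripped-down, self-contained sketch of that wiring, with a local stand-in for `TestContextType`:

```go
package main

import (
	"flag"
	"fmt"
)

// testContext stands in for the framework's TestContextType.
type testContext struct {
	GatherSuiteMetricsAfterTest bool
}

var ctx testContext

func main() {
	// Defaults to false, mirroring the registration above.
	flag.BoolVar(&ctx.GatherSuiteMetricsAfterTest, "gather-suite-metrics-at-teardown", false,
		"If true, gather metrics from all components after the whole test suite completes.")
	flag.Parse()

	// At suite teardown, the framework checks this field before gathering.
	if ctx.GatherSuiteMetricsAfterTest {
		fmt.Println("would gather suite metrics at teardown")
	}
}
```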