Merge pull request #46461 from ncdc/e2e-suite-metrics

Automatic merge from submit-queue

Support grabbing test suite metrics

**What this PR does / why we need it**:
Add support for grabbing metrics that cover the entire test suite's execution.

Update the "interesting" controller-manager metrics to match the
current names for the garbage collector, and add namespace controller
metrics to the list.

If you enable `--gather-suite-metrics-at-teardown`, the metrics are written to a file with a name such as `MetricsForE2ESuite_2017-05-25T20:25:57Z.json` in the `--report-dir`. If you don't specify `--report-dir`, the metrics are written to the test log output instead.
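The timestamp in the file name is just Go's RFC 3339 rendering of the suite's end time (see the `path.Join` call in the diff below). A minimal sketch of the naming scheme, with `/tmp/report` standing in for whatever `--report-dir` is set to:

```go
package main

import (
	"fmt"
	"path"
	"time"
)

func main() {
	// Sketch of the file naming used at suite teardown; "/tmp/report" is a
	// placeholder for the configured --report-dir.
	reportDir := "/tmp/report"
	name := "MetricsForE2ESuite_" + time.Now().Format(time.RFC3339) + ".json"
	// On a UTC machine (as on most CI hosts) this prints something like
	// /tmp/report/MetricsForE2ESuite_2017-05-25T20:25:57Z.json; elsewhere the
	// timestamp carries a zone offset instead of the trailing Z.
	fmt.Println(path.Join(reportDir, name))
}
```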

I'd like to enable this for some of the `pull-*` CI jobs, which will require a separate PR to test-infra.

**Which issue this PR fixes** *(optional, in `fixes #<issue number>(, fixes #<issue_number>, ...)` format, will close that issue when PR gets merged)*: fixes #

**Special notes for your reviewer**:

**Release note**:

```release-note
NONE
```

@kubernetes/sig-testing-pr-reviews @smarterclayton @wojtek-t @gmarek @derekwaynecarr @timothysc
Kubernetes Submit Queue 2017-05-30 16:41:49 -07:00 committed by GitHub
commit 40dcbc4eb3
5 changed files with 61 additions and 6 deletions

```diff
@@ -292,6 +292,7 @@ garbage-collector-enabled
 gather-logs-sizes
 gather-metrics-at-teardown
 gather-resource-usage
+gather-suite-metrics-at-teardown
 gce-multizone
 gce-project
 gce-service-account
```

```diff
@@ -18,6 +18,7 @@ package e2e
 
 import (
 	"fmt"
+	"io/ioutil"
 	"os"
 	"path"
 	"sync"
@@ -39,6 +40,7 @@ import (
 	"k8s.io/kubernetes/pkg/client/clientset_generated/clientset"
 	"k8s.io/kubernetes/pkg/cloudprovider/providers/azure"
 	gcecloud "k8s.io/kubernetes/pkg/cloudprovider/providers/gce"
+	"k8s.io/kubernetes/pkg/metrics"
 	"k8s.io/kubernetes/pkg/util/logs"
 	commontest "k8s.io/kubernetes/test/e2e/common"
 	"k8s.io/kubernetes/test/e2e/framework"
@@ -266,8 +268,45 @@ var _ = ginkgo.SynchronizedAfterSuite(func() {
 	if framework.TestContext.ReportDir != "" {
 		framework.CoreDump(framework.TestContext.ReportDir)
 	}
+	if framework.TestContext.GatherSuiteMetricsAfterTest {
+		if err := gatherTestSuiteMetrics(); err != nil {
+			framework.Logf("Error gathering metrics: %v", err)
+		}
+	}
 })
 
+func gatherTestSuiteMetrics() error {
+	framework.Logf("Gathering metrics")
+	c, err := framework.LoadClientset()
+	if err != nil {
+		return fmt.Errorf("error loading client: %v", err)
+	}
+
+	// Grab metrics for apiserver, scheduler, controller-manager, kubelet (for non-kubemark case).
+	grabber, err := metrics.NewMetricsGrabber(c, !framework.ProviderIs("kubemark"), true, true, true)
+	if err != nil {
+		return fmt.Errorf("failed to create MetricsGrabber: %v", err)
+	}
+
+	received, err := grabber.Grab()
+	if err != nil {
+		return fmt.Errorf("failed to grab metrics: %v", err)
+	}
+
+	metricsForE2E := (*framework.MetricsForE2E)(&received)
+	metricsJson := metricsForE2E.PrintJSON()
+	if framework.TestContext.ReportDir != "" {
+		filePath := path.Join(framework.TestContext.ReportDir, "MetricsForE2ESuite_"+time.Now().Format(time.RFC3339)+".json")
+		if err := ioutil.WriteFile(filePath, []byte(metricsJson), 0644); err != nil {
+			return fmt.Errorf("error writing to %q: %v", filePath, err)
+		}
+	} else {
+		framework.Logf("\n\nTest Suite Metrics:\n%s\n\n", metricsJson)
+	}
+
+	return nil
+}
+
 // TestE2E checks configuration parameters (specified through flags) and then runs
 // E2E tests using the Ginkgo runner.
 // If a "report directory" is specified, one or more JUnit test reports will be
```

```diff
@@ -325,9 +325,8 @@ func (f *Framework) AfterEach() {
 	if TestContext.GatherMetricsAfterTest {
 		By("Gathering metrics")
-		// Grab apiserver metrics and nodes' kubelet metrics (for non-kubemark case).
-		// TODO: enable Scheduler and ControllerManager metrics grabbing when Master's Kubelet will be registered.
-		grabber, err := metrics.NewMetricsGrabber(f.ClientSet, !ProviderIs("kubemark"), false, false, true)
+		// Grab apiserver, scheduler, controller-manager metrics and nodes' kubelet metrics (for non-kubemark case).
+		grabber, err := metrics.NewMetricsGrabber(f.ClientSet, !ProviderIs("kubemark"), true, true, true)
 		if err != nil {
 			Logf("Failed to create MetricsGrabber (skipping metrics gathering): %v", err)
 		} else {
```
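Reading the two call sites together, the boolean arguments to `metrics.NewMetricsGrabber` appear to select, in order, kubelet, scheduler, controller-manager, and apiserver metrics: the old call disabled the middle two (per the TODO), while the new one enables everything. A commented sketch of the call under that inferred ordering; the helper itself is illustrative and not part of the PR:

```go
package framework

import (
	"k8s.io/kubernetes/pkg/metrics"
)

// gatherAllComponentMetrics is an illustrative helper (not in the PR) that
// spells out the inferred argument order of NewMetricsGrabber.
func (f *Framework) gatherAllComponentMetrics() {
	grabber, err := metrics.NewMetricsGrabber(
		f.ClientSet,
		!ProviderIs("kubemark"), // kubelets: node metrics are unavailable on kubemark
		true,                    // scheduler
		true,                    // controller manager
		true,                    // apiserver
	)
	if err != nil {
		Logf("Failed to create MetricsGrabber (skipping metrics gathering): %v", err)
		return
	}
	received, err := grabber.Grab()
	if err != nil {
		Logf("Failed to grab metrics: %v", err)
		return
	}
	Logf("Grabbed metrics: %v", received)
}
```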

```diff
@@ -122,9 +122,23 @@ var InterestingApiServerMetrics = []string{
 }
 
 var InterestingControllerManagerMetrics = []string{
-	"garbage_collector_event_queue_latency",
-	"garbage_collector_dirty_queue_latency",
-	"garbage_collector_orhan_queue_latency",
+	"garbage_collector_attempt_to_delete_queue_latency",
+	"garbage_collector_attempt_to_delete_work_duration",
+	"garbage_collector_attempt_to_orphan_queue_latency",
+	"garbage_collector_attempt_to_orphan_work_duration",
+	"garbage_collector_dirty_processing_latency_microseconds",
+	"garbage_collector_event_processing_latency_microseconds",
+	"garbage_collector_graph_changes_queue_latency",
+	"garbage_collector_graph_changes_work_duration",
+	"garbage_collector_orphan_processing_latency_microseconds",
+	"namespace_queue_latency",
+	"namespace_queue_latency_sum",
+	"namespace_queue_latency_count",
+	"namespace_retries",
+	"namespace_work_duration",
+	"namespace_work_duration_sum",
+	"namespace_work_duration_count",
 }
 
 var InterestingKubeletMetrics = []string{
```
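These lists act as allowlists when the framework summarizes grabbed metrics. A self-contained sketch of that kind of name-based filtering; the `filterInteresting` helper and the sample data are illustrative, not the framework's actual API:

```go
package main

import "fmt"

// interestingControllerManagerMetrics mirrors a slice of the allowlist above.
var interestingControllerManagerMetrics = []string{
	"garbage_collector_attempt_to_delete_queue_latency",
	"namespace_queue_latency",
}

// filterInteresting keeps only the samples whose metric name is on the
// allowlist. Hypothetical helper for illustration; the framework's real
// filtering lives in MetricsForE2E's summary methods.
func filterInteresting(samples map[string]float64, allow []string) map[string]float64 {
	allowed := make(map[string]bool, len(allow))
	for _, name := range allow {
		allowed[name] = true
	}
	out := make(map[string]float64)
	for name, v := range samples {
		if allowed[name] {
			out[name] = v
		}
	}
	return out
}

func main() {
	samples := map[string]float64{
		"garbage_collector_attempt_to_delete_queue_latency": 12.5,
		"uninteresting_metric":                              1.0,
	}
	fmt.Println(filterInteresting(samples, interestingControllerManagerMetrics))
}
```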

```diff
@@ -73,6 +73,7 @@ type TestContextType struct {
 	GatherKubeSystemResourceUsageData string
 	GatherLogsSizes                   bool
 	GatherMetricsAfterTest            bool
+	GatherSuiteMetricsAfterTest       bool
 	// Currently supported values are 'hr' for human-readable and 'json'. It's a comma separated list.
 	OutputPrintType string
 	// NodeSchedulableTimeout is the timeout for waiting for all nodes to be schedulable.
@@ -162,6 +163,7 @@ func RegisterCommonFlags() {
 	flag.StringVar(&TestContext.GatherKubeSystemResourceUsageData, "gather-resource-usage", "false", "If set to 'true' or 'all', the framework will monitor resource usage of all system add-ons in (some) e2e tests; if set to 'master', it will monitor the master node only; if set to 'none' or 'false', monitoring is turned off.")
 	flag.BoolVar(&TestContext.GatherLogsSizes, "gather-logs-sizes", false, "If set to true, the framework will monitor log sizes on all machines running e2e tests.")
 	flag.BoolVar(&TestContext.GatherMetricsAfterTest, "gather-metrics-at-teardown", false, "If set to true, the framework will gather metrics from all components after each test.")
+	flag.BoolVar(&TestContext.GatherSuiteMetricsAfterTest, "gather-suite-metrics-at-teardown", false, "If set to true, the framework will gather metrics from all components after the whole test suite completes.")
 	flag.StringVar(&TestContext.OutputPrintType, "output-print-type", "json", "Format in which summaries should be printed: 'hr' for human readable, 'json' for JSON ones.")
 	flag.BoolVar(&TestContext.DumpLogsOnFailure, "dump-logs-on-failure", true, "If set to true, the test will dump data about the namespace in which it was running.")
 	flag.BoolVar(&TestContext.DisableLogDump, "disable-log-dump", false, "If set to true, logs from master and nodes won't be gathered after the test run.")
```
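The new flag follows the same pattern as its `gather-metrics-at-teardown` sibling: registered into the shared `TestContext` and consulted once at suite teardown. A stripped-down, self-contained sketch of that wiring, with a local stand-in for `TestContextType`:

```go
package main

import (
	"flag"
	"fmt"
)

// testContext stands in for the framework's TestContextType.
type testContext struct {
	GatherSuiteMetricsAfterTest bool
}

var ctx testContext

func main() {
	// Defaults to false, mirroring the registration above.
	flag.BoolVar(&ctx.GatherSuiteMetricsAfterTest, "gather-suite-metrics-at-teardown", false,
		"If true, gather metrics from all components after the whole test suite completes.")
	flag.Parse()

	// At suite teardown, the framework checks this field before gathering.
	if ctx.GatherSuiteMetricsAfterTest {
		fmt.Println("would gather suite metrics at teardown")
	}
}
```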