Add a flag that makes each test gather metrics from all running components after the test finishes.

2015-12-23 15:56:56 +01:00 · 2015-12-23 15:56:56 +01:00 · 2dcafa3854
parent c36226bc39
commit 2dcafa3854
8 changed files with 125 additions and 28 deletions
--- a/hack/verify-flags/known-flags.txt
+++ b/hack/verify-flags/known-flags.txt
@ -113,6 +113,7 @@ from-literal
 func-dest
 fuzz-iters
 gather-logs-sizes
 gather-metrics-at-teardown
 gather-resource-usage
 gce-project
 gce-service-account
--- a/pkg/metrics/api_server_metrics.go
+++ b/pkg/metrics/api_server_metrics.go
@ -42,8 +42,6 @@ var KnownApiServerMetrics = map[string][]string{
 	"etcd_request_latencies_summary":                 {"operation", "type", "quantile"},
 	"etcd_request_latencies_summary_count":           {"operation", "type"},
 	"etcd_request_latencies_summary_sum":             {"operation", "type"},
 	"get_token_count":                                {},
 	"get_token_fail_count":                           {},
 	"rest_client_request_latency_microseconds":       {"url", "verb", "quantile"},
 	"rest_client_request_latency_microseconds_count": {"url", "verb"},
 	"rest_client_request_latency_microseconds_sum":   {"url", "verb"},
--- a/pkg/metrics/generic_metrics.go
+++ b/pkg/metrics/generic_metrics.go
@ -29,38 +29,55 @@ import (
 )
 var CommonMetrics = map[string][]string{
-	"process_start_time_seconds":    {},
+	"get_token_count":                          {},
-	"process_resident_memory_bytes": {},
+	"get_token_fail_count":                     {},
-	"process_virtual_memory_bytes":  {},
+	"go_gc_duration_seconds":                   {"quantile"},
-	"process_cpu_seconds_total":     {},
+	"go_gc_duration_seconds_count":             {},
-	"process_max_fds":               {},
+	"go_gc_duration_seconds_sum":               {},
-	"process_open_fds":              {},
+	"go_goroutines":                            {},
 	"http_request_size_bytes":                  {"handler", "quantile"},
 	"http_request_size_bytes_count":            {"handler"},
 	"http_request_size_bytes_sum":              {"handler"},
 	"http_request_duration_microseconds":       {"handler", "quantile"},
 	"http_request_duration_microseconds_count": {"handler"},
 	"http_request_duration_microseconds_sum":   {"handler"},
 	"http_request_size_bytes":                  {"handler", "quantile"},
 	"http_request_size_bytes_count":            {"handler"},
 	"http_request_size_bytes_sum":              {"handler"},
 	"http_requests_total":                      {"handler", "method", "code"},
-
+	"http_response_size_bytes":                 {"handler", "quantile"},
-	"http_response_size_bytes":       {"handler", "quantile"},
+	"http_response_size_bytes_count":           {"handler"},
-	"http_response_size_bytes_count": {"handler"},
+	"http_response_size_bytes_sum":             {"handler"},
-	"http_response_size_bytes_sum":   {"handler"},
+	"kubernetes_build_info":                    {"major", "minor", "gitCommit", "gitTreeState", "gitVersion"},
-
+	"process_cpu_seconds_total":                {},
-	"ssh_tunnel_open_fail_count": {},
+	"process_max_fds":                          {},
-	"ssh_tunnel_open_count":      {},
+	"process_open_fds":                         {},
-
+	"process_resident_memory_bytes":            {},
-	"go_gc_duration_seconds":       {"quantile"},
+	"process_start_time_seconds":               {},
-	"go_gc_duration_seconds_count": {},
+	"process_virtual_memory_bytes":             {},
-	"go_gc_duration_seconds_sum":   {},
+	"ssh_tunnel_open_count":                    {},
-	"go_goroutines":                {},
+	"ssh_tunnel_open_fail_count":               {},
 	"kubernetes_build_info": {"major", "minor", "gitCommit", "gitTreeState", "gitVersion"},
 }
 type Metrics map[string]model.Samples
 // PrintSample formats a sample as "[label=value,...] = value".
 //
 // Labels whose names start with "__" and labels listed in
 // KubeletMetricsLabelsToSkip are omitted. Labels are emitted in map iteration
 // order, so their relative order is not stable between calls.
 func PrintSample(sample *model.Sample) string {
 	// The "id" label is a special case: it is useless for a 'normal' container but
 	// necessary for 'system' containers (e.g. /docker-daemon, /kubelet, etc.).
 	// A sample is treated as belonging to a normal container when it carries a
 	// "kubernetes_container_name" label. It's hacky, but it works.
 	_, hasContainerName := sample.Metric["kubernetes_container_name"]

 	labels := make([]string, 0, len(sample.Metric))
 	for name, value := range sample.Metric {
 		label := string(name)
 		switch {
 		case strings.HasPrefix(label, "__"), KubeletMetricsLabelsToSkip.Has(label):
 			continue
 		case label == "id" && hasContainerName:
 			continue
 		}
 		labels = append(labels, fmt.Sprintf("%v=%v", label, value))
 	}

 	joined := strings.Join(labels, ",")
 	return fmt.Sprintf("[%v] = %v", joined, sample.Value)
 }
 func NewMetrics() Metrics {
 	result := make(Metrics)
 	for metric := range CommonMetrics {
--- a/pkg/metrics/kubelet_metrics.go
+++ b/pkg/metrics/kubelet_metrics.go
@ -61,8 +61,6 @@ var KnownKubeletMetrics = map[string][]string{
 	"container_spec_memory_swap_limit_bytes":                 {"id", "image", "kubernetes_container_name", "kubernetes_namespace", "kubernetes_pod_name", "name"},
 	"container_start_time_seconds":                           {"id", "image", "kubernetes_container_name", "kubernetes_namespace", "kubernetes_pod_name", "name"},
 	"container_tasks_state":                                  {"id", "image", "kubernetes_container_name", "kubernetes_namespace", "kubernetes_pod_name", "name", "state"},
 	"get_token_count":                                        {},
 	"get_token_fail_count":                                   {},
 	"kubelet_container_manager_latency_microseconds":         {"operation_type", "quantile"},
 	"kubelet_container_manager_latency_microseconds_count":   {"operation_type"},
 	"kubelet_container_manager_latency_microseconds_sum":     {"operation_type"},
@ -98,6 +96,12 @@ var KnownKubeletMetrics = map[string][]string{
 	"rest_client_request_status_codes":                       {"code", "host", "method"},
 }
 // KubeletMetricsLabelsToSkip holds the label names that PrintSample leaves out
 // when it formats a sample.
 var KubeletMetricsLabelsToSkip = sets.NewString(
 	"kubernetes_namespace",
 	"image",
 	"name",
 )
 type KubeletMetrics Metrics
 func NewKubeletMetrics() KubeletMetrics {
--- a/test/e2e/e2e_test.go
+++ b/test/e2e/e2e_test.go
@ -90,6 +90,7 @@ func init() {
 	flag.BoolVar(&testContext.CleanStart, "clean-start", false, "If true, purge all namespaces except default and system before running tests. This serves to cleanup test namespaces from failed/interrupted e2e runs in a long-lived cluster.")
 	flag.BoolVar(&testContext.GatherKubeSystemResourceUsageData, "gather-resource-usage", false, "If set to true framework will be monitoring resource usage of system add-ons in (some) e2e tests.")
 	flag.BoolVar(&testContext.GatherLogsSizes, "gather-logs-sizes", false, "If set to true framework will be monitoring logs sizes on all machines running e2e tests.")
 	// Opt-in switch; Framework.afterEach checks it to decide whether to grab and log component metrics.
 	flag.BoolVar(&testContext.GatherMetricsAfterTest, "gather-metrics-at-teardown", false, "If set to true framework will gather metrics from all components after each test.")
 }
 func TestE2E(t *testing.T) {
--- a/test/e2e/framework.go
+++ b/test/e2e/framework.go
@ -26,6 +26,7 @@ import (
 	"k8s.io/kubernetes/pkg/api"
 	client "k8s.io/kubernetes/pkg/client/unversioned"
 	"k8s.io/kubernetes/pkg/fields"
 	"k8s.io/kubernetes/pkg/metrics"
 	. "github.com/onsi/ginkgo"
 	. "github.com/onsi/gomega"
@ -152,6 +153,38 @@ func (f *Framework) afterEach() {
 		close(f.logsSizeCloseChannel)
 		f.logsSizeWaitGroup.Wait()
 	}
 	if testContext.GatherMetricsAfterTest {
 		// TODO: enable Scheduler and ControllerManager metrics grabbing when Master's Kubelet will be registered.
 		grabber, err := metrics.NewMetricsGrabber(f.Client, true, false, false, true)
 		if err != nil {
 			Logf("Failed to create MetricsGrabber. Skipping metrics gathering.")
 		} else {
 			received, err := grabber.Grab(nil)
 			if err != nil {
 				Logf("MetricsGrabber failed grab metrics. Skipping metrics gathering.")
 			} else {
 				buf := bytes.Buffer{}
 				for interestingMetric := range InterestingApiServerMetrics {
 					buf.WriteString(fmt.Sprintf("For %v:\n", interestingMetric))
 					for _, sample := range received.ApiServerMetrics[interestingMetric] {
 						buf.WriteString(fmt.Sprintf("\t%v\n", metrics.PrintSample(sample)))
 					}
 				}
 				for kubelet, grabbed := range received.KubeletMetrics {
 					buf.WriteString(fmt.Sprintf("For %v:\n", kubelet))
 					for interestingMetric := range InterestingKubeletMetrics {
 						buf.WriteString(fmt.Sprintf("\tFor %v:\n", interestingMetric))
 						for _, sample := range grabbed[interestingMetric] {
 							buf.WriteString(fmt.Sprintf("\t\t%v\n", metrics.PrintSample(sample)))
 						}
 					}
 				}
 				Logf("%v", buf.String())
 			}
 		}
 	}
 	// Paranoia-- prevent reuse!
 	f.Namespace = nil
 	f.Client = nil
--- a/test/e2e/metrics_util.go
+++ b/test/e2e/metrics_util.go
@ -46,6 +46,48 @@ const (
 	apiCallLatencyLargeThreshold  time.Duration = 1 * time.Second
 )
 // InterestingApiServerMetrics names the API server metrics whose samples
 // Framework.afterEach logs when testContext.GatherMetricsAfterTest is set.
 var InterestingApiServerMetrics = sets.NewString(
 	"apiserver_request_count",
 	"apiserver_request_latencies_bucket",
 	"etcd_helper_cache_entry_count",
 	"etcd_helper_cache_hit_count",
 	"etcd_helper_cache_miss_count",
 	"etcd_request_cache_add_latencies_summary",
 	"etcd_request_cache_get_latencies_summary",
 	"etcd_request_latencies_summary",
 	"go_gc_duration_seconds",
 	"go_goroutines",
 	"process_cpu_seconds_total",
 	"process_open_fds",
 	"process_resident_memory_bytes",
 	"process_start_time_seconds",
 	"process_virtual_memory_bytes",
 )
 // InterestingKubeletMetrics names the Kubelet metrics whose samples
 // Framework.afterEach logs, per kubelet, when testContext.GatherMetricsAfterTest is set.
 var InterestingKubeletMetrics = sets.NewString(
 	"container_cpu_system_seconds_total",
 	"container_cpu_user_seconds_total",
 	"container_fs_io_time_weighted_seconds_total",
 	"container_memory_usage_bytes",
 	"container_spec_cpu_shares",
 	"container_start_time_seconds",
 	"go_gc_duration_seconds",
 	"go_goroutines",
 	"kubelet_container_manager_latency_microseconds",
 	"kubelet_docker_errors",
 	"kubelet_docker_operations_latency_microseconds",
 	"kubelet_generate_pod_status_latency_microseconds",
 	"kubelet_pod_start_latency_microseconds",
 	"kubelet_pod_worker_latency_microseconds",
 	"kubelet_pod_worker_start_latency_microseconds",
 	"kubelet_sync_pods_latency_microseconds",
 	"process_cpu_seconds_total",
 	"process_open_fds",
 	"process_resident_memory_bytes",
 	"process_start_time_seconds",
 	"process_virtual_memory_bytes",
 )
 // Dashboard metrics
 type LatencyMetric struct {
 	Perc50 time.Duration `json:"Perc50"`
--- a/test/e2e/util.go
+++ b/test/e2e/util.go
@ -153,6 +153,7 @@ type TestContextType struct {
 	// It will read the data every 30 seconds from all Nodes and print summary during afterEach.
 	GatherKubeSystemResourceUsageData bool
 	GatherLogsSizes                   bool
 	GatherMetricsAfterTest            bool
 }
 var testContext TestContextType