diff --git a/hack/verify-flags/known-flags.txt b/hack/verify-flags/known-flags.txt
index 1838ce9bb8..539ae155b9 100644
--- a/hack/verify-flags/known-flags.txt
+++ b/hack/verify-flags/known-flags.txt
@@ -113,6 +113,7 @@ from-literal
 func-dest
 fuzz-iters
 gather-logs-sizes
+gather-metrics-at-teardown
 gather-resource-usage
 gce-project
 gce-service-account
diff --git a/pkg/metrics/api_server_metrics.go b/pkg/metrics/api_server_metrics.go
index 56fc5f5550..8a2d114dfb 100644
--- a/pkg/metrics/api_server_metrics.go
+++ b/pkg/metrics/api_server_metrics.go
@@ -42,8 +42,6 @@ var KnownApiServerMetrics = map[string][]string{
 	"etcd_request_latencies_summary": {"operation", "type", "quantile"},
 	"etcd_request_latencies_summary_count": {"operation", "type"},
 	"etcd_request_latencies_summary_sum": {"operation", "type"},
-	"get_token_count": {},
-	"get_token_fail_count": {},
 	"rest_client_request_latency_microseconds": {"url", "verb", "quantile"},
 	"rest_client_request_latency_microseconds_count": {"url", "verb"},
 	"rest_client_request_latency_microseconds_sum": {"url", "verb"},
diff --git a/pkg/metrics/generic_metrics.go b/pkg/metrics/generic_metrics.go
index 999e72df67..5d3d5f17b6 100644
--- a/pkg/metrics/generic_metrics.go
+++ b/pkg/metrics/generic_metrics.go
@@ -29,38 +29,59 @@ import (
 )
 
 var CommonMetrics = map[string][]string{
-	"process_start_time_seconds": {},
-	"process_resident_memory_bytes": {},
-	"process_virtual_memory_bytes": {},
-	"process_cpu_seconds_total": {},
-	"process_max_fds": {},
-	"process_open_fds": {},
-
-	"http_request_size_bytes": {"handler", "quantile"},
-	"http_request_size_bytes_count": {"handler"},
-	"http_request_size_bytes_sum": {"handler"},
+	"get_token_count": {},
+	"get_token_fail_count": {},
+	"go_gc_duration_seconds": {"quantile"},
+	"go_gc_duration_seconds_count": {},
+	"go_gc_duration_seconds_sum": {},
+	"go_goroutines": {},
 	"http_request_duration_microseconds": {"handler", "quantile"},
 	"http_request_duration_microseconds_count": {"handler"},
 	"http_request_duration_microseconds_sum": {"handler"},
+	"http_request_size_bytes": {"handler", "quantile"},
+	"http_request_size_bytes_count": {"handler"},
+	"http_request_size_bytes_sum": {"handler"},
 	"http_requests_total": {"handler", "method", "code"},
-
-	"http_response_size_bytes": {"handler", "quantile"},
-	"http_response_size_bytes_count": {"handler"},
-	"http_response_size_bytes_sum": {"handler"},
-
-	"ssh_tunnel_open_fail_count": {},
-	"ssh_tunnel_open_count": {},
-
-	"go_gc_duration_seconds": {"quantile"},
-	"go_gc_duration_seconds_count": {},
-	"go_gc_duration_seconds_sum": {},
-	"go_goroutines": {},
-
-	"kubernetes_build_info": {"major", "minor", "gitCommit", "gitTreeState", "gitVersion"},
+	"http_response_size_bytes": {"handler", "quantile"},
+	"http_response_size_bytes_count": {"handler"},
+	"http_response_size_bytes_sum": {"handler"},
+	"kubernetes_build_info": {"major", "minor", "gitCommit", "gitTreeState", "gitVersion"},
+	"process_cpu_seconds_total": {},
+	"process_max_fds": {},
+	"process_open_fds": {},
+	"process_resident_memory_bytes": {},
+	"process_start_time_seconds": {},
+	"process_virtual_memory_bytes": {},
+	"ssh_tunnel_open_count": {},
+	"ssh_tunnel_open_fail_count": {},
 }
 
 type Metrics map[string]model.Samples
 
+// PrintSample formats a sample as "[label=value,...] = value", omitting labels
+// whose names start with "__" or appear in KubeletMetricsLabelsToSkip.
+// Labels are emitted in map iteration order, so their order is not stable
+// between calls.
+func PrintSample(sample *model.Sample) string {
+	buf := make([]string, 0, len(sample.Metric))
+	// Id is a VERY special label. For 'normal' container it's useless, but it's necessary
+	// for 'system' containers (e.g. /docker-daemon, /kubelet, etc.). We know if that's the
+	// case by checking if there's a label "kubernetes_container_name" present. It's hacky
+	// but it works...
+	_, normalContainer := sample.Metric["kubernetes_container_name"]
+	for k, v := range sample.Metric {
+		if strings.HasPrefix(string(k), "__") || KubeletMetricsLabelsToSkip.Has(string(k)) {
+			continue
+		}
+
+		if string(k) == "id" && normalContainer {
+			continue
+		}
+		buf = append(buf, fmt.Sprintf("%v=%v", string(k), v))
+	}
+	return fmt.Sprintf("[%v] = %v", strings.Join(buf, ","), sample.Value)
+}
+
 func NewMetrics() Metrics {
 	result := make(Metrics)
 	for metric := range CommonMetrics {
diff --git a/pkg/metrics/kubelet_metrics.go b/pkg/metrics/kubelet_metrics.go
index 9d618247f5..b9eb26717f 100644
--- a/pkg/metrics/kubelet_metrics.go
+++ b/pkg/metrics/kubelet_metrics.go
@@ -61,8 +61,6 @@ var KnownKubeletMetrics = map[string][]string{
 	"container_spec_memory_swap_limit_bytes": {"id", "image", "kubernetes_container_name", "kubernetes_namespace", "kubernetes_pod_name", "name"},
 	"container_start_time_seconds": {"id", "image", "kubernetes_container_name", "kubernetes_namespace", "kubernetes_pod_name", "name"},
 	"container_tasks_state": {"id", "image", "kubernetes_container_name", "kubernetes_namespace", "kubernetes_pod_name", "name", "state"},
-	"get_token_count": {},
-	"get_token_fail_count": {},
 	"kubelet_container_manager_latency_microseconds": {"operation_type", "quantile"},
 	"kubelet_container_manager_latency_microseconds_count": {"operation_type"},
 	"kubelet_container_manager_latency_microseconds_sum": {"operation_type"},
@@ -98,6 +96,13 @@ var KnownKubeletMetrics = map[string][]string{
 	"rest_client_request_status_codes": {"code", "host", "method"},
 }
 
+// KubeletMetricsLabelsToSkip holds the label names that PrintSample leaves out of its output.
+var KubeletMetricsLabelsToSkip = sets.NewString(
+	"kubernetes_namespace",
+	"image",
+	"name",
+)
+
 type KubeletMetrics Metrics
 
 func NewKubeletMetrics() KubeletMetrics {
diff --git a/test/e2e/e2e_test.go b/test/e2e/e2e_test.go
index 1de2b4f4cd..1da3e22b1f 100644
--- a/test/e2e/e2e_test.go
+++ b/test/e2e/e2e_test.go
@@ -90,6 +90,7 @@ func init() {
 	flag.BoolVar(&testContext.CleanStart, "clean-start", false, "If true, purge all namespaces except default and system before running tests. This serves to cleanup test namespaces from failed/interrupted e2e runs in a long-lived cluster.")
 	flag.BoolVar(&testContext.GatherKubeSystemResourceUsageData, "gather-resource-usage", false, "If set to true framework will be monitoring resource usage of system add-ons in (some) e2e tests.")
 	flag.BoolVar(&testContext.GatherLogsSizes, "gather-logs-sizes", false, "If set to true framework will be monitoring logs sizes on all machines running e2e tests.")
+	flag.BoolVar(&testContext.GatherMetricsAfterTest, "gather-metrics-at-teardown", false, "If set to true framework will gather metrics from all components after each test.")
 }
 
 func TestE2E(t *testing.T) {
diff --git a/test/e2e/framework.go b/test/e2e/framework.go
index 0bc3b0783b..afad37ce89 100644
--- a/test/e2e/framework.go
+++ b/test/e2e/framework.go
@@ -26,6 +26,7 @@ import (
 	"k8s.io/kubernetes/pkg/api"
 	client "k8s.io/kubernetes/pkg/client/unversioned"
 	"k8s.io/kubernetes/pkg/fields"
+	"k8s.io/kubernetes/pkg/metrics"
 
 	. "github.com/onsi/ginkgo"
 	. "github.com/onsi/gomega"
@@ -152,6 +153,39 @@ func (f *Framework) afterEach() {
 		close(f.logsSizeCloseChannel)
 		f.logsSizeWaitGroup.Wait()
 	}
+
+	if testContext.GatherMetricsAfterTest {
+		// TODO: enable Scheduler and ControllerManager metrics grabbing when Master's Kubelet will be registered.
+		grabber, err := metrics.NewMetricsGrabber(f.Client, true, false, false, true)
+		if err != nil {
+			Logf("Failed to create MetricsGrabber. Skipping metrics gathering. Error: %v", err)
+		} else {
+			received, err := grabber.Grab(nil)
+			if err != nil {
+				Logf("MetricsGrabber failed to grab metrics. Skipping metrics gathering. Error: %v", err)
+			} else {
+				buf := bytes.Buffer{}
+				// List() returns the metric names sorted, which keeps the order of the logged sections stable.
+				for _, interestingMetric := range InterestingApiServerMetrics.List() {
+					fmt.Fprintf(&buf, "For %v:\n", interestingMetric)
+					for _, sample := range received.ApiServerMetrics[interestingMetric] {
+						fmt.Fprintf(&buf, "\t%v\n", metrics.PrintSample(sample))
+					}
+				}
+				for kubelet, grabbed := range received.KubeletMetrics {
+					fmt.Fprintf(&buf, "For %v:\n", kubelet)
+					for _, interestingMetric := range InterestingKubeletMetrics.List() {
+						fmt.Fprintf(&buf, "\tFor %v:\n", interestingMetric)
+						for _, sample := range grabbed[interestingMetric] {
+							fmt.Fprintf(&buf, "\t\t%v\n", metrics.PrintSample(sample))
+						}
+					}
+				}
+				Logf("%v", buf.String())
+			}
+		}
+	}
+
 	// Paranoia-- prevent reuse!
 	f.Namespace = nil
 	f.Client = nil
diff --git a/test/e2e/metrics_util.go b/test/e2e/metrics_util.go
index d738570164..c68c9f2d30 100644
--- a/test/e2e/metrics_util.go
+++ b/test/e2e/metrics_util.go
@@ -46,6 +46,50 @@ const (
 	apiCallLatencyLargeThreshold time.Duration = 1 * time.Second
 )
 
+// InterestingApiServerMetrics names the API server metrics that afterEach logs when GatherMetricsAfterTest is set.
+var InterestingApiServerMetrics = sets.NewString(
+	"apiserver_request_count",
+	"apiserver_request_latencies_bucket",
+	"etcd_helper_cache_entry_count",
+	"etcd_helper_cache_hit_count",
+	"etcd_helper_cache_miss_count",
+	"etcd_request_cache_add_latencies_summary",
+	"etcd_request_cache_get_latencies_summary",
+	"etcd_request_latencies_summary",
+	"go_gc_duration_seconds",
+	"go_goroutines",
+	"process_cpu_seconds_total",
+	"process_open_fds",
+	"process_resident_memory_bytes",
+	"process_start_time_seconds",
+	"process_virtual_memory_bytes",
+)
+
+// InterestingKubeletMetrics names the Kubelet metrics that afterEach logs when GatherMetricsAfterTest is set.
+var InterestingKubeletMetrics = sets.NewString(
+	"container_cpu_system_seconds_total",
+	"container_cpu_user_seconds_total",
+	"container_fs_io_time_weighted_seconds_total",
+	"container_memory_usage_bytes",
+	"container_spec_cpu_shares",
+	"container_start_time_seconds",
+	"go_gc_duration_seconds",
+	"go_goroutines",
+	"kubelet_container_manager_latency_microseconds",
+	"kubelet_docker_errors",
+	"kubelet_docker_operations_latency_microseconds",
+	"kubelet_generate_pod_status_latency_microseconds",
+	"kubelet_pod_start_latency_microseconds",
+	"kubelet_pod_worker_latency_microseconds",
+	"kubelet_pod_worker_start_latency_microseconds",
+	"kubelet_sync_pods_latency_microseconds",
+	"process_cpu_seconds_total",
+	"process_open_fds",
+	"process_resident_memory_bytes",
+	"process_start_time_seconds",
+	"process_virtual_memory_bytes",
+)
+
 // Dashboard metrics
 type LatencyMetric struct {
 	Perc50 time.Duration `json:"Perc50"`
diff --git a/test/e2e/util.go b/test/e2e/util.go
index 1fae3e4032..8ed47b7704 100644
--- a/test/e2e/util.go
+++ b/test/e2e/util.go
@@ -153,6 +153,7 @@ type TestContextType struct {
 	// It will read the data every 30 seconds from all Nodes and print summary during afterEach.
 	GatherKubeSystemResourceUsageData bool
 	GatherLogsSizes bool
+	GatherMetricsAfterTest bool
 }
 
 var testContext TestContextType