Add a flag that makes tests gather metrics from all running components after the test finishes.

pull/6/head
gmarek 2015-12-23 15:56:56 +01:00
parent c36226bc39
commit 2dcafa3854
8 changed files with 125 additions and 28 deletions
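
In short: passing --gather-metrics-at-teardown=true to the e2e test binary (the exact invocation depends on how the suite is driven) makes the framework's afterEach grab metrics from the apiserver and the kubelets and log the subset of samples listed in InterestingApiServerMetrics and InterestingKubeletMetrics.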

View File

@@ -113,6 +113,7 @@ from-literal
func-dest
fuzz-iters
gather-logs-sizes
gather-metrics-at-teardown
gather-resource-usage
gce-project
gce-service-account

View File

@@ -42,8 +42,6 @@ var KnownApiServerMetrics = map[string][]string{
"etcd_request_latencies_summary": {"operation", "type", "quantile"},
"etcd_request_latencies_summary_count": {"operation", "type"},
"etcd_request_latencies_summary_sum": {"operation", "type"},
"get_token_count": {},
"get_token_fail_count": {},
"rest_client_request_latency_microseconds": {"url", "verb", "quantile"},
"rest_client_request_latency_microseconds_count": {"url", "verb"},
"rest_client_request_latency_microseconds_sum": {"url", "verb"},

View File

@@ -29,38 +29,55 @@ import (
)
var CommonMetrics = map[string][]string{
"process_start_time_seconds": {},
"process_resident_memory_bytes": {},
"process_virtual_memory_bytes": {},
"process_cpu_seconds_total": {},
"process_max_fds": {},
"process_open_fds": {},
"http_request_size_bytes": {"handler", "quantile"},
"http_request_size_bytes_count": {"handler"},
"http_request_size_bytes_sum": {"handler"},
"http_request_duration_microseconds": {"handler", "quantile"},
"http_request_duration_microseconds_count": {"handler"},
"http_request_duration_microseconds_sum": {"handler"},
"http_requests_total": {"handler", "method", "code"},
"http_response_size_bytes": {"handler", "quantile"},
"http_response_size_bytes_count": {"handler"},
"http_response_size_bytes_sum": {"handler"},
"ssh_tunnel_open_fail_count": {},
"ssh_tunnel_open_count": {},
"get_token_count": {},
"get_token_fail_count": {},
"go_gc_duration_seconds": {"quantile"},
"go_gc_duration_seconds_count": {},
"go_gc_duration_seconds_sum": {},
"go_goroutines": {},
"http_request_duration_microseconds": {"handler", "quantile"},
"http_request_duration_microseconds_count": {"handler"},
"http_request_duration_microseconds_sum": {"handler"},
"http_request_size_bytes": {"handler", "quantile"},
"http_request_size_bytes_count": {"handler"},
"http_request_size_bytes_sum": {"handler"},
"http_requests_total": {"handler", "method", "code"},
"http_response_size_bytes": {"handler", "quantile"},
"http_response_size_bytes_count": {"handler"},
"http_response_size_bytes_sum": {"handler"},
"kubernetes_build_info": {"major", "minor", "gitCommit", "gitTreeState", "gitVersion"},
"process_cpu_seconds_total": {},
"process_max_fds": {},
"process_open_fds": {},
"process_resident_memory_bytes": {},
"process_start_time_seconds": {},
"process_virtual_memory_bytes": {},
"ssh_tunnel_open_count": {},
"ssh_tunnel_open_fail_count": {},
}
type Metrics map[string]model.Samples
func PrintSample(sample *model.Sample) string {
buf := make([]string, 0)
// Id is a VERY special label. For 'normal' containers it's useless, but it's necessary
// for 'system' containers (e.g. /docker-daemon, /kubelet, etc.). We can tell which case
// we're dealing with by checking whether the "kubernetes_container_name" label is
// present. It's hacky, but it works...
_, normalContainer := sample.Metric["kubernetes_container_name"]
for k, v := range sample.Metric {
if strings.HasPrefix(string(k), "__") || KubeletMetricsLabelsToSkip.Has(string(k)) {
continue
}
if string(k) == "id" && normalContainer {
continue
}
buf = append(buf, fmt.Sprintf("%v=%v", string(k), v))
}
return fmt.Sprintf("[%v] = %v", strings.Join(buf, ","), sample.Value)
}
func NewMetrics() Metrics {
result := make(Metrics)
for metric := range CommonMetrics {

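For illustration, a minimal standalone sketch of what PrintSample produces for a 'normal' container sample. The label names and values below are invented, and the import paths assume the vendored Prometheus common/model package:

package main

import (
	"fmt"

	"github.com/prometheus/common/model"

	"k8s.io/kubernetes/pkg/metrics"
)

func main() {
	// Hypothetical sample for a "normal" (non-system) container; every label
	// value is made up for illustration.
	sample := &model.Sample{
		Metric: model.Metric{
			"__name__":                  "container_cpu_user_seconds_total", // dropped: "__" prefix
			"id":                        "/docker/abc123",                   // dropped: normal container
			"image":                     "nginx",                            // dropped: in KubeletMetricsLabelsToSkip
			"kubernetes_container_name": "nginx",
			"kubernetes_pod_name":       "nginx-xyz",
		},
		Value: 12.5,
	}
	// Prints something like (Go map iteration order is random, so label order varies):
	//   [kubernetes_container_name=nginx,kubernetes_pod_name=nginx-xyz] = 12.5
	fmt.Println(metrics.PrintSample(sample))
}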
View File

@@ -61,8 +61,6 @@ var KnownKubeletMetrics = map[string][]string{
"container_spec_memory_swap_limit_bytes": {"id", "image", "kubernetes_container_name", "kubernetes_namespace", "kubernetes_pod_name", "name"},
"container_start_time_seconds": {"id", "image", "kubernetes_container_name", "kubernetes_namespace", "kubernetes_pod_name", "name"},
"container_tasks_state": {"id", "image", "kubernetes_container_name", "kubernetes_namespace", "kubernetes_pod_name", "name", "state"},
"get_token_count": {},
"get_token_fail_count": {},
"kubelet_container_manager_latency_microseconds": {"operation_type", "quantile"},
"kubelet_container_manager_latency_microseconds_count": {"operation_type"},
"kubelet_container_manager_latency_microseconds_sum": {"operation_type"},
@@ -98,6 +96,12 @@ var KnownKubeletMetrics = map[string][]string{
"rest_client_request_status_codes": {"code", "host", "method"},
}
var KubeletMetricsLabelsToSkip = sets.NewString(
"kubernetes_namespace",
"image",
"name",
)
type KubeletMetrics Metrics
func NewKubeletMetrics() KubeletMetrics {

View File

@@ -90,6 +90,7 @@ func init() {
flag.BoolVar(&testContext.CleanStart, "clean-start", false, "If true, purge all namespaces except default and system before running tests. This serves to cleanup test namespaces from failed/interrupted e2e runs in a long-lived cluster.")
flag.BoolVar(&testContext.GatherKubeSystemResourceUsageData, "gather-resource-usage", false, "If set to true framework will be monitoring resource usage of system add-ons in (some) e2e tests.")
flag.BoolVar(&testContext.GatherLogsSizes, "gather-logs-sizes", false, "If set to true framework will be monitoring logs sizes on all machines running e2e tests.")
flag.BoolVar(&testContext.GatherMetricsAfterTest, "gather-metrics-at-teardown", false, "If set to true framework will gather metrics from all components after each test.")
}
func TestE2E(t *testing.T) {

View File

@@ -26,6 +26,7 @@ import (
"k8s.io/kubernetes/pkg/api"
client "k8s.io/kubernetes/pkg/client/unversioned"
"k8s.io/kubernetes/pkg/fields"
"k8s.io/kubernetes/pkg/metrics"
. "github.com/onsi/ginkgo"
. "github.com/onsi/gomega"
@@ -152,6 +153,38 @@ func (f *Framework) afterEach() {
close(f.logsSizeCloseChannel)
f.logsSizeWaitGroup.Wait()
}
if testContext.GatherMetricsAfterTest {
// TODO: enable Scheduler and ControllerManager metrics grabbing once the master's Kubelet is registered.
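// Note: the four boolean arguments appear to select which components are scraped. The
// two 'false' values keep the scheduler and controller-manager disabled (per the TODO
// above), while kubelet and apiserver metrics are collected and read back below via
// received.KubeletMetrics and received.ApiServerMetrics.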
grabber, err := metrics.NewMetricsGrabber(f.Client, true, false, false, true)
if err != nil {
Logf("Failed to create MetricsGrabber. Skipping metrics gathering.")
} else {
received, err := grabber.Grab(nil)
if err != nil {
Logf("MetricsGrabber failed to grab metrics. Skipping metrics gathering.")
} else {
buf := bytes.Buffer{}
for interestingMetric := range InterestingApiServerMetrics {
buf.WriteString(fmt.Sprintf("For %v:\n", interestingMetric))
for _, sample := range received.ApiServerMetrics[interestingMetric] {
buf.WriteString(fmt.Sprintf("\t%v\n", metrics.PrintSample(sample)))
}
}
for kubelet, grabbed := range received.KubeletMetrics {
buf.WriteString(fmt.Sprintf("For %v:\n", kubelet))
for interestingMetric := range InterestingKubeletMetrics {
buf.WriteString(fmt.Sprintf("\tFor %v:\n", interestingMetric))
for _, sample := range grabbed[interestingMetric] {
buf.WriteString(fmt.Sprintf("\t\t%v\n", metrics.PrintSample(sample)))
}
}
}
Logf("%v", buf.String())
}
}
}
// Paranoia-- prevent reuse!
f.Namespace = nil
f.Client = nil

View File

@@ -46,6 +46,48 @@ const (
apiCallLatencyLargeThreshold time.Duration = 1 * time.Second
)
var InterestingApiServerMetrics = sets.NewString(
"apiserver_request_count",
"apiserver_request_latencies_bucket",
"etcd_helper_cache_entry_count",
"etcd_helper_cache_hit_count",
"etcd_helper_cache_miss_count",
"etcd_request_cache_add_latencies_summary",
"etcd_request_cache_get_latencies_summary",
"etcd_request_latencies_summary",
"go_gc_duration_seconds",
"go_goroutines",
"process_cpu_seconds_total",
"process_open_fds",
"process_resident_memory_bytes",
"process_start_time_seconds",
"process_virtual_memory_bytes",
)
var InterestingKubeletMetrics = sets.NewString(
"container_cpu_system_seconds_total",
"container_cpu_user_seconds_total",
"container_fs_io_time_weighted_seconds_total",
"container_memory_usage_bytes",
"container_spec_cpu_shares",
"container_start_time_seconds",
"go_gc_duration_seconds",
"go_goroutines",
"kubelet_container_manager_latency_microseconds",
"kubelet_docker_errors",
"kubelet_docker_operations_latency_microseconds",
"kubelet_generate_pod_status_latency_microseconds",
"kubelet_pod_start_latency_microseconds",
"kubelet_pod_worker_latency_microseconds",
"kubelet_pod_worker_start_latency_microseconds",
"kubelet_sync_pods_latency_microseconds",
"process_cpu_seconds_total",
"process_open_fds",
"process_resident_memory_bytes",
"process_start_time_seconds",
"process_virtual_memory_bytes",
)
// Dashboard metrics
type LatencyMetric struct {
Perc50 time.Duration `json:"Perc50"`

View File

@@ -153,6 +153,7 @@ type TestContextType struct {
// It will read the data every 30 seconds from all Nodes and print summary during afterEach.
GatherKubeSystemResourceUsageData bool
GatherLogsSizes bool
GatherMetricsAfterTest bool
}
var testContext TestContextType