mirror of https://github.com/k3s-io/k3s
Add a flag that will make tests gather metrics from all running components after each test finishes.
parent
c36226bc39
commit
2dcafa3854
|
@ -113,6 +113,7 @@ from-literal
|
||||||
func-dest
|
func-dest
|
||||||
fuzz-iters
|
fuzz-iters
|
||||||
gather-logs-sizes
|
gather-logs-sizes
|
||||||
|
gather-metrics-at-teardown
|
||||||
gather-resource-usage
|
gather-resource-usage
|
||||||
gce-project
|
gce-project
|
||||||
gce-service-account
|
gce-service-account
|
||||||
|
|
|
@ -42,8 +42,6 @@ var KnownApiServerMetrics = map[string][]string{
|
||||||
"etcd_request_latencies_summary": {"operation", "type", "quantile"},
|
"etcd_request_latencies_summary": {"operation", "type", "quantile"},
|
||||||
"etcd_request_latencies_summary_count": {"operation", "type"},
|
"etcd_request_latencies_summary_count": {"operation", "type"},
|
||||||
"etcd_request_latencies_summary_sum": {"operation", "type"},
|
"etcd_request_latencies_summary_sum": {"operation", "type"},
|
||||||
"get_token_count": {},
|
|
||||||
"get_token_fail_count": {},
|
|
||||||
"rest_client_request_latency_microseconds": {"url", "verb", "quantile"},
|
"rest_client_request_latency_microseconds": {"url", "verb", "quantile"},
|
||||||
"rest_client_request_latency_microseconds_count": {"url", "verb"},
|
"rest_client_request_latency_microseconds_count": {"url", "verb"},
|
||||||
"rest_client_request_latency_microseconds_sum": {"url", "verb"},
|
"rest_client_request_latency_microseconds_sum": {"url", "verb"},
|
||||||
|
|
|
@ -29,38 +29,55 @@ import (
|
||||||
)
|
)
|
||||||
|
|
||||||
var CommonMetrics = map[string][]string{
|
var CommonMetrics = map[string][]string{
|
||||||
"process_start_time_seconds": {},
|
"get_token_count": {},
|
||||||
"process_resident_memory_bytes": {},
|
"get_token_fail_count": {},
|
||||||
"process_virtual_memory_bytes": {},
|
"go_gc_duration_seconds": {"quantile"},
|
||||||
"process_cpu_seconds_total": {},
|
"go_gc_duration_seconds_count": {},
|
||||||
"process_max_fds": {},
|
"go_gc_duration_seconds_sum": {},
|
||||||
"process_open_fds": {},
|
"go_goroutines": {},
|
||||||
|
|
||||||
"http_request_size_bytes": {"handler", "quantile"},
|
|
||||||
"http_request_size_bytes_count": {"handler"},
|
|
||||||
"http_request_size_bytes_sum": {"handler"},
|
|
||||||
"http_request_duration_microseconds": {"handler", "quantile"},
|
"http_request_duration_microseconds": {"handler", "quantile"},
|
||||||
"http_request_duration_microseconds_count": {"handler"},
|
"http_request_duration_microseconds_count": {"handler"},
|
||||||
"http_request_duration_microseconds_sum": {"handler"},
|
"http_request_duration_microseconds_sum": {"handler"},
|
||||||
|
"http_request_size_bytes": {"handler", "quantile"},
|
||||||
|
"http_request_size_bytes_count": {"handler"},
|
||||||
|
"http_request_size_bytes_sum": {"handler"},
|
||||||
"http_requests_total": {"handler", "method", "code"},
|
"http_requests_total": {"handler", "method", "code"},
|
||||||
|
"http_response_size_bytes": {"handler", "quantile"},
|
||||||
"http_response_size_bytes": {"handler", "quantile"},
|
"http_response_size_bytes_count": {"handler"},
|
||||||
"http_response_size_bytes_count": {"handler"},
|
"http_response_size_bytes_sum": {"handler"},
|
||||||
"http_response_size_bytes_sum": {"handler"},
|
"kubernetes_build_info": {"major", "minor", "gitCommit", "gitTreeState", "gitVersion"},
|
||||||
|
"process_cpu_seconds_total": {},
|
||||||
"ssh_tunnel_open_fail_count": {},
|
"process_max_fds": {},
|
||||||
"ssh_tunnel_open_count": {},
|
"process_open_fds": {},
|
||||||
|
"process_resident_memory_bytes": {},
|
||||||
"go_gc_duration_seconds": {"quantile"},
|
"process_start_time_seconds": {},
|
||||||
"go_gc_duration_seconds_count": {},
|
"process_virtual_memory_bytes": {},
|
||||||
"go_gc_duration_seconds_sum": {},
|
"ssh_tunnel_open_count": {},
|
||||||
"go_goroutines": {},
|
"ssh_tunnel_open_fail_count": {},
|
||||||
|
|
||||||
"kubernetes_build_info": {"major", "minor", "gitCommit", "gitTreeState", "gitVersion"},
|
|
||||||
}
|
}
|
||||||
|
|
||||||
type Metrics map[string]model.Samples
|
type Metrics map[string]model.Samples
|
||||||
|
|
||||||
|
func PrintSample(sample *model.Sample) string {
|
||||||
|
buf := make([]string, 0)
|
||||||
|
// Id is a VERY special label. For 'normal' containers it's useless, but it's necessary
|
||||||
|
// for 'system' containers (e.g. /docker-daemon, /kubelet, etc.). We know if that's the
|
||||||
|
// case by checking if there's a label "kubernetes_container_name" present. It's hacky
|
||||||
|
// but it works...
|
||||||
|
_, normalContainer := sample.Metric["kubernetes_container_name"]
|
||||||
|
for k, v := range sample.Metric {
|
||||||
|
if strings.HasPrefix(string(k), "__") || KubeletMetricsLabelsToSkip.Has(string(k)) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
if string(k) == "id" && normalContainer {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
buf = append(buf, fmt.Sprintf("%v=%v", string(k), v))
|
||||||
|
}
|
||||||
|
return fmt.Sprintf("[%v] = %v", strings.Join(buf, ","), sample.Value)
|
||||||
|
}
|
||||||
|
|
||||||
func NewMetrics() Metrics {
|
func NewMetrics() Metrics {
|
||||||
result := make(Metrics)
|
result := make(Metrics)
|
||||||
for metric := range CommonMetrics {
|
for metric := range CommonMetrics {
|
||||||
|
|
|
@ -61,8 +61,6 @@ var KnownKubeletMetrics = map[string][]string{
|
||||||
"container_spec_memory_swap_limit_bytes": {"id", "image", "kubernetes_container_name", "kubernetes_namespace", "kubernetes_pod_name", "name"},
|
"container_spec_memory_swap_limit_bytes": {"id", "image", "kubernetes_container_name", "kubernetes_namespace", "kubernetes_pod_name", "name"},
|
||||||
"container_start_time_seconds": {"id", "image", "kubernetes_container_name", "kubernetes_namespace", "kubernetes_pod_name", "name"},
|
"container_start_time_seconds": {"id", "image", "kubernetes_container_name", "kubernetes_namespace", "kubernetes_pod_name", "name"},
|
||||||
"container_tasks_state": {"id", "image", "kubernetes_container_name", "kubernetes_namespace", "kubernetes_pod_name", "name", "state"},
|
"container_tasks_state": {"id", "image", "kubernetes_container_name", "kubernetes_namespace", "kubernetes_pod_name", "name", "state"},
|
||||||
"get_token_count": {},
|
|
||||||
"get_token_fail_count": {},
|
|
||||||
"kubelet_container_manager_latency_microseconds": {"operation_type", "quantile"},
|
"kubelet_container_manager_latency_microseconds": {"operation_type", "quantile"},
|
||||||
"kubelet_container_manager_latency_microseconds_count": {"operation_type"},
|
"kubelet_container_manager_latency_microseconds_count": {"operation_type"},
|
||||||
"kubelet_container_manager_latency_microseconds_sum": {"operation_type"},
|
"kubelet_container_manager_latency_microseconds_sum": {"operation_type"},
|
||||||
|
@ -98,6 +96,12 @@ var KnownKubeletMetrics = map[string][]string{
|
||||||
"rest_client_request_status_codes": {"code", "host", "method"},
|
"rest_client_request_status_codes": {"code", "host", "method"},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var KubeletMetricsLabelsToSkip = sets.NewString(
|
||||||
|
"kubernetes_namespace",
|
||||||
|
"image",
|
||||||
|
"name",
|
||||||
|
)
|
||||||
|
|
||||||
type KubeletMetrics Metrics
|
type KubeletMetrics Metrics
|
||||||
|
|
||||||
func NewKubeletMetrics() KubeletMetrics {
|
func NewKubeletMetrics() KubeletMetrics {
|
||||||
|
|
|
@ -90,6 +90,7 @@ func init() {
|
||||||
flag.BoolVar(&testContext.CleanStart, "clean-start", false, "If true, purge all namespaces except default and system before running tests. This serves to cleanup test namespaces from failed/interrupted e2e runs in a long-lived cluster.")
|
flag.BoolVar(&testContext.CleanStart, "clean-start", false, "If true, purge all namespaces except default and system before running tests. This serves to cleanup test namespaces from failed/interrupted e2e runs in a long-lived cluster.")
|
||||||
flag.BoolVar(&testContext.GatherKubeSystemResourceUsageData, "gather-resource-usage", false, "If set to true framework will be monitoring resource usage of system add-ons in (some) e2e tests.")
|
flag.BoolVar(&testContext.GatherKubeSystemResourceUsageData, "gather-resource-usage", false, "If set to true framework will be monitoring resource usage of system add-ons in (some) e2e tests.")
|
||||||
flag.BoolVar(&testContext.GatherLogsSizes, "gather-logs-sizes", false, "If set to true framework will be monitoring logs sizes on all machines running e2e tests.")
|
flag.BoolVar(&testContext.GatherLogsSizes, "gather-logs-sizes", false, "If set to true framework will be monitoring logs sizes on all machines running e2e tests.")
|
||||||
|
flag.BoolVar(&testContext.GatherMetricsAfterTest, "gather-metrics-at-teardown", false, "If set to true framwork will gather metrics from all components after each test.")
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestE2E(t *testing.T) {
|
func TestE2E(t *testing.T) {
|
||||||
|
|
|
@ -26,6 +26,7 @@ import (
|
||||||
"k8s.io/kubernetes/pkg/api"
|
"k8s.io/kubernetes/pkg/api"
|
||||||
client "k8s.io/kubernetes/pkg/client/unversioned"
|
client "k8s.io/kubernetes/pkg/client/unversioned"
|
||||||
"k8s.io/kubernetes/pkg/fields"
|
"k8s.io/kubernetes/pkg/fields"
|
||||||
|
"k8s.io/kubernetes/pkg/metrics"
|
||||||
|
|
||||||
. "github.com/onsi/ginkgo"
|
. "github.com/onsi/ginkgo"
|
||||||
. "github.com/onsi/gomega"
|
. "github.com/onsi/gomega"
|
||||||
|
@ -152,6 +153,38 @@ func (f *Framework) afterEach() {
|
||||||
close(f.logsSizeCloseChannel)
|
close(f.logsSizeCloseChannel)
|
||||||
f.logsSizeWaitGroup.Wait()
|
f.logsSizeWaitGroup.Wait()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if testContext.GatherMetricsAfterTest {
|
||||||
|
// TODO: enable Scheduler and ControllerManager metrics grabbing once the Master's Kubelet is registered.
|
||||||
|
grabber, err := metrics.NewMetricsGrabber(f.Client, true, false, false, true)
|
||||||
|
if err != nil {
|
||||||
|
Logf("Failed to create MetricsGrabber. Skipping metrics gathering.")
|
||||||
|
} else {
|
||||||
|
received, err := grabber.Grab(nil)
|
||||||
|
if err != nil {
|
||||||
|
Logf("MetricsGrabber failed grab metrics. Skipping metrics gathering.")
|
||||||
|
} else {
|
||||||
|
buf := bytes.Buffer{}
|
||||||
|
for interestingMetric := range InterestingApiServerMetrics {
|
||||||
|
buf.WriteString(fmt.Sprintf("For %v:\n", interestingMetric))
|
||||||
|
for _, sample := range received.ApiServerMetrics[interestingMetric] {
|
||||||
|
buf.WriteString(fmt.Sprintf("\t%v\n", metrics.PrintSample(sample)))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for kubelet, grabbed := range received.KubeletMetrics {
|
||||||
|
buf.WriteString(fmt.Sprintf("For %v:\n", kubelet))
|
||||||
|
for interestingMetric := range InterestingKubeletMetrics {
|
||||||
|
buf.WriteString(fmt.Sprintf("\tFor %v:\n", interestingMetric))
|
||||||
|
for _, sample := range grabbed[interestingMetric] {
|
||||||
|
buf.WriteString(fmt.Sprintf("\t\t%v\n", metrics.PrintSample(sample)))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Logf("%v", buf.String())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Paranoia-- prevent reuse!
|
// Paranoia-- prevent reuse!
|
||||||
f.Namespace = nil
|
f.Namespace = nil
|
||||||
f.Client = nil
|
f.Client = nil
|
||||||
|
|
|
@ -46,6 +46,48 @@ const (
|
||||||
apiCallLatencyLargeThreshold time.Duration = 1 * time.Second
|
apiCallLatencyLargeThreshold time.Duration = 1 * time.Second
|
||||||
)
|
)
|
||||||
|
|
||||||
|
var InterestingApiServerMetrics = sets.NewString(
|
||||||
|
"apiserver_request_count",
|
||||||
|
"apiserver_request_latencies_bucket",
|
||||||
|
"etcd_helper_cache_entry_count",
|
||||||
|
"etcd_helper_cache_hit_count",
|
||||||
|
"etcd_helper_cache_miss_count",
|
||||||
|
"etcd_request_cache_add_latencies_summary",
|
||||||
|
"etcd_request_cache_get_latencies_summary",
|
||||||
|
"etcd_request_latencies_summary",
|
||||||
|
"go_gc_duration_seconds",
|
||||||
|
"go_goroutines",
|
||||||
|
"process_cpu_seconds_total",
|
||||||
|
"process_open_fds",
|
||||||
|
"process_resident_memory_bytes",
|
||||||
|
"process_start_time_seconds",
|
||||||
|
"process_virtual_memory_bytes",
|
||||||
|
)
|
||||||
|
|
||||||
|
var InterestingKubeletMetrics = sets.NewString(
|
||||||
|
"container_cpu_system_seconds_total",
|
||||||
|
"container_cpu_user_seconds_total",
|
||||||
|
"container_fs_io_time_weighted_seconds_total",
|
||||||
|
"container_memory_usage_bytes",
|
||||||
|
"container_spec_cpu_shares",
|
||||||
|
"container_start_time_seconds",
|
||||||
|
"go_gc_duration_seconds",
|
||||||
|
"go_goroutines",
|
||||||
|
"kubelet_container_manager_latency_microseconds",
|
||||||
|
"kubelet_docker_errors",
|
||||||
|
"kubelet_docker_operations_latency_microseconds",
|
||||||
|
"kubelet_generate_pod_status_latency_microseconds",
|
||||||
|
"kubelet_pod_start_latency_microseconds",
|
||||||
|
"kubelet_pod_worker_latency_microseconds",
|
||||||
|
"kubelet_pod_worker_start_latency_microseconds",
|
||||||
|
"kubelet_sync_pods_latency_microseconds",
|
||||||
|
"process_cpu_seconds_total",
|
||||||
|
"process_open_fds",
|
||||||
|
"process_resident_memory_bytes",
|
||||||
|
"process_start_time_seconds",
|
||||||
|
"process_virtual_memory_bytes",
|
||||||
|
)
|
||||||
|
|
||||||
// Dashboard metrics
|
// Dashboard metrics
|
||||||
type LatencyMetric struct {
|
type LatencyMetric struct {
|
||||||
Perc50 time.Duration `json:"Perc50"`
|
Perc50 time.Duration `json:"Perc50"`
|
||||||
|
|
|
@ -153,6 +153,7 @@ type TestContextType struct {
|
||||||
// It will read the data every 30 seconds from all Nodes and print summary during afterEach.
|
// It will read the data every 30 seconds from all Nodes and print summary during afterEach.
|
||||||
GatherKubeSystemResourceUsageData bool
|
GatherKubeSystemResourceUsageData bool
|
||||||
GatherLogsSizes bool
|
GatherLogsSizes bool
|
||||||
|
GatherMetricsAfterTest bool
|
||||||
}
|
}
|
||||||
|
|
||||||
var testContext TestContextType
|
var testContext TestContextType
|
||||||
|
|
Loading…
Reference in New Issue