mirror of https://github.com/k3s-io/k3s
Add a flag that will make test gather metrics from all running components after the test finishes.
parent
c36226bc39
commit
2dcafa3854
|
@ -113,6 +113,7 @@ from-literal
|
|||
func-dest
|
||||
fuzz-iters
|
||||
gather-logs-sizes
|
||||
gather-metrics-at-teardown
|
||||
gather-resource-usage
|
||||
gce-project
|
||||
gce-service-account
|
||||
|
|
|
@ -42,8 +42,6 @@ var KnownApiServerMetrics = map[string][]string{
|
|||
"etcd_request_latencies_summary": {"operation", "type", "quantile"},
|
||||
"etcd_request_latencies_summary_count": {"operation", "type"},
|
||||
"etcd_request_latencies_summary_sum": {"operation", "type"},
|
||||
"get_token_count": {},
|
||||
"get_token_fail_count": {},
|
||||
"rest_client_request_latency_microseconds": {"url", "verb", "quantile"},
|
||||
"rest_client_request_latency_microseconds_count": {"url", "verb"},
|
||||
"rest_client_request_latency_microseconds_sum": {"url", "verb"},
|
||||
|
|
|
@ -29,38 +29,55 @@ import (
|
|||
)
|
||||
|
||||
var CommonMetrics = map[string][]string{
|
||||
"process_start_time_seconds": {},
|
||||
"process_resident_memory_bytes": {},
|
||||
"process_virtual_memory_bytes": {},
|
||||
"process_cpu_seconds_total": {},
|
||||
"process_max_fds": {},
|
||||
"process_open_fds": {},
|
||||
|
||||
"http_request_size_bytes": {"handler", "quantile"},
|
||||
"http_request_size_bytes_count": {"handler"},
|
||||
"http_request_size_bytes_sum": {"handler"},
|
||||
"get_token_count": {},
|
||||
"get_token_fail_count": {},
|
||||
"go_gc_duration_seconds": {"quantile"},
|
||||
"go_gc_duration_seconds_count": {},
|
||||
"go_gc_duration_seconds_sum": {},
|
||||
"go_goroutines": {},
|
||||
"http_request_duration_microseconds": {"handler", "quantile"},
|
||||
"http_request_duration_microseconds_count": {"handler"},
|
||||
"http_request_duration_microseconds_sum": {"handler"},
|
||||
"http_request_size_bytes": {"handler", "quantile"},
|
||||
"http_request_size_bytes_count": {"handler"},
|
||||
"http_request_size_bytes_sum": {"handler"},
|
||||
"http_requests_total": {"handler", "method", "code"},
|
||||
|
||||
"http_response_size_bytes": {"handler", "quantile"},
|
||||
"http_response_size_bytes_count": {"handler"},
|
||||
"http_response_size_bytes_sum": {"handler"},
|
||||
|
||||
"ssh_tunnel_open_fail_count": {},
|
||||
"ssh_tunnel_open_count": {},
|
||||
|
||||
"go_gc_duration_seconds": {"quantile"},
|
||||
"go_gc_duration_seconds_count": {},
|
||||
"go_gc_duration_seconds_sum": {},
|
||||
"go_goroutines": {},
|
||||
|
||||
"kubernetes_build_info": {"major", "minor", "gitCommit", "gitTreeState", "gitVersion"},
|
||||
"http_response_size_bytes": {"handler", "quantile"},
|
||||
"http_response_size_bytes_count": {"handler"},
|
||||
"http_response_size_bytes_sum": {"handler"},
|
||||
"kubernetes_build_info": {"major", "minor", "gitCommit", "gitTreeState", "gitVersion"},
|
||||
"process_cpu_seconds_total": {},
|
||||
"process_max_fds": {},
|
||||
"process_open_fds": {},
|
||||
"process_resident_memory_bytes": {},
|
||||
"process_start_time_seconds": {},
|
||||
"process_virtual_memory_bytes": {},
|
||||
"ssh_tunnel_open_count": {},
|
||||
"ssh_tunnel_open_fail_count": {},
|
||||
}
|
||||
|
||||
// Metrics maps a string key to the model.Samples stored under it.
// NOTE(review): the key is presumably the metric name — confirm against NewMetrics.
type Metrics map[string]model.Samples
|
||||
|
||||
func PrintSample(sample *model.Sample) string {
|
||||
buf := make([]string, 0)
|
||||
// Id is a VERY special label. For 'normal' container it's usless, but it's necessary
|
||||
// for 'system' containers (e.g. /docker-daemon, /kubelet, etc.). We know if that's the
|
||||
// case by checking if there's a label "kubernetes_container_name" present. It's hacky
|
||||
// but it works...
|
||||
_, normalContainer := sample.Metric["kubernetes_container_name"]
|
||||
for k, v := range sample.Metric {
|
||||
if strings.HasPrefix(string(k), "__") || KubeletMetricsLabelsToSkip.Has(string(k)) {
|
||||
continue
|
||||
}
|
||||
|
||||
if string(k) == "id" && normalContainer {
|
||||
continue
|
||||
}
|
||||
buf = append(buf, fmt.Sprintf("%v=%v", string(k), v))
|
||||
}
|
||||
return fmt.Sprintf("[%v] = %v", strings.Join(buf, ","), sample.Value)
|
||||
}
|
||||
|
||||
func NewMetrics() Metrics {
|
||||
result := make(Metrics)
|
||||
for metric := range CommonMetrics {
|
||||
|
|
|
@ -61,8 +61,6 @@ var KnownKubeletMetrics = map[string][]string{
|
|||
"container_spec_memory_swap_limit_bytes": {"id", "image", "kubernetes_container_name", "kubernetes_namespace", "kubernetes_pod_name", "name"},
|
||||
"container_start_time_seconds": {"id", "image", "kubernetes_container_name", "kubernetes_namespace", "kubernetes_pod_name", "name"},
|
||||
"container_tasks_state": {"id", "image", "kubernetes_container_name", "kubernetes_namespace", "kubernetes_pod_name", "name", "state"},
|
||||
"get_token_count": {},
|
||||
"get_token_fail_count": {},
|
||||
"kubelet_container_manager_latency_microseconds": {"operation_type", "quantile"},
|
||||
"kubelet_container_manager_latency_microseconds_count": {"operation_type"},
|
||||
"kubelet_container_manager_latency_microseconds_sum": {"operation_type"},
|
||||
|
@ -98,6 +96,12 @@ var KnownKubeletMetrics = map[string][]string{
|
|||
"rest_client_request_status_codes": {"code", "host", "method"},
|
||||
}
|
||||
|
||||
var KubeletMetricsLabelsToSkip = sets.NewString(
|
||||
"kubernetes_namespace",
|
||||
"image",
|
||||
"name",
|
||||
)
|
||||
|
||||
// KubeletMetrics is a distinct named type whose underlying type is Metrics.
type KubeletMetrics Metrics
|
||||
|
||||
func NewKubeletMetrics() KubeletMetrics {
|
||||
|
|
|
@ -90,6 +90,7 @@ func init() {
|
|||
flag.BoolVar(&testContext.CleanStart, "clean-start", false, "If true, purge all namespaces except default and system before running tests. This serves to cleanup test namespaces from failed/interrupted e2e runs in a long-lived cluster.")
|
||||
flag.BoolVar(&testContext.GatherKubeSystemResourceUsageData, "gather-resource-usage", false, "If set to true framework will be monitoring resource usage of system add-ons in (some) e2e tests.")
|
||||
flag.BoolVar(&testContext.GatherLogsSizes, "gather-logs-sizes", false, "If set to true framework will be monitoring logs sizes on all machines running e2e tests.")
|
||||
// --gather-metrics-at-teardown (default false) sets testContext.GatherMetricsAfterTest.
flag.BoolVar(&testContext.GatherMetricsAfterTest, "gather-metrics-at-teardown", false, "If set to true framework will gather metrics from all components after each test.")
|
||||
}
|
||||
|
||||
func TestE2E(t *testing.T) {
|
||||
|
|
|
@ -26,6 +26,7 @@ import (
|
|||
"k8s.io/kubernetes/pkg/api"
|
||||
client "k8s.io/kubernetes/pkg/client/unversioned"
|
||||
"k8s.io/kubernetes/pkg/fields"
|
||||
"k8s.io/kubernetes/pkg/metrics"
|
||||
|
||||
. "github.com/onsi/ginkgo"
|
||||
. "github.com/onsi/gomega"
|
||||
|
@ -152,6 +153,38 @@ func (f *Framework) afterEach() {
|
|||
close(f.logsSizeCloseChannel)
|
||||
f.logsSizeWaitGroup.Wait()
|
||||
}
|
||||
|
||||
if testContext.GatherMetricsAfterTest {
|
||||
// TODO: enable Scheduler and ControllerManager metrics grabbing when Master's Kubelet will be registered.
|
||||
grabber, err := metrics.NewMetricsGrabber(f.Client, true, false, false, true)
|
||||
if err != nil {
|
||||
Logf("Failed to create MetricsGrabber. Skipping metrics gathering.")
|
||||
} else {
|
||||
received, err := grabber.Grab(nil)
|
||||
if err != nil {
|
||||
Logf("MetricsGrabber failed grab metrics. Skipping metrics gathering.")
|
||||
} else {
|
||||
buf := bytes.Buffer{}
|
||||
for interestingMetric := range InterestingApiServerMetrics {
|
||||
buf.WriteString(fmt.Sprintf("For %v:\n", interestingMetric))
|
||||
for _, sample := range received.ApiServerMetrics[interestingMetric] {
|
||||
buf.WriteString(fmt.Sprintf("\t%v\n", metrics.PrintSample(sample)))
|
||||
}
|
||||
}
|
||||
for kubelet, grabbed := range received.KubeletMetrics {
|
||||
buf.WriteString(fmt.Sprintf("For %v:\n", kubelet))
|
||||
for interestingMetric := range InterestingKubeletMetrics {
|
||||
buf.WriteString(fmt.Sprintf("\tFor %v:\n", interestingMetric))
|
||||
for _, sample := range grabbed[interestingMetric] {
|
||||
buf.WriteString(fmt.Sprintf("\t\t%v\n", metrics.PrintSample(sample)))
|
||||
}
|
||||
}
|
||||
}
|
||||
Logf("%v", buf.String())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Paranoia-- prevent reuse!
|
||||
f.Namespace = nil
|
||||
f.Client = nil
|
||||
|
|
|
@ -46,6 +46,48 @@ const (
|
|||
apiCallLatencyLargeThreshold time.Duration = 1 * time.Second
|
||||
)
|
||||
|
||||
var InterestingApiServerMetrics = sets.NewString(
|
||||
"apiserver_request_count",
|
||||
"apiserver_request_latencies_bucket",
|
||||
"etcd_helper_cache_entry_count",
|
||||
"etcd_helper_cache_hit_count",
|
||||
"etcd_helper_cache_miss_count",
|
||||
"etcd_request_cache_add_latencies_summary",
|
||||
"etcd_request_cache_get_latencies_summary",
|
||||
"etcd_request_latencies_summary",
|
||||
"go_gc_duration_seconds",
|
||||
"go_goroutines",
|
||||
"process_cpu_seconds_total",
|
||||
"process_open_fds",
|
||||
"process_resident_memory_bytes",
|
||||
"process_start_time_seconds",
|
||||
"process_virtual_memory_bytes",
|
||||
)
|
||||
|
||||
var InterestingKubeletMetrics = sets.NewString(
|
||||
"container_cpu_system_seconds_total",
|
||||
"container_cpu_user_seconds_total",
|
||||
"container_fs_io_time_weighted_seconds_total",
|
||||
"container_memory_usage_bytes",
|
||||
"container_spec_cpu_shares",
|
||||
"container_start_time_seconds",
|
||||
"go_gc_duration_seconds",
|
||||
"go_goroutines",
|
||||
"kubelet_container_manager_latency_microseconds",
|
||||
"kubelet_docker_errors",
|
||||
"kubelet_docker_operations_latency_microseconds",
|
||||
"kubelet_generate_pod_status_latency_microseconds",
|
||||
"kubelet_pod_start_latency_microseconds",
|
||||
"kubelet_pod_worker_latency_microseconds",
|
||||
"kubelet_pod_worker_start_latency_microseconds",
|
||||
"kubelet_sync_pods_latency_microseconds",
|
||||
"process_cpu_seconds_total",
|
||||
"process_open_fds",
|
||||
"process_resident_memory_bytes",
|
||||
"process_start_time_seconds",
|
||||
"process_virtual_memory_bytes",
|
||||
)
|
||||
|
||||
// Dashboard metrics
|
||||
type LatencyMetric struct {
|
||||
Perc50 time.Duration `json:"Perc50"`
|
||||
|
|
|
@ -153,6 +153,7 @@ type TestContextType struct {
|
|||
// It will read the data every 30 seconds from all Nodes and print summary during afterEach.
|
||||
GatherKubeSystemResourceUsageData bool
|
||||
GatherLogsSizes bool
|
||||
GatherMetricsAfterTest bool
|
||||
}
|
||||
|
||||
var testContext TestContextType
|
||||
|
|
Loading…
Reference in New Issue