Move rest of performance data gathered by tests to Summaries

pull/6/head
gmarek 2017-05-10 14:50:38 +02:00
parent 3fbfafdd0a
commit f68b884a9d
6 changed files with 134 additions and 84 deletions
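
This commit routes each piece of performance data gathered by a test through the framework's TestDataSummary interface and the new Framework.TestSummaries slice, and moves the printing logic into a shared PrintSummaries helper. A minimal sketch of the interface, inferred from the SummaryKind/PrintHumanReadable/PrintJSON implementations added below (the diff does not show the interface body itself):

// Sketch only: method set inferred from the implementations in this commit.
type TestDataSummary interface {
	SummaryKind() string
	PrintHumanReadable() string
	PrintJSON() string
}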

View File

@@ -21,7 +21,6 @@ import (
"bytes"
"encoding/json"
"fmt"
"io/ioutil"
"os"
"path"
"reflect"
@@ -91,6 +90,10 @@ type Framework struct {
// configuration for framework's client
Options FrameworkOptions
// Place where various additional data gathered during the test run is stored. It is printed to ReportDir,
// or to stdout if ReportDir is not set, once the test ends.
TestSummaries []TestDataSummary
}
type TestDataSummary interface {
@@ -307,19 +310,18 @@ func (f *Framework) AfterEach() {
LogContainersInPodsWithLabels(f.ClientSet, metav1.NamespaceSystem, ImagePullerLabels, "image-puller", logFunc)
}
summaries := make([]TestDataSummary, 0)
if TestContext.GatherKubeSystemResourceUsageData != "false" && TestContext.GatherKubeSystemResourceUsageData != "none" && f.gatherer != nil {
By("Collecting resource usage data")
summary, resourceViolationError := f.gatherer.stopAndSummarize([]int{90, 99, 100}, f.AddonResourceConstraints)
defer ExpectNoError(resourceViolationError)
summaries = append(summaries, summary)
f.TestSummaries = append(f.TestSummaries, summary)
}
if TestContext.GatherLogsSizes {
By("Gathering log sizes data")
close(f.logsSizeCloseChannel)
f.logsSizeWaitGroup.Wait()
summaries = append(summaries, f.logsSizeVerifier.GetSummary())
f.TestSummaries = append(f.TestSummaries, f.logsSizeVerifier.GetSummary())
}
if TestContext.GatherMetricsAfterTest {
@@ -334,44 +336,12 @@ func (f *Framework) AfterEach() {
if err != nil {
Logf("MetricsGrabber failed to grab metrics (skipping metrics gathering): %v", err)
} else {
summaries = append(summaries, (*MetricsForE2E)(&received))
f.TestSummaries = append(f.TestSummaries, (*MetricsForE2E)(&received))
}
}
}
outputTypes := strings.Split(TestContext.OutputPrintType, ",")
now := time.Now()
for _, printType := range outputTypes {
switch printType {
case "hr":
for i := range summaries {
if TestContext.ReportDir == "" {
Logf(summaries[i].PrintHumanReadable())
} else {
// TODO: learn to extract test name and append it to the kind instead of timestamp.
filePath := path.Join(TestContext.ReportDir, summaries[i].SummaryKind()+"_"+f.BaseName+"_"+now.Format(time.RFC3339)+".txt")
if err := ioutil.WriteFile(filePath, []byte(summaries[i].PrintHumanReadable()), 0644); err != nil {
Logf("Failed to write file %v with test performance data: %v", filePath, err)
}
}
}
case "json":
for i := range summaries {
if TestContext.ReportDir == "" {
Logf("%v JSON\n%v", summaries[i].SummaryKind(), summaries[i].PrintJSON())
Logf("Finished")
} else {
// TODO: learn to extract test name and append it to the kind instead of timestamp.
filePath := path.Join(TestContext.ReportDir, summaries[i].SummaryKind()+"_"+f.BaseName+"_"+now.Format(time.RFC3339)+".json")
if err := ioutil.WriteFile(filePath, []byte(summaries[i].PrintJSON()), 0644); err != nil {
Logf("Failed to write file %v with test performance data: %v", filePath, err)
}
}
}
default:
Logf("Unknown output type: %v. Skipping.", printType)
}
}
PrintSummaries(f.TestSummaries, f.BaseName)
// Check whether all nodes are ready after the test.
// This is explicitly done at the very end of the test, to avoid

View File

@@ -150,12 +150,36 @@ type PodStartupLatency struct {
Latency LatencyMetric `json:"latency"`
}
func (l *PodStartupLatency) SummaryKind() string {
return "PodStartupLatency"
}
func (l *PodStartupLatency) PrintHumanReadable() string {
return PrettyPrintJSON(*l)
}
func (l *PodStartupLatency) PrintJSON() string {
return PrettyPrintJSON(*l)
}
type SchedulingLatency struct {
Scheduling LatencyMetric `json:"scheduling"`
Binding LatencyMetric `json:"binding"`
Total LatencyMetric `json:"total"`
}
func (l *SchedulingLatency) SummaryKind() string {
return "SchedulingLatency"
}
func (l *SchedulingLatency) PrintHumanReadable() string {
return PrettyPrintJSON(*l)
}
func (l *SchedulingLatency) PrintJSON() string {
return PrettyPrintJSON(*l)
}
type SaturationTime struct {
TimeToSaturate time.Duration `json:"timeToStaturate"`
NumberOfNodes int `json:"numberOfNodes"`
@@ -174,9 +198,23 @@ type APIResponsiveness struct {
APICalls []APICall `json:"apicalls"`
}
func (a APIResponsiveness) Len() int { return len(a.APICalls) }
func (a APIResponsiveness) Swap(i, j int) { a.APICalls[i], a.APICalls[j] = a.APICalls[j], a.APICalls[i] }
func (a APIResponsiveness) Less(i, j int) bool {
func (a *APIResponsiveness) SummaryKind() string {
return "APIResponsiveness"
}
func (a *APIResponsiveness) PrintHumanReadable() string {
return PrettyPrintJSON(*a)
}
func (a *APIResponsiveness) PrintJSON() string {
return PrettyPrintJSON(*a)
}
func (a *APIResponsiveness) Len() int { return len(a.APICalls) }
func (a *APIResponsiveness) Swap(i, j int) {
a.APICalls[i], a.APICalls[j] = a.APICalls[j], a.APICalls[i]
}
func (a *APIResponsiveness) Less(i, j int) bool {
return a.APICalls[i].Latency.Perc99 < a.APICalls[j].Latency.Perc99
}
@@ -225,17 +263,17 @@ func (a *APIResponsiveness) addMetricRequestCount(resource, verb string, count i
a.APICalls = append(a.APICalls, apicall)
}
func readLatencyMetrics(c clientset.Interface) (APIResponsiveness, error) {
func readLatencyMetrics(c clientset.Interface) (*APIResponsiveness, error) {
var a APIResponsiveness
body, err := getMetrics(c)
if err != nil {
return a, err
return nil, err
}
samples, err := extractMetricSamples(body)
if err != nil {
return a, err
return nil, err
}
ignoredResources := sets.NewString("events")
@@ -262,7 +300,7 @@ func readLatencyMetrics(c clientset.Interface) (APIResponsiveness, error) {
latency := sample.Value
quantile, err := strconv.ParseFloat(string(sample.Metric[model.QuantileLabel]), 64)
if err != nil {
return a, err
return nil, err
}
a.addMetricRequestLatency(resource, verb, quantile, time.Duration(int64(latency))*time.Microsecond)
case "apiserver_request_count":
@@ -272,22 +310,22 @@ func readLatencyMetrics(c clientset.Interface) (APIResponsiveness, error) {
}
}
return a, err
return &a, err
}
// Prints the top five summary metrics for the request types with the highest latency and returns
// the number of such request types above the threshold.
func HighLatencyRequests(c clientset.Interface) (int, error) {
func HighLatencyRequests(c clientset.Interface) (int, *APIResponsiveness, error) {
metrics, err := readLatencyMetrics(c)
if err != nil {
return 0, err
return 0, metrics, err
}
sort.Sort(sort.Reverse(metrics))
badMetrics := 0
top := 5
for _, metric := range metrics.APICalls {
for i := range metrics.APICalls {
isBad := false
if metric.Latency.Perc99 > apiCallLatencyThreshold {
if metrics.APICalls[i].Latency.Perc99 > apiCallLatencyThreshold {
badMetrics++
isBad = true
}
@@ -297,23 +335,15 @@ func HighLatencyRequests(c clientset.Interface) (int, error) {
if isBad {
prefix = "WARNING "
}
Logf("%vTop latency metric: %+v", prefix, metric)
Logf("%vTop latency metric: %+v", prefix, metrics.APICalls[i])
}
}
// TODO(random-liu): Remove the log when we migrate to new perfdash
Logf("API calls latencies: %s", PrettyPrintJSON(metrics))
// Log perf data
PrintPerfData(ApiCallToPerfData(metrics))
return badMetrics, nil
return badMetrics, metrics, nil
}
// Verifies whether the 50th, 90th and 99th percentiles of PodStartupLatency are
// within the threshold.
func VerifyPodStartupLatency(latency PodStartupLatency) error {
Logf("Pod startup latency: %s", PrettyPrintJSON(latency))
func VerifyPodStartupLatency(latency *PodStartupLatency) error {
if latency.Latency.Perc50 > podStartupThreshold {
return fmt.Errorf("too high pod startup latency 50th percentile: %v", latency.Latency.Perc50)
}
@@ -349,7 +379,7 @@ func getMetrics(c clientset.Interface) (string, error) {
}
// Retrieves scheduler metrics information.
func getSchedulingLatency(c clientset.Interface) (SchedulingLatency, error) {
func getSchedulingLatency(c clientset.Interface) (*SchedulingLatency, error) {
result := SchedulingLatency{}
// Check if master Node is registered
@@ -358,7 +388,7 @@ func getSchedulingLatency(c clientset.Interface) (SchedulingLatency, error) {
subResourceProxyAvailable, err := ServerVersionGTE(SubResourcePodProxyVersion, c.Discovery())
if err != nil {
return result, err
return nil, err
}
var data string
@@ -400,13 +430,13 @@ func getSchedulingLatency(c clientset.Interface) (SchedulingLatency, error) {
cmd := "curl http://localhost:10251/metrics"
sshResult, err := SSH(cmd, GetMasterHost()+":22", TestContext.Provider)
if err != nil || sshResult.Code != 0 {
return result, fmt.Errorf("unexpected error (code: %d) in ssh connection to master: %#v", sshResult.Code, err)
return &result, fmt.Errorf("unexpected error (code: %d) in ssh connection to master: %#v", sshResult.Code, err)
}
data = sshResult.Stdout
}
samples, err := extractMetricSamples(data)
if err != nil {
return result, err
return nil, err
}
for _, sample := range samples {
@@ -426,23 +456,20 @@ func getSchedulingLatency(c clientset.Interface) (SchedulingLatency, error) {
latency := sample.Value
quantile, err := strconv.ParseFloat(string(sample.Metric[model.QuantileLabel]), 64)
if err != nil {
return result, err
return nil, err
}
setQuantile(metric, quantile, time.Duration(int64(latency))*time.Microsecond)
}
return result, nil
return &result, nil
}
// Verifies (currently just by logging them) the scheduling latencies.
func VerifySchedulerLatency(c clientset.Interface) error {
func VerifySchedulerLatency(c clientset.Interface) (*SchedulingLatency, error) {
latency, err := getSchedulingLatency(c)
if err != nil {
return err
return nil, err
}
Logf("Scheduling latency: %s", PrettyPrintJSON(latency))
// TODO: Add some reasonable checks once we know more about the values.
return nil
return latency, nil
}
func PrettyPrintJSON(metrics interface{}) string {

View File

@@ -162,7 +162,7 @@ func RegisterCommonFlags() {
flag.StringVar(&TestContext.GatherKubeSystemResourceUsageData, "gather-resource-usage", "false", "If set to 'true' or 'all' framework will be monitoring resource usage of system add-ons in (some) e2e tests, if set to 'master' framework will be monitoring master node only, if set to 'none' or 'false' monitoring will be turned off.")
flag.BoolVar(&TestContext.GatherLogsSizes, "gather-logs-sizes", false, "If set to true framework will be monitoring log sizes on all machines running e2e tests.")
flag.BoolVar(&TestContext.GatherMetricsAfterTest, "gather-metrics-at-teardown", false, "If set to true framework will gather metrics from all components after each test.")
flag.StringVar(&TestContext.OutputPrintType, "output-print-type", "hr", "Comma separated list: 'hr' for human readable summaries 'json' for JSON ones.")
flag.StringVar(&TestContext.OutputPrintType, "output-print-type", "json", "Format in which summaries should be printed: 'hr' for human readable, 'json' for JSON ones.")
flag.BoolVar(&TestContext.DumpLogsOnFailure, "dump-logs-on-failure", true, "If set to true test will dump data about the namespace in which test was running.")
flag.BoolVar(&TestContext.DisableLogDump, "disable-log-dump", false, "If set to true, logs from master and nodes won't be gathered after test run.")
flag.BoolVar(&TestContext.DeleteNamespace, "delete-namespace", true, "If true tests will delete namespace after completion. It is only designed to make debugging easier, DO NOT turn it off by default.")

View File

@@ -5526,3 +5526,38 @@ func GetAzureCloud() (*azure.Cloud, error) {
}
return cloud, nil
}
func PrintSummaries(summaries []TestDataSummary, testBaseName string) {
now := time.Now()
for i := range summaries {
Logf("Printing summary: %v", summaries[i].SummaryKind())
switch TestContext.OutputPrintType {
case "hr":
if TestContext.ReportDir == "" {
Logf(summaries[i].PrintHumanReadable())
} else {
// TODO: learn to extract test name and append it to the kind instead of timestamp.
filePath := path.Join(TestContext.ReportDir, summaries[i].SummaryKind()+"_"+testBaseName+"_"+now.Format(time.RFC3339)+".txt")
if err := ioutil.WriteFile(filePath, []byte(summaries[i].PrintHumanReadable()), 0644); err != nil {
Logf("Failed to write file %v with test performance data: %v", filePath, err)
}
}
case "json":
fallthrough
default:
if TestContext.OutputPrintType != "json" {
Logf("Unknown output type: %v. Printing JSON", TestContext.OutputPrintType)
}
if TestContext.ReportDir == "" {
Logf("%v JSON\n%v", summaries[i].SummaryKind(), summaries[i].PrintJSON())
Logf("Finished")
} else {
// TODO: learn to extract test name and append it to the kind instead of timestamp.
filePath := path.Join(TestContext.ReportDir, summaries[i].SummaryKind()+"_"+testBaseName+"_"+now.Format(time.RFC3339)+".json")
if err := ioutil.WriteFile(filePath, []byte(summaries[i].PrintJSON()), 0644); err != nil {
Logf("Failed to write file %v with test performance data: %v", filePath, err)
}
}
}
}
}
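
For illustration only (not part of this commit), any type that provides these three methods can be registered on a test's framework instance and will be handled by PrintSummaries at teardown. A hypothetical example, assuming it lives in the framework package so it can reuse PrettyPrintJSON:

// Hypothetical summary type used only to illustrate the pattern.
type exampleSummary struct {
	Message string `json:"message"`
}

func (s *exampleSummary) SummaryKind() string        { return "ExampleSummary" }
func (s *exampleSummary) PrintHumanReadable() string { return s.Message }
func (s *exampleSummary) PrintJSON() string          { return PrettyPrintJSON(s) }

// In a test body:
//   f.TestSummaries = append(f.TestSummaries, &exampleSummary{Message: "hello"})
// Framework.AfterEach then calls PrintSummaries(f.TestSummaries, f.BaseName), which
// writes <SummaryKind>_<baseName>_<RFC3339 timestamp>.txt or .json under ReportDir,
// or logs the summary when ReportDir is not set.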

View File

@@ -304,6 +304,8 @@ var _ = framework.KubeDescribe("Density", func() {
var nodes *v1.NodeList
var masters sets.String
testCaseBaseName := "density"
// Gathers data prior to framework namespace teardown
AfterEach(func() {
saturationThreshold := time.Duration((totalPods / MinPodsPerSecondThroughput)) * time.Second
@@ -319,17 +321,25 @@ var _ = framework.KubeDescribe("Density", func() {
}
framework.Logf("Cluster saturation time: %s", framework.PrettyPrintJSON(saturationData))
summaries := make([]framework.TestDataSummary, 0, 2)
// Verify latency metrics.
highLatencyRequests, err := framework.HighLatencyRequests(c)
highLatencyRequests, metrics, err := framework.HighLatencyRequests(c)
framework.ExpectNoError(err)
Expect(highLatencyRequests).NotTo(BeNumerically(">", 0), "There should be no high-latency requests")
if err == nil {
summaries = append(summaries, metrics)
Expect(highLatencyRequests).NotTo(BeNumerically(">", 0), "There should be no high-latency requests")
}
// Verify scheduler metrics.
// TODO: Reset metrics at the beginning of the test.
// We should do something similar to how we do it for APIserver.
if err = framework.VerifySchedulerLatency(c); err != nil {
framework.Logf("Warning: Scheduler latency not calculated, %v", err)
latency, err := framework.VerifySchedulerLatency(c)
framework.ExpectNoError(err)
if err == nil {
summaries = append(summaries, latency)
}
framework.PrintSummaries(summaries, testCaseBaseName)
})
options := framework.FrameworkOptions{
@@ -338,7 +348,7 @@ var _ = framework.KubeDescribe("Density", func() {
}
// Explicitly put here, to delete namespace at the end of the test
// (after measuring latency metrics, etc.).
f := framework.NewFramework("density", options, nil)
f := framework.NewFramework(testCaseBaseName, options, nil)
f.NamespaceDeletionTimeout = time.Hour
BeforeEach(func() {
@@ -718,7 +728,8 @@ var _ = framework.KubeDescribe("Density", func() {
framework.PrintLatencies(e2eLag, "worst e2e total latencies")
// Test whether e2e pod startup time is acceptable.
podStartupLatency := framework.PodStartupLatency{Latency: framework.ExtractLatencyMetrics(e2eLag)}
podStartupLatency := &framework.PodStartupLatency{Latency: framework.ExtractLatencyMetrics(e2eLag)}
f.TestSummaries = append(f.TestSummaries, podStartupLatency)
framework.ExpectNoError(framework.VerifyPodStartupLatency(podStartupLatency))
framework.LogSuspiciousLatency(startupLag, e2eLag, nodeCount, c)

View File

@@ -81,13 +81,20 @@ var _ = framework.KubeDescribe("Load capacity", func() {
var configs []testutils.RunObjectConfig
var secretConfigs []*testutils.SecretConfig
testCaseBaseName := "load"
// Gathers metrics before teardown
// TODO: add a flag that allows skipping cleanup on failure
AfterEach(func() {
// Verify latency metrics
highLatencyRequests, err := framework.HighLatencyRequests(clientset)
framework.ExpectNoError(err, "Too many instances metrics above the threshold")
Expect(highLatencyRequests).NotTo(BeNumerically(">", 0))
highLatencyRequests, metrics, err := framework.HighLatencyRequests(clientset)
framework.ExpectNoError(err)
if err == nil {
summaries := make([]framework.TestDataSummary, 0, 1)
summaries = append(summaries, metrics)
framework.PrintSummaries(summaries, testCaseBaseName)
Expect(highLatencyRequests).NotTo(BeNumerically(">", 0), "There should be no high-latency requests")
}
})
// We assume a default throughput of 10 pods/second.
@@ -106,7 +113,7 @@ var _ = framework.KubeDescribe("Load capacity", func() {
ClientQPS: float32(math.Max(50.0, float64(2*throughput))),
ClientBurst: int(math.Max(100.0, float64(4*throughput))),
}
f := framework.NewFramework("load", options, nil)
f := framework.NewFramework(testCaseBaseName, options, nil)
f.NamespaceDeletionTimeout = time.Hour
BeforeEach(func() {