Logs node e2e perf data to standalone json files

pull/6/head
Yang Guo 2017-06-09 09:56:44 -07:00
parent 3837d95191
commit 29b2db5af3
11 changed files with 154 additions and 61 deletions


@ -452,6 +452,7 @@ staging/src/k8s.io/metrics/pkg/apis/metrics/install
staging/src/k8s.io/sample-apiserver
staging/src/k8s.io/sample-apiserver/pkg/apis/wardle/install
test/e2e/perftype
test/e2e_node/perftype
test/e2e_node/runner/local
test/images/clusterapi-tester
test/images/entrypoint-tester


@ -71,9 +71,10 @@ func PodStartupLatencyToPerfData(latency *PodStartupLatency) *perftype.PerfData
return perfData
}
// currentKubeletPerfMetricsVersion is the current kubelet performance metrics version. We should
// bump up the version each time we make incompatible change to the metrics.
const currentKubeletPerfMetricsVersion = "v2"
// CurrentKubeletPerfMetricsVersion is the current kubelet performance metrics
// version. This is used by multiple perf-related data structures. We should
// bump up the version each time we make an incompatible change to the metrics.
const CurrentKubeletPerfMetricsVersion = "v2"
// ResourceUsageToPerfData transforms ResourceUsagePerNode to PerfData. Notice that this function
// only cares about memory usage, because cpu usage information will be extracted from NodesCPUSummary.
@ -119,7 +120,7 @@ func ResourceUsageToPerfDataWithLabels(usagePerNode ResourceUsagePerNode, labels
}
}
return &perftype.PerfData{
Version: currentKubeletPerfMetricsVersion,
Version: CurrentKubeletPerfMetricsVersion,
DataItems: items,
Labels: labels,
}
@ -149,7 +150,7 @@ func CPUUsageToPerfDataWithLabels(usagePerNode NodesCPUSummary, labels map[strin
}
}
return &perftype.PerfData{
Version: currentKubeletPerfMetricsVersion,
Version: CurrentKubeletPerfMetricsVersion,
DataItems: items,
Labels: labels,
}
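
Since the constant is now exported, packages outside test/e2e/framework can stamp their perf data with the same version instead of hard-coding "v2". A minimal sketch of a hypothetical consumer (the newPerfData helper below is illustrative, not part of this commit):

package mytests // hypothetical consumer package

import (
	"k8s.io/kubernetes/test/e2e/framework"
	"k8s.io/kubernetes/test/e2e/perftype"
)

// newPerfData shows how a caller can reuse the exported version constant.
func newPerfData(items []perftype.DataItem, labels map[string]string) *perftype.PerfData {
	return &perftype.PerfData{
		Version:   framework.CurrentKubeletPerfMetricsVersion,
		DataItems: items,
		Labels:    labels,
	}
}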


@ -41,6 +41,7 @@ go_library(
"//test/e2e/common:go_default_library",
"//test/e2e/framework:go_default_library",
"//test/e2e/perftype:go_default_library",
"//test/e2e_node/perftype:go_default_library",
"//vendor/github.com/golang/glog:go_default_library",
"//vendor/github.com/google/cadvisor/client/v2:go_default_library",
"//vendor/github.com/google/cadvisor/info/v2:go_default_library",
@ -163,6 +164,7 @@ filegroup(
":package-srcs",
"//test/e2e_node/builder:all-srcs",
"//test/e2e_node/environment:all-srcs",
"//test/e2e_node/perftype:all-srcs",
"//test/e2e_node/remote:all-srcs",
"//test/e2e_node/runner/local:all-srcs",
"//test/e2e_node/runner/remote:all-srcs",


@ -20,36 +20,57 @@ package e2e_node
import (
"fmt"
"io/ioutil"
"path"
"sort"
"strconv"
"time"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/kubernetes/test/e2e/framework"
"k8s.io/kubernetes/test/e2e/perftype"
nodeperftype "k8s.io/kubernetes/test/e2e_node/perftype"
. "github.com/onsi/gomega"
)
const (
// TODO(coufon): be consistent with perf_util.go version
currentDataVersion = "v1"
TimeSeriesTag = "[Result:TimeSeries]"
TimeSeriesEnd = "[Finish:TimeSeries]"
)
type NodeTimeSeries struct {
// value in OperationData is an array of timestamps
OperationData map[string][]int64 `json:"op_series,omitempty"`
ResourceData map[string]*ResourceSeries `json:"resource_series,omitempty"`
Labels map[string]string `json:"labels"`
Version string `json:"version"`
}
// dumpDataToFile inserts the current timestamp into the labels and writes the
// data to a JSON file named after the given prefix and the test name.
func dumpDataToFile(data interface{}, labels map[string]string, prefix string) {
testName := labels["test"]
fileName := path.Join(framework.TestContext.ReportDir, fmt.Sprintf("%s-%s.json", prefix, testName))
labels["timestamp"] = strconv.FormatInt(time.Now().UTC().Unix(), 10)
framework.Logf("Dumping perf data for test %q to %q.", testName, fileName)
if err := ioutil.WriteFile(fileName, []byte(framework.PrettyPrintJSON(data)), 0644); err != nil {
framework.Logf("Failed to write perf data for test %q to %q: %v", testName, fileName, err)
}
}
// logDensityTimeSeries logs the time series data of operation and resource usage
// logPerfData writes the perf data to a standalone json file if the
// framework.TestContext.ReportDir is non-empty, or to the general build log
// otherwise. The perfType argument identifies the kind of perf data, e.g.
// "cpu" or "memory". If an error occurs, no perf data will be logged.
func logPerfData(p *perftype.PerfData, perfType string) {
if framework.TestContext.ReportDir == "" {
framework.PrintPerfData(p)
return
}
dumpDataToFile(p, p.Labels, "performance-"+perfType)
}
// logDensityTimeSeries writes the time series data of operation and resource
// usage to a standalone json file if the framework.TestContext.ReportDir is
// non-empty, or to the general build log otherwise. If an error occurs,
// no perf data will be logged.
func logDensityTimeSeries(rc *ResourceCollector, create, watch map[string]metav1.Time, testInfo map[string]string) {
timeSeries := &NodeTimeSeries{
timeSeries := &nodeperftype.NodeTimeSeries{
Labels: testInfo,
Version: currentDataVersion,
Version: framework.CurrentKubeletPerfMetricsVersion,
}
// Attach operation time series.
timeSeries.OperationData = map[string][]int64{
@ -58,8 +79,12 @@ func logDensityTimeSeries(rc *ResourceCollector, create, watch map[string]metav1
}
// Attach resource time series.
timeSeries.ResourceData = rc.GetResourceTimeSeries()
// Log time series with tags
framework.Logf("%s %s\n%s", TimeSeriesTag, framework.PrettyPrintJSON(timeSeries), TimeSeriesEnd)
if framework.TestContext.ReportDir == "" {
framework.Logf("%s %s\n%s", TimeSeriesTag, framework.PrettyPrintJSON(timeSeries), TimeSeriesEnd)
return
}
dumpDataToFile(timeSeries, timeSeries.Labels, "time_series")
}
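
When ReportDir is empty, the series still goes to the build log between the two tags, so downstream parsers can extract it. Roughly (payload hypothetical; PrettyPrintJSON actually emits indented, multi-line JSON):

[Result:TimeSeries] {"op_series":{"create":[...]},"resource_series":{...},"labels":{"test":"..."},"version":"v2"}
[Finish:TimeSeries]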
type int64arr []int64
@ -82,7 +107,7 @@ func getCumulatedPodTimeSeries(timePerPod map[string]metav1.Time) []int64 {
// getLatencyPerfData returns perf data of pod startup latency.
func getLatencyPerfData(latency framework.LatencyMetric, testInfo map[string]string) *perftype.PerfData {
return &perftype.PerfData{
Version: currentDataVersion,
Version: framework.CurrentKubeletPerfMetricsVersion,
DataItems: []perftype.DataItem{
{
Data: map[string]float64{
@ -105,7 +130,7 @@ func getLatencyPerfData(latency framework.LatencyMetric, testInfo map[string]str
// getThroughputPerfData returns perf data of pod creation startup throughput.
func getThroughputPerfData(batchLag time.Duration, e2eLags []framework.PodLatencyData, podsNr int, testInfo map[string]string) *perftype.PerfData {
return &perftype.PerfData{
Version: currentDataVersion,
Version: framework.CurrentKubeletPerfMetricsVersion,
DataItems: []perftype.DataItem{
{
Data: map[string]float64{
@ -123,8 +148,10 @@ func getThroughputPerfData(batchLag time.Duration, e2eLags []framework.PodLatenc
}
}
// getTestNodeInfo fetches the capacity of a node from API server and returns a map of labels.
func getTestNodeInfo(f *framework.Framework, testName string) map[string]string {
// getTestNodeInfo returns a label map containing the test name and
// description, the name of the node on which the test will be run, the image
// name of the node, and the node capacities.
func getTestNodeInfo(f *framework.Framework, testName, testDesc string) map[string]string {
nodeName := framework.TestContext.NodeName
node, err := f.ClientSet.Core().Nodes().Get(nodeName, metav1.GetOptions{})
Expect(err).NotTo(HaveOccurred())
@ -154,5 +181,6 @@ func getTestNodeInfo(f *framework.Framework, testName string) map[string]string
"test": testName,
"image": node.Status.NodeInfo.OSImage,
"machine": fmt.Sprintf("cpu:%dcore,memory:%.1fGB", cpuValue, float32(memoryValue)/(1024*1024*1024)),
"desc": testDesc,
}
}
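
To make the naming concrete, a runnable sketch of the file path that dumpDataToFile produces (directory and test name are hypothetical):

package main

import (
	"fmt"
	"path"
	"strconv"
	"time"
)

func main() {
	reportDir := "/tmp/_artifacts"           // assumed --report-dir value
	testName := "create_10_pods_0s_interval" // hypothetical labels["test"]
	prefix := "performance-latency"          // logPerfData passes "performance-"+perfType
	fileName := path.Join(reportDir, fmt.Sprintf("%s-%s.json", prefix, testName))
	labelsTimestamp := strconv.FormatInt(time.Now().UTC().Unix(), 10)
	// e.g. /tmp/_artifacts/performance-latency-create_10_pods_0s_interval.json 1496940000
	fmt.Println(fileName, labelsTimestamp)
}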


@ -94,10 +94,10 @@ var _ = framework.KubeDescribe("Density [Serial] [Slow]", func() {
for _, testArg := range dTests {
itArg := testArg
It(fmt.Sprintf("latency/resource should be within limit when create %d pods with %v interval",
itArg.podsNr, itArg.interval), func() {
desc := fmt.Sprintf("latency/resource should be within limit when create %d pods with %v interval", itArg.podsNr, itArg.interval)
It(desc, func() {
itArg.createMethod = "batch"
testInfo := getTestNodeInfo(f, itArg.getTestName())
testInfo := getTestNodeInfo(f, itArg.getTestName(), desc)
batchLag, e2eLags := runDensityBatchTest(f, rc, itArg, testInfo, false)
@ -152,10 +152,10 @@ var _ = framework.KubeDescribe("Density [Serial] [Slow]", func() {
for _, testArg := range dTests {
itArg := testArg
It(fmt.Sprintf("latency/resource should be within limit when create %d pods with %v interval [Benchmark]",
itArg.podsNr, itArg.interval), func() {
desc := fmt.Sprintf("latency/resource should be within limit when create %d pods with %v interval [Benchmark]", itArg.podsNr, itArg.interval)
It(desc, func() {
itArg.createMethod = "batch"
testInfo := getTestNodeInfo(f, itArg.getTestName())
testInfo := getTestNodeInfo(f, itArg.getTestName(), desc)
batchLag, e2eLags := runDensityBatchTest(f, rc, itArg, testInfo, true)
@ -189,10 +189,10 @@ var _ = framework.KubeDescribe("Density [Serial] [Slow]", func() {
for _, testArg := range dTests {
itArg := testArg
It(fmt.Sprintf("latency/resource should be within limit when create %d pods with %v interval (QPS %d) [Benchmark]",
itArg.podsNr, itArg.interval, itArg.APIQPSLimit), func() {
desc := fmt.Sprintf("latency/resource should be within limit when create %d pods with %v interval (QPS %d) [Benchmark]", itArg.podsNr, itArg.interval, itArg.APIQPSLimit)
It(desc, func() {
itArg.createMethod = "batch"
testInfo := getTestNodeInfo(f, itArg.getTestName())
testInfo := getTestNodeInfo(f, itArg.getTestName(), desc)
// The latency caused by the API QPS limit takes a large portion (up to ~33%) of e2e latency.
// It causes the Kubelet's pod startup latency (and creation throughput) to be under-estimated.
// Here we raise the API QPS limit from the default 5 to 60 in order to test real Kubelet performance.
@ -232,10 +232,10 @@ var _ = framework.KubeDescribe("Density [Serial] [Slow]", func() {
for _, testArg := range dTests {
itArg := testArg
It(fmt.Sprintf("latency/resource should be within limit when create %d pods with %d background pods",
itArg.podsNr, itArg.bgPodsNr), func() {
desc := fmt.Sprintf("latency/resource should be within limit when create %d pods with %d background pods", itArg.podsNr, itArg.bgPodsNr)
It(desc, func() {
itArg.createMethod = "sequence"
testInfo := getTestNodeInfo(f, itArg.getTestName())
testInfo := getTestNodeInfo(f, itArg.getTestName(), desc)
batchlag, e2eLags := runDensitySeqTest(f, rc, itArg, testInfo)
By("Verifying latency")
@ -265,10 +265,10 @@ var _ = framework.KubeDescribe("Density [Serial] [Slow]", func() {
for _, testArg := range dTests {
itArg := testArg
It(fmt.Sprintf("latency/resource should be within limit when create %d pods with %d background pods [Benchmark]",
itArg.podsNr, itArg.bgPodsNr), func() {
desc := fmt.Sprintf("latency/resource should be within limit when create %d pods with %d background pods [Benchmark]", itArg.podsNr, itArg.bgPodsNr)
It(desc, func() {
itArg.createMethod = "sequence"
testInfo := getTestNodeInfo(f, itArg.getTestName())
testInfo := getTestNodeInfo(f, itArg.getTestName(), desc)
batchlag, e2eLags := runDensitySeqTest(f, rc, itArg, testInfo)
By("Verifying latency")
@ -551,7 +551,7 @@ func logAndVerifyLatency(batchLag time.Duration, e2eLags []framework.PodLatencyD
podCreateLatency := framework.PodStartupLatency{Latency: framework.ExtractLatencyMetrics(e2eLags)}
// log latency perf data
framework.PrintPerfData(getLatencyPerfData(podCreateLatency.Latency, testInfo))
logPerfData(getLatencyPerfData(podCreateLatency.Latency, testInfo), "latency")
if isVerify {
// check whether e2e pod startup time is acceptable.
@ -567,7 +567,7 @@ func logAndVerifyLatency(batchLag time.Duration, e2eLags []framework.PodLatencyD
// logPodCreateThroughput calculates and logs pod creation throughput.
func logPodCreateThroughput(batchLag time.Duration, e2eLags []framework.PodLatencyData, podsNr int, testInfo map[string]string) {
framework.PrintPerfData(getThroughputPerfData(batchLag, e2eLags, podsNr, testInfo))
logPerfData(getThroughputPerfData(batchLag, e2eLags, podsNr, testInfo), "throughput")
}
// increaseKubeletAPIQPSLimit sets Kubelet API QPS via ConfigMap. Kubelet will restart with the new QPS.


@ -119,7 +119,7 @@ for upload_attempt in $(seq 3); do
if [[ -d "${ARTIFACTS}" && -n $(ls -A "${ARTIFACTS}") ]]; then
V=2 kube::log::status "Uploading artifacts"
gsutil -m -q -o "GSUtil:use_magicfile=True" cp -a "${gcs_acl}" -r -c \
-z log,xml,xml "${ARTIFACTS}" "${GCS_LOGS_PATH}/artifacts" || continue
-z log,xml,json "${ARTIFACTS}" "${GCS_LOGS_PATH}/artifacts" || continue
fi
break
done


@ -0,0 +1,27 @@
package(default_visibility = ["//visibility:public"])
licenses(["notice"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
)
go_library(
name = "go_default_library",
srcs = ["perftype.go"],
tags = ["automanaged"],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
)


@ -0,0 +1,34 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package perftype
// ResourceSeries defines the time series of the resource usage.
type ResourceSeries struct {
Timestamp []int64 `json:"ts"`
CPUUsageInMilliCores []int64 `json:"cpu"`
MemoryRSSInMegaBytes []int64 `json:"memory"`
Units map[string]string `json:"unit"`
}
// NodeTimeSeries defines the time series of the operations and the resource
// usage.
type NodeTimeSeries struct {
OperationData map[string][]int64 `json:"op_series,omitempty"`
ResourceData map[string]*ResourceSeries `json:"resource_series,omitempty"`
Labels map[string]string `json:"labels"`
Version string `json:"version"`
}
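
To make the on-disk shape concrete, a small program that marshals the new types (all values hypothetical):

package main

import (
	"encoding/json"
	"fmt"

	"k8s.io/kubernetes/test/e2e_node/perftype"
)

func main() {
	series := &perftype.NodeTimeSeries{
		OperationData: map[string][]int64{"create": {1496940000000, 1496940000500}},
		ResourceData: map[string]*perftype.ResourceSeries{
			"kubelet": {
				Timestamp:            []int64{1496940000000},
				CPUUsageInMilliCores: []int64{120},
				MemoryRSSInMegaBytes: []int64{85},
				Units:                map[string]string{"cpu": "mCPU", "memory": "MB"},
			},
		},
		Labels:  map[string]string{"test": "create_10_pods_0s_interval"},
		Version: "v2",
	}
	out, err := json.MarshalIndent(series, "", "  ")
	if err != nil {
		panic(err)
	}
	fmt.Println(string(out)) // the JSON written by dumpDataToFile has this shape
}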


@ -143,6 +143,13 @@ func getTestArtifacts(host, testDir string) error {
if err != nil {
return err
}
// Copy json files (if any) to artifacts.
if _, err = SSH(host, "ls", fmt.Sprintf("%s/results/*.json", testDir)); err == nil {
_, err = runSSHCommand("scp", "-r", fmt.Sprintf("%s:%s/results/*.json", GetHostnameOrIp(host), testDir), *resultsDir)
if err != nil {
return err
}
}
// Copy junit to the top of artifacts
_, err = runSSHCommand("scp", fmt.Sprintf("%s:%s/results/junit*", GetHostnameOrIp(host), testDir), *resultsDir)
if err != nil {


@ -43,6 +43,7 @@ import (
stats "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1"
"k8s.io/kubernetes/pkg/util/procfs"
"k8s.io/kubernetes/test/e2e/framework"
"k8s.io/kubernetes/test/e2e_node/perftype"
. "github.com/onsi/gomega"
)
@ -440,19 +441,11 @@ func newTestPods(numPods int, volume bool, imageName, podType string) []*v1.Pod
return pods
}
// Time series of resource usage
type ResourceSeries struct {
Timestamp []int64 `json:"ts"`
CPUUsageInMilliCores []int64 `json:"cpu"`
MemoryRSSInMegaBytes []int64 `json:"memory"`
Units map[string]string `json:"unit"`
}
// GetResourceTimeSeries gets the time series of resource usage of each container.
func (r *ResourceCollector) GetResourceTimeSeries() map[string]*ResourceSeries {
resourceSeries := make(map[string]*ResourceSeries)
func (r *ResourceCollector) GetResourceTimeSeries() map[string]*perftype.ResourceSeries {
resourceSeries := make(map[string]*perftype.ResourceSeries)
for key, name := range systemContainers {
newSeries := &ResourceSeries{Units: map[string]string{
newSeries := &perftype.ResourceSeries{Units: map[string]string{
"cpu": "mCPU",
"memory": "MB",
}}


@ -79,9 +79,9 @@ var _ = framework.KubeDescribe("Resource-usage [Serial] [Slow]", func() {
for _, testArg := range rTests {
itArg := testArg
It(fmt.Sprintf("resource tracking for %d pods per node", itArg.podsNr), func() {
testInfo := getTestNodeInfo(f, itArg.getTestName())
desc := fmt.Sprintf("resource tracking for %d pods per node", itArg.podsNr)
It(desc, func() {
testInfo := getTestNodeInfo(f, itArg.getTestName(), desc)
runResourceUsageTest(f, rc, itArg)
@ -109,9 +109,9 @@ var _ = framework.KubeDescribe("Resource-usage [Serial] [Slow]", func() {
for _, testArg := range rTests {
itArg := testArg
It(fmt.Sprintf("resource tracking for %d pods per node [Benchmark]", itArg.podsNr), func() {
testInfo := getTestNodeInfo(f, itArg.getTestName())
desc := fmt.Sprintf("resource tracking for %d pods per node [Benchmark]", itArg.podsNr)
It(desc, func() {
testInfo := getTestNodeInfo(f, itArg.getTestName(), desc)
runResourceUsageTest(f, rc, itArg)
@ -202,8 +202,8 @@ func logAndVerifyResource(f *framework.Framework, rc *ResourceCollector, cpuLimi
cpuSummaryPerNode[nodeName] = cpuSummary
// Print resource usage
framework.PrintPerfData(framework.ResourceUsageToPerfDataWithLabels(usagePerNode, testInfo))
framework.PrintPerfData(framework.CPUUsageToPerfDataWithLabels(cpuSummaryPerNode, testInfo))
logPerfData(framework.ResourceUsageToPerfDataWithLabels(usagePerNode, testInfo), "memory")
logPerfData(framework.CPUUsageToPerfDataWithLabels(cpuSummaryPerNode, testInfo), "cpu")
// Verify resource usage
if isVerify {