From 29b2db5af3ce1a8ad8144a24c5ba209ce5c9f2d8 Mon Sep 17 00:00:00 2001 From: Yang Guo Date: Fri, 9 Jun 2017 09:56:44 -0700 Subject: [PATCH] Logs node e2e perf data to standalone json files --- hack/.linted_packages | 1 + test/e2e/framework/perf_util.go | 11 ++--- test/e2e_node/BUILD | 2 + test/e2e_node/benchmark_util.go | 66 ++++++++++++++++++++-------- test/e2e_node/density_test.go | 34 +++++++------- test/e2e_node/gubernator.sh | 2 +- test/e2e_node/perftype/BUILD | 27 ++++++++++++ test/e2e_node/perftype/perftype.go | 34 ++++++++++++++ test/e2e_node/remote/remote.go | 7 +++ test/e2e_node/resource_collector.go | 15 ++----- test/e2e_node/resource_usage_test.go | 16 +++---- 11 files changed, 154 insertions(+), 61 deletions(-) create mode 100644 test/e2e_node/perftype/BUILD create mode 100644 test/e2e_node/perftype/perftype.go diff --git a/hack/.linted_packages b/hack/.linted_packages index 59299ce553..4426a9bdff 100644 --- a/hack/.linted_packages +++ b/hack/.linted_packages @@ -452,6 +452,7 @@ staging/src/k8s.io/metrics/pkg/apis/metrics/install staging/src/k8s.io/sample-apiserver staging/src/k8s.io/sample-apiserver/pkg/apis/wardle/install test/e2e/perftype +test/e2e_node/perftype test/e2e_node/runner/local test/images/clusterapi-tester test/images/entrypoint-tester diff --git a/test/e2e/framework/perf_util.go b/test/e2e/framework/perf_util.go index 8f643a8663..538a67e7b3 100644 --- a/test/e2e/framework/perf_util.go +++ b/test/e2e/framework/perf_util.go @@ -71,9 +71,10 @@ func PodStartupLatencyToPerfData(latency *PodStartupLatency) *perftype.PerfData return perfData } -// currentKubeletPerfMetricsVersion is the current kubelet performance metrics version. We should -// bump up the version each time we make incompatible change to the metrics. -const currentKubeletPerfMetricsVersion = "v2" +// CurrentKubeletPerfMetricsVersion is the current kubelet performance metrics +// version. This is used by multiple perf related data structures. 
We should +// bump up the version each time we make an incompatible change to the metrics. +const CurrentKubeletPerfMetricsVersion = "v2" // ResourceUsageToPerfData transforms ResourceUsagePerNode to PerfData. Notice that this function // only cares about memory usage, because cpu usage information will be extracted from NodesCPUSummary. @@ -119,7 +120,7 @@ func ResourceUsageToPerfDataWithLabels(usagePerNode ResourceUsagePerNode, labels } } return &perftype.PerfData{ - Version: currentKubeletPerfMetricsVersion, + Version: CurrentKubeletPerfMetricsVersion, DataItems: items, Labels: labels, } @@ -149,7 +150,7 @@ func CPUUsageToPerfDataWithLabels(usagePerNode NodesCPUSummary, labels map[strin } } return &perftype.PerfData{ - Version: currentKubeletPerfMetricsVersion, + Version: CurrentKubeletPerfMetricsVersion, DataItems: items, Labels: labels, } diff --git a/test/e2e_node/BUILD b/test/e2e_node/BUILD index b8956ab138..991ebff095 100644 --- a/test/e2e_node/BUILD +++ b/test/e2e_node/BUILD @@ -41,6 +41,7 @@ go_library( "//test/e2e/common:go_default_library", "//test/e2e/framework:go_default_library", "//test/e2e/perftype:go_default_library", + "//test/e2e_node/perftype:go_default_library", "//vendor/github.com/golang/glog:go_default_library", "//vendor/github.com/google/cadvisor/client/v2:go_default_library", "//vendor/github.com/google/cadvisor/info/v2:go_default_library", @@ -163,6 +164,7 @@ filegroup( ":package-srcs", "//test/e2e_node/builder:all-srcs", "//test/e2e_node/environment:all-srcs", + "//test/e2e_node/perftype:all-srcs", "//test/e2e_node/remote:all-srcs", "//test/e2e_node/runner/local:all-srcs", "//test/e2e_node/runner/remote:all-srcs", diff --git a/test/e2e_node/benchmark_util.go b/test/e2e_node/benchmark_util.go index fd30d46dd2..4cc9322e44 100644 --- a/test/e2e_node/benchmark_util.go +++ b/test/e2e_node/benchmark_util.go @@ -20,36 +20,57 @@ package e2e_node import ( "fmt" + "io/ioutil" + "path" "sort" + "strconv" "time" metav1 
"k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/kubernetes/test/e2e/framework" "k8s.io/kubernetes/test/e2e/perftype" + nodeperftype "k8s.io/kubernetes/test/e2e_node/perftype" . "github.com/onsi/gomega" ) const ( - // TODO(coufon): be consistent with perf_util.go version - currentDataVersion = "v1" - TimeSeriesTag = "[Result:TimeSeries]" - TimeSeriesEnd = "[Finish:TimeSeries]" + TimeSeriesTag = "[Result:TimeSeries]" + TimeSeriesEnd = "[Finish:TimeSeries]" ) -type NodeTimeSeries struct { - // value in OperationData is an array of timestamps - OperationData map[string][]int64 `json:"op_series,omitempty"` - ResourceData map[string]*ResourceSeries `json:"resource_series,omitempty"` - Labels map[string]string `json:"labels"` - Version string `json:"version"` +// dumpDataToFile inserts the current timestamp into the labels and writes the +// data for the test into the file with the specified prefix. +func dumpDataToFile(data interface{}, labels map[string]string, prefix string) { + testName := labels["test"] + fileName := path.Join(framework.TestContext.ReportDir, fmt.Sprintf("%s-%s.json", prefix, testName)) + labels["timestamp"] = strconv.FormatInt(time.Now().UTC().Unix(), 10) + framework.Logf("Dumping perf data for test %q to %q.", testName, fileName) + if err := ioutil.WriteFile(fileName, []byte(framework.PrettyPrintJSON(data)), 0644); err != nil { + framework.Logf("Failed to write perf data for test %q to %q: %v", testName, fileName, err) + } } -// logDensityTimeSeries logs the time series data of operation and resource usage +// logPerfData writes the perf data to a standalone json file if the +// framework.TestContext.ReportDir is non-empty, or to the general build log +// otherwise. The perfType identifies which type of the perf data it is, such +// as "cpu" and "memory". If an error occurs, no perf data will be logged. 
+func logPerfData(p *perftype.PerfData, perfType string) { + if framework.TestContext.ReportDir == "" { + framework.PrintPerfData(p) + return + } + dumpDataToFile(p, p.Labels, "performance-"+perfType) +} + +// logDensityTimeSeries writes the time series data of operation and resource +// usage to a standalone json file if the framework.TestContext.ReportDir is +// non-empty, or to the general build log otherwise. If an error occurs, +// no perf data will be logged. func logDensityTimeSeries(rc *ResourceCollector, create, watch map[string]metav1.Time, testInfo map[string]string) { - timeSeries := &NodeTimeSeries{ + timeSeries := &nodeperftype.NodeTimeSeries{ Labels: testInfo, - Version: currentDataVersion, + Version: framework.CurrentKubeletPerfMetricsVersion, } // Attach operation time series. timeSeries.OperationData = map[string][]int64{ @@ -58,8 +79,12 @@ func logDensityTimeSeries(rc *ResourceCollector, create, watch map[string]metav1 } // Attach resource time series. timeSeries.ResourceData = rc.GetResourceTimeSeries() - // Log time series with tags - framework.Logf("%s %s\n%s", TimeSeriesTag, framework.PrettyPrintJSON(timeSeries), TimeSeriesEnd) + + if framework.TestContext.ReportDir == "" { + framework.Logf("%s %s\n%s", TimeSeriesTag, framework.PrettyPrintJSON(timeSeries), TimeSeriesEnd) + return + } + dumpDataToFile(timeSeries, timeSeries.Labels, "time_series") } type int64arr []int64 @@ -82,7 +107,7 @@ func getCumulatedPodTimeSeries(timePerPod map[string]metav1.Time) []int64 { // getLatencyPerfData returns perf data of pod startup latency. 
func getLatencyPerfData(latency framework.LatencyMetric, testInfo map[string]string) *perftype.PerfData { return &perftype.PerfData{ - Version: currentDataVersion, + Version: framework.CurrentKubeletPerfMetricsVersion, DataItems: []perftype.DataItem{ { Data: map[string]float64{ @@ -105,7 +130,7 @@ func getLatencyPerfData(latency framework.LatencyMetric, testInfo map[string]str // getThroughputPerfData returns perf data of pod creation startup throughput. func getThroughputPerfData(batchLag time.Duration, e2eLags []framework.PodLatencyData, podsNr int, testInfo map[string]string) *perftype.PerfData { return &perftype.PerfData{ - Version: currentDataVersion, + Version: framework.CurrentKubeletPerfMetricsVersion, DataItems: []perftype.DataItem{ { Data: map[string]float64{ @@ -123,8 +148,10 @@ func getThroughputPerfData(batchLag time.Duration, e2eLags []framework.PodLatenc } } -// getTestNodeInfo fetches the capacity of a node from API server and returns a map of labels. -func getTestNodeInfo(f *framework.Framework, testName string) map[string]string { +// getTestNodeInfo returns a label map containing the test name and +// description, the name of the node on which the test will be run, the image +// name of the node, and the node capacities. 
+func getTestNodeInfo(f *framework.Framework, testName, testDesc string) map[string]string { nodeName := framework.TestContext.NodeName node, err := f.ClientSet.Core().Nodes().Get(nodeName, metav1.GetOptions{}) Expect(err).NotTo(HaveOccurred()) @@ -154,5 +181,6 @@ func getTestNodeInfo(f *framework.Framework, testName string) map[string]string "test": testName, "image": node.Status.NodeInfo.OSImage, "machine": fmt.Sprintf("cpu:%dcore,memory:%.1fGB", cpuValue, float32(memoryValue)/(1024*1024*1024)), + "desc": testDesc, } } diff --git a/test/e2e_node/density_test.go b/test/e2e_node/density_test.go index f9c57ad1f9..4ec04d480e 100644 --- a/test/e2e_node/density_test.go +++ b/test/e2e_node/density_test.go @@ -94,10 +94,10 @@ var _ = framework.KubeDescribe("Density [Serial] [Slow]", func() { for _, testArg := range dTests { itArg := testArg - It(fmt.Sprintf("latency/resource should be within limit when create %d pods with %v interval", - itArg.podsNr, itArg.interval), func() { + desc := fmt.Sprintf("latency/resource should be within limit when create %d pods with %v interval", itArg.podsNr, itArg.interval) + It(desc, func() { itArg.createMethod = "batch" - testInfo := getTestNodeInfo(f, itArg.getTestName()) + testInfo := getTestNodeInfo(f, itArg.getTestName(), desc) batchLag, e2eLags := runDensityBatchTest(f, rc, itArg, testInfo, false) @@ -152,10 +152,10 @@ var _ = framework.KubeDescribe("Density [Serial] [Slow]", func() { for _, testArg := range dTests { itArg := testArg - It(fmt.Sprintf("latency/resource should be within limit when create %d pods with %v interval [Benchmark]", - itArg.podsNr, itArg.interval), func() { + desc := fmt.Sprintf("latency/resource should be within limit when create %d pods with %v interval [Benchmark]", itArg.podsNr, itArg.interval) + It(desc, func() { itArg.createMethod = "batch" - testInfo := getTestNodeInfo(f, itArg.getTestName()) + testInfo := getTestNodeInfo(f, itArg.getTestName(), desc) batchLag, e2eLags := runDensityBatchTest(f, rc, 
itArg, testInfo, true) @@ -189,10 +189,10 @@ var _ = framework.KubeDescribe("Density [Serial] [Slow]", func() { for _, testArg := range dTests { itArg := testArg - It(fmt.Sprintf("latency/resource should be within limit when create %d pods with %v interval (QPS %d) [Benchmark]", - itArg.podsNr, itArg.interval, itArg.APIQPSLimit), func() { + desc := fmt.Sprintf("latency/resource should be within limit when create %d pods with %v interval (QPS %d) [Benchmark]", itArg.podsNr, itArg.interval, itArg.APIQPSLimit) + It(desc, func() { itArg.createMethod = "batch" - testInfo := getTestNodeInfo(f, itArg.getTestName()) + testInfo := getTestNodeInfo(f, itArg.getTestName(), desc) // The latency caused by API QPS limit takes a large portion (up to ~33%) of e2e latency. // It makes the pod startup latency of Kubelet (creation throughput as well) under-estimated. // Here we set API QPS limit from default 5 to 60 in order to test real Kubelet performance. @@ -232,10 +232,10 @@ var _ = framework.KubeDescribe("Density [Serial] [Slow]", func() { for _, testArg := range dTests { itArg := testArg - It(fmt.Sprintf("latency/resource should be within limit when create %d pods with %d background pods", - itArg.podsNr, itArg.bgPodsNr), func() { + desc := fmt.Sprintf("latency/resource should be within limit when create %d pods with %d background pods", itArg.podsNr, itArg.bgPodsNr) + It(desc, func() { itArg.createMethod = "sequence" - testInfo := getTestNodeInfo(f, itArg.getTestName()) + testInfo := getTestNodeInfo(f, itArg.getTestName(), desc) batchlag, e2eLags := runDensitySeqTest(f, rc, itArg, testInfo) By("Verifying latency") @@ -265,10 +265,10 @@ var _ = framework.KubeDescribe("Density [Serial] [Slow]", func() { for _, testArg := range dTests { itArg := testArg - It(fmt.Sprintf("latency/resource should be within limit when create %d pods with %d background pods [Benchmark]", - itArg.podsNr, itArg.bgPodsNr), func() { + desc := fmt.Sprintf("latency/resource should be within limit when 
create %d pods with %d background pods [Benchmark]", itArg.podsNr, itArg.bgPodsNr) + It(desc, func() { itArg.createMethod = "sequence" - testInfo := getTestNodeInfo(f, itArg.getTestName()) + testInfo := getTestNodeInfo(f, itArg.getTestName(), desc) batchlag, e2eLags := runDensitySeqTest(f, rc, itArg, testInfo) By("Verifying latency") @@ -551,7 +551,7 @@ func logAndVerifyLatency(batchLag time.Duration, e2eLags []framework.PodLatencyD podCreateLatency := framework.PodStartupLatency{Latency: framework.ExtractLatencyMetrics(e2eLags)} // log latency perf data - framework.PrintPerfData(getLatencyPerfData(podCreateLatency.Latency, testInfo)) + logPerfData(getLatencyPerfData(podCreateLatency.Latency, testInfo), "latency") if isVerify { // check whether e2e pod startup time is acceptable. @@ -567,7 +567,7 @@ func logAndVerifyLatency(batchLag time.Duration, e2eLags []framework.PodLatencyD // logThroughput calculates and logs pod creation throughput. func logPodCreateThroughput(batchLag time.Duration, e2eLags []framework.PodLatencyData, podsNr int, testInfo map[string]string) { - framework.PrintPerfData(getThroughputPerfData(batchLag, e2eLags, podsNr, testInfo)) + logPerfData(getThroughputPerfData(batchLag, e2eLags, podsNr, testInfo), "throughput") } // increaseKubeletAPIQPSLimit sets Kubelet API QPS via ConfigMap. Kubelet will restart with the new QPS. 
diff --git a/test/e2e_node/gubernator.sh b/test/e2e_node/gubernator.sh index 7a8c549497..6a41900c81 100755 --- a/test/e2e_node/gubernator.sh +++ b/test/e2e_node/gubernator.sh @@ -119,7 +119,7 @@ for upload_attempt in $(seq 3); do if [[ -d "${ARTIFACTS}" && -n $(ls -A "${ARTIFACTS}") ]]; then V=2 kube::log::status "Uploading artifacts" gsutil -m -q -o "GSUtil:use_magicfile=True" cp -a "${gcs_acl}" -r -c \ - -z log,xml,xml "${ARTIFACTS}" "${GCS_LOGS_PATH}/artifacts" || continue + -z log,xml,json "${ARTIFACTS}" "${GCS_LOGS_PATH}/artifacts" || continue fi break done diff --git a/test/e2e_node/perftype/BUILD b/test/e2e_node/perftype/BUILD new file mode 100644 index 0000000000..b815d9296d --- /dev/null +++ b/test/e2e_node/perftype/BUILD @@ -0,0 +1,27 @@ +package(default_visibility = ["//visibility:public"]) + +licenses(["notice"]) + +load( + "@io_bazel_rules_go//go:def.bzl", + "go_library", +) + +go_library( + name = "go_default_library", + srcs = ["perftype.go"], + tags = ["automanaged"], +) + +filegroup( + name = "package-srcs", + srcs = glob(["**"]), + tags = ["automanaged"], + visibility = ["//visibility:private"], +) + +filegroup( + name = "all-srcs", + srcs = [":package-srcs"], + tags = ["automanaged"], +) diff --git a/test/e2e_node/perftype/perftype.go b/test/e2e_node/perftype/perftype.go new file mode 100644 index 0000000000..bedd2e118f --- /dev/null +++ b/test/e2e_node/perftype/perftype.go @@ -0,0 +1,34 @@ +/* +Copyright 2017 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +*/ + +package perftype + +// ResourceSeries defines the time series of the resource usage. +type ResourceSeries struct { + Timestamp []int64 `json:"ts"` + CPUUsageInMilliCores []int64 `json:"cpu"` + MemoryRSSInMegaBytes []int64 `json:"memory"` + Units map[string]string `json:"unit"` +} + +// NodeTimeSeries defines the time series of the operations and the resource +// usage. +type NodeTimeSeries struct { + OperationData map[string][]int64 `json:"op_series,omitempty"` + ResourceData map[string]*ResourceSeries `json:"resource_series,omitempty"` + Labels map[string]string `json:"labels"` + Version string `json:"version"` +} diff --git a/test/e2e_node/remote/remote.go b/test/e2e_node/remote/remote.go index decf23d9df..9b6d0b9b7b 100644 --- a/test/e2e_node/remote/remote.go +++ b/test/e2e_node/remote/remote.go @@ -143,6 +143,13 @@ func getTestArtifacts(host, testDir string) error { if err != nil { return err } + // Copy json files (if any) to artifacts. + if _, err = SSH(host, "ls", fmt.Sprintf("%s/results/*.json", testDir)); err == nil { + _, err = runSSHCommand("scp", "-r", fmt.Sprintf("%s:%s/results/*.json", GetHostnameOrIp(host), testDir), *resultsDir) + if err != nil { + return err + } + } // Copy junit to the top of artifacts _, err = runSSHCommand("scp", fmt.Sprintf("%s:%s/results/junit*", GetHostnameOrIp(host), testDir), *resultsDir) if err != nil { diff --git a/test/e2e_node/resource_collector.go b/test/e2e_node/resource_collector.go index 14e949266a..81c2db7a8c 100644 --- a/test/e2e_node/resource_collector.go +++ b/test/e2e_node/resource_collector.go @@ -43,6 +43,7 @@ import ( stats "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1" "k8s.io/kubernetes/pkg/util/procfs" "k8s.io/kubernetes/test/e2e/framework" + "k8s.io/kubernetes/test/e2e_node/perftype" . 
"github.com/onsi/gomega" ) @@ -440,19 +441,11 @@ func newTestPods(numPods int, volume bool, imageName, podType string) []*v1.Pod return pods } -// Time series of resource usage -type ResourceSeries struct { - Timestamp []int64 `json:"ts"` - CPUUsageInMilliCores []int64 `json:"cpu"` - MemoryRSSInMegaBytes []int64 `json:"memory"` - Units map[string]string `json:"unit"` -} - // GetResourceSeriesWithLabels gets the time series of resource usage of each container. -func (r *ResourceCollector) GetResourceTimeSeries() map[string]*ResourceSeries { - resourceSeries := make(map[string]*ResourceSeries) +func (r *ResourceCollector) GetResourceTimeSeries() map[string]*perftype.ResourceSeries { + resourceSeries := make(map[string]*perftype.ResourceSeries) for key, name := range systemContainers { - newSeries := &ResourceSeries{Units: map[string]string{ + newSeries := &perftype.ResourceSeries{Units: map[string]string{ "cpu": "mCPU", "memory": "MB", }} diff --git a/test/e2e_node/resource_usage_test.go b/test/e2e_node/resource_usage_test.go index bdb254f21a..b0c76ed46a 100644 --- a/test/e2e_node/resource_usage_test.go +++ b/test/e2e_node/resource_usage_test.go @@ -79,9 +79,9 @@ var _ = framework.KubeDescribe("Resource-usage [Serial] [Slow]", func() { for _, testArg := range rTests { itArg := testArg - - It(fmt.Sprintf("resource tracking for %d pods per node", itArg.podsNr), func() { - testInfo := getTestNodeInfo(f, itArg.getTestName()) + desc := fmt.Sprintf("resource tracking for %d pods per node", itArg.podsNr) + It(desc, func() { + testInfo := getTestNodeInfo(f, itArg.getTestName(), desc) runResourceUsageTest(f, rc, itArg) @@ -109,9 +109,9 @@ var _ = framework.KubeDescribe("Resource-usage [Serial] [Slow]", func() { for _, testArg := range rTests { itArg := testArg - - It(fmt.Sprintf("resource tracking for %d pods per node [Benchmark]", itArg.podsNr), func() { - testInfo := getTestNodeInfo(f, itArg.getTestName()) + desc := fmt.Sprintf("resource tracking for %d pods per node 
[Benchmark]", itArg.podsNr) + It(desc, func() { + testInfo := getTestNodeInfo(f, itArg.getTestName(), desc) runResourceUsageTest(f, rc, itArg) @@ -202,8 +202,8 @@ func logAndVerifyResource(f *framework.Framework, rc *ResourceCollector, cpuLimi cpuSummaryPerNode[nodeName] = cpuSummary // Print resource usage - framework.PrintPerfData(framework.ResourceUsageToPerfDataWithLabels(usagePerNode, testInfo)) - framework.PrintPerfData(framework.CPUUsageToPerfDataWithLabels(cpuSummaryPerNode, testInfo)) + logPerfData(framework.ResourceUsageToPerfDataWithLabels(usagePerNode, testInfo), "memory") + logPerfData(framework.CPUUsageToPerfDataWithLabels(cpuSummaryPerNode, testInfo), "cpu") // Verify resource usage if isVerify {