Logs node e2e perf data to standalone json files

pull/6/head
Yang Guo 2017-06-09 09:56:44 -07:00
parent 3837d95191
commit 29b2db5af3
11 changed files with 154 additions and 61 deletions


@ -452,6 +452,7 @@ staging/src/k8s.io/metrics/pkg/apis/metrics/install
staging/src/k8s.io/sample-apiserver
staging/src/k8s.io/sample-apiserver/pkg/apis/wardle/install
test/e2e/perftype
test/e2e_node/perftype
test/e2e_node/runner/local
test/images/clusterapi-tester
test/images/entrypoint-tester


@ -71,9 +71,10 @@ func PodStartupLatencyToPerfData(latency *PodStartupLatency) *perftype.PerfData
return perfData
}
// currentKubeletPerfMetricsVersion is the current kubelet performance metrics version. We should
// bump up the version each time we make incompatible change to the metrics.
const currentKubeletPerfMetricsVersion = "v2"
// CurrentKubeletPerfMetricsVersion is the current kubelet performance metrics
// version. This is used by multiple perf-related data structures. We should
// bump up the version each time we make an incompatible change to the metrics.
const CurrentKubeletPerfMetricsVersion = "v2"
// ResourceUsageToPerfData transforms ResourceUsagePerNode to PerfData. Notice that this function
// only cares about memory usage, because cpu usage information will be extracted from NodesCPUSummary.
@ -119,7 +120,7 @@ func ResourceUsageToPerfDataWithLabels(usagePerNode ResourceUsagePerNode, labels
}
}
return &perftype.PerfData{
Version: currentKubeletPerfMetricsVersion,
Version: CurrentKubeletPerfMetricsVersion,
DataItems: items,
Labels: labels,
}
@ -149,7 +150,7 @@ func CPUUsageToPerfDataWithLabels(usagePerNode NodesCPUSummary, labels map[strin
}
}
return &perftype.PerfData{
Version: currentKubeletPerfMetricsVersion,
Version: CurrentKubeletPerfMetricsVersion,
DataItems: items,
Labels: labels,
}
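
Since the constant is now exported, packages outside test/e2e/framework can stamp their perf data with the same version instead of hard-coding "v2". A minimal sketch of a hypothetical consumer (the newPerfData helper below is illustrative, not part of this commit):

package mytests // hypothetical consumer package

import (
	"k8s.io/kubernetes/test/e2e/framework"
	"k8s.io/kubernetes/test/e2e/perftype"
)

// newPerfData shows how a caller can reuse the exported version constant.
func newPerfData(items []perftype.DataItem, labels map[string]string) *perftype.PerfData {
	return &perftype.PerfData{
		Version:   framework.CurrentKubeletPerfMetricsVersion,
		DataItems: items,
		Labels:    labels,
	}
}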


@ -41,6 +41,7 @@ go_library(
"//test/e2e/common:go_default_library",
"//test/e2e/framework:go_default_library",
"//test/e2e/perftype:go_default_library",
"//test/e2e_node/perftype:go_default_library",
"//vendor/github.com/golang/glog:go_default_library",
"//vendor/github.com/google/cadvisor/client/v2:go_default_library",
"//vendor/github.com/google/cadvisor/info/v2:go_default_library",
@ -163,6 +164,7 @@ filegroup(
":package-srcs",
"//test/e2e_node/builder:all-srcs",
"//test/e2e_node/environment:all-srcs",
"//test/e2e_node/perftype:all-srcs",
"//test/e2e_node/remote:all-srcs",
"//test/e2e_node/runner/local:all-srcs",
"//test/e2e_node/runner/remote:all-srcs",


@ -20,36 +20,57 @@ package e2e_node
import (
"fmt"
"io/ioutil"
"path"
"sort"
"strconv"
"time"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/kubernetes/test/e2e/framework"
"k8s.io/kubernetes/test/e2e/perftype"
nodeperftype "k8s.io/kubernetes/test/e2e_node/perftype"
. "github.com/onsi/gomega"
)
const (
// TODO(coufon): be consistent with perf_util.go version
currentDataVersion = "v1"
TimeSeriesTag = "[Result:TimeSeries]"
TimeSeriesEnd = "[Finish:TimeSeries]"
)
type NodeTimeSeries struct {
// value in OperationData is an array of timestamps
OperationData map[string][]int64 `json:"op_series,omitempty"`
ResourceData map[string]*ResourceSeries `json:"resource_series,omitempty"`
Labels map[string]string `json:"labels"`
Version string `json:"version"`
}
// dumpDataToFile inserts the current timestamp into the labels and writes the
// data to a JSON file named after the given prefix and the test name.
func dumpDataToFile(data interface{}, labels map[string]string, prefix string) {
testName := labels["test"]
fileName := path.Join(framework.TestContext.ReportDir, fmt.Sprintf("%s-%s.json", prefix, testName))
labels["timestamp"] = strconv.FormatInt(time.Now().UTC().Unix(), 10)
framework.Logf("Dumping perf data for test %q to %q.", testName, fileName)
if err := ioutil.WriteFile(fileName, []byte(framework.PrettyPrintJSON(data)), 0644); err != nil {
framework.Logf("Failed to write perf data for test %q to %q: %v", testName, fileName, err)
}
}
// logDensityTimeSeries logs the time series data of operation and resource usage
// logPerfData writes the perf data to a standalone json file if the
// framework.TestContext.ReportDir is non-empty, or to the general build log
// otherwise. The perfType argument identifies the kind of perf data, e.g.
// "cpu" or "memory". If an error occurs, no perf data will be logged.
func logPerfData(p *perftype.PerfData, perfType string) {
if framework.TestContext.ReportDir == "" {
framework.PrintPerfData(p)
return
}
dumpDataToFile(p, p.Labels, "performance-"+perfType)
}
// logDensityTimeSeries writes the time series data of operation and resource
// usage to a standalone json file if the framework.TestContext.ReportDir is
// non-empty, or to the general build log otherwise. If an error occurs,
// no perf data will be logged.
func logDensityTimeSeries(rc *ResourceCollector, create, watch map[string]metav1.Time, testInfo map[string]string) {
timeSeries := &NodeTimeSeries{
timeSeries := &nodeperftype.NodeTimeSeries{
Labels: testInfo,
Version: currentDataVersion,
Version: framework.CurrentKubeletPerfMetricsVersion,
}
// Attach operation time series.
timeSeries.OperationData = map[string][]int64{
@ -58,8 +79,12 @@ func logDensityTimeSeries(rc *ResourceCollector, create, watch map[string]metav1
}
// Attach resource time series.
timeSeries.ResourceData = rc.GetResourceTimeSeries()
// Log time series with tags
framework.Logf("%s %s\n%s", TimeSeriesTag, framework.PrettyPrintJSON(timeSeries), TimeSeriesEnd)
if framework.TestContext.ReportDir == "" {
framework.Logf("%s %s\n%s", TimeSeriesTag, framework.PrettyPrintJSON(timeSeries), TimeSeriesEnd)
return
}
dumpDataToFile(timeSeries, timeSeries.Labels, "time_series")
}
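
When ReportDir is empty, the series still goes to the build log between the two tags, so downstream parsers can extract it. Roughly (payload hypothetical; PrettyPrintJSON actually emits indented, multi-line JSON):

[Result:TimeSeries] {"op_series":{"create":[...]},"resource_series":{...},"labels":{"test":"..."},"version":"v2"}
[Finish:TimeSeries]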
type int64arr []int64
@ -82,7 +107,7 @@ func getCumulatedPodTimeSeries(timePerPod map[string]metav1.Time) []int64 {
// getLatencyPerfData returns perf data of pod startup latency.
func getLatencyPerfData(latency framework.LatencyMetric, testInfo map[string]string) *perftype.PerfData {
return &perftype.PerfData{
Version: currentDataVersion,
Version: framework.CurrentKubeletPerfMetricsVersion,
DataItems: []perftype.DataItem{
{
Data: map[string]float64{
@ -105,7 +130,7 @@ func getLatencyPerfData(latency framework.LatencyMetric, testInfo map[string]str
// getThroughputPerfData returns perf data of pod creation startup throughput.
func getThroughputPerfData(batchLag time.Duration, e2eLags []framework.PodLatencyData, podsNr int, testInfo map[string]string) *perftype.PerfData {
return &perftype.PerfData{
Version: currentDataVersion,
Version: framework.CurrentKubeletPerfMetricsVersion,
DataItems: []perftype.DataItem{
{
Data: map[string]float64{
@ -123,8 +148,10 @@ func getThroughputPerfData(batchLag time.Duration, e2eLags []framework.PodLatenc
}
}
// getTestNodeInfo fetches the capacity of a node from API server and returns a map of labels.
func getTestNodeInfo(f *framework.Framework, testName string) map[string]string {
// getTestNodeInfo returns a label map containing the test name and
// description, the name of the node on which the test will be run, the image
// name of the node, and the node capacities.
func getTestNodeInfo(f *framework.Framework, testName, testDesc string) map[string]string {
nodeName := framework.TestContext.NodeName
node, err := f.ClientSet.Core().Nodes().Get(nodeName, metav1.GetOptions{})
Expect(err).NotTo(HaveOccurred())
@ -154,5 +181,6 @@ func getTestNodeInfo(f *framework.Framework, testName string) map[string]string
"test": testName,
"image": node.Status.NodeInfo.OSImage,
"machine": fmt.Sprintf("cpu:%dcore,memory:%.1fGB", cpuValue, float32(memoryValue)/(1024*1024*1024)),
"desc": testDesc,
}
}
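
To make the naming concrete, a runnable sketch of the file path that dumpDataToFile produces (directory and test name are hypothetical):

package main

import (
	"fmt"
	"path"
	"strconv"
	"time"
)

func main() {
	reportDir := "/tmp/_artifacts"           // assumed --report-dir value
	testName := "create_10_pods_0s_interval" // hypothetical labels["test"]
	prefix := "performance-latency"          // logPerfData passes "performance-"+perfType
	fileName := path.Join(reportDir, fmt.Sprintf("%s-%s.json", prefix, testName))
	labelsTimestamp := strconv.FormatInt(time.Now().UTC().Unix(), 10)
	// e.g. /tmp/_artifacts/performance-latency-create_10_pods_0s_interval.json 1496940000
	fmt.Println(fileName, labelsTimestamp)
}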


@ -94,10 +94,10 @@ var _ = framework.KubeDescribe("Density [Serial] [Slow]", func() {
for _, testArg := range dTests {
itArg := testArg
It(fmt.Sprintf("latency/resource should be within limit when create %d pods with %v interval",
itArg.podsNr, itArg.interval), func() {
desc := fmt.Sprintf("latency/resource should be within limit when create %d pods with %v interval", itArg.podsNr, itArg.interval)
It(desc, func() {
itArg.createMethod = "batch"
testInfo := getTestNodeInfo(f, itArg.getTestName())
testInfo := getTestNodeInfo(f, itArg.getTestName(), desc)
batchLag, e2eLags := runDensityBatchTest(f, rc, itArg, testInfo, false)
@ -152,10 +152,10 @@ var _ = framework.KubeDescribe("Density [Serial] [Slow]", func() {
for _, testArg := range dTests {
itArg := testArg
It(fmt.Sprintf("latency/resource should be within limit when create %d pods with %v interval [Benchmark]",
itArg.podsNr, itArg.interval), func() {
desc := fmt.Sprintf("latency/resource should be within limit when create %d pods with %v interval [Benchmark]", itArg.podsNr, itArg.interval)
It(desc, func() {
itArg.createMethod = "batch"
testInfo := getTestNodeInfo(f, itArg.getTestName())
testInfo := getTestNodeInfo(f, itArg.getTestName(), desc)
batchLag, e2eLags := runDensityBatchTest(f, rc, itArg, testInfo, true)
@ -189,10 +189,10 @@ var _ = framework.KubeDescribe("Density [Serial] [Slow]", func() {
for _, testArg := range dTests {
itArg := testArg
It(fmt.Sprintf("latency/resource should be within limit when create %d pods with %v interval (QPS %d) [Benchmark]",
itArg.podsNr, itArg.interval, itArg.APIQPSLimit), func() {
desc := fmt.Sprintf("latency/resource should be within limit when create %d pods with %v interval (QPS %d) [Benchmark]", itArg.podsNr, itArg.interval, itArg.APIQPSLimit)
It(desc, func() {
itArg.createMethod = "batch"
testInfo := getTestNodeInfo(f, itArg.getTestName())
testInfo := getTestNodeInfo(f, itArg.getTestName(), desc)
// The latency caused by the API QPS limit takes a large portion (up to ~33%) of e2e latency.
// It causes the Kubelet's pod startup latency (and creation throughput) to be under-estimated.
// Here we raise the API QPS limit from the default 5 to 60 in order to test real Kubelet performance.
@ -232,10 +232,10 @@ var _ = framework.KubeDescribe("Density [Serial] [Slow]", func() {
for _, testArg := range dTests {
itArg := testArg
It(fmt.Sprintf("latency/resource should be within limit when create %d pods with %d background pods",
itArg.podsNr, itArg.bgPodsNr), func() {
desc := fmt.Sprintf("latency/resource should be within limit when create %d pods with %d background pods", itArg.podsNr, itArg.bgPodsNr)
It(desc, func() {
itArg.createMethod = "sequence"
testInfo := getTestNodeInfo(f, itArg.getTestName())
testInfo := getTestNodeInfo(f, itArg.getTestName(), desc)
batchlag, e2eLags := runDensitySeqTest(f, rc, itArg, testInfo)
By("Verifying latency")
@ -265,10 +265,10 @@ var _ = framework.KubeDescribe("Density [Serial] [Slow]", func() {
for _, testArg := range dTests {
itArg := testArg
It(fmt.Sprintf("latency/resource should be within limit when create %d pods with %d background pods [Benchmark]",
itArg.podsNr, itArg.bgPodsNr), func() {
desc := fmt.Sprintf("latency/resource should be within limit when create %d pods with %d background pods [Benchmark]", itArg.podsNr, itArg.bgPodsNr)
It(desc, func() {
itArg.createMethod = "sequence"
testInfo := getTestNodeInfo(f, itArg.getTestName())
testInfo := getTestNodeInfo(f, itArg.getTestName(), desc)
batchlag, e2eLags := runDensitySeqTest(f, rc, itArg, testInfo)
By("Verifying latency")
@ -551,7 +551,7 @@ func logAndVerifyLatency(batchLag time.Duration, e2eLags []framework.PodLatencyD
podCreateLatency := framework.PodStartupLatency{Latency: framework.ExtractLatencyMetrics(e2eLags)}
// log latency perf data
framework.PrintPerfData(getLatencyPerfData(podCreateLatency.Latency, testInfo))
logPerfData(getLatencyPerfData(podCreateLatency.Latency, testInfo), "latency")
if isVerify {
// check whether e2e pod startup time is acceptable.
@ -567,7 +567,7 @@ func logAndVerifyLatency(batchLag time.Duration, e2eLags []framework.PodLatencyD
// logPodCreateThroughput calculates and logs pod creation throughput.
func logPodCreateThroughput(batchLag time.Duration, e2eLags []framework.PodLatencyData, podsNr int, testInfo map[string]string) {
framework.PrintPerfData(getThroughputPerfData(batchLag, e2eLags, podsNr, testInfo))
logPerfData(getThroughputPerfData(batchLag, e2eLags, podsNr, testInfo), "throughput")
}
// increaseKubeletAPIQPSLimit sets Kubelet API QPS via ConfigMap. Kubelet will restart with the new QPS.


@ -119,7 +119,7 @@ for upload_attempt in $(seq 3); do
if [[ -d "${ARTIFACTS}" && -n $(ls -A "${ARTIFACTS}") ]]; then
V=2 kube::log::status "Uploading artifacts"
gsutil -m -q -o "GSUtil:use_magicfile=True" cp -a "${gcs_acl}" -r -c \
-z log,xml,xml "${ARTIFACTS}" "${GCS_LOGS_PATH}/artifacts" || continue
-z log,xml,json "${ARTIFACTS}" "${GCS_LOGS_PATH}/artifacts" || continue
fi
break
done


@ -0,0 +1,27 @@
package(default_visibility = ["//visibility:public"])
licenses(["notice"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
)
go_library(
name = "go_default_library",
srcs = ["perftype.go"],
tags = ["automanaged"],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
)


@ -0,0 +1,34 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package perftype
// ResourceSeries defines the time series of the resource usage.
type ResourceSeries struct {
Timestamp []int64 `json:"ts"`
CPUUsageInMilliCores []int64 `json:"cpu"`
MemoryRSSInMegaBytes []int64 `json:"memory"`
Units map[string]string `json:"unit"`
}
// NodeTimeSeries defines the time series of the operations and the resource
// usage.
type NodeTimeSeries struct {
OperationData map[string][]int64 `json:"op_series,omitempty"`
ResourceData map[string]*ResourceSeries `json:"resource_series,omitempty"`
Labels map[string]string `json:"labels"`
Version string `json:"version"`
}
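
To make the on-disk shape concrete, a small program that marshals the new types (all values hypothetical):

package main

import (
	"encoding/json"
	"fmt"

	"k8s.io/kubernetes/test/e2e_node/perftype"
)

func main() {
	series := &perftype.NodeTimeSeries{
		OperationData: map[string][]int64{"create": {1496940000000, 1496940000500}},
		ResourceData: map[string]*perftype.ResourceSeries{
			"kubelet": {
				Timestamp:            []int64{1496940000000},
				CPUUsageInMilliCores: []int64{120},
				MemoryRSSInMegaBytes: []int64{85},
				Units:                map[string]string{"cpu": "mCPU", "memory": "MB"},
			},
		},
		Labels:  map[string]string{"test": "create_10_pods_0s_interval"},
		Version: "v2",
	}
	out, err := json.MarshalIndent(series, "", "  ")
	if err != nil {
		panic(err)
	}
	fmt.Println(string(out)) // the JSON written by dumpDataToFile has this shape
}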


@ -143,6 +143,13 @@ func getTestArtifacts(host, testDir string) error {
if err != nil {
return err
}
// Copy json files (if any) to artifacts.
if _, err = SSH(host, "ls", fmt.Sprintf("%s/results/*.json", testDir)); err == nil {
_, err = runSSHCommand("scp", "-r", fmt.Sprintf("%s:%s/results/*.json", GetHostnameOrIp(host), testDir), *resultsDir)
if err != nil {
return err
}
}
// Copy junit to the top of artifacts
_, err = runSSHCommand("scp", fmt.Sprintf("%s:%s/results/junit*", GetHostnameOrIp(host), testDir), *resultsDir)
if err != nil {


@ -43,6 +43,7 @@ import (
stats "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1"
"k8s.io/kubernetes/pkg/util/procfs"
"k8s.io/kubernetes/test/e2e/framework"
"k8s.io/kubernetes/test/e2e_node/perftype"
. "github.com/onsi/gomega"
)
@ -440,19 +441,11 @@ func newTestPods(numPods int, volume bool, imageName, podType string) []*v1.Pod
return pods
}
// Time series of resource usage
type ResourceSeries struct {
Timestamp []int64 `json:"ts"`
CPUUsageInMilliCores []int64 `json:"cpu"`
MemoryRSSInMegaBytes []int64 `json:"memory"`
Units map[string]string `json:"unit"`
}
// GetResourceTimeSeries gets the time series of resource usage of each container.
func (r *ResourceCollector) GetResourceTimeSeries() map[string]*ResourceSeries {
resourceSeries := make(map[string]*ResourceSeries)
func (r *ResourceCollector) GetResourceTimeSeries() map[string]*perftype.ResourceSeries {
resourceSeries := make(map[string]*perftype.ResourceSeries)
for key, name := range systemContainers {
newSeries := &ResourceSeries{Units: map[string]string{
newSeries := &perftype.ResourceSeries{Units: map[string]string{
"cpu": "mCPU",
"memory": "MB",
}}


@ -79,9 +79,9 @@ var _ = framework.KubeDescribe("Resource-usage [Serial] [Slow]", func() {
for _, testArg := range rTests {
itArg := testArg
It(fmt.Sprintf("resource tracking for %d pods per node", itArg.podsNr), func() {
testInfo := getTestNodeInfo(f, itArg.getTestName())
desc := fmt.Sprintf("resource tracking for %d pods per node", itArg.podsNr)
It(desc, func() {
testInfo := getTestNodeInfo(f, itArg.getTestName(), desc)
runResourceUsageTest(f, rc, itArg)
@ -109,9 +109,9 @@ var _ = framework.KubeDescribe("Resource-usage [Serial] [Slow]", func() {
for _, testArg := range rTests {
itArg := testArg
It(fmt.Sprintf("resource tracking for %d pods per node [Benchmark]", itArg.podsNr), func() {
testInfo := getTestNodeInfo(f, itArg.getTestName())
desc := fmt.Sprintf("resource tracking for %d pods per node [Benchmark]", itArg.podsNr)
It(desc, func() {
testInfo := getTestNodeInfo(f, itArg.getTestName(), desc)
runResourceUsageTest(f, rc, itArg)
@ -202,8 +202,8 @@ func logAndVerifyResource(f *framework.Framework, rc *ResourceCollector, cpuLimi
cpuSummaryPerNode[nodeName] = cpuSummary
// Print resource usage
framework.PrintPerfData(framework.ResourceUsageToPerfDataWithLabels(usagePerNode, testInfo))
framework.PrintPerfData(framework.CPUUsageToPerfDataWithLabels(cpuSummaryPerNode, testInfo))
logPerfData(framework.ResourceUsageToPerfDataWithLabels(usagePerNode, testInfo), "memory")
logPerfData(framework.CPUUsageToPerfDataWithLabels(cpuSummaryPerNode, testInfo), "cpu")
// Verify resource usage
if isVerify {