Introduce apiserver profile-gathering library in testing framework

pull/6/head
Shyam Jeedigunta 2018-01-24 20:02:58 +01:00
parent 47d61ef472
commit f8cee91372
3 changed files with 158 additions and 0 deletions

View File

@ -26,6 +26,7 @@ go_library(
"nodes_util.go",
"perf_util.go",
"pods.go",
"profile_gatherer.go",
"psp_util.go",
"pv_util.go",
"rc_util.go",

View File

@ -0,0 +1,155 @@
/*
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package framework
import (
"fmt"
"io/ioutil"
"os"
"os/exec"
"path"
"strings"
"sync"
)
// DefaultCPUProfileSeconds is the number of seconds a CPU profile is gathered
// for when the caller does not ask for a specific duration.
const DefaultCPUProfileSeconds = 30
// getProfilesDirectoryPath returns the path of the "profiles" directory that
// lives directly under the test report directory (TestContext.ReportDir).
func getProfilesDirectoryPath() string {
	const profilesSubdir = "profiles"
	return path.Join(TestContext.ReportDir, profilesSubdir)
}
// createProfilesDirectoryIfNeeded makes sure the profiles directory exists,
// creating it when it is missing.
//
// The profile gathering functions are meant to be started as concurrent
// goroutines, so several callers can reach this point at the same time. A
// separate "stat, then mkdir" sequence lets two of them both observe the
// directory as missing, after which the slower one fails with "file exists".
// os.MkdirAll avoids that: it reports success when the directory is already
// there, and it also creates any missing parent directories.
//
// It returns a non-nil error if the directory could not be created, e.g.
// because the path exists but is not a directory.
func createProfilesDirectoryIfNeeded() error {
	if err := os.MkdirAll(getProfilesDirectoryPath(), 0777); err != nil {
		return fmt.Errorf("Failed to create profiles dir: %v", err)
	}
	return nil
}
// checkProfileGatheringPrerequisites verifies that a profile may be gathered
// and that there is a place to store it: profile gathering has to be allowed
// through --allow-gathering-profiles, --report-dir has to be set, and the
// profiles directory has to exist (it is created if needed). The first
// unmet condition is returned as an error.
func checkProfileGatheringPrerequisites() error {
	switch {
	case !TestContext.AllowGatheringProfiles:
		return fmt.Errorf("Can't gather profiles as --allow-gathering-profiles is false")
	case TestContext.ReportDir == "":
		return fmt.Errorf("Can't gather profiles as --report-dir is empty")
	}

	dirErr := createProfilesDirectoryIfNeeded()
	if dirErr == nil {
		return nil
	}
	return fmt.Errorf("Failed to ensure profiles dir: %v", dirErr)
}
// gatherProfileOfKind fetches a pprof profile of the given kind from the
// apiserver and renders it as a PDF graph in the profiles directory.
//
// profileBaseName is the caller-chosen part of the output file name; the
// resulting file is "<prefix><profileBaseName>.pdf", where the prefix depends
// on the profile kind. kind is the path under /debug/pprof/ to fetch:
// "heap" yields a memory (alloc_space) profile, anything starting with
// "profile" (e.g. "profile?seconds=30") yields a CPU profile.
//
// The profile is fetched by running curl on the master over SSH, stored in a
// temporary file and then handed to "go tool pprof". An error is returned if
// the prerequisites are not met, the kind is unknown, or any of those steps
// fails. No output file is left behind when the graph could not be produced.
func gatherProfileOfKind(profileBaseName, kind string) error {
	// Check some prerequisites before gathering the profile.
	if err := checkProfileGatheringPrerequisites(); err != nil {
		return err
	}

	// Work out how the profile has to be rendered before doing anything
	// expensive, so that an unsupported kind is rejected without an SSH
	// round-trip (which, for CPU profiles, blocks for the whole sampling
	// period).
	// TODO: Support other profile kinds if needed (e.g inuse_space, alloc_objects, mutex, etc)
	var pprofArgs []string
	var profilePrefix string
	switch {
	case kind == "heap":
		pprofArgs = []string{"tool", "pprof", "-pdf", "--alloc_space"}
		profilePrefix = "ApiserverMemoryProfile_"
	case strings.HasPrefix(kind, "profile"):
		pprofArgs = []string{"tool", "pprof", "-pdf"}
		profilePrefix = "ApiserverCPUProfile_"
	default:
		return fmt.Errorf("Unknown profile kind provided: %s", kind)
	}

	// Get the profile data over SSH.
	getCommand := fmt.Sprintf("curl -s localhost:8080/debug/pprof/%s", kind)
	sshResult, err := SSH(getCommand, GetMasterHost()+":22", TestContext.Provider)
	if err != nil {
		return fmt.Errorf("Failed to execute curl command on master through SSH: %v", err)
	}

	// Write the data to a temp file.
	tmpfile, err := ioutil.TempFile("", "apiserver-profile")
	if err != nil {
		return fmt.Errorf("Failed to create temp file for profile data: %v", err)
	}
	defer os.Remove(tmpfile.Name())
	if _, err := tmpfile.Write([]byte(sshResult.Stdout)); err != nil {
		// Release the descriptor; the write error is the one worth reporting.
		_ = tmpfile.Close()
		return fmt.Errorf("Failed to write temp file with profile data: %v", err)
	}
	if err := tmpfile.Close(); err != nil {
		return fmt.Errorf("Failed to close temp file: %v", err)
	}

	// Create a graph from the data and write it to a pdf file.
	outfilePath := path.Join(getProfilesDirectoryPath(), profilePrefix+profileBaseName+".pdf")
	outfile, err := os.Create(outfilePath)
	if err != nil {
		return fmt.Errorf("Failed to create file for the profile graph: %v", err)
	}
	cmd := exec.Command("go", append(pprofArgs, tmpfile.Name())...)
	cmd.Stdout = outfile
	runErr := cmd.Run()
	// Close explicitly (rather than via defer) so that a failure to flush the
	// graph to disk is not silently dropped.
	closeErr := outfile.Close()
	if runErr != nil {
		// Do not leave an empty or truncated PDF in the report directory.
		_ = os.Remove(outfilePath)
		return fmt.Errorf("Failed to run 'go tool pprof': %v", runErr)
	}
	if closeErr != nil {
		_ = os.Remove(outfilePath)
		return fmt.Errorf("Failed to close file for the profile graph: %v", closeErr)
	}
	return nil
}
// The below exposed functions can take a while to execute as they SSH to the master,
// collect and copy the profile over and then graph it. To allow waiting for these to
// finish before the parent goroutine itself finishes, we accept a sync.WaitGroup
// argument in these functions. Typically you would use the following pattern:
//
// func TestFooBar() {
// var wg sync.WaitGroup
// wg.Add(3)
// go framework.GatherApiserverCPUProfile(&wg, "doing_foo")
// go framework.GatherApiserverMemoryProfile(&wg, "doing_foo")
// <<<< some code doing foo >>>>>>
// go framework.GatherApiserverCPUProfile(&wg, "doing_bar")
// <<<< some code doing bar >>>>>>
// wg.Wait()
// }
//
// If you do not wish to exercise the waiting logic, pass a nil value for the
// waitgroup argument instead. However, then you would be responsible for ensuring
// that the function finishes.
func GatherApiserverCPUProfile(wg *sync.WaitGroup, profileBaseName string) {
GatherApiserverCPUProfileForNSeconds(wg, profileBaseName, DefaultCPUProfileSeconds)
}
// GatherApiserverCPUProfileForNSeconds gathers an apiserver CPU profile with
// n passed as the "seconds" parameter of the profile request, and stores its
// graph under the given profileBaseName. A failure is logged, not returned.
// If wg is non-nil, Done is called on it when the function returns.
func GatherApiserverCPUProfileForNSeconds(wg *sync.WaitGroup, profileBaseName string, n int) {
	if wg != nil {
		defer wg.Done()
	}

	kind := fmt.Sprintf("profile?seconds=%v", n)
	err := gatherProfileOfKind(profileBaseName, kind)
	if err == nil {
		return
	}
	Logf("Failed to gather apiserver CPU profile: %v", err)
}
// GatherApiserverMemoryProfile gathers an apiserver heap profile and stores
// its graph under the given profileBaseName. A failure is logged, not
// returned. If wg is non-nil, Done is called on it when the function returns.
func GatherApiserverMemoryProfile(wg *sync.WaitGroup, profileBaseName string) {
	if wg != nil {
		defer wg.Done()
	}

	err := gatherProfileOfKind(profileBaseName, "heap")
	if err == nil {
		return
	}
	Logf("Failed to gather apiserver memory profile: %v", err)
}

View File

@ -85,6 +85,7 @@ type TestContextType struct {
GatherLogsSizes bool
GatherMetricsAfterTest string
GatherSuiteMetricsAfterTest bool
AllowGatheringProfiles bool
// If set to 'true' framework will gather ClusterAutoscaler metrics when gathering them for other components.
IncludeClusterAutoscalerMetrics bool
// Currently supported values are 'hr' for human-readable and 'json'. It's a comma separated list.
@ -190,6 +191,7 @@ func RegisterCommonFlags() {
flag.BoolVar(&TestContext.GatherLogsSizes, "gather-logs-sizes", false, "If set to true framework will be monitoring logs sizes on all machines running e2e tests.")
flag.StringVar(&TestContext.GatherMetricsAfterTest, "gather-metrics-at-teardown", "false", "If set to 'true' framework will gather metrics from all components after each test. If set to 'master' only master component metrics would be gathered.")
flag.BoolVar(&TestContext.GatherSuiteMetricsAfterTest, "gather-suite-metrics-at-teardown", false, "If set to true framwork will gather metrics from all components after the whole test suite completes.")
flag.BoolVar(&TestContext.AllowGatheringProfiles, "allow-gathering-profiles", true, "If set to true framework will allow to gather CPU/memory allocation pprof profiles from the master.")
flag.BoolVar(&TestContext.IncludeClusterAutoscalerMetrics, "include-cluster-autoscaler", false, "If set to true, framework will include Cluster Autoscaler when gathering metrics.")
flag.StringVar(&TestContext.OutputPrintType, "output-print-type", "json", "Format in which summaries should be printed: 'hr' for human readable, 'json' for JSON ones.")
flag.BoolVar(&TestContext.DumpLogsOnFailure, "dump-logs-on-failure", true, "If set to true test will dump data about the namespace in which test was running.")