Introduce apiserver profile-gathering library in testing framework

2018-01-24 20:02:58 +01:00 · 2018-01-24 20:02:58 +01:00 · f8cee91372
parent 47d61ef472
commit f8cee91372
3 changed files with 158 additions and 0 deletions
--- a/test/e2e/framework/BUILD
+++ b/test/e2e/framework/BUILD
@ -26,6 +26,7 @@ go_library(
        "nodes_util.go",
        "perf_util.go",
        "pods.go",
+        "profile_gatherer.go",
        "psp_util.go",
        "pv_util.go",
        "rc_util.go",
--- a/test/e2e/framework/profile_gatherer.go
+++ b/test/e2e/framework/profile_gatherer.go
@ -0,0 +1,155 @@
+/*
+Copyright 2018 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package framework
+
+import (
+	"fmt"
+	"io/ioutil"
+	"os"
+	"os/exec"
+	"path"
+	"strings"
+	"sync"
+)
+
+const (
+	// DefaultCPUProfileSeconds is the default number of seconds for which the
+	// CPU profile is gathered. GatherApiserverCPUProfile passes it as the
+	// "seconds" query parameter of the pprof profile endpoint.
+	DefaultCPUProfileSeconds = 30
+)
+
+// getProfilesDirectoryPath returns the location of the "profiles"
+// subdirectory inside the configured report directory
+// (TestContext.ReportDir).
+func getProfilesDirectoryPath() string {
+	reportDir := TestContext.ReportDir
+	return path.Join(reportDir, "profiles")
+}
+
+// createProfilesDirectoryIfNeeded makes sure the profiles directory exists
+// and returns an error if it cannot be created.
+//
+// os.MkdirAll is used rather than a Stat-then-Mkdir sequence because the
+// exported Gather* functions are meant to be started from several goroutines
+// at once: with Stat+Mkdir two goroutines can both observe the directory as
+// missing, and the slower one then fails with "file exists". MkdirAll returns
+// nil when the directory is already present, creates missing parents, and
+// reports an error if the path exists but is not a directory.
+func createProfilesDirectoryIfNeeded() error {
+	if err := os.MkdirAll(getProfilesDirectoryPath(), 0777); err != nil {
+		return fmt.Errorf("Failed to create profiles dir: %v", err)
+	}
+	return nil
+}
+
+// checkProfileGatheringPrerequisites verifies that profile gathering is both
+// permitted and possible: the --allow-gathering-profiles flag must be set, a
+// report directory must be configured, and the profiles directory must exist
+// or be creatable. It returns a descriptive error for the first check that
+// fails and nil otherwise.
+func checkProfileGatheringPrerequisites() error {
+	switch {
+	case !TestContext.AllowGatheringProfiles:
+		return fmt.Errorf("Can't gather profiles as --allow-gathering-profiles is false")
+	case TestContext.ReportDir == "":
+		return fmt.Errorf("Can't gather profiles as --report-dir is empty")
+	}
+	// Flags are fine; the only remaining requirement is the output directory.
+	dirErr := createProfilesDirectoryIfNeeded()
+	if dirErr == nil {
+		return nil
+	}
+	return fmt.Errorf("Failed to ensure profiles dir: %v", dirErr)
+}
+
+// gatherProfileOfKind fetches a pprof profile of the given kind from the
+// apiserver by running curl on the master over SSH, renders it with
+// 'go tool pprof' and stores the graph as
+// <profiles dir>/<prefix><profileBaseName>.pdf.
+//
+// kind is the path below /debug/pprof/ to query: "heap" produces a memory
+// profile graph (allocated space), and any value starting with "profile"
+// (e.g. "profile?seconds=30") produces a CPU profile graph. Any other kind is
+// rejected with an error before the master is contacted.
+//
+// An error is returned if the prerequisites are not met, the SSH command
+// fails or yields no data, a file cannot be written, or pprof fails.
+func gatherProfileOfKind(profileBaseName, kind string) error {
+	// Check some prerequisites before gathering the profile.
+	if err := checkProfileGatheringPrerequisites(); err != nil {
+		return err
+	}
+	// Validate the kind up front so that an unsupported value fails fast,
+	// before paying for the SSH round trip and creating any files.
+	var pprofArgs []string
+	var profilePrefix string
+	switch {
+	// TODO: Support other profile kinds if needed (e.g. inuse_space, alloc_objects, mutex, etc)
+	case kind == "heap":
+		pprofArgs = []string{"tool", "pprof", "-pdf", "--alloc_space"}
+		profilePrefix = "ApiserverMemoryProfile_"
+	case strings.HasPrefix(kind, "profile"):
+		pprofArgs = []string{"tool", "pprof", "-pdf"}
+		profilePrefix = "ApiserverCPUProfile_"
+	default:
+		return fmt.Errorf("Unknown profile kind provided: %s", kind)
+	}
+	// Get the profile data over SSH. The URL is quoted because the CPU kind
+	// carries a "?seconds=N" query and '?' is a glob character for the shell.
+	getCommand := fmt.Sprintf("curl -s 'localhost:8080/debug/pprof/%s'", kind)
+	sshResult, err := SSH(getCommand, GetMasterHost()+":22", TestContext.Provider)
+	if err != nil {
+		return fmt.Errorf("Failed to execute curl command on master through SSH: %v", err)
+	}
+	// 'curl -s' prints nothing when it fails; an empty profile cannot be graphed.
+	if len(sshResult.Stdout) == 0 {
+		return fmt.Errorf("Failed to get profile data from master: curl returned no output")
+	}
+	// Write the data to a temp file.
+	tmpfile, err := ioutil.TempFile("", "apiserver-profile")
+	if err != nil {
+		return fmt.Errorf("Failed to create temp file for profile data: %v", err)
+	}
+	defer os.Remove(tmpfile.Name())
+	if _, err := tmpfile.Write([]byte(sshResult.Stdout)); err != nil {
+		// Best-effort close so the descriptor is not leaked; the write error is the one worth reporting.
+		tmpfile.Close()
+		return fmt.Errorf("Failed to write temp file with profile data: %v", err)
+	}
+	if err := tmpfile.Close(); err != nil {
+		return fmt.Errorf("Failed to close temp file: %v", err)
+	}
+	// Create a graph from the data and write it to a pdf file.
+	outfilePath := path.Join(getProfilesDirectoryPath(), profilePrefix+profileBaseName+".pdf")
+	outfile, err := os.Create(outfilePath)
+	if err != nil {
+		return fmt.Errorf("Failed to create file for the profile graph: %v", err)
+	}
+	cmd := exec.Command("go", append(pprofArgs, tmpfile.Name())...)
+	cmd.Stdout = outfile
+	runErr := cmd.Run()
+	// Close explicitly (not via defer) so that a failed flush is reported.
+	closeErr := outfile.Close()
+	if runErr != nil {
+		// Do not leave a truncated or empty pdf behind; removal is best effort.
+		os.Remove(outfilePath)
+		return fmt.Errorf("Failed to run 'go tool pprof': %v", runErr)
+	}
+	if closeErr != nil {
+		return fmt.Errorf("Failed to close file with the profile graph: %v", closeErr)
+	}
+	return nil
+}
+
+// The exported functions below can take a while to execute, as they SSH to the master,
+// collect and copy the profile over, and then graph it. To allow waiting for them to
+// finish before the parent goroutine itself finishes, they accept a sync.WaitGroup
+// argument. Typically you would use the following pattern:
+//
+// func TestFooBar() {
+//		var wg sync.WaitGroup
+//		wg.Add(3)
+//		go framework.GatherApiserverCPUProfile(&wg, "doing_foo")
+//		go framework.GatherApiserverMemoryProfile(&wg, "doing_foo")
+//		<<<< some code doing foo >>>>>>
+//		go framework.GatherApiserverCPUProfile(&wg, "doing_bar")
+//		<<<< some code doing bar >>>>>>
+//		wg.Wait()
+// }
+//
+// If you do not wish to exercise the waiting logic, pass a nil value for the
+// waitgroup argument instead. However, then you would be responsible for ensuring
+// that the function finishes.
+
+// GatherApiserverCPUProfile gathers an apiserver CPU profile covering
+// DefaultCPUProfileSeconds seconds by delegating to
+// GatherApiserverCPUProfileForNSeconds. wg may be nil; profileBaseName is
+// used as part of the output file name.
+func GatherApiserverCPUProfile(wg *sync.WaitGroup, profileBaseName string) {
+	GatherApiserverCPUProfileForNSeconds(wg, profileBaseName, DefaultCPUProfileSeconds)
+}
+
+// GatherApiserverCPUProfileForNSeconds gathers an apiserver CPU profile
+// covering n seconds and stores its graph under a name derived from
+// profileBaseName. If wg is non-nil, wg.Done() is called when the function
+// returns. Failures are logged rather than returned.
+func GatherApiserverCPUProfileForNSeconds(wg *sync.WaitGroup, profileBaseName string, n int) {
+	if wg != nil {
+		defer wg.Done()
+	}
+	cpuKind := fmt.Sprintf("profile?seconds=%v", n)
+	gatherErr := gatherProfileOfKind(profileBaseName, cpuKind)
+	if gatherErr == nil {
+		return
+	}
+	Logf("Failed to gather apiserver CPU profile: %v", gatherErr)
+}
+
+// GatherApiserverMemoryProfile gathers an apiserver heap profile and stores
+// its graph under a name derived from profileBaseName. If wg is non-nil,
+// wg.Done() is called when the function returns. Failures are logged rather
+// than returned.
+func GatherApiserverMemoryProfile(wg *sync.WaitGroup, profileBaseName string) {
+	if wg != nil {
+		defer wg.Done()
+	}
+	gatherErr := gatherProfileOfKind(profileBaseName, "heap")
+	if gatherErr == nil {
+		return
+	}
+	Logf("Failed to gather apiserver memory profile: %v", gatherErr)
+}
--- a/test/e2e/framework/test_context.go
+++ b/test/e2e/framework/test_context.go
@ -85,6 +85,7 @@ type TestContextType struct {
 	GatherLogsSizes                   bool
 	GatherMetricsAfterTest            string
 	GatherSuiteMetricsAfterTest       bool
+	AllowGatheringProfiles            bool
 	// If set to 'true' framework will gather ClusterAutoscaler metrics when gathering them for other components.
 	IncludeClusterAutoscalerMetrics bool
 	// Currently supported values are 'hr' for human-readable and 'json'. It's a comma separated list.
@ -190,6 +191,7 @@ func RegisterCommonFlags() {
 	flag.BoolVar(&TestContext.GatherLogsSizes, "gather-logs-sizes", false, "If set to true framework will be monitoring logs sizes on all machines running e2e tests.")
 	flag.StringVar(&TestContext.GatherMetricsAfterTest, "gather-metrics-at-teardown", "false", "If set to 'true' framework will gather metrics from all components after each test. If set to 'master' only master component metrics would be gathered.")
 	flag.BoolVar(&TestContext.GatherSuiteMetricsAfterTest, "gather-suite-metrics-at-teardown", false, "If set to true framwork will gather metrics from all components after the whole test suite completes.")
+	flag.BoolVar(&TestContext.AllowGatheringProfiles, "allow-gathering-profiles", true, "If set to true framework will allow to gather CPU/memory allocation pprof profiles from the master.")
 	flag.BoolVar(&TestContext.IncludeClusterAutoscalerMetrics, "include-cluster-autoscaler", false, "If set to true, framework will include Cluster Autoscaler when gathering metrics.")
 	flag.StringVar(&TestContext.OutputPrintType, "output-print-type", "json", "Format in which summaries should be printed: 'hr' for human readable, 'json' for JSON ones.")
 	flag.BoolVar(&TestContext.DumpLogsOnFailure, "dump-logs-on-failure", true, "If set to true test will dump data about the namespace in which test was running.")