From 251026e374c3a0d1ab606a18573b004333815d84 Mon Sep 17 00:00:00 2001 From: Daniel Nephin Date: Fri, 6 Aug 2021 13:07:13 -0400 Subject: [PATCH 1/6] debug: remove unused --- command/debug/debug.go | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) diff --git a/command/debug/debug.go b/command/debug/debug.go index a0ec146f5f..dcb03fca0f 100644 --- a/command/debug/debug.go +++ b/command/debug/debug.go @@ -679,30 +679,17 @@ func (c *cmd) createArchiveTemp(path string) (tempName string, err error) { // defaultTargets specifies the list of all targets that // will be captured by default func (c *cmd) defaultTargets() []string { - return append(c.dynamicTargets(), c.staticTargets()...) -} - -// dynamicTargets returns all the supported targets -// that are retrieved at the interval specified -func (c *cmd) dynamicTargets() []string { - return []string{"metrics", "logs", "pprof"} -} - -// staticTargets returns all the supported targets -// that are retrieved at the start of the command execution -func (c *cmd) staticTargets() []string { - return []string{"host", "agent", "cluster"} + return []string{"metrics", "logs", "pprof", "host", "agent", "cluster"} } func (c *cmd) Synopsis() string { - return synopsis + return "Records a debugging archive for operators" } func (c *cmd) Help() string { return c.help } -const synopsis = "Records a debugging archive for operators" const help = ` Usage: consul debug [options] From bbf6a94c9af79b7d0d1281979ba8ec7edcf51a41 Mon Sep 17 00:00:00 2001 From: Daniel Nephin Date: Fri, 6 Aug 2021 13:30:32 -0400 Subject: [PATCH 2/6] debug: rename cluster target to members The API is called members. Using the same name as the API should help describe the contents of the file. --- command/debug/debug.go | 117 ++++++++++++++++++++---------------- command/debug/debug_test.go | 8 +-- 2 files changed, 69 insertions(+), 56 deletions(-) diff --git a/command/debug/debug.go b/command/debug/debug.go index dcb03fca0f..ccb2607889 100644 --- a/command/debug/debug.go +++ b/command/debug/debug.go @@ -114,7 +114,7 @@ func (c *cmd) init() { fmt.Sprintf("One or more types of information to capture. This can be used "+ "to capture a subset of information, and defaults to capturing "+ "everything available. Possible information for capture: %s. "+ - "This can be repeated multiple times.", strings.Join(c.defaultTargets(), ", "))) + "This can be repeated multiple times.", strings.Join(defaultTargets, ", "))) c.flags.DurationVar(&c.interval, "interval", debugInterval, fmt.Sprintf("The interval in which to capture dynamic information such as "+ "telemetry, and profiling. Defaults to %s.", debugInterval)) @@ -195,7 +195,7 @@ func (c *cmd) Run(args []string) int { } // Capture dynamic information from the target agent, blocking for duration - if c.configuredTarget("metrics") || c.configuredTarget("logs") || c.configuredTarget("pprof") { + if c.captureTarget(targetMetrics) || c.captureTarget(targetLogs) || c.captureTarget(targetProfiles) { g := new(errgroup.Group) g.Go(c.captureInterval) g.Go(c.captureLongRunning) @@ -264,11 +264,11 @@ func (c *cmd) prepare() (version string, err error) { // If none are specified we will collect information from // all by default if len(c.capture) == 0 { - c.capture = c.defaultTargets() + c.capture = defaultTargets } for _, t := range c.capture { - if !c.allowedTarget(t) { + if !allowedTarget(t) { return version, fmt.Errorf("target not found: %s", t) } } @@ -288,56 +288,48 @@ func (c *cmd) prepare() (version string, err error) { // captureStatic captures static target information and writes it // to the output path func (c *cmd) captureStatic() error { - // Collect errors via multierror as we want to gracefully - // fail if an API is inaccessible var errs error - // Collect the named outputs here - outputs := make(map[string]interface{}) - - // Capture host information - if c.configuredTarget("host") { + if c.captureTarget(targetHost) { host, err := c.client.Agent().Host() if err != nil { errs = multierror.Append(errs, err) } - outputs["host"] = host + if err := writeJSONFile(filepath.Join(c.output, targetHost+".json"), host); err != nil { + errs = multierror.Append(errs, err) + } } - // Capture agent information - if c.configuredTarget("agent") { + if c.captureTarget(targetAgent) { agent, err := c.client.Agent().Self() if err != nil { errs = multierror.Append(errs, err) } - outputs["agent"] = agent + if err := writeJSONFile(filepath.Join(c.output, targetAgent+".json"), agent); err != nil { + errs = multierror.Append(errs, err) + } } - // Capture cluster members information, including WAN - if c.configuredTarget("cluster") { + if c.captureTarget(targetMembers) { members, err := c.client.Agent().Members(true) if err != nil { errs = multierror.Append(errs, err) } - outputs["cluster"] = members - } - - // Write all outputs to disk as JSON - for output, v := range outputs { - marshaled, err := json.MarshalIndent(v, "", "\t") - if err != nil { - errs = multierror.Append(errs, err) - } - - err = ioutil.WriteFile(fmt.Sprintf("%s/%s.json", c.output, output), marshaled, 0644) - if err != nil { + if err := writeJSONFile(filepath.Join(c.output, targetMembers+".json"), members); err != nil { errs = multierror.Append(errs, err) } } - return errs } +func writeJSONFile(filename string, content interface{}) error { + marshaled, err := json.MarshalIndent(content, "", "\t") + if err != nil { + return err + } + return ioutil.WriteFile(filename, marshaled, 0644) +} + // captureInterval blocks for the duration of the command // specified by the duration flag, capturing the dynamic // targets at the interval specified @@ -380,7 +372,7 @@ func captureShortLived(c *cmd) error { if err != nil { return err } - if c.configuredTarget("pprof") { + if c.captureTarget(targetProfiles) { g.Go(func() error { return c.captureHeap(timestampDir) }) @@ -417,7 +409,7 @@ func (c *cmd) captureLongRunning() error { if s < 1 { s = 1 } - if c.configuredTarget("pprof") { + if c.captureTarget(targetProfiles) { g.Go(func() error { return c.captureProfile(s, timestampDir) }) @@ -426,12 +418,12 @@ func (c *cmd) captureLongRunning() error { return c.captureTrace(s, timestampDir) }) } - if c.configuredTarget("logs") { + if c.captureTarget(targetLogs) { g.Go(func() error { return c.captureLogs(timestampDir) }) } - if c.configuredTarget("metrics") { + if c.captureTarget(targetMetrics) { // TODO: pass in context from caller ctx, cancel := context.WithTimeout(context.Background(), c.duration) defer cancel() @@ -450,8 +442,7 @@ func (c *cmd) captureGoRoutines(timestampDir string) error { return fmt.Errorf("failed to collect goroutine profile: %w", err) } - err = ioutil.WriteFile(fmt.Sprintf("%s/goroutine.prof", timestampDir), gr, 0644) - return err + return ioutil.WriteFile(fmt.Sprintf("%s/goroutine.prof", timestampDir), gr, 0644) } func (c *cmd) captureTrace(s float64, timestampDir string) error { @@ -460,8 +451,7 @@ func (c *cmd) captureTrace(s float64, timestampDir string) error { return fmt.Errorf("failed to collect trace: %w", err) } - err = ioutil.WriteFile(fmt.Sprintf("%s/trace.out", timestampDir), trace, 0644) - return err + return ioutil.WriteFile(fmt.Sprintf("%s/trace.out", timestampDir), trace, 0644) } func (c *cmd) captureProfile(s float64, timestampDir string) error { @@ -470,8 +460,7 @@ func (c *cmd) captureProfile(s float64, timestampDir string) error { return fmt.Errorf("failed to collect cpu profile: %w", err) } - err = ioutil.WriteFile(fmt.Sprintf("%s/profile.prof", timestampDir), prof, 0644) - return err + return ioutil.WriteFile(fmt.Sprintf("%s/profile.prof", timestampDir), prof, 0644) } func (c *cmd) captureHeap(timestampDir string) error { @@ -480,8 +469,7 @@ func (c *cmd) captureHeap(timestampDir string) error { return fmt.Errorf("failed to collect heap profile: %w", err) } - err = ioutil.WriteFile(fmt.Sprintf("%s/heap.prof", timestampDir), heap, 0644) - return err + return ioutil.WriteFile(fmt.Sprintf("%s/heap.prof", timestampDir), heap, 0644) } func (c *cmd) captureLogs(timestampDir string) error { @@ -538,23 +526,31 @@ func (c *cmd) captureMetrics(ctx context.Context, timestampDir string) error { return nil } -// allowedTarget returns a boolean if the target is able to be captured -func (c *cmd) allowedTarget(target string) bool { - for _, dt := range c.defaultTargets() { +// allowedTarget returns true if the target is a recognized name of a capture +// target. +func allowedTarget(target string) bool { + for _, dt := range defaultTargets { if dt == target { return true } } + for _, t := range deprecatedTargets { + if t == target { + return true + } + } return false } -// configuredTarget returns a boolean if the target is configured to be -// captured in the command -func (c *cmd) configuredTarget(target string) bool { +// captureTarget returns true if the target capture type is enabled. +func (c *cmd) captureTarget(target string) bool { for _, dt := range c.capture { if dt == target { return true } + if target == targetMembers && dt == targetCluster { + return true + } } return false } @@ -676,12 +672,29 @@ func (c *cmd) createArchiveTemp(path string) (tempName string, err error) { return tempName, nil } -// defaultTargets specifies the list of all targets that -// will be captured by default -func (c *cmd) defaultTargets() []string { - return []string{"metrics", "logs", "pprof", "host", "agent", "cluster"} +const ( + targetMetrics = "metrics" + targetLogs = "logs" + targetProfiles = "pprof" + targetHost = "host" + targetAgent = "agent" + targetMembers = "members" + // targetCluster is the now deprecated name for targetMembers + targetCluster = "cluster" +) + +// defaultTargets specifies the list of targets that will be captured by default +var defaultTargets = []string{ + targetMetrics, + targetLogs, + targetProfiles, + targetHost, + targetAgent, + targetMembers, } +var deprecatedTargets = []string{targetCluster} + func (c *cmd) Synopsis() string { return "Records a debugging archive for operators" } diff --git a/command/debug/debug_test.go b/command/debug/debug_test.go index 1fa331f81d..fee8113b66 100644 --- a/command/debug/debug_test.go +++ b/command/debug/debug_test.go @@ -258,17 +258,17 @@ func TestDebugCommand_CaptureTargets(t *testing.T) { "single": { []string{"agent"}, []string{"agent.json"}, - []string{"host.json", "cluster.json"}, + []string{"host.json", "members.json"}, }, "static": { []string{"agent", "host", "cluster"}, - []string{"agent.json", "host.json", "cluster.json"}, + []string{"agent.json", "host.json", "members.json"}, []string{"*/metrics.json"}, }, "metrics-only": { []string{"metrics"}, []string{"*/metrics.json"}, - []string{"agent.json", "host.json", "cluster.json"}, + []string{"agent.json", "host.json", "members.json"}, }, "all-but-pprof": { []string{ @@ -281,7 +281,7 @@ func TestDebugCommand_CaptureTargets(t *testing.T) { []string{ "host.json", "agent.json", - "cluster.json", + "members.json", "*/metrics.json", "*/consul.log", }, From 31bcd805288a047b0a4ac1deda44e99489c94677 Mon Sep 17 00:00:00 2001 From: Daniel Nephin Date: Fri, 6 Aug 2021 13:46:01 -0400 Subject: [PATCH 3/6] debug: improve a couple of the test cases Use gotest.tools/v3/fs to make better assertions about the files Remove the TestAgent from TestDebugCommand_Prepare_ValidateTiming, since we can test that validation without making any API calls. --- command/debug/debug_test.go | 93 +++++++++++++++---------------------- go.mod | 1 + go.sum | 3 ++ 3 files changed, 42 insertions(+), 55 deletions(-) diff --git a/command/debug/debug_test.go b/command/debug/debug_test.go index fee8113b66..215475706c 100644 --- a/command/debug/debug_test.go +++ b/command/debug/debug_test.go @@ -14,19 +14,18 @@ import ( "testing" "time" + "github.com/google/pprof/profile" "github.com/mitchellh/cli" "github.com/stretchr/testify/require" - - "github.com/google/pprof/profile" + "gotest.tools/v3/assert" + "gotest.tools/v3/fs" "github.com/hashicorp/consul/agent" "github.com/hashicorp/consul/sdk/testutil" "github.com/hashicorp/consul/testrpc" ) -func TestDebugCommand_noTabs(t *testing.T) { - t.Parallel() - +func TestDebugCommand_Help_TextContainsNoTabs(t *testing.T) { if strings.ContainsRune(New(cli.NewMockUi(), nil).Help(), '\t') { t.Fatal("help has tabs") } @@ -63,6 +62,16 @@ func TestDebugCommand(t *testing.T) { require.Equal(t, 0, code) require.Equal(t, "", ui.ErrorWriter.String()) + expected := fs.Expected(t, + fs.WithDir("debug", + fs.WithFile("agent.json", "", fs.MatchAnyFileContent), + fs.WithFile("host.json", "", fs.MatchAnyFileContent), + fs.WithFile("index.json", "", fs.MatchAnyFileContent), + fs.WithFile("members.json", "", fs.MatchAnyFileContent), + // TODO: make the sub-directory names predictable) + fs.MatchExtraFiles)) + assert.Assert(t, fs.Equal(testDir, expected)) + metricsFiles, err := filepath.Glob(fmt.Sprintf("%s/*/%s", outputPath, "metrics.json")) require.NoError(t, err) require.Len(t, metricsFiles, 1) @@ -127,15 +136,10 @@ func TestDebugCommand_Archive(t *testing.T) { } func TestDebugCommand_ArgsBad(t *testing.T) { - t.Parallel() - ui := cli.NewMockUi() cmd := New(ui, nil) - args := []string{ - "foo", - "bad", - } + args := []string{"foo", "bad"} if code := cmd.Run(args); code == 0 { t.Fatalf("should exit non-zero, got code: %d", code) @@ -518,65 +522,44 @@ func TestDebugCommand_ProfilesExist(t *testing.T) { } } -func TestDebugCommand_ValidateTiming(t *testing.T) { - if testing.Short() { - t.Skip("too slow for testing.Short") - } - +func TestDebugCommand_Prepare_ValidateTiming(t *testing.T) { cases := map[string]struct { duration string interval string - output string - code int + expected string }{ "both": { - "20ms", - "10ms", - "duration must be longer", - 1, + duration: "20ms", + interval: "10ms", + expected: "duration must be longer", }, "short interval": { - "10s", - "10ms", - "interval must be longer", - 1, + duration: "10s", + interval: "10ms", + expected: "interval must be longer", }, "lower duration": { - "20s", - "30s", - "must be longer than interval", - 1, + duration: "20s", + interval: "30s", + expected: "must be longer than interval", }, } for name, tc := range cases { - // Because we're only testng validation, we want to shut down - // the valid duration test to avoid hanging - shutdownCh := make(chan struct{}) + t.Run(name, func(t *testing.T) { + ui := cli.NewMockUi() + cmd := New(ui, nil) - a := agent.NewTestAgent(t, "") - defer a.Shutdown() - testrpc.WaitForLeader(t, a.RPC, "dc1") + args := []string{ + "-duration=" + tc.duration, + "-interval=" + tc.interval, + } + err := cmd.flags.Parse(args) + require.NoError(t, err) - ui := cli.NewMockUi() - cmd := New(ui, shutdownCh) - - args := []string{ - "-http-addr=" + a.HTTPAddr(), - "-duration=" + tc.duration, - "-interval=" + tc.interval, - "-capture=agent", - } - code := cmd.Run(args) - - if code != tc.code { - t.Errorf("%s: should exit %d, got code: %d", name, tc.code, code) - } - - errOutput := ui.ErrorWriter.String() - if !strings.Contains(errOutput, tc.output) { - t.Errorf("%s: expected error output '%s', got '%q'", name, tc.output, errOutput) - } + _, err = cmd.prepare() + testutil.RequireErrorContains(t, err, tc.expected) + }) } } diff --git a/go.mod b/go.mod index 9c7bb865f3..b2531f76e5 100644 --- a/go.mod +++ b/go.mod @@ -92,6 +92,7 @@ require ( google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55 google.golang.org/grpc v1.25.1 gopkg.in/square/go-jose.v2 v2.5.1 + gotest.tools/v3 v3.0.3 k8s.io/api v0.16.9 k8s.io/apimachinery v0.16.9 k8s.io/client-go v0.16.9 diff --git a/go.sum b/go.sum index 35862eacef..c8c2ff4fc4 100644 --- a/go.sum +++ b/go.sum @@ -652,6 +652,7 @@ golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3 golang.org/x/tools v0.0.0-20190312170243-e65039ee4138/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= golang.org/x/tools v0.0.0-20190506145303-2d16b83fe98c/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= +golang.org/x/tools v0.0.0-20190624222133-a101b041ded4/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= golang.org/x/tools v0.0.0-20190907020128-2ca718005c18/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191108193012-7d206e10da11/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= @@ -710,6 +711,8 @@ gopkg.in/yaml.v2 v2.2.8 h1:obN1ZagJSUGI0Ek/LBmuj4SNLPfIny3KsKFopxRdj10= gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gotest.tools/v3 v3.0.3 h1:4AuOwCGf4lLR9u3YOe2awrHygurzhO/HeQ6laiA6Sx0= +gotest.tools/v3 v3.0.3/go.mod h1:Z7Lb0S5l+klDB31fvDQX8ss/FlKDxtlFlw3Oa8Ymbl8= honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.0-20190106161140-3f1c8253044a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.0-20190418001031-e561f6794a2a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= From 4359e3811424b062d2e751045f99be98cc6bd4eb Mon Sep 17 00:00:00 2001 From: Daniel Nephin Date: Fri, 6 Aug 2021 14:24:12 -0400 Subject: [PATCH 4/6] debug: restore cancel on SigInt Some previous changes broke interrupting the debug on SigInterupt. This change restores the original behaviour by passing a context to requests. Since a new API client function was required to pass the context, I had it also return an io.ReadCloser, so that output can be streamed to files instead of fully buffering in process memory. --- api/debug.go | 22 +++++++++ command/commands_oss.go | 2 +- command/debug/debug.go | 92 +++++++++++++++++++++++-------------- command/debug/debug_test.go | 24 +++++----- command/registry.go | 1 + 5 files changed, 93 insertions(+), 48 deletions(-) diff --git a/api/debug.go b/api/debug.go index 56dcc9bcd2..2e7bb94b5b 100644 --- a/api/debug.go +++ b/api/debug.go @@ -1,7 +1,9 @@ package api import ( + "context" "fmt" + "io" "io/ioutil" "strconv" ) @@ -70,6 +72,26 @@ func (d *Debug) Profile(seconds int) ([]byte, error) { return body, nil } +// PProf returns a pprof profile for the specified number of seconds. The caller +// is responsible for closing the returned io.ReadCloser once all bytes are read. +func (d *Debug) PProf(ctx context.Context, name string, seconds int) (io.ReadCloser, error) { + r := d.c.newRequest("GET", "/debug/pprof/"+name) + r.ctx = ctx + + // Capture a profile for the specified number of seconds + r.params.Set("seconds", strconv.Itoa(seconds)) + + _, resp, err := d.c.doRequest(r) + if err != nil { + return nil, fmt.Errorf("error making request: %s", err) + } + + if resp.StatusCode != 200 { + return nil, generateUnexpectedResponseCodeError(resp) + } + return resp.Body, nil +} + // Trace returns an execution trace func (d *Debug) Trace(seconds int) ([]byte, error) { r := d.c.newRequest("GET", "/debug/pprof/trace") diff --git a/command/commands_oss.go b/command/commands_oss.go index cfa8a7e621..92b49e4106 100644 --- a/command/commands_oss.go +++ b/command/commands_oss.go @@ -166,7 +166,7 @@ func init() { Register("connect envoy pipe-bootstrap", func(ui cli.Ui) (cli.Command, error) { return pipebootstrap.New(ui), nil }) Register("connect expose", func(ui cli.Ui) (cli.Command, error) { return expose.New(ui), nil }) Register("connect redirect-traffic", func(ui cli.Ui) (cli.Command, error) { return redirecttraffic.New(ui), nil }) - Register("debug", func(ui cli.Ui) (cli.Command, error) { return debug.New(ui, MakeShutdownCh()), nil }) + Register("debug", func(ui cli.Ui) (cli.Command, error) { return debug.New(ui), nil }) Register("event", func(ui cli.Ui) (cli.Command, error) { return event.New(ui), nil }) Register("exec", func(ui cli.Ui) (cli.Command, error) { return exec.New(ui, MakeShutdownCh()), nil }) Register("force-leave", func(ui cli.Ui) (cli.Command, error) { return forceleave.New(ui), nil }) diff --git a/command/debug/debug.go b/command/debug/debug.go index ccb2607889..869161dadd 100644 --- a/command/debug/debug.go +++ b/command/debug/debug.go @@ -12,8 +12,10 @@ import ( "io" "io/ioutil" "os" + "os/signal" "path/filepath" "strings" + "syscall" "time" "golang.org/x/sync/errgroup" @@ -55,7 +57,7 @@ const ( debugProtocolVersion = 1 ) -func New(ui cli.Ui, shutdownCh <-chan struct{}) *cmd { +func New(ui cli.Ui) *cmd { ui = &cli.PrefixedUi{ OutputPrefix: "==> ", InfoPrefix: " ", @@ -63,7 +65,7 @@ func New(ui cli.Ui, shutdownCh <-chan struct{}) *cmd { Ui: ui, } - c := &cmd{UI: ui, shutdownCh: shutdownCh} + c := &cmd{UI: ui} c.init() return c } @@ -74,8 +76,6 @@ type cmd struct { http *flags.HTTPFlags help string - shutdownCh <-chan struct{} - // flags interval time.Duration duration time.Duration @@ -136,6 +136,9 @@ func (c *cmd) init() { } func (c *cmd) Run(args []string) int { + ctx, cancel := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGTERM) + defer cancel() + if err := c.flags.Parse(args); err != nil { c.UI.Error(fmt.Sprintf("Error parsing flags: %s", err)) return 1 @@ -197,8 +200,12 @@ func (c *cmd) Run(args []string) int { // Capture dynamic information from the target agent, blocking for duration if c.captureTarget(targetMetrics) || c.captureTarget(targetLogs) || c.captureTarget(targetProfiles) { g := new(errgroup.Group) - g.Go(c.captureInterval) - g.Go(c.captureLongRunning) + g.Go(func() error { + return c.captureInterval(ctx) + }) + g.Go(func() error { + return c.captureLongRunning(ctx) + }) err = g.Wait() if err != nil { c.UI.Error(fmt.Sprintf("Error encountered during collection: %v", err)) @@ -333,7 +340,7 @@ func writeJSONFile(filename string, content interface{}) error { // captureInterval blocks for the duration of the command // specified by the duration flag, capturing the dynamic // targets at the interval specified -func (c *cmd) captureInterval() error { +func (c *cmd) captureInterval(ctx context.Context) error { intervalChn := time.NewTicker(c.interval) defer intervalChn.Stop() durationChn := time.After(c.duration) @@ -358,7 +365,7 @@ func (c *cmd) captureInterval() error { case <-durationChn: intervalChn.Stop() return nil - case <-c.shutdownCh: + case <-ctx.Done(): return errors.New("stopping collection due to shutdown signal") } } @@ -395,7 +402,7 @@ func (c *cmd) createTimestampDir(timestamp int64) (string, error) { return timestampDir, nil } -func (c *cmd) captureLongRunning() error { +func (c *cmd) captureLongRunning(ctx context.Context) error { timestamp := time.Now().Local().Unix() timestampDir, err := c.createTimestampDir(timestamp) @@ -411,24 +418,27 @@ func (c *cmd) captureLongRunning() error { } if c.captureTarget(targetProfiles) { g.Go(func() error { - return c.captureProfile(s, timestampDir) + // use ctx without a timeout to allow the profile to finish sending + return c.captureProfile(ctx, s, timestampDir) }) g.Go(func() error { - return c.captureTrace(s, timestampDir) + // use ctx without a timeout to allow the trace to finish sending + return c.captureTrace(ctx, s, timestampDir) }) } if c.captureTarget(targetLogs) { g.Go(func() error { - return c.captureLogs(timestampDir) + ctx, cancel := context.WithTimeout(ctx, c.duration) + defer cancel() + return c.captureLogs(ctx, timestampDir) }) } if c.captureTarget(targetMetrics) { - // TODO: pass in context from caller - ctx, cancel := context.WithTimeout(context.Background(), c.duration) - defer cancel() - g.Go(func() error { + + ctx, cancel := context.WithTimeout(ctx, c.duration) + defer cancel() return c.captureMetrics(ctx, timestampDir) }) } @@ -445,22 +455,38 @@ func (c *cmd) captureGoRoutines(timestampDir string) error { return ioutil.WriteFile(fmt.Sprintf("%s/goroutine.prof", timestampDir), gr, 0644) } -func (c *cmd) captureTrace(s float64, timestampDir string) error { - trace, err := c.client.Debug().Trace(int(s)) - if err != nil { - return fmt.Errorf("failed to collect trace: %w", err) - } - - return ioutil.WriteFile(fmt.Sprintf("%s/trace.out", timestampDir), trace, 0644) -} - -func (c *cmd) captureProfile(s float64, timestampDir string) error { - prof, err := c.client.Debug().Profile(int(s)) +func (c *cmd) captureTrace(ctx context.Context, s float64, timestampDir string) error { + prof, err := c.client.Debug().PProf(ctx, "trace", int(s)) if err != nil { return fmt.Errorf("failed to collect cpu profile: %w", err) } + defer prof.Close() - return ioutil.WriteFile(fmt.Sprintf("%s/profile.prof", timestampDir), prof, 0644) + r := bufio.NewReader(prof) + fh, err := os.Create(fmt.Sprintf("%s/trace.out", timestampDir)) + if err != nil { + return err + } + defer fh.Close() + _, err = r.WriteTo(fh) + return err +} + +func (c *cmd) captureProfile(ctx context.Context, s float64, timestampDir string) error { + prof, err := c.client.Debug().PProf(ctx, "profile", int(s)) + if err != nil { + return fmt.Errorf("failed to collect cpu profile: %w", err) + } + defer prof.Close() + + r := bufio.NewReader(prof) + fh, err := os.Create(fmt.Sprintf("%s/profile.prof", timestampDir)) + if err != nil { + return err + } + defer fh.Close() + _, err = r.WriteTo(fh) + return err } func (c *cmd) captureHeap(timestampDir string) error { @@ -472,15 +498,11 @@ func (c *cmd) captureHeap(timestampDir string) error { return ioutil.WriteFile(fmt.Sprintf("%s/heap.prof", timestampDir), heap, 0644) } -func (c *cmd) captureLogs(timestampDir string) error { - endLogChn := make(chan struct{}) - timeIsUp := time.After(c.duration) - logCh, err := c.client.Agent().Monitor("DEBUG", endLogChn, nil) +func (c *cmd) captureLogs(ctx context.Context, timestampDir string) error { + logCh, err := c.client.Agent().Monitor("DEBUG", ctx.Done(), nil) if err != nil { return err } - // Close the log stream - defer close(endLogChn) // Create the log file for writing f, err := os.Create(fmt.Sprintf("%s/%s", timestampDir, "consul.log")) @@ -498,7 +520,7 @@ func (c *cmd) captureLogs(timestampDir string) error { if _, err = f.WriteString(log + "\n"); err != nil { return err } - case <-timeIsUp: + case <-ctx.Done(): return nil } } diff --git a/command/debug/debug_test.go b/command/debug/debug_test.go index 215475706c..22820e5cfb 100644 --- a/command/debug/debug_test.go +++ b/command/debug/debug_test.go @@ -26,7 +26,7 @@ import ( ) func TestDebugCommand_Help_TextContainsNoTabs(t *testing.T) { - if strings.ContainsRune(New(cli.NewMockUi(), nil).Help(), '\t') { + if strings.ContainsRune(New(cli.NewMockUi()).Help(), '\t') { t.Fatal("help has tabs") } } @@ -46,7 +46,7 @@ func TestDebugCommand(t *testing.T) { testrpc.WaitForLeader(t, a.RPC, "dc1") ui := cli.NewMockUi() - cmd := New(ui, nil) + cmd := New(ui) cmd.validateTiming = false outputPath := fmt.Sprintf("%s/debug", testDir) @@ -92,7 +92,7 @@ func TestDebugCommand_Archive(t *testing.T) { testrpc.WaitForLeader(t, a.RPC, "dc1") ui := cli.NewMockUi() - cmd := New(ui, nil) + cmd := New(ui) cmd.validateTiming = false outputPath := fmt.Sprintf("%s/debug", testDir) @@ -137,7 +137,7 @@ func TestDebugCommand_Archive(t *testing.T) { func TestDebugCommand_ArgsBad(t *testing.T) { ui := cli.NewMockUi() - cmd := New(ui, nil) + cmd := New(ui) args := []string{"foo", "bad"} @@ -153,7 +153,7 @@ func TestDebugCommand_ArgsBad(t *testing.T) { func TestDebugCommand_InvalidFlags(t *testing.T) { ui := cli.NewMockUi() - cmd := New(ui, nil) + cmd := New(ui) cmd.validateTiming = false outputPath := "" @@ -186,7 +186,7 @@ func TestDebugCommand_OutputPathBad(t *testing.T) { testrpc.WaitForLeader(t, a.RPC, "dc1") ui := cli.NewMockUi() - cmd := New(ui, nil) + cmd := New(ui) cmd.validateTiming = false outputPath := "" @@ -219,7 +219,7 @@ func TestDebugCommand_OutputPathExists(t *testing.T) { testrpc.WaitForLeader(t, a.RPC, "dc1") ui := cli.NewMockUi() - cmd := New(ui, nil) + cmd := New(ui) cmd.validateTiming = false outputPath := fmt.Sprintf("%s/debug", testDir) @@ -304,7 +304,7 @@ func TestDebugCommand_CaptureTargets(t *testing.T) { testrpc.WaitForLeader(t, a.RPC, "dc1") ui := cli.NewMockUi() - cmd := New(ui, nil) + cmd := New(ui) cmd.validateTiming = false outputPath := fmt.Sprintf("%s/debug-%s", testDir, name) @@ -387,7 +387,7 @@ func TestDebugCommand_CaptureLogs(t *testing.T) { testrpc.WaitForLeader(t, a.RPC, "dc1") ui := cli.NewMockUi() - cmd := New(ui, nil) + cmd := New(ui) cmd.validateTiming = false outputPath := fmt.Sprintf("%s/debug-%s", testDir, name) @@ -480,7 +480,7 @@ func TestDebugCommand_ProfilesExist(t *testing.T) { testrpc.WaitForLeader(t, a.RPC, "dc1") ui := cli.NewMockUi() - cmd := New(ui, nil) + cmd := New(ui) cmd.validateTiming = false outputPath := fmt.Sprintf("%s/debug", testDir) @@ -548,7 +548,7 @@ func TestDebugCommand_Prepare_ValidateTiming(t *testing.T) { for name, tc := range cases { t.Run(name, func(t *testing.T) { ui := cli.NewMockUi() - cmd := New(ui, nil) + cmd := New(ui) args := []string{ "-duration=" + tc.duration, @@ -579,7 +579,7 @@ func TestDebugCommand_DebugDisabled(t *testing.T) { testrpc.WaitForLeader(t, a.RPC, "dc1") ui := cli.NewMockUi() - cmd := New(ui, nil) + cmd := New(ui) cmd.validateTiming = false outputPath := fmt.Sprintf("%s/debug", testDir) diff --git a/command/registry.go b/command/registry.go index a96818c2f3..b400a92dfb 100644 --- a/command/registry.go +++ b/command/registry.go @@ -47,6 +47,7 @@ var registry map[string]Factory // MakeShutdownCh returns a channel that can be used for shutdown notifications // for commands. This channel will send a message for every interrupt or SIGTERM // received. +// Deprecated: use signal.NotifyContext func MakeShutdownCh() <-chan struct{} { resultCh := make(chan struct{}) signalCh := make(chan os.Signal, 4) From 26ef0df458578597b6c73c19cac391e92fde8ed9 Mon Sep 17 00:00:00 2001 From: Daniel Nephin Date: Wed, 18 Aug 2021 12:17:20 -0400 Subject: [PATCH 5/6] docs: update CLI reference docs for debug the cluster target was renamed to members. --- website/content/commands/debug.mdx | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/website/content/commands/debug.mdx b/website/content/commands/debug.mdx index dc4282d1b9..ded40e58e6 100644 --- a/website/content/commands/debug.mdx +++ b/website/content/commands/debug.mdx @@ -8,13 +8,13 @@ page_title: 'Commands: Debug' Command: `consul debug` The `consul debug` command monitors a Consul agent for the specified period of -time, recording information about the agent, cluster, and environment to an archive +time, recording information about the agent, cluster membership, and environment to an archive written to the current directory. Providing support for complex issues encountered by Consul operators often requires a large amount of debugging information to be retrieved. This command aims to shortcut that coordination and provide a simple workflow for accessing -data about Consul agent, cluster, and environment to enable faster +data about Consul agent, cluster membership, and environment to enable faster isolation and debugging of issues. This command requires an `operator:read` ACL token in order to retrieve the @@ -75,7 +75,7 @@ information when `debug` is running. By default, it captures all information. | --------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | `agent` | Version and configuration information about the agent. | | `host` | Information about resources on the host running the target agent such as CPU, memory, and disk. | -| `cluster` | A list of all the WAN and LAN members in the cluster. | +| `members` | A list of all the WAN and LAN members in the cluster. | | `metrics` | Metrics from the in-memory metrics endpoint in the target, captured at the interval. | | `logs` | `DEBUG` level logs for the target agent, captured for the duration. | | `pprof` | Golang heap, CPU, goroutine, and trace profiling. CPU and traces are captured for `duration` in a single file while heap and goroutine are separate snapshots for each `interval`. This information is not retrieved unless [`enable_debug`](/docs/agent/options#enable_debug) is set to `true` on the target agent or ACLs are enable and an ACL token with `operator:read` is provided. | From 049c4e9623e84e7fbcfca54cebeeb5c814c212c5 Mon Sep 17 00:00:00 2001 From: Daniel Nephin Date: Wed, 18 Aug 2021 12:54:11 -0400 Subject: [PATCH 6/6] add changelog --- .changelog/10804.txt | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 .changelog/10804.txt diff --git a/.changelog/10804.txt b/.changelog/10804.txt new file mode 100644 index 0000000000..ed4323fd7d --- /dev/null +++ b/.changelog/10804.txt @@ -0,0 +1,3 @@ +```release-note:improvement +debug: rename cluster capture target to members, to be more consistent with the terms used by the API. +```