Browse Source

Fixes #3891: agent monitor no longer unresponsive before logs stream.

The root cause is actually that the agent's streaming HTTP API didn't flush until the first log line was found which commonly was pretty soon since the default level is INFO. In cases where there were no logs immediately due to level for instance, the client gets stuck in the HTTP code waiting on a response packet from the server before we enter the loop that checks the shutdown channel from the signal handler.

This fix flushes the initial status immediately on the streaming endpoint which lets the client code get into it's expected state where it's listening for shutdown or log lines.
pull/3900/head
Paul Banks 7 years ago
parent
commit
de58eb1820
No known key found for this signature in database
GPG Key ID: C25A851A849B8221
  1. 4
      agent/agent_endpoint.go
  2. 72
      command/monitor/monitor_test.go

4
agent/agent_endpoint.go

@ -774,6 +774,10 @@ func (s *HTTPServer) AgentMonitor(resp http.ResponseWriter, req *http.Request) (
defer s.agent.LogWriter.DeregisterHandler(handler)
notify := resp.(http.CloseNotifier).CloseNotify()
// Send header so client can start streaming body
resp.WriteHeader(http.StatusOK)
flusher.Flush()
// Stream logs until the connection is closed.
for {
select {

72
command/monitor/monitor_test.go

@ -0,0 +1,72 @@
package monitor
import (
"io"
"os"
"sync"
"testing"
"time"
"github.com/hashicorp/consul/agent"
"github.com/hashicorp/consul/logger"
"github.com/mitchellh/cli"
)
func TestMonitorCommand_exitssOnSignalBeforeLinesArrive(t *testing.T) {
t.Parallel()
logWriter := logger.NewLogWriter(512)
a := &agent.TestAgent{
Name: t.Name(),
LogWriter: logWriter,
LogOutput: io.MultiWriter(os.Stderr, logWriter),
}
a.Start()
defer a.Shutdown()
shutdownCh := make(chan struct{})
ui := cli.NewMockUi()
c := New(ui, shutdownCh)
// Only wait for ERR which shouldn't happen so should leave logs empty for a
// while
args := []string{"-http-addr=" + a.HTTPAddr(), "-log-level=ERR"}
// Buffer it so we don't deadlock when blocking send on shutdownCh triggers
// Run to return before we can select on it.
exitCode := make(chan int, 1)
// Run the monitor in another go routine. If this doesn't exit on our "signal"
// then the whole test will hang and we'll panic (to not blow up if people run
// the suite without -timeout)
var wg sync.WaitGroup
wg.Add(1)
go func() {
wg.Done() // Signal that this goroutine is at least running now
exitCode <- c.Run(args)
}()
// Wait for that routine to at least be running
wg.Wait()
// Simulate signal in a few milliseconds without blocking this thread
go func() {
time.Sleep(5 * time.Millisecond)
shutdownCh <- struct{}{}
}()
// Wait for a second - shouldn't take long to handle the signal before we
// panic. We simulate inside the select since the other goroutine might
// already have exited if there was some error and we'd block forever trying
// to write to unbuffered shutdownCh. We don't just buffer it because then it
// doesn't model the real shutdownCh we use for signal watching and could mask
// bugs in the handling.
select {
case ret := <-exitCode:
if ret != 0 {
t.Fatal("command returned with non-zero code")
}
// OK!
case <-time.After(1 * time.Second):
t.Fatal("timed out waiting for exit")
}
}
Loading…
Cancel
Save