From a4201255dfceeb3a040eec2ae08aac0f2f581af3 Mon Sep 17 00:00:00 2001 From: Armon Dadgar Date: Mon, 20 Jan 2014 16:44:23 -1000 Subject: [PATCH] Adding basic CheckMonitor to invoke scripts --- command/agent/check.go | 121 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 121 insertions(+) create mode 100644 command/agent/check.go diff --git a/command/agent/check.go b/command/agent/check.go new file mode 100644 index 0000000000..ba6271dcab --- /dev/null +++ b/command/agent/check.go @@ -0,0 +1,121 @@ +package agent + +import ( + "bytes" + "github.com/hashicorp/consul/consul/structs" + "log" + "os/exec" + "runtime" + "sync" + "syscall" + "time" +) + +// CheckNotifier interface is used by the CheckMonitor +// to notify when a check has a status update. The update +// should take care to be idempotent. +type CheckNotifier interface { + UpdateCheck(checkID, status string) +} + +// CheckMonitor is used to periodically invoke a script to +// determine the health of a given check. It is compatible with +// nagios plugins and expects the output in the same format. +type CheckMonitor struct { + Notify CheckNotifier + CheckID string + Script string + Interval time.Duration + Logger *log.Logger + + stop bool + stopCh chan struct{} + stopLock sync.Mutex +} + +// Start is used to start a check monitor. +// Monitor runs until stop is called +func (c *CheckMonitor) Start() { + c.stopLock.Lock() + defer c.stopLock.Unlock() + c.stop = false + c.stopCh = make(chan struct{}) + go c.run() +} + +// Stop is used to stop a check monitor. +func (c *CheckMonitor) Stop() { + c.stopLock.Lock() + defer c.stopLock.Unlock() + if !c.stop { + c.stop = true + close(c.stopCh) + } +} + +// run is invoked by a goroutine to run until Stop() is called +func (c *CheckMonitor) run() { + select { + case <-time.After(c.Interval): + c.check() + case <-c.stopCh: + return + } +} + +// check is invoked periodically to perform the script check +func (c *CheckMonitor) check() { + // Determine the shell invocation based on OS + var shell, flag string + if runtime.GOOS == "windows" { + shell = "cmd" + flag = "/C" + } else { + shell = "/bin/sh" + flag = "-c" + } + + // Create the command + cmd := exec.Command(shell, flag, c.Script) + + // Collect the output + var output bytes.Buffer + cmd.Stdout = &output + cmd.Stderr = &output + + // Start the check + if err := cmd.Start(); err != nil { + c.Logger.Printf("[ERR] agent: failed to invoke '%s': %s", c.Script, err) + c.Notify.UpdateCheck(c.CheckID, structs.HealthUnknown) + return + } + + // Wait for the check to complete + err := cmd.Wait() + c.Logger.Printf("[DEBUG] agent: check '%s' script '%s' output: %s", + c.CheckID, c.Script, output.Bytes()) + + // Check if the check passed + if err == nil { + c.Logger.Printf("[DEBUG] Check '%v' is passing", c.CheckID) + c.Notify.UpdateCheck(c.CheckID, structs.HealthPassing) + return + } + + // If the exit code is 1, set check as warning + exitErr, ok := err.(*exec.ExitError) + if ok { + if status, ok := exitErr.Sys().(syscall.WaitStatus); ok { + code := status.ExitStatus() + if code == 1 { + c.Logger.Printf("[WARN] Check '%v' is now warning", c.CheckID) + c.Notify.UpdateCheck(c.CheckID, structs.HealthWarning) + return + } + } + } + + // Set the health as critical + c.Logger.Printf("[WARN] Check '%v' is now critical", c.CheckID) + c.Notify.UpdateCheck(c.CheckID, structs.HealthCritical) +}