mirror of https://github.com/hashicorp/consul
Adding basic CheckMonitor to invoke scripts
parent
fe27fa191f
commit
a4201255df
|
@ -0,0 +1,121 @@
|
|||
package agent
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"github.com/hashicorp/consul/consul/structs"
|
||||
"log"
|
||||
"os/exec"
|
||||
"runtime"
|
||||
"sync"
|
||||
"syscall"
|
||||
"time"
|
||||
)
|
||||
|
||||
// CheckNotifier is the callback surface a CheckMonitor uses to report
// the outcome of a check run. Implementations should treat updates as
// idempotent, since the same status may be delivered more than once.
type CheckNotifier interface {
	// UpdateCheck records status as the latest state of the check
	// identified by checkID.
	UpdateCheck(checkID string, status string)
}
|
||||
|
||||
// CheckMonitor is used to periodically invoke a script to
|
||||
// determine the health of a given check. It is compatible with
|
||||
// nagios plugins and expects the output in the same format.
|
||||
type CheckMonitor struct {
|
||||
Notify CheckNotifier
|
||||
CheckID string
|
||||
Script string
|
||||
Interval time.Duration
|
||||
Logger *log.Logger
|
||||
|
||||
stop bool
|
||||
stopCh chan struct{}
|
||||
stopLock sync.Mutex
|
||||
}
|
||||
|
||||
// Start is used to start a check monitor.
|
||||
// Monitor runs until stop is called
|
||||
func (c *CheckMonitor) Start() {
|
||||
c.stopLock.Lock()
|
||||
defer c.stopLock.Unlock()
|
||||
c.stop = false
|
||||
c.stopCh = make(chan struct{})
|
||||
go c.run()
|
||||
}
|
||||
|
||||
// Stop is used to stop a check monitor.
|
||||
func (c *CheckMonitor) Stop() {
|
||||
c.stopLock.Lock()
|
||||
defer c.stopLock.Unlock()
|
||||
if !c.stop {
|
||||
c.stop = true
|
||||
close(c.stopCh)
|
||||
}
|
||||
}
|
||||
|
||||
// run is invoked by a goroutine to run until Stop() is called
|
||||
func (c *CheckMonitor) run() {
|
||||
select {
|
||||
case <-time.After(c.Interval):
|
||||
c.check()
|
||||
case <-c.stopCh:
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// check is invoked periodically to perform the script check
|
||||
func (c *CheckMonitor) check() {
|
||||
// Determine the shell invocation based on OS
|
||||
var shell, flag string
|
||||
if runtime.GOOS == "windows" {
|
||||
shell = "cmd"
|
||||
flag = "/C"
|
||||
} else {
|
||||
shell = "/bin/sh"
|
||||
flag = "-c"
|
||||
}
|
||||
|
||||
// Create the command
|
||||
cmd := exec.Command(shell, flag, c.Script)
|
||||
|
||||
// Collect the output
|
||||
var output bytes.Buffer
|
||||
cmd.Stdout = &output
|
||||
cmd.Stderr = &output
|
||||
|
||||
// Start the check
|
||||
if err := cmd.Start(); err != nil {
|
||||
c.Logger.Printf("[ERR] agent: failed to invoke '%s': %s", c.Script, err)
|
||||
c.Notify.UpdateCheck(c.CheckID, structs.HealthUnknown)
|
||||
return
|
||||
}
|
||||
|
||||
// Wait for the check to complete
|
||||
err := cmd.Wait()
|
||||
c.Logger.Printf("[DEBUG] agent: check '%s' script '%s' output: %s",
|
||||
c.CheckID, c.Script, output.Bytes())
|
||||
|
||||
// Check if the check passed
|
||||
if err == nil {
|
||||
c.Logger.Printf("[DEBUG] Check '%v' is passing", c.CheckID)
|
||||
c.Notify.UpdateCheck(c.CheckID, structs.HealthPassing)
|
||||
return
|
||||
}
|
||||
|
||||
// If the exit code is 1, set check as warning
|
||||
exitErr, ok := err.(*exec.ExitError)
|
||||
if ok {
|
||||
if status, ok := exitErr.Sys().(syscall.WaitStatus); ok {
|
||||
code := status.ExitStatus()
|
||||
if code == 1 {
|
||||
c.Logger.Printf("[WARN] Check '%v' is now warning", c.CheckID)
|
||||
c.Notify.UpdateCheck(c.CheckID, structs.HealthWarning)
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Set the health as critical
|
||||
c.Logger.Printf("[WARN] Check '%v' is now critical", c.CheckID)
|
||||
c.Notify.UpdateCheck(c.CheckID, structs.HealthCritical)
|
||||
}
|
Loading…
Reference in New Issue