|
|
|
@ -12,6 +12,7 @@ import (
|
|
|
|
|
"regexp" |
|
|
|
|
"strconv" |
|
|
|
|
"sync" |
|
|
|
|
"time" |
|
|
|
|
|
|
|
|
|
"github.com/hashicorp/consul/consul" |
|
|
|
|
"github.com/hashicorp/consul/consul/structs" |
|
|
|
@ -23,7 +24,8 @@ const (
|
|
|
|
|
servicesDir = "services" |
|
|
|
|
|
|
|
|
|
// Path to save local agent checks
|
|
|
|
|
checksDir = "checks" |
|
|
|
|
checksDir = "checks" |
|
|
|
|
checkStateDir = "checks/state" |
|
|
|
|
|
|
|
|
|
// The ID of the faux health checks for maintenance mode
|
|
|
|
|
serviceMaintCheckPrefix = "_service_maintenance" |
|
|
|
@ -757,6 +759,13 @@ func (a *Agent) AddCheck(check *structs.HealthCheck, chkType *CheckType, persist
|
|
|
|
|
TTL: chkType.TTL, |
|
|
|
|
Logger: a.logger, |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Restore persisted state, if any
|
|
|
|
|
if err := a.recallCheckState(check); err != nil { |
|
|
|
|
a.logger.Printf("[WARN] agent: failed restoring state for check %q: %s", |
|
|
|
|
check.CheckID, err) |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
ttl.Start() |
|
|
|
|
a.checkTTLs[check.CheckID] = ttl |
|
|
|
|
|
|
|
|
@ -861,6 +870,75 @@ func (a *Agent) UpdateCheck(checkID, status, output string) error {
|
|
|
|
|
|
|
|
|
|
// Set the status through CheckTTL to reset the TTL
|
|
|
|
|
check.SetStatus(status, output) |
|
|
|
|
|
|
|
|
|
// Always persist the state for TTL checks
|
|
|
|
|
if err := a.persistCheckState(check, status, output); err != nil { |
|
|
|
|
return fmt.Errorf("failed persisting state for check %q: %s", checkID, err) |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
return nil |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// persistCheckState is used to record the check status into the data dir.
|
|
|
|
|
// This allows the state to be restored on a later agent start. Currently
|
|
|
|
|
// only useful for TTL based checks.
|
|
|
|
|
func (a *Agent) persistCheckState(check *CheckTTL, status, output string) error { |
|
|
|
|
// Create the persisted state
|
|
|
|
|
state := persistedCheckState{ |
|
|
|
|
CheckID: check.CheckID, |
|
|
|
|
Status: status, |
|
|
|
|
Output: output, |
|
|
|
|
Expires: time.Now().Add(check.TTL).Unix(), |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Encode the state
|
|
|
|
|
buf, err := json.Marshal(state) |
|
|
|
|
if err != nil { |
|
|
|
|
return err |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Create the state dir if it doesn't exist
|
|
|
|
|
dir := filepath.Join(a.config.DataDir, checkStateDir) |
|
|
|
|
if err := os.MkdirAll(dir, 0700); err != nil { |
|
|
|
|
return fmt.Errorf("failed creating check state dir %q: %s", dir, err) |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Write the state to the file
|
|
|
|
|
file := filepath.Join(dir, stringHash(check.CheckID)) |
|
|
|
|
if err := ioutil.WriteFile(file, buf, 0600); err != nil { |
|
|
|
|
return fmt.Errorf("failed writing file %q: %s", file, err) |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
return nil |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// recallCheckState is used to restore the persisted state of a check.
|
|
|
|
|
func (a *Agent) recallCheckState(check *structs.HealthCheck) error { |
|
|
|
|
// Try to read the persisted state for this check
|
|
|
|
|
file := filepath.Join(a.config.DataDir, checkStateDir, stringHash(check.CheckID)) |
|
|
|
|
buf, err := ioutil.ReadFile(file) |
|
|
|
|
if err != nil { |
|
|
|
|
if os.IsNotExist(err) { |
|
|
|
|
return nil |
|
|
|
|
} |
|
|
|
|
return fmt.Errorf("failed reading file %q: %s", file, err) |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Decode the state data
|
|
|
|
|
var p persistedCheckState |
|
|
|
|
if err := json.Unmarshal(buf, &p); err != nil { |
|
|
|
|
return fmt.Errorf("failed decoding check state: %s", err) |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Check if the state has expired
|
|
|
|
|
if time.Now().Unix() > p.Expires { |
|
|
|
|
a.logger.Printf("[DEBUG] agent: check state expired for %q, not restoring", check.CheckID) |
|
|
|
|
return nil |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Restore the fields from the state
|
|
|
|
|
check.Output = p.Output |
|
|
|
|
check.Status = p.Status |
|
|
|
|
return nil |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|