|
|
|
@ -1,6 +1,7 @@
|
|
|
|
|
package agent |
|
|
|
|
|
|
|
|
|
import ( |
|
|
|
|
"encoding/json" |
|
|
|
|
"fmt" |
|
|
|
|
"io" |
|
|
|
|
"log" |
|
|
|
@ -15,6 +16,14 @@ import (
|
|
|
|
|
"github.com/hashicorp/serf/serf" |
|
|
|
|
) |
|
|
|
|
|
|
|
|
|
const (
	// servicesDir is the name of the directory, joined onto the agent's
	// configured data dir, under which service definitions are persisted.
	servicesDir = "services"

	// checksDir is the name of the directory, joined onto the agent's
	// configured data dir, under which local check definitions are persisted.
	checksDir = "checks"
)
|
|
|
|
|
|
|
|
|
/* |
|
|
|
|
The agent is the long running process that is run on every machine. |
|
|
|
|
It exposes an RPC interface that is used by the CLI to control the |
|
|
|
@ -132,6 +141,14 @@ func Create(config *Config, logOutput io.Writer) (*Agent, error) {
|
|
|
|
|
return nil, err |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Load any persisted services and checks
|
|
|
|
|
if err := agent.restoreServices(); err != nil { |
|
|
|
|
return nil, err |
|
|
|
|
} |
|
|
|
|
if err := agent.restoreChecks(); err != nil { |
|
|
|
|
return nil, err |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Start handling events
|
|
|
|
|
go agent.handleEvents() |
|
|
|
|
|
|
|
|
@ -472,6 +489,144 @@ func (a *Agent) ResumeSync() {
|
|
|
|
|
a.state.Resume() |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// persistService saves a service definition to a JSON file in the data dir
|
|
|
|
|
func (a *Agent) persistService(service *structs.NodeService) error { |
|
|
|
|
svcPath := filepath.Join(a.config.DataDir, servicesDir, service.ID) |
|
|
|
|
if _, err := os.Stat(svcPath); os.IsNotExist(err) { |
|
|
|
|
encoded, err := json.Marshal(service) |
|
|
|
|
if err != nil { |
|
|
|
|
return nil |
|
|
|
|
} |
|
|
|
|
if err := os.MkdirAll(filepath.Dir(svcPath), 0700); err != nil { |
|
|
|
|
return err |
|
|
|
|
} |
|
|
|
|
fh, err := os.OpenFile(svcPath, os.O_CREATE|os.O_WRONLY, 0600) |
|
|
|
|
if err != nil { |
|
|
|
|
return err |
|
|
|
|
} |
|
|
|
|
defer fh.Close() |
|
|
|
|
if _, err := fh.Write(encoded); err != nil { |
|
|
|
|
return err |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
return nil |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// purgeService removes a persisted service definition file from the data dir
|
|
|
|
|
func (a *Agent) purgeService(serviceID string) error { |
|
|
|
|
svcPath := filepath.Join(a.config.DataDir, servicesDir, serviceID) |
|
|
|
|
if _, err := os.Stat(svcPath); err == nil { |
|
|
|
|
return os.Remove(svcPath) |
|
|
|
|
} |
|
|
|
|
return nil |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// restoreServices is used to load previously persisted service definitions
|
|
|
|
|
// into the agent during startup.
|
|
|
|
|
func (a *Agent) restoreServices() error { |
|
|
|
|
svcDir := filepath.Join(a.config.DataDir, servicesDir) |
|
|
|
|
if _, err := os.Stat(svcDir); os.IsNotExist(err) { |
|
|
|
|
return nil |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
err := filepath.Walk(svcDir, func(path string, fi os.FileInfo, err error) error { |
|
|
|
|
if err != nil { |
|
|
|
|
return err |
|
|
|
|
} |
|
|
|
|
if fi.Name() == servicesDir { |
|
|
|
|
return nil |
|
|
|
|
} |
|
|
|
|
fh, err := os.Open(filepath.Join(svcDir, fi.Name())) |
|
|
|
|
if err != nil { |
|
|
|
|
return err |
|
|
|
|
} |
|
|
|
|
content := make([]byte, fi.Size()) |
|
|
|
|
if _, err := fh.Read(content); err != nil { |
|
|
|
|
return err |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
var svc *structs.NodeService |
|
|
|
|
if err := json.Unmarshal(content, &svc); err != nil { |
|
|
|
|
return err |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
a.logger.Printf("[DEBUG] Restored service definition: %s", svc.ID) |
|
|
|
|
return a.AddService(svc, nil) |
|
|
|
|
}) |
|
|
|
|
return err |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// persistCheck saves a check definition to the local agent's state directory
|
|
|
|
|
func (a *Agent) persistCheck(check *structs.HealthCheck) error { |
|
|
|
|
checkPath := filepath.Join(a.config.DataDir, checksDir, check.CheckID) |
|
|
|
|
if _, err := os.Stat(checkPath); os.IsNotExist(err) { |
|
|
|
|
encoded, err := json.Marshal(check) |
|
|
|
|
if err != nil { |
|
|
|
|
return nil |
|
|
|
|
} |
|
|
|
|
if err := os.MkdirAll(filepath.Dir(checkPath), 0700); err != nil { |
|
|
|
|
return err |
|
|
|
|
} |
|
|
|
|
fh, err := os.OpenFile(checkPath, os.O_CREATE|os.O_WRONLY, 0600) |
|
|
|
|
if err != nil { |
|
|
|
|
return err |
|
|
|
|
} |
|
|
|
|
defer fh.Close() |
|
|
|
|
if _, err := fh.Write(encoded); err != nil { |
|
|
|
|
return err |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
return nil |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// purgeCheck removes a persisted check definition file from the data dir
|
|
|
|
|
func (a *Agent) purgeCheck(checkID string) error { |
|
|
|
|
checkPath := filepath.Join(a.config.DataDir, checksDir, checkID) |
|
|
|
|
if _, err := os.Stat(checkPath); err == nil { |
|
|
|
|
return os.Remove(checkPath) |
|
|
|
|
} |
|
|
|
|
return nil |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// restoreChecks is used to load previously persisted health check definitions
|
|
|
|
|
// into the agent during startup.
|
|
|
|
|
func (a *Agent) restoreChecks() error { |
|
|
|
|
checkDir := filepath.Join(a.config.DataDir, checksDir) |
|
|
|
|
if _, err := os.Stat(checkDir); os.IsNotExist(err) { |
|
|
|
|
return nil |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
err := filepath.Walk(checkDir, func(path string, fi os.FileInfo, err error) error { |
|
|
|
|
if err != nil { |
|
|
|
|
return err |
|
|
|
|
} |
|
|
|
|
if fi.Name() == checksDir { |
|
|
|
|
return nil |
|
|
|
|
} |
|
|
|
|
fh, err := os.Open(filepath.Join(checkDir, fi.Name())) |
|
|
|
|
if err != nil { |
|
|
|
|
return err |
|
|
|
|
} |
|
|
|
|
content := make([]byte, fi.Size()) |
|
|
|
|
if _, err := fh.Read(content); err != nil { |
|
|
|
|
return err |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
var check *structs.HealthCheck |
|
|
|
|
if err := json.Unmarshal(content, &check); err != nil { |
|
|
|
|
return err |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Default check to critical to avoid placing potentially unhealthy
|
|
|
|
|
// services into the active pool
|
|
|
|
|
check.Status = structs.HealthCritical |
|
|
|
|
|
|
|
|
|
a.logger.Printf("[DEBUG] Restored health check: %s", check.CheckID) |
|
|
|
|
return a.AddCheck(check, nil) |
|
|
|
|
}) |
|
|
|
|
return err |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// AddService is used to add a service entry.
|
|
|
|
|
// This entry is persistent and the agent will make a best effort to
|
|
|
|
|
// ensure it is registered
|
|
|
|
@ -489,6 +644,11 @@ func (a *Agent) AddService(service *structs.NodeService, chkType *CheckType) err
|
|
|
|
|
// Add the service
|
|
|
|
|
a.state.AddService(service) |
|
|
|
|
|
|
|
|
|
// Persist the service to a file
|
|
|
|
|
if err := a.persistService(service); err != nil { |
|
|
|
|
return err |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Create an associated health check
|
|
|
|
|
if chkType != nil { |
|
|
|
|
check := &structs.HealthCheck{ |
|
|
|
@ -520,6 +680,11 @@ func (a *Agent) RemoveService(serviceID string) error {
|
|
|
|
|
// Remove service immediately
|
|
|
|
|
a.state.RemoveService(serviceID) |
|
|
|
|
|
|
|
|
|
// Remove the service from the data dir
|
|
|
|
|
if err := a.purgeService(serviceID); err != nil { |
|
|
|
|
return err |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Deregister any associated health checks
|
|
|
|
|
checkID := fmt.Sprintf("service:%s", serviceID) |
|
|
|
|
return a.RemoveCheck(checkID) |
|
|
|
@ -580,7 +745,9 @@ func (a *Agent) AddCheck(check *structs.HealthCheck, chkType *CheckType) error {
|
|
|
|
|
|
|
|
|
|
// Add to the local state for anti-entropy
|
|
|
|
|
a.state.AddCheck(check) |
|
|
|
|
return nil |
|
|
|
|
|
|
|
|
|
// Persist the check
|
|
|
|
|
return a.persistCheck(check) |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// RemoveCheck is used to remove a health check.
|
|
|
|
@ -601,7 +768,7 @@ func (a *Agent) RemoveCheck(checkID string) error {
|
|
|
|
|
check.Stop() |
|
|
|
|
delete(a.checkTTLs, checkID) |
|
|
|
|
} |
|
|
|
|
return nil |
|
|
|
|
return a.purgeCheck(checkID) |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// UpdateCheck is used to update the status of a check.
|
|
|
|
|