mirror of https://github.com/hashicorp/consul
Merge pull request #606 from hashicorp/f-maint
Support node and service maintenance modepull/614/head
commit
60bb23c19a
|
@ -29,6 +29,10 @@ const (
|
|||
"If Consul was not shut down properly, the socket file may " +
|
||||
"be left behind. If the path looks correct, remove the file " +
|
||||
"and try again."
|
||||
|
||||
// The ID of the faux health checks for maintenance mode
|
||||
serviceMaintCheckPrefix = "_service_maintenance"
|
||||
// nodeMaintCheckID is the ID of the faux health check registered while the
// node itself is in maintenance mode.
nodeMaintCheckID = "_node_maintenance"
|
||||
)
|
||||
|
||||
/*
|
||||
|
@ -995,3 +999,86 @@ func (a *Agent) unloadChecks() error {
|
|||
|
||||
return nil
|
||||
}
|
||||
|
||||
// serviceMaintCheckID returns the ID of a given service's maintenance check
|
||||
func serviceMaintCheckID(serviceID string) string {
|
||||
return fmt.Sprintf("%s:%s", serviceMaintCheckPrefix, serviceID)
|
||||
}
|
||||
|
||||
// EnableServiceMaintenance will register a false health check against the given
|
||||
// service ID with critical status. This will exclude the service from queries.
|
||||
func (a *Agent) EnableServiceMaintenance(serviceID string) error {
|
||||
service, ok := a.state.Services()[serviceID]
|
||||
if !ok {
|
||||
return fmt.Errorf("No service registered with ID %q", serviceID)
|
||||
}
|
||||
|
||||
// Check if maintenance mode is not already enabled
|
||||
checkID := serviceMaintCheckID(serviceID)
|
||||
if _, ok := a.state.Checks()[checkID]; ok {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Create and register the critical health check
|
||||
check := &structs.HealthCheck{
|
||||
Node: a.config.NodeName,
|
||||
CheckID: checkID,
|
||||
Name: "Service Maintenance Mode",
|
||||
Notes: "Maintenance mode is enabled for this service",
|
||||
ServiceID: service.ID,
|
||||
ServiceName: service.Service,
|
||||
Status: structs.HealthCritical,
|
||||
}
|
||||
a.AddCheck(check, nil, true)
|
||||
a.logger.Printf("[INFO] agent: service %q entered maintenance mode", serviceID)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// DisableServiceMaintenance will deregister the fake maintenance mode check
|
||||
// if the service has been marked as in maintenance.
|
||||
func (a *Agent) DisableServiceMaintenance(serviceID string) error {
|
||||
if _, ok := a.state.Services()[serviceID]; !ok {
|
||||
return fmt.Errorf("No service registered with ID %q", serviceID)
|
||||
}
|
||||
|
||||
// Check if maintenance mode is enabled
|
||||
checkID := serviceMaintCheckID(serviceID)
|
||||
if _, ok := a.state.Checks()[checkID]; !ok {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Deregister the maintenance check
|
||||
a.RemoveCheck(checkID, true)
|
||||
a.logger.Printf("[INFO] agent: service %q left maintenance mode", serviceID)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// EnableNodeMaintenance places a node into maintenance mode.
|
||||
func (a *Agent) EnableNodeMaintenance() {
|
||||
// Ensure node maintenance is not already enabled
|
||||
if _, ok := a.state.Checks()[nodeMaintCheckID]; ok {
|
||||
return
|
||||
}
|
||||
|
||||
// Create and register the node maintenance check
|
||||
check := &structs.HealthCheck{
|
||||
Node: a.config.NodeName,
|
||||
CheckID: nodeMaintCheckID,
|
||||
Name: "Node Maintenance Mode",
|
||||
Notes: "Maintenance mode is enabled for this node",
|
||||
Status: structs.HealthCritical,
|
||||
}
|
||||
a.AddCheck(check, nil, true)
|
||||
a.logger.Printf("[INFO] agent: node entered maintenance mode")
|
||||
}
|
||||
|
||||
// DisableNodeMaintenance removes a node from maintenance mode
|
||||
func (a *Agent) DisableNodeMaintenance() {
|
||||
if _, ok := a.state.Checks()[nodeMaintCheckID]; !ok {
|
||||
return
|
||||
}
|
||||
a.RemoveCheck(nodeMaintCheckID, true)
|
||||
a.logger.Printf("[INFO] agent: node left maintenance mode")
|
||||
}
|
||||
|
|
|
@ -176,3 +176,90 @@ func (s *HTTPServer) AgentDeregisterService(resp http.ResponseWriter, req *http.
|
|||
serviceID := strings.TrimPrefix(req.URL.Path, "/v1/agent/service/deregister/")
|
||||
return nil, s.agent.RemoveService(serviceID, true)
|
||||
}
|
||||
|
||||
func (s *HTTPServer) AgentServiceMaintenance(resp http.ResponseWriter, req *http.Request) (interface{}, error) {
|
||||
// Only PUT supported
|
||||
if req.Method != "PUT" {
|
||||
resp.WriteHeader(405)
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// Ensure we have a service ID
|
||||
serviceID := strings.TrimPrefix(req.URL.Path, "/v1/agent/service/maintenance/")
|
||||
if serviceID == "" {
|
||||
resp.WriteHeader(400)
|
||||
resp.Write([]byte("Missing service ID"))
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// Ensure we have some action
|
||||
params := req.URL.Query()
|
||||
if _, ok := params["enable"]; !ok {
|
||||
resp.WriteHeader(400)
|
||||
resp.Write([]byte("Missing value for enable"))
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
var enable bool
|
||||
raw := params.Get("enable")
|
||||
switch raw {
|
||||
case "true":
|
||||
enable = true
|
||||
case "false":
|
||||
enable = false
|
||||
default:
|
||||
resp.WriteHeader(400)
|
||||
resp.Write([]byte(fmt.Sprintf("Invalid value for enable: %q", raw)))
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
var err error
|
||||
if enable {
|
||||
if err = s.agent.EnableServiceMaintenance(serviceID); err != nil {
|
||||
resp.WriteHeader(404)
|
||||
resp.Write([]byte(err.Error()))
|
||||
}
|
||||
} else {
|
||||
if err = s.agent.DisableServiceMaintenance(serviceID); err != nil {
|
||||
resp.WriteHeader(404)
|
||||
resp.Write([]byte(err.Error()))
|
||||
}
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
|
||||
func (s *HTTPServer) AgentNodeMaintenance(resp http.ResponseWriter, req *http.Request) (interface{}, error) {
|
||||
// Only PUT supported
|
||||
if req.Method != "PUT" {
|
||||
resp.WriteHeader(405)
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// Ensure we have some action
|
||||
params := req.URL.Query()
|
||||
if _, ok := params["enable"]; !ok {
|
||||
resp.WriteHeader(400)
|
||||
resp.Write([]byte("Missing value for enable"))
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
var enable bool
|
||||
raw := params.Get("enable")
|
||||
switch raw {
|
||||
case "true":
|
||||
enable = true
|
||||
case "false":
|
||||
enable = false
|
||||
default:
|
||||
resp.WriteHeader(400)
|
||||
resp.Write([]byte(fmt.Sprintf("Invalid value for enable: %q", raw)))
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
if enable {
|
||||
s.agent.EnableNodeMaintenance()
|
||||
} else {
|
||||
s.agent.DisableNodeMaintenance()
|
||||
}
|
||||
return nil, nil
|
||||
}
|
||||
|
|
|
@ -7,6 +7,7 @@ import (
|
|||
"github.com/hashicorp/consul/testutil"
|
||||
"github.com/hashicorp/serf/serf"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"os"
|
||||
"testing"
|
||||
"time"
|
||||
|
@ -492,3 +493,193 @@ func TestHTTPAgentDeregisterService(t *testing.T) {
|
|||
t.Fatalf("have test check")
|
||||
}
|
||||
}
|
||||
|
||||
func TestHTTPAgent_ServiceMaintenanceEndpoint_BadRequest(t *testing.T) {
|
||||
dir, srv := makeHTTPServer(t)
|
||||
defer os.RemoveAll(dir)
|
||||
defer srv.Shutdown()
|
||||
defer srv.agent.Shutdown()
|
||||
|
||||
// Fails on non-PUT
|
||||
req, _ := http.NewRequest("GET", "/v1/agent/service/maintenance/test?enable=true", nil)
|
||||
resp := httptest.NewRecorder()
|
||||
if _, err := srv.AgentServiceMaintenance(resp, req); err != nil {
|
||||
t.Fatalf("err: %s", err)
|
||||
}
|
||||
if resp.Code != 405 {
|
||||
t.Fatalf("expected 405, got %d", resp.Code)
|
||||
}
|
||||
|
||||
// Fails when no enable flag provided
|
||||
req, _ = http.NewRequest("PUT", "/v1/agent/service/maintenance/test", nil)
|
||||
resp = httptest.NewRecorder()
|
||||
if _, err := srv.AgentServiceMaintenance(resp, req); err != nil {
|
||||
t.Fatalf("err: %s", err)
|
||||
}
|
||||
if resp.Code != 400 {
|
||||
t.Fatalf("expected 400, got %d", resp.Code)
|
||||
}
|
||||
|
||||
// Fails when no service ID provided
|
||||
req, _ = http.NewRequest("PUT", "/v1/agent/service/maintenance/?enable=true", nil)
|
||||
resp = httptest.NewRecorder()
|
||||
if _, err := srv.AgentServiceMaintenance(resp, req); err != nil {
|
||||
t.Fatalf("err: %s", err)
|
||||
}
|
||||
if resp.Code != 400 {
|
||||
t.Fatalf("expected 400, got %d", resp.Code)
|
||||
}
|
||||
|
||||
// Fails when bad service ID provided
|
||||
req, _ = http.NewRequest("PUT", "/v1/agent/service/maintenance/_nope_?enable=true", nil)
|
||||
resp = httptest.NewRecorder()
|
||||
if _, err := srv.AgentServiceMaintenance(resp, req); err == nil {
|
||||
t.Fatalf("should have errored")
|
||||
}
|
||||
if resp.Code != 404 {
|
||||
t.Fatalf("expected 404, got %d", resp.Code)
|
||||
}
|
||||
}
|
||||
|
||||
func TestHTTPAgent_EnableServiceMaintenance(t *testing.T) {
|
||||
dir, srv := makeHTTPServer(t)
|
||||
defer os.RemoveAll(dir)
|
||||
defer srv.Shutdown()
|
||||
defer srv.agent.Shutdown()
|
||||
|
||||
// Register the service
|
||||
service := &structs.NodeService{
|
||||
ID: "test",
|
||||
Service: "test",
|
||||
}
|
||||
if err := srv.agent.AddService(service, nil, false); err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
|
||||
// Force the service into maintenance mode
|
||||
req, _ := http.NewRequest("PUT", "/v1/agent/service/maintenance/test?enable=true", nil)
|
||||
resp := httptest.NewRecorder()
|
||||
if _, err := srv.AgentServiceMaintenance(resp, req); err != nil {
|
||||
t.Fatalf("err: %s", err)
|
||||
}
|
||||
if resp.Code != 200 {
|
||||
t.Fatalf("expected 200, got %d", resp.Code)
|
||||
}
|
||||
|
||||
// Ensure the maintenance check was registered
|
||||
checkID := serviceMaintCheckID("test")
|
||||
if _, ok := srv.agent.state.Checks()[checkID]; !ok {
|
||||
t.Fatalf("should have registered maintenance check")
|
||||
}
|
||||
}
|
||||
|
||||
func TestHTTPAgent_DisableServiceMaintenance(t *testing.T) {
|
||||
dir, srv := makeHTTPServer(t)
|
||||
defer os.RemoveAll(dir)
|
||||
defer srv.Shutdown()
|
||||
defer srv.agent.Shutdown()
|
||||
|
||||
// Register the service
|
||||
service := &structs.NodeService{
|
||||
ID: "test",
|
||||
Service: "test",
|
||||
}
|
||||
if err := srv.agent.AddService(service, nil, false); err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
|
||||
// Force the service into maintenance mode
|
||||
if err := srv.agent.EnableServiceMaintenance("test"); err != nil {
|
||||
t.Fatalf("err: %s", err)
|
||||
}
|
||||
|
||||
// Leave maintenance mode
|
||||
req, _ := http.NewRequest("PUT", "/v1/agent/service/maintenance/test?enable=false", nil)
|
||||
resp := httptest.NewRecorder()
|
||||
if _, err := srv.AgentServiceMaintenance(resp, req); err != nil {
|
||||
t.Fatalf("err: %s", err)
|
||||
}
|
||||
if resp.Code != 200 {
|
||||
t.Fatalf("expected 200, got %d", resp.Code)
|
||||
}
|
||||
|
||||
// Ensure the maintenance check was removed
|
||||
checkID := serviceMaintCheckID("test")
|
||||
if _, ok := srv.agent.state.Checks()[checkID]; ok {
|
||||
t.Fatalf("should have removed maintenance check")
|
||||
}
|
||||
}
|
||||
|
||||
func TestHTTPAgent_NodeMaintenanceEndpoint_BadRequest(t *testing.T) {
|
||||
dir, srv := makeHTTPServer(t)
|
||||
defer os.RemoveAll(dir)
|
||||
defer srv.Shutdown()
|
||||
defer srv.agent.Shutdown()
|
||||
|
||||
// Fails on non-PUT
|
||||
req, _ := http.NewRequest("GET", "/v1/agent/self/maintenance?enable=true", nil)
|
||||
resp := httptest.NewRecorder()
|
||||
if _, err := srv.AgentNodeMaintenance(resp, req); err != nil {
|
||||
t.Fatalf("err: %s", err)
|
||||
}
|
||||
if resp.Code != 405 {
|
||||
t.Fatalf("expected 405, got %d", resp.Code)
|
||||
}
|
||||
|
||||
// Fails when no enable flag provided
|
||||
req, _ = http.NewRequest("PUT", "/v1/agent/self/maintenance", nil)
|
||||
resp = httptest.NewRecorder()
|
||||
if _, err := srv.AgentNodeMaintenance(resp, req); err != nil {
|
||||
t.Fatalf("err: %s", err)
|
||||
}
|
||||
if resp.Code != 400 {
|
||||
t.Fatalf("expected 400, got %d", resp.Code)
|
||||
}
|
||||
}
|
||||
|
||||
func TestHTTPAgent_EnableNodeMaintenance(t *testing.T) {
|
||||
dir, srv := makeHTTPServer(t)
|
||||
defer os.RemoveAll(dir)
|
||||
defer srv.Shutdown()
|
||||
defer srv.agent.Shutdown()
|
||||
|
||||
// Force the node into maintenance mode
|
||||
req, _ := http.NewRequest("PUT", "/v1/agent/self/maintenance?enable=true", nil)
|
||||
resp := httptest.NewRecorder()
|
||||
if _, err := srv.AgentNodeMaintenance(resp, req); err != nil {
|
||||
t.Fatalf("err: %s", err)
|
||||
}
|
||||
if resp.Code != 200 {
|
||||
t.Fatalf("expected 200, got %d", resp.Code)
|
||||
}
|
||||
|
||||
// Ensure the maintenance check was registered
|
||||
if _, ok := srv.agent.state.Checks()[nodeMaintCheckID]; !ok {
|
||||
t.Fatalf("should have registered maintenance check")
|
||||
}
|
||||
}
|
||||
|
||||
func TestHTTPAgent_DisableNodeMaintenance(t *testing.T) {
|
||||
dir, srv := makeHTTPServer(t)
|
||||
defer os.RemoveAll(dir)
|
||||
defer srv.Shutdown()
|
||||
defer srv.agent.Shutdown()
|
||||
|
||||
// Force the node into maintenance mode
|
||||
srv.agent.EnableNodeMaintenance()
|
||||
|
||||
// Leave maintenance mode
|
||||
req, _ := http.NewRequest("PUT", "/v1/agent/self/maintenance?enable=false", nil)
|
||||
resp := httptest.NewRecorder()
|
||||
if _, err := srv.AgentNodeMaintenance(resp, req); err != nil {
|
||||
t.Fatalf("err: %s", err)
|
||||
}
|
||||
if resp.Code != 200 {
|
||||
t.Fatalf("expected 200, got %d", resp.Code)
|
||||
}
|
||||
|
||||
// Ensure the maintenance check was removed
|
||||
if _, ok := srv.agent.state.Checks()[nodeMaintCheckID]; ok {
|
||||
t.Fatalf("should have removed maintenance check")
|
||||
}
|
||||
}
|
||||
|
|
|
@ -781,3 +781,66 @@ func TestAgent_unloadServices(t *testing.T) {
|
|||
t.Fatalf("consul service should not be removed")
|
||||
}
|
||||
}
|
||||
|
||||
func TestAgent_ServiceMaintenanceMode(t *testing.T) {
|
||||
config := nextConfig()
|
||||
dir, agent := makeAgent(t, config)
|
||||
defer os.RemoveAll(dir)
|
||||
defer agent.Shutdown()
|
||||
|
||||
svc := &structs.NodeService{
|
||||
ID: "redis",
|
||||
Service: "redis",
|
||||
Tags: []string{"foo"},
|
||||
Port: 8000,
|
||||
}
|
||||
|
||||
// Register the service
|
||||
if err := agent.AddService(svc, nil, false); err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
|
||||
// Enter maintenance mode for the service
|
||||
if err := agent.EnableServiceMaintenance("redis"); err != nil {
|
||||
t.Fatalf("err: %s", err)
|
||||
}
|
||||
|
||||
// Make sure the critical health check was added
|
||||
checkID := serviceMaintCheckID("redis")
|
||||
if _, ok := agent.state.Checks()[checkID]; !ok {
|
||||
t.Fatalf("should have registered critical maintenance check")
|
||||
}
|
||||
|
||||
// Leave maintenance mode
|
||||
if err := agent.DisableServiceMaintenance("redis"); err != nil {
|
||||
t.Fatalf("err: %s", err)
|
||||
}
|
||||
|
||||
// Ensure the check was deregistered
|
||||
if _, ok := agent.state.Checks()[checkID]; ok {
|
||||
t.Fatalf("should have deregistered maintenance check")
|
||||
}
|
||||
}
|
||||
|
||||
func TestAgent_NodeMaintenanceMode(t *testing.T) {
|
||||
config := nextConfig()
|
||||
dir, agent := makeAgent(t, config)
|
||||
defer os.RemoveAll(dir)
|
||||
defer agent.Shutdown()
|
||||
|
||||
// Enter maintenance mode for the node
|
||||
agent.EnableNodeMaintenance()
|
||||
|
||||
// Make sure the critical health check was added
|
||||
if _, ok := agent.state.Checks()[nodeMaintCheckID]; !ok {
|
||||
t.Fatalf("should have registered critical node check")
|
||||
}
|
||||
|
||||
// Leave maintenance mode
|
||||
agent.DisableNodeMaintenance()
|
||||
|
||||
// Ensure the check was deregistered
|
||||
if _, ok := agent.state.Checks()[nodeMaintCheckID]; ok {
|
||||
t.Fatalf("should have deregistered critical node check")
|
||||
}
|
||||
}
|
||||
|
|
|
@ -181,6 +181,7 @@ func (s *HTTPServer) registerHandlers(enableDebug bool) {
|
|||
s.mux.HandleFunc("/v1/health/service/", s.wrap(s.HealthServiceNodes))
|
||||
|
||||
s.mux.HandleFunc("/v1/agent/self", s.wrap(s.AgentSelf))
|
||||
s.mux.HandleFunc("/v1/agent/self/maintenance", s.wrap(s.AgentNodeMaintenance))
|
||||
s.mux.HandleFunc("/v1/agent/services", s.wrap(s.AgentServices))
|
||||
s.mux.HandleFunc("/v1/agent/checks", s.wrap(s.AgentChecks))
|
||||
s.mux.HandleFunc("/v1/agent/members", s.wrap(s.AgentMembers))
|
||||
|
@ -195,6 +196,7 @@ func (s *HTTPServer) registerHandlers(enableDebug bool) {
|
|||
|
||||
s.mux.HandleFunc("/v1/agent/service/register", s.wrap(s.AgentRegisterService))
|
||||
s.mux.HandleFunc("/v1/agent/service/deregister/", s.wrap(s.AgentDeregisterService))
|
||||
s.mux.HandleFunc("/v1/agent/service/maintenance/", s.wrap(s.AgentServiceMaintenance))
|
||||
|
||||
s.mux.HandleFunc("/v1/event/fire/", s.wrap(s.EventFire))
|
||||
s.mux.HandleFunc("/v1/event/list", s.wrap(s.EventList))
|
||||
|
|
|
@ -238,6 +238,7 @@ The following endpoints are supported:
|
|||
* [`/v1/agent/services`](#agent_services) : Returns the services local agent is managing
|
||||
* [`/v1/agent/members`](#agent_members) : Returns the members as seen by the local serf agent
|
||||
* [`/v1/agent/self`](#agent_self) : Returns the local node configuration
|
||||
* [`/v1/agent/self/maintenance`](#agent_self_maintenance) : Node maintenance mode
|
||||
* [`/v1/agent/join/<address>`](#agent_join) : Trigger local agent to join a node
|
||||
* [`/v1/agent/force-leave/<node>`](#agent_force_leave) : Force remove node
|
||||
* [`/v1/agent/check/register`](#agent_check_register) : Registers a new local check
|
||||
|
@ -247,6 +248,7 @@ The following endpoints are supported:
|
|||
* [`/v1/agent/check/fail/<checkID>`](#agent_check_fail) : Mark a local test as critical
|
||||
* [`/v1/agent/service/register`](#agent_service_register) : Registers a new local service
|
||||
* [`/v1/agent/service/deregister/<serviceID>`](#agent_service_deregister) : Deregister a local service
|
||||
* [`/v1/agent/service/maintenance/<serviceID>`](#agent_service_maintenance) : Service maintenance mode
|
||||
|
||||
### <a name="agent_checks"></a> /v1/agent/checks
|
||||
|
||||
|
@ -401,6 +403,18 @@ It returns a JSON body like this:
|
|||
}
|
||||
```
|
||||
|
||||
### <a name="agent_self_maintenance"></a> /v1/agent/self/maintenance
|
||||
|
||||
The node maintenance endpoint allows placing the agent into "maintenance mode".
|
||||
During maintenance mode, the node will be marked as unavailable, and will not be
|
||||
present in DNS or API queries. This API call is idempotent. Maintenance mode is
|
||||
persistent and will be automatically restored on agent restart.
|
||||
|
||||
This endpoint is hit with a PUT; any other method is rejected with a 405. The
`?enable` flag is required, and its value must be `true` (to enter maintenance
mode), or `false` (to resume normal operation).
|
||||
|
||||
The return code is 200 on success.
|
||||
|
||||
### <a name="agent_join"></a> /v1/agent/join/\<address\>
|
||||
|
||||
This endpoint is hit with a GET and is used to instruct the agent to attempt to
|
||||
|
@ -548,6 +562,19 @@ check, that is also deregistered.
|
|||
|
||||
The return code is 200 on success.
|
||||
|
||||
### <a name="agent_service_maintenance"></a> /v1/agent/service/maintenance/\<serviceID\>
|
||||
|
||||
The service maintenance endpoint allows placing a given service into
|
||||
"maintenance mode". During maintenance mode, the service will be marked as
|
||||
unavailable, and will not be present in DNS or API queries. This API call is
|
||||
idempotent. Maintenance mode is persistent and will be automatically restored
|
||||
on agent restart.
|
||||
|
||||
This endpoint is hit with a PUT; any other method is rejected with a 405. The
`?enable` flag is required, and its value must be `true` (to enter maintenance
mode), or `false` (to resume normal operation).
|
||||
|
||||
The return code is 200 on success.
|
||||
|
||||
## <a name="catalog"></a> Catalog
|
||||
|
||||
The Catalog is the endpoint used to register and deregister nodes,
|
||||
|
|
Loading…
Reference in New Issue