mirror of https://github.com/hashicorp/consul
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
404 lines
13 KiB
404 lines
13 KiB
// Copyright (c) HashiCorp, Inc. |
|
// SPDX-License-Identifier: MPL-2.0 |
|
|
|
package api |
|
|
|
import ( |
|
"bytes" |
|
"fmt" |
|
"io" |
|
"strconv" |
|
"strings" |
|
"time" |
|
) |
|
|
|
// AutopilotConfiguration is used for querying/setting the Autopilot configuration. |
|
// Autopilot helps manage operator tasks related to Consul servers like removing |
|
// failed servers from the Raft quorum. |
|
type AutopilotConfiguration struct { |
|
// CleanupDeadServers controls whether to remove dead servers from the Raft |
|
// peer list when a new server joins |
|
CleanupDeadServers bool |
|
|
|
// LastContactThreshold is the limit on the amount of time a server can go |
|
// without leader contact before being considered unhealthy. |
|
LastContactThreshold *ReadableDuration |
|
|
|
// MaxTrailingLogs is the amount of entries in the Raft Log that a server can |
|
// be behind before being considered unhealthy. |
|
MaxTrailingLogs uint64 |
|
|
|
// MinQuorum sets the minimum number of servers allowed in a cluster before |
|
// autopilot can prune dead servers. |
|
MinQuorum uint |
|
|
|
// ServerStabilizationTime is the minimum amount of time a server must be |
|
// in a stable, healthy state before it can be added to the cluster. Only |
|
// applicable with Raft protocol version 3 or higher. |
|
ServerStabilizationTime *ReadableDuration |
|
|
|
// (Enterprise-only) RedundancyZoneTag is the node tag to use for separating |
|
// servers into zones for redundancy. If left blank, this feature will be disabled. |
|
RedundancyZoneTag string |
|
|
|
// (Enterprise-only) DisableUpgradeMigration will disable Autopilot's upgrade migration |
|
// strategy of waiting until enough newer-versioned servers have been added to the |
|
// cluster before promoting them to voters. |
|
DisableUpgradeMigration bool |
|
|
|
// (Enterprise-only) UpgradeVersionTag is the node tag to use for version info when |
|
// performing upgrade migrations. If left blank, the Consul version will be used. |
|
UpgradeVersionTag string |
|
|
|
// CreateIndex holds the index corresponding the creation of this configuration. |
|
// This is a read-only field. |
|
CreateIndex uint64 |
|
|
|
// ModifyIndex will be set to the index of the last update when retrieving the |
|
// Autopilot configuration. Resubmitting a configuration with |
|
// AutopilotCASConfiguration will perform a check-and-set operation which ensures |
|
// there hasn't been a subsequent update since the configuration was retrieved. |
|
ModifyIndex uint64 |
|
} |
|
|
|
// Defines default values for the AutopilotConfiguration type, consistent with |
|
// https://www.consul.io/api-docs/operator/autopilot#parameters-1 |
|
func NewAutopilotConfiguration() AutopilotConfiguration { |
|
cfg := AutopilotConfiguration{ |
|
CleanupDeadServers: true, |
|
LastContactThreshold: NewReadableDuration(200 * time.Millisecond), |
|
MaxTrailingLogs: 250, |
|
MinQuorum: 0, |
|
ServerStabilizationTime: NewReadableDuration(10 * time.Second), |
|
RedundancyZoneTag: "", |
|
DisableUpgradeMigration: false, |
|
UpgradeVersionTag: "", |
|
} |
|
|
|
return cfg |
|
} |
|
|
|
// ServerHealth is the health (from the leader's point of view) of a server. |
|
type ServerHealth struct { |
|
// ID is the raft ID of the server. |
|
ID string |
|
|
|
// Name is the node name of the server. |
|
Name string |
|
|
|
// Address is the address of the server. |
|
Address string |
|
|
|
// The status of the SerfHealth check for the server. |
|
SerfStatus string |
|
|
|
// Version is the Consul version of the server. |
|
Version string |
|
|
|
// Leader is whether this server is currently the leader. |
|
Leader bool |
|
|
|
// LastContact is the time since this node's last contact with the leader. |
|
LastContact *ReadableDuration |
|
|
|
// LastTerm is the highest leader term this server has a record of in its Raft log. |
|
LastTerm uint64 |
|
|
|
// LastIndex is the last log index this server has a record of in its Raft log. |
|
LastIndex uint64 |
|
|
|
// Healthy is whether or not the server is healthy according to the current |
|
// Autopilot config. |
|
Healthy bool |
|
|
|
// Voter is whether this is a voting server. |
|
Voter bool |
|
|
|
// StableSince is the last time this server's Healthy value changed. |
|
StableSince time.Time |
|
} |
|
|
|
// OperatorHealthReply is a representation of the overall health of the cluster |
|
type OperatorHealthReply struct { |
|
// Healthy is true if all the servers in the cluster are healthy. |
|
Healthy bool |
|
|
|
// FailureTolerance is the number of healthy servers that could be lost without |
|
// an outage occurring. |
|
FailureTolerance int |
|
|
|
// Servers holds the health of each server. |
|
Servers []ServerHealth |
|
} |
|
|
|
type AutopilotState struct { |
|
Healthy bool |
|
FailureTolerance int |
|
OptimisticFailureTolerance int |
|
|
|
Servers map[string]AutopilotServer |
|
Leader string |
|
Voters []string |
|
ReadReplicas []string `json:",omitempty"` |
|
RedundancyZones map[string]AutopilotZone `json:",omitempty"` |
|
Upgrade *AutopilotUpgrade `json:",omitempty"` |
|
} |
|
|
|
type AutopilotServer struct { |
|
ID string |
|
Name string |
|
Address string |
|
NodeStatus string |
|
Version string |
|
LastContact *ReadableDuration |
|
LastTerm uint64 |
|
LastIndex uint64 |
|
Healthy bool |
|
StableSince time.Time |
|
RedundancyZone string `json:",omitempty"` |
|
UpgradeVersion string `json:",omitempty"` |
|
ReadReplica bool |
|
Status AutopilotServerStatus |
|
Meta map[string]string |
|
NodeType AutopilotServerType |
|
} |
|
|
|
// AutopilotServerStatus is the string-typed status carried in
// AutopilotServer.Status.
type AutopilotServerStatus string

// Known AutopilotServerStatus values.
const (
	AutopilotServerNone     AutopilotServerStatus = "none"
	AutopilotServerLeader   AutopilotServerStatus = "leader"
	AutopilotServerVoter    AutopilotServerStatus = "voter"
	AutopilotServerNonVoter AutopilotServerStatus = "non-voter"
	AutopilotServerStaging  AutopilotServerStatus = "staging"
)
|
|
|
// AutopilotServerType is the string-typed node type carried in
// AutopilotServer.NodeType.
type AutopilotServerType string

// Known AutopilotServerType values.
const (
	AutopilotTypeVoter          AutopilotServerType = "voter"
	AutopilotTypeReadReplica    AutopilotServerType = "read-replica"
	AutopilotTypeZoneVoter      AutopilotServerType = "zone-voter"
	AutopilotTypeZoneExtraVoter AutopilotServerType = "zone-extra-voter"
	AutopilotTypeZoneStandby    AutopilotServerType = "zone-standby"
)
|
|
|
// AutopilotZone is the value type of AutopilotState.RedundancyZones.
//
// NOTE(review): presumably Servers and Voters list server identifiers that
// belong to the zone, and FailureTolerance is the zone-level counterpart of
// AutopilotState.FailureTolerance — confirm against the server-side type.
type AutopilotZone struct {
	Servers          []string
	Voters           []string
	FailureTolerance int
}
|
|
|
// AutopilotZoneUpgradeVersions is the value type of
// AutopilotUpgrade.RedundancyZones. Every field is omitted from the encoded
// JSON when empty.
//
// NOTE(review): presumably each slice lists server identifiers split by
// target/other version and voter/non-voter role — confirm.
type AutopilotZoneUpgradeVersions struct {
	TargetVersionVoters    []string `json:",omitempty"`
	TargetVersionNonVoters []string `json:",omitempty"`
	OtherVersionVoters     []string `json:",omitempty"`
	OtherVersionNonVoters  []string `json:",omitempty"`
}
|
|
|
type AutopilotUpgrade struct { |
|
Status AutopilotUpgradeStatus |
|
TargetVersion string `json:",omitempty"` |
|
TargetVersionVoters []string `json:",omitempty"` |
|
TargetVersionNonVoters []string `json:",omitempty"` |
|
TargetVersionReadReplicas []string `json:",omitempty"` |
|
OtherVersionVoters []string `json:",omitempty"` |
|
OtherVersionNonVoters []string `json:",omitempty"` |
|
OtherVersionReadReplicas []string `json:",omitempty"` |
|
RedundancyZones map[string]AutopilotZoneUpgradeVersions `json:",omitempty"` |
|
} |
|
|
|
// AutopilotUpgradeStatus is the string-typed status carried in
// AutopilotUpgrade.Status.
type AutopilotUpgradeStatus string

const (
	// AutopilotUpgradeIdle means no upgrade is in progress.
	AutopilotUpgradeIdle AutopilotUpgradeStatus = "idle"

	// AutopilotUpgradeAwaitNewVoters means more servers of the target
	// version must be added before the promotion phase of the upgrade can
	// start.
	AutopilotUpgradeAwaitNewVoters AutopilotUpgradeStatus = "await-new-voters"

	// AutopilotUpgradePromoting means autopilot is promoting servers of the
	// target version.
	AutopilotUpgradePromoting AutopilotUpgradeStatus = "promoting"

	// AutopilotUpgradeDemoting means autopilot is demoting servers that are
	// not on the target version.
	AutopilotUpgradeDemoting AutopilotUpgradeStatus = "demoting"

	// AutopilotUpgradeLeaderTransfer means autopilot is transferring
	// leadership from a server running an older version to a server using
	// the target version.
	AutopilotUpgradeLeaderTransfer AutopilotUpgradeStatus = "leader-transfer"

	// AutopilotUpgradeAwaitNewServers means autopilot has finished
	// transferring leadership and has demoted all the other-versioned
	// servers, but more target version servers are needed to replace all
	// the existing other-version servers.
	AutopilotUpgradeAwaitNewServers AutopilotUpgradeStatus = "await-new-servers"

	// AutopilotUpgradeAwaitServerRemoval means autopilot is waiting for the
	// servers on non-target versions to be removed.
	AutopilotUpgradeAwaitServerRemoval AutopilotUpgradeStatus = "await-server-removal"

	// AutopilotUpgradeDisabled means automated upgrades are disabled in the
	// autopilot configuration.
	AutopilotUpgradeDisabled AutopilotUpgradeStatus = "disabled"
)
|
|
|
// ReadableDuration is a time.Duration that is serialized to JSON in a
// human-readable format (for example "10s") rather than as a bare number.
type ReadableDuration time.Duration

// NewReadableDuration returns a pointer to a new ReadableDuration holding dur.
func NewReadableDuration(dur time.Duration) *ReadableDuration {
	d := ReadableDuration(dur)
	return &d
}

// String formats the duration the same way time.Duration.String does. A nil
// receiver is formatted as the zero duration.
func (d *ReadableDuration) String() string {
	return d.Duration().String()
}

// Duration converts d to a time.Duration. A nil receiver yields zero.
func (d *ReadableDuration) Duration() time.Duration {
	if d == nil {
		return time.Duration(0)
	}
	return time.Duration(*d)
}

// MarshalJSON encodes the duration as a quoted time.Duration string such as
// "1.5s". A nil receiver is encoded as the zero duration.
func (d *ReadableDuration) MarshalJSON() ([]byte, error) {
	return []byte(fmt.Sprintf(`"%s"`, d.Duration().String())), nil
}

// UnmarshalJSON decodes a duration from raw. Three input forms are handled:
//
//   - a quoted string, parsed with time.ParseDuration;
//   - the JSON literal null, which leaves d unchanged (the no-op convention
//     documented for json.Unmarshaler implementations);
//   - anything else, parsed as a number and converted to a count of
//     nanoseconds, with any fractional part truncated.
//
// It returns an error when d is nil or when the input cannot be parsed; d is
// left untouched in either case.
func (d *ReadableDuration) UnmarshalJSON(raw []byte) error {
	if d == nil {
		return fmt.Errorf("cannot unmarshal to nil pointer")
	}

	str := string(raw)
	switch n := len(str); {
	case str == "null":
		// encoding/json hands null to UnmarshalJSON for non-pointer fields;
		// treat it as "no value supplied" instead of a parse failure.
		return nil

	case n >= 2 && str[0] == '"' && str[n-1] == '"':
		// Quoted string: strip the quotes and parse the duration text.
		parsed, err := time.ParseDuration(str[1 : n-1])
		if err != nil {
			return err
		}
		*d = ReadableDuration(parsed)

	default:
		// No quotes, not a string: interpret the value as nanoseconds.
		v, err := strconv.ParseFloat(str, 64)
		if err != nil {
			return err
		}
		*d = ReadableDuration(time.Duration(v))
	}

	return nil
}
|
|
|
// AutopilotGetConfiguration is used to query the current Autopilot configuration. |
|
func (op *Operator) AutopilotGetConfiguration(q *QueryOptions) (*AutopilotConfiguration, error) { |
|
r := op.c.newRequest("GET", "/v1/operator/autopilot/configuration") |
|
r.setQueryOptions(q) |
|
_, resp, err := op.c.doRequest(r) |
|
if err != nil { |
|
return nil, err |
|
} |
|
defer closeResponseBody(resp) |
|
if err := requireOK(resp); err != nil { |
|
return nil, err |
|
} |
|
|
|
var out AutopilotConfiguration |
|
if err := decodeBody(resp, &out); err != nil { |
|
return nil, err |
|
} |
|
|
|
return &out, nil |
|
} |
|
|
|
// AutopilotSetConfiguration is used to set the current Autopilot configuration. |
|
func (op *Operator) AutopilotSetConfiguration(conf *AutopilotConfiguration, q *WriteOptions) error { |
|
r := op.c.newRequest("PUT", "/v1/operator/autopilot/configuration") |
|
r.setWriteOptions(q) |
|
r.obj = conf |
|
_, resp, err := op.c.doRequest(r) |
|
if err != nil { |
|
return err |
|
} |
|
defer closeResponseBody(resp) |
|
if err := requireOK(resp); err != nil { |
|
return err |
|
} |
|
return nil |
|
} |
|
|
|
// AutopilotCASConfiguration is used to perform a Check-And-Set update on the |
|
// Autopilot configuration. The ModifyIndex value will be respected. Returns |
|
// true on success or false on failures. |
|
func (op *Operator) AutopilotCASConfiguration(conf *AutopilotConfiguration, q *WriteOptions) (bool, error) { |
|
r := op.c.newRequest("PUT", "/v1/operator/autopilot/configuration") |
|
r.setWriteOptions(q) |
|
r.params.Set("cas", strconv.FormatUint(conf.ModifyIndex, 10)) |
|
r.obj = conf |
|
_, resp, err := op.c.doRequest(r) |
|
if err != nil { |
|
return false, err |
|
} |
|
defer closeResponseBody(resp) |
|
if err := requireOK(resp); err != nil { |
|
return false, err |
|
} |
|
|
|
var buf bytes.Buffer |
|
if _, err := io.Copy(&buf, resp.Body); err != nil { |
|
return false, fmt.Errorf("Failed to read response: %v", err) |
|
} |
|
res := strings.Contains(buf.String(), "true") |
|
|
|
return res, nil |
|
} |
|
|
|
// AutopilotServerHealth |
|
func (op *Operator) AutopilotServerHealth(q *QueryOptions) (*OperatorHealthReply, error) { |
|
r := op.c.newRequest("GET", "/v1/operator/autopilot/health") |
|
r.setQueryOptions(q) |
|
|
|
// we use 429 status to indicate unhealthiness |
|
_, resp, err := op.c.doRequest(r) |
|
if err != nil { |
|
return nil, err |
|
} |
|
defer closeResponseBody(resp) |
|
err = requireHttpCodes(resp, 200, 429) |
|
if err != nil { |
|
return nil, err |
|
} |
|
defer closeResponseBody(resp) |
|
|
|
var out OperatorHealthReply |
|
if err := decodeBody(resp, &out); err != nil { |
|
return nil, err |
|
} |
|
return &out, nil |
|
} |
|
|
|
func (op *Operator) AutopilotState(q *QueryOptions) (*AutopilotState, error) { |
|
r := op.c.newRequest("GET", "/v1/operator/autopilot/state") |
|
r.setQueryOptions(q) |
|
_, resp, err := op.c.doRequest(r) |
|
if err != nil { |
|
return nil, err |
|
} |
|
defer closeResponseBody(resp) |
|
if err := requireOK(resp); err != nil { |
|
return nil, err |
|
} |
|
|
|
var out AutopilotState |
|
if err := decodeBody(resp, &out); err != nil { |
|
return nil, err |
|
} |
|
|
|
return &out, nil |
|
}
|
|
|