Merge pull request #3007 from hashicorp/config-json

Adds support for peers.json recovery with Raft protocol version 3.
pull/2975/merge
James Phillips 2017-05-04 14:31:21 -07:00 committed by GitHub
commit af62e2bc77
4 changed files with 164 additions and 21 deletions


@@ -518,10 +518,17 @@ func (s *Server) setupRaft() error {
 		}
 	} else if _, err := os.Stat(peersFile); err == nil {
 		s.logger.Printf("[INFO] consul: found peers.json file, recovering Raft configuration...")
-		configuration, err := raft.ReadPeersJSON(peersFile)
+
+		var configuration raft.Configuration
+		if s.config.RaftConfig.ProtocolVersion < 3 {
+			configuration, err = raft.ReadPeersJSON(peersFile)
+		} else {
+			configuration, err = raft.ReadConfigJSON(peersFile)
+		}
 		if err != nil {
 			return fmt.Errorf("recovery failed to parse peers.json: %v", err)
 		}
+
 		tmpFsm, err := NewFSM(s.tombstoneGC, s.config.LogOutput)
 		if err != nil {
 			return fmt.Errorf("recovery failed to make temp FSM: %v", err)
@@ -530,6 +537,7 @@ func (s *Server) setupRaft() error {
 			log, stable, snap, trans, configuration); err != nil {
 			return fmt.Errorf("recovery failed: %v", err)
 		}
+
 		if err := os.Remove(peersFile); err != nil {
 			return fmt.Errorf("recovery failed to delete peers.json, please delete manually (see peers.info for details): %v", err)
 		}
@@ -965,10 +973,54 @@ func (s *Server) GetWANCoordinate() (*coordinate.Coordinate, error) {
 // location.
 const peersInfoContent = `
 As of Consul 0.7.0, the peers.json file is only used for recovery
-after an outage. It should be formatted as a JSON array containing the address
-and port of each Consul server in the cluster, like this:
+after an outage. The format of this file depends on what the server has
+configured for its Raft protocol version. Please see the agent configuration
+page at https://www.consul.io/docs/agent/options.html#_raft_protocol for more
+details about this parameter.
+
+For Raft protocol version 2 and earlier, this should be formatted as a JSON
+array containing the address and port of each Consul server in the cluster, like
+this:
 
-["10.1.0.1:8300","10.1.0.2:8300","10.1.0.3:8300"]
+[
+  "10.1.0.1:8300",
+  "10.1.0.2:8300",
+  "10.1.0.3:8300"
+]
+
+For Raft protocol version 3 and later, this should be formatted as a JSON
+array containing the node ID, address:port, and suffrage information of each
+Consul server in the cluster, like this:
+
+[
+  {
+    "id": "adf4238a-882b-9ddc-4a9d-5b6758e4159e",
+    "address": "10.1.0.1:8300",
+    "non_voter": false
+  },
+  {
+    "id": "8b6dda82-3103-11e7-93ae-92361f002671",
+    "address": "10.1.0.2:8300",
+    "non_voter": false
+  },
+  {
+    "id": "97e17742-3103-11e7-93ae-92361f002671",
+    "address": "10.1.0.3:8300",
+    "non_voter": false
+  }
+]
+
+The "id" field is the node ID of the server. This can be found in the logs when
+the server starts up, or in the "node-id" file inside the server's data
+directory.
+
+The "address" field is the address and port of the server.
+
+The "non_voter" field controls whether the server is a non-voter, which is used
+in some advanced Autopilot configurations; please see
+https://www.consul.io/docs/guides/autopilot.html for more information. If
+"non_voter" is omitted it will default to false, which is typical for most
+clusters.
 
 Under normal operation, the peers.json file will not be present.


@@ -44,3 +44,55 @@ func ReadPeersJSON(path string) (Configuration, error) {
 	}
 	return configuration, nil
 }
+
+// configEntry is used when decoding a new-style peers.json.
+type configEntry struct {
+	// ID is the ID of the server (a UUID, usually).
+	ID ServerID `json:"id"`
+
+	// Address is the host:port of the server.
+	Address ServerAddress `json:"address"`
+
+	// NonVoter controls the suffrage. We choose this sense so people
+	// can leave this out and get a Voter by default.
+	NonVoter bool `json:"non_voter"`
+}
+
+// ReadConfigJSON reads a new-style peers.json and returns a configuration
+// structure. This can be used to perform manual recovery when running protocol
+// versions that use server IDs.
+func ReadConfigJSON(path string) (Configuration, error) {
+	// Read in the file.
+	buf, err := ioutil.ReadFile(path)
+	if err != nil {
+		return Configuration{}, err
+	}
+
+	// Parse it as JSON.
+	var peers []configEntry
+	dec := json.NewDecoder(bytes.NewReader(buf))
+	if err := dec.Decode(&peers); err != nil {
+		return Configuration{}, err
+	}
+
+	// Map it into the new-style configuration structure.
+	var configuration Configuration
+	for _, peer := range peers {
+		suffrage := Voter
+		if peer.NonVoter {
+			suffrage = Nonvoter
+		}
+		server := Server{
+			Suffrage: suffrage,
+			ID:       peer.ID,
+			Address:  peer.Address,
+		}
+		configuration.Servers = append(configuration.Servers, server)
+	}
+
+	// We should only ingest valid configurations.
+	if err := checkConfiguration(configuration); err != nil {
+		return Configuration{}, err
+	}
+	return configuration, nil
+}
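For illustration, here is a minimal sketch of how the new ReadConfigJSON could be exercised against a new-style peers.json file. Only ReadConfigJSON, the Configuration/Servers types, and the JSON field names come from the change above; the package main wrapper, temp-file handling, and printed output are assumptions made for the example.

```go
// Sketch only: parse a new-style peers.json with ReadConfigJSON and print the
// resulting configuration. Paths and sample values are illustrative.
package main

import (
	"fmt"
	"io/ioutil"
	"log"
	"os"

	"github.com/hashicorp/raft"
)

func main() {
	sample := []byte(`[
  {"id": "adf4238a-882b-9ddc-4a9d-5b6758e4159e", "address": "10.1.0.1:8300"},
  {"id": "8b6dda82-3103-11e7-93ae-92361f002671", "address": "10.1.0.2:8300", "non_voter": true}
]`)

	// Write the sample to a temporary file so it can be read back by path,
	// the same way the recovery code reads raft/peers.json.
	tmp, err := ioutil.TempFile("", "peers")
	if err != nil {
		log.Fatal(err)
	}
	defer os.Remove(tmp.Name())
	if _, err := tmp.Write(sample); err != nil {
		log.Fatal(err)
	}
	tmp.Close()

	// Entries that omit "non_voter" come back as voters by default.
	configuration, err := raft.ReadConfigJSON(tmp.Name())
	if err != nil {
		log.Fatal(err)
	}
	for _, server := range configuration.Servers {
		fmt.Println(server.ID, server.Address, server.Suffrage)
	}
}
```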

vendor/vendor.json

@@ -612,10 +612,10 @@
 			"revisionTime": "2015-11-16T02:03:38Z"
 		},
 		{
-			"checksumSHA1": "NvFexY/rs9sPfve+ny/rkMkCL5M=",
+			"checksumSHA1": "8Na6qG9taUXHDunMYecGxbHbJKE=",
 			"path": "github.com/hashicorp/raft",
-			"revision": "6b063a18bfe6e0da3fdc2b9bf6256be9c0a4849a",
-			"revisionTime": "2017-03-16T02:42:32Z",
+			"revision": "939ebd2103731c2f38c7964d8dd24af0e1b26dc3",
+			"revisionTime": "2017-05-04T20:16:11Z",
 			"version": "library-v2-stage-one",
 			"versionExact": "library-v2-stage-one"
 		},


@@ -124,21 +124,60 @@ periodic basis.
 
 The next step is to go to the [`-data-dir`](/docs/agent/options.html#_data_dir)
 of each Consul server. Inside that directory, there will be a `raft/`
-sub-directory. We need to create a `raft/peers.json` file. It should look
-something like:
+sub-directory. We need to create a `raft/peers.json` file. The format of this file
+depends on what the server has configured for its
+[Raft protocol](/docs/agent/options.html#_raft_protocol) version.
+
+For Raft protocol version 2 and earlier, this should be formatted as a JSON
+array containing the address and port of each Consul server in the cluster, like
+this:
 
-```javascript
+```json
 [
-  "10.0.1.8:8300",
-  "10.0.1.6:8300",
-  "10.0.1.7:8300"
+  "10.1.0.1:8300",
+  "10.1.0.2:8300",
+  "10.1.0.3:8300"
 ]
 ```
 
-Simply create entries for all remaining servers. You must confirm
-that servers you do not include here have indeed failed and will not later
-rejoin the cluster. Ensure that this file is the same across all remaining
-server nodes.
+For Raft protocol version 3 and later, this should be formatted as a JSON
+array containing the node ID, address:port, and suffrage information of each
+Consul server in the cluster, like this:
+
+```
+[
+  {
+    "id": "adf4238a-882b-9ddc-4a9d-5b6758e4159e",
+    "address": "10.1.0.1:8300",
+    "non_voter": false
+  },
+  {
+    "id": "8b6dda82-3103-11e7-93ae-92361f002671",
+    "address": "10.1.0.2:8300",
+    "non_voter": false
+  },
+  {
+    "id": "97e17742-3103-11e7-93ae-92361f002671",
+    "address": "10.1.0.3:8300",
+    "non_voter": false
+  }
+]
+```
+
+- `id` `(string: <required>)` - Specifies the [node ID](/docs/agent/options.html#_node_id)
+  of the server. This can be found in the logs when the server starts up if it was auto-generated,
+  and it can also be found inside the `node-id` file in the server's data directory.
+
+- `address` `(string: <required>)` - Specifies the IP and port of the server. The port is the
+  server's RPC port used for cluster communications.
+
+- `non_voter` `(bool: <false>)` - This controls whether the server is a non-voter, which is used
+  in some advanced [Autopilot](/docs/guides/autopilot.html) configurations. If omitted, it will
+  default to false, which is typical for most clusters.
+
+Simply create entries for all servers. You must confirm that servers you do not include here have
+indeed failed and will not later rejoin the cluster. Ensure that this file is the same across all
+remaining server nodes.
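For operators who script this step, the entries can also be generated rather than written by hand. The following is only an illustrative sketch; the helper program, its entry struct, and the hard-coded IDs and addresses are assumptions rather than part of Consul, and the authoritative format is the JSON shown above.

```go
// Hypothetical helper: emit a Raft protocol version 3 peers.json for the
// surviving servers. Replace the placeholder IDs and addresses with each
// server's real node-id and RPC address (port 8300 by default), then place
// the output at raft/peers.json inside each server's data directory.
package main

import (
	"encoding/json"
	"log"
	"os"
)

type entry struct {
	ID       string `json:"id"`        // contents of the server's node-id file
	Address  string `json:"address"`   // IP and RPC port of the server
	NonVoter bool   `json:"non_voter"` // leave false unless using Autopilot non-voters
}

func main() {
	peers := []entry{
		{ID: "adf4238a-882b-9ddc-4a9d-5b6758e4159e", Address: "10.1.0.1:8300"},
		{ID: "8b6dda82-3103-11e7-93ae-92361f002671", Address: "10.1.0.2:8300"},
		{ID: "97e17742-3103-11e7-93ae-92361f002671", Address: "10.1.0.3:8300"},
	}

	f, err := os.Create("peers.json")
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	// Write an indented JSON array matching the documented format.
	enc := json.NewEncoder(f)
	enc.SetIndent("", "  ")
	if err := enc.Encode(peers); err != nil {
		log.Fatal(err)
	}
}
```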
 At this point, you can restart all the remaining servers. In Consul 0.7 and
 later you will see them ingest the recovery file:
@@ -177,8 +216,8 @@ command to inspect the Raft configuration:
 
 ```
 $ consul operator raft -list-peers
-Node   ID             Address        State     Voter
-alice  10.0.1.8:8300  10.0.1.8:8300  follower  true
-bob    10.0.1.6:8300  10.0.1.6:8300  leader    true
-carol  10.0.1.7:8300  10.0.1.7:8300  follower  true
+Node   ID             Address        State     Voter  RaftProtocol
+alice  10.0.1.8:8300  10.0.1.8:8300  follower  true   2
+bob    10.0.1.6:8300  10.0.1.6:8300  leader    true   2
+carol  10.0.1.7:8300  10.0.1.7:8300  follower  true   2
 ```