Adds back "safing" the configuration when a server leaves.

pull/2222/head
James Phillips 8 years ago committed by James Phillips
parent b32578ccab
commit 87c15d414d
No known key found for this signature in database
GPG Key ID: 77183E682AC5FC11

@ -93,6 +93,9 @@ type Server struct {
// strong consistency. // strong consistency.
fsm *consulFSM fsm *consulFSM
// left is true if we have attempted to leave the cluster.
left bool
// localConsuls is used to track the known consuls // localConsuls is used to track the known consuls
// in the local datacenter. Used to do leader forwarding. // in the local datacenter. Used to do leader forwarding.
localConsuls map[raft.ServerAddress]*agent.Server localConsuls map[raft.ServerAddress]*agent.Server
@ -101,13 +104,16 @@ type Server struct {
// Logger uses the provided LogOutput // Logger uses the provided LogOutput
logger *log.Logger logger *log.Logger
// The raft instance is used among Consul nodes within the // The raft instance is used among Consul nodes within the DC to protect
// DC to protect operations that require strong consistency // operations that require strong consistency. The raftSafeFn will get
// called on a graceful leave to help "safe" the state of the server so
// it won't interfere with other servers.
raft *raft.Raft raft *raft.Raft
raftLayer *RaftLayer raftLayer *RaftLayer
raftStore *raftboltdb.BoltStore raftStore *raftboltdb.BoltStore
raftTransport *raft.NetworkTransport raftTransport *raft.NetworkTransport
raftInmem *raft.InmemStore raftInmem *raft.InmemStore
raftSafeFn func() error
// reconcileCh is used to pass events from the serf handler // reconcileCh is used to pass events from the serf handler
// into the leader manager, so that the strong state can be // into the leader manager, so that the strong state can be
@ -405,6 +411,31 @@ func (s *Server) setupRaft() error {
} }
s.logger.Printf("[INFO] consul: deleted peers.json file after successful recovery") s.logger.Printf("[INFO] consul: deleted peers.json file after successful recovery")
} }
// Register a cleanup function to call when a leave occurs. This
// needs state that we only have access to here. This does a
// recover operation to an empty configuration so this server
// won't interfere with the rest of the cluster.
s.raftSafeFn = func() error {
hasState, err := raft.HasExistingState(log, stable, snap)
if err != nil {
return fmt.Errorf("cleanup failed to check for state: %v", err)
}
if !hasState {
return nil
}
tmpFsm, err := NewFSM(s.tombstoneGC, s.config.LogOutput)
if err != nil {
return fmt.Errorf("cleanup failed to make temp FSM: %v", err)
}
if err := raft.RecoverCluster(s.config.RaftConfig, tmpFsm,
log, stable, snap, raft.Configuration{}); err != nil {
return fmt.Errorf("recovery failed: %v", err)
}
return nil
}
} }
// If we are in bootstrap or dev mode and the state is clean then we can // If we are in bootstrap or dev mode and the state is clean then we can
@ -531,15 +562,14 @@ func (s *Server) Shutdown() error {
if err := future.Error(); err != nil { if err := future.Error(); err != nil {
s.logger.Printf("[WARN] consul: Error shutting down raft: %s", err) s.logger.Printf("[WARN] consul: Error shutting down raft: %s", err)
} }
if s.left && s.raftSafeFn != nil {
if err := s.raftSafeFn(); err != nil {
s.logger.Printf("[WARN] consul: Error safing raft: %s", err)
}
}
if s.raftStore != nil { if s.raftStore != nil {
s.raftStore.Close() s.raftStore.Close()
} }
// TODO (slackpad) - We used to nerf the Raft configuration here
// if a leave had been done in order to prevent this from joining
// next time if we couldn't be removed after a leave. We wont't
// always get a confirmation from Raft (see comment in Leave). Are
// we losing anything by not doing this?
} }
if s.rpcListener != nil { if s.rpcListener != nil {
@ -555,6 +585,7 @@ func (s *Server) Shutdown() error {
// Leave is used to prepare for a graceful shutdown of the server // Leave is used to prepare for a graceful shutdown of the server
func (s *Server) Leave() error { func (s *Server) Leave() error {
s.logger.Printf("[INFO] consul: server starting leave") s.logger.Printf("[INFO] consul: server starting leave")
s.left = true
// Check the number of known peers // Check the number of known peers
numPeers, err := s.numPeers() numPeers, err := s.numPeers()

Loading…
Cancel
Save