diff --git a/.changelog/17107.txt b/.changelog/17107.txt new file mode 100644 index 0000000000..5694fca2c9 --- /dev/null +++ b/.changelog/17107.txt @@ -0,0 +1,3 @@ +```release-note:breaking-change +api: RaftLeaderTransfer now requires an id string. An empty string can be specified to keep the old behavior. +``` diff --git a/api/operator_raft.go b/api/operator_raft.go index d72c00c97b..f0f5794aa5 100644 --- a/api/operator_raft.go +++ b/api/operator_raft.go @@ -68,9 +68,14 @@ func (op *Operator) RaftGetConfiguration(q *QueryOptions) (*RaftConfiguration, e } // RaftLeaderTransfer is used to transfer the current raft leader to another node -func (op *Operator) RaftLeaderTransfer(q *QueryOptions) (*TransferLeaderResponse, error) { +// Optionally accepts a non-empty id of another node to transfer leadership to. +func (op *Operator) RaftLeaderTransfer(id string, q *QueryOptions) (*TransferLeaderResponse, error) { r := op.c.newRequest("POST", "/v1/operator/raft/transfer-leader") r.setQueryOptions(q) + + if id != "" { + r.params.Set("id", id) + } _, resp, err := op.c.doRequest(r) if err != nil { return nil, err diff --git a/api/operator_raft_test.go b/api/operator_raft_test.go index 6e3b7fc0e3..eefffec077 100644 --- a/api/operator_raft_test.go +++ b/api/operator_raft_test.go @@ -4,8 +4,9 @@ package api import ( - "strings" "testing" + + "github.com/hashicorp/consul/sdk/testutil" ) func TestAPI_OperatorRaftGetConfiguration(t *testing.T) { @@ -27,33 +28,181 @@ func TestAPI_OperatorRaftGetConfiguration(t *testing.T) { func TestAPI_OperatorRaftRemovePeerByAddress(t *testing.T) { t.Parallel() - c, s := makeClient(t) - defer s.Stop() + c1, s1 := makeClientWithConfig(t, nil, func(conf *testutil.TestServerConfig) { + if conf.Autopilot == nil { + conf.Autopilot = &testutil.TestAutopilotConfig{} + } + conf.Autopilot.ServerStabilizationTime = "1ms" + }) + defer s1.Stop() - // If we get this error, it proves we sent the address all the way - // through. - operator := c.Operator() - err := operator.RaftRemovePeerByAddress("nope", nil) - if err == nil || !strings.Contains(err.Error(), - "address \"nope\" was not found in the Raft configuration") { + _, s2 := makeClientWithConfig(t, nil, func(conf *testutil.TestServerConfig) { + conf.Server = true + conf.Bootstrap = false + conf.RetryJoin = []string{s1.LANAddr} + if conf.Autopilot == nil { + conf.Autopilot = &testutil.TestAutopilotConfig{} + } + conf.Autopilot.ServerStabilizationTime = "1ms" + }) + defer s2.Stop() + s2.WaitForVoting(t) + + operator := c1.Operator() + err := operator.RaftRemovePeerByAddress(s2.ServerAddr, nil) + if err != nil { t.Fatalf("err: %v", err) } + + cfg, err := c1.Operator().RaftGetConfiguration(nil) + if err != nil { + t.Fatalf("err: %v", err) + } + if len(cfg.Servers) != 1 { + t.Fatalf("more than 1 server left: %+v", cfg.Servers) + } +} + +func TestAPI_OperatorRaftRemovePeerByID(t *testing.T) { + t.Parallel() + c1, s1 := makeClientWithConfig(t, nil, func(conf *testutil.TestServerConfig) { + if conf.Autopilot == nil { + conf.Autopilot = &testutil.TestAutopilotConfig{} + } + conf.Autopilot.ServerStabilizationTime = "1ms" + }) + defer s1.Stop() + + _, s2 := makeClientWithConfig(t, nil, func(conf *testutil.TestServerConfig) { + conf.Server = true + conf.Bootstrap = false + conf.RetryJoin = []string{s1.LANAddr} + if conf.Autopilot == nil { + conf.Autopilot = &testutil.TestAutopilotConfig{} + } + conf.Autopilot.ServerStabilizationTime = "1ms" + }) + defer s2.Stop() + s2.WaitForVoting(t) + + operator := c1.Operator() + err := operator.RaftRemovePeerByID(s2.Config.NodeID, nil) + if err != nil { + t.Fatalf("err: %v", err) + } + + cfg, err := c1.Operator().RaftGetConfiguration(nil) + if err != nil { + t.Fatalf("err: %v", err) + } + if len(cfg.Servers) != 1 { + t.Fatalf("more than 1 server left: %+v", cfg.Servers) + } } func TestAPI_OperatorRaftLeaderTransfer(t *testing.T) { t.Parallel() - c, s := makeClient(t) - defer s.Stop() + c1, s1 := makeClientWithConfig(t, nil, func(conf *testutil.TestServerConfig) { + if conf.Autopilot == nil { + conf.Autopilot = &testutil.TestAutopilotConfig{} + } + conf.Autopilot.ServerStabilizationTime = "1ms" + }) + defer s1.Stop() - // If we get this error, it proves we sent the address all the way - // through. - operator := c.Operator() - transfer, err := operator.RaftLeaderTransfer(nil) - if err == nil || !strings.Contains(err.Error(), - "cannot find peer") { + _, s2 := makeClientWithConfig(t, nil, func(conf *testutil.TestServerConfig) { + conf.Server = true + conf.Bootstrap = false + conf.RetryJoin = []string{s1.LANAddr} + if conf.Autopilot == nil { + conf.Autopilot = &testutil.TestAutopilotConfig{} + } + conf.Autopilot.ServerStabilizationTime = "1ms" + }) + defer s2.Stop() + s2.WaitForVoting(t) + + cfg, err := c1.Operator().RaftGetConfiguration(nil) + if err != nil { t.Fatalf("err: %v", err) } - if transfer != nil { - t.Fatalf("err:%v", transfer) + if len(cfg.Servers) != 2 { + t.Fatalf("not 2 servers: %#v", cfg.Servers) + } + var leaderID string + for _, srv := range cfg.Servers { + if srv.Leader { + leaderID = srv.ID + } + } + if leaderID == "" { + t.Fatalf("no leader: %+v", cfg.Servers) + } + + transfer, err := c1.Operator().RaftLeaderTransfer("", nil) + if err != nil { + t.Fatalf("err: %v", err) + } + if !transfer.Success { + t.Fatal("unsuccessful transfer") + } + + s2.WaitForLeader(t) + + cfg, err = c1.Operator().RaftGetConfiguration(nil) + if err != nil { + t.Fatalf("err: %v", err) + } + var newLeaderID string + for _, srv := range cfg.Servers { + if srv.Leader { + newLeaderID = srv.ID + } + } + if newLeaderID == "" { + t.Fatalf("no leader: %#v", cfg.Servers) + } + if newLeaderID == leaderID { + t.Fatalf("leader did not change: %v == %v", newLeaderID, leaderID) + } + + _, s3 := makeClientWithConfig(t, nil, func(conf *testutil.TestServerConfig) { + conf.Server = true + conf.Bootstrap = false + conf.RetryJoin = []string{s1.LANAddr, s2.LANAddr} + if conf.Autopilot == nil { + conf.Autopilot = &testutil.TestAutopilotConfig{} + } + conf.Autopilot.ServerStabilizationTime = "1ms" + }) + defer s3.Stop() + s3.WaitForVoting(t) + + // Transfer it to another member + transfer, err = c1.Operator().RaftLeaderTransfer(s3.Config.NodeID, nil) + if err != nil { + t.Fatalf("err: %v", err) + } + if !transfer.Success { + t.Fatal("unsuccessful transfer") + } + + s3.WaitForLeader(t) + + cfg, err = c1.Operator().RaftGetConfiguration(nil) + if err != nil { + t.Fatalf("err: %v", err) + } + newLeaderID = "" + for _, srv := range cfg.Servers { + if srv.Leader { + newLeaderID = srv.ID + } + } + if newLeaderID == "" { + t.Fatalf("no leader: %#v", cfg.Servers) + } + if newLeaderID != s3.Config.NodeID { + t.Fatalf("leader is not s3: %v != %v", newLeaderID, s3.Config.NodeID) } } diff --git a/command/operator/raft/transferleader/transfer_leader.go b/command/operator/raft/transferleader/transfer_leader.go index ebeb77dfc8..c7694b65a6 100644 --- a/command/operator/raft/transferleader/transfer_leader.go +++ b/command/operator/raft/transferleader/transfer_leader.go @@ -53,7 +53,7 @@ func (c *cmd) Run(args []string) int { } // Fetch the current configuration. - result, err := raftTransferLeader(client, c.http.Stale()) + result, err := raftTransferLeader(client, c.http.Stale(), c.id) if err != nil { c.UI.Error(fmt.Sprintf("Error transfering leadership: %v", err)) return 1 @@ -63,11 +63,11 @@ func (c *cmd) Run(args []string) int { return 0 } -func raftTransferLeader(client *api.Client, stale bool) (string, error) { +func raftTransferLeader(client *api.Client, stale bool, id string) (string, error) { q := &api.QueryOptions{ AllowStale: stale, } - reply, err := client.Operator().RaftLeaderTransfer(q) + reply, err := client.Operator().RaftLeaderTransfer(id, q) if err != nil { return "", fmt.Errorf("Failed to transfer leadership %w", err) } diff --git a/sdk/testutil/server.go b/sdk/testutil/server.go index 148fb03d6a..91ef5180e6 100644 --- a/sdk/testutil/server.go +++ b/sdk/testutil/server.go @@ -86,6 +86,11 @@ type Locality struct { Zone string `json:"zone"` } +// TestAutopilotConfig contains the configuration for autopilot. +type TestAutopilotConfig struct { + ServerStabilizationTime string `json:"server_stabilization_time,omitempty"` +} + // TestServerConfig is the main server configuration struct. type TestServerConfig struct { NodeName string `json:"node_name"` @@ -123,6 +128,7 @@ type TestServerConfig struct { EnableDebug bool `json:"enable_debug,omitempty"` SkipLeaveOnInt bool `json:"skip_leave_on_interrupt"` Peering *TestPeeringConfig `json:"peering,omitempty"` + Autopilot *TestAutopilotConfig `json:"autopilot,omitempty"` ReadyTimeout time.Duration `json:"-"` StopTimeout time.Duration `json:"-"` Stdout io.Writer `json:"-"` @@ -260,6 +266,7 @@ type TestServer struct { HTTPSAddr string LANAddr string WANAddr string + ServerAddr string GRPCAddr string GRPCTLSAddr string @@ -344,6 +351,7 @@ func NewTestServerConfigT(t TestingTB, cb ServerConfigCallback) (*TestServer, er HTTPSAddr: fmt.Sprintf("127.0.0.1:%d", cfg.Ports.HTTPS), LANAddr: fmt.Sprintf("127.0.0.1:%d", cfg.Ports.SerfLan), WANAddr: fmt.Sprintf("127.0.0.1:%d", cfg.Ports.SerfWan), + ServerAddr: fmt.Sprintf("127.0.0.1:%d", cfg.Ports.Server), GRPCAddr: fmt.Sprintf("127.0.0.1:%d", cfg.Ports.GRPC), GRPCTLSAddr: fmt.Sprintf("127.0.0.1:%d", cfg.Ports.GRPCTLS), @@ -442,7 +450,7 @@ func (s *TestServer) waitForAPI() error { return nil } -// waitForLeader waits for the Consul server's HTTP API to become +// WaitForLeader waits for the Consul server's HTTP API to become // available, and then waits for a known leader and an index of // 2 or more to be observed to confirm leader election is done. func (s *TestServer) WaitForLeader(t testing.TB) { @@ -472,6 +480,49 @@ func (s *TestServer) WaitForLeader(t testing.TB) { }) } +// WaitForVoting waits for the Consul server to become a voter in the current raft +// configuration. You probably want to adjust the ServerStablizationTime autopilot +// configuration otherwise this could take 10 seconds. +func (s *TestServer) WaitForVoting(t testing.TB) { + // don't need to fully decode the response + type raftServer struct { + ID string + Voter bool + } + type raftCfgResponse struct { + Servers []raftServer + } + + retry.Run(t, func(r *retry.R) { + // Query the API and get the current raft configuration. + url := s.url("/v1/operator/raft/configuration") + resp, err := s.privilegedGet(url) + if err != nil { + r.Fatalf("failed http get '%s': %v", url, err) + } + defer resp.Body.Close() + if err := s.requireOK(resp); err != nil { + r.Fatalf("failed OK response: %v", err) + } + + var cfg raftCfgResponse + dec := json.NewDecoder(resp.Body) + if err := dec.Decode(&cfg); err != nil { + r.Fatal(err) + } + + for _, srv := range cfg.Servers { + if srv.ID == s.Config.NodeID { + if srv.Voter { + return + } + break + } + } + r.Fatalf("Server is not voting: %#v", cfg.Servers) + }) +} + // WaitForActiveCARoot waits until the server can return a Connect CA meaning // connect has completed bootstrapping and is ready to use. func (s *TestServer) WaitForActiveCARoot(t testing.TB) {