mirror of https://github.com/hashicorp/consul
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
427 lines
12 KiB
427 lines
12 KiB
// Copyright (c) HashiCorp, Inc. |
|
// SPDX-License-Identifier: BUSL-1.1 |
|
|
|
package router |
|
|
|
import ( |
|
"bytes" |
|
"fmt" |
|
"math/rand" |
|
"net" |
|
"testing" |
|
"time" |
|
|
|
"github.com/hashicorp/go-hclog" |
|
"github.com/stretchr/testify/assert" |
|
"github.com/stretchr/testify/require" |
|
|
|
"github.com/hashicorp/consul/agent/metadata" |
|
) |
|
|
|
// localLogBuffer holds the buffer that test loggers write to. It is assigned
// in init and replaced with a fresh buffer on every GetBufferedLogger call.
var localLogBuffer *bytes.Buffer

// init gives localLogBuffer an empty buffer so that it is non-nil before any
// test runs.
func init() {
	localLogBuffer = &bytes.Buffer{}
}
|
|
|
func GetBufferedLogger() hclog.Logger { |
|
localLogBuffer = new(bytes.Buffer) |
|
return hclog.New(&hclog.LoggerOptions{Output: localLogBuffer}) |
|
} |
|
|
|
// fauxConnPool is a stand-in connection pool for tests whose Ping outcome is
// random rather than the result of any network traffic.
type fauxConnPool struct {
	// failPct between 0.0 and 1.0 == pct of time a Ping should fail
	failPct float64
}

// Ping ignores its arguments and reports a random result: it draws a value
// from rand.Float64, which lies in the half-open interval [0.0, 1.0), and
// succeeds when that value is at least failPct.
//
// The comparison is >= rather than > so that both bounds are exact: a failPct
// of 0 never fails (even on a draw of exactly 0.0) and a failPct of 1 always
// fails. The returned error is always nil.
func (cp *fauxConnPool) Ping(string, string, net.Addr) (bool, error) {
	return rand.Float64() >= cp.failPct, nil
}
|
|
|
// fauxSerf is a test double that reports a fixed cluster size.
type fauxSerf struct {
	// numNodes is the value handed back by NumNodes.
	numNodes int
}

// NumNodes returns the node count the fauxSerf was configured with.
func (fs *fauxSerf) NumNodes() int {
	count := fs.numNodes
	return count
}
|
|
|
func testManager() (m *Manager) { |
|
logger := GetBufferedLogger() |
|
shutdownCh := make(chan struct{}) |
|
m = New(logger, shutdownCh, &fauxSerf{numNodes: 16384}, &fauxConnPool{}, "", noopRebalancer) |
|
return m |
|
} |
|
|
|
// noopRebalancer does nothing; the test Manager constructors pass it to New
// as the rebalancer argument.
func noopRebalancer() {}
|
|
|
func testManagerFailProb(failPct float64) (m *Manager) { |
|
logger := GetBufferedLogger() |
|
shutdownCh := make(chan struct{}) |
|
m = New(logger, shutdownCh, &fauxSerf{}, &fauxConnPool{failPct: failPct}, "", noopRebalancer) |
|
return m |
|
} |
|
|
|
// func (l *serverList) cycleServer() (servers []*metadata.Server) { |
|
func TestManagerInternal_cycleServer(t *testing.T) { |
|
m := testManager() |
|
l := m.getServerList() |
|
|
|
server0 := &metadata.Server{Name: "server1"} |
|
server1 := &metadata.Server{Name: "server2"} |
|
server2 := &metadata.Server{Name: "server3"} |
|
l.servers = append(l.servers, server0, server1, server2) |
|
m.saveServerList(l) |
|
|
|
l = m.getServerList() |
|
if len(l.servers) != 3 { |
|
t.Fatalf("server length incorrect: %d/3", len(l.servers)) |
|
} |
|
if l.servers[0] != server0 && |
|
l.servers[1] != server1 && |
|
l.servers[2] != server2 { |
|
t.Fatalf("initial server ordering not correct") |
|
} |
|
|
|
l.servers = l.cycleServer() |
|
if len(l.servers) != 3 { |
|
t.Fatalf("server length incorrect: %d/3", len(l.servers)) |
|
} |
|
if l.servers[0] != server1 && |
|
l.servers[1] != server2 && |
|
l.servers[2] != server0 { |
|
t.Fatalf("server ordering after one cycle not correct") |
|
} |
|
|
|
l.servers = l.cycleServer() |
|
if len(l.servers) != 3 { |
|
t.Fatalf("server length incorrect: %d/3", len(l.servers)) |
|
} |
|
if l.servers[0] != server2 && |
|
l.servers[1] != server0 && |
|
l.servers[2] != server1 { |
|
t.Fatalf("server ordering after two cycles not correct") |
|
} |
|
|
|
l.servers = l.cycleServer() |
|
if len(l.servers) != 3 { |
|
t.Fatalf("server length incorrect: %d/3", len(l.servers)) |
|
} |
|
if l.servers[0] != server0 && |
|
l.servers[1] != server1 && |
|
l.servers[2] != server2 { |
|
t.Fatalf("server ordering after three cycles not correct") |
|
} |
|
} |
|
|
|
// func (m *Manager) getServerList() serverList { |
|
func TestManagerInternal_getServerList(t *testing.T) { |
|
m := testManager() |
|
l := m.getServerList() |
|
if l.servers == nil { |
|
t.Fatalf("serverList.servers nil") |
|
} |
|
|
|
if len(l.servers) != 0 { |
|
t.Fatalf("serverList.servers length not zero") |
|
} |
|
} |
|
|
|
func TestManagerInternal_New(t *testing.T) { |
|
m := testManager() |
|
if m == nil { |
|
t.Fatalf("Manager nil") |
|
} |
|
|
|
if m.clusterInfo == nil { |
|
t.Fatalf("Manager.clusterInfo nil") |
|
} |
|
|
|
if m.logger == nil { |
|
t.Fatalf("Manager.logger nil") |
|
} |
|
|
|
if m.shutdownCh == nil { |
|
t.Fatalf("Manager.shutdownCh nil") |
|
} |
|
} |
|
|
|
// func (m *Manager) reconcileServerList(l *serverList) bool { |
|
func TestManagerInternal_reconcileServerList(t *testing.T) { |
|
tests := []int{0, 1, 2, 3, 4, 5, 10, 100} |
|
for _, n := range tests { |
|
ok, err := test_reconcileServerList(n) |
|
if !ok { |
|
t.Errorf("Expected %d to pass: %v", n, err) |
|
} |
|
} |
|
} |
|
|
|
func test_reconcileServerList(maxServers int) (bool, error) { |
|
// Build a server list, reconcile, verify the missing servers are |
|
// missing, the added have been added, and the original server is |
|
// present. |
|
const failPct = 0.5 |
|
m := testManagerFailProb(failPct) |
|
|
|
var failedServers, healthyServers []*metadata.Server |
|
for i := 0; i < maxServers; i++ { |
|
nodeName := fmt.Sprintf("s%02d", i) |
|
|
|
node := &metadata.Server{Name: nodeName} |
|
// Add 66% of servers to Manager |
|
if rand.Float64() > 0.33 { |
|
m.AddServer(node) |
|
|
|
// Of healthy servers, (ab)use connPoolPinger to |
|
// failPct of the servers for the reconcile. This |
|
// allows for the selected server to no longer be |
|
// healthy for the reconcile below. |
|
if ok, _ := m.connPoolPinger.Ping(node.Datacenter, node.ShortName, node.Addr); ok { |
|
// Will still be present |
|
healthyServers = append(healthyServers, node) |
|
} else { |
|
// Will be missing |
|
failedServers = append(failedServers, node) |
|
} |
|
} else { |
|
// Will be added from the call to reconcile |
|
healthyServers = append(healthyServers, node) |
|
} |
|
} |
|
|
|
// Randomize Manager's server list |
|
m.RebalanceServers() |
|
selectedServer := m.FindServer() |
|
|
|
var selectedServerFailed bool |
|
for _, s := range failedServers { |
|
if selectedServer.Key().Equal(s.Key()) { |
|
selectedServerFailed = true |
|
break |
|
} |
|
} |
|
|
|
// Update Manager's server list to be "healthy" based on Serf. |
|
// Reconcile this with origServers, which is shuffled and has a live |
|
// connection, but possibly out of date. |
|
origServers := m.getServerList() |
|
m.saveServerList(serverList{servers: healthyServers}) |
|
|
|
// This should always succeed with non-zero server lists |
|
if !selectedServerFailed && !m.reconcileServerList(&origServers) && |
|
len(m.getServerList().servers) != 0 && |
|
len(origServers.servers) != 0 { |
|
// If the random gods are unfavorable and we end up with zero |
|
// length lists, expect things to fail and retry the test. |
|
return false, fmt.Errorf("Expected reconcile to succeed: %v %d %d", |
|
selectedServerFailed, |
|
len(m.getServerList().servers), |
|
len(origServers.servers)) |
|
} |
|
|
|
// If we have zero-length server lists, test succeeded in degenerate |
|
// case. |
|
if len(m.getServerList().servers) == 0 && |
|
len(origServers.servers) == 0 { |
|
// Failed as expected w/ zero length list |
|
return true, nil |
|
} |
|
|
|
resultingServerMap := make(map[metadata.Key]bool) |
|
for _, s := range m.getServerList().servers { |
|
resultingServerMap[*s.Key()] = true |
|
} |
|
|
|
// Test to make sure no failed servers are in the Manager's |
|
// list. Error if there are any failedServers in l.servers |
|
for _, s := range failedServers { |
|
_, ok := resultingServerMap[*s.Key()] |
|
if ok { |
|
return false, fmt.Errorf("Found failed server %v in merged list %v", s, resultingServerMap) |
|
} |
|
} |
|
|
|
// Test to make sure all healthy servers are in the healthy list. |
|
if len(healthyServers) != len(m.getServerList().servers) { |
|
return false, fmt.Errorf("Expected healthy map and servers to match: %d/%d", len(healthyServers), len(healthyServers)) |
|
} |
|
|
|
// Test to make sure all healthy servers are in the resultingServerMap list. |
|
for _, s := range healthyServers { |
|
_, ok := resultingServerMap[*s.Key()] |
|
if !ok { |
|
return false, fmt.Errorf("Server %v missing from healthy map after merged lists", s) |
|
} |
|
} |
|
return true, nil |
|
} |
|
|
|
func TestRebalanceDelayer(t *testing.T) { |
|
type testCase struct { |
|
servers int |
|
nodes int |
|
expected time.Duration |
|
} |
|
|
|
testCases := []testCase{ |
|
{servers: 0, nodes: 1}, |
|
{servers: 0, nodes: 100}, |
|
{servers: 0, nodes: 65535}, |
|
{servers: 0, nodes: 1000000}, |
|
|
|
{servers: 1, nodes: 100}, |
|
{servers: 1, nodes: 1024}, |
|
{servers: 1, nodes: 8192}, |
|
{servers: 1, nodes: 11520}, |
|
{servers: 1, nodes: 11521, expected: 3*time.Minute + 15625*time.Microsecond}, |
|
{servers: 1, nodes: 16384, expected: 4*time.Minute + 16*time.Second}, |
|
{servers: 1, nodes: 65535, expected: 17*time.Minute + 3984375000}, |
|
{servers: 1, nodes: 1000000, expected: 4*time.Hour + 20*time.Minute + 25*time.Second}, |
|
|
|
{servers: 2, nodes: 100}, |
|
{servers: 2, nodes: 16384}, |
|
{servers: 2, nodes: 23040}, |
|
{servers: 2, nodes: 23041, expected: 3*time.Minute + 7812500}, |
|
{servers: 2, nodes: 65535, expected: 8*time.Minute + 31992187500}, |
|
{servers: 2, nodes: 1000000, expected: 2*time.Hour + 10*time.Minute + 12500*time.Millisecond}, |
|
|
|
{servers: 3, nodes: 0}, |
|
{servers: 3, nodes: 100}, |
|
{servers: 3, nodes: 1024}, |
|
{servers: 3, nodes: 16384}, |
|
{servers: 3, nodes: 34560}, |
|
{servers: 3, nodes: 34561, expected: 3*time.Minute + 5208333}, |
|
{servers: 3, nodes: 65535, expected: 5*time.Minute + 41328125000}, |
|
{servers: 3, nodes: 1000000, expected: 86*time.Minute + 48333333333}, |
|
|
|
{servers: 5, nodes: 0}, |
|
{servers: 5, nodes: 1024}, |
|
{servers: 5, nodes: 16384}, |
|
{servers: 5, nodes: 32768}, |
|
{servers: 5, nodes: 57600}, |
|
{servers: 5, nodes: 65535, expected: 3*time.Minute + 24796875000}, |
|
{servers: 5, nodes: 1000000, expected: 52*time.Minute + 5*time.Second}, |
|
|
|
{servers: 7, nodes: 65535}, |
|
{servers: 7, nodes: 80500}, |
|
{servers: 7, nodes: 131070, expected: 4*time.Minute + 52566964285}, |
|
|
|
{servers: 11, nodes: 1000000, expected: 23*time.Minute + 40454545454}, |
|
{servers: 19, nodes: 1000000, expected: 13*time.Minute + 42368421052}, |
|
} |
|
|
|
for _, tc := range testCases { |
|
delay := delayer.Delay(tc.servers, tc.nodes) |
|
|
|
if tc.expected != 0 { |
|
assert.Equal(t, tc.expected, delay, "nodes=%d, servers=%d", tc.nodes, tc.servers) |
|
continue |
|
} |
|
|
|
min := 2 * time.Minute |
|
max := 3 * time.Minute |
|
if delay < min { |
|
t.Errorf("nodes=%d, servers=%d, expected >%v, actual %v", tc.nodes, tc.servers, min, delay) |
|
} |
|
if delay > max { |
|
t.Errorf("nodes=%d, servers=%d, expected <%v, actual %v", tc.nodes, tc.servers, max, delay) |
|
} |
|
} |
|
} |
|
|
|
// func (m *Manager) saveServerList(l serverList) { |
|
func TestManagerInternal_saveServerList(t *testing.T) { |
|
m := testManager() |
|
|
|
// Initial condition |
|
func() { |
|
l := m.getServerList() |
|
if len(l.servers) != 0 { |
|
t.Fatalf("Manager.saveServerList failed to load init config") |
|
} |
|
|
|
newServer := new(metadata.Server) |
|
l.servers = append(l.servers, newServer) |
|
m.saveServerList(l) |
|
}() |
|
|
|
// Test that save works |
|
func() { |
|
l1 := m.getServerList() |
|
t1NumServers := len(l1.servers) |
|
if t1NumServers != 1 { |
|
t.Fatalf("Manager.saveServerList failed to save mutated config") |
|
} |
|
}() |
|
|
|
// Verify mutation w/o a save doesn't alter the original |
|
func() { |
|
newServer := new(metadata.Server) |
|
l := m.getServerList() |
|
l.servers = append(l.servers, newServer) |
|
|
|
l_orig := m.getServerList() |
|
origNumServers := len(l_orig.servers) |
|
if origNumServers >= len(l.servers) { |
|
t.Fatalf("Manager.saveServerList unsaved config overwrote original") |
|
} |
|
}() |
|
} |
|
|
|
func TestManager_healthyServer(t *testing.T) { |
|
t.Run("checking itself", func(t *testing.T) { |
|
m := testManager() |
|
m.serverName = "s1" |
|
server := metadata.Server{Name: m.serverName} |
|
require.True(t, m.healthyServer(&server)) |
|
}) |
|
t.Run("checking another server with successful ping", func(t *testing.T) { |
|
m := testManager() |
|
server := metadata.Server{Name: "s1"} |
|
require.True(t, m.healthyServer(&server)) |
|
}) |
|
t.Run("checking another server with failed ping", func(t *testing.T) { |
|
m := testManagerFailProb(1) |
|
server := metadata.Server{Name: "s1"} |
|
require.False(t, m.healthyServer(&server)) |
|
}) |
|
} |
|
|
|
func TestManager_Rebalance(t *testing.T) { |
|
t.Run("single server cluster checking itself", func(t *testing.T) { |
|
m := testManager() |
|
m.serverName = "s1" |
|
m.AddServer(&metadata.Server{Name: m.serverName}) |
|
m.RebalanceServers() |
|
require.False(t, m.IsOffline()) |
|
}) |
|
t.Run("multi server cluster is unhealthy when pings always fail", func(t *testing.T) { |
|
m := testManagerFailProb(1) |
|
m.AddServer(&metadata.Server{Name: "s1"}) |
|
m.AddServer(&metadata.Server{Name: "s2"}) |
|
m.AddServer(&metadata.Server{Name: "s3"}) |
|
for i := 0; i < 100; i++ { |
|
m.RebalanceServers() |
|
require.True(t, m.IsOffline()) |
|
} |
|
}) |
|
t.Run("multi server cluster checking itself remains healthy despite pings always fail", func(t *testing.T) { |
|
m := testManagerFailProb(1) |
|
m.serverName = "s1" |
|
m.AddServer(&metadata.Server{Name: m.serverName}) |
|
m.AddServer(&metadata.Server{Name: "s2"}) |
|
m.AddServer(&metadata.Server{Name: "s3"}) |
|
for i := 0; i < 100; i++ { |
|
m.RebalanceServers() |
|
require.False(t, m.IsOffline()) |
|
} |
|
}) |
|
}
|
|
|