diff --git a/command/agent/agent.go b/command/agent/agent.go index 472adf53da..75ff489ecc 100644 --- a/command/agent/agent.go +++ b/command/agent/agent.go @@ -2,15 +2,16 @@ package agent import ( "fmt" - "github.com/hashicorp/consul/consul" - "github.com/hashicorp/consul/consul/structs" - "github.com/hashicorp/serf/serf" "io" "log" "net" "os" "strconv" "sync" + + "github.com/hashicorp/consul/consul" + "github.com/hashicorp/consul/consul/structs" + "github.com/hashicorp/serf/serf" ) /* @@ -174,6 +175,9 @@ func (a *Agent) consulConfig() *consul.Config { if a.config.RejoinAfterLeave { base.RejoinAfterLeave = true } + if a.config.Expect != 0 { + base.Expect = a.config.Expect + } if a.config.Protocol > 0 { base.ProtocolVersion = uint8(a.config.Protocol) } diff --git a/command/agent/command.go b/command/agent/command.go index 2c9c5e271e..25e04dc44d 100644 --- a/command/agent/command.go +++ b/command/agent/command.go @@ -3,10 +3,6 @@ package agent import ( "flag" "fmt" - "github.com/armon/go-metrics" - "github.com/hashicorp/go-syslog" - "github.com/hashicorp/logutils" - "github.com/mitchellh/cli" "io" "net" "os" @@ -16,6 +12,11 @@ import ( "strings" "syscall" "time" + + "github.com/armon/go-metrics" + "github.com/hashicorp/go-syslog" + "github.com/hashicorp/logutils" + "github.com/mitchellh/cli" ) // gracefulTimeout controls how long we wait before forcefully terminating @@ -62,6 +63,7 @@ func (c *Command) readConfig() *Config { cmdFlags.BoolVar(&cmdConfig.Server, "server", false, "run agent as server") cmdFlags.BoolVar(&cmdConfig.Bootstrap, "bootstrap", false, "enable server bootstrap mode") + cmdFlags.IntVar(&cmdConfig.Expect, "expect", 0, "enable automatic bootstrap via expect mode") cmdFlags.StringVar(&cmdConfig.ClientAddr, "client", "", "address to bind client listeners to (DNS, HTTP, RPC)") cmdFlags.StringVar(&cmdConfig.BindAddr, "bind", "", "address to bind server listeners to") @@ -127,6 +129,30 @@ func (c *Command) readConfig() *Config { return nil } + // Expect can only work when acting as a server + if config.Expect != 0 && !config.Server { + c.Ui.Error("Expect mode cannot be enabled when server mode is not enabled") + return nil + } + + // Expect & Bootstrap are mutually exclusive + if config.Expect != 0 && config.Bootstrap { + c.Ui.Error("Bootstrap cannot be provided with an expected server count") + return nil + } + + // Warn if we are in expect mode + if config.Expect != 0 { + if config.Expect == 1 { + // just use bootstrap mode + c.Ui.Error("WARNING: Expect Mode is specified as 1; this is the same as Bootstrap mode.") + config.Expect = 0 + config.Bootstrap = true + } else { + c.Ui.Error(fmt.Sprintf("WARNING: Expect Mode enabled, looking for %v servers!", config.Expect)) + } + } + // Warn if we are in bootstrap mode if config.Bootstrap { c.Ui.Error("WARNING: Bootstrap mode enabled! Do not enable unless necessary") @@ -524,6 +550,7 @@ Options: order. -data-dir=path Path to a data directory to store agent state -dc=east-aws Datacenter of the agent + -expect=0 Sets server to expect bootstrap mode. -join=1.2.3.4 Address of an agent to join at start time. Can be specified multiple times. -log-level=info Log level of the agent. diff --git a/command/agent/config.go b/command/agent/config.go index a5a2bc4524..c3631429a8 100644 --- a/command/agent/config.go +++ b/command/agent/config.go @@ -4,8 +4,6 @@ import ( "encoding/base64" "encoding/json" "fmt" - "github.com/hashicorp/consul/consul" - "github.com/mitchellh/mapstructure" "io" "net" "os" @@ -13,6 +11,9 @@ import ( "sort" "strings" "time" + + "github.com/hashicorp/consul/consul" + "github.com/mitchellh/mapstructure" ) // Ports is used to simplify the configuration by @@ -64,6 +65,10 @@ type Config struct { // permits that node to elect itself leader Bootstrap bool `mapstructure:"bootstrap"` + // Expect tries to automatically bootstrap the Consul cluster, + // by witholding peers until enough servers join. + Expect int `mapstructure:"expect"` + // Server controls if this agent acts like a Consul server, // or merely as a client. Servers have more state, take part // in leader election, etc. @@ -219,6 +224,7 @@ type dirEnts []os.FileInfo func DefaultConfig() *Config { return &Config{ Bootstrap: false, + Expect: 0, Server: false, Datacenter: consul.DefaultDC, Domain: "consul.", @@ -449,6 +455,9 @@ func MergeConfig(a, b *Config) *Config { if b.Bootstrap { result.Bootstrap = true } + if b.Expect != 0 { + result.Expect = b.Expect + } if b.Datacenter != "" { result.Datacenter = b.Datacenter } diff --git a/command/agent/config_test.go b/command/agent/config_test.go index b1c83d4795..0225630d08 100644 --- a/command/agent/config_test.go +++ b/command/agent/config_test.go @@ -93,6 +93,21 @@ func TestDecodeConfig(t *testing.T) { t.Fatalf("bad: %#v", config) } + // Expect bootstrap + input = `{"server": true, "expect": 3}` + config, err = DecodeConfig(bytes.NewReader([]byte(input))) + if err != nil { + t.Fatalf("err: %s", err) + } + + if !config.Server { + t.Fatalf("bad: %#v", config) + } + + if config.Expect != 3 { + t.Fatalf("bad: %#v", config) + } + // DNS setup input = `{"ports": {"dns": 8500}, "recursor": "8.8.8.8", "domain": "foobar"}` config, err = DecodeConfig(bytes.NewReader([]byte(input))) @@ -426,6 +441,7 @@ func TestDecodeConfig_Check(t *testing.T) { func TestMergeConfig(t *testing.T) { a := &Config{ Bootstrap: false, + Expect: 0, Datacenter: "dc1", DataDir: "/tmp/foo", DNSRecursor: "127.0.0.1:1001", @@ -444,6 +460,7 @@ func TestMergeConfig(t *testing.T) { b := &Config{ Bootstrap: true, + Expect: 3, Datacenter: "dc2", DataDir: "/tmp/bar", DNSRecursor: "127.0.0.2:1001", diff --git a/consul/config.go b/consul/config.go index 6000177a8f..ae6c482823 100644 --- a/consul/config.go +++ b/consul/config.go @@ -44,6 +44,11 @@ type Config struct { // other nodes being present Bootstrap bool + // Expect mode is used to automatically bring up a collection of + // Consul servers. This can be used to automatically bring up a collection + // of nodes. + Expect int + // Datacenter is the datacenter this Consul server represents Datacenter string diff --git a/consul/leader.go b/consul/leader.go index d09f11185e..b63f7bbe80 100644 --- a/consul/leader.go +++ b/consul/leader.go @@ -1,13 +1,14 @@ package consul import ( + "net" + "strconv" + "time" + "github.com/armon/go-metrics" "github.com/hashicorp/consul/consul/structs" "github.com/hashicorp/raft" "github.com/hashicorp/serf/serf" - "net" - "strconv" - "time" ) const ( diff --git a/consul/serf.go b/consul/serf.go index 11a48ee471..c23c10c81e 100644 --- a/consul/serf.go +++ b/consul/serf.go @@ -1,8 +1,10 @@ package consul import ( - "github.com/hashicorp/serf/serf" + "net" "strings" + + "github.com/hashicorp/serf/serf" ) const ( @@ -149,6 +151,41 @@ func (s *Server) nodeJoin(me serf.MemberEvent, wan bool) { s.localConsuls[parts.Addr.String()] = parts s.localLock.Unlock() } + + // If we're still expecting, and they are too, check servers. + if s.config.Expect != 0 && parts.Expect != 0 { + index, err := s.raftStore.LastIndex() + if err == nil && index == 0 { + members := s.serfLAN.Members() + addrs := make([]net.Addr, 0) + for _, member := range members { + valid, p := isConsulServer(member) + if valid && p.Datacenter == parts.Datacenter { + if p.Expect != parts.Expect { + s.logger.Printf("[ERR] consul: '%v' and '%v' have different expect values. All expect nodes should have the same value, will never leave expect mode", m.Name, member.Name) + return + } else { + addrs = append(addrs, &net.TCPAddr{IP: member.Addr, Port: p.Port}) + } + } + } + + if len(addrs) >= s.config.Expect { + // we have enough nodes, set peers. + + future := s.raft.SetPeers(addrs) + + if err := future.Error(); err != nil { + s.logger.Printf("[ERR] consul: failed to leave expect mode and set peers: %v", err) + } else { + // we've left expect mode, don't enter this again + s.config.Expect = 0 + } + } + } else if err != nil { + s.logger.Printf("[ERR] consul: error retrieving index: %v", err) + } + } } } diff --git a/consul/server.go b/consul/server.go index e7dd195f34..d126e2343e 100644 --- a/consul/server.go +++ b/consul/server.go @@ -4,9 +4,6 @@ import ( "crypto/tls" "errors" "fmt" - "github.com/hashicorp/raft" - "github.com/hashicorp/raft-mdb" - "github.com/hashicorp/serf/serf" "log" "net" "net/rpc" @@ -17,6 +14,10 @@ import ( "strconv" "sync" "time" + + "github.com/hashicorp/raft" + "github.com/hashicorp/raft-mdb" + "github.com/hashicorp/serf/serf" ) // These are the protocol versions that Consul can _understand_. These are @@ -233,6 +234,9 @@ func (s *Server) setupSerf(conf *serf.Config, ch chan serf.Event, path string, w if s.config.Bootstrap { conf.Tags["bootstrap"] = "1" } + if s.config.Expect != 0 { + conf.Tags["expect"] = fmt.Sprintf("%d", s.config.Expect) + } conf.MemberlistConfig.LogOutput = s.config.LogOutput conf.LogOutput = s.config.LogOutput conf.EventCh = ch diff --git a/consul/server_test.go b/consul/server_test.go index b8edc6ef48..109f5081b8 100644 --- a/consul/server_test.go +++ b/consul/server_test.go @@ -3,12 +3,13 @@ package consul import ( "errors" "fmt" - "github.com/hashicorp/consul/testutil" "io/ioutil" "net" "os" "testing" "time" + + "github.com/hashicorp/consul/testutil" ) var nextPort = 15000 @@ -87,6 +88,19 @@ func testServerDCBootstrap(t *testing.T, dc string, bootstrap bool) (string, *Se return dir, server } +func testServerDCExpect(t *testing.T, dc string, expect int) (string, *Server) { + name := fmt.Sprintf("Node %d", getPort()) + dir, config := testServerConfig(t, name) + config.Datacenter = dc + config.Bootstrap = false + config.Expect = expect + server, err := NewServer(config) + if err != nil { + t.Fatalf("err: %v", err) + } + return dir, server +} + func TestServer_StartStop(t *testing.T) { dir := tmpDir(t) defer os.RemoveAll(dir) @@ -304,3 +318,147 @@ func TestServer_JoinLAN_TLS(t *testing.T) { t.Fatalf("no peer established") }) } + +func TestServer_Expect(t *testing.T) { + // all test servers should be in expect=3 mode + dir1, s1 := testServerDCExpect(t, "dc1", 3) + defer os.RemoveAll(dir1) + defer s1.Shutdown() + + dir2, s2 := testServerDCExpect(t, "dc1", 3) + defer os.RemoveAll(dir2) + defer s2.Shutdown() + + dir3, s3 := testServerDCExpect(t, "dc1", 3) + defer os.RemoveAll(dir3) + defer s3.Shutdown() + + // Try to join + addr := fmt.Sprintf("127.0.0.1:%d", + s1.config.SerfLANConfig.MemberlistConfig.BindPort) + if _, err := s2.JoinLAN([]string{addr}); err != nil { + t.Fatalf("err: %v", err) + } + + var p1 []net.Addr + var p2 []net.Addr + + // should have no peers yet + testutil.WaitForResult(func() (bool, error) { + p1, _ = s1.raftPeers.Peers() + return len(p1) == 0, errors.New(fmt.Sprintf("%v", p1)) + }, func(err error) { + t.Fatalf("should have 0 peers: %v", err) + }) + + testutil.WaitForResult(func() (bool, error) { + p2, _ = s2.raftPeers.Peers() + return len(p2) == 0, errors.New(fmt.Sprintf("%v", p2)) + }, func(err error) { + t.Fatalf("should have 0 peers: %v", err) + }) + + // join the third node + if _, err := s3.JoinLAN([]string{addr}); err != nil { + t.Fatalf("err: %v", err) + } + + var p3 []net.Addr + + // should now have all three peers + testutil.WaitForResult(func() (bool, error) { + p1, _ = s1.raftPeers.Peers() + return len(p1) == 3, errors.New(fmt.Sprintf("%v", p1)) + }, func(err error) { + t.Fatalf("should have 3 peers: %v", err) + }) + + testutil.WaitForResult(func() (bool, error) { + p2, _ = s2.raftPeers.Peers() + return len(p2) == 3, errors.New(fmt.Sprintf("%v", p2)) + }, func(err error) { + t.Fatalf("should have 3 peers: %v", err) + }) + + testutil.WaitForResult(func() (bool, error) { + p3, _ = s3.raftPeers.Peers() + return len(p3) == 3, errors.New(fmt.Sprintf("%v", p3)) + }, func(err error) { + t.Fatalf("should have 3 peers: %v", err) + }) + + // check if there is one leader now + testutil.WaitForLeader(t, s1.RPC, "dc1") +} + +func TestServer_BadExpect(t *testing.T) { + // this one is in expect=3 mode + dir1, s1 := testServerDCExpect(t, "dc1", 3) + defer os.RemoveAll(dir1) + defer s1.Shutdown() + + // this one is in expect=2 mode + dir2, s2 := testServerDCExpect(t, "dc1", 2) + defer os.RemoveAll(dir2) + defer s2.Shutdown() + + // and this one is in expect=3 mode + dir3, s3 := testServerDCExpect(t, "dc1", 3) + defer os.RemoveAll(dir3) + defer s3.Shutdown() + + // Try to join + addr := fmt.Sprintf("127.0.0.1:%d", + s1.config.SerfLANConfig.MemberlistConfig.BindPort) + if _, err := s2.JoinLAN([]string{addr}); err != nil { + t.Fatalf("err: %v", err) + } + + var p1 []net.Addr + var p2 []net.Addr + + // should have no peers yet + testutil.WaitForResult(func() (bool, error) { + p1, _ = s1.raftPeers.Peers() + return len(p1) == 0, errors.New(fmt.Sprintf("%v", p1)) + }, func(err error) { + t.Fatalf("should have 0 peers: %v", err) + }) + + testutil.WaitForResult(func() (bool, error) { + p2, _ = s2.raftPeers.Peers() + return len(p2) == 0, errors.New(fmt.Sprintf("%v", p2)) + }, func(err error) { + t.Fatalf("should have 0 peers: %v", err) + }) + + // join the third node + if _, err := s3.JoinLAN([]string{addr}); err != nil { + t.Fatalf("err: %v", err) + } + + var p3 []net.Addr + + // should still have no peers (because s2 is in expect=2 mode) + testutil.WaitForResult(func() (bool, error) { + p1, _ = s1.raftPeers.Peers() + return len(p1) == 0, errors.New(fmt.Sprintf("%v", p1)) + }, func(err error) { + t.Fatalf("should have 0 peers: %v", err) + }) + + testutil.WaitForResult(func() (bool, error) { + p2, _ = s2.raftPeers.Peers() + return len(p2) == 0, errors.New(fmt.Sprintf("%v", p2)) + }, func(err error) { + t.Fatalf("should have 0 peers: %v", err) + }) + + testutil.WaitForResult(func() (bool, error) { + p3, _ = s3.raftPeers.Peers() + return len(p3) == 0, errors.New(fmt.Sprintf("%v", p3)) + }, func(err error) { + t.Fatalf("should have 0 peers: %v", err) + }) + +} diff --git a/consul/util.go b/consul/util.go index 402ecee734..00815ea10c 100644 --- a/consul/util.go +++ b/consul/util.go @@ -4,12 +4,13 @@ import ( crand "crypto/rand" "encoding/binary" "fmt" - "github.com/hashicorp/serf/serf" "net" "os" "path/filepath" "runtime" "strconv" + + "github.com/hashicorp/serf/serf" ) /* @@ -26,6 +27,7 @@ type serverParts struct { Datacenter string Port int Bootstrap bool + Expect int Version int Addr net.Addr } @@ -84,6 +86,16 @@ func isConsulServer(m serf.Member) (bool, *serverParts) { datacenter := m.Tags["dc"] _, bootstrap := m.Tags["bootstrap"] + expect := 0 + expect_str, ok := m.Tags["expect"] + var err error + if ok { + expect, err = strconv.Atoi(expect_str) + if err != nil { + return false, nil + } + } + port_str := m.Tags["port"] port, err := strconv.Atoi(port_str) if err != nil { @@ -103,6 +115,7 @@ func isConsulServer(m serf.Member) (bool, *serverParts) { Datacenter: datacenter, Port: port, Bootstrap: bootstrap, + Expect: expect, Addr: addr, Version: vsn, } diff --git a/consul/util_test.go b/consul/util_test.go index 65e5e99ed4..107146b521 100644 --- a/consul/util_test.go +++ b/consul/util_test.go @@ -1,10 +1,11 @@ package consul import ( - "github.com/hashicorp/serf/serf" "net" "regexp" "testing" + + "github.com/hashicorp/serf/serf" ) func TestStrContains(t *testing.T) { @@ -56,6 +57,9 @@ func TestIsConsulServer(t *testing.T) { if parts.Bootstrap { t.Fatalf("unexpected bootstrap") } + if parts.Expect != 0 { + t.Fatalf("bad: %v", parts.Expect) + } m.Tags["bootstrap"] = "1" valid, parts = isConsulServer(m) if !valid || !parts.Bootstrap { @@ -67,6 +71,12 @@ func TestIsConsulServer(t *testing.T) { if parts.Version != 1 { t.Fatalf("bad: %v", parts) } + m.Tags["expect"] = "3" + delete(m.Tags, "bootstrap") + valid, parts = isConsulServer(m) + if !valid || parts.Expect != 3 { + t.Fatalf("bad: %v", parts.Expect) + } } func TestIsConsulNode(t *testing.T) {