diff --git a/command/agent/agent.go b/command/agent/agent.go
index 1c41bddb46..6284a25f02 100644
--- a/command/agent/agent.go
+++ b/command/agent/agent.go
@@ -261,6 +261,11 @@ func (a *Agent) consulConfig() *consul.Config {
// Apply dev mode
base.DevMode = a.config.DevMode
+ // Apply performance factors
+ if a.config.Performance.RaftMultiplier > 0 {
+ base.ScaleRaft(a.config.Performance.RaftMultiplier)
+ }
+
// Override with our config
if a.config.Datacenter != "" {
base.Datacenter = a.config.Datacenter
diff --git a/command/agent/agent_test.go b/command/agent/agent_test.go
index af82a00e8f..c676a80929 100644
--- a/command/agent/agent_test.go
+++ b/command/agent/agent_test.go
@@ -17,6 +17,7 @@ import (
"github.com/hashicorp/consul/consul"
"github.com/hashicorp/consul/consul/structs"
"github.com/hashicorp/consul/testutil"
+ "github.com/hashicorp/raft"
)
const (
@@ -191,6 +192,44 @@ func TestAgent_CheckAdvertiseAddrsSettings(t *testing.T) {
}
}
+func TestAgent_CheckPerformanceSettings(t *testing.T) {
+ // Default case: the Raft timeouts should equal raft.DefaultConfig() scaled by consul.DefaultRaftMultiplier.
+ {
+ c := nextConfig()
+ c.ConsulConfig = nil
+ dir, agent := makeAgent(t, c)
+ defer os.RemoveAll(dir)
+ defer agent.Shutdown()
+
+ raftMult := time.Duration(consul.DefaultRaftMultiplier)
+ r := agent.consulConfig().RaftConfig
+ def := raft.DefaultConfig()
+ if r.HeartbeatTimeout != raftMult*def.HeartbeatTimeout ||
+ r.ElectionTimeout != raftMult*def.ElectionTimeout ||
+ r.LeaderLeaseTimeout != raftMult*def.LeaderLeaseTimeout {
+ t.Fatalf("bad: %#v", *r)
+ }
+ }
+
+ // Explicit case: a multiplier of 99, set directly on the config struct, should scale the same timeouts by 99.
+ {
+ c := nextConfig()
+ c.Performance.RaftMultiplier = 99
+ dir, agent := makeAgent(t, c)
+ defer os.RemoveAll(dir)
+ defer agent.Shutdown()
+
+ const raftMult time.Duration = 99
+ r := agent.consulConfig().RaftConfig
+ def := raft.DefaultConfig()
+ if r.HeartbeatTimeout != raftMult*def.HeartbeatTimeout ||
+ r.ElectionTimeout != raftMult*def.ElectionTimeout ||
+ r.LeaderLeaseTimeout != raftMult*def.LeaderLeaseTimeout {
+ t.Fatalf("bad: %#v", *r)
+ }
+ }
+}
+
func TestAgent_ReconnectConfigSettings(t *testing.T) {
c := nextConfig()
func() {
diff --git a/command/agent/config.go b/command/agent/config.go
index 87b3a13526..0e2574e402 100644
--- a/command/agent/config.go
+++ b/command/agent/config.go
@@ -111,6 +111,13 @@ type DNSConfig struct {
DisableCompression bool `mapstructure:"disable_compression"`
}
+// Performance holds tuning knobs for the performance of Consul's subsystems.
+type Performance struct {
+ // RaftMultiplier scales Raft's HeartbeatTimeout, ElectionTimeout, and LeaderLeaseTimeout.
+ // Zero is treated as unset; DecodeConfig rejects values above consul.MaxRaftMultiplier.
+ RaftMultiplier uint `mapstructure:"raft_multiplier"`
+}
+
// Telemetry is the telemetry configuration for the server
type Telemetry struct {
// StatsiteAddr is the address of a statsite instance. If provided,
@@ -205,10 +212,13 @@ type Telemetry struct {
// Some of this is configurable as CLI flags, but most must
// be set using a configuration file.
type Config struct {
- // DevMode enables a fast-path mode of opertaion to bring up an in-memory
+ // DevMode enables a fast-path mode of operation to bring up an in-memory
// server with minimal configuration. Useful for developing Consul.
DevMode bool `mapstructure:"-"`
+ // Performance is used to tune the performance of Consul's subsystems.
+ Performance Performance `mapstructure:"performance"`
+
// Bootstrap is used to bring up the first Consul server, and
// permits that node to elect itself leader
Bootstrap bool `mapstructure:"bootstrap"`
@@ -932,6 +942,11 @@ func DecodeConfig(r io.Reader) (*Config, error) {
result.AdvertiseAddrs.RPC = addr
}
+ // Enforce the max Raft multiplier.
+ if result.Performance.RaftMultiplier > consul.MaxRaftMultiplier {
+ return nil, fmt.Errorf("Performance.RaftMultiplier must be <= %d", consul.MaxRaftMultiplier)
+ }
+
return &result, nil
}
@@ -1085,6 +1100,11 @@ func DecodeCheckDefinition(raw interface{}) (*CheckDefinition, error) {
func MergeConfig(a, b *Config) *Config {
var result Config = *a
+ // Propagate non-default performance settings
+ if b.Performance.RaftMultiplier > 0 {
+ result.Performance.RaftMultiplier = b.Performance.RaftMultiplier
+ }
+
// Copy the strings if they're set
if b.Bootstrap {
result.Bootstrap = true
diff --git a/command/agent/config_test.go b/command/agent/config_test.go
index 3b4de35ed0..ed174a9f82 100644
--- a/command/agent/config_test.go
+++ b/command/agent/config_test.go
@@ -957,6 +957,23 @@ func TestDecodeConfig_invalidKeys(t *testing.T) {
}
}
+func TestDecodeConfig_Performance(t *testing.T) {
+ input := `{"performance": { "raft_multiplier": 3 }}` // accepted: the decoded value should come back unchanged
+ config, err := DecodeConfig(bytes.NewReader([]byte(input)))
+ if err != nil {
+ t.Fatalf("err: %s", err)
+ }
+ if config.Performance.RaftMultiplier != 3 {
+ t.Fatalf("bad: multiplier isn't set: %#v", config)
+ }
+
+ input = `{"performance": { "raft_multiplier": 11 }}` // rejected: DecodeConfig errors above consul.MaxRaftMultiplier
+ config, err = DecodeConfig(bytes.NewReader([]byte(input)))
+ if err == nil || !strings.Contains(err.Error(), "Performance.RaftMultiplier must be <=") {
+ t.Fatalf("bad: %v", err)
+ }
+}
+
func TestDecodeConfig_Services(t *testing.T) {
input := `{
"services": [
@@ -1382,6 +1399,9 @@ func TestMergeConfig(t *testing.T) {
}
b := &Config{
+ Performance: Performance{
+ RaftMultiplier: 99,
+ },
Bootstrap: true,
BootstrapExpect: 3,
Datacenter: "dc2",
diff --git a/consul/config.go b/consul/config.go
index 5bde0951d9..0e094f305b 100644
--- a/consul/config.go
+++ b/consul/config.go
@@ -17,6 +17,15 @@ const (
DefaultDC = "dc1"
DefaultLANSerfPort = 8301
DefaultWANSerfPort = 8302
+
+ // DefaultRaftMultiplier is used as a baseline Raft configuration that
+ // will be reliable on a very basic server. See docs/guides/performance.html
+ // for information on how this value was obtained.
+ DefaultRaftMultiplier uint = 5
+
+ // MaxRaftMultiplier is a fairly arbitrary upper bound that limits the
+ // amount of performance detuning that's possible.
+ MaxRaftMultiplier uint = 10
)
var (
@@ -314,8 +323,11 @@ func DefaultConfig() *Config {
CoordinateUpdateBatchSize: 128,
CoordinateUpdateMaxBatches: 5,
- // Hold an RPC for up to 5 seconds by default
- RPCHoldTimeout: 5 * time.Second,
+ // This holds RPCs during leader elections. For the default Raft
+ // config the election timeout is 5 seconds, so we set this a
+ // bit longer to try to cover that period. This should be more
+ // than enough when running in the high performance mode.
+ RPCHoldTimeout: 7 * time.Second,
}
// Increase our reap interval to 3 days instead of 24h.
@@ -333,6 +345,7 @@ func DefaultConfig() *Config {
// Enable interoperability with unversioned Raft library, and don't
// start using new ID-based features yet.
conf.RaftConfig.ProtocolVersion = 1
+ conf.ScaleRaft(DefaultRaftMultiplier)
// Disable shutdown on removal
conf.RaftConfig.ShutdownOnRemove = false
@@ -340,6 +353,19 @@ func DefaultConfig() *Config {
return conf
}
+// ScaleRaft sets the Raft HeartbeatTimeout, ElectionTimeout, and LeaderLeaseTimeout to
+// raftMultRaw times the corresponding raft.DefaultConfig() values. It always scales from
+// the library defaults rather than from the current values, so repeated calls do not
+// compound and it is safe to call more than once while composing configurations.
+func (c *Config) ScaleRaft(raftMultRaw uint) {
+ raftMult := time.Duration(raftMultRaw)
+
+ def := raft.DefaultConfig()
+ c.RaftConfig.HeartbeatTimeout = raftMult * def.HeartbeatTimeout
+ c.RaftConfig.ElectionTimeout = raftMult * def.ElectionTimeout
+ c.RaftConfig.LeaderLeaseTimeout = raftMult * def.LeaderLeaseTimeout
+}
+
func (c *Config) tlsConfig() *tlsutil.Config {
tlsConf := &tlsutil.Config{
VerifyIncoming: c.VerifyIncoming,
diff --git a/testutil/server.go b/testutil/server.go
index 8a88a8b1c7..aad60e3866 100644
--- a/testutil/server.go
+++ b/testutil/server.go
@@ -32,6 +32,11 @@ import (
// offset is used to atomically increment the port numbers.
var offset uint64
+// TestPerformanceConfig holds the "performance" section of the test server's JSON configuration.
+type TestPerformanceConfig struct {
+ RaftMultiplier uint `json:"raft_multiplier,omitempty"`
+}
+
// TestPortConfig configures the various ports used for services
// provided by the Consul server.
type TestPortConfig struct {
@@ -51,20 +56,21 @@ type TestAddressConfig struct {
// TestServerConfig is the main server configuration struct.
type TestServerConfig struct {
- NodeName string `json:"node_name"`
- Bootstrap bool `json:"bootstrap,omitempty"`
- Server bool `json:"server,omitempty"`
- DataDir string `json:"data_dir,omitempty"`
- Datacenter string `json:"datacenter,omitempty"`
- DisableCheckpoint bool `json:"disable_update_check"`
- LogLevel string `json:"log_level,omitempty"`
- Bind string `json:"bind_addr,omitempty"`
- Addresses *TestAddressConfig `json:"addresses,omitempty"`
- Ports *TestPortConfig `json:"ports,omitempty"`
- ACLMasterToken string `json:"acl_master_token,omitempty"`
- ACLDatacenter string `json:"acl_datacenter,omitempty"`
- ACLDefaultPolicy string `json:"acl_default_policy,omitempty"`
- Stdout, Stderr io.Writer `json:"-"`
+ NodeName string `json:"node_name"`
+ Performance *TestPerformanceConfig `json:"performance,omitempty"`
+ Bootstrap bool `json:"bootstrap,omitempty"`
+ Server bool `json:"server,omitempty"`
+ DataDir string `json:"data_dir,omitempty"`
+ Datacenter string `json:"datacenter,omitempty"`
+ DisableCheckpoint bool `json:"disable_update_check"`
+ LogLevel string `json:"log_level,omitempty"`
+ Bind string `json:"bind_addr,omitempty"`
+ Addresses *TestAddressConfig `json:"addresses,omitempty"`
+ Ports *TestPortConfig `json:"ports,omitempty"`
+ ACLMasterToken string `json:"acl_master_token,omitempty"`
+ ACLDatacenter string `json:"acl_datacenter,omitempty"`
+ ACLDefaultPolicy string `json:"acl_default_policy,omitempty"`
+ Stdout, Stderr io.Writer `json:"-"`
}
// ServerConfigCallback is a function interface which can be
@@ -79,11 +85,14 @@ func defaultServerConfig() *TestServerConfig {
return &TestServerConfig{
NodeName: fmt.Sprintf("node%d", idx),
DisableCheckpoint: true,
- Bootstrap: true,
- Server: true,
- LogLevel: "debug",
- Bind: "127.0.0.1",
- Addresses: &TestAddressConfig{},
+ Performance: &TestPerformanceConfig{
+ RaftMultiplier: 1,
+ },
+ Bootstrap: true,
+ Server: true,
+ LogLevel: "debug",
+ Bind: "127.0.0.1",
+ Addresses: &TestAddressConfig{},
Ports: &TestPortConfig{
DNS: 20000 + idx,
HTTP: 21000 + idx,
diff --git a/website/source/docs/agent/options.html.markdown b/website/source/docs/agent/options.html.markdown
index 952ec54d1f..7d439cd357 100644
--- a/website/source/docs/agent/options.html.markdown
+++ b/website/source/docs/agent/options.html.markdown
@@ -576,6 +576,25 @@ Consul will not enable TLS for the HTTP API unless the `https` port has been ass
* `node_name` Equivalent to the
[`-node` command-line flag](#_node).
+* `performance` Available in Consul 0.7 and
+ later, this is a nested object that allows tuning the performance of different subsystems in
+ Consul. See the [Server Performance](/docs/guides/performance.html) guide for more details. The
+ following parameters are available:
+ * `raft_multiplier` - An integer
+ multiplier used by Consul servers to scale key Raft timing parameters. Omitting this value
+ or setting it to 0 uses the default timing described below. Lower values are used to tighten
+ timing and increase sensitivity while higher values relax timings and reduce sensitivity.
+ Tuning this affects the time it takes Consul to detect leader failures and to perform
+ leader elections, at the expense of requiring more network and CPU resources for better
+ performance.<br><br>By default, Consul will use a lower-performance timing that's suitable
+ for [minimal Consul servers](/docs/guides/performance.html#minumum), currently equivalent
+ to setting this to a value of 5 (this default may be changed in future versions of Consul,
+ depending on whether the target minimum server profile changes). Setting this to a value of 1 will
+ configure Raft to its highest-performance mode, equivalent to the default timing of Consul
+ prior to 0.7, and is recommended for [production Consul servers](/docs/guides/performance.html#production).
+ See the note on [last contact](/docs/guides/performance.html#last-contact) timing for more
+ details on tuning this parameter. The maximum allowed value is 10.
+
* `ports` This is a nested object that allows setting
the bind ports for the following keys:
* `dns` - The DNS server, -1 to disable. Default 8600.
diff --git a/website/source/docs/agent/telemetry.html.markdown b/website/source/docs/agent/telemetry.html.markdown
index 1072609574..c662ae0d00 100644
--- a/website/source/docs/agent/telemetry.html.markdown
+++ b/website/source/docs/agent/telemetry.html.markdown
@@ -129,8 +129,8 @@ These metrics are used to monitor the health of the Consul servers.