Move EC2 discovery logic into retryJoin for robustness

pull/2459/head
Kyle Havlovitz 2016-11-02 14:35:37 -04:00
parent 468bf736b4
commit d4d6e2b482
No known key found for this signature in database
GPG Key ID: 8A5E6B173056AD6C
5 changed files with 169 additions and 116 deletions

View File

@ -4,6 +4,7 @@ import (
"flag"
"fmt"
"io"
"log"
"net"
"os"
"os/signal"
@ -105,10 +106,6 @@ func (c *Command) readConfig() *Config {
cmdFlags.BoolVar(&cmdConfig.AtlasJoin, "atlas-join", false, "auto-join with Atlas")
cmdFlags.StringVar(&cmdConfig.AtlasEndpoint, "atlas-endpoint", "", "endpoint for Atlas integration")
cmdFlags.StringVar(&cmdConfig.EC2Discovery.Region, "ec2-region", "", "Region to search for instances in")
cmdFlags.StringVar(&cmdConfig.EC2Discovery.TagKey, "ec2-tag-key", "", "EC2 tag key to filter for server discovery")
cmdFlags.StringVar(&cmdConfig.EC2Discovery.TagValue, "ec2-tag-value", "", "EC2 tag value to filter for server discovery")
cmdFlags.IntVar(&cmdConfig.Protocol, "protocol", -1, "protocol version")
cmdFlags.BoolVar(&cmdConfig.EnableSyslog, "syslog", false,
@ -125,6 +122,12 @@ func (c *Command) readConfig() *Config {
"number of retries for joining")
cmdFlags.StringVar(&retryInterval, "retry-interval", "",
"interval between join attempts")
cmdFlags.StringVar(&cmdConfig.RetryJoinEC2.Region, "retry-join-ec2-region", "",
"EC2 Region to discover servers in")
cmdFlags.StringVar(&cmdConfig.RetryJoinEC2.TagKey, "retry-join-ec2-tag-key", "",
"EC2 tag key to filter on for server discovery")
cmdFlags.StringVar(&cmdConfig.RetryJoinEC2.TagValue, "retry-join-ec2-tag-value", "",
"EC2 tag value to filter on for server discovery")
cmdFlags.Var((*AppendSliceValue)(&cmdConfig.RetryJoinWan), "retry-join-wan",
"address of agent to join -wan on startup with retry")
cmdFlags.IntVar(&cmdConfig.RetryMaxAttemptsWan, "retry-max-wan", 0,
@ -320,25 +323,12 @@ func (c *Command) readConfig() *Config {
c.Ui.Error("WARNING: Bootstrap mode enabled! Do not enable unless necessary")
}
// Populate the join list using EC2 discovery if configured
if config.EC2Discovery.TagKey != "" || config.EC2Discovery.TagValue != "" {
if config.EC2Discovery.TagKey == "" || config.EC2Discovery.TagValue == "" {
c.Ui.Error("EC2 tag key and EC2 tag value are both required")
// Need both tag key and value for EC2 discovery
if config.RetryJoinEC2.TagKey != "" || config.RetryJoinEC2.TagValue != "" {
if config.RetryJoinEC2.TagKey == "" || config.RetryJoinEC2.TagValue == "" {
c.Ui.Error("tag key and value are both required for EC2 retry-join")
return nil
}
if config.EC2Discovery.Region == "" {
c.Ui.Error("Amazon EC2 region is required")
return nil
}
ec2servers, err := config.discoverEc2Hosts()
if err != nil {
c.Ui.Error(fmt.Sprintf("Unable to query EC2 insances: %s", err))
return nil
}
c.Ui.Info(fmt.Sprintf("Discovered %d servers from EC2...", len(ec2servers)))
config.StartJoin = append(config.StartJoin, ec2servers...)
}
// Set the version info
@ -393,12 +383,23 @@ func (config *Config) verifyUniqueListeners() error {
return nil
}
// discoverEc2Hosts searches the given AWS region, returning a list of instance
// addresses where EC2TagKey = EC2TagValue
func (c *Config) discoverEc2Hosts() ([]string, error) {
config := c.EC2Discovery
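// discoverEc2Hosts searches an AWS region, returning a list of instance IPs
// for instances whose RetryJoinEC2.TagKey tag equals RetryJoinEC2.TagValue.
// If no region is configured, the region is taken from the local instance's
// identity document.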
func (c *Config) discoverEc2Hosts(logger *log.Logger) ([]string, error) {
config := c.RetryJoinEC2
ec2meta := ec2metadata.New(session.New())
if config.Region == "" {
logger.Printf("[INFO] agent: No EC2 region provided, querying instance metadata endpoint...")
identity, err := ec2meta.GetInstanceIdentityDocument()
if err != nil {
return nil, err
}
config.Region = identity.Region
}
awsConfig := &aws.Config{
Region: aws.String(config.Region),
Region: &config.Region,
Credentials: credentials.NewChainCredentials(
[]credentials.Provider{
&credentials.StaticProvider{
@ -410,7 +411,7 @@ func (c *Config) discoverEc2Hosts() ([]string, error) {
&credentials.EnvProvider{},
&credentials.SharedCredentialsProvider{},
&ec2rolecreds.EC2RoleProvider{
Client: ec2metadata.New(session.New()),
Client: ec2meta,
},
}),
}
@ -664,7 +665,9 @@ func (c *Command) startupJoinWan(config *Config) error {
// retryJoin is used to handle retrying a join until it succeeds or all
// retries are exhausted.
func (c *Command) retryJoin(config *Config, errCh chan<- struct{}) {
if len(config.RetryJoin) == 0 {
ec2Enabled := config.RetryJoinEC2.TagKey != "" && config.RetryJoinEC2.TagValue != ""
if len(config.RetryJoin) == 0 && !ec2Enabled {
return
}
@ -673,10 +676,25 @@ func (c *Command) retryJoin(config *Config, errCh chan<- struct{}) {
attempt := 0
for {
n, err := c.agent.JoinLAN(config.RetryJoin)
if err == nil {
logger.Printf("[INFO] agent: Join completed. Synced with %d initial agents", n)
return
var servers []string
var err error
if ec2Enabled {
servers, err = config.discoverEc2Hosts(logger)
if err != nil {
logger.Printf("[ERROR] agent: Unable to query EC2 insances: %s", err)
}
logger.Printf("[INFO] agent: Discovered %d servers from EC2...", len(servers))
}
servers = append(config.RetryJoin, servers...)
if len(servers) == 0 {
err = fmt.Errorf("No servers to join")
} else {
n, err := c.agent.JoinLAN(servers)
if err == nil {
logger.Printf("[INFO] agent: Join completed. Synced with %d initial agents", n)
return
}
}
attempt++
@ -1162,57 +1180,57 @@ Usage: consul agent [options]
Options:
-advertise=addr Sets the advertise address to use
-advertise-wan=addr Sets address to advertise on wan instead of advertise addr
-atlas=org/name Sets the Atlas infrastructure name, enables SCADA.
-atlas-join Enables auto-joining the Atlas cluster
-atlas-token=token Provides the Atlas API token
-atlas-endpoint=1.2.3.4 The address of the endpoint for Atlas integration.
-ec2-region The AWS region to search for instances in
-ec2-tag-key=tag The EC2 instance tag to filter on
-ec2-tag-value=value The filter value for ec2-tag-key
-bootstrap Sets server to bootstrap mode
-bind=0.0.0.0 Sets the bind address for cluster communication
-http-port=8500 Sets the HTTP API port to listen on
-bootstrap-expect=0 Sets server to expect bootstrap mode.
-client=127.0.0.1 Sets the address to bind for client access.
This includes RPC, DNS, HTTP and HTTPS (if configured)
-config-file=foo Path to a JSON file to read configuration from.
This can be specified multiple times.
-config-dir=foo Path to a directory to read configuration files
from. This will read every file ending in ".json"
as configuration in this directory in alphabetical
order. This can be specified multiple times.
-data-dir=path Path to a data directory to store agent state
-dev Starts the agent in development mode.
-recursor=1.2.3.4 Address of an upstream DNS server.
Can be specified multiple times.
-dc=east-aws Datacenter of the agent (deprecated: use 'datacenter' instead).
-datacenter=east-aws Datacenter of the agent.
-encrypt=key Provides the gossip encryption key
-join=1.2.3.4 Address of an agent to join at start time.
Can be specified multiple times.
-join-wan=1.2.3.4 Address of an agent to join -wan at start time.
Can be specified multiple times.
-retry-join=1.2.3.4 Address of an agent to join at start time with
retries enabled. Can be specified multiple times.
-retry-interval=30s Time to wait between join attempts.
-retry-max=0 Maximum number of join attempts. Defaults to 0, which
will retry indefinitely.
-retry-join-wan=1.2.3.4 Address of an agent to join -wan at start time with
retries enabled. Can be specified multiple times.
-retry-interval-wan=30s Time to wait between join -wan attempts.
-retry-max-wan=0 Maximum number of join -wan attempts. Defaults to 0, which
will retry indefinitely.
-log-level=info Log level of the agent.
-node=hostname Name of this node. Must be unique in the cluster
-protocol=N Sets the protocol version. Defaults to latest.
-rejoin Ignores a previous leave and attempts to rejoin the cluster.
-server Switches agent to server mode.
-syslog Enables logging to syslog
-ui Enables the built-in static web UI server
-ui-dir=path Path to directory containing the Web UI resources
-pid-file=path Path to file to store agent PID
-advertise=addr Sets the advertise address to use
-advertise-wan=addr Sets address to advertise on wan instead of advertise addr
-atlas=org/name Sets the Atlas infrastructure name, enables SCADA.
-atlas-join Enables auto-joining the Atlas cluster
-atlas-token=token Provides the Atlas API token
-atlas-endpoint=1.2.3.4 The address of the endpoint for Atlas integration.
-bootstrap Sets server to bootstrap mode
-bind=0.0.0.0 Sets the bind address for cluster communication
-http-port=8500 Sets the HTTP API port to listen on
-bootstrap-expect=0 Sets server to expect bootstrap mode.
-client=127.0.0.1 Sets the address to bind for client access.
This includes RPC, DNS, HTTP and HTTPS (if configured)
-config-file=foo Path to a JSON file to read configuration from.
This can be specified multiple times.
-config-dir=foo Path to a directory to read configuration files
from. This will read every file ending in ".json"
as configuration in this directory in alphabetical
order. This can be specified multiple times.
-data-dir=path Path to a data directory to store agent state
-dev Starts the agent in development mode.
-recursor=1.2.3.4 Address of an upstream DNS server.
Can be specified multiple times.
-dc=east-aws Datacenter of the agent (deprecated: use 'datacenter' instead).
-datacenter=east-aws Datacenter of the agent.
-encrypt=key Provides the gossip encryption key
-join=1.2.3.4 Address of an agent to join at start time.
Can be specified multiple times.
-join-wan=1.2.3.4 Address of an agent to join -wan at start time.
Can be specified multiple times.
-retry-join=1.2.3.4 Address of an agent to join at start time with
retries enabled. Can be specified multiple times.
-retry-interval=30s Time to wait between join attempts.
-retry-max=0 Maximum number of join attempts. Defaults to 0, which
will retry indefinitely.
-retry-join-ec2-region EC2 Region to use for discovering servers to join.
-retry-join-ec2-tag-key EC2 tag key to filter on for server discovery
-retry-join-ec2-tag-value EC2 tag value to filter on for server discovery
-retry-join-wan=1.2.3.4 Address of an agent to join -wan at start time with
retries enabled. Can be specified multiple times.
-retry-interval-wan=30s Time to wait between join -wan attempts.
-retry-max-wan=0 Maximum number of join -wan attempts. Defaults to 0, which
will retry indefinitely.
-log-level=info Log level of the agent.
-node=hostname Name of this node. Must be unique in the cluster
-protocol=N Sets the protocol version. Defaults to latest.
-rejoin Ignores a previous leave and attempts to rejoin the cluster.
-server Switches agent to server mode.
-syslog Enables logging to syslog
-ui Enables the built-in static web UI server
-ui-dir=path Path to directory containing the Web UI resources
-pid-file=path Path to file to store agent PID
`
return strings.TrimSpace(helpText)

View File

@ -280,14 +280,14 @@ func TestDiscoverEC2Hosts(t *testing.T) {
}
c := &Config{
EC2Discovery: EC2Discovery{
RetryJoinEC2: RetryJoinEC2{
Region: os.Getenv("AWS_REGION"),
TagKey: "ConsulRole",
TagValue: "Server",
},
}
servers, err := c.discoverEc2Hosts()
servers, err := c.discoverEc2Hosts(&log.Logger{})
if err != nil {
t.Fatal(err)
}

View File

@ -118,8 +118,8 @@ type DNSConfig struct {
RecursorTimeoutRaw string `mapstructure:"recursor_timeout" json:"-"`
}
// EC2Discovery is used to configure discovery of instances via Amazon's EC2 api
type EC2Discovery struct {
// RetryJoinEC2 is used to configure discovery of instances via Amazon's EC2 api
type RetryJoinEC2 struct {
// The AWS region to look for instances in
Region string `mapstructure:"region"`
@ -399,6 +399,9 @@ type Config struct {
RetryInterval time.Duration `mapstructure:"-" json:"-"`
RetryIntervalRaw string `mapstructure:"retry_interval"`
// RetryJoinEC2 configuration
RetryJoinEC2 RetryJoinEC2 `mapstructure:"retry_join_ec2"`
// RetryJoinWan is a list of addresses to join -wan with retry enabled.
RetryJoinWan []string `mapstructure:"retry_join_wan"`
@ -544,9 +547,6 @@ type Config struct {
// empty, the defaults from the provider are used.
AtlasEndpoint string `mapstructure:"atlas_endpoint"`
// EC2Discovery configuration
EC2Discovery EC2Discovery `mapstructure:"ec2_discovery"`
// AEInterval controls the anti-entropy interval. This is how often
// the agent attempts to reconcile its local state with the server's
// representation of our state. Defaults to every 60s.
@ -1339,6 +1339,21 @@ func MergeConfig(a, b *Config) *Config {
if b.RetryInterval != 0 {
result.RetryInterval = b.RetryInterval
}
if b.RetryJoinEC2.AccessKeyID != "" {
result.RetryJoinEC2.AccessKeyID = b.RetryJoinEC2.AccessKeyID
}
if b.RetryJoinEC2.SecretAccessKey != "" {
result.RetryJoinEC2.SecretAccessKey = b.RetryJoinEC2.SecretAccessKey
}
if b.RetryJoinEC2.Region != "" {
result.RetryJoinEC2.Region = b.RetryJoinEC2.Region
}
if b.RetryJoinEC2.TagKey != "" {
result.RetryJoinEC2.TagKey = b.RetryJoinEC2.TagKey
}
if b.RetryJoinEC2.TagValue != "" {
result.RetryJoinEC2.TagValue = b.RetryJoinEC2.TagValue
}
if b.RetryMaxAttemptsWan != 0 {
result.RetryMaxAttemptsWan = b.RetryMaxAttemptsWan
}
@ -1452,21 +1467,6 @@ func MergeConfig(a, b *Config) *Config {
if b.AtlasEndpoint != "" {
result.AtlasEndpoint = b.AtlasEndpoint
}
if b.EC2Discovery.AccessKeyID != "" {
result.EC2Discovery.AccessKeyID = b.EC2Discovery.AccessKeyID
}
if b.EC2Discovery.SecretAccessKey != "" {
result.EC2Discovery.SecretAccessKey = b.EC2Discovery.SecretAccessKey
}
if b.EC2Discovery.Region != "" {
result.EC2Discovery.Region = b.EC2Discovery.Region
}
if b.EC2Discovery.TagKey != "" {
result.EC2Discovery.TagKey = b.EC2Discovery.TagKey
}
if b.EC2Discovery.TagValue != "" {
result.EC2Discovery.TagValue = b.EC2Discovery.TagValue
}
if b.DisableCoordinates {
result.DisableCoordinates = true
}

View File

@ -939,8 +939,8 @@ func TestDecodeConfig_invalidKeys(t *testing.T) {
}
}
func TestDecodeConfig_EC2Discovery(t *testing.T) {
input := `{"ec2_discovery": {
func TestRetryJoinEC2(t *testing.T) {
input := `{"retry_join_ec2": {
"region": "us-east-1",
"tag_key": "ConsulRole",
"tag_value": "Server",
@ -952,19 +952,19 @@ func TestDecodeConfig_EC2Discovery(t *testing.T) {
t.Fatalf("err: %s", err)
}
if config.EC2Discovery.Region != "us-east-1" {
if config.RetryJoinEC2.Region != "us-east-1" {
t.Fatalf("bad: %#v", config)
}
if config.EC2Discovery.TagKey != "ConsulRole" {
if config.RetryJoinEC2.TagKey != "ConsulRole" {
t.Fatalf("bad: %#v", config)
}
if config.EC2Discovery.TagValue != "Server" {
if config.RetryJoinEC2.TagValue != "Server" {
t.Fatalf("bad: %#v", config)
}
if config.EC2Discovery.AccessKeyID != "asdf" {
if config.RetryJoinEC2.AccessKeyID != "asdf" {
t.Fatalf("bad: %#v", config)
}
if config.EC2Discovery.SecretAccessKey != "qwerty" {
if config.RetryJoinEC2.SecretAccessKey != "qwerty" {
t.Fatalf("bad: %#v", config)
}
}
@ -1400,7 +1400,7 @@ func TestMergeConfig(t *testing.T) {
CheckUpdateIntervalRaw: "8m",
RetryIntervalRaw: "10s",
RetryIntervalWanRaw: "10s",
EC2Discovery: EC2Discovery{
RetryJoinEC2: RetryJoinEC2{
Region: "us-east-1",
TagKey: "Key1",
TagValue: "Value1",
@ -1529,7 +1529,7 @@ func TestMergeConfig(t *testing.T) {
AtlasToken: "123456789",
AtlasACLToken: "abcdefgh",
AtlasJoin: true,
EC2Discovery: EC2Discovery{
RetryJoinEC2: RetryJoinEC2{
Region: "us-east-2",
TagKey: "Key2",
TagValue: "Value2",

View File

@ -92,11 +92,11 @@ The options below are all specified on the command-line.
* <a name="_bind"></a><a href="#_bind">`-bind`</a> - The address that should be bound to
for internal cluster communications.
This is an IP address that should be reachable by all other nodes in the cluster.
By default, this is "0.0.0.0", meaning Consul will bind to all addresses on
By default, this is "0.0.0.0", meaning Consul will bind to all addresses on
the local machine and will [advertise](/docs/agent/options.html#_advertise)
the first available private IPv4 address to the rest of the cluster. If there
are multiple private IPv4 addresses available, Consul will exit with an error
at startup. If you specify "[::]", Consul will
at startup. If you specify "[::]", Consul will
[advertise](/docs/agent/options.html#_advertise) the first available public
IPv6 address. If there are multiple public IPv6 addresses available, Consul
will exit with an error at startup.
@ -193,6 +193,24 @@ will exit with an error at startup.
with return code 1. By default, this is set to 0 which is interpreted as infinite
retries.
* <a name="_retry_join_ec2_tag_key"></a><a href="#_retry_join_ec2_tag_key">`-retry-join-ec2-tag-key`
</a> - The Amazon EC2 instance tag key to filter on. When used with
[`-retry-join-ec2-tag-value`](#_retry_join_ec2_tag_value), Consul will attempt to join EC2
instances with the given tag key and value on startup.
<br><br>For AWS authentication the following methods are supported, in order:
- Static credentials (from the config file)
- Environment variables (`AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY`)
- Shared credentials file (`~/.aws/credentials` or the path specified by `AWS_SHARED_CREDENTIALS_FILE`)
- EC2 Role instance metadata.
* <a name="_retry_join_ec2_tag_value"></a><a href="#_retry_join_ec2_tag_value">`-retry-join-ec2-tag-value`
</a> - The Amazon EC2 instance tag value to filter on.
* <a name="_retry_join_ec2_region"></a><a href="#_retry_join_ec2_region">`-retry-join-ec2-region`
</a> - (Optional) The Amazon EC2 region to use. If not specified, Consul
will use the local instance's [EC2 metadata endpoint](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instance-identity-documents.html)
to discover the region.
* <a name="_join_wan"></a><a href="#_join_wan">`-join-wan`</a> - Address of another
wan agent to join upon starting up. This can be
specified multiple times to specify multiple WAN agents to join. If Consul is
@ -440,6 +458,9 @@ Consul will not enable TLS for the HTTP API unless the `https` port has been ass
* <a name="atlas_endpoint"></a><a href="#atlas_endpoint">`atlas_endpoint`</a> Equivalent to the
[`-atlas-endpoint` command-line flag](#_atlas_endpoint).
* <a name="atlas_endpoint"></a><a href="#atlas_endpoint">`atlas_endpoint`</a> Equivalent to the
[`-atlas-endpoint` command-line flag](#_atlas_endpoint).
* <a name="bootstrap"></a><a href="#bootstrap">`bootstrap`</a> Equivalent to the
[`-bootstrap` command-line flag](#_bootstrap).
@ -673,6 +694,20 @@ Consul will not enable TLS for the HTTP API unless the `https` port has been ass
of addresses to attempt joining to WAN every [`retry_interval_wan`](#_retry_interval_wan) until at least one
[`-join-wan`](#_join_wan) works.
* <a name="retry_join_ec2"></a><a href="#retry_join_ec2">`retry_join_ec2`</a> - This is a nested object
that allows the setting of EC2-related [`-retry-join`](#_retry_join) options.
<br><br>
The following keys are valid:
* `region` - The AWS region. Equivalent to the
[`-retry-join-ec2-region` command-line flag](#_retry_join_ec2_region).
* `tag_key` - The EC2 instance tag key to filter on. Equivalent to the<br>
[`-retry-join-ec2-tag-key` command-line flag](#_retry_join_ec2_tag_key).
* `tag_value` - The EC2 instance tag value to filter on. Equivalent to the<br>
[`-retry-join-ec2-tag-value` command-line flag](#_retry_join_ec2_tag_value).
* `access_key_id` - The AWS access key ID to use for authentication.
* `secret_access_key` - The AWS secret access key to use for authentication.
* <a name="retry_interval_wan"></a><a href="#retry_interval_wan">`retry_interval_wan`</a> Equivalent to the
[`-retry-interval-wan` command-line flag](#_retry_interval_wan).