From a3ae480acc65fa12e9d4d889250862b2b7780a00 Mon Sep 17 00:00:00 2001 From: Justin Santa Barbara Date: Wed, 4 Mar 2015 17:58:36 -0500 Subject: [PATCH] Ignore EC2 instances that are stopped Otherwise we pick up previous cluster instances (in EC2, stopped instances hang around for a while - maybe 30 minutes?) --- .../salt/kube-controller-manager/default | 1 - pkg/cloudprovider/aws/aws.go | 57 +++++++++++++++---- pkg/cloudprovider/aws/aws_test.go | 6 ++ 3 files changed, 51 insertions(+), 13 deletions(-) diff --git a/cluster/saltbase/salt/kube-controller-manager/default b/cluster/saltbase/salt/kube-controller-manager/default index 7dcdddba76..22d9856431 100644 --- a/cluster/saltbase/salt/kube-controller-manager/default +++ b/cluster/saltbase/salt/kube-controller-manager/default @@ -26,7 +26,6 @@ {% if grains.cloud == 'aws' -%} {% set cloud_provider = "--cloud_provider=aws" -%} {% set cloud_config = "--cloud_config=/etc/aws.conf" -%} - {% set minion_regexp = "" -%} {% set machines = "--machines=" + ','.join(salt['mine.get']('roles:kubernetes-pool', 'network.ip_addrs', expr_form='grain').keys()) -%} {% endif -%} {% if grains.cloud == 'azure' -%} diff --git a/pkg/cloudprovider/aws/aws.go b/pkg/cloudprovider/aws/aws.go index 93363c17cd..13d4024c51 100644 --- a/pkg/cloudprovider/aws/aws.go +++ b/pkg/cloudprovider/aws/aws.go @@ -187,20 +187,46 @@ func (aws *AWSCloud) getInstancesByDnsName(name string) (*ec2.Instance, error) { if err != nil { return nil, err } - if len(resp.Reservations) == 0 { - return nil, fmt.Errorf("no reservations found for host: %s", name) - } - if len(resp.Reservations) > 1 { - return nil, fmt.Errorf("multiple reservations found for host: %s", name) - } - if len(resp.Reservations[0].Instances) == 0 { - return nil, fmt.Errorf("no instances found for host: %s", name) - } - if len(resp.Reservations[0].Instances) > 1 { - return nil, fmt.Errorf("multiple instances found for host: %s", name) + + instances := []*ec2.Instance{} + for _, reservation := range resp.Reservations { + for _, instance := range reservation.Instances { + // TODO: Push running logic down into filter? + if !isAlive(&instance) { + continue + } + + if instance.PrivateDNSName != name { + // TODO: Should we warn here? - the filter should have caught this + // (this will happen in the tests if they don't fully mock the EC2 API) + continue + } + + instances = append(instances, &instance) + } } - return &resp.Reservations[0].Instances[0], nil + if len(instances) == 0 { + return nil, fmt.Errorf("no instances found for host: %s", name) + } + if len(instances) > 1 { + return nil, fmt.Errorf("multiple instances found for host: %s", name) + } + return instances[0], nil +} + +// Check if the instance is alive (running or pending) +// We typically ignore instances that are not alive +func isAlive(instance *ec2.Instance) bool { + switch instance.State.Name { + case "shutting-down", "terminated", "stopping", "stopped": + return false + case "pending", "running": + return true + default: + glog.Errorf("unknown EC2 instance state: %s", instance.State) + return false + } } // Return a list of instances matching regex string. @@ -226,6 +252,12 @@ func (aws *AWSCloud) getInstancesByRegex(regex string) ([]string, error) { instances := []string{} for _, reservation := range resp.Reservations { for _, instance := range reservation.Instances { + // TODO: Push filtering down into EC2 API filter? + if !isAlive(&instance) { + glog.V(2).Infof("skipping EC2 instance (not alive): %s", instance.InstanceId) + continue + } + for _, tag := range instance.Tags { if tag.Key == "Name" && re.MatchString(tag.Value) { instances = append(instances, instance.PrivateDNSName) @@ -234,6 +266,7 @@ func (aws *AWSCloud) getInstancesByRegex(regex string) ([]string, error) { } } } + glog.V(2).Infof("Matched EC2 instances: %s", instances) return instances, nil } diff --git a/pkg/cloudprovider/aws/aws_test.go b/pkg/cloudprovider/aws/aws_test.go index d08f99ab71..4e4eb5ff3b 100644 --- a/pkg/cloudprovider/aws/aws_test.go +++ b/pkg/cloudprovider/aws/aws_test.go @@ -104,12 +104,16 @@ func TestList(t *testing.T) { instances := make([]ec2.Instance, 4) instances[0].Tags = []ec2.Tag{{"Name", "foo"}} instances[0].PrivateDNSName = "instance1" + instances[0].State.Name = "running" instances[1].Tags = []ec2.Tag{{"Name", "bar"}} instances[1].PrivateDNSName = "instance2" + instances[1].State.Name = "running" instances[2].Tags = []ec2.Tag{{"Name", "baz"}} instances[2].PrivateDNSName = "instance3" + instances[2].State.Name = "running" instances[3].Tags = []ec2.Tag{{"Name", "quux"}} instances[3].PrivateDNSName = "instance4" + instances[3].State.Name = "running" aws := mockInstancesResp(instances) @@ -139,8 +143,10 @@ func TestIPAddress(t *testing.T) { instances := make([]ec2.Instance, 2) instances[0].PrivateDNSName = "instance1" instances[0].PrivateIpAddress = "192.168.0.1" + instances[0].State.Name = "running" instances[1].PrivateDNSName = "instance1" instances[1].PrivateIpAddress = "192.168.0.2" + instances[1].State.Name = "running" aws1 := mockInstancesResp([]ec2.Instance{}) _, err1 := aws1.IPAddress("instance")