AWS: Configure LoadBalancer health checks

ELB will automatically create a health check, but if we update the listeners the old health check port sticks around, and all the instances are marked offline. Update the health-checks to match the listeners: we just check the first valid service port, with some hard-coded options for timeouts / retries etc.
2015-07-31 00:24:46 -04:00 · 2015-07-31 00:24:46 -04:00 · 08e904ad96
parent fde0a8884f
commit 08e904ad96
3 changed files with 71 additions and 0 deletions
--- a/pkg/cloudprovider/aws/aws_loadbalancer.go
+++ b/pkg/cloudprovider/aws/aws_loadbalancer.go
@ -18,6 +18,8 @@ package aws_cloud

 import (
 	"fmt"
+	"strconv"
+
 	"github.com/GoogleCloudPlatform/kubernetes/pkg/util"
 	"github.com/aws/aws-sdk-go/aws"
 	"github.com/aws/aws-sdk-go/service/ec2"
@ -193,6 +195,64 @@ func (s *AWSCloud) ensureLoadBalancer(region, name string, listeners []*elb.List
 	return loadBalancer, nil
 }

+// ensureLoadBalancerHealthCheck makes sure that the health check for an ELB
+// matches the configured listeners.
+//
+// The expected check is a TCP probe against the instance port of the first
+// listener that has one, combined with fixed threshold / timeout / interval
+// values. If the load balancer's current health check already matches, nothing
+// is changed; otherwise it is overwritten via ConfigureHealthCheck.
+//
+// An error is returned if the ELB client cannot be obtained, if no listener
+// has an instance port, or if the ConfigureHealthCheck call fails.
+func (s *AWSCloud) ensureLoadBalancerHealthCheck(region string, loadBalancer *elb.LoadBalancerDescription, listeners []*elb.Listener) error {
+	elbClient, err := s.getELBClient(region)
+	if err != nil {
+		return err
+	}
+
+	// May be nil if the description carries no health check; guarded below.
+	actual := loadBalancer.HealthCheck
+
+	// Default AWS settings
+	expectedHealthyThreshold := int64(10)
+	expectedUnhealthyThreshold := int64(2)
+	expectedTimeout := int64(5)
+	expectedInterval := int64(30)
+
+	// We only configure a TCP health-check on the first port
+	expectedTarget := ""
+	for _, listener := range listeners {
+		if listener.InstancePort == nil {
+			continue
+		}
+		expectedTarget = "TCP:" + strconv.FormatInt(*listener.InstancePort, 10)
+		break
+	}
+
+	if expectedTarget == "" {
+		return fmt.Errorf("unable to determine health check port (no valid listeners)")
+	}
+
+	// A missing health check can never be up to date, so it falls through to
+	// the update below instead of being dereferenced.
+	if actual != nil &&
+		expectedTarget == orEmpty(actual.Target) &&
+		expectedHealthyThreshold == orZero(actual.HealthyThreshold) &&
+		expectedUnhealthyThreshold == orZero(actual.UnhealthyThreshold) &&
+		expectedTimeout == orZero(actual.Timeout) &&
+		expectedInterval == orZero(actual.Interval) {
+		return nil
+	}
+
+	glog.V(2).Info("Updating load-balancer health-check")
+
+	healthCheck := &elb.HealthCheck{}
+	healthCheck.HealthyThreshold = &expectedHealthyThreshold
+	healthCheck.UnhealthyThreshold = &expectedUnhealthyThreshold
+	healthCheck.Timeout = &expectedTimeout
+	healthCheck.Interval = &expectedInterval
+	healthCheck.Target = &expectedTarget
+
+	request := &elb.ConfigureHealthCheckInput{}
+	request.HealthCheck = healthCheck
+	request.LoadBalancerName = loadBalancer.LoadBalancerName
+
+	_, err = elbClient.ConfigureHealthCheck(request)
+	if err != nil {
+		return fmt.Errorf("error configuring load-balancer health-check: %v", err)
+	}
+
+	return nil
+}
+
 // Makes sure that exactly the specified hosts are registered as instances with the load balancer
 func (s *AWSCloud) ensureLoadBalancerInstances(elbClient ELB, loadBalancerName string, lbInstances []*elb.Instance, instances []*ec2.Instance) error {
 	expected := util.NewStringSet()
--- a/pkg/cloudprovider/providers/aws/aws.go
+++ b/pkg/cloudprovider/providers/aws/aws.go
@ -117,6 +117,8 @@ type ELB interface {
 	DeleteLoadBalancerListeners(*elb.DeleteLoadBalancerListenersInput) (*elb.DeleteLoadBalancerListenersOutput, error)

 	ApplySecurityGroupsToLoadBalancer(*elb.ApplySecurityGroupsToLoadBalancerInput) (*elb.ApplySecurityGroupsToLoadBalancerOutput, error)
+
+	ConfigureHealthCheck(*elb.ConfigureHealthCheckInput) (*elb.ConfigureHealthCheckOutput, error)
 }

 // This is a simple pass-through of the Autoscaling client interface, which allows for testing
@ -1706,6 +1708,11 @@ func (s *AWSCloud) EnsureTCPLoadBalancer(name, region string, publicIP net.IP, p
 		return nil, err
 	}

+	err = s.ensureLoadBalancerHealthCheck(region, loadBalancer, listeners)
+	if err != nil {
+		return nil, err
+	}
+
 	err = s.updateInstanceSecurityGroupsForLoadBalancer(loadBalancer, instances)
 	if err != nil {
 		glog.Warning("Error opening ingress rules for the load balancer to the instances: ", err)
--- a/pkg/cloudprovider/providers/aws/aws_test.go
+++ b/pkg/cloudprovider/providers/aws/aws_test.go
@ -430,6 +430,10 @@ func (ec2 *FakeELB) ApplySecurityGroupsToLoadBalancer(*elb.ApplySecurityGroupsTo
 	panic("Not implemented")
 }

+// ConfigureHealthCheck is not supported by this fake; calling it panics.
+func (ec2 *FakeELB) ConfigureHealthCheck(*elb.ConfigureHealthCheckInput) (*elb.ConfigureHealthCheckOutput, error) {
+	panic("Not implemented")
+}
+
 // FakeASG holds a pointer to a FakeAWSServices.
 // NOTE(review): presumably the test fake for the autoscaling client — confirm.
 type FakeASG struct {
 	aws *FakeAWSServices
 }