mirror of https://github.com/k3s-io/k3s
Fix issue caused by sole server marked as failed under load
If health checks are failing for all servers, make a second pass through the server list with health checks ignored before returning failure.
Signed-off-by: Brad Davidson <brad.davidson@rancher.com>
(cherry picked from commit ca39614d4e)
Signed-off-by: Brad Davidson <brad.davidson@rancher.com>
pull/10290/head
parent
da89ab5052
commit
19692f5d17
|
@ -158,6 +158,7 @@ func (lb *LoadBalancer) dialContext(ctx context.Context, network, _ string) (net
|
||||||
lb.mutex.RLock()
|
lb.mutex.RLock()
|
||||||
defer lb.mutex.RUnlock()
|
defer lb.mutex.RUnlock()
|
||||||
|
|
||||||
|
var allChecksFailed bool
|
||||||
startIndex := lb.nextServerIndex
|
startIndex := lb.nextServerIndex
|
||||||
for {
|
for {
|
||||||
targetServer := lb.currentServerAddress
|
targetServer := lb.currentServerAddress
|
||||||
|
@ -165,7 +166,7 @@ func (lb *LoadBalancer) dialContext(ctx context.Context, network, _ string) (net
|
||||||
server := lb.servers[targetServer]
|
server := lb.servers[targetServer]
|
||||||
if server == nil || targetServer == "" {
|
if server == nil || targetServer == "" {
|
||||||
logrus.Debugf("Nil server for load balancer %s: %s", lb.serviceName, targetServer)
|
logrus.Debugf("Nil server for load balancer %s: %s", lb.serviceName, targetServer)
|
||||||
} else if server.healthCheck() {
|
} else if allChecksFailed || server.healthCheck() {
|
||||||
conn, err := server.dialContext(ctx, network, targetServer)
|
conn, err := server.dialContext(ctx, network, targetServer)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
return conn, nil
|
return conn, nil
|
||||||
|
@ -189,7 +190,11 @@ func (lb *LoadBalancer) dialContext(ctx context.Context, network, _ string) (net
|
||||||
startIndex = maxIndex
|
startIndex = maxIndex
|
||||||
}
|
}
|
||||||
if lb.nextServerIndex == startIndex {
|
if lb.nextServerIndex == startIndex {
|
||||||
return nil, errors.New("all servers failed")
|
if allChecksFailed {
|
||||||
|
return nil, errors.New("all servers failed")
|
||||||
|
}
|
||||||
|
logrus.Debugf("Health checks for all servers in load balancer %s have failed: retrying with health checks ignored", lb.serviceName)
|
||||||
|
allChecksFailed = true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -227,13 +227,19 @@ func (lb *LoadBalancer) SetHealthCheck(address string, healthCheck func() bool)
|
||||||
// runHealthChecks periodically health-checks all servers. Any servers that fail the health-check will have their
|
// runHealthChecks periodically health-checks all servers. Any servers that fail the health-check will have their
|
||||||
// connections closed, to force clients to switch over to a healthy server.
|
// connections closed, to force clients to switch over to a healthy server.
|
||||||
func (lb *LoadBalancer) runHealthChecks(ctx context.Context) {
|
func (lb *LoadBalancer) runHealthChecks(ctx context.Context) {
|
||||||
|
previousStatus := map[string]bool{}
|
||||||
wait.Until(func() {
|
wait.Until(func() {
|
||||||
lb.mutex.RLock()
|
lb.mutex.RLock()
|
||||||
defer lb.mutex.RUnlock()
|
defer lb.mutex.RUnlock()
|
||||||
for _, server := range lb.servers {
|
for address, server := range lb.servers {
|
||||||
if !server.healthCheck() {
|
status := server.healthCheck()
|
||||||
|
if status == false && previousStatus[address] == true {
|
||||||
|
// Only close connections when the server transitions from healthy to unhealthy;
|
||||||
|
// we don't want to re-close all the connections every time as we might be ignoring
|
||||||
|
// health checks due to all servers being marked unhealthy.
|
||||||
defer server.closeAll()
|
defer server.closeAll()
|
||||||
}
|
}
|
||||||
|
previousStatus[address] = status
|
||||||
}
|
}
|
||||||
}, time.Second, ctx.Done())
|
}, time.Second, ctx.Done())
|
||||||
logrus.Debugf("Stopped health checking for load balancer %s", lb.serviceName)
|
logrus.Debugf("Stopped health checking for load balancer %s", lb.serviceName)
|
||||||
|
|
|
@ -130,7 +130,7 @@ func (e etcdproxy) createHealthCheck(ctx context.Context, address string) func()
|
||||||
statusCode = resp.StatusCode
|
statusCode = resp.StatusCode
|
||||||
}
|
}
|
||||||
if err != nil || statusCode != http.StatusOK {
|
if err != nil || statusCode != http.StatusOK {
|
||||||
logrus.Debugf("Health check %s failed: %v (StatusCode: %d)", url, err, statusCode)
|
logrus.Debugf("Health check %s failed: %v (StatusCode: %d)", address, err, statusCode)
|
||||||
connected = false
|
connected = false
|
||||||
} else {
|
} else {
|
||||||
connected = true
|
connected = true
|
||||||
|
|
Loading…
Reference in New Issue