2019-07-24 07:22:31 +00:00
|
|
|
package loadbalancer
|
|
|
|
|
|
|
|
import (
|
2023-03-31 20:51:27 +00:00
|
|
|
"context"
|
2024-01-02 20:05:36 +00:00
|
|
|
"fmt"
|
2019-07-24 07:22:31 +00:00
|
|
|
"math/rand"
|
2023-03-31 20:51:27 +00:00
|
|
|
"net"
|
2024-01-02 20:05:36 +00:00
|
|
|
"net/url"
|
|
|
|
"os"
|
2024-11-14 02:04:37 +00:00
|
|
|
"slices"
|
2024-01-02 20:05:36 +00:00
|
|
|
"strconv"
|
2024-03-19 22:01:36 +00:00
|
|
|
"time"
|
2024-01-02 20:05:36 +00:00
|
|
|
|
|
|
|
"github.com/k3s-io/k3s/pkg/version"
|
|
|
|
http_dialer "github.com/mwitkow/go-http-dialer"
|
2024-01-11 20:26:47 +00:00
|
|
|
"github.com/pkg/errors"
|
|
|
|
"golang.org/x/net/http/httpproxy"
|
2024-01-02 20:05:36 +00:00
|
|
|
"golang.org/x/net/proxy"
|
2023-03-31 20:51:27 +00:00
|
|
|
|
|
|
|
"github.com/sirupsen/logrus"
|
|
|
|
"k8s.io/apimachinery/pkg/util/sets"
|
2024-03-19 22:01:36 +00:00
|
|
|
"k8s.io/apimachinery/pkg/util/wait"
|
2019-07-24 07:22:31 +00:00
|
|
|
)
|
|
|
|
|
2024-11-14 02:04:37 +00:00
|
|
|
var defaultDialer proxy.Dialer = &net.Dialer{
|
|
|
|
Timeout: 10 * time.Second,
|
|
|
|
KeepAlive: 30 * time.Second,
|
|
|
|
}
|
2024-01-02 20:05:36 +00:00
|
|
|
|
2024-01-11 20:26:47 +00:00
|
|
|
// SetHTTPProxy configures a proxy-enabled dialer to be used for all loadbalancer connections,
|
|
|
|
// if the agent has been configured to allow use of a HTTP proxy, and the environment has been configured
|
|
|
|
// to indicate use of a HTTP proxy for the server URL.
|
|
|
|
func SetHTTPProxy(address string) error {
|
2024-01-02 20:05:36 +00:00
|
|
|
// Check if env variable for proxy is set
|
2024-01-11 20:26:47 +00:00
|
|
|
if useProxy, _ := strconv.ParseBool(os.Getenv(version.ProgramUpper + "_AGENT_HTTP_PROXY_ALLOWED")); !useProxy || address == "" {
|
|
|
|
return nil
|
2024-01-02 20:05:36 +00:00
|
|
|
}
|
|
|
|
|
2024-01-11 20:26:47 +00:00
|
|
|
serverURL, err := url.Parse(address)
|
2024-01-02 20:05:36 +00:00
|
|
|
if err != nil {
|
2024-01-11 20:26:47 +00:00
|
|
|
return errors.Wrapf(err, "failed to parse address %s", address)
|
2024-01-02 20:05:36 +00:00
|
|
|
}
|
2024-01-11 20:26:47 +00:00
|
|
|
|
|
|
|
// Call this directly instead of using the cached environment used by http.ProxyFromEnvironment to allow for testing
|
|
|
|
proxyFromEnvironment := httpproxy.FromEnvironment().ProxyFunc()
|
|
|
|
proxyURL, err := proxyFromEnvironment(serverURL)
|
2024-01-02 20:05:36 +00:00
|
|
|
if err != nil {
|
2024-01-11 20:26:47 +00:00
|
|
|
return errors.Wrapf(err, "failed to get proxy for address %s", address)
|
|
|
|
}
|
|
|
|
if proxyURL == nil {
|
|
|
|
logrus.Debug(version.ProgramUpper + "_AGENT_HTTP_PROXY_ALLOWED is true but no proxy is configured for URL " + serverURL.String())
|
|
|
|
return nil
|
2024-01-02 20:05:36 +00:00
|
|
|
}
|
|
|
|
|
2024-11-14 02:04:37 +00:00
|
|
|
dialer, err := proxyDialer(proxyURL, defaultDialer)
|
2024-01-11 20:26:47 +00:00
|
|
|
if err != nil {
|
|
|
|
return errors.Wrapf(err, "failed to create proxy dialer for %s", proxyURL)
|
2024-01-02 20:05:36 +00:00
|
|
|
}
|
2024-01-11 20:26:47 +00:00
|
|
|
|
|
|
|
defaultDialer = dialer
|
|
|
|
logrus.Debugf("Using proxy %s for agent connection to %s", proxyURL, serverURL)
|
|
|
|
return nil
|
2024-01-02 20:05:36 +00:00
|
|
|
}
|
2023-03-31 20:51:27 +00:00
|
|
|
|
2019-07-24 07:22:31 +00:00
|
|
|
func (lb *LoadBalancer) setServers(serverAddresses []string) bool {
|
2024-11-14 02:04:37 +00:00
|
|
|
serverAddresses, hasDefaultServer := sortServers(serverAddresses, lb.defaultServerAddress)
|
2019-07-24 07:22:31 +00:00
|
|
|
if len(serverAddresses) == 0 {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
|
|
|
lb.mutex.Lock()
|
|
|
|
defer lb.mutex.Unlock()
|
|
|
|
|
2023-03-31 20:51:27 +00:00
|
|
|
newAddresses := sets.NewString(serverAddresses...)
|
|
|
|
curAddresses := sets.NewString(lb.ServerAddresses...)
|
|
|
|
if newAddresses.Equal(curAddresses) {
|
2019-07-24 07:22:31 +00:00
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
2023-03-31 20:51:27 +00:00
|
|
|
for addedServer := range newAddresses.Difference(curAddresses) {
|
|
|
|
logrus.Infof("Adding server to load balancer %s: %s", lb.serviceName, addedServer)
|
2024-03-19 22:01:36 +00:00
|
|
|
lb.servers[addedServer] = &server{
|
|
|
|
address: addedServer,
|
|
|
|
connections: make(map[net.Conn]struct{}),
|
|
|
|
healthCheck: func() bool { return true },
|
|
|
|
}
|
2023-03-31 20:51:27 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
for removedServer := range curAddresses.Difference(newAddresses) {
|
|
|
|
server := lb.servers[removedServer]
|
|
|
|
if server != nil {
|
|
|
|
logrus.Infof("Removing server from load balancer %s: %s", lb.serviceName, removedServer)
|
|
|
|
// Defer closing connections until after the new server list has been put into place.
|
|
|
|
// Closing open connections ensures that anything stuck retrying on a stale server is forced
|
|
|
|
// over to a valid endpoint.
|
|
|
|
defer server.closeAll()
|
|
|
|
// Don't delete the default server from the server map, in case we need to fall back to it.
|
|
|
|
if removedServer != lb.defaultServerAddress {
|
|
|
|
delete(lb.servers, removedServer)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-07-24 07:22:31 +00:00
|
|
|
lb.ServerAddresses = serverAddresses
|
|
|
|
lb.randomServers = append([]string{}, lb.ServerAddresses...)
|
|
|
|
rand.Shuffle(len(lb.randomServers), func(i, j int) {
|
|
|
|
lb.randomServers[i], lb.randomServers[j] = lb.randomServers[j], lb.randomServers[i]
|
|
|
|
})
|
2024-11-14 02:04:37 +00:00
|
|
|
// If the current server list does not contain the default server address,
|
|
|
|
// we want to include it in the random server list so that it can be tried if necessary.
|
|
|
|
// However, it should be treated as always failing health checks so that it is only
|
|
|
|
// used if all other endpoints are unavailable.
|
|
|
|
if !hasDefaultServer {
|
2022-02-16 22:19:58 +00:00
|
|
|
lb.randomServers = append(lb.randomServers, lb.defaultServerAddress)
|
2024-11-14 02:04:37 +00:00
|
|
|
if defaultServer, ok := lb.servers[lb.defaultServerAddress]; ok {
|
|
|
|
defaultServer.healthCheck = func() bool { return false }
|
|
|
|
lb.servers[lb.defaultServerAddress] = defaultServer
|
|
|
|
}
|
2019-07-24 07:22:31 +00:00
|
|
|
}
|
|
|
|
lb.currentServerAddress = lb.randomServers[0]
|
|
|
|
lb.nextServerIndex = 1
|
|
|
|
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
|
2024-07-12 16:03:15 +00:00
|
|
|
// nextServer attempts to get the next server in the loadbalancer server list.
|
|
|
|
// If another goroutine has already updated the current server address to point at
|
|
|
|
// a different address than just failed, nothing is changed. Otherwise, a new server address
|
|
|
|
// is stored to the currentServerAddress field, and returned for use.
|
|
|
|
// This function must always be called by a goroutine that holds a read lock on the loadbalancer mutex.
|
2019-07-24 07:22:31 +00:00
|
|
|
func (lb *LoadBalancer) nextServer(failedServer string) (string, error) {
|
2024-06-06 23:47:27 +00:00
|
|
|
// note: these fields are not protected by the mutex, so we clamp the index value and update
|
|
|
|
// the index/current address using local variables, to avoid time-of-check vs time-of-use
|
|
|
|
// race conditions caused by goroutine A incrementing it in between the time goroutine B
|
|
|
|
// validates its value, and uses it as a list index.
|
|
|
|
currentServerAddress := lb.currentServerAddress
|
|
|
|
nextServerIndex := lb.nextServerIndex
|
|
|
|
|
2019-07-24 07:22:31 +00:00
|
|
|
if len(lb.randomServers) == 0 {
|
|
|
|
return "", errors.New("No servers in load balancer proxy list")
|
|
|
|
}
|
|
|
|
if len(lb.randomServers) == 1 {
|
2024-06-06 23:47:27 +00:00
|
|
|
return currentServerAddress, nil
|
2019-07-24 07:22:31 +00:00
|
|
|
}
|
2024-06-06 23:47:27 +00:00
|
|
|
if failedServer != currentServerAddress {
|
|
|
|
return currentServerAddress, nil
|
2019-07-24 07:22:31 +00:00
|
|
|
}
|
2024-06-06 23:47:27 +00:00
|
|
|
if nextServerIndex >= len(lb.randomServers) {
|
|
|
|
nextServerIndex = 0
|
2019-07-24 07:22:31 +00:00
|
|
|
}
|
|
|
|
|
2024-06-06 23:47:27 +00:00
|
|
|
currentServerAddress = lb.randomServers[nextServerIndex]
|
|
|
|
nextServerIndex++
|
|
|
|
|
|
|
|
lb.currentServerAddress = currentServerAddress
|
|
|
|
lb.nextServerIndex = nextServerIndex
|
2019-07-24 07:22:31 +00:00
|
|
|
|
2024-06-06 23:47:27 +00:00
|
|
|
return currentServerAddress, nil
|
2019-07-24 07:22:31 +00:00
|
|
|
}
|
2023-03-31 20:51:27 +00:00
|
|
|
|
2024-01-02 20:05:36 +00:00
|
|
|
// dialContext dials a new connection using the environment's proxy settings, and adds its wrapped connection to the map
|
2023-03-31 20:51:27 +00:00
|
|
|
func (s *server) dialContext(ctx context.Context, network, address string) (net.Conn, error) {
|
2024-01-02 20:05:36 +00:00
|
|
|
conn, err := defaultDialer.Dial(network, address)
|
2023-03-31 20:51:27 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
2024-01-02 20:05:36 +00:00
|
|
|
|
|
|
|
// Wrap the connection and add it to the server's connection map
|
2023-03-31 20:51:27 +00:00
|
|
|
s.mutex.Lock()
|
|
|
|
defer s.mutex.Unlock()
|
|
|
|
|
2024-01-02 20:05:36 +00:00
|
|
|
wrappedConn := &serverConn{server: s, Conn: conn}
|
|
|
|
s.connections[wrappedConn] = struct{}{}
|
|
|
|
return wrappedConn, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// proxyDialer creates a new proxy.Dialer that routes connections through the specified proxy.
|
2024-11-14 02:04:37 +00:00
|
|
|
func proxyDialer(proxyURL *url.URL, forward proxy.Dialer) (proxy.Dialer, error) {
|
2024-01-02 20:05:36 +00:00
|
|
|
if proxyURL.Scheme == "http" || proxyURL.Scheme == "https" {
|
|
|
|
// Create a new HTTP proxy dialer
|
2024-11-14 02:04:37 +00:00
|
|
|
httpProxyDialer := http_dialer.New(proxyURL, http_dialer.WithDialer(forward.(*net.Dialer)))
|
2024-01-02 20:05:36 +00:00
|
|
|
return httpProxyDialer, nil
|
|
|
|
} else if proxyURL.Scheme == "socks5" {
|
|
|
|
// For SOCKS5 proxies, use the proxy package's FromURL
|
2024-11-14 02:04:37 +00:00
|
|
|
return proxy.FromURL(proxyURL, forward)
|
2024-01-02 20:05:36 +00:00
|
|
|
}
|
|
|
|
return nil, fmt.Errorf("unsupported proxy scheme: %s", proxyURL.Scheme)
|
2023-03-31 20:51:27 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// closeAll closes all connections to the server, and removes their entries from the map
|
|
|
|
func (s *server) closeAll() {
|
|
|
|
s.mutex.Lock()
|
|
|
|
defer s.mutex.Unlock()
|
|
|
|
|
2024-03-19 22:01:36 +00:00
|
|
|
if l := len(s.connections); l > 0 {
|
|
|
|
logrus.Infof("Closing %d connections to load balancer server %s", len(s.connections), s.address)
|
|
|
|
for conn := range s.connections {
|
|
|
|
// Close the connection in a goroutine so that we don't hold the lock while doing so.
|
|
|
|
go conn.Close()
|
|
|
|
}
|
2023-03-31 20:51:27 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Close removes the connection entry from the server's connection map, and
|
|
|
|
// closes the wrapped connection.
|
|
|
|
func (sc *serverConn) Close() error {
|
|
|
|
sc.server.mutex.Lock()
|
|
|
|
defer sc.server.mutex.Unlock()
|
|
|
|
|
|
|
|
delete(sc.server.connections, sc)
|
|
|
|
return sc.Conn.Close()
|
|
|
|
}
|
2024-03-19 22:01:36 +00:00
|
|
|
|
|
|
|
// SetDefault sets the selected address as the default / fallback address
|
|
|
|
func (lb *LoadBalancer) SetDefault(serverAddress string) {
|
|
|
|
lb.mutex.Lock()
|
|
|
|
defer lb.mutex.Unlock()
|
|
|
|
|
2024-11-14 02:04:37 +00:00
|
|
|
hasDefaultServer := slices.Contains(lb.ServerAddresses, lb.defaultServerAddress)
|
2024-03-19 22:01:36 +00:00
|
|
|
// if the old default server is not currently in use, remove it from the server map
|
2024-11-14 02:04:37 +00:00
|
|
|
if server := lb.servers[lb.defaultServerAddress]; server != nil && !hasDefaultServer {
|
2024-03-19 22:01:36 +00:00
|
|
|
defer server.closeAll()
|
|
|
|
delete(lb.servers, lb.defaultServerAddress)
|
|
|
|
}
|
2024-11-14 02:04:37 +00:00
|
|
|
// if the new default server doesn't have an entry in the map, add one - but
|
|
|
|
// with a failing health check so that it is only used as a last resort.
|
2024-03-19 22:01:36 +00:00
|
|
|
if _, ok := lb.servers[serverAddress]; !ok {
|
|
|
|
lb.servers[serverAddress] = &server{
|
|
|
|
address: serverAddress,
|
2024-11-14 02:04:37 +00:00
|
|
|
healthCheck: func() bool { return false },
|
2024-03-19 22:01:36 +00:00
|
|
|
connections: make(map[net.Conn]struct{}),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
lb.defaultServerAddress = serverAddress
|
|
|
|
logrus.Infof("Updated load balancer %s default server address -> %s", lb.serviceName, serverAddress)
|
|
|
|
}
|
|
|
|
|
|
|
|
// SetHealthCheck adds a health-check callback to an address, replacing the default no-op function.
|
|
|
|
func (lb *LoadBalancer) SetHealthCheck(address string, healthCheck func() bool) {
|
|
|
|
lb.mutex.Lock()
|
|
|
|
defer lb.mutex.Unlock()
|
|
|
|
|
|
|
|
if server := lb.servers[address]; server != nil {
|
|
|
|
logrus.Debugf("Added health check for load balancer %s: %s", lb.serviceName, address)
|
|
|
|
server.healthCheck = healthCheck
|
|
|
|
} else {
|
|
|
|
logrus.Errorf("Failed to add health check for load balancer %s: no server found for %s", lb.serviceName, address)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// runHealthChecks periodically health-checks all servers. Any servers that fail the health-check will have their
|
|
|
|
// connections closed, to force clients to switch over to a healthy server.
|
|
|
|
func (lb *LoadBalancer) runHealthChecks(ctx context.Context) {
|
2024-05-29 18:17:29 +00:00
|
|
|
previousStatus := map[string]bool{}
|
2024-03-19 22:01:36 +00:00
|
|
|
wait.Until(func() {
|
|
|
|
lb.mutex.RLock()
|
|
|
|
defer lb.mutex.RUnlock()
|
2024-11-14 02:04:37 +00:00
|
|
|
var healthyServerExists bool
|
2024-05-29 18:17:29 +00:00
|
|
|
for address, server := range lb.servers {
|
|
|
|
status := server.healthCheck()
|
2024-11-14 02:04:37 +00:00
|
|
|
healthyServerExists = healthyServerExists || status
|
2024-05-29 18:17:29 +00:00
|
|
|
if status == false && previousStatus[address] == true {
|
|
|
|
// Only close connections when the server transitions from healthy to unhealthy;
|
|
|
|
// we don't want to re-close all the connections every time as we might be ignoring
|
|
|
|
// health checks due to all servers being marked unhealthy.
|
2024-03-19 22:01:36 +00:00
|
|
|
defer server.closeAll()
|
|
|
|
}
|
2024-05-29 18:17:29 +00:00
|
|
|
previousStatus[address] = status
|
2024-03-19 22:01:36 +00:00
|
|
|
}
|
2024-11-14 02:04:37 +00:00
|
|
|
|
|
|
|
// If there is at least one healthy server, and the default server is not in the server list,
|
|
|
|
// close all the connections to the default server so that clients reconnect and switch over
|
|
|
|
// to a preferred server.
|
|
|
|
hasDefaultServer := slices.Contains(lb.ServerAddresses, lb.defaultServerAddress)
|
|
|
|
if healthyServerExists && !hasDefaultServer {
|
|
|
|
if server, ok := lb.servers[lb.defaultServerAddress]; ok {
|
|
|
|
defer server.closeAll()
|
|
|
|
}
|
|
|
|
}
|
2024-03-19 22:01:36 +00:00
|
|
|
}, time.Second, ctx.Done())
|
|
|
|
logrus.Debugf("Stopped health checking for load balancer %s", lb.serviceName)
|
|
|
|
}
|