Fix bug that caused agents to bypass local loadbalancer

If proxy.SetAPIServerPort was called multiple times, all calls after the
first one would cause the apiserver address to be set to the default
server address, bypassing the local load-balancer. This was most likely
to occur on RKE2, where the supervisor may be up for a period of time
before it is ready to manage node password secrets, causing the agent
to retry.

Signed-off-by: Brad Davidson <brad.davidson@rancher.com>
pull/10296/head
Brad Davidson 2024-06-03 20:40:09 +00:00 committed by Brad Davidson
parent 79ba10f5ec
commit 1661f1024a
4 changed files with 41 additions and 15 deletions

View File

@ -375,9 +375,10 @@ func get(ctx context.Context, envInfo *cmds.Agent, proxy proxy.Proxy) (*config.N
if controlConfig.SupervisorPort != controlConfig.HTTPSPort {
isIPv6 := utilsnet.IsIPv6(net.ParseIP([]string{envInfo.NodeIP.String()}[0]))
if err := proxy.SetAPIServerPort(controlConfig.HTTPSPort, isIPv6); err != nil {
return nil, errors.Wrapf(err, "failed to setup access to API Server port %d on at %s", controlConfig.HTTPSPort, proxy.SupervisorURL())
return nil, errors.Wrapf(err, "failed to set apiserver port to %d", controlConfig.HTTPSPort)
}
}
apiServerURL := proxy.APIServerURL()
var flannelIface *net.Interface
if controlConfig.FlannelBackend != config.FlannelBackendNone && len(envInfo.FlannelIface) > 0 {
@ -482,40 +483,53 @@ func get(ctx context.Context, envInfo *cmds.Agent, proxy proxy.Proxy) (*config.N
os.Setenv("NODE_NAME", nodeName)
// Ensure that the kubelet's server certificate is valid for all configured node IPs. Note
// that in the case of an external CCM, additional IPs may be added by the infra provider
// that the cert will not be valid for, as they are not present in the list collected here.
nodeExternalAndInternalIPs := append(nodeIPs, nodeExternalIPs...)
// Ask the server to generate a kubelet server cert+key. These files are unique to this node.
servingCert, err := getServingCert(nodeName, nodeExternalAndInternalIPs, servingKubeletCert, servingKubeletKey, newNodePasswordFile, info)
if err != nil {
return nil, err
return nil, errors.Wrap(err, servingKubeletCert)
}
// Ask the server to genrate a kubelet client cert+key. These files are unique to this node.
if err := getNodeNamedHostFile(clientKubeletCert, clientKubeletKey, nodeName, nodeIPs, newNodePasswordFile, info); err != nil {
return nil, err
return nil, errors.Wrap(err, clientKubeletCert)
}
// Generate a kubeconfig for the kubelet.
kubeconfigKubelet := filepath.Join(envInfo.DataDir, "agent", "kubelet.kubeconfig")
if err := deps.KubeConfig(kubeconfigKubelet, proxy.APIServerURL(), serverCAFile, clientKubeletCert, clientKubeletKey); err != nil {
if err := deps.KubeConfig(kubeconfigKubelet, apiServerURL, serverCAFile, clientKubeletCert, clientKubeletKey); err != nil {
return nil, err
}
clientKubeProxyCert := filepath.Join(envInfo.DataDir, "agent", "client-kube-proxy.crt")
clientKubeProxyKey := filepath.Join(envInfo.DataDir, "agent", "client-kube-proxy.key")
// Ask the server to send us its kube-proxy client cert+key. These files are not unique to this node.
if err := getHostFile(clientKubeProxyCert, clientKubeProxyKey, info); err != nil {
return nil, err
return nil, errors.Wrap(err, clientKubeProxyCert)
}
// Generate a kubeconfig for kube-proxy.
kubeconfigKubeproxy := filepath.Join(envInfo.DataDir, "agent", "kubeproxy.kubeconfig")
if err := deps.KubeConfig(kubeconfigKubeproxy, proxy.APIServerURL(), serverCAFile, clientKubeProxyCert, clientKubeProxyKey); err != nil {
if err := deps.KubeConfig(kubeconfigKubeproxy, apiServerURL, serverCAFile, clientKubeProxyCert, clientKubeProxyKey); err != nil {
return nil, err
}
clientK3sControllerCert := filepath.Join(envInfo.DataDir, "agent", "client-"+version.Program+"-controller.crt")
clientK3sControllerKey := filepath.Join(envInfo.DataDir, "agent", "client-"+version.Program+"-controller.key")
// Ask the server to send us its agent controller client cert+key. These files are not unique to this node.
if err := getHostFile(clientK3sControllerCert, clientK3sControllerKey, info); err != nil {
return nil, err
return nil, errors.Wrap(err, clientK3sControllerCert)
}
// Generate a kubeconfig for the agent controller.
kubeconfigK3sController := filepath.Join(envInfo.DataDir, "agent", version.Program+"controller.kubeconfig")
if err := deps.KubeConfig(kubeconfigK3sController, proxy.APIServerURL(), serverCAFile, clientK3sControllerCert, clientK3sControllerKey); err != nil {
if err := deps.KubeConfig(kubeconfigK3sController, apiServerURL, serverCAFile, clientK3sControllerCert, clientK3sControllerKey); err != nil {
return nil, err
}

View File

@ -172,6 +172,11 @@ func (lb *LoadBalancer) dialContext(ctx context.Context, network, _ string) (net
return conn, nil
}
logrus.Debugf("Dial error from load balancer %s: %s", lb.serviceName, err)
// Don't close connections to the failed server if we're retrying with health checks ignored.
// We don't want to disrupt active connections if it is unlikely they will have anywhere to go.
if !allChecksFailed {
defer server.closeAll()
}
}
newServer, err := lb.nextServer(targetServer)
@ -179,7 +184,7 @@ func (lb *LoadBalancer) dialContext(ctx context.Context, network, _ string) (net
return nil, err
}
if targetServer != newServer {
logrus.Debugf("Failed over to new server for load balancer %s: %s", lb.serviceName, newServer)
logrus.Debugf("Failed over to new server for load balancer %s: %s -> %s", lb.serviceName, targetServer, newServer)
}
if ctx.Err() != nil {
return nil, ctx.Err()

View File

@ -63,6 +63,7 @@ func NewSupervisorProxy(ctx context.Context, lbEnabled bool, dataDir, supervisor
p.fallbackSupervisorAddress = u.Host
p.supervisorPort = u.Port()
logrus.Debugf("Supervisor proxy using supervisor=%s apiserver=%s lb=%v", p.supervisorURL, p.apiServerURL, p.lbEnabled)
return &p, nil
}
@ -132,6 +133,11 @@ func (p *proxy) setSupervisorPort(addresses []string) []string {
// load-balancer, and the address of this load-balancer is returned instead of the actual apiserver
// addresses.
func (p *proxy) SetAPIServerPort(port int, isIPv6 bool) error {
if p.apiServerEnabled {
logrus.Debugf("Supervisor proxy apiserver port already set")
return nil
}
u, err := url.Parse(p.initialSupervisorURL)
if err != nil {
return errors.Wrapf(err, "failed to parse server URL %s", p.initialSupervisorURL)
@ -139,22 +145,23 @@ func (p *proxy) SetAPIServerPort(port int, isIPv6 bool) error {
p.apiServerPort = strconv.Itoa(port)
u.Host = sysnet.JoinHostPort(u.Hostname(), p.apiServerPort)
p.apiServerURL = u.String()
p.apiServerEnabled = true
if p.lbEnabled && p.apiServerLB == nil {
lbServerPort := p.lbServerPort
if lbServerPort != 0 {
lbServerPort = lbServerPort - 1
}
lb, err := loadbalancer.New(p.context, p.dataDir, loadbalancer.APIServerServiceName, p.apiServerURL, lbServerPort, isIPv6)
lb, err := loadbalancer.New(p.context, p.dataDir, loadbalancer.APIServerServiceName, u.String(), lbServerPort, isIPv6)
if err != nil {
return err
}
p.apiServerURL = lb.LoadBalancerServerURL()
p.apiServerLB = lb
p.apiServerURL = lb.LoadBalancerServerURL()
} else {
p.apiServerURL = u.String()
}
logrus.Debugf("Supervisor proxy apiserver port changed; apiserver=%s lb=%v", p.apiServerURL, p.lbEnabled)
p.apiServerEnabled = true
return nil
}

View File

@ -451,7 +451,7 @@ func (a *agentTunnel) connect(rootCtx context.Context, waitGroup *sync.WaitGroup
err := remotedialer.ConnectToProxy(ctx, wsURL, nil, auth, ws, onConnect)
connected = false
if err != nil && !errors.Is(err, context.Canceled) {
logrus.WithField("url", wsURL).WithError(err).Error("Remotedialer proxy error; reconecting...")
logrus.WithField("url", wsURL).WithError(err).Error("Remotedialer proxy error; reconnecting...")
// wait between reconnection attempts to avoid hammering the server
time.Sleep(endpointDebounceDelay)
}