diff --git a/pkg/agent/config/config.go b/pkg/agent/config/config.go index dfab68d023..fc0e724154 100644 --- a/pkg/agent/config/config.go +++ b/pkg/agent/config/config.go @@ -375,9 +375,10 @@ func get(ctx context.Context, envInfo *cmds.Agent, proxy proxy.Proxy) (*config.N if controlConfig.SupervisorPort != controlConfig.HTTPSPort { isIPv6 := utilsnet.IsIPv6(net.ParseIP([]string{envInfo.NodeIP.String()}[0])) if err := proxy.SetAPIServerPort(controlConfig.HTTPSPort, isIPv6); err != nil { - return nil, errors.Wrapf(err, "failed to setup access to API Server port %d on at %s", controlConfig.HTTPSPort, proxy.SupervisorURL()) + return nil, errors.Wrapf(err, "failed to set apiserver port to %d", controlConfig.HTTPSPort) } } + apiServerURL := proxy.APIServerURL() var flannelIface *net.Interface if controlConfig.FlannelBackend != config.FlannelBackendNone && len(envInfo.FlannelIface) > 0 { @@ -482,40 +483,53 @@ func get(ctx context.Context, envInfo *cmds.Agent, proxy proxy.Proxy) (*config.N os.Setenv("NODE_NAME", nodeName) + // Ensure that the kubelet's server certificate is valid for all configured node IPs. Note + // that in the case of an external CCM, additional IPs may be added by the infra provider + // that the cert will not be valid for, as they are not present in the list collected here. nodeExternalAndInternalIPs := append(nodeIPs, nodeExternalIPs...) + + // Ask the server to generate a kubelet server cert+key. These files are unique to this node. servingCert, err := getServingCert(nodeName, nodeExternalAndInternalIPs, servingKubeletCert, servingKubeletKey, newNodePasswordFile, info) if err != nil { - return nil, err + return nil, errors.Wrap(err, servingKubeletCert) } + // Ask the server to genrate a kubelet client cert+key. These files are unique to this node. if err := getNodeNamedHostFile(clientKubeletCert, clientKubeletKey, nodeName, nodeIPs, newNodePasswordFile, info); err != nil { - return nil, err + return nil, errors.Wrap(err, clientKubeletCert) } + // Generate a kubeconfig for the kubelet. kubeconfigKubelet := filepath.Join(envInfo.DataDir, "agent", "kubelet.kubeconfig") - if err := deps.KubeConfig(kubeconfigKubelet, proxy.APIServerURL(), serverCAFile, clientKubeletCert, clientKubeletKey); err != nil { + if err := deps.KubeConfig(kubeconfigKubelet, apiServerURL, serverCAFile, clientKubeletCert, clientKubeletKey); err != nil { return nil, err } clientKubeProxyCert := filepath.Join(envInfo.DataDir, "agent", "client-kube-proxy.crt") clientKubeProxyKey := filepath.Join(envInfo.DataDir, "agent", "client-kube-proxy.key") + + // Ask the server to send us its kube-proxy client cert+key. These files are not unique to this node. if err := getHostFile(clientKubeProxyCert, clientKubeProxyKey, info); err != nil { - return nil, err + return nil, errors.Wrap(err, clientKubeProxyCert) } + // Generate a kubeconfig for kube-proxy. kubeconfigKubeproxy := filepath.Join(envInfo.DataDir, "agent", "kubeproxy.kubeconfig") - if err := deps.KubeConfig(kubeconfigKubeproxy, proxy.APIServerURL(), serverCAFile, clientKubeProxyCert, clientKubeProxyKey); err != nil { + if err := deps.KubeConfig(kubeconfigKubeproxy, apiServerURL, serverCAFile, clientKubeProxyCert, clientKubeProxyKey); err != nil { return nil, err } clientK3sControllerCert := filepath.Join(envInfo.DataDir, "agent", "client-"+version.Program+"-controller.crt") clientK3sControllerKey := filepath.Join(envInfo.DataDir, "agent", "client-"+version.Program+"-controller.key") + + // Ask the server to send us its agent controller client cert+key. These files are not unique to this node. if err := getHostFile(clientK3sControllerCert, clientK3sControllerKey, info); err != nil { - return nil, err + return nil, errors.Wrap(err, clientK3sControllerCert) } + // Generate a kubeconfig for the agent controller. kubeconfigK3sController := filepath.Join(envInfo.DataDir, "agent", version.Program+"controller.kubeconfig") - if err := deps.KubeConfig(kubeconfigK3sController, proxy.APIServerURL(), serverCAFile, clientK3sControllerCert, clientK3sControllerKey); err != nil { + if err := deps.KubeConfig(kubeconfigK3sController, apiServerURL, serverCAFile, clientK3sControllerCert, clientK3sControllerKey); err != nil { return nil, err } diff --git a/pkg/agent/loadbalancer/loadbalancer.go b/pkg/agent/loadbalancer/loadbalancer.go index feddb4d872..36019470c8 100644 --- a/pkg/agent/loadbalancer/loadbalancer.go +++ b/pkg/agent/loadbalancer/loadbalancer.go @@ -172,6 +172,11 @@ func (lb *LoadBalancer) dialContext(ctx context.Context, network, _ string) (net return conn, nil } logrus.Debugf("Dial error from load balancer %s: %s", lb.serviceName, err) + // Don't close connections to the failed server if we're retrying with health checks ignored. + // We don't want to disrupt active connections if it is unlikely they will have anywhere to go. + if !allChecksFailed { + defer server.closeAll() + } } newServer, err := lb.nextServer(targetServer) @@ -179,7 +184,7 @@ func (lb *LoadBalancer) dialContext(ctx context.Context, network, _ string) (net return nil, err } if targetServer != newServer { - logrus.Debugf("Failed over to new server for load balancer %s: %s", lb.serviceName, newServer) + logrus.Debugf("Failed over to new server for load balancer %s: %s -> %s", lb.serviceName, targetServer, newServer) } if ctx.Err() != nil { return nil, ctx.Err() diff --git a/pkg/agent/proxy/apiproxy.go b/pkg/agent/proxy/apiproxy.go index becc2a0def..e711623e46 100644 --- a/pkg/agent/proxy/apiproxy.go +++ b/pkg/agent/proxy/apiproxy.go @@ -63,6 +63,7 @@ func NewSupervisorProxy(ctx context.Context, lbEnabled bool, dataDir, supervisor p.fallbackSupervisorAddress = u.Host p.supervisorPort = u.Port() + logrus.Debugf("Supervisor proxy using supervisor=%s apiserver=%s lb=%v", p.supervisorURL, p.apiServerURL, p.lbEnabled) return &p, nil } @@ -132,6 +133,11 @@ func (p *proxy) setSupervisorPort(addresses []string) []string { // load-balancer, and the address of this load-balancer is returned instead of the actual apiserver // addresses. func (p *proxy) SetAPIServerPort(port int, isIPv6 bool) error { + if p.apiServerEnabled { + logrus.Debugf("Supervisor proxy apiserver port already set") + return nil + } + u, err := url.Parse(p.initialSupervisorURL) if err != nil { return errors.Wrapf(err, "failed to parse server URL %s", p.initialSupervisorURL) @@ -139,22 +145,23 @@ func (p *proxy) SetAPIServerPort(port int, isIPv6 bool) error { p.apiServerPort = strconv.Itoa(port) u.Host = sysnet.JoinHostPort(u.Hostname(), p.apiServerPort) - p.apiServerURL = u.String() - p.apiServerEnabled = true - if p.lbEnabled && p.apiServerLB == nil { lbServerPort := p.lbServerPort if lbServerPort != 0 { lbServerPort = lbServerPort - 1 } - lb, err := loadbalancer.New(p.context, p.dataDir, loadbalancer.APIServerServiceName, p.apiServerURL, lbServerPort, isIPv6) + lb, err := loadbalancer.New(p.context, p.dataDir, loadbalancer.APIServerServiceName, u.String(), lbServerPort, isIPv6) if err != nil { return err } - p.apiServerURL = lb.LoadBalancerServerURL() p.apiServerLB = lb + p.apiServerURL = lb.LoadBalancerServerURL() + } else { + p.apiServerURL = u.String() } + logrus.Debugf("Supervisor proxy apiserver port changed; apiserver=%s lb=%v", p.apiServerURL, p.lbEnabled) + p.apiServerEnabled = true return nil } diff --git a/pkg/agent/tunnel/tunnel.go b/pkg/agent/tunnel/tunnel.go index 6245d52910..23c6dac404 100644 --- a/pkg/agent/tunnel/tunnel.go +++ b/pkg/agent/tunnel/tunnel.go @@ -451,7 +451,7 @@ func (a *agentTunnel) connect(rootCtx context.Context, waitGroup *sync.WaitGroup err := remotedialer.ConnectToProxy(ctx, wsURL, nil, auth, ws, onConnect) connected = false if err != nil && !errors.Is(err, context.Canceled) { - logrus.WithField("url", wsURL).WithError(err).Error("Remotedialer proxy error; reconecting...") + logrus.WithField("url", wsURL).WithError(err).Error("Remotedialer proxy error; reconnecting...") // wait between reconnection attempts to avoid hammering the server time.Sleep(endpointDebounceDelay) }