diff --git a/pkg/cluster/managed.go b/pkg/cluster/managed.go index c47923dafa..3d4c5faf8b 100644 --- a/pkg/cluster/managed.go +++ b/pkg/cluster/managed.go @@ -144,7 +144,8 @@ func (c *Cluster) assignManagedDriver(ctx context.Context) error { return nil } -// setupEtcdProxy +// setupEtcdProxy periodically updates the etcd proxy with the current list of +// cluster client URLs, as retrieved from etcd. func (c *Cluster) setupEtcdProxy(ctx context.Context, etcdProxy etcd.Proxy) { if c.managedDB == nil { return diff --git a/pkg/daemons/executor/etcd.go b/pkg/daemons/executor/etcd.go index 8fa8e337c8..dde8e496bf 100644 --- a/pkg/daemons/executor/etcd.go +++ b/pkg/daemons/executor/etcd.go @@ -38,11 +38,13 @@ func (e *Embedded) ETCD(ctx context.Context, args ETCDConfig, extraArgs []string if errors.Is(err, rafthttp.ErrMemberRemoved) { tombstoneFile := filepath.Join(args.DataDir, "tombstone") if err := os.WriteFile(tombstoneFile, []byte{}, 0600); err != nil { - logrus.Fatalf("failed to write tombstone file to %s", tombstoneFile) + logrus.Fatalf("Failed to write tombstone file to %s: %v", tombstoneFile, err) } - logrus.Infof("this node has been removed from the cluster please restart %s to rejoin the cluster", version.Program) + etcd.Close() + logrus.Infof("This node has been removed from the cluster - please restart %s to rejoin the cluster", version.Program) return } + logrus.Errorf("etcd error: %v", err) case <-ctx.Done(): logrus.Infof("stopping etcd") etcd.Close() diff --git a/pkg/etcd/etcd.go b/pkg/etcd/etcd.go index 8e30b74b0f..6389641b25 100644 --- a/pkg/etcd/etcd.go +++ b/pkg/etcd/etcd.go @@ -554,15 +554,17 @@ func (e *ETCD) Register(ctx context.Context, config *config.Control, handler htt } // The apiserver endpoint controller needs to run on a node with a local apiserver, - // in order to successfully seed etcd with the endpoint list. + // in order to successfully seed etcd with the endpoint list. 
The member removal controller + // also needs to run on a non-etcd node so as to avoid disruption if running on the node that + // is being removed from the cluster. if !e.config.DisableAPIServer { - e.config.Runtime.LeaderElectedClusterControllerStarts["etcd-apiserver-endpoints"] = func(ctx context.Context) { + e.config.Runtime.LeaderElectedClusterControllerStarts[version.Program+"-etcd"] = func(ctx context.Context) { registerEndpointsHandlers(ctx, e) + registerMemberHandlers(ctx, e) } } - // The etcd member-removal controllers should only run on an etcd node. Tombstone file checking - // is also unnecessary if we're not running etcd. + // Tombstone file checking is unnecessary if we're not running etcd. if !e.config.DisableETCD { tombstoneFile := filepath.Join(DBDir(e.config), "tombstone") if _, err := os.Stat(tombstoneFile); err == nil { @@ -575,10 +577,6 @@ func (e *ETCD) Register(ctx context.Context, config *config.Control, handler htt if err := e.setName(false); err != nil { return nil, err } - - e.config.Runtime.LeaderElectedClusterControllerStarts["etcd-member-removal"] = func(ctx context.Context) { - registerMemberHandlers(ctx, e) - } } return e.handler(handler), nil @@ -666,6 +664,8 @@ func getClientConfig(ctx context.Context, control *config.Control, endpoints ... 
DialTimeout: defaultDialTimeout, DialKeepAliveTime: defaultKeepAliveTime, DialKeepAliveTimeout: defaultKeepAliveTimeout, + AutoSyncInterval: defaultKeepAliveTimeout, + PermitWithoutStream: true, } var err error @@ -2126,21 +2126,7 @@ func GetAPIServerURLsFromETCD(ctx context.Context, cfg *config.Control) ([]strin // GetMembersClientURLs will list through the member lists in etcd and return // back a combined list of client urls for each member in the cluster func (e *ETCD) GetMembersClientURLs(ctx context.Context) ([]string, error) { - ctx, cancel := context.WithTimeout(ctx, testTimeout) - defer cancel() - - members, err := e.client.MemberList(ctx) - if err != nil { - return nil, err - } - - var memberUrls []string - for _, member := range members.Members { - for _, clientURL := range member.ClientURLs { - memberUrls = append(memberUrls, string(clientURL)) - } - } - return memberUrls, nil + return e.client.Endpoints(), nil } // GetMembersNames will list through the member lists in etcd and return diff --git a/pkg/server/etcd.go b/pkg/server/etcd.go deleted file mode 100644 index 127a50426c..0000000000 --- a/pkg/server/etcd.go +++ /dev/null @@ -1,103 +0,0 @@ -package server - -import ( - "context" - "os" - "path/filepath" - "time" - - "github.com/k3s-io/k3s/pkg/etcd" - "github.com/sirupsen/logrus" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" -) - -// setETCDLabelsAndAnnotations will set the etcd role label if not exists also it -// sets special annotations on the node object which are etcd node id and etcd node -// address, the function will also remove the controlplane and master role labels if -// they exist on the node -func setETCDLabelsAndAnnotations(ctx context.Context, config *Config) error { - <-config.ControlConfig.Runtime.APIServerReady - t := time.NewTicker(5 * time.Second) - defer t.Stop() - for range t.C { - controlConfig := &config.ControlConfig - - sc, err := NewContext(ctx, controlConfig.Runtime.KubeConfigAdmin) - if err != nil { - 
logrus.Infof("Failed to set etcd role label: %v", err) - continue - } - - if err := sc.Start(ctx); err != nil { - logrus.Infof("Failed to set etcd role label: %v", err) - continue - } - - controlConfig.Runtime.Core = sc.Core - nodes := sc.Core.Core().V1().Node() - - nodeName := os.Getenv("NODE_NAME") - if nodeName == "" { - logrus.Info("Failed to set etcd role label: node name not set") - continue - } - node, err := nodes.Get(nodeName, metav1.GetOptions{}) - if err != nil { - logrus.Infof("Failed to set etcd role label: %v", err) - continue - } - - if node.Labels == nil { - node.Labels = make(map[string]string) - } - - // remove controlplane label if role label exists - var controlRoleLabelExists bool - if _, ok := node.Labels[MasterRoleLabelKey]; ok { - delete(node.Labels, MasterRoleLabelKey) - controlRoleLabelExists = true - } - if _, ok := node.Labels[ControlPlaneRoleLabelKey]; ok { - delete(node.Labels, ControlPlaneRoleLabelKey) - controlRoleLabelExists = true - } - - if v, ok := node.Labels[ETCDRoleLabelKey]; ok && v == "true" && !controlRoleLabelExists { - break - } - - node.Labels[ETCDRoleLabelKey] = "true" - - // this is replacement to the etcd controller handleself function - if node.Annotations == nil { - node.Annotations = map[string]string{} - } - fileName := filepath.Join(controlConfig.DataDir, "db", "etcd", "name") - - data, err := os.ReadFile(fileName) - if err != nil { - logrus.Infof("Waiting for etcd node name file to be available: %v", err) - continue - } - etcdNodeName := string(data) - node.Annotations[etcd.NodeNameAnnotation] = etcdNodeName - - address, err := etcd.GetAdvertiseAddress(controlConfig.PrivateIP) - if err != nil { - logrus.Infof("Waiting for etcd node address to be available: %v", err) - continue - } - node.Annotations[etcd.NodeAddressAnnotation] = address - - _, err = nodes.Update(node) - if err == nil { - logrus.Infof("Successfully set etcd role label and annotations on node %s", nodeName) - break - } - select { - case 
<-ctx.Done(): - return ctx.Err() - } - } - return nil -} diff --git a/pkg/server/server.go b/pkg/server/server.go index 72d10c83e2..8d9012f3cb 100644 --- a/pkg/server/server.go +++ b/pkg/server/server.go @@ -78,11 +78,7 @@ func StartServer(ctx context.Context, config *Config, cfg *cmds.Server) error { } } - if config.ControlConfig.DisableAPIServer { - go setETCDLabelsAndAnnotations(ctx, config) - } else { - go startOnAPIServerReady(ctx, config) - } + go startOnAPIServerReady(ctx, config) if err := printTokens(&config.ControlConfig); err != nil { return err @@ -138,20 +134,9 @@ func runControllers(ctx context.Context, config *Config) error { return errors.Wrap(err, "failed to start wranger controllers") } - controlConfig.Runtime.LeaderElectedClusterControllerStarts[version.Program] = func(ctx context.Context) { - if controlConfig.DisableAPIServer { - return - } - if err := coreControllers(ctx, sc, config); err != nil { - panic(err) - } - for _, controller := range config.LeaderControllers { - if err := controller(ctx, sc); err != nil { - panic(errors.Wrapf(err, "failed to start %s leader controller", util.GetFunctionName(controller))) - } - } - if err := sc.Start(ctx); err != nil { - panic(err) + if !controlConfig.DisableAPIServer { + controlConfig.Runtime.LeaderElectedClusterControllerStarts[version.Program] = func(ctx context.Context) { + apiserverControllers(ctx, sc, config) } } @@ -172,6 +157,22 @@ func runControllers(ctx context.Context, config *Config) error { return nil } +// apiserverControllers starts the core controllers, as well as the leader-elected controllers +// that should only run on a control-plane node. 
+func apiserverControllers(ctx context.Context, sc *Context, config *Config) { + if err := coreControllers(ctx, sc, config); err != nil { + panic(err) + } + for _, controller := range config.LeaderControllers { + if err := controller(ctx, sc); err != nil { + panic(errors.Wrapf(err, "failed to start %s leader controller", util.GetFunctionName(controller))) + } + } + if err := sc.Start(ctx); err != nil { + panic(err) + } +} + // runOrDie is similar to leader.RunOrDie, except that it runs the callback // immediately, without performing leader election. func runOrDie(ctx context.Context, name string, cb leader.Callback) { @@ -184,6 +185,12 @@ func runOrDie(ctx context.Context, name string, cb leader.Callback) { <-ctx.Done() } +// coreControllers starts the following controllers, if they are enabled: +// * Node controller (manages node passwords and coredns hosts file) +// * Helm controller +// * Secrets encryption +// * Rootless ports +// These controllers should only be run on nodes with a local apiserver. func coreControllers(ctx context.Context, sc *Context, config *Config) error { if err := node.Register(ctx, !config.ControlConfig.Skips["coredns"], @@ -237,6 +244,9 @@ func coreControllers(ctx context.Context, sc *Context, config *Config) error { } func stageFiles(ctx context.Context, sc *Context, controlConfig *config.Control) error { + if controlConfig.DisableAPIServer { + return nil + } dataDir := filepath.Join(controlConfig.DataDir, "static") if err := static.Stage(dataDir); err != nil { return err @@ -527,19 +537,11 @@ func setNodeLabelsAndAnnotations(ctx context.Context, nodes v1.NodeClient, confi time.Sleep(1 * time.Second) continue } - // remove etcd label if etcd is disabled - var etcdRoleLabelExists bool - if config.ControlConfig.DisableETCD { - if _, ok := node.Labels[ETCDRoleLabelKey]; ok { - delete(node.Labels, ETCDRoleLabelKey) - etcdRoleLabelExists = true - } - } if node.Labels == nil { node.Labels = make(map[string]string) } v, ok := 
node.Labels[ControlPlaneRoleLabelKey] - if !ok || v != "true" || etcdRoleLabelExists { + if !ok || v != "true" { node.Labels[ControlPlaneRoleLabelKey] = "true" node.Labels[MasterRoleLabelKey] = "true" } @@ -565,15 +567,18 @@ func setNodeLabelsAndAnnotations(ctx context.Context, nodes v1.NodeClient, confi return nil } -func setClusterDNSConfig(ctx context.Context, controlConfig *Config, configMap v1.ConfigMapClient) error { +func setClusterDNSConfig(ctx context.Context, config *Config, configMap v1.ConfigMapClient) error { + if config.ControlConfig.DisableAPIServer { + return nil + } // check if configmap already exists _, err := configMap.Get("kube-system", "cluster-dns", metav1.GetOptions{}) if err == nil { logrus.Infof("Cluster dns configmap already exists") return nil } - clusterDNS := controlConfig.ControlConfig.ClusterDNS - clusterDomain := controlConfig.ControlConfig.ClusterDomain + clusterDNS := config.ControlConfig.ClusterDNS + clusterDomain := config.ControlConfig.ClusterDomain c := &corev1.ConfigMap{ TypeMeta: metav1.TypeMeta{ Kind: "ConfigMap",