From edb044001718b2c404263d86510ed17ad2f56781 Mon Sep 17 00:00:00 2001 From: Brad Davidson Date: Wed, 27 Mar 2024 21:11:55 +0000 Subject: [PATCH] Fix etcd snapshot reconcile for agentless nodes Disable cleanup of orphaned snapshots and patching of node annotations if running agentless Signed-off-by: Brad Davidson --- pkg/cli/server/server.go | 1 + pkg/daemons/config/types.go | 1 + pkg/etcd/snapshot.go | 6 ++++++ pkg/etcd/snapshot_controller.go | 18 ++++++++++++++++++ 4 files changed, 26 insertions(+) diff --git a/pkg/cli/server/server.go b/pkg/cli/server/server.go index fbe5c1d8ee..af0f148ee3 100644 --- a/pkg/cli/server/server.go +++ b/pkg/cli/server/server.go @@ -167,6 +167,7 @@ func run(app *cli.Context, cfg *cmds.Server, leaderControllers server.CustomCont serverConfig.ControlConfig.DisableAPIServer = cfg.DisableAPIServer serverConfig.ControlConfig.DisableScheduler = cfg.DisableScheduler serverConfig.ControlConfig.DisableControllerManager = cfg.DisableControllerManager + serverConfig.ControlConfig.DisableAgent = cfg.DisableAgent serverConfig.ControlConfig.EmbeddedRegistry = cfg.EmbeddedRegistry serverConfig.ControlConfig.ClusterInit = cfg.ClusterInit serverConfig.ControlConfig.EncryptSecrets = cfg.EncryptSecrets diff --git a/pkg/daemons/config/types.go b/pkg/daemons/config/types.go index 4a40726483..7677bf2790 100644 --- a/pkg/daemons/config/types.go +++ b/pkg/daemons/config/types.go @@ -181,6 +181,7 @@ type Control struct { DataDir string Datastore endpoint.Config `json:"-"` Disables map[string]bool + DisableAgent bool DisableAPIServer bool DisableControllerManager bool DisableETCD bool diff --git a/pkg/etcd/snapshot.go b/pkg/etcd/snapshot.go index a481985171..19d18a8245 100644 --- a/pkg/etcd/snapshot.go +++ b/pkg/etcd/snapshot.go @@ -850,6 +850,12 @@ func (e *ETCD) ReconcileSnapshotData(ctx context.Context) error { } } + // Agentless servers do not have a node. If we are running agentless, return early to avoid pruning + // snapshots for nonexistent nodes and trying to patch the reconcile annotations on our node. + if e.config.DisableAgent { + return nil + } + // List all snapshots in Kubernetes not stored on S3 or a current etcd node. // These snapshots are local to a node that no longer runs etcd and cannot be restored. // If the node rejoins later and has local snapshots, it will reconcile them itself. diff --git a/pkg/etcd/snapshot_controller.go b/pkg/etcd/snapshot_controller.go index d4b22755e0..5d43de6457 100644 --- a/pkg/etcd/snapshot_controller.go +++ b/pkg/etcd/snapshot_controller.go @@ -2,6 +2,7 @@ package etcd import ( "context" + "os" "sort" "strconv" "strings" @@ -176,6 +177,23 @@ func (e *etcdSnapshotHandler) reconcile() error { return err } + // If running without an agent there will not be a node for this server; + // create a dummy node and assume it has reconciled. + if e.etcd.config.DisableAgent { + node := v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: os.Getenv("NODE_NAME"), + Annotations: map[string]string{ + annotationLocalReconciled: "true", + }, + }, + } + if e.etcd.s3 != nil { + node.Annotations[annotationS3Reconciled] = "true" + } + nodeList.Items = append(nodeList.Items, node) + } + // Once a node has set the reconcile annotation, it is considered to have // migrated to using ETCDSnapshotFile resources, and any old configmap // entries for it can be pruned. Until the annotation is set, we will leave