From 3cd7a46b8fb9362fb490bec641ddd7e25c1d93f7 Mon Sep 17 00:00:00 2001 From: Brad Davidson Date: Fri, 18 Mar 2022 02:07:19 -0700 Subject: [PATCH] Defer ensuring node passwords on etcd-only nodes during initial cluster bootstrap This allows secondary etcd nodes to bootstrap the kubelet before an apiserver joins the cluster. Rancher waits for all the etcd nodes to come up before adding the control-plane nodes, so this needs to be handled properly. Signed-off-by: Brad Davidson (cherry picked from commit 38706eeec0215f91a6ca59fc157cbf29dbcabdeb) --- pkg/server/router.go | 71 ++++++++++++++++++++++++++++---------------- 1 file changed, 46 insertions(+), 25 deletions(-) diff --git a/pkg/server/router.go b/pkg/server/router.go index 12895f8d06..10645b7360 100644 --- a/pkg/server/router.go +++ b/pkg/server/router.go @@ -332,8 +332,9 @@ type nodePassBootstrapper func(req *http.Request) (string, int, error) func passwordBootstrap(ctx context.Context, config *Config) nodePassBootstrapper { runtime := config.ControlConfig.Runtime + deferredNodes := map[string]bool{} var secretClient coreclient.SecretClient - var once sync.Once + var mu sync.Mutex return nodePassBootstrapper(func(req *http.Request) (string, int, error) { nodeName, nodePassword, err := getNodeInfo(req) @@ -347,7 +348,10 @@ func passwordBootstrap(ctx context.Context, config *Config) nodePassBootstrapper secretClient = runtime.Core.Core().V1().Secret() } else if nodeName == os.Getenv("NODE_NAME") { // or verify the password locally and ensure a secret later - return verifyLocalPassword(ctx, config, &once, nodeName, nodePassword) + return verifyLocalPassword(ctx, config, &mu, deferredNodes, nodeName, nodePassword) + } else if config.ControlConfig.DisableAPIServer { + // or defer node password verification until an apiserver joins the cluster + return verifyRemotePassword(ctx, config, &mu, deferredNodes, nodeName, nodePassword) } else { // or reject the request until the core is ready return "", http.StatusServiceUnavailable, errors.New("runtime core not ready") @@ -362,7 +366,7 @@ func passwordBootstrap(ctx context.Context, config *Config) nodePassBootstrapper }) } -func verifyLocalPassword(ctx context.Context, config *Config, once *sync.Once, nodeName, nodePassword string) (string, int, error) { +func verifyLocalPassword(ctx context.Context, config *Config, mu *sync.Mutex, deferredNodes map[string]bool, nodeName, nodePassword string) (string, int, error) { // use same password file location that the agent creates nodePasswordRoot := "/" if config.Rootless { @@ -381,29 +385,46 @@ func verifyLocalPassword(ctx context.Context, config *Config, once *sync.Once, n return "", http.StatusForbidden, errors.Wrapf(err, "unable to verify local password for node '%s'", nodeName) } - // make sure the secret is created when the api server is ready - ensureSecret := func() { - runtime := config.ControlConfig.Runtime - for { - select { - case <-ctx.Done(): - return - case <-time.After(1 * time.Second): - if runtime.Core != nil { - logrus.Debugf("runtime core has become available, ensuring password secret for node '%s'", nodeName) - secretClient := runtime.Core.Core().V1().Secret() - if err := nodepassword.Ensure(secretClient, nodeName, nodePassword); err != nil { - logrus.Warnf("error ensuring node password secret for pre-validated node '%s': %v", nodeName, err) - } - return - } - } - } + mu.Lock() + defer mu.Unlock() + + if _, ok := deferredNodes[nodeName]; !ok { + deferredNodes[nodeName] = true + go ensureSecret(ctx, config, nodeName, nodePassword) + logrus.Debugf("Password verified locally for node '%s'", nodeName) } - go once.Do(ensureSecret) - - logrus.Debugf("password verified locally for node '%s'", nodeName) - return nodeName, http.StatusOK, nil } + +func verifyRemotePassword(ctx context.Context, config *Config, mu *sync.Mutex, deferredNodes map[string]bool, nodeName, nodePassword string) (string, int, error) { + mu.Lock() + defer mu.Unlock() + + if _, ok := deferredNodes[nodeName]; !ok { + deferredNodes[nodeName] = true + go ensureSecret(ctx, config, nodeName, nodePassword) + logrus.Debugf("Password verification deferred for node '%s'", nodeName) + } + + return nodeName, http.StatusOK, nil +} + +func ensureSecret(ctx context.Context, config *Config, nodeName, nodePassword string) { + runtime := config.ControlConfig.Runtime + for { + select { + case <-ctx.Done(): + return + case <-time.After(1 * time.Second): + if runtime.Core != nil { + logrus.Debugf("Runtime core has become available, ensuring password secret for node '%s'", nodeName) + secretClient := runtime.Core.Core().V1().Secret() + if err := nodepassword.Ensure(secretClient, nodeName, nodePassword); err != nil { + logrus.Warnf("Error ensuring node password secret for pre-validated node '%s': %v", nodeName, err) + } + return + } + } + } +}