From 5cb2c254e5f29ebf5511819e0e88c213b8abe0f3 Mon Sep 17 00:00:00 2001 From: Brad Davidson Date: Thu, 24 Mar 2022 12:23:59 -0700 Subject: [PATCH] Fix etcd-only secrets encryption rotation Improve feedback when running secrets-encrypt commands on etcd-only nodes, and allow etcd-only nodes to properly restart when effecting rotation. Signed-off-by: Brad Davidson (cherry picked from commit e811689df9d1754e7b537b2cebc3f4927f15ea9d) --- pkg/cli/secretsencrypt/secrets_encrypt.go | 17 +++++---- pkg/cli/server/server.go | 12 ++++++- pkg/secretsencrypt/controller.go | 9 ++--- pkg/server/secrets-encrypt.go | 44 ++++++++++------------- 4 files changed, 46 insertions(+), 36 deletions(-) diff --git a/pkg/cli/secretsencrypt/secrets_encrypt.go b/pkg/cli/secretsencrypt/secrets_encrypt.go index 8b4ace5791..b6bbe8a445 100644 --- a/pkg/cli/secretsencrypt/secrets_encrypt.go +++ b/pkg/cli/secretsencrypt/secrets_encrypt.go @@ -11,6 +11,7 @@ import ( "text/tabwriter" "github.com/erikdubbelboer/gspt" + "github.com/pkg/errors" "github.com/rancher/k3s/pkg/cli/cmds" "github.com/rancher/k3s/pkg/clientaccess" "github.com/rancher/k3s/pkg/secretsencrypt" @@ -41,6 +42,10 @@ func commandPrep(app *cli.Context, cfg *cmds.Server) (*clientaccess.Info, error) return clientaccess.ParseAndValidateTokenForUser(cmds.ServerConfig.ServerURL, cfg.Token, "server") } +func wrapServerError(err error) error { + return errors.Wrap(err, "see server log for details") +} + func Enable(app *cli.Context) error { var err error if err = cmds.InitLogging(); err != nil { @@ -55,7 +60,7 @@ func Enable(app *cli.Context) error { return err } if err = info.Put("/v1-"+version.Program+"/encrypt/config", b); err != nil { - return err + return wrapServerError(err) } fmt.Println("secrets-encryption enabled") return nil @@ -75,7 +80,7 @@ func Disable(app *cli.Context) error { return err } if err = info.Put("/v1-"+version.Program+"/encrypt/config", b); err != nil { - return err + return wrapServerError(err) } fmt.Println("secrets-encryption disabled") return nil @@ -91,7 +96,7 @@ func Status(app *cli.Context) error { } data, err := info.Get("/v1-" + version.Program + "/encrypt/status") if err != nil { - return err + return wrapServerError(err) } status := server.EncryptionState{} if err := json.Unmarshal(data, &status); err != nil { @@ -159,7 +164,7 @@ func Prepare(app *cli.Context) error { return err } if err = info.Put("/v1-"+version.Program+"/encrypt/config", b); err != nil { - return err + return wrapServerError(err) } fmt.Println("prepare completed successfully") return nil @@ -181,7 +186,7 @@ func Rotate(app *cli.Context) error { return err } if err = info.Put("/v1-"+version.Program+"/encrypt/config", b); err != nil { - return err + return wrapServerError(err) } fmt.Println("rotate completed successfully") return nil @@ -205,7 +210,7 @@ func Reencrypt(app *cli.Context) error { return err } if err = info.Put("/v1-"+version.Program+"/encrypt/config", b); err != nil { - return err + return wrapServerError(err) } fmt.Println("reencryption started") return nil diff --git a/pkg/cli/server/server.go b/pkg/cli/server/server.go index 9c5b02d2ab..afa0fa9a81 100644 --- a/pkg/cli/server/server.go +++ b/pkg/cli/server/server.go @@ -471,7 +471,17 @@ func run(app *cli.Context, cfg *cmds.Server, leaderControllers server.CustomCont } if serverConfig.ControlConfig.DisableAPIServer { - if cfg.ServerURL != "" { + if cfg.ServerURL == "" { + // If this node is the initial member of the cluster and is not hosting an apiserver, + // always bootstrap the agent off local supervisor, and go through the process of reading + // apiserver endpoints from etcd and blocking further startup until one is available. + // This ensures that we don't end up in a chicken-and-egg situation on cluster restarts, + // where the loadbalancer is routing traffic to existing apiservers, but the apiservers + // are non-functional because they're waiting for us to start etcd. + loadbalancer.ResetLoadBalancer(filepath.Join(agentConfig.DataDir, "agent"), loadbalancer.SupervisorServiceName) + } else { + // If this is a secondary member of the cluster and is not hosting an apiserver, + // bootstrap the agent off the existing supervisor, instead of bootstrapping locally. agentConfig.ServerURL = cfg.ServerURL } // initialize the apiAddress Channel for receiving the api address from etcd diff --git a/pkg/secretsencrypt/controller.go b/pkg/secretsencrypt/controller.go index 45fcb02e4b..4c2372041a 100644 --- a/pkg/secretsencrypt/controller.go +++ b/pkg/secretsencrypt/controller.go @@ -12,6 +12,7 @@ import ( "github.com/sirupsen/logrus" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/runtime" "k8s.io/client-go/kubernetes" @@ -25,6 +26,7 @@ const ( secretsProgressEvent string = "SecretsProgress" secretsUpdateCompleteEvent string = "SecretsUpdateComplete" secretsUpdateErrorEvent string = "SecretsUpdateError" + controlPlaneRoleLabelKey string = "node-role.kubernetes.io/control-plane" ) type handler struct { @@ -130,7 +132,6 @@ func (h *handler) onChangeNode(key string, node *corev1.Node) (*corev1.Node, err // validateReencryptStage ensures that the request for reencryption is valid and // that there is only one active reencryption at a time func (h *handler) validateReencryptStage(node *corev1.Node, annotation string) (bool, error) { - split := strings.Split(annotation, "-") if len(split) != 2 { err := fmt.Errorf("invalid annotation %s found on node %s", annotation, node.ObjectMeta.Name) @@ -149,12 +150,12 @@ func (h *handler) validateReencryptStage(node *corev1.Node, annotation string) ( err = fmt.Errorf("invalid hash: %s found on node %s", hash, node.ObjectMeta.Name) return false, err } - - nodes, err := h.nodes.List(metav1.ListOptions{}) + reencryptActiveHash, err := GenReencryptHash(h.controlConfig.Runtime, EncryptionReencryptActive) if err != nil { return false, err } - reencryptActiveHash, err := GenReencryptHash(h.controlConfig.Runtime, EncryptionReencryptActive) + labelSelector := labels.Set{controlPlaneRoleLabelKey: "true"}.String() + nodes, err := h.nodes.List(metav1.ListOptions{LabelSelector: labelSelector}) if err != nil { return false, err } diff --git a/pkg/server/secrets-encrypt.go b/pkg/server/secrets-encrypt.go index dff66836e1..f1d153f5e5 100644 --- a/pkg/server/secrets-encrypt.go +++ b/pkg/server/secrets-encrypt.go @@ -18,8 +18,8 @@ import ( "github.com/rancher/k3s/pkg/secretsencrypt" "github.com/rancher/wrangler/pkg/generated/controllers/core" "github.com/sirupsen/logrus" - corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/labels" apiserverconfigv1 "k8s.io/apiserver/pkg/apis/config/v1" "k8s.io/utils/pointer" ) @@ -300,40 +300,31 @@ func getEncryptionHashAnnotation(core core.Interface) (string, string, error) { if err != nil { return "", "", err } - ann := node.Annotations[secretsencrypt.EncryptionHashAnnotation] - split := strings.Split(ann, "-") - if len(split) != 2 { - return "", "", fmt.Errorf("invalid annotation %s found on node %s", ann, node.ObjectMeta.Name) + if _, ok := node.Labels[ControlPlaneRoleLabelKey]; !ok { + return "", "", fmt.Errorf("cannot manage secrets encryption on non control-plane node %s", nodeName) } - return split[0], split[1], nil -} - -func getServerNodes(core core.Interface) ([]corev1.Node, error) { - nodes, err := core.V1().Node().List(metav1.ListOptions{}) - if err != nil { - return nil, err - } - var serverNodes []corev1.Node - for _, node := range nodes.Items { - if v, ok := node.Labels[ControlPlaneRoleLabelKey]; ok && v == "true" { - serverNodes = append(serverNodes, node) + if ann, ok := node.Annotations[secretsencrypt.EncryptionHashAnnotation]; ok { + split := strings.Split(ann, "-") + if len(split) != 2 { + return "", "", fmt.Errorf("invalid annotation %s found on node %s", ann, nodeName) } + return split[0], split[1], nil } - return serverNodes, nil + return "", "", fmt.Errorf("missing annotation on node %s", nodeName) } // verifyEncryptionHashAnnotation checks that all nodes are on the same stage, // and that a request for new stage is valid func verifyEncryptionHashAnnotation(runtime *config.ControlRuntime, core core.Interface, prevStage string) error { - var firstHash string var firstNodeName string first := true - serverNodes, err := getServerNodes(core) + labelSelector := labels.Set{ControlPlaneRoleLabelKey: "true"}.String() + nodes, err := core.V1().Node().List(metav1.ListOptions{LabelSelector: labelSelector}) if err != nil { return err } - for _, node := range serverNodes { + for _, node := range nodes.Items { hash, ok := node.Annotations[secretsencrypt.EncryptionHashAnnotation] if ok && first { firstHash = hash @@ -358,14 +349,17 @@ func verifyEncryptionHashAnnotation(runtime *config.ControlRuntime, core core.In return err } if !strings.Contains(prevStage, oldStage) { - return fmt.Errorf("incorrect stage: %s found on node %s", oldStage, serverNodes[0].ObjectMeta.Name) + return fmt.Errorf("incorrect stage: %s found on node %s", oldStage, nodes.Items[0].ObjectMeta.Name) } else if oldHash != encryptionConfigHash { - return fmt.Errorf("invalid hash: %s found on node %s", oldHash, serverNodes[0].ObjectMeta.Name) + return fmt.Errorf("invalid hash: %s found on node %s", oldHash, nodes.Items[0].ObjectMeta.Name) } return nil } +// genErrorMessage sends and logs a random error ID so that logs can be correlated +// between the REST API (which does not provide any detailed error output, to avoid +// information disclosure) and the server logs. func genErrorMessage(resp http.ResponseWriter, statusCode int, passedErr error) { errID, err := rand.Int(rand.Reader, big.NewInt(99999)) if err != nil { @@ -373,7 +367,7 @@ func genErrorMessage(resp http.ResponseWriter, statusCode int, passedErr error) resp.Write([]byte(err.Error())) return } - logrus.Warnf("secrets-encrypt-%s: %s", errID.String(), passedErr.Error()) + logrus.Warnf("secrets-encrypt error ID %05d: %s", errID, passedErr.Error()) resp.WriteHeader(statusCode) - resp.Write([]byte(fmt.Sprintf("error secrets-encrypt-%s: see server logs for more info", errID.String()))) + resp.Write([]byte(fmt.Sprintf("secrets-encrypt error ID %05d", errID))) }