Fix etcd-only secrets encryption rotation

Improve feedback when running secrets-encrypt commands on etcd-only nodes, and
allow etcd-only nodes to properly restart when effecting rotation.

Signed-off-by: Brad Davidson <brad.davidson@rancher.com>
pull/5343/head v1.23.5-rc3+k3s1
Brad Davidson 2022-03-24 12:23:59 -07:00 committed by Brad Davidson
parent dd541e8557
commit e811689df9
4 changed files with 46 additions and 36 deletions

View File

@ -16,6 +16,7 @@ import (
"github.com/k3s-io/k3s/pkg/secretsencrypt"
"github.com/k3s-io/k3s/pkg/server"
"github.com/k3s-io/k3s/pkg/version"
"github.com/pkg/errors"
"github.com/urfave/cli"
"k8s.io/utils/pointer"
)
@ -41,6 +42,10 @@ func commandPrep(app *cli.Context, cfg *cmds.Server) (*clientaccess.Info, error)
return clientaccess.ParseAndValidateTokenForUser(cmds.ServerConfig.ServerURL, cfg.Token, "server")
}
func wrapServerError(err error) error {
return errors.Wrap(err, "see server log for details")
}
func Enable(app *cli.Context) error {
var err error
if err = cmds.InitLogging(); err != nil {
@ -55,7 +60,7 @@ func Enable(app *cli.Context) error {
return err
}
if err = info.Put("/v1-"+version.Program+"/encrypt/config", b); err != nil {
return err
return wrapServerError(err)
}
fmt.Println("secrets-encryption enabled")
return nil
@ -75,7 +80,7 @@ func Disable(app *cli.Context) error {
return err
}
if err = info.Put("/v1-"+version.Program+"/encrypt/config", b); err != nil {
return err
return wrapServerError(err)
}
fmt.Println("secrets-encryption disabled")
return nil
@ -91,7 +96,7 @@ func Status(app *cli.Context) error {
}
data, err := info.Get("/v1-" + version.Program + "/encrypt/status")
if err != nil {
return err
return wrapServerError(err)
}
status := server.EncryptionState{}
if err := json.Unmarshal(data, &status); err != nil {
@ -159,7 +164,7 @@ func Prepare(app *cli.Context) error {
return err
}
if err = info.Put("/v1-"+version.Program+"/encrypt/config", b); err != nil {
return err
return wrapServerError(err)
}
fmt.Println("prepare completed successfully")
return nil
@ -181,7 +186,7 @@ func Rotate(app *cli.Context) error {
return err
}
if err = info.Put("/v1-"+version.Program+"/encrypt/config", b); err != nil {
return err
return wrapServerError(err)
}
fmt.Println("rotate completed successfully")
return nil
@ -205,7 +210,7 @@ func Reencrypt(app *cli.Context) error {
return err
}
if err = info.Put("/v1-"+version.Program+"/encrypt/config", b); err != nil {
return err
return wrapServerError(err)
}
fmt.Println("reencryption started")
return nil

View File

@ -488,7 +488,17 @@ func run(app *cli.Context, cfg *cmds.Server, leaderControllers server.CustomCont
}
if serverConfig.ControlConfig.DisableAPIServer {
if cfg.ServerURL != "" {
if cfg.ServerURL == "" {
// If this node is the initial member of the cluster and is not hosting an apiserver,
// always bootstrap the agent off local supervisor, and go through the process of reading
// apiserver endpoints from etcd and blocking further startup until one is available.
// This ensures that we don't end up in a chicken-and-egg situation on cluster restarts,
// where the loadbalancer is routing traffic to existing apiservers, but the apiservers
// are non-functional because they're waiting for us to start etcd.
loadbalancer.ResetLoadBalancer(filepath.Join(agentConfig.DataDir, "agent"), loadbalancer.SupervisorServiceName)
} else {
// If this is a secondary member of the cluster and is not hosting an apiserver,
// bootstrap the agent off the existing supervisor, instead of bootstrapping locally.
agentConfig.ServerURL = cfg.ServerURL
}
// initialize the apiAddress Channel for receiving the api address from etcd

View File

@ -12,6 +12,7 @@ import (
"github.com/sirupsen/logrus"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types"
"k8s.io/client-go/kubernetes"
@ -25,6 +26,7 @@ const (
secretsProgressEvent string = "SecretsProgress"
secretsUpdateCompleteEvent string = "SecretsUpdateComplete"
secretsUpdateErrorEvent string = "SecretsUpdateError"
controlPlaneRoleLabelKey string = "node-role.kubernetes.io/control-plane"
)
type handler struct {
@ -139,7 +141,6 @@ func (h *handler) onChangeNode(key string, node *corev1.Node) (*corev1.Node, err
// validateReencryptStage ensures that the request for reencryption is valid and
// that there is only one active reencryption at a time
func (h *handler) validateReencryptStage(node *corev1.Node, annotation string) (bool, error) {
split := strings.Split(annotation, "-")
if len(split) != 2 {
err := fmt.Errorf("invalid annotation %s found on node %s", annotation, node.ObjectMeta.Name)
@ -158,12 +159,12 @@ func (h *handler) validateReencryptStage(node *corev1.Node, annotation string) (
err = fmt.Errorf("invalid hash: %s found on node %s", hash, node.ObjectMeta.Name)
return false, err
}
nodes, err := h.nodes.List(metav1.ListOptions{})
reencryptActiveHash, err := GenReencryptHash(h.controlConfig.Runtime, EncryptionReencryptActive)
if err != nil {
return false, err
}
reencryptActiveHash, err := GenReencryptHash(h.controlConfig.Runtime, EncryptionReencryptActive)
labelSelector := labels.Set{controlPlaneRoleLabelKey: "true"}.String()
nodes, err := h.nodes.List(metav1.ListOptions{LabelSelector: labelSelector})
if err != nil {
return false, err
}

View File

@ -18,8 +18,8 @@ import (
"github.com/k3s-io/k3s/pkg/secretsencrypt"
"github.com/rancher/wrangler/pkg/generated/controllers/core"
"github.com/sirupsen/logrus"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
apiserverconfigv1 "k8s.io/apiserver/pkg/apis/config/v1"
"k8s.io/utils/pointer"
)
@ -300,40 +300,31 @@ func getEncryptionHashAnnotation(core core.Interface) (string, string, error) {
if err != nil {
return "", "", err
}
ann := node.Annotations[secretsencrypt.EncryptionHashAnnotation]
split := strings.Split(ann, "-")
if len(split) != 2 {
return "", "", fmt.Errorf("invalid annotation %s found on node %s", ann, node.ObjectMeta.Name)
if _, ok := node.Labels[ControlPlaneRoleLabelKey]; !ok {
return "", "", fmt.Errorf("cannot manage secrets encryption on non control-plane node %s", nodeName)
}
return split[0], split[1], nil
}
func getServerNodes(core core.Interface) ([]corev1.Node, error) {
nodes, err := core.V1().Node().List(metav1.ListOptions{})
if err != nil {
return nil, err
}
var serverNodes []corev1.Node
for _, node := range nodes.Items {
if v, ok := node.Labels[ControlPlaneRoleLabelKey]; ok && v == "true" {
serverNodes = append(serverNodes, node)
if ann, ok := node.Annotations[secretsencrypt.EncryptionHashAnnotation]; ok {
split := strings.Split(ann, "-")
if len(split) != 2 {
return "", "", fmt.Errorf("invalid annotation %s found on node %s", ann, nodeName)
}
return split[0], split[1], nil
}
return serverNodes, nil
return "", "", fmt.Errorf("missing annotation on node %s", nodeName)
}
// verifyEncryptionHashAnnotation checks that all nodes are on the same stage,
// and that a request for new stage is valid
func verifyEncryptionHashAnnotation(runtime *config.ControlRuntime, core core.Interface, prevStage string) error {
var firstHash string
var firstNodeName string
first := true
serverNodes, err := getServerNodes(core)
labelSelector := labels.Set{ControlPlaneRoleLabelKey: "true"}.String()
nodes, err := core.V1().Node().List(metav1.ListOptions{LabelSelector: labelSelector})
if err != nil {
return err
}
for _, node := range serverNodes {
for _, node := range nodes.Items {
hash, ok := node.Annotations[secretsencrypt.EncryptionHashAnnotation]
if ok && first {
firstHash = hash
@ -358,14 +349,17 @@ func verifyEncryptionHashAnnotation(runtime *config.ControlRuntime, core core.In
return err
}
if !strings.Contains(prevStage, oldStage) {
return fmt.Errorf("incorrect stage: %s found on node %s", oldStage, serverNodes[0].ObjectMeta.Name)
return fmt.Errorf("incorrect stage: %s found on node %s", oldStage, nodes.Items[0].ObjectMeta.Name)
} else if oldHash != encryptionConfigHash {
return fmt.Errorf("invalid hash: %s found on node %s", oldHash, serverNodes[0].ObjectMeta.Name)
return fmt.Errorf("invalid hash: %s found on node %s", oldHash, nodes.Items[0].ObjectMeta.Name)
}
return nil
}
// genErrorMessage sends and logs a random error ID so that logs can be correlated
// between the REST API (which does not provide any detailed error output, to avoid
// information disclosure) and the server logs.
func genErrorMessage(resp http.ResponseWriter, statusCode int, passedErr error) {
errID, err := rand.Int(rand.Reader, big.NewInt(99999))
if err != nil {
@ -373,7 +367,7 @@ func genErrorMessage(resp http.ResponseWriter, statusCode int, passedErr error)
resp.Write([]byte(err.Error()))
return
}
logrus.Warnf("secrets-encrypt-%s: %s", errID.String(), passedErr.Error())
logrus.Warnf("secrets-encrypt error ID %05d: %s", errID, passedErr.Error())
resp.WriteHeader(statusCode)
resp.Write([]byte(fmt.Sprintf("error secrets-encrypt-%s: see server logs for more info", errID.String())))
resp.Write([]byte(fmt.Sprintf("secrets-encrypt error ID %05d", errID)))
}