mirror of https://github.com/k3s-io/k3s
228 lines
9.0 KiB
Go
228 lines
9.0 KiB
Go
package nodepassword
|
|
|
|
import (
|
|
"context"
|
|
"net/http"
|
|
"os"
|
|
"path"
|
|
"path/filepath"
|
|
"strings"
|
|
"sync"
|
|
"time"
|
|
|
|
"github.com/gorilla/mux"
|
|
"github.com/k3s-io/k3s/pkg/daemons/config"
|
|
"github.com/k3s-io/k3s/pkg/util"
|
|
"github.com/pkg/errors"
|
|
coreclient "github.com/rancher/wrangler/v3/pkg/generated/controllers/core/v1"
|
|
"github.com/sirupsen/logrus"
|
|
corev1 "k8s.io/api/core/v1"
|
|
"k8s.io/apimachinery/pkg/types"
|
|
"k8s.io/apimachinery/pkg/util/wait"
|
|
"k8s.io/apiserver/pkg/authentication/user"
|
|
"k8s.io/apiserver/pkg/endpoints/request"
|
|
"k8s.io/kubernetes/pkg/auth/nodeidentifier"
|
|
)
|
|
|
|
var identifier = nodeidentifier.NewDefaultNodeIdentifier()
|
|
|
|
// NodeAuthValidator returns a node name, or http error code and error
|
|
type NodeAuthValidator func(req *http.Request) (string, int, error)
|
|
|
|
// nodeInfo contains information on the requesting node, derived from auth creds
|
|
// and request headers.
|
|
type nodeInfo struct {
|
|
Name string
|
|
Password string
|
|
User user.Info
|
|
}
|
|
|
|
// GetNodeAuthValidator returns a function that will be called to validate node password authentication.
|
|
// Node password authentication is used when requesting kubelet certificates, and verifies that the
|
|
// credentials are valid for the requested node name, and that the node password is valid if it exists.
|
|
// These checks prevent a user with access to one agent from requesting kubelet certificates that
|
|
// could be used to impersonate another cluster member.
|
|
func GetNodeAuthValidator(ctx context.Context, control *config.Control) NodeAuthValidator {
|
|
runtime := control.Runtime
|
|
deferredNodes := map[string]bool{}
|
|
var secretClient coreclient.SecretController
|
|
var nodeClient coreclient.NodeController
|
|
var mu sync.Mutex
|
|
|
|
return func(req *http.Request) (string, int, error) {
|
|
node, err := getNodeInfo(req)
|
|
if err != nil {
|
|
return "", http.StatusBadRequest, err
|
|
}
|
|
|
|
// node identity auth uses an existing kubelet client cert instead of auth token.
|
|
// If used, validate that the node identity matches the requested node name.
|
|
nodeName, isNodeAuth := identifier.NodeIdentity(node.User)
|
|
if isNodeAuth && nodeName != node.Name {
|
|
return "", http.StatusBadRequest, errors.New("header node name does not match auth node name")
|
|
}
|
|
|
|
if secretClient == nil || nodeClient == nil {
|
|
if runtime.Core != nil {
|
|
// initialize the client if we can
|
|
secretClient = runtime.Core.Core().V1().Secret()
|
|
nodeClient = runtime.Core.Core().V1().Node()
|
|
} else if node.Name == os.Getenv("NODE_NAME") {
|
|
// If we're verifying our own password, verify it locally and ensure a secret later.
|
|
return verifyLocalPassword(ctx, control, &mu, deferredNodes, node)
|
|
} else if control.DisableAPIServer && !isNodeAuth {
|
|
// If we're running on an etcd-only node, and the request didn't use Node Identity auth,
|
|
// defer node password verification until an apiserver joins the cluster.
|
|
return verifyRemotePassword(ctx, control, &mu, deferredNodes, node)
|
|
} else {
|
|
// Otherwise, reject the request until the core is ready.
|
|
return "", http.StatusServiceUnavailable, util.ErrCoreNotReady
|
|
}
|
|
}
|
|
|
|
// verify that the node exists, if using Node Identity auth
|
|
if err := verifyNode(ctx, nodeClient, node); err != nil {
|
|
return "", http.StatusUnauthorized, err
|
|
}
|
|
|
|
// verify that the node password secret matches, or create it if it does not
|
|
if err := Ensure(secretClient, node.Name, node.Password); err != nil {
|
|
// if the verification failed, reject the request
|
|
if errors.Is(err, ErrVerifyFailed) {
|
|
return "", http.StatusForbidden, err
|
|
}
|
|
// If verification failed due to an error creating the node password secret, allow
|
|
// the request, but retry verification until the outage is resolved. This behavior
|
|
// allows nodes to join the cluster during outages caused by validating webhooks
|
|
// blocking secret creation - if the outage requires new nodes to join in order to
|
|
// run the webhook pods, we must fail open here to resolve the outage.
|
|
return verifyRemotePassword(ctx, control, &mu, deferredNodes, node)
|
|
}
|
|
|
|
return node.Name, http.StatusOK, nil
|
|
}
|
|
}
|
|
|
|
// getNodeInfo returns node name, password, and user extracted
|
|
// from request headers and context. An error is returned
|
|
// if any critical fields are missing.
|
|
func getNodeInfo(req *http.Request) (*nodeInfo, error) {
|
|
user, ok := request.UserFrom(req.Context())
|
|
if !ok {
|
|
return nil, errors.New("auth user not set")
|
|
}
|
|
|
|
program := mux.Vars(req)["program"]
|
|
nodeName := req.Header.Get(program + "-Node-Name")
|
|
if nodeName == "" {
|
|
return nil, errors.New("node name not set")
|
|
}
|
|
|
|
nodePassword := req.Header.Get(program + "-Node-Password")
|
|
if nodePassword == "" {
|
|
return nil, errors.New("node password not set")
|
|
}
|
|
|
|
return &nodeInfo{
|
|
Name: strings.ToLower(nodeName),
|
|
Password: nodePassword,
|
|
User: user,
|
|
}, nil
|
|
}
|
|
|
|
// verifyLocalPassword is used to validate the local node's password secret directly against the node password file, when the apiserver is unavailable.
|
|
// This is only used early in startup, when a control-plane node's agent is starting up without a functional apiserver.
|
|
func verifyLocalPassword(ctx context.Context, control *config.Control, mu *sync.Mutex, deferredNodes map[string]bool, node *nodeInfo) (string, int, error) {
|
|
// do not attempt to verify the node password if the local host is not running an agent and does not have a node resource.
|
|
// note that the agent certs and kubeconfigs are created even if the agent is disabled; the only thing that is skipped is starting the kubelet and container runtime.
|
|
if control.DisableAgent {
|
|
return node.Name, http.StatusOK, nil
|
|
}
|
|
|
|
// use same password file location that the agent creates
|
|
nodePasswordRoot := "/"
|
|
if control.Rootless {
|
|
nodePasswordRoot = filepath.Join(path.Dir(control.DataDir), "agent")
|
|
}
|
|
nodeConfigPath := filepath.Join(nodePasswordRoot, "etc", "rancher", "node")
|
|
nodePasswordFile := filepath.Join(nodeConfigPath, "password")
|
|
|
|
passBytes, err := os.ReadFile(nodePasswordFile)
|
|
if err != nil {
|
|
return "", http.StatusInternalServerError, errors.Wrap(err, "unable to read node password file")
|
|
}
|
|
|
|
passHash, err := Hasher.CreateHash(strings.TrimSpace(string(passBytes)))
|
|
if err != nil {
|
|
return "", http.StatusInternalServerError, errors.Wrap(err, "unable to hash node password file")
|
|
}
|
|
|
|
if err := Hasher.VerifyHash(passHash, node.Password); err != nil {
|
|
return "", http.StatusForbidden, errors.Wrap(err, "unable to verify local node password")
|
|
}
|
|
|
|
mu.Lock()
|
|
defer mu.Unlock()
|
|
|
|
if _, ok := deferredNodes[node.Name]; !ok {
|
|
deferredNodes[node.Name] = true
|
|
go ensureSecret(ctx, control, node)
|
|
logrus.Infof("Password verified locally for node %s", node.Name)
|
|
}
|
|
|
|
return node.Name, http.StatusOK, nil
|
|
}
|
|
|
|
// verifyRemotePassword is used when the server does not have a local apisever, as in the case of etcd-only nodes.
|
|
// The node password is ensured once an apiserver joins the cluster.
|
|
func verifyRemotePassword(ctx context.Context, control *config.Control, mu *sync.Mutex, deferredNodes map[string]bool, node *nodeInfo) (string, int, error) {
|
|
mu.Lock()
|
|
defer mu.Unlock()
|
|
|
|
if _, ok := deferredNodes[node.Name]; !ok {
|
|
deferredNodes[node.Name] = true
|
|
go ensureSecret(ctx, control, node)
|
|
logrus.Infof("Password verification deferred for node %s", node.Name)
|
|
}
|
|
|
|
return node.Name, http.StatusOK, nil
|
|
}
|
|
|
|
// verifyNode confirms that a node with the given name exists, to prevent auth
|
|
// from succeeding with a client certificate for a node that has been deleted from the cluster.
|
|
func verifyNode(ctx context.Context, nodeClient coreclient.NodeController, node *nodeInfo) error {
|
|
if nodeName, isNodeAuth := identifier.NodeIdentity(node.User); isNodeAuth {
|
|
if _, err := nodeClient.Cache().Get(nodeName); err != nil {
|
|
return errors.Wrap(err, "unable to verify node identity")
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// ensureSecret validates a server's node password secret once the apiserver is up.
|
|
// As the node has already joined the cluster at this point, this is purely informational.
|
|
func ensureSecret(ctx context.Context, control *config.Control, node *nodeInfo) {
|
|
runtime := control.Runtime
|
|
_ = wait.PollUntilContextCancel(ctx, time.Second*5, true, func(ctx context.Context) (bool, error) {
|
|
if runtime.Core != nil {
|
|
secretClient := runtime.Core.Core().V1().Secret()
|
|
// This is consistent with events attached to the node generated by the kubelet
|
|
// https://github.com/kubernetes/kubernetes/blob/612130dd2f4188db839ea5c2dea07a96b0ad8d1c/pkg/kubelet/kubelet.go#L479-L485
|
|
nodeRef := &corev1.ObjectReference{
|
|
Kind: "Node",
|
|
Name: node.Name,
|
|
UID: types.UID(node.Name),
|
|
Namespace: "",
|
|
}
|
|
if err := Ensure(secretClient, node.Name, node.Password); err != nil {
|
|
runtime.Event.Eventf(nodeRef, corev1.EventTypeWarning, "NodePasswordValidationFailed", "Deferred node password secret validation failed: %v", err)
|
|
// Return true to stop polling if the password verification failed; only retry on secret creation errors.
|
|
return errors.Is(err, ErrVerifyFailed), nil
|
|
}
|
|
runtime.Event.Event(nodeRef, corev1.EventTypeNormal, "NodePasswordValidationComplete", "Deferred node password secret validation complete")
|
|
return true, nil
|
|
}
|
|
return false, nil
|
|
})
|
|
}
|