mirror of https://github.com/k3s-io/k3s
Soft-fail on node password verification if the secret cannot be created
Allows nodes to join the cluster during a webhook outage. This also enhances auditability by creating Kubernetes events for the deferred verification.

Signed-off-by: Brad Davidson <brad.davidson@rancher.com>

Branch: pull/7681/head
parent b0188f5a13
commit 45d8c1a1a2
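The crux of the change is a new sentinel error, nodepassword.ErrVerifyFailed, that lets the supervisor tell "the stored password hash does not match" apart from "the secret could not be read or created". A minimal sketch of that dispatch, assuming a hypothetical handleJoin helper standing in for the real passwordBootstrap handler shown in the diff below:

    // Sketch only: handleJoin is a hypothetical stand-in for the supervisor's
    // passwordBootstrap handler; Ensure and ErrVerifyFailed are the real
    // identifiers added by this commit.
    import (
    	"errors"
    	"net/http"

    	coreclient "github.com/rancher/wrangler/pkg/generated/controllers/core/v1"

    	"github.com/k3s-io/k3s/pkg/nodepassword"
    )

    func handleJoin(secretClient coreclient.SecretClient, nodeName, pass string) (int, error) {
    	if err := nodepassword.Ensure(secretClient, nodeName, pass); err != nil {
    		if errors.Is(err, nodepassword.ErrVerifyFailed) {
    			// A stored hash exists and does not match: hard failure.
    			return http.StatusForbidden, err
    		}
    		// The secret could not be read or created (e.g. a validating-webhook
    		// outage): fail open, admit the node, and re-verify in the background.
    		return http.StatusOK, nil
    	}
    	return http.StatusOK, nil
    }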
@@ -16,6 +16,7 @@ import (
 	"github.com/rancher/wrangler/pkg/leader"
 	utilnet "k8s.io/apimachinery/pkg/util/net"
 	"k8s.io/apiserver/pkg/authentication/authenticator"
+	"k8s.io/client-go/tools/record"
 	utilsnet "k8s.io/utils/net"
 )
@@ -340,6 +341,7 @@ type ControlRuntime struct {
 	ClientETCDKey string

 	Core       *core.Factory
+	Event      record.EventRecorder
 	EtcdConfig endpoint.ETCDConfig
 }
@@ -15,13 +15,38 @@ import (
 	v1 "k8s.io/api/core/v1"
 	apierrors "k8s.io/apimachinery/pkg/api/errors"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/utils/pointer"
 )

 var (
 	// Hasher provides the algorithm for generating and verifying hashes
-	Hasher = hash.NewSCrypt()
+	Hasher          = hash.NewSCrypt()
+	ErrVerifyFailed = errVerifyFailed()
 )

+type passwordError struct {
+	node string
+	err  error
+}
+
+func (e *passwordError) Error() string {
+	return fmt.Sprintf("unable to verify password for node %s: %v", e.node, e.err)
+}
+
+func (e *passwordError) Is(target error) bool {
+	switch target {
+	case ErrVerifyFailed:
+		return true
+	}
+	return false
+}
+
+func (e *passwordError) Unwrap() error {
+	return e.err
+}
+
+func errVerifyFailed() error { return &passwordError{} }
+
 func getSecretName(nodeName string) string {
 	return strings.ToLower(nodeName + ".node-password." + version.Program)
 }
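The Is method above is what makes the package-level sentinel work: errors.Is(err, ErrVerifyFailed) matches any *passwordError regardless of the inner error it wraps. A standalone sketch of the same pattern using only the standard library (the type mirrors the one added above):

    package main

    import (
    	"errors"
    	"fmt"
    )

    var ErrVerifyFailed = errVerifyFailed()

    type passwordError struct {
    	node string
    	err  error
    }

    func (e *passwordError) Error() string {
    	return fmt.Sprintf("unable to verify password for node %s: %v", e.node, e.err)
    }

    // Is makes every *passwordError match the ErrVerifyFailed sentinel.
    func (e *passwordError) Is(target error) bool { return target == ErrVerifyFailed }

    // Unwrap exposes the inner error to errors.Is/As chains.
    func (e *passwordError) Unwrap() error { return e.err }

    func errVerifyFailed() error { return &passwordError{} }

    func main() {
    	err := &passwordError{node: "node1", err: errors.New("hash mismatch")}
    	fmt.Println(errors.Is(err, ErrVerifyFailed)) // true
    	fmt.Println(errors.Unwrap(err))              // hash mismatch
    }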
@@ -30,39 +55,34 @@ func verifyHash(secretClient coreclient.SecretClient, nodeName, pass string) err
 	name := getSecretName(nodeName)
 	secret, err := secretClient.Get(metav1.NamespaceSystem, name, metav1.GetOptions{})
 	if err != nil {
-		return err
+		return &passwordError{node: nodeName, err: err}
 	}
 	if hash, ok := secret.Data["hash"]; ok {
 		if err := Hasher.VerifyHash(string(hash), pass); err != nil {
-			return errors.Wrapf(err, "unable to verify hash for node '%s'", nodeName)
+			return &passwordError{node: nodeName, err: err}
 		}
 		return nil
 	}
-	return fmt.Errorf("unable to locate hash data for node secret '%s'", name)
+	return &passwordError{node: nodeName, err: errors.New("password hash not found in node secret")}
 }

 // Ensure will verify a node-password secret if it exists, otherwise it will create one
 func Ensure(secretClient coreclient.SecretClient, nodeName, pass string) error {
-	if err := verifyHash(secretClient, nodeName, pass); !apierrors.IsNotFound(err) {
-		return err
-	}
-
-	hash, err := Hasher.CreateHash(pass)
-	if err != nil {
-		return errors.Wrapf(err, "unable to create hash for node '%s'", nodeName)
-	}
-
-	immutable := true
-	_, err = secretClient.Create(&v1.Secret{
-		ObjectMeta: metav1.ObjectMeta{
-			Name:      getSecretName(nodeName),
-			Namespace: metav1.NamespaceSystem,
-		},
-		Immutable: &immutable,
-		Data:      map[string][]byte{"hash": []byte(hash)},
-	})
-	if apierrors.IsAlreadyExists(err) {
-		return verifyHash(secretClient, nodeName, pass)
+	err := verifyHash(secretClient, nodeName, pass)
+	if apierrors.IsNotFound(err) {
+		var hash string
+		hash, err = Hasher.CreateHash(pass)
+		if err != nil {
+			return &passwordError{node: nodeName, err: err}
+		}
+		_, err = secretClient.Create(&v1.Secret{
+			ObjectMeta: metav1.ObjectMeta{
+				Name:      getSecretName(nodeName),
+				Namespace: metav1.NamespaceSystem,
+			},
+			Immutable: pointer.Bool(true),
+			Data:      map[string][]byte{"hash": []byte(hash)},
+		})
 	}
 	return err
 }
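One subtlety in the rewritten Ensure: verifyHash now wraps even the client's not-found error in a *passwordError, so the apierrors.IsNotFound check above succeeds only because recent apimachinery releases match with errors.As, which follows the Unwrap method down to the inner *StatusError. A small illustration of that interplay (assuming current apimachinery unwrapping behavior; passwordError abbreviated):

    package main

    import (
    	"fmt"

    	apierrors "k8s.io/apimachinery/pkg/api/errors"
    	"k8s.io/apimachinery/pkg/runtime/schema"
    )

    type passwordError struct{ err error }

    func (e *passwordError) Error() string { return fmt.Sprintf("unable to verify password: %v", e.err) }
    func (e *passwordError) Unwrap() error { return e.err }

    func main() {
    	// Simulate the Get failing because the node-password secret is absent.
    	inner := apierrors.NewNotFound(schema.GroupResource{Resource: "secrets"}, "node1.node-password.k3s")
    	wrapped := &passwordError{err: inner}
    	// IsNotFound sees through the wrapper via Unwrap, so Ensure still
    	// takes the create path.
    	fmt.Println(apierrors.IsNotFound(wrapped)) // true
    }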
@@ -1,6 +1,7 @@
 package nodepassword

 import (
+	"errors"
 	"fmt"
 	"log"
 	"os"

@@ -89,6 +90,13 @@ func Test_UnitMigrateFileNodes(t *testing.T) {
 	assertNotEqual(t, Ensure(secretClient, newNode, "wrong-password"), nil)
 }

+func Test_PasswordError(t *testing.T) {
+	err := &passwordError{node: "test", err: fmt.Errorf("inner error")}
+	assertEqual(t, errors.Is(err, ErrVerifyFailed), true)
+	assertEqual(t, errors.Is(err, fmt.Errorf("different error")), false)
+	assertNotEqual(t, errors.Unwrap(err), nil)
+}
+
 // --------------------------

 // mock secret client interface
@@ -16,9 +16,11 @@ import (
 	"github.com/rancher/wrangler/pkg/generated/controllers/core"
 	"github.com/rancher/wrangler/pkg/generated/controllers/rbac"
 	"github.com/rancher/wrangler/pkg/start"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/client-go/kubernetes"
 	"k8s.io/client-go/rest"
 	"k8s.io/client-go/tools/clientcmd"
+	"k8s.io/client-go/tools/record"
 )

 type Context struct {

@@ -29,6 +31,7 @@ type Context struct {
 	Auth  *rbac.Factory
 	Core  *core.Factory
 	K8s   kubernetes.Interface
+	Event record.EventRecorder
 }

 func (c *Context) Start(ctx context.Context) error {

@@ -58,6 +61,7 @@ func NewContext(ctx context.Context, cfg string) (*Context, error) {
 		Apps:  apps.NewFactoryFromConfigOrDie(restConfig),
 		Batch: batch.NewFactoryFromConfigOrDie(restConfig),
 		Core:  core.NewFactoryFromConfigOrDie(restConfig),
+		Event: util.BuildControllerEventRecorder(k8s, version.Program+"-supervisor", metav1.NamespaceAll),
 	}, nil
 }
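util.BuildControllerEventRecorder itself is not shown in this diff; in plain client-go terms, a recorder like the one stored in Context.Event is typically wired roughly as below (the buildRecorder name and structure are illustrative, not k3s's actual implementation):

    import (
    	corev1 "k8s.io/api/core/v1"
    	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    	"k8s.io/client-go/kubernetes"
    	"k8s.io/client-go/kubernetes/scheme"
    	typedcorev1 "k8s.io/client-go/kubernetes/typed/core/v1"
    	"k8s.io/client-go/tools/record"
    )

    // buildRecorder fans recorded events out to the API server, attributing
    // them to the named component (here, version.Program+"-supervisor").
    func buildRecorder(k8s kubernetes.Interface, component string) record.EventRecorder {
    	broadcaster := record.NewBroadcaster()
    	broadcaster.StartRecordingToSink(&typedcorev1.EventSinkImpl{
    		Interface: k8s.CoreV1().Events(metav1.NamespaceAll),
    	})
    	return broadcaster.NewRecorder(scheme.Scheme, corev1.EventSource{Component: component})
    }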
@@ -26,10 +26,13 @@ import (
 	certutil "github.com/rancher/dynamiclistener/cert"
 	coreclient "github.com/rancher/wrangler/pkg/generated/controllers/core/v1"
 	"github.com/sirupsen/logrus"
+	corev1 "k8s.io/api/core/v1"
 	apierrors "k8s.io/apimachinery/pkg/api/errors"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/runtime/schema"
+	"k8s.io/apimachinery/pkg/types"
 	"k8s.io/apimachinery/pkg/util/json"
+	"k8s.io/apimachinery/pkg/util/wait"
 	"k8s.io/apiserver/pkg/authentication/user"
 	"k8s.io/apiserver/pkg/endpoints/handlers/responsewriters"
 	"k8s.io/apiserver/pkg/endpoints/request"

@@ -457,12 +460,23 @@ func passwordBootstrap(ctx context.Context, config *Config) nodePassBootstrapper
 		}
 	}

 	// verify that the node exists, if using Node Identity auth
 	if err := verifyNode(ctx, nodeClient, node); err != nil {
 		return "", http.StatusUnauthorized, err
 	}

 	// verify that the node password secret matches, or create it if it does not
 	if err := nodepassword.Ensure(secretClient, node.Name, node.Password); err != nil {
-		return "", http.StatusForbidden, err
+		// if the verification failed, reject the request
+		if errors.Is(err, nodepassword.ErrVerifyFailed) {
+			return "", http.StatusForbidden, err
+		}
+		// If verification failed due to an error creating the node password secret, allow
+		// the request, but retry verification until the outage is resolved. This behavior
+		// allows nodes to join the cluster during outages caused by validating webhooks
+		// blocking secret creation - if the outage requires new nodes to join in order to
+		// run the webhook pods, we must fail open here to resolve the outage.
+		return verifyRemotePassword(ctx, config, &mu, deferredNodes, node)
 	}

 	return node.Name, http.StatusOK, nil
@@ -489,7 +503,7 @@ func verifyLocalPassword(ctx context.Context, config *Config, mu *sync.Mutex, de
 	}

 	if err := nodepassword.Hasher.VerifyHash(passHash, node.Password); err != nil {
-		return "", http.StatusForbidden, errors.Wrapf(err, "unable to verify local password for node '%s'", node.Name)
+		return "", http.StatusForbidden, errors.Wrap(err, "unable to verify local node password")
 	}

 	mu.Lock()

@@ -498,7 +512,7 @@ func verifyLocalPassword(ctx context.Context, config *Config, mu *sync.Mutex, de
 	if _, ok := deferredNodes[node.Name]; !ok {
 		deferredNodes[node.Name] = true
 		go ensureSecret(ctx, config, node)
-		logrus.Debugf("Password verified locally for node '%s'", node.Name)
+		logrus.Infof("Password verified locally for node %s", node.Name)
 	}

 	return node.Name, http.StatusOK, nil
@@ -511,7 +525,7 @@ func verifyRemotePassword(ctx context.Context, config *Config, mu *sync.Mutex, d
 	if _, ok := deferredNodes[node.Name]; !ok {
 		deferredNodes[node.Name] = true
 		go ensureSecret(ctx, config, node)
-		logrus.Debugf("Password verification deferred for node '%s'", node.Name)
+		logrus.Infof("Password verification deferred for node %s", node.Name)
 	}

 	return node.Name, http.StatusOK, nil
@@ -528,19 +542,25 @@ func verifyNode(ctx context.Context, nodeClient coreclient.NodeClient, node *nod

 func ensureSecret(ctx context.Context, config *Config, node *nodeInfo) {
 	runtime := config.ControlConfig.Runtime
-	for {
-		select {
-		case <-ctx.Done():
-			return
-		case <-time.After(1 * time.Second):
-			if runtime.Core != nil {
-				logrus.Debugf("Runtime core has become available, ensuring password secret for node '%s'", node.Name)
-				secretClient := runtime.Core.Core().V1().Secret()
-				if err := nodepassword.Ensure(secretClient, node.Name, node.Password); err != nil {
-					logrus.Warnf("Error ensuring node password secret for pre-validated node '%s': %v", node.Name, err)
-				}
-				return
-			}
-		}
-	}
+	wait.PollImmediateUntilWithContext(ctx, time.Second*5, func(ctx context.Context) (bool, error) {
+		if runtime.Core != nil {
+			secretClient := runtime.Core.Core().V1().Secret()
+			// This is consistent with events attached to the node generated by the kubelet
+			// https://github.com/kubernetes/kubernetes/blob/612130dd2f4188db839ea5c2dea07a96b0ad8d1c/pkg/kubelet/kubelet.go#L479-L485
+			nodeRef := &corev1.ObjectReference{
+				Kind:      "Node",
+				Name:      node.Name,
+				UID:       types.UID(node.Name),
+				Namespace: "",
+			}
+			if err := nodepassword.Ensure(secretClient, node.Name, node.Password); err != nil {
+				runtime.Event.Eventf(nodeRef, corev1.EventTypeWarning, "NodePasswordValidationFailed", "Deferred node password secret validation failed: %v", err)
+				// Return true to stop polling if the password verification failed; only retry on secret creation errors.
+				return errors.Is(err, nodepassword.ErrVerifyFailed), nil
+			}
+			runtime.Event.Event(nodeRef, corev1.EventTypeNormal, "NodePasswordValidationComplete", "Deferred node password secret validation complete")
+			return true, nil
+		}
+		return false, nil
+	})
 }
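The polling primitive used above comes from apimachinery: the condition function runs immediately, then every interval, until it returns true, returns an error, or the context is cancelled. A minimal standalone sketch of those semantics:

    package main

    import (
    	"context"
    	"fmt"
    	"time"

    	"k8s.io/apimachinery/pkg/util/wait"
    )

    func main() {
    	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
    	defer cancel()

    	attempts := 0
    	wait.PollImmediateUntilWithContext(ctx, 5*time.Second, func(ctx context.Context) (bool, error) {
    		attempts++
    		fmt.Println("attempt", attempts)
    		// Returning true stops polling, mirroring how ensureSecret stops
    		// retrying once the secret is ensured or verification hard-fails.
    		return attempts >= 3, nil
    	})
    }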
@@ -120,6 +120,7 @@ func runControllers(ctx context.Context, config *Config) error {
 		controlConfig.Runtime.NodePasswdFile); err != nil {
 		logrus.Warn(errors.Wrap(err, "error migrating node-password file"))
 	}
+	controlConfig.Runtime.Event = sc.Event
 	controlConfig.Runtime.Core = sc.Core

 	for name, cb := range controlConfig.Runtime.ClusterControllerStarts {