|
|
|
@ -6,20 +6,27 @@ import (
|
|
|
|
|
"errors"
|
|
|
|
|
"os"
|
|
|
|
|
"path/filepath"
|
|
|
|
|
"strings"
|
|
|
|
|
"time"
|
|
|
|
|
|
|
|
|
|
"github.com/k3s-io/k3s/pkg/bootstrap"
|
|
|
|
|
"github.com/k3s-io/k3s/pkg/clientaccess"
|
|
|
|
|
"github.com/k3s-io/k3s/pkg/daemons/config"
|
|
|
|
|
"github.com/k3s-io/kine/pkg/client"
|
|
|
|
|
"github.com/sirupsen/logrus"
|
|
|
|
|
"go.etcd.io/etcd/api/v3/v3rpc/rpctypes"
|
|
|
|
|
"k8s.io/apimachinery/pkg/util/wait"
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
// maxBootstrapWaitAttempts is the number of iterations to wait for another node to populate an empty bootstrap key.
|
|
|
|
|
// After this many attempts, the lock is deleted and the counter reset.
|
|
|
|
|
const maxBootstrapWaitAttempts = 5
|
|
|
|
|
|
|
|
|
|
// Save writes the current ControlRuntimeBootstrap data to the datastore. This contains a complete
|
|
|
|
|
// snapshot of the cluster's CA certs and keys, encryption passphrases, etc - encrypted with the join token.
|
|
|
|
|
// This is used when bootstrapping a cluster from a managed database or external etcd cluster.
|
|
|
|
|
// This is NOT used with embedded etcd, which bootstraps over HTTP.
|
|
|
|
|
func Save(ctx context.Context, config *config.Control, override bool) error {
|
|
|
|
|
logrus.Info("Saving cluster bootstrap data to datastore")
|
|
|
|
|
buf := &bytes.Buffer{}
|
|
|
|
|
if err := bootstrap.ReadFromDisk(buf, &config.Runtime.ControlRuntimeBootstrap); err != nil {
|
|
|
|
|
return err
|
|
|
|
@ -48,13 +55,19 @@ func Save(ctx context.Context, config *config.Control, override bool) error {
|
|
|
|
|
}
|
|
|
|
|
defer storageClient.Close()
|
|
|
|
|
|
|
|
|
|
if _, _, err = getBootstrapKeyFromStorage(ctx, storageClient, normalizedToken, token); err != nil {
|
|
|
|
|
currentKey, _, err := getBootstrapKeyFromStorage(ctx, storageClient, normalizedToken, token)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return err
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// If there's an empty bootstrap key, then we've locked it and can override.
|
|
|
|
|
if currentKey != nil && len(currentKey.Data) == 0 {
|
|
|
|
|
logrus.Info("Bootstrap key lock is held")
|
|
|
|
|
override = true
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if err := storageClient.Create(ctx, storageKey(normalizedToken), data); err != nil {
|
|
|
|
|
if err.Error() == "key exists" {
|
|
|
|
|
logrus.Warn("bootstrap key already exists")
|
|
|
|
|
if override {
|
|
|
|
|
bsd, err := bootstrapKeyData(ctx, storageClient)
|
|
|
|
|
if err != nil {
|
|
|
|
@ -62,9 +75,10 @@ func Save(ctx context.Context, config *config.Control, override bool) error {
|
|
|
|
|
}
|
|
|
|
|
return storageClient.Update(ctx, storageKey(normalizedToken), bsd.Modified, data)
|
|
|
|
|
}
|
|
|
|
|
logrus.Warn("Bootstrap key already exists")
|
|
|
|
|
return nil
|
|
|
|
|
} else if strings.Contains(err.Error(), "not supported for learner") {
|
|
|
|
|
logrus.Debug("skipping bootstrap data save on learner")
|
|
|
|
|
} else if errors.Is(err, rpctypes.ErrGPRCNotSupportedForLearner) {
|
|
|
|
|
logrus.Debug("Skipping bootstrap data save on learner")
|
|
|
|
|
return nil
|
|
|
|
|
}
|
|
|
|
|
return err
|
|
|
|
@ -89,9 +103,12 @@ func bootstrapKeyData(ctx context.Context, storageClient client.Client) (*client
|
|
|
|
|
return &bootstrapList[0], nil
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// storageBootstrap loads data from the datastore into the ControlRuntimeBootstrap struct.
|
|
|
|
|
// The storage key and encryption passphrase are both derived from the join token.
|
|
|
|
|
// token is either passed.
|
|
|
|
|
// storageBootstrap loads data from the datastore's bootstrap key into the
|
|
|
|
|
// ControlRuntimeBootstrap struct. The storage key and encryption passphrase are both derived
|
|
|
|
|
// from the join token. If no bootstrap key exists, indicating that data needs to be written
|
|
|
|
|
// back to the datastore, this function will set c.saveBootstrap to true and create an empty
|
|
|
|
|
// bootstrap key as a lock. This function will not return successfully until either the
|
|
|
|
|
// bootstrap key has been locked, or data is read into the struct.
|
|
|
|
|
func (c *Cluster) storageBootstrap(ctx context.Context) error {
|
|
|
|
|
if err := c.startStorage(ctx); err != nil {
|
|
|
|
|
return err
|
|
|
|
@ -110,7 +127,12 @@ func (c *Cluster) storageBootstrap(ctx context.Context) error {
|
|
|
|
|
return err
|
|
|
|
|
}
|
|
|
|
|
if tokenFromFile == "" {
|
|
|
|
|
// at this point this is a fresh start in a non managed environment
|
|
|
|
|
// No token on disk or from CLI, but we don't know if there's data in the datastore.
|
|
|
|
|
// Return here and generate new CA certs and tokens. Note that startup will fail
|
|
|
|
|
// later when saving to the datastore if there's already a bootstrap key - but
|
|
|
|
|
// that's AFTER generating CA certs and tokens. If the config is updated to set the
|
|
|
|
|
// matching key, further startups will still be blocked pending cleanup of the
|
|
|
|
|
// "newer" files as per the bootstrap reconciliation code.
|
|
|
|
|
c.saveBootstrap = true
|
|
|
|
|
return nil
|
|
|
|
|
}
|
|
|
|
@ -121,34 +143,81 @@ func (c *Cluster) storageBootstrap(ctx context.Context) error {
|
|
|
|
|
return err
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
attempts := 0
|
|
|
|
|
tokenKey := storageKey(normalizedToken)
|
|
|
|
|
return wait.PollImmediateUntilWithContext(ctx, time.Second, func(ctx context.Context) (bool, error) {
|
|
|
|
|
attempts++
|
|
|
|
|
value, saveBootstrap, err := getBootstrapKeyFromStorage(ctx, storageClient, normalizedToken, token)
|
|
|
|
|
c.saveBootstrap = saveBootstrap
|
|
|
|
|
if err != nil {
|
|
|
|
|
return err
|
|
|
|
|
return false, err
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if value == nil {
|
|
|
|
|
return nil
|
|
|
|
|
// No bootstrap keys found in the datastore - create an empty bootstrap key as a lock to
|
|
|
|
|
// ensure that no other node races us to populate it. If we fail to create the key, then
|
|
|
|
|
// some other node beat us to it and we should just wait for them to finish.
|
|
|
|
|
if err := storageClient.Create(ctx, tokenKey, []byte{}); err != nil {
|
|
|
|
|
if err.Error() == "key exists" {
|
|
|
|
|
logrus.Info("Bootstrap key already locked - waiting for data to be populated by another server")
|
|
|
|
|
return false, nil
|
|
|
|
|
}
|
|
|
|
|
return false, err
|
|
|
|
|
}
|
|
|
|
|
logrus.Info("Bootstrap key locked for initial create")
|
|
|
|
|
return true, nil
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if len(value.Data) == 0 {
|
|
|
|
|
// Empty (locked) bootstrap key found - check to see if we should continue waiting, or
|
|
|
|
|
// delete it and attempt to retake the lock on the next iteration (assuming that the
|
|
|
|
|
// other node failed while holding the lock).
|
|
|
|
|
if attempts >= maxBootstrapWaitAttempts {
|
|
|
|
|
logrus.Info("Bootstrap key lock timed out - deleting lock and retrying")
|
|
|
|
|
attempts = 0
|
|
|
|
|
if err := storageClient.Delete(ctx, tokenKey, value.Modified); err != nil {
|
|
|
|
|
return false, err
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
logrus.Infof("Bootstrap key is locked - waiting for data to be populated by another server")
|
|
|
|
|
}
|
|
|
|
|
return false, nil
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
data, err := decrypt(normalizedToken, value.Data)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return err
|
|
|
|
|
return false, err
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return c.ReconcileBootstrapData(ctx, bytes.NewReader(data), &c.config.Runtime.ControlRuntimeBootstrap, false)
|
|
|
|
|
return true, c.ReconcileBootstrapData(ctx, bytes.NewReader(data), &c.config.Runtime.ControlRuntimeBootstrap, false)
|
|
|
|
|
})
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// getBootstrapKeyFromStorage will list all keys that has prefix /bootstrap and will check for key that is
|
|
|
|
|
// hashed with empty string and will check for any key that is hashed by different token than the one
|
|
|
|
|
// passed to it, it will return error if it finds a key that is hashed with different token and will return
|
|
|
|
|
// value if it finds the key hashed by passed token or empty string
|
|
|
|
|
// value if it finds the key hashed by passed token or empty string.
|
|
|
|
|
// Upon receiving a "not supported for learner" error from etcd, this function will retry until the context is cancelled.
|
|
|
|
|
func getBootstrapKeyFromStorage(ctx context.Context, storageClient client.Client, normalizedToken, oldToken string) (*client.Value, bool, error) {
|
|
|
|
|
emptyStringKey := storageKey("")
|
|
|
|
|
tokenKey := storageKey(normalizedToken)
|
|
|
|
|
bootstrapList, err := storageClient.List(ctx, "/bootstrap", 0)
|
|
|
|
|
|
|
|
|
|
var bootstrapList []client.Value
|
|
|
|
|
var err error
|
|
|
|
|
|
|
|
|
|
if err := wait.PollImmediateUntilWithContext(ctx, 5*time.Second, func(ctx context.Context) (bool, error) {
|
|
|
|
|
bootstrapList, err = storageClient.List(ctx, "/bootstrap", 0)
|
|
|
|
|
if err != nil {
|
|
|
|
|
if errors.Is(err, rpctypes.ErrGPRCNotSupportedForLearner) {
|
|
|
|
|
return false, nil
|
|
|
|
|
}
|
|
|
|
|
return false, err
|
|
|
|
|
}
|
|
|
|
|
return true, nil
|
|
|
|
|
}); err != nil {
|
|
|
|
|
return nil, false, err
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if len(bootstrapList) == 0 {
|
|
|
|
|
return nil, true, nil
|
|
|
|
|
}
|
|
|
|
@ -248,7 +317,7 @@ func doMigrateToken(ctx context.Context, storageClient client.Client, keyValue c
|
|
|
|
|
if err := storageClient.Create(ctx, newTokenKey, encryptedData); err != nil {
|
|
|
|
|
if err.Error() == "key exists" {
|
|
|
|
|
logrus.Warn("bootstrap key exists")
|
|
|
|
|
} else if strings.Contains(err.Error(), "not supported for learner") {
|
|
|
|
|
} else if errors.Is(err, rpctypes.ErrGPRCNotSupportedForLearner) {
|
|
|
|
|
logrus.Debug("skipping bootstrap data save on learner")
|
|
|
|
|
return nil
|
|
|
|
|
} else {
|
|
|
|
|