package cluster import ( "bytes" "context" "encoding/json" "fmt" "io" "net" "os" "path/filepath" "reflect" "strconv" "strings" "time" "github.com/go-test/deep" "github.com/k3s-io/k3s/pkg/bootstrap" "github.com/k3s-io/k3s/pkg/clientaccess" "github.com/k3s-io/k3s/pkg/daemons/config" "github.com/k3s-io/k3s/pkg/etcd" "github.com/k3s-io/k3s/pkg/version" "github.com/k3s-io/kine/pkg/client" "github.com/k3s-io/kine/pkg/endpoint" "github.com/otiai10/copy" "github.com/pkg/errors" "github.com/sirupsen/logrus" ) // Bootstrap attempts to load a managed database driver, if one has been initialized or should be created/joined. // It then checks to see if the cluster needs to load bootstrap data, and if so, loads data into the // ControlRuntimeBoostrap struct, either via HTTP or from the datastore. func (c *Cluster) Bootstrap(ctx context.Context, snapshot bool) error { if err := c.assignManagedDriver(ctx); err != nil { return err } shouldBootstrap, isInitialized, err := c.shouldBootstrapLoad(ctx) if err != nil { return err } c.shouldBootstrap = shouldBootstrap if c.managedDB != nil { if !snapshot { isHTTP := c.config.JoinURL != "" && c.config.Token != "" // For secondary servers, we attempt to connect and reconcile with the datastore. // If that fails we fallback to the local etcd cluster start if isInitialized && isHTTP && c.clientAccessInfo != nil { if err := c.httpBootstrap(ctx); err == nil { logrus.Info("Successfully reconciled with datastore") return nil } logrus.Warnf("Unable to reconcile with datastore: %v", err) } // In the case of etcd, if the database has been initialized, it doesn't // need to be bootstrapped however we still need to check the database // and reconcile the bootstrap data. Below we're starting a temporary // instance of etcd in the event that etcd certificates are unavailable, // reading the data, and comparing that to the data on disk, all the while // starting normal etcd. if isInitialized { if err := c.reconcileEtcd(ctx); err != nil { logrus.Fatalf("Failed to reconcile with temporary etcd: %v", err) } } } } if c.shouldBootstrap { return c.bootstrap(ctx) } return nil } // shouldBootstrapLoad returns true if we need to load ControlRuntimeBootstrap data again and a second boolean // indicating that the server has or has not been initialized, if etcd. This is controlled by a stamp file on // disk that records successful bootstrap using a hash of the join token. func (c *Cluster) shouldBootstrapLoad(ctx context.Context) (bool, bool, error) { // Non-nil managedDB indicates that the database is either initialized, initializing, or joining if c.managedDB != nil { c.config.Runtime.HTTPBootstrap = true isInitialized, err := c.managedDB.IsInitialized(ctx, c.config) if err != nil { return false, false, err } if isInitialized { // If the database is initialized we skip bootstrapping; if the user wants to rejoin a // cluster they need to delete the database. logrus.Infof("Managed %s cluster bootstrap already complete and initialized", c.managedDB.EndpointName()) // This is a workaround for an issue that can be caused by terminating the cluster bootstrap before // etcd is promoted from learner. Odds are we won't need this info, and we don't want to fail startup // due to failure to retrieve it as this will break cold cluster restart, so we ignore any errors. if c.config.JoinURL != "" && c.config.Token != "" { c.clientAccessInfo, _ = clientaccess.ParseAndValidateToken(c.config.JoinURL, c.config.Token, clientaccess.WithUser("server")) } return false, true, nil } else if c.config.JoinURL == "" { // Not initialized, not joining - must be initializing (cluster-init) logrus.Infof("Managed %s cluster initializing", c.managedDB.EndpointName()) return false, false, nil } else { // Not initialized, but have a Join URL - fail if there's no token; if there is then validate it. if c.config.Token == "" { return false, false, errors.New(version.ProgramUpper + "_TOKEN is required to join a cluster") } // Fail if the token isn't syntactically valid, or if the CA hash on the remote server doesn't match // the hash in the token. The password isn't actually checked until later when actually bootstrapping. info, err := clientaccess.ParseAndValidateToken(c.config.JoinURL, c.config.Token, clientaccess.WithUser("server")) if err != nil { return false, false, err } logrus.Infof("Managed %s cluster not yet initialized", c.managedDB.EndpointName()) c.clientAccessInfo = info } } // No errors and no bootstrap stamp, need to bootstrap. return true, false, nil } // isDirEmpty checks to see if the given directory // is empty. func isDirEmpty(name string) (bool, error) { f, err := os.Open(name) if err != nil { return false, err } defer f.Close() _, err = f.Readdir(1) if err == io.EOF { return true, nil } return false, err } // certDirsExist checks to see if the directories // that contain the needed certificates exist. func (c *Cluster) certDirsExist() error { bootstrapDirs := []string{ "cred", "tls", "tls/etcd", } const ( missingDir = "missing %s directory from ${data-dir}" emptyDir = "%s directory is empty" ) for _, dir := range bootstrapDirs { d := filepath.Join(c.config.DataDir, dir) if _, err := os.Stat(d); os.IsNotExist(err) { errMsg := fmt.Sprintf(missingDir, d) logrus.Debug(errMsg) return errors.New(errMsg) } ok, err := isDirEmpty(d) if err != nil { return err } if ok { errMsg := fmt.Sprintf(emptyDir, d) logrus.Debug(errMsg) return errors.New(errMsg) } } return nil } // migrateBootstrapData migrates bootstrap data from the old format to the new format. func migrateBootstrapData(ctx context.Context, data io.Reader, files bootstrap.PathsDataformat) error { logrus.Info("Migrating bootstrap data to new format") var oldBootstrapData map[string][]byte if err := json.NewDecoder(data).Decode(&oldBootstrapData); err != nil { // if this errors here, we can assume that the error being thrown // is not related to needing to perform a migration. return err } // iterate through the old bootstrap data structure // and copy into the new bootstrap data structure for k, v := range oldBootstrapData { files[k] = bootstrap.File{ Content: v, } } return nil } const systemTimeSkew = int64(3) // isMigrated checks to see if the given bootstrap data // is in the latest format. func isMigrated(buf io.ReadSeeker, files *bootstrap.PathsDataformat) bool { buf.Seek(0, 0) defer buf.Seek(0, 0) if err := json.NewDecoder(buf).Decode(files); err != nil { // This will fail if data is being pulled from old an cluster since // older clusters used a map[string][]byte for the data structure. // Therefore, we need to perform a migration to the newer bootstrap // format; bootstrap.BootstrapFile. return false } return true } // ReconcileBootstrapData is called before any data is saved to the // datastore or locally. It checks to see if the contents of the // bootstrap data in the datastore is newer than on disk or different // and depending on where the difference is. If the datastore is newer, // then the data will be written to disk. If the data on disk is newer, // k3s will exit with an error. func (c *Cluster) ReconcileBootstrapData(ctx context.Context, buf io.ReadSeeker, crb *config.ControlRuntimeBootstrap, isHTTP bool) error { logrus.Info("Reconciling bootstrap data between datastore and disk") if err := c.certDirsExist(); err != nil { // we need to see if the data has been migrated before writing to disk. This // is because the data may have been given to us via the HTTP bootstrap process // from an older version of k3s. That version might not have the new data format // and we should write the correct format. files := make(bootstrap.PathsDataformat) if !isMigrated(buf, &files) { if err := migrateBootstrapData(ctx, buf, files); err != nil { return err } buf.Seek(0, 0) } logrus.Debugf("One or more certificate directories do not exist; writing data to disk from datastore") return bootstrap.WriteToDiskFromStorage(files, crb) } var dbRawData []byte if c.managedDB != nil && !isHTTP { token := c.config.Token if token == "" { tokenFromFile, err := readTokenFromFile(c.config.Runtime.ServerToken, c.config.Runtime.ServerCA, c.config.DataDir) if err != nil { return err } if tokenFromFile == "" { // at this point this is a fresh start in a non-managed environment c.saveBootstrap = true return nil } token = tokenFromFile } normalizedToken, err := normalizeToken(token) if err != nil { return err } var value *client.Value storageClient, err := client.New(c.config.Runtime.EtcdConfig) if err != nil { return err } defer storageClient.Close() ticker := time.NewTicker(5 * time.Second) defer ticker.Stop() RETRY: for { value, c.saveBootstrap, err = getBootstrapKeyFromStorage(ctx, storageClient, normalizedToken, token) if err != nil { if strings.Contains(err.Error(), "not supported for learner") { for range ticker.C { continue RETRY } } return err } if value == nil { return nil } dbRawData, err = decrypt(normalizedToken, value.Data) if err != nil { return err } break } buf = bytes.NewReader(dbRawData) } paths, err := bootstrap.ObjToMap(crb) if err != nil { return err } files := make(bootstrap.PathsDataformat) if !isMigrated(buf, &files) { if err := migrateBootstrapData(ctx, buf, files); err != nil { return err } buf.Seek(0, 0) } // Compare on-disk content to the datastore. // If the files differ and the timestamp in the datastore is newer, data on disk will be updated. // If the files differ and the timestamp on disk is newer, an error will be raised listing the conflicting files. var updateDisk bool var newerOnDisk []string for pathKey, fileData := range files { path, ok := paths[pathKey] if !ok || path == "" { logrus.Warnf("Unable to lookup path to reconcile %s", pathKey) continue } logrus.Debugf("Reconciling %s at '%s'", pathKey, path) updated, newer, err := isNewerFile(path, fileData) if err != nil { return errors.Wrapf(err, "failed to get update status of %s", pathKey) } if newer { newerOnDisk = append(newerOnDisk, path) } updateDisk = updateDisk || updated } if c.config.ClusterReset { updateDisk = true serverTLSDir := filepath.Join(c.config.DataDir, "tls") tlsBackupDir := filepath.Join(c.config.DataDir, "tls-"+strconv.Itoa(int(time.Now().Unix()))) logrus.Infof("Cluster reset: backing up certificates directory to " + tlsBackupDir) if _, err := os.Stat(serverTLSDir); err != nil { return errors.Wrap(err, "cluster reset failed to stat server TLS dir") } if err := copy.Copy(serverTLSDir, tlsBackupDir); err != nil { return errors.Wrap(err, "cluster reset failed to back up server TLS dir") } } else if len(newerOnDisk) > 0 { logrus.Fatal(strings.Join(newerOnDisk, ", ") + " newer than datastore and could cause a cluster outage. Remove the file(s) from disk and restart to be recreated from datastore.") } if updateDisk { logrus.Warn("Updating bootstrap data on disk from datastore") return bootstrap.WriteToDiskFromStorage(files, crb) } return nil } // isNewerFile compares the file from disk and datastore, and returns // update status. func isNewerFile(path string, file bootstrap.File) (updated bool, newerOnDisk bool, _ error) { f, err := os.Open(path) if err != nil { if os.IsNotExist(err) { logrus.Warn(path + " doesn't exist. continuing...") return true, false, nil } return false, false, errors.Wrapf(err, "reconcile failed to open") } defer f.Close() data, err := io.ReadAll(f) if err != nil { return false, false, errors.Wrapf(err, "reconcile failed to read") } if bytes.Equal(file.Content, data) { return false, false, nil } info, err := f.Stat() if err != nil { return false, false, errors.Wrapf(err, "reconcile failed to stat") } if info.ModTime().Unix()-file.Timestamp.Unix() >= systemTimeSkew { return true, true, nil } logrus.Warn(path + " will be updated from the datastore.") return true, false, nil } // httpBootstrap retrieves bootstrap data (certs and keys, etc) from the remote server via HTTP // and loads it into the ControlRuntimeBootstrap struct. Unlike the storage bootstrap path, // this data does not need to be decrypted since it is generated on-demand by an existing server. func (c *Cluster) httpBootstrap(ctx context.Context) error { content, err := c.clientAccessInfo.Get("/v1-" + version.Program + "/server-bootstrap") if err != nil { return err } return c.ReconcileBootstrapData(ctx, bytes.NewReader(content), &c.config.Runtime.ControlRuntimeBootstrap, true) } func (c *Cluster) retrieveInitializedDBdata(ctx context.Context) (*bytes.Buffer, error) { var buf bytes.Buffer if err := bootstrap.ReadFromDisk(&buf, &c.config.Runtime.ControlRuntimeBootstrap); err != nil { return nil, err } return &buf, nil } // bootstrap performs cluster bootstrapping, either via HTTP (for managed databases) or direct load from datastore. func (c *Cluster) bootstrap(ctx context.Context) error { c.joining = true // bootstrap managed database via HTTPS if c.config.Runtime.HTTPBootstrap { // Assuming we should just compare on managed databases if err := c.compareConfig(); err != nil { return errors.Wrap(err, "failed to validate server configuration") } return c.httpBootstrap(ctx) } // Bootstrap directly from datastore return c.storageBootstrap(ctx) } // Snapshot is a proxy method to call the snapshot method on the managedb // interface for etcd clusters. func (c *Cluster) Snapshot(ctx context.Context, config *config.Control) error { if c.managedDB == nil { return errors.New("unable to perform etcd snapshot on non-etcd system") } return c.managedDB.Snapshot(ctx, config) } // compareConfig verifies that the config of the joining control plane node coincides with the cluster's config func (c *Cluster) compareConfig() error { token := c.config.AgentToken if token == "" { token = c.config.Token } agentClientAccessInfo, err := clientaccess.ParseAndValidateToken(c.config.JoinURL, token, clientaccess.WithUser("node")) if err != nil { return err } serverConfig, err := agentClientAccessInfo.Get("/v1-" + version.Program + "/config") if err != nil { return err } clusterControl := &config.Control{} if err := json.Unmarshal(serverConfig, clusterControl); err != nil { return err } // We are saving IPs of ClusterIPRanges and ServiceIPRanges in 4-bytes representation but json decodes in 16-byte ipsTo16Bytes(c.config.CriticalControlArgs.ClusterIPRanges) ipsTo16Bytes(c.config.CriticalControlArgs.ServiceIPRanges) // If the remote server is down-level and did not fill the egress-selector // mode, use the local value to allow for temporary mismatch during upgrades. if clusterControl.CriticalControlArgs.EgressSelectorMode == "" { clusterControl.CriticalControlArgs.EgressSelectorMode = c.config.CriticalControlArgs.EgressSelectorMode } if diff := deep.Equal(c.config.CriticalControlArgs, clusterControl.CriticalControlArgs); diff != nil { rc := reflect.ValueOf(clusterControl.CriticalControlArgs).Type() for _, d := range diff { field := strings.Split(d, ":")[0] v, _ := rc.FieldByName(field) if cliTag, found := v.Tag.Lookup("cli"); found { logrus.Warnf("critical configuration mismatched: %s", cliTag) } else { logrus.Warnf("critical configuration mismatched: %s", field) } } return errors.New("critical configuration value mismatch between servers") } return nil } // ipsTo16Bytes makes sure the IPs in the []*net.IPNet slice are represented in 16-byte format func ipsTo16Bytes(mySlice []*net.IPNet) { for _, ipNet := range mySlice { ipNet.IP = ipNet.IP.To16() } } // reconcileEtcd starts a temporary single-member etcd cluster using a copy of the // etcd database, and uses it to reconcile bootstrap data. This is necessary // because the full etcd cluster may not have quorum during startup, but we still // need to extract data from the datastore. func (c *Cluster) reconcileEtcd(ctx context.Context) error { logrus.Info("Starting temporary etcd to reconcile with datastore") tempConfig := endpoint.ETCDConfig{Endpoints: []string{"http://127.0.0.1:2399"}} originalConfig := c.config.Runtime.EtcdConfig c.config.Runtime.EtcdConfig = tempConfig reconcileCtx, cancel := context.WithCancel(ctx) defer func() { cancel() c.config.Runtime.EtcdConfig = originalConfig }() e := etcd.NewETCD() if err := e.SetControlConfig(reconcileCtx, c.config); err != nil { return err } if err := e.StartEmbeddedTemporary(reconcileCtx); err != nil { return err } for { if err := e.Test(reconcileCtx); err != nil && !errors.Is(err, etcd.ErrNotMember) { logrus.Infof("Failed to test temporary data store connection: %v", err) } else { logrus.Info(e.EndpointName() + " temporary data store connection OK") break } select { case <-time.After(5 * time.Second): case <-reconcileCtx.Done(): break } } data, err := c.retrieveInitializedDBdata(reconcileCtx) if err != nil { return err } return c.ReconcileBootstrapData(reconcileCtx, bytes.NewReader(data.Bytes()), &c.config.Runtime.ControlRuntimeBootstrap, false) }