2020-05-05 21:59:15 +00:00
|
|
|
package cluster
|
|
|
|
|
2020-09-24 06:29:25 +00:00
|
|
|
// A managed database is one whose lifecycle we control - initializing the cluster, adding/removing members, taking snapshots, etc.
|
|
|
|
// This is currently just used for the embedded etcd datastore. Kine and other external etcd clusters are NOT considered managed.
|
|
|
|
|
2020-05-05 21:59:15 +00:00
|
|
|
import (
|
|
|
|
"context"
|
2020-09-30 00:53:31 +00:00
|
|
|
"fmt"
|
2020-05-05 21:59:15 +00:00
|
|
|
"net/http"
|
2021-03-06 10:29:57 +00:00
|
|
|
"net/url"
|
2020-09-30 00:53:31 +00:00
|
|
|
"os"
|
2020-05-05 21:59:15 +00:00
|
|
|
"time"
|
|
|
|
|
2022-03-02 23:47:27 +00:00
|
|
|
"github.com/k3s-io/k3s/pkg/cluster/managed"
|
|
|
|
"github.com/k3s-io/k3s/pkg/etcd"
|
|
|
|
"github.com/k3s-io/k3s/pkg/nodepassword"
|
|
|
|
"github.com/k3s-io/k3s/pkg/version"
|
2020-05-05 21:59:15 +00:00
|
|
|
"github.com/sirupsen/logrus"
|
2021-06-22 20:42:34 +00:00
|
|
|
apierrors "k8s.io/apimachinery/pkg/api/errors"
|
2024-04-08 18:04:27 +00:00
|
|
|
"k8s.io/apimachinery/pkg/util/wait"
|
2020-05-05 21:59:15 +00:00
|
|
|
)
|
|
|
|
|
2020-09-24 06:29:25 +00:00
|
|
|
// testClusterDB returns a channel that will be closed when the datastore connection is available.
|
|
|
|
// The datastore is tested for readiness every 5 seconds until the test succeeds.
|
2020-05-05 21:59:15 +00:00
|
|
|
func (c *Cluster) testClusterDB(ctx context.Context) (<-chan struct{}, error) {
|
|
|
|
result := make(chan struct{})
|
|
|
|
if c.managedDB == nil {
|
|
|
|
close(result)
|
|
|
|
return result, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
go func() {
|
|
|
|
defer close(result)
|
|
|
|
for {
|
2020-10-27 18:06:26 +00:00
|
|
|
if err := c.managedDB.Test(ctx); err != nil {
|
2020-05-05 21:59:15 +00:00
|
|
|
logrus.Infof("Failed to test data store connection: %v", err)
|
|
|
|
} else {
|
2020-09-21 16:56:03 +00:00
|
|
|
logrus.Info(c.managedDB.EndpointName() + " data store connection OK")
|
2020-05-05 21:59:15 +00:00
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
select {
|
|
|
|
case <-time.After(5 * time.Second):
|
|
|
|
case <-ctx.Done():
|
|
|
|
return
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}()
|
|
|
|
|
|
|
|
return result, nil
|
|
|
|
}
|
|
|
|
|
2020-09-24 06:29:25 +00:00
|
|
|
// start starts the database, unless a cluster reset has been requested, in which case
|
|
|
|
// it does that instead.
|
2020-05-05 21:59:15 +00:00
|
|
|
func (c *Cluster) start(ctx context.Context) error {
|
|
|
|
if c.managedDB == nil {
|
|
|
|
return nil
|
|
|
|
}
|
2022-04-27 20:44:15 +00:00
|
|
|
rebootstrap := func() error {
|
|
|
|
return c.storageBootstrap(ctx)
|
|
|
|
}
|
2020-05-05 21:59:15 +00:00
|
|
|
|
2023-09-21 18:53:50 +00:00
|
|
|
resetDone, err := c.managedDB.IsReset()
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2022-04-27 20:44:15 +00:00
|
|
|
if c.config.ClusterReset {
|
|
|
|
// If we're restoring from a snapshot, don't check the reset-flag - just reset and restore.
|
|
|
|
if c.config.ClusterResetRestorePath != "" {
|
|
|
|
return c.managedDB.Reset(ctx, rebootstrap)
|
2021-03-11 20:07:40 +00:00
|
|
|
}
|
2023-09-21 18:53:50 +00:00
|
|
|
|
2022-04-27 20:44:15 +00:00
|
|
|
// If the reset-flag doesn't exist, reset. This will create the reset-flag if it succeeds.
|
2023-09-21 18:53:50 +00:00
|
|
|
if !resetDone {
|
2021-05-05 15:40:04 +00:00
|
|
|
return c.managedDB.Reset(ctx, rebootstrap)
|
2020-09-30 00:53:31 +00:00
|
|
|
}
|
2023-09-21 18:53:50 +00:00
|
|
|
|
2022-04-27 20:44:15 +00:00
|
|
|
// The reset-flag exists, ask the user to remove it if they want to reset again.
|
2023-09-21 18:53:50 +00:00
|
|
|
return fmt.Errorf("Managed etcd cluster membership was previously reset, please remove the cluster-reset flag and start %s normally. "+
|
|
|
|
"If you need to perform another cluster reset, you must first manually delete the file at %s", version.Program, c.managedDB.ResetFile())
|
2020-05-05 21:59:15 +00:00
|
|
|
}
|
2021-03-11 20:07:40 +00:00
|
|
|
|
2023-09-21 18:53:50 +00:00
|
|
|
if resetDone {
|
|
|
|
// If the cluster was reset, we need to delete the node passwd secret in case the node
|
2022-04-27 20:44:15 +00:00
|
|
|
// password from the previously restored snapshot differs from the current password on disk.
|
2023-04-20 22:28:57 +00:00
|
|
|
c.config.Runtime.ClusterControllerStarts["node-password-secret-cleanup"] = c.deleteNodePasswdSecret
|
2021-06-22 20:42:34 +00:00
|
|
|
}
|
2020-05-05 21:59:15 +00:00
|
|
|
|
2023-09-21 18:53:50 +00:00
|
|
|
// Starting the managed database will clear the reset-flag if set
|
2020-05-05 21:59:15 +00:00
|
|
|
return c.managedDB.Start(ctx, c.clientAccessInfo)
|
|
|
|
}
|
|
|
|
|
2023-09-21 18:53:50 +00:00
|
|
|
// registerDBHandlers registers routes for database info with the http request handler
|
|
|
|
func (c *Cluster) registerDBHandlers(handler http.Handler) (http.Handler, error) {
|
2020-05-05 21:59:15 +00:00
|
|
|
if c.managedDB == nil {
|
2020-09-24 05:40:00 +00:00
|
|
|
return handler, nil
|
2020-05-05 21:59:15 +00:00
|
|
|
}
|
|
|
|
|
2023-09-21 18:53:50 +00:00
|
|
|
return c.managedDB.Register(handler)
|
2020-05-05 21:59:15 +00:00
|
|
|
}
|
|
|
|
|
2020-09-29 01:13:55 +00:00
|
|
|
// assignManagedDriver assigns a driver based on a number of different configuration variables.
|
|
|
|
// If a driver has been initialized it is used.
|
|
|
|
// If no specific endpoint has been requested and creating or joining has been requested,
|
|
|
|
// we use the default driver.
|
|
|
|
// If none of the above are true, no managed driver is assigned.
|
2020-05-05 21:59:15 +00:00
|
|
|
func (c *Cluster) assignManagedDriver(ctx context.Context) error {
|
2020-09-24 06:29:25 +00:00
|
|
|
// Check all managed drivers for an initialized database on disk; use one if found
|
2020-05-05 21:59:15 +00:00
|
|
|
for _, driver := range managed.Registered() {
|
2023-09-21 18:53:50 +00:00
|
|
|
if err := driver.SetControlConfig(c.config); err != nil {
|
2020-05-05 21:59:15 +00:00
|
|
|
return err
|
|
|
|
}
|
2023-09-21 18:53:50 +00:00
|
|
|
if ok, err := driver.IsInitialized(); err != nil {
|
|
|
|
return err
|
|
|
|
} else if ok {
|
2020-09-29 01:13:55 +00:00
|
|
|
c.managedDB = driver
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-09-24 06:29:25 +00:00
|
|
|
// If we have been asked to initialize or join a cluster, do so using the default managed database.
|
2020-05-05 21:59:15 +00:00
|
|
|
if c.config.Datastore.Endpoint == "" && (c.config.ClusterInit || (c.config.Token != "" && c.config.JoinURL != "")) {
|
2023-09-21 18:53:50 +00:00
|
|
|
c.managedDB = managed.Default()
|
2020-05-05 21:59:15 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
2021-02-12 15:35:57 +00:00
|
|
|
|
2024-03-19 22:01:36 +00:00
|
|
|
// setupEtcdProxy starts a goroutine to periodically update the etcd proxy with the current list of
|
2023-02-13 20:00:52 +00:00
|
|
|
// cluster client URLs, as retrieved from etcd.
|
2021-02-12 15:35:57 +00:00
|
|
|
func (c *Cluster) setupEtcdProxy(ctx context.Context, etcdProxy etcd.Proxy) {
|
|
|
|
if c.managedDB == nil {
|
|
|
|
return
|
|
|
|
}
|
2024-04-08 18:04:27 +00:00
|
|
|
// We use Poll here instead of Until because we want to wait the interval before running the function.
|
|
|
|
go wait.PollUntilWithContext(ctx, 30*time.Second, func(ctx context.Context) (bool, error) {
|
|
|
|
clientURLs, err := c.managedDB.GetMembersClientURLs(ctx)
|
|
|
|
if err != nil {
|
|
|
|
logrus.Warnf("Failed to get etcd ClientURLs: %v", err)
|
|
|
|
return false, nil
|
|
|
|
}
|
|
|
|
// client URLs are a full URI, but the proxy only wants host:port
|
|
|
|
for i, c := range clientURLs {
|
|
|
|
u, err := url.Parse(c)
|
2021-02-12 15:35:57 +00:00
|
|
|
if err != nil {
|
2024-04-08 18:04:27 +00:00
|
|
|
logrus.Warnf("Failed to parse etcd ClientURL: %v", err)
|
|
|
|
return false, nil
|
2021-02-12 15:35:57 +00:00
|
|
|
}
|
2024-04-08 18:04:27 +00:00
|
|
|
clientURLs[i] = u.Host
|
2021-02-12 15:35:57 +00:00
|
|
|
}
|
2024-04-08 18:04:27 +00:00
|
|
|
etcdProxy.Update(clientURLs)
|
|
|
|
return false, nil
|
|
|
|
})
|
2021-02-12 15:35:57 +00:00
|
|
|
}
|
2021-06-22 20:42:34 +00:00
|
|
|
|
|
|
|
// deleteNodePasswdSecret wipes out the node password secret after restoration
|
|
|
|
func (c *Cluster) deleteNodePasswdSecret(ctx context.Context) {
|
2023-04-20 22:28:57 +00:00
|
|
|
nodeName := os.Getenv("NODE_NAME")
|
|
|
|
secretsClient := c.config.Runtime.Core.Core().V1().Secret()
|
|
|
|
if err := nodepassword.Delete(secretsClient, nodeName); err != nil {
|
|
|
|
if apierrors.IsNotFound(err) {
|
2024-03-19 22:01:36 +00:00
|
|
|
logrus.Debugf("Node password secret is not found for node %s", nodeName)
|
2023-04-20 22:28:57 +00:00
|
|
|
return
|
2021-06-22 20:42:34 +00:00
|
|
|
}
|
2023-04-20 22:28:57 +00:00
|
|
|
logrus.Warnf("failed to delete old node password secret: %v", err)
|
2021-06-22 20:42:34 +00:00
|
|
|
}
|
|
|
|
}
|