package cluster

// A managed database is one whose lifecycle we control - initializing the cluster, adding/removing members, taking snapshots, etc.
// This is currently just used for the embedded etcd datastore. Kine and other external etcd clusters are NOT considered managed.

import (
	"context"
	"fmt"
	"net/http"
	"net/url"
	"os"
	"strings"
	"time"

	"github.com/k3s-io/k3s/pkg/cluster/managed"
	"github.com/k3s-io/k3s/pkg/etcd"
	"github.com/k3s-io/k3s/pkg/nodepassword"
	"github.com/k3s-io/k3s/pkg/version"
	"github.com/k3s-io/kine/pkg/endpoint"
	"github.com/sirupsen/logrus"
	apierrors "k8s.io/apimachinery/pkg/api/errors"
)
// testClusterDB returns a channel that will be closed when the datastore connection is available.
// The datastore is tested for readiness every 5 seconds until the test succeeds.
func (c *Cluster) testClusterDB(ctx context.Context) (<-chan struct{}, error) {
	result := make(chan struct{})
	if c.managedDB == nil {
		close(result)
		return result, nil
	}
	go func() {
		defer close(result)
		for {
			if err := c.managedDB.Test(ctx); err != nil {
				logrus.Infof("Failed to test data store connection: %v", err)
			} else {
				logrus.Info(c.managedDB.EndpointName() + " data store connection OK")
				return
			}
			select {
			case <-time.After(5 * time.Second):
			case <-ctx.Done():
				return
			}
		}
	}()
	return result, nil
}
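
// The helper below is a hypothetical usage sketch, not part of the original
// file: it shows how a caller might block on the channel returned by
// testClusterDB, bailing out early if the context is cancelled.
func (c *Cluster) waitForDatastoreReady(ctx context.Context) error {
	ready, err := c.testClusterDB(ctx)
	if err != nil {
		return err
	}
	select {
	case <-ready:
		// Channel closed: the datastore connection test succeeded,
		// or managedDB is nil and there is nothing to wait for.
		return nil
	case <-ctx.Done():
		return ctx.Err()
	}
}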
// start starts the database, unless a cluster reset has been requested, in which case
// it does that instead.
func (c *Cluster) start(ctx context.Context) error {
	if c.managedDB == nil {
		return nil
	}

	resetFile := etcd.ResetFile(c.config)
	rebootstrap := func() error {
		return c.storageBootstrap(ctx)
	}

	if c.config.ClusterReset {
		// If we're restoring from a snapshot, don't check the reset-flag - just reset and restore.
		if c.config.ClusterResetRestorePath != "" {
			return c.managedDB.Reset(ctx, rebootstrap)
		}
		// If the reset-flag doesn't exist, reset. This will create the reset-flag if it succeeds.
		if _, err := os.Stat(resetFile); err != nil {
			if !os.IsNotExist(err) {
				return err
			}
			return c.managedDB.Reset(ctx, rebootstrap)
		}
		// The reset-flag exists; ask the user to remove it if they want to reset again.
		return fmt.Errorf("Managed etcd cluster membership was previously reset, please remove the cluster-reset flag and start %s normally. If you need to perform another cluster reset, you must first manually delete the %s file", version.Program, resetFile)
	}

	// The reset-flag exists but we're not resetting; remove it.
	if _, err := os.Stat(resetFile); err == nil {
		// Before removing the reset file we need to delete the node password secret, in case the node
		// password from the previously restored snapshot differs from the current password on disk.
		c.config.Runtime.ClusterControllerStarts["node-password-secret-cleanup"] = c.deleteNodePasswdSecret
		os.Remove(resetFile)
	}

	return c.managedDB.Start(ctx, c.clientAccessInfo)
}
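
// resetFileState is a hypothetical helper, not in the original file, that makes
// explicit the three reset-flag states distinguished by start above: flag absent
// (a reset may proceed and will create the flag), flag present (a second reset is
// refused), or an unexpected stat error that is surfaced to the caller.
func resetFileState(path string) (bool, error) {
	if _, err := os.Stat(path); err != nil {
		if os.IsNotExist(err) {
			return false, nil // flag absent: a reset may proceed
		}
		return false, err // unexpected error: surface it
	}
	return true, nil // flag present: a previous reset already completed
}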
// initClusterDB registers routes for database info with the http request handler
func (c *Cluster) initClusterDB(ctx context.Context, handler http.Handler) (http.Handler, error) {
	if c.managedDB == nil {
		return handler, nil
	}
	if !strings.HasPrefix(c.config.Datastore.Endpoint, c.managedDB.EndpointName()+"://") {
		c.config.Datastore = endpoint.Config{
			Endpoint: c.managedDB.EndpointName(),
		}
	}
	return c.managedDB.Register(ctx, c.config, handler)
}
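
// hasManagedEndpointPrefix is a hypothetical extraction, not in the original
// file, of the check above: the datastore config is replaced with the bare
// driver endpoint name unless it already targets that driver (e.g. "etcd://...").
func hasManagedEndpointPrefix(datastoreEndpoint, driverName string) bool {
	return strings.HasPrefix(datastoreEndpoint, driverName+"://")
}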
// assignManagedDriver assigns a driver based on a number of different configuration variables.
// If a driver has been initialized it is used.
// If the configured endpoint matches the name of a driver, that driver is used.
// If no specific endpoint has been requested and creating or joining has been requested,
// we use the default driver.
// If none of the above are true, no managed driver is assigned.
func (c *Cluster) assignManagedDriver(ctx context.Context) error {
	// Check all managed drivers for an initialized database on disk; use one if found
	for _, driver := range managed.Registered() {
		if ok, err := driver.IsInitialized(ctx, c.config); err != nil {
			return err
		} else if ok {
			c.managedDB = driver
			return nil
		}
	}

	// This is needed to allow downstreams to override driver selection logic by
	// setting ServerConfig.Datastore.Endpoint such that it will match a driver's EndpointName
	endpointType := strings.SplitN(c.config.Datastore.Endpoint, ":", 2)[0]
	for _, driver := range managed.Registered() {
		if endpointType == driver.EndpointName() {
			c.managedDB = driver
			return nil
		}
	}

	// If we have been asked to initialize or join a cluster, do so using the default managed database.
	if c.config.Datastore.Endpoint == "" && (c.config.ClusterInit || (c.config.Token != "" && c.config.JoinURL != "")) {
		for _, driver := range managed.Registered() {
			if driver.EndpointName() == managed.Default() {
				c.managedDB = driver
				return nil
			}
		}
	}
	return nil
}
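
// forceManagedDriverByEndpoint is a hypothetical illustration, not part of the
// original file, of the downstream override described above: setting the
// datastore endpoint to a registered driver's EndpointName (assumed here to be
// "etcd" for the embedded etcd driver) steers assignManagedDriver to that driver.
func forceManagedDriverByEndpoint(ctx context.Context, c *Cluster) error {
	c.config.Datastore.Endpoint = "etcd" // assumption: matches a registered driver's EndpointName()
	return c.assignManagedDriver(ctx)
}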
// setupEtcdProxy periodically updates the etcd proxy with the current list of
// cluster client URLs, as retrieved from etcd.
func (c *Cluster) setupEtcdProxy(ctx context.Context, etcdProxy etcd.Proxy) {
	if c.managedDB == nil {
		return
	}
	go func() {
		t := time.NewTicker(30 * time.Second)
		defer t.Stop()
		for range t.C {
			newAddresses, err := c.managedDB.GetMembersClientURLs(ctx)
			if err != nil {
				logrus.Warnf("failed to get etcd client URLs: %v", err)
				continue
			}
			// client URLs are a full URI, but the proxy only wants host:port
			var hosts []string
			for _, address := range newAddresses {
				u, err := url.Parse(address)
				if err != nil {
					logrus.Warnf("failed to parse etcd client URL: %v", err)
					continue
				}
				hosts = append(hosts, u.Host)
			}
			etcdProxy.Update(hosts)
		}
	}()
}
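
// clientURLToHost is a hypothetical standalone version, not in the original
// file, of the conversion performed in the loop above: etcd advertises full
// client URIs, but the proxy only wants the host:port portion.
func clientURLToHost(address string) (string, error) {
	u, err := url.Parse(address)
	if err != nil {
		return "", err
	}
	// e.g. "https://10.0.0.1:2379" -> "10.0.0.1:2379"
	return u.Host, nil
}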
// deleteNodePasswdSecret wipes out the node password secret after restoration
func (c *Cluster) deleteNodePasswdSecret(ctx context.Context) {
	nodeName := os.Getenv("NODE_NAME")
	secretsClient := c.config.Runtime.Core.Core().V1().Secret()
	if err := nodepassword.Delete(secretsClient, nodeName); err != nil {
		if apierrors.IsNotFound(err) {
			logrus.Debugf("node password secret is not found for node %s", nodeName)
			return
		}
		logrus.Warnf("failed to delete old node password secret: %v", err)
	}
}