mirror of https://github.com/k3s-io/k3s
Add ability to perform an etcd on-demand snapshot via cli (#2819)
* add ability to perform an etcd on-demand snapshot via cli (pull/2846/head)
parent
84f6655342
commit
13229019f8
|
@ -0,0 +1,22 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"os"
|
||||
|
||||
"github.com/rancher/k3s/pkg/cli/cmds"
|
||||
"github.com/rancher/k3s/pkg/cli/etcdsnapshot"
|
||||
"github.com/rancher/k3s/pkg/configfilearg"
|
||||
"github.com/sirupsen/logrus"
|
||||
"github.com/urfave/cli"
|
||||
)
|
||||
|
||||
func main() {
|
||||
app := cmds.NewApp()
|
||||
app.Commands = []cli.Command{
|
||||
cmds.NewEtcdSnapshotCommand(etcdsnapshot.Run),
|
||||
}
|
||||
|
||||
if err := app.Run(configfilearg.MustParse(os.Args)); err != nil {
|
||||
logrus.Fatal(err)
|
||||
}
|
||||
}
|
|
@ -35,10 +35,10 @@ func main() {
|
|||
cmds.NewCRICTL(externalCLIAction("crictl", dataDir)),
|
||||
cmds.NewCtrCommand(externalCLIAction("ctr", dataDir)),
|
||||
cmds.NewCheckConfigCommand(externalCLIAction("check-config", dataDir)),
|
||||
cmds.NewEtcdSnapshotCommand(wrap(version.Program+"-"+cmds.EtcdSnapshotCommand, dataDir, os.Args)),
|
||||
}
|
||||
|
||||
err := app.Run(os.Args)
|
||||
if err != nil {
|
||||
if err := app.Run(os.Args); err != nil {
|
||||
logrus.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
@ -96,7 +96,7 @@ func externalCLI(cli, dataDir string, args []string) error {
|
|||
return stageAndRun(dataDir, cli, append([]string{cli}, args...))
|
||||
}
|
||||
|
||||
func wrap(cmd string, dataDir string, args []string) func(ctx *cli.Context) error {
|
||||
func wrap(cmd, dataDir string, args []string) func(ctx *cli.Context) error {
|
||||
return func(ctx *cli.Context) error {
|
||||
return stageAndRunCLI(ctx, cmd, dataDir, args)
|
||||
}
|
||||
|
@ -111,7 +111,7 @@ func stageAndRunCLI(cli *cli.Context, cmd string, dataDir string, args []string)
|
|||
return stageAndRun(dataDir, cmd, args)
|
||||
}
|
||||
|
||||
func stageAndRun(dataDir string, cmd string, args []string) error {
|
||||
func stageAndRun(dataDir, cmd string, args []string) error {
|
||||
dir, err := extract(dataDir)
|
||||
if err != nil {
|
||||
return errors.Wrap(err, "extracting data")
|
||||
|
|
|
@ -10,6 +10,7 @@ import (
|
|||
"github.com/rancher/k3s/pkg/cli/cmds"
|
||||
"github.com/rancher/k3s/pkg/cli/crictl"
|
||||
"github.com/rancher/k3s/pkg/cli/ctr"
|
||||
"github.com/rancher/k3s/pkg/cli/etcdsnapshot"
|
||||
"github.com/rancher/k3s/pkg/cli/kubectl"
|
||||
"github.com/rancher/k3s/pkg/cli/server"
|
||||
"github.com/rancher/k3s/pkg/configfilearg"
|
||||
|
@ -42,6 +43,7 @@ func main() {
|
|||
cmds.NewKubectlCommand(kubectl.Run),
|
||||
cmds.NewCRICTL(crictl.Run),
|
||||
cmds.NewCtrCommand(ctr.Run),
|
||||
cmds.NewEtcdSnapshotCommand(etcdsnapshot.Run),
|
||||
}
|
||||
|
||||
err := app.Run(configfilearg.MustParse(os.Args))
|
||||
|
|
2
main.go
2
main.go
|
@ -12,6 +12,7 @@ import (
|
|||
"github.com/rancher/k3s/pkg/cli/agent"
|
||||
"github.com/rancher/k3s/pkg/cli/cmds"
|
||||
"github.com/rancher/k3s/pkg/cli/crictl"
|
||||
"github.com/rancher/k3s/pkg/cli/etcdsnapshot"
|
||||
"github.com/rancher/k3s/pkg/cli/kubectl"
|
||||
"github.com/rancher/k3s/pkg/cli/server"
|
||||
"github.com/rancher/k3s/pkg/configfilearg"
|
||||
|
@ -26,6 +27,7 @@ func main() {
|
|||
cmds.NewAgentCommand(agent.Run),
|
||||
cmds.NewKubectlCommand(kubectl.Run),
|
||||
cmds.NewCRICTL(crictl.Run),
|
||||
cmds.NewEtcdSnapshotCommand(etcdsnapshot.Run),
|
||||
}
|
||||
|
||||
if err := app.Run(configfilearg.MustParse(os.Args)); err != nil {
|
||||
|
|
|
@ -0,0 +1,39 @@
|
|||
package cmds
|
||||
|
||||
import (
|
||||
"github.com/rancher/k3s/pkg/version"
|
||||
"github.com/urfave/cli"
|
||||
)
|
||||
|
||||
// EtcdSnapshotCommand is the name under which the etcd snapshot subcommand
// is registered.
const EtcdSnapshotCommand = "etcd-snapshot"
|
||||
|
||||
func NewEtcdSnapshotCommand(action func(*cli.Context) error) cli.Command {
|
||||
return cli.Command{
|
||||
Name: EtcdSnapshotCommand,
|
||||
Usage: "Trigger an immediate etcd snapshot",
|
||||
SkipFlagParsing: false,
|
||||
SkipArgReorder: true,
|
||||
Action: action,
|
||||
Flags: []cli.Flag{
|
||||
DebugFlag,
|
||||
LogFile,
|
||||
AlsoLogToStderr,
|
||||
cli.StringFlag{
|
||||
Name: "data-dir,d",
|
||||
Usage: "(data) Folder to hold state default /var/lib/rancher/" + version.Program + " or ${HOME}/.rancher/" + version.Program + " if not root",
|
||||
Destination: &ServerConfig.DataDir,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "name",
|
||||
Usage: "(db) Set the base name of the etcd on-demand snapshot (appended with UNIX timestamp).",
|
||||
Destination: &ServerConfig.EtcdSnapshotName,
|
||||
Value: "on-demand",
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "dir",
|
||||
Usage: "(db) Directory to save etcd on-demand snapshot. (default: ${data-dir}/db/snapshots)",
|
||||
Destination: &ServerConfig.EtcdSnapshotDir,
|
||||
},
|
||||
},
|
||||
}
|
||||
}
|
|
@ -58,6 +58,7 @@ type Server struct {
|
|||
ClusterResetRestorePath string
|
||||
EncryptSecrets bool
|
||||
StartupHooks []func(context.Context, <-chan struct{}, string) error
|
||||
EtcdSnapshotName string
|
||||
EtcdDisableSnapshots bool
|
||||
EtcdSnapshotDir string
|
||||
EtcdSnapshotCron string
|
||||
|
@ -214,6 +215,12 @@ func NewServerCommand(action func(*cli.Context) error) cli.Command {
|
|||
Usage: "(db) Disable automatic etcd snapshots",
|
||||
Destination: &ServerConfig.EtcdDisableSnapshots,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "etcd-snapshot-name",
|
||||
Usage: "(db) Set the base name of etcd snapshots. Default: etcd-snapshot-<unix-timestamp>",
|
||||
Destination: &ServerConfig.EtcdSnapshotName,
|
||||
Value: "etcd-snapshot",
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "etcd-snapshot-schedule-cron",
|
||||
Usage: "(db) Snapshot interval time in cron spec. eg. every 5 hours '* */5 * * *'",
|
||||
|
|
|
@ -0,0 +1,62 @@
|
|||
package etcdsnapshot
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
"github.com/erikdubbelboer/gspt"
|
||||
"github.com/rancher/k3s/pkg/cli/cmds"
|
||||
"github.com/rancher/k3s/pkg/cluster"
|
||||
"github.com/rancher/k3s/pkg/daemons/config"
|
||||
"github.com/rancher/k3s/pkg/etcd"
|
||||
"github.com/rancher/k3s/pkg/server"
|
||||
"github.com/rancher/wrangler/pkg/signals"
|
||||
"github.com/urfave/cli"
|
||||
)
|
||||
|
||||
func Run(app *cli.Context) error {
|
||||
if err := cmds.InitLogging(); err != nil {
|
||||
return err
|
||||
}
|
||||
return run(app, &cmds.ServerConfig)
|
||||
}
|
||||
|
||||
func run(app *cli.Context, cfg *cmds.Server) error {
|
||||
gspt.SetProcTitle(os.Args[0])
|
||||
|
||||
dataDir, err := server.ResolveDataDir(cfg.DataDir)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
var serverConfig server.Config
|
||||
serverConfig.DisableAgent = true
|
||||
serverConfig.ControlConfig.DataDir = dataDir
|
||||
serverConfig.ControlConfig.EtcdSnapshotName = cfg.EtcdSnapshotName
|
||||
serverConfig.ControlConfig.EtcdSnapshotDir = cfg.EtcdSnapshotDir
|
||||
serverConfig.ControlConfig.EtcdSnapshotRetention = 0 // disable retention check
|
||||
serverConfig.ControlConfig.Runtime = &config.ControlRuntime{}
|
||||
serverConfig.ControlConfig.Runtime.ETCDServerCA = filepath.Join(dataDir, "tls", "etcd", "server-ca.crt")
|
||||
serverConfig.ControlConfig.Runtime.ClientETCDCert = filepath.Join(dataDir, "tls", "etcd", "client.crt")
|
||||
serverConfig.ControlConfig.Runtime.ClientETCDKey = filepath.Join(dataDir, "tls", "etcd", "client.key")
|
||||
|
||||
ctx := signals.SetupSignalHandler(context.Background())
|
||||
|
||||
initialized, err := etcd.NewETCD().IsInitialized(ctx, &serverConfig.ControlConfig)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if !initialized {
|
||||
return errors.New("managed etcd database has not been initialized")
|
||||
}
|
||||
|
||||
cluster := cluster.New(&serverConfig.ControlConfig)
|
||||
|
||||
if err := cluster.Bootstrap(ctx); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return cluster.Snapshot(ctx, &serverConfig.ControlConfig)
|
||||
}
|
|
@ -111,6 +111,7 @@ func run(app *cli.Context, cfg *cmds.Server) error {
|
|||
serverConfig.ControlConfig.DisableKubeProxy = cfg.DisableKubeProxy
|
||||
serverConfig.ControlConfig.ClusterInit = cfg.ClusterInit
|
||||
serverConfig.ControlConfig.EncryptSecrets = cfg.EncryptSecrets
|
||||
serverConfig.ControlConfig.EtcdSnapshotName = cfg.EtcdSnapshotName
|
||||
serverConfig.ControlConfig.EtcdSnapshotCron = cfg.EtcdSnapshotCron
|
||||
serverConfig.ControlConfig.EtcdSnapshotDir = cfg.EtcdSnapshotDir
|
||||
serverConfig.ControlConfig.EtcdSnapshotRetention = cfg.EtcdSnapshotRetention
|
||||
|
|
|
@ -9,6 +9,7 @@ import (
|
|||
|
||||
"github.com/rancher/k3s/pkg/bootstrap"
|
||||
"github.com/rancher/k3s/pkg/clientaccess"
|
||||
"github.com/rancher/k3s/pkg/daemons/config"
|
||||
"github.com/rancher/k3s/pkg/version"
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
@ -147,3 +148,12 @@ func (c *Cluster) bootstrap(ctx context.Context) error {
|
|||
func (c *Cluster) bootstrapStamp() string {
|
||||
return filepath.Join(c.config.DataDir, "db/joined-"+keyHash(c.config.Token))
|
||||
}
|
||||
|
||||
// Snapshot is a proxy method to call the snapshot method on the managedb
|
||||
// interface for etcd clusters.
|
||||
func (c *Cluster) Snapshot(ctx context.Context, config *config.Control) error {
|
||||
if c.managedDB == nil {
|
||||
return errors.New("unable to perform etcd snapshot on non-etcd system")
|
||||
}
|
||||
return c.managedDB.Snapshot(ctx, config)
|
||||
}
|
||||
|
|
|
@ -21,6 +21,7 @@ type Driver interface {
|
|||
Test(ctx context.Context) error
|
||||
Restore(ctx context.Context) error
|
||||
EndpointName() string
|
||||
Snapshot(ctx context.Context, config *config.Control) error
|
||||
}
|
||||
|
||||
func RegisterDriver(d Driver) {
|
||||
|
|
|
@ -130,6 +130,7 @@ type Control struct {
|
|||
EncryptSecrets bool
|
||||
TLSMinVersion uint16
|
||||
TLSCipherSuites []uint16
|
||||
EtcdSnapshotName string
|
||||
EtcdDisableSnapshots bool
|
||||
EtcdSnapshotDir string
|
||||
EtcdSnapshotCron string
|
||||
|
|
|
@ -419,7 +419,6 @@ func getClientConfig(ctx context.Context, runtime *config.ControlRuntime, endpoi
|
|||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
cfg := &etcd.Config{
|
||||
Endpoints: endpoints,
|
||||
TLS: tlsConfig,
|
||||
|
@ -428,7 +427,6 @@ func getClientConfig(ctx context.Context, runtime *config.ControlRuntime, endpoi
|
|||
DialKeepAliveTime: defaultKeepAliveTime,
|
||||
DialKeepAliveTimeout: defaultKeepAliveTimeout,
|
||||
}
|
||||
|
||||
return cfg, nil
|
||||
}
|
||||
|
||||
|
@ -723,48 +721,80 @@ func snapshotDir(config *config.Control) (string, error) {
|
|||
return config.EtcdSnapshotDir, nil
|
||||
}
|
||||
|
||||
// snapshot attempts to save a new snapshot to the configured directory, and then clean up any old
|
||||
// snapshots in excess of the retention limits.
|
||||
func (e *ETCD) snapshot(ctx context.Context) {
|
||||
// preSnapshotSetup checks to see if the necessary components are in place
|
||||
// to perform an Etcd snapshot. This is necessary primarily for on-demand
|
||||
// snapshots since they're performed before normal Etcd setup is completed.
|
||||
func (e *ETCD) preSnapshotSetup(ctx context.Context, config *config.Control) error {
|
||||
if e.client == nil {
|
||||
if e.config == nil {
|
||||
e.config = config
|
||||
}
|
||||
client, err := getClient(ctx, e.config.Runtime, endpoint)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
e.client = client
|
||||
}
|
||||
if e.runtime == nil {
|
||||
e.runtime = config.Runtime
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Snapshot attempts to save a new snapshot to the configured directory, and then clean up any old
|
||||
// snapshots in excess of the retention limits. This method is used in the internal cron snapshot
|
||||
// system as well as used to do on-demand snapshots.
|
||||
func (e *ETCD) Snapshot(ctx context.Context, config *config.Control) error {
|
||||
if err := e.preSnapshotSetup(ctx, config); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
status, err := e.client.Status(ctx, endpoint)
|
||||
if err != nil {
|
||||
logrus.Errorf("Failed to check etcd status for snapshot: %v", err)
|
||||
return
|
||||
return errors.Wrap(err, "failed to check etcd status for snapshot")
|
||||
}
|
||||
|
||||
if status.IsLearner {
|
||||
logrus.Warnf("Skipping snapshot: not supported for learner")
|
||||
return
|
||||
return nil
|
||||
}
|
||||
|
||||
snapshotDir, err := snapshotDir(e.config)
|
||||
if err != nil {
|
||||
logrus.Errorf("Failed to get the snapshot dir: %v", err)
|
||||
return
|
||||
return errors.Wrap(err, "failed to get the snapshot dir")
|
||||
}
|
||||
|
||||
cfg, err := getClientConfig(ctx, e.runtime, endpoint)
|
||||
if err != nil {
|
||||
logrus.Errorf("Failed to get config for etcd snapshot: %v", err)
|
||||
return
|
||||
return errors.Wrap(err, "failed to get config for etcd snapshot")
|
||||
}
|
||||
|
||||
snapshotPath := filepath.Join(snapshotDir, snapshotPrefix+strconv.Itoa(int(time.Now().Unix())))
|
||||
snapshotName := fmt.Sprintf("%s-%d", e.config.EtcdSnapshotName, time.Now().Unix())
|
||||
snapshotPath := filepath.Join(snapshotDir, snapshotName)
|
||||
|
||||
logrus.Infof("Saving etcd snapshot to %s", snapshotPath)
|
||||
|
||||
if err := snapshot.NewV3(nil).Save(ctx, *cfg, snapshotPath); err != nil {
|
||||
logrus.Errorf("Failed to save snapshot: %v", err)
|
||||
return
|
||||
return errors.Wrap(err, "failed to save snapshot")
|
||||
}
|
||||
if err := snapshotRetention(e.config.EtcdSnapshotRetention, snapshotDir); err != nil {
|
||||
logrus.Errorf("Failed to apply snapshot retention: %v", err)
|
||||
return
|
||||
|
||||
// check if we need to perform a retention check
|
||||
if e.config.EtcdSnapshotRetention >= 1 {
|
||||
if err := snapshotRetention(e.config.EtcdSnapshotRetention, snapshotDir); err != nil {
|
||||
return errors.Wrap(err, "failed to apply snapshot retention")
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// setSnapshotFunction schedules snapshots at the configured interval
|
||||
func (e *ETCD) setSnapshotFunction(ctx context.Context) {
|
||||
e.cron.AddFunc(e.config.EtcdSnapshotCron, func() { e.snapshot(ctx) })
|
||||
e.cron.AddFunc(e.config.EtcdSnapshotCron, func() {
|
||||
if err := e.Snapshot(ctx, e.config); err != nil {
|
||||
logrus.Error(err)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// Restore performs a restore of the ETCD datastore from
|
||||
|
|
|
@ -42,7 +42,7 @@ const (
|
|||
ControlPlaneRoleLabelKey = "node-role.kubernetes.io/control-plane"
|
||||
)
|
||||
|
||||
func resolveDataDir(dataDir string) (string, error) {
|
||||
func ResolveDataDir(dataDir string) (string, error) {
|
||||
dataDir, err := datadir.Resolve(dataDir)
|
||||
return filepath.Join(dataDir, "server"), err
|
||||
}
|
||||
|
@ -322,7 +322,7 @@ func setupDataDirAndChdir(config *config.Control) error {
|
|||
err error
|
||||
)
|
||||
|
||||
config.DataDir, err = resolveDataDir(config.DataDir)
|
||||
config.DataDir, err = ResolveDataDir(config.DataDir)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
|
|
@ -77,6 +77,7 @@ rm -f \
|
|||
bin/containerd-shim-runc-v1 \
|
||||
bin/containerd-shim-runc-v2 \
|
||||
bin/k3s-server \
|
||||
bin/k3s-etcd-snapshot \
|
||||
bin/kubectl \
|
||||
bin/crictl \
|
||||
bin/ctr
|
||||
|
@ -105,6 +106,7 @@ echo Building server
|
|||
CGO_ENABLED=1 "${GO}" build -tags "$TAGS" -ldflags "$VERSIONFLAGS $LDFLAGS $STATIC_SQLITE" -o bin/containerd ./cmd/server/main.go
|
||||
ln -s containerd ./bin/k3s-agent
|
||||
ln -s containerd ./bin/k3s-server
|
||||
ln -s containerd ./bin/k3s-etcd-snapshot
|
||||
ln -s containerd ./bin/kubectl
|
||||
ln -s containerd ./bin/crictl
|
||||
ln -s containerd ./bin/ctr
|
||||
|
|
|
@ -7,7 +7,7 @@ cd $(dirname $0)/..
|
|||
|
||||
GO=${GO-go}
|
||||
|
||||
for i in crictl kubectl k3s-agent k3s-server k3s; do
|
||||
for i in crictl kubectl k3s-agent k3s-server k3s-etcd-snapshot k3s; do
|
||||
rm -f bin/$i
|
||||
ln -s containerd bin/$i
|
||||
done
|
||||
|
|
Loading…
Reference in New Issue