mirror of https://github.com/k3s-io/k3s
Add tombstone file to etcd and catch errc etcd channel (#2592)
* Add tombstone file to embedded etcd Signed-off-by: galal-hussein <hussein.galal.ahmed.11@gmail.com> * go mod update Signed-off-by: galal-hussein <hussein.galal.ahmed.11@gmail.com> * fixes Signed-off-by: galal-hussein <hussein.galal.ahmed.11@gmail.com> * more fixes Signed-off-by: galal-hussein <hussein.galal.ahmed.11@gmail.com> * more changes Signed-off-by: galal-hussein <hussein.galal.ahmed.11@gmail.com> * gofmt and goimports Signed-off-by: galal-hussein <hussein.galal.ahmed.11@gmail.com> * go mod update Signed-off-by: galal-hussein <hussein.galal.ahmed.11@gmail.com> * go lint Signed-off-by: galal-hussein <hussein.galal.ahmed.11@gmail.com> * go lint Signed-off-by: galal-hussein <hussein.galal.ahmed.11@gmail.com> * go mod tidy Signed-off-by: galal-hussein <hussein.galal.ahmed.11@gmail.com>pull/2649/head
parent
10b43c8fe5
commit
fadc5a8057
2
go.mod
2
go.mod
|
@ -25,7 +25,7 @@ replace (
|
|||
github.com/matryer/moq => github.com/rancher/moq v0.0.0-20190404221404-ee5226d43009
|
||||
github.com/opencontainers/runc => github.com/opencontainers/runc v1.0.0-rc92
|
||||
github.com/opencontainers/runtime-spec => github.com/opencontainers/runtime-spec v1.0.3-0.20200728170252-4d89ac9fbff6
|
||||
go.etcd.io/etcd => github.com/k3s-io/etcd v0.0.0-20200911210206-f8fde3601008 // v3.4.13-k3s1
|
||||
go.etcd.io/etcd => github.com/k3s-io/etcd v0.5.0-alpha.5.0.20201204203317-251ee41536d8
|
||||
golang.org/x/crypto => golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2
|
||||
golang.org/x/net => golang.org/x/net v0.0.0-20190813141303-74dc4d7220e7
|
||||
golang.org/x/sys => golang.org/x/sys v0.0.0-20190826190057-c7b8b68b1456
|
||||
|
|
4
go.sum
4
go.sum
|
@ -472,8 +472,8 @@ github.com/k3s-io/cri v1.4.0-k3s.2 h1:HiJLH0P7k6sSJwbzjPwIN0CeY0iA6bKlb7OyThMiaE
|
|||
github.com/k3s-io/cri v1.4.0-k3s.2/go.mod h1:fGPUUHMKQik/vIegSe05DtX/m4miovdtvVLqRUFAkK0=
|
||||
github.com/k3s-io/cri-tools v1.19.0-k3s1 h1:FQ1iURavoP3rE/GqND/f3aIL1X59IpFQCRnDhiwzcZ8=
|
||||
github.com/k3s-io/cri-tools v1.19.0-k3s1/go.mod h1:bitvtZRi5F7t505Yw3zPzp22LOao1lqJKHfx6x0hnpw=
|
||||
github.com/k3s-io/etcd v0.0.0-20200911210206-f8fde3601008 h1:PlAf/spqR2ZVFeWORItuvYk0YNDsjTlmq+e+7TQbtrI=
|
||||
github.com/k3s-io/etcd v0.0.0-20200911210206-f8fde3601008/go.mod h1:yVHk9ub3CSBatqGNg7GRmsnfLWtoW60w4eDYfh7vHDg=
|
||||
github.com/k3s-io/etcd v0.5.0-alpha.5.0.20201204203317-251ee41536d8 h1:S+MCp8UM5sS1bpxedfr3Qb907ig0dF1bARZ+UqeM4vk=
|
||||
github.com/k3s-io/etcd v0.5.0-alpha.5.0.20201204203317-251ee41536d8/go.mod h1:yVHk9ub3CSBatqGNg7GRmsnfLWtoW60w4eDYfh7vHDg=
|
||||
github.com/k3s-io/flannel v0.12.0-k3s2 h1:KEfj4fe+P0qINcYZxo5/C0cga2XBEfeV4nhKrUGDyCg=
|
||||
github.com/k3s-io/flannel v0.12.0-k3s2/go.mod h1:2tCkIFWhim43MkRsOcPOxY8/Bcpai9uZLJaywN7ciNg=
|
||||
github.com/k3s-io/go-powershell v0.0.0-20200701182037-6845e6fcfa79 h1:9naOL3iAREsJh9mbf9C6Qqu9xuFv7/jIGBFCWvZMg4E=
|
||||
|
|
|
@ -3,8 +3,14 @@
|
|||
package executor
|
||||
|
||||
import (
|
||||
"io/ioutil"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/rancher/k3s/pkg/version"
|
||||
"github.com/sirupsen/logrus"
|
||||
"go.etcd.io/etcd/embed"
|
||||
"go.etcd.io/etcd/etcdserver"
|
||||
)
|
||||
|
||||
func (e Embedded) CurrentETCDOptions() (InitialOptions, error) {
|
||||
|
@ -27,8 +33,18 @@ func (e Embedded) ETCD(args ETCDConfig) error {
|
|||
|
||||
go func() {
|
||||
select {
|
||||
case err := <-etcd.Server.ErrNotify():
|
||||
if strings.Contains(err.Error(), etcdserver.ErrMemberRemoved.Error()) {
|
||||
tombstoneFile := filepath.Join(args.DataDir, "tombstone")
|
||||
if err := ioutil.WriteFile(tombstoneFile, []byte{}, 0600); err != nil {
|
||||
logrus.Fatalf("failed to write tombstone file to %s", tombstoneFile)
|
||||
}
|
||||
logrus.Infof("this node has been removed from the cluster please restart %s to rejoin the cluster", version.Program)
|
||||
return
|
||||
}
|
||||
|
||||
case <-etcd.Server.StopNotify():
|
||||
logrus.Fatalf("etcd stopped - if this node was removed from the cluster, you must backup and delete %s before rejoining", args.DataDir)
|
||||
logrus.Fatalf("etcd stopped")
|
||||
case err := <-etcd.Err():
|
||||
logrus.Fatalf("etcd exited: %v", err)
|
||||
}
|
||||
|
|
|
@ -72,6 +72,8 @@ const (
|
|||
// other defaults from k8s.io/apiserver/pkg/storage/storagebackend/factory/etcd3.go
|
||||
defaultKeepAliveTime = 30 * time.Second
|
||||
defaultKeepAliveTimeout = 10 * time.Second
|
||||
|
||||
maxBackupRetention = 5
|
||||
)
|
||||
|
||||
// Members contains a slice that holds all
|
||||
|
@ -323,6 +325,13 @@ func (e *ETCD) Register(ctx context.Context, config *config.Control, handler htt
|
|||
return nil, err
|
||||
}
|
||||
|
||||
tombstoneFile := filepath.Join(etcdDBDir(e.config), "tombstone")
|
||||
if _, err := os.Stat(tombstoneFile); err == nil {
|
||||
logrus.Infof("tombstone file has been detected, removing data dir to rejoin the cluster")
|
||||
if _, err := backupDirWithRetention(etcdDBDir(e.config), maxBackupRetention); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
return e.handler(handler), err
|
||||
}
|
||||
|
||||
|
@ -512,7 +521,7 @@ func (e *ETCD) removePeer(ctx context.Context, id, address string) error {
|
|||
}
|
||||
if u.Hostname() == address {
|
||||
if e.address == address {
|
||||
logrus.Fatalf("node has been delete from the cluster. Backup and delete ${datadir}/server/db if you like to rejoin the node")
|
||||
return errors.New("node has been deleted from the cluster")
|
||||
}
|
||||
logrus.Infof("Removing name=%s id=%d address=%s from etcd", member.Name, member.ID, address)
|
||||
_, err := e.client.MemberRemove(ctx, member.ID)
|
||||
|
@ -802,3 +811,35 @@ func snapshotRetention(retention int, snapshotDir string) error {
|
|||
})
|
||||
return os.Remove(filepath.Join(snapshotDir, snapshotFiles[0].Name()))
|
||||
}
|
||||
|
||||
// backupDirWithRetention moves dir aside to a sibling directory named
// "<dir>-backup-<unix timestamp>" and prunes older backups so that, counting
// the backup it creates, at most maxBackupRetention backup directories remain
// (the new backup itself is always kept, even if maxBackupRetention is < 1).
//
// A backup is any directory in dir's parent whose name starts with
// "<base name of dir>-backup"; the most recently modified ones are retained.
//
// It returns the path of the newly created backup directory. When dir does not
// exist there is nothing to back up: it returns ("", nil) and prunes nothing.
// Any other failure (stat, listing, pruning or renaming) is returned as an
// error together with an empty path.
func backupDirWithRetention(dir string, maxBackupRetention int) (string, error) {
	if _, err := os.Stat(dir); err != nil {
		if os.IsNotExist(err) {
			return "", nil
		}
		// Do not report success when the directory could not even be inspected.
		return "", err
	}

	parent := filepath.Dir(dir)
	backupPrefix := filepath.Base(dir) + "-backup"

	files, err := ioutil.ReadDir(parent)
	if err != nil {
		return "", err
	}
	// Newest first, so everything past the retention limit is the oldest.
	sort.Slice(files, func(i, j int) bool {
		return files[i].ModTime().After(files[j].ModTime())
	})

	// Reserve one slot for the backup created below; otherwise the function
	// would leave maxBackupRetention+1 backups behind.
	keep := maxBackupRetention - 1
	if keep < 0 {
		keep = 0
	}
	kept := 0
	for _, f := range files {
		if !f.IsDir() || !strings.HasPrefix(f.Name(), backupPrefix) {
			continue
		}
		if kept < keep {
			kept++
			continue
		}
		if err := os.RemoveAll(filepath.Join(parent, f.Name())); err != nil {
			return "", err
		}
	}

	// FormatInt keeps the full 64-bit timestamp instead of narrowing to int.
	backupDir := dir + "-backup-" + strconv.FormatInt(time.Now().Unix(), 10)
	if err := os.Rename(dir, backupDir); err != nil {
		return "", err
	}
	return backupDir, nil
}
|
||||
|
|
|
@ -39,6 +39,7 @@ var (
|
|||
ErrKeyNotFound = errors.New("etcdserver: key not found")
|
||||
ErrCorrupt = errors.New("etcdserver: corrupt cluster")
|
||||
ErrBadLeaderTransferee = errors.New("etcdserver: bad leader transferee")
|
||||
ErrMemberRemoved = errors.New("etcdserver: the member has been permanently removed from the cluster")
|
||||
)
|
||||
|
||||
type DiscoveryError struct {
|
||||
|
|
|
@ -1388,7 +1388,7 @@ func (s *EtcdServer) applyEntries(ep *etcdProgress, apply *apply) {
|
|||
}
|
||||
var shouldstop bool
|
||||
if ep.appliedt, ep.appliedi, shouldstop = s.apply(ents, &ep.confState); shouldstop {
|
||||
go s.stopWithDelay(10*100*time.Millisecond, fmt.Errorf("the member has been permanently removed from the cluster"))
|
||||
go s.stopWithDelay(10*100*time.Millisecond, ErrMemberRemoved)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1551,6 +1551,8 @@ func (s *EtcdServer) stopWithDelay(d time.Duration, err error) {
|
|||
// when the server is stopped.
|
||||
func (s *EtcdServer) StopNotify() <-chan struct{} {
	// Hand out s.done through the receive-only return type.
	return s.done
}
|
||||
|
||||
// ErrNotify returns a receive-only view of the server's errorc channel.
func (s *EtcdServer) ErrNotify() <-chan error {
	return s.errorc
}
|
||||
|
||||
// SelfStats returns the bytes produced by s.stats.JSON().
func (s *EtcdServer) SelfStats() []byte {
	encoded := s.stats.JSON()
	return encoded
}
|
||||
|
||||
func (s *EtcdServer) LeaderStats() []byte {
|
||||
|
|
|
@ -1007,7 +1007,7 @@ github.com/willf/bitset
|
|||
github.com/xiang90/probing
|
||||
# go.etcd.io/bbolt v1.3.5
|
||||
go.etcd.io/bbolt
|
||||
# go.etcd.io/etcd v0.5.0-alpha.5.0.20200819165624-17cef6e3e9d5 => github.com/k3s-io/etcd v0.0.0-20200911210206-f8fde3601008
|
||||
# go.etcd.io/etcd v0.5.0-alpha.5.0.20200819165624-17cef6e3e9d5 => github.com/k3s-io/etcd v0.5.0-alpha.5.0.20201204203317-251ee41536d8
|
||||
## explicit
|
||||
go.etcd.io/etcd/auth
|
||||
go.etcd.io/etcd/auth/authpb
|
||||
|
@ -2949,7 +2949,7 @@ vbom.ml/util/sortorder
|
|||
# github.com/matryer/moq => github.com/rancher/moq v0.0.0-20190404221404-ee5226d43009
|
||||
# github.com/opencontainers/runc => github.com/opencontainers/runc v1.0.0-rc92
|
||||
# github.com/opencontainers/runtime-spec => github.com/opencontainers/runtime-spec v1.0.3-0.20200728170252-4d89ac9fbff6
|
||||
# go.etcd.io/etcd => github.com/k3s-io/etcd v0.0.0-20200911210206-f8fde3601008
|
||||
# go.etcd.io/etcd => github.com/k3s-io/etcd v0.5.0-alpha.5.0.20201204203317-251ee41536d8
|
||||
# golang.org/x/crypto => golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2
|
||||
# golang.org/x/net => golang.org/x/net v0.0.0-20190813141303-74dc4d7220e7
|
||||
# golang.org/x/sys => golang.org/x/sys v0.0.0-20190826190057-c7b8b68b1456
|
||||
|
|
Loading…
Reference in New Issue