Add tombstone file to etcd and catch errc etcd channel (#2592)

* Add tombstone file to embedded etcd

Signed-off-by: galal-hussein <hussein.galal.ahmed.11@gmail.com>

* go mod update

Signed-off-by: galal-hussein <hussein.galal.ahmed.11@gmail.com>

* fixes

Signed-off-by: galal-hussein <hussein.galal.ahmed.11@gmail.com>

* more fixes

Signed-off-by: galal-hussein <hussein.galal.ahmed.11@gmail.com>

* more changes

Signed-off-by: galal-hussein <hussein.galal.ahmed.11@gmail.com>

* gofmt and goimports

Signed-off-by: galal-hussein <hussein.galal.ahmed.11@gmail.com>

* go mod update

Signed-off-by: galal-hussein <hussein.galal.ahmed.11@gmail.com>

* go lint

Signed-off-by: galal-hussein <hussein.galal.ahmed.11@gmail.com>

* go lint

Signed-off-by: galal-hussein <hussein.galal.ahmed.11@gmail.com>

* go mod tidy

Signed-off-by: galal-hussein <hussein.galal.ahmed.11@gmail.com>
pull/2649/head
Hussein Galal 2020-12-07 22:30:44 +02:00 committed by GitHub
parent 10b43c8fe5
commit fadc5a8057
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 68 additions and 8 deletions

2
go.mod
View File

@ -25,7 +25,7 @@ replace (
github.com/matryer/moq => github.com/rancher/moq v0.0.0-20190404221404-ee5226d43009
github.com/opencontainers/runc => github.com/opencontainers/runc v1.0.0-rc92
github.com/opencontainers/runtime-spec => github.com/opencontainers/runtime-spec v1.0.3-0.20200728170252-4d89ac9fbff6
go.etcd.io/etcd => github.com/k3s-io/etcd v0.0.0-20200911210206-f8fde3601008 // v3.4.13-k3s1
go.etcd.io/etcd => github.com/k3s-io/etcd v0.5.0-alpha.5.0.20201204203317-251ee41536d8
golang.org/x/crypto => golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2
golang.org/x/net => golang.org/x/net v0.0.0-20190813141303-74dc4d7220e7
golang.org/x/sys => golang.org/x/sys v0.0.0-20190826190057-c7b8b68b1456

4
go.sum
View File

@ -472,8 +472,8 @@ github.com/k3s-io/cri v1.4.0-k3s.2 h1:HiJLH0P7k6sSJwbzjPwIN0CeY0iA6bKlb7OyThMiaE
github.com/k3s-io/cri v1.4.0-k3s.2/go.mod h1:fGPUUHMKQik/vIegSe05DtX/m4miovdtvVLqRUFAkK0=
github.com/k3s-io/cri-tools v1.19.0-k3s1 h1:FQ1iURavoP3rE/GqND/f3aIL1X59IpFQCRnDhiwzcZ8=
github.com/k3s-io/cri-tools v1.19.0-k3s1/go.mod h1:bitvtZRi5F7t505Yw3zPzp22LOao1lqJKHfx6x0hnpw=
github.com/k3s-io/etcd v0.0.0-20200911210206-f8fde3601008 h1:PlAf/spqR2ZVFeWORItuvYk0YNDsjTlmq+e+7TQbtrI=
github.com/k3s-io/etcd v0.0.0-20200911210206-f8fde3601008/go.mod h1:yVHk9ub3CSBatqGNg7GRmsnfLWtoW60w4eDYfh7vHDg=
github.com/k3s-io/etcd v0.5.0-alpha.5.0.20201204203317-251ee41536d8 h1:S+MCp8UM5sS1bpxedfr3Qb907ig0dF1bARZ+UqeM4vk=
github.com/k3s-io/etcd v0.5.0-alpha.5.0.20201204203317-251ee41536d8/go.mod h1:yVHk9ub3CSBatqGNg7GRmsnfLWtoW60w4eDYfh7vHDg=
github.com/k3s-io/flannel v0.12.0-k3s2 h1:KEfj4fe+P0qINcYZxo5/C0cga2XBEfeV4nhKrUGDyCg=
github.com/k3s-io/flannel v0.12.0-k3s2/go.mod h1:2tCkIFWhim43MkRsOcPOxY8/Bcpai9uZLJaywN7ciNg=
github.com/k3s-io/go-powershell v0.0.0-20200701182037-6845e6fcfa79 h1:9naOL3iAREsJh9mbf9C6Qqu9xuFv7/jIGBFCWvZMg4E=

View File

@ -3,8 +3,14 @@
package executor
import (
"io/ioutil"
"path/filepath"
"strings"
"github.com/rancher/k3s/pkg/version"
"github.com/sirupsen/logrus"
"go.etcd.io/etcd/embed"
"go.etcd.io/etcd/etcdserver"
)
func (e Embedded) CurrentETCDOptions() (InitialOptions, error) {
@ -27,8 +33,18 @@ func (e Embedded) ETCD(args ETCDConfig) error {
go func() {
select {
case err := <-etcd.Server.ErrNotify():
if strings.Contains(err.Error(), etcdserver.ErrMemberRemoved.Error()) {
tombstoneFile := filepath.Join(args.DataDir, "tombstone")
if err := ioutil.WriteFile(tombstoneFile, []byte{}, 0600); err != nil {
logrus.Fatalf("failed to write tombstone file to %s", tombstoneFile)
}
logrus.Infof("this node has been removed from the cluster please restart %s to rejoin the cluster", version.Program)
return
}
case <-etcd.Server.StopNotify():
logrus.Fatalf("etcd stopped - if this node was removed from the cluster, you must backup and delete %s before rejoining", args.DataDir)
logrus.Fatalf("etcd stopped")
case err := <-etcd.Err():
logrus.Fatalf("etcd exited: %v", err)
}

View File

@ -72,6 +72,8 @@ const (
// other defaults from k8s.io/apiserver/pkg/storage/storagebackend/factory/etcd3.go
defaultKeepAliveTime = 30 * time.Second
defaultKeepAliveTimeout = 10 * time.Second
maxBackupRetention = 5
)
// Members contains a slice that holds all
@ -323,6 +325,13 @@ func (e *ETCD) Register(ctx context.Context, config *config.Control, handler htt
return nil, err
}
tombstoneFile := filepath.Join(etcdDBDir(e.config), "tombstone")
if _, err := os.Stat(tombstoneFile); err == nil {
logrus.Infof("tombstone file has been detected, removing data dir to rejoin the cluster")
if _, err := backupDirWithRetention(etcdDBDir(e.config), maxBackupRetention); err != nil {
return nil, err
}
}
return e.handler(handler), err
}
@ -512,7 +521,7 @@ func (e *ETCD) removePeer(ctx context.Context, id, address string) error {
}
if u.Hostname() == address {
if e.address == address {
logrus.Fatalf("node has been delete from the cluster. Backup and delete ${datadir}/server/db if you like to rejoin the node")
return errors.New("node has been deleted from the cluster")
}
logrus.Infof("Removing name=%s id=%d address=%s from etcd", member.Name, member.ID, address)
_, err := e.client.MemberRemove(ctx, member.ID)
@ -802,3 +811,35 @@ func snapshotRetention(retention int, snapshotDir string) error {
})
return os.Remove(filepath.Join(snapshotDir, snapshotFiles[0].Name()))
}
// backupDirWithRetention moves dir to a sibling backup directory named
// "<dir>-backup-<unix timestamp>" and prunes older backups so that, counting
// the backup it is about to create, at most maxBackupRetention backups remain.
//
// Backups are recognised as directories located next to dir whose name starts
// with "<base name of dir>-backup". Candidates are ordered by modification
// time and the oldest ones are removed first. A maxBackupRetention of zero or
// less removes every existing backup; the new backup is still created.
//
// It returns the path of the new backup directory. If dir does not exist
// there is nothing to back up, and an empty path with a nil error is
// returned. Any other failure to stat, list, prune or rename is returned to
// the caller.
func backupDirWithRetention(dir string, maxBackupRetention int) (string, error) {
	if _, err := os.Stat(dir); err != nil {
		if os.IsNotExist(err) {
			// Nothing to move out of the way.
			return "", nil
		}
		// Do not mistake e.g. a permission problem for "no data dir".
		return "", err
	}

	parent := filepath.Dir(dir)
	prefix := filepath.Base(dir) + "-backup"

	files, err := ioutil.ReadDir(parent)
	if err != nil {
		return "", err
	}
	// Newest first, so everything past the retention limit is the oldest.
	sort.Slice(files, func(i, j int) bool {
		return files[i].ModTime().After(files[j].ModTime())
	})

	count := 0
	for _, f := range files {
		if !f.IsDir() || !strings.HasPrefix(f.Name(), prefix) {
			continue
		}
		count++
		// Keep only maxBackupRetention-1 existing backups: the last slot is
		// reserved for the backup created by the rename below.
		if count < maxBackupRetention {
			continue
		}
		if err := os.RemoveAll(filepath.Join(parent, f.Name())); err != nil {
			return "", err
		}
	}

	// FormatInt avoids truncating the 64-bit timestamp on 32-bit platforms.
	backupDir := dir + "-backup-" + strconv.FormatInt(time.Now().Unix(), 10)
	if err := os.Rename(dir, backupDir); err != nil {
		return "", err
	}
	return backupDir, nil
}

View File

@ -39,6 +39,7 @@ var (
ErrKeyNotFound = errors.New("etcdserver: key not found")
ErrCorrupt = errors.New("etcdserver: corrupt cluster")
ErrBadLeaderTransferee = errors.New("etcdserver: bad leader transferee")
ErrMemberRemoved = errors.New("etcdserver: the member has been permanently removed from the cluster")
)
type DiscoveryError struct {

View File

@ -1388,7 +1388,7 @@ func (s *EtcdServer) applyEntries(ep *etcdProgress, apply *apply) {
}
var shouldstop bool
if ep.appliedt, ep.appliedi, shouldstop = s.apply(ents, &ep.confState); shouldstop {
go s.stopWithDelay(10*100*time.Millisecond, fmt.Errorf("the member has been permanently removed from the cluster"))
go s.stopWithDelay(10*100*time.Millisecond, ErrMemberRemoved)
}
}
@ -1551,6 +1551,8 @@ func (s *EtcdServer) stopWithDelay(d time.Duration, err error) {
// when the server is stopped.
func (s *EtcdServer) StopNotify() <-chan struct{} {
	// Hand out the server's done channel as receive-only.
	return s.done
}
// ErrNotify returns the server's error channel (s.errorc) as a receive-only
// channel.
// NOTE(review): presumably this carries the error handed to stopWithDelay,
// e.g. ErrMemberRemoved — confirm against the sender side.
func (s *EtcdServer) ErrNotify() <-chan error {
	return s.errorc
}
// SelfStats returns the bytes produced by the JSON method of the server's
// stats field.
func (s *EtcdServer) SelfStats() []byte {
	return s.stats.JSON()
}
func (s *EtcdServer) LeaderStats() []byte {

4
vendor/modules.txt vendored
View File

@ -1007,7 +1007,7 @@ github.com/willf/bitset
github.com/xiang90/probing
# go.etcd.io/bbolt v1.3.5
go.etcd.io/bbolt
# go.etcd.io/etcd v0.5.0-alpha.5.0.20200819165624-17cef6e3e9d5 => github.com/k3s-io/etcd v0.0.0-20200911210206-f8fde3601008
# go.etcd.io/etcd v0.5.0-alpha.5.0.20200819165624-17cef6e3e9d5 => github.com/k3s-io/etcd v0.5.0-alpha.5.0.20201204203317-251ee41536d8
## explicit
go.etcd.io/etcd/auth
go.etcd.io/etcd/auth/authpb
@ -2949,7 +2949,7 @@ vbom.ml/util/sortorder
# github.com/matryer/moq => github.com/rancher/moq v0.0.0-20190404221404-ee5226d43009
# github.com/opencontainers/runc => github.com/opencontainers/runc v1.0.0-rc92
# github.com/opencontainers/runtime-spec => github.com/opencontainers/runtime-spec v1.0.3-0.20200728170252-4d89ac9fbff6
# go.etcd.io/etcd => github.com/k3s-io/etcd v0.0.0-20200911210206-f8fde3601008
# go.etcd.io/etcd => github.com/k3s-io/etcd v0.5.0-alpha.5.0.20201204203317-251ee41536d8
# golang.org/x/crypto => golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2
# golang.org/x/net => golang.org/x/net v0.0.0-20190813141303-74dc4d7220e7
# golang.org/x/sys => golang.org/x/sys v0.0.0-20190826190057-c7b8b68b1456