Merge pull request #3003 from galal-hussein/fix_etcd_only_nodes

Fix etcd only nodes
pull/3018/head
Hussein Galal 2021-03-02 02:16:02 +02:00 committed by GitHub
commit 1bf04b6a50
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 41 additions and 14 deletions

2
go.mod
View File

@ -93,7 +93,7 @@ require (
github.com/opencontainers/selinux v1.6.0
github.com/pierrec/lz4 v2.5.2+incompatible
github.com/pkg/errors v0.9.1
github.com/rancher/dynamiclistener v0.2.1
github.com/rancher/dynamiclistener v0.2.2
github.com/rancher/remotedialer v0.2.0
github.com/rancher/wrangler v0.6.1
github.com/rancher/wrangler-api v0.6.0

4
go.sum
View File

@ -768,8 +768,8 @@ github.com/prometheus/tsdb v0.7.1/go.mod h1:qhTCs0VvXwvX/y3TZrWD7rabWM+ijKTux40T
github.com/quobyte/api v0.1.8/go.mod h1:jL7lIHrmqQ7yh05OJ+eEEdHr0u/kmT1Ff9iHd+4H6VI=
github.com/rakelkar/gonetsh v0.0.0-20190930180311-e5c5ffe4bdf0 h1:iXE9kmlAqhusXxzkXictdNgWS7p4ZBnmv9SdyMgTf6E=
github.com/rakelkar/gonetsh v0.0.0-20190930180311-e5c5ffe4bdf0/go.mod h1:4XHkfaUj+URzGO9sohoAgt2V9Y8nIW7fugpu0E6gShk=
github.com/rancher/dynamiclistener v0.2.1 h1:QiY1jxs2TOLrKB04G36vE2ehEvPMPGiWp8zEHLKB1nE=
github.com/rancher/dynamiclistener v0.2.1/go.mod h1:9WusTANoiRr8cDWCTtf5txieulezHbpv4vhLADPp0zU=
github.com/rancher/dynamiclistener v0.2.2 h1:70dMwOr1sqb6mQqfU2nDb/fr5cv7HJjH+kFYzoxb8KU=
github.com/rancher/dynamiclistener v0.2.2/go.mod h1:9WusTANoiRr8cDWCTtf5txieulezHbpv4vhLADPp0zU=
github.com/rancher/moq v0.0.0-20190404221404-ee5226d43009/go.mod h1:wpITyDPTi/Na5h73XkbuEf2AP9fbgrIGqqxVzFhYD6U=
github.com/rancher/remotedialer v0.2.0 h1:xD7t3K6JYwTdAsxmGtTHQMkEkFgKouQ1foLxVW424Dc=
github.com/rancher/remotedialer v0.2.0/go.mod h1:tkU8ZvrR5lRgaKWaX71nAy6daeqvPFx/lJEnbW7tXSI=

View File

@ -267,8 +267,13 @@ func run(app *cli.Context, cfg *cmds.Server) error {
}
go func() {
if !serverConfig.ControlConfig.DisableAPIServer {
<-serverConfig.ControlConfig.Runtime.APIServerReady
logrus.Info("Kube API server is now running")
} else {
<-serverConfig.ControlConfig.Runtime.ETCDReady
logrus.Info("ETCD server is now running")
}
logrus.Info(version.Program + " is up and running")
if notifySocket != "" {
os.Setenv("NOTIFY_SOCKET", notifySocket)

View File

@ -12,6 +12,7 @@ import (
"github.com/rancher/k3s/pkg/cluster/managed"
"github.com/rancher/k3s/pkg/daemons/config"
"github.com/rancher/k3s/pkg/etcd"
"github.com/sirupsen/logrus"
)
type Cluster struct {
@ -41,7 +42,7 @@ func (c *Cluster) Start(ctx context.Context) (<-chan struct{}, error) {
defer close(ready)
// try to get /db/info urls first before attempting to use join url
clientURLs, _, err := etcd.ClientURLs(ctx, c.clientAccessInfo)
clientURLs, _, err := etcd.ClientURLs(ctx, c.clientAccessInfo, c.config.PrivateIP)
if err != nil {
return nil, err
}
@ -58,6 +59,12 @@ func (c *Cluster) Start(ctx context.Context) (<-chan struct{}, error) {
return nil, err
}
c.setupEtcdProxy(ctx, etcdProxy)
// remove etcd member if it exists
if err := c.managedDB.RemoveSelf(ctx); err != nil {
logrus.Warnf("Failed to remove this node from etcd members")
}
return ready, nil
}

View File

@ -23,6 +23,7 @@ type Driver interface {
EndpointName() string
Snapshot(ctx context.Context, config *config.Control) error
GetMembersClientURLs(ctx context.Context) ([]string, error)
RemoveSelf(ctx context.Context) error
}
func RegisterDriver(d Driver) {

View File

@ -87,5 +87,5 @@ func (h *handler) onRemove(key string, node *v1.Node) (*v1.Node, error) {
if !ok {
return node, nil
}
return node, h.etcd.removePeer(h.ctx, id, address)
return node, h.etcd.removePeer(h.ctx, id, address, false)
}

View File

@ -245,7 +245,7 @@ func (e *ETCD) Start(ctx context.Context, clientAccessInfo *clientaccess.Info) e
// join attempts to add a member to an existing cluster
func (e *ETCD) join(ctx context.Context, clientAccessInfo *clientaccess.Info) error {
clientURLs, memberList, err := ClientURLs(ctx, clientAccessInfo)
clientURLs, memberList, err := ClientURLs(ctx, clientAccessInfo, e.config.PrivateIP)
if err != nil {
return err
}
@ -524,7 +524,7 @@ func (e *ETCD) cluster(ctx context.Context, forceNew bool, options executor.Init
}
// removePeer removes a peer from the cluster. The peer ID and IP address must both match.
func (e *ETCD) removePeer(ctx context.Context, id, address string) error {
func (e *ETCD) removePeer(ctx context.Context, id, address string, removeSelf bool) error {
members, err := e.client.MemberList(ctx)
if err != nil {
return err
@ -540,7 +540,7 @@ func (e *ETCD) removePeer(ctx context.Context, id, address string) error {
return err
}
if u.Hostname() == address {
if e.address == address {
if e.address == address && !removeSelf {
return errors.New("node has been deleted from the cluster")
}
logrus.Infof("Removing name=%s id=%d address=%s from etcd", member.Name, member.ID, address)
@ -687,7 +687,7 @@ func (e *ETCD) setLearnerProgress(ctx context.Context, status *learnerProgress)
}
// clientURLs returns a list of all non-learner etcd cluster member client access URLs
func ClientURLs(ctx context.Context, clientAccessInfo *clientaccess.Info) ([]string, Members, error) {
func ClientURLs(ctx context.Context, clientAccessInfo *clientaccess.Info, selfIP string) ([]string, Members, error) {
var memberList Members
resp, err := clientaccess.Get("/db/info", clientAccessInfo)
if err != nil {
@ -697,13 +697,22 @@ func ClientURLs(ctx context.Context, clientAccessInfo *clientaccess.Info) ([]str
if err := json.Unmarshal(resp, &memberList); err != nil {
return nil, memberList, err
}
ip, err := GetAdvertiseAddress(selfIP)
if err != nil {
return nil, memberList, err
}
var clientURLs []string
members:
for _, member := range memberList.Members {
// excluding learner member from the client list
if member.IsLearner {
continue
}
for _, url := range member.ClientURLs {
if strings.Contains(url, ip) {
continue members
}
}
clientURLs = append(clientURLs, member.ClientURLs...)
}
return clientURLs, memberList, nil
@ -941,3 +950,8 @@ func (e *ETCD) GetMembersClientURLs(ctx context.Context) ([]string, error) {
}
return memberUrls, nil
}
// RemoveSelf will remove the member if it exists in the cluster
func (e *ETCD) RemoveSelf(ctx context.Context) error {
return e.removePeer(ctx, e.name, e.address, true)
}

View File

@ -32,7 +32,7 @@ func (m *memory) Get() (*v1.Secret, error) {
}
func (m *memory) Update(secret *v1.Secret) error {
if m.secret == nil || m.secret.ResourceVersion != secret.ResourceVersion {
if m.secret == nil || m.secret.ResourceVersion == "" || m.secret.ResourceVersion != secret.ResourceVersion {
if m.storage != nil {
if err := m.storage.Update(secret); err != nil {
return err

2
vendor/modules.txt vendored
View File

@ -842,7 +842,7 @@ github.com/prometheus/procfs/internal/util
# github.com/rakelkar/gonetsh v0.0.0-20190930180311-e5c5ffe4bdf0
github.com/rakelkar/gonetsh/netroute
github.com/rakelkar/gonetsh/netsh
# github.com/rancher/dynamiclistener v0.2.1
# github.com/rancher/dynamiclistener v0.2.2
## explicit
github.com/rancher/dynamiclistener
github.com/rancher/dynamiclistener/cert