From b8b12796fc0e869988d3e8db96771b63bd716c08 Mon Sep 17 00:00:00 2001
From: Zhen Wang
Date: Tue, 30 Oct 2018 11:25:31 -0700
Subject: [PATCH] Delete node lease if the corresponding node is deleted

---
 pkg/kubelet/nodelease/BUILD              |   3 +
 pkg/kubelet/nodelease/controller.go      |  54 ++++++--
 pkg/kubelet/nodelease/controller_test.go | 118 ++++++++++++++--
 test/e2e/lifecycle/BUILD                 |   1 +
 test/e2e/lifecycle/node_lease.go         | 163 +++++++++++++++++++++++
 5 files changed, 316 insertions(+), 23 deletions(-)
 create mode 100644 test/e2e/lifecycle/node_lease.go

diff --git a/pkg/kubelet/nodelease/BUILD b/pkg/kubelet/nodelease/BUILD
index 2fe0520311..923c33e77f 100644
--- a/pkg/kubelet/nodelease/BUILD
+++ b/pkg/kubelet/nodelease/BUILD
@@ -25,10 +25,13 @@ go_test(
     embed = [":go_default_library"],
     deps = [
         "//staging/src/k8s.io/api/coordination/v1beta1:go_default_library",
+        "//staging/src/k8s.io/api/core/v1:go_default_library",
         "//staging/src/k8s.io/apimachinery/pkg/api/equality:go_default_library",
         "//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
+        "//staging/src/k8s.io/apimachinery/pkg/types:go_default_library",
         "//staging/src/k8s.io/apimachinery/pkg/util/clock:go_default_library",
         "//staging/src/k8s.io/apimachinery/pkg/util/diff:go_default_library",
+        "//staging/src/k8s.io/client-go/kubernetes/fake:go_default_library",
         "//vendor/k8s.io/utils/pointer:go_default_library",
     ],
 )
diff --git a/pkg/kubelet/nodelease/controller.go b/pkg/kubelet/nodelease/controller.go
index 5e587f6c59..fa0d833854 100644
--- a/pkg/kubelet/nodelease/controller.go
+++ b/pkg/kubelet/nodelease/controller.go
@@ -52,7 +52,8 @@ type Controller interface {
 }
 
 type controller struct {
-	client               coordclientset.LeaseInterface
+	client               clientset.Interface
+	leaseClient          coordclientset.LeaseInterface
 	holderIdentity       string
 	leaseDurationSeconds int32
 	renewInterval        time.Duration
@@ -67,7 +68,8 @@ func NewController(clock clock.Clock, client clientset.Interface, holderIdentity
 		leaseClient = client.CoordinationV1beta1().Leases(corev1.NamespaceNodeLease)
 	}
 	return &controller{
-		client:               leaseClient,
+		client:               client,
+		leaseClient:          leaseClient,
 		holderIdentity:       holderIdentity,
 		leaseDurationSeconds: leaseDurationSeconds,
 		renewInterval:        renewInterval,
@@ -78,8 +80,8 @@ func NewController(clock clock.Clock, client clientset.Interface, holderIdentity
 
 // Run runs the controller
 func (c *controller) Run(stopCh <-chan struct{}) {
-	if c.client == nil {
-		glog.Infof("node lease controller has nil client, will not claim or renew leases")
+	if c.leaseClient == nil {
+		glog.Infof("node lease controller has nil lease client, will not claim or renew leases")
 		return
 	}
 	wait.Until(c.sync, c.renewInterval, stopCh)
@@ -120,10 +122,10 @@ func (c *controller) backoffEnsureLease() (*coordv1beta1.Lease, bool) {
 // ensureLease creates the lease if it does not exist. Returns the lease and
 // a bool (true if this call created the lease), or any error that occurs.
 func (c *controller) ensureLease() (*coordv1beta1.Lease, bool, error) {
-	lease, err := c.client.Get(c.holderIdentity, metav1.GetOptions{})
+	lease, err := c.leaseClient.Get(c.holderIdentity, metav1.GetOptions{})
 	if apierrors.IsNotFound(err) {
 		// lease does not exist, create it
-		lease, err := c.client.Create(c.newLease(nil))
+		lease, err := c.leaseClient.Create(c.newLease(nil))
 		if err != nil {
 			return nil, false, err
 		}
@@ -140,7 +142,7 @@ func (c *controller) ensureLease() (*coordv1beta1.Lease, bool, error) {
 // call this once you're sure the lease has been created
 func (c *controller) retryUpdateLease(base *coordv1beta1.Lease) {
 	for i := 0; i < maxUpdateRetries; i++ {
-		_, err := c.client.Update(c.newLease(base))
+		_, err := c.leaseClient.Update(c.newLease(base))
 		if err == nil {
 			return
 		}
@@ -155,18 +157,44 @@ func (c *controller) retryUpdateLease(base *coordv1beta1.Lease) {
 // newLease constructs a new lease if base is nil, or returns a copy of base
 // with desired state asserted on the copy.
 func (c *controller) newLease(base *coordv1beta1.Lease) *coordv1beta1.Lease {
+	// Use the bare minimum set of fields; other fields exist for debugging/legacy,
+	// but we don't need to make node heartbeats more complicated by using them.
 	var lease *coordv1beta1.Lease
 	if base == nil {
-		lease = &coordv1beta1.Lease{}
+		lease = &coordv1beta1.Lease{
+			ObjectMeta: metav1.ObjectMeta{
+				Name:      c.holderIdentity,
+				Namespace: corev1.NamespaceNodeLease,
+			},
+			Spec: coordv1beta1.LeaseSpec{
+				HolderIdentity:       pointer.StringPtr(c.holderIdentity),
+				LeaseDurationSeconds: pointer.Int32Ptr(c.leaseDurationSeconds),
+			},
+		}
 	} else {
 		lease = base.DeepCopy()
 	}
-	// Use the bare minimum set of fields; other fields exist for debugging/legacy,
-	// but we don't need to make node heartbeats more complicated by using them.
-	lease.Name = c.holderIdentity
-	lease.Spec.HolderIdentity = pointer.StringPtr(c.holderIdentity)
-	lease.Spec.LeaseDurationSeconds = pointer.Int32Ptr(c.leaseDurationSeconds)
 	lease.Spec.RenewTime = &metav1.MicroTime{Time: c.clock.Now()}
+
+	// Setting the owner reference requires the node's UID, which is different from
+	// kubelet.nodeRef.UID. When the lease is first created, the connection between
+	// the master and the node may not be ready yet, so try to set the owner
+	// reference on every lease renewal until it succeeds.
+	if lease.OwnerReferences == nil || len(lease.OwnerReferences) == 0 {
+		if node, err := c.client.CoreV1().Nodes().Get(c.holderIdentity, metav1.GetOptions{}); err == nil {
+			lease.OwnerReferences = []metav1.OwnerReference{
+				{
+					APIVersion: corev1.SchemeGroupVersion.WithKind("Node").Version,
+					Kind:       corev1.SchemeGroupVersion.WithKind("Node").Kind,
+					Name:       c.holderIdentity,
+					UID:        node.UID,
+				},
+			}
+		} else {
+			glog.Errorf("failed to get node %q when trying to set owner ref to the node lease: %v", c.holderIdentity, err)
+		}
+	}
+
 	return lease
 }

diff --git a/pkg/kubelet/nodelease/controller_test.go b/pkg/kubelet/nodelease/controller_test.go
index d48d3d7e7c..651fdae781 100644
--- a/pkg/kubelet/nodelease/controller_test.go
+++ b/pkg/kubelet/nodelease/controller_test.go
@@ -21,15 +21,24 @@ import (
 	"time"
 
 	coordv1beta1 "k8s.io/api/coordination/v1beta1"
+	corev1 "k8s.io/api/core/v1"
 	apiequality "k8s.io/apimachinery/pkg/api/equality"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/types"
 	"k8s.io/apimachinery/pkg/util/clock"
 	"k8s.io/apimachinery/pkg/util/diff"
+	"k8s.io/client-go/kubernetes/fake"
 	"k8s.io/utils/pointer"
 )
 
 func TestNewLease(t *testing.T) {
 	fakeClock := clock.NewFakeClock(time.Now())
+	node := &corev1.Node{
+		ObjectMeta: metav1.ObjectMeta{
+			Name: "foo",
+			UID:  types.UID("foo-uid"),
+		},
+	}
 	cases := []struct {
 		desc       string
 		controller *controller
@@ -37,47 +46,136 @@ func TestNewLease(t *testing.T) {
 		expect     *coordv1beta1.Lease
 	}{
 		{
-			desc: "nil base",
+			desc: "nil base without node",
 			controller: &controller{
-				holderIdentity:       "foo",
+				client:               fake.NewSimpleClientset(),
+				holderIdentity:       node.Name,
 				leaseDurationSeconds: 10,
 				clock:                fakeClock,
 			},
 			base: nil,
 			expect: &coordv1beta1.Lease{
 				ObjectMeta: metav1.ObjectMeta{
-					Name: "foo",
+					Name:      node.Name,
+					Namespace: corev1.NamespaceNodeLease,
 				},
 				Spec: coordv1beta1.LeaseSpec{
-					HolderIdentity:       pointer.StringPtr("foo"),
+					HolderIdentity:       pointer.StringPtr(node.Name),
 					LeaseDurationSeconds: pointer.Int32Ptr(10),
 					RenewTime:            &metav1.MicroTime{Time: fakeClock.Now()},
 				},
 			},
 		},
 		{
-			desc: "non-nil base renew time is updated",
+			desc: "nil base with node",
 			controller: &controller{
-				holderIdentity:       "foo",
+				client:               fake.NewSimpleClientset(node),
+				holderIdentity:       node.Name,
+				leaseDurationSeconds: 10,
+				clock:                fakeClock,
+			},
+			base: nil,
+			expect: &coordv1beta1.Lease{
+				ObjectMeta: metav1.ObjectMeta{
+					Name:      node.Name,
+					Namespace: corev1.NamespaceNodeLease,
+					OwnerReferences: []metav1.OwnerReference{
+						{
+							APIVersion: corev1.SchemeGroupVersion.WithKind("Node").Version,
+							Kind:       corev1.SchemeGroupVersion.WithKind("Node").Kind,
+							Name:       node.Name,
+							UID:        node.UID,
+						},
+					},
+				},
+				Spec: coordv1beta1.LeaseSpec{
+					HolderIdentity:       pointer.StringPtr(node.Name),
+					LeaseDurationSeconds: pointer.Int32Ptr(10),
+					RenewTime:            &metav1.MicroTime{Time: fakeClock.Now()},
+				},
+			},
+		},
+		{
+			desc: "non-nil base without owner ref, renew time is updated",
+			controller: &controller{
+				client:               fake.NewSimpleClientset(node),
+				holderIdentity:       node.Name,
 				leaseDurationSeconds: 10,
 				clock:                fakeClock,
 			},
 			base: &coordv1beta1.Lease{
 				ObjectMeta: metav1.ObjectMeta{
-					Name: "foo",
+					Name:      node.Name,
+					Namespace: corev1.NamespaceNodeLease,
 				},
 				Spec: coordv1beta1.LeaseSpec{
-					HolderIdentity:       pointer.StringPtr("foo"),
+					HolderIdentity:       pointer.StringPtr(node.Name),
 					LeaseDurationSeconds: pointer.Int32Ptr(10),
 					RenewTime:            &metav1.MicroTime{Time: fakeClock.Now().Add(-10 * time.Second)},
 				},
 			},
 			expect: &coordv1beta1.Lease{
 				ObjectMeta: metav1.ObjectMeta{
-					Name: "foo",
+					Name:      node.Name,
+					Namespace: corev1.NamespaceNodeLease,
+					OwnerReferences: []metav1.OwnerReference{
+						{
+							APIVersion: corev1.SchemeGroupVersion.WithKind("Node").Version,
+							Kind:       corev1.SchemeGroupVersion.WithKind("Node").Kind,
+							Name:       node.Name,
+							UID:        node.UID,
+						},
+					},
 				},
 				Spec: coordv1beta1.LeaseSpec{
-					HolderIdentity:       pointer.StringPtr("foo"),
+					HolderIdentity:       pointer.StringPtr(node.Name),
+					LeaseDurationSeconds: pointer.Int32Ptr(10),
+					RenewTime:            &metav1.MicroTime{Time: fakeClock.Now()},
+				},
+			},
+		},
+		{
+			desc: "non-nil base with owner ref, renew time is updated",
+			controller: &controller{
+				client:               fake.NewSimpleClientset(node),
+				holderIdentity:       node.Name,
+				leaseDurationSeconds: 10,
+				clock:                fakeClock,
+			},
+			base: &coordv1beta1.Lease{
+				ObjectMeta: metav1.ObjectMeta{
+					Name:      node.Name,
+					Namespace: corev1.NamespaceNodeLease,
+					OwnerReferences: []metav1.OwnerReference{
+						{
+							APIVersion: corev1.SchemeGroupVersion.WithKind("Node").Version,
+							Kind:       corev1.SchemeGroupVersion.WithKind("Node").Kind,
+							Name:       node.Name,
+							UID:        node.UID,
+						},
+					},
+				},
+				Spec: coordv1beta1.LeaseSpec{
+					HolderIdentity:       pointer.StringPtr(node.Name),
+					LeaseDurationSeconds: pointer.Int32Ptr(10),
+					RenewTime:            &metav1.MicroTime{Time: fakeClock.Now().Add(-10 * time.Second)},
+				},
+			},
+			expect: &coordv1beta1.Lease{
+				ObjectMeta: metav1.ObjectMeta{
+					Name:      node.Name,
+					Namespace: corev1.NamespaceNodeLease,
+					OwnerReferences: []metav1.OwnerReference{
+						{
+							APIVersion: corev1.SchemeGroupVersion.WithKind("Node").Version,
+							Kind:       corev1.SchemeGroupVersion.WithKind("Node").Kind,
+							Name:       node.Name,
+							UID:        node.UID,
+						},
+					},
+				},
+				Spec: coordv1beta1.LeaseSpec{
+					HolderIdentity:       pointer.StringPtr(node.Name),
 					LeaseDurationSeconds: pointer.Int32Ptr(10),
 					RenewTime:            &metav1.MicroTime{Time: fakeClock.Now()},
 				},
diff --git a/test/e2e/lifecycle/BUILD b/test/e2e/lifecycle/BUILD
index 89585641a6..24f1c8c9fb 100644
--- a/test/e2e/lifecycle/BUILD
+++ b/test/e2e/lifecycle/BUILD
@@ -13,6 +13,7 @@ go_library(
         "framework.go",
         "ha_master.go",
         "kubelet_security.go",
+        "node_lease.go",
         "reboot.go",
         "resize_nodes.go",
         "restart.go",
diff --git a/test/e2e/lifecycle/node_lease.go b/test/e2e/lifecycle/node_lease.go
new file mode 100644
index 0000000000..9bd6c6b63f
--- /dev/null
+++ b/test/e2e/lifecycle/node_lease.go
@@ -0,0 +1,163 @@
+/*
+Copyright 2018 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package lifecycle
+
+import (
+	"fmt"
+	"strings"
+	"time"
+
+	corev1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	clientset "k8s.io/client-go/kubernetes"
+	"k8s.io/kubernetes/test/e2e/framework"
+
+	. "github.com/onsi/ginkgo"
"github.com/onsi/gomega" +) + +var _ = SIGDescribe("[Feature:NodeLease][NodeAlphaFeature:NodeLease][Disruptive]", func() { + f := framework.NewDefaultFramework("node-lease-test") + var systemPodsNo int32 + var c clientset.Interface + var ns string + var group string + + BeforeEach(func() { + c = f.ClientSet + ns = f.Namespace.Name + systemPods, err := framework.GetPodsInNamespace(c, ns, map[string]string{}) + Expect(err).To(BeNil()) + systemPodsNo = int32(len(systemPods)) + if strings.Index(framework.TestContext.CloudConfig.NodeInstanceGroup, ",") >= 0 { + framework.Failf("Test dose not support cluster setup with more than one MIG: %s", framework.TestContext.CloudConfig.NodeInstanceGroup) + } else { + group = framework.TestContext.CloudConfig.NodeInstanceGroup + } + }) + + Describe("NodeLease deletion", func() { + var skipped bool + + BeforeEach(func() { + skipped = true + framework.SkipUnlessProviderIs("gce", "gke", "aws") + framework.SkipUnlessNodeCountIsAtLeast(2) + skipped = false + }) + + AfterEach(func() { + if skipped { + return + } + + By("restoring the original node instance group size") + if err := framework.ResizeGroup(group, int32(framework.TestContext.CloudConfig.NumNodes)); err != nil { + framework.Failf("Couldn't restore the original node instance group size: %v", err) + } + // In GKE, our current tunneling setup has the potential to hold on to a broken tunnel (from a + // rebooted/deleted node) for up to 5 minutes before all tunnels are dropped and recreated. + // Most tests make use of some proxy feature to verify functionality. So, if a reboot test runs + // right before a test that tries to get logs, for example, we may get unlucky and try to use a + // closed tunnel to a node that was recently rebooted. There's no good way to framework.Poll for proxies + // being closed, so we sleep. + // + // TODO(cjcullen) reduce this sleep (#19314) + if framework.ProviderIs("gke") { + By("waiting 5 minutes for all dead tunnels to be dropped") + time.Sleep(5 * time.Minute) + } + if err := framework.WaitForGroupSize(group, int32(framework.TestContext.CloudConfig.NumNodes)); err != nil { + framework.Failf("Couldn't restore the original node instance group size: %v", err) + } + + if err := framework.WaitForReadyNodes(c, framework.TestContext.CloudConfig.NumNodes, 10*time.Minute); err != nil { + framework.Failf("Couldn't restore the original cluster size: %v", err) + } + // Many e2e tests assume that the cluster is fully healthy before they start. Wait until + // the cluster is restored to health. 
+ By("waiting for system pods to successfully restart") + err := framework.WaitForPodsRunningReady(c, metav1.NamespaceSystem, systemPodsNo, 0, framework.PodReadyBeforeTimeout, map[string]string{}) + Expect(err).To(BeNil()) + }) + + It("node lease should be deleted when corresponding node is deleted", func() { + leaseClient := c.CoordinationV1beta1().Leases(corev1.NamespaceNodeLease) + err := framework.WaitForReadyNodes(c, framework.TestContext.CloudConfig.NumNodes, 10*time.Minute) + Expect(err).To(BeNil()) + + By("verify node lease exists for every nodes") + originalNodes := framework.GetReadySchedulableNodesOrDie(c) + Expect(len(originalNodes.Items)).To(Equal(framework.TestContext.CloudConfig.NumNodes)) + + Eventually(func() error { + pass := true + for _, node := range originalNodes.Items { + if _, err := leaseClient.Get(node.ObjectMeta.Name, metav1.GetOptions{}); err != nil { + framework.Logf("Try to get lease of node %s, but got error: %v", node.ObjectMeta.Name, err) + pass = false + } + } + if pass { + return nil + } + return fmt.Errorf("some node lease is not ready") + }, 1*time.Minute, 5*time.Second).Should(BeNil()) + + targetNumNodes := int32(framework.TestContext.CloudConfig.NumNodes - 1) + By(fmt.Sprintf("decreasing cluster size to %d", targetNumNodes)) + err = framework.ResizeGroup(group, targetNumNodes) + Expect(err).To(BeNil()) + err = framework.WaitForGroupSize(group, targetNumNodes) + Expect(err).To(BeNil()) + err = framework.WaitForReadyNodes(c, framework.TestContext.CloudConfig.NumNodes-1, 10*time.Minute) + Expect(err).To(BeNil()) + targetNodes := framework.GetReadySchedulableNodesOrDie(c) + Expect(len(targetNodes.Items)).To(Equal(int(targetNumNodes))) + + By("verify node lease is deleted for the deleted node") + var deletedNodeName string + for _, originalNode := range originalNodes.Items { + originalNodeName := originalNode.ObjectMeta.Name + for _, targetNode := range targetNodes.Items { + if originalNodeName == targetNode.ObjectMeta.Name { + continue + } + } + deletedNodeName = originalNodeName + break + } + Expect(deletedNodeName).NotTo(Equal("")) + Eventually(func() error { + if _, err := leaseClient.Get(deletedNodeName, metav1.GetOptions{}); err == nil { + return fmt.Errorf("node lease is not deleted yet for node %q", deletedNodeName) + } + return nil + }, 1*time.Minute, 5*time.Second).Should(BeNil()) + + By("verify node leases still exist for remaining nodes") + Eventually(func() error { + for _, node := range targetNodes.Items { + if _, err := leaseClient.Get(node.ObjectMeta.Name, metav1.GetOptions{}); err != nil { + return err + } + } + return nil + }, 1*time.Minute, 5*time.Second).Should(BeNil()) + }) + }) +})