mirror of https://github.com/k3s-io/k3s
Merge pull request #70034 from wangzhen127/cleanup-node-lease
Delete node lease if the corresponding node is deleted
commit be71fbe511
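This change ties each node's Lease object to its Node through an OwnerReference: the kubelet's lease controller now keeps a full clientset (to look up the Node's UID) alongside the narrower lease client, and stamps the reference during lease creation/renewal. Once the reference is in place, the apiserver's garbage collector deletes the lease automatically whenever its owner Node is deleted. A minimal sketch of the idea; buildNodeOwnerRef is a hypothetical helper name, not part of the patch:

package main

import (
	"fmt"

	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// buildNodeOwnerRef mirrors what the patch does inline in newLease: once a
// Lease carries an OwnerReference to its Node, the garbage collector removes
// the Lease automatically when the Node object is deleted.
func buildNodeOwnerRef(node *corev1.Node) metav1.OwnerReference {
	gvk := corev1.SchemeGroupVersion.WithKind("Node")
	return metav1.OwnerReference{
		APIVersion: gvk.Version, // "v1"; the core API group is empty
		Kind:       gvk.Kind,    // "Node"
		Name:       node.Name,
		UID:        node.UID, // the UID must come from the live Node object
	}
}

func main() {
	node := &corev1.Node{ObjectMeta: metav1.ObjectMeta{Name: "node-1", UID: "1234-abcd"}}
	fmt.Printf("%+v\n", buildNodeOwnerRef(node))
}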
@@ -25,10 +25,13 @@ go_test(
     embed = [":go_default_library"],
     deps = [
         "//staging/src/k8s.io/api/coordination/v1beta1:go_default_library",
+        "//staging/src/k8s.io/api/core/v1:go_default_library",
         "//staging/src/k8s.io/apimachinery/pkg/api/equality:go_default_library",
         "//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
+        "//staging/src/k8s.io/apimachinery/pkg/types:go_default_library",
         "//staging/src/k8s.io/apimachinery/pkg/util/clock:go_default_library",
         "//staging/src/k8s.io/apimachinery/pkg/util/diff:go_default_library",
+        "//staging/src/k8s.io/client-go/kubernetes/fake:go_default_library",
         "//vendor/k8s.io/utils/pointer:go_default_library",
     ],
 )

@@ -52,7 +52,8 @@ type Controller interface {
 }
 
 type controller struct {
-	client               coordclientset.LeaseInterface
+	client               clientset.Interface
+	leaseClient          coordclientset.LeaseInterface
 	holderIdentity       string
 	leaseDurationSeconds int32
 	renewInterval        time.Duration

@@ -67,7 +68,8 @@ func NewController(clock clock.Clock, client clientset.Interface, holderIdentity
 		leaseClient = client.CoordinationV1beta1().Leases(corev1.NamespaceNodeLease)
 	}
 	return &controller{
-		client:               leaseClient,
+		client:               client,
+		leaseClient:          leaseClient,
 		holderIdentity:       holderIdentity,
 		leaseDurationSeconds: leaseDurationSeconds,
 		renewInterval:        renewInterval,

@@ -78,8 +80,8 @@ func NewController(clock clock.Clock, client clientset.Interface, holderIdentity
 
 // Run runs the controller
 func (c *controller) Run(stopCh <-chan struct{}) {
-	if c.client == nil {
-		glog.Infof("node lease controller has nil client, will not claim or renew leases")
+	if c.leaseClient == nil {
+		glog.Infof("node lease controller has nil lease client, will not claim or renew leases")
 		return
 	}
 	wait.Until(c.sync, c.renewInterval, stopCh)

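For context, wait.Until (from k8s.io/apimachinery/pkg/util/wait) invokes the given function immediately and then once per period until the stop channel is closed; that is what turns sync into a periodic heartbeat loop. A minimal standalone sketch:

package main

import (
	"fmt"
	"time"

	"k8s.io/apimachinery/pkg/util/wait"
)

func main() {
	stopCh := make(chan struct{})
	go func() {
		time.Sleep(350 * time.Millisecond)
		close(stopCh) // closing the channel ends the loop
	}()
	// wait.Until runs f immediately, then every period, until stopCh closes.
	wait.Until(func() { fmt.Println("renew lease") }, 100*time.Millisecond, stopCh)
	fmt.Println("stopped")
}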
@@ -120,10 +122,10 @@ func (c *controller) backoffEnsureLease() (*coordv1beta1.Lease, bool) {
 // ensureLease creates the lease if it does not exist. Returns the lease and
 // a bool (true if this call created the lease), or any error that occurs.
 func (c *controller) ensureLease() (*coordv1beta1.Lease, bool, error) {
-	lease, err := c.client.Get(c.holderIdentity, metav1.GetOptions{})
+	lease, err := c.leaseClient.Get(c.holderIdentity, metav1.GetOptions{})
 	if apierrors.IsNotFound(err) {
 		// lease does not exist, create it
-		lease, err := c.client.Create(c.newLease(nil))
+		lease, err := c.leaseClient.Create(c.newLease(nil))
 		if err != nil {
 			return nil, false, err
 		}

@@ -140,7 +142,7 @@ func (c *controller) ensureLease() (*coordv1beta1.Lease, bool, error) {
 // call this once you're sure the lease has been created
 func (c *controller) retryUpdateLease(base *coordv1beta1.Lease) {
 	for i := 0; i < maxUpdateRetries; i++ {
-		_, err := c.client.Update(c.newLease(base))
+		_, err := c.leaseClient.Update(c.newLease(base))
 		if err == nil {
 			return
 		}

@@ -155,18 +157,44 @@ func (c *controller) retryUpdateLease(base *coordv1beta1.Lease) {
 // newLease constructs a new lease if base is nil, or returns a copy of base
 // with desired state asserted on the copy.
 func (c *controller) newLease(base *coordv1beta1.Lease) *coordv1beta1.Lease {
+	// Use the bare minimum set of fields; other fields exist for debugging/legacy,
+	// but we don't need to make node heartbeats more complicated by using them.
 	var lease *coordv1beta1.Lease
 	if base == nil {
-		lease = &coordv1beta1.Lease{}
+		lease = &coordv1beta1.Lease{
+			ObjectMeta: metav1.ObjectMeta{
+				Name:      c.holderIdentity,
+				Namespace: corev1.NamespaceNodeLease,
+			},
+			Spec: coordv1beta1.LeaseSpec{
+				HolderIdentity:       pointer.StringPtr(c.holderIdentity),
+				LeaseDurationSeconds: pointer.Int32Ptr(c.leaseDurationSeconds),
+			},
+		}
 	} else {
 		lease = base.DeepCopy()
 	}
-	// Use the bare minimum set of fields; other fields exist for debugging/legacy,
-	// but we don't need to make node heartbeats more complicated by using them.
-	lease.Name = c.holderIdentity
-	lease.Spec.HolderIdentity = pointer.StringPtr(c.holderIdentity)
-	lease.Spec.LeaseDurationSeconds = pointer.Int32Ptr(c.leaseDurationSeconds)
 	lease.Spec.RenewTime = &metav1.MicroTime{Time: c.clock.Now()}
+
+	// Setting owner reference needs node's UID. Note that it is different from
+	// kubelet.nodeRef.UID. When lease is initially created, it is possible that
+	// the connection between master and node is not ready yet. So try to set
+	// owner reference every time when renewing the lease, until successful.
+	if lease.OwnerReferences == nil || len(lease.OwnerReferences) == 0 {
+		if node, err := c.client.CoreV1().Nodes().Get(c.holderIdentity, metav1.GetOptions{}); err == nil {
+			lease.OwnerReferences = []metav1.OwnerReference{
+				{
+					APIVersion: corev1.SchemeGroupVersion.WithKind("Node").Version,
+					Kind:       corev1.SchemeGroupVersion.WithKind("Node").Kind,
+					Name:       c.holderIdentity,
+					UID:        node.UID,
+				},
+			}
+		} else {
+			glog.Errorf("failed to get node %q when trying to set owner ref to the node lease: %v", c.holderIdentity, err)
+		}
+	}
+
 	return lease
 }

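A small aside on the guard above: in Go, len of a nil slice is already 0, so the `lease.OwnerReferences == nil ||` half of the condition is redundant; the design point is the retry itself, since the Node (and hence its UID) may not be retrievable when the lease is first created. A standalone sketch of the slice behavior:

package main

import "fmt"

func main() {
	var refs []string // nil slice, never assigned
	// Prints "true 0": len of a nil slice is 0, so checking == nil as well is redundant.
	fmt.Println(refs == nil, len(refs))
}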
@@ -21,15 +21,24 @@ import (
 	"time"
 
 	coordv1beta1 "k8s.io/api/coordination/v1beta1"
+	corev1 "k8s.io/api/core/v1"
 	apiequality "k8s.io/apimachinery/pkg/api/equality"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/types"
 	"k8s.io/apimachinery/pkg/util/clock"
 	"k8s.io/apimachinery/pkg/util/diff"
+	"k8s.io/client-go/kubernetes/fake"
 	"k8s.io/utils/pointer"
 )
 
 func TestNewLease(t *testing.T) {
 	fakeClock := clock.NewFakeClock(time.Now())
+	node := &corev1.Node{
+		ObjectMeta: metav1.ObjectMeta{
+			Name: "foo",
+			UID:  types.UID("foo-uid"),
+		},
+	}
 	cases := []struct {
 		desc       string
 		controller *controller

@@ -37,47 +46,136 @@ func TestNewLease(t *testing.T) {
 		expect     *coordv1beta1.Lease
 	}{
 		{
-			desc: "nil base",
+			desc: "nil base without node",
 			controller: &controller{
-				holderIdentity:       "foo",
+				client:               fake.NewSimpleClientset(),
+				holderIdentity:       node.Name,
 				leaseDurationSeconds: 10,
 				clock:                fakeClock,
 			},
 			base: nil,
 			expect: &coordv1beta1.Lease{
 				ObjectMeta: metav1.ObjectMeta{
-					Name:      "foo",
+					Name:      node.Name,
 					Namespace: corev1.NamespaceNodeLease,
 				},
 				Spec: coordv1beta1.LeaseSpec{
-					HolderIdentity:       pointer.StringPtr("foo"),
+					HolderIdentity:       pointer.StringPtr(node.Name),
 					LeaseDurationSeconds: pointer.Int32Ptr(10),
 					RenewTime:            &metav1.MicroTime{Time: fakeClock.Now()},
 				},
 			},
 		},
 		{
-			desc: "non-nil base renew time is updated",
+			desc: "nil base with node",
 			controller: &controller{
-				holderIdentity:       "foo",
+				client:               fake.NewSimpleClientset(node),
+				holderIdentity:       node.Name,
 				leaseDurationSeconds: 10,
 				clock:                fakeClock,
 			},
+			base: nil,
+			expect: &coordv1beta1.Lease{
+				ObjectMeta: metav1.ObjectMeta{
+					Name:      node.Name,
+					Namespace: corev1.NamespaceNodeLease,
+					OwnerReferences: []metav1.OwnerReference{
+						{
+							APIVersion: corev1.SchemeGroupVersion.WithKind("Node").Version,
+							Kind:       corev1.SchemeGroupVersion.WithKind("Node").Kind,
+							Name:       node.Name,
+							UID:        node.UID,
+						},
+					},
+				},
+				Spec: coordv1beta1.LeaseSpec{
+					HolderIdentity:       pointer.StringPtr(node.Name),
+					LeaseDurationSeconds: pointer.Int32Ptr(10),
+					RenewTime:            &metav1.MicroTime{Time: fakeClock.Now()},
+				},
+			},
+		},
+		{
+			desc: "non-nil base without owner ref, renew time is updated",
+			controller: &controller{
+				client:               fake.NewSimpleClientset(node),
+				holderIdentity:       node.Name,
+				leaseDurationSeconds: 10,
+				clock:                fakeClock,
+			},
 			base: &coordv1beta1.Lease{
 				ObjectMeta: metav1.ObjectMeta{
-					Name:      "foo",
+					Name:      node.Name,
 					Namespace: corev1.NamespaceNodeLease,
 				},
 				Spec: coordv1beta1.LeaseSpec{
-					HolderIdentity:       pointer.StringPtr("foo"),
+					HolderIdentity:       pointer.StringPtr(node.Name),
 					LeaseDurationSeconds: pointer.Int32Ptr(10),
 					RenewTime:            &metav1.MicroTime{Time: fakeClock.Now().Add(-10 * time.Second)},
 				},
 			},
 			expect: &coordv1beta1.Lease{
 				ObjectMeta: metav1.ObjectMeta{
-					Name:      "foo",
+					Name:      node.Name,
 					Namespace: corev1.NamespaceNodeLease,
+					OwnerReferences: []metav1.OwnerReference{
+						{
+							APIVersion: corev1.SchemeGroupVersion.WithKind("Node").Version,
+							Kind:       corev1.SchemeGroupVersion.WithKind("Node").Kind,
+							Name:       node.Name,
+							UID:        node.UID,
+						},
+					},
 				},
 				Spec: coordv1beta1.LeaseSpec{
-					HolderIdentity:       pointer.StringPtr("foo"),
+					HolderIdentity:       pointer.StringPtr(node.Name),
 					LeaseDurationSeconds: pointer.Int32Ptr(10),
 					RenewTime:            &metav1.MicroTime{Time: fakeClock.Now()},
 				},
 			},
 		},
+		{
+			desc: "non-nil base with owner ref, renew time is updated",
+			controller: &controller{
+				client:               fake.NewSimpleClientset(node),
+				holderIdentity:       node.Name,
+				leaseDurationSeconds: 10,
+				clock:                fakeClock,
+			},
+			base: &coordv1beta1.Lease{
+				ObjectMeta: metav1.ObjectMeta{
+					Name:      node.Name,
+					Namespace: corev1.NamespaceNodeLease,
+					OwnerReferences: []metav1.OwnerReference{
+						{
+							APIVersion: corev1.SchemeGroupVersion.WithKind("Node").Version,
+							Kind:       corev1.SchemeGroupVersion.WithKind("Node").Kind,
+							Name:       node.Name,
+							UID:        node.UID,
+						},
+					},
+				},
+				Spec: coordv1beta1.LeaseSpec{
+					HolderIdentity:       pointer.StringPtr(node.Name),
+					LeaseDurationSeconds: pointer.Int32Ptr(10),
+					RenewTime:            &metav1.MicroTime{Time: fakeClock.Now().Add(-10 * time.Second)},
+				},
+			},
+			expect: &coordv1beta1.Lease{
+				ObjectMeta: metav1.ObjectMeta{
+					Name:      node.Name,
+					Namespace: corev1.NamespaceNodeLease,
+					OwnerReferences: []metav1.OwnerReference{
+						{
+							APIVersion: corev1.SchemeGroupVersion.WithKind("Node").Version,
+							Kind:       corev1.SchemeGroupVersion.WithKind("Node").Kind,
+							Name:       node.Name,
+							UID:        node.UID,
+						},
+					},
+				},
+				Spec: coordv1beta1.LeaseSpec{
+					HolderIdentity:       pointer.StringPtr(node.Name),
+					LeaseDurationSeconds: pointer.Int32Ptr(10),
+					RenewTime:            &metav1.MicroTime{Time: fakeClock.Now()},
+				},

@@ -13,6 +13,7 @@ go_library(
         "framework.go",
         "ha_master.go",
         "kubelet_security.go",
+        "node_lease.go",
         "reboot.go",
         "resize_nodes.go",
         "restart.go",

@@ -0,0 +1,163 @@
+/*
+Copyright 2018 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package lifecycle
+
+import (
+	"fmt"
+	"strings"
+	"time"
+
+	corev1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	clientset "k8s.io/client-go/kubernetes"
+	"k8s.io/kubernetes/test/e2e/framework"
+
+	. "github.com/onsi/ginkgo"
+	. "github.com/onsi/gomega"
+)
+
+var _ = SIGDescribe("[Feature:NodeLease][NodeAlphaFeature:NodeLease][Disruptive]", func() {
+	f := framework.NewDefaultFramework("node-lease-test")
+	var systemPodsNo int32
+	var c clientset.Interface
+	var ns string
+	var group string
+
+	BeforeEach(func() {
+		c = f.ClientSet
+		ns = f.Namespace.Name
+		systemPods, err := framework.GetPodsInNamespace(c, ns, map[string]string{})
+		Expect(err).To(BeNil())
+		systemPodsNo = int32(len(systemPods))
+		if strings.Index(framework.TestContext.CloudConfig.NodeInstanceGroup, ",") >= 0 {
+			framework.Failf("Test does not support cluster setup with more than one MIG: %s", framework.TestContext.CloudConfig.NodeInstanceGroup)
+		} else {
+			group = framework.TestContext.CloudConfig.NodeInstanceGroup
+		}
+	})
+
+	Describe("NodeLease deletion", func() {
+		var skipped bool
+
+		BeforeEach(func() {
+			skipped = true
+			framework.SkipUnlessProviderIs("gce", "gke", "aws")
+			framework.SkipUnlessNodeCountIsAtLeast(2)
+			skipped = false
+		})
+
+		AfterEach(func() {
+			if skipped {
+				return
+			}
+
+			By("restoring the original node instance group size")
+			if err := framework.ResizeGroup(group, int32(framework.TestContext.CloudConfig.NumNodes)); err != nil {
+				framework.Failf("Couldn't restore the original node instance group size: %v", err)
+			}
+			// In GKE, our current tunneling setup has the potential to hold on to a broken tunnel (from a
+			// rebooted/deleted node) for up to 5 minutes before all tunnels are dropped and recreated.
+			// Most tests make use of some proxy feature to verify functionality. So, if a reboot test runs
+			// right before a test that tries to get logs, for example, we may get unlucky and try to use a
+			// closed tunnel to a node that was recently rebooted. There's no good way to framework.Poll for proxies
+			// being closed, so we sleep.
+			//
+			// TODO(cjcullen) reduce this sleep (#19314)
+			if framework.ProviderIs("gke") {
+				By("waiting 5 minutes for all dead tunnels to be dropped")
+				time.Sleep(5 * time.Minute)
+			}
+			if err := framework.WaitForGroupSize(group, int32(framework.TestContext.CloudConfig.NumNodes)); err != nil {
+				framework.Failf("Couldn't restore the original node instance group size: %v", err)
+			}
+
+			if err := framework.WaitForReadyNodes(c, framework.TestContext.CloudConfig.NumNodes, 10*time.Minute); err != nil {
+				framework.Failf("Couldn't restore the original cluster size: %v", err)
+			}
+			// Many e2e tests assume that the cluster is fully healthy before they start. Wait until
+			// the cluster is restored to health.
+			By("waiting for system pods to successfully restart")
+			err := framework.WaitForPodsRunningReady(c, metav1.NamespaceSystem, systemPodsNo, 0, framework.PodReadyBeforeTimeout, map[string]string{})
+			Expect(err).To(BeNil())
+		})
+
+		It("node lease should be deleted when corresponding node is deleted", func() {
+			leaseClient := c.CoordinationV1beta1().Leases(corev1.NamespaceNodeLease)
+			err := framework.WaitForReadyNodes(c, framework.TestContext.CloudConfig.NumNodes, 10*time.Minute)
+			Expect(err).To(BeNil())
+
+			By("verify node lease exists for every node")
+			originalNodes := framework.GetReadySchedulableNodesOrDie(c)
+			Expect(len(originalNodes.Items)).To(Equal(framework.TestContext.CloudConfig.NumNodes))
+
+			Eventually(func() error {
+				pass := true
+				for _, node := range originalNodes.Items {
+					if _, err := leaseClient.Get(node.ObjectMeta.Name, metav1.GetOptions{}); err != nil {
+						framework.Logf("Try to get lease of node %s, but got error: %v", node.ObjectMeta.Name, err)
+						pass = false
+					}
+				}
+				if pass {
+					return nil
+				}
+				return fmt.Errorf("some node leases are not ready")
+			}, 1*time.Minute, 5*time.Second).Should(BeNil())
+
+			targetNumNodes := int32(framework.TestContext.CloudConfig.NumNodes - 1)
+			By(fmt.Sprintf("decreasing cluster size to %d", targetNumNodes))
+			err = framework.ResizeGroup(group, targetNumNodes)
+			Expect(err).To(BeNil())
+			err = framework.WaitForGroupSize(group, targetNumNodes)
+			Expect(err).To(BeNil())
+			err = framework.WaitForReadyNodes(c, framework.TestContext.CloudConfig.NumNodes-1, 10*time.Minute)
+			Expect(err).To(BeNil())
+			targetNodes := framework.GetReadySchedulableNodesOrDie(c)
+			Expect(len(targetNodes.Items)).To(Equal(int(targetNumNodes)))
+
+			By("verify node lease is deleted for the deleted node")
+			var deletedNodeName string
+			// Find the original node that is no longer among the ready nodes.
+			for _, originalNode := range originalNodes.Items {
+				originalNodeName := originalNode.ObjectMeta.Name
+				stillPresent := false
+				for _, targetNode := range targetNodes.Items {
+					if originalNodeName == targetNode.ObjectMeta.Name {
+						stillPresent = true
+						break
+					}
+				}
+				if !stillPresent {
+					deletedNodeName = originalNodeName
+					break
+				}
+			}
+			Expect(deletedNodeName).NotTo(Equal(""))
+			Eventually(func() error {
+				if _, err := leaseClient.Get(deletedNodeName, metav1.GetOptions{}); err == nil {
+					return fmt.Errorf("node lease is not deleted yet for node %q", deletedNodeName)
+				}
+				return nil
+			}, 1*time.Minute, 5*time.Second).Should(BeNil())
+
+			By("verify node leases still exist for remaining nodes")
+			Eventually(func() error {
+				for _, node := range targetNodes.Items {
+					if _, err := leaseClient.Get(node.ObjectMeta.Name, metav1.GetOptions{}); err != nil {
+						return err
+					}
+				}
+				return nil
+			}, 1*time.Minute, 5*time.Second).Should(BeNil())
+		})
+	})
+})
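Beyond existence checks, the e2e test could also assert that a lease is in fact owned by its node. A hedged sketch under the same pre-context client-go signatures used in this patch; expectLeaseOwnedByNode is a hypothetical helper, not part of the PR:

package lifecycle

import (
	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	clientset "k8s.io/client-go/kubernetes"
	"k8s.io/kubernetes/test/e2e/framework"
)

// expectLeaseOwnedByNode asserts that a node's lease carries the owner
// reference the kubelet lease controller above is responsible for setting.
func expectLeaseOwnedByNode(c clientset.Interface, nodeName string) {
	lease, err := c.CoordinationV1beta1().Leases(corev1.NamespaceNodeLease).Get(nodeName, metav1.GetOptions{})
	if err != nil {
		framework.Failf("failed to get lease for node %q: %v", nodeName, err)
	}
	if len(lease.OwnerReferences) != 1 || lease.OwnerReferences[0].Kind != "Node" {
		framework.Failf("lease %q is not owned by a Node: %+v", lease.Name, lease.OwnerReferences)
	}
}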