Merge pull request #41896 from kevin-wangzefeng/daemonset-infinite-default-toleration-seconds

Automatic merge from submit-queue (batch tested with PRs 40932, 41896, 41815, 41309, 41628)

Make DaemonSets survive taint-based evictions when nodes turn unreachable/notReady

**What this PR does / why we need it**:
DaemonSet pods shouldn't be deleted by NodeController when a node has problems.
This PR adds infinite tolerations for the unreachable/notReady NoExecute taints to DaemonSet pods, so that those pods won't be deleted by NodeController when a node goes unreachable/notReady.

**Which issue this PR fixes**:
fixes #41738 
Related PR: #41133


**Special notes for your reviewer**:

**Release note**:

```release-note
Make DaemonSets survive taint-based evictions when nodes turn unreachable/notReady.
```
pull/6/head
Kubernetes Submit Queue 2017-02-26 08:09:56 -08:00 committed by GitHub
commit dd29e6cdc7
2 changed files with 74 additions and 0 deletions

View File

@ -753,6 +753,32 @@ func (dsc *DaemonSetsController) nodeShouldRunDaemonPod(node *v1.Node, ds *exten
newPod.Namespace = ds.Namespace
newPod.Spec.NodeName = node.Name
// DaemonSet pods shouldn't be deleted by NodeController in case of node problems.
// Add infinite toleration for taint notReady:NoExecute here
// to survive taint-based eviction enforced by NodeController
// when node turns not ready.
_, err = v1.AddOrUpdateTolerationInPod(newPod, &v1.Toleration{
Key: metav1.TaintNodeNotReady,
Operator: v1.TolerationOpExists,
Effect: v1.TaintEffectNoExecute,
})
if err != nil {
return false, false, false, err
}
// DaemonSet pods shouldn't be deleted by NodeController in case of node problems.
// Add infinite toleration for taint unreachable:NoExecute here
// to survive taint-based eviction enforced by NodeController
// when node turns unreachable.
_, err = v1.AddOrUpdateTolerationInPod(newPod, &v1.Toleration{
Key: metav1.TaintNodeUnreachable,
Operator: v1.TolerationOpExists,
Effect: v1.TaintEffectNoExecute,
})
if err != nil {
return false, false, false, err
}
pods := []*v1.Pod{}
podList, err := dsc.podLister.List(labels.Everything())

View File

@ -50,6 +50,20 @@ var (
noScheduleTaints = []v1.Taint{{Key: "dedicated", Value: "user1", Effect: "NoSchedule"}}
)
// NoExecute taint fixtures used by the tests in this file. Each slice holds
// exactly one taint whose TimeAdded is stamped when the package is initialized.
var (
	// nodeNotReady is a single NoExecute taint keyed by metav1.TaintNodeNotReady.
	nodeNotReady = []v1.Taint{
		{
			TimeAdded: metav1.Now(),
			Effect:    v1.TaintEffectNoExecute,
			Key:       metav1.TaintNodeNotReady,
		},
	}

	// nodeUnreachable is a single NoExecute taint keyed by metav1.TaintNodeUnreachable.
	nodeUnreachable = []v1.Taint{
		{
			TimeAdded: metav1.Now(),
			Effect:    v1.TaintEffectNoExecute,
			Key:       metav1.TaintNodeUnreachable,
		},
	}
)
func getKey(ds *extensions.DaemonSet, t *testing.T) string {
if key, err := controller.KeyFunc(ds); err != nil {
t.Errorf("Unexpected error getting key for ds %v: %v", ds.Name, err)
@ -742,6 +756,40 @@ func TestTaintedNodeDaemonLaunchesToleratePod(t *testing.T) {
syncAndValidateDaemonSets(t, manager, ds, podControl, 1, 0)
}
// TestNotReadyNodeDaemonLaunchesPod checks that a DaemonSet still launches a
// pod on a node that carries the notReady:NoExecute taint and reports a Ready
// condition of False.
func TestNotReadyNodeDaemonLaunchesPod(t *testing.T) {
	dsc, fakePodControl, _ := newTestController()

	// Node fixture: tainted with notReady:NoExecute and marked not ready.
	notReadyCondition := v1.NodeCondition{
		Type:   v1.NodeReady,
		Status: v1.ConditionFalse,
	}
	taintedNode := newNode("tainted", nil)
	setNodeTaint(taintedNode, nodeNotReady)
	taintedNode.Status.Conditions = []v1.NodeCondition{notReadyCondition}
	dsc.nodeStore.Add(taintedNode)

	daemonSet := newDaemonSet("simple")
	dsc.dsStore.Add(daemonSet)

	// The trailing 1, 0 are presumably the expected create/delete counts;
	// confirm against syncAndValidateDaemonSets.
	syncAndValidateDaemonSets(t, dsc, daemonSet, fakePodControl, 1, 0)
}
// TestUnreachableNodeDaemonLaunchesPod checks that a DaemonSet still launches
// a pod on a node that carries the unreachable:NoExecute taint and reports a
// Ready condition of Unknown.
func TestUnreachableNodeDaemonLaunchesPod(t *testing.T) {
	dsc, fakePodControl, _ := newTestController()

	// Node fixture: tainted with unreachable:NoExecute and readiness unknown.
	unknownCondition := v1.NodeCondition{
		Type:   v1.NodeReady,
		Status: v1.ConditionUnknown,
	}
	taintedNode := newNode("tainted", nil)
	setNodeTaint(taintedNode, nodeUnreachable)
	taintedNode.Status.Conditions = []v1.NodeCondition{unknownCondition}
	dsc.nodeStore.Add(taintedNode)

	daemonSet := newDaemonSet("simple")
	dsc.dsStore.Add(daemonSet)

	// The trailing 1, 0 are presumably the expected create/delete counts;
	// confirm against syncAndValidateDaemonSets.
	syncAndValidateDaemonSets(t, dsc, daemonSet, fakePodControl, 1, 0)
}
// DaemonSet should launch a pod on an untainted node when the pod has tolerations.
func TestNodeDaemonLaunchesToleratePod(t *testing.T) {
manager, podControl, _ := newTestController()