mirror of https://github.com/k3s-io/k3s

Merge pull request #67036 from Huang-Wei/update-ds-pod-tolerations

update logic of adding default DS pod tolerations

commit 550f6200c5
@@ -32,7 +32,6 @@ go_library(
         "//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
         "//staging/src/k8s.io/apimachinery/pkg/labels:go_default_library",
         "//staging/src/k8s.io/apimachinery/pkg/runtime:go_default_library",
-        "//staging/src/k8s.io/apimachinery/pkg/types:go_default_library",
         "//staging/src/k8s.io/apimachinery/pkg/util/errors:go_default_library",
         "//staging/src/k8s.io/apimachinery/pkg/util/intstr:go_default_library",
         "//staging/src/k8s.io/apimachinery/pkg/util/json:go_default_library",
@@ -31,7 +31,6 @@ import (
     "k8s.io/apimachinery/pkg/api/errors"
     metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
     "k8s.io/apimachinery/pkg/labels"
-    "k8s.io/apimachinery/pkg/types"
     utilerrors "k8s.io/apimachinery/pkg/util/errors"
     utilruntime "k8s.io/apimachinery/pkg/util/runtime"
     "k8s.io/apimachinery/pkg/util/sets"
@@ -1011,7 +1010,7 @@ func (dsc *DaemonSetsController) syncNodes(ds *apps.DaemonSet, podsToDelete, nod
         if err != nil {
             generation = nil
         }
-        template := util.CreatePodTemplate(ds.Namespace, ds.Spec.Template, generation, hash)
+        template := util.CreatePodTemplate(ds.Spec.Template, generation, hash)
         // Batch the pod creates. Batch sizes start at SlowStartInitialBatchSize
         // and double with each successful iteration in a kind of "slow start".
         // This handles attempts to start large numbers of pods that would
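The comment above refers to the controller's slow-start batching. A minimal, self-contained sketch of that idea, assuming an initial batch size of at least 1; the function name and shape here are illustrative, not the controller's actual helper:

```go
// slowStart runs fn up to total times, in batches that start small and double
// after every fully successful batch; it stops at the first error so a
// misconfigured workload does not flood the API server with doomed creates.
func slowStart(total, initialBatchSize int, fn func() error) int {
	succeeded := 0
	for batch := initialBatchSize; succeeded < total; batch *= 2 {
		if remaining := total - succeeded; batch > remaining {
			batch = remaining
		}
		for i := 0; i < batch; i++ {
			if err := fn(); err != nil {
				return succeeded // caller requeues and retries the remainder later
			}
			succeeded++
		}
	}
	return succeeded
}
```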
@@ -1299,12 +1298,11 @@ func (dsc *DaemonSetsController) simulate(newPod *v1.Pod, node *v1.Node, ds *app
     for _, obj := range objects {
         // Ignore pods that belong to the daemonset when taking into account whether a daemonset should bind to a node.
-        // TODO: replace this with metav1.IsControlledBy() in 1.12
         pod, ok := obj.(*v1.Pod)
         if !ok {
             continue
         }
-        if isControlledByDaemonSet(pod, ds.GetUID()) {
+        if metav1.IsControlledBy(pod, ds) {
             continue
         }
         nodeInfo.AddPod(pod)
@@ -1420,7 +1418,7 @@ func NewPod(ds *apps.DaemonSet, nodeName string) *v1.Pod {
     newPod.Spec.NodeName = nodeName

     // Added default tolerations for DaemonSet pods.
-    util.AddOrUpdateDaemonPodTolerations(&newPod.Spec, kubelettypes.IsCriticalPod(newPod))
+    util.AddOrUpdateDaemonPodTolerations(&newPod.Spec)

     return newPod
 }
@@ -1523,15 +1521,6 @@ func (o podByCreationTimestampAndPhase) Less(i, j int) bool {
     return o[i].CreationTimestamp.Before(&o[j].CreationTimestamp)
 }

-func isControlledByDaemonSet(p *v1.Pod, uuid types.UID) bool {
-    for _, ref := range p.OwnerReferences {
-        if ref.Controller != nil && *ref.Controller && ref.UID == uuid {
-            return true
-        }
-    }
-    return false
-}
-
 func failedPodsBackoffKey(ds *apps.DaemonSet, nodeName string) string {
     return fmt.Sprintf("%s/%d/%s", ds.UID, ds.Status.ObservedGeneration, nodeName)
 }
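The helper removed above duplicated a check that apimachinery already provides. A hedged sketch of the equivalence, using only APIs that appear in this diff; the pod name and UID values are made up for illustration:

```go
package main

import (
	"fmt"

	appsv1 "k8s.io/api/apps/v1"
	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

func main() {
	ds := &appsv1.DaemonSet{ObjectMeta: metav1.ObjectMeta{Name: "example-ds", UID: "uid-1"}}
	controller := true
	pod := &v1.Pod{ObjectMeta: metav1.ObjectMeta{
		Name: "example-ds-abc12",
		OwnerReferences: []metav1.OwnerReference{{
			Kind:       "DaemonSet",
			Name:       "example-ds",
			UID:        "uid-1",
			Controller: &controller,
		}},
	}}

	// metav1.IsControlledBy looks up the owner reference flagged as the
	// controller and compares its UID with ds.GetUID() -- the same check the
	// removed isControlledByDaemonSet helper performed by hand.
	fmt.Println(metav1.IsControlledBy(pod, ds)) // true
}
```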
@@ -1691,41 +1691,7 @@ func setDaemonSetToleration(ds *apps.DaemonSet, tolerations []v1.Toleration) {
     ds.Spec.Template.Spec.Tolerations = tolerations
 }

-// DaemonSet should launch a critical pod even when the node with OutOfDisk taints.
-// TODO(#48843) OutOfDisk taints will be removed in 1.10
-func TestTaintOutOfDiskNodeDaemonLaunchesCriticalPod(t *testing.T) {
-    for _, f := range []bool{true, false} {
-        defer utilfeaturetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.ScheduleDaemonSetPods, f)()
-        for _, strategy := range updateStrategies() {
-            ds := newDaemonSet("critical")
-            ds.Spec.UpdateStrategy = *strategy
-            setDaemonSetCritical(ds)
-            manager, podControl, _, err := newTestController(ds)
-            if err != nil {
-                t.Fatalf("error creating DaemonSets controller: %v", err)
-            }
-
-            node := newNode("not-enough-disk", nil)
-            node.Status.Conditions = []v1.NodeCondition{{Type: v1.NodeOutOfDisk, Status: v1.ConditionTrue}}
-            node.Spec.Taints = []v1.Taint{{Key: schedulerapi.TaintNodeOutOfDisk, Effect: v1.TaintEffectNoSchedule}}
-            manager.nodeStore.Add(node)
-
-            // NOTE: Whether or not TaintNodesByCondition is enabled, it'll add toleration to DaemonSet pods.
-
-            // Without enabling critical pod annotation feature gate, we shouldn't create critical pod
-            defer utilfeaturetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.ExperimentalCriticalPodAnnotation, false)()
-            manager.dsStore.Add(ds)
-            syncAndValidateDaemonSets(t, manager, ds, podControl, 0, 0, 0)
-
-            // With enabling critical pod annotation feature gate, we will create critical pod
-            defer utilfeaturetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.ExperimentalCriticalPodAnnotation, true)()
-            manager.dsStore.Add(ds)
-            syncAndValidateDaemonSets(t, manager, ds, podControl, 1, 0, 0)
-        }
-    }
-}
-
-// DaemonSet should launch a pod even when the node with MemoryPressure/DiskPressure taints.
+// DaemonSet should launch a pod even when the node with MemoryPressure/DiskPressure/PIDPressure taints.
 func TestTaintPressureNodeDaemonLaunchesPod(t *testing.T) {
     for _, f := range []bool{true, false} {
         defer utilfeaturetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.ScheduleDaemonSetPods, f)()
@@ -1742,10 +1708,12 @@ func TestTaintPressureNodeDaemonLaunchesPod(t *testing.T) {
             node.Status.Conditions = []v1.NodeCondition{
                 {Type: v1.NodeDiskPressure, Status: v1.ConditionTrue},
                 {Type: v1.NodeMemoryPressure, Status: v1.ConditionTrue},
+                {Type: v1.NodePIDPressure, Status: v1.ConditionTrue},
             }
             node.Spec.Taints = []v1.Taint{
                 {Key: schedulerapi.TaintNodeDiskPressure, Effect: v1.TaintEffectNoSchedule},
                 {Key: schedulerapi.TaintNodeMemoryPressure, Effect: v1.TaintEffectNoSchedule},
+                {Key: schedulerapi.TaintNodePIDPressure, Effect: v1.TaintEffectNoSchedule},
             }
             manager.nodeStore.Add(node)
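Why the DaemonSet pod still lands on this tainted node: the default tolerations added by this change use operator Exists with a matching key and effect. A simplified restatement of the matching rule follows; it is not the scheduler's actual helper, and the taint key string in main is written out literally only for illustration (the tests use the schedulerapi constants):

```go
package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
)

// toleratesTaint is a simplified sketch of Kubernetes toleration matching:
// an empty key matches every taint, an empty effect matches every effect,
// Exists ignores values, and Equal (or an empty operator) compares values.
func toleratesTaint(tol v1.Toleration, taint v1.Taint) bool {
	if tol.Key != "" && tol.Key != taint.Key {
		return false
	}
	if tol.Effect != "" && tol.Effect != taint.Effect {
		return false
	}
	if tol.Operator == v1.TolerationOpExists {
		return true
	}
	return tol.Value == taint.Value
}

func main() {
	taint := v1.Taint{Key: "node.kubernetes.io/pid-pressure", Effect: v1.TaintEffectNoSchedule}
	tol := v1.Toleration{Key: "node.kubernetes.io/pid-pressure", Operator: v1.TolerationOpExists, Effect: v1.TaintEffectNoSchedule}
	fmt.Println(toleratesTaint(tol, taint)) // true
}
```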
@@ -13,14 +13,11 @@ go_library(
     deps = [
         "//pkg/api/v1/pod:go_default_library",
         "//pkg/apis/core/v1/helper:go_default_library",
-        "//pkg/features:go_default_library",
-        "//pkg/kubelet/types:go_default_library",
         "//pkg/scheduler/api:go_default_library",
         "//staging/src/k8s.io/api/apps/v1:go_default_library",
         "//staging/src/k8s.io/api/core/v1:go_default_library",
         "//staging/src/k8s.io/api/extensions/v1beta1:go_default_library",
         "//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
-        "//staging/src/k8s.io/apiserver/pkg/util/feature:go_default_library",
     ],
 )
@@ -24,11 +24,8 @@ import (
     "k8s.io/api/core/v1"
     extensions "k8s.io/api/extensions/v1beta1"
     metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
-    utilfeature "k8s.io/apiserver/pkg/util/feature"
     podutil "k8s.io/kubernetes/pkg/api/v1/pod"
     v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
-    "k8s.io/kubernetes/pkg/features"
-    kubelettypes "k8s.io/kubernetes/pkg/kubelet/types"
     schedulerapi "k8s.io/kubernetes/pkg/scheduler/api"
 )
@@ -49,7 +46,7 @@ func GetTemplateGeneration(ds *apps.DaemonSet) (*int64, error) {
 }

 // AddOrUpdateDaemonPodTolerations apply necessary tolerations to DeamonSet Pods, e.g. node.kubernetes.io/not-ready:NoExecute.
-func AddOrUpdateDaemonPodTolerations(spec *v1.PodSpec, isCritical bool) {
+func AddOrUpdateDaemonPodTolerations(spec *v1.PodSpec) {
     // DaemonSet pods shouldn't be deleted by NodeController in case of node problems.
     // Add infinite toleration for taint notReady:NoExecute here
     // to survive taint-based eviction enforced by NodeController
@@ -71,8 +68,7 @@ func AddOrUpdateDaemonPodTolerations(spec *v1.PodSpec, isCritical bool) {
     })

     // According to TaintNodesByCondition feature, all DaemonSet pods should tolerate
-    // MemoryPressure, DisPressure, Unschedulable and NetworkUnavailable taints,
-    // and the critical pods should tolerate OutOfDisk taint.
+    // MemoryPressure, DiskPressure, PIDPressure, Unschedulable and NetworkUnavailable taints.
     v1helper.AddOrUpdateTolerationInPodSpec(spec, &v1.Toleration{
         Key:      schedulerapi.TaintNodeDiskPressure,
         Operator: v1.TolerationOpExists,
@@ -85,6 +81,12 @@ func AddOrUpdateDaemonPodTolerations(spec *v1.PodSpec, isCritical bool) {
         Effect:   v1.TaintEffectNoSchedule,
     })

+    v1helper.AddOrUpdateTolerationInPodSpec(spec, &v1.Toleration{
+        Key:      schedulerapi.TaintNodePIDPressure,
+        Operator: v1.TolerationOpExists,
+        Effect:   v1.TaintEffectNoSchedule,
+    })
+
     v1helper.AddOrUpdateTolerationInPodSpec(spec, &v1.Toleration{
         Key:      schedulerapi.TaintNodeUnschedulable,
         Operator: v1.TolerationOpExists,
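These calls rely on add-or-update semantics so that repeated controller syncs do not stack duplicate tolerations. A rough sketch of that behavior, under the assumption that the in-tree helper keys on key/operator/effect (the exact matching rule in v1helper may differ slightly); names other than the v1 types are placeholders:

```go
package utilsketch

import v1 "k8s.io/api/core/v1"

// addOrUpdateToleration appends tol unless an equivalent toleration is already
// present, in which case it replaces it in place, keeping the operation
// idempotent across resyncs.
func addOrUpdateToleration(spec *v1.PodSpec, tol v1.Toleration) {
	for i := range spec.Tolerations {
		existing := &spec.Tolerations[i]
		if existing.Key == tol.Key && existing.Operator == tol.Operator && existing.Effect == tol.Effect {
			*existing = tol
			return
		}
	}
	spec.Tolerations = append(spec.Tolerations, tol)
}
```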
@@ -98,33 +100,15 @@ func AddOrUpdateDaemonPodTolerations(spec *v1.PodSpec, isCritical bool) {
             Effect:   v1.TaintEffectNoSchedule,
         })
     }
-
-    // TODO(#48843) OutOfDisk taints will be removed in 1.10
-    if isCritical {
-        v1helper.AddOrUpdateTolerationInPodSpec(spec, &v1.Toleration{
-            Key:      schedulerapi.TaintNodeOutOfDisk,
-            Operator: v1.TolerationOpExists,
-            Effect:   v1.TaintEffectNoExecute,
-        })
-        v1helper.AddOrUpdateTolerationInPodSpec(spec, &v1.Toleration{
-            Key:      schedulerapi.TaintNodeOutOfDisk,
-            Operator: v1.TolerationOpExists,
-            Effect:   v1.TaintEffectNoSchedule,
-        })
-    }
 }

 // CreatePodTemplate returns copy of provided template with additional
 // label which contains templateGeneration (for backward compatibility),
 // hash of provided template and sets default daemon tolerations.
-func CreatePodTemplate(ns string, template v1.PodTemplateSpec, generation *int64, hash string) v1.PodTemplateSpec {
+func CreatePodTemplate(template v1.PodTemplateSpec, generation *int64, hash string) v1.PodTemplateSpec {
     newTemplate := *template.DeepCopy()

-    // TODO(k82cn): when removing CritialPod feature, also remove 'ns' parameter.
-    isCritical := utilfeature.DefaultFeatureGate.Enabled(features.ExperimentalCriticalPodAnnotation) &&
-        kubelettypes.IsCritical(ns, newTemplate.Annotations)
-
-    AddOrUpdateDaemonPodTolerations(&newTemplate.Spec, isCritical)
+    AddOrUpdateDaemonPodTolerations(&newTemplate.Spec)

     if newTemplate.ObjectMeta.Labels == nil {
         newTemplate.ObjectMeta.Labels = make(map[string]string)
@@ -154,7 +154,7 @@ func TestCreatePodTemplate(t *testing.T) {
     }
     for _, test := range tests {
         podTemplateSpec := v1.PodTemplateSpec{}
-        newPodTemplate := CreatePodTemplate("", podTemplateSpec, test.templateGeneration, test.hash)
+        newPodTemplate := CreatePodTemplate(podTemplateSpec, test.templateGeneration, test.hash)
         val, exists := newPodTemplate.ObjectMeta.Labels[extensions.DaemonSetTemplateGenerationKey]
         if !exists || val != fmt.Sprint(*test.templateGeneration) {
             t.Errorf("Expected podTemplateSpec to have generation label value: %d, got: %s", *test.templateGeneration, val)
@@ -145,24 +145,12 @@ func TestTaintNodeByCondition(t *testing.T) {
         Effect:   v1.TaintEffectNoSchedule,
     }

-    unreachableToleration := v1.Toleration{
-        Key:      schedulerapi.TaintNodeUnreachable,
-        Operator: v1.TolerationOpExists,
-        Effect:   v1.TaintEffectNoSchedule,
-    }
-
     unschedulableToleration := v1.Toleration{
         Key:      schedulerapi.TaintNodeUnschedulable,
         Operator: v1.TolerationOpExists,
         Effect:   v1.TaintEffectNoSchedule,
     }

-    outOfDiskToleration := v1.Toleration{
-        Key:      schedulerapi.TaintNodeOutOfDisk,
-        Operator: v1.TolerationOpExists,
-        Effect:   v1.TaintEffectNoSchedule,
-    }
-
     memoryPressureToleration := v1.Toleration{
         Key:      schedulerapi.TaintNodeMemoryPressure,
         Operator: v1.TolerationOpExists,
@@ -240,46 +228,6 @@ func TestTaintNodeByCondition(t *testing.T) {
                 },
             },
         },
-        {
-            name: "unreachable node",
-            existingTaints: []v1.Taint{
-                {
-                    Key:    schedulerapi.TaintNodeUnreachable,
-                    Effect: v1.TaintEffectNoSchedule,
-                },
-            },
-            nodeConditions: []v1.NodeCondition{
-                {
-                    Type:   v1.NodeReady,
-                    Status: v1.ConditionUnknown, // node status is "Unknown"
-                },
-            },
-            expectedTaints: []v1.Taint{
-                {
-                    Key:    schedulerapi.TaintNodeUnreachable,
-                    Effect: v1.TaintEffectNoSchedule,
-                },
-            },
-            pods: []podCase{
-                {
-                    pod:  bestEffortPod,
-                    fits: false,
-                },
-                {
-                    pod:  burstablePod,
-                    fits: false,
-                },
-                {
-                    pod:  guaranteePod,
-                    fits: false,
-                },
-                {
-                    pod:         bestEffortPod,
-                    tolerations: []v1.Toleration{unreachableToleration},
-                    fits:        true,
-                },
-            },
-        },
         {
             name:          "unschedulable node",
             unschedulable: true, // node.spec.unschedulable = true
@@ -315,50 +263,6 @@ func TestTaintNodeByCondition(t *testing.T) {
                 },
             },
         },
-        {
-            name: "out of disk node",
-            nodeConditions: []v1.NodeCondition{
-                {
-                    Type:   v1.NodeOutOfDisk,
-                    Status: v1.ConditionTrue,
-                },
-                {
-                    Type:   v1.NodeReady,
-                    Status: v1.ConditionTrue,
-                },
-            },
-            expectedTaints: []v1.Taint{
-                {
-                    Key:    schedulerapi.TaintNodeOutOfDisk,
-                    Effect: v1.TaintEffectNoSchedule,
-                },
-            },
-            // In OutOfDisk condition, only pods with toleration can be scheduled.
-            pods: []podCase{
-                {
-                    pod:  bestEffortPod,
-                    fits: false,
-                },
-                {
-                    pod:  burstablePod,
-                    fits: false,
-                },
-                {
-                    pod:  guaranteePod,
-                    fits: false,
-                },
-                {
-                    pod:         bestEffortPod,
-                    tolerations: []v1.Toleration{outOfDiskToleration},
-                    fits:        true,
-                },
-                {
-                    pod:         bestEffortPod,
-                    tolerations: []v1.Toleration{diskPressureToleration},
-                    fits:        false,
-                },
-            },
-        },
         {
             name: "memory pressure node",
             nodeConditions: []v1.NodeCondition{