diff --git a/pkg/controller/daemon/daemon_controller.go b/pkg/controller/daemon/daemon_controller.go index 8200ba1ff8..eec8fc19cf 100644 --- a/pkg/controller/daemon/daemon_controller.go +++ b/pkg/controller/daemon/daemon_controller.go @@ -23,6 +23,8 @@ import ( "sync" "time" + "github.com/golang/glog" + apps "k8s.io/api/apps/v1" "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/errors" @@ -55,8 +57,6 @@ import ( "k8s.io/kubernetes/pkg/scheduler/algorithm/predicates" "k8s.io/kubernetes/pkg/scheduler/schedulercache" "k8s.io/kubernetes/pkg/util/metrics" - - "github.com/golang/glog" ) const ( @@ -942,6 +942,7 @@ func (dsc *DaemonSetsController) syncNodes(ds *apps.DaemonSet, podsToDelete, nod podTemplate = template.DeepCopy() podTemplate.Spec.Affinity = util.ReplaceDaemonSetPodHostnameNodeAffinity( podTemplate.Spec.Affinity, nodesNeedingDaemonPods[ix]) + podTemplate.Spec.Tolerations = util.AppendNoScheduleTolerationIfNotExist(podTemplate.Spec.Tolerations) err = dsc.podControl.CreatePodsWithControllerRef(ds.Namespace, podTemplate, ds, metav1.NewControllerRef(ds, controllerKind)) diff --git a/pkg/controller/daemon/util/BUILD b/pkg/controller/daemon/util/BUILD index b33d553a95..9a64e9a62e 100644 --- a/pkg/controller/daemon/util/BUILD +++ b/pkg/controller/daemon/util/BUILD @@ -20,6 +20,7 @@ go_library( "//vendor/k8s.io/api/apps/v1:go_default_library", "//vendor/k8s.io/api/core/v1:go_default_library", "//vendor/k8s.io/api/extensions/v1beta1:go_default_library", + "//vendor/k8s.io/apimachinery/pkg/api/equality:go_default_library", "//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library", "//vendor/k8s.io/apiserver/pkg/util/feature:go_default_library", ], diff --git a/pkg/controller/daemon/util/daemonset_util.go b/pkg/controller/daemon/util/daemonset_util.go index 20a0889d19..e39f1620f1 100644 --- a/pkg/controller/daemon/util/daemonset_util.go +++ b/pkg/controller/daemon/util/daemonset_util.go @@ -23,6 +23,7 @@ import ( apps "k8s.io/api/apps/v1" "k8s.io/api/core/v1" 
extensions "k8s.io/api/extensions/v1beta1" + apiequality "k8s.io/apimachinery/pkg/api/equality" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" utilfeature "k8s.io/apiserver/pkg/util/feature" podutil "k8s.io/kubernetes/pkg/api/v1/pod" @@ -199,3 +200,28 @@ func ReplaceDaemonSetPodHostnameNodeAffinity(affinity *v1.Affinity, nodename str return affinity } + +// AppendNoScheduleTolerationIfNotExist appends the unschedulable NoSchedule toleration to the given tolerations +// if a semantically equal one is not already present; otherwise it returns the tolerations unchanged. +func AppendNoScheduleTolerationIfNotExist(tolerations []v1.Toleration) []v1.Toleration { + unschedulableToleration := v1.Toleration{ + Key: algorithm.TaintNodeUnschedulable, + Operator: v1.TolerationOpExists, + Effect: v1.TaintEffectNoSchedule, + } + + unschedulableTaintExist := false + + for _, t := range tolerations { + if apiequality.Semantic.DeepEqual(t, unschedulableToleration) { + unschedulableTaintExist = true + break + } + } + + if !unschedulableTaintExist { + tolerations = append(tolerations, unschedulableToleration) + } + + return tolerations +} diff --git a/pkg/controller/nodelifecycle/node_lifecycle_controller.go b/pkg/controller/nodelifecycle/node_lifecycle_controller.go index a48a8965e3..5337f715f6 100644 --- a/pkg/controller/nodelifecycle/node_lifecycle_controller.go +++ b/pkg/controller/nodelifecycle/node_lifecycle_controller.go @@ -22,6 +22,12 @@ limitations under the License. 
package nodelifecycle import ( + "fmt" + "sync" + "time" + + "github.com/golang/glog" + "k8s.io/api/core/v1" apiequality "k8s.io/apimachinery/pkg/api/equality" apierrors "k8s.io/apimachinery/pkg/api/errors" @@ -50,12 +56,6 @@ import ( "k8s.io/kubernetes/pkg/util/system" taintutils "k8s.io/kubernetes/pkg/util/taints" utilversion "k8s.io/kubernetes/pkg/util/version" - - "fmt" - "sync" - "time" - - "github.com/golang/glog" ) func init() { @@ -438,7 +438,21 @@ func (nc *Controller) doNoScheduleTaintingPass(node *v1.Node) error { } } } + if node.Spec.Unschedulable { + // If unschedulable, append related taint. + taints = append(taints, v1.Taint{ + Key: algorithm.TaintNodeUnschedulable, + Effect: v1.TaintEffectNoSchedule, + }) + } + + // Get the existing taints of the node. nodeTaints := taintutils.TaintSetFilter(node.Spec.Taints, func(t *v1.Taint) bool { + // Find unschedulable taint of node. + if t.Key == algorithm.TaintNodeUnschedulable { + return true + } + // Find node condition taints of node. _, found := taintKeyToNodeConditionMap[t.Key] return found }) diff --git a/pkg/scheduler/algorithm/well_known_labels.go b/pkg/scheduler/algorithm/well_known_labels.go index 3a056d467e..4b3ba39d1a 100644 --- a/pkg/scheduler/algorithm/well_known_labels.go +++ b/pkg/scheduler/algorithm/well_known_labels.go @@ -36,6 +36,11 @@ const ( // It is deprecated since 1.9 DeprecatedTaintNodeUnreachable = "node.alpha.kubernetes.io/unreachable" + // TaintNodeUnschedulable will be added when node becomes unschedulable + // and feature-gate for TaintNodesByCondition flag is enabled, + // and removed when node becomes schedulable. + TaintNodeUnschedulable = "node.kubernetes.io/unschedulable" + // TaintNodeOutOfDisk will be added when node becomes out of disk // and feature-gate for TaintNodesByCondition flag is enabled, // and removed when node has enough disk. 
diff --git a/pkg/scheduler/algorithmprovider/defaults/defaults.go b/pkg/scheduler/algorithmprovider/defaults/defaults.go index 6ac2f1849a..3267d457ba 100644 --- a/pkg/scheduler/algorithmprovider/defaults/defaults.go +++ b/pkg/scheduler/algorithmprovider/defaults/defaults.go @@ -186,15 +186,12 @@ func ApplyFeatureGates() { // if you just want remove specific provider, call func RemovePredicateKeyFromAlgoProvider() factory.RemovePredicateKeyFromAlgorithmProviderMap(predicates.CheckNodeConditionPred) - // Fit is determined based on whether a node has Unschedulable spec - factory.RegisterMandatoryFitPredicate(predicates.CheckNodeUnschedulablePred, predicates.CheckNodeUnschedulablePredicate) // Fit is determined based on whether a pod can tolerate all of the node's taints factory.RegisterMandatoryFitPredicate(predicates.PodToleratesNodeTaintsPred, predicates.PodToleratesNodeTaints) // Insert Key "PodToleratesNodeTaints" and "CheckNodeUnschedulable" To All Algorithm Provider // The key will insert to all providers which in algorithmProviderMap[] // if you just want insert to specific provider, call func InsertPredicateKeyToAlgoProvider() factory.InsertPredicateKeyToAlgorithmProviderMap(predicates.PodToleratesNodeTaintsPred) - factory.InsertPredicateKeyToAlgorithmProviderMap(predicates.CheckNodeUnschedulablePred) glog.Warningf("TaintNodesByCondition is enabled, PodToleratesNodeTaints predicate is mandatory") } diff --git a/test/integration/scheduler/taint_test.go b/test/integration/scheduler/taint_test.go index 0c0f4774a8..fb158f8e2e 100644 --- a/test/integration/scheduler/taint_test.go +++ b/test/integration/scheduler/taint_test.go @@ -244,6 +244,9 @@ func TestTaintNodeByCondition(t *testing.T) { nodeInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{ UpdateFunc: func(old, cur interface{}) { curNode := cur.(*v1.Node) + if curNode.Name != "node-1" { + return + } for _, taint := range curNode.Spec.Taints { if taint.Key == algorithm.TaintNodeNetworkUnavailable && 
taint.Effect == v1.TaintEffectNoSchedule { @@ -294,4 +297,55 @@ func TestTaintNodeByCondition(t *testing.T) { t.Errorf("Case 4: Failed to schedule network daemon pod in 60s.") } } + + // Case 5: Taint node by unschedulable condition + unschedulableNode := &v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node-2", + }, + Spec: v1.NodeSpec{ + Unschedulable: true, + }, + Status: v1.NodeStatus{ + Capacity: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("4000m"), + v1.ResourceMemory: resource.MustParse("16Gi"), + v1.ResourcePods: resource.MustParse("110"), + }, + Allocatable: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("4000m"), + v1.ResourceMemory: resource.MustParse("16Gi"), + v1.ResourcePods: resource.MustParse("110"), + }, + }, + } + + nodeInformerCh2 := make(chan bool) + nodeInformer2 := informers.Core().V1().Nodes().Informer() + nodeInformer2.AddEventHandler(cache.ResourceEventHandlerFuncs{ + UpdateFunc: func(old, cur interface{}) { + curNode := cur.(*v1.Node) + if curNode.Name != "node-2" { + return + } + + for _, taint := range curNode.Spec.Taints { + if taint.Key == algorithm.TaintNodeUnschedulable && + taint.Effect == v1.TaintEffectNoSchedule { + nodeInformerCh2 <- true + break + } + } + }, + }) + + if _, err := clientset.CoreV1().Nodes().Create(unschedulableNode); err != nil { + t.Errorf("Case 5: Failed to create node: %v", err) + } else { + select { + case <-time.After(60 * time.Second): + t.Errorf("Case 5: Failed to taint node after 60s.") + case <-nodeInformerCh2: + } + } }