From 554acf2b38b0d5ba19b62a67c87b2ac0fbd19a9e Mon Sep 17 00:00:00 2001 From: "Bobby (Babak) Salamat" Date: Tue, 27 Nov 2018 16:46:24 -0800 Subject: [PATCH 1/2] Change sort function of the scheduling queue to avoid starvation --- .../internal/queue/scheduling_queue.go | 23 +++++++- .../internal/queue/scheduling_queue_test.go | 55 +++++++++++++++++++ 2 files changed, 77 insertions(+), 1 deletion(-) diff --git a/pkg/scheduler/internal/queue/scheduling_queue.go b/pkg/scheduler/internal/queue/scheduling_queue.go index 6f5aa682c9..9bf40361b4 100644 --- a/pkg/scheduler/internal/queue/scheduling_queue.go +++ b/pkg/scheduler/internal/queue/scheduling_queue.go @@ -206,10 +206,31 @@ type PriorityQueue struct { // Making sure that PriorityQueue implements SchedulingQueue. var _ = SchedulingQueue(&PriorityQueue{}) +// podTimeStamp returns pod's last schedule time or its creation time if the +// scheduler has never tried scheduling it. +func podTimestamp(pod *v1.Pod) *metav1.Time { + _, condition := podutil.GetPodCondition(&pod.Status, v1.PodScheduled) + if condition == nil { + return &pod.CreationTimestamp + } + return &condition.LastTransitionTime +} + +// activeQComp is the function used by the activeQ heap algorithm to sort pods. +// It sorts pods based on their priority. When priorities are equal, it uses +// podTimestamp. +func activeQComp(pod1, pod2 interface{}) bool { + p1 := pod1.(*v1.Pod) + p2 := pod2.(*v1.Pod) + prio1 := util.GetPodPriority(p1) + prio2 := util.GetPodPriority(p2) + return (prio1 > prio2) || (prio1 == prio2 && podTimestamp(p1).Before(podTimestamp(p2))) +} + // NewPriorityQueue creates a PriorityQueue object. func NewPriorityQueue() *PriorityQueue { pq := &PriorityQueue{ - activeQ: newHeap(cache.MetaNamespaceKeyFunc, util.HigherPriorityPod), + activeQ: newHeap(cache.MetaNamespaceKeyFunc, activeQComp), unschedulableQ: newUnschedulablePodsMap(), nominatedPods: map[string][]*v1.Pod{}, } diff --git a/pkg/scheduler/internal/queue/scheduling_queue_test.go b/pkg/scheduler/internal/queue/scheduling_queue_test.go index ca0e336965..73ef4a0081 100644 --- a/pkg/scheduler/internal/queue/scheduling_queue_test.go +++ b/pkg/scheduler/internal/queue/scheduling_queue_test.go @@ -24,6 +24,8 @@ import ( "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + podutil "k8s.io/kubernetes/pkg/api/v1/pod" "k8s.io/kubernetes/pkg/scheduler/util" ) @@ -512,3 +514,56 @@ func TestSchedulingQueue_Close(t *testing.T) { }) } } + +// TestRecentlyTriedPodsGoBack tests that pods which are recently tried and are +// unschedulable go behind other pods with the same priority. This behavior +// ensures that an unschedulable pod does not block head of the queue when there +// are frequent events that move pods to the active queue. +func TestRecentlyTriedPodsGoBack(t *testing.T) { + q := NewPriorityQueue() + // Add a few pods to priority queue. + for i := 0; i < 5; i++ { + p := v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf("test-pod-%v", i), + Namespace: "ns1", + UID: types.UID(fmt.Sprintf("tp00%v", i)), + }, + Spec: v1.PodSpec{ + Priority: &highPriority, + }, + Status: v1.PodStatus{ + NominatedNodeName: "node1", + }, + } + q.Add(&p) + } + // Simulate a pod being popped by the scheduler, determined unschedulable, and + // then moved back to the active queue. + p1, err := q.Pop() + if err != nil { + t.Errorf("Error while popping the head of the queue: %v", err) + } + // Update pod condition to unschedulable. + podutil.UpdatePodCondition(&p1.Status, &v1.PodCondition{ + Type: v1.PodScheduled, + Status: v1.ConditionFalse, + Reason: v1.PodReasonUnschedulable, + Message: "fake scheduling failure", + }) + // Put in the unschedulable queue. + q.AddUnschedulableIfNotPresent(p1) + // Move all unschedulable pods to the active queue. + q.MoveAllToActiveQueue() + // Simulation is over. Now let's pop all pods. The pod popped first should be + // the last one we pop here. + for i := 0; i < 5; i++ { + p, err := q.Pop() + if err != nil { + t.Errorf("Error while popping pods from the queue: %v", err) + } + if (i == 4) != (p1 == p) { + t.Errorf("A pod tried before is not the last pod popped: i: %v, pod name: %v", i, p.Name) + } + } +} From 36f8859fa09ad9c50c89e60b0d6f73823de88585 Mon Sep 17 00:00:00 2001 From: "Bobby (Babak) Salamat" Date: Tue, 27 Nov 2018 17:08:58 -0800 Subject: [PATCH 2/2] autogenerated files --- pkg/scheduler/internal/queue/BUILD | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pkg/scheduler/internal/queue/BUILD b/pkg/scheduler/internal/queue/BUILD index c675f85924..37a8d1f525 100644 --- a/pkg/scheduler/internal/queue/BUILD +++ b/pkg/scheduler/internal/queue/BUILD @@ -22,9 +22,11 @@ go_test( srcs = ["scheduling_queue_test.go"], embed = [":go_default_library"], deps = [ + "//pkg/api/v1/pod:go_default_library", "//pkg/scheduler/util:go_default_library", "//staging/src/k8s.io/api/core/v1:go_default_library", "//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library", + "//staging/src/k8s.io/apimachinery/pkg/types:go_default_library", ], )