From 33c710adf06ec844ed65127aaab14fc2bc2bf79c Mon Sep 17 00:00:00 2001 From: Wojciech Tyczynski Date: Fri, 26 Aug 2016 16:08:40 +0200 Subject: [PATCH] MapReduce-like scheduler priority functions --- .../algorithm/predicates/predicates.go | 2 +- .../algorithm/priorities/priorities.go | 40 +++- .../algorithm/priorities/priorities_test.go | 184 ++++++------------ plugin/pkg/scheduler/algorithm/types.go | 15 ++ .../algorithmprovider/defaults/defaults.go | 2 +- plugin/pkg/scheduler/factory/plugins.go | 52 ++++- plugin/pkg/scheduler/generic_scheduler.go | 31 ++- .../pkg/scheduler/generic_scheduler_test.go | 147 ++++++++++++++ 8 files changed, 327 insertions(+), 146 deletions(-) diff --git a/plugin/pkg/scheduler/algorithm/predicates/predicates.go b/plugin/pkg/scheduler/algorithm/predicates/predicates.go index f0dddc4c67..5526eff441 100644 --- a/plugin/pkg/scheduler/algorithm/predicates/predicates.go +++ b/plugin/pkg/scheduler/algorithm/predicates/predicates.go @@ -67,7 +67,7 @@ func (c *CachedNodeInfo) GetNodeInfo(id string) (*api.Node, error) { return node.(*api.Node), nil } -// podMetadata is a type that is passed as metadata for predicate functions +// predicateMetadata is a type that is passed as metadata for predicate functions type predicateMetadata struct { podBestEffort bool podRequest *schedulercache.Resource diff --git a/plugin/pkg/scheduler/algorithm/priorities/priorities.go b/plugin/pkg/scheduler/algorithm/priorities/priorities.go index 46afb6617f..5e33225c91 100644 --- a/plugin/pkg/scheduler/algorithm/priorities/priorities.go +++ b/plugin/pkg/scheduler/algorithm/priorities/priorities.go @@ -17,6 +17,7 @@ limitations under the License. package priorities import ( + "fmt" "math" "github.com/golang/glog" @@ -28,6 +29,21 @@ import ( "k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache" ) +// priorityMetadata is a type that is passed as metadata for priority functions +type priorityMetadata struct { + nonZeroRequest *schedulercache.Resource +} + +func PriorityMetadata(pod *api.Pod, nodes []*api.Node) interface{} { + // If we cannot compute metadata, just return nil + if pod == nil { + return nil + } + return &priorityMetadata{ + nonZeroRequest: getNonZeroRequests(pod), + } +} + func getNonZeroRequests(pod *api.Pod) *schedulercache.Resource { result := &schedulercache.Resource{} for i := range pod.Spec.Containers { @@ -76,8 +92,12 @@ func calculateUsedScore(requested int64, capacity int64, node string) int64 { // Calculates host priority based on the amount of unused resources. // 'node' has information about the resources on the node. // 'pods' is a list of pods currently scheduled on the node. -// TODO: Use Node() from nodeInfo instead of passing it. -func calculateUnusedPriority(pod *api.Pod, podRequests *schedulercache.Resource, node *api.Node, nodeInfo *schedulercache.NodeInfo) schedulerapi.HostPriority { +func calculateUnusedPriority(pod *api.Pod, podRequests *schedulercache.Resource, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) { + node := nodeInfo.Node() + if node == nil { + return schedulerapi.HostPriority{}, fmt.Errorf("node not found") + } + allocatableResources := nodeInfo.AllocatableResource() totalResources := *podRequests totalResources.MilliCPU += nodeInfo.NonZeroRequest().MilliCPU @@ -100,7 +120,7 @@ func calculateUnusedPriority(pod *api.Pod, podRequests *schedulercache.Resource, return schedulerapi.HostPriority{ Host: node.Name, Score: int((cpuScore + memoryScore) / 2), - } + }, nil } // Calculate the resource used on a node. 
'node' has information about the resources on the node. @@ -136,13 +156,15 @@ func calculateUsedPriority(pod *api.Pod, podRequests *schedulercache.Resource, n // It calculates the percentage of memory and CPU requested by pods scheduled on the node, and prioritizes // based on the minimum of the average of the fraction of requested to capacity. // Details: cpu((capacity - sum(requested)) * 10 / capacity) + memory((capacity - sum(requested)) * 10 / capacity) / 2 -func LeastRequestedPriority(pod *api.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, nodes []*api.Node) (schedulerapi.HostPriorityList, error) { - podResources := getNonZeroRequests(pod) - list := make(schedulerapi.HostPriorityList, 0, len(nodes)) - for _, node := range nodes { - list = append(list, calculateUnusedPriority(pod, podResources, node, nodeNameToInfo[node.Name])) +func LeastRequestedPriorityMap(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) { + var nonZeroRequest *schedulercache.Resource + if priorityMeta, ok := meta.(*priorityMetadata); ok { + nonZeroRequest = priorityMeta.nonZeroRequest + } else { + // We couldn't parse metadata - fallback to computing it. + nonZeroRequest = getNonZeroRequests(pod) } - return list, nil + return calculateUnusedPriority(pod, nonZeroRequest, nodeInfo) } // MostRequestedPriority is a priority function that favors nodes with most requested resources. diff --git a/plugin/pkg/scheduler/algorithm/priorities/priorities_test.go b/plugin/pkg/scheduler/algorithm/priorities/priorities_test.go index 1b2512c4ef..30ab4a1d2a 100644 --- a/plugin/pkg/scheduler/algorithm/priorities/priorities_test.go +++ b/plugin/pkg/scheduler/algorithm/priorities/priorities_test.go @@ -17,21 +17,21 @@ limitations under the License. package priorities import ( + "fmt" "os/exec" + "path/filepath" "reflect" "sort" - "strconv" "testing" + "k8s.io/kubernetes/cmd/libs/go2idl/parser" "k8s.io/kubernetes/cmd/libs/go2idl/types" "k8s.io/kubernetes/pkg/api" "k8s.io/kubernetes/pkg/api/resource" "k8s.io/kubernetes/pkg/api/unversioned" "k8s.io/kubernetes/pkg/apis/extensions" "k8s.io/kubernetes/pkg/util/codeinspector" - "k8s.io/kubernetes/plugin/pkg/scheduler" "k8s.io/kubernetes/plugin/pkg/scheduler/algorithm" - priorityutil "k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/priorities/util" schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api" "k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache" ) @@ -52,126 +52,22 @@ func makeNode(node string, milliCPU, memory int64) *api.Node { } } -func TestZeroRequest(t *testing.T) { - // A pod with no resources. We expect spreading to count it as having the default resources. - noResources := api.PodSpec{ - Containers: []api.Container{ - {}, - }, - } - noResources1 := noResources - noResources1.NodeName = "machine1" - // A pod with the same resources as a 0-request pod gets by default as its resources (for spreading). - small := api.PodSpec{ - Containers: []api.Container{ - { - Resources: api.ResourceRequirements{ - Requests: api.ResourceList{ - "cpu": resource.MustParse( - strconv.FormatInt(priorityutil.DefaultMilliCpuRequest, 10) + "m"), - "memory": resource.MustParse( - strconv.FormatInt(priorityutil.DefaultMemoryRequest, 10)), - }, - }, - }, - }, - } - small2 := small - small2.NodeName = "machine2" - // A larger pod. 
- large := api.PodSpec{ - Containers: []api.Container{ - { - Resources: api.ResourceRequirements{ - Requests: api.ResourceList{ - "cpu": resource.MustParse( - strconv.FormatInt(priorityutil.DefaultMilliCpuRequest*3, 10) + "m"), - "memory": resource.MustParse( - strconv.FormatInt(priorityutil.DefaultMemoryRequest*3, 10)), - }, - }, - }, - }, - } - large1 := large - large1.NodeName = "machine1" - large2 := large - large2.NodeName = "machine2" - tests := []struct { - pod *api.Pod - pods []*api.Pod - nodes []*api.Node - test string - }{ - // The point of these next two tests is to show you get the same priority for a zero-request pod - // as for a pod with the defaults requests, both when the zero-request pod is already on the machine - // and when the zero-request pod is the one being scheduled. - { - pod: &api.Pod{Spec: noResources}, - nodes: []*api.Node{makeNode("machine1", 1000, priorityutil.DefaultMemoryRequest*10), makeNode("machine2", 1000, priorityutil.DefaultMemoryRequest*10)}, - test: "test priority of zero-request pod with machine with zero-request pod", - pods: []*api.Pod{ - {Spec: large1}, {Spec: noResources1}, - {Spec: large2}, {Spec: small2}, - }, - }, - { - pod: &api.Pod{Spec: small}, - nodes: []*api.Node{makeNode("machine1", 1000, priorityutil.DefaultMemoryRequest*10), makeNode("machine2", 1000, priorityutil.DefaultMemoryRequest*10)}, - test: "test priority of nonzero-request pod with machine with zero-request pod", - pods: []*api.Pod{ - {Spec: large1}, {Spec: noResources1}, - {Spec: large2}, {Spec: small2}, - }, - }, - // The point of this test is to verify that we're not just getting the same score no matter what we schedule. - { - pod: &api.Pod{Spec: large}, - nodes: []*api.Node{makeNode("machine1", 1000, priorityutil.DefaultMemoryRequest*10), makeNode("machine2", 1000, priorityutil.DefaultMemoryRequest*10)}, - test: "test priority of larger pod with machine with zero-request pod", - pods: []*api.Pod{ - {Spec: large1}, {Spec: noResources1}, - {Spec: large2}, {Spec: small2}, - }, - }, - } - - const expectedPriority int = 25 - for _, test := range tests { - nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, test.nodes) - list, err := scheduler.PrioritizeNodes( - test.pod, - nodeNameToInfo, - // This should match the configuration in defaultPriorities() in - // plugin/pkg/scheduler/algorithmprovider/defaults/defaults.go if you want - // to test what's actually in production. 
- []algorithm.PriorityConfig{ - {Function: LeastRequestedPriority, Weight: 1}, - {Function: BalancedResourceAllocation, Weight: 1}, - { - Function: NewSelectorSpreadPriority( - algorithm.FakePodLister(test.pods), - algorithm.FakeServiceLister([]api.Service{}), - algorithm.FakeControllerLister([]api.ReplicationController{}), - algorithm.FakeReplicaSetLister([]extensions.ReplicaSet{})), - Weight: 1, - }, - }, - algorithm.FakeNodeLister(test.nodes), []algorithm.SchedulerExtender{}) - if err != nil { - t.Errorf("unexpected error: %v", err) +func priorityFunction(mapFn algorithm.PriorityMapFunction, reduceFn algorithm.PriorityReduceFunction) algorithm.PriorityFunction { + return func(pod *api.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, nodes []*api.Node) (schedulerapi.HostPriorityList, error) { + result := make(schedulerapi.HostPriorityList, 0, len(nodes)) + for i := range nodes { + hostResult, err := mapFn(pod, nil, nodeNameToInfo[nodes[i].Name]) + if err != nil { + return nil, err + } + result = append(result, hostResult) } - for _, hp := range list { - if test.test == "test priority of larger pod with machine with zero-request pod" { - if hp.Score == expectedPriority { - t.Errorf("%s: expected non-%d for all priorities, got list %#v", test.test, expectedPriority, list) - } - } else { - if hp.Score != expectedPriority { - t.Errorf("%s: expected %d for all priorities, got list %#v", test.test, expectedPriority, list) - } + if reduceFn != nil { + if err := reduceFn(result); err != nil { + return nil, err } } + return result, nil } } @@ -401,7 +297,8 @@ func TestLeastRequested(t *testing.T) { for _, test := range tests { nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, test.nodes) - list, err := LeastRequestedPriority(test.pod, nodeNameToInfo, test.nodes) + lrp := priorityFunction(LeastRequestedPriorityMap, nil) + list, err := lrp(test.pod, nodeNameToInfo, test.nodes) if err != nil { t.Errorf("unexpected error: %v", err) } @@ -1054,6 +951,29 @@ func makeImageNode(node string, status api.NodeStatus) *api.Node { } } +func getPrioritySignatures() ([]*types.Signature, error) { + filePath := "./../types.go" + pkgName := filepath.Dir(filePath) + builder := parser.New() + if err := builder.AddDir(pkgName); err != nil { + return nil, err + } + universe, err := builder.FindTypes() + if err != nil { + return nil, err + } + signatures := []string{"PriorityFunction", "PriorityMapFunction", "PriorityReduceFunction"} + results := make([]*types.Signature, 0, len(signatures)) + for _, signature := range signatures { + result, ok := universe[pkgName].Types[signature] + if !ok { + return nil, fmt.Errorf("%s type not defined", signature) + } + results = append(results, result.Signature) + } + return results, nil +} + func TestPrioritiesRegistered(t *testing.T) { var functions []*types.Type @@ -1080,8 +1000,30 @@ func TestPrioritiesRegistered(t *testing.T) { } } + prioritySignatures, err := getPrioritySignatures() + if err != nil { + t.Fatalf("Couldn't get priorities signatures") + } + // Check if all public priorities are referenced in target files. for _, function := range functions { + // Ignore functions that don't match priorities signatures. + signature := function.Underlying.Signature + match := false + for _, prioritySignature := range prioritySignatures { + if len(prioritySignature.Parameters) != len(signature.Parameters) { + continue + } + if len(prioritySignature.Results) != len(signature.Results) { + continue + } + // TODO: Check exact types of parameters and results. 
+ match = true + } + if !match { + continue + } + args := []string{"-rl", function.Name.Name} args = append(args, targetFiles...) diff --git a/plugin/pkg/scheduler/algorithm/types.go b/plugin/pkg/scheduler/algorithm/types.go index 1271e2a300..3ddb5a68ff 100644 --- a/plugin/pkg/scheduler/algorithm/types.go +++ b/plugin/pkg/scheduler/algorithm/types.go @@ -26,9 +26,24 @@ import ( // The failure information is given by the error. type FitPredicate func(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (bool, []PredicateFailureReason, error) +// PriorityMapFunction is a function that computes per-node results for a given node. +// TODO: Figure out the exact API of this method. +type PriorityMapFunction func(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) + +// PriorityReduceFunction is a function that aggregated per-node results and computes +// final scores for all nodes. +// TODO: Figure out the exact API of this method. +type PriorityReduceFunction func(result schedulerapi.HostPriorityList) error + +// DEPRECATED +// Use Map-Reduce pattern for priority functions. type PriorityFunction func(pod *api.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, nodes []*api.Node) (schedulerapi.HostPriorityList, error) type PriorityConfig struct { + Map PriorityMapFunction + Reduce PriorityReduceFunction + // TODO: Remove it after migrating all functions to + // Map-Reduce pattern. Function PriorityFunction Weight int } diff --git a/plugin/pkg/scheduler/algorithmprovider/defaults/defaults.go b/plugin/pkg/scheduler/algorithmprovider/defaults/defaults.go index c249a45e1a..06816fce19 100644 --- a/plugin/pkg/scheduler/algorithmprovider/defaults/defaults.go +++ b/plugin/pkg/scheduler/algorithmprovider/defaults/defaults.go @@ -165,7 +165,7 @@ func defaultPredicates() sets.String { func defaultPriorities() sets.String { return sets.NewString( // Prioritize nodes by least requested utilization. - factory.RegisterPriorityFunction("LeastRequestedPriority", priorities.LeastRequestedPriority, 1), + factory.RegisterPriorityFunction2("LeastRequestedPriority", priorities.LeastRequestedPriorityMap, nil, 1), // Prioritizes nodes to help achieve balanced resource usage factory.RegisterPriorityFunction("BalancedResourceAllocation", priorities.BalancedResourceAllocation, 1), // spreads pods by minimizing the number of pods (belonging to the same service or replication controller) on the same node. diff --git a/plugin/pkg/scheduler/factory/plugins.go b/plugin/pkg/scheduler/factory/plugins.go index e6bcdefb9a..fdc0204f1b 100644 --- a/plugin/pkg/scheduler/factory/plugins.go +++ b/plugin/pkg/scheduler/factory/plugins.go @@ -48,13 +48,21 @@ type PluginFactoryArgs struct { // A FitPredicateFactory produces a FitPredicate from the given args. type FitPredicateFactory func(PluginFactoryArgs) algorithm.FitPredicate +// DEPRECATED +// Use Map-Reduce pattern for priority functions. // A PriorityFunctionFactory produces a PriorityConfig from the given args. type PriorityFunctionFactory func(PluginFactoryArgs) algorithm.PriorityFunction +// A PriorityFunctionFactory produces map & reduce priority functions +// from a given args. +// FIXME: Rename to PriorityFunctionFactory. 
+type PriorityFunctionFactory2 func(PluginFactoryArgs) (algorithm.PriorityMapFunction, algorithm.PriorityReduceFunction) + // A PriorityConfigFactory produces a PriorityConfig from the given function and weight type PriorityConfigFactory struct { - Function PriorityFunctionFactory - Weight int + Function PriorityFunctionFactory + MapReduceFunction PriorityFunctionFactory2 + Weight int } var ( @@ -139,6 +147,8 @@ func IsFitPredicateRegistered(name string) bool { return ok } +// DEPRECATED +// Use Map-Reduce pattern for priority functions. // Registers a priority function with the algorithm registry. Returns the name, // with which the function was registered. func RegisterPriorityFunction(name string, function algorithm.PriorityFunction, weight int) string { @@ -150,6 +160,22 @@ func RegisterPriorityFunction(name string, function algorithm.PriorityFunction, }) } +// Registers a priority function with the algorithm registry. Returns the name, +// with which the function was registered. +// FIXME: Rename to PriorityFunctionFactory. +func RegisterPriorityFunction2( + name string, + mapFunction algorithm.PriorityMapFunction, + reduceFunction algorithm.PriorityReduceFunction, + weight int) string { + return RegisterPriorityConfigFactory(name, PriorityConfigFactory{ + MapReduceFunction: func(PluginFactoryArgs) (algorithm.PriorityMapFunction, algorithm.PriorityReduceFunction) { + return mapFunction, reduceFunction + }, + Weight: weight, + }) +} + func RegisterPriorityConfigFactory(name string, pcf PriorityConfigFactory) string { schedulerFactoryMutex.Lock() defer schedulerFactoryMutex.Unlock() @@ -193,8 +219,9 @@ func RegisterCustomPriorityFunction(policy schedulerapi.PriorityPolicy) string { glog.V(2).Infof("Priority type %s already registered, reusing.", policy.Name) // set/update the weight based on the policy pcf = &PriorityConfigFactory{ - Function: existing_pcf.Function, - Weight: policy.Weight, + Function: existing_pcf.Function, + MapReduceFunction: existing_pcf.MapReduceFunction, + Weight: policy.Weight, } } @@ -265,10 +292,19 @@ func getPriorityFunctionConfigs(names sets.String, args PluginFactoryArgs) ([]al if !ok { return nil, fmt.Errorf("Invalid priority name %s specified - no corresponding function found", name) } - configs = append(configs, algorithm.PriorityConfig{ - Function: factory.Function(args), - Weight: factory.Weight, - }) + if factory.Function != nil { + configs = append(configs, algorithm.PriorityConfig{ + Function: factory.Function(args), + Weight: factory.Weight, + }) + } else { + mapFunction, reduceFunction := factory.MapReduceFunction(args) + configs = append(configs, algorithm.PriorityConfig{ + Map: mapFunction, + Reduce: reduceFunction, + Weight: factory.Weight, + }) + } } return configs, nil } diff --git a/plugin/pkg/scheduler/generic_scheduler.go b/plugin/pkg/scheduler/generic_scheduler.go index 7577c7068e..05088d8df2 100644 --- a/plugin/pkg/scheduler/generic_scheduler.go +++ b/plugin/pkg/scheduler/generic_scheduler.go @@ -32,6 +32,7 @@ import ( "k8s.io/kubernetes/pkg/util/workqueue" "k8s.io/kubernetes/plugin/pkg/scheduler/algorithm" "k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/predicates" + "k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/priorities" schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api" "k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache" ) @@ -237,7 +238,7 @@ func PrioritizeNodes( nodes []*api.Node, extenders []algorithm.SchedulerExtender, ) (schedulerapi.HostPriorityList, error) { - result := make(schedulerapi.HostPriorityList, 0, 
len(nodeNameToInfo)) + result := make(schedulerapi.HostPriorityList, 0, len(nodes)) // If no priority configs are provided, then the EqualPriority function is applied // This is required to generate the priority list in the required format @@ -252,6 +253,7 @@ func PrioritizeNodes( errs []error ) + meta := priorities.PriorityMetadata(pod, nodes) for _, priorityConfig := range priorityConfigs { // skip the priority function if the weight is specified as 0 if priorityConfig.Weight == 0 { @@ -262,8 +264,26 @@ func PrioritizeNodes( go func(config algorithm.PriorityConfig) { defer wg.Done() weight := config.Weight - priorityFunc := config.Function - prioritizedList, err := priorityFunc(pod, nodeNameToInfo, nodes) + + prioritizedList, err := func() (schedulerapi.HostPriorityList, error) { + if config.Function != nil { + return config.Function(pod, nodeNameToInfo, nodes) + } + prioritizedList := make(schedulerapi.HostPriorityList, 0, len(nodes)) + for i := range nodes { + hostResult, err := config.Map(pod, meta, nodeNameToInfo[nodes[i].Name]) + if err != nil { + return nil, err + } + prioritizedList = append(prioritizedList, hostResult) + } + if config.Reduce != nil { + if err := config.Reduce(prioritizedList); err != nil { + return nil, err + } + } + return prioritizedList, nil + }() mu.Lock() defer mu.Unlock() @@ -277,13 +297,12 @@ func PrioritizeNodes( } }(priorityConfig) } + // wait for all go routines to finish + wg.Wait() if len(errs) != 0 { return schedulerapi.HostPriorityList{}, errors.NewAggregate(errs) } - // wait for all go routines to finish - wg.Wait() - if len(extenders) != 0 && nodes != nil { for _, extender := range extenders { wg.Add(1) diff --git a/plugin/pkg/scheduler/generic_scheduler_test.go b/plugin/pkg/scheduler/generic_scheduler_test.go index 7d0d9eef24..6bae311e54 100644 --- a/plugin/pkg/scheduler/generic_scheduler_test.go +++ b/plugin/pkg/scheduler/generic_scheduler_test.go @@ -25,10 +25,14 @@ import ( "time" "k8s.io/kubernetes/pkg/api" + "k8s.io/kubernetes/pkg/api/resource" + "k8s.io/kubernetes/pkg/apis/extensions" "k8s.io/kubernetes/pkg/util/sets" "k8s.io/kubernetes/pkg/util/wait" "k8s.io/kubernetes/plugin/pkg/scheduler/algorithm" algorithmpredicates "k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/predicates" + algorithmpriorities "k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/priorities" + priorityutil "k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/priorities/util" schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api" "k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache" ) @@ -372,3 +376,146 @@ func TestFindFitSomeError(t *testing.T) { } } } + +func makeNode(node string, milliCPU, memory int64) *api.Node { + return &api.Node{ + ObjectMeta: api.ObjectMeta{Name: node}, + Status: api.NodeStatus{ + Capacity: api.ResourceList{ + "cpu": *resource.NewMilliQuantity(milliCPU, resource.DecimalSI), + "memory": *resource.NewQuantity(memory, resource.BinarySI), + }, + Allocatable: api.ResourceList{ + "cpu": *resource.NewMilliQuantity(milliCPU, resource.DecimalSI), + "memory": *resource.NewQuantity(memory, resource.BinarySI), + }, + }, + } +} + +// The point of this test is to show that you: +// - get the same priority for a zero-request pod as for a pod with the defaults requests, +// both when the zero-request pod is already on the machine and when the zero-request pod +// is the one being scheduled. +// - don't get the same score no matter what we schedule. +func TestZeroRequest(t *testing.T) { + // A pod with no resources. 
We expect spreading to count it as having the default resources. + noResources := api.PodSpec{ + Containers: []api.Container{ + {}, + }, + } + noResources1 := noResources + noResources1.NodeName = "machine1" + // A pod with the same resources as a 0-request pod gets by default as its resources (for spreading). + small := api.PodSpec{ + Containers: []api.Container{ + { + Resources: api.ResourceRequirements{ + Requests: api.ResourceList{ + "cpu": resource.MustParse( + strconv.FormatInt(priorityutil.DefaultMilliCpuRequest, 10) + "m"), + "memory": resource.MustParse( + strconv.FormatInt(priorityutil.DefaultMemoryRequest, 10)), + }, + }, + }, + }, + } + small2 := small + small2.NodeName = "machine2" + // A larger pod. + large := api.PodSpec{ + Containers: []api.Container{ + { + Resources: api.ResourceRequirements{ + Requests: api.ResourceList{ + "cpu": resource.MustParse( + strconv.FormatInt(priorityutil.DefaultMilliCpuRequest*3, 10) + "m"), + "memory": resource.MustParse( + strconv.FormatInt(priorityutil.DefaultMemoryRequest*3, 10)), + }, + }, + }, + }, + } + large1 := large + large1.NodeName = "machine1" + large2 := large + large2.NodeName = "machine2" + tests := []struct { + pod *api.Pod + pods []*api.Pod + nodes []*api.Node + test string + }{ + // The point of these next two tests is to show you get the same priority for a zero-request pod + // as for a pod with the defaults requests, both when the zero-request pod is already on the machine + // and when the zero-request pod is the one being scheduled. + { + pod: &api.Pod{Spec: noResources}, + nodes: []*api.Node{makeNode("machine1", 1000, priorityutil.DefaultMemoryRequest*10), makeNode("machine2", 1000, priorityutil.DefaultMemoryRequest*10)}, + test: "test priority of zero-request pod with machine with zero-request pod", + pods: []*api.Pod{ + {Spec: large1}, {Spec: noResources1}, + {Spec: large2}, {Spec: small2}, + }, + }, + { + pod: &api.Pod{Spec: small}, + nodes: []*api.Node{makeNode("machine1", 1000, priorityutil.DefaultMemoryRequest*10), makeNode("machine2", 1000, priorityutil.DefaultMemoryRequest*10)}, + test: "test priority of nonzero-request pod with machine with zero-request pod", + pods: []*api.Pod{ + {Spec: large1}, {Spec: noResources1}, + {Spec: large2}, {Spec: small2}, + }, + }, + // The point of this test is to verify that we're not just getting the same score no matter what we schedule. + { + pod: &api.Pod{Spec: large}, + nodes: []*api.Node{makeNode("machine1", 1000, priorityutil.DefaultMemoryRequest*10), makeNode("machine2", 1000, priorityutil.DefaultMemoryRequest*10)}, + test: "test priority of larger pod with machine with zero-request pod", + pods: []*api.Pod{ + {Spec: large1}, {Spec: noResources1}, + {Spec: large2}, {Spec: small2}, + }, + }, + } + + const expectedPriority int = 25 + for _, test := range tests { + // This should match the configuration in defaultPriorities() in + // plugin/pkg/scheduler/algorithmprovider/defaults/defaults.go if you want + // to test what's actually in production. 
+		priorityConfigs := []algorithm.PriorityConfig{
+			{Map: algorithmpriorities.LeastRequestedPriorityMap, Weight: 1},
+			{Function: algorithmpriorities.BalancedResourceAllocation, Weight: 1},
+			{
+				Function: algorithmpriorities.NewSelectorSpreadPriority(
+					algorithm.FakePodLister(test.pods),
+					algorithm.FakeServiceLister([]api.Service{}),
+					algorithm.FakeControllerLister([]api.ReplicationController{}),
+					algorithm.FakeReplicaSetLister([]extensions.ReplicaSet{})),
+				Weight: 1,
+			},
+		}
+		nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, test.nodes)
+		list, err := PrioritizeNodes(
+			test.pod, nodeNameToInfo, priorityConfigs,
+			algorithm.FakeNodeLister(test.nodes), []algorithm.SchedulerExtender{})
+		if err != nil {
+			t.Errorf("unexpected error: %v", err)
+		}
+		for _, hp := range list {
+			if test.test == "test priority of larger pod with machine with zero-request pod" {
+				if hp.Score == expectedPriority {
+					t.Errorf("%s: expected non-%d for all priorities, got list %#v", test.test, expectedPriority, list)
+				}
+			} else {
+				if hp.Score != expectedPriority {
+					t.Errorf("%s: expected %d for all priorities, got list %#v", test.test, expectedPriority, list)
+				}
+			}
+		}
+	}
+}
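
For illustration, here is a minimal sketch of how a new priority could be written against the Map-Reduce API introduced by this patch. The package name, the "FewerPodsPriority" name, and the scoring logic are hypothetical; only the PriorityMapFunction and PriorityReduceFunction signatures, schedulerapi.HostPriority, and factory.RegisterPriorityFunction2 are taken from the change above.

// Illustrative sketch only: a hypothetical priority using the new Map-Reduce API.
// The map step scores each node independently; the reduce step normalizes the raw
// per-node scores to the 0-10 range in place.
package example

import (
	"fmt"

	"k8s.io/kubernetes/pkg/api"
	schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
	"k8s.io/kubernetes/plugin/pkg/scheduler/factory"
	"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)

// fewerPodsPriorityMap returns the number of pods already running on the node as a
// raw score (lower is better; the reduce step inverts and normalizes it).
func fewerPodsPriorityMap(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
	node := nodeInfo.Node()
	if node == nil {
		return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
	}
	return schedulerapi.HostPriority{Host: node.Name, Score: len(nodeInfo.Pods())}, nil
}

// fewerPodsPriorityReduce rewrites the raw scores so that the emptiest node gets 10
// and the fullest node gets 0.
func fewerPodsPriorityReduce(result schedulerapi.HostPriorityList) error {
	maxCount := 0
	for i := range result {
		if result[i].Score > maxCount {
			maxCount = result[i].Score
		}
	}
	for i := range result {
		if maxCount == 0 {
			result[i].Score = 10
			continue
		}
		result[i].Score = 10 * (maxCount - result[i].Score) / maxCount
	}
	return nil
}

func init() {
	// Registration mirrors the LeastRequestedPriority change in defaults.go.
	factory.RegisterPriorityFunction2("FewerPodsPriority", fewerPodsPriorityMap, fewerPodsPriorityReduce, 1)
}

With a config registered this way, PrioritizeNodes in generic_scheduler.go invokes the map function once per node, passing the priorityMetadata computed up front, and then invokes the reduce function once over the whole HostPriorityList before the weight is applied.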