mirror of https://github.com/k3s-io/k3s
Merge pull request #31606 from wojtek-t/scheuler_map_reduce
Automatic merge from submit-queue. POC: MapReduce-like scheduler priority functions. Ref #24246
commit
9d06efb2d1
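
The refactor splits every priority function into a per-node "map" step plus an optional whole-list "reduce" step, and computes per-pod metadata once per scheduling cycle instead of once per node. The standalone Go sketch below illustrates the shape of the pattern only; its types and names are simplified stand-ins for the scheduler's own (the real signatures appear in the algorithm/types.go and PrioritizeNodes hunks further down), and the scoring numbers are invented.

package main

import "fmt"

// Simplified stand-ins for the scheduler types touched by this PR.
type HostPriority struct {
	Host  string
	Score int
}
type HostPriorityList []HostPriority

// Map scores a single node; Reduce post-processes the whole list (may be nil).
type PriorityMapFunction func(node string, meta interface{}) (HostPriority, error)
type PriorityReduceFunction func(result HostPriorityList) error

// prioritize mirrors the new driver loop in PrioritizeNodes: run the map
// function once per node, then hand the full list to the optional reduce step.
func prioritize(nodes []string, meta interface{}, mapFn PriorityMapFunction, reduceFn PriorityReduceFunction) (HostPriorityList, error) {
	result := make(HostPriorityList, 0, len(nodes))
	for _, n := range nodes {
		hp, err := mapFn(n, meta)
		if err != nil {
			return nil, err
		}
		result = append(result, hp)
	}
	if reduceFn != nil {
		if err := reduceFn(result); err != nil {
			return nil, err
		}
	}
	return result, nil
}

func main() {
	// Fake per-node requested CPU, standing in for NodeInfo state.
	requested := map[string]int64{"machine1": 300, "machine2": 700}
	capacity := int64(1000)

	mapFn := func(node string, meta interface{}) (HostPriority, error) {
		// Toy version of the least-requested rule: more free CPU, higher score.
		return HostPriority{Host: node, Score: int((capacity - requested[node]) * 10 / capacity)}, nil
	}
	// nil metadata and nil reduce: metadata would normally be computed once per pod
	// (see PriorityMetadata in the diff) and a reduce step could rescale the scores.
	list, err := prioritize([]string{"machine1", "machine2"}, nil, mapFn, nil)
	fmt.Println(list, err)
}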

@@ -67,7 +67,7 @@ func (c *CachedNodeInfo) GetNodeInfo(id string) (*api.Node, error) {
	return node.(*api.Node), nil
}

// podMetadata is a type that is passed as metadata for predicate functions
// predicateMetadata is a type that is passed as metadata for predicate functions
type predicateMetadata struct {
	podBestEffort bool
	podRequest    *schedulercache.Resource

@@ -17,6 +17,7 @@ limitations under the License.
package priorities

import (
	"fmt"
	"math"

	"github.com/golang/glog"
@@ -28,6 +29,21 @@ import (
	"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)

// priorityMetadata is a type that is passed as metadata for priority functions
type priorityMetadata struct {
	nonZeroRequest *schedulercache.Resource
}

func PriorityMetadata(pod *api.Pod, nodes []*api.Node) interface{} {
	// If we cannot compute metadata, just return nil
	if pod == nil {
		return nil
	}
	return &priorityMetadata{
		nonZeroRequest: getNonZeroRequests(pod),
	}
}

func getNonZeroRequests(pod *api.Pod) *schedulercache.Resource {
	result := &schedulercache.Resource{}
	for i := range pod.Spec.Containers {
@@ -76,8 +92,12 @@ func calculateUsedScore(requested int64, capacity int64, node string) int64 {
// Calculates host priority based on the amount of unused resources.
// 'node' has information about the resources on the node.
// 'pods' is a list of pods currently scheduled on the node.
// TODO: Use Node() from nodeInfo instead of passing it.
func calculateUnusedPriority(pod *api.Pod, podRequests *schedulercache.Resource, node *api.Node, nodeInfo *schedulercache.NodeInfo) schedulerapi.HostPriority {
func calculateUnusedPriority(pod *api.Pod, podRequests *schedulercache.Resource, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
	node := nodeInfo.Node()
	if node == nil {
		return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
	}

	allocatableResources := nodeInfo.AllocatableResource()
	totalResources := *podRequests
	totalResources.MilliCPU += nodeInfo.NonZeroRequest().MilliCPU
@@ -100,7 +120,7 @@ func calculateUnusedPriority(pod *api.Pod, podRequests *schedulercache.Resource,
	return schedulerapi.HostPriority{
		Host:  node.Name,
		Score: int((cpuScore + memoryScore) / 2),
	}
	}, nil
}

// Calculate the resource used on a node. 'node' has information about the resources on the node.
@@ -136,13 +156,15 @@ func calculateUsedPriority(pod *api.Pod, podRequests *schedulercache.Resource, n
// It calculates the percentage of memory and CPU requested by pods scheduled on the node, and prioritizes
// based on the minimum of the average of the fraction of requested to capacity.
// Details: cpu((capacity - sum(requested)) * 10 / capacity) + memory((capacity - sum(requested)) * 10 / capacity) / 2
func LeastRequestedPriority(pod *api.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, nodes []*api.Node) (schedulerapi.HostPriorityList, error) {
	podResources := getNonZeroRequests(pod)
	list := make(schedulerapi.HostPriorityList, 0, len(nodes))
	for _, node := range nodes {
		list = append(list, calculateUnusedPriority(pod, podResources, node, nodeNameToInfo[node.Name]))
func LeastRequestedPriorityMap(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
	var nonZeroRequest *schedulercache.Resource
	if priorityMeta, ok := meta.(*priorityMetadata); ok {
		nonZeroRequest = priorityMeta.nonZeroRequest
	} else {
		// We couldn't parse metadata - fallback to computing it.
		nonZeroRequest = getNonZeroRequests(pod)
	}
	return list, nil
	return calculateUnusedPriority(pod, nonZeroRequest, nodeInfo)
}

// MostRequestedPriority is a priority function that favors nodes with most requested resources.
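
As a worked example of the LeastRequestedPriority formula documented above, with illustrative numbers rather than values from this PR: on a node with 1000m allocatable CPU of which 250m is requested (existing pods plus the incoming pod), the CPU score is (1000-250)*10/1000 = 7 after integer truncation; if half of the node's memory is requested, the memory score is (1-1/2)*10 = 5; the final host score is int((7+5)/2) = 6, matching the Score computation in calculateUnusedPriority.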

@@ -17,21 +17,21 @@ limitations under the License.
package priorities

import (
	"fmt"
	"os/exec"
	"path/filepath"
	"reflect"
	"sort"
	"strconv"
	"testing"

	"k8s.io/kubernetes/cmd/libs/go2idl/parser"
	"k8s.io/kubernetes/cmd/libs/go2idl/types"
	"k8s.io/kubernetes/pkg/api"
	"k8s.io/kubernetes/pkg/api/resource"
	"k8s.io/kubernetes/pkg/api/unversioned"
	"k8s.io/kubernetes/pkg/apis/extensions"
	"k8s.io/kubernetes/pkg/util/codeinspector"
	"k8s.io/kubernetes/plugin/pkg/scheduler"
	"k8s.io/kubernetes/plugin/pkg/scheduler/algorithm"
	priorityutil "k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/priorities/util"
	schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
	"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)
@@ -52,126 +52,22 @@ func makeNode(node string, milliCPU, memory int64) *api.Node {
	}
}

func TestZeroRequest(t *testing.T) {
	// A pod with no resources. We expect spreading to count it as having the default resources.
	noResources := api.PodSpec{
		Containers: []api.Container{
			{},
		},
	}
	noResources1 := noResources
	noResources1.NodeName = "machine1"
	// A pod with the same resources as a 0-request pod gets by default as its resources (for spreading).
	small := api.PodSpec{
		Containers: []api.Container{
			{
				Resources: api.ResourceRequirements{
					Requests: api.ResourceList{
						"cpu": resource.MustParse(
							strconv.FormatInt(priorityutil.DefaultMilliCpuRequest, 10) + "m"),
						"memory": resource.MustParse(
							strconv.FormatInt(priorityutil.DefaultMemoryRequest, 10)),
					},
				},
			},
		},
	}
	small2 := small
	small2.NodeName = "machine2"
	// A larger pod.
	large := api.PodSpec{
		Containers: []api.Container{
			{
				Resources: api.ResourceRequirements{
					Requests: api.ResourceList{
						"cpu": resource.MustParse(
							strconv.FormatInt(priorityutil.DefaultMilliCpuRequest*3, 10) + "m"),
						"memory": resource.MustParse(
							strconv.FormatInt(priorityutil.DefaultMemoryRequest*3, 10)),
					},
				},
			},
		},
	}
	large1 := large
	large1.NodeName = "machine1"
	large2 := large
	large2.NodeName = "machine2"
	tests := []struct {
		pod   *api.Pod
		pods  []*api.Pod
		nodes []*api.Node
		test  string
	}{
		// The point of these next two tests is to show you get the same priority for a zero-request pod
		// as for a pod with the defaults requests, both when the zero-request pod is already on the machine
		// and when the zero-request pod is the one being scheduled.
		{
			pod:   &api.Pod{Spec: noResources},
			nodes: []*api.Node{makeNode("machine1", 1000, priorityutil.DefaultMemoryRequest*10), makeNode("machine2", 1000, priorityutil.DefaultMemoryRequest*10)},
			test:  "test priority of zero-request pod with machine with zero-request pod",
			pods: []*api.Pod{
				{Spec: large1}, {Spec: noResources1},
				{Spec: large2}, {Spec: small2},
			},
		},
		{
			pod:   &api.Pod{Spec: small},
			nodes: []*api.Node{makeNode("machine1", 1000, priorityutil.DefaultMemoryRequest*10), makeNode("machine2", 1000, priorityutil.DefaultMemoryRequest*10)},
			test:  "test priority of nonzero-request pod with machine with zero-request pod",
			pods: []*api.Pod{
				{Spec: large1}, {Spec: noResources1},
				{Spec: large2}, {Spec: small2},
			},
		},
		// The point of this test is to verify that we're not just getting the same score no matter what we schedule.
		{
			pod:   &api.Pod{Spec: large},
			nodes: []*api.Node{makeNode("machine1", 1000, priorityutil.DefaultMemoryRequest*10), makeNode("machine2", 1000, priorityutil.DefaultMemoryRequest*10)},
			test:  "test priority of larger pod with machine with zero-request pod",
			pods: []*api.Pod{
				{Spec: large1}, {Spec: noResources1},
				{Spec: large2}, {Spec: small2},
			},
		},
	}

	const expectedPriority int = 25
	for _, test := range tests {
		nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, test.nodes)
		list, err := scheduler.PrioritizeNodes(
			test.pod,
			nodeNameToInfo,
			// This should match the configuration in defaultPriorities() in
			// plugin/pkg/scheduler/algorithmprovider/defaults/defaults.go if you want
			// to test what's actually in production.
			[]algorithm.PriorityConfig{
				{Function: LeastRequestedPriority, Weight: 1},
				{Function: BalancedResourceAllocation, Weight: 1},
				{
					Function: NewSelectorSpreadPriority(
						algorithm.FakePodLister(test.pods),
						algorithm.FakeServiceLister([]api.Service{}),
						algorithm.FakeControllerLister([]api.ReplicationController{}),
						algorithm.FakeReplicaSetLister([]extensions.ReplicaSet{})),
					Weight: 1,
				},
			},
			algorithm.FakeNodeLister(test.nodes), []algorithm.SchedulerExtender{})
		if err != nil {
			t.Errorf("unexpected error: %v", err)
func priorityFunction(mapFn algorithm.PriorityMapFunction, reduceFn algorithm.PriorityReduceFunction) algorithm.PriorityFunction {
	return func(pod *api.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, nodes []*api.Node) (schedulerapi.HostPriorityList, error) {
		result := make(schedulerapi.HostPriorityList, 0, len(nodes))
		for i := range nodes {
			hostResult, err := mapFn(pod, nil, nodeNameToInfo[nodes[i].Name])
			if err != nil {
				return nil, err
			}
			result = append(result, hostResult)
		}
		for _, hp := range list {
			if test.test == "test priority of larger pod with machine with zero-request pod" {
				if hp.Score == expectedPriority {
					t.Errorf("%s: expected non-%d for all priorities, got list %#v", test.test, expectedPriority, list)
				}
			} else {
				if hp.Score != expectedPriority {
					t.Errorf("%s: expected %d for all priorities, got list %#v", test.test, expectedPriority, list)
				}
		if reduceFn != nil {
			if err := reduceFn(result); err != nil {
				return nil, err
			}
		}
		return result, nil
	}
}
@@ -401,7 +297,8 @@ func TestLeastRequested(t *testing.T) {

	for _, test := range tests {
		nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, test.nodes)
		list, err := LeastRequestedPriority(test.pod, nodeNameToInfo, test.nodes)
		lrp := priorityFunction(LeastRequestedPriorityMap, nil)
		list, err := lrp(test.pod, nodeNameToInfo, test.nodes)
		if err != nil {
			t.Errorf("unexpected error: %v", err)
		}
@@ -1054,6 +951,29 @@ func makeImageNode(node string, status api.NodeStatus) *api.Node {
	}
}

func getPrioritySignatures() ([]*types.Signature, error) {
	filePath := "./../types.go"
	pkgName := filepath.Dir(filePath)
	builder := parser.New()
	if err := builder.AddDir(pkgName); err != nil {
		return nil, err
	}
	universe, err := builder.FindTypes()
	if err != nil {
		return nil, err
	}
	signatures := []string{"PriorityFunction", "PriorityMapFunction", "PriorityReduceFunction"}
	results := make([]*types.Signature, 0, len(signatures))
	for _, signature := range signatures {
		result, ok := universe[pkgName].Types[signature]
		if !ok {
			return nil, fmt.Errorf("%s type not defined", signature)
		}
		results = append(results, result.Signature)
	}
	return results, nil
}

func TestPrioritiesRegistered(t *testing.T) {
	var functions []*types.Type

@@ -1080,8 +1000,30 @@ func TestPrioritiesRegistered(t *testing.T) {
		}
	}

	prioritySignatures, err := getPrioritySignatures()
	if err != nil {
		t.Fatalf("Couldn't get priorities signatures")
	}

	// Check if all public priorities are referenced in target files.
	for _, function := range functions {
		// Ignore functions that don't match priorities signatures.
		signature := function.Underlying.Signature
		match := false
		for _, prioritySignature := range prioritySignatures {
			if len(prioritySignature.Parameters) != len(signature.Parameters) {
				continue
			}
			if len(prioritySignature.Results) != len(signature.Results) {
				continue
			}
			// TODO: Check exact types of parameters and results.
			match = true
		}
		if !match {
			continue
		}

		args := []string{"-rl", function.Name.Name}
		args = append(args, targetFiles...)

@@ -26,9 +26,24 @@ import (
// The failure information is given by the error.
type FitPredicate func(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (bool, []PredicateFailureReason, error)

// PriorityMapFunction is a function that computes per-node results for a given node.
// TODO: Figure out the exact API of this method.
type PriorityMapFunction func(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error)

// PriorityReduceFunction is a function that aggregated per-node results and computes
// final scores for all nodes.
// TODO: Figure out the exact API of this method.
type PriorityReduceFunction func(result schedulerapi.HostPriorityList) error

// DEPRECATED
// Use Map-Reduce pattern for priority functions.
type PriorityFunction func(pod *api.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, nodes []*api.Node) (schedulerapi.HostPriorityList, error)

type PriorityConfig struct {
	Map    PriorityMapFunction
	Reduce PriorityReduceFunction
	// TODO: Remove it after migrating all functions to
	// Map-Reduce pattern.
	Function PriorityFunction
	Weight   int
}
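
For illustration, a new-style priority written against these types could look like the hypothetical pair below. It is not part of this PR: the names ExampleRequestedMilliCPUMap and ExampleNormalizeReduce are invented, and the only scheduler calls used are ones that already appear elsewhere in this diff (nodeInfo.Node(), nodeInfo.NonZeroRequest(), the HostPriority fields).

package priorities

import (
	"fmt"

	"k8s.io/kubernetes/pkg/api"
	schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
	"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)

// ExampleRequestedMilliCPUMap is a hypothetical map function: it gives each node
// a raw score proportional to the CPU already requested on it.
func ExampleRequestedMilliCPUMap(pod *api.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
	node := nodeInfo.Node()
	if node == nil {
		return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
	}
	return schedulerapi.HostPriority{
		Host:  node.Name,
		Score: int(nodeInfo.NonZeroRequest().MilliCPU / 100),
	}, nil
}

// ExampleNormalizeReduce rescales the raw per-node scores to the scheduler's
// 0-10 range; leaving Reduce nil would use the map results as-is.
func ExampleNormalizeReduce(result schedulerapi.HostPriorityList) error {
	max := 0
	for i := range result {
		if result[i].Score > max {
			max = result[i].Score
		}
	}
	if max == 0 {
		return nil
	}
	for i := range result {
		result[i].Score = result[i].Score * 10 / max
	}
	return nil
}

Such a pair would be carried as algorithm.PriorityConfig{Map: ExampleRequestedMilliCPUMap, Reduce: ExampleNormalizeReduce, Weight: 1}, or registered through RegisterPriorityFunction2 in the plugins.go hunk further down.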

@@ -165,7 +165,7 @@ func defaultPredicates() sets.String {
func defaultPriorities() sets.String {
	return sets.NewString(
		// Prioritize nodes by least requested utilization.
		factory.RegisterPriorityFunction("LeastRequestedPriority", priorities.LeastRequestedPriority, 1),
		factory.RegisterPriorityFunction2("LeastRequestedPriority", priorities.LeastRequestedPriorityMap, nil, 1),
		// Prioritizes nodes to help achieve balanced resource usage
		factory.RegisterPriorityFunction("BalancedResourceAllocation", priorities.BalancedResourceAllocation, 1),
		// spreads pods by minimizing the number of pods (belonging to the same service or replication controller) on the same node.

@@ -48,13 +48,21 @@ type PluginFactoryArgs struct {
// A FitPredicateFactory produces a FitPredicate from the given args.
type FitPredicateFactory func(PluginFactoryArgs) algorithm.FitPredicate

// DEPRECATED
// Use Map-Reduce pattern for priority functions.
// A PriorityFunctionFactory produces a PriorityConfig from the given args.
type PriorityFunctionFactory func(PluginFactoryArgs) algorithm.PriorityFunction

// A PriorityFunctionFactory produces map & reduce priority functions
// from a given args.
// FIXME: Rename to PriorityFunctionFactory.
type PriorityFunctionFactory2 func(PluginFactoryArgs) (algorithm.PriorityMapFunction, algorithm.PriorityReduceFunction)

// A PriorityConfigFactory produces a PriorityConfig from the given function and weight
type PriorityConfigFactory struct {
	Function PriorityFunctionFactory
	Weight   int
	Function          PriorityFunctionFactory
	MapReduceFunction PriorityFunctionFactory2
	Weight            int
}

var (
@@ -139,6 +147,8 @@ func IsFitPredicateRegistered(name string) bool {
	return ok
}

// DEPRECATED
// Use Map-Reduce pattern for priority functions.
// Registers a priority function with the algorithm registry. Returns the name,
// with which the function was registered.
func RegisterPriorityFunction(name string, function algorithm.PriorityFunction, weight int) string {
@@ -150,6 +160,22 @@ func RegisterPriorityFunction(name string, function algorithm.PriorityFunction,
	})
}

// Registers a priority function with the algorithm registry. Returns the name,
// with which the function was registered.
// FIXME: Rename to PriorityFunctionFactory.
func RegisterPriorityFunction2(
	name string,
	mapFunction algorithm.PriorityMapFunction,
	reduceFunction algorithm.PriorityReduceFunction,
	weight int) string {
	return RegisterPriorityConfigFactory(name, PriorityConfigFactory{
		MapReduceFunction: func(PluginFactoryArgs) (algorithm.PriorityMapFunction, algorithm.PriorityReduceFunction) {
			return mapFunction, reduceFunction
		},
		Weight: weight,
	})
}
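
A provider-side usage sketch (the helper name registerExamplePriorities is invented; the two registration calls are the ones this PR adds to or keeps in the defaults.go hunk above):

package defaults

import (
	"k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/priorities"
	"k8s.io/kubernetes/plugin/pkg/scheduler/factory"
)

// registerExamplePriorities sketches how a provider switches a priority to the
// new map/reduce registration while leaving unmigrated ones on the legacy call.
func registerExamplePriorities() {
	// Map-only registration: a nil reduce function means per-node scores are used directly.
	factory.RegisterPriorityFunction2("LeastRequestedPriority", priorities.LeastRequestedPriorityMap, nil, 1)
	// Not yet migrated: the deprecated whole-list form.
	factory.RegisterPriorityFunction("BalancedResourceAllocation", priorities.BalancedResourceAllocation, 1)
}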

func RegisterPriorityConfigFactory(name string, pcf PriorityConfigFactory) string {
	schedulerFactoryMutex.Lock()
	defer schedulerFactoryMutex.Unlock()
@@ -193,8 +219,9 @@ func RegisterCustomPriorityFunction(policy schedulerapi.PriorityPolicy) string {
		glog.V(2).Infof("Priority type %s already registered, reusing.", policy.Name)
		// set/update the weight based on the policy
		pcf = &PriorityConfigFactory{
			Function: existing_pcf.Function,
			Weight:   policy.Weight,
			Function:          existing_pcf.Function,
			MapReduceFunction: existing_pcf.MapReduceFunction,
			Weight:            policy.Weight,
		}
	}

@@ -265,10 +292,19 @@ func getPriorityFunctionConfigs(names sets.String, args PluginFactoryArgs) ([]al
		if !ok {
			return nil, fmt.Errorf("Invalid priority name %s specified - no corresponding function found", name)
		}
		configs = append(configs, algorithm.PriorityConfig{
			Function: factory.Function(args),
			Weight:   factory.Weight,
		})
		if factory.Function != nil {
			configs = append(configs, algorithm.PriorityConfig{
				Function: factory.Function(args),
				Weight:   factory.Weight,
			})
		} else {
			mapFunction, reduceFunction := factory.MapReduceFunction(args)
			configs = append(configs, algorithm.PriorityConfig{
				Map:    mapFunction,
				Reduce: reduceFunction,
				Weight: factory.Weight,
			})
		}
	}
	return configs, nil
}

@@ -32,6 +32,7 @@ import (
	"k8s.io/kubernetes/pkg/util/workqueue"
	"k8s.io/kubernetes/plugin/pkg/scheduler/algorithm"
	"k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/predicates"
	"k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/priorities"
	schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
	"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)
@@ -237,7 +238,7 @@ func PrioritizeNodes(
	nodes []*api.Node,
	extenders []algorithm.SchedulerExtender,
) (schedulerapi.HostPriorityList, error) {
	result := make(schedulerapi.HostPriorityList, 0, len(nodeNameToInfo))
	result := make(schedulerapi.HostPriorityList, 0, len(nodes))

	// If no priority configs are provided, then the EqualPriority function is applied
	// This is required to generate the priority list in the required format
@@ -252,6 +253,7 @@ func PrioritizeNodes(
		errs []error
	)

	meta := priorities.PriorityMetadata(pod, nodes)
	for _, priorityConfig := range priorityConfigs {
		// skip the priority function if the weight is specified as 0
		if priorityConfig.Weight == 0 {
@@ -262,8 +264,26 @@ func PrioritizeNodes(
		go func(config algorithm.PriorityConfig) {
			defer wg.Done()
			weight := config.Weight
			priorityFunc := config.Function
			prioritizedList, err := priorityFunc(pod, nodeNameToInfo, nodes)

			prioritizedList, err := func() (schedulerapi.HostPriorityList, error) {
				if config.Function != nil {
					return config.Function(pod, nodeNameToInfo, nodes)
				}
				prioritizedList := make(schedulerapi.HostPriorityList, 0, len(nodes))
				for i := range nodes {
					hostResult, err := config.Map(pod, meta, nodeNameToInfo[nodes[i].Name])
					if err != nil {
						return nil, err
					}
					prioritizedList = append(prioritizedList, hostResult)
				}
				if config.Reduce != nil {
					if err := config.Reduce(prioritizedList); err != nil {
						return nil, err
					}
				}
				return prioritizedList, nil
			}()

			mu.Lock()
			defer mu.Unlock()
@@ -277,13 +297,12 @@ func PrioritizeNodes(
			}
		}(priorityConfig)
	}
	// wait for all go routines to finish
	wg.Wait()
	if len(errs) != 0 {
		return schedulerapi.HostPriorityList{}, errors.NewAggregate(errs)
	}

	// wait for all go routines to finish
	wg.Wait()

	if len(extenders) != 0 && nodes != nil {
		for _, extender := range extenders {
			wg.Add(1)

@@ -25,10 +25,14 @@ import (
	"time"

	"k8s.io/kubernetes/pkg/api"
	"k8s.io/kubernetes/pkg/api/resource"
	"k8s.io/kubernetes/pkg/apis/extensions"
	"k8s.io/kubernetes/pkg/util/sets"
	"k8s.io/kubernetes/pkg/util/wait"
	"k8s.io/kubernetes/plugin/pkg/scheduler/algorithm"
	algorithmpredicates "k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/predicates"
	algorithmpriorities "k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/priorities"
	priorityutil "k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/priorities/util"
	schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
	"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)
@@ -372,3 +376,146 @@ func TestFindFitSomeError(t *testing.T) {
		}
	}
}

func makeNode(node string, milliCPU, memory int64) *api.Node {
	return &api.Node{
		ObjectMeta: api.ObjectMeta{Name: node},
		Status: api.NodeStatus{
			Capacity: api.ResourceList{
				"cpu":    *resource.NewMilliQuantity(milliCPU, resource.DecimalSI),
				"memory": *resource.NewQuantity(memory, resource.BinarySI),
			},
			Allocatable: api.ResourceList{
				"cpu":    *resource.NewMilliQuantity(milliCPU, resource.DecimalSI),
				"memory": *resource.NewQuantity(memory, resource.BinarySI),
			},
		},
	}
}

// The point of this test is to show that you:
// - get the same priority for a zero-request pod as for a pod with the defaults requests,
// both when the zero-request pod is already on the machine and when the zero-request pod
// is the one being scheduled.
// - don't get the same score no matter what we schedule.
func TestZeroRequest(t *testing.T) {
	// A pod with no resources. We expect spreading to count it as having the default resources.
	noResources := api.PodSpec{
		Containers: []api.Container{
			{},
		},
	}
	noResources1 := noResources
	noResources1.NodeName = "machine1"
	// A pod with the same resources as a 0-request pod gets by default as its resources (for spreading).
	small := api.PodSpec{
		Containers: []api.Container{
			{
				Resources: api.ResourceRequirements{
					Requests: api.ResourceList{
						"cpu": resource.MustParse(
							strconv.FormatInt(priorityutil.DefaultMilliCpuRequest, 10) + "m"),
						"memory": resource.MustParse(
							strconv.FormatInt(priorityutil.DefaultMemoryRequest, 10)),
					},
				},
			},
		},
	}
	small2 := small
	small2.NodeName = "machine2"
	// A larger pod.
	large := api.PodSpec{
		Containers: []api.Container{
			{
				Resources: api.ResourceRequirements{
					Requests: api.ResourceList{
						"cpu": resource.MustParse(
							strconv.FormatInt(priorityutil.DefaultMilliCpuRequest*3, 10) + "m"),
						"memory": resource.MustParse(
							strconv.FormatInt(priorityutil.DefaultMemoryRequest*3, 10)),
					},
				},
			},
		},
	}
	large1 := large
	large1.NodeName = "machine1"
	large2 := large
	large2.NodeName = "machine2"
	tests := []struct {
		pod   *api.Pod
		pods  []*api.Pod
		nodes []*api.Node
		test  string
	}{
		// The point of these next two tests is to show you get the same priority for a zero-request pod
		// as for a pod with the defaults requests, both when the zero-request pod is already on the machine
		// and when the zero-request pod is the one being scheduled.
		{
			pod:   &api.Pod{Spec: noResources},
			nodes: []*api.Node{makeNode("machine1", 1000, priorityutil.DefaultMemoryRequest*10), makeNode("machine2", 1000, priorityutil.DefaultMemoryRequest*10)},
			test:  "test priority of zero-request pod with machine with zero-request pod",
			pods: []*api.Pod{
				{Spec: large1}, {Spec: noResources1},
				{Spec: large2}, {Spec: small2},
			},
		},
		{
			pod:   &api.Pod{Spec: small},
			nodes: []*api.Node{makeNode("machine1", 1000, priorityutil.DefaultMemoryRequest*10), makeNode("machine2", 1000, priorityutil.DefaultMemoryRequest*10)},
			test:  "test priority of nonzero-request pod with machine with zero-request pod",
			pods: []*api.Pod{
				{Spec: large1}, {Spec: noResources1},
				{Spec: large2}, {Spec: small2},
			},
		},
		// The point of this test is to verify that we're not just getting the same score no matter what we schedule.
		{
			pod:   &api.Pod{Spec: large},
			nodes: []*api.Node{makeNode("machine1", 1000, priorityutil.DefaultMemoryRequest*10), makeNode("machine2", 1000, priorityutil.DefaultMemoryRequest*10)},
			test:  "test priority of larger pod with machine with zero-request pod",
			pods: []*api.Pod{
				{Spec: large1}, {Spec: noResources1},
				{Spec: large2}, {Spec: small2},
			},
		},
	}

	const expectedPriority int = 25
	for _, test := range tests {
		// This should match the configuration in defaultPriorities() in
		// plugin/pkg/scheduler/algorithmprovider/defaults/defaults.go if you want
		// to test what's actually in production.
		priorityConfigs := []algorithm.PriorityConfig{
			{Map: algorithmpriorities.LeastRequestedPriorityMap, Weight: 1},
			{Function: algorithmpriorities.BalancedResourceAllocation, Weight: 1},
			{
				Function: algorithmpriorities.NewSelectorSpreadPriority(
					algorithm.FakePodLister(test.pods),
					algorithm.FakeServiceLister([]api.Service{}),
					algorithm.FakeControllerLister([]api.ReplicationController{}),
					algorithm.FakeReplicaSetLister([]extensions.ReplicaSet{})),
				Weight: 1,
			},
		}
		nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, test.nodes)
		list, err := PrioritizeNodes(
			test.pod, nodeNameToInfo, priorityConfigs,
			algorithm.FakeNodeLister(test.nodes), []algorithm.SchedulerExtender{})
		if err != nil {
			t.Errorf("unexpected error: %v", err)
		}
		for _, hp := range list {
			if test.test == "test priority of larger pod with machine with zero-request pod" {
				if hp.Score == expectedPriority {
					t.Errorf("%s: expected non-%d for all priorities, got list %#v", test.test, expectedPriority, list)
				}
			} else {
				if hp.Score != expectedPriority {
					t.Errorf("%s: expected %d for all priorities, got list %#v", test.test, expectedPriority, list)
				}
			}
		}
	}
}