Merge pull request #10667 from gmarek/scheduler

Add spreading by controllers
pull/6/head
Mike Danese 2015-07-31 14:52:42 -07:00
commit 356c9ffb4d
7 changed files with 163 additions and 35 deletions

View File

@ -66,7 +66,7 @@ type ServiceLister interface {
// FakeServiceLister implements ServiceLister on []api.Service for test purposes. // FakeServiceLister implements ServiceLister on []api.Service for test purposes.
type FakeServiceLister []api.Service type FakeServiceLister []api.Service
// FakeServiceLister returns api.ServiceList, the list of all services. // List returns api.ServiceList, the list of all services.
func (f FakeServiceLister) List() (api.ServiceList, error) { func (f FakeServiceLister) List() (api.ServiceList, error) {
return api.ServiceList{Items: f}, nil return api.ServiceList{Items: f}, nil
} }
@ -91,3 +91,39 @@ func (f FakeServiceLister) GetPodServices(pod *api.Pod) (services []api.Service,
return return
} }
// ControllerLister interface represents anything that can produce a list of ReplicationController; the list is consumed by a scheduler.
type ControllerLister interface {
// List returns all the replication controllers.
List() ([]api.ReplicationController, error)
// GetPodControllers returns the replication controllers for the given pod.
GetPodControllers(*api.Pod) ([]api.ReplicationController, error)
}
// FakeControllerLister implements ControllerLister on []api.ReplicationController for test purposes.
// The slice itself is the full set of controllers: List returns it as is, and GetPodControllers
// filters it by namespace and label selector.
type FakeControllerLister []api.ReplicationController
// List returns the underlying slice of ReplicationControllers unchanged; it never returns an error.
func (f FakeControllerLister) List() ([]api.ReplicationController, error) {
	return []api.ReplicationController(f), nil
}
// GetPodControllers returns the ReplicationControllers in the fake list that are in the
// same namespace as the given pod and whose Spec.Selector matches the pod's labels.
// When no controller matches, the returned slice is nil and err describes the pod.
func (f FakeControllerLister) GetPodControllers(pod *api.Pod) (controllers []api.ReplicationController, err error) {
	podLabels := labels.Set(pod.Labels)
	for i := range f {
		rc := f[i]
		// Controllers only manage pods in their own namespace.
		if rc.Namespace != pod.Namespace {
			continue
		}
		if labels.Set(rc.Spec.Selector).AsSelector().Matches(podLabels) {
			controllers = append(controllers, rc)
		}
	}
	if len(controllers) > 0 {
		return controllers, nil
	}
	return controllers, fmt.Errorf("Could not find Replication Controller for pod %s in namespace %s with labels: %v", pod.Name, pod.Namespace, pod.Labels)
}

View File

@ -132,7 +132,7 @@ func TestZeroLimit(t *testing.T) {
// This should match the configuration in defaultPriorities() in // This should match the configuration in defaultPriorities() in
// plugin/pkg/scheduler/algorithmprovider/defaults/defaults.go if you want // plugin/pkg/scheduler/algorithmprovider/defaults/defaults.go if you want
// to test what's actually in production. // to test what's actually in production.
[]algorithm.PriorityConfig{{Function: LeastRequestedPriority, Weight: 1}, {Function: BalancedResourceAllocation, Weight: 1}, {Function: NewServiceSpreadPriority(algorithm.FakeServiceLister([]api.Service{})), Weight: 1}}, []algorithm.PriorityConfig{{Function: LeastRequestedPriority, Weight: 1}, {Function: BalancedResourceAllocation, Weight: 1}, {Function: NewSelectorSpreadPriority(algorithm.FakeServiceLister([]api.Service{}), algorithm.FakeControllerLister([]api.ReplicationController{})), Weight: 1}},
algorithm.FakeMinionLister(api.NodeList{Items: test.nodes})) algorithm.FakeMinionLister(api.NodeList{Items: test.nodes}))
if err != nil { if err != nil {
t.Errorf("unexpected error: %v", err) t.Errorf("unexpected error: %v", err)

View File

@ -23,36 +23,49 @@ import (
"github.com/golang/glog" "github.com/golang/glog"
) )
type ServiceSpread struct { type SelectorSpread struct {
serviceLister algorithm.ServiceLister serviceLister algorithm.ServiceLister
controllerLister algorithm.ControllerLister
} }
func NewServiceSpreadPriority(serviceLister algorithm.ServiceLister) algorithm.PriorityFunction { func NewSelectorSpreadPriority(serviceLister algorithm.ServiceLister, controllerLister algorithm.ControllerLister) algorithm.PriorityFunction {
serviceSpread := &ServiceSpread{ selectorSpread := &SelectorSpread{
serviceLister: serviceLister, serviceLister: serviceLister,
controllerLister: controllerLister,
} }
return serviceSpread.CalculateSpreadPriority return selectorSpread.CalculateSpreadPriority
} }
// CalculateSpreadPriority spreads pods by minimizing the number of pods belonging to the same service // CalculateSpreadPriority spreads pods by minimizing the number of pods belonging to the same service or replication controller. It counts number of pods that run under
// on the same machine. // Services or RCs as the pod being scheduled and tries to minimize the number of conflicts. I.e. pushes scheduler towards a Node where there's a smallest number of
func (s *ServiceSpread) CalculateSpreadPriority(pod *api.Pod, podLister algorithm.PodLister, minionLister algorithm.MinionLister) (algorithm.HostPriorityList, error) { // pods which match the same selectors of Services and RCs as current pod.
func (s *SelectorSpread) CalculateSpreadPriority(pod *api.Pod, podLister algorithm.PodLister, minionLister algorithm.MinionLister) (algorithm.HostPriorityList, error) {
var maxCount int var maxCount int
var nsServicePods []*api.Pod var nsPods []*api.Pod
selectors := make([]labels.Selector, 0)
services, err := s.serviceLister.GetPodServices(pod) services, err := s.serviceLister.GetPodServices(pod)
if err == nil { if err == nil {
// just use the first service and get the other pods within the service for _, service := range services {
// TODO: a separate predicate can be created that tries to handle all services for the pod selectors = append(selectors, labels.SelectorFromSet(service.Spec.Selector))
selector := labels.SelectorFromSet(services[0].Spec.Selector) }
pods, err := podLister.List(selector) }
controllers, err := s.controllerLister.GetPodControllers(pod)
if err == nil {
for _, controller := range controllers {
selectors = append(selectors, labels.SelectorFromSet(controller.Spec.Selector))
}
}
if len(selectors) > 0 {
pods, err := podLister.List(labels.Everything())
if err != nil { if err != nil {
return nil, err return nil, err
} }
// consider only the pods that belong to the same namespace // consider only the pods that belong to the same namespace
for _, nsPod := range pods { for _, nsPod := range pods {
if nsPod.Namespace == pod.Namespace { if nsPod.Namespace == pod.Namespace {
nsServicePods = append(nsServicePods, nsPod) nsPods = append(nsPods, nsPod)
} }
} }
} }
@ -63,8 +76,16 @@ func (s *ServiceSpread) CalculateSpreadPriority(pod *api.Pod, podLister algorith
} }
counts := map[string]int{} counts := map[string]int{}
if len(nsServicePods) > 0 { if len(nsPods) > 0 {
for _, pod := range nsServicePods { for _, pod := range nsPods {
matches := false
for _, selector := range selectors {
if selector.Matches(labels.Set(pod.ObjectMeta.Labels)) {
matches = true
break
}
}
if matches {
counts[pod.Spec.NodeName]++ counts[pod.Spec.NodeName]++
// Compute the maximum number of pods hosted on any minion // Compute the maximum number of pods hosted on any minion
if counts[pod.Spec.NodeName] > maxCount { if counts[pod.Spec.NodeName] > maxCount {
@ -72,6 +93,7 @@ func (s *ServiceSpread) CalculateSpreadPriority(pod *api.Pod, podLister algorith
} }
} }
} }
}
result := []algorithm.HostPriority{} result := []algorithm.HostPriority{}
//score int - scale of 0-10 //score int - scale of 0-10
@ -84,7 +106,7 @@ func (s *ServiceSpread) CalculateSpreadPriority(pod *api.Pod, podLister algorith
} }
result = append(result, algorithm.HostPriority{Host: minion.Name, Score: int(fScore)}) result = append(result, algorithm.HostPriority{Host: minion.Name, Score: int(fScore)})
glog.V(10).Infof( glog.V(10).Infof(
"%v -> %v: ServiceSpreadPriority, Score: (%d)", pod.Name, minion.Name, int(fScore), "%v -> %v: SelectorSpreadPriority, Score: (%d)", pod.Name, minion.Name, int(fScore),
) )
} }
return result, nil return result, nil

View File

@ -25,7 +25,7 @@ import (
"github.com/GoogleCloudPlatform/kubernetes/plugin/pkg/scheduler/algorithm" "github.com/GoogleCloudPlatform/kubernetes/plugin/pkg/scheduler/algorithm"
) )
func TestServiceSpreadPriority(t *testing.T) { func TestSelectorSpreadPriority(t *testing.T) {
labels1 := map[string]string{ labels1 := map[string]string{
"foo": "bar", "foo": "bar",
"baz": "blah", "baz": "blah",
@ -44,6 +44,7 @@ func TestServiceSpreadPriority(t *testing.T) {
pod *api.Pod pod *api.Pod
pods []*api.Pod pods []*api.Pod
nodes []string nodes []string
rcs []api.ReplicationController
services []api.Service services []api.Service
expectedList algorithm.HostPriorityList expectedList algorithm.HostPriorityList
test string test string
@ -158,11 +159,65 @@ func TestServiceSpreadPriority(t *testing.T) {
expectedList: []algorithm.HostPriority{{"machine1", 0}, {"machine2", 5}}, expectedList: []algorithm.HostPriority{{"machine1", 0}, {"machine2", 5}},
test: "service with partial pod label matches", test: "service with partial pod label matches",
}, },
{
pod: &api.Pod{ObjectMeta: api.ObjectMeta{Labels: labels1}},
pods: []*api.Pod{
{Spec: zone1Spec, ObjectMeta: api.ObjectMeta{Labels: labels2}},
{Spec: zone1Spec, ObjectMeta: api.ObjectMeta{Labels: labels1}},
{Spec: zone2Spec, ObjectMeta: api.ObjectMeta{Labels: labels1}},
},
nodes: []string{"machine1", "machine2"},
services: []api.Service{{Spec: api.ServiceSpec{Selector: map[string]string{"baz": "blah"}}}},
rcs: []api.ReplicationController{{Spec: api.ReplicationControllerSpec{Selector: map[string]string{"foo": "bar"}}}},
// "baz=blah" matches both labels1 and labels2, and "foo=bar" matches only labels1. This means that we assume that we want to
// do spreading between all pods. The result should be exactly as above.
expectedList: []algorithm.HostPriority{{"machine1", 0}, {"machine2", 5}},
test: "service with partial pod label matches with service and replication controller",
},
{
pod: &api.Pod{ObjectMeta: api.ObjectMeta{Labels: map[string]string{"foo": "bar", "bar": "foo"}}},
pods: []*api.Pod{
{Spec: zone1Spec, ObjectMeta: api.ObjectMeta{Labels: labels2}},
{Spec: zone1Spec, ObjectMeta: api.ObjectMeta{Labels: labels1}},
{Spec: zone2Spec, ObjectMeta: api.ObjectMeta{Labels: labels1}},
},
nodes: []string{"machine1", "machine2"},
services: []api.Service{{Spec: api.ServiceSpec{Selector: map[string]string{"bar": "foo"}}}},
rcs: []api.ReplicationController{{Spec: api.ReplicationControllerSpec{Selector: map[string]string{"foo": "bar"}}}},
// Taken together Service and Replication Controller should match all Pods, hence result should be equal to one above.
expectedList: []algorithm.HostPriority{{"machine1", 0}, {"machine2", 5}},
test: "disjoined service and replication controller should be treated equally",
},
{
pod: &api.Pod{ObjectMeta: api.ObjectMeta{Labels: labels1}},
pods: []*api.Pod{
{Spec: zone1Spec, ObjectMeta: api.ObjectMeta{Labels: labels2}},
{Spec: zone1Spec, ObjectMeta: api.ObjectMeta{Labels: labels1}},
{Spec: zone2Spec, ObjectMeta: api.ObjectMeta{Labels: labels1}},
},
nodes: []string{"machine1", "machine2"},
rcs: []api.ReplicationController{{Spec: api.ReplicationControllerSpec{Selector: map[string]string{"foo": "bar"}}}},
// Both Nodes have one pod from the given RC, hence both get 0 score.
expectedList: []algorithm.HostPriority{{"machine1", 0}, {"machine2", 0}},
test: "Replication controller with partial pod label matches",
},
{
pod: &api.Pod{ObjectMeta: api.ObjectMeta{Labels: labels1}},
pods: []*api.Pod{
{Spec: zone1Spec, ObjectMeta: api.ObjectMeta{Labels: labels2}},
{Spec: zone1Spec, ObjectMeta: api.ObjectMeta{Labels: labels1}},
{Spec: zone2Spec, ObjectMeta: api.ObjectMeta{Labels: labels1}},
},
nodes: []string{"machine1", "machine2"},
rcs: []api.ReplicationController{{Spec: api.ReplicationControllerSpec{Selector: map[string]string{"baz": "blah"}}}},
expectedList: []algorithm.HostPriority{{"machine1", 0}, {"machine2", 5}},
test: "Replication controller with partial pod label matches",
},
} }
for _, test := range tests { for _, test := range tests {
serviceSpread := ServiceSpread{serviceLister: algorithm.FakeServiceLister(test.services)} selectorSpread := SelectorSpread{serviceLister: algorithm.FakeServiceLister(test.services), controllerLister: algorithm.FakeControllerLister(test.rcs)}
list, err := serviceSpread.CalculateSpreadPriority(test.pod, algorithm.FakePodLister(test.pods), algorithm.FakeMinionLister(makeNodeList(test.nodes))) list, err := selectorSpread.CalculateSpreadPriority(test.pod, algorithm.FakePodLister(test.pods), algorithm.FakeMinionLister(makeNodeList(test.nodes)))
if err != nil { if err != nil {
t.Errorf("unexpected error: %v", err) t.Errorf("unexpected error: %v", err)
} }

View File

@ -67,10 +67,10 @@ func defaultPriorities() util.StringSet {
factory.RegisterPriorityFunction("BalancedResourceAllocation", priorities.BalancedResourceAllocation, 1), factory.RegisterPriorityFunction("BalancedResourceAllocation", priorities.BalancedResourceAllocation, 1),
// spreads pods by minimizing the number of pods (belonging to the same service) on the same minion. // spreads pods by minimizing the number of pods (belonging to the same service) on the same minion.
factory.RegisterPriorityConfigFactory( factory.RegisterPriorityConfigFactory(
"ServiceSpreadingPriority", "SelectorSpreadPriority",
factory.PriorityConfigFactory{ factory.PriorityConfigFactory{
Function: func(args factory.PluginFactoryArgs) algorithm.PriorityFunction { Function: func(args factory.PluginFactoryArgs) algorithm.PriorityFunction {
return priorities.NewServiceSpreadPriority(args.ServiceLister) return priorities.NewSelectorSpreadPriority(args.ServiceLister, args.ControllerLister)
}, },
Weight: 1, Weight: 1,
}, },

View File

@ -58,6 +58,8 @@ type ConfigFactory struct {
NodeLister *cache.StoreToNodeLister NodeLister *cache.StoreToNodeLister
// a means to list all services // a means to list all services
ServiceLister *cache.StoreToServiceLister ServiceLister *cache.StoreToServiceLister
// a means to list all controllers
ControllerLister *cache.StoreToReplicationControllerLister
// Close this to stop all reflectors // Close this to stop all reflectors
StopEverything chan struct{} StopEverything chan struct{}
@ -77,6 +79,7 @@ func NewConfigFactory(client *client.Client) *ConfigFactory {
// Only nodes in the "Ready" condition with status == "True" are schedulable // Only nodes in the "Ready" condition with status == "True" are schedulable
NodeLister: &cache.StoreToNodeLister{cache.NewStore(cache.MetaNamespaceKeyFunc)}, NodeLister: &cache.StoreToNodeLister{cache.NewStore(cache.MetaNamespaceKeyFunc)},
ServiceLister: &cache.StoreToServiceLister{cache.NewStore(cache.MetaNamespaceKeyFunc)}, ServiceLister: &cache.StoreToServiceLister{cache.NewStore(cache.MetaNamespaceKeyFunc)},
ControllerLister: &cache.StoreToReplicationControllerLister{cache.NewStore(cache.MetaNamespaceKeyFunc)},
StopEverything: make(chan struct{}), StopEverything: make(chan struct{}),
} }
modeler := scheduler.NewSimpleModeler(&cache.StoreToPodLister{c.PodQueue}, c.ScheduledPodLister) modeler := scheduler.NewSimpleModeler(&cache.StoreToPodLister{c.PodQueue}, c.ScheduledPodLister)
@ -162,6 +165,7 @@ func (f *ConfigFactory) CreateFromKeys(predicateKeys, priorityKeys util.StringSe
pluginArgs := PluginFactoryArgs{ pluginArgs := PluginFactoryArgs{
PodLister: f.PodLister, PodLister: f.PodLister,
ServiceLister: f.ServiceLister, ServiceLister: f.ServiceLister,
ControllerLister: f.ControllerLister,
// All fit predicates only need to consider schedulable nodes. // All fit predicates only need to consider schedulable nodes.
NodeLister: f.NodeLister.NodeCondition(api.NodeReady, api.ConditionTrue), NodeLister: f.NodeLister.NodeCondition(api.NodeReady, api.ConditionTrue),
NodeInfo: f.NodeLister, NodeInfo: f.NodeLister,
@ -187,10 +191,15 @@ func (f *ConfigFactory) CreateFromKeys(predicateKeys, priorityKeys util.StringSe
cache.NewReflector(f.createMinionLW(), &api.Node{}, f.NodeLister.Store, 0).RunUntil(f.StopEverything) cache.NewReflector(f.createMinionLW(), &api.Node{}, f.NodeLister.Store, 0).RunUntil(f.StopEverything)
// Watch and cache all service objects. Scheduler needs to find all pods // Watch and cache all service objects. Scheduler needs to find all pods
// created by the same service, so that it can spread them correctly. // created by the same services or ReplicationControllers, so that it can spread them correctly.
// Cache this locally. // Cache this locally.
cache.NewReflector(f.createServiceLW(), &api.Service{}, f.ServiceLister.Store, 0).RunUntil(f.StopEverything) cache.NewReflector(f.createServiceLW(), &api.Service{}, f.ServiceLister.Store, 0).RunUntil(f.StopEverything)
// Watch and cache all ReplicationController objects. Scheduler needs to find all pods
// created by the same services or ReplicationControllers, so that it can spread them correctly.
// Cache this locally.
cache.NewReflector(f.createControllerLW(), &api.ReplicationController{}, f.ControllerLister.Store, 0).RunUntil(f.StopEverything)
r := rand.New(rand.NewSource(time.Now().UnixNano())) r := rand.New(rand.NewSource(time.Now().UnixNano()))
algo := scheduler.NewGenericScheduler(predicateFuncs, priorityConfigs, f.PodLister, r) algo := scheduler.NewGenericScheduler(predicateFuncs, priorityConfigs, f.PodLister, r)
@ -254,6 +263,11 @@ func (factory *ConfigFactory) createServiceLW() *cache.ListWatch {
return cache.NewListWatchFromClient(factory.Client, "services", api.NamespaceAll, parseSelectorOrDie("")) return cache.NewListWatchFromClient(factory.Client, "services", api.NamespaceAll, parseSelectorOrDie(""))
} }
// createControllerLW returns a cache.ListWatch for the "replicationControllers" resource
// across all namespaces (api.NamespaceAll), built from the factory's client with an
// empty selector string.
func (factory *ConfigFactory) createControllerLW() *cache.ListWatch {
	selector := parseSelectorOrDie("")
	return cache.NewListWatchFromClient(factory.Client, "replicationControllers", api.NamespaceAll, selector)
}
func (factory *ConfigFactory) makeDefaultErrorFunc(backoff *podBackoff, podQueue *cache.FIFO) func(pod *api.Pod, err error) { func (factory *ConfigFactory) makeDefaultErrorFunc(backoff *podBackoff, podQueue *cache.FIFO) func(pod *api.Pod, err error) {
return func(pod *api.Pod, err error) { return func(pod *api.Pod, err error) {
if err == scheduler.ErrNoNodesAvailable { if err == scheduler.ErrNoNodesAvailable {

View File

@ -35,6 +35,7 @@ import (
type PluginFactoryArgs struct { type PluginFactoryArgs struct {
algorithm.PodLister algorithm.PodLister
algorithm.ServiceLister algorithm.ServiceLister
algorithm.ControllerLister
NodeLister algorithm.MinionLister NodeLister algorithm.MinionLister
NodeInfo predicates.NodeInfo NodeInfo predicates.NodeInfo
} }