2014-08-20 21:34:55 +00:00
|
|
|
/*
|
|
|
|
Copyright 2014 Google Inc. All rights reserved.
|
|
|
|
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
you may not use this file except in compliance with the License.
|
|
|
|
You may obtain a copy of the License at
|
|
|
|
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
See the License for the specific language governing permissions and
|
|
|
|
limitations under the License.
|
|
|
|
*/
|
|
|
|
|
|
|
|
// Package factory can set up a scheduler. This code is here instead of
|
|
|
|
// plugin/cmd/scheduler for both testability and reuse.
|
|
|
|
package factory
|
|
|
|
|
|
|
|
import (
|
|
|
|
"math/rand"
|
2014-10-03 16:59:39 +00:00
|
|
|
"sync"
|
2014-08-20 21:34:55 +00:00
|
|
|
"time"
|
|
|
|
|
|
|
|
"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
|
|
|
|
"github.com/GoogleCloudPlatform/kubernetes/pkg/client"
|
|
|
|
"github.com/GoogleCloudPlatform/kubernetes/pkg/client/cache"
|
|
|
|
"github.com/GoogleCloudPlatform/kubernetes/pkg/labels"
|
|
|
|
algorithm "github.com/GoogleCloudPlatform/kubernetes/pkg/scheduler"
|
2014-08-20 22:03:32 +00:00
|
|
|
"github.com/GoogleCloudPlatform/kubernetes/pkg/util"
|
2014-08-20 21:34:55 +00:00
|
|
|
"github.com/GoogleCloudPlatform/kubernetes/plugin/pkg/scheduler"
|
2015-02-20 00:18:28 +00:00
|
|
|
schedulerapi "github.com/GoogleCloudPlatform/kubernetes/plugin/pkg/scheduler/api"
|
2014-08-20 21:34:55 +00:00
|
|
|
|
|
|
|
"github.com/golang/glog"
|
|
|
|
)
|
|
|
|
|
2014-12-10 04:25:45 +00:00
|
|
|
var (
|
2015-01-26 21:44:53 +00:00
|
|
|
PodLister = &cache.StoreToPodLister{cache.NewStore(cache.MetaNamespaceKeyFunc)}
|
|
|
|
MinionLister = &cache.StoreToNodeLister{cache.NewStore(cache.MetaNamespaceKeyFunc)}
|
|
|
|
ServiceLister = &cache.StoreToServiceLister{cache.NewStore(cache.MetaNamespaceKeyFunc)}
|
2014-12-10 04:25:45 +00:00
|
|
|
)
|
|
|
|
|
2014-12-01 19:49:13 +00:00
|
|
|
// ConfigFactory knows how to fill out a scheduler config with its support functions.
|
|
|
|
type ConfigFactory struct {
|
2014-08-20 21:34:55 +00:00
|
|
|
Client *client.Client
|
2014-11-19 16:16:25 +00:00
|
|
|
// queue for pods that need scheduling
|
|
|
|
PodQueue *cache.FIFO
|
|
|
|
// a means to list all scheduled pods
|
2015-01-07 23:33:17 +00:00
|
|
|
PodLister *cache.StoreToPodLister
|
2014-11-19 16:16:25 +00:00
|
|
|
// a means to list all minions
|
2015-01-07 23:33:17 +00:00
|
|
|
MinionLister *cache.StoreToNodeLister
|
2014-12-12 22:29:20 +00:00
|
|
|
// a means to list all services
|
|
|
|
ServiceLister *cache.StoreToServiceLister
|
2014-11-19 16:16:25 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// NewConfigFactory initializes the factory.
|
2014-12-01 19:49:13 +00:00
|
|
|
func NewConfigFactory(client *client.Client) *ConfigFactory {
|
2014-12-10 04:25:45 +00:00
|
|
|
return &ConfigFactory{
|
2014-12-12 22:29:20 +00:00
|
|
|
Client: client,
|
2015-01-26 21:44:53 +00:00
|
|
|
PodQueue: cache.NewFIFO(cache.MetaNamespaceKeyFunc),
|
2014-12-12 22:29:20 +00:00
|
|
|
PodLister: PodLister,
|
|
|
|
MinionLister: MinionLister,
|
|
|
|
ServiceLister: ServiceLister,
|
2014-11-19 16:16:25 +00:00
|
|
|
}
|
2014-12-10 04:25:45 +00:00
|
|
|
}
|
2014-11-19 16:16:25 +00:00
|
|
|
|
2014-12-10 04:25:45 +00:00
|
|
|
// Create creates a scheduler with the default algorithm provider.
|
|
|
|
func (f *ConfigFactory) Create() (*scheduler.Config, error) {
|
|
|
|
return f.CreateFromProvider(DefaultProvider)
|
2014-08-20 21:34:55 +00:00
|
|
|
}
|
|
|
|
|
2014-12-10 04:25:45 +00:00
|
|
|
// CreateFromProvider creates a scheduler from the name of a registered algorithm provider.
|
|
|
|
func (f *ConfigFactory) CreateFromProvider(providerName string) (*scheduler.Config, error) {
|
|
|
|
glog.V(2).Infof("creating scheduler from algorithm provider '%v'", providerName)
|
|
|
|
provider, err := GetAlgorithmProvider(providerName)
|
2014-11-19 16:16:25 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
2014-12-10 04:25:45 +00:00
|
|
|
return f.CreateFromKeys(provider.FitPredicateKeys, provider.PriorityFunctionKeys)
|
|
|
|
}
|
|
|
|
|
2015-02-20 00:18:28 +00:00
|
|
|
// CreateFromConfig creates a scheduler from the configuration file
|
|
|
|
func (f *ConfigFactory) CreateFromConfig(policy schedulerapi.Policy) (*scheduler.Config, error) {
|
|
|
|
glog.V(2).Infof("creating scheduler from configuration: %v", policy)
|
|
|
|
|
|
|
|
predicateKeys := util.NewStringSet()
|
|
|
|
for _, predicate := range policy.Predicates {
|
|
|
|
glog.V(2).Infof("Registering predicate: %s", predicate.Name)
|
|
|
|
predicateKeys.Insert(RegisterCustomPredicate(predicate))
|
|
|
|
}
|
|
|
|
|
|
|
|
priorityKeys := util.NewStringSet()
|
|
|
|
for _, priority := range policy.Priorities {
|
|
|
|
glog.V(2).Infof("Registering priority: %s", priority.Name)
|
|
|
|
priorityKeys.Insert(RegisterCustomPriorityFunction(priority))
|
|
|
|
}
|
|
|
|
|
|
|
|
return f.CreateFromKeys(predicateKeys, priorityKeys)
|
|
|
|
}
|
|
|
|
|
2014-12-10 04:25:45 +00:00
|
|
|
// CreateFromKeys creates a scheduler from a set of registered fit predicate keys and priority keys.
|
|
|
|
func (f *ConfigFactory) CreateFromKeys(predicateKeys, priorityKeys util.StringSet) (*scheduler.Config, error) {
|
|
|
|
glog.V(2).Infof("creating scheduler with fit predicates '%v' and priority functions '%v", predicateKeys, priorityKeys)
|
|
|
|
predicateFuncs, err := getFitPredicateFunctions(predicateKeys)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
2014-11-19 16:16:25 +00:00
|
|
|
}
|
2014-12-10 04:25:45 +00:00
|
|
|
|
|
|
|
priorityConfigs, err := getPriorityFunctionConfigs(priorityKeys)
|
2014-11-19 16:16:25 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
2014-08-20 21:34:55 +00:00
|
|
|
// Watch and queue pods that need scheduling.
|
2014-12-10 04:25:45 +00:00
|
|
|
cache.NewReflector(f.createUnassignedPodLW(), &api.Pod{}, f.PodQueue).Run()
|
2014-08-20 21:34:55 +00:00
|
|
|
|
|
|
|
// Watch and cache all running pods. Scheduler needs to find all pods
|
|
|
|
// so it knows where it's safe to place a pod. Cache this locally.
|
2014-12-10 04:25:45 +00:00
|
|
|
cache.NewReflector(f.createAssignedPodLW(), &api.Pod{}, f.PodLister.Store).Run()
|
2014-08-20 21:34:55 +00:00
|
|
|
|
|
|
|
// Watch minions.
|
|
|
|
// Minions may be listed frequently, so provide a local up-to-date cache.
|
|
|
|
if false {
|
2015-01-10 18:34:55 +00:00
|
|
|
// Disable this code until minions support watches. Note when this code is enabled,
|
|
|
|
// we need to make sure minion ListWatcher has proper FieldSelector.
|
2014-12-10 04:25:45 +00:00
|
|
|
cache.NewReflector(f.createMinionLW(), &api.Node{}, f.MinionLister.Store).Run()
|
2014-08-20 21:34:55 +00:00
|
|
|
} else {
|
2014-12-10 04:25:45 +00:00
|
|
|
cache.NewPoller(f.pollMinions, 10*time.Second, f.MinionLister.Store).Run()
|
2014-08-20 21:34:55 +00:00
|
|
|
}
|
|
|
|
|
2014-12-12 22:29:20 +00:00
|
|
|
// Watch and cache all service objects. Scheduler needs to find all pods
|
|
|
|
// created by the same service, so that it can spread them correctly.
|
|
|
|
// Cache this locally.
|
|
|
|
cache.NewReflector(f.createServiceLW(), &api.Service{}, f.ServiceLister.Store).Run()
|
|
|
|
|
2014-08-20 21:34:55 +00:00
|
|
|
r := rand.New(rand.NewSource(time.Now().UnixNano()))
|
2014-11-19 16:16:25 +00:00
|
|
|
|
2014-12-10 04:25:45 +00:00
|
|
|
algo := algorithm.NewGenericScheduler(predicateFuncs, priorityConfigs, f.PodLister, r)
|
2014-08-20 21:34:55 +00:00
|
|
|
|
2014-10-03 16:59:39 +00:00
|
|
|
podBackoff := podBackoff{
|
|
|
|
perPodBackoff: map[string]*backoffEntry{},
|
|
|
|
clock: realClock{},
|
2015-01-09 18:03:49 +00:00
|
|
|
|
|
|
|
defaultDuration: 1 * time.Second,
|
|
|
|
maxDuration: 60 * time.Second,
|
2014-10-03 16:59:39 +00:00
|
|
|
}
|
|
|
|
|
2014-08-20 21:34:55 +00:00
|
|
|
return &scheduler.Config{
|
2014-12-10 04:25:45 +00:00
|
|
|
MinionLister: f.MinionLister,
|
2014-08-20 21:34:55 +00:00
|
|
|
Algorithm: algo,
|
2014-12-10 04:25:45 +00:00
|
|
|
Binder: &binder{f.Client},
|
2014-08-20 21:34:55 +00:00
|
|
|
NextPod: func() *api.Pod {
|
2014-12-10 04:25:45 +00:00
|
|
|
pod := f.PodQueue.Pop().(*api.Pod)
|
2014-11-19 16:16:25 +00:00
|
|
|
glog.V(2).Infof("glog.v2 --> About to try and schedule pod %v", pod.Name)
|
2014-08-21 20:35:50 +00:00
|
|
|
return pod
|
2014-08-20 21:34:55 +00:00
|
|
|
},
|
2014-12-10 04:25:45 +00:00
|
|
|
Error: f.makeDefaultErrorFunc(&podBackoff, f.PodQueue),
|
2014-11-19 16:16:25 +00:00
|
|
|
}, nil
|
|
|
|
}
|
|
|
|
|
2015-01-07 21:12:29 +00:00
|
|
|
// createUnassignedPodLW returns a cache.ListWatch that finds all pods that need to be
|
2014-09-16 21:08:57 +00:00
|
|
|
// scheduled.
|
2015-01-07 21:12:29 +00:00
|
|
|
func (factory *ConfigFactory) createUnassignedPodLW() *cache.ListWatch {
|
2015-02-28 19:42:49 +00:00
|
|
|
return cache.NewListWatchFromClient(factory.Client, "pods", api.NamespaceAll, labels.Set{"DesiredState.Host": ""}.AsSelector())
|
2014-09-16 21:08:57 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func parseSelectorOrDie(s string) labels.Selector {
|
|
|
|
selector, err := labels.ParseSelector(s)
|
|
|
|
if err != nil {
|
|
|
|
panic(err)
|
|
|
|
}
|
|
|
|
return selector
|
|
|
|
}
|
|
|
|
|
2015-01-07 21:12:29 +00:00
|
|
|
// createAssignedPodLW returns a cache.ListWatch that finds all pods that are
|
2014-09-16 21:08:57 +00:00
|
|
|
// already scheduled.
|
2015-01-07 21:12:29 +00:00
|
|
|
// TODO: return a ListerWatcher interface instead?
|
|
|
|
func (factory *ConfigFactory) createAssignedPodLW() *cache.ListWatch {
|
2015-02-28 19:42:49 +00:00
|
|
|
return cache.NewListWatchFromClient(factory.Client, "pods", api.NamespaceAll, parseSelectorOrDie("DesiredState.Host!="))
|
2014-09-16 21:08:57 +00:00
|
|
|
}
|
|
|
|
|
2015-01-07 21:12:29 +00:00
|
|
|
// createMinionLW returns a cache.ListWatch that gets all changes to minions.
|
|
|
|
func (factory *ConfigFactory) createMinionLW() *cache.ListWatch {
|
2015-02-14 22:10:57 +00:00
|
|
|
return cache.NewListWatchFromClient(factory.Client, "minions", api.NamespaceAll, parseSelectorOrDie(""))
|
2014-09-16 21:08:57 +00:00
|
|
|
}
|
|
|
|
|
2015-01-10 18:34:55 +00:00
|
|
|
// pollMinions lists all minions and filter out unhealthy ones, then returns
|
|
|
|
// an enumerator for cache.Poller.
|
2014-12-01 19:49:13 +00:00
|
|
|
func (factory *ConfigFactory) pollMinions() (cache.Enumerator, error) {
|
2015-01-10 18:34:55 +00:00
|
|
|
allNodes := &api.NodeList{}
|
|
|
|
err := factory.Client.Get().Resource("minions").Do().Into(allNodes)
|
2014-08-20 21:34:55 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
2015-01-10 18:34:55 +00:00
|
|
|
nodes := &api.NodeList{
|
|
|
|
TypeMeta: allNodes.TypeMeta,
|
|
|
|
ListMeta: allNodes.ListMeta,
|
|
|
|
}
|
|
|
|
for _, node := range allNodes.Items {
|
2015-02-24 05:21:14 +00:00
|
|
|
conditionMap := make(map[api.NodeConditionType]*api.NodeCondition)
|
2015-01-10 18:34:55 +00:00
|
|
|
for i := range node.Status.Conditions {
|
|
|
|
cond := node.Status.Conditions[i]
|
2015-02-24 05:21:14 +00:00
|
|
|
conditionMap[cond.Type] = &cond
|
2015-01-10 18:34:55 +00:00
|
|
|
}
|
|
|
|
if condition, ok := conditionMap[api.NodeReady]; ok {
|
|
|
|
if condition.Status == api.ConditionFull {
|
|
|
|
nodes.Items = append(nodes.Items, node)
|
|
|
|
}
|
|
|
|
} else if condition, ok := conditionMap[api.NodeReachable]; ok {
|
|
|
|
if condition.Status == api.ConditionFull {
|
|
|
|
nodes.Items = append(nodes.Items, node)
|
|
|
|
}
|
|
|
|
} else {
|
2015-01-16 22:28:20 +00:00
|
|
|
// If no condition is set, we get unknown node condition. In such cases,
|
|
|
|
// we add nodes unconditionally.
|
2015-01-10 18:34:55 +00:00
|
|
|
nodes.Items = append(nodes.Items, node)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return &nodeEnumerator{nodes}, nil
|
2014-08-20 21:34:55 +00:00
|
|
|
}
|
|
|
|
|
2015-01-08 02:18:21 +00:00
|
|
|
// createServiceLW returns a cache.ListWatch that gets all changes to services.
|
|
|
|
func (factory *ConfigFactory) createServiceLW() *cache.ListWatch {
|
2015-02-14 22:10:57 +00:00
|
|
|
return cache.NewListWatchFromClient(factory.Client, "services", api.NamespaceAll, parseSelectorOrDie(""))
|
2014-12-12 22:29:20 +00:00
|
|
|
}
|
|
|
|
|
2014-12-01 19:49:13 +00:00
|
|
|
func (factory *ConfigFactory) makeDefaultErrorFunc(backoff *podBackoff, podQueue *cache.FIFO) func(pod *api.Pod, err error) {
|
2014-08-20 22:03:32 +00:00
|
|
|
return func(pod *api.Pod, err error) {
|
2014-12-19 21:32:42 +00:00
|
|
|
glog.Errorf("Error scheduling %v %v: %v; retrying", pod.Namespace, pod.Name, err)
|
2014-10-03 16:59:39 +00:00
|
|
|
backoff.gc()
|
2014-08-20 22:03:32 +00:00
|
|
|
// Retry asynchronously.
|
|
|
|
// Note that this is extremely rudimentary and we need a more real error handling path.
|
|
|
|
go func() {
|
|
|
|
defer util.HandleCrash()
|
2014-10-22 17:02:02 +00:00
|
|
|
podID := pod.Name
|
2014-11-03 21:31:36 +00:00
|
|
|
podNamespace := pod.Namespace
|
2014-10-03 16:59:39 +00:00
|
|
|
backoff.wait(podID)
|
2014-08-20 22:03:32 +00:00
|
|
|
// Get the pod again; it may have changed/been scheduled already.
|
|
|
|
pod = &api.Pod{}
|
2014-12-26 20:06:25 +00:00
|
|
|
err := factory.Client.Get().Namespace(podNamespace).Resource("pods").Name(podID).Do().Into(pod)
|
2014-08-20 22:03:32 +00:00
|
|
|
if err != nil {
|
|
|
|
glog.Errorf("Error getting pod %v for retry: %v; abandoning", podID, err)
|
|
|
|
return
|
|
|
|
}
|
2014-11-13 15:52:13 +00:00
|
|
|
if pod.Status.Host == "" {
|
2015-01-26 21:44:53 +00:00
|
|
|
podQueue.Add(pod)
|
2014-08-20 22:03:32 +00:00
|
|
|
}
|
|
|
|
}()
|
|
|
|
}
|
2014-08-20 21:34:55 +00:00
|
|
|
}
|
|
|
|
|
2014-12-08 03:44:27 +00:00
|
|
|
// nodeEnumerator allows a cache.Poller to enumerate items in an api.NodeList
|
|
|
|
type nodeEnumerator struct {
|
|
|
|
*api.NodeList
|
2014-08-20 21:34:55 +00:00
|
|
|
}
|
|
|
|
|
2014-12-08 03:44:27 +00:00
|
|
|
// Len returns the number of items in the node list.
|
|
|
|
func (ne *nodeEnumerator) Len() int {
|
|
|
|
if ne.NodeList == nil {
|
2014-08-20 21:34:55 +00:00
|
|
|
return 0
|
|
|
|
}
|
2014-12-08 03:44:27 +00:00
|
|
|
return len(ne.Items)
|
2014-08-20 21:34:55 +00:00
|
|
|
}
|
|
|
|
|
2014-09-02 10:00:28 +00:00
|
|
|
// Get returns the item (and ID) with the particular index.
|
2015-01-26 21:44:53 +00:00
|
|
|
func (ne *nodeEnumerator) Get(index int) interface{} {
|
|
|
|
return &ne.Items[index]
|
2014-08-20 21:34:55 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
type binder struct {
|
|
|
|
*client.Client
|
|
|
|
}
|
|
|
|
|
|
|
|
// Bind just does a POST binding RPC.
|
|
|
|
func (b *binder) Bind(binding *api.Binding) error {
|
2014-09-18 10:46:14 +00:00
|
|
|
glog.V(2).Infof("Attempting to bind %v to %v", binding.PodID, binding.Host)
|
2014-10-03 15:44:06 +00:00
|
|
|
ctx := api.WithNamespace(api.NewContext(), binding.Namespace)
|
2015-01-19 19:35:41 +00:00
|
|
|
return b.Post().Namespace(api.NamespaceValue(ctx)).Resource("bindings").Body(binding).Do().Error()
|
2014-08-20 21:34:55 +00:00
|
|
|
}
|
2014-10-03 16:59:39 +00:00
|
|
|
|
|
|
|
// clock is the time source consulted by podBackoff. Production code uses
// realClock; the indirection lets another implementation be substituted.
type clock interface {
	// Now returns the current time according to this clock.
	Now() time.Time
}
|
|
|
|
|
|
|
|
// realClock is the clock implementation backed by the system time.
type realClock struct{}

// Now returns time.Now().
func (realClock) Now() time.Time {
	current := time.Now()
	return current
}
|
|
|
|
|
|
|
|
// backoffEntry is the per-pod record kept by podBackoff.
type backoffEntry struct {
	// backoff is the delay handed out by the next getBackoff call for the pod.
	backoff time.Duration
	// lastUpdate is when the entry was last fetched via getEntry; gc compares
	// it against the clock to find stale entries.
	lastUpdate time.Time
}
|
|
|
|
|
|
|
|
type podBackoff struct {
|
2015-01-09 18:03:49 +00:00
|
|
|
perPodBackoff map[string]*backoffEntry
|
|
|
|
lock sync.Mutex
|
|
|
|
clock clock
|
|
|
|
defaultDuration time.Duration
|
|
|
|
maxDuration time.Duration
|
2014-10-03 16:59:39 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func (p *podBackoff) getEntry(podID string) *backoffEntry {
|
|
|
|
p.lock.Lock()
|
|
|
|
defer p.lock.Unlock()
|
|
|
|
entry, ok := p.perPodBackoff[podID]
|
|
|
|
if !ok {
|
2015-01-09 18:03:49 +00:00
|
|
|
entry = &backoffEntry{backoff: p.defaultDuration}
|
2014-10-03 16:59:39 +00:00
|
|
|
p.perPodBackoff[podID] = entry
|
|
|
|
}
|
|
|
|
entry.lastUpdate = p.clock.Now()
|
|
|
|
return entry
|
|
|
|
}
|
|
|
|
|
|
|
|
func (p *podBackoff) getBackoff(podID string) time.Duration {
|
|
|
|
entry := p.getEntry(podID)
|
|
|
|
duration := entry.backoff
|
|
|
|
entry.backoff *= 2
|
2015-01-09 18:03:49 +00:00
|
|
|
if entry.backoff > p.maxDuration {
|
|
|
|
entry.backoff = p.maxDuration
|
2014-10-03 16:59:39 +00:00
|
|
|
}
|
|
|
|
glog.V(4).Infof("Backing off %s for pod %s", duration.String(), podID)
|
|
|
|
return duration
|
|
|
|
}
|
|
|
|
|
|
|
|
func (p *podBackoff) wait(podID string) {
|
|
|
|
time.Sleep(p.getBackoff(podID))
|
|
|
|
}
|
|
|
|
|
|
|
|
func (p *podBackoff) gc() {
|
|
|
|
p.lock.Lock()
|
|
|
|
defer p.lock.Unlock()
|
|
|
|
now := p.clock.Now()
|
|
|
|
for podID, entry := range p.perPodBackoff {
|
2015-01-09 18:03:49 +00:00
|
|
|
if now.Sub(entry.lastUpdate) > p.maxDuration {
|
2014-10-03 16:59:39 +00:00
|
|
|
delete(p.perPodBackoff, podID)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|