2014-06-28 22:35:51 +00:00
/*
Copyright 2014 The Kubernetes Authors All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
2015-05-08 11:01:09 +00:00
package predicates
2014-06-28 22:35:51 +00:00
import (
2014-09-26 23:28:30 +00:00
"fmt"
2015-08-05 22:03:47 +00:00
"k8s.io/kubernetes/pkg/api"
2015-08-13 19:01:50 +00:00
client "k8s.io/kubernetes/pkg/client/unversioned"
2015-08-05 22:03:47 +00:00
"k8s.io/kubernetes/pkg/labels"
"k8s.io/kubernetes/plugin/pkg/scheduler/algorithm"
2015-07-08 07:48:49 +00:00
"github.com/golang/glog"
2014-06-28 22:35:51 +00:00
)
2014-09-25 20:55:42 +00:00
type NodeInfo interface {
2014-12-08 03:44:27 +00:00
GetNodeInfo ( nodeID string ) ( * api . Node , error )
2014-09-26 23:28:30 +00:00
}
type StaticNodeInfo struct {
2014-12-08 03:44:27 +00:00
* api . NodeList
2014-09-26 23:28:30 +00:00
}
2014-12-08 03:44:27 +00:00
func ( nodes StaticNodeInfo ) GetNodeInfo ( nodeID string ) ( * api . Node , error ) {
2014-09-26 23:28:30 +00:00
for ix := range nodes . Items {
2014-10-22 17:02:02 +00:00
if nodes . Items [ ix ] . Name == nodeID {
2014-09-26 23:28:30 +00:00
return & nodes . Items [ ix ] , nil
}
}
2014-10-10 20:49:09 +00:00
return nil , fmt . Errorf ( "failed to find node: %s, %#v" , nodeID , nodes )
}
// ClientNodeInfo answers node lookups through an embedded API client.
type ClientNodeInfo struct {
	// Client is the embedded client whose Nodes() interface GetNodeInfo queries.
	*client.Client
}
2014-12-08 03:44:27 +00:00
func ( nodes ClientNodeInfo ) GetNodeInfo ( nodeID string ) ( * api . Node , error ) {
2014-12-08 05:56:43 +00:00
return nodes . Nodes ( ) . Get ( nodeID )
2014-09-25 20:55:42 +00:00
}
2014-10-13 04:34:23 +00:00
func isVolumeConflict ( volume api . Volume , pod * api . Pod ) bool {
2015-03-06 14:26:39 +00:00
if volume . GCEPersistentDisk != nil {
2015-06-01 12:13:48 +00:00
disk := volume . GCEPersistentDisk
2015-03-06 14:26:39 +00:00
manifest := & ( pod . Spec )
for ix := range manifest . Volumes {
if manifest . Volumes [ ix ] . GCEPersistentDisk != nil &&
2015-06-01 12:13:48 +00:00
manifest . Volumes [ ix ] . GCEPersistentDisk . PDName == disk . PDName &&
! ( manifest . Volumes [ ix ] . GCEPersistentDisk . ReadOnly && disk . ReadOnly ) {
2015-03-06 14:26:39 +00:00
return true
}
}
2014-10-13 04:34:23 +00:00
}
2015-04-07 21:16:36 +00:00
if volume . AWSElasticBlockStore != nil {
2015-04-09 13:34:16 +00:00
volumeID := volume . AWSElasticBlockStore . VolumeID
2014-10-13 04:34:23 +00:00
2015-03-06 14:26:39 +00:00
manifest := & ( pod . Spec )
for ix := range manifest . Volumes {
2015-04-07 21:16:36 +00:00
if manifest . Volumes [ ix ] . AWSElasticBlockStore != nil &&
2015-04-09 13:34:16 +00:00
manifest . Volumes [ ix ] . AWSElasticBlockStore . VolumeID == volumeID {
2015-03-06 14:26:39 +00:00
return true
}
2014-10-13 04:34:23 +00:00
}
}
return false
}
// NoDiskConflict evaluates if a pod can fit due to the volumes it requests, and those that
// are already mounted. Some times of volumes are mounted onto node machines. For now, these mounts
// are exclusive so if there is already a volume mounted on that node, another pod can't schedule
2015-07-10 12:11:15 +00:00
// there. This is GCE and Amazon EBS specific for now.
2014-10-13 04:34:23 +00:00
// TODO: migrate this into some per-volume specific code?
2015-04-03 22:51:50 +00:00
func NoDiskConflict ( pod * api . Pod , existingPods [ ] * api . Pod , node string ) ( bool , error ) {
2014-11-13 15:52:13 +00:00
manifest := & ( pod . Spec )
2014-10-13 04:34:23 +00:00
for ix := range manifest . Volumes {
for podIx := range existingPods {
2015-04-03 22:51:50 +00:00
if isVolumeConflict ( manifest . Volumes [ ix ] , existingPods [ podIx ] ) {
2014-10-13 04:34:23 +00:00
return false , nil
}
}
}
return true , nil
}
2014-09-25 20:55:42 +00:00
// ResourceFit carries the state needed by the PodFitsResources predicate.
type ResourceFit struct {
	// info is used to look up the node whose capacity is being checked.
	info NodeInfo
}
// resourceRequest is the total of the resource requests of a pod's containers.
type resourceRequest struct {
	// milliCPU is the summed Cpu().MilliValue() of the container requests.
	milliCPU int64
	// memory is the summed Memory().Value() of the container requests.
	memory int64
}
2015-07-24 01:27:29 +00:00
// FailedResourceType names the resource that caused PodFitsResources to reject
// a pod. PodFitsResources sets it to "PodExceedsMaxPodNumber",
// "PodExceedsFreeCPU" or "PodExceedsFreeMemory" right before returning false
// for the respective reason; it is not reset on success.
// NOTE(review): this is mutable package-level state and no synchronization is
// visible in this file — confirm callers do not read it concurrently.
var FailedResourceType string
2014-09-25 20:55:42 +00:00
func getResourceRequest ( pod * api . Pod ) resourceRequest {
result := resourceRequest { }
2015-07-30 19:59:22 +00:00
for _ , container := range pod . Spec . Containers {
requests := container . Resources . Requests
result . memory += requests . Memory ( ) . Value ( )
result . milliCPU += requests . Cpu ( ) . MilliValue ( )
2014-09-25 20:55:42 +00:00
}
return result
}
2015-07-24 01:27:29 +00:00
func CheckPodsExceedingFreeResources ( pods [ ] * api . Pod , capacity api . ResourceList ) ( fitting [ ] * api . Pod , notFittingCPU , notFittingMemory [ ] * api . Pod ) {
2015-03-16 12:50:00 +00:00
totalMilliCPU := capacity . Cpu ( ) . MilliValue ( )
totalMemory := capacity . Memory ( ) . Value ( )
milliCPURequested := int64 ( 0 )
memoryRequested := int64 ( 0 )
2015-04-03 22:51:50 +00:00
for _ , pod := range pods {
podRequest := getResourceRequest ( pod )
2015-03-16 12:50:00 +00:00
fitsCPU := totalMilliCPU == 0 || ( totalMilliCPU - milliCPURequested ) >= podRequest . milliCPU
fitsMemory := totalMemory == 0 || ( totalMemory - memoryRequested ) >= podRequest . memory
2015-07-24 01:27:29 +00:00
if ! fitsCPU {
2015-08-12 03:16:40 +00:00
// the pod doesn't fit due to CPU request
2015-07-24 01:27:29 +00:00
notFittingCPU = append ( notFittingCPU , pod )
continue
}
if ! fitsMemory {
2015-08-12 03:16:40 +00:00
// the pod doesn't fit due to Memory request
2015-07-24 01:27:29 +00:00
notFittingMemory = append ( notFittingMemory , pod )
2015-03-20 16:52:32 +00:00
continue
2015-03-16 12:50:00 +00:00
}
2015-03-20 16:52:32 +00:00
// the pod fits
milliCPURequested += podRequest . milliCPU
memoryRequested += podRequest . memory
2015-04-03 22:51:50 +00:00
fitting = append ( fitting , pod )
2015-03-16 12:50:00 +00:00
}
2015-03-20 16:52:32 +00:00
return
2015-03-16 12:50:00 +00:00
}
2014-09-25 20:55:42 +00:00
// PodFitsResources calculates fit based on requested, rather than used resources
2015-04-03 22:51:50 +00:00
func ( r * ResourceFit ) PodFitsResources ( pod * api . Pod , existingPods [ ] * api . Pod , node string ) ( bool , error ) {
podRequest := getResourceRequest ( pod )
2014-09-25 20:55:42 +00:00
info , err := r . info . GetNodeInfo ( node )
if err != nil {
return false , err
}
2015-03-17 14:43:49 +00:00
if podRequest . milliCPU == 0 && podRequest . memory == 0 {
2015-05-18 22:32:32 +00:00
return int64 ( len ( existingPods ) ) < info . Status . Capacity . Pods ( ) . Value ( ) , nil
2015-03-17 14:43:49 +00:00
}
2015-04-03 22:51:50 +00:00
pods := [ ] * api . Pod { }
2015-03-16 12:50:00 +00:00
copy ( pods , existingPods )
pods = append ( existingPods , pod )
2015-07-24 01:27:29 +00:00
_ , exceedingCPU , exceedingMemory := CheckPodsExceedingFreeResources ( pods , info . Status . Capacity )
if int64 ( len ( pods ) ) > info . Status . Capacity . Pods ( ) . Value ( ) {
2015-09-29 13:46:22 +00:00
glog . V ( 4 ) . Infof ( "Cannot schedule Pod %+v, because Node %+v is full, running %v out of %v Pods." , pod , node , len ( pods ) - 1 , info . Status . Capacity . Pods ( ) . Value ( ) )
2015-07-24 01:27:29 +00:00
FailedResourceType = "PodExceedsMaxPodNumber"
return false , nil
}
if len ( exceedingCPU ) > 0 {
2015-09-29 13:46:22 +00:00
glog . V ( 4 ) . Infof ( "Cannot schedule Pod %+v, because Node does not have sufficient CPU" , pod )
2015-07-24 01:27:29 +00:00
FailedResourceType = "PodExceedsFreeCPU"
return false , nil
}
if len ( exceedingMemory ) > 0 {
2015-09-29 13:46:22 +00:00
glog . V ( 4 ) . Infof ( "Cannot schedule Pod %+v, because Node does not have sufficient Memory" , pod )
2015-07-24 01:27:29 +00:00
FailedResourceType = "PodExceedsFreeMemory"
2015-03-16 12:50:00 +00:00
return false , nil
2014-09-25 20:55:42 +00:00
}
2015-09-29 13:46:22 +00:00
glog . V ( 4 ) . Infof ( "Schedule Pod %+v on Node %+v is allowed, Node is running only %v out of %v Pods." , pod , node , len ( pods ) - 1 , info . Status . Capacity . Pods ( ) . Value ( ) )
2015-03-16 12:50:00 +00:00
return true , nil
2014-09-25 20:55:42 +00:00
}
2015-05-08 11:01:09 +00:00
func NewResourceFitPredicate ( info NodeInfo ) algorithm . FitPredicate {
2014-09-26 23:28:30 +00:00
fit := & ResourceFit {
info : info ,
}
return fit . PodFitsResources
}
2015-05-08 11:01:09 +00:00
func NewSelectorMatchPredicate ( info NodeInfo ) algorithm . FitPredicate {
2014-10-22 00:13:52 +00:00
selector := & NodeSelector {
info : info ,
}
return selector . PodSelectorMatches
}
2015-03-20 16:52:32 +00:00
// PodMatchesNodeLabels reports whether node's labels satisfy pod's
// NodeSelector. A pod with an empty NodeSelector matches every node.
func PodMatchesNodeLabels(pod *api.Pod, node *api.Node) bool {
	wanted := pod.Spec.NodeSelector
	if len(wanted) == 0 {
		return true
	}
	return labels.SelectorFromSet(wanted).Matches(labels.Set(node.Labels))
}
2014-10-22 00:13:52 +00:00
// NodeSelector carries the state needed by the PodSelectorMatches predicate.
type NodeSelector struct {
	// info is used to look up the node whose labels are being checked.
	info NodeInfo
}
2015-09-10 08:40:22 +00:00
func ( n * NodeSelector ) PodSelectorMatches ( pod * api . Pod , existingPods [ ] * api . Pod , nodeID string ) ( bool , error ) {
node , err := n . info . GetNodeInfo ( nodeID )
2014-10-22 00:13:52 +00:00
if err != nil {
return false , err
}
2015-09-10 08:40:22 +00:00
return PodMatchesNodeLabels ( pod , node ) , nil
2014-10-22 00:13:52 +00:00
}
2015-04-03 22:51:50 +00:00
func PodFitsHost ( pod * api . Pod , existingPods [ ] * api . Pod , node string ) ( bool , error ) {
2015-05-22 23:40:57 +00:00
if len ( pod . Spec . NodeName ) == 0 {
2014-12-18 22:12:58 +00:00
return true , nil
}
2015-05-22 23:40:57 +00:00
return pod . Spec . NodeName == node , nil
2014-12-18 22:12:58 +00:00
}
2014-12-22 21:54:41 +00:00
// NodeLabelChecker carries the state needed by the CheckNodeLabelPresence
// predicate.
type NodeLabelChecker struct {
	// info is used to look up the node being checked.
	info NodeInfo
	// labels lists the label keys to test on the node.
	labels []string
	// presence selects whether the listed labels must all be present (true)
	// or must all be absent (false).
	presence bool
}
2015-05-08 11:01:09 +00:00
func NewNodeLabelPredicate ( info NodeInfo , labels [ ] string , presence bool ) algorithm . FitPredicate {
2014-12-22 21:54:41 +00:00
labelChecker := & NodeLabelChecker {
info : info ,
labels : labels ,
presence : presence ,
}
return labelChecker . CheckNodeLabelPresence
}
2015-09-10 08:40:22 +00:00
// CheckNodeLabelPresence checks whether all of the specified labels exists on a node or not, regardless of their value
// If "presence" is false, then returns false if any of the requested labels matches any of the node's labels,
2015-01-05 22:51:22 +00:00
// otherwise returns true.
2015-09-10 08:40:22 +00:00
// If "presence" is true, then returns false if any of the requested labels does not match any of the node's labels,
2015-01-05 22:51:22 +00:00
// otherwise returns true.
//
2015-09-10 08:40:22 +00:00
// Consider the cases where the nodes are placed in regions/zones/racks and these are identified by labels
// In some cases, it is required that only nodes that are part of ANY of the defined regions/zones/racks be selected
2014-12-22 21:54:41 +00:00
//
2015-09-10 08:40:22 +00:00
// Alternately, eliminating nodes that have a certain label, regardless of value, is also useful
// A node may have a label with "retiring" as key and the date as the value
// and it may be desirable to avoid scheduling new pods on this node
func ( n * NodeLabelChecker ) CheckNodeLabelPresence ( pod * api . Pod , existingPods [ ] * api . Pod , nodeID string ) ( bool , error ) {
2014-12-22 21:54:41 +00:00
var exists bool
2015-09-10 08:40:22 +00:00
node , err := n . info . GetNodeInfo ( nodeID )
2014-12-22 21:54:41 +00:00
if err != nil {
return false , err
}
2015-09-10 08:40:22 +00:00
nodeLabels := labels . Set ( node . Labels )
2014-12-22 21:54:41 +00:00
for _ , label := range n . labels {
2015-09-10 08:40:22 +00:00
exists = nodeLabels . Has ( label )
2014-12-22 21:54:41 +00:00
if ( exists && ! n . presence ) || ( ! exists && n . presence ) {
return false , nil
}
}
return true , nil
}
2014-12-22 23:55:31 +00:00
type ServiceAffinity struct {
2015-05-08 11:01:09 +00:00
podLister algorithm . PodLister
serviceLister algorithm . ServiceLister
2014-12-22 23:55:31 +00:00
nodeInfo NodeInfo
labels [ ] string
}
2015-05-08 11:01:09 +00:00
func NewServiceAffinityPredicate ( podLister algorithm . PodLister , serviceLister algorithm . ServiceLister , nodeInfo NodeInfo , labels [ ] string ) algorithm . FitPredicate {
2014-12-22 23:55:31 +00:00
affinity := & ServiceAffinity {
podLister : podLister ,
serviceLister : serviceLister ,
nodeInfo : nodeInfo ,
labels : labels ,
}
return affinity . CheckServiceAffinity
}
2015-09-10 08:40:22 +00:00
// CheckServiceAffinity ensures that only the nodes that match the specified labels are considered for scheduling.
2015-01-05 22:51:22 +00:00
// The set of labels to be considered are provided to the struct (ServiceAffinity).
2015-09-10 08:40:22 +00:00
// The pod is checked for the labels and any missing labels are then checked in the node
2015-01-05 22:51:22 +00:00
// that hosts the service pods (peers) for the given pod.
2015-01-08 06:18:22 +00:00
//
// We add an implicit selector requiring some particular value V for label L to a pod, if:
// - L is listed in the ServiceAffinity object that is passed into the function
// - the pod does not have any NodeSelector for L
2015-09-10 08:40:22 +00:00
// - some other pod from the same service is already scheduled onto a node that has value V for label L
func ( s * ServiceAffinity ) CheckServiceAffinity ( pod * api . Pod , existingPods [ ] * api . Pod , nodeID string ) ( bool , error ) {
2014-12-22 23:55:31 +00:00
var affinitySelector labels . Selector
2015-01-05 22:51:22 +00:00
// check if the pod being scheduled has the affinity labels specified in its NodeSelector
2014-12-22 23:55:31 +00:00
affinityLabels := map [ string ] string { }
2015-01-05 22:51:22 +00:00
nodeSelector := labels . Set ( pod . Spec . NodeSelector )
2014-12-22 23:55:31 +00:00
labelsExist := true
for _ , l := range s . labels {
2015-01-05 22:51:22 +00:00
if nodeSelector . Has ( l ) {
affinityLabels [ l ] = nodeSelector . Get ( l )
2014-12-22 23:55:31 +00:00
} else {
// the current pod does not specify all the labels, look in the existing service pods
labelsExist = false
}
}
// skip looking at other pods in the service if the current pod defines all the required affinity labels
if ! labelsExist {
2015-01-05 22:51:22 +00:00
services , err := s . serviceLister . GetPodServices ( pod )
2014-12-22 23:55:31 +00:00
if err == nil {
2015-01-05 22:51:22 +00:00
// just use the first service and get the other pods within the service
// TODO: a separate predicate can be created that tries to handle all services for the pod
selector := labels . SelectorFromSet ( services [ 0 ] . Spec . Selector )
2015-01-13 17:52:37 +00:00
servicePods , err := s . podLister . List ( selector )
2014-12-22 23:55:31 +00:00
if err != nil {
return false , err
}
2015-03-06 22:29:44 +00:00
// consider only the pods that belong to the same namespace
2015-04-03 22:51:50 +00:00
nsServicePods := [ ] * api . Pod { }
2015-03-06 22:29:44 +00:00
for _ , nsPod := range servicePods {
if nsPod . Namespace == pod . Namespace {
nsServicePods = append ( nsServicePods , nsPod )
}
}
if len ( nsServicePods ) > 0 {
2015-09-10 08:40:22 +00:00
// consider any service pod and fetch the node its hosted on
otherNode , err := s . nodeInfo . GetNodeInfo ( nsServicePods [ 0 ] . Spec . NodeName )
2014-12-22 23:55:31 +00:00
if err != nil {
return false , err
}
for _ , l := range s . labels {
// If the pod being scheduled has the label value specified, do not override it
if _ , exists := affinityLabels [ l ] ; exists {
continue
}
2015-09-10 08:40:22 +00:00
if labels . Set ( otherNode . Labels ) . Has ( l ) {
affinityLabels [ l ] = labels . Set ( otherNode . Labels ) . Get ( l )
2014-12-22 23:55:31 +00:00
}
}
}
}
}
2015-09-10 08:40:22 +00:00
// if there are no existing pods in the service, consider all nodes
2014-12-22 23:55:31 +00:00
if len ( affinityLabels ) == 0 {
affinitySelector = labels . Everything ( )
} else {
affinitySelector = labels . Set ( affinityLabels ) . AsSelector ( )
}
2015-09-10 08:40:22 +00:00
node , err := s . nodeInfo . GetNodeInfo ( nodeID )
2014-12-22 23:55:31 +00:00
if err != nil {
return false , err
}
2015-09-10 08:40:22 +00:00
// check if the node matches the selector
return affinitySelector . Matches ( labels . Set ( node . Labels ) ) , nil
2014-12-22 23:55:31 +00:00
}
2015-09-29 09:44:26 +00:00
func PodFitsHostPorts ( pod * api . Pod , existingPods [ ] * api . Pod , node string ) ( bool , error ) {
2014-11-05 05:21:26 +00:00
existingPorts := getUsedPorts ( existingPods ... )
wantPorts := getUsedPorts ( pod )
for wport := range wantPorts {
if wport == 0 {
continue
}
if existingPorts [ wport ] {
return false , nil
2014-09-23 23:14:54 +00:00
}
2014-06-28 22:35:51 +00:00
}
2014-09-23 23:14:54 +00:00
return true , nil
2014-06-28 22:35:51 +00:00
}
2015-04-03 22:51:50 +00:00
func getUsedPorts ( pods ... * api . Pod ) map [ int ] bool {
2014-11-05 05:21:26 +00:00
ports := make ( map [ int ] bool )
for _ , pod := range pods {
2014-11-13 15:52:13 +00:00
for _ , container := range pod . Spec . Containers {
2014-11-05 05:21:26 +00:00
for _ , podPort := range container . Ports {
ports [ podPort . HostPort ] = true
2014-06-28 22:35:51 +00:00
}
}
}
2014-11-05 05:21:26 +00:00
return ports
2014-06-28 22:35:51 +00:00
}
2015-06-10 21:35:59 +00:00
// filterNonRunningPods returns the pods whose phase is neither PodSucceeded
// nor PodFailed, preserving their order. An empty input is returned unchanged;
// otherwise a new slice is returned and the input is left untouched.
func filterNonRunningPods(pods []*api.Pod) []*api.Pod {
	if len(pods) == 0 {
		return pods
	}
	kept := []*api.Pod{}
	for _, p := range pods {
		switch p.Status.Phase {
		case api.PodSucceeded, api.PodFailed:
			// finished pods are dropped
		default:
			kept = append(kept, p)
		}
	}
	return kept
}
2014-09-24 21:18:31 +00:00
// MapPodsToMachines obtains a list of pods and pivots that list into a map where the keys are host names
// and the values are the list of pods running on that host.
2015-05-08 11:01:09 +00:00
func MapPodsToMachines ( lister algorithm . PodLister ) ( map [ string ] [ ] * api . Pod , error ) {
2015-04-03 22:51:50 +00:00
machineToPods := map [ string ] [ ] * api . Pod { }
2014-09-24 16:32:36 +00:00
// TODO: perform more targeted query...
2015-01-08 05:57:45 +00:00
pods , err := lister . List ( labels . Everything ( ) )
2014-09-24 16:32:36 +00:00
if err != nil {
2015-04-03 22:51:50 +00:00
return map [ string ] [ ] * api . Pod { } , err
2014-09-24 16:32:36 +00:00
}
2015-06-10 21:35:59 +00:00
pods = filterNonRunningPods ( pods )
2014-09-24 16:32:36 +00:00
for _ , scheduledPod := range pods {
2015-05-22 23:40:57 +00:00
host := scheduledPod . Spec . NodeName
2014-09-24 16:32:36 +00:00
machineToPods [ host ] = append ( machineToPods [ host ] , scheduledPod )
}
return machineToPods , nil
}