/* Copyright 2019 The Kubernetes Authors. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ package cache import ( "fmt" v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/util/sets" schedulerlisters "k8s.io/kubernetes/pkg/scheduler/listers" schedulernodeinfo "k8s.io/kubernetes/pkg/scheduler/nodeinfo" ) // Snapshot is a snapshot of cache NodeInfo and NodeTree order. The scheduler takes a // snapshot at the beginning of each scheduling cycle and uses it for its operations in that cycle. type Snapshot struct { // nodeInfoMap a map of node name to a snapshot of its NodeInfo. nodeInfoMap map[string]*schedulernodeinfo.NodeInfo // nodeInfoList is the list of nodes as ordered in the cache's nodeTree. nodeInfoList []*schedulernodeinfo.NodeInfo // havePodsWithAffinityNodeInfoList is the list of nodes with at least one pod declaring affinity terms. havePodsWithAffinityNodeInfoList []*schedulernodeinfo.NodeInfo generation int64 } var _ schedulerlisters.SharedLister = &Snapshot{} // NewEmptySnapshot initializes a Snapshot struct and returns it. func NewEmptySnapshot() *Snapshot { return &Snapshot{ nodeInfoMap: make(map[string]*schedulernodeinfo.NodeInfo), } } // NewSnapshot initializes a Snapshot struct and returns it. func NewSnapshot(pods []*v1.Pod, nodes []*v1.Node) *Snapshot { nodeInfoMap := createNodeInfoMap(pods, nodes) nodeInfoList := make([]*schedulernodeinfo.NodeInfo, 0, len(nodeInfoMap)) havePodsWithAffinityNodeInfoList := make([]*schedulernodeinfo.NodeInfo, 0, len(nodeInfoMap)) for _, v := range nodeInfoMap { nodeInfoList = append(nodeInfoList, v) if len(v.PodsWithAffinity()) > 0 { havePodsWithAffinityNodeInfoList = append(havePodsWithAffinityNodeInfoList, v) } } s := NewEmptySnapshot() s.nodeInfoMap = nodeInfoMap s.nodeInfoList = nodeInfoList s.havePodsWithAffinityNodeInfoList = havePodsWithAffinityNodeInfoList return s } // createNodeInfoMap obtains a list of pods and pivots that list into a map // where the keys are node names and the values are the aggregated information // for that node. func createNodeInfoMap(pods []*v1.Pod, nodes []*v1.Node) map[string]*schedulernodeinfo.NodeInfo { nodeNameToInfo := make(map[string]*schedulernodeinfo.NodeInfo) for _, pod := range pods { nodeName := pod.Spec.NodeName if _, ok := nodeNameToInfo[nodeName]; !ok { nodeNameToInfo[nodeName] = schedulernodeinfo.NewNodeInfo() } nodeNameToInfo[nodeName].AddPod(pod) } imageExistenceMap := createImageExistenceMap(nodes) for _, node := range nodes { if _, ok := nodeNameToInfo[node.Name]; !ok { nodeNameToInfo[node.Name] = schedulernodeinfo.NewNodeInfo() } nodeInfo := nodeNameToInfo[node.Name] nodeInfo.SetNode(node) nodeInfo.SetImageStates(getNodeImageStates(node, imageExistenceMap)) } return nodeNameToInfo } // getNodeImageStates returns the given node's image states based on the given imageExistence map. func getNodeImageStates(node *v1.Node, imageExistenceMap map[string]sets.String) map[string]*schedulernodeinfo.ImageStateSummary { imageStates := make(map[string]*schedulernodeinfo.ImageStateSummary) for _, image := range node.Status.Images { for _, name := range image.Names { imageStates[name] = &schedulernodeinfo.ImageStateSummary{ Size: image.SizeBytes, NumNodes: len(imageExistenceMap[name]), } } } return imageStates } // createImageExistenceMap returns a map recording on which nodes the images exist, keyed by the images' names. func createImageExistenceMap(nodes []*v1.Node) map[string]sets.String { imageExistenceMap := make(map[string]sets.String) for _, node := range nodes { for _, image := range node.Status.Images { for _, name := range image.Names { if _, ok := imageExistenceMap[name]; !ok { imageExistenceMap[name] = sets.NewString(node.Name) } else { imageExistenceMap[name].Insert(node.Name) } } } } return imageExistenceMap } // Pods returns a PodLister func (s *Snapshot) Pods() schedulerlisters.PodLister { return podLister(s.nodeInfoList) } // NodeInfos returns a NodeInfoLister. func (s *Snapshot) NodeInfos() schedulerlisters.NodeInfoLister { return s } // NumNodes returns the number of nodes in the snapshot. func (s *Snapshot) NumNodes() int { return len(s.nodeInfoList) } type podLister []*schedulernodeinfo.NodeInfo // List returns the list of pods in the snapshot. func (p podLister) List(selector labels.Selector) ([]*v1.Pod, error) { alwaysTrue := func(*v1.Pod) bool { return true } return p.FilteredList(alwaysTrue, selector) } // FilteredList returns a filtered list of pods in the snapshot. func (p podLister) FilteredList(filter schedulerlisters.PodFilter, selector labels.Selector) ([]*v1.Pod, error) { // podFilter is expected to return true for most or all of the pods. We // can avoid expensive array growth without wasting too much memory by // pre-allocating capacity. maxSize := 0 for _, n := range p { maxSize += len(n.Pods()) } pods := make([]*v1.Pod, 0, maxSize) for _, n := range p { for _, pod := range n.Pods() { if filter(pod) && selector.Matches(labels.Set(pod.Labels)) { pods = append(pods, pod) } } } return pods, nil } // List returns the list of nodes in the snapshot. func (s *Snapshot) List() ([]*schedulernodeinfo.NodeInfo, error) { return s.nodeInfoList, nil } // HavePodsWithAffinityList returns the list of nodes with at least one pods with inter-pod affinity func (s *Snapshot) HavePodsWithAffinityList() ([]*schedulernodeinfo.NodeInfo, error) { return s.havePodsWithAffinityNodeInfoList, nil } // Get returns the NodeInfo of the given node name. func (s *Snapshot) Get(nodeName string) (*schedulernodeinfo.NodeInfo, error) { if v, ok := s.nodeInfoMap[nodeName]; ok && v.Node() != nil { return v, nil } return nil, fmt.Errorf("nodeinfo not found for node name %q", nodeName) }