mirror of https://github.com/k3s-io/k3s
129 lines
5.1 KiB
Go
129 lines
5.1 KiB
Go
/*
|
|
Copyright 2015 The Kubernetes Authors.
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
you may not use this file except in compliance with the License.
|
|
You may obtain a copy of the License at
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
See the License for the specific language governing permissions and
|
|
limitations under the License.
|
|
*/
|
|
|
|
package cache
|
|
|
|
import (
|
|
"k8s.io/api/core/v1"
|
|
"k8s.io/apimachinery/pkg/labels"
|
|
"k8s.io/kubernetes/pkg/scheduler/algorithm"
|
|
schedulernodeinfo "k8s.io/kubernetes/pkg/scheduler/nodeinfo"
|
|
)
|
|
|
|
// Cache collects pods' information and provides node-level aggregated information.
|
|
// It's intended for generic scheduler to do efficient lookup.
|
|
// Cache's operations are pod centric. It does incremental updates based on pod events.
|
|
// Pod events are sent via network. We don't have guaranteed delivery of all events:
|
|
// We use Reflector to list and watch from remote.
|
|
// Reflector might be slow and do a relist, which would lead to missing events.
|
|
//
|
|
// State Machine of a pod's events in scheduler's cache:
|
|
//
|
|
//
|
|
// +-------------------------------------------+ +----+
|
|
// | Add | | |
|
|
// | | | | Update
|
|
// + Assume Add v v |
|
|
//Initial +--------> Assumed +------------+---> Added <--+
|
|
// ^ + + | +
|
|
// | | | | |
|
|
// | | | Add | | Remove
|
|
// | | | | |
|
|
// | | | + |
|
|
// +----------------+ +-----------> Expired +----> Deleted
|
|
// Forget Expire
|
|
//
|
|
//
|
|
// Note that an assumed pod can expire, because if we haven't received Add event notifying us
|
|
// for a while, there might be some problems and we shouldn't keep the pod in cache anymore.
|
|
//
|
|
// Note that "Initial", "Expired", and "Deleted" pods do not actually exist in cache.
|
|
// Based on existing use cases, we are making the following assumptions:
|
|
// - No pod would be assumed twice
|
|
// - A pod could be added without going through scheduler. In this case, we will see Add but not Assume event.
|
|
// - If a pod wasn't added, it wouldn't be removed or updated.
|
|
// - Both "Expired" and "Deleted" are valid end states. In case of some problems, e.g. network issue,
|
|
// a pod might have changed its state (e.g. added and deleted) without delivering notification to the cache.
|
|
type Cache interface {
|
|
// AssumePod assumes a pod scheduled and aggregates the pod's information into its node.
|
|
// The implementation also decides the policy to expire pod before being confirmed (receiving Add event).
|
|
// After expiration, its information would be subtracted.
|
|
AssumePod(pod *v1.Pod) error
|
|
|
|
// FinishBinding signals that cache for assumed pod can be expired
|
|
FinishBinding(pod *v1.Pod) error
|
|
|
|
// ForgetPod removes an assumed pod from cache.
|
|
ForgetPod(pod *v1.Pod) error
|
|
|
|
// AddPod either confirms a pod if it's assumed, or adds it back if it's expired.
|
|
// If added back, the pod's information would be added again.
|
|
AddPod(pod *v1.Pod) error
|
|
|
|
// UpdatePod removes oldPod's information and adds newPod's information.
|
|
UpdatePod(oldPod, newPod *v1.Pod) error
|
|
|
|
// RemovePod removes a pod. The pod's information would be subtracted from assigned node.
|
|
RemovePod(pod *v1.Pod) error
|
|
|
|
// GetPod returns the pod from the cache with the same namespace and the
|
|
// same name of the specified pod.
|
|
GetPod(pod *v1.Pod) (*v1.Pod, error)
|
|
|
|
// IsAssumedPod returns true if the pod is assumed and not expired.
|
|
IsAssumedPod(pod *v1.Pod) (bool, error)
|
|
|
|
// AddNode adds overall information about node.
|
|
AddNode(node *v1.Node) error
|
|
|
|
// UpdateNode updates overall information about node.
|
|
UpdateNode(oldNode, newNode *v1.Node) error
|
|
|
|
// RemoveNode removes overall information about node.
|
|
RemoveNode(node *v1.Node) error
|
|
|
|
// UpdateNodeInfoSnapshot updates the passed infoSnapshot to the current contents of Cache.
|
|
// The node info contains aggregated information of pods scheduled (including assumed to be)
|
|
// on this node.
|
|
UpdateNodeInfoSnapshot(nodeSnapshot *NodeInfoSnapshot) error
|
|
|
|
// List lists all cached pods (including assumed ones).
|
|
List(labels.Selector) ([]*v1.Pod, error)
|
|
|
|
// FilteredList returns all cached pods that pass the filter.
|
|
FilteredList(filter algorithm.PodFilter, selector labels.Selector) ([]*v1.Pod, error)
|
|
|
|
// Snapshot takes a snapshot on current cache
|
|
Snapshot() *Snapshot
|
|
|
|
// NodeTree returns a node tree structure
|
|
NodeTree() *NodeTree
|
|
}
|
|
|
|
// Snapshot is a snapshot of cache state
|
|
type Snapshot struct {
|
|
AssumedPods map[string]bool
|
|
Nodes map[string]*schedulernodeinfo.NodeInfo
|
|
}
|
|
|
|
// NodeInfoSnapshot is a snapshot of cache NodeInfo. The scheduler takes a
|
|
// snapshot at the beginning of each scheduling cycle and uses it for its
|
|
// operations in that cycle.
|
|
type NodeInfoSnapshot struct {
|
|
NodeInfoMap map[string]*schedulernodeinfo.NodeInfo
|
|
Generation int64
|
|
}
|