Resource quota observes deletes faster

pull/6/head
derekwaynecarr 2015-11-11 16:19:39 -05:00
parent 68e74f96d2
commit 55d4f70f8e
8 changed files with 217 additions and 41 deletions

View File

@ -36,6 +36,7 @@ import (
client "k8s.io/kubernetes/pkg/client/unversioned"
"k8s.io/kubernetes/pkg/client/unversioned/clientcmd"
"k8s.io/kubernetes/pkg/cloudprovider"
"k8s.io/kubernetes/pkg/controller"
"k8s.io/kubernetes/pkg/controller/daemon"
"k8s.io/kubernetes/pkg/controller/deployment"
endpointcontroller "k8s.io/kubernetes/pkg/controller/endpoint"
@ -72,6 +73,7 @@ type CMServer struct {
ConcurrentRCSyncs int
ConcurrentDSCSyncs int
ConcurrentJobSyncs int
ConcurrentResourceQuotaSyncs int
ServiceSyncPeriod time.Duration
NodeSyncPeriod time.Duration
ResourceQuotaSyncPeriod time.Duration
@ -113,9 +115,10 @@ func NewCMServer() *CMServer {
ConcurrentRCSyncs: 5,
ConcurrentDSCSyncs: 2,
ConcurrentJobSyncs: 5,
ConcurrentResourceQuotaSyncs: 5,
ServiceSyncPeriod: 5 * time.Minute,
NodeSyncPeriod: 10 * time.Second,
ResourceQuotaSyncPeriod: 10 * time.Second,
ResourceQuotaSyncPeriod: 5 * time.Minute,
NamespaceSyncPeriod: 5 * time.Minute,
PVClaimBinderSyncPeriod: 10 * time.Minute,
HorizontalPodAutoscalerSyncPeriod: 30 * time.Second,
@ -183,6 +186,7 @@ func (s *CMServer) AddFlags(fs *pflag.FlagSet) {
fs.StringVar(&s.CloudConfigFile, "cloud-config", s.CloudConfigFile, "The path to the cloud provider configuration file. Empty string for no configuration file.")
fs.IntVar(&s.ConcurrentEndpointSyncs, "concurrent-endpoint-syncs", s.ConcurrentEndpointSyncs, "The number of endpoint syncing operations that will be done concurrently. Larger number = faster endpoint updating, but more CPU (and network) load")
fs.IntVar(&s.ConcurrentRCSyncs, "concurrent_rc_syncs", s.ConcurrentRCSyncs, "The number of replication controllers that are allowed to sync concurrently. Larger number = more reponsive replica management, but more CPU (and network) load")
fs.IntVar(&s.ConcurrentResourceQuotaSyncs, "concurrent-resource-quota-syncs", s.ConcurrentResourceQuotaSyncs, "The number of resource quotas that are allowed to sync concurrently. Larger number = more responsive quota management, but more CPU (and network) load")
fs.DurationVar(&s.ServiceSyncPeriod, "service-sync-period", s.ServiceSyncPeriod, "The period for syncing services with their external load balancers")
fs.DurationVar(&s.NodeSyncPeriod, "node-sync-period", s.NodeSyncPeriod, ""+
"The period for syncing nodes from cloudprovider. Longer periods will result in "+
@ -304,7 +308,8 @@ func (s *CMServer) Run(_ []string) error {
glog.Infof("allocate-node-cidrs set to %v, node controller not creating routes", s.AllocateNodeCIDRs)
}
resourcequotacontroller.NewResourceQuotaController(kubeClient).Run(s.ResourceQuotaSyncPeriod)
go resourcequotacontroller.NewResourceQuotaController(
kubeClient, controller.StaticResyncPeriodFunc(s.ResourceQuotaSyncPeriod)).Run(s.ConcurrentResourceQuotaSyncs, util.NeverStop)
// If apiserver is not running we should wait for some time and fail only then. This is particularly
// important when we start apiserver and controller manager at the same time.

View File

@ -33,6 +33,7 @@ import (
clientcmdapi "k8s.io/kubernetes/pkg/client/unversioned/clientcmd/api"
"k8s.io/kubernetes/pkg/cloudprovider"
"k8s.io/kubernetes/pkg/cloudprovider/providers/mesos"
"k8s.io/kubernetes/pkg/controller"
"k8s.io/kubernetes/pkg/controller/daemon"
kendpoint "k8s.io/kubernetes/pkg/controller/endpoint"
namespacecontroller "k8s.io/kubernetes/pkg/controller/namespace"
@ -160,8 +161,8 @@ func (s *CMServer) Run(_ []string) error {
routeController.Run(s.NodeSyncPeriod)
}
resourceQuotaController := resourcequotacontroller.NewResourceQuotaController(kubeClient)
resourceQuotaController.Run(s.ResourceQuotaSyncPeriod)
go resourcequotacontroller.NewResourceQuotaController(
kubeClient, controller.StaticResyncPeriodFunc(s.ResourceQuotaSyncPeriod)).Run(s.ConcurrentResourceQuotaSyncs, util.NeverStop)
namespaceController := namespacecontroller.NewNamespaceController(kubeClient, &unversioned.APIVersions{}, s.NamespaceSyncPeriod)
namespaceController.Run()

View File

@ -61,6 +61,7 @@ kube-controller-manager
--cluster-cidr=<nil>: CIDR Range for Pods in cluster.
--cluster-name="kubernetes": The instance prefix for the cluster
--concurrent-endpoint-syncs=5: The number of endpoint syncing operations that will be done concurrently. Larger number = faster endpoint updating, but more CPU (and network) load
--concurrent-resource-quota-syncs=5: The number of resource quotas that are allowed to sync concurrently. Larger number = more responsive quota management, but more CPU (and network) load
--concurrent_rc_syncs=5: The number of replication controllers that are allowed to sync concurrently. Larger number = more reponsive replica management, but more CPU (and network) load
--deleting-pods-burst=10: Number of nodes on which pods are bursty deleted in case of node failure. For more details look into RateLimiter.
--deleting-pods-qps=0.1: Number of nodes per second on which pods are deleted in case of node failure.
@ -88,14 +89,14 @@ kube-controller-manager
--pv-recycler-pod-template-filepath-nfs="": The file path to a pod definition used as a template for NFS persistent volume recycling
--pv-recycler-timeout-increment-hostpath=30: the increment of time added per Gi to ActiveDeadlineSeconds for a HostPath scrubber pod. This is for development and testing only and will not work in a multi-node cluster.
--pvclaimbinder-sync-period=10m0s: The period for syncing persistent volumes and persistent volume claims
--resource-quota-sync-period=10s: The period for syncing quota usage status in the system
--resource-quota-sync-period=5m0s: The period for syncing quota usage status in the system
--root-ca-file="": If set, this root certificate authority will be included in service account's token secret. This must be a valid PEM-encoded CA bundle.
--service-account-private-key-file="": Filename containing a PEM-encoded private RSA key used to sign service account tokens.
--service-sync-period=5m0s: The period for syncing services with their external load balancers
--terminated-pod-gc-threshold=12500: Number of terminated pods that can exist before the terminated pod garbage collector starts deleting terminated pods. If <= 0, the terminated pod garbage collector is disabled.
```
###### Auto generated by spf13/cobra on 4-Nov-2015
###### Auto generated by spf13/cobra on 30-Nov-2015
<!-- BEGIN MUNGE: GENERATED_ANALYTICS -->

View File

@ -46,6 +46,7 @@ cluster-domain
cluster-name
cluster-tag
concurrent-endpoint-syncs
concurrent-resource-quota-syncs
config-sync-period
configure-cbr0
container-port

View File

@ -336,7 +336,7 @@ func (s *StoreToJobLister) List() (jobs extensions.JobList, err error) {
return jobs, nil
}
// GetPodControllers returns a list of jobs managing a pod. Returns an error only if no matching jobs are found.
// GetPodJobs returns a list of jobs managing a pod. Returns an error only if no matching jobs are found.
func (s *StoreToJobLister) GetPodJobs(pod *api.Pod) (jobs []extensions.Job, err error) {
var selector labels.Selector
var job extensions.Job

View File

@ -61,6 +61,13 @@ func NoResyncPeriodFunc() time.Duration {
return 0
}
// StaticResyncPeriodFunc returns the resync period specified
func StaticResyncPeriodFunc(resyncPeriod time.Duration) ResyncPeriodFunc {
return func() time.Duration {
return resyncPeriod
}
}
// Expectations are a way for controllers to tell the controller manager what they expect. eg:
// ControllerExpectations: {
// controller1: expects 2 adds in 2 minutes

View File

@ -24,51 +24,154 @@ import (
"k8s.io/kubernetes/pkg/api"
"k8s.io/kubernetes/pkg/api/resource"
"k8s.io/kubernetes/pkg/api/unversioned"
"k8s.io/kubernetes/pkg/client/cache"
client "k8s.io/kubernetes/pkg/client/unversioned"
"k8s.io/kubernetes/pkg/controller"
"k8s.io/kubernetes/pkg/controller/framework"
"k8s.io/kubernetes/pkg/runtime"
"k8s.io/kubernetes/pkg/util"
"k8s.io/kubernetes/pkg/util/workqueue"
"k8s.io/kubernetes/pkg/watch"
)
// ResourceQuotaController is responsible for tracking quota usage status in the system
type ResourceQuotaController struct {
// Must have authority to list all resources in the system, and update quota status
kubeClient client.Interface
syncTime <-chan time.Time
// An index of resource quota objects by namespace
rqIndexer cache.Indexer
// Watches changes to all resource quota
rqController *framework.Controller
// A store of pods, populated by the podController
podStore cache.StoreToPodLister
// Watches changes to all pods (so we can optimize release of compute resources)
podController *framework.Controller
// ResourceQuota objects that need to be synchronized
queue *workqueue.Type
// To allow injection of syncUsage for testing.
syncHandler func(quota api.ResourceQuota) error
syncHandler func(key string) error
// function that controls full recalculation of quota usage
resyncPeriod controller.ResyncPeriodFunc
}
// NewResourceQuotaController creates a new ResourceQuotaController
func NewResourceQuotaController(kubeClient client.Interface) *ResourceQuotaController {
func NewResourceQuotaController(kubeClient client.Interface, resyncPeriod controller.ResyncPeriodFunc) *ResourceQuotaController {
rm := &ResourceQuotaController{
rq := &ResourceQuotaController{
kubeClient: kubeClient,
queue: workqueue.New(),
resyncPeriod: resyncPeriod,
}
rq.rqIndexer, rq.rqController = framework.NewIndexerInformer(
&cache.ListWatch{
ListFunc: func() (runtime.Object, error) {
return rq.kubeClient.ResourceQuotas(api.NamespaceAll).List(unversioned.ListOptions{})
},
WatchFunc: func(options unversioned.ListOptions) (watch.Interface, error) {
return rq.kubeClient.ResourceQuotas(api.NamespaceAll).Watch(options)
},
},
&api.ResourceQuota{},
resyncPeriod(),
framework.ResourceEventHandlerFuncs{
AddFunc: rq.enqueueResourceQuota,
UpdateFunc: func(old, cur interface{}) {
// We are only interested in observing updates to quota.spec to drive updates to quota.status.
// We ignore all updates to quota.Status because they are all driven by this controller.
// IMPORTANT:
// We do not use this function to queue up a full quota recalculation. To do so, would require
// us to enqueue all quota.Status updates, and since quota.Status updates involve additional queries
// that cannot be backed by a cache and result in a full query of a namespace's content, we do not
// want to pay the price on spurious status updates. As a result, we have a separate routine that is
// responsible for enqueue of all resource quotas when doing a full resync (enqueueAll)
oldResourceQuota := old.(*api.ResourceQuota)
curResourceQuota := cur.(*api.ResourceQuota)
if api.Semantic.DeepEqual(oldResourceQuota.Spec.Hard, curResourceQuota.Status.Hard) {
return
}
glog.V(4).Infof("Observed updated quota spec for %v/%v", curResourceQuota.Namespace, curResourceQuota.Name)
rq.enqueueResourceQuota(curResourceQuota)
},
// This will enter the sync loop and no-op, because the controller has been deleted from the store.
// Note that deleting a controller immediately after scaling it to 0 will not work. The recommended
// way of achieving this is by performing a `stop` operation on the controller.
DeleteFunc: rq.enqueueResourceQuota,
},
cache.Indexers{"namespace": cache.MetaNamespaceIndexFunc},
)
// We use this pod controller to rapidly observe when a pod deletion occurs in order to
// release compute resources from any associated quota.
rq.podStore.Store, rq.podController = framework.NewInformer(
&cache.ListWatch{
ListFunc: func() (runtime.Object, error) {
return rq.kubeClient.Pods(api.NamespaceAll).List(unversioned.ListOptions{})
},
WatchFunc: func(options unversioned.ListOptions) (watch.Interface, error) {
return rq.kubeClient.Pods(api.NamespaceAll).Watch(options)
},
},
&api.Pod{},
resyncPeriod(),
framework.ResourceEventHandlerFuncs{
DeleteFunc: rq.deletePod,
},
)
// set the synchronization handler
rm.syncHandler = rm.syncResourceQuota
return rm
rq.syncHandler = rq.syncResourceQuotaFromKey
return rq
}
// Run begins watching and syncing.
func (rm *ResourceQuotaController) Run(period time.Duration) {
rm.syncTime = time.Tick(period)
go util.Until(func() { rm.synchronize() }, period, util.NeverStop)
// enqueueAll is called at the fullResyncPeriod interval to force a full recalculation of quota usage statistics
func (rq *ResourceQuotaController) enqueueAll() {
defer glog.V(4).Infof("Resource quota controller queued all resource quota for full calculation of usage")
for _, k := range rq.rqIndexer.ListKeys() {
rq.queue.Add(k)
}
}
func (rm *ResourceQuotaController) synchronize() {
var resourceQuotas []api.ResourceQuota
list, err := rm.kubeClient.ResourceQuotas(api.NamespaceAll).List(unversioned.ListOptions{})
// obj could be an *api.ResourceQuota, or a DeletionFinalStateUnknown marker item.
func (rq *ResourceQuotaController) enqueueResourceQuota(obj interface{}) {
key, err := controller.KeyFunc(obj)
if err != nil {
glog.Errorf("Synchronization error: %v (%#v)", err, err)
glog.Errorf("Couldn't get key for object %+v: %v", obj, err)
return
}
resourceQuotas = list.Items
for ix := range resourceQuotas {
glog.V(4).Infof("periodic sync of %v/%v", resourceQuotas[ix].Namespace, resourceQuotas[ix].Name)
err := rm.syncHandler(resourceQuotas[ix])
rq.queue.Add(key)
}
// worker runs a worker thread that just dequeues items, processes them, and marks them done.
// It enforces that the syncHandler is never invoked concurrently with the same key.
func (rq *ResourceQuotaController) worker() {
for {
func() {
key, quit := rq.queue.Get()
if quit {
return
}
defer rq.queue.Done(key)
err := rq.syncHandler(key.(string))
if err != nil {
glog.Errorf("Error synchronizing: %v", err)
util.HandleError(err)
}
}()
}
}
// Run begins quota controller using the specified number of workers
func (rq *ResourceQuotaController) Run(workers int, stopCh <-chan struct{}) {
defer util.HandleCrash()
go rq.rqController.Run(stopCh)
go rq.podController.Run(stopCh)
for i := 0; i < workers; i++ {
go util.Until(rq.worker, time.Second, stopCh)
}
go util.Until(func() { rq.enqueueAll() }, rq.resyncPeriod(), stopCh)
<-stopCh
glog.Infof("Shutting down ResourceQuotaController")
rq.queue.ShutDown()
}
// FilterQuotaPods eliminates pods that no longer have a cost against the quota
@ -100,8 +203,29 @@ func FilterQuotaPods(pods []api.Pod) []*api.Pod {
return result
}
// syncResourceQuotaFromKey syncs a quota key
func (rq *ResourceQuotaController) syncResourceQuotaFromKey(key string) (err error) {
startTime := time.Now()
defer func() {
glog.V(4).Infof("Finished syncing resource quota %q (%v)", key, time.Now().Sub(startTime))
}()
obj, exists, err := rq.rqIndexer.GetByKey(key)
if !exists {
glog.Infof("Resource quota has been deleted %v", key)
return nil
}
if err != nil {
glog.Infof("Unable to retrieve resource quota %v from store: %v", key, err)
rq.queue.Add(key)
return err
}
quota := *obj.(*api.ResourceQuota)
return rq.syncResourceQuota(quota)
}
// syncResourceQuota runs a complete sync of current status
func (rm *ResourceQuotaController) syncResourceQuota(quota api.ResourceQuota) (err error) {
func (rq *ResourceQuotaController) syncResourceQuota(quota api.ResourceQuota) (err error) {
// quota is dirty if any part of spec hard limits differs from the status hard limits
dirty := !api.Semantic.DeepEqual(quota.Spec.Hard, quota.Status.Hard)
@ -141,7 +265,7 @@ func (rm *ResourceQuotaController) syncResourceQuota(quota api.ResourceQuota) (e
pods := &api.PodList{}
if set[api.ResourcePods] || set[api.ResourceMemory] || set[api.ResourceCPU] {
pods, err = rm.kubeClient.Pods(usage.Namespace).List(unversioned.ListOptions{})
pods, err = rq.kubeClient.Pods(usage.Namespace).List(unversioned.ListOptions{})
if err != nil {
return err
}
@ -164,31 +288,31 @@ func (rm *ResourceQuotaController) syncResourceQuota(quota api.ResourceQuota) (e
case api.ResourcePods:
value = resource.NewQuantity(int64(len(filteredPods)), resource.DecimalSI)
case api.ResourceServices:
items, err := rm.kubeClient.Services(usage.Namespace).List(unversioned.ListOptions{})
items, err := rq.kubeClient.Services(usage.Namespace).List(unversioned.ListOptions{})
if err != nil {
return err
}
value = resource.NewQuantity(int64(len(items.Items)), resource.DecimalSI)
case api.ResourceReplicationControllers:
items, err := rm.kubeClient.ReplicationControllers(usage.Namespace).List(unversioned.ListOptions{})
items, err := rq.kubeClient.ReplicationControllers(usage.Namespace).List(unversioned.ListOptions{})
if err != nil {
return err
}
value = resource.NewQuantity(int64(len(items.Items)), resource.DecimalSI)
case api.ResourceQuotas:
items, err := rm.kubeClient.ResourceQuotas(usage.Namespace).List(unversioned.ListOptions{})
items, err := rq.kubeClient.ResourceQuotas(usage.Namespace).List(unversioned.ListOptions{})
if err != nil {
return err
}
value = resource.NewQuantity(int64(len(items.Items)), resource.DecimalSI)
case api.ResourceSecrets:
items, err := rm.kubeClient.Secrets(usage.Namespace).List(unversioned.ListOptions{})
items, err := rq.kubeClient.Secrets(usage.Namespace).List(unversioned.ListOptions{})
if err != nil {
return err
}
value = resource.NewQuantity(int64(len(items.Items)), resource.DecimalSI)
case api.ResourcePersistentVolumeClaims:
items, err := rm.kubeClient.PersistentVolumeClaims(usage.Namespace).List(unversioned.ListOptions{})
items, err := rq.kubeClient.PersistentVolumeClaims(usage.Namespace).List(unversioned.ListOptions{})
if err != nil {
return err
}
@ -210,7 +334,7 @@ func (rm *ResourceQuotaController) syncResourceQuota(quota api.ResourceQuota) (e
// update the usage only if it changed
if dirty {
_, err = rm.kubeClient.ResourceQuotas(usage.Namespace).UpdateStatus(&usage)
_, err = rq.kubeClient.ResourceQuotas(usage.Namespace).UpdateStatus(&usage)
return err
}
return nil
@ -280,3 +404,38 @@ func PodHasRequests(pod *api.Pod, resourceName api.ResourceName) bool {
}
return true
}
// When a pod is deleted, enqueue the quota that manages the pod and update its expectations.
// obj could be an *api.Pod, or a DeletionFinalStateUnknown marker item.
func (rq *ResourceQuotaController) deletePod(obj interface{}) {
pod, ok := obj.(*api.Pod)
// When a delete is dropped, the relist will notice a pod in the store not
// in the list, leading to the insertion of a tombstone object which contains
// the deleted key/value. Note that this value might be stale. If the pod
// changed labels the new rc will not be woken up till the periodic resync.
if !ok {
tombstone, ok := obj.(cache.DeletedFinalStateUnknown)
if !ok {
glog.Errorf("Couldn't get object from tombstone %+v, could take up to %v before a quota records the deletion", obj, rq.resyncPeriod())
return
}
pod, ok = tombstone.Obj.(*api.Pod)
if !ok {
glog.Errorf("Tombstone contained object that is not a pod %+v, could take up to %v before quota records the deletion", obj, rq.resyncPeriod())
return
}
}
quotas, err := rq.rqIndexer.Index("namespace", pod)
if err != nil {
glog.Errorf("Couldn't find resource quota associated with pod %+v, could take up to %v before a quota records the deletion", obj, rq.resyncPeriod())
}
if len(quotas) == 0 {
glog.V(4).Infof("No resource quota associated with namespace %q", pod.Namespace)
return
}
for i := range quotas {
quota := quotas[i].(*api.ResourceQuota)
rq.enqueueResourceQuota(quota)
}
}

View File

@ -19,10 +19,12 @@ package resourcequota
import (
"strconv"
"testing"
"time"
"k8s.io/kubernetes/pkg/api"
"k8s.io/kubernetes/pkg/api/resource"
"k8s.io/kubernetes/pkg/client/unversioned/testclient"
"k8s.io/kubernetes/pkg/controller"
"k8s.io/kubernetes/pkg/util/sets"
)
@ -173,7 +175,7 @@ func TestSyncResourceQuota(t *testing.T) {
kubeClient := testclient.NewSimpleFake(&podList, &quota)
ResourceQuotaController := NewResourceQuotaController(kubeClient)
ResourceQuotaController := NewResourceQuotaController(kubeClient, controller.StaticResyncPeriodFunc(time.Second))
err := ResourceQuotaController.syncResourceQuota(quota)
if err != nil {
t.Fatalf("Unexpected error %v", err)
@ -230,7 +232,7 @@ func TestSyncResourceQuotaSpecChange(t *testing.T) {
kubeClient := testclient.NewSimpleFake(&quota)
ResourceQuotaController := NewResourceQuotaController(kubeClient)
ResourceQuotaController := NewResourceQuotaController(kubeClient, controller.StaticResyncPeriodFunc(time.Second))
err := ResourceQuotaController.syncResourceQuota(quota)
if err != nil {
t.Fatalf("Unexpected error %v", err)
@ -277,7 +279,7 @@ func TestSyncResourceQuotaNoChange(t *testing.T) {
kubeClient := testclient.NewSimpleFake(&api.PodList{}, &quota)
ResourceQuotaController := NewResourceQuotaController(kubeClient)
ResourceQuotaController := NewResourceQuotaController(kubeClient, controller.StaticResyncPeriodFunc(time.Second))
err := ResourceQuotaController.syncResourceQuota(quota)
if err != nil {
t.Fatalf("Unexpected error %v", err)