Added optional delays to starting controller managers. #22669

Robert Rati 2016-03-30 10:07:30 -04:00
parent 8eb19c7889
commit 83de3e9733
7 changed files with 428 additions and 329 deletions


@ -75,6 +75,11 @@ import (
"github.com/spf13/pflag" "github.com/spf13/pflag"
) )
const (
// Jitter used when starting controller managers
ControllerStartJitter = 1.0
)
// NewControllerManagerCommand creates a *cobra.Command object with default parameters
func NewControllerManagerCommand() *cobra.Command {
    s := options.NewCMServer()
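Editor's note on the jitter: every sleep added below calls wait.Jitter with the new ControllerStartJitter constant. A self-contained sketch of wait.Jitter's assumed semantics (the real helper lives in Kubernetes' wait package; this mirrors its behavior, not its source):

```go
package main

import (
	"fmt"
	"math/rand"
	"time"
)

// jitter mirrors the assumed semantics of wait.Jitter: the input duration
// plus a uniformly random extra of up to maxFactor times that duration.
func jitter(duration time.Duration, maxFactor float64) time.Duration {
	if maxFactor <= 0.0 {
		maxFactor = 1.0
	}
	return duration + time.Duration(rand.Float64()*maxFactor*float64(duration))
}

func main() {
	// With ControllerStartJitter = 1.0 and a 5s interval, each pause lands
	// uniformly in [5s, 10s).
	fmt.Println(jitter(5*time.Second, 1.0))

	// With the default --controller-start-interval=0, the random extra is
	// rand * 1.0 * 0 = 0, so every inserted sleep is a no-op and startup
	// behavior is unchanged unless the flag is set.
	fmt.Println(jitter(0, 1.0)) // 0s
}
```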
@ -186,6 +191,7 @@ func Run(s *options.CMServer) error {
func StartControllers(s *options.CMServer, kubeClient *client.Client, kubeconfig *restclient.Config, stop <-chan struct{}) error {
    go endpointcontroller.NewEndpointController(clientset.NewForConfigOrDie(restclient.AddUserAgent(kubeconfig, "endpoint-controller")), ResyncPeriod(s)).
        Run(s.ConcurrentEndpointSyncs, wait.NeverStop)
    time.Sleep(wait.Jitter(s.ControllerStartInterval.Duration, ControllerStartJitter))

    go replicationcontroller.NewReplicationManager(
        clientset.NewForConfigOrDie(restclient.AddUserAgent(kubeconfig, "replication-controller")),
@ -193,10 +199,12 @@ func StartControllers(s *options.CMServer, kubeClient *client.Client, kubeconfig
        replicationcontroller.BurstReplicas,
        s.LookupCacheSizeForRC,
    ).Run(s.ConcurrentRCSyncs, wait.NeverStop)
    time.Sleep(wait.Jitter(s.ControllerStartInterval.Duration, ControllerStartJitter))

    if s.TerminatedPodGCThreshold > 0 {
        go gc.New(clientset.NewForConfigOrDie(restclient.AddUserAgent(kubeconfig, "garbage-collector")), ResyncPeriod(s), s.TerminatedPodGCThreshold).
            Run(wait.NeverStop)
        time.Sleep(wait.Jitter(s.ControllerStartInterval.Duration, ControllerStartJitter))
    }

    cloud, err := cloudprovider.InitCloudProvider(s.CloudProvider, s.CloudConfigFile)
@ -211,11 +219,13 @@ func StartControllers(s *options.CMServer, kubeClient *client.Client, kubeconfig
        flowcontrol.NewTokenBucketRateLimiter(s.DeletingPodsQps, s.DeletingPodsBurst),
        s.NodeMonitorGracePeriod.Duration, s.NodeStartupGracePeriod.Duration, s.NodeMonitorPeriod.Duration, clusterCIDR, s.AllocateNodeCIDRs)
    nodeController.Run(s.NodeSyncPeriod.Duration)
    time.Sleep(wait.Jitter(s.ControllerStartInterval.Duration, ControllerStartJitter))

    serviceController := servicecontroller.New(cloud, clientset.NewForConfigOrDie(restclient.AddUserAgent(kubeconfig, "service-controller")), s.ClusterName)
    if err := serviceController.Run(s.ServiceSyncPeriod.Duration, s.NodeSyncPeriod.Duration); err != nil {
        glog.Errorf("Failed to start service controller: %v", err)
    }
    time.Sleep(wait.Jitter(s.ControllerStartInterval.Duration, ControllerStartJitter))

    if s.AllocateNodeCIDRs {
        if cloud == nil {
@ -225,6 +235,7 @@ func StartControllers(s *options.CMServer, kubeClient *client.Client, kubeconfig
        } else {
            routeController := routecontroller.New(routes, clientset.NewForConfigOrDie(restclient.AddUserAgent(kubeconfig, "route-controller")), s.ClusterName, clusterCIDR)
            routeController.Run(s.NodeSyncPeriod.Duration)
            time.Sleep(wait.Jitter(s.ControllerStartInterval.Duration, ControllerStartJitter))
        }
    } else {
        glog.Infof("allocate-node-cidrs set to %v, node controller not creating routes", s.AllocateNodeCIDRs)
@ -249,6 +260,7 @@ func StartControllers(s *options.CMServer, kubeClient *client.Client, kubeconfig
        GroupKindsToReplenish: groupKindsToReplenish,
    }
    go resourcequotacontroller.NewResourceQuotaController(resourceQuotaControllerOptions).Run(s.ConcurrentResourceQuotaSyncs, wait.NeverStop)
    time.Sleep(wait.Jitter(s.ControllerStartInterval.Duration, ControllerStartJitter))

    // If apiserver is not running we should wait for some time and fail only then. This is particularly
    // important when we start apiserver and controller manager at the same time.
@ -279,6 +291,7 @@ func StartControllers(s *options.CMServer, kubeClient *client.Client, kubeconfig
    }
    namespaceController := namespacecontroller.NewNamespaceController(namespaceKubeClient, namespaceClientPool, groupVersionResources, s.NamespaceSyncPeriod.Duration, api.FinalizerKubernetes)
    go namespaceController.Run(s.ConcurrentNamespaceSyncs, wait.NeverStop)
    time.Sleep(wait.Jitter(s.ControllerStartInterval.Duration, ControllerStartJitter))

    groupVersion := "extensions/v1beta1"
    resources, found := resourceMap[groupVersion]
@ -297,30 +310,35 @@ func StartControllers(s *options.CMServer, kubeClient *client.Client, kubeconfig
            )
            go podautoscaler.NewHorizontalController(hpaClient.Core(), hpaClient.Extensions(), hpaClient, metricsClient, s.HorizontalPodAutoscalerSyncPeriod.Duration).
                Run(wait.NeverStop)
            time.Sleep(wait.Jitter(s.ControllerStartInterval.Duration, ControllerStartJitter))
        }

        if containsResource(resources, "daemonsets") {
            glog.Infof("Starting daemon set controller")
            go daemon.NewDaemonSetsController(clientset.NewForConfigOrDie(restclient.AddUserAgent(kubeconfig, "daemon-set-controller")), ResyncPeriod(s), s.LookupCacheSizeForDaemonSet).
                Run(s.ConcurrentDaemonSetSyncs, wait.NeverStop)
            time.Sleep(wait.Jitter(s.ControllerStartInterval.Duration, ControllerStartJitter))
        }

        if containsResource(resources, "jobs") {
            glog.Infof("Starting job controller")
            go job.NewJobController(clientset.NewForConfigOrDie(restclient.AddUserAgent(kubeconfig, "job-controller")), ResyncPeriod(s)).
                Run(s.ConcurrentJobSyncs, wait.NeverStop)
            time.Sleep(wait.Jitter(s.ControllerStartInterval.Duration, ControllerStartJitter))
        }

        if containsResource(resources, "deployments") {
            glog.Infof("Starting deployment controller")
            go deployment.NewDeploymentController(clientset.NewForConfigOrDie(restclient.AddUserAgent(kubeconfig, "deployment-controller")), ResyncPeriod(s)).
                Run(s.ConcurrentDeploymentSyncs, wait.NeverStop)
            time.Sleep(wait.Jitter(s.ControllerStartInterval.Duration, ControllerStartJitter))
        }

        if containsResource(resources, "replicasets") {
            glog.Infof("Starting ReplicaSet controller")
            go replicaset.NewReplicaSetController(clientset.NewForConfigOrDie(restclient.AddUserAgent(kubeconfig, "replicaset-controller")), ResyncPeriod(s), replicaset.BurstReplicas, s.LookupCacheSizeForRS).
                Run(s.ConcurrentRSSyncs, wait.NeverStop)
            time.Sleep(wait.Jitter(s.ControllerStartInterval.Duration, ControllerStartJitter))
        }
    }
@ -332,6 +350,7 @@ func StartControllers(s *options.CMServer, kubeClient *client.Client, kubeconfig
    pvclaimBinder := persistentvolumecontroller.NewPersistentVolumeClaimBinder(clientset.NewForConfigOrDie(restclient.AddUserAgent(kubeconfig, "persistent-volume-binder")), s.PVClaimBinderSyncPeriod.Duration)
    pvclaimBinder.Run()
    time.Sleep(wait.Jitter(s.ControllerStartInterval.Duration, ControllerStartJitter))

    pvRecycler, err := persistentvolumecontroller.NewPersistentVolumeRecycler(
        clientset.NewForConfigOrDie(restclient.AddUserAgent(kubeconfig, "persistent-volume-recycler")),
@ -344,6 +363,7 @@ func StartControllers(s *options.CMServer, kubeClient *client.Client, kubeconfig
glog.Fatalf("Failed to start persistent volume recycler: %+v", err) glog.Fatalf("Failed to start persistent volume recycler: %+v", err)
} }
pvRecycler.Run() pvRecycler.Run()
time.Sleep(wait.Jitter(s.ControllerStartInterval.Duration, ControllerStartJitter))
if provisioner != nil { if provisioner != nil {
pvController, err := persistentvolumecontroller.NewPersistentVolumeProvisionerController(persistentvolumecontroller.NewControllerClient(clientset.NewForConfigOrDie(restclient.AddUserAgent(kubeconfig, "persistent-volume-provisioner"))), s.PVClaimBinderSyncPeriod.Duration, s.ClusterName, volumePlugins, provisioner, cloud) pvController, err := persistentvolumecontroller.NewPersistentVolumeProvisionerController(persistentvolumecontroller.NewControllerClient(clientset.NewForConfigOrDie(restclient.AddUserAgent(kubeconfig, "persistent-volume-provisioner"))), s.PVClaimBinderSyncPeriod.Duration, s.ClusterName, volumePlugins, provisioner, cloud)
@ -351,6 +371,7 @@ func StartControllers(s *options.CMServer, kubeClient *client.Client, kubeconfig
glog.Fatalf("Failed to start persistent volume provisioner controller: %+v", err) glog.Fatalf("Failed to start persistent volume provisioner controller: %+v", err)
} }
pvController.Run() pvController.Run()
time.Sleep(wait.Jitter(s.ControllerStartInterval.Duration, ControllerStartJitter))
} }
var rootCA []byte var rootCA []byte
@ -379,6 +400,7 @@ func StartControllers(s *options.CMServer, kubeClient *client.Client, kubeconfig
                    RootCA: rootCA,
                },
            ).Run()
            time.Sleep(wait.Jitter(s.ControllerStartInterval.Duration, ControllerStartJitter))
        }
    }
@ -386,6 +408,7 @@ func StartControllers(s *options.CMServer, kubeClient *client.Client, kubeconfig
clientset.NewForConfigOrDie(restclient.AddUserAgent(kubeconfig, "service-account-controller")), clientset.NewForConfigOrDie(restclient.AddUserAgent(kubeconfig, "service-account-controller")),
serviceaccountcontroller.DefaultServiceAccountsControllerOptions(), serviceaccountcontroller.DefaultServiceAccountsControllerOptions(),
).Run() ).Run()
time.Sleep(wait.Jitter(s.ControllerStartInterval.Duration, ControllerStartJitter))
select {} select {}
} }
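Editor's note: the same start-then-sleep pattern follows every controller launched above, so startup is serialized and the burst of client creation and initial list/watch traffic against the apiserver is spread out over time. A hypothetical helper (not part of this commit) that makes the repeated pattern explicit:

```go
// startThenPause is a hypothetical refactoring of the pattern repeated
// throughout StartControllers: kick off a controller, then block for a
// jittered interval so the next controller's initial sync does not land
// on the apiserver at the same instant.
func startThenPause(s *options.CMServer, start func()) {
	start()
	time.Sleep(wait.Jitter(s.ControllerStartInterval.Duration, ControllerStartJitter))
}
```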


@ -81,9 +81,10 @@ func NewCMServer() *CMServer {
                    IncrementTimeoutHostPath: 30,
                },
            },
            KubeAPIQPS:              20.0,
            KubeAPIBurst:            30,
            LeaderElection:          leaderelection.DefaultLeaderElectionConfiguration(),
            ControllerStartInterval: unversioned.Duration{0 * time.Second},
        },
    }
    return &s
@ -146,5 +147,6 @@ func (s *CMServer) AddFlags(fs *pflag.FlagSet) {
    fs.StringVar(&s.RootCAFile, "root-ca-file", s.RootCAFile, "If set, this root certificate authority will be included in service account's token secret. This must be a valid PEM-encoded CA bundle.")
    fs.Float32Var(&s.KubeAPIQPS, "kube-api-qps", s.KubeAPIQPS, "QPS to use while talking with kubernetes apiserver")
    fs.IntVar(&s.KubeAPIBurst, "kube-api-burst", s.KubeAPIBurst, "Burst to use while talking with kubernetes apiserver")
    fs.DurationVar(&s.ControllerStartInterval.Duration, "controller-start-interval", s.ControllerStartInterval.Duration, "Interval between starting controller managers.")
    leaderelection.BindFlags(&s.LeaderElection, fs)
}
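Editor's note: pflag's DurationVar binds the embedded time.Duration directly, so the new flag accepts any time.ParseDuration string. A standalone sketch of the parsing behavior (the flag set name and local variable are illustrative; the real binding targets s.ControllerStartInterval.Duration as shown above):

```go
package main

import (
	"fmt"
	"time"

	"github.com/spf13/pflag"
)

func main() {
	fs := pflag.NewFlagSet("kube-controller-manager", pflag.ContinueOnError)
	var interval time.Duration
	fs.DurationVar(&interval, "controller-start-interval", 0, "Interval between starting controller managers.")

	// Values use Go duration syntax ("10s", "1m30s"); the default 0 keeps
	// the delays disabled.
	if err := fs.Parse([]string{"--controller-start-interval=10s"}); err != nil {
		panic(err)
	}
	fmt.Println(interval) // 10s
}
```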


@ -67,6 +67,7 @@ kube-controller-manager
--concurrent-replicaset-syncs=5: The number of replica sets that are allowed to sync concurrently. Larger number = more responsive replica management, but more CPU (and network) load
--concurrent-resource-quota-syncs=5: The number of resource quotas that are allowed to sync concurrently. Larger number = more responsive quota management, but more CPU (and network) load
--concurrent_rc_syncs=5: The number of replication controllers that are allowed to sync concurrently. Larger number = more responsive replica management, but more CPU (and network) load
--controller-start-interval=0: Interval between starting controller managers.
--daemonset-lookup-cache-size=1024: The the size of lookup cache for daemonsets. Larger number = more responsive daemonsets, but more MEM load.
--deleting-pods-burst=10: Number of nodes on which pods are bursty deleted in case of node failure. For more details look into RateLimiter.
--deleting-pods-qps=0.1: Number of nodes per second on which pods are deleted in case of node failure.
@ -108,7 +109,7 @@ kube-controller-manager
--terminated-pod-gc-threshold=12500: Number of terminated pods that can exist before the terminated pod garbage collector starts deleting terminated pods. If <= 0, the terminated pod garbage collector is disabled.
```
###### Auto generated by spf13/cobra on 29-Feb-2016
###### Auto generated by spf13/cobra on 5-Apr-2016
<!-- BEGIN MUNGE: GENERATED_ANALYTICS -->


@ -65,6 +65,7 @@ conntrack-tcp-timeout-established
contain-pod-resources
container-port
container-runtime
controller-start-interval
cors-allowed-origins
cpu-cfs-quota
cpu-percent


@ -127,6 +127,9 @@ func DeepCopy_componentconfig_KubeControllerManagerConfiguration(in KubeControll
    if err := DeepCopy_componentconfig_VolumeConfiguration(in.VolumeConfiguration, &out.VolumeConfiguration, c); err != nil {
        return err
    }
    if err := unversioned.DeepCopy_unversioned_Duration(in.ControllerStartInterval, &out.ControllerStartInterval, c); err != nil {
        return err
    }
    return nil
}
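Editor's note: the generated deep copy treats the new field like any other Duration. Since the type holds only a value-typed time.Duration, the copy reduces to a plain assignment; a hedged sketch of what the called helper presumably looks like (its actual generated form may differ):

```go
// Assumed shape of the generated helper: unversioned.Duration carries only
// a value-typed time.Duration, so deep copy is a simple assignment.
func DeepCopy_unversioned_Duration(in Duration, out *Duration, c *conversion.Cloner) error {
	*out = in
	return nil
}
```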

File diff suppressed because it is too large.


@ -510,6 +510,8 @@ type KubeControllerManagerConfiguration struct {
    LeaderElection LeaderElectionConfiguration `json:"leaderElection"`
    // vloumeConfiguration holds configuration for volume related features.
    VolumeConfiguration VolumeConfiguration `json:"volumeConfiguration"`
    // How long to wait between starting controller managers
    ControllerStartInterval unversioned.Duration `json:"controllerStartInterval"`
}
// VolumeConfiguration contains *all* enumerated flags meant to configure all volume // VolumeConfiguration contains *all* enumerated flags meant to configure all volume
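Editor's note: the json tag means the new knob round-trips through serialized component configuration. A self-contained sketch using a stand-in for unversioned.Duration, which is assumed to marshal as a Go duration string like the other duration-valued fields:

```go
package main

import (
	"encoding/json"
	"fmt"
	"time"
)

// duration is a stand-in for unversioned.Duration: it wraps time.Duration
// and (assumption) marshals to a Go duration string rather than an integer
// nanosecond count.
type duration struct{ time.Duration }

func (d duration) MarshalJSON() ([]byte, error) {
	return json.Marshal(d.Duration.String())
}

type config struct {
	ControllerStartInterval duration `json:"controllerStartInterval"`
}

func main() {
	b, err := json.Marshal(config{duration{10 * time.Second}})
	if err != nil {
		panic(err)
	}
	fmt.Println(string(b)) // {"controllerStartInterval":"10s"}
}
```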