Merge pull request #42498 from jcbsmpsn/add-jitter-to-rotation-threshold

Automatic merge from submit-queue (batch tested with PRs 44364, 44361, 42498)

Fix the certificate rotation threshold and add jitter.

Makes the certificate rotation threshold a fixed value and adds jitter to
spread out the load on the Certificate Signing Request API. The rotation
threshold is now fixed at 20%: the certificate manager attempts to rotate when
20% of the certificate's total duration remains, with jitter of +/-10% of the
total duration (that is, when between 10% and 30% remains). For a certificate
with a duration of 1 month (30 days), that means it will rotate after 24 days,
+/- 3 days.

On a 6000 node cluster, assuming all nodes were added at nearly the same time,
this should spread the 6000 rotations over 6 days (the total range of the
jitter), or ~42 nodes / hour requesting new certificates.
pull/6/head
Kubernetes Submit Queue 2017-04-14 17:56:01 -07:00 committed by GitHub
commit 4e3bbe3915
2 changed files with 38 additions and 52 deletions

View File

@ -75,7 +75,6 @@ type manager struct {
certStore Store
certAccessLock sync.RWMutex
cert *tls.Certificate
shouldRotatePercent uint
}
// NewManager returns a new certificate manager. A certificate manager is
@ -85,25 +84,19 @@ func NewManager(
certSigningRequestClient certificatesclient.CertificateSigningRequestInterface,
template *x509.CertificateRequest,
usages []certificates.KeyUsage,
certificateStore Store,
certRotationPercent uint) (Manager, error) {
certificateStore Store) (Manager, error) {
cert, err := certificateStore.Current()
if err != nil {
return nil, err
}
if certRotationPercent > 100 {
certRotationPercent = 100
}
m := manager{
certSigningRequestClient: certSigningRequestClient,
template: template,
usages: usages,
certStore: certificateStore,
cert: cert,
shouldRotatePercent: certRotationPercent,
}
return &m, nil
@ -129,15 +122,10 @@ func (m *manager) GetCertificate(clientHello *tls.ClientHelloInfo) (*tls.Certifi
// Start will start the background work of rotating the certificates.
func (m *manager) Start() {
if m.shouldRotatePercent < 1 {
glog.V(2).Infof("Certificate rotation is not enabled.")
return
}
// Certificate rotation depends on access to the API server certificate
// signing API, so don't start the certificate manager if we don't have a
// client. This will happen on the master, where the kubelet is responsible
// for bootstrapping the pods of the master components.
// client. This will happen on the cluster master, where the kubelet is
// responsible for bootstrapping the pods of the master components.
if m.certSigningRequestClient == nil {
glog.V(2).Infof("Certificate rotation is not enabled, no connection to the apiserver.")
return
@ -160,9 +148,17 @@ func (m *manager) shouldRotate() bool {
m.certAccessLock.RLock()
defer m.certAccessLock.RUnlock()
notAfter := m.cert.Leaf.NotAfter
total := notAfter.Sub(m.cert.Leaf.NotBefore)
remaining := notAfter.Sub(time.Now())
return remaining < 0 || uint(remaining*100/total) < m.shouldRotatePercent
totalDuration := float64(notAfter.Sub(m.cert.Leaf.NotBefore))
// Use some jitter to set the rotation threshold so each node will rotate
// at approximately 70-90% of the total lifetime of the certificate. With
// jitter, if a number of nodes are added to a cluster at approximately the
// same time (such as cluster creation time), they won't all try to rotate
// certificates at the same time for the rest of the life of the cluster.
jitteryDuration := wait.Jitter(time.Duration(totalDuration), 0.2) - time.Duration(totalDuration*0.3)
rotationThreshold := m.cert.Leaf.NotBefore.Add(jitteryDuration)
return time.Now().After(rotationThreshold)
}
func (m *manager) rotateCerts() error {

View File

@ -78,18 +78,6 @@ O1eRCsCGPAnUCviFgNeH15ug+6N54DTTR6ZV/TTV64FDOcsox9nrhYcmH9sYuITi
-----END CERTIFICATE-----`
)
// TestNewManagerNoRotation checks that NewManager returns without error when
// it is called with a nil signing-request client, an empty request template,
// no key usages, a fakeStore holding a parsed key pair, and a rotation
// percentage of 0.
func TestNewManagerNoRotation(t *testing.T) {
// Parse the certificate and private key test data into a tls.Certificate.
cert, err := tls.X509KeyPair([]byte(certificateData), []byte(privateKeyData))
if err != nil {
t.Fatalf("Unable to initialize a certificate: %v", err)
}
store := &fakeStore{cert: &cert}
// Only the returned error is checked; the manager itself is discarded.
if _, err := NewManager(nil, &x509.CertificateRequest{}, []certificates.KeyUsage{}, store, 0); err != nil {
t.Fatalf("Failed to initialize the certificate manager: %v", err)
}
}
func TestShouldRotate(t *testing.T) {
now := time.Now()
tests := []struct {
@ -98,32 +86,34 @@ func TestShouldRotate(t *testing.T) {
notAfter time.Time
shouldRotate bool
}{
{"half way", now.Add(-24 * time.Hour), now.Add(24 * time.Hour), false},
{"nearly there", now.Add(-100 * time.Hour), now.Add(1 * time.Hour), true},
{"just started", now.Add(-1 * time.Hour), now.Add(100 * time.Hour), false},
{"just issued, still good", now.Add(-1 * time.Hour), now.Add(99 * time.Hour), false},
{"half way expired, still good", now.Add(-24 * time.Hour), now.Add(24 * time.Hour), false},
{"mostly expired, still good", now.Add(-69 * time.Hour), now.Add(31 * time.Hour), false},
{"just about expired, should rotate", now.Add(-91 * time.Hour), now.Add(9 * time.Hour), true},
{"nearly expired, should rotate", now.Add(-99 * time.Hour), now.Add(1 * time.Hour), true},
{"already expired, should rotate", now.Add(-10 * time.Hour), now.Add(-1 * time.Hour), true},
}
for _, test := range tests {
m := manager{
cert: &tls.Certificate{
Leaf: &x509.Certificate{
NotAfter: test.notAfter,
NotBefore: test.notBefore,
t.Run(test.name, func(t *testing.T) {
m := manager{
cert: &tls.Certificate{
Leaf: &x509.Certificate{
NotAfter: test.notAfter,
NotBefore: test.notBefore,
},
},
},
template: &x509.CertificateRequest{},
usages: []certificates.KeyUsage{},
shouldRotatePercent: 10,
}
if m.shouldRotate() != test.shouldRotate {
t.Errorf("For test case %s, time %v, a certificate issued for (%v, %v) should rotate should be %t.",
test.name,
now,
m.cert.Leaf.NotBefore,
m.cert.Leaf.NotAfter,
test.shouldRotate)
}
template: &x509.CertificateRequest{},
usages: []certificates.KeyUsage{},
}
if m.shouldRotate() != test.shouldRotate {
t.Errorf("For time %v, a certificate issued for (%v, %v) should rotate should be %t.",
now,
m.cert.Leaf.NotBefore,
m.cert.Leaf.NotAfter,
test.shouldRotate)
}
})
}
}