mirror of https://github.com/k3s-io/k3s
Merge pull request #42498 from jcbsmpsn/add-jitter-to-rotation-threshold
Automatic merge from submit-queue (batch tested with PRs 44364, 44361, 42498) Fix the certificate rotation threshold and add jitter. Adjusts the certificate rotation threshold to be fixed, with some jitter to spread out the load on the Certificate Signing Request API. The rotation threshold is fixed at 20% now, meaning when 20% of the certificate's total duration is remaining, the certificate manager will attempt to rotate, with jitter +/-10%. For certificates of duration 1 month that means they will rotate after 24 days, +/- 3 days. On a 6000 node cluster, assuming all nodes added at nearly the same time, this should result in 6000 nodes rotating spread over 6 days (total range of the jitter), or ~42 nodes / hour requesting new certificates.pull/6/head
commit
4e3bbe3915
|
@ -75,7 +75,6 @@ type manager struct {
|
|||
certStore Store
|
||||
certAccessLock sync.RWMutex
|
||||
cert *tls.Certificate
|
||||
shouldRotatePercent uint
|
||||
}
|
||||
|
||||
// NewManager returns a new certificate manager. A certificate manager is
|
||||
|
@ -85,25 +84,19 @@ func NewManager(
|
|||
certSigningRequestClient certificatesclient.CertificateSigningRequestInterface,
|
||||
template *x509.CertificateRequest,
|
||||
usages []certificates.KeyUsage,
|
||||
certificateStore Store,
|
||||
certRotationPercent uint) (Manager, error) {
|
||||
certificateStore Store) (Manager, error) {
|
||||
|
||||
cert, err := certificateStore.Current()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if certRotationPercent > 100 {
|
||||
certRotationPercent = 100
|
||||
}
|
||||
|
||||
m := manager{
|
||||
certSigningRequestClient: certSigningRequestClient,
|
||||
template: template,
|
||||
usages: usages,
|
||||
certStore: certificateStore,
|
||||
cert: cert,
|
||||
shouldRotatePercent: certRotationPercent,
|
||||
}
|
||||
|
||||
return &m, nil
|
||||
|
@ -129,15 +122,10 @@ func (m *manager) GetCertificate(clientHello *tls.ClientHelloInfo) (*tls.Certifi
|
|||
|
||||
// Start will start the background work of rotating the certificates.
|
||||
func (m *manager) Start() {
|
||||
if m.shouldRotatePercent < 1 {
|
||||
glog.V(2).Infof("Certificate rotation is not enabled.")
|
||||
return
|
||||
}
|
||||
|
||||
// Certificate rotation depends on access to the API server certificate
|
||||
// signing API, so don't start the certificate manager if we don't have a
|
||||
// client. This will happen on the master, where the kubelet is responsible
|
||||
// for bootstrapping the pods of the master components.
|
||||
// client. This will happen on the cluster master, where the kubelet is
|
||||
// responsible for bootstrapping the pods of the master components.
|
||||
if m.certSigningRequestClient == nil {
|
||||
glog.V(2).Infof("Certificate rotation is not enabled, no connection to the apiserver.")
|
||||
return
|
||||
|
@ -160,9 +148,17 @@ func (m *manager) shouldRotate() bool {
|
|||
m.certAccessLock.RLock()
|
||||
defer m.certAccessLock.RUnlock()
|
||||
notAfter := m.cert.Leaf.NotAfter
|
||||
total := notAfter.Sub(m.cert.Leaf.NotBefore)
|
||||
remaining := notAfter.Sub(time.Now())
|
||||
return remaining < 0 || uint(remaining*100/total) < m.shouldRotatePercent
|
||||
totalDuration := float64(notAfter.Sub(m.cert.Leaf.NotBefore))
|
||||
|
||||
// Use some jitter to set the rotation threshold so each node will rotate
|
||||
// at approximately 70-90% of the total lifetime of the certificate. With
|
||||
// jitter, if a number of nodes are added to a cluster at approximately the
|
||||
// same time (such as cluster creation time), they won't all try to rotate
|
||||
// certificates at the same time for the rest of the life of the cluster.
|
||||
jitteryDuration := wait.Jitter(time.Duration(totalDuration), 0.2) - time.Duration(totalDuration*0.3)
|
||||
|
||||
rotationThreshold := m.cert.Leaf.NotBefore.Add(jitteryDuration)
|
||||
return time.Now().After(rotationThreshold)
|
||||
}
|
||||
|
||||
func (m *manager) rotateCerts() error {
|
||||
|
|
|
@ -78,18 +78,6 @@ O1eRCsCGPAnUCviFgNeH15ug+6N54DTTR6ZV/TTV64FDOcsox9nrhYcmH9sYuITi
|
|||
-----END CERTIFICATE-----`
|
||||
)
|
||||
|
||||
func TestNewManagerNoRotation(t *testing.T) {
|
||||
cert, err := tls.X509KeyPair([]byte(certificateData), []byte(privateKeyData))
|
||||
if err != nil {
|
||||
t.Fatalf("Unable to initialize a certificate: %v", err)
|
||||
}
|
||||
|
||||
store := &fakeStore{cert: &cert}
|
||||
if _, err := NewManager(nil, &x509.CertificateRequest{}, []certificates.KeyUsage{}, store, 0); err != nil {
|
||||
t.Fatalf("Failed to initialize the certificate manager: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestShouldRotate(t *testing.T) {
|
||||
now := time.Now()
|
||||
tests := []struct {
|
||||
|
@ -98,32 +86,34 @@ func TestShouldRotate(t *testing.T) {
|
|||
notAfter time.Time
|
||||
shouldRotate bool
|
||||
}{
|
||||
{"half way", now.Add(-24 * time.Hour), now.Add(24 * time.Hour), false},
|
||||
{"nearly there", now.Add(-100 * time.Hour), now.Add(1 * time.Hour), true},
|
||||
{"just started", now.Add(-1 * time.Hour), now.Add(100 * time.Hour), false},
|
||||
{"just issued, still good", now.Add(-1 * time.Hour), now.Add(99 * time.Hour), false},
|
||||
{"half way expired, still good", now.Add(-24 * time.Hour), now.Add(24 * time.Hour), false},
|
||||
{"mostly expired, still good", now.Add(-69 * time.Hour), now.Add(31 * time.Hour), false},
|
||||
{"just about expired, should rotate", now.Add(-91 * time.Hour), now.Add(9 * time.Hour), true},
|
||||
{"nearly expired, should rotate", now.Add(-99 * time.Hour), now.Add(1 * time.Hour), true},
|
||||
{"already expired, should rotate", now.Add(-10 * time.Hour), now.Add(-1 * time.Hour), true},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
m := manager{
|
||||
cert: &tls.Certificate{
|
||||
Leaf: &x509.Certificate{
|
||||
NotAfter: test.notAfter,
|
||||
NotBefore: test.notBefore,
|
||||
t.Run(test.name, func(t *testing.T) {
|
||||
m := manager{
|
||||
cert: &tls.Certificate{
|
||||
Leaf: &x509.Certificate{
|
||||
NotAfter: test.notAfter,
|
||||
NotBefore: test.notBefore,
|
||||
},
|
||||
},
|
||||
},
|
||||
template: &x509.CertificateRequest{},
|
||||
usages: []certificates.KeyUsage{},
|
||||
shouldRotatePercent: 10,
|
||||
}
|
||||
|
||||
if m.shouldRotate() != test.shouldRotate {
|
||||
t.Errorf("For test case %s, time %v, a certificate issued for (%v, %v) should rotate should be %t.",
|
||||
test.name,
|
||||
now,
|
||||
m.cert.Leaf.NotBefore,
|
||||
m.cert.Leaf.NotAfter,
|
||||
test.shouldRotate)
|
||||
}
|
||||
template: &x509.CertificateRequest{},
|
||||
usages: []certificates.KeyUsage{},
|
||||
}
|
||||
if m.shouldRotate() != test.shouldRotate {
|
||||
t.Errorf("For time %v, a certificate issued for (%v, %v) should rotate should be %t.",
|
||||
now,
|
||||
m.cert.Leaf.NotBefore,
|
||||
m.cert.Leaf.NotAfter,
|
||||
test.shouldRotate)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue