Merge pull request #59316 from smarterclayton/terminate_early

Automatic merge from submit-queue (batch tested with PRs 58716, 59977, 59316, 59884, 60117). If you want to cherry-pick this change to another branch, please follow the instructions <a href="https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md">here</a>.

Cap how long the kubelet waits when it has no client cert

If we go a certain amount of time without being able to create a client
cert and we have no current client cert from the store, exit. This
prevents a corrupted local copy of the cert from leaving the Kubelet in a
zombie state forever. Exiting allows a config loop outside the Kubelet
to clean up the file or the bootstrap client cert to get another client
cert.

Five minutes is a totally arbitrary timeout, judged to give enough time for really slow static pods to boot.

@mikedanese

```release-note
Set an upper bound (5 minutes) on how long the Kubelet will wait before exiting when the client cert from disk is missing or invalid. This prevents the Kubelet from waiting forever without attempting to bootstrap new client credentials.
```
pull/6/head
Kubernetes Submit Queue 2018-02-21 15:40:41 -08:00 committed by GitHub
commit 2bbaf430d8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 46 additions and 12 deletions

View File

@ -527,9 +527,11 @@ func run(s *options.KubeletServer, kubeDeps *kubelet.Dependencies) (err error) {
if err != nil {
return err
}
// we set exitIfExpired to true because we use this client configuration to request new certs - if we are unable
// to request new certs, we will be unable to continue normal operation
if err := kubeletcertificate.UpdateTransport(wait.NeverStop, clientConfig, clientCertificateManager, true); err != nil {
// we set exitAfter to five minutes because we use this client configuration to request new certs - if we are unable
// to request new certs, we will be unable to continue normal operation. Exiting the process allows a wrapper
// or the bootstrapping credentials to potentially lay down new initial config.
if err := kubeletcertificate.UpdateTransport(wait.NeverStop, clientConfig, clientCertificateManager, 5*time.Minute); err != nil {
return err
}
}

View File

@ -43,15 +43,21 @@ import (
// connections, forcing the client to re-handshake with the server and use the
// new certificate.
//
// The exitAfter duration, if set, will terminate the current process if a certificate
// is not available from the store (because it has been deleted on disk or is corrupt)
// or if the certificate has expired and the server is responsive. This allows the
// process parent or the bootstrap credentials an opportunity to retrieve a new initial
// certificate.
//
// stopCh should be used to indicate when the transport is unused and doesn't need
// to continue checking the manager.
func UpdateTransport(stopCh <-chan struct{}, clientConfig *restclient.Config, clientCertificateManager certificate.Manager, exitIfExpired bool) error {
return updateTransport(stopCh, 10*time.Second, clientConfig, clientCertificateManager, exitIfExpired)
func UpdateTransport(stopCh <-chan struct{}, clientConfig *restclient.Config, clientCertificateManager certificate.Manager, exitAfter time.Duration) error {
return updateTransport(stopCh, 10*time.Second, clientConfig, clientCertificateManager, exitAfter)
}
// updateTransport is an internal method that exposes how often this method checks that the
// client cert has changed. Intended for testing.
func updateTransport(stopCh <-chan struct{}, period time.Duration, clientConfig *restclient.Config, clientCertificateManager certificate.Manager, exitIfExpired bool) error {
// client cert has changed.
func updateTransport(stopCh <-chan struct{}, period time.Duration, clientConfig *restclient.Config, clientCertificateManager certificate.Manager, exitAfter time.Duration) error {
if clientConfig.Transport != nil {
return fmt.Errorf("there is already a transport configured")
}
@ -77,16 +83,35 @@ func updateTransport(stopCh <-chan struct{}, period time.Duration, clientConfig
conns: make(map[*closableConn]struct{}),
}
lastCertAvailable := time.Now()
lastCert := clientCertificateManager.Current()
go wait.Until(func() {
curr := clientCertificateManager.Current()
if exitIfExpired && curr != nil && time.Now().After(curr.Leaf.NotAfter) {
if clientCertificateManager.ServerHealthy() {
glog.Fatalf("The currently active client certificate has expired and the server is responsive, exiting.")
if exitAfter > 0 {
now := time.Now()
if curr == nil {
// the certificate has been deleted from disk or is otherwise corrupt
if now.After(lastCertAvailable.Add(exitAfter)) {
if clientCertificateManager.ServerHealthy() {
glog.Fatalf("It has been %s since a valid client cert was found and the server is responsive, exiting.", exitAfter)
} else {
glog.Errorf("It has been %s since a valid client cert was found, but the server is not responsive. A restart may be necessary to retrieve new initial credentials.", exitAfter)
}
}
} else {
glog.Errorf("The currently active client certificate has expired, but the server is not responsive. A restart may be necessary to retrieve new initial credentials.")
// the certificate is expired
if now.After(curr.Leaf.NotAfter) {
if clientCertificateManager.ServerHealthy() {
glog.Fatalf("The currently active client certificate has expired and the server is responsive, exiting.")
} else {
glog.Errorf("The currently active client certificate has expired, but the server is not responsive. A restart may be necessary to retrieve new initial credentials.")
}
}
lastCertAvailable = now
}
}
if curr == nil || lastCert == curr {
// Cert hasn't been rotated.
return

View File

@ -187,7 +187,7 @@ func TestRotateShutsDownConnections(t *testing.T) {
}
// Check for a new cert every 10 milliseconds
if err := updateTransport(stop, 10*time.Millisecond, c, m, false); err != nil {
if err := updateTransport(stop, 10*time.Millisecond, c, m, 0); err != nil {
t.Fatal(err)
}

View File

@ -268,6 +268,13 @@ func getCurrentCertificateOrBootstrap(
return nil, false, fmt.Errorf("unable to parse certificate data: %v", err)
}
bootstrapCert.Leaf = certs[0]
if _, err := store.Update(bootstrapCertificatePEM, bootstrapKeyPEM); err != nil {
utilruntime.HandleError(fmt.Errorf("Unable to set the cert/key pair to the bootstrap certificate: %v", err))
} else {
glog.V(4).Infof("Updated the store to contain the initial bootstrap certificate")
}
return &bootstrapCert, true, nil
}