mirror of https://github.com/k3s-io/k3s
Merge pull request #59316 from smarterclayton/terminate_early
Automatic merge from submit-queue (batch tested with PRs 58716, 59977, 59316, 59884, 60117). If you want to cherry-pick this change to another branch, please follow the instructions <a href="https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md">here</a>. Cap how long the kubelet waits when it has no client cert. If we go a certain amount of time without being able to create a client cert and we have no current client cert from the store, exit. This prevents a corrupted local copy of the cert from leaving the Kubelet in a zombie state forever. Exiting allows a config loop outside the Kubelet to clean up the file or the bootstrap client cert to get another client cert. Five minutes is a totally arbitrary timeout, judged to give enough time for really slow static pods to boot. @mikedanese ```release-note Set an upper bound (5 minutes) on how long the Kubelet will wait before exiting when the client cert from disk is missing or invalid. This prevents the Kubelet from waiting forever without attempting to bootstrap new client credentials. ``` pull/6/head
commit
2bbaf430d8
|
@ -527,9 +527,11 @@ func run(s *options.KubeletServer, kubeDeps *kubelet.Dependencies) (err error) {
|
|||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// we set exitIfExpired to true because we use this client configuration to request new certs - if we are unable
|
||||
// to request new certs, we will be unable to continue normal operation
|
||||
if err := kubeletcertificate.UpdateTransport(wait.NeverStop, clientConfig, clientCertificateManager, true); err != nil {
|
||||
|
||||
// we set exitAfter to five minutes because we use this client configuration to request new certs - if we are unable
|
||||
// to request new certs, we will be unable to continue normal operation. Exiting the process allows a wrapper
|
||||
// or the bootstrapping credentials to potentially lay down new initial config.
|
||||
if err := kubeletcertificate.UpdateTransport(wait.NeverStop, clientConfig, clientCertificateManager, 5*time.Minute); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
|
|
@ -43,15 +43,21 @@ import (
|
|||
// connections, forcing the client to re-handshake with the server and use the
|
||||
// new certificate.
|
||||
//
|
||||
// The exitAfter duration, if set, will terminate the current process if a certificate
|
||||
// is not available from the store (because it has been deleted on disk or is corrupt)
|
||||
// or if the certificate has expired and the server is responsive. This allows the
|
||||
// process parent or the bootstrap credentials an opportunity to retrieve a new initial
|
||||
// certificate.
|
||||
//
|
||||
// stopCh should be used to indicate when the transport is unused and doesn't need
|
||||
// to continue checking the manager.
|
||||
func UpdateTransport(stopCh <-chan struct{}, clientConfig *restclient.Config, clientCertificateManager certificate.Manager, exitIfExpired bool) error {
|
||||
return updateTransport(stopCh, 10*time.Second, clientConfig, clientCertificateManager, exitIfExpired)
|
||||
func UpdateTransport(stopCh <-chan struct{}, clientConfig *restclient.Config, clientCertificateManager certificate.Manager, exitAfter time.Duration) error {
|
||||
return updateTransport(stopCh, 10*time.Second, clientConfig, clientCertificateManager, exitAfter)
|
||||
}
|
||||
|
||||
// updateTransport is an internal method that exposes how often this method checks that the
|
||||
// client cert has changed. Intended for testing.
|
||||
func updateTransport(stopCh <-chan struct{}, period time.Duration, clientConfig *restclient.Config, clientCertificateManager certificate.Manager, exitIfExpired bool) error {
|
||||
// client cert has changed.
|
||||
func updateTransport(stopCh <-chan struct{}, period time.Duration, clientConfig *restclient.Config, clientCertificateManager certificate.Manager, exitAfter time.Duration) error {
|
||||
if clientConfig.Transport != nil {
|
||||
return fmt.Errorf("there is already a transport configured")
|
||||
}
|
||||
|
@ -77,16 +83,35 @@ func updateTransport(stopCh <-chan struct{}, period time.Duration, clientConfig
|
|||
conns: make(map[*closableConn]struct{}),
|
||||
}
|
||||
|
||||
lastCertAvailable := time.Now()
|
||||
lastCert := clientCertificateManager.Current()
|
||||
go wait.Until(func() {
|
||||
curr := clientCertificateManager.Current()
|
||||
if exitIfExpired && curr != nil && time.Now().After(curr.Leaf.NotAfter) {
|
||||
if clientCertificateManager.ServerHealthy() {
|
||||
glog.Fatalf("The currently active client certificate has expired and the server is responsive, exiting.")
|
||||
|
||||
if exitAfter > 0 {
|
||||
now := time.Now()
|
||||
if curr == nil {
|
||||
// the certificate has been deleted from disk or is otherwise corrupt
|
||||
if now.After(lastCertAvailable.Add(exitAfter)) {
|
||||
if clientCertificateManager.ServerHealthy() {
|
||||
glog.Fatalf("It has been %s since a valid client cert was found and the server is responsive, exiting.", exitAfter)
|
||||
} else {
|
||||
glog.Errorf("It has been %s since a valid client cert was found, but the server is not responsive. A restart may be necessary to retrieve new initial credentials.", exitAfter)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
glog.Errorf("The currently active client certificate has expired, but the server is not responsive. A restart may be necessary to retrieve new initial credentials.")
|
||||
// the certificate is expired
|
||||
if now.After(curr.Leaf.NotAfter) {
|
||||
if clientCertificateManager.ServerHealthy() {
|
||||
glog.Fatalf("The currently active client certificate has expired and the server is responsive, exiting.")
|
||||
} else {
|
||||
glog.Errorf("The currently active client certificate has expired, but the server is not responsive. A restart may be necessary to retrieve new initial credentials.")
|
||||
}
|
||||
}
|
||||
lastCertAvailable = now
|
||||
}
|
||||
}
|
||||
|
||||
if curr == nil || lastCert == curr {
|
||||
// Cert hasn't been rotated.
|
||||
return
|
||||
|
|
|
@ -187,7 +187,7 @@ func TestRotateShutsDownConnections(t *testing.T) {
|
|||
}
|
||||
|
||||
// Check for a new cert every 10 milliseconds
|
||||
if err := updateTransport(stop, 10*time.Millisecond, c, m, false); err != nil {
|
||||
if err := updateTransport(stop, 10*time.Millisecond, c, m, 0); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
|
|
|
@ -268,6 +268,13 @@ func getCurrentCertificateOrBootstrap(
|
|||
return nil, false, fmt.Errorf("unable to parse certificate data: %v", err)
|
||||
}
|
||||
bootstrapCert.Leaf = certs[0]
|
||||
|
||||
if _, err := store.Update(bootstrapCertificatePEM, bootstrapKeyPEM); err != nil {
|
||||
utilruntime.HandleError(fmt.Errorf("Unable to set the cert/key pair to the bootstrap certificate: %v", err))
|
||||
} else {
|
||||
glog.V(4).Infof("Updated the store to contain the initial bootstrap certificate")
|
||||
}
|
||||
|
||||
return &bootstrapCert, true, nil
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue