Merge pull request #42272 from marun/apiserver-fail-fast

Automatic merge from submit-queue

apiserver: Update genericapiserver to panic on listener error

Previously, RunServer would try to listen again whenever a listener error occurred. This commit makes it panic instead, so that a process manager (systemd, kubelet, etc.) can react to the failure.

**Release note**:

```release-note
The Kubernetes API server now exits if it encounters a networking failure (e.g. the network interface hosting its address goes away), allowing a process manager (systemd, kubelet, etc.) to react to the problem. Previously, the server would log the failure and retry binding to its configured address:port.
```

cc: @liggitt @sttts @deads2k @derekwaynecarr
pull/6/head
Kubernetes Submit Queue 2017-04-19 23:51:34 -07:00 committed by GitHub
commit afc01d92d2
1 changed files with 13 additions and 38 deletions

View File

@ -23,7 +23,6 @@ import (
"net" "net"
"net/http" "net/http"
"strings" "strings"
"sync"
"time" "time"
utilruntime "k8s.io/apimachinery/pkg/util/runtime" utilruntime "k8s.io/apimachinery/pkg/util/runtime"
@ -100,7 +99,6 @@ func RunServer(server *http.Server, network string, stopCh <-chan struct{}) (int
network = "tcp" network = "tcp"
} }
// first listen is synchronous (fail early!)
ln, err := net.Listen(network, server.Addr) ln, err := net.Listen(network, server.Addr)
if err != nil { if err != nil {
return 0, fmt.Errorf("failed to listen on %v: %v", server.Addr, err) return 0, fmt.Errorf("failed to listen on %v: %v", server.Addr, err)
@ -113,52 +111,29 @@ func RunServer(server *http.Server, network string, stopCh <-chan struct{}) (int
return 0, fmt.Errorf("invalid listen address: %q", ln.Addr().String()) return 0, fmt.Errorf("invalid listen address: %q", ln.Addr().String())
} }
lock := sync.Mutex{} // to avoid we close an old listener during a listen retry // Stop the server by closing the listener
go func() { go func() {
<-stopCh <-stopCh
lock.Lock()
defer lock.Unlock()
ln.Close() ln.Close()
}() }()
go func() { go func() {
defer utilruntime.HandleCrash() defer utilruntime.HandleCrash()
for { var listener net.Listener
var listener net.Listener listener = tcpKeepAliveListener{ln.(*net.TCPListener)}
listener = tcpKeepAliveListener{ln.(*net.TCPListener)} if server.TLSConfig != nil {
if server.TLSConfig != nil { listener = tls.NewListener(listener, server.TLSConfig)
listener = tls.NewListener(listener, server.TLSConfig) }
}
err := server.Serve(listener) err := server.Serve(listener)
glog.Errorf("Error serving %v (%v); will try again.", server.Addr, err)
// listen again msg := fmt.Sprintf("Stopped listening on %s", tcpAddr.String())
func() { select {
lock.Lock() case <-stopCh:
defer lock.Unlock() glog.Info(msg)
for { default:
time.Sleep(15 * time.Second) panic(fmt.Sprintf("%s due to error: %v", msg, err))
ln, err = net.Listen(network, server.Addr)
if err == nil {
return
}
select {
case <-stopCh:
return
default:
}
glog.Errorf("Error listening on %v (%v); will try again.", server.Addr, err)
}
}()
select {
case <-stopCh:
return
default:
}
} }
}() }()