mirror of https://github.com/k3s-io/k3s
Add timeouts to HealthChecks and retry checks
Fixes issue #3532. Added timeouts for HTTP and TCP checks and enabled kubelet/probe to retry up to kubelet#maxRetries times before declaring Failure. Added Probe.TimeoutSeconds to the API; Probe variants now check container.LivenessProbe.TimeoutSeconds. Also added a test for timeouts in http_test.go. pull/6/head
parent
e335e2d3e2
commit
e8c33b7916
|
@ -279,6 +279,8 @@ type Probe struct {
|
||||||
Handler `json:",inline"`
|
Handler `json:",inline"`
|
||||||
// Length of time before health checking is activated. In seconds.
|
// Length of time before health checking is activated. In seconds.
|
||||||
InitialDelaySeconds int64 `json:"initialDelaySeconds,omitempty"`
|
InitialDelaySeconds int64 `json:"initialDelaySeconds,omitempty"`
|
||||||
|
// Length of time before health checking times out. In seconds.
|
||||||
|
TimeoutSeconds int64 `json:"timeoutSeconds,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// PullPolicy describes a policy for if/when to pull a container image
|
// PullPolicy describes a policy for if/when to pull a container image
|
||||||
|
|
|
@ -911,6 +911,7 @@ func init() {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
out.InitialDelaySeconds = in.InitialDelaySeconds
|
out.InitialDelaySeconds = in.InitialDelaySeconds
|
||||||
|
out.TimeoutSeconds = in.TimeoutSeconds
|
||||||
return nil
|
return nil
|
||||||
},
|
},
|
||||||
func(in *LivenessProbe, out *newer.Probe, s conversion.Scope) error {
|
func(in *LivenessProbe, out *newer.Probe, s conversion.Scope) error {
|
||||||
|
@ -924,6 +925,7 @@ func init() {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
out.InitialDelaySeconds = in.InitialDelaySeconds
|
out.InitialDelaySeconds = in.InitialDelaySeconds
|
||||||
|
out.TimeoutSeconds = in.TimeoutSeconds
|
||||||
return nil
|
return nil
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
|
@ -227,6 +227,8 @@ type LivenessProbe struct {
|
||||||
Exec *ExecAction `json:"exec,omitempty" description:"parameters for exec-based liveness probe"`
|
Exec *ExecAction `json:"exec,omitempty" description:"parameters for exec-based liveness probe"`
|
||||||
// Length of time before health checking is activated. In seconds.
|
// Length of time before health checking is activated. In seconds.
|
||||||
InitialDelaySeconds int64 `json:"initialDelaySeconds,omitempty" description:"number of seconds after the container has started before liveness probes are initiated"`
|
InitialDelaySeconds int64 `json:"initialDelaySeconds,omitempty" description:"number of seconds after the container has started before liveness probes are initiated"`
|
||||||
|
// Length of time before health checking times out. In seconds.
|
||||||
|
TimeoutSeconds int64 `json:"timeoutSeconds,omitempty" description:"number of seconds after which liveness probes timeout; defaults to 1 second"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// PullPolicy describes a policy for if/when to pull a container image
|
// PullPolicy describes a policy for if/when to pull a container image
|
||||||
|
|
|
@ -824,6 +824,7 @@ func init() {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
out.InitialDelaySeconds = in.InitialDelaySeconds
|
out.InitialDelaySeconds = in.InitialDelaySeconds
|
||||||
|
out.TimeoutSeconds = in.TimeoutSeconds
|
||||||
return nil
|
return nil
|
||||||
},
|
},
|
||||||
func(in *LivenessProbe, out *newer.Probe, s conversion.Scope) error {
|
func(in *LivenessProbe, out *newer.Probe, s conversion.Scope) error {
|
||||||
|
@ -837,6 +838,7 @@ func init() {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
out.InitialDelaySeconds = in.InitialDelaySeconds
|
out.InitialDelaySeconds = in.InitialDelaySeconds
|
||||||
|
out.TimeoutSeconds = in.TimeoutSeconds
|
||||||
return nil
|
return nil
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
|
@ -191,6 +191,8 @@ type LivenessProbe struct {
|
||||||
Exec *ExecAction `json:"exec,omitempty" description:"parameters for exec-based liveness probe"`
|
Exec *ExecAction `json:"exec,omitempty" description:"parameters for exec-based liveness probe"`
|
||||||
// Length of time before health checking is activated. In seconds.
|
// Length of time before health checking is activated. In seconds.
|
||||||
InitialDelaySeconds int64 `json:"initialDelaySeconds,omitempty" description:"number of seconds after the container has started before liveness probes are initiated"`
|
InitialDelaySeconds int64 `json:"initialDelaySeconds,omitempty" description:"number of seconds after the container has started before liveness probes are initiated"`
|
||||||
|
// Length of time before health checking times out. In seconds.
|
||||||
|
TimeoutSeconds int64 `json:"timeoutSeconds,omitempty" description:"number of seconds after which liveness probes timeout; defaults to 1 second"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// PullPolicy describes a policy for if/when to pull a container image
|
// PullPolicy describes a policy for if/when to pull a container image
|
||||||
|
|
|
@ -297,6 +297,8 @@ type Probe struct {
|
||||||
Handler `json:",inline"`
|
Handler `json:",inline"`
|
||||||
// Length of time before health checking is activated. In seconds.
|
// Length of time before health checking is activated. In seconds.
|
||||||
InitialDelaySeconds int64 `json:"initialDelaySeconds,omitempty"`
|
InitialDelaySeconds int64 `json:"initialDelaySeconds,omitempty"`
|
||||||
|
// Length of time before health checking times out. In seconds.
|
||||||
|
TimeoutSeconds int64 `json:"timeoutSeconds,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// PullPolicy describes a policy for if/when to pull a container image
|
// PullPolicy describes a policy for if/when to pull a container image
|
||||||
|
|
|
@ -55,6 +55,7 @@ const defaultChanSize = 1024
|
||||||
const minShares = 2
|
const minShares = 2
|
||||||
const sharesPerCPU = 1024
|
const sharesPerCPU = 1024
|
||||||
const milliCPUToCPU = 1000
|
const milliCPUToCPU = 1000
|
||||||
|
const maxRetries int = 3
|
||||||
|
|
||||||
// SyncHandler is an interface implemented by Kubelet, for testability
|
// SyncHandler is an interface implemented by Kubelet, for testability
|
||||||
type SyncHandler interface {
|
type SyncHandler interface {
|
||||||
|
@ -1417,7 +1418,7 @@ func (kl *Kubelet) GetPodStatus(podFullName string, uid types.UID) (api.PodStatu
|
||||||
return podStatus, err
|
return podStatus, err
|
||||||
}
|
}
|
||||||
|
|
||||||
func (kl *Kubelet) probeLiveness(podFullName string, podUID types.UID, status api.PodStatus, container api.Container, dockerContainer *docker.APIContainers) (probe.Status, error) {
|
func (kl *Kubelet) probeLiveness(podFullName string, podUID types.UID, status api.PodStatus, container api.Container, dockerContainer *docker.APIContainers) (healthStatus probe.Status, err error) {
|
||||||
// Give the container 60 seconds to start up.
|
// Give the container 60 seconds to start up.
|
||||||
if container.LivenessProbe == nil {
|
if container.LivenessProbe == nil {
|
||||||
return probe.Success, nil
|
return probe.Success, nil
|
||||||
|
@ -1425,7 +1426,13 @@ func (kl *Kubelet) probeLiveness(podFullName string, podUID types.UID, status ap
|
||||||
if time.Now().Unix()-dockerContainer.Created < container.LivenessProbe.InitialDelaySeconds {
|
if time.Now().Unix()-dockerContainer.Created < container.LivenessProbe.InitialDelaySeconds {
|
||||||
return probe.Success, nil
|
return probe.Success, nil
|
||||||
}
|
}
|
||||||
return kl.probeContainer(container.LivenessProbe, podFullName, podUID, status, container)
|
for i := 0; i < maxRetries; i++ {
|
||||||
|
healthStatus, err = kl.probeContainer(container.LivenessProbe, podFullName, podUID, status, container)
|
||||||
|
if healthStatus == probe.Success {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return healthStatus, err
|
||||||
}
|
}
|
||||||
|
|
||||||
// Returns logs of current machine.
|
// Returns logs of current machine.
|
||||||
|
|
|
@ -19,6 +19,7 @@ package kubelet
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"strconv"
|
"strconv"
|
||||||
|
"time"
|
||||||
|
|
||||||
"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
|
"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
|
||||||
"github.com/GoogleCloudPlatform/kubernetes/pkg/probe"
|
"github.com/GoogleCloudPlatform/kubernetes/pkg/probe"
|
||||||
|
@ -39,6 +40,14 @@ var (
|
||||||
)
|
)
|
||||||
|
|
||||||
func (kl *Kubelet) probeContainer(p *api.Probe, podFullName string, podUID types.UID, status api.PodStatus, container api.Container) (probe.Status, error) {
|
func (kl *Kubelet) probeContainer(p *api.Probe, podFullName string, podUID types.UID, status api.PodStatus, container api.Container) (probe.Status, error) {
|
||||||
|
var timeout time.Duration
|
||||||
|
secs := container.LivenessProbe.TimeoutSeconds
|
||||||
|
if secs > 0 {
|
||||||
|
timeout = time.Duration(secs) * time.Second
|
||||||
|
} else {
|
||||||
|
timeout = 1 * time.Second
|
||||||
|
}
|
||||||
|
|
||||||
if p.Exec != nil {
|
if p.Exec != nil {
|
||||||
return execprober.Probe(kl.newExecInContainer(podFullName, podUID, container))
|
return execprober.Probe(kl.newExecInContainer(podFullName, podUID, container))
|
||||||
}
|
}
|
||||||
|
@ -47,14 +56,15 @@ func (kl *Kubelet) probeContainer(p *api.Probe, podFullName string, podUID types
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return probe.Unknown, err
|
return probe.Unknown, err
|
||||||
}
|
}
|
||||||
return httprober.Probe(extractGetParams(p.HTTPGet, status, port))
|
host, port, path := extractGetParams(p.HTTPGet, status, port)
|
||||||
|
return httprober.Probe(host, port, path, timeout)
|
||||||
}
|
}
|
||||||
if p.TCPSocket != nil {
|
if p.TCPSocket != nil {
|
||||||
port, err := extractPort(p.TCPSocket.Port, container)
|
port, err := extractPort(p.TCPSocket.Port, container)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return probe.Unknown, err
|
return probe.Unknown, err
|
||||||
}
|
}
|
||||||
return tcprober.Probe(status.PodIP, port)
|
return tcprober.Probe(status.PodIP, port, timeout)
|
||||||
}
|
}
|
||||||
glog.Warningf("Failed to find probe builder for %s %+v", container.Name, container.LivenessProbe)
|
glog.Warningf("Failed to find probe builder for %s %+v", container.Name, container.LivenessProbe)
|
||||||
return probe.Unknown, nil
|
return probe.Unknown, nil
|
||||||
|
|
|
@ -21,6 +21,7 @@ import (
|
||||||
"net/http"
|
"net/http"
|
||||||
"net/url"
|
"net/url"
|
||||||
"strconv"
|
"strconv"
|
||||||
|
"time"
|
||||||
|
|
||||||
"github.com/GoogleCloudPlatform/kubernetes/pkg/probe"
|
"github.com/GoogleCloudPlatform/kubernetes/pkg/probe"
|
||||||
|
|
||||||
|
@ -28,16 +29,17 @@ import (
|
||||||
)
|
)
|
||||||
|
|
||||||
func New() HTTPProber {
|
func New() HTTPProber {
|
||||||
return HTTPProber{&http.Client{}}
|
transport := &http.Transport{}
|
||||||
|
return HTTPProber{transport}
|
||||||
}
|
}
|
||||||
|
|
||||||
type HTTPProber struct {
|
type HTTPProber struct {
|
||||||
client HTTPGetInterface
|
transport *http.Transport
|
||||||
}
|
}
|
||||||
|
|
||||||
// Probe returns a ProbeRunner capable of running an http check.
|
// Probe returns a ProbeRunner capable of running an http check.
|
||||||
func (pr *HTTPProber) Probe(host string, port int, path string) (probe.Status, error) {
|
func (pr *HTTPProber) Probe(host string, port int, path string, timeout time.Duration) (probe.Status, error) {
|
||||||
return DoHTTPProbe(formatURL(host, port, path), pr.client)
|
return DoHTTPProbe(formatURL(host, port, path), &http.Client{Timeout: timeout, Transport: pr.transport})
|
||||||
}
|
}
|
||||||
|
|
||||||
type HTTPGetInterface interface {
|
type HTTPGetInterface interface {
|
||||||
|
|
|
@ -23,6 +23,7 @@ import (
|
||||||
"net/url"
|
"net/url"
|
||||||
"strconv"
|
"strconv"
|
||||||
"testing"
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
"github.com/GoogleCloudPlatform/kubernetes/pkg/probe"
|
"github.com/GoogleCloudPlatform/kubernetes/pkg/probe"
|
||||||
)
|
)
|
||||||
|
@ -46,20 +47,25 @@ func TestFormatURL(t *testing.T) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestHTTPProbeChecker(t *testing.T) {
|
func TestHTTPProbeChecker(t *testing.T) {
|
||||||
|
handleReq := func(s int) func(w http.ResponseWriter) {
|
||||||
|
return func(w http.ResponseWriter) { w.WriteHeader(s) }
|
||||||
|
}
|
||||||
|
|
||||||
prober := New()
|
prober := New()
|
||||||
testCases := []struct {
|
testCases := []struct {
|
||||||
status int
|
handler func(w http.ResponseWriter)
|
||||||
health probe.Status
|
health probe.Status
|
||||||
}{
|
}{
|
||||||
// The probe will be filled in below. This is primarily testing that an HTTP GET happens.
|
// The probe will be filled in below. This is primarily testing that an HTTP GET happens.
|
||||||
{http.StatusOK, probe.Success},
|
{handleReq(http.StatusOK), probe.Success},
|
||||||
{-1, probe.Failure},
|
{handleReq(-1), probe.Failure},
|
||||||
|
{func(w http.ResponseWriter) { time.Sleep(3 * time.Second) }, probe.Failure},
|
||||||
}
|
}
|
||||||
for _, test := range testCases {
|
for _, test := range testCases {
|
||||||
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
w.WriteHeader(test.status)
|
test.handler(w)
|
||||||
}))
|
}))
|
||||||
u, err := url.Parse(ts.URL)
|
u, err := url.Parse(server.URL)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Errorf("Unexpected error: %v", err)
|
t.Errorf("Unexpected error: %v", err)
|
||||||
}
|
}
|
||||||
|
@ -71,7 +77,7 @@ func TestHTTPProbeChecker(t *testing.T) {
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Errorf("Unexpected error: %v", err)
|
t.Errorf("Unexpected error: %v", err)
|
||||||
}
|
}
|
||||||
health, err := prober.Probe(host, p, "")
|
health, err := prober.Probe(host, p, "", 1*time.Second)
|
||||||
if test.health == probe.Unknown && err == nil {
|
if test.health == probe.Unknown && err == nil {
|
||||||
t.Errorf("Expected error")
|
t.Errorf("Expected error")
|
||||||
}
|
}
|
||||||
|
|
|
@ -19,6 +19,7 @@ package tcp
|
||||||
import (
|
import (
|
||||||
"net"
|
"net"
|
||||||
"strconv"
|
"strconv"
|
||||||
|
"time"
|
||||||
|
|
||||||
"github.com/GoogleCloudPlatform/kubernetes/pkg/probe"
|
"github.com/GoogleCloudPlatform/kubernetes/pkg/probe"
|
||||||
|
|
||||||
|
@ -31,16 +32,16 @@ func New() TCPProber {
|
||||||
|
|
||||||
type TCPProber struct{}
|
type TCPProber struct{}
|
||||||
|
|
||||||
func (pr TCPProber) Probe(host string, port int) (probe.Status, error) {
|
func (pr TCPProber) Probe(host string, port int, timeout time.Duration) (probe.Status, error) {
|
||||||
return DoTCPProbe(net.JoinHostPort(host, strconv.Itoa(port)))
|
return DoTCPProbe(net.JoinHostPort(host, strconv.Itoa(port)), timeout)
|
||||||
}
|
}
|
||||||
|
|
||||||
// DoTCPProbe checks that a TCP socket to the address can be opened.
|
// DoTCPProbe checks that a TCP socket to the address can be opened.
|
||||||
// If the socket can be opened, it returns Success
|
// If the socket can be opened, it returns Success
|
||||||
// If the socket fails to open, it returns Failure.
|
// If the socket fails to open, it returns Failure.
|
||||||
// This is exported because some other packages may want to do direct TCP probes.
|
// This is exported because some other packages may want to do direct TCP probes.
|
||||||
func DoTCPProbe(addr string) (probe.Status, error) {
|
func DoTCPProbe(addr string, timeout time.Duration) (probe.Status, error) {
|
||||||
conn, err := net.Dial("tcp", addr)
|
conn, err := net.DialTimeout("tcp", addr, timeout)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return probe.Failure, nil
|
return probe.Failure, nil
|
||||||
}
|
}
|
||||||
|
|
|
@ -23,6 +23,7 @@ import (
|
||||||
"net/url"
|
"net/url"
|
||||||
"strconv"
|
"strconv"
|
||||||
"testing"
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
"github.com/GoogleCloudPlatform/kubernetes/pkg/probe"
|
"github.com/GoogleCloudPlatform/kubernetes/pkg/probe"
|
||||||
)
|
)
|
||||||
|
@ -59,7 +60,7 @@ func TestTcpHealthChecker(t *testing.T) {
|
||||||
if !test.usePort {
|
if !test.usePort {
|
||||||
p = -1
|
p = -1
|
||||||
}
|
}
|
||||||
status, err := prober.Probe(host, p)
|
status, err := prober.Probe(host, p, 1*time.Second)
|
||||||
if status != test.expectedStatus {
|
if status != test.expectedStatus {
|
||||||
t.Errorf("expected: %v, got: %v", test.expectedStatus, status)
|
t.Errorf("expected: %v, got: %v", test.expectedStatus, status)
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue