diff --git a/pkg/api/types.go b/pkg/api/types.go index a9179a16b2..c2c2db28ef 100644 --- a/pkg/api/types.go +++ b/pkg/api/types.go @@ -279,6 +279,8 @@ type Probe struct { Handler `json:",inline"` // Length of time before health checking is activated. In seconds. InitialDelaySeconds int64 `json:"initialDelaySeconds,omitempty"` + // Length of time before health checking times out. In seconds. + TimeoutSeconds int64 `json:"timeoutSeconds,omitempty"` } // PullPolicy describes a policy for if/when to pull a container image diff --git a/pkg/api/v1beta1/conversion.go b/pkg/api/v1beta1/conversion.go index a205d054fe..4058a01cf5 100644 --- a/pkg/api/v1beta1/conversion.go +++ b/pkg/api/v1beta1/conversion.go @@ -911,6 +911,7 @@ func init() { return err } out.InitialDelaySeconds = in.InitialDelaySeconds + out.TimeoutSeconds = in.TimeoutSeconds return nil }, func(in *LivenessProbe, out *newer.Probe, s conversion.Scope) error { @@ -924,6 +925,7 @@ func init() { return err } out.InitialDelaySeconds = in.InitialDelaySeconds + out.TimeoutSeconds = in.TimeoutSeconds return nil }, ) diff --git a/pkg/api/v1beta1/types.go b/pkg/api/v1beta1/types.go index 6940734702..6b0e1c59d3 100644 --- a/pkg/api/v1beta1/types.go +++ b/pkg/api/v1beta1/types.go @@ -227,6 +227,8 @@ type LivenessProbe struct { Exec *ExecAction `json:"exec,omitempty" description:"parameters for exec-based liveness probe"` // Length of time before health checking is activated. In seconds. InitialDelaySeconds int64 `json:"initialDelaySeconds,omitempty" description:"number of seconds after the container has started before liveness probes are initiated"` + // Length of time before health checking times out. In seconds. + TimeoutSeconds int64 `json:"timeoutSeconds,omitempty" description:"number of seconds after which liveness probes timeout; defaults to 1 second"` } // PullPolicy describes a policy for if/when to pull a container image diff --git a/pkg/api/v1beta2/conversion.go b/pkg/api/v1beta2/conversion.go index 4ae43951a2..336cf64fe2 100644 --- a/pkg/api/v1beta2/conversion.go +++ b/pkg/api/v1beta2/conversion.go @@ -824,6 +824,7 @@ func init() { return err } out.InitialDelaySeconds = in.InitialDelaySeconds + out.TimeoutSeconds = in.TimeoutSeconds return nil }, func(in *LivenessProbe, out *newer.Probe, s conversion.Scope) error { @@ -837,6 +838,7 @@ func init() { return err } out.InitialDelaySeconds = in.InitialDelaySeconds + out.TimeoutSeconds = in.TimeoutSeconds return nil }, ) diff --git a/pkg/api/v1beta2/types.go b/pkg/api/v1beta2/types.go index 052b4ee777..0d18fe914e 100644 --- a/pkg/api/v1beta2/types.go +++ b/pkg/api/v1beta2/types.go @@ -191,6 +191,8 @@ type LivenessProbe struct { Exec *ExecAction `json:"exec,omitempty" description:"parameters for exec-based liveness probe"` // Length of time before health checking is activated. In seconds. InitialDelaySeconds int64 `json:"initialDelaySeconds,omitempty" description:"number of seconds after the container has started before liveness probes are initiated"` + // Length of time before health checking times out. In seconds. + TimeoutSeconds int64 `json:"timeoutSeconds,omitempty" description:"number of seconds after which liveness probes timeout; defaults to 1 second"` } // PullPolicy describes a policy for if/when to pull a container image diff --git a/pkg/api/v1beta3/types.go b/pkg/api/v1beta3/types.go index 26c7a07478..2c628534dd 100644 --- a/pkg/api/v1beta3/types.go +++ b/pkg/api/v1beta3/types.go @@ -297,6 +297,8 @@ type Probe struct { Handler `json:",inline"` // Length of time before health checking is activated. In seconds. InitialDelaySeconds int64 `json:"initialDelaySeconds,omitempty"` + // Length of time before health checking times out. In seconds. + TimeoutSeconds int64 `json:"timeoutSeconds,omitempty"` } // PullPolicy describes a policy for if/when to pull a container image diff --git a/pkg/kubelet/kubelet.go b/pkg/kubelet/kubelet.go index a2a1f113d4..d0b284bee8 100644 --- a/pkg/kubelet/kubelet.go +++ b/pkg/kubelet/kubelet.go @@ -55,6 +55,7 @@ const defaultChanSize = 1024 const minShares = 2 const sharesPerCPU = 1024 const milliCPUToCPU = 1000 +const maxRetries int = 3 // SyncHandler is an interface implemented by Kubelet, for testability type SyncHandler interface { @@ -1417,7 +1418,7 @@ func (kl *Kubelet) GetPodStatus(podFullName string, uid types.UID) (api.PodStatu return podStatus, err } -func (kl *Kubelet) probeLiveness(podFullName string, podUID types.UID, status api.PodStatus, container api.Container, dockerContainer *docker.APIContainers) (probe.Status, error) { +func (kl *Kubelet) probeLiveness(podFullName string, podUID types.UID, status api.PodStatus, container api.Container, dockerContainer *docker.APIContainers) (healthStatus probe.Status, err error) { // Give the container 60 seconds to start up. if container.LivenessProbe == nil { return probe.Success, nil @@ -1425,7 +1426,13 @@ func (kl *Kubelet) probeLiveness(podFullName string, podUID types.UID, status ap if time.Now().Unix()-dockerContainer.Created < container.LivenessProbe.InitialDelaySeconds { return probe.Success, nil } - return kl.probeContainer(container.LivenessProbe, podFullName, podUID, status, container) + for i := 0; i < maxRetries; i++ { + healthStatus, err = kl.probeContainer(container.LivenessProbe, podFullName, podUID, status, container) + if healthStatus == probe.Success { + return + } + } + return healthStatus, err } // Returns logs of current machine. diff --git a/pkg/kubelet/probe.go b/pkg/kubelet/probe.go index 8d107872d7..72bc972c5f 100644 --- a/pkg/kubelet/probe.go +++ b/pkg/kubelet/probe.go @@ -19,6 +19,7 @@ package kubelet import ( "fmt" "strconv" + "time" "github.com/GoogleCloudPlatform/kubernetes/pkg/api" "github.com/GoogleCloudPlatform/kubernetes/pkg/probe" @@ -39,6 +40,14 @@ var ( ) func (kl *Kubelet) probeContainer(p *api.Probe, podFullName string, podUID types.UID, status api.PodStatus, container api.Container) (probe.Status, error) { + var timeout time.Duration + secs := container.LivenessProbe.TimeoutSeconds + if secs > 0 { + timeout = time.Duration(secs) * time.Second + } else { + timeout = 1 * time.Second + } + if p.Exec != nil { return execprober.Probe(kl.newExecInContainer(podFullName, podUID, container)) } @@ -47,14 +56,15 @@ func (kl *Kubelet) probeContainer(p *api.Probe, podFullName string, podUID types if err != nil { return probe.Unknown, err } - return httprober.Probe(extractGetParams(p.HTTPGet, status, port)) + host, port, path := extractGetParams(p.HTTPGet, status, port) + return httprober.Probe(host, port, path, timeout) } if p.TCPSocket != nil { port, err := extractPort(p.TCPSocket.Port, container) if err != nil { return probe.Unknown, err } - return tcprober.Probe(status.PodIP, port) + return tcprober.Probe(status.PodIP, port, timeout) } glog.Warningf("Failed to find probe builder for %s %+v", container.Name, container.LivenessProbe) return probe.Unknown, nil diff --git a/pkg/probe/http/http.go b/pkg/probe/http/http.go index e3e7898dbc..09f8917882 100644 --- a/pkg/probe/http/http.go +++ b/pkg/probe/http/http.go @@ -21,6 +21,7 @@ import ( "net/http" "net/url" "strconv" + "time" "github.com/GoogleCloudPlatform/kubernetes/pkg/probe" @@ -28,16 +29,17 @@ import ( ) func New() HTTPProber { - return HTTPProber{&http.Client{}} + transport := &http.Transport{} + return HTTPProber{transport} } type HTTPProber struct { - client HTTPGetInterface + transport *http.Transport } // Probe returns a ProbeRunner capable of running an http check. -func (pr *HTTPProber) Probe(host string, port int, path string) (probe.Status, error) { - return DoHTTPProbe(formatURL(host, port, path), pr.client) +func (pr *HTTPProber) Probe(host string, port int, path string, timeout time.Duration) (probe.Status, error) { + return DoHTTPProbe(formatURL(host, port, path), &http.Client{Timeout: timeout, Transport: pr.transport}) } type HTTPGetInterface interface { diff --git a/pkg/probe/http/http_test.go b/pkg/probe/http/http_test.go index fd9009f661..be9b0ae8ce 100644 --- a/pkg/probe/http/http_test.go +++ b/pkg/probe/http/http_test.go @@ -23,6 +23,7 @@ import ( "net/url" "strconv" "testing" + "time" "github.com/GoogleCloudPlatform/kubernetes/pkg/probe" ) @@ -46,20 +47,25 @@ func TestFormatURL(t *testing.T) { } func TestHTTPProbeChecker(t *testing.T) { + handleReq := func(s int) func(w http.ResponseWriter) { + return func(w http.ResponseWriter) { w.WriteHeader(s) } + } + prober := New() testCases := []struct { - status int - health probe.Status + handler func(w http.ResponseWriter) + health probe.Status }{ // The probe will be filled in below. This is primarily testing that an HTTP GET happens. - {http.StatusOK, probe.Success}, - {-1, probe.Failure}, + {handleReq(http.StatusOK), probe.Success}, + {handleReq(-1), probe.Failure}, + {func(w http.ResponseWriter) { time.Sleep(3 * time.Second) }, probe.Failure}, } for _, test := range testCases { - ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - w.WriteHeader(test.status) + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + test.handler(w) })) - u, err := url.Parse(ts.URL) + u, err := url.Parse(server.URL) if err != nil { t.Errorf("Unexpected error: %v", err) } @@ -71,7 +77,7 @@ func TestHTTPProbeChecker(t *testing.T) { if err != nil { t.Errorf("Unexpected error: %v", err) } - health, err := prober.Probe(host, p, "") + health, err := prober.Probe(host, p, "", 1*time.Second) if test.health == probe.Unknown && err == nil { t.Errorf("Expected error") } diff --git a/pkg/probe/tcp/tcp.go b/pkg/probe/tcp/tcp.go index a452a802c7..8232295a0b 100644 --- a/pkg/probe/tcp/tcp.go +++ b/pkg/probe/tcp/tcp.go @@ -19,6 +19,7 @@ package tcp import ( "net" "strconv" + "time" "github.com/GoogleCloudPlatform/kubernetes/pkg/probe" @@ -31,16 +32,16 @@ func New() TCPProber { type TCPProber struct{} -func (pr TCPProber) Probe(host string, port int) (probe.Status, error) { - return DoTCPProbe(net.JoinHostPort(host, strconv.Itoa(port))) +func (pr TCPProber) Probe(host string, port int, timeout time.Duration) (probe.Status, error) { + return DoTCPProbe(net.JoinHostPort(host, strconv.Itoa(port)), timeout) } // DoTCPProbe checks that a TCP socket to the address can be opened. // If the socket can be opened, it returns Success // If the socket fails to open, it returns Failure. // This is exported because some other packages may want to do direct TCP probes. -func DoTCPProbe(addr string) (probe.Status, error) { - conn, err := net.Dial("tcp", addr) +func DoTCPProbe(addr string, timeout time.Duration) (probe.Status, error) { + conn, err := net.DialTimeout("tcp", addr, timeout) if err != nil { return probe.Failure, nil } diff --git a/pkg/probe/tcp/tcp_test.go b/pkg/probe/tcp/tcp_test.go index bd8f61fdac..5be9125eb7 100644 --- a/pkg/probe/tcp/tcp_test.go +++ b/pkg/probe/tcp/tcp_test.go @@ -23,6 +23,7 @@ import ( "net/url" "strconv" "testing" + "time" "github.com/GoogleCloudPlatform/kubernetes/pkg/probe" ) @@ -59,7 +60,7 @@ func TestTcpHealthChecker(t *testing.T) { if !test.usePort { p = -1 } - status, err := prober.Probe(host, p) + status, err := prober.Probe(host, p, 1*time.Second) if status != test.expectedStatus { t.Errorf("expected: %v, got: %v", test.expectedStatus, status) }