Better kubelet logging for probes

Log when we actually run probes and event when they fail.  Print the output of
a probe, too.
pull/6/head
Tim Hockin 2015-05-13 17:30:37 -07:00
parent d85dc7b2ea
commit 75617e8760
11 changed files with 162 additions and 114 deletions

View File

@ -81,8 +81,8 @@ func (fakeKubeletClient) GetConnectionInfo(host string) (string, uint, http.Roun
return "", 0, nil, errors.New("Not Implemented")
}
func (fakeKubeletClient) HealthCheck(host string) (probe.Result, error) {
return probe.Success, nil
func (fakeKubeletClient) HealthCheck(host string) (probe.Result, string, error) {
return probe.Success, "", nil
}
type delegateHandler struct {

View File

@ -35,11 +35,11 @@ type KubeletClient interface {
// KubeletHealthchecker is an interface for healthchecking kubelets
type KubeletHealthChecker interface {
HealthCheck(host string) (probe.Result, error)
HealthCheck(host string) (result probe.Result, output string, err error)
}
type ConnectionInfoGetter interface {
GetConnectionInfo(host string) (scheme string, port uint, transport http.RoundTripper, error error)
GetConnectionInfo(host string) (scheme string, port uint, transport http.RoundTripper, err error)
}
// HTTPKubeletClient is the default implementation of KubeletHealthchecker, accesses the kubelet over HTTP.
@ -103,7 +103,7 @@ func (c *HTTPKubeletClient) url(host, path, query string) string {
}).String()
}
func (c *HTTPKubeletClient) HealthCheck(host string) (probe.Result, error) {
func (c *HTTPKubeletClient) HealthCheck(host string) (probe.Result, string, error) {
return httprobe.DoHTTPProbe(c.url(host, "/healthz", ""), c.Client)
}
@ -112,8 +112,8 @@ func (c *HTTPKubeletClient) HealthCheck(host string) (probe.Result, error) {
// no kubelets.
type FakeKubeletClient struct{}
func (c FakeKubeletClient) HealthCheck(host string) (probe.Result, error) {
return probe.Unknown, errors.New("Not Implemented")
func (c FakeKubeletClient) HealthCheck(host string) (probe.Result, string, error) {
return probe.Unknown, "", errors.New("Not Implemented")
}
func (c FakeKubeletClient) GetConnectionInfo(host string) (string, uint, http.RoundTripper, error) {

View File

@ -59,7 +59,7 @@ func TestHTTPKubeletClient(t *testing.T) {
Client: http.DefaultClient,
Port: uint(port),
}
gotObj, err := c.HealthCheck(parts[0])
gotObj, _, err := c.HealthCheck(parts[0])
if err != nil {
t.Errorf("unexpected error: %v", err)
}
@ -93,7 +93,7 @@ func TestHTTPKubeletClientError(t *testing.T) {
Client: http.DefaultClient,
Port: uint(port),
}
gotObj, err := c.HealthCheck(parts[0])
gotObj, _, err := c.HealthCheck(parts[0])
if gotObj != expectObj {
t.Errorf("expected: %#v, got %#v", expectObj, gotObj)
}
@ -114,7 +114,7 @@ func TestNewKubeletClient(t *testing.T) {
}
host := "127.0.0.1"
healthStatus, err := client.HealthCheck(host)
healthStatus, _, err := client.HealthCheck(host)
if healthStatus != probe.Failure {
t.Errorf("Expected %v and got %v.", probe.Failure, healthStatus)
}

View File

@ -414,11 +414,12 @@ func TestKillContainerInPodWithError(t *testing.T) {
type fakeExecProber struct {
result probe.Result
output string
err error
}
func (p fakeExecProber) Probe(_ uexec.Cmd) (probe.Result, error) {
return p.result, p.err
func (p fakeExecProber) Probe(_ uexec.Cmd) (probe.Result, string, error) {
return p.result, p.output, p.err
}
func replaceProber(dm *DockerManager, result probe.Result, err error) {
@ -530,14 +531,14 @@ func TestProbeContainer(t *testing.T) {
testContainer: api.Container{
ReadinessProbe: &api.Probe{InitialDelaySeconds: 100},
},
expectedResult: probe.Success,
expectedResult: probe.Failure,
expectedReadiness: false,
},
{
testContainer: api.Container{
ReadinessProbe: &api.Probe{InitialDelaySeconds: -100},
},
expectedResult: probe.Success,
expectedResult: probe.Unknown,
expectedReadiness: false,
},
{
@ -595,7 +596,7 @@ func TestProbeContainer(t *testing.T) {
LivenessProbe: &api.Probe{InitialDelaySeconds: 100},
ReadinessProbe: &api.Probe{InitialDelaySeconds: 100},
},
expectedResult: probe.Success,
expectedResult: probe.Failure,
expectedReadiness: false,
},
{
@ -603,7 +604,7 @@ func TestProbeContainer(t *testing.T) {
LivenessProbe: &api.Probe{InitialDelaySeconds: 100},
ReadinessProbe: &api.Probe{InitialDelaySeconds: -100},
},
expectedResult: probe.Success,
expectedResult: probe.Unknown,
expectedReadiness: false,
},
{
@ -668,7 +669,7 @@ func TestProbeContainer(t *testing.T) {
},
}
for _, test := range tests {
for i, test := range tests {
if test.expectError {
replaceProber(manager, test.expectedResult, errors.New("error"))
} else {
@ -676,16 +677,16 @@ func TestProbeContainer(t *testing.T) {
}
result, err := manager.prober.Probe(&api.Pod{}, api.PodStatus{}, test.testContainer, dc.ID, dc.Created)
if test.expectError && err == nil {
t.Error("Expected error but did no error was returned.")
t.Error("[%d] Expected error but did no error was returned.", i)
}
if !test.expectError && err != nil {
t.Errorf("Expected error but got: %v", err)
t.Errorf("[%d] Expected error but got: %v", i, err)
}
if test.expectedResult != result {
t.Errorf("Expected result was %v but probeContainer() returned %v", test.expectedResult, result)
t.Errorf("[%d] Expected result was %v but probeContainer() returned %v", i, test.expectedResult, result)
}
if test.expectedReadiness != manager.readinessManager.GetReadiness(dc.ID) {
t.Errorf("Expected readiness was %v but probeContainer() set %v", test.expectedReadiness, manager.readinessManager.GetReadiness(dc.ID))
t.Errorf("[%d] Expected readiness was %v but probeContainer() set %v", i, test.expectedReadiness, manager.readinessManager.GetReadiness(dc.ID))
}
}
}

View File

@ -92,84 +92,109 @@ func NewTestProber(
// If the container's liveness probe is unsuccessful, set readiness to false.
// If liveness is successful, do a readiness check and set readiness accordingly.
func (pb *prober) Probe(pod *api.Pod, status api.PodStatus, container api.Container, containerID string, createdAt int64) (probe.Result, error) {
// Build a name string for logs.
ctrName := fmt.Sprintf("%s:%s", kubecontainer.GetPodFullName(pod), container.Name)
// Probe liveness.
live, err := pb.probeLiveness(pod, status, container, containerID, createdAt)
if err != nil {
glog.V(1).Infof("Liveness probe errored: %v", err)
pb.readinessManager.SetReadiness(containerID, false)
return probe.Unknown, err
}
if live != probe.Success {
glog.V(1).Infof("Liveness probe unsuccessful: %v", live)
pb.readinessManager.SetReadiness(containerID, false)
return live, nil
if container.LivenessProbe != nil {
live, output, err := pb.probeLiveness(pod, status, container, containerID, createdAt)
if err != nil || live != probe.Success {
// Liveness failed in one way or another.
pb.readinessManager.SetReadiness(containerID, false)
ref, ok := pb.refManager.GetRef(containerID)
if !ok {
glog.Warningf("No ref for pod %q - '%v'", containerID, container.Name)
}
if err != nil {
glog.V(1).Infof("Liveness probe for %q errored: %v", ctrName, err)
if ok {
pb.recorder.Eventf(ref, "unhealthy", "Liveness probe errored: %v", err)
}
return probe.Unknown, err
} else { // live != probe.Success
glog.V(1).Infof("Liveness probe for %q failed (%v): %s", ctrName, live, output)
if ok {
pb.recorder.Eventf(ref, "unhealthy", "Liveness probe failed: %s", output)
}
return live, nil
}
}
glog.V(3).Infof("Liveness probe for %q succeeded", ctrName)
}
// Probe readiness.
ready, err := pb.probeReadiness(pod, status, container, containerID, createdAt)
if err == nil && ready == probe.Success {
glog.V(3).Infof("Readiness probe successful: %v", ready)
pb.readinessManager.SetReadiness(containerID, true)
return probe.Success, nil
}
glog.V(1).Infof("Readiness probe failed/errored: %v, %v", ready, err)
pb.readinessManager.SetReadiness(containerID, false)
ref, ok := pb.refManager.GetRef(containerID)
if !ok {
glog.Warningf("No ref for pod '%v' - '%v'", containerID, container.Name)
return probe.Success, err
}
if ready != probe.Success {
pb.recorder.Eventf(ref, "unhealthy", "Readiness Probe Failed %v - %v", containerID, container.Name)
if container.ReadinessProbe != nil {
ready, output, err := pb.probeReadiness(pod, status, container, containerID, createdAt)
if err != nil || ready != probe.Success {
// Readiness failed in one way or another.
pb.readinessManager.SetReadiness(containerID, false)
ref, ok := pb.refManager.GetRef(containerID)
if !ok {
glog.Warningf("No ref for pod '%v' - '%v'", containerID, container.Name)
}
if err != nil {
glog.V(1).Infof("readiness probe for %q errored: %v", ctrName, err)
if ok {
pb.recorder.Eventf(ref, "unhealthy", "Readiness probe errored: %v", err)
}
return probe.Unknown, err
} else { // ready != probe.Success
glog.V(1).Infof("Readiness probe for %q failed (%v): %s", ctrName, ready, output)
if ok {
pb.recorder.Eventf(ref, "unhealthy", "Readiness probe failed: %s", output)
}
return ready, nil
}
}
glog.V(1).Infof("Readiness probe for %q succeeded", ctrName)
}
pb.readinessManager.SetReadiness(containerID, true)
return probe.Success, nil
}
// probeLiveness probes the liveness of a container.
// If the initalDelay since container creation on liveness probe has not passed the probe will return probe.Success.
func (pb *prober) probeLiveness(pod *api.Pod, status api.PodStatus, container api.Container, containerID string, createdAt int64) (probe.Result, error) {
func (pb *prober) probeLiveness(pod *api.Pod, status api.PodStatus, container api.Container, containerID string, createdAt int64) (probe.Result, string, error) {
p := container.LivenessProbe
if p == nil {
return probe.Success, nil
return probe.Success, "", nil
}
if time.Now().Unix()-createdAt < p.InitialDelaySeconds {
return probe.Success, nil
return probe.Success, "", nil
}
return pb.runProbeWithRetries(p, pod, status, container, containerID, maxProbeRetries)
}
// probeReadiness probes the readiness of a container.
// If the initial delay on the readiness probe has not passed the probe will return probe.Failure.
func (pb *prober) probeReadiness(pod *api.Pod, status api.PodStatus, container api.Container, containerID string, createdAt int64) (probe.Result, error) {
func (pb *prober) probeReadiness(pod *api.Pod, status api.PodStatus, container api.Container, containerID string, createdAt int64) (probe.Result, string, error) {
p := container.ReadinessProbe
if p == nil {
return probe.Success, nil
return probe.Success, "", nil
}
if time.Now().Unix()-createdAt < p.InitialDelaySeconds {
return probe.Failure, nil
return probe.Failure, "", nil
}
return pb.runProbeWithRetries(p, pod, status, container, containerID, maxProbeRetries)
}
// runProbeWithRetries tries to probe the container in a finite loop, it returns the last result
// if it never succeeds.
func (pb *prober) runProbeWithRetries(p *api.Probe, pod *api.Pod, status api.PodStatus, container api.Container, containerID string, retries int) (probe.Result, error) {
func (pb *prober) runProbeWithRetries(p *api.Probe, pod *api.Pod, status api.PodStatus, container api.Container, containerID string, retries int) (probe.Result, string, error) {
var err error
var result probe.Result
var output string
for i := 0; i < retries; i++ {
result, err = pb.runProbe(p, pod, status, container, containerID)
result, output, err = pb.runProbe(p, pod, status, container, containerID)
if result == probe.Success {
return probe.Success, nil
return probe.Success, output, nil
}
}
return result, err
return result, output, err
}
func (pb *prober) runProbe(p *api.Probe, pod *api.Pod, status api.PodStatus, container api.Container, containerID string) (probe.Result, error) {
func (pb *prober) runProbe(p *api.Probe, pod *api.Pod, status api.PodStatus, container api.Container, containerID string) (probe.Result, string, error) {
timeout := time.Duration(p.TimeoutSeconds) * time.Second
if p.Exec != nil {
glog.V(4).Infof("Exec-Probe Pod: %v, Container: %v, Command: %v", pod, container, p.Exec.Command)
@ -178,7 +203,7 @@ func (pb *prober) runProbe(p *api.Probe, pod *api.Pod, status api.PodStatus, con
if p.HTTPGet != nil {
port, err := extractPort(p.HTTPGet.Port, container)
if err != nil {
return probe.Unknown, err
return probe.Unknown, "", err
}
host, port, path := extractGetParams(p.HTTPGet, status, port)
glog.V(4).Infof("HTTP-Probe Host: %v, Port: %v, Path: %v", host, port, path)
@ -187,13 +212,13 @@ func (pb *prober) runProbe(p *api.Probe, pod *api.Pod, status api.PodStatus, con
if p.TCPSocket != nil {
port, err := extractPort(p.TCPSocket.Port, container)
if err != nil {
return probe.Unknown, err
return probe.Unknown, "", err
}
glog.V(4).Infof("TCP-Probe PodIP: %v, Port: %v, Timeout: %v", status.PodIP, port, timeout)
return pb.tcp.Probe(status.PodIP, port, timeout)
}
glog.Warningf("Failed to find probe builder for container: %v", container)
return probe.Unknown, nil
return probe.Unknown, "", nil
}
func extractGetParams(action *api.HTTPGetAction, status api.PodStatus, port int) (string, int, string) {

View File

@ -28,24 +28,24 @@ func New() ExecProber {
}
type ExecProber interface {
Probe(e exec.Cmd) (probe.Result, error)
Probe(e exec.Cmd) (probe.Result, string, error)
}
type execProber struct{}
func (pr execProber) Probe(e exec.Cmd) (probe.Result, error) {
func (pr execProber) Probe(e exec.Cmd) (probe.Result, string, error) {
data, err := e.CombinedOutput()
glog.V(4).Infof("health check response: %s", string(data))
glog.V(4).Infof("Exec probe response: %q", string(data))
if err != nil {
exit, ok := err.(exec.ExitError)
if ok {
if exit.ExitStatus() == 0 {
return probe.Success, nil
return probe.Success, string(data), nil
} else {
return probe.Failure, nil
return probe.Failure, string(data), nil
}
}
return probe.Unknown, err
return probe.Unknown, "", err
}
return probe.Success, nil
return probe.Success, string(data), nil
}

View File

@ -34,13 +34,6 @@ func (f *FakeCmd) CombinedOutput() ([]byte, error) {
func (f *FakeCmd) SetDir(dir string) {}
type healthCheckTest struct {
expectedStatus probe.Result
expectError bool
output []byte
err error
}
type fakeExitError struct {
exited bool
statusCode int
@ -64,30 +57,39 @@ func (f *fakeExitError) ExitStatus() int {
func TestExec(t *testing.T) {
prober := New()
fake := FakeCmd{}
tests := []healthCheckTest{
tests := []struct {
expectedStatus probe.Result
expectError bool
output string
err error
}{
// Ok
{probe.Success, false, []byte("OK"), nil},
{probe.Success, false, "OK", nil},
// Ok
{probe.Success, false, []byte("OK"), &fakeExitError{true, 0}},
{probe.Success, false, "OK", &fakeExitError{true, 0}},
// Run returns error
{probe.Unknown, true, []byte("OK, NOT"), fmt.Errorf("test error")},
{probe.Unknown, true, "", fmt.Errorf("test error")},
// Unhealthy
{probe.Failure, false, []byte("Fail"), &fakeExitError{true, 1}},
{probe.Failure, false, "Fail", &fakeExitError{true, 1}},
}
for _, test := range tests {
fake.out = test.output
fake.err = test.err
status, err := prober.Probe(&fake)
for i, test := range tests {
fake := FakeCmd{
out: []byte(test.output),
err: test.err,
}
status, output, err := prober.Probe(&fake)
if status != test.expectedStatus {
t.Errorf("expected %v, got %v", test.expectedStatus, status)
t.Errorf("[%d] expected %v, got %v", i, test.expectedStatus, status)
}
if err != nil && test.expectError == false {
t.Errorf("unexpected error: %v", err)
t.Errorf("[%d] unexpected error: %v", i, err)
}
if err == nil && test.expectError == true {
t.Errorf("unexpected non-error")
t.Errorf("[%d] unexpected non-error", i)
}
if test.output != output {
t.Errorf("[%d] expected %s, got %s", i, test.output, output)
}
}
}

View File

@ -17,6 +17,7 @@ limitations under the License.
package http
import (
"io/ioutil"
"net"
"net/http"
"net/url"
@ -34,7 +35,7 @@ func New() HTTPProber {
}
type HTTPProber interface {
Probe(host string, port int, path string, timeout time.Duration) (probe.Result, error)
Probe(host string, port int, path string, timeout time.Duration) (probe.Result, string, error)
}
type httpProber struct {
@ -42,7 +43,7 @@ type httpProber struct {
}
// Probe returns a ProbeRunner capable of running an http check.
func (pr httpProber) Probe(host string, port int, path string, timeout time.Duration) (probe.Result, error) {
func (pr httpProber) Probe(host string, port int, path string, timeout time.Duration) (probe.Result, string, error) {
return DoHTTPProbe(formatURL(host, port, path), &http.Client{Timeout: timeout, Transport: pr.transport})
}
@ -54,18 +55,23 @@ type HTTPGetInterface interface {
// If the HTTP response code is successful (i.e. 400 > code >= 200), it returns Success.
// If the HTTP response code is unsuccessful or HTTP communication fails, it returns Failure.
// This is exported because some other packages may want to do direct HTTP probes.
func DoHTTPProbe(url string, client HTTPGetInterface) (probe.Result, error) {
func DoHTTPProbe(url string, client HTTPGetInterface) (probe.Result, string, error) {
res, err := client.Get(url)
if err != nil {
glog.V(1).Infof("HTTP probe error: %v", err)
return probe.Failure, nil
// Convert errors into failures to catch timeouts.
return probe.Failure, err.Error(), nil
}
defer res.Body.Close()
if res.StatusCode >= http.StatusOK && res.StatusCode < http.StatusBadRequest {
return probe.Success, nil
b, err := ioutil.ReadAll(res.Body)
if err != nil {
return probe.Failure, "", err
}
glog.V(1).Infof("Health check failed for %s, Response: %v", url, *res)
return probe.Failure, nil
body := string(b)
if res.StatusCode >= http.StatusOK && res.StatusCode < http.StatusBadRequest {
return probe.Success, body, nil
}
glog.V(4).Infof("Probe failed for %s, Response: %v", url, *res)
return probe.Failure, body, nil
}
// formatURL formats a URL from args. For testability.

View File

@ -22,6 +22,7 @@ import (
"net/http/httptest"
"net/url"
"strconv"
"strings"
"testing"
"time"
@ -47,19 +48,23 @@ func TestFormatURL(t *testing.T) {
}
func TestHTTPProbeChecker(t *testing.T) {
handleReq := func(s int) func(w http.ResponseWriter) {
return func(w http.ResponseWriter) { w.WriteHeader(s) }
handleReq := func(s int, body string) func(w http.ResponseWriter) {
return func(w http.ResponseWriter) {
w.WriteHeader(s)
w.Write([]byte(body))
}
}
prober := New()
testCases := []struct {
handler func(w http.ResponseWriter)
health probe.Result
body string
}{
// The probe will be filled in below. This is primarily testing that an HTTP GET happens.
{handleReq(http.StatusOK), probe.Success},
{handleReq(-1), probe.Failure},
{func(w http.ResponseWriter) { time.Sleep(3 * time.Second) }, probe.Failure},
{handleReq(http.StatusOK, "ok body"), probe.Success, "ok body"},
{handleReq(-1, "fail body"), probe.Failure, "fail body"},
{func(w http.ResponseWriter) { time.Sleep(3 * time.Second) }, probe.Failure, "use of closed network connection"},
}
for _, test := range testCases {
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
@ -77,7 +82,7 @@ func TestHTTPProbeChecker(t *testing.T) {
if err != nil {
t.Errorf("Unexpected error: %v", err)
}
health, err := prober.Probe(host, p, "", 1*time.Second)
health, output, err := prober.Probe(host, p, "", 1*time.Second)
if test.health == probe.Unknown && err == nil {
t.Errorf("Expected error")
}
@ -87,5 +92,8 @@ func TestHTTPProbeChecker(t *testing.T) {
if health != test.health {
t.Errorf("Expected %v, got %v", test.health, health)
}
if !strings.Contains(output, test.body) {
t.Errorf("Expected %v, got %v", test.body, output)
}
}
}

View File

@ -31,12 +31,12 @@ func New() TCPProber {
}
type TCPProber interface {
Probe(host string, port int, timeout time.Duration) (probe.Result, error)
Probe(host string, port int, timeout time.Duration) (probe.Result, string, error)
}
type tcpProber struct{}
func (pr tcpProber) Probe(host string, port int, timeout time.Duration) (probe.Result, error) {
func (pr tcpProber) Probe(host string, port int, timeout time.Duration) (probe.Result, string, error) {
return DoTCPProbe(net.JoinHostPort(host, strconv.Itoa(port)), timeout)
}
@ -44,14 +44,15 @@ func (pr tcpProber) Probe(host string, port int, timeout time.Duration) (probe.R
// If the socket can be opened, it returns Success
// If the socket fails to open, it returns Failure.
// This is exported because some other packages may want to do direct TCP probes.
func DoTCPProbe(addr string, timeout time.Duration) (probe.Result, error) {
func DoTCPProbe(addr string, timeout time.Duration) (probe.Result, string, error) {
conn, err := net.DialTimeout("tcp", addr, timeout)
if err != nil {
return probe.Failure, nil
// Convert errors to failures to handle timeouts.
return probe.Failure, err.Error(), nil
}
err = conn.Close()
if err != nil {
glog.Errorf("unexpected error closing health check socket: %v (%#v)", err, err)
glog.Errorf("Unexpected error closing TCP probe socket: %v (%#v)", err, err)
}
return probe.Success, nil
return probe.Success, "", nil
}

View File

@ -22,6 +22,7 @@ import (
"net/http/httptest"
"net/url"
"strconv"
"strings"
"testing"
"time"
@ -34,10 +35,11 @@ func TestTcpHealthChecker(t *testing.T) {
expectedStatus probe.Result
usePort bool
expectError bool
output string
}{
// The probe will be filled in below. This is primarily testing that a connection is made.
{probe.Success, true, false},
{probe.Failure, false, false},
{probe.Success, true, false, ""},
{probe.Failure, false, false, "tcp: unknown port"},
}
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
@ -60,7 +62,7 @@ func TestTcpHealthChecker(t *testing.T) {
if !test.usePort {
p = -1
}
status, err := prober.Probe(host, p, 1*time.Second)
status, output, err := prober.Probe(host, p, 1*time.Second)
if status != test.expectedStatus {
t.Errorf("expected: %v, got: %v", test.expectedStatus, status)
}
@ -70,5 +72,8 @@ func TestTcpHealthChecker(t *testing.T) {
if err == nil && test.expectError {
t.Errorf("unexpected non-error.")
}
if !strings.Contains(output, test.output) {
t.Errorf("expected %s, got %s", test.output, output)
}
}
}