Merge pull request #61369 from rramkumar1/expose-kubelet-health-checks

Automatic merge from submit-queue (batch tested with PRs 61894, 61369). If you want to cherry-pick this change to another branch, please follow the instructions <a href="https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md">here</a>.

Expose kubelet health checks using new prometheus endpoint

**What this PR does / why we need it**:
Expose the results of kubelet liveness and readiness probes through a new endpoint on the kubelet called /containerHealth. This endpoint will expose a Prometheus metric. Below is a snippet of output when that endpoint is queried.

```
rramkumar@e2e-test-rramkumar-master ~ $ curl localhost:10255/metrics/probes
# HELP prober_probe_result The result of a liveness or readiness probe for a container.
# TYPE prober_probe_result gauge
prober_probe_result{container_name="kube-apiserver",namespace="kube-system",pod_name="kube-apiserver-e2e-test-rramkumar-master",pod_uid="949e11ad296ad9e3c842fd900f8cc723",probe_type="Liveness"} 0
prober_probe_result{container_name="kube-controller-manager",namespace="kube-system",pod_name="kube-controller-manager-e2e-test-rramkumar-master",pod_uid="0abfc37840bba279706ec39ae53a924c",probe_type="Liveness"} 0
prober_probe_result{container_name="kube-scheduler",namespace="kube-system",pod_name="kube-scheduler-e2e-test-rramkumar-master",pod_uid="0cd4171f9c806808291e6e24f99f0454",probe_type="Liveness"} 0
prober_probe_result{container_name="l7-lb-controller",namespace="kube-system",pod_name="l7-lb-controller-v0.9.8-alpha.2-e2e-test-rramkumar-master",pod_uid="968c792f4c1772566c71403dca2407f9",probe_type="Liveness"} 0
```

**Which issue(s) this PR fixes** *(optional, in `fixes #<issue number>(, fixes #<issue_number>, ...)` format, will close the issue(s) when PR gets merged)*:
Fixes #58235



**Release note**:
```release-note
Kubelet now exposes a new endpoint /metrics/probes which exposes a Prometheus metric containing the liveness and/or readiness probe results for a container.
```
pull/8/head
Kubernetes Submit Queue 2018-03-30 21:16:04 -07:00 committed by GitHub
commit 20f7f37c49
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 49 additions and 0 deletions

View File

@ -26,6 +26,7 @@ go_library(
"//pkg/probe/http:go_default_library",
"//pkg/probe/tcp:go_default_library",
"//vendor/github.com/golang/glog:go_default_library",
"//vendor/github.com/prometheus/client_golang/prometheus:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/types:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/intstr:go_default_library",

View File

@ -20,6 +20,7 @@ import (
"sync"
"github.com/golang/glog"
"github.com/prometheus/client_golang/prometheus"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/sets"
@ -31,6 +32,16 @@ import (
"k8s.io/kubernetes/pkg/kubelet/util/format"
)
// ProberResults stores the results of a probe as prometheus metrics.
var ProberResults = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Subsystem: "prober",
Name: "probe_result",
Help: "The result of a liveness or readiness probe for a container.",
},
[]string{"probe_type", "container_name", "pod_name", "namespace", "pod_uid"},
)
// Manager manages pod probing. It creates a probe "worker" for every container that specifies a
// probe (AddPod). The worker periodically probes its assigned container and caches the results. The
// manager use the cached probe results to set the appropriate Ready state in the PodStatus when

View File

@ -58,6 +58,18 @@ func (r Result) String() string {
}
}
// ToPrometheusType translates a Result to a form which is better understood by prometheus.
func (r Result) ToPrometheusType() float64 {
switch r {
case Success:
return 0
case Failure:
return 1
default:
return -1
}
}
// Update is an enum of the types of updates sent over the Updates channel.
type Update struct {
ContainerID kubecontainer.ContainerID

View File

@ -21,6 +21,7 @@ import (
"time"
"github.com/golang/glog"
"github.com/prometheus/client_golang/prometheus"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/util/runtime"
podutil "k8s.io/kubernetes/pkg/api/v1/pod"
@ -65,6 +66,10 @@ type worker struct {
// If set, skip probing.
onHold bool
// proberResultsMetricLabels holds the labels attached to this worker
// for the ProberResults metric.
proberResultsMetricLabels prometheus.Labels
}
// Creates and starts a new probe worker.
@ -93,6 +98,14 @@ func newWorker(
w.initialValue = results.Success
}
w.proberResultsMetricLabels = prometheus.Labels{
"probe_type": w.probeType.String(),
"container_name": w.container.Name,
"pod_name": w.pod.Name,
"namespace": w.pod.Namespace,
"pod_uid": string(w.pod.UID),
}
return w
}
@ -114,6 +127,7 @@ func (w *worker) run() {
}
w.probeManager.removeWorker(w.pod.UID, w.container.Name, w.probeType)
ProberResults.Delete(w.proberResultsMetricLabels)
}()
probeLoop:
@ -218,6 +232,7 @@ func (w *worker) doProbe() (keepGoing bool) {
}
w.resultsManager.Set(w.containerID, result, w.pod)
ProberResults.With(w.proberResultsMetricLabels).Set(result.ToPrometheusType())
if w.probeType == liveness && result == results.Failure {
// The container fails a liveness check, it will need to be restarted.

View File

@ -19,6 +19,7 @@ go_library(
"//pkg/apis/core:go_default_library",
"//pkg/apis/core/v1/validation:go_default_library",
"//pkg/kubelet/container:go_default_library",
"//pkg/kubelet/prober:go_default_library",
"//pkg/kubelet/server/portforward:go_default_library",
"//pkg/kubelet/server/remotecommand:go_default_library",
"//pkg/kubelet/server/stats:go_default_library",

View File

@ -54,6 +54,7 @@ import (
api "k8s.io/kubernetes/pkg/apis/core"
"k8s.io/kubernetes/pkg/apis/core/v1/validation"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
"k8s.io/kubernetes/pkg/kubelet/prober"
"k8s.io/kubernetes/pkg/kubelet/server/portforward"
remotecommandserver "k8s.io/kubernetes/pkg/kubelet/server/remotecommand"
"k8s.io/kubernetes/pkg/kubelet/server/stats"
@ -66,6 +67,7 @@ import (
const (
metricsPath = "/metrics"
cadvisorMetricsPath = "/metrics/cadvisor"
proberMetricsPath = "/metrics/probes"
specPath = "/spec/"
statsPath = "/stats/"
logsPath = "/logs/"
@ -281,6 +283,13 @@ func (s *Server) InstallDefaultHandlers() {
promhttp.HandlerFor(r, promhttp.HandlerOpts{ErrorHandling: promhttp.ContinueOnError}),
)
// prober metrics are exposed under a different endpoint
p := prometheus.NewRegistry()
p.MustRegister(prober.ProberResults)
s.restfulCont.Handle(proberMetricsPath,
promhttp.HandlerFor(p, promhttp.HandlerOpts{ErrorHandling: promhttp.ContinueOnError}),
)
ws = new(restful.WebService)
ws.
Path(specPath).