mirror of https://github.com/k3s-io/k3s
Merge pull request #61369 from rramkumar1/expose-kubelet-health-checks
Automatic merge from submit-queue (batch tested with PRs 61894, 61369). If you want to cherry-pick this change to another branch, please follow the instructions <a href="https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md">here</a>. Expose kubelet health checks using new prometheus endpoint **What this PR does / why we need it**: Expose the results of kubelet liveness and readiness probes through a new endpoint on the kubelet called /metrics/probes. This endpoint will expose a Prometheus metric. Below is a snippet of output when that endpoint is queried. ``` rramkumar@e2e-test-rramkumar-master ~ $ curl localhost:10255/metrics/probes # HELP prober_probe_result The result of a liveness or readiness probe for a container. # TYPE prober_probe_result gauge prober_probe_result{container_name="kube-apiserver",namespace="kube-system",pod_name="kube-apiserver-e2e-test-rramkumar-master",pod_uid="949e11ad296ad9e3c842fd900f8cc723",probe_type="Liveness"} 0 prober_probe_result{container_name="kube-controller-manager",namespace="kube-system",pod_name="kube-controller-manager-e2e-test-rramkumar-master",pod_uid="0abfc37840bba279706ec39ae53a924c",probe_type="Liveness"} 0 prober_probe_result{container_name="kube-scheduler",namespace="kube-system",pod_name="kube-scheduler-e2e-test-rramkumar-master",pod_uid="0cd4171f9c806808291e6e24f99f0454",probe_type="Liveness"} 0 prober_probe_result{container_name="l7-lb-controller",namespace="kube-system",pod_name="l7-lb-controller-v0.9.8-alpha.2-e2e-test-rramkumar-master",pod_uid="968c792f4c1772566c71403dca2407f9",probe_type="Liveness"} 0 ``` **Which issue(s) this PR fixes** *(optional, in `fixes #<issue number>(, fixes #<issue_number>, ...)` format, will close the issue(s) when PR gets merged)*: Fixes #58235 **Release note**: ```release-note Kubelet now exposes a new endpoint /metrics/probes which exposes a Prometheus metric containing the liveness and/or readiness probe results for a container. ```pull/8/head
commit
20f7f37c49
|
@ -26,6 +26,7 @@ go_library(
|
|||
"//pkg/probe/http:go_default_library",
|
||||
"//pkg/probe/tcp:go_default_library",
|
||||
"//vendor/github.com/golang/glog:go_default_library",
|
||||
"//vendor/github.com/prometheus/client_golang/prometheus:go_default_library",
|
||||
"//vendor/k8s.io/api/core/v1:go_default_library",
|
||||
"//vendor/k8s.io/apimachinery/pkg/types:go_default_library",
|
||||
"//vendor/k8s.io/apimachinery/pkg/util/intstr:go_default_library",
|
||||
|
|
|
@ -20,6 +20,7 @@ import (
|
|||
"sync"
|
||||
|
||||
"github.com/golang/glog"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"k8s.io/api/core/v1"
|
||||
"k8s.io/apimachinery/pkg/types"
|
||||
"k8s.io/apimachinery/pkg/util/sets"
|
||||
|
@ -31,6 +32,16 @@ import (
|
|||
"k8s.io/kubernetes/pkg/kubelet/util/format"
|
||||
)
|
||||
|
||||
// ProberResults stores the results of a probe as prometheus metrics.
|
||||
var ProberResults = prometheus.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Subsystem: "prober",
|
||||
Name: "probe_result",
|
||||
Help: "The result of a liveness or readiness probe for a container.",
|
||||
},
|
||||
[]string{"probe_type", "container_name", "pod_name", "namespace", "pod_uid"},
|
||||
)
|
||||
|
||||
// Manager manages pod probing. It creates a probe "worker" for every container that specifies a
|
||||
// probe (AddPod). The worker periodically probes its assigned container and caches the results. The
|
||||
// manager uses the cached probe results to set the appropriate Ready state in the PodStatus when
|
||||
|
|
|
@ -58,6 +58,18 @@ func (r Result) String() string {
|
|||
}
|
||||
}
|
||||
|
||||
// ToPrometheusType translates a Result to a form which is better understood by prometheus.
|
||||
func (r Result) ToPrometheusType() float64 {
|
||||
switch r {
|
||||
case Success:
|
||||
return 0
|
||||
case Failure:
|
||||
return 1
|
||||
default:
|
||||
return -1
|
||||
}
|
||||
}
|
||||
|
||||
// Update describes a single update sent over the Updates channel.
|
||||
type Update struct {
|
||||
ContainerID kubecontainer.ContainerID
|
||||
|
|
|
@ -21,6 +21,7 @@ import (
|
|||
"time"
|
||||
|
||||
"github.com/golang/glog"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"k8s.io/api/core/v1"
|
||||
"k8s.io/apimachinery/pkg/util/runtime"
|
||||
podutil "k8s.io/kubernetes/pkg/api/v1/pod"
|
||||
|
@ -65,6 +66,10 @@ type worker struct {
|
|||
|
||||
// If set, skip probing.
|
||||
onHold bool
|
||||
|
||||
// proberResultsMetricLabels holds the labels attached to this worker
|
||||
// for the ProberResults metric.
|
||||
proberResultsMetricLabels prometheus.Labels
|
||||
}
|
||||
|
||||
// Creates and starts a new probe worker.
|
||||
|
@ -93,6 +98,14 @@ func newWorker(
|
|||
w.initialValue = results.Success
|
||||
}
|
||||
|
||||
w.proberResultsMetricLabels = prometheus.Labels{
|
||||
"probe_type": w.probeType.String(),
|
||||
"container_name": w.container.Name,
|
||||
"pod_name": w.pod.Name,
|
||||
"namespace": w.pod.Namespace,
|
||||
"pod_uid": string(w.pod.UID),
|
||||
}
|
||||
|
||||
return w
|
||||
}
|
||||
|
||||
|
@ -114,6 +127,7 @@ func (w *worker) run() {
|
|||
}
|
||||
|
||||
w.probeManager.removeWorker(w.pod.UID, w.container.Name, w.probeType)
|
||||
ProberResults.Delete(w.proberResultsMetricLabels)
|
||||
}()
|
||||
|
||||
probeLoop:
|
||||
|
@ -218,6 +232,7 @@ func (w *worker) doProbe() (keepGoing bool) {
|
|||
}
|
||||
|
||||
w.resultsManager.Set(w.containerID, result, w.pod)
|
||||
ProberResults.With(w.proberResultsMetricLabels).Set(result.ToPrometheusType())
|
||||
|
||||
if w.probeType == liveness && result == results.Failure {
|
||||
// The container fails a liveness check, it will need to be restarted.
|
||||
|
|
|
@ -19,6 +19,7 @@ go_library(
|
|||
"//pkg/apis/core:go_default_library",
|
||||
"//pkg/apis/core/v1/validation:go_default_library",
|
||||
"//pkg/kubelet/container:go_default_library",
|
||||
"//pkg/kubelet/prober:go_default_library",
|
||||
"//pkg/kubelet/server/portforward:go_default_library",
|
||||
"//pkg/kubelet/server/remotecommand:go_default_library",
|
||||
"//pkg/kubelet/server/stats:go_default_library",
|
||||
|
|
|
@ -54,6 +54,7 @@ import (
|
|||
api "k8s.io/kubernetes/pkg/apis/core"
|
||||
"k8s.io/kubernetes/pkg/apis/core/v1/validation"
|
||||
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
|
||||
"k8s.io/kubernetes/pkg/kubelet/prober"
|
||||
"k8s.io/kubernetes/pkg/kubelet/server/portforward"
|
||||
remotecommandserver "k8s.io/kubernetes/pkg/kubelet/server/remotecommand"
|
||||
"k8s.io/kubernetes/pkg/kubelet/server/stats"
|
||||
|
@ -66,6 +67,7 @@ import (
|
|||
const (
|
||||
metricsPath = "/metrics"
|
||||
cadvisorMetricsPath = "/metrics/cadvisor"
|
||||
proberMetricsPath = "/metrics/probes"
|
||||
specPath = "/spec/"
|
||||
statsPath = "/stats/"
|
||||
logsPath = "/logs/"
|
||||
|
@ -281,6 +283,13 @@ func (s *Server) InstallDefaultHandlers() {
|
|||
promhttp.HandlerFor(r, promhttp.HandlerOpts{ErrorHandling: promhttp.ContinueOnError}),
|
||||
)
|
||||
|
||||
// prober metrics are exposed under a different endpoint
|
||||
p := prometheus.NewRegistry()
|
||||
p.MustRegister(prober.ProberResults)
|
||||
s.restfulCont.Handle(proberMetricsPath,
|
||||
promhttp.HandlerFor(p, promhttp.HandlerOpts{ErrorHandling: promhttp.ContinueOnError}),
|
||||
)
|
||||
|
||||
ws = new(restful.WebService)
|
||||
ws.
|
||||
Path(specPath).
|
||||
|
|
Loading…
Reference in New Issue