mirror of https://github.com/k3s-io/k3s
444 lines
13 KiB
Go
444 lines
13 KiB
Go
/*
|
|
Copyright 2015 The Kubernetes Authors.
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
you may not use this file except in compliance with the License.
|
|
You may obtain a copy of the License at
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
See the License for the specific language governing permissions and
|
|
limitations under the License.
|
|
*/
|
|
|
|
package metrics
|
|
|
|
import (
|
|
"bufio"
|
|
"net"
|
|
"net/http"
|
|
"regexp"
|
|
"strconv"
|
|
"strings"
|
|
"sync"
|
|
"time"
|
|
|
|
utilnet "k8s.io/apimachinery/pkg/util/net"
|
|
"k8s.io/apiserver/pkg/endpoints/request"
|
|
|
|
"github.com/emicklei/go-restful"
|
|
"github.com/prometheus/client_golang/prometheus"
|
|
)
|
|
|
|
// resettableCollector is the interface implemented by prometheus.MetricVec
|
|
// that can be used by Prometheus to collect metrics and reset their values.
|
|
type resettableCollector interface {
|
|
prometheus.Collector
|
|
Reset()
|
|
}
|
|
|
|
var (
|
|
// TODO(a-robinson): Add unit tests for the handling of these metrics once
|
|
// the upstream library supports it.
|
|
requestCounter = prometheus.NewCounterVec(
|
|
prometheus.CounterOpts{
|
|
Name: "apiserver_request_count",
|
|
Help: "Counter of apiserver requests broken out for each verb, API resource, client, and HTTP response contentType and code.",
|
|
},
|
|
[]string{"verb", "resource", "subresource", "scope", "client", "contentType", "code"},
|
|
)
|
|
longRunningRequestGauge = prometheus.NewGaugeVec(
|
|
prometheus.GaugeOpts{
|
|
Name: "apiserver_longrunning_gauge",
|
|
Help: "Gauge of all active long-running apiserver requests broken out by verb, API resource, and scope. Not all requests are tracked this way.",
|
|
},
|
|
[]string{"verb", "resource", "subresource", "scope"},
|
|
)
|
|
requestLatencies = prometheus.NewHistogramVec(
|
|
prometheus.HistogramOpts{
|
|
Name: "apiserver_request_latencies",
|
|
Help: "Response latency distribution in microseconds for each verb, resource and subresource.",
|
|
// Use buckets ranging from 125 ms to 8 seconds.
|
|
Buckets: prometheus.ExponentialBuckets(125000, 2.0, 7),
|
|
},
|
|
[]string{"verb", "resource", "subresource", "scope"},
|
|
)
|
|
requestLatenciesSummary = prometheus.NewSummaryVec(
|
|
prometheus.SummaryOpts{
|
|
Name: "apiserver_request_latencies_summary",
|
|
Help: "Response latency summary in microseconds for each verb, resource and subresource.",
|
|
// Make the sliding window of 5h.
|
|
// TODO: The value for this should be based on our SLI definition (medium term).
|
|
MaxAge: 5 * time.Hour,
|
|
},
|
|
[]string{"verb", "resource", "subresource", "scope"},
|
|
)
|
|
responseSizes = prometheus.NewHistogramVec(
|
|
prometheus.HistogramOpts{
|
|
Name: "apiserver_response_sizes",
|
|
Help: "Response size distribution in bytes for each verb, resource, subresource and scope (namespace/cluster).",
|
|
// Use buckets ranging from 1000 bytes (1KB) to 10^9 bytes (1GB).
|
|
Buckets: prometheus.ExponentialBuckets(1000, 10.0, 7),
|
|
},
|
|
[]string{"verb", "resource", "subresource", "scope"},
|
|
)
|
|
// DroppedRequests is a number of requests dropped with 'Try again later' response"
|
|
DroppedRequests = prometheus.NewCounterVec(
|
|
prometheus.CounterOpts{
|
|
Name: "apiserver_dropped_requests",
|
|
Help: "Number of requests dropped with 'Try again later' response",
|
|
},
|
|
[]string{"requestKind"},
|
|
)
|
|
// RegisteredWatchers is a number of currently registered watchers splitted by resource.
|
|
RegisteredWatchers = prometheus.NewGaugeVec(
|
|
prometheus.GaugeOpts{
|
|
Name: "apiserver_registered_watchers",
|
|
Help: "Number of currently registered watchers for a given resources",
|
|
},
|
|
[]string{"group", "version", "kind"},
|
|
)
|
|
// Because of volatality of the base metric this is pre-aggregated one. Instead of reporing current usage all the time
|
|
// it reports maximal usage during the last second.
|
|
currentInflightRequests = prometheus.NewGaugeVec(
|
|
prometheus.GaugeOpts{
|
|
Name: "apiserver_current_inflight_requests",
|
|
Help: "Maximal mumber of currently used inflight request limit of this apiserver per request kind in last second.",
|
|
},
|
|
[]string{"requestKind"},
|
|
)
|
|
kubectlExeRegexp = regexp.MustCompile(`^.*((?i:kubectl\.exe))`)
|
|
|
|
metrics = []resettableCollector{
|
|
requestCounter,
|
|
longRunningRequestGauge,
|
|
requestLatencies,
|
|
requestLatenciesSummary,
|
|
responseSizes,
|
|
DroppedRequests,
|
|
RegisteredWatchers,
|
|
currentInflightRequests,
|
|
}
|
|
)
|
|
|
|
// Values used for the "requestKind" label.
const (
	// ReadOnlyKind labels requests that only read.
	ReadOnlyKind = "readOnly"

	// MutatingKind labels requests that mutate.
	MutatingKind = "mutating"
)
|
|
|
|
var registerMetrics sync.Once
|
|
|
|
// Register all metrics.
|
|
func Register() {
|
|
registerMetrics.Do(func() {
|
|
for _, metric := range metrics {
|
|
prometheus.MustRegister(metric)
|
|
}
|
|
})
|
|
}
|
|
|
|
// Reset all metrics.
|
|
func Reset() {
|
|
for _, metric := range metrics {
|
|
metric.Reset()
|
|
}
|
|
}
|
|
|
|
func UpdateInflightRequestMetrics(nonmutating, mutating int) {
|
|
currentInflightRequests.WithLabelValues(ReadOnlyKind).Set(float64(nonmutating))
|
|
currentInflightRequests.WithLabelValues(MutatingKind).Set(float64(mutating))
|
|
}
|
|
|
|
// Record records a single request to the standard metrics endpoints. For use by handlers that perform their own
|
|
// processing. All API paths should use InstrumentRouteFunc implicitly. Use this instead of MonitorRequest if
|
|
// you already have a RequestInfo object.
|
|
func Record(req *http.Request, requestInfo *request.RequestInfo, contentType string, code int, responseSizeInBytes int, elapsed time.Duration) {
|
|
if requestInfo == nil {
|
|
requestInfo = &request.RequestInfo{Verb: req.Method, Path: req.URL.Path}
|
|
}
|
|
scope := CleanScope(requestInfo)
|
|
if requestInfo.IsResourceRequest {
|
|
MonitorRequest(req, strings.ToUpper(requestInfo.Verb), requestInfo.Resource, requestInfo.Subresource, scope, contentType, code, responseSizeInBytes, elapsed)
|
|
} else {
|
|
MonitorRequest(req, strings.ToUpper(requestInfo.Verb), "", requestInfo.Path, scope, contentType, code, responseSizeInBytes, elapsed)
|
|
}
|
|
}
|
|
|
|
// RecordLongRunning tracks the execution of a long running request against the API server. It provides an accurate count
|
|
// of the total number of open long running requests. requestInfo may be nil if the caller is not in the normal request flow.
|
|
func RecordLongRunning(req *http.Request, requestInfo *request.RequestInfo, fn func()) {
|
|
if requestInfo == nil {
|
|
requestInfo = &request.RequestInfo{Verb: req.Method, Path: req.URL.Path}
|
|
}
|
|
var g prometheus.Gauge
|
|
scope := CleanScope(requestInfo)
|
|
reportedVerb := cleanVerb(strings.ToUpper(requestInfo.Verb), req)
|
|
if requestInfo.IsResourceRequest {
|
|
g = longRunningRequestGauge.WithLabelValues(reportedVerb, requestInfo.Resource, requestInfo.Subresource, scope)
|
|
} else {
|
|
g = longRunningRequestGauge.WithLabelValues(reportedVerb, "", requestInfo.Path, scope)
|
|
}
|
|
g.Inc()
|
|
defer g.Dec()
|
|
fn()
|
|
}
|
|
|
|
// MonitorRequest handles standard transformations for client and the reported verb and then invokes Monitor to record
|
|
// a request. verb must be uppercase to be backwards compatible with existing monitoring tooling.
|
|
func MonitorRequest(req *http.Request, verb, resource, subresource, scope, contentType string, httpCode, respSize int, elapsed time.Duration) {
|
|
reportedVerb := cleanVerb(verb, req)
|
|
client := cleanUserAgent(utilnet.GetHTTPClient(req))
|
|
elapsedMicroseconds := float64(elapsed / time.Microsecond)
|
|
requestCounter.WithLabelValues(reportedVerb, resource, subresource, scope, client, contentType, codeToString(httpCode)).Inc()
|
|
requestLatencies.WithLabelValues(reportedVerb, resource, subresource, scope).Observe(elapsedMicroseconds)
|
|
requestLatenciesSummary.WithLabelValues(reportedVerb, resource, subresource, scope).Observe(elapsedMicroseconds)
|
|
// We are only interested in response sizes of read requests.
|
|
if verb == "GET" || verb == "LIST" {
|
|
responseSizes.WithLabelValues(reportedVerb, resource, subresource, scope).Observe(float64(respSize))
|
|
}
|
|
}
|
|
|
|
// InstrumentRouteFunc works like Prometheus' InstrumentHandlerFunc but wraps
|
|
// the go-restful RouteFunction instead of a HandlerFunc plus some Kubernetes endpoint specific information.
|
|
func InstrumentRouteFunc(verb, resource, subresource, scope string, routeFunc restful.RouteFunction) restful.RouteFunction {
|
|
return restful.RouteFunction(func(request *restful.Request, response *restful.Response) {
|
|
now := time.Now()
|
|
|
|
delegate := &ResponseWriterDelegator{ResponseWriter: response.ResponseWriter}
|
|
|
|
_, cn := response.ResponseWriter.(http.CloseNotifier)
|
|
_, fl := response.ResponseWriter.(http.Flusher)
|
|
_, hj := response.ResponseWriter.(http.Hijacker)
|
|
var rw http.ResponseWriter
|
|
if cn && fl && hj {
|
|
rw = &fancyResponseWriterDelegator{delegate}
|
|
} else {
|
|
rw = delegate
|
|
}
|
|
response.ResponseWriter = rw
|
|
|
|
routeFunc(request, response)
|
|
|
|
MonitorRequest(request.Request, verb, resource, subresource, scope, delegate.Header().Get("Content-Type"), delegate.Status(), delegate.ContentLength(), time.Since(now))
|
|
})
|
|
}
|
|
|
|
// InstrumentHandlerFunc works like Prometheus' InstrumentHandlerFunc but adds some Kubernetes endpoint specific information.
|
|
func InstrumentHandlerFunc(verb, resource, subresource, scope string, handler http.HandlerFunc) http.HandlerFunc {
|
|
return func(w http.ResponseWriter, req *http.Request) {
|
|
now := time.Now()
|
|
|
|
delegate := &ResponseWriterDelegator{ResponseWriter: w}
|
|
|
|
_, cn := w.(http.CloseNotifier)
|
|
_, fl := w.(http.Flusher)
|
|
_, hj := w.(http.Hijacker)
|
|
if cn && fl && hj {
|
|
w = &fancyResponseWriterDelegator{delegate}
|
|
} else {
|
|
w = delegate
|
|
}
|
|
|
|
handler(w, req)
|
|
|
|
MonitorRequest(req, verb, resource, subresource, scope, delegate.Header().Get("Content-Type"), delegate.Status(), delegate.ContentLength(), time.Since(now))
|
|
}
|
|
}
|
|
|
|
// CleanScope returns the scope of the request.
|
|
func CleanScope(requestInfo *request.RequestInfo) string {
|
|
if requestInfo.Namespace != "" {
|
|
return "namespace"
|
|
}
|
|
if requestInfo.Name != "" {
|
|
return "resource"
|
|
}
|
|
if requestInfo.IsResourceRequest {
|
|
return "cluster"
|
|
}
|
|
// this is the empty scope
|
|
return ""
|
|
}
|
|
|
|
// cleanVerb returns the verb to report in metrics. A LIST whose first "watch"
// query parameter is anything but "0" or "false" (case-insensitive) is
// reported as WATCH, and so is the legacy WATCHLIST verb. Every other verb is
// returned unchanged.
func cleanVerb(verb string, request *http.Request) string {
	switch verb {
	case "WATCHLIST":
		// normalize the legacy WATCHLIST to WATCH to ensure users aren't surprised by metrics
		return "WATCH"
	case "LIST":
		watchParams := request.URL.Query()["watch"]
		if len(watchParams) == 0 {
			return verb
		}
		// see apimachinery/pkg/runtime/conversion.go Convert_Slice_string_To_bool
		switch strings.ToLower(watchParams[0]) {
		case "0", "false":
			return verb
		}
		return "WATCH"
	}
	return verb
}
|
|
|
|
func cleanUserAgent(ua string) string {
|
|
// We collapse all "web browser"-type user agents into one "browser" to reduce metric cardinality.
|
|
if strings.HasPrefix(ua, "Mozilla/") {
|
|
return "Browser"
|
|
}
|
|
// If an old "kubectl.exe" has passed us its full path, we discard the path portion.
|
|
ua = kubectlExeRegexp.ReplaceAllString(ua, "$1")
|
|
return ua
|
|
}
|
|
|
|
// ResponseWriterDelegator wraps an http.ResponseWriter and additionally
// records the status code passed to WriteHeader and the number of body bytes
// written through it.
type ResponseWriterDelegator struct {
	http.ResponseWriter

	status      int
	written     int64
	wroteHeader bool
}

// WriteHeader remembers code as the response status and forwards the call to
// the wrapped writer.
func (d *ResponseWriterDelegator) WriteHeader(code int) {
	d.status, d.wroteHeader = code, true
	d.ResponseWriter.WriteHeader(code)
}

// Write forwards b to the wrapped writer and adds the number of bytes it
// reports as written to the running total. If WriteHeader has not been called
// on the delegator yet, it is called first with http.StatusOK.
func (d *ResponseWriterDelegator) Write(b []byte) (int, error) {
	if !d.wroteHeader {
		d.WriteHeader(http.StatusOK)
	}
	count, err := d.ResponseWriter.Write(b)
	d.written += int64(count)
	return count, err
}

// Status returns the last status code passed to WriteHeader, or 0 if
// WriteHeader has not been called.
func (d *ResponseWriterDelegator) Status() int {
	return d.status
}

// ContentLength returns the number of body bytes written so far.
func (d *ResponseWriterDelegator) ContentLength() int {
	return int(d.written)
}
|
|
|
|
type fancyResponseWriterDelegator struct {
|
|
*ResponseWriterDelegator
|
|
}
|
|
|
|
func (f *fancyResponseWriterDelegator) CloseNotify() <-chan bool {
|
|
return f.ResponseWriter.(http.CloseNotifier).CloseNotify()
|
|
}
|
|
|
|
func (f *fancyResponseWriterDelegator) Flush() {
|
|
f.ResponseWriter.(http.Flusher).Flush()
|
|
}
|
|
|
|
func (f *fancyResponseWriterDelegator) Hijack() (net.Conn, *bufio.ReadWriter, error) {
|
|
return f.ResponseWriter.(http.Hijacker).Hijack()
|
|
}
|
|
|
|
// codeToString converts an HTTP status code to its decimal string. Commonly
// seen codes are returned as string constants, a small optimization over
// strconv.Itoa; any other value falls back to strconv.Itoa.
func codeToString(s int) string {
	switch s {
	// 1xx
	case 100:
		return "100"
	case 101:
		return "101"

	// 2xx
	case 200:
		return "200"
	case 201:
		return "201"
	case 202:
		return "202"
	case 203:
		return "203"
	case 204:
		return "204"
	case 205:
		return "205"
	case 206:
		return "206"

	// 3xx
	case 300:
		return "300"
	case 301:
		return "301"
	case 302:
		return "302"
	case 304:
		return "304"
	case 305:
		return "305"
	case 307:
		return "307"

	// 4xx
	case 400:
		return "400"
	case 401:
		return "401"
	case 402:
		return "402"
	case 403:
		return "403"
	case 404:
		return "404"
	case 405:
		return "405"
	case 406:
		return "406"
	case 407:
		return "407"
	case 408:
		return "408"
	case 409:
		return "409"
	case 410:
		return "410"
	case 411:
		return "411"
	case 412:
		return "412"
	case 413:
		return "413"
	case 414:
		return "414"
	case 415:
		return "415"
	case 416:
		return "416"
	case 417:
		return "417"
	case 418:
		return "418"
	case 428:
		return "428"
	case 429:
		return "429"
	case 431:
		return "431"

	// 5xx
	case 500:
		return "500"
	case 501:
		return "501"
	case 502:
		return "502"
	case 503:
		return "503"
	case 504:
		return "504"
	case 505:
		return "505"
	case 511:
		return "511"
	}

	// Anything without a constant above.
	return strconv.Itoa(s)
}
|