Merge pull request #52237 from smarterclayton/watch_metric

Automatic merge from submit-queue (batch tested with PRs 51824, 50476, 52451, 52009, 52237)

Improve apiserver metrics reporting

Normalize "WATCHLIST" to "WATCH", add "scope" to the other metrics (listing 50k pods is != listing pods in a namespace), and add a new scope "resource" to cover individual resource calls.

This roughly aligns metrics with our ACL model (technically resource scope is GET, but POST to a subresource and POST to a namespace are not the same thing).

```release-note
WATCHLIST calls are now reported as WATCH verbs in prometheus for the apiserver_request_* series.  A new "scope" label is added to all apiserver_request_* values that is either 'cluster', 'resource', or 'namespace' depending on which level the query is performed at.
```
pull/6/head
Kubernetes Submit Queue 2017-09-15 01:08:11 -07:00 committed by GitHub
commit 93ddb7be5f
7 changed files with 57 additions and 42 deletions

View File

@ -96,6 +96,9 @@ func (r *ProxyHandler) ServeHTTP(w http.ResponseWriter, req *http.Request) {
if namespace != "" {
scope = "namespace"
}
if requestInfo.Name != "" {
scope = "resource"
}
ctx = request.WithNamespace(ctx, namespace)
if len(parts) < 2 {

View File

@ -550,25 +550,24 @@ func (a *APIInstaller) registerResourceHandlers(path string, storage rest.Storag
versionedObject = defaultVersionedObject
}
reqScope.Namer = action.Namer
namespaced := ""
requestScope := "cluster"
var namespaced string
var operationSuffix string
if apiResource.Namespaced {
requestScope = "namespace"
namespaced = "Namespaced"
}
operationSuffix := ""
if strings.HasSuffix(action.Path, "/{path:*}") {
requestScope = "resource"
operationSuffix = operationSuffix + "WithPath"
}
if action.AllNamespaces {
requestScope = "cluster"
operationSuffix = operationSuffix + "ForAllNamespaces"
namespaced = ""
}
// This variable is calculated for the purpose of instrumentation.
namespaceScope := "cluster"
if namespaced != "" {
namespaceScope = "namespace"
}
if kubeVerb, found := toDiscoveryKubeVerb[action.Verb]; found {
if len(kubeVerb) != 0 {
kubeVerbs[kubeVerb] = struct{}{}
@ -601,9 +600,9 @@ func (a *APIInstaller) registerResourceHandlers(path string, storage rest.Storag
if needOverride {
// need change the reported verb
handler = metrics.InstrumentRouteFunc(verbOverrider.OverrideMetricsVerb(action.Verb), resource, subresource, namespaceScope, handler)
handler = metrics.InstrumentRouteFunc(verbOverrider.OverrideMetricsVerb(action.Verb), resource, subresource, requestScope, handler)
} else {
handler = metrics.InstrumentRouteFunc(action.Verb, resource, subresource, namespaceScope, handler)
handler = metrics.InstrumentRouteFunc(action.Verb, resource, subresource, requestScope, handler)
}
if a.enableAPIResponseCompression {
@ -637,7 +636,7 @@ func (a *APIInstaller) registerResourceHandlers(path string, storage rest.Storag
if hasSubresource {
doc = "list " + subresource + " of objects of kind " + kind
}
handler := metrics.InstrumentRouteFunc(action.Verb, resource, subresource, namespaceScope, restfulListResource(lister, watcher, reqScope, false, a.minRequestTimeout))
handler := metrics.InstrumentRouteFunc(action.Verb, resource, subresource, requestScope, restfulListResource(lister, watcher, reqScope, false, a.minRequestTimeout))
if a.enableAPIResponseCompression {
handler = genericfilters.RestfulWithCompression(handler, a.group.Context)
}
@ -672,7 +671,7 @@ func (a *APIInstaller) registerResourceHandlers(path string, storage rest.Storag
if hasSubresource {
doc = "replace " + subresource + " of the specified " + kind
}
handler := metrics.InstrumentRouteFunc(action.Verb, resource, subresource, namespaceScope, restfulUpdateResource(updater, reqScope, a.group.Typer, admit))
handler := metrics.InstrumentRouteFunc(action.Verb, resource, subresource, requestScope, restfulUpdateResource(updater, reqScope, a.group.Typer, admit))
route := ws.PUT(action.Path).To(handler).
Doc(doc).
Param(ws.QueryParameter("pretty", "If 'true', then the output is pretty printed.")).
@ -688,7 +687,7 @@ func (a *APIInstaller) registerResourceHandlers(path string, storage rest.Storag
if hasSubresource {
doc = "partially update " + subresource + " of the specified " + kind
}
handler := metrics.InstrumentRouteFunc(action.Verb, resource, subresource, namespaceScope, restfulPatchResource(patcher, reqScope, admit, mapping.ObjectConvertor))
handler := metrics.InstrumentRouteFunc(action.Verb, resource, subresource, requestScope, restfulPatchResource(patcher, reqScope, admit, mapping.ObjectConvertor))
route := ws.PATCH(action.Path).To(handler).
Doc(doc).
Param(ws.QueryParameter("pretty", "If 'true', then the output is pretty printed.")).
@ -707,7 +706,7 @@ func (a *APIInstaller) registerResourceHandlers(path string, storage rest.Storag
} else {
handler = restfulCreateResource(creater, reqScope, a.group.Typer, admit)
}
handler = metrics.InstrumentRouteFunc(action.Verb, resource, subresource, namespaceScope, handler)
handler = metrics.InstrumentRouteFunc(action.Verb, resource, subresource, requestScope, handler)
article := getArticleForNoun(kind, " ")
doc := "create" + article + kind
if hasSubresource {
@ -729,7 +728,7 @@ func (a *APIInstaller) registerResourceHandlers(path string, storage rest.Storag
if hasSubresource {
doc = "delete " + subresource + " of" + article + kind
}
handler := metrics.InstrumentRouteFunc(action.Verb, resource, subresource, namespaceScope, restfulDeleteResource(gracefulDeleter, isGracefulDeleter, reqScope, admit))
handler := metrics.InstrumentRouteFunc(action.Verb, resource, subresource, requestScope, restfulDeleteResource(gracefulDeleter, isGracefulDeleter, reqScope, admit))
route := ws.DELETE(action.Path).To(handler).
Doc(doc).
Param(ws.QueryParameter("pretty", "If 'true', then the output is pretty printed.")).
@ -750,7 +749,7 @@ func (a *APIInstaller) registerResourceHandlers(path string, storage rest.Storag
if hasSubresource {
doc = "delete collection of " + subresource + " of a " + kind
}
handler := metrics.InstrumentRouteFunc(action.Verb, resource, subresource, namespaceScope, restfulDeleteCollection(collectionDeleter, isCollectionDeleter, reqScope, admit))
handler := metrics.InstrumentRouteFunc(action.Verb, resource, subresource, requestScope, restfulDeleteCollection(collectionDeleter, isCollectionDeleter, reqScope, admit))
route := ws.DELETE(action.Path).To(handler).
Doc(doc).
Param(ws.QueryParameter("pretty", "If 'true', then the output is pretty printed.")).
@ -769,7 +768,7 @@ func (a *APIInstaller) registerResourceHandlers(path string, storage rest.Storag
if hasSubresource {
doc = "watch changes to " + subresource + " of an object of kind " + kind
}
handler := metrics.InstrumentRouteFunc(action.Verb, resource, subresource, namespaceScope, restfulListResource(lister, watcher, reqScope, true, a.minRequestTimeout))
handler := metrics.InstrumentRouteFunc(action.Verb, resource, subresource, requestScope, restfulListResource(lister, watcher, reqScope, true, a.minRequestTimeout))
route := ws.GET(action.Path).To(handler).
Doc(doc).
Param(ws.QueryParameter("pretty", "If 'true', then the output is pretty printed.")).
@ -788,7 +787,7 @@ func (a *APIInstaller) registerResourceHandlers(path string, storage rest.Storag
if hasSubresource {
doc = "watch individual changes to a list of " + subresource + " of " + kind
}
handler := metrics.InstrumentRouteFunc(action.Verb, resource, subresource, namespaceScope, restfulListResource(lister, watcher, reqScope, true, a.minRequestTimeout))
handler := metrics.InstrumentRouteFunc(action.Verb, resource, subresource, requestScope, restfulListResource(lister, watcher, reqScope, true, a.minRequestTimeout))
route := ws.GET(action.Path).To(handler).
Doc(doc).
Param(ws.QueryParameter("pretty", "If 'true', then the output is pretty printed.")).
@ -806,20 +805,20 @@ func (a *APIInstaller) registerResourceHandlers(path string, storage rest.Storag
// TODO: DEPRECATED in v1.2.
case "PROXY": // Proxy requests to a resource.
// Accept all methods as per http://issue.k8s.io/3996
routes = append(routes, buildProxyRoute(ws, "GET", a.prefix, action.Path, kind, resource, subresource, namespaced, namespaceScope, hasSubresource, action.Params, proxyHandler, operationSuffix))
routes = append(routes, buildProxyRoute(ws, "PUT", a.prefix, action.Path, kind, resource, subresource, namespaced, namespaceScope, hasSubresource, action.Params, proxyHandler, operationSuffix))
routes = append(routes, buildProxyRoute(ws, "POST", a.prefix, action.Path, kind, resource, subresource, namespaced, namespaceScope, hasSubresource, action.Params, proxyHandler, operationSuffix))
routes = append(routes, buildProxyRoute(ws, "PATCH", a.prefix, action.Path, kind, resource, subresource, namespaced, namespaceScope, hasSubresource, action.Params, proxyHandler, operationSuffix))
routes = append(routes, buildProxyRoute(ws, "DELETE", a.prefix, action.Path, kind, resource, subresource, namespaced, namespaceScope, hasSubresource, action.Params, proxyHandler, operationSuffix))
routes = append(routes, buildProxyRoute(ws, "HEAD", a.prefix, action.Path, kind, resource, subresource, namespaced, namespaceScope, hasSubresource, action.Params, proxyHandler, operationSuffix))
routes = append(routes, buildProxyRoute(ws, "OPTIONS", a.prefix, action.Path, kind, resource, subresource, namespaced, namespaceScope, hasSubresource, action.Params, proxyHandler, operationSuffix))
routes = append(routes, buildProxyRoute(ws, "GET", a.prefix, action.Path, kind, resource, subresource, namespaced, requestScope, hasSubresource, action.Params, proxyHandler, operationSuffix))
routes = append(routes, buildProxyRoute(ws, "PUT", a.prefix, action.Path, kind, resource, subresource, namespaced, requestScope, hasSubresource, action.Params, proxyHandler, operationSuffix))
routes = append(routes, buildProxyRoute(ws, "POST", a.prefix, action.Path, kind, resource, subresource, namespaced, requestScope, hasSubresource, action.Params, proxyHandler, operationSuffix))
routes = append(routes, buildProxyRoute(ws, "PATCH", a.prefix, action.Path, kind, resource, subresource, namespaced, requestScope, hasSubresource, action.Params, proxyHandler, operationSuffix))
routes = append(routes, buildProxyRoute(ws, "DELETE", a.prefix, action.Path, kind, resource, subresource, namespaced, requestScope, hasSubresource, action.Params, proxyHandler, operationSuffix))
routes = append(routes, buildProxyRoute(ws, "HEAD", a.prefix, action.Path, kind, resource, subresource, namespaced, requestScope, hasSubresource, action.Params, proxyHandler, operationSuffix))
routes = append(routes, buildProxyRoute(ws, "OPTIONS", a.prefix, action.Path, kind, resource, subresource, namespaced, requestScope, hasSubresource, action.Params, proxyHandler, operationSuffix))
case "CONNECT":
for _, method := range connecter.ConnectMethods() {
doc := "connect " + method + " requests to " + kind
if hasSubresource {
doc = "connect " + method + " requests to " + subresource + " of " + kind
}
handler := metrics.InstrumentRouteFunc(action.Verb, resource, subresource, namespaceScope, restfulConnectResource(connecter, reqScope, admit, path, hasSubresource))
handler := metrics.InstrumentRouteFunc(action.Verb, resource, subresource, requestScope, restfulConnectResource(connecter, reqScope, admit, path, hasSubresource))
route := ws.Method(method).Path(action.Path).
To(handler).
Doc(doc).
@ -892,7 +891,7 @@ func routeFunction(handler http.Handler) restful.RouteFunction {
}
func buildProxyRoute(ws *restful.WebService,
method, prefix, path, kind, resource, subresource, namespaced, namespaceScope string,
method, prefix, path, kind, resource, subresource, namespaced, requestScope string,
hasSubresource bool,
params []*restful.Parameter,
proxyHandler http.Handler,
@ -901,7 +900,7 @@ func buildProxyRoute(ws *restful.WebService,
if hasSubresource {
doc = "proxy " + method + " requests to " + subresource + " of " + kind
}
handler := metrics.InstrumentRouteFunc("PROXY", resource, subresource, namespaceScope, routeFunction(proxyHandler))
handler := metrics.InstrumentRouteFunc("PROXY", resource, subresource, requestScope, routeFunction(proxyHandler))
proxyRoute := ws.Method(method).Path(path).To(handler).
Doc(doc).
Operation("proxy" + strings.Title(method) + namespaced + kind + strings.Title(subresource) + operationSuffix).

View File

@ -41,7 +41,7 @@ var (
Name: "apiserver_request_count",
Help: "Counter of apiserver requests broken out for each verb, API resource, client, and HTTP response contentType and code.",
},
[]string{"verb", "resource", "subresource", "client", "contentType", "code"},
[]string{"verb", "resource", "subresource", "scope", "client", "contentType", "code"},
)
requestLatencies = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
@ -50,7 +50,7 @@ var (
// Use buckets ranging from 125 ms to 8 seconds.
Buckets: prometheus.ExponentialBuckets(125000, 2.0, 7),
},
[]string{"verb", "resource", "subresource"},
[]string{"verb", "resource", "subresource", "scope"},
)
requestLatenciesSummary = prometheus.NewSummaryVec(
prometheus.SummaryOpts{
@ -59,7 +59,7 @@ var (
// Make the sliding window of 1h.
MaxAge: time.Hour,
},
[]string{"verb", "resource", "subresource"},
[]string{"verb", "resource", "subresource", "scope"},
)
responseSizes = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
@ -85,9 +85,9 @@ func Register() {
// uppercase to be backwards compatible with existing monitoring tooling.
func Monitor(verb, resource, subresource, scope, client, contentType string, httpCode, respSize int, reqStart time.Time) {
elapsed := float64((time.Since(reqStart)) / time.Microsecond)
requestCounter.WithLabelValues(verb, resource, subresource, client, contentType, codeToString(httpCode)).Inc()
requestLatencies.WithLabelValues(verb, resource, subresource).Observe(elapsed)
requestLatenciesSummary.WithLabelValues(verb, resource, subresource).Observe(elapsed)
requestCounter.WithLabelValues(verb, resource, subresource, scope, client, contentType, codeToString(httpCode)).Inc()
requestLatencies.WithLabelValues(verb, resource, subresource, scope).Observe(elapsed)
requestLatenciesSummary.WithLabelValues(verb, resource, subresource, scope).Observe(elapsed)
// We are only interested in response sizes of read requests.
if verb == "GET" || verb == "LIST" {
responseSizes.WithLabelValues(verb, resource, subresource, scope).Observe(float64(respSize))
@ -106,6 +106,10 @@ func MonitorRequest(request *http.Request, verb, resource, subresource, scope, c
}
}
}
// normalize the legacy WATCHLIST to WATCH to ensure users aren't surprised by metrics
if verb == "WATCHLIST" {
reportedVerb = "WATCH"
}
client := cleanUserAgent(utilnet.GetHTTPClient(request))
Monitor(reportedVerb, resource, subresource, scope, client, contentType, httpCode, respSize, reqStart)

View File

@ -112,6 +112,9 @@ func WithMaxInFlightLimit(
if requestInfo.Namespace != "" {
scope = "namespace"
}
if requestInfo.Name != "" {
scope = "resource"
}
if requestInfo.IsResourceRequest {
metrics.MonitorRequest(r, strings.ToUpper(requestInfo.Verb), requestInfo.Resource, requestInfo.Subresource, "", scope, http.StatusTooManyRequests, 0, time.Now())
} else {

View File

@ -61,6 +61,9 @@ func WithTimeoutForNonLongRunningRequests(handler http.Handler, requestContextMa
if requestInfo.Namespace != "" {
scope = "namespace"
}
if requestInfo.Name != "" {
scope = "resource"
}
if requestInfo.IsResourceRequest {
metrics.MonitorRequest(req, strings.ToUpper(requestInfo.Verb), requestInfo.Resource, requestInfo.Subresource, "", scope, http.StatusGatewayTimeout, 0, now)
} else {

View File

@ -230,6 +230,7 @@ type APICall struct {
Resource string `json:"resource"`
Subresource string `json:"subresource"`
Verb string `json:"verb"`
Scope string `json:"scope"`
Latency LatencyMetric `json:"latency"`
Count int `json:"count"`
}
@ -261,14 +262,14 @@ func (a *APIResponsiveness) Less(i, j int) bool {
// Set request latency for a particular quantile in the APICall metric entry (creating one if necessary).
// 0 <= quantile <=1 (e.g. 0.95 is 95%tile, 0.5 is median)
// Only 0.5, 0.9 and 0.99 quantiles are supported.
func (a *APIResponsiveness) addMetricRequestLatency(resource, subresource, verb string, quantile float64, latency time.Duration) {
func (a *APIResponsiveness) addMetricRequestLatency(resource, subresource, verb, scope string, quantile float64, latency time.Duration) {
for i, apicall := range a.APICalls {
if apicall.Resource == resource && apicall.Subresource == subresource && apicall.Verb == verb {
if apicall.Resource == resource && apicall.Subresource == subresource && apicall.Verb == verb && apicall.Scope == scope {
a.APICalls[i] = setQuantileAPICall(apicall, quantile, latency)
return
}
}
apicall := setQuantileAPICall(APICall{Resource: resource, Subresource: subresource, Verb: verb}, quantile, latency)
apicall := setQuantileAPICall(APICall{Resource: resource, Subresource: subresource, Verb: verb, Scope: scope}, quantile, latency)
a.APICalls = append(a.APICalls, apicall)
}
@ -292,14 +293,14 @@ func setQuantile(metric *LatencyMetric, quantile float64, latency time.Duration)
}
// Add request count to the APICall metric entry (creating one if necessary).
func (a *APIResponsiveness) addMetricRequestCount(resource, subresource, verb string, count int) {
func (a *APIResponsiveness) addMetricRequestCount(resource, subresource, verb, scope string, count int) {
for i, apicall := range a.APICalls {
if apicall.Resource == resource && apicall.Subresource == subresource && apicall.Verb == verb {
if apicall.Resource == resource && apicall.Subresource == subresource && apicall.Verb == verb && apicall.Scope == scope {
a.APICalls[i].Count += count
return
}
}
apicall := APICall{Resource: resource, Subresource: subresource, Verb: verb, Count: count}
apicall := APICall{Resource: resource, Subresource: subresource, Verb: verb, Count: count, Scope: scope}
a.APICalls = append(a.APICalls, apicall)
}
@ -332,6 +333,7 @@ func readLatencyMetrics(c clientset.Interface) (*APIResponsiveness, error) {
resource := string(sample.Metric["resource"])
subresource := string(sample.Metric["subresource"])
verb := string(sample.Metric["verb"])
scope := string(sample.Metric["scope"])
if ignoredResources.Has(resource) || ignoredVerbs.Has(verb) {
continue
}
@ -343,10 +345,10 @@ func readLatencyMetrics(c clientset.Interface) (*APIResponsiveness, error) {
if err != nil {
return nil, err
}
a.addMetricRequestLatency(resource, subresource, verb, quantile, time.Duration(int64(latency))*time.Microsecond)
a.addMetricRequestLatency(resource, subresource, verb, scope, quantile, time.Duration(int64(latency))*time.Microsecond)
case "apiserver_request_count":
count := sample.Value
a.addMetricRequestCount(resource, subresource, verb, int(count))
a.addMetricRequestCount(resource, subresource, verb, scope, int(count))
}
}

View File

@ -44,6 +44,7 @@ func ApiCallToPerfData(apicalls *APIResponsiveness) *perftype.PerfData {
"Verb": apicall.Verb,
"Resource": apicall.Resource,
"Subresource": apicall.Subresource,
"Scope": apicall.Scope,
"Count": fmt.Sprintf("%v", apicall.Count),
},
}