From 017f57a6b0cc052cfb041bf653f1ba984c6ec26b Mon Sep 17 00:00:00 2001 From: Casey Callendrello Date: Wed, 13 Feb 2019 18:48:45 +0100 Subject: [PATCH] proxy: add some useful metrics This adds some useful metrics around pending changes and last successful sync time. The goal is for administrators to be able to alert on proxies that, for whatever reason, are quite stale. Signed-off-by: Casey Callendrello --- pkg/proxy/BUILD | 1 + pkg/proxy/endpoints.go | 5 ++++ pkg/proxy/iptables/proxier.go | 1 + pkg/proxy/ipvs/proxier.go | 1 + pkg/proxy/metrics/metrics.go | 55 ++++++++++++++++++++++++++++++++++ pkg/proxy/service.go | 4 +++ pkg/proxy/winkernel/metrics.go | 11 +++++++ pkg/proxy/winkernel/proxier.go | 1 + 8 files changed, 79 insertions(+) diff --git a/pkg/proxy/BUILD b/pkg/proxy/BUILD index b43afc322c..8c69b95835 100644 --- a/pkg/proxy/BUILD +++ b/pkg/proxy/BUILD @@ -18,6 +18,7 @@ go_library( deps = [ "//pkg/api/v1/service:go_default_library", "//pkg/proxy/config:go_default_library", + "//pkg/proxy/metrics:go_default_library", "//pkg/proxy/util:go_default_library", "//staging/src/k8s.io/api/core/v1:go_default_library", "//staging/src/k8s.io/apimachinery/pkg/types:go_default_library", diff --git a/pkg/proxy/endpoints.go b/pkg/proxy/endpoints.go index 713f7e17eb..d81b2cab9e 100644 --- a/pkg/proxy/endpoints.go +++ b/pkg/proxy/endpoints.go @@ -29,6 +29,7 @@ import ( "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/sets" "k8s.io/client-go/tools/record" + "k8s.io/kubernetes/pkg/proxy/metrics" utilproxy "k8s.io/kubernetes/pkg/proxy/util" utilnet "k8s.io/utils/net" ) @@ -127,6 +128,7 @@ func (ect *EndpointChangeTracker) Update(previous, current *v1.Endpoints) bool { if endpoints == nil { return false } + metrics.EndpointChangesTotal.Inc() namespacedName := types.NamespacedName{Namespace: endpoints.Namespace, Name: endpoints.Name} ect.lock.Lock() @@ -154,6 +156,8 @@ func (ect *EndpointChangeTracker) Update(previous, current *v1.Endpoints) bool { // should be exported. delete(ect.lastChangeTriggerTimes, namespacedName) } + + metrics.EndpointChangesPending.Set(float64(len(ect.items))) return len(ect.items) > 0 } @@ -295,6 +299,7 @@ func (em EndpointsMap) apply(changes *EndpointChangeTracker, staleEndpoints *[]S detectStaleConnections(change.previous, change.current, staleEndpoints, staleServiceNames) } changes.items = make(map[types.NamespacedName]*endpointsChange) + metrics.EndpointChangesPending.Set(0) for _, lastChangeTriggerTime := range changes.lastChangeTriggerTimes { *lastChangeTriggerTimes = append(*lastChangeTriggerTimes, lastChangeTriggerTime...) } diff --git a/pkg/proxy/iptables/proxier.go b/pkg/proxy/iptables/proxier.go index e4b13b17c9..4d380f3f20 100644 --- a/pkg/proxy/iptables/proxier.go +++ b/pkg/proxy/iptables/proxier.go @@ -1395,6 +1395,7 @@ func (proxier *Proxier) syncProxyRules() { if proxier.healthzServer != nil { proxier.healthzServer.UpdateTimestamp() } + metrics.SyncProxyRulesLastTimestamp.SetToCurrentTime() // Update healthchecks. The endpoints list might include services that are // not "OnlyLocal", but the services list will not, and the healthChecker diff --git a/pkg/proxy/ipvs/proxier.go b/pkg/proxy/ipvs/proxier.go index 02452930bb..433cc5d9f5 100644 --- a/pkg/proxy/ipvs/proxier.go +++ b/pkg/proxy/ipvs/proxier.go @@ -1272,6 +1272,7 @@ func (proxier *Proxier) syncProxyRules() { if proxier.healthzServer != nil { proxier.healthzServer.UpdateTimestamp() } + metrics.SyncProxyRulesLastTimestamp.SetToCurrentTime() // Update healthchecks. The endpoints list might include services that are // not "OnlyLocal", but the services list will not, and the healthChecker diff --git a/pkg/proxy/metrics/metrics.go b/pkg/proxy/metrics/metrics.go index 38924387ff..54d7f0a4ca 100644 --- a/pkg/proxy/metrics/metrics.go +++ b/pkg/proxy/metrics/metrics.go @@ -46,6 +46,16 @@ var ( }, ) + // SyncProxyRulesLastTimestamp is the timestamp proxy rules were last + // successfully synced. + SyncProxyRulesLastTimestamp = prometheus.NewGauge( + prometheus.GaugeOpts{ + Subsystem: kubeProxySubsystem, + Name: "sync_proxy_rules_last_timestamp_seconds", + Help: "The last time proxy rules were successfully synced", + }, + ) + // NetworkProgrammingLatency is defined as the time it took to program the network - from the time // the service or pod has changed to the time the change was propagated and the proper kube-proxy // rules were synced. Exported for each endpoints object that were part of the rules sync. @@ -63,6 +73,46 @@ var ( Buckets: prometheus.ExponentialBuckets(0.001, 2, 20), }, ) + + // EndpointChangesPending is the number of pending endpoint changes that + // have not yet been synced to the proxy. + EndpointChangesPending = prometheus.NewGauge( + prometheus.GaugeOpts{ + Subsystem: kubeProxySubsystem, + Name: "sync_proxy_rules_endpoint_changes_pending", + Help: "Pending proxy rules Endpoint changes", + }, + ) + + // EndpointChangesTotal is the number of endpoint changes that the proxy + // has seen. + EndpointChangesTotal = prometheus.NewCounter( + prometheus.CounterOpts{ + Subsystem: kubeProxySubsystem, + Name: "sync_proxy_rules_endpoint_changes_total", + Help: "Cumulative proxy rules Endpoint changes", + }, + ) + + // ServiceChangesPending is the number of pending service changes that + // have not yet been synced to the proxy. + ServiceChangesPending = prometheus.NewGauge( + prometheus.GaugeOpts{ + Subsystem: kubeProxySubsystem, + Name: "sync_proxy_rules_service_changes_pending", + Help: "Pending proxy rules Service changes", + }, + ) + + // ServiceChangesTotal is the number of service changes that the proxy has + // seen. + ServiceChangesTotal = prometheus.NewCounter( + prometheus.CounterOpts{ + Subsystem: kubeProxySubsystem, + Name: "sync_proxy_rules_service_changes_total", + Help: "Cumulative proxy rules Service changes", + }, + ) ) var registerMetricsOnce sync.Once @@ -72,7 +122,12 @@ func RegisterMetrics() { registerMetricsOnce.Do(func() { prometheus.MustRegister(SyncProxyRulesLatency) prometheus.MustRegister(DeprecatedSyncProxyRulesLatency) + prometheus.MustRegister(SyncProxyRulesLastTimestamp) prometheus.MustRegister(NetworkProgrammingLatency) + prometheus.MustRegister(EndpointChangesPending) + prometheus.MustRegister(EndpointChangesTotal) + prometheus.MustRegister(ServiceChangesPending) + prometheus.MustRegister(ServiceChangesTotal) }) } diff --git a/pkg/proxy/service.go b/pkg/proxy/service.go index eab394a925..eecc643adb 100644 --- a/pkg/proxy/service.go +++ b/pkg/proxy/service.go @@ -30,6 +30,7 @@ import ( "k8s.io/apimachinery/pkg/util/sets" "k8s.io/client-go/tools/record" apiservice "k8s.io/kubernetes/pkg/api/v1/service" + "k8s.io/kubernetes/pkg/proxy/metrics" utilproxy "k8s.io/kubernetes/pkg/proxy/util" utilnet "k8s.io/utils/net" ) @@ -198,6 +199,7 @@ func (sct *ServiceChangeTracker) Update(previous, current *v1.Service) bool { if svc == nil { return false } + metrics.ServiceChangesTotal.Inc() namespacedName := types.NamespacedName{Namespace: svc.Namespace, Name: svc.Name} sct.lock.Lock() @@ -214,6 +216,7 @@ func (sct *ServiceChangeTracker) Update(previous, current *v1.Service) bool { if reflect.DeepEqual(change.previous, change.current) { delete(sct.items, namespacedName) } + metrics.ServiceChangesPending.Set(float64(len(sct.items))) return len(sct.items) > 0 } @@ -296,6 +299,7 @@ func (sm *ServiceMap) apply(changes *ServiceChangeTracker, UDPStaleClusterIP set } // clear changes after applying them to ServiceMap. changes.items = make(map[types.NamespacedName]*serviceChange) + metrics.ServiceChangesPending.Set(0) return } diff --git a/pkg/proxy/winkernel/metrics.go b/pkg/proxy/winkernel/metrics.go index 61cf962ee0..729cc5e626 100644 --- a/pkg/proxy/winkernel/metrics.go +++ b/pkg/proxy/winkernel/metrics.go @@ -43,6 +43,16 @@ var ( Buckets: prometheus.ExponentialBuckets(1000, 2, 15), }, ) + + // SyncProxyRulesLastTimestamp is the timestamp proxy rules were last + // successfully synced. + SyncProxyRulesLastTimestamp = prometheus.NewGauge( + prometheus.GaugeOpts{ + Subsystem: kubeProxySubsystem, + Name: "sync_proxy_rules_last_timestamp_seconds", + Help: "The last time proxy rules were successfully synced", + }, + ) ) var registerMetricsOnce sync.Once @@ -51,6 +61,7 @@ func RegisterMetrics() { registerMetricsOnce.Do(func() { prometheus.MustRegister(SyncProxyRulesLatency) prometheus.MustRegister(DeprecatedSyncProxyRulesLatency) + prometheus.MustRegister(SyncProxyRulesLastTimestamp) }) } diff --git a/pkg/proxy/winkernel/proxier.go b/pkg/proxy/winkernel/proxier.go index 061b8922c0..4817a4a261 100644 --- a/pkg/proxy/winkernel/proxier.go +++ b/pkg/proxy/winkernel/proxier.go @@ -1197,6 +1197,7 @@ func (proxier *Proxier) syncProxyRules() { if proxier.healthzServer != nil { proxier.healthzServer.UpdateTimestamp() } + SyncProxyRulesLastTimestamp.SetToCurrentTime() // Update healthchecks. The endpoints list might include services that are // not "OnlyLocal", but the services list will not, and the healthChecker