proxy: add some useful metrics

This adds some useful metrics around pending changes and last successful
sync time.

The goal is for administrators to be able to alert on proxies that, for
whatever reason, are quite stale.

Signed-off-by: Casey Callendrello <cdc@redhat.com>
k3s-v1.15.3
Casey Callendrello 2019-02-13 18:48:45 +01:00
parent a1588cfe34
commit 017f57a6b0
8 changed files with 79 additions and 0 deletions

View File

@ -18,6 +18,7 @@ go_library(
deps = [
"//pkg/api/v1/service:go_default_library",
"//pkg/proxy/config:go_default_library",
"//pkg/proxy/metrics:go_default_library",
"//pkg/proxy/util:go_default_library",
"//staging/src/k8s.io/api/core/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/types:go_default_library",

View File

@ -29,6 +29,7 @@ import (
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/client-go/tools/record"
"k8s.io/kubernetes/pkg/proxy/metrics"
utilproxy "k8s.io/kubernetes/pkg/proxy/util"
utilnet "k8s.io/utils/net"
)
@ -127,6 +128,7 @@ func (ect *EndpointChangeTracker) Update(previous, current *v1.Endpoints) bool {
if endpoints == nil {
return false
}
metrics.EndpointChangesTotal.Inc()
namespacedName := types.NamespacedName{Namespace: endpoints.Namespace, Name: endpoints.Name}
ect.lock.Lock()
@ -154,6 +156,8 @@ func (ect *EndpointChangeTracker) Update(previous, current *v1.Endpoints) bool {
// should be exported.
delete(ect.lastChangeTriggerTimes, namespacedName)
}
metrics.EndpointChangesPending.Set(float64(len(ect.items)))
return len(ect.items) > 0
}
@ -295,6 +299,7 @@ func (em EndpointsMap) apply(changes *EndpointChangeTracker, staleEndpoints *[]S
detectStaleConnections(change.previous, change.current, staleEndpoints, staleServiceNames)
}
changes.items = make(map[types.NamespacedName]*endpointsChange)
metrics.EndpointChangesPending.Set(0)
for _, lastChangeTriggerTime := range changes.lastChangeTriggerTimes {
*lastChangeTriggerTimes = append(*lastChangeTriggerTimes, lastChangeTriggerTime...)
}

View File

@ -1395,6 +1395,7 @@ func (proxier *Proxier) syncProxyRules() {
if proxier.healthzServer != nil {
proxier.healthzServer.UpdateTimestamp()
}
metrics.SyncProxyRulesLastTimestamp.SetToCurrentTime()
// Update healthchecks. The endpoints list might include services that are
// not "OnlyLocal", but the services list will not, and the healthChecker

View File

@ -1272,6 +1272,7 @@ func (proxier *Proxier) syncProxyRules() {
if proxier.healthzServer != nil {
proxier.healthzServer.UpdateTimestamp()
}
metrics.SyncProxyRulesLastTimestamp.SetToCurrentTime()
// Update healthchecks. The endpoints list might include services that are
// not "OnlyLocal", but the services list will not, and the healthChecker

View File

@ -46,6 +46,16 @@ var (
},
)
// SyncProxyRulesLastTimestamp is the timestamp proxy rules were last
// successfully synced.
SyncProxyRulesLastTimestamp = prometheus.NewGauge(
prometheus.GaugeOpts{
Subsystem: kubeProxySubsystem,
Name: "sync_proxy_rules_last_timestamp_seconds",
Help: "The last time proxy rules were successfully synced",
},
)
// NetworkProgrammingLatency is defined as the time it took to program the network - from the time
// the service or pod has changed to the time the change was propagated and the proper kube-proxy
// rules were synced. Exported for each endpoints object that were part of the rules sync.
@ -63,6 +73,46 @@ var (
Buckets: prometheus.ExponentialBuckets(0.001, 2, 20),
},
)
// EndpointChangesPending is the number of pending endpoint changes that
// have not yet been synced to the proxy.
EndpointChangesPending = prometheus.NewGauge(
prometheus.GaugeOpts{
Subsystem: kubeProxySubsystem,
Name: "sync_proxy_rules_endpoint_changes_pending",
Help: "Pending proxy rules Endpoint changes",
},
)
// EndpointChangesTotal is the number of endpoint changes that the proxy
// has seen.
EndpointChangesTotal = prometheus.NewCounter(
prometheus.CounterOpts{
Subsystem: kubeProxySubsystem,
Name: "sync_proxy_rules_endpoint_changes_total",
Help: "Cumulative proxy rules Endpoint changes",
},
)
// ServiceChangesPending is the number of pending service changes that
// have not yet been synced to the proxy.
ServiceChangesPending = prometheus.NewGauge(
prometheus.GaugeOpts{
Subsystem: kubeProxySubsystem,
Name: "sync_proxy_rules_service_changes_pending",
Help: "Pending proxy rules Service changes",
},
)
// ServiceChangesTotal is the number of service changes that the proxy has
// seen.
ServiceChangesTotal = prometheus.NewCounter(
prometheus.CounterOpts{
Subsystem: kubeProxySubsystem,
Name: "sync_proxy_rules_service_changes_total",
Help: "Cumulative proxy rules Service changes",
},
)
)
var registerMetricsOnce sync.Once
@ -72,7 +122,12 @@ func RegisterMetrics() {
registerMetricsOnce.Do(func() {
prometheus.MustRegister(SyncProxyRulesLatency)
prometheus.MustRegister(DeprecatedSyncProxyRulesLatency)
prometheus.MustRegister(SyncProxyRulesLastTimestamp)
prometheus.MustRegister(NetworkProgrammingLatency)
prometheus.MustRegister(EndpointChangesPending)
prometheus.MustRegister(EndpointChangesTotal)
prometheus.MustRegister(ServiceChangesPending)
prometheus.MustRegister(ServiceChangesTotal)
})
}

View File

@ -30,6 +30,7 @@ import (
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/client-go/tools/record"
apiservice "k8s.io/kubernetes/pkg/api/v1/service"
"k8s.io/kubernetes/pkg/proxy/metrics"
utilproxy "k8s.io/kubernetes/pkg/proxy/util"
utilnet "k8s.io/utils/net"
)
@ -198,6 +199,7 @@ func (sct *ServiceChangeTracker) Update(previous, current *v1.Service) bool {
if svc == nil {
return false
}
metrics.ServiceChangesTotal.Inc()
namespacedName := types.NamespacedName{Namespace: svc.Namespace, Name: svc.Name}
sct.lock.Lock()
@ -214,6 +216,7 @@ func (sct *ServiceChangeTracker) Update(previous, current *v1.Service) bool {
if reflect.DeepEqual(change.previous, change.current) {
delete(sct.items, namespacedName)
}
metrics.ServiceChangesPending.Set(float64(len(sct.items)))
return len(sct.items) > 0
}
@ -296,6 +299,7 @@ func (sm *ServiceMap) apply(changes *ServiceChangeTracker, UDPStaleClusterIP set
}
// clear changes after applying them to ServiceMap.
changes.items = make(map[types.NamespacedName]*serviceChange)
metrics.ServiceChangesPending.Set(0)
return
}

View File

@ -43,6 +43,16 @@ var (
Buckets: prometheus.ExponentialBuckets(1000, 2, 15),
},
)
// SyncProxyRulesLastTimestamp is the timestamp proxy rules were last
// successfully synced.
SyncProxyRulesLastTimestamp = prometheus.NewGauge(
prometheus.GaugeOpts{
Subsystem: kubeProxySubsystem,
Name: "sync_proxy_rules_last_timestamp_seconds",
Help: "The last time proxy rules were successfully synced",
},
)
)
var registerMetricsOnce sync.Once
@ -51,6 +61,7 @@ func RegisterMetrics() {
registerMetricsOnce.Do(func() {
prometheus.MustRegister(SyncProxyRulesLatency)
prometheus.MustRegister(DeprecatedSyncProxyRulesLatency)
prometheus.MustRegister(SyncProxyRulesLastTimestamp)
})
}

View File

@ -1197,6 +1197,7 @@ func (proxier *Proxier) syncProxyRules() {
if proxier.healthzServer != nil {
proxier.healthzServer.UpdateTimestamp()
}
SyncProxyRulesLastTimestamp.SetToCurrentTime()
// Update healthchecks. The endpoints list might include services that are
// not "OnlyLocal", but the services list will not, and the healthChecker