mirror of https://github.com/k3s-io/k3s
proxy: add some useful metrics
This adds some useful metrics around pending changes and last successful sync time. The goal is for administrators to be able to alert on proxies that, for whatever reason, are quite stale.
Signed-off-by: Casey Callendrello <cdc@redhat.com>
k3s-v1.15.3
parent
a1588cfe34
commit
017f57a6b0
|
@ -18,6 +18,7 @@ go_library(
|
|||
deps = [
|
||||
"//pkg/api/v1/service:go_default_library",
|
||||
"//pkg/proxy/config:go_default_library",
|
||||
"//pkg/proxy/metrics:go_default_library",
|
||||
"//pkg/proxy/util:go_default_library",
|
||||
"//staging/src/k8s.io/api/core/v1:go_default_library",
|
||||
"//staging/src/k8s.io/apimachinery/pkg/types:go_default_library",
|
||||
|
|
|
@ -29,6 +29,7 @@ import (
|
|||
"k8s.io/apimachinery/pkg/types"
|
||||
"k8s.io/apimachinery/pkg/util/sets"
|
||||
"k8s.io/client-go/tools/record"
|
||||
"k8s.io/kubernetes/pkg/proxy/metrics"
|
||||
utilproxy "k8s.io/kubernetes/pkg/proxy/util"
|
||||
utilnet "k8s.io/utils/net"
|
||||
)
|
||||
|
@ -127,6 +128,7 @@ func (ect *EndpointChangeTracker) Update(previous, current *v1.Endpoints) bool {
|
|||
if endpoints == nil {
|
||||
return false
|
||||
}
|
||||
metrics.EndpointChangesTotal.Inc()
|
||||
namespacedName := types.NamespacedName{Namespace: endpoints.Namespace, Name: endpoints.Name}
|
||||
|
||||
ect.lock.Lock()
|
||||
|
@ -154,6 +156,8 @@ func (ect *EndpointChangeTracker) Update(previous, current *v1.Endpoints) bool {
|
|||
// should be exported.
|
||||
delete(ect.lastChangeTriggerTimes, namespacedName)
|
||||
}
|
||||
|
||||
metrics.EndpointChangesPending.Set(float64(len(ect.items)))
|
||||
return len(ect.items) > 0
|
||||
}
|
||||
|
||||
|
@ -295,6 +299,7 @@ func (em EndpointsMap) apply(changes *EndpointChangeTracker, staleEndpoints *[]S
|
|||
detectStaleConnections(change.previous, change.current, staleEndpoints, staleServiceNames)
|
||||
}
|
||||
changes.items = make(map[types.NamespacedName]*endpointsChange)
|
||||
metrics.EndpointChangesPending.Set(0)
|
||||
for _, lastChangeTriggerTime := range changes.lastChangeTriggerTimes {
|
||||
*lastChangeTriggerTimes = append(*lastChangeTriggerTimes, lastChangeTriggerTime...)
|
||||
}
|
||||
|
|
|
@ -1395,6 +1395,7 @@ func (proxier *Proxier) syncProxyRules() {
|
|||
if proxier.healthzServer != nil {
|
||||
proxier.healthzServer.UpdateTimestamp()
|
||||
}
|
||||
metrics.SyncProxyRulesLastTimestamp.SetToCurrentTime()
|
||||
|
||||
// Update healthchecks. The endpoints list might include services that are
|
||||
// not "OnlyLocal", but the services list will not, and the healthChecker
|
||||
|
|
|
@ -1272,6 +1272,7 @@ func (proxier *Proxier) syncProxyRules() {
|
|||
if proxier.healthzServer != nil {
|
||||
proxier.healthzServer.UpdateTimestamp()
|
||||
}
|
||||
metrics.SyncProxyRulesLastTimestamp.SetToCurrentTime()
|
||||
|
||||
// Update healthchecks. The endpoints list might include services that are
|
||||
// not "OnlyLocal", but the services list will not, and the healthChecker
|
||||
|
|
|
@ -46,6 +46,16 @@ var (
|
|||
},
|
||||
)
|
||||
|
||||
// SyncProxyRulesLastTimestamp is the timestamp proxy rules were last
|
||||
// successfully synced.
|
||||
SyncProxyRulesLastTimestamp = prometheus.NewGauge(
|
||||
prometheus.GaugeOpts{
|
||||
Subsystem: kubeProxySubsystem,
|
||||
Name: "sync_proxy_rules_last_timestamp_seconds",
|
||||
Help: "The last time proxy rules were successfully synced",
|
||||
},
|
||||
)
|
||||
|
||||
// NetworkProgrammingLatency is defined as the time it took to program the network - from the time
|
||||
// the service or pod has changed to the time the change was propagated and the proper kube-proxy
|
||||
// rules were synced. Exported for each endpoints object that was part of the rules sync.
|
||||
|
@ -63,6 +73,46 @@ var (
|
|||
Buckets: prometheus.ExponentialBuckets(0.001, 2, 20),
|
||||
},
|
||||
)
|
||||
|
||||
// EndpointChangesPending is the number of pending endpoint changes that
|
||||
// have not yet been synced to the proxy.
|
||||
EndpointChangesPending = prometheus.NewGauge(
|
||||
prometheus.GaugeOpts{
|
||||
Subsystem: kubeProxySubsystem,
|
||||
Name: "sync_proxy_rules_endpoint_changes_pending",
|
||||
Help: "Pending proxy rules Endpoint changes",
|
||||
},
|
||||
)
|
||||
|
||||
// EndpointChangesTotal is the number of endpoint changes that the proxy
|
||||
// has seen.
|
||||
EndpointChangesTotal = prometheus.NewCounter(
|
||||
prometheus.CounterOpts{
|
||||
Subsystem: kubeProxySubsystem,
|
||||
Name: "sync_proxy_rules_endpoint_changes_total",
|
||||
Help: "Cumulative proxy rules Endpoint changes",
|
||||
},
|
||||
)
|
||||
|
||||
// ServiceChangesPending is the number of pending service changes that
|
||||
// have not yet been synced to the proxy.
|
||||
ServiceChangesPending = prometheus.NewGauge(
|
||||
prometheus.GaugeOpts{
|
||||
Subsystem: kubeProxySubsystem,
|
||||
Name: "sync_proxy_rules_service_changes_pending",
|
||||
Help: "Pending proxy rules Service changes",
|
||||
},
|
||||
)
|
||||
|
||||
// ServiceChangesTotal is the number of service changes that the proxy has
|
||||
// seen.
|
||||
ServiceChangesTotal = prometheus.NewCounter(
|
||||
prometheus.CounterOpts{
|
||||
Subsystem: kubeProxySubsystem,
|
||||
Name: "sync_proxy_rules_service_changes_total",
|
||||
Help: "Cumulative proxy rules Service changes",
|
||||
},
|
||||
)
|
||||
)
|
||||
|
||||
var registerMetricsOnce sync.Once
|
||||
|
@ -72,7 +122,12 @@ func RegisterMetrics() {
|
|||
registerMetricsOnce.Do(func() {
|
||||
prometheus.MustRegister(SyncProxyRulesLatency)
|
||||
prometheus.MustRegister(DeprecatedSyncProxyRulesLatency)
|
||||
prometheus.MustRegister(SyncProxyRulesLastTimestamp)
|
||||
prometheus.MustRegister(NetworkProgrammingLatency)
|
||||
prometheus.MustRegister(EndpointChangesPending)
|
||||
prometheus.MustRegister(EndpointChangesTotal)
|
||||
prometheus.MustRegister(ServiceChangesPending)
|
||||
prometheus.MustRegister(ServiceChangesTotal)
|
||||
})
|
||||
}
|
||||
|
||||
|
|
|
@ -30,6 +30,7 @@ import (
|
|||
"k8s.io/apimachinery/pkg/util/sets"
|
||||
"k8s.io/client-go/tools/record"
|
||||
apiservice "k8s.io/kubernetes/pkg/api/v1/service"
|
||||
"k8s.io/kubernetes/pkg/proxy/metrics"
|
||||
utilproxy "k8s.io/kubernetes/pkg/proxy/util"
|
||||
utilnet "k8s.io/utils/net"
|
||||
)
|
||||
|
@ -198,6 +199,7 @@ func (sct *ServiceChangeTracker) Update(previous, current *v1.Service) bool {
|
|||
if svc == nil {
|
||||
return false
|
||||
}
|
||||
metrics.ServiceChangesTotal.Inc()
|
||||
namespacedName := types.NamespacedName{Namespace: svc.Namespace, Name: svc.Name}
|
||||
|
||||
sct.lock.Lock()
|
||||
|
@ -214,6 +216,7 @@ func (sct *ServiceChangeTracker) Update(previous, current *v1.Service) bool {
|
|||
if reflect.DeepEqual(change.previous, change.current) {
|
||||
delete(sct.items, namespacedName)
|
||||
}
|
||||
metrics.ServiceChangesPending.Set(float64(len(sct.items)))
|
||||
return len(sct.items) > 0
|
||||
}
|
||||
|
||||
|
@ -296,6 +299,7 @@ func (sm *ServiceMap) apply(changes *ServiceChangeTracker, UDPStaleClusterIP set
|
|||
}
|
||||
// clear changes after applying them to ServiceMap.
|
||||
changes.items = make(map[types.NamespacedName]*serviceChange)
|
||||
metrics.ServiceChangesPending.Set(0)
|
||||
return
|
||||
}
|
||||
|
||||
|
|
|
@ -43,6 +43,16 @@ var (
|
|||
Buckets: prometheus.ExponentialBuckets(1000, 2, 15),
|
||||
},
|
||||
)
|
||||
|
||||
// SyncProxyRulesLastTimestamp is the timestamp proxy rules were last
|
||||
// successfully synced.
|
||||
SyncProxyRulesLastTimestamp = prometheus.NewGauge(
|
||||
prometheus.GaugeOpts{
|
||||
Subsystem: kubeProxySubsystem,
|
||||
Name: "sync_proxy_rules_last_timestamp_seconds",
|
||||
Help: "The last time proxy rules were successfully synced",
|
||||
},
|
||||
)
|
||||
)
|
||||
|
||||
var registerMetricsOnce sync.Once
|
||||
|
@ -51,6 +61,7 @@ func RegisterMetrics() {
|
|||
registerMetricsOnce.Do(func() {
|
||||
prometheus.MustRegister(SyncProxyRulesLatency)
|
||||
prometheus.MustRegister(DeprecatedSyncProxyRulesLatency)
|
||||
prometheus.MustRegister(SyncProxyRulesLastTimestamp)
|
||||
})
|
||||
}
|
||||
|
||||
|
|
|
@ -1197,6 +1197,7 @@ func (proxier *Proxier) syncProxyRules() {
|
|||
if proxier.healthzServer != nil {
|
||||
proxier.healthzServer.UpdateTimestamp()
|
||||
}
|
||||
SyncProxyRulesLastTimestamp.SetToCurrentTime()
|
||||
|
||||
// Update healthchecks. The endpoints list might include services that are
|
||||
// not "OnlyLocal", but the services list will not, and the healthChecker
|
||||
|
|
Loading…
Reference in New Issue