mirror of https://github.com/k3s-io/k3s
Merge pull request #53446 from sjenning/network-plugin-metrics
Automatic merge from submit-queue (batch tested with PRs 53454, 53446, 52935, 53443, 52917). If you want to cherry-pick this change to another branch, please follow the instructions <a href="https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md">here</a>. kubelet: add latency metrics to network plugin manager This PR adds latency metrics to the network plugin operations, namely `GetPodNetworkStatus()`, `SetUpPod()`, and `TearDownPod()`. I recently had to debug and issue where a PLEG relist hang was occurring due to a hang in a CNI plugin and it would have been really nice to have these. Between the these new metrics and `docker_operations_latency_microseconds`, we will be able to account for nearly all the time consuming routines in the PLEG relist. @derekwaynecarr @smarterclayton @eparis @vishh ```release-note Metrics were added to network plugin to report latency of CNI operations ``` /sig nodepull/6/head
commit
eaaa93c70c
|
@ -15,6 +15,7 @@ go_library(
|
|||
"//pkg/kubelet/apis/kubeletconfig:go_default_library",
|
||||
"//pkg/kubelet/container:go_default_library",
|
||||
"//pkg/kubelet/network/hostport:go_default_library",
|
||||
"//pkg/kubelet/network/metrics:go_default_library",
|
||||
"//pkg/util/sysctl:go_default_library",
|
||||
"//vendor/github.com/golang/glog:go_default_library",
|
||||
"//vendor/k8s.io/api/core/v1:go_default_library",
|
||||
|
@ -42,6 +43,7 @@ filegroup(
|
|||
"//pkg/kubelet/network/hairpin:all-srcs",
|
||||
"//pkg/kubelet/network/hostport:all-srcs",
|
||||
"//pkg/kubelet/network/kubenet:all-srcs",
|
||||
"//pkg/kubelet/network/metrics:all-srcs",
|
||||
"//pkg/kubelet/network/testing:all-srcs",
|
||||
],
|
||||
tags = ["automanaged"],
|
||||
|
|
|
@ -0,0 +1,22 @@
|
|||
load("@io_bazel_rules_go//go:def.bzl", "go_library")
|
||||
|
||||
go_library(
|
||||
name = "go_default_library",
|
||||
srcs = ["metrics.go"],
|
||||
visibility = ["//visibility:public"],
|
||||
deps = ["//vendor/github.com/prometheus/client_golang/prometheus:go_default_library"],
|
||||
)
|
||||
|
||||
filegroup(
|
||||
name = "package-srcs",
|
||||
srcs = glob(["**"]),
|
||||
tags = ["automanaged"],
|
||||
visibility = ["//visibility:private"],
|
||||
)
|
||||
|
||||
filegroup(
|
||||
name = "all-srcs",
|
||||
srcs = [":package-srcs"],
|
||||
tags = ["automanaged"],
|
||||
visibility = ["//visibility:public"],
|
||||
)
|
|
@ -0,0 +1,61 @@
|
|||
/*
|
||||
Copyright 2017 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package metrics
|
||||
|
||||
import (
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
)
|
||||
|
||||
const (
|
||||
// NetworkPluginOperationsKey is the key for operation count metrics.
|
||||
NetworkPluginOperationsKey = "network_plugin_operations"
|
||||
// NetworkPluginOperationsLatencyKey is the key for the operation latency metrics.
|
||||
NetworkPluginOperationsLatencyKey = "network_plugin_operations_latency_microseconds"
|
||||
|
||||
// Keep the "kubelet" subsystem for backward compatibility.
|
||||
kubeletSubsystem = "kubelet"
|
||||
)
|
||||
|
||||
var (
|
||||
// NetworkPluginOperationsLatency collects operation latency numbers by operation
|
||||
// type.
|
||||
NetworkPluginOperationsLatency = prometheus.NewSummaryVec(
|
||||
prometheus.SummaryOpts{
|
||||
Subsystem: kubeletSubsystem,
|
||||
Name: NetworkPluginOperationsLatencyKey,
|
||||
Help: "Latency in microseconds of network plugin operations. Broken down by operation type.",
|
||||
},
|
||||
[]string{"operation_type"},
|
||||
)
|
||||
)
|
||||
|
||||
var registerMetrics sync.Once
|
||||
|
||||
// Register all metrics.
|
||||
func Register() {
|
||||
registerMetrics.Do(func() {
|
||||
prometheus.MustRegister(NetworkPluginOperationsLatency)
|
||||
})
|
||||
}
|
||||
|
||||
// SinceInMicroseconds gets the time since the specified start in microseconds.
|
||||
func SinceInMicroseconds(start time.Time) float64 {
|
||||
return float64(time.Since(start).Nanoseconds() / time.Microsecond.Nanoseconds())
|
||||
}
|
|
@ -21,6 +21,7 @@ import (
|
|||
"net"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/golang/glog"
|
||||
"k8s.io/api/core/v1"
|
||||
|
@ -32,6 +33,7 @@ import (
|
|||
"k8s.io/kubernetes/pkg/kubelet/apis/kubeletconfig"
|
||||
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
|
||||
"k8s.io/kubernetes/pkg/kubelet/network/hostport"
|
||||
"k8s.io/kubernetes/pkg/kubelet/network/metrics"
|
||||
utilsysctl "k8s.io/kubernetes/pkg/util/sysctl"
|
||||
utilexec "k8s.io/utils/exec"
|
||||
)
|
||||
|
@ -304,6 +306,7 @@ type PluginManager struct {
|
|||
}
|
||||
|
||||
func NewPluginManager(plugin NetworkPlugin) *PluginManager {
|
||||
metrics.Register()
|
||||
return &PluginManager{
|
||||
plugin: plugin,
|
||||
pods: make(map[string]*podLock),
|
||||
|
@ -371,7 +374,13 @@ func (pm *PluginManager) podUnlock(fullPodName string) {
|
|||
}
|
||||
}
|
||||
|
||||
// recordOperation records operation and duration
|
||||
func recordOperation(operation string, start time.Time) {
|
||||
metrics.NetworkPluginOperationsLatency.WithLabelValues(operation).Observe(metrics.SinceInMicroseconds(start))
|
||||
}
|
||||
|
||||
func (pm *PluginManager) GetPodNetworkStatus(podNamespace, podName string, id kubecontainer.ContainerID) (*PodNetworkStatus, error) {
|
||||
defer recordOperation("get_pod_network_status", time.Now())
|
||||
fullPodName := kubecontainer.BuildPodFullName(podName, podNamespace)
|
||||
pm.podLock(fullPodName).Lock()
|
||||
defer pm.podUnlock(fullPodName)
|
||||
|
@ -385,6 +394,7 @@ func (pm *PluginManager) GetPodNetworkStatus(podNamespace, podName string, id ku
|
|||
}
|
||||
|
||||
func (pm *PluginManager) SetUpPod(podNamespace, podName string, id kubecontainer.ContainerID, annotations map[string]string) error {
|
||||
defer recordOperation("set_up_pod", time.Now())
|
||||
fullPodName := kubecontainer.BuildPodFullName(podName, podNamespace)
|
||||
pm.podLock(fullPodName).Lock()
|
||||
defer pm.podUnlock(fullPodName)
|
||||
|
@ -398,6 +408,7 @@ func (pm *PluginManager) SetUpPod(podNamespace, podName string, id kubecontainer
|
|||
}
|
||||
|
||||
func (pm *PluginManager) TearDownPod(podNamespace, podName string, id kubecontainer.ContainerID) error {
|
||||
defer recordOperation("tear_down_pod", time.Now())
|
||||
fullPodName := kubecontainer.BuildPodFullName(podName, podNamespace)
|
||||
pm.podLock(fullPodName).Lock()
|
||||
defer pm.podUnlock(fullPodName)
|
||||
|
|
Loading…
Reference in New Issue