Merge pull request #52319 from yujuhong/docker-metrics

Automatic merge from submit-queue (batch tested with PRs 51067, 52319, 52803, 52961, 51972). If you want to cherry-pick this change to another branch, please follow the instructions <a href="https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md">here</a>..

Move prometheus metrics for docker operations into dockershim
pull/6/head
Kubernetes Submit Queue 2017-09-25 14:50:51 -07:00 committed by GitHub
commit 69011d10c2
9 changed files with 134 additions and 47 deletions

View File

@ -44,6 +44,7 @@ go_library(
"//pkg/kubelet/dockershim/cm:go_default_library",
"//pkg/kubelet/dockershim/errors:go_default_library",
"//pkg/kubelet/dockershim/libdocker:go_default_library",
"//pkg/kubelet/dockershim/metrics:go_default_library",
"//pkg/kubelet/leaky:go_default_library",
"//pkg/kubelet/network:go_default_library",
"//pkg/kubelet/network/cni:go_default_library",
@ -135,6 +136,7 @@ filegroup(
"//pkg/kubelet/dockershim/cm:all-srcs",
"//pkg/kubelet/dockershim/errors:all-srcs",
"//pkg/kubelet/dockershim/libdocker:all-srcs",
"//pkg/kubelet/dockershim/metrics:all-srcs",
"//pkg/kubelet/dockershim/remote:all-srcs",
"//pkg/kubelet/dockershim/testing:all-srcs",
],

View File

@ -45,6 +45,7 @@ import (
"k8s.io/kubernetes/pkg/kubelet/util/cache"
"k8s.io/kubernetes/pkg/kubelet/dockershim/libdocker"
"k8s.io/kubernetes/pkg/kubelet/dockershim/metrics"
)
const (
@ -219,6 +220,10 @@ func NewDockerService(client libdocker.Interface, podSandboxImage string, stream
},
versionCacheTTL,
)
// Register prometheus metrics.
metrics.Register()
return ds, nil
}

View File

@ -29,7 +29,7 @@ go_library(
"kube_docker_client.go",
],
deps = [
"//pkg/kubelet/metrics:go_default_library",
"//pkg/kubelet/dockershim/metrics:go_default_library",
"//vendor/github.com/docker/distribution/reference:go_default_library",
"//vendor/github.com/docker/docker/api/types:go_default_library",
"//vendor/github.com/docker/docker/api/types/container:go_default_library",

View File

@ -22,7 +22,8 @@ import (
dockertypes "github.com/docker/docker/api/types"
dockercontainer "github.com/docker/docker/api/types/container"
dockerimagetypes "github.com/docker/docker/api/types/image"
"k8s.io/kubernetes/pkg/kubelet/metrics"
"k8s.io/kubernetes/pkg/kubelet/dockershim/metrics"
)
// instrumentedInterface wraps the Interface and records the operations

View File

@ -0,0 +1,22 @@
load("@io_bazel_rules_go//go:def.bzl", "go_library")
go_library(
name = "go_default_library",
srcs = ["metrics.go"],
visibility = ["//visibility:public"],
deps = ["//vendor/github.com/prometheus/client_golang/prometheus:go_default_library"],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)

View File

@ -0,0 +1,96 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package metrics
import (
"sync"
"time"
"github.com/prometheus/client_golang/prometheus"
)
const (
// DockerOperationsKey is the key for docker operation metrics.
DockerOperationsKey = "docker_operations"
// DockerOperationsLatencyKey is the key for the operation latency metrics.
DockerOperationsLatencyKey = "docker_operations_latency_microseconds"
// DockerOperationsErrorsKey is the key for the operation error metrics.
DockerOperationsErrorsKey = "docker_operations_errors"
// DockerOperationsTimeoutKey is the key for the operation timoeut metrics.
DockerOperationsTimeoutKey = "docker_operations_timeout"
// Keep the "kubelet" subsystem for backward compatibility.
kubeletSubsystem = "kubelet"
)
var (
// DockerOperationsLatency collects operation latency numbers by operation
// type.
DockerOperationsLatency = prometheus.NewSummaryVec(
prometheus.SummaryOpts{
Subsystem: kubeletSubsystem,
Name: DockerOperationsLatencyKey,
Help: "Latency in microseconds of Docker operations. Broken down by operation type.",
},
[]string{"operation_type"},
)
// DockerOperations collects operation counts by operation type.
DockerOperations = prometheus.NewCounterVec(
prometheus.CounterOpts{
Subsystem: kubeletSubsystem,
Name: DockerOperationsKey,
Help: "Cumulative number of Docker operations by operation type.",
},
[]string{"operation_type"},
)
// DockerOperationsErrors collects operation errors by operation
// type.
DockerOperationsErrors = prometheus.NewCounterVec(
prometheus.CounterOpts{
Subsystem: kubeletSubsystem,
Name: DockerOperationsErrorsKey,
Help: "Cumulative number of Docker operation errors by operation type.",
},
[]string{"operation_type"},
)
// DockerOperationsTimeout collects operation timeouts by operation type.
DockerOperationsTimeout = prometheus.NewCounterVec(
prometheus.CounterOpts{
Subsystem: kubeletSubsystem,
Name: DockerOperationsTimeoutKey,
Help: "Cumulative number of Docker operation timeout by operation type.",
},
[]string{"operation_type"},
)
)
var registerMetrics sync.Once
// Register all metrics.
func Register() {
registerMetrics.Do(func() {
prometheus.MustRegister(DockerOperationsLatency)
prometheus.MustRegister(DockerOperations)
prometheus.MustRegister(DockerOperationsErrors)
prometheus.MustRegister(DockerOperationsTimeout)
})
}
// SinceInMicroseconds gets the time since the specified start in microseconds.
func SinceInMicroseconds(start time.Time) float64 {
return float64(time.Since(start).Nanoseconds() / time.Microsecond.Nanoseconds())
}

View File

@ -30,10 +30,6 @@ const (
PodWorkerLatencyKey = "pod_worker_latency_microseconds"
PodStartLatencyKey = "pod_start_latency_microseconds"
CgroupManagerOperationsKey = "cgroup_manager_latency_microseconds"
DockerOperationsLatencyKey = "docker_operations_latency_microseconds"
DockerOperationsKey = "docker_operations"
DockerOperationsErrorsKey = "docker_operations_errors"
DockerOperationsTimeoutKey = "docker_operations_timeout"
PodWorkerStartLatencyKey = "pod_worker_start_latency_microseconds"
PLEGRelistLatencyKey = "pleg_relist_latency_microseconds"
PLEGRelistIntervalKey = "pleg_relist_interval_microseconds"
@ -88,39 +84,6 @@ var (
Help: "Latency in microseconds from seeing a pod to starting a worker.",
},
)
// TODO(random-liu): Move the following docker metrics into shim once dockertools is deprecated.
DockerOperationsLatency = prometheus.NewSummaryVec(
prometheus.SummaryOpts{
Subsystem: KubeletSubsystem,
Name: DockerOperationsLatencyKey,
Help: "Latency in microseconds of Docker operations. Broken down by operation type.",
},
[]string{"operation_type"},
)
DockerOperations = prometheus.NewCounterVec(
prometheus.CounterOpts{
Subsystem: KubeletSubsystem,
Name: DockerOperationsKey,
Help: "Cumulative number of Docker operations by operation type.",
},
[]string{"operation_type"},
)
DockerOperationsErrors = prometheus.NewCounterVec(
prometheus.CounterOpts{
Subsystem: KubeletSubsystem,
Name: DockerOperationsErrorsKey,
Help: "Cumulative number of Docker operation errors by operation type.",
},
[]string{"operation_type"},
)
DockerOperationsTimeout = prometheus.NewCounterVec(
prometheus.CounterOpts{
Subsystem: KubeletSubsystem,
Name: DockerOperationsTimeoutKey,
Help: "Cumulative number of Docker operation timeout by operation type.",
},
[]string{"operation_type"},
)
PLEGRelistLatency = prometheus.NewSummary(
prometheus.SummaryOpts{
Subsystem: KubeletSubsystem,
@ -226,13 +189,9 @@ func Register(containerCache kubecontainer.RuntimeCache) {
registerMetrics.Do(func() {
prometheus.MustRegister(PodWorkerLatency)
prometheus.MustRegister(PodStartLatency)
prometheus.MustRegister(DockerOperationsLatency)
prometheus.MustRegister(CgroupManagerLatency)
prometheus.MustRegister(PodWorkerStartLatency)
prometheus.MustRegister(ContainersPerPodCount)
prometheus.MustRegister(DockerOperations)
prometheus.MustRegister(DockerOperationsErrors)
prometheus.MustRegister(DockerOperationsTimeout)
prometheus.MustRegister(newPodAndContainerCollector(containerCache))
prometheus.MustRegister(PLEGRelistLatency)
prometheus.MustRegister(PLEGRelistInterval)

View File

@ -58,6 +58,7 @@ go_library(
"//pkg/kubectl:go_default_library",
"//pkg/kubelet/apis/kubeletconfig:go_default_library",
"//pkg/kubelet/apis/stats/v1alpha1:go_default_library",
"//pkg/kubelet/dockershim/metrics:go_default_library",
"//pkg/kubelet/events:go_default_library",
"//pkg/kubelet/metrics:go_default_library",
"//pkg/kubelet/sysctl:go_default_library",

View File

@ -35,6 +35,7 @@ import (
clientset "k8s.io/client-go/kubernetes"
stats "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1"
dockermetrics "k8s.io/kubernetes/pkg/kubelet/dockershim/metrics"
kubeletmetrics "k8s.io/kubernetes/pkg/kubelet/metrics"
"k8s.io/kubernetes/pkg/master/ports"
"k8s.io/kubernetes/test/e2e/framework/metrics"
@ -104,7 +105,7 @@ func GetKubeletLatencyMetrics(ms metrics.KubeletMetrics) KubeletLatencyMetrics {
kubeletmetrics.PodWorkerStartLatencyKey,
kubeletmetrics.PodStartLatencyKey,
kubeletmetrics.CgroupManagerOperationsKey,
kubeletmetrics.DockerOperationsLatencyKey,
dockermetrics.DockerOperationsLatencyKey,
kubeletmetrics.PodWorkerStartLatencyKey,
kubeletmetrics.PLEGRelistLatencyKey,
)
@ -235,9 +236,9 @@ func getNodeRuntimeOperationErrorRate(c clientset.Interface, node string) (NodeR
}
// If no corresponding metrics are found, the returned samples will be empty. Then the following
// loop will be skipped automatically.
allOps := ms[kubeletmetrics.DockerOperationsKey]
errOps := ms[kubeletmetrics.DockerOperationsErrorsKey]
timeoutOps := ms[kubeletmetrics.DockerOperationsTimeoutKey]
allOps := ms[dockermetrics.DockerOperationsKey]
errOps := ms[dockermetrics.DockerOperationsErrorsKey]
timeoutOps := ms[dockermetrics.DockerOperationsTimeoutKey]
for _, sample := range allOps {
operation := string(sample.Metric["operation_type"])
result[operation] = &RuntimeOperationErrorRate{TotalNumber: float64(sample.Value)}