mirror of https://github.com/k3s-io/k3s
Add instrumented CRI service which is enabled for both grpc and non-grpc
integration.pull/6/head
parent
cfbb0eb2ff
commit
ced5a848f5
|
@ -16,6 +16,7 @@ go_library(
|
|||
"doc.go",
|
||||
"fake_kuberuntime_manager.go",
|
||||
"helpers.go",
|
||||
"instrumented_services.go",
|
||||
"kuberuntime_container.go",
|
||||
"kuberuntime_gc.go",
|
||||
"kuberuntime_image.go",
|
||||
|
@ -38,6 +39,7 @@ go_library(
|
|||
"//pkg/kubelet/events:go_default_library",
|
||||
"//pkg/kubelet/images:go_default_library",
|
||||
"//pkg/kubelet/lifecycle:go_default_library",
|
||||
"//pkg/kubelet/metrics:go_default_library",
|
||||
"//pkg/kubelet/network:go_default_library",
|
||||
"//pkg/kubelet/prober/results:go_default_library",
|
||||
"//pkg/kubelet/qos:go_default_library",
|
||||
|
|
|
@ -0,0 +1,223 @@
|
|||
/*
|
||||
Copyright 2016 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package kuberuntime
|
||||
|
||||
import (
|
||||
"io"
|
||||
"time"
|
||||
|
||||
internalApi "k8s.io/kubernetes/pkg/kubelet/api"
|
||||
runtimeApi "k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/runtime"
|
||||
"k8s.io/kubernetes/pkg/kubelet/metrics"
|
||||
)
|
||||
|
||||
// instrumentedRuntimeService wraps the RuntimeService and records the operations
|
||||
// and errors metrics.
|
||||
type instrumentedRuntimeService struct {
|
||||
service internalApi.RuntimeService
|
||||
}
|
||||
|
||||
// Creates an instrumented RuntimeInterface from an existing RuntimeService.
|
||||
func NewInstrumentedRuntimeService(service internalApi.RuntimeService) internalApi.RuntimeService {
|
||||
return &instrumentedRuntimeService{service: service}
|
||||
}
|
||||
|
||||
// instrumentedImageManagerService wraps the ImageManagerService and records the operations
|
||||
// and errors metrics.
|
||||
type instrumentedImageManagerService struct {
|
||||
service internalApi.ImageManagerService
|
||||
}
|
||||
|
||||
// Creates an instrumented ImageManagerService from an existing ImageManagerService.
|
||||
func NewInstrumentedImageManagerService(service internalApi.ImageManagerService) internalApi.ImageManagerService {
|
||||
return &instrumentedImageManagerService{service: service}
|
||||
}
|
||||
|
||||
// recordOperation records the duration of the operation.
|
||||
func recordOperation(operation string, start time.Time) {
|
||||
metrics.RuntimeOperations.WithLabelValues(operation).Inc()
|
||||
metrics.RuntimeOperationsLatency.WithLabelValues(operation).Observe(metrics.SinceInMicroseconds(start))
|
||||
}
|
||||
|
||||
// recordError records error for metric if an error occurred.
|
||||
func recordError(operation string, err error) {
|
||||
if err != nil {
|
||||
metrics.RuntimeOperationsErrors.WithLabelValues(operation).Inc()
|
||||
}
|
||||
}
|
||||
|
||||
func (in instrumentedRuntimeService) Version(apiVersion string) (*runtimeApi.VersionResponse, error) {
|
||||
const operation = "version"
|
||||
defer recordOperation(operation, time.Now())
|
||||
|
||||
out, err := in.service.Version(apiVersion)
|
||||
recordError(operation, err)
|
||||
return out, err
|
||||
}
|
||||
|
||||
func (in instrumentedRuntimeService) CreateContainer(podSandboxID string, config *runtimeApi.ContainerConfig, sandboxConfig *runtimeApi.PodSandboxConfig) (string, error) {
|
||||
const operation = "create_container"
|
||||
defer recordOperation(operation, time.Now())
|
||||
|
||||
out, err := in.service.CreateContainer(podSandboxID, config, sandboxConfig)
|
||||
recordError(operation, err)
|
||||
return out, err
|
||||
}
|
||||
|
||||
func (in instrumentedRuntimeService) StartContainer(containerID string) error {
|
||||
const operation = "start_container"
|
||||
defer recordOperation(operation, time.Now())
|
||||
|
||||
err := in.service.StartContainer(containerID)
|
||||
recordError(operation, err)
|
||||
return err
|
||||
}
|
||||
|
||||
func (in instrumentedRuntimeService) StopContainer(containerID string, timeout int64) error {
|
||||
const operation = "stop_container"
|
||||
defer recordOperation(operation, time.Now())
|
||||
|
||||
err := in.service.StopContainer(containerID, timeout)
|
||||
recordError(operation, err)
|
||||
return err
|
||||
}
|
||||
|
||||
func (in instrumentedRuntimeService) RemoveContainer(containerID string) error {
|
||||
const operation = "remove_container"
|
||||
defer recordOperation(operation, time.Now())
|
||||
|
||||
err := in.service.RemoveContainer(containerID)
|
||||
recordError(operation, err)
|
||||
return err
|
||||
}
|
||||
|
||||
func (in instrumentedRuntimeService) ListContainers(filter *runtimeApi.ContainerFilter) ([]*runtimeApi.Container, error) {
|
||||
const operation = "list_containers"
|
||||
defer recordOperation(operation, time.Now())
|
||||
|
||||
out, err := in.service.ListContainers(filter)
|
||||
recordError(operation, err)
|
||||
return out, err
|
||||
}
|
||||
|
||||
func (in instrumentedRuntimeService) ContainerStatus(containerID string) (*runtimeApi.ContainerStatus, error) {
|
||||
const operation = "container_status"
|
||||
defer recordOperation(operation, time.Now())
|
||||
|
||||
out, err := in.service.ContainerStatus(containerID)
|
||||
recordError(operation, err)
|
||||
return out, err
|
||||
}
|
||||
|
||||
func (in instrumentedRuntimeService) Exec(containerID string, cmd []string, tty bool, stdin io.Reader, stdout, stderr io.WriteCloser) error {
|
||||
const operation = "exec"
|
||||
defer recordOperation(operation, time.Now())
|
||||
|
||||
err := in.service.Exec(containerID, cmd, tty, stdin, stdout, stderr)
|
||||
recordError(operation, err)
|
||||
return err
|
||||
}
|
||||
|
||||
func (in instrumentedRuntimeService) RunPodSandbox(config *runtimeApi.PodSandboxConfig) (string, error) {
|
||||
const operation = "run_podsandbox"
|
||||
defer recordOperation(operation, time.Now())
|
||||
|
||||
out, err := in.service.RunPodSandbox(config)
|
||||
recordError(operation, err)
|
||||
return out, err
|
||||
}
|
||||
|
||||
func (in instrumentedRuntimeService) StopPodSandbox(podSandboxID string) error {
|
||||
const operation = "stop_podsandbox"
|
||||
defer recordOperation(operation, time.Now())
|
||||
|
||||
err := in.service.StopPodSandbox(podSandboxID)
|
||||
recordError(operation, err)
|
||||
return err
|
||||
}
|
||||
|
||||
func (in instrumentedRuntimeService) RemovePodSandbox(podSandboxID string) error {
|
||||
const operation = "remove_podsandbox"
|
||||
defer recordOperation(operation, time.Now())
|
||||
|
||||
err := in.service.RemovePodSandbox(podSandboxID)
|
||||
recordError(operation, err)
|
||||
return err
|
||||
}
|
||||
|
||||
func (in instrumentedRuntimeService) PodSandboxStatus(podSandboxID string) (*runtimeApi.PodSandboxStatus, error) {
|
||||
const operation = "podsandbox_status"
|
||||
defer recordOperation(operation, time.Now())
|
||||
|
||||
out, err := in.service.PodSandboxStatus(podSandboxID)
|
||||
recordError(operation, err)
|
||||
return out, err
|
||||
}
|
||||
|
||||
func (in instrumentedRuntimeService) ListPodSandbox(filter *runtimeApi.PodSandboxFilter) ([]*runtimeApi.PodSandbox, error) {
|
||||
const operation = "list_podsandbox"
|
||||
defer recordOperation(operation, time.Now())
|
||||
|
||||
out, err := in.service.ListPodSandbox(filter)
|
||||
recordError(operation, err)
|
||||
return out, err
|
||||
}
|
||||
|
||||
func (in instrumentedRuntimeService) UpdateRuntimeConfig(runtimeConfig *runtimeApi.RuntimeConfig) error {
|
||||
const operation = "update_runtime_config"
|
||||
defer recordOperation(operation, time.Now())
|
||||
|
||||
err := in.service.UpdateRuntimeConfig(runtimeConfig)
|
||||
recordError(operation, err)
|
||||
return err
|
||||
}
|
||||
|
||||
func (in instrumentedImageManagerService) ListImages(filter *runtimeApi.ImageFilter) ([]*runtimeApi.Image, error) {
|
||||
const operation = "list_images"
|
||||
defer recordOperation(operation, time.Now())
|
||||
|
||||
out, err := in.service.ListImages(filter)
|
||||
recordError(operation, err)
|
||||
return out, err
|
||||
}
|
||||
|
||||
func (in instrumentedImageManagerService) ImageStatus(image *runtimeApi.ImageSpec) (*runtimeApi.Image, error) {
|
||||
const operation = "image_status"
|
||||
defer recordOperation(operation, time.Now())
|
||||
|
||||
out, err := in.service.ImageStatus(image)
|
||||
recordError(operation, err)
|
||||
return out, err
|
||||
}
|
||||
|
||||
func (in instrumentedImageManagerService) PullImage(image *runtimeApi.ImageSpec, auth *runtimeApi.AuthConfig) error {
|
||||
const operation = "pull_image"
|
||||
defer recordOperation(operation, time.Now())
|
||||
|
||||
err := in.service.PullImage(image, auth)
|
||||
recordError(operation, err)
|
||||
return err
|
||||
}
|
||||
|
||||
func (in instrumentedImageManagerService) RemoveImage(image *runtimeApi.ImageSpec) error {
|
||||
const operation = "remove_image"
|
||||
defer recordOperation(operation, time.Now())
|
||||
|
||||
err := in.service.RemoveImage(image)
|
||||
recordError(operation, err)
|
||||
return err
|
||||
}
|
|
@ -138,8 +138,8 @@ func NewKubeGenericRuntimeManager(
|
|||
osInterface: osInterface,
|
||||
networkPlugin: networkPlugin,
|
||||
runtimeHelper: runtimeHelper,
|
||||
runtimeService: runtimeService,
|
||||
imageService: imageService,
|
||||
runtimeService: NewInstrumentedRuntimeService(runtimeService),
|
||||
imageService: NewInstrumentedImageManagerService(imageService),
|
||||
keyring: credentialprovider.NewDockerKeyring(),
|
||||
}
|
||||
|
||||
|
|
|
@ -39,6 +39,10 @@ const (
|
|||
PodWorkerStartLatencyKey = "pod_worker_start_latency_microseconds"
|
||||
PLEGRelistLatencyKey = "pleg_relist_latency_microseconds"
|
||||
PLEGRelistIntervalKey = "pleg_relist_interval_microseconds"
|
||||
// Metrics keys of remote runtime operations
|
||||
RuntimeOperationsKey = "runtime_operations"
|
||||
RuntimeOperationsLatencyKey = "runtime_operations_latency_microseconds"
|
||||
RuntimeOperationsErrorsKey = "runtime_operations_errors"
|
||||
)
|
||||
|
||||
var (
|
||||
|
@ -93,6 +97,7 @@ var (
|
|||
Help: "Latency in microseconds from seeing a pod to starting a worker.",
|
||||
},
|
||||
)
|
||||
// TODO(random-liu): Move the following docker metrics into shim once dockertools is deprecated.
|
||||
DockerOperationsLatency = prometheus.NewSummaryVec(
|
||||
prometheus.SummaryOpts{
|
||||
Subsystem: KubeletSubsystem,
|
||||
|
@ -139,6 +144,31 @@ var (
|
|||
Help: "Interval in microseconds between relisting in PLEG.",
|
||||
},
|
||||
)
|
||||
// Metrics of remote runtime operations.
|
||||
RuntimeOperations = prometheus.NewCounterVec(
|
||||
prometheus.CounterOpts{
|
||||
Subsystem: KubeletSubsystem,
|
||||
Name: RuntimeOperationsKey,
|
||||
Help: "Cumulative number of runtime operations by operation type.",
|
||||
},
|
||||
[]string{"operation_type"},
|
||||
)
|
||||
RuntimeOperationsLatency = prometheus.NewSummaryVec(
|
||||
prometheus.SummaryOpts{
|
||||
Subsystem: KubeletSubsystem,
|
||||
Name: RuntimeOperationsLatencyKey,
|
||||
Help: "Latency in microseconds of runtime operations. Broken down by operation type.",
|
||||
},
|
||||
[]string{"operation_type"},
|
||||
)
|
||||
RuntimeOperationsErrors = prometheus.NewCounterVec(
|
||||
prometheus.CounterOpts{
|
||||
Subsystem: KubeletSubsystem,
|
||||
Name: RuntimeOperationsErrorsKey,
|
||||
Help: "Cumulative number of runtime operation errors by operation type.",
|
||||
},
|
||||
[]string{"operation_type"},
|
||||
)
|
||||
)
|
||||
|
||||
var registerMetrics sync.Once
|
||||
|
@ -161,6 +191,9 @@ func Register(containerCache kubecontainer.RuntimeCache) {
|
|||
prometheus.MustRegister(newPodAndContainerCollector(containerCache))
|
||||
prometheus.MustRegister(PLEGRelistLatency)
|
||||
prometheus.MustRegister(PLEGRelistInterval)
|
||||
prometheus.MustRegister(RuntimeOperations)
|
||||
prometheus.MustRegister(RuntimeOperationsLatency)
|
||||
prometheus.MustRegister(RuntimeOperationsErrors)
|
||||
})
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue