2017-08-18 22:08:44 +00:00
|
|
|
/*
|
|
|
|
Copyright 2017 The Kubernetes Authors.
|
|
|
|
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
you may not use this file except in compliance with the License.
|
|
|
|
You may obtain a copy of the License at
|
|
|
|
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
See the License for the specific language governing permissions and
|
|
|
|
limitations under the License.
|
|
|
|
*/
|
|
|
|
|
|
|
|
package stats
|
|
|
|
|
|
|
|
import (
|
2018-01-30 00:24:03 +00:00
|
|
|
"errors"
|
2017-08-18 22:08:44 +00:00
|
|
|
"fmt"
|
2017-11-01 14:06:01 +00:00
|
|
|
"path"
|
2017-10-26 06:46:49 +00:00
|
|
|
"sort"
|
2017-11-01 14:06:01 +00:00
|
|
|
"strings"
|
2019-02-02 09:14:26 +00:00
|
|
|
"sync"
|
2017-08-31 22:37:04 +00:00
|
|
|
"time"
|
2017-08-18 22:08:44 +00:00
|
|
|
|
2017-08-31 22:37:04 +00:00
|
|
|
cadvisorfs "github.com/google/cadvisor/fs"
|
2019-02-15 04:20:23 +00:00
|
|
|
"k8s.io/klog"
|
|
|
|
|
2017-08-31 22:37:04 +00:00
|
|
|
cadvisorapiv2 "github.com/google/cadvisor/info/v2"
|
|
|
|
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
|
|
|
"k8s.io/apimachinery/pkg/types"
|
2017-08-18 22:08:44 +00:00
|
|
|
internalapi "k8s.io/kubernetes/pkg/kubelet/apis/cri"
|
2018-02-06 22:11:09 +00:00
|
|
|
runtimeapi "k8s.io/kubernetes/pkg/kubelet/apis/cri/runtime/v1alpha2"
|
2017-08-18 22:08:44 +00:00
|
|
|
statsapi "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1"
|
|
|
|
"k8s.io/kubernetes/pkg/kubelet/cadvisor"
|
2018-02-21 03:49:51 +00:00
|
|
|
"k8s.io/kubernetes/pkg/kubelet/kuberuntime"
|
2017-08-18 22:08:44 +00:00
|
|
|
"k8s.io/kubernetes/pkg/kubelet/server/stats"
|
2017-10-26 06:46:49 +00:00
|
|
|
kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
|
2017-08-18 22:08:44 +00:00
|
|
|
)
|
|
|
|
|
2019-02-02 09:14:26 +00:00
|
|
|
// defaultCachePeriod is how long a cached cpuUsage sample is kept before it is
// considered outdated and removed from the cache.
var defaultCachePeriod = 10 * time.Minute
|
|
|
|
|
2017-08-18 22:08:44 +00:00
|
|
|
// criStatsProvider implements the containerStatsProvider interface by getting
|
|
|
|
// the container stats from CRI.
|
|
|
|
type criStatsProvider struct {
|
|
|
|
// cadvisor is used to get the node root filesystem's stats (such as the
|
|
|
|
// capacity/available bytes/inodes) that will be populated in per container
|
|
|
|
// filesystem stats.
|
|
|
|
cadvisor cadvisor.Interface
|
|
|
|
// resourceAnalyzer is used to get the volume stats of the pods.
|
|
|
|
resourceAnalyzer stats.ResourceAnalyzer
|
|
|
|
// runtimeService is used to get the status and stats of the pods and its
|
|
|
|
// managed containers.
|
|
|
|
runtimeService internalapi.RuntimeService
|
|
|
|
// imageService is used to get the stats of the image filesystem.
|
|
|
|
imageService internalapi.ImageManagerService
|
2018-02-21 03:49:51 +00:00
|
|
|
// logMetrics provides the metrics for container logs
|
|
|
|
logMetricsService LogMetricsService
|
2019-02-02 09:14:26 +00:00
|
|
|
|
|
|
|
// cpuUsageCache caches the cpu usage for containers.
|
|
|
|
cpuUsageCache map[string]*runtimeapi.CpuUsage
|
|
|
|
mutex sync.Mutex
|
2017-08-18 22:08:44 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// newCRIStatsProvider returns a containerStatsProvider implementation that
|
|
|
|
// provides container stats using CRI.
|
|
|
|
func newCRIStatsProvider(
|
|
|
|
cadvisor cadvisor.Interface,
|
|
|
|
resourceAnalyzer stats.ResourceAnalyzer,
|
|
|
|
runtimeService internalapi.RuntimeService,
|
|
|
|
imageService internalapi.ImageManagerService,
|
2018-02-21 03:49:51 +00:00
|
|
|
logMetricsService LogMetricsService,
|
2017-08-18 22:08:44 +00:00
|
|
|
) containerStatsProvider {
|
|
|
|
return &criStatsProvider{
|
2018-02-21 03:49:51 +00:00
|
|
|
cadvisor: cadvisor,
|
|
|
|
resourceAnalyzer: resourceAnalyzer,
|
|
|
|
runtimeService: runtimeService,
|
|
|
|
imageService: imageService,
|
|
|
|
logMetricsService: logMetricsService,
|
2019-02-02 09:14:26 +00:00
|
|
|
cpuUsageCache: make(map[string]*runtimeapi.CpuUsage),
|
2017-08-18 22:08:44 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-08-31 22:37:04 +00:00
|
|
|
// ListPodStats returns the stats of all the pod-managed containers.
|
2017-08-18 22:08:44 +00:00
|
|
|
func (p *criStatsProvider) ListPodStats() ([]statsapi.PodStats, error) {
|
2017-08-31 22:37:04 +00:00
|
|
|
// Gets node root filesystem information, which will be used to populate
|
|
|
|
// the available and capacity bytes/inodes in container stats.
|
|
|
|
rootFsInfo, err := p.cadvisor.RootFsInfo()
|
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("failed to get rootFs info: %v", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
containers, err := p.runtimeService.ListContainers(&runtimeapi.ContainerFilter{})
|
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("failed to list all containers: %v", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Creates pod sandbox map.
|
|
|
|
podSandboxMap := make(map[string]*runtimeapi.PodSandbox)
|
|
|
|
podSandboxes, err := p.runtimeService.ListPodSandbox(&runtimeapi.PodSandboxFilter{})
|
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("failed to list all pod sandboxes: %v", err)
|
|
|
|
}
|
|
|
|
for _, s := range podSandboxes {
|
|
|
|
podSandboxMap[s.Id] = s
|
|
|
|
}
|
2018-02-07 08:36:20 +00:00
|
|
|
// fsIDtoInfo is a map from filesystem id to its stats. This will be used
|
|
|
|
// as a cache to avoid querying cAdvisor for the filesystem stats with the
|
|
|
|
// same filesystem id many times.
|
|
|
|
fsIDtoInfo := make(map[runtimeapi.FilesystemIdentifier]*cadvisorapiv2.FsInfo)
|
2017-08-31 22:37:04 +00:00
|
|
|
|
|
|
|
// sandboxIDToPodStats is a temporary map from sandbox ID to its pod stats.
|
|
|
|
sandboxIDToPodStats := make(map[string]*statsapi.PodStats)
|
|
|
|
|
2017-09-18 06:41:07 +00:00
|
|
|
resp, err := p.runtimeService.ListContainerStats(&runtimeapi.ContainerStatsFilter{})
|
2017-08-31 22:37:04 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("failed to list all container stats: %v", err)
|
|
|
|
}
|
2017-10-26 06:46:49 +00:00
|
|
|
|
|
|
|
containers = removeTerminatedContainer(containers)
|
|
|
|
// Creates container map.
|
|
|
|
containerMap := make(map[string]*runtimeapi.Container)
|
|
|
|
for _, c := range containers {
|
|
|
|
containerMap[c.Id] = c
|
|
|
|
}
|
|
|
|
|
2018-02-23 22:20:38 +00:00
|
|
|
allInfos, err := getCadvisorContainerInfo(p.cadvisor)
|
2017-11-01 14:06:01 +00:00
|
|
|
if err != nil {
|
2018-02-23 22:20:38 +00:00
|
|
|
return nil, fmt.Errorf("failed to fetch cadvisor stats: %v", err)
|
2017-11-01 14:06:01 +00:00
|
|
|
}
|
2018-02-23 22:20:38 +00:00
|
|
|
caInfos := getCRICadvisorStats(allInfos)
|
2017-11-01 14:06:01 +00:00
|
|
|
|
2017-09-18 06:41:07 +00:00
|
|
|
for _, stats := range resp {
|
2017-08-31 22:37:04 +00:00
|
|
|
containerID := stats.Attributes.Id
|
|
|
|
container, found := containerMap[containerID]
|
|
|
|
if !found {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
podSandboxID := container.PodSandboxId
|
|
|
|
podSandbox, found := podSandboxMap[podSandboxID]
|
|
|
|
if !found {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
// Creates the stats of the pod (if not created yet) which the
|
|
|
|
// container belongs to.
|
|
|
|
ps, found := sandboxIDToPodStats[podSandboxID]
|
|
|
|
if !found {
|
2017-11-10 00:48:33 +00:00
|
|
|
ps = buildPodStats(podSandbox)
|
2017-08-31 22:37:04 +00:00
|
|
|
sandboxIDToPodStats[podSandboxID] = ps
|
|
|
|
}
|
2018-05-17 07:28:46 +00:00
|
|
|
|
|
|
|
// Fill available stats for full set of required pod stats
|
2018-02-21 03:49:51 +00:00
|
|
|
cs := p.makeContainerStats(stats, container, &rootFsInfo, fsIDtoInfo, podSandbox.GetMetadata().GetUid())
|
2019-02-15 04:20:23 +00:00
|
|
|
p.addPodNetworkStats(ps, podSandboxID, caInfos, cs)
|
2018-05-17 07:28:46 +00:00
|
|
|
p.addPodCPUMemoryStats(ps, types.UID(podSandbox.Metadata.Uid), allInfos, cs)
|
|
|
|
|
2017-11-01 14:06:01 +00:00
|
|
|
// If cadvisor stats is available for the container, use it to populate
|
|
|
|
// container stats
|
|
|
|
caStats, caFound := caInfos[containerID]
|
|
|
|
if !caFound {
|
2018-11-09 18:49:10 +00:00
|
|
|
klog.V(4).Infof("Unable to find cadvisor stats for %q", containerID)
|
2017-11-01 14:06:01 +00:00
|
|
|
} else {
|
|
|
|
p.addCadvisorContainerStats(cs, &caStats)
|
|
|
|
}
|
|
|
|
ps.Containers = append(ps.Containers, *cs)
|
2017-08-31 22:37:04 +00:00
|
|
|
}
|
2019-02-02 09:14:26 +00:00
|
|
|
// cleanup outdated caches.
|
|
|
|
p.cleanupOutdatedCaches()
|
2017-08-31 22:37:04 +00:00
|
|
|
|
|
|
|
result := make([]statsapi.PodStats, 0, len(sandboxIDToPodStats))
|
|
|
|
for _, s := range sandboxIDToPodStats {
|
2017-11-10 00:48:33 +00:00
|
|
|
p.makePodStorageStats(s, &rootFsInfo)
|
2017-08-31 22:37:04 +00:00
|
|
|
result = append(result, *s)
|
|
|
|
}
|
|
|
|
return result, nil
|
2017-08-18 22:08:44 +00:00
|
|
|
}
|
|
|
|
|
2018-09-19 14:26:00 +00:00
|
|
|
// ListPodCPUAndMemoryStats returns the CPU and Memory stats of all the pod-managed containers.
|
|
|
|
func (p *criStatsProvider) ListPodCPUAndMemoryStats() ([]statsapi.PodStats, error) {
|
|
|
|
containers, err := p.runtimeService.ListContainers(&runtimeapi.ContainerFilter{})
|
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("failed to list all containers: %v", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Creates pod sandbox map.
|
|
|
|
podSandboxMap := make(map[string]*runtimeapi.PodSandbox)
|
|
|
|
podSandboxes, err := p.runtimeService.ListPodSandbox(&runtimeapi.PodSandboxFilter{})
|
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("failed to list all pod sandboxes: %v", err)
|
|
|
|
}
|
|
|
|
for _, s := range podSandboxes {
|
|
|
|
podSandboxMap[s.Id] = s
|
|
|
|
}
|
|
|
|
|
|
|
|
// sandboxIDToPodStats is a temporary map from sandbox ID to its pod stats.
|
|
|
|
sandboxIDToPodStats := make(map[string]*statsapi.PodStats)
|
|
|
|
|
|
|
|
resp, err := p.runtimeService.ListContainerStats(&runtimeapi.ContainerStatsFilter{})
|
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("failed to list all container stats: %v", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
containers = removeTerminatedContainer(containers)
|
|
|
|
// Creates container map.
|
|
|
|
containerMap := make(map[string]*runtimeapi.Container)
|
|
|
|
for _, c := range containers {
|
|
|
|
containerMap[c.Id] = c
|
|
|
|
}
|
|
|
|
|
|
|
|
allInfos, err := getCadvisorContainerInfo(p.cadvisor)
|
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("failed to fetch cadvisor stats: %v", err)
|
|
|
|
}
|
|
|
|
caInfos := getCRICadvisorStats(allInfos)
|
|
|
|
|
|
|
|
for _, stats := range resp {
|
|
|
|
containerID := stats.Attributes.Id
|
|
|
|
container, found := containerMap[containerID]
|
|
|
|
if !found {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
podSandboxID := container.PodSandboxId
|
|
|
|
podSandbox, found := podSandboxMap[podSandboxID]
|
|
|
|
if !found {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
// Creates the stats of the pod (if not created yet) which the
|
|
|
|
// container belongs to.
|
|
|
|
ps, found := sandboxIDToPodStats[podSandboxID]
|
|
|
|
if !found {
|
|
|
|
ps = buildPodStats(podSandbox)
|
|
|
|
sandboxIDToPodStats[podSandboxID] = ps
|
|
|
|
}
|
|
|
|
|
|
|
|
// Fill available CPU and memory stats for full set of required pod stats
|
|
|
|
cs := p.makeContainerCPUAndMemoryStats(stats, container)
|
|
|
|
p.addPodCPUMemoryStats(ps, types.UID(podSandbox.Metadata.Uid), allInfos, cs)
|
|
|
|
|
|
|
|
// If cadvisor stats is available for the container, use it to populate
|
|
|
|
// container stats
|
|
|
|
caStats, caFound := caInfos[containerID]
|
|
|
|
if !caFound {
|
2018-11-09 18:49:10 +00:00
|
|
|
klog.V(4).Infof("Unable to find cadvisor stats for %q", containerID)
|
2018-09-19 14:26:00 +00:00
|
|
|
} else {
|
|
|
|
p.addCadvisorContainerStats(cs, &caStats)
|
|
|
|
}
|
|
|
|
ps.Containers = append(ps.Containers, *cs)
|
|
|
|
}
|
2019-02-02 09:14:26 +00:00
|
|
|
// cleanup outdated caches.
|
|
|
|
p.cleanupOutdatedCaches()
|
2018-09-19 14:26:00 +00:00
|
|
|
|
|
|
|
result := make([]statsapi.PodStats, 0, len(sandboxIDToPodStats))
|
|
|
|
for _, s := range sandboxIDToPodStats {
|
|
|
|
result = append(result, *s)
|
|
|
|
}
|
|
|
|
return result, nil
|
|
|
|
}
|
|
|
|
|
2017-08-31 22:37:04 +00:00
|
|
|
// ImageFsStats returns the stats of the image filesystem.
|
2017-08-18 22:08:44 +00:00
|
|
|
func (p *criStatsProvider) ImageFsStats() (*statsapi.FsStats, error) {
|
2017-09-18 06:41:07 +00:00
|
|
|
resp, err := p.imageService.ImageFsInfo()
|
2017-08-31 22:37:04 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
// CRI may return the stats of multiple image filesystems but we only
|
|
|
|
// return the first one.
|
|
|
|
//
|
|
|
|
// TODO(yguo0905): Support returning stats of multiple image filesystems.
|
2017-09-18 06:41:07 +00:00
|
|
|
for _, fs := range resp {
|
2017-08-31 22:37:04 +00:00
|
|
|
s := &statsapi.FsStats{
|
2018-09-03 07:16:35 +00:00
|
|
|
Time: metav1.NewTime(time.Unix(0, fs.Timestamp)),
|
|
|
|
UsedBytes: &fs.UsedBytes.Value,
|
|
|
|
}
|
|
|
|
if fs.InodesUsed != nil {
|
|
|
|
s.InodesUsed = &fs.InodesUsed.Value
|
2017-08-31 22:37:04 +00:00
|
|
|
}
|
2018-02-07 08:36:20 +00:00
|
|
|
imageFsInfo := p.getFsInfo(fs.GetFsId())
|
2017-08-31 22:37:04 +00:00
|
|
|
if imageFsInfo != nil {
|
2018-02-07 08:36:20 +00:00
|
|
|
// The image filesystem id is unknown to the local node or there's
|
|
|
|
// an error on retrieving the stats. In these cases, we omit those
|
|
|
|
// stats and return the best-effort partial result. See
|
2017-08-31 22:37:04 +00:00
|
|
|
// https://github.com/kubernetes/heapster/issues/1793.
|
|
|
|
s.AvailableBytes = &imageFsInfo.Available
|
|
|
|
s.CapacityBytes = &imageFsInfo.Capacity
|
|
|
|
s.InodesFree = imageFsInfo.InodesFree
|
|
|
|
s.Inodes = imageFsInfo.Inodes
|
|
|
|
}
|
|
|
|
return s, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil, fmt.Errorf("imageFs information is unavailable")
|
|
|
|
}
|
|
|
|
|
2018-01-30 00:24:03 +00:00
|
|
|
// ImageFsDevice returns name of the device where the image filesystem locates,
|
|
|
|
// e.g. /dev/sda1.
|
|
|
|
func (p *criStatsProvider) ImageFsDevice() (string, error) {
|
|
|
|
resp, err := p.imageService.ImageFsInfo()
|
|
|
|
if err != nil {
|
|
|
|
return "", err
|
|
|
|
}
|
|
|
|
for _, fs := range resp {
|
2018-02-07 08:36:20 +00:00
|
|
|
fsInfo := p.getFsInfo(fs.GetFsId())
|
2018-01-30 00:24:03 +00:00
|
|
|
if fsInfo != nil {
|
|
|
|
return fsInfo.Device, nil
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return "", errors.New("imagefs device is not found")
|
|
|
|
}
|
|
|
|
|
2017-08-31 22:37:04 +00:00
|
|
|
// getFsInfo returns the information of the filesystem with the specified
|
2018-02-07 08:36:20 +00:00
|
|
|
// fsID. If any error occurs, this function logs the error and returns
|
2017-08-31 22:37:04 +00:00
|
|
|
// nil.
|
2018-02-07 08:36:20 +00:00
|
|
|
func (p *criStatsProvider) getFsInfo(fsID *runtimeapi.FilesystemIdentifier) *cadvisorapiv2.FsInfo {
|
|
|
|
if fsID == nil {
|
2018-11-09 18:49:10 +00:00
|
|
|
klog.V(2).Infof("Failed to get filesystem info: fsID is nil.")
|
2017-08-31 22:37:04 +00:00
|
|
|
return nil
|
|
|
|
}
|
2018-02-07 08:36:20 +00:00
|
|
|
mountpoint := fsID.GetMountpoint()
|
|
|
|
fsInfo, err := p.cadvisor.GetDirFsInfo(mountpoint)
|
2017-08-31 22:37:04 +00:00
|
|
|
if err != nil {
|
2018-02-07 08:36:20 +00:00
|
|
|
msg := fmt.Sprintf("Failed to get the info of the filesystem with mountpoint %q: %v.", mountpoint, err)
|
2017-08-31 22:37:04 +00:00
|
|
|
if err == cadvisorfs.ErrNoSuchDevice {
|
2018-11-09 18:49:10 +00:00
|
|
|
klog.V(2).Info(msg)
|
2017-08-31 22:37:04 +00:00
|
|
|
} else {
|
2018-11-09 18:49:10 +00:00
|
|
|
klog.Error(msg)
|
2017-08-31 22:37:04 +00:00
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
return &fsInfo
|
|
|
|
}
|
|
|
|
|
2017-11-01 14:06:01 +00:00
|
|
|
// buildPodStats returns a PodStats that identifies the Pod managing cinfo
|
2017-11-10 00:48:33 +00:00
|
|
|
func buildPodStats(podSandbox *runtimeapi.PodSandbox) *statsapi.PodStats {
|
|
|
|
return &statsapi.PodStats{
|
2017-08-31 22:37:04 +00:00
|
|
|
PodRef: statsapi.PodReference{
|
|
|
|
Name: podSandbox.Metadata.Name,
|
|
|
|
UID: podSandbox.Metadata.Uid,
|
|
|
|
Namespace: podSandbox.Metadata.Namespace,
|
|
|
|
},
|
|
|
|
// The StartTime in the summary API is the pod creation time.
|
|
|
|
StartTime: metav1.NewTime(time.Unix(0, podSandbox.CreatedAt)),
|
|
|
|
}
|
2017-11-10 00:48:33 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func (p *criStatsProvider) makePodStorageStats(s *statsapi.PodStats, rootFsInfo *cadvisorapiv2.FsInfo) *statsapi.PodStats {
|
2017-08-31 22:37:04 +00:00
|
|
|
podUID := types.UID(s.PodRef.UID)
|
|
|
|
if vstats, found := p.resourceAnalyzer.GetPodVolumeStats(podUID); found {
|
2017-11-10 00:48:33 +00:00
|
|
|
ephemeralStats := make([]statsapi.VolumeStats, len(vstats.EphemeralVolumes))
|
|
|
|
copy(ephemeralStats, vstats.EphemeralVolumes)
|
|
|
|
s.VolumeStats = append(vstats.EphemeralVolumes, vstats.PersistentVolumes...)
|
|
|
|
s.EphemeralStorage = calcEphemeralStorage(s.Containers, ephemeralStats, rootFsInfo)
|
2017-08-31 22:37:04 +00:00
|
|
|
}
|
|
|
|
return s
|
|
|
|
}
|
|
|
|
|
2018-05-17 07:28:46 +00:00
|
|
|
func (p *criStatsProvider) addPodNetworkStats(
|
2017-11-01 14:06:01 +00:00
|
|
|
ps *statsapi.PodStats,
|
2018-02-23 22:20:38 +00:00
|
|
|
podSandboxID string,
|
|
|
|
caInfos map[string]cadvisorapiv2.ContainerInfo,
|
2018-05-17 07:28:46 +00:00
|
|
|
cs *statsapi.ContainerStats,
|
2017-11-01 14:06:01 +00:00
|
|
|
) {
|
2018-02-23 22:20:38 +00:00
|
|
|
caPodSandbox, found := caInfos[podSandboxID]
|
2018-05-17 07:28:46 +00:00
|
|
|
// try get network stats from cadvisor first.
|
2018-02-23 22:20:38 +00:00
|
|
|
if found {
|
2019-02-15 04:20:23 +00:00
|
|
|
ps.Network = cadvisorInfoToNetworkStats(ps.PodRef.Name, &caPodSandbox)
|
2018-05-17 07:28:46 +00:00
|
|
|
return
|
2018-02-23 22:20:38 +00:00
|
|
|
}
|
2018-05-17 07:28:46 +00:00
|
|
|
|
|
|
|
// TODO: sum Pod network stats from container stats.
|
2018-11-09 18:49:10 +00:00
|
|
|
klog.V(4).Infof("Unable to find cadvisor stats for sandbox %q", podSandboxID)
|
2018-02-23 22:20:38 +00:00
|
|
|
}
|
|
|
|
|
2018-05-17 07:28:46 +00:00
|
|
|
func (p *criStatsProvider) addPodCPUMemoryStats(
|
2018-02-23 22:20:38 +00:00
|
|
|
ps *statsapi.PodStats,
|
|
|
|
podUID types.UID,
|
|
|
|
allInfos map[string]cadvisorapiv2.ContainerInfo,
|
2018-05-17 07:28:46 +00:00
|
|
|
cs *statsapi.ContainerStats,
|
2018-02-23 22:20:38 +00:00
|
|
|
) {
|
2018-05-17 07:28:46 +00:00
|
|
|
// try get cpu and memory stats from cadvisor first.
|
2018-02-23 22:20:38 +00:00
|
|
|
podCgroupInfo := getCadvisorPodInfoFromPodUID(podUID, allInfos)
|
|
|
|
if podCgroupInfo != nil {
|
|
|
|
cpu, memory := cadvisorInfoToCPUandMemoryStats(podCgroupInfo)
|
|
|
|
ps.CPU = cpu
|
|
|
|
ps.Memory = memory
|
2018-05-17 07:28:46 +00:00
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
// Sum Pod cpu and memory stats from containers stats.
|
|
|
|
if cs.CPU != nil {
|
|
|
|
if ps.CPU == nil {
|
|
|
|
ps.CPU = &statsapi.CPUStats{}
|
|
|
|
}
|
|
|
|
|
|
|
|
ps.CPU.Time = cs.StartTime
|
|
|
|
usageCoreNanoSeconds := getUint64Value(cs.CPU.UsageCoreNanoSeconds) + getUint64Value(ps.CPU.UsageCoreNanoSeconds)
|
|
|
|
usageNanoCores := getUint64Value(cs.CPU.UsageNanoCores) + getUint64Value(ps.CPU.UsageNanoCores)
|
|
|
|
ps.CPU.UsageCoreNanoSeconds = &usageCoreNanoSeconds
|
|
|
|
ps.CPU.UsageNanoCores = &usageNanoCores
|
|
|
|
}
|
|
|
|
|
|
|
|
if cs.Memory != nil {
|
|
|
|
if ps.Memory == nil {
|
|
|
|
ps.Memory = &statsapi.MemoryStats{}
|
|
|
|
}
|
|
|
|
|
|
|
|
ps.Memory.Time = cs.Memory.Time
|
|
|
|
availableBytes := getUint64Value(cs.Memory.AvailableBytes) + getUint64Value(ps.Memory.AvailableBytes)
|
|
|
|
usageBytes := getUint64Value(cs.Memory.UsageBytes) + getUint64Value(ps.Memory.UsageBytes)
|
|
|
|
workingSetBytes := getUint64Value(cs.Memory.WorkingSetBytes) + getUint64Value(ps.Memory.WorkingSetBytes)
|
|
|
|
rSSBytes := getUint64Value(cs.Memory.RSSBytes) + getUint64Value(ps.Memory.RSSBytes)
|
|
|
|
pageFaults := getUint64Value(cs.Memory.PageFaults) + getUint64Value(ps.Memory.PageFaults)
|
|
|
|
majorPageFaults := getUint64Value(cs.Memory.MajorPageFaults) + getUint64Value(ps.Memory.MajorPageFaults)
|
|
|
|
ps.Memory.AvailableBytes = &availableBytes
|
|
|
|
ps.Memory.UsageBytes = &usageBytes
|
|
|
|
ps.Memory.WorkingSetBytes = &workingSetBytes
|
|
|
|
ps.Memory.RSSBytes = &rSSBytes
|
|
|
|
ps.Memory.PageFaults = &pageFaults
|
|
|
|
ps.Memory.MajorPageFaults = &majorPageFaults
|
2018-02-23 22:20:38 +00:00
|
|
|
}
|
2017-11-01 14:06:01 +00:00
|
|
|
}
|
|
|
|
|
2017-08-31 22:37:04 +00:00
|
|
|
func (p *criStatsProvider) makeContainerStats(
|
|
|
|
stats *runtimeapi.ContainerStats,
|
|
|
|
container *runtimeapi.Container,
|
|
|
|
rootFsInfo *cadvisorapiv2.FsInfo,
|
2018-02-07 08:36:20 +00:00
|
|
|
fsIDtoInfo map[runtimeapi.FilesystemIdentifier]*cadvisorapiv2.FsInfo,
|
2018-02-21 03:49:51 +00:00
|
|
|
uid string,
|
2017-08-31 22:37:04 +00:00
|
|
|
) *statsapi.ContainerStats {
|
|
|
|
result := &statsapi.ContainerStats{
|
|
|
|
Name: stats.Attributes.Metadata.Name,
|
|
|
|
// The StartTime in the summary API is the container creation time.
|
|
|
|
StartTime: metav1.NewTime(time.Unix(0, container.CreatedAt)),
|
2018-04-09 01:43:39 +00:00
|
|
|
CPU: &statsapi.CPUStats{},
|
|
|
|
Memory: &statsapi.MemoryStats{},
|
|
|
|
Rootfs: &statsapi.FsStats{},
|
2017-08-31 22:37:04 +00:00
|
|
|
// UserDefinedMetrics is not supported by CRI.
|
|
|
|
}
|
|
|
|
if stats.Cpu != nil {
|
|
|
|
result.CPU.Time = metav1.NewTime(time.Unix(0, stats.Cpu.Timestamp))
|
|
|
|
if stats.Cpu.UsageCoreNanoSeconds != nil {
|
|
|
|
result.CPU.UsageCoreNanoSeconds = &stats.Cpu.UsageCoreNanoSeconds.Value
|
|
|
|
}
|
2019-02-02 09:14:26 +00:00
|
|
|
|
|
|
|
usageNanoCores := p.getContainerUsageNanoCores(stats)
|
|
|
|
if usageNanoCores != nil {
|
|
|
|
result.CPU.UsageNanoCores = usageNanoCores
|
|
|
|
}
|
2017-08-31 22:37:04 +00:00
|
|
|
}
|
|
|
|
if stats.Memory != nil {
|
|
|
|
result.Memory.Time = metav1.NewTime(time.Unix(0, stats.Memory.Timestamp))
|
|
|
|
if stats.Memory.WorkingSetBytes != nil {
|
|
|
|
result.Memory.WorkingSetBytes = &stats.Memory.WorkingSetBytes.Value
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if stats.WritableLayer != nil {
|
|
|
|
result.Rootfs.Time = metav1.NewTime(time.Unix(0, stats.WritableLayer.Timestamp))
|
|
|
|
if stats.WritableLayer.UsedBytes != nil {
|
|
|
|
result.Rootfs.UsedBytes = &stats.WritableLayer.UsedBytes.Value
|
|
|
|
}
|
|
|
|
if stats.WritableLayer.InodesUsed != nil {
|
|
|
|
result.Rootfs.InodesUsed = &stats.WritableLayer.InodesUsed.Value
|
|
|
|
}
|
|
|
|
}
|
2018-02-07 08:36:20 +00:00
|
|
|
fsID := stats.GetWritableLayer().GetFsId()
|
|
|
|
if fsID != nil {
|
|
|
|
imageFsInfo, found := fsIDtoInfo[*fsID]
|
2017-09-15 03:30:09 +00:00
|
|
|
if !found {
|
2018-02-07 08:36:20 +00:00
|
|
|
imageFsInfo = p.getFsInfo(fsID)
|
|
|
|
fsIDtoInfo[*fsID] = imageFsInfo
|
2017-09-15 03:30:09 +00:00
|
|
|
}
|
|
|
|
if imageFsInfo != nil {
|
2018-02-07 08:36:20 +00:00
|
|
|
// The image filesystem id is unknown to the local node or there's
|
|
|
|
// an error on retrieving the stats. In these cases, we omit those stats
|
2017-09-15 03:30:09 +00:00
|
|
|
// and return the best-effort partial result. See
|
|
|
|
// https://github.com/kubernetes/heapster/issues/1793.
|
|
|
|
result.Rootfs.AvailableBytes = &imageFsInfo.Available
|
|
|
|
result.Rootfs.CapacityBytes = &imageFsInfo.Capacity
|
|
|
|
result.Rootfs.InodesFree = imageFsInfo.InodesFree
|
|
|
|
result.Rootfs.Inodes = imageFsInfo.Inodes
|
|
|
|
}
|
2017-08-31 22:37:04 +00:00
|
|
|
}
|
2018-02-21 03:49:51 +00:00
|
|
|
containerLogPath := kuberuntime.BuildContainerLogsDirectory(types.UID(uid), container.GetMetadata().GetName())
|
|
|
|
result.Logs = p.getContainerLogStats(containerLogPath, rootFsInfo)
|
2017-08-31 22:37:04 +00:00
|
|
|
return result
|
2017-08-18 22:08:44 +00:00
|
|
|
}
|
2017-10-26 06:46:49 +00:00
|
|
|
|
2018-09-19 14:26:00 +00:00
|
|
|
func (p *criStatsProvider) makeContainerCPUAndMemoryStats(
|
|
|
|
stats *runtimeapi.ContainerStats,
|
|
|
|
container *runtimeapi.Container,
|
|
|
|
) *statsapi.ContainerStats {
|
|
|
|
result := &statsapi.ContainerStats{
|
|
|
|
Name: stats.Attributes.Metadata.Name,
|
|
|
|
// The StartTime in the summary API is the container creation time.
|
|
|
|
StartTime: metav1.NewTime(time.Unix(0, container.CreatedAt)),
|
|
|
|
CPU: &statsapi.CPUStats{},
|
|
|
|
Memory: &statsapi.MemoryStats{},
|
|
|
|
// UserDefinedMetrics is not supported by CRI.
|
|
|
|
}
|
|
|
|
if stats.Cpu != nil {
|
|
|
|
result.CPU.Time = metav1.NewTime(time.Unix(0, stats.Cpu.Timestamp))
|
|
|
|
if stats.Cpu.UsageCoreNanoSeconds != nil {
|
|
|
|
result.CPU.UsageCoreNanoSeconds = &stats.Cpu.UsageCoreNanoSeconds.Value
|
|
|
|
}
|
2019-02-02 09:14:26 +00:00
|
|
|
|
|
|
|
usageNanoCores := p.getContainerUsageNanoCores(stats)
|
|
|
|
if usageNanoCores != nil {
|
|
|
|
result.CPU.UsageNanoCores = usageNanoCores
|
|
|
|
}
|
2018-09-19 14:26:00 +00:00
|
|
|
}
|
|
|
|
if stats.Memory != nil {
|
|
|
|
result.Memory.Time = metav1.NewTime(time.Unix(0, stats.Memory.Timestamp))
|
|
|
|
if stats.Memory.WorkingSetBytes != nil {
|
|
|
|
result.Memory.WorkingSetBytes = &stats.Memory.WorkingSetBytes.Value
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return result
|
|
|
|
}
|
|
|
|
|
2019-02-02 09:14:26 +00:00
|
|
|
// getContainerUsageNanoCores gets usageNanoCores based on cached usageCoreNanoSeconds.
|
|
|
|
func (p *criStatsProvider) getContainerUsageNanoCores(stats *runtimeapi.ContainerStats) *uint64 {
|
|
|
|
if stats == nil || stats.Cpu == nil || stats.Cpu.UsageCoreNanoSeconds == nil {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
p.mutex.Lock()
|
|
|
|
defer func() {
|
|
|
|
// Update cache with new value.
|
|
|
|
p.cpuUsageCache[stats.Attributes.Id] = stats.Cpu
|
|
|
|
p.mutex.Unlock()
|
|
|
|
}()
|
|
|
|
|
|
|
|
cached, ok := p.cpuUsageCache[stats.Attributes.Id]
|
|
|
|
if !ok || cached.UsageCoreNanoSeconds == nil {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
nanoSeconds := stats.Cpu.Timestamp - cached.Timestamp
|
|
|
|
usageNanoCores := (stats.Cpu.UsageCoreNanoSeconds.Value - cached.UsageCoreNanoSeconds.Value) * uint64(time.Second/time.Nanosecond) / uint64(nanoSeconds)
|
|
|
|
return &usageNanoCores
|
|
|
|
}
|
|
|
|
|
|
|
|
func (p *criStatsProvider) cleanupOutdatedCaches() {
|
|
|
|
p.mutex.Lock()
|
|
|
|
defer p.mutex.Unlock()
|
|
|
|
|
|
|
|
for k, v := range p.cpuUsageCache {
|
|
|
|
if v == nil {
|
|
|
|
delete(p.cpuUsageCache, k)
|
|
|
|
}
|
|
|
|
|
|
|
|
if time.Since(time.Unix(0, v.Timestamp)) > defaultCachePeriod {
|
|
|
|
delete(p.cpuUsageCache, k)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-10-26 06:46:49 +00:00
|
|
|
// removeTerminatedContainer returns the specified container but with
|
|
|
|
// the stats of the terminated containers removed.
|
|
|
|
func removeTerminatedContainer(containers []*runtimeapi.Container) []*runtimeapi.Container {
|
|
|
|
containerMap := make(map[containerID][]*runtimeapi.Container)
|
|
|
|
// Sort order by create time
|
|
|
|
sort.Slice(containers, func(i, j int) bool {
|
|
|
|
return containers[i].CreatedAt < containers[j].CreatedAt
|
|
|
|
})
|
|
|
|
for _, container := range containers {
|
|
|
|
refID := containerID{
|
|
|
|
podRef: buildPodRef(container.Labels),
|
|
|
|
containerName: kubetypes.GetContainerName(container.Labels),
|
|
|
|
}
|
|
|
|
containerMap[refID] = append(containerMap[refID], container)
|
|
|
|
}
|
|
|
|
|
|
|
|
result := make([]*runtimeapi.Container, 0)
|
|
|
|
for _, refs := range containerMap {
|
|
|
|
if len(refs) == 1 {
|
|
|
|
result = append(result, refs[0])
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
found := false
|
|
|
|
for i := 0; i < len(refs); i++ {
|
|
|
|
if refs[i].State == runtimeapi.ContainerState_CONTAINER_RUNNING {
|
|
|
|
found = true
|
|
|
|
result = append(result, refs[i])
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if !found {
|
|
|
|
result = append(result, refs[len(refs)-1])
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return result
|
|
|
|
}
|
2017-11-01 14:06:01 +00:00
|
|
|
|
|
|
|
func (p *criStatsProvider) addCadvisorContainerStats(
|
|
|
|
cs *statsapi.ContainerStats,
|
|
|
|
caPodStats *cadvisorapiv2.ContainerInfo,
|
|
|
|
) {
|
|
|
|
if caPodStats.Spec.HasCustomMetrics {
|
|
|
|
cs.UserDefinedMetrics = cadvisorInfoToUserDefinedMetrics(caPodStats)
|
|
|
|
}
|
|
|
|
|
|
|
|
cpu, memory := cadvisorInfoToCPUandMemoryStats(caPodStats)
|
|
|
|
if cpu != nil {
|
|
|
|
cs.CPU = cpu
|
|
|
|
}
|
|
|
|
if memory != nil {
|
|
|
|
cs.Memory = memory
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-02-23 22:20:38 +00:00
|
|
|
func getCRICadvisorStats(infos map[string]cadvisorapiv2.ContainerInfo) map[string]cadvisorapiv2.ContainerInfo {
|
2017-11-01 14:06:01 +00:00
|
|
|
stats := make(map[string]cadvisorapiv2.ContainerInfo)
|
|
|
|
infos = removeTerminatedContainerInfo(infos)
|
|
|
|
for key, info := range infos {
|
|
|
|
// On systemd using devicemapper each mount into the container has an
|
|
|
|
// associated cgroup. We ignore them to ensure we do not get duplicate
|
|
|
|
// entries in our summary. For details on .mount units:
|
|
|
|
// http://man7.org/linux/man-pages/man5/systemd.mount.5.html
|
|
|
|
if strings.HasSuffix(key, ".mount") {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
// Build the Pod key if this container is managed by a Pod
|
|
|
|
if !isPodManagedContainer(&info) {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
stats[path.Base(key)] = info
|
|
|
|
}
|
2018-02-23 22:20:38 +00:00
|
|
|
return stats
|
2017-11-01 14:06:01 +00:00
|
|
|
}
|
2018-02-21 03:49:51 +00:00
|
|
|
|
|
|
|
// TODO Cache the metrics in container log manager
|
|
|
|
func (p *criStatsProvider) getContainerLogStats(path string, rootFsInfo *cadvisorapiv2.FsInfo) *statsapi.FsStats {
|
|
|
|
m := p.logMetricsService.createLogMetricsProvider(path)
|
|
|
|
logMetrics, err := m.GetMetrics()
|
|
|
|
if err != nil {
|
2018-11-09 18:49:10 +00:00
|
|
|
klog.Errorf("Unable to fetch container log stats for path %s: %v ", path, err)
|
2018-02-21 03:49:51 +00:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
result := &statsapi.FsStats{
|
|
|
|
Time: metav1.NewTime(rootFsInfo.Timestamp),
|
|
|
|
AvailableBytes: &rootFsInfo.Available,
|
|
|
|
CapacityBytes: &rootFsInfo.Capacity,
|
|
|
|
InodesFree: rootFsInfo.InodesFree,
|
|
|
|
Inodes: rootFsInfo.Inodes,
|
|
|
|
}
|
|
|
|
usedbytes := uint64(logMetrics.Used.Value())
|
|
|
|
result.UsedBytes = &usedbytes
|
|
|
|
inodesUsed := uint64(logMetrics.InodesUsed.Value())
|
|
|
|
result.InodesUsed = &inodesUsed
|
|
|
|
return result
|
|
|
|
}
|