diff --git a/pkg/kubelet/stats/cadvisor_stats_provider.go b/pkg/kubelet/stats/cadvisor_stats_provider.go
index b498c5158e..17a10df2f6 100644
--- a/pkg/kubelet/stats/cadvisor_stats_provider.go
+++ b/pkg/kubelet/stats/cadvisor_stats_provider.go
@@ -76,20 +76,9 @@ func (p *cadvisorStatsProvider) ListPodStats() ([]statsapi.PodStats, error) {
 	if err != nil {
 		return nil, fmt.Errorf("failed to get imageFs info: %v", err)
 	}
-
-	infos, err := p.cadvisor.ContainerInfoV2("/", cadvisorapiv2.RequestOptions{
-		IdType: cadvisorapiv2.TypeName,
-		Count: 2, // 2 samples are needed to compute "instantaneous" CPU
-		Recursive: true,
-	})
+	infos, err := getCadvisorContainerInfo(p.cadvisor)
 	if err != nil {
-		if _, ok := infos["/"]; ok {
-			// If the failure is partial, log it and return a best-effort
-			// response.
-			glog.Errorf("Partial failure issuing cadvisor.ContainerInfoV2: %v", err)
-		} else {
-			return nil, fmt.Errorf("failed to get root cgroup stats: %v", err)
-		}
+		return nil, fmt.Errorf("failed to get container info from cadvisor: %v", err)
 	}
 	// removeTerminatedContainerInfo will also remove pod level cgroups, so save the infos into allInfos first
 	allInfos := infos
@@ -352,3 +341,21 @@ func hasMemoryAndCPUInstUsage(info *cadvisorapiv2.ContainerInfo) bool {
 	}
 	return cstat.CpuInst.Usage.Total != 0 && cstat.Memory.RSS != 0
 }
+
+func getCadvisorContainerInfo(ca cadvisor.Interface) (map[string]cadvisorapiv2.ContainerInfo, error) {
+	infos, err := ca.ContainerInfoV2("/", cadvisorapiv2.RequestOptions{
+		IdType: cadvisorapiv2.TypeName,
+		Count: 2, // 2 samples are needed to compute "instantaneous" CPU
+		Recursive: true,
+	})
+	if err != nil {
+		if _, ok := infos["/"]; ok {
+			// If the failure is partial, log it and return a best-effort
+			// response.
+			glog.Errorf("Partial failure issuing cadvisor.ContainerInfoV2: %v", err)
+		} else {
+			return nil, fmt.Errorf("failed to get root cgroup stats: %v", err)
+		}
+	}
+	return infos, nil
+}
diff --git a/pkg/kubelet/stats/cri_stats_provider.go b/pkg/kubelet/stats/cri_stats_provider.go
index afc364068b..5934c2962b 100644
--- a/pkg/kubelet/stats/cri_stats_provider.go
+++ b/pkg/kubelet/stats/cri_stats_provider.go
@@ -18,7 +18,9 @@ package stats
 
 import (
 	"fmt"
+	"path"
 	"sort"
+	"strings"
 	"time"
 
 	"github.com/golang/glog"
@@ -112,6 +114,11 @@ func (p *criStatsProvider) ListPodStats() ([]statsapi.PodStats, error) {
 		containerMap[c.Id] = c
 	}
 
+	caInfos, err := getCRICadvisorStats(p.cadvisor)
+	if err != nil {
+		return nil, fmt.Errorf("failed to get container info from cadvisor: %v", err)
+	}
+
 	for _, stats := range resp {
 		containerID := stats.Attributes.Id
 		container, found := containerMap[containerID]
@@ -132,10 +139,25 @@ func (p *criStatsProvider) ListPodStats() ([]statsapi.PodStats, error) {
 		ps, found := sandboxIDToPodStats[podSandboxID]
 		if !found {
 			ps = buildPodStats(podSandbox)
+			// Fill stats from cadvisor when it is available, for the full set of required pod stats
+			caPodSandbox, found := caInfos[podSandboxID]
+			if !found {
+				glog.V(4).Infof("Unable to find cadvisor stats for sandbox %q", podSandboxID)
+			} else {
+				p.addCadvisorPodStats(ps, &caPodSandbox)
+			}
 			sandboxIDToPodStats[podSandboxID] = ps
 		}
-		containerStats := p.makeContainerStats(stats, container, &rootFsInfo, uuidToFsInfo)
-		ps.Containers = append(ps.Containers, *containerStats)
+		cs := p.makeContainerStats(stats, container, &rootFsInfo, uuidToFsInfo)
+		// If cadvisor stats are available for the container, use them to populate
+		// container stats
+		caStats, caFound := caInfos[containerID]
+		if !caFound {
+			glog.V(4).Infof("Unable to find cadvisor stats for %q", containerID)
+		} else {
+			p.addCadvisorContainerStats(cs, &caStats)
+		}
+		ps.Containers = append(ps.Containers, *cs)
 	}
 
 	result := make([]statsapi.PodStats, 0, len(sandboxIDToPodStats))
@@ -201,7 +223,7 @@ func (p *criStatsProvider) getFsInfo(storageID *runtimeapi.StorageIdentifier) *c
 	return &fsInfo
 }
 
-// buildPodRef returns a PodStats that identifies the Pod managing cinfo
+// buildPodStats returns a PodStats that identifies the Pod managing cinfo
 func buildPodStats(podSandbox *runtimeapi.PodSandbox) *statsapi.PodStats {
 	return &statsapi.PodStats{
 		PodRef: statsapi.PodReference{
@@ -211,7 +233,6 @@ func buildPodStats(podSandbox *runtimeapi.PodSandbox) *statsapi.PodStats {
 		},
 		// The StartTime in the summary API is the pod creation time.
 		StartTime: metav1.NewTime(time.Unix(0, podSandbox.CreatedAt)),
-		// Network stats are not supported by CRI.
 	}
 }
 
@@ -226,6 +247,13 @@ func (p *criStatsProvider) makePodStorageStats(s *statsapi.PodStats, rootFsInfo
 	return s
 }
 
+func (p *criStatsProvider) addCadvisorPodStats(
+	ps *statsapi.PodStats,
+	caPodSandbox *cadvisorapiv2.ContainerInfo,
+) {
+	ps.Network = cadvisorInfoToNetworkStats(ps.PodRef.Name, caPodSandbox)
+}
+
 func (p *criStatsProvider) makeContainerStats(
 	stats *runtimeapi.ContainerStats,
 	container *runtimeapi.Container,
@@ -336,3 +364,44 @@ func removeTerminatedContainer(containers []*runtimeapi.Container) []*runtimeapi
 	}
 	return result
 }
+
+func (p *criStatsProvider) addCadvisorContainerStats(
+	cs *statsapi.ContainerStats,
+	caPodStats *cadvisorapiv2.ContainerInfo,
+) {
+	if caPodStats.Spec.HasCustomMetrics {
+		cs.UserDefinedMetrics = cadvisorInfoToUserDefinedMetrics(caPodStats)
+	}
+
+	cpu, memory := cadvisorInfoToCPUandMemoryStats(caPodStats)
+	if cpu != nil {
+		cs.CPU = cpu
+	}
+	if memory != nil {
+		cs.Memory = memory
+	}
+}
+
+func getCRICadvisorStats(ca cadvisor.Interface) (map[string]cadvisorapiv2.ContainerInfo, error) {
+	stats := make(map[string]cadvisorapiv2.ContainerInfo)
+	infos, err := getCadvisorContainerInfo(ca)
+	if err != nil {
+		return nil, fmt.Errorf("failed to fetch cadvisor stats: %v", err)
+	}
+	infos = removeTerminatedContainerInfo(infos)
+	for key, info := range infos {
+		// On systemd using devicemapper each mount into the container has an
+		// associated cgroup. We ignore them to ensure we do not get duplicate
+		// entries in our summary. For details on .mount units:
+		// http://man7.org/linux/man-pages/man5/systemd.mount.5.html
+		if strings.HasSuffix(key, ".mount") {
+			continue
+		}
+		// Build the Pod key if this container is managed by a Pod
+		if !isPodManagedContainer(&info) {
+			continue
+		}
+		stats[path.Base(key)] = info
+	}
+	return stats, nil
+}
diff --git a/pkg/kubelet/stats/cri_stats_provider_test.go b/pkg/kubelet/stats/cri_stats_provider_test.go
index b966d0ba80..5cff0ac7e9 100644
--- a/pkg/kubelet/stats/cri_stats_provider_test.go
+++ b/pkg/kubelet/stats/cri_stats_provider_test.go
@@ -22,20 +22,47 @@ import (
 	"time"
 
 	cadvisorfs "github.com/google/cadvisor/fs"
-	"github.com/stretchr/testify/assert"
-
 	cadvisorapiv2 "github.com/google/cadvisor/info/v2"
+	"github.com/stretchr/testify/assert"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	critest "k8s.io/kubernetes/pkg/kubelet/apis/cri/testing"
 	runtimeapi "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime"
 	statsapi "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1"
 	cadvisortest "k8s.io/kubernetes/pkg/kubelet/cadvisor/testing"
 	kubecontainertest "k8s.io/kubernetes/pkg/kubelet/container/testing"
+	"k8s.io/kubernetes/pkg/kubelet/leaky"
 	kubepodtest "k8s.io/kubernetes/pkg/kubelet/pod/testing"
 	serverstats "k8s.io/kubernetes/pkg/kubelet/server/stats"
 )
 
 func TestCRIListPodStats(t *testing.T) {
+	const (
+		seedRoot = 0
+		seedRuntime = 100
+		seedKubelet = 200
+		seedMisc = 300
+		seedSandbox0 = 1000
+		seedContainer0 = 2000
+		seedSandbox1 = 3000
+		seedContainer1 = 4000
+		seedContainer2 = 5000
+		seedSandbox2 = 6000
+		seedContainer3 = 7000
+	)
+
+	const (
+		pName0 = "pod0"
+		pName1 = "pod1"
+		pName2 = "pod2"
+	)
+
+	const (
+		cName0 = "container0-name"
+		cName1 = "container1-name"
+		cName2 = "container2-name"
+		cName3 = "container3-name"
+	)
+
 	var (
 		imageFsStorageUUID = "imagefs-storage-uuid"
 		unknownStorageUUID = "unknown-storage-uuid"
@@ -43,19 +70,19 @@
 		rootFsInfo = getTestFsInfo(1000)
 
 		sandbox0 = makeFakePodSandbox("sandbox0-name", "sandbox0-uid", "sandbox0-ns")
-		container0 = makeFakeContainer(sandbox0, "container0-name", 0, false)
+		container0 = makeFakeContainer(sandbox0, cName0, 0, false)
 		containerStats0 = makeFakeContainerStats(container0, imageFsStorageUUID)
-		container1 = makeFakeContainer(sandbox0, "container1-name", 0, false)
+		container1 = makeFakeContainer(sandbox0, cName1, 0, false)
 		containerStats1 = makeFakeContainerStats(container1, unknownStorageUUID)
 
 		sandbox1 = makeFakePodSandbox("sandbox1-name", "sandbox1-uid", "sandbox1-ns")
-		container2 = makeFakeContainer(sandbox1, "container2-name", 0, false)
+		container2 = makeFakeContainer(sandbox1, cName2, 0, false)
 		containerStats2 = makeFakeContainerStats(container2, imageFsStorageUUID)
 
 		sandbox2 = makeFakePodSandbox("sandbox2-name", "sandbox2-uid", "sandbox2-ns")
-		container3 = makeFakeContainer(sandbox2, "container3-name", 0, true)
+		container3 = makeFakeContainer(sandbox2, cName3, 0, true)
 		containerStats3 = makeFakeContainerStats(container3, imageFsStorageUUID)
-		container4 = makeFakeContainer(sandbox2, "container3-name", 1, false)
+		container4 = makeFakeContainer(sandbox2, cName3, 1, false)
 		containerStats4 = makeFakeContainerStats(container4, imageFsStorageUUID)
 	)
 
@@ -68,7 +95,27 @@ func TestCRIListPodStats(t *testing.T) {
 		fakeImageService = critest.NewFakeImageService()
 	)
 
+	infos := map[string]cadvisorapiv2.ContainerInfo{
+		"/": getTestContainerInfo(seedRoot, "", "", ""),
+		"/kubelet": getTestContainerInfo(seedKubelet, "", "", ""),
+		"/system": getTestContainerInfo(seedMisc, "", "", ""),
+		sandbox0.PodSandboxStatus.Id: getTestContainerInfo(seedSandbox0, pName0, sandbox0.PodSandboxStatus.Metadata.Namespace, leaky.PodInfraContainerName),
+		container0.ContainerStatus.Id: getTestContainerInfo(seedContainer0, pName0, sandbox0.PodSandboxStatus.Metadata.Namespace, cName0),
+		container1.ContainerStatus.Id: getTestContainerInfo(seedContainer1, pName0, sandbox0.PodSandboxStatus.Metadata.Namespace, cName1),
+		sandbox1.PodSandboxStatus.Id: getTestContainerInfo(seedSandbox1, pName1, sandbox1.PodSandboxStatus.Metadata.Namespace, leaky.PodInfraContainerName),
+		container2.ContainerStatus.Id: getTestContainerInfo(seedContainer2, pName1, sandbox1.PodSandboxStatus.Metadata.Namespace, cName2),
+		sandbox2.PodSandboxStatus.Id: getTestContainerInfo(seedSandbox2, pName2, sandbox2.PodSandboxStatus.Metadata.Namespace, leaky.PodInfraContainerName),
+		container4.ContainerStatus.Id: getTestContainerInfo(seedContainer3, pName2, sandbox2.PodSandboxStatus.Metadata.Namespace, cName3),
+	}
+
+	options := cadvisorapiv2.RequestOptions{
+		IdType: cadvisorapiv2.TypeName,
+		Count: 2,
+		Recursive: true,
+	}
+
 	mockCadvisor.
+		On("ContainerInfoV2", "/", options).Return(infos, nil).
 		On("RootFsInfo").Return(rootFsInfo, nil).
 		On("GetFsInfoByFsUUID", imageFsStorageUUID).Return(imageFsInfo, nil).
 		On("GetFsInfoByFsUUID", unknownStorageUUID).Return(cadvisorapiv2.FsInfo{}, cadvisorfs.ErrNoSuchDevice)
@@ -117,16 +164,18 @@
 	for _, s := range p0.Containers {
 		containerStatsMap[s.Name] = s
 	}
-	c0 := containerStatsMap["container0-name"]
+
+	c0 := containerStatsMap[cName0]
 	assert.Equal(container0.CreatedAt, c0.StartTime.UnixNano())
-	checkCRICPUAndMemoryStats(assert, c0, containerStats0)
+	checkCRICPUAndMemoryStats(assert, c0, infos[container0.ContainerStatus.Id].Stats[0])
 	checkCRIRootfsStats(assert, c0, containerStats0, &imageFsInfo)
 	checkCRILogsStats(assert, c0, &rootFsInfo)
-	c1 := containerStatsMap["container1-name"]
+	c1 := containerStatsMap[cName1]
 	assert.Equal(container1.CreatedAt, c1.StartTime.UnixNano())
-	checkCRICPUAndMemoryStats(assert, c1, containerStats1)
+	checkCRICPUAndMemoryStats(assert, c1, infos[container1.ContainerStatus.Id].Stats[0])
 	checkCRIRootfsStats(assert, c1, containerStats1, nil)
 	checkCRILogsStats(assert, c1, &rootFsInfo)
+	checkCRINetworkStats(assert, p0.Network, infos[sandbox0.PodSandboxStatus.Id].Stats[0].Network)
 
 	p1 := podStatsMap[statsapi.PodReference{Name: "sandbox1-name", UID: "sandbox1-uid", Namespace: "sandbox1-ns"}]
 	assert.Equal(sandbox1.CreatedAt, p1.StartTime.UnixNano())
@@ -134,11 +183,12 @@
 	checkEphemeralStorageStats(assert, p1, ephemeralVolumes, []*runtimeapi.ContainerStats{containerStats2})
 
 	c2 := p1.Containers[0]
-	assert.Equal("container2-name", c2.Name)
+	assert.Equal(cName2, c2.Name)
 	assert.Equal(container2.CreatedAt, c2.StartTime.UnixNano())
-	checkCRICPUAndMemoryStats(assert, c2, containerStats2)
+	checkCRICPUAndMemoryStats(assert, c2, infos[container2.ContainerStatus.Id].Stats[0])
 	checkCRIRootfsStats(assert, c2, containerStats2, &imageFsInfo)
 	checkCRILogsStats(assert, c2, &rootFsInfo)
+	checkCRINetworkStats(assert, p1.Network, infos[sandbox1.PodSandboxStatus.Id].Stats[0].Network)
 
 	p2 := podStatsMap[statsapi.PodReference{Name: "sandbox2-name", UID: "sandbox2-uid", Namespace: "sandbox2-ns"}]
 	assert.Equal(sandbox2.CreatedAt, p2.StartTime.UnixNano())
@@ -147,12 +197,13 @@
 	checkEphemeralStorageStats(assert, p2, ephemeralVolumes, []*runtimeapi.ContainerStats{containerStats4})
 
 	c3 := p2.Containers[0]
-	assert.Equal("container3-name", c3.Name)
+	assert.Equal(cName3, c3.Name)
 	assert.Equal(container4.CreatedAt, c3.StartTime.UnixNano())
-	checkCRICPUAndMemoryStats(assert, c3, containerStats4)
+	checkCRICPUAndMemoryStats(assert, c3, infos[container4.ContainerStatus.Id].Stats[0])
 	checkCRIRootfsStats(assert, c3, containerStats4, &imageFsInfo)
 	checkCRILogsStats(assert, c3, &rootFsInfo)
+	checkCRINetworkStats(assert, p2.Network, infos[sandbox2.PodSandboxStatus.Id].Stats[0].Network)
 
 	mockCadvisor.AssertExpectations(t)
 }
 
@@ -306,18 +357,16 @@ func makeFakeVolumeStats(volumeNames []string) []statsapi.VolumeStats {
 	return volumes
 }
 
-func checkCRICPUAndMemoryStats(assert *assert.Assertions, actual statsapi.ContainerStats, cs *runtimeapi.ContainerStats) {
-	assert.Equal(cs.Cpu.Timestamp, actual.CPU.Time.UnixNano())
-	assert.Equal(cs.Cpu.UsageCoreNanoSeconds.Value, *actual.CPU.UsageCoreNanoSeconds)
-	assert.Zero(*actual.CPU.UsageNanoCores)
+func checkCRICPUAndMemoryStats(assert *assert.Assertions, actual statsapi.ContainerStats, cs *cadvisorapiv2.ContainerStats) {
+	assert.Equal(cs.Timestamp.UnixNano(), actual.CPU.Time.UnixNano())
+	assert.Equal(cs.Cpu.Usage.Total, *actual.CPU.UsageCoreNanoSeconds)
+	assert.Equal(cs.CpuInst.Usage.Total, *actual.CPU.UsageNanoCores)
 
-	assert.Equal(cs.Memory.Timestamp, actual.Memory.Time.UnixNano())
-	assert.Nil(actual.Memory.AvailableBytes)
-	assert.Nil(actual.Memory.UsageBytes)
-	assert.Equal(cs.Memory.WorkingSetBytes.Value, *actual.Memory.WorkingSetBytes)
-	assert.Zero(*actual.Memory.RSSBytes)
-	assert.Nil(actual.Memory.PageFaults)
-	assert.Nil(actual.Memory.MajorPageFaults)
+	assert.Equal(cs.Memory.Usage, *actual.Memory.UsageBytes)
+	assert.Equal(cs.Memory.WorkingSet, *actual.Memory.WorkingSetBytes)
+	assert.Equal(cs.Memory.RSS, *actual.Memory.RSSBytes)
+	assert.Equal(cs.Memory.ContainerData.Pgfault, *actual.Memory.PageFaults)
+	assert.Equal(cs.Memory.ContainerData.Pgmajfault, *actual.Memory.MajorPageFaults)
 }
 
 func checkCRIRootfsStats(assert *assert.Assertions, actual statsapi.ContainerStats, cs *runtimeapi.ContainerStats, imageFsInfo *cadvisorapiv2.FsInfo) {
@@ -361,3 +410,10 @@ func checkEphemeralStorageStats(assert *assert.Assertions, actual statsapi.PodSt
 	assert.Equal(int(*actual.EphemeralStorage.UsedBytes), int(totalUsed))
 	assert.Equal(int(*actual.EphemeralStorage.InodesUsed), int(inodesUsed))
 }
+
+func checkCRINetworkStats(assert *assert.Assertions, actual *statsapi.NetworkStats, expected *cadvisorapiv2.NetworkStats) {
+	assert.Equal(expected.Interfaces[0].RxBytes, *actual.RxBytes)
+	assert.Equal(expected.Interfaces[0].RxErrors, *actual.RxErrors)
+	assert.Equal(expected.Interfaces[0].TxBytes, *actual.TxBytes)
+	assert.Equal(expected.Interfaces[0].TxErrors, *actual.TxErrors)
+}
diff --git a/pkg/kubelet/stats/helper.go b/pkg/kubelet/stats/helper.go
index 6315c315f2..e4fde7d163 100644
--- a/pkg/kubelet/stats/helper.go
+++ b/pkg/kubelet/stats/helper.go
@@ -86,29 +86,12 @@ func cadvisorInfoToContainerStats(name string, info *cadvisorapiv2.ContainerInfo
 
 	if rootFs != nil {
 		// The container logs live on the node rootfs device
-		result.Logs = &statsapi.FsStats{
-			Time: metav1.NewTime(cstat.Timestamp),
-			AvailableBytes: &rootFs.Available,
-			CapacityBytes: &rootFs.Capacity,
-			InodesFree: rootFs.InodesFree,
-			Inodes: rootFs.Inodes,
-		}
-
-		if rootFs.Inodes != nil && rootFs.InodesFree != nil {
-			logsInodesUsed := *rootFs.Inodes - *rootFs.InodesFree
-			result.Logs.InodesUsed = &logsInodesUsed
-		}
+		result.Logs = buildLogsStats(cstat, rootFs)
 	}
 
 	if imageFs != nil {
 		// The container rootFs lives on the imageFs devices (which may not be the node root fs)
-		result.Rootfs = &statsapi.FsStats{
-			Time: metav1.NewTime(cstat.Timestamp),
-			AvailableBytes: &imageFs.Available,
-			CapacityBytes: &imageFs.Capacity,
-			InodesFree: imageFs.InodesFree,
-			Inodes: imageFs.Inodes,
-		}
+		result.Rootfs = buildRootfsStats(cstat, imageFs)
 	}
 
 	cfs := cstat.Filesystem
@@ -274,3 +257,29 @@ func getCgroupStats(cadvisor cadvisor.Interface, containerName string) (*cadviso
 	}
 	return stats, nil
 }
+
+func buildLogsStats(cstat *cadvisorapiv2.ContainerStats, rootFs *cadvisorapiv2.FsInfo) *statsapi.FsStats {
+	fsStats := &statsapi.FsStats{
+		Time: metav1.NewTime(cstat.Timestamp),
+		AvailableBytes: &rootFs.Available,
+		CapacityBytes: &rootFs.Capacity,
+		InodesFree: rootFs.InodesFree,
+		Inodes: rootFs.Inodes,
+	}
+
+	if rootFs.Inodes != nil && rootFs.InodesFree != nil {
+		logsInodesUsed := *rootFs.Inodes - *rootFs.InodesFree
+		fsStats.InodesUsed = &logsInodesUsed
+	}
+	return fsStats
+}
+
+func buildRootfsStats(cstat *cadvisorapiv2.ContainerStats, imageFs *cadvisorapiv2.FsInfo) *statsapi.FsStats {
+	return &statsapi.FsStats{
+		Time: metav1.NewTime(cstat.Timestamp),
+		AvailableBytes: &imageFs.Available,
+		CapacityBytes: &imageFs.Capacity,
+		InodesFree: imageFs.InodesFree,
+		Inodes: imageFs.Inodes,
+	}
+}