Merge pull request #55810 from abhi/cri_stats

Automatic merge from submit-queue (batch tested with PRs 54843, 55810). If you want to cherry-pick this change to another branch, please follow the instructions <a href="https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md">here</a>.

Integrating cadvisor stats to CRI Pod stats collection

For kubernetes-incubator/cri-containerd#341



**What this PR does / why we need it**:
This PR contains changes to update kubelet to consume metrics from both CRI and cadvisor for Pod stats. The corresponding cadvisor changes are here https://github.com/google/cadvisor/pull/1795.
Today CRI stats provides only core metrics such as CPU Usage, Memory Usage, Disk Usage. However the stats exposed by kubelet which is a superset of core metrics and Network, User Defined Metrics are not provided by CRI stats. Hence kubelet stats are extracted from 2 sources , CRI stats and cadvisor stats. 
After the change the summary stats for a CRI based runtime (containerd) is as show here:
https://gist.github.com/abhi/d7351861df6430eb4bc5d711d274ec35

This PR also contains test case change to reflect the stats for cri stats provider. 

**Which issue(s) this PR fixes** :
Fixes # https://github.com/kubernetes-incubator/cri-containerd/issues/341

**Special notes for your reviewer**:
https://docs.google.com/presentation/d/1Os3nyMRBlFuiBLCjPgeaPv6jXylrZW5jiDXJejlA3Wg/edit#slide=id.g27cb4cb6d8_0_0

Godep will be updated to main tree after https://github.com/google/cadvisor/pull/1795 PR is merged.

**Release note**:

```release-note
Kubelet can provide full summary api support except container log stats for CRI container runtime now.
```
cc @Random-Liu
pull/6/head
Kubernetes Submit Queue 2017-11-23 01:13:18 -08:00 committed by GitHub
commit 76e2bed9b1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 203 additions and 62 deletions

View File

@ -76,20 +76,9 @@ func (p *cadvisorStatsProvider) ListPodStats() ([]statsapi.PodStats, error) {
if err != nil {
return nil, fmt.Errorf("failed to get imageFs info: %v", err)
}
infos, err := p.cadvisor.ContainerInfoV2("/", cadvisorapiv2.RequestOptions{
IdType: cadvisorapiv2.TypeName,
Count: 2, // 2 samples are needed to compute "instantaneous" CPU
Recursive: true,
})
infos, err := getCadvisorContainerInfo(p.cadvisor)
if err != nil {
if _, ok := infos["/"]; ok {
// If the failure is partial, log it and return a best-effort
// response.
glog.Errorf("Partial failure issuing cadvisor.ContainerInfoV2: %v", err)
} else {
return nil, fmt.Errorf("failed to get root cgroup stats: %v", err)
}
return nil, fmt.Errorf("failed to get container info from cadvisor: %v", err)
}
// removeTerminatedContainerInfo will also remove pod level cgroups, so save the infos into allInfos first
allInfos := infos
@ -352,3 +341,21 @@ func hasMemoryAndCPUInstUsage(info *cadvisorapiv2.ContainerInfo) bool {
}
return cstat.CpuInst.Usage.Total != 0 && cstat.Memory.RSS != 0
}
func getCadvisorContainerInfo(ca cadvisor.Interface) (map[string]cadvisorapiv2.ContainerInfo, error) {
infos, err := ca.ContainerInfoV2("/", cadvisorapiv2.RequestOptions{
IdType: cadvisorapiv2.TypeName,
Count: 2, // 2 samples are needed to compute "instantaneous" CPU
Recursive: true,
})
if err != nil {
if _, ok := infos["/"]; ok {
// If the failure is partial, log it and return a best-effort
// response.
glog.Errorf("Partial failure issuing cadvisor.ContainerInfoV2: %v", err)
} else {
return nil, fmt.Errorf("failed to get root cgroup stats: %v", err)
}
}
return infos, nil
}

View File

@ -18,7 +18,9 @@ package stats
import (
"fmt"
"path"
"sort"
"strings"
"time"
"github.com/golang/glog"
@ -112,6 +114,11 @@ func (p *criStatsProvider) ListPodStats() ([]statsapi.PodStats, error) {
containerMap[c.Id] = c
}
caInfos, err := getCRICadvisorStats(p.cadvisor)
if err != nil {
return nil, fmt.Errorf("failed to get container info from cadvisor: %v", err)
}
for _, stats := range resp {
containerID := stats.Attributes.Id
container, found := containerMap[containerID]
@ -132,10 +139,25 @@ func (p *criStatsProvider) ListPodStats() ([]statsapi.PodStats, error) {
ps, found := sandboxIDToPodStats[podSandboxID]
if !found {
ps = buildPodStats(podSandbox)
// Fill stats from cadvisor is available for full set of required pod stats
caPodSandbox, found := caInfos[podSandboxID]
if !found {
glog.V(4).Info("Unable to find cadvisor stats for sandbox %q", podSandboxID)
} else {
p.addCadvisorPodStats(ps, &caPodSandbox)
}
sandboxIDToPodStats[podSandboxID] = ps
}
containerStats := p.makeContainerStats(stats, container, &rootFsInfo, uuidToFsInfo)
ps.Containers = append(ps.Containers, *containerStats)
cs := p.makeContainerStats(stats, container, &rootFsInfo, uuidToFsInfo)
// If cadvisor stats is available for the container, use it to populate
// container stats
caStats, caFound := caInfos[containerID]
if !caFound {
glog.V(4).Info("Unable to find cadvisor stats for %q", containerID)
} else {
p.addCadvisorContainerStats(cs, &caStats)
}
ps.Containers = append(ps.Containers, *cs)
}
result := make([]statsapi.PodStats, 0, len(sandboxIDToPodStats))
@ -201,7 +223,7 @@ func (p *criStatsProvider) getFsInfo(storageID *runtimeapi.StorageIdentifier) *c
return &fsInfo
}
// buildPodRef returns a PodStats that identifies the Pod managing cinfo
// buildPodStats returns a PodStats that identifies the Pod managing cinfo
func buildPodStats(podSandbox *runtimeapi.PodSandbox) *statsapi.PodStats {
return &statsapi.PodStats{
PodRef: statsapi.PodReference{
@ -211,7 +233,6 @@ func buildPodStats(podSandbox *runtimeapi.PodSandbox) *statsapi.PodStats {
},
// The StartTime in the summary API is the pod creation time.
StartTime: metav1.NewTime(time.Unix(0, podSandbox.CreatedAt)),
// Network stats are not supported by CRI.
}
}
@ -226,6 +247,13 @@ func (p *criStatsProvider) makePodStorageStats(s *statsapi.PodStats, rootFsInfo
return s
}
func (p *criStatsProvider) addCadvisorPodStats(
ps *statsapi.PodStats,
caPodSandbox *cadvisorapiv2.ContainerInfo,
) {
ps.Network = cadvisorInfoToNetworkStats(ps.PodRef.Name, caPodSandbox)
}
func (p *criStatsProvider) makeContainerStats(
stats *runtimeapi.ContainerStats,
container *runtimeapi.Container,
@ -336,3 +364,44 @@ func removeTerminatedContainer(containers []*runtimeapi.Container) []*runtimeapi
}
return result
}
func (p *criStatsProvider) addCadvisorContainerStats(
cs *statsapi.ContainerStats,
caPodStats *cadvisorapiv2.ContainerInfo,
) {
if caPodStats.Spec.HasCustomMetrics {
cs.UserDefinedMetrics = cadvisorInfoToUserDefinedMetrics(caPodStats)
}
cpu, memory := cadvisorInfoToCPUandMemoryStats(caPodStats)
if cpu != nil {
cs.CPU = cpu
}
if memory != nil {
cs.Memory = memory
}
}
func getCRICadvisorStats(ca cadvisor.Interface) (map[string]cadvisorapiv2.ContainerInfo, error) {
stats := make(map[string]cadvisorapiv2.ContainerInfo)
infos, err := getCadvisorContainerInfo(ca)
if err != nil {
return nil, fmt.Errorf("failed to fetch cadvisor stats: %v", err)
}
infos = removeTerminatedContainerInfo(infos)
for key, info := range infos {
// On systemd using devicemapper each mount into the container has an
// associated cgroup. We ignore them to ensure we do not get duplicate
// entries in our summary. For details on .mount units:
// http://man7.org/linux/man-pages/man5/systemd.mount.5.html
if strings.HasSuffix(key, ".mount") {
continue
}
// Build the Pod key if this container is managed by a Pod
if !isPodManagedContainer(&info) {
continue
}
stats[path.Base(key)] = info
}
return stats, nil
}

View File

@ -22,20 +22,47 @@ import (
"time"
cadvisorfs "github.com/google/cadvisor/fs"
"github.com/stretchr/testify/assert"
cadvisorapiv2 "github.com/google/cadvisor/info/v2"
"github.com/stretchr/testify/assert"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
critest "k8s.io/kubernetes/pkg/kubelet/apis/cri/testing"
runtimeapi "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime"
statsapi "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1"
cadvisortest "k8s.io/kubernetes/pkg/kubelet/cadvisor/testing"
kubecontainertest "k8s.io/kubernetes/pkg/kubelet/container/testing"
"k8s.io/kubernetes/pkg/kubelet/leaky"
kubepodtest "k8s.io/kubernetes/pkg/kubelet/pod/testing"
serverstats "k8s.io/kubernetes/pkg/kubelet/server/stats"
)
func TestCRIListPodStats(t *testing.T) {
const (
seedRoot = 0
seedRuntime = 100
seedKubelet = 200
seedMisc = 300
seedSandbox0 = 1000
seedContainer0 = 2000
seedSandbox1 = 3000
seedContainer1 = 4000
seedContainer2 = 5000
seedSandbox2 = 6000
seedContainer3 = 7000
)
const (
pName0 = "pod0"
pName1 = "pod1"
pName2 = "pod2"
)
const (
cName0 = "container0-name"
cName1 = "container1-name"
cName2 = "container2-name"
cName3 = "container3-name"
)
var (
imageFsStorageUUID = "imagefs-storage-uuid"
unknownStorageUUID = "unknown-storage-uuid"
@ -43,19 +70,19 @@ func TestCRIListPodStats(t *testing.T) {
rootFsInfo = getTestFsInfo(1000)
sandbox0 = makeFakePodSandbox("sandbox0-name", "sandbox0-uid", "sandbox0-ns")
container0 = makeFakeContainer(sandbox0, "container0-name", 0, false)
container0 = makeFakeContainer(sandbox0, cName0, 0, false)
containerStats0 = makeFakeContainerStats(container0, imageFsStorageUUID)
container1 = makeFakeContainer(sandbox0, "container1-name", 0, false)
container1 = makeFakeContainer(sandbox0, cName1, 0, false)
containerStats1 = makeFakeContainerStats(container1, unknownStorageUUID)
sandbox1 = makeFakePodSandbox("sandbox1-name", "sandbox1-uid", "sandbox1-ns")
container2 = makeFakeContainer(sandbox1, "container2-name", 0, false)
container2 = makeFakeContainer(sandbox1, cName2, 0, false)
containerStats2 = makeFakeContainerStats(container2, imageFsStorageUUID)
sandbox2 = makeFakePodSandbox("sandbox2-name", "sandbox2-uid", "sandbox2-ns")
container3 = makeFakeContainer(sandbox2, "container3-name", 0, true)
container3 = makeFakeContainer(sandbox2, cName3, 0, true)
containerStats3 = makeFakeContainerStats(container3, imageFsStorageUUID)
container4 = makeFakeContainer(sandbox2, "container3-name", 1, false)
container4 = makeFakeContainer(sandbox2, cName3, 1, false)
containerStats4 = makeFakeContainerStats(container4, imageFsStorageUUID)
)
@ -68,7 +95,27 @@ func TestCRIListPodStats(t *testing.T) {
fakeImageService = critest.NewFakeImageService()
)
infos := map[string]cadvisorapiv2.ContainerInfo{
"/": getTestContainerInfo(seedRoot, "", "", ""),
"/kubelet": getTestContainerInfo(seedKubelet, "", "", ""),
"/system": getTestContainerInfo(seedMisc, "", "", ""),
sandbox0.PodSandboxStatus.Id: getTestContainerInfo(seedSandbox0, pName0, sandbox0.PodSandboxStatus.Metadata.Namespace, leaky.PodInfraContainerName),
container0.ContainerStatus.Id: getTestContainerInfo(seedContainer0, pName0, sandbox0.PodSandboxStatus.Metadata.Namespace, cName0),
container1.ContainerStatus.Id: getTestContainerInfo(seedContainer1, pName0, sandbox0.PodSandboxStatus.Metadata.Namespace, cName1),
sandbox1.PodSandboxStatus.Id: getTestContainerInfo(seedSandbox1, pName1, sandbox1.PodSandboxStatus.Metadata.Namespace, leaky.PodInfraContainerName),
container2.ContainerStatus.Id: getTestContainerInfo(seedContainer2, pName1, sandbox1.PodSandboxStatus.Metadata.Namespace, cName2),
sandbox2.PodSandboxStatus.Id: getTestContainerInfo(seedSandbox2, pName2, sandbox2.PodSandboxStatus.Metadata.Namespace, leaky.PodInfraContainerName),
container4.ContainerStatus.Id: getTestContainerInfo(seedContainer3, pName2, sandbox2.PodSandboxStatus.Metadata.Namespace, cName3),
}
options := cadvisorapiv2.RequestOptions{
IdType: cadvisorapiv2.TypeName,
Count: 2,
Recursive: true,
}
mockCadvisor.
On("ContainerInfoV2", "/", options).Return(infos, nil).
On("RootFsInfo").Return(rootFsInfo, nil).
On("GetFsInfoByFsUUID", imageFsStorageUUID).Return(imageFsInfo, nil).
On("GetFsInfoByFsUUID", unknownStorageUUID).Return(cadvisorapiv2.FsInfo{}, cadvisorfs.ErrNoSuchDevice)
@ -117,16 +164,18 @@ func TestCRIListPodStats(t *testing.T) {
for _, s := range p0.Containers {
containerStatsMap[s.Name] = s
}
c0 := containerStatsMap["container0-name"]
c0 := containerStatsMap[cName0]
assert.Equal(container0.CreatedAt, c0.StartTime.UnixNano())
checkCRICPUAndMemoryStats(assert, c0, containerStats0)
checkCRICPUAndMemoryStats(assert, c0, infos[container0.ContainerStatus.Id].Stats[0])
checkCRIRootfsStats(assert, c0, containerStats0, &imageFsInfo)
checkCRILogsStats(assert, c0, &rootFsInfo)
c1 := containerStatsMap["container1-name"]
c1 := containerStatsMap[cName1]
assert.Equal(container1.CreatedAt, c1.StartTime.UnixNano())
checkCRICPUAndMemoryStats(assert, c1, containerStats1)
checkCRICPUAndMemoryStats(assert, c1, infos[container1.ContainerStatus.Id].Stats[0])
checkCRIRootfsStats(assert, c1, containerStats1, nil)
checkCRILogsStats(assert, c1, &rootFsInfo)
checkCRINetworkStats(assert, p0.Network, infos[sandbox0.PodSandboxStatus.Id].Stats[0].Network)
p1 := podStatsMap[statsapi.PodReference{Name: "sandbox1-name", UID: "sandbox1-uid", Namespace: "sandbox1-ns"}]
assert.Equal(sandbox1.CreatedAt, p1.StartTime.UnixNano())
@ -134,11 +183,12 @@ func TestCRIListPodStats(t *testing.T) {
checkEphemeralStorageStats(assert, p1, ephemeralVolumes, []*runtimeapi.ContainerStats{containerStats2})
c2 := p1.Containers[0]
assert.Equal("container2-name", c2.Name)
assert.Equal(cName2, c2.Name)
assert.Equal(container2.CreatedAt, c2.StartTime.UnixNano())
checkCRICPUAndMemoryStats(assert, c2, containerStats2)
checkCRICPUAndMemoryStats(assert, c2, infos[container2.ContainerStatus.Id].Stats[0])
checkCRIRootfsStats(assert, c2, containerStats2, &imageFsInfo)
checkCRILogsStats(assert, c2, &rootFsInfo)
checkCRINetworkStats(assert, p1.Network, infos[sandbox1.PodSandboxStatus.Id].Stats[0].Network)
p2 := podStatsMap[statsapi.PodReference{Name: "sandbox2-name", UID: "sandbox2-uid", Namespace: "sandbox2-ns"}]
assert.Equal(sandbox2.CreatedAt, p2.StartTime.UnixNano())
@ -147,12 +197,13 @@ func TestCRIListPodStats(t *testing.T) {
checkEphemeralStorageStats(assert, p2, ephemeralVolumes, []*runtimeapi.ContainerStats{containerStats4})
c3 := p2.Containers[0]
assert.Equal("container3-name", c3.Name)
assert.Equal(cName3, c3.Name)
assert.Equal(container4.CreatedAt, c3.StartTime.UnixNano())
checkCRICPUAndMemoryStats(assert, c3, containerStats4)
checkCRICPUAndMemoryStats(assert, c3, infos[container4.ContainerStatus.Id].Stats[0])
checkCRIRootfsStats(assert, c3, containerStats4, &imageFsInfo)
checkCRILogsStats(assert, c3, &rootFsInfo)
checkCRINetworkStats(assert, p2.Network, infos[sandbox2.PodSandboxStatus.Id].Stats[0].Network)
mockCadvisor.AssertExpectations(t)
}
@ -306,18 +357,16 @@ func makeFakeVolumeStats(volumeNames []string) []statsapi.VolumeStats {
return volumes
}
func checkCRICPUAndMemoryStats(assert *assert.Assertions, actual statsapi.ContainerStats, cs *runtimeapi.ContainerStats) {
assert.Equal(cs.Cpu.Timestamp, actual.CPU.Time.UnixNano())
assert.Equal(cs.Cpu.UsageCoreNanoSeconds.Value, *actual.CPU.UsageCoreNanoSeconds)
assert.Zero(*actual.CPU.UsageNanoCores)
func checkCRICPUAndMemoryStats(assert *assert.Assertions, actual statsapi.ContainerStats, cs *cadvisorapiv2.ContainerStats) {
assert.Equal(cs.Timestamp.UnixNano(), actual.CPU.Time.UnixNano())
assert.Equal(cs.Cpu.Usage.Total, *actual.CPU.UsageCoreNanoSeconds)
assert.Equal(cs.CpuInst.Usage.Total, *actual.CPU.UsageNanoCores)
assert.Equal(cs.Memory.Timestamp, actual.Memory.Time.UnixNano())
assert.Nil(actual.Memory.AvailableBytes)
assert.Nil(actual.Memory.UsageBytes)
assert.Equal(cs.Memory.WorkingSetBytes.Value, *actual.Memory.WorkingSetBytes)
assert.Zero(*actual.Memory.RSSBytes)
assert.Nil(actual.Memory.PageFaults)
assert.Nil(actual.Memory.MajorPageFaults)
assert.Equal(cs.Memory.Usage, *actual.Memory.UsageBytes)
assert.Equal(cs.Memory.WorkingSet, *actual.Memory.WorkingSetBytes)
assert.Equal(cs.Memory.RSS, *actual.Memory.RSSBytes)
assert.Equal(cs.Memory.ContainerData.Pgfault, *actual.Memory.PageFaults)
assert.Equal(cs.Memory.ContainerData.Pgmajfault, *actual.Memory.MajorPageFaults)
}
func checkCRIRootfsStats(assert *assert.Assertions, actual statsapi.ContainerStats, cs *runtimeapi.ContainerStats, imageFsInfo *cadvisorapiv2.FsInfo) {
@ -361,3 +410,10 @@ func checkEphemeralStorageStats(assert *assert.Assertions, actual statsapi.PodSt
assert.Equal(int(*actual.EphemeralStorage.UsedBytes), int(totalUsed))
assert.Equal(int(*actual.EphemeralStorage.InodesUsed), int(inodesUsed))
}
func checkCRINetworkStats(assert *assert.Assertions, actual *statsapi.NetworkStats, expected *cadvisorapiv2.NetworkStats) {
assert.Equal(expected.Interfaces[0].RxBytes, *actual.RxBytes)
assert.Equal(expected.Interfaces[0].RxErrors, *actual.RxErrors)
assert.Equal(expected.Interfaces[0].TxBytes, *actual.TxBytes)
assert.Equal(expected.Interfaces[0].TxErrors, *actual.TxErrors)
}

View File

@ -86,29 +86,12 @@ func cadvisorInfoToContainerStats(name string, info *cadvisorapiv2.ContainerInfo
if rootFs != nil {
// The container logs live on the node rootfs device
result.Logs = &statsapi.FsStats{
Time: metav1.NewTime(cstat.Timestamp),
AvailableBytes: &rootFs.Available,
CapacityBytes: &rootFs.Capacity,
InodesFree: rootFs.InodesFree,
Inodes: rootFs.Inodes,
}
if rootFs.Inodes != nil && rootFs.InodesFree != nil {
logsInodesUsed := *rootFs.Inodes - *rootFs.InodesFree
result.Logs.InodesUsed = &logsInodesUsed
}
result.Logs = buildLogsStats(cstat, rootFs)
}
if imageFs != nil {
// The container rootFs lives on the imageFs devices (which may not be the node root fs)
result.Rootfs = &statsapi.FsStats{
Time: metav1.NewTime(cstat.Timestamp),
AvailableBytes: &imageFs.Available,
CapacityBytes: &imageFs.Capacity,
InodesFree: imageFs.InodesFree,
Inodes: imageFs.Inodes,
}
result.Rootfs = buildRootfsStats(cstat, imageFs)
}
cfs := cstat.Filesystem
@ -274,3 +257,29 @@ func getCgroupStats(cadvisor cadvisor.Interface, containerName string) (*cadviso
}
return stats, nil
}
func buildLogsStats(cstat *cadvisorapiv2.ContainerStats, rootFs *cadvisorapiv2.FsInfo) *statsapi.FsStats {
fsStats := &statsapi.FsStats{
Time: metav1.NewTime(cstat.Timestamp),
AvailableBytes: &rootFs.Available,
CapacityBytes: &rootFs.Capacity,
InodesFree: rootFs.InodesFree,
Inodes: rootFs.Inodes,
}
if rootFs.Inodes != nil && rootFs.InodesFree != nil {
logsInodesUsed := *rootFs.Inodes - *rootFs.InodesFree
fsStats.InodesUsed = &logsInodesUsed
}
return fsStats
}
func buildRootfsStats(cstat *cadvisorapiv2.ContainerStats, imageFs *cadvisorapiv2.FsInfo) *statsapi.FsStats {
return &statsapi.FsStats{
Time: metav1.NewTime(cstat.Timestamp),
AvailableBytes: &imageFs.Available,
CapacityBytes: &imageFs.Capacity,
InodesFree: imageFs.InodesFree,
Inodes: imageFs.Inodes,
}
}