diff --git a/test/e2e_node/BUILD b/test/e2e_node/BUILD
index 53e775dd67..9a668942f5 100644
--- a/test/e2e_node/BUILD
+++ b/test/e2e_node/BUILD
@@ -34,6 +34,7 @@ go_library(
         "//pkg/kubelet/apis/kubeletconfig/scheme:go_default_library",
         "//pkg/kubelet/apis/kubeletconfig/v1beta1:go_default_library",
         "//pkg/kubelet/apis/stats/v1alpha1:go_default_library",
+        "//pkg/kubelet/cm:go_default_library",
         "//pkg/kubelet/cm/devicemanager:go_default_library",
         "//pkg/kubelet/metrics:go_default_library",
         "//pkg/kubelet/remote:go_default_library",
diff --git a/test/e2e_node/eviction_test.go b/test/e2e_node/eviction_test.go
index 335a65136f..fb59ca781f 100644
--- a/test/e2e_node/eviction_test.go
+++ b/test/e2e_node/eviction_test.go
@@ -131,8 +131,8 @@ var _ = framework.KubeDescribe("MemoryAllocatableEviction [Slow] [Serial] [Disru
 		// Set large system and kube reserved values to trigger allocatable thresholds far before hard eviction thresholds.
 		kubeReserved := getNodeCPUAndMemoryCapacity(f)[v1.ResourceMemory]
 		// The default hard eviction threshold is 250Mb, so Allocatable = Capacity - Reserved - 250Mb
-		// We want Allocatable = 150Mb, so set Reserved = Capacity - Allocatable - 250Mb = Capacity - 400Mb
-		kubeReserved.Sub(resource.MustParse("400Mi"))
+		// We want Allocatable = 50Mb, so set Reserved = Capacity - Allocatable - 250Mb = Capacity - 300Mb
+		kubeReserved.Sub(resource.MustParse("300Mi"))
 		initialConfig.KubeReserved = map[string]string{
 			string(v1.ResourceMemory): kubeReserved.String(),
 		}
@@ -380,6 +380,8 @@ func runEvictionTest(f *framework.Framework, pressureTimeout time.Duration, expe
 	// Place the remainder of the test within a context so that the kubelet config is set before and after the test.
 	Context("", func() {
 		BeforeEach(func() {
+			// reduce memory usage in the allocatable cgroup to ensure we do not have MemoryPressure
+			reduceAllocatableMemoryUsage()
 			// Nodes do not immediately report local storage capacity
 			// Sleep so that pods requesting local storage do not fail to schedule
 			time.Sleep(30 * time.Second)
@@ -447,6 +449,7 @@ func runEvictionTest(f *framework.Framework, pressureTimeout time.Duration, expe
 				By(fmt.Sprintf("deleting pod: %s", spec.pod.Name))
 				f.PodClient().DeleteSync(spec.pod.Name, &metav1.DeleteOptions{}, 10*time.Minute)
 			}
+			reduceAllocatableMemoryUsage()
 			if expectedNodeCondition == v1.NodeDiskPressure && framework.TestContext.PrepullImages {
 				// The disk eviction test may cause the prepulled images to be evicted,
 				// prepull those images again to ensure this test not affect following tests.
@@ -607,7 +610,12 @@ func logMemoryMetrics() {
 		return
 	}
 	if summary.Node.Memory != nil && summary.Node.Memory.WorkingSetBytes != nil && summary.Node.Memory.AvailableBytes != nil {
-		framework.Logf("Node.Memory.WorkingSetBytes: %d, summary.Node.Memory.AvailableBytes: %d", *summary.Node.Memory.WorkingSetBytes, *summary.Node.Memory.AvailableBytes)
+		framework.Logf("Node.Memory.WorkingSetBytes: %d, Node.Memory.AvailableBytes: %d", *summary.Node.Memory.WorkingSetBytes, *summary.Node.Memory.AvailableBytes)
+	}
+	for _, sysContainer := range summary.Node.SystemContainers {
+		if sysContainer.Name == stats.SystemContainerPods && sysContainer.Memory != nil && sysContainer.Memory.WorkingSetBytes != nil && sysContainer.Memory.AvailableBytes != nil {
+			framework.Logf("Allocatable.Memory.WorkingSetBytes: %d, Allocatable.Memory.AvailableBytes: %d", *sysContainer.Memory.WorkingSetBytes, *sysContainer.Memory.AvailableBytes)
+		}
 	}
 	for _, pod := range summary.Pods {
 		framework.Logf("Pod: %s", pod.PodRef.Name)
diff --git a/test/e2e_node/pods_container_manager_test.go b/test/e2e_node/pods_container_manager_test.go
index 70fcbd6f0f..b54cee5478 100644
--- a/test/e2e_node/pods_container_manager_test.go
+++ b/test/e2e_node/pods_container_manager_test.go
@@ -54,8 +54,6 @@ func getResourceRequirements(requests, limits v1.ResourceList) v1.ResourceRequir
 }
 
 const (
-	// Kubelet internal cgroup name for node allocatable cgroup.
-	defaultNodeAllocatableCgroup = "kubepods"
 	// Kubelet internal cgroup name for burstable tier
 	burstableCgroup = "burstable"
 	// Kubelet internal cgroup name for besteffort tier
@@ -68,12 +66,7 @@ func makePodToVerifyCgroups(cgroupNames []cm.CgroupName) *v1.Pod {
 	cgroupFsNames := []string{}
 	for _, cgroupName := range cgroupNames {
 		// Add top level cgroup used to enforce node allocatable.
-		cgroupName = cm.CgroupName(path.Join(defaultNodeAllocatableCgroup, string(cgroupName)))
-		if framework.TestContext.KubeletConfig.CgroupDriver == "systemd" {
-			cgroupFsNames = append(cgroupFsNames, cm.ConvertCgroupNameToSystemd(cgroupName, true))
-		} else {
-			cgroupFsNames = append(cgroupFsNames, string(cgroupName))
-		}
+		cgroupFsNames = append(cgroupFsNames, toCgroupFsName(path.Join(defaultNodeAllocatableCgroup, string(cgroupName))))
 	}
 	glog.Infof("expecting %v cgroups to be found", cgroupFsNames)
 	// build the pod command to either verify cgroups exist
@@ -117,10 +110,7 @@ func makePodToVerifyCgroups(cgroupNames []cm.CgroupName) *v1.Pod {
 
 // makePodToVerifyCgroupRemoved verfies the specified cgroup does not exist.
 func makePodToVerifyCgroupRemoved(cgroupName cm.CgroupName) *v1.Pod {
-	cgroupFsName := string(cgroupName)
-	if framework.TestContext.KubeletConfig.CgroupDriver == "systemd" {
-		cgroupFsName = cm.ConvertCgroupNameToSystemd(cm.CgroupName(cgroupName), true)
-	}
+	cgroupFsName := toCgroupFsName(string(cgroupName))
 	pod := &v1.Pod{
 		ObjectMeta: metav1.ObjectMeta{
 			Name: "pod" + string(uuid.NewUUID()),
diff --git a/test/e2e_node/util.go b/test/e2e_node/util.go
index 4dd7cf7090..8006fae550 100644
--- a/test/e2e_node/util.go
+++ b/test/e2e_node/util.go
@@ -41,6 +41,7 @@ import (
 	kubeletscheme "k8s.io/kubernetes/pkg/kubelet/apis/kubeletconfig/scheme"
 	kubeletconfigv1beta1 "k8s.io/kubernetes/pkg/kubelet/apis/kubeletconfig/v1beta1"
 	stats "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1"
+	"k8s.io/kubernetes/pkg/kubelet/cm"
 	kubeletmetrics "k8s.io/kubernetes/pkg/kubelet/metrics"
 	"k8s.io/kubernetes/pkg/kubelet/remote"
 	"k8s.io/kubernetes/test/e2e/framework"
@@ -57,6 +58,9 @@ var startServices = flag.Bool("start-services", true, "If true, start local node
 var stopServices = flag.Bool("stop-services", true, "If true, stop local node services after running tests")
 var busyboxImage = "busybox"
 
+// Kubelet internal cgroup name for node allocatable cgroup.
+const defaultNodeAllocatableCgroup = "kubepods"
+
 func getNodeSummary() (*stats.Summary, error) {
 	req, err := http.NewRequest("GET", *kubeletAddress+"/stats/summary", nil)
 	if err != nil {
@@ -407,3 +411,19 @@ func restartKubelet() {
 	stdout, err = exec.Command("sudo", "systemctl", "restart", kube).CombinedOutput()
 	framework.ExpectNoError(err, "Failed to restart kubelet with systemctl: %v, %v", err, stdout)
 }
+
+func toCgroupFsName(cgroup string) string {
+	if framework.TestContext.KubeletConfig.CgroupDriver == "systemd" {
+		return cm.ConvertCgroupNameToSystemd(cm.CgroupName(cgroup), true)
+	}
+	return cgroup
+}
+
+// reduceAllocatableMemoryUsage uses memory.force_empty (https://lwn.net/Articles/432224/)
+// to make the kernel reclaim memory in the allocatable cgroup
+// the time to reduce pressure may be unbounded, but usually finishes within a second
+func reduceAllocatableMemoryUsage() {
+	cmd := fmt.Sprintf("echo 0 > /sys/fs/cgroup/memory/%s/memory.force_empty", toCgroupFsName(defaultNodeAllocatableCgroup))
+	_, err := exec.Command("sudo", "sh", "-c", cmd).CombinedOutput()
+	framework.ExpectNoError(err)
+}
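The new reduceAllocatableMemoryUsage helper boils down to a single write to the allocatable cgroup's memory.force_empty file. Below is a minimal standalone sketch of that mechanism (not the patch's code), assuming a cgroup-v1 memory hierarchy mounted at /sys/fs/cgroup/memory, root privileges, and the cgroupfs driver, so the allocatable cgroup directory is literally "kubepods"; under the systemd driver the name would first be converted, as toCgroupFsName does via cm.ConvertCgroupNameToSystemd. The forceEmptyMemory helper name is illustrative only.

```go
// Minimal sketch of the memory.force_empty mechanism used by the e2e helper.
// Assumes cgroup v1 with the memory controller at /sys/fs/cgroup/memory and
// root privileges; forceEmptyMemory is a hypothetical name, not kubelet API.
package main

import (
	"fmt"
	"os"
	"path/filepath"
)

// forceEmptyMemory asks the kernel to reclaim as much reclaimable memory as
// possible from the given memory cgroup by writing to its memory.force_empty
// file. The write may block until reclaim finishes, usually well under a second.
func forceEmptyMemory(cgroupFsName string) error {
	p := filepath.Join("/sys/fs/cgroup/memory", cgroupFsName, "memory.force_empty")
	// Any value triggers reclaim; the e2e helper writes "0".
	return os.WriteFile(p, []byte("0"), 0644)
}

func main() {
	// "kubepods" matches defaultNodeAllocatableCgroup when using the cgroupfs driver.
	if err := forceEmptyMemory("kubepods"); err != nil {
		fmt.Fprintln(os.Stderr, "force_empty failed:", err)
		os.Exit(1)
	}
	fmt.Println("requested memory reclaim for the kubepods cgroup")
}
```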