Merge pull request #60900 from dashpole/eviction_test_no_pressure

Automatic merge from submit-queue (batch tested with PRs 60900, 62215, 62196). If you want to cherry-pick this change to another branch, please follow the instructions <a href="https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md">here</a>.

[Flaky test fix] Use memory.force_empty before and after eviction tests

**What this PR does / why we need it**:
(copied from https://github.com/kubernetes/kubernetes/pull/60720):
MemoryAllocatableEviction tests have been somewhat flaky: https://k8s-testgrid.appspot.com/sig-node-kubelet#kubelet-serial-gce-e2e&include-filter-by-regex=MemoryAllocatable
The failure on the flakes is ["Pod ran to completion"](https://k8s-gubernator.appspot.com/build/kubernetes-jenkins/logs/ci-kubernetes-node-kubelet-serial/3785#k8sio-memoryallocatableeviction-slow-serial-disruptive-when-we-run-containers-that-should-cause-memorypressure-should-eventually-evict-all-of-the-correct-pods).
Looking at [an example log](https://storage.googleapis.com/kubernetes-jenkins/logs/ci-kubernetes-node-kubelet-serial/3785/artifacts/tmp-node-e2e-6070a774-cos-stable-63-10032-71-0/kubelet.log) (and search for memory-hog-pod, we can see that this pod fails admission because the allocatable memory threshold has already been crossed.
`eviction manager: thresholds - ignoring grace period: threshold [signal=allocatableMemory.available, quantity=250Mi] observed 242404Ki`

https://github.com/kubernetes/kubernetes/pull/60720 wasn't effective.  To clean-up after each eviction test, and prepare for the next, use memory.force_empty to make the kernel reclaim memory in the allocatable cgroup before and after eviction tests.

**Special notes for your reviewer**:
I tested to make sure this doesn't break Cgroup Manager tests.
It should work on both cgroupfs and systemd based systems, although I have only tested in on cgroupfs.

**Release note**:
```release-note
NONE
```

/assign @yujuhong @Random-Liu 
/sig node
/priority important-soon
/kind bug

its getting a little late in the release cycle, so we can probably wait until after code freeze is lifted for this.
pull/8/head
Kubernetes Submit Queue 2018-04-06 21:30:06 -07:00 committed by GitHub
commit 3bc1a0a1d0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 34 additions and 15 deletions

View File

@ -34,6 +34,7 @@ go_library(
"//pkg/kubelet/apis/kubeletconfig/scheme:go_default_library",
"//pkg/kubelet/apis/kubeletconfig/v1beta1:go_default_library",
"//pkg/kubelet/apis/stats/v1alpha1:go_default_library",
"//pkg/kubelet/cm:go_default_library",
"//pkg/kubelet/cm/devicemanager:go_default_library",
"//pkg/kubelet/metrics:go_default_library",
"//pkg/kubelet/remote:go_default_library",

View File

@ -131,8 +131,8 @@ var _ = framework.KubeDescribe("MemoryAllocatableEviction [Slow] [Serial] [Disru
// Set large system and kube reserved values to trigger allocatable thresholds far before hard eviction thresholds.
kubeReserved := getNodeCPUAndMemoryCapacity(f)[v1.ResourceMemory]
// The default hard eviction threshold is 250Mb, so Allocatable = Capacity - Reserved - 250Mb
// We want Allocatable = 150Mb, so set Reserved = Capacity - Allocatable - 250Mb = Capacity - 400Mb
kubeReserved.Sub(resource.MustParse("400Mi"))
// We want Allocatable = 50Mb, so set Reserved = Capacity - Allocatable - 250Mb = Capacity - 300Mb
kubeReserved.Sub(resource.MustParse("300Mi"))
initialConfig.KubeReserved = map[string]string{
string(v1.ResourceMemory): kubeReserved.String(),
}
@ -380,6 +380,8 @@ func runEvictionTest(f *framework.Framework, pressureTimeout time.Duration, expe
// Place the remainder of the test within a context so that the kubelet config is set before and after the test.
Context("", func() {
BeforeEach(func() {
// reduce memory usage in the allocatable cgroup to ensure we do not have MemoryPressure
reduceAllocatableMemoryUsage()
// Nodes do not immediately report local storage capacity
// Sleep so that pods requesting local storage do not fail to schedule
time.Sleep(30 * time.Second)
@ -447,6 +449,7 @@ func runEvictionTest(f *framework.Framework, pressureTimeout time.Duration, expe
By(fmt.Sprintf("deleting pod: %s", spec.pod.Name))
f.PodClient().DeleteSync(spec.pod.Name, &metav1.DeleteOptions{}, 10*time.Minute)
}
reduceAllocatableMemoryUsage()
if expectedNodeCondition == v1.NodeDiskPressure && framework.TestContext.PrepullImages {
// The disk eviction test may cause the prepulled images to be evicted,
// prepull those images again to ensure this test not affect following tests.
@ -607,7 +610,12 @@ func logMemoryMetrics() {
return
}
if summary.Node.Memory != nil && summary.Node.Memory.WorkingSetBytes != nil && summary.Node.Memory.AvailableBytes != nil {
framework.Logf("Node.Memory.WorkingSetBytes: %d, summary.Node.Memory.AvailableBytes: %d", *summary.Node.Memory.WorkingSetBytes, *summary.Node.Memory.AvailableBytes)
framework.Logf("Node.Memory.WorkingSetBytes: %d, Node.Memory.AvailableBytes: %d", *summary.Node.Memory.WorkingSetBytes, *summary.Node.Memory.AvailableBytes)
}
for _, sysContainer := range summary.Node.SystemContainers {
if sysContainer.Name == stats.SystemContainerPods && sysContainer.Memory != nil && sysContainer.Memory.WorkingSetBytes != nil && sysContainer.Memory.AvailableBytes != nil {
framework.Logf("Allocatable.Memory.WorkingSetBytes: %d, Allocatable.Memory.AvailableBytes: %d", *sysContainer.Memory.WorkingSetBytes, *sysContainer.Memory.AvailableBytes)
}
}
for _, pod := range summary.Pods {
framework.Logf("Pod: %s", pod.PodRef.Name)

View File

@ -54,8 +54,6 @@ func getResourceRequirements(requests, limits v1.ResourceList) v1.ResourceRequir
}
const (
// Kubelet internal cgroup name for node allocatable cgroup.
defaultNodeAllocatableCgroup = "kubepods"
// Kubelet internal cgroup name for burstable tier
burstableCgroup = "burstable"
// Kubelet internal cgroup name for besteffort tier
@ -68,12 +66,7 @@ func makePodToVerifyCgroups(cgroupNames []cm.CgroupName) *v1.Pod {
cgroupFsNames := []string{}
for _, cgroupName := range cgroupNames {
// Add top level cgroup used to enforce node allocatable.
cgroupName = cm.CgroupName(path.Join(defaultNodeAllocatableCgroup, string(cgroupName)))
if framework.TestContext.KubeletConfig.CgroupDriver == "systemd" {
cgroupFsNames = append(cgroupFsNames, cm.ConvertCgroupNameToSystemd(cgroupName, true))
} else {
cgroupFsNames = append(cgroupFsNames, string(cgroupName))
}
cgroupFsNames = append(cgroupFsNames, toCgroupFsName(path.Join(defaultNodeAllocatableCgroup, string(cgroupName))))
}
glog.Infof("expecting %v cgroups to be found", cgroupFsNames)
// build the pod command to either verify cgroups exist
@ -117,10 +110,7 @@ func makePodToVerifyCgroups(cgroupNames []cm.CgroupName) *v1.Pod {
// makePodToVerifyCgroupRemoved verfies the specified cgroup does not exist.
func makePodToVerifyCgroupRemoved(cgroupName cm.CgroupName) *v1.Pod {
cgroupFsName := string(cgroupName)
if framework.TestContext.KubeletConfig.CgroupDriver == "systemd" {
cgroupFsName = cm.ConvertCgroupNameToSystemd(cm.CgroupName(cgroupName), true)
}
cgroupFsName := toCgroupFsName(string(cgroupName))
pod := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "pod" + string(uuid.NewUUID()),

View File

@ -41,6 +41,7 @@ import (
kubeletscheme "k8s.io/kubernetes/pkg/kubelet/apis/kubeletconfig/scheme"
kubeletconfigv1beta1 "k8s.io/kubernetes/pkg/kubelet/apis/kubeletconfig/v1beta1"
stats "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1"
"k8s.io/kubernetes/pkg/kubelet/cm"
kubeletmetrics "k8s.io/kubernetes/pkg/kubelet/metrics"
"k8s.io/kubernetes/pkg/kubelet/remote"
"k8s.io/kubernetes/test/e2e/framework"
@ -57,6 +58,9 @@ var startServices = flag.Bool("start-services", true, "If true, start local node
var stopServices = flag.Bool("stop-services", true, "If true, stop local node services after running tests")
var busyboxImage = "busybox"
// Kubelet internal cgroup name for node allocatable cgroup.
const defaultNodeAllocatableCgroup = "kubepods"
func getNodeSummary() (*stats.Summary, error) {
req, err := http.NewRequest("GET", *kubeletAddress+"/stats/summary", nil)
if err != nil {
@ -407,3 +411,19 @@ func restartKubelet() {
stdout, err = exec.Command("sudo", "systemctl", "restart", kube).CombinedOutput()
framework.ExpectNoError(err, "Failed to restart kubelet with systemctl: %v, %v", err, stdout)
}
func toCgroupFsName(cgroup string) string {
if framework.TestContext.KubeletConfig.CgroupDriver == "systemd" {
return cm.ConvertCgroupNameToSystemd(cm.CgroupName(cgroup), true)
}
return cgroup
}
// reduceAllocatableMemoryUsage uses memory.force_empty (https://lwn.net/Articles/432224/)
// to make the kernel reclaim memory in the allocatable cgroup
// the time to reduce pressure may be unbounded, but usually finishes within a second
func reduceAllocatableMemoryUsage() {
cmd := fmt.Sprintf("echo 0 > /sys/fs/cgroup/memory/%s/memory.force_empty", toCgroupFsName(defaultNodeAllocatableCgroup))
_, err := exec.Command("sudo", "sh", "-c", cmd).CombinedOutput()
framework.ExpectNoError(err)
}