mirror of https://github.com/k3s-io/k3s
Merge pull request #60900 from dashpole/eviction_test_no_pressure
Automatic merge from submit-queue (batch tested with PRs 60900, 62215, 62196). If you want to cherry-pick this change to another branch, please follow the instructions [here](https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md).

[Flaky test fix] Use memory.force_empty before and after eviction tests

**What this PR does / why we need it** (copied from https://github.com/kubernetes/kubernetes/pull/60720):
MemoryAllocatableEviction tests have been somewhat flaky: https://k8s-testgrid.appspot.com/sig-node-kubelet#kubelet-serial-gce-e2e&include-filter-by-regex=MemoryAllocatable
The failure on the flakes is ["Pod ran to completion"](https://k8s-gubernator.appspot.com/build/kubernetes-jenkins/logs/ci-kubernetes-node-kubelet-serial/3785#k8sio-memoryallocatableeviction-slow-serial-disruptive-when-we-run-containers-that-should-cause-memorypressure-should-eventually-evict-all-of-the-correct-pods). Looking at [an example log](https://storage.googleapis.com/kubernetes-jenkins/logs/ci-kubernetes-node-kubelet-serial/3785/artifacts/tmp-node-e2e-6070a774-cos-stable-63-10032-71-0/kubelet.log) (search for memory-hog-pod), we can see that this pod fails admission because the allocatable memory threshold has already been crossed:
`eviction manager: thresholds - ignoring grace period: threshold [signal=allocatableMemory.available, quantity=250Mi] observed 242404Ki`
The observed 242404Ki is roughly 237Mi, already below the 250Mi threshold, so the node is under memory pressure before the test pod is even admitted. The earlier fix in https://github.com/kubernetes/kubernetes/pull/60720 wasn't effective. To clean up after each eviction test and prepare for the next, use memory.force_empty to make the kernel reclaim memory in the allocatable cgroup before and after eviction tests.

**Special notes for your reviewer**: I tested to make sure this doesn't break the Cgroup Manager tests. It should work on both cgroupfs and systemd based systems, although I have only tested it on cgroupfs.

**Release note**:
```release-note
NONE
```

/assign @yujuhong @Random-Liu
/sig node
/priority important-soon
/kind bug

It's getting a little late in the release cycle, so we can probably wait until after code freeze is lifted for this.
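For context, here is a minimal sketch of the memory.force_empty mechanism the PR relies on, outside the test framework. It assumes a cgroup v1 memory controller mounted at /sys/fs/cgroup/memory and the cgroupfs driver (so the allocatable cgroup is "kubepods"); the reduceAllocatableMemoryUsage helper added later in this diff does the same thing with a driver-aware cgroup name.

```go
package main

import (
	"fmt"
	"os/exec"
)

func main() {
	// Writing to memory.force_empty asks the kernel to reclaim as much memory as it
	// can (mostly page cache) from the cgroup, pushing usage back below the
	// allocatable eviction threshold.
	cmd := "echo 0 > /sys/fs/cgroup/memory/kubepods/memory.force_empty"
	out, err := exec.Command("sudo", "sh", "-c", cmd).CombinedOutput()
	if err != nil {
		fmt.Printf("force_empty failed: %v: %s\n", err, out)
	}
}
```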
commit
3bc1a0a1d0
@@ -34,6 +34,7 @@ go_library(
         "//pkg/kubelet/apis/kubeletconfig/scheme:go_default_library",
         "//pkg/kubelet/apis/kubeletconfig/v1beta1:go_default_library",
         "//pkg/kubelet/apis/stats/v1alpha1:go_default_library",
+        "//pkg/kubelet/cm:go_default_library",
         "//pkg/kubelet/cm/devicemanager:go_default_library",
         "//pkg/kubelet/metrics:go_default_library",
         "//pkg/kubelet/remote:go_default_library",

@@ -131,8 +131,8 @@ var _ = framework.KubeDescribe("MemoryAllocatableEviction [Slow] [Serial] [Disru
 		// Set large system and kube reserved values to trigger allocatable thresholds far before hard eviction thresholds.
 		kubeReserved := getNodeCPUAndMemoryCapacity(f)[v1.ResourceMemory]
 		// The default hard eviction threshold is 250Mb, so Allocatable = Capacity - Reserved - 250Mb
-		// We want Allocatable = 150Mb, so set Reserved = Capacity - Allocatable - 250Mb = Capacity - 400Mb
-		kubeReserved.Sub(resource.MustParse("400Mi"))
+		// We want Allocatable = 50Mb, so set Reserved = Capacity - Allocatable - 250Mb = Capacity - 300Mb
+		kubeReserved.Sub(resource.MustParse("300Mi"))
 		initialConfig.KubeReserved = map[string]string{
 			string(v1.ResourceMemory): kubeReserved.String(),
 		}
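To make the reservation arithmetic in this hunk concrete, here is a worked sketch using a hypothetical 3800Mi node capacity (the real value comes from getNodeCPUAndMemoryCapacity): with Reserved = Capacity - 300Mi and the default 250Mi hard eviction threshold, Allocatable comes out to 50Mi.

```go
package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/api/resource"
)

func main() {
	capacity := resource.MustParse("3800Mi") // hypothetical node memory capacity

	// Reserved = Capacity - 300Mi, mirroring kubeReserved.Sub(resource.MustParse("300Mi")).
	reserved := capacity.DeepCopy()
	reserved.Sub(resource.MustParse("300Mi")) // 3500Mi

	// Allocatable = Capacity - Reserved - 250Mi (default hard eviction threshold).
	allocatable := capacity.DeepCopy()
	allocatable.Sub(reserved)
	allocatable.Sub(resource.MustParse("250Mi"))

	fmt.Println(allocatable.String()) // 50Mi
}
```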
@@ -380,6 +380,8 @@ func runEvictionTest(f *framework.Framework, pressureTimeout time.Duration, expe
 	// Place the remainder of the test within a context so that the kubelet config is set before and after the test.
 	Context("", func() {
 		BeforeEach(func() {
+			// reduce memory usage in the allocatable cgroup to ensure we do not have MemoryPressure
+			reduceAllocatableMemoryUsage()
 			// Nodes do not immediately report local storage capacity
 			// Sleep so that pods requesting local storage do not fail to schedule
 			time.Sleep(30 * time.Second)
@@ -447,6 +449,7 @@ func runEvictionTest(f *framework.Framework, pressureTimeout time.Duration, expe
 				By(fmt.Sprintf("deleting pod: %s", spec.pod.Name))
 				f.PodClient().DeleteSync(spec.pod.Name, &metav1.DeleteOptions{}, 10*time.Minute)
 			}
+			reduceAllocatableMemoryUsage()
 			if expectedNodeCondition == v1.NodeDiskPressure && framework.TestContext.PrepullImages {
 				// The disk eviction test may cause the prepulled images to be evicted,
 				// prepull those images again to ensure this test not affect following tests.
@@ -607,7 +610,12 @@ func logMemoryMetrics() {
 		return
 	}
 	if summary.Node.Memory != nil && summary.Node.Memory.WorkingSetBytes != nil && summary.Node.Memory.AvailableBytes != nil {
-		framework.Logf("Node.Memory.WorkingSetBytes: %d, summary.Node.Memory.AvailableBytes: %d", *summary.Node.Memory.WorkingSetBytes, *summary.Node.Memory.AvailableBytes)
+		framework.Logf("Node.Memory.WorkingSetBytes: %d, Node.Memory.AvailableBytes: %d", *summary.Node.Memory.WorkingSetBytes, *summary.Node.Memory.AvailableBytes)
 	}
+	for _, sysContainer := range summary.Node.SystemContainers {
+		if sysContainer.Name == stats.SystemContainerPods && sysContainer.Memory != nil && sysContainer.Memory.WorkingSetBytes != nil && sysContainer.Memory.AvailableBytes != nil {
+			framework.Logf("Allocatable.Memory.WorkingSetBytes: %d, Allocatable.Memory.AvailableBytes: %d", *sysContainer.Memory.WorkingSetBytes, *sysContainer.Memory.AvailableBytes)
+		}
+	}
 	for _, pod := range summary.Pods {
 		framework.Logf("Pod: %s", pod.PodRef.Name)

@@ -54,8 +54,6 @@ func getResourceRequirements(requests, limits v1.ResourceList) v1.ResourceRequir
 }
 
 const (
-	// Kubelet internal cgroup name for node allocatable cgroup.
-	defaultNodeAllocatableCgroup = "kubepods"
 	// Kubelet internal cgroup name for burstable tier
 	burstableCgroup = "burstable"
 	// Kubelet internal cgroup name for besteffort tier
@@ -68,12 +66,7 @@ func makePodToVerifyCgroups(cgroupNames []cm.CgroupName) *v1.Pod {
 	cgroupFsNames := []string{}
 	for _, cgroupName := range cgroupNames {
 		// Add top level cgroup used to enforce node allocatable.
-		cgroupName = cm.CgroupName(path.Join(defaultNodeAllocatableCgroup, string(cgroupName)))
-		if framework.TestContext.KubeletConfig.CgroupDriver == "systemd" {
-			cgroupFsNames = append(cgroupFsNames, cm.ConvertCgroupNameToSystemd(cgroupName, true))
-		} else {
-			cgroupFsNames = append(cgroupFsNames, string(cgroupName))
-		}
+		cgroupFsNames = append(cgroupFsNames, toCgroupFsName(path.Join(defaultNodeAllocatableCgroup, string(cgroupName))))
 	}
 	glog.Infof("expecting %v cgroups to be found", cgroupFsNames)
 	// build the pod command to either verify cgroups exist
@@ -117,10 +110,7 @@ func makePodToVerifyCgroups(cgroupNames []cm.CgroupName) *v1.Pod {
 
 // makePodToVerifyCgroupRemoved verfies the specified cgroup does not exist.
 func makePodToVerifyCgroupRemoved(cgroupName cm.CgroupName) *v1.Pod {
-	cgroupFsName := string(cgroupName)
-	if framework.TestContext.KubeletConfig.CgroupDriver == "systemd" {
-		cgroupFsName = cm.ConvertCgroupNameToSystemd(cm.CgroupName(cgroupName), true)
-	}
+	cgroupFsName := toCgroupFsName(string(cgroupName))
 	pod := &v1.Pod{
 		ObjectMeta: metav1.ObjectMeta{
 			Name: "pod" + string(uuid.NewUUID()),

@@ -41,6 +41,7 @@ import (
 	kubeletscheme "k8s.io/kubernetes/pkg/kubelet/apis/kubeletconfig/scheme"
 	kubeletconfigv1beta1 "k8s.io/kubernetes/pkg/kubelet/apis/kubeletconfig/v1beta1"
 	stats "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1"
+	"k8s.io/kubernetes/pkg/kubelet/cm"
 	kubeletmetrics "k8s.io/kubernetes/pkg/kubelet/metrics"
 	"k8s.io/kubernetes/pkg/kubelet/remote"
 	"k8s.io/kubernetes/test/e2e/framework"
@@ -57,6 +58,9 @@ var startServices = flag.Bool("start-services", true, "If true, start local node
 var stopServices = flag.Bool("stop-services", true, "If true, stop local node services after running tests")
 var busyboxImage = "busybox"
 
+// Kubelet internal cgroup name for node allocatable cgroup.
+const defaultNodeAllocatableCgroup = "kubepods"
+
 func getNodeSummary() (*stats.Summary, error) {
 	req, err := http.NewRequest("GET", *kubeletAddress+"/stats/summary", nil)
 	if err != nil {
@@ -407,3 +411,19 @@ func restartKubelet() {
 	stdout, err = exec.Command("sudo", "systemctl", "restart", kube).CombinedOutput()
 	framework.ExpectNoError(err, "Failed to restart kubelet with systemctl: %v, %v", err, stdout)
 }
+
+func toCgroupFsName(cgroup string) string {
+	if framework.TestContext.KubeletConfig.CgroupDriver == "systemd" {
+		return cm.ConvertCgroupNameToSystemd(cm.CgroupName(cgroup), true)
+	}
+	return cgroup
+}
+
+// reduceAllocatableMemoryUsage uses memory.force_empty (https://lwn.net/Articles/432224/)
+// to make the kernel reclaim memory in the allocatable cgroup
+// the time to reduce pressure may be unbounded, but usually finishes within a second
+func reduceAllocatableMemoryUsage() {
+	cmd := fmt.Sprintf("echo 0 > /sys/fs/cgroup/memory/%s/memory.force_empty", toCgroupFsName(defaultNodeAllocatableCgroup))
+	_, err := exec.Command("sudo", "sh", "-c", cmd).CombinedOutput()
+	framework.ExpectNoError(err)
+}
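For illustration, here is a standalone sketch of the path difference that toCgroupFsName hides from reduceAllocatableMemoryUsage. The ".slice" suffix shown for the systemd driver is an assumption based on systemd slice naming; the actual conversion in the PR is delegated to cm.ConvertCgroupNameToSystemd.

```go
package main

import (
	"fmt"
	"path/filepath"
)

// forceEmptyPath sketches where memory.force_empty lives for each cgroup driver.
func forceEmptyPath(driver, cgroup string) string {
	name := cgroup
	if driver == "systemd" {
		// Assumed systemd slice name for the top-level allocatable cgroup.
		name = cgroup + ".slice"
	}
	return filepath.Join("/sys/fs/cgroup/memory", name, "memory.force_empty")
}

func main() {
	fmt.Println(forceEmptyPath("cgroupfs", "kubepods")) // /sys/fs/cgroup/memory/kubepods/memory.force_empty
	fmt.Println(forceEmptyPath("systemd", "kubepods"))  // /sys/fs/cgroup/memory/kubepods.slice/memory.force_empty
}
```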