mirror of https://github.com/k3s-io/k3s
Merge pull request #44520 from dashpole/test_eviction_fix
Automatic merge from submit-queue (batch tested with PRs 44520, 45253, 45838, 44685, 45901).

Ensure ordering of using dynamic kubelet config and setting up tests.

This PR simply places the body of the eviction test within its own context. This ensures that the kubelet config is set before the pods are created, and that the kubelet config is reverted only after the pods are deleted.
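For illustration only (not part of this commit): a minimal, self-contained Ginkgo sketch of the ordering guarantee the PR relies on. BeforeEach blocks of outer containers run before those of inner containers, and inner AfterEach blocks run before outer ones, so placing the kubelet config update in an outer Context and the pod setup in an inner Context applies the config before pods are created and reverts it only after the pods are deleted. The package name, spec names, and step strings below are invented for this sketch.

// ordering_sketch_test.go — illustrative sketch; not taken from the commit.
package ordering_test

import (
	"testing"

	. "github.com/onsi/ginkgo"
	. "github.com/onsi/gomega"
)

func TestOrderingSketch(t *testing.T) {
	RegisterFailHandler(Fail)
	RunSpecs(t, "Nested context ordering sketch")
}

var steps []string

var _ = Describe("nested context ordering", func() {
	// Outer context: stands in for tempSetCurrentKubeletConfig, which registers a
	// BeforeEach that applies the kubelet config and an AfterEach that reverts it.
	Context("when the kubelet config is updated", func() {
		BeforeEach(func() { steps = append(steps, "apply kubelet config") })
		AfterEach(func() { steps = append(steps, "revert kubelet config") })

		// Inner context: stands in for runEvictionTest, which registers the
		// pod-creation BeforeEach, the eviction It, and the pod-deletion AfterEach.
		Context("With kubeconfig updated", func() {
			BeforeEach(func() { steps = append(steps, "create pods") })
			AfterEach(func() { steps = append(steps, "delete pods") })

			It("applies the config before creating pods", func() {
				// The outer BeforeEach ran first; teardown runs delete-then-revert.
				Expect(steps).To(Equal([]string{"apply kubelet config", "create pods"}))
			})
		})
	})
})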
commit 85775105f1
@@ -65,16 +65,22 @@ var _ = framework.KubeDescribe("AllocatableEviction [Slow] [Serial] [Disruptive]
	}
	evictionTestTimeout := 40 * time.Minute
	testCondition := "Memory Pressure"
	kubeletConfigUpdate := func(initialConfig *componentconfig.KubeletConfiguration) {
		initialConfig.EvictionHard = "memory.available<10%"
		// Set large system and kube reserved values to trigger allocatable thresholds far before hard eviction thresholds.
		initialConfig.SystemReserved = componentconfig.ConfigurationMap(map[string]string{"memory": "1Gi"})
		initialConfig.KubeReserved = componentconfig.ConfigurationMap(map[string]string{"memory": "1Gi"})
		initialConfig.EnforceNodeAllocatable = []string{cm.NodeAllocatableEnforcementKey}
		initialConfig.ExperimentalNodeAllocatableIgnoreEvictionThreshold = false
		initialConfig.CgroupsPerQOS = true
	}
	runEvictionTest(f, testCondition, podTestSpecs, evictionTestTimeout, hasMemoryPressure, kubeletConfigUpdate)

	Context(fmt.Sprintf("when we run containers that should cause %s", testCondition), func() {
		tempSetCurrentKubeletConfig(f, func(initialConfig *componentconfig.KubeletConfiguration) {
			initialConfig.EvictionHard = "memory.available<10%"
			// Set large system and kube reserved values to trigger allocatable thresholds far before hard eviction thresholds.
			initialConfig.SystemReserved = componentconfig.ConfigurationMap(map[string]string{"memory": "1Gi"})
			initialConfig.KubeReserved = componentconfig.ConfigurationMap(map[string]string{"memory": "1Gi"})
			initialConfig.EnforceNodeAllocatable = []string{cm.NodeAllocatableEnforcementKey}
			initialConfig.ExperimentalNodeAllocatableIgnoreEvictionThreshold = false
			initialConfig.CgroupsPerQOS = true
		})
		// Place the remainder of the test within a context so that the kubelet config is set before and after the test.
		Context("With kubeconfig updated", func() {
			runEvictionTest(f, testCondition, podTestSpecs, evictionTestTimeout, hasMemoryPressure)
		})
	})
})

// Returns TRUE if the node has Memory Pressure, FALSE otherwise
@@ -114,11 +114,16 @@ var _ = framework.KubeDescribe("InodeEviction [Slow] [Serial] [Disruptive] [Flak
	}
	evictionTestTimeout := 30 * time.Minute
	testCondition := "Disk Pressure due to Inodes"
	kubeletConfigUpdate := func(initialConfig *componentconfig.KubeletConfiguration) {
		initialConfig.EvictionHard = "nodefs.inodesFree<50%"
	}

	runEvictionTest(f, testCondition, podTestSpecs, evictionTestTimeout, hasInodePressure, kubeletConfigUpdate)
	Context(fmt.Sprintf("when we run containers that should cause %s", testCondition), func() {
		tempSetCurrentKubeletConfig(f, func(initialConfig *componentconfig.KubeletConfiguration) {
			initialConfig.EvictionHard = "nodefs.inodesFree<50%"
		})
		// Place the remainder of the test within a context so that the kubelet config is set before and after the test.
		Context("With kubeconfig updated", func() {
			runEvictionTest(f, testCondition, podTestSpecs, evictionTestTimeout, hasInodePressure)
		})
	})
})

// Struct used by runEvictionTest that specifies the pod, and when that pod should be evicted, relative to other pods
@@ -137,171 +142,166 @@ type podTestSpec struct {
// It ensures that all lower evictionPriority pods are eventually evicted.
// runEvictionTest then cleans up the testing environment by deleting provided nodes, and ensures that testCondition no longer exists
func runEvictionTest(f *framework.Framework, testCondition string, podTestSpecs []podTestSpec, evictionTestTimeout time.Duration,
hasPressureCondition func(*framework.Framework, string) (bool, error), updateFunction func(initialConfig *componentconfig.KubeletConfiguration)) {
hasPressureCondition func(*framework.Framework, string) (bool, error)) {
BeforeEach(func() {
By("seting up pods to be used by tests")
for _, spec := range podTestSpecs {
By(fmt.Sprintf("creating pod with container: %s", spec.pod.Name))
f.PodClient().CreateSync(&spec.pod)
}
})

Context(fmt.Sprintf("when we run containers that should cause %s", testCondition), func() {

tempSetCurrentKubeletConfig(f, updateFunction)
BeforeEach(func() {
By("seting up pods to be used by tests")
for _, spec := range podTestSpecs {
By(fmt.Sprintf("creating pod with container: %s", spec.pod.Name))
f.PodClient().CreateSync(&spec.pod)
It(fmt.Sprintf("should eventually see %s, and then evict all of the correct pods", testCondition), func() {
configEnabled, err := isKubeletConfigEnabled(f)
framework.ExpectNoError(err)
if !configEnabled {
framework.Skipf("Dynamic kubelet config must be enabled for this test to run.")
}
Eventually(func() error {
hasPressure, err := hasPressureCondition(f, testCondition)
if err != nil {
return err
}
})

It(fmt.Sprintf("should eventually see %s, and then evict all of the correct pods", testCondition), func() {
configEnabled, err := isKubeletConfigEnabled(f)
framework.ExpectNoError(err)
if !configEnabled {
framework.Skipf("Dynamic kubelet config must be enabled for this test to run.")
if hasPressure {
return nil
}
Eventually(func() error {
hasPressure, err := hasPressureCondition(f, testCondition)
if err != nil {
return err
}
if hasPressure {
return nil
}
return fmt.Errorf("Condition: %s not encountered", testCondition)
}, evictionTestTimeout, evictionPollInterval).Should(BeNil())
return fmt.Errorf("Condition: %s not encountered", testCondition)
}, evictionTestTimeout, evictionPollInterval).Should(BeNil())

Eventually(func() error {
// Gather current information
updatedPodList, err := f.ClientSet.Core().Pods(f.Namespace.Name).List(metav1.ListOptions{})
updatedPods := updatedPodList.Items
Eventually(func() error {
// Gather current information
updatedPodList, err := f.ClientSet.Core().Pods(f.Namespace.Name).List(metav1.ListOptions{})
updatedPods := updatedPodList.Items
for _, p := range updatedPods {
framework.Logf("fetching pod %s; phase= %v", p.Name, p.Status.Phase)
}
_, err = hasPressureCondition(f, testCondition)
if err != nil {
return err
}

By("checking eviction ordering and ensuring important pods dont fail")
done := true
for _, priorityPodSpec := range podTestSpecs {
var priorityPod v1.Pod
for _, p := range updatedPods {
framework.Logf("fetching pod %s; phase= %v", p.Name, p.Status.Phase)
}
_, err = hasPressureCondition(f, testCondition)
if err != nil {
return err
if p.Name == priorityPodSpec.pod.Name {
priorityPod = p
}
}
Expect(priorityPod).NotTo(BeNil())

By("checking eviction ordering and ensuring important pods dont fail")
|
||||
done := true
|
||||
for _, priorityPodSpec := range podTestSpecs {
|
||||
var priorityPod v1.Pod
|
||||
// Check eviction ordering.
|
||||
// Note: it is alright for a priority 1 and priority 2 pod (for example) to fail in the same round
|
||||
for _, lowPriorityPodSpec := range podTestSpecs {
|
||||
var lowPriorityPod v1.Pod
|
||||
for _, p := range updatedPods {
|
||||
if p.Name == priorityPodSpec.pod.Name {
|
||||
priorityPod = p
|
||||
if p.Name == lowPriorityPodSpec.pod.Name {
|
||||
lowPriorityPod = p
|
||||
}
|
||||
}
|
||||
Expect(priorityPod).NotTo(BeNil())
|
||||
|
||||
// Check eviction ordering.
|
||||
// Note: it is alright for a priority 1 and priority 2 pod (for example) to fail in the same round
|
||||
for _, lowPriorityPodSpec := range podTestSpecs {
|
||||
var lowPriorityPod v1.Pod
|
||||
for _, p := range updatedPods {
|
||||
if p.Name == lowPriorityPodSpec.pod.Name {
|
||||
lowPriorityPod = p
|
||||
}
|
||||
}
|
||||
Expect(lowPriorityPod).NotTo(BeNil())
|
||||
if priorityPodSpec.evictionPriority < lowPriorityPodSpec.evictionPriority && lowPriorityPod.Status.Phase == v1.PodRunning {
|
||||
Expect(priorityPod.Status.Phase).NotTo(Equal(v1.PodFailed),
|
||||
fmt.Sprintf("%s pod failed before %s pod", priorityPodSpec.pod.Name, lowPriorityPodSpec.pod.Name))
|
||||
}
|
||||
}
|
||||
|
||||
// EvictionPriority 0 pods should not fail
|
||||
if priorityPodSpec.evictionPriority == 0 {
|
||||
Expect(lowPriorityPod).NotTo(BeNil())
|
||||
if priorityPodSpec.evictionPriority < lowPriorityPodSpec.evictionPriority && lowPriorityPod.Status.Phase == v1.PodRunning {
|
||||
Expect(priorityPod.Status.Phase).NotTo(Equal(v1.PodFailed),
|
||||
fmt.Sprintf("%s pod failed (and shouldn't have failed)", priorityPod.Name))
|
||||
}
|
||||
|
||||
// If a pod that is not evictionPriority 0 has not been evicted, we are not done
|
||||
if priorityPodSpec.evictionPriority != 0 && priorityPod.Status.Phase != v1.PodFailed {
|
||||
done = false
|
||||
fmt.Sprintf("%s pod failed before %s pod", priorityPodSpec.pod.Name, lowPriorityPodSpec.pod.Name))
|
||||
}
|
||||
}
|
||||
if done {
|
||||
return nil
|
||||
}
|
||||
return fmt.Errorf("pods that caused %s have not been evicted.", testCondition)
|
||||
}, evictionTestTimeout, evictionPollInterval).Should(BeNil())
|
||||
|
||||
// We observe pressure from the API server. The eviction manager observes pressure from the kubelet internal stats.
// This means the eviction manager will observe pressure before we will, creating a delay between when the eviction manager
// evicts a pod, and when we observe the pressure by querrying the API server. Add a delay here to account for this delay
By("making sure pressure from test has surfaced before continuing")
time.Sleep(pressureDelay)
// EvictionPriority 0 pods should not fail
if priorityPodSpec.evictionPriority == 0 {
Expect(priorityPod.Status.Phase).NotTo(Equal(v1.PodFailed),
fmt.Sprintf("%s pod failed (and shouldn't have failed)", priorityPod.Name))
}

By("making sure conditions eventually return to normal")
Eventually(func() error {
hasPressure, err := hasPressureCondition(f, testCondition)
if err != nil {
return err
}
if hasPressure {
return fmt.Errorf("Conditions havent returned to normal, we still have %s", testCondition)
// If a pod that is not evictionPriority 0 has not been evicted, we are not done
if priorityPodSpec.evictionPriority != 0 && priorityPod.Status.Phase != v1.PodFailed {
done = false
}
}
if done {
return nil
}, evictionTestTimeout, evictionPollInterval).Should(BeNil())
}
return fmt.Errorf("pods that caused %s have not been evicted.", testCondition)
}, evictionTestTimeout, evictionPollInterval).Should(BeNil())

By("making sure conditions do not return, and that pods that shouldnt fail dont fail")
|
||||
Consistently(func() error {
|
||||
hasPressure, err := hasPressureCondition(f, testCondition)
|
||||
if err != nil {
|
||||
// Race conditions sometimes occur when checking pressure condition due to #38710 (Docker bug)
|
||||
// Do not fail the test when this occurs, since this is expected to happen occasionally.
|
||||
framework.Logf("Failed to check pressure condition. Error: %v", err)
|
||||
return nil
|
||||
}
|
||||
if hasPressure {
|
||||
return fmt.Errorf("%s dissappeared and then reappeared", testCondition)
|
||||
}
|
||||
// Gather current information
|
||||
updatedPodList, _ := f.ClientSet.Core().Pods(f.Namespace.Name).List(metav1.ListOptions{})
|
||||
for _, priorityPodSpec := range podTestSpecs {
|
||||
// EvictionPriority 0 pods should not fail
|
||||
if priorityPodSpec.evictionPriority == 0 {
|
||||
for _, p := range updatedPodList.Items {
|
||||
if p.Name == priorityPodSpec.pod.Name && p.Status.Phase == v1.PodFailed {
|
||||
return fmt.Errorf("%s pod failed (delayed) and shouldn't have failed", p.Name)
|
||||
}
|
||||
// We observe pressure from the API server. The eviction manager observes pressure from the kubelet internal stats.
|
||||
// This means the eviction manager will observe pressure before we will, creating a delay between when the eviction manager
|
||||
// evicts a pod, and when we observe the pressure by querrying the API server. Add a delay here to account for this delay
|
||||
By("making sure pressure from test has surfaced before continuing")
|
||||
time.Sleep(pressureDelay)
|
||||
|
||||
By("making sure conditions eventually return to normal")
|
||||
Eventually(func() error {
|
||||
hasPressure, err := hasPressureCondition(f, testCondition)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if hasPressure {
|
||||
return fmt.Errorf("Conditions havent returned to normal, we still have %s", testCondition)
|
||||
}
|
||||
return nil
|
||||
}, evictionTestTimeout, evictionPollInterval).Should(BeNil())
|
||||
|
||||
By("making sure conditions do not return, and that pods that shouldnt fail dont fail")
|
||||
Consistently(func() error {
|
||||
hasPressure, err := hasPressureCondition(f, testCondition)
|
||||
if err != nil {
|
||||
// Race conditions sometimes occur when checking pressure condition due to #38710 (Docker bug)
|
||||
// Do not fail the test when this occurs, since this is expected to happen occasionally.
|
||||
framework.Logf("Failed to check pressure condition. Error: %v", err)
|
||||
return nil
|
||||
}
|
||||
if hasPressure {
|
||||
return fmt.Errorf("%s dissappeared and then reappeared", testCondition)
|
||||
}
|
||||
// Gather current information
|
||||
updatedPodList, _ := f.ClientSet.Core().Pods(f.Namespace.Name).List(metav1.ListOptions{})
|
||||
for _, priorityPodSpec := range podTestSpecs {
|
||||
// EvictionPriority 0 pods should not fail
|
||||
if priorityPodSpec.evictionPriority == 0 {
|
||||
for _, p := range updatedPodList.Items {
|
||||
if p.Name == priorityPodSpec.pod.Name && p.Status.Phase == v1.PodFailed {
|
||||
return fmt.Errorf("%s pod failed (delayed) and shouldn't have failed", p.Name)
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}, postTestConditionMonitoringPeriod, evictionPollInterval).Should(BeNil())
|
||||
}
|
||||
return nil
|
||||
}, postTestConditionMonitoringPeriod, evictionPollInterval).Should(BeNil())
|
||||
|
||||
By("making sure we can start a new pod after the test")
|
||||
podName := "test-admit-pod"
|
||||
f.PodClient().CreateSync(&v1.Pod{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: podName,
|
||||
},
|
||||
Spec: v1.PodSpec{
|
||||
RestartPolicy: v1.RestartPolicyNever,
|
||||
Containers: []v1.Container{
|
||||
{
|
||||
Image: framework.GetPauseImageNameForHostArch(),
|
||||
Name: podName,
|
||||
},
|
||||
By("making sure we can start a new pod after the test")
|
||||
podName := "test-admit-pod"
|
||||
f.PodClient().CreateSync(&v1.Pod{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: podName,
|
||||
},
|
||||
Spec: v1.PodSpec{
|
||||
RestartPolicy: v1.RestartPolicyNever,
|
||||
Containers: []v1.Container{
|
||||
{
|
||||
Image: framework.GetPauseImageNameForHostArch(),
|
||||
Name: podName,
|
||||
},
|
||||
},
|
||||
})
|
||||
},
|
||||
})
|
||||
})
|
||||
|
||||
AfterEach(func() {
|
||||
By("deleting pods")
|
||||
for _, spec := range podTestSpecs {
|
||||
By(fmt.Sprintf("deleting pod: %s", spec.pod.Name))
|
||||
f.PodClient().DeleteSync(spec.pod.Name, &metav1.DeleteOptions{}, framework.DefaultPodDeletionTimeout)
|
||||
}
|
||||
AfterEach(func() {
|
||||
By("deleting pods")
|
||||
for _, spec := range podTestSpecs {
|
||||
By(fmt.Sprintf("deleting pod: %s", spec.pod.Name))
|
||||
f.PodClient().DeleteSync(spec.pod.Name, &metav1.DeleteOptions{}, framework.DefaultPodDeletionTimeout)
|
||||
}
|
||||
|
||||
if CurrentGinkgoTestDescription().Failed {
|
||||
if framework.TestContext.DumpLogsOnFailure {
|
||||
logPodEvents(f)
|
||||
logNodeEvents(f)
|
||||
}
|
||||
By("sleeping to allow for cleanup of test")
|
||||
time.Sleep(postTestConditionMonitoringPeriod)
|
||||
if CurrentGinkgoTestDescription().Failed {
|
||||
if framework.TestContext.DumpLogsOnFailure {
|
||||
logPodEvents(f)
|
||||
logNodeEvents(f)
|
||||
}
|
||||
})
|
||||
By("sleeping to allow for cleanup of test")
|
||||
time.Sleep(postTestConditionMonitoringPeriod)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
|