Merge pull request #44520 from dashpole/test_eviction_fix

Automatic merge from submit-queue (batch tested with PRs 44520, 45253, 45838, 44685, 45901)

Ensure ordering between applying dynamic kubelet config and setting up tests.

This PR places the body of the eviction test within its own Context. This ensures that the kubelet config is set before the pods are created, and that the kubelet config is reverted only after the pods are deleted.
Kubernetes Submit Queue 2017-05-16 21:27:54 -07:00 committed by GitHub
commit 85775105f1
2 changed files with 157 additions and 151 deletions
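The ordering the description relies on comes from Ginkgo's container semantics: BeforeEach blocks run from the outermost container inward, and AfterEach blocks run from the innermost container outward. Below is a minimal, self-contained sketch of that behaviour; the helpers applyKubeletConfig, createTestPods, deleteTestPods, and revertKubeletConfig are stand-ins invented for illustration, not functions from this PR.

package ordering_test

import (
    "fmt"
    "testing"

    . "github.com/onsi/ginkgo"
    . "github.com/onsi/gomega"
)

func TestOrdering(t *testing.T) {
    RegisterFailHandler(Fail)
    RunSpecs(t, "eviction ordering sketch")
}

// Hypothetical stand-ins for the real helpers; they exist only to make the ordering visible.
func applyKubeletConfig()  { fmt.Println("1: kubelet config applied") }
func createTestPods()      { fmt.Println("2: pods created") }
func deleteTestPods()      { fmt.Println("3: pods deleted") }
func revertKubeletConfig() { fmt.Println("4: kubelet config reverted") }

var _ = Describe("ordering of nested containers", func() {
    Context("when we run containers that should cause Memory Pressure", func() {
        BeforeEach(applyKubeletConfig)  // outer BeforeEach runs first
        AfterEach(revertKubeletConfig)  // outer AfterEach runs last

        Context("With kubeconfig updated", func() {
            BeforeEach(createTestPods) // inner BeforeEach runs after the config is applied
            AfterEach(deleteTestPods)  // inner AfterEach runs before the config is reverted

            It("runs with config applied and pods created", func() {
                // the body of runEvictionTest's It block would sit here
            })
        })
    })
})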


@@ -65,16 +65,22 @@ var _ = framework.KubeDescribe("AllocatableEviction [Slow] [Serial] [Disruptive]
     }
     evictionTestTimeout := 40 * time.Minute
     testCondition := "Memory Pressure"
-    kubeletConfigUpdate := func(initialConfig *componentconfig.KubeletConfiguration) {
-        initialConfig.EvictionHard = "memory.available<10%"
-        // Set large system and kube reserved values to trigger allocatable thresholds far before hard eviction thresholds.
-        initialConfig.SystemReserved = componentconfig.ConfigurationMap(map[string]string{"memory": "1Gi"})
-        initialConfig.KubeReserved = componentconfig.ConfigurationMap(map[string]string{"memory": "1Gi"})
-        initialConfig.EnforceNodeAllocatable = []string{cm.NodeAllocatableEnforcementKey}
-        initialConfig.ExperimentalNodeAllocatableIgnoreEvictionThreshold = false
-        initialConfig.CgroupsPerQOS = true
-    }
-    runEvictionTest(f, testCondition, podTestSpecs, evictionTestTimeout, hasMemoryPressure, kubeletConfigUpdate)
+    Context(fmt.Sprintf("when we run containers that should cause %s", testCondition), func() {
+        tempSetCurrentKubeletConfig(f, func(initialConfig *componentconfig.KubeletConfiguration) {
+            initialConfig.EvictionHard = "memory.available<10%"
+            // Set large system and kube reserved values to trigger allocatable thresholds far before hard eviction thresholds.
+            initialConfig.SystemReserved = componentconfig.ConfigurationMap(map[string]string{"memory": "1Gi"})
+            initialConfig.KubeReserved = componentconfig.ConfigurationMap(map[string]string{"memory": "1Gi"})
+            initialConfig.EnforceNodeAllocatable = []string{cm.NodeAllocatableEnforcementKey}
+            initialConfig.ExperimentalNodeAllocatableIgnoreEvictionThreshold = false
+            initialConfig.CgroupsPerQOS = true
+        })
+        // Place the remainder of the test within a context so that the kubelet config is set before and after the test.
+        Context("With kubeconfig updated", func() {
+            runEvictionTest(f, testCondition, podTestSpecs, evictionTestTimeout, hasMemoryPressure)
+        })
+    })
 })
 // Returns TRUE if the node has Memory Pressure, FALSE otherwise
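The pressure checks passed to runEvictionTest (hasMemoryPressure above, hasInodePressure in the next file) share the signature func(*framework.Framework, string) (bool, error). Their bodies are not part of this diff; purely as a rough sketch, a memory-pressure check of that shape could inspect node conditions through the API server along the lines below. hasMemoryPressureSketch is an illustrative name, not the test's actual helper, and the import paths reflect the 2017 in-tree layout.

package eviction_sketch

import (
    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    "k8s.io/kubernetes/pkg/api/v1"
    "k8s.io/kubernetes/test/e2e/framework"
)

// hasMemoryPressureSketch reports whether any node currently has the MemoryPressure
// condition set to True. It mirrors the hasPressureCondition signature used by
// runEvictionTest, but is not the implementation from the test file.
func hasMemoryPressureSketch(f *framework.Framework, testCondition string) (bool, error) {
    framework.Logf("checking for %s", testCondition)
    nodeList, err := f.ClientSet.Core().Nodes().List(metav1.ListOptions{})
    if err != nil {
        return false, err
    }
    for _, node := range nodeList.Items {
        for _, condition := range node.Status.Conditions {
            if condition.Type == v1.NodeMemoryPressure && condition.Status == v1.ConditionTrue {
                return true, nil
            }
        }
    }
    return false, nil
}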


@@ -114,11 +114,16 @@ var _ = framework.KubeDescribe("InodeEviction [Slow] [Serial] [Disruptive] [Flak
     }
     evictionTestTimeout := 30 * time.Minute
     testCondition := "Disk Pressure due to Inodes"
-    kubeletConfigUpdate := func(initialConfig *componentconfig.KubeletConfiguration) {
-        initialConfig.EvictionHard = "nodefs.inodesFree<50%"
-    }
-    runEvictionTest(f, testCondition, podTestSpecs, evictionTestTimeout, hasInodePressure, kubeletConfigUpdate)
+    Context(fmt.Sprintf("when we run containers that should cause %s", testCondition), func() {
+        tempSetCurrentKubeletConfig(f, func(initialConfig *componentconfig.KubeletConfiguration) {
+            initialConfig.EvictionHard = "nodefs.inodesFree<50%"
+        })
+        // Place the remainder of the test within a context so that the kubelet config is set before and after the test.
+        Context("With kubeconfig updated", func() {
+            runEvictionTest(f, testCondition, podTestSpecs, evictionTestTimeout, hasInodePressure)
+        })
+    })
 })
 // Struct used by runEvictionTest that specifies the pod, and when that pod should be evicted, relative to other pods
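The podTestSpec definition itself only appears in the hunk header below, but its shape can be inferred from how the specs are used in the test (spec.pod, spec.evictionPriority). A sketch of the struct implied by that usage, assuming the file's existing v1 import; the real definition may carry additional fields or comments.

// Inferred sketch of podTestSpec, not copied from the source file.
type podTestSpec struct {
    // evictionPriority 0 pods are expected to survive the test; as the ordering check below
    // asserts, pods with a lower evictionPriority value should not be evicted before pods
    // with a higher value.
    evictionPriority int
    pod              v1.Pod
}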
@@ -137,171 +142,166 @@ type podTestSpec struct {
 // It ensures that all lower evictionPriority pods are eventually evicted.
 // runEvictionTest then cleans up the testing environment by deleting provided nodes, and ensures that testCondition no longer exists
 func runEvictionTest(f *framework.Framework, testCondition string, podTestSpecs []podTestSpec, evictionTestTimeout time.Duration,
-    hasPressureCondition func(*framework.Framework, string) (bool, error), updateFunction func(initialConfig *componentconfig.KubeletConfiguration)) {
-    Context(fmt.Sprintf("when we run containers that should cause %s", testCondition), func() {
-        tempSetCurrentKubeletConfig(f, updateFunction)
+    hasPressureCondition func(*framework.Framework, string) (bool, error)) {

     BeforeEach(func() {
         By("seting up pods to be used by tests")
         for _, spec := range podTestSpecs {
             By(fmt.Sprintf("creating pod with container: %s", spec.pod.Name))
             f.PodClient().CreateSync(&spec.pod)
         }
     })

     It(fmt.Sprintf("should eventually see %s, and then evict all of the correct pods", testCondition), func() {
         configEnabled, err := isKubeletConfigEnabled(f)
         framework.ExpectNoError(err)
         if !configEnabled {
             framework.Skipf("Dynamic kubelet config must be enabled for this test to run.")
         }
         Eventually(func() error {
             hasPressure, err := hasPressureCondition(f, testCondition)
             if err != nil {
                 return err
             }
             if hasPressure {
                 return nil
             }
             return fmt.Errorf("Condition: %s not encountered", testCondition)
         }, evictionTestTimeout, evictionPollInterval).Should(BeNil())

         Eventually(func() error {
             // Gather current information
             updatedPodList, err := f.ClientSet.Core().Pods(f.Namespace.Name).List(metav1.ListOptions{})
             updatedPods := updatedPodList.Items
             for _, p := range updatedPods {
                 framework.Logf("fetching pod %s; phase= %v", p.Name, p.Status.Phase)
             }
             _, err = hasPressureCondition(f, testCondition)
             if err != nil {
                 return err
             }

             By("checking eviction ordering and ensuring important pods dont fail")
             done := true
             for _, priorityPodSpec := range podTestSpecs {
                 var priorityPod v1.Pod
                 for _, p := range updatedPods {
                     if p.Name == priorityPodSpec.pod.Name {
                         priorityPod = p
                     }
                 }
                 Expect(priorityPod).NotTo(BeNil())

                 // Check eviction ordering.
                 // Note: it is alright for a priority 1 and priority 2 pod (for example) to fail in the same round
                 for _, lowPriorityPodSpec := range podTestSpecs {
                     var lowPriorityPod v1.Pod
                     for _, p := range updatedPods {
                         if p.Name == lowPriorityPodSpec.pod.Name {
                             lowPriorityPod = p
                         }
                     }
                     Expect(lowPriorityPod).NotTo(BeNil())
                     if priorityPodSpec.evictionPriority < lowPriorityPodSpec.evictionPriority && lowPriorityPod.Status.Phase == v1.PodRunning {
                         Expect(priorityPod.Status.Phase).NotTo(Equal(v1.PodFailed),
                             fmt.Sprintf("%s pod failed before %s pod", priorityPodSpec.pod.Name, lowPriorityPodSpec.pod.Name))
                     }
                 }

                 // EvictionPriority 0 pods should not fail
                 if priorityPodSpec.evictionPriority == 0 {
                     Expect(priorityPod.Status.Phase).NotTo(Equal(v1.PodFailed),
                         fmt.Sprintf("%s pod failed (and shouldn't have failed)", priorityPod.Name))
                 }

                 // If a pod that is not evictionPriority 0 has not been evicted, we are not done
                 if priorityPodSpec.evictionPriority != 0 && priorityPod.Status.Phase != v1.PodFailed {
                     done = false
                 }
             }
             if done {
                 return nil
             }
             return fmt.Errorf("pods that caused %s have not been evicted.", testCondition)
         }, evictionTestTimeout, evictionPollInterval).Should(BeNil())

         // We observe pressure from the API server. The eviction manager observes pressure from the kubelet internal stats.
         // This means the eviction manager will observe pressure before we will, creating a delay between when the eviction manager
         // evicts a pod, and when we observe the pressure by querrying the API server. Add a delay here to account for this delay
         By("making sure pressure from test has surfaced before continuing")
         time.Sleep(pressureDelay)

         By("making sure conditions eventually return to normal")
         Eventually(func() error {
             hasPressure, err := hasPressureCondition(f, testCondition)
             if err != nil {
                 return err
             }
             if hasPressure {
                 return fmt.Errorf("Conditions havent returned to normal, we still have %s", testCondition)
             }
             return nil
         }, evictionTestTimeout, evictionPollInterval).Should(BeNil())

         By("making sure conditions do not return, and that pods that shouldnt fail dont fail")
         Consistently(func() error {
             hasPressure, err := hasPressureCondition(f, testCondition)
             if err != nil {
                 // Race conditions sometimes occur when checking pressure condition due to #38710 (Docker bug)
                 // Do not fail the test when this occurs, since this is expected to happen occasionally.
                 framework.Logf("Failed to check pressure condition. Error: %v", err)
                 return nil
             }
             if hasPressure {
                 return fmt.Errorf("%s dissappeared and then reappeared", testCondition)
             }
             // Gather current information
             updatedPodList, _ := f.ClientSet.Core().Pods(f.Namespace.Name).List(metav1.ListOptions{})
             for _, priorityPodSpec := range podTestSpecs {
                 // EvictionPriority 0 pods should not fail
                 if priorityPodSpec.evictionPriority == 0 {
                     for _, p := range updatedPodList.Items {
                         if p.Name == priorityPodSpec.pod.Name && p.Status.Phase == v1.PodFailed {
                             return fmt.Errorf("%s pod failed (delayed) and shouldn't have failed", p.Name)
                         }
                     }
                 }
             }
             return nil
         }, postTestConditionMonitoringPeriod, evictionPollInterval).Should(BeNil())

         By("making sure we can start a new pod after the test")
         podName := "test-admit-pod"
         f.PodClient().CreateSync(&v1.Pod{
             ObjectMeta: metav1.ObjectMeta{
                 Name: podName,
             },
             Spec: v1.PodSpec{
                 RestartPolicy: v1.RestartPolicyNever,
                 Containers: []v1.Container{
                     {
                         Image: framework.GetPauseImageNameForHostArch(),
                         Name: podName,
                     },
                 },
             },
         })
     })

     AfterEach(func() {
         By("deleting pods")
         for _, spec := range podTestSpecs {
             By(fmt.Sprintf("deleting pod: %s", spec.pod.Name))
             f.PodClient().DeleteSync(spec.pod.Name, &metav1.DeleteOptions{}, framework.DefaultPodDeletionTimeout)
         }

         if CurrentGinkgoTestDescription().Failed {
             if framework.TestContext.DumpLogsOnFailure {
                 logPodEvents(f)
                 logNodeEvents(f)
             }
             By("sleeping to allow for cleanup of test")
             time.Sleep(postTestConditionMonitoringPeriod)
         }
     })
-    })
 }
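tempSetCurrentKubeletConfig, called in the outer Context of both test files, is not part of this diff. Given the description's claim that the config is set before pod creation and reverted only after pod deletion, it presumably registers its own BeforeEach/AfterEach pair in that outer Context. The sketch below is speculative and only illustrates that pattern; readKubeletConfigSketch and writeKubeletConfigSketch are invented stand-ins for whatever helpers the suite actually uses, and the snippet assumes the same framework, componentconfig, and Ginkgo dot-imports as the test file above.

// Speculative sketch of a temp-config helper; the real tempSetCurrentKubeletConfig may differ.
func tempSetKubeletConfigSketch(f *framework.Framework, update func(initialConfig *componentconfig.KubeletConfiguration)) {
    var oldCfg *componentconfig.KubeletConfiguration
    BeforeEach(func() {
        // Registered in the outer Context, so this runs before runEvictionTest's BeforeEach creates pods.
        cfg, err := readKubeletConfigSketch() // stand-in: fetch the running kubelet's configuration
        framework.ExpectNoError(err)
        oldCfg = cfg
        newCfg := *cfg // a shallow copy is enough for this illustration
        update(&newCfg)
        framework.ExpectNoError(writeKubeletConfigSketch(f, &newCfg)) // stand-in: push the updated config
    })
    AfterEach(func() {
        // Runs only after runEvictionTest's AfterEach has deleted the pods, then restores the original config.
        if oldCfg != nil {
            framework.ExpectNoError(writeKubeletConfigSketch(f, oldCfg))
        }
    })
}

// Stub stand-ins so the sketch is self-contained; the e2e_node suite has its own config helpers.
func readKubeletConfigSketch() (*componentconfig.KubeletConfiguration, error) {
    return &componentconfig.KubeletConfiguration{}, nil
}

func writeKubeletConfigSketch(f *framework.Framework, cfg *componentconfig.KubeletConfiguration) error {
    return nil
}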