mirror of https://github.com/k3s-io/k3s
commit 535090312f
@@ -160,6 +160,7 @@ GCE_SLOW_TESTS=(
     "monotonically\sincreasing\srestart\scount"    # 1.5 to 5 min, file: pods.go, slow by design
     "Garbage\scollector\sshould"                    # 7 min, file: garbage_collector.go, slow by design
     "KubeProxy\sshould\stest\skube-proxy"           # 9 min 30 sec, file: kubeproxy.go, issue: #14204
+    "cap\sback-off\sat\sMaxContainerBackOff"        # 20 min, file: manager.go, PR: #12648
 )

 # Tests which are not able to be run in parallel.
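Each entry in GCE_SLOW_TESTS is a regular expression matched against the full ginkgo spec name, with \s standing in for the literal spaces of the It(...) description, so the new entry lines up with the "should cap back-off at MaxContainerBackOff" test added to test/e2e/pods.go below. A minimal sketch of that matching; the helper program is illustrative only, not the Jenkins runner's actual skip/focus logic:

package main

import (
	"fmt"
	"regexp"
)

func main() {
	// Pattern copied from GCE_SLOW_TESTS above; \s matches the spaces in the spec name.
	slow := regexp.MustCompile(`cap\sback-off\sat\sMaxContainerBackOff`)

	// Full spec name: Describe("Pods") + It("should cap back-off at MaxContainerBackOff").
	spec := "Pods should cap back-off at MaxContainerBackOff"

	fmt.Println(slow.MatchString(spec)) // true, so the test is routed to the slow bucket
}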
@@ -88,8 +88,8 @@ const (
 	// Location of container logs.
 	containerLogsDir = "/var/log/containers"

-	// max backoff period
-	maxContainerBackOff = 300 * time.Second
+	// max backoff period, exported for the e2e test
+	MaxContainerBackOff = 300 * time.Second

 	// Capacity of the channel for storing pods to kill. A small number should
 	// suffice because a goroutine is dedicated to check the channel and does
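The 300-second cap itself is unchanged; the constant is only exported so the e2e suite can import kubelet.MaxContainerBackOff. The restart back-off it caps roughly doubles after each crash until it hits this ceiling. A minimal sketch of that clamped doubling, assuming a 10-second initial delay; the initial value and the helper are assumptions for illustration, not the kubelet's util.Backoff implementation:

package main

import (
	"fmt"
	"time"
)

// nextDelay doubles the previous restart delay and clamps it at max.
// Illustrative only; the real kubelet tracks this per container.
func nextDelay(cur, max time.Duration) time.Duration {
	if cur == 0 {
		return 10 * time.Second // assumed initial delay for this sketch
	}
	if cur*2 > max {
		return max
	}
	return cur * 2
}

func main() {
	const maxContainerBackOff = 300 * time.Second
	var d time.Duration
	for i := 0; i < 8; i++ {
		d = nextDelay(d, maxContainerBackOff)
		fmt.Println(d) // 10s 20s 40s 1m20s 2m40s 5m0s 5m0s 5m0s
	}
}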
@@ -308,7 +308,7 @@ func NewMainKubelet(
 	}

 	procFs := procfs.NewProcFs()
-	imageBackOff := util.NewBackOff(resyncInterval, maxContainerBackOff)
+	imageBackOff := util.NewBackOff(resyncInterval, MaxContainerBackOff)
 	// Initialize the runtime.
 	switch containerRuntime {
 	case "docker":
@@ -425,7 +425,7 @@ func NewMainKubelet(
 		}
 	}

-	klet.backOff = util.NewBackOff(resyncInterval, maxContainerBackOff)
+	klet.backOff = util.NewBackOff(resyncInterval, MaxContainerBackOff)
 	klet.podKillingCh = make(chan *kubecontainer.Pod, podKillingChannelCapacity)

 	klet.sourcesSeen = sets.NewString()
test/e2e/pods.go (256)
@@ -20,6 +20,7 @@ import (
 	"bytes"
 	"fmt"
 	"io"
+	"math"
 	"strconv"
 	"strings"
 	"time"
@@ -31,6 +32,7 @@ import (
 	"k8s.io/kubernetes/pkg/api/resource"
 	client "k8s.io/kubernetes/pkg/client/unversioned"
 	"k8s.io/kubernetes/pkg/fields"
+	"k8s.io/kubernetes/pkg/kubelet"
 	"k8s.io/kubernetes/pkg/labels"
 	"k8s.io/kubernetes/pkg/util"
 	"k8s.io/kubernetes/pkg/util/wait"
@@ -44,6 +46,12 @@ const (
 	defaultObservationTimeout = time.Minute * 2
 )

+var (
+	buildBackOffDuration = time.Minute
+	syncLoopFrequency    = 10 * time.Second
+	maxBackOffTolerance  = time.Duration(1.3 * float64(kubelet.MaxContainerBackOff))
+)
+
 func runLivenessTest(c *client.Client, ns string, podDescr *api.Pod, expectNumRestarts int, timeout time.Duration) {
 	By(fmt.Sprintf("Creating pod %s in namespace %s", podDescr.Name, ns))
 	_, err := c.Pods(ns).Create(podDescr)
@@ -134,6 +142,70 @@ func testHostIP(c *client.Client, ns string, pod *api.Pod) {
	}
}

func runPodFromStruct(framework *Framework, pod *api.Pod) {
	By("submitting the pod to kubernetes")

	podClient := framework.Client.Pods(framework.Namespace.Name)
	pod, err := podClient.Create(pod)
	if err != nil {
		Failf("Failed to create pod: %v", err)
	}

	expectNoError(framework.WaitForPodRunning(pod.Name))

	By("verifying the pod is in kubernetes")
	pod, err = podClient.Get(pod.Name)
	if err != nil {
		Failf("failed to get pod: %v", err)
	}
}

func startPodAndGetBackOffs(framework *Framework, pod *api.Pod, podName string, containerName string, sleepAmount time.Duration) (time.Duration, time.Duration) {
	runPodFromStruct(framework, pod)
	time.Sleep(sleepAmount)

	By("getting restart delay-0")
	_, err := getRestartDelay(framework.Client, pod, framework.Namespace.Name, podName, containerName)
	if err != nil {
		Failf("timed out waiting for container restart in pod=%s/%s", podName, containerName)
	}

	By("getting restart delay-1")
	delay1, err := getRestartDelay(framework.Client, pod, framework.Namespace.Name, podName, containerName)
	if err != nil {
		Failf("timed out waiting for container restart in pod=%s/%s", podName, containerName)
	}

	By("getting restart delay-2")
	delay2, err := getRestartDelay(framework.Client, pod, framework.Namespace.Name, podName, containerName)
	if err != nil {
		Failf("timed out waiting for container restart in pod=%s/%s", podName, containerName)
	}
	return delay1, delay2
}

func getRestartDelay(c *client.Client, pod *api.Pod, ns string, name string, containerName string) (time.Duration, error) {
	beginTime := time.Now()
	for time.Since(beginTime) < (2 * maxBackOffTolerance) { // may just miss the 1st MaxContainerBackOff delay
		time.Sleep(time.Second)
		pod, err := c.Pods(ns).Get(name)
		expectNoError(err, fmt.Sprintf("getting pod %s", name))
		status, ok := api.GetContainerStatus(pod.Status.ContainerStatuses, containerName)
		if !ok {
			Logf("getRestartDelay: status missing")
			continue
		}

		if status.State.Waiting == nil && status.State.Running != nil && status.LastTerminationState.Terminated != nil && status.State.Running.StartedAt.Time.After(beginTime) {
			startedAt := status.State.Running.StartedAt.Time
			finishedAt := status.LastTerminationState.Terminated.FinishedAt.Time
			Logf("getRestartDelay: finishedAt=%s restartedAt=%s (%s)", finishedAt, startedAt, startedAt.Sub(finishedAt))
			return startedAt.Sub(finishedAt), nil
		}
	}
	return 0, fmt.Errorf("timeout getting pod restart delay")
}
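getRestartDelay measures the back-off as the gap between the previous run's termination and the current run's start, and only counts a restart that began after polling started. A toy illustration of that arithmetic with made-up timestamps:

package main

import (
	"fmt"
	"time"
)

func main() {
	// Made-up timestamps standing in for the container status fields used above.
	finishedAt := time.Date(2015, 8, 20, 10, 0, 0, 0, time.UTC) // LastTerminationState.Terminated.FinishedAt
	startedAt := time.Date(2015, 8, 20, 10, 0, 40, 0, time.UTC) // State.Running.StartedAt

	fmt.Println(startedAt.Sub(finishedAt)) // 40s, the observed restart (back-off) delay
}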

var _ = Describe("Pods", func() {
	framework := NewFramework("pods")

@@ -755,6 +827,190 @@ var _ = Describe("Pods", func() {
		}
	})

	It("should have their container restart back-off timer increase exponentially", func() {
		podName := "pod-back-off-exponentially"
		containerName := "back-off"
		podClient := framework.Client.Pods(framework.Namespace.Name)
		pod := &api.Pod{
			ObjectMeta: api.ObjectMeta{
				Name:   podName,
				Labels: map[string]string{"test": "back-off-image"},
			},
			Spec: api.PodSpec{
				Containers: []api.Container{
					{
						Name:    containerName,
						Image:   "gcr.io/google_containers/busybox",
						Command: []string{"/bin/sh", "-c", "sleep 5", "/crash/missing"},
					},
				},
			},
		}

		defer func() {
			By("deleting the pod")
			podClient.Delete(pod.Name, api.NewDeleteOptions(0))
		}()

		delay1, delay2 := startPodAndGetBackOffs(framework, pod, podName, containerName, buildBackOffDuration)
		delay1 += 1 // guard against dividing by zero below
		ratio := float64(delay2) / float64(delay1)
		if math.Floor(ratio) != 2 && math.Ceil(ratio) != 2 {
			Failf("back-off gap is not increasing exponentially pod=%s/%s delay1=%s delay2=%s", podName, containerName, delay1, delay2)
		}
	})
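The check above passes whenever the floor or the ceiling of delay2/delay1 is 2, i.e. any ratio strictly between 1 and 3, which leaves room for sync-loop jitter around the ideal doubling; the one-nanosecond bump keeps the division safe if the first delay was measured as zero. A small illustration with made-up delays:

package main

import (
	"fmt"
	"math"
	"time"
)

// looksExponential applies the same floor/ceil bracketing used in the test,
// fed with invented delays to show what passes and what fails.
func looksExponential(delay1, delay2 time.Duration) bool {
	delay1 += 1 // same guard as above: avoid dividing by zero
	ratio := float64(delay2) / float64(delay1)
	return math.Floor(ratio) == 2 || math.Ceil(ratio) == 2
}

func main() {
	fmt.Println(looksExponential(20*time.Second, 41*time.Second)) // true:  ratio ~2.05, close to doubling
	fmt.Println(looksExponential(10*time.Second, 45*time.Second)) // false: ratio 4.5, grew too fast
}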

	It("should have their auto-restart back-off timer reset on image update", func() {
		podName := "pod-back-off-image"
		containerName := "back-off"
		podClient := framework.Client.Pods(framework.Namespace.Name)
		pod := &api.Pod{
			ObjectMeta: api.ObjectMeta{
				Name:   podName,
				Labels: map[string]string{"test": "back-off-image"},
			},
			Spec: api.PodSpec{
				Containers: []api.Container{
					{
						Name:    containerName,
						Image:   "gcr.io/google_containers/busybox",
						Command: []string{"/bin/sh", "-c", "sleep 5", "/crash/missing"},
					},
				},
			},
		}

		defer func() {
			By("deleting the pod")
			podClient.Delete(pod.Name, api.NewDeleteOptions(0))
		}()

		delay1, delay2 := startPodAndGetBackOffs(framework, pod, podName, containerName, buildBackOffDuration)

		By("updating the image")
		pod, err := podClient.Get(pod.Name)
		if err != nil {
			Failf("failed to get pod: %v", err)
		}
		pod.Spec.Containers[0].Image = "nginx"
		pod, err = podClient.Update(pod)
		if err != nil {
			Failf("error updating pod=%s/%s %v", podName, containerName, err)
		}
		time.Sleep(syncLoopFrequency)
		expectNoError(framework.WaitForPodRunning(pod.Name))

By("get restart delay after image update")
|
||||
		delayAfterUpdate, err := getRestartDelay(framework.Client, pod, framework.Namespace.Name, podName, containerName)
		if err != nil {
			Failf("timed out waiting for container restart in pod=%s/%s", podName, containerName)
		}

		if delayAfterUpdate > delay2 || delayAfterUpdate > delay1 {
			Failf("updating image did not reset the back-off value in pod=%s/%s delayAfterUpdate=%s delay1=%s delay2=%s", podName, containerName, delayAfterUpdate, delay1, delay2)
		}
	})

It("should not back-off restarting a container on LivenessProbe failure", func() {
|
||||
podClient := framework.Client.Pods(framework.Namespace.Name)
|
||||
podName := "pod-back-off-liveness"
|
||||
containerName := "back-off-liveness"
|
||||
pod := &api.Pod{
|
||||
ObjectMeta: api.ObjectMeta{
|
||||
Name: podName,
|
||||
Labels: map[string]string{"test": "liveness"},
|
||||
},
|
||||
Spec: api.PodSpec{
|
||||
Containers: []api.Container{
|
||||
{
|
||||
Name: containerName,
|
||||
Image: "gcr.io/google_containers/busybox",
|
||||
Command: []string{"/bin/sh", "-c", "echo ok >/tmp/health; sleep 5; rm -rf /tmp/health; sleep 600"},
|
||||
LivenessProbe: &api.Probe{
|
||||
Handler: api.Handler{
|
||||
Exec: &api.ExecAction{
|
||||
Command: []string{"cat", "/tmp/health"},
|
||||
},
|
||||
},
|
||||
InitialDelaySeconds: 5,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
defer func() {
|
||||
By("deleting the pod")
|
||||
podClient.Delete(pod.Name, api.NewDeleteOptions(0))
|
||||
}()
|
||||
|
||||
delay1, delay2 := startPodAndGetBackOffs(framework, pod, podName, containerName, buildBackOffDuration)
|
||||
|
||||
if math.Abs(float64(delay2-delay1)) > float64(syncLoopFrequency) {
|
||||
Failf("back-off increasing on LivenessProbe failure delay1=%s delay2=%s", delay1, delay2)
|
||||
}
|
||||
})
|
||||
|
||||
It("should cap back-off at MaxContainerBackOff", func() {
|
||||
podClient := framework.Client.Pods(framework.Namespace.Name)
|
||||
podName := "back-off-cap"
|
||||
containerName := "back-off-cap"
|
||||
pod := &api.Pod{
|
||||
ObjectMeta: api.ObjectMeta{
|
||||
Name: podName,
|
||||
Labels: map[string]string{"test": "liveness"},
|
||||
},
|
||||
Spec: api.PodSpec{
|
||||
Containers: []api.Container{
|
||||
{
|
||||
Name: containerName,
|
||||
Image: "gcr.io/google_containers/busybox",
|
||||
Command: []string{"/bin/sh", "-c", "sleep 5", "/crash/missing"},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
defer func() {
|
||||
By("deleting the pod")
|
||||
podClient.Delete(pod.Name, api.NewDeleteOptions(0))
|
||||
}()
|
||||
|
||||
runPodFromStruct(framework, pod)
|
||||
time.Sleep(2 * kubelet.MaxContainerBackOff) // it takes slightly more than 2*x to get to a back-off of x
|
||||
|
||||
// wait for a delay == capped delay of MaxContainerBackOff
|
||||
By("geting restart delay when capped")
|
||||
		var (
			delay1 time.Duration
			err    error
		)
		for i := 0; i < 3; i++ {
			delay1, err = getRestartDelay(framework.Client, pod, framework.Namespace.Name, podName, containerName)
			if err != nil {
				Failf("timed out waiting for container restart in pod=%s/%s", podName, containerName)
			}

			if delay1 < kubelet.MaxContainerBackOff {
				continue
			}
		}

		if (delay1 < kubelet.MaxContainerBackOff) || (delay1 > maxBackOffTolerance) {
			Failf("expected %s back-off got=%s in delay1", kubelet.MaxContainerBackOff, delay1)
		}

		By("getting restart delay after a capped delay")
		delay2, err := getRestartDelay(framework.Client, pod, framework.Namespace.Name, podName, containerName)
		if err != nil {
			Failf("timed out waiting for container restart in pod=%s/%s", podName, containerName)
		}

		if delay2 < kubelet.MaxContainerBackOff || delay2 > maxBackOffTolerance { // syncloop cumulative drift
			Failf("expected %s back-off got=%s on delay2", kubelet.MaxContainerBackOff, delay2)
		}
	})
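Reaching the cap requires the container to crash enough times for the doubling to climb to 300 seconds, which is why the test sleeps for roughly 2*MaxContainerBackOff before sampling and then polls up to three times. Rough arithmetic, again assuming a 10-second initial delay (an assumption, as in the earlier sketch):

package main

import (
	"fmt"
	"time"
)

func main() {
	const max = 300 * time.Second // kubelet.MaxContainerBackOff
	delay, total := 10*time.Second, time.Duration(0)
	for delay < max {
		total += delay // time spent in the earlier, shorter back-offs
		delay *= 2
	}
	fmt.Println(total) // 5m10s of shorter delays before the first capped 300s delay
}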

	// The following tests for remote command execution and port forwarding are
	// commented out because the GCE environment does not currently have nsenter
	// in the kubelet's PATH, nor does it have socat installed. Once we figure