From cb57dc4cb58cf3f551645475f4b6896fbe5b14b5 Mon Sep 17 00:00:00 2001 From: Yu-Ju Hong Date: Fri, 23 Sep 2016 14:04:17 -0700 Subject: [PATCH] kuberuntime: include container hash in backoff keys We should reset the backoff if the content of the container has been updated. --- pkg/kubelet/kuberuntime/helpers.go | 9 ++++ pkg/kubelet/kuberuntime/helpers_test.go | 47 +++++++++++++++++++ .../kuberuntime/kuberuntime_manager.go | 10 ++-- 3 files changed, 61 insertions(+), 5 deletions(-) create mode 100644 pkg/kubelet/kuberuntime/helpers_test.go diff --git a/pkg/kubelet/kuberuntime/helpers.go b/pkg/kubelet/kuberuntime/helpers.go index 67928e3783..d5684f2e48 100644 --- a/pkg/kubelet/kuberuntime/helpers.go +++ b/pkg/kubelet/kuberuntime/helpers.go @@ -18,6 +18,7 @@ package kuberuntime import ( "fmt" + "strconv" "github.com/golang/glog" "k8s.io/kubernetes/pkg/api" @@ -160,3 +161,11 @@ func milliCPUToQuota(milliCPU int64) (quota int64, period int64) { return } + +// getStableKey generates a key (string) to uniquely identify a +// (pod, container) tuple. The key should include the content of the +// container, so that any change to the container generates a new key. +func getStableKey(pod *api.Pod, container *api.Container) string { + hash := strconv.FormatUint(kubecontainer.HashContainer(container), 16) + return fmt.Sprintf("%s_%s_%s_%s_%s", pod.Name, pod.Namespace, string(pod.UID), container.Name, hash) +} diff --git a/pkg/kubelet/kuberuntime/helpers_test.go b/pkg/kubelet/kuberuntime/helpers_test.go new file mode 100644 index 0000000000..c1b2782bbd --- /dev/null +++ b/pkg/kubelet/kuberuntime/helpers_test.go @@ -0,0 +1,47 @@ +/* +Copyright 2016 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package kuberuntime + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "k8s.io/kubernetes/pkg/api" +) + +func TestStableKey(t *testing.T) { + container := &api.Container{ + Name: "test_container", + Image: "foo/image:v1", + } + pod := &api.Pod{ + ObjectMeta: api.ObjectMeta{ + Name: "test_pod", + Namespace: "test_pod_namespace", + UID: "test_pod_uid", + }, + Spec: api.PodSpec{ + Containers: []api.Container{*container}, + }, + } + oldKey := getStableKey(pod, container) + + // Updating the container image should change the key. + container.Image = "foo/image:v2" + newKey := getStableKey(pod, container) + assert.NotEqual(t, oldKey, newKey) +} diff --git a/pkg/kubelet/kuberuntime/kuberuntime_manager.go b/pkg/kubelet/kuberuntime/kuberuntime_manager.go index b5c26f6f00..541e10b763 100644 --- a/pkg/kubelet/kuberuntime/kuberuntime_manager.go +++ b/pkg/kubelet/kuberuntime/kuberuntime_manager.go @@ -627,18 +627,18 @@ func (m *kubeGenericRuntimeManager) doBackOff(pod *api.Pod, container *api.Conta glog.Infof("checking backoff for container %q in pod %q", container.Name, format.Pod(pod)) // Use the finished time of the latest exited container as the start point to calculate whether to do back-off. ts := cStatus.FinishedAt - // backOff requires a unique id to identify the container - stableName := fmt.Sprintf("%s_%s_%s_%s", pod.Name, pod.Namespace, string(pod.UID), container.Name) - if backOff.IsInBackOffSince(stableName, ts) { + // backOff requires a unique key to identify the container. + key := getStableKey(pod, container) + if backOff.IsInBackOffSince(key, ts) { if ref, err := kubecontainer.GenerateContainerRef(pod, container); err == nil { m.recorder.Eventf(ref, api.EventTypeWarning, events.BackOffStartContainer, "Back-off restarting failed container") } - err := fmt.Errorf("Back-off %s restarting failed container=%s pod=%s", backOff.Get(stableName), container.Name, format.Pod(pod)) + err := fmt.Errorf("Back-off %s restarting failed container=%s pod=%s", backOff.Get(key), container.Name, format.Pod(pod)) glog.Infof("%s", err.Error()) return true, err.Error(), kubecontainer.ErrCrashLoopBackOff } - backOff.Next(stableName, ts) + backOff.Next(key, ts) return false, "", nil }