From 691f0482fbe976d169729a4134bedc0420ecbd42 Mon Sep 17 00:00:00 2001 From: Pengfei Ni Date: Tue, 14 Mar 2017 12:26:29 +0800 Subject: [PATCH 1/2] Fix sandbox garbage collection. Sandboxes are garbage collected only when they are containing no containers at all and not the latest sandbox if it is belonging to an existing pod. --- pkg/kubelet/kuberuntime/kuberuntime_gc.go | 108 ++++++++++++++-------- 1 file changed, 70 insertions(+), 38 deletions(-) diff --git a/pkg/kubelet/kuberuntime/kuberuntime_gc.go b/pkg/kubelet/kuberuntime/kuberuntime_gc.go index a025012374..e60cda17b5 100644 --- a/pkg/kubelet/kuberuntime/kuberuntime_gc.go +++ b/pkg/kubelet/kuberuntime/kuberuntime_gc.go @@ -30,20 +30,6 @@ import ( kubecontainer "k8s.io/kubernetes/pkg/kubelet/container" ) -// sandboxMinGCAge is the minimum age for an empty sandbox before it is garbage collected. -// This is introduced to avoid a sandbox being garbage collected before its containers are -// created. -// Notice that if the first container of a sandbox is created too late (exceeds sandboxMinGCAge), -// the sandbox could still be garbaged collected. In that case, SyncPod will recreate the -// sandbox and make sure old containers are all stopped. -// In the following figure, 'o' is a stopped sandbox, 'x' is a removed sandbox. It shows -// that, approximately if a sandbox keeps crashing and MinAge = 1/n GC Period, there will -// be 1/n more sandboxes not garbage collected. -// oooooo|xxxxxx|xxxxxx| <--- MinAge = 0 -// gc gc gc gc -// oooooo|oooxxx|xxxxxx| <--- MinAge = 1/2 GC Perod -const sandboxMinGCAge time.Duration = 30 * time.Second - // containerGC is the manager of garbage collection. type containerGC struct { client internalapi.RuntimeService @@ -72,6 +58,16 @@ type containerGCInfo struct { createTime time.Time } +// sandboxGCInfo is the internal information kept for sandboxes being considered for GC. +type sandboxGCInfo struct { + // The ID of the sandbox. + id string + // Creation time for the sandbox. + createTime time.Time + // If true, the sandbox is ready or still has containers. + active bool +} + // evictUnit is considered for eviction as units of (UID, container name) pair. type evictUnit struct { // UID of the pod. @@ -81,6 +77,7 @@ type evictUnit struct { } type containersByEvictUnit map[evictUnit][]containerGCInfo +type sandboxesByPodUID map[types.UID][]sandboxGCInfo // NumContainers returns the number of containers in this map. func (cu containersByEvictUnit) NumContainers() int { @@ -103,6 +100,13 @@ func (a byCreated) Len() int { return len(a) } func (a byCreated) Swap(i, j int) { a[i], a[j] = a[j], a[i] } func (a byCreated) Less(i, j int) bool { return a[i].createTime.After(a[j].createTime) } +// Newest first. +type sandboxByCreated []sandboxGCInfo + +func (a sandboxByCreated) Len() int { return len(a) } +func (a sandboxByCreated) Swap(i, j int) { a[i], a[j] = a[j], a[i] } +func (a sandboxByCreated) Less(i, j int) bool { return a[i].createTime.After(a[j].createTime) } + // enforceMaxContainersPerEvictUnit enforces MaxPerPodContainer for each evictUnit. func (cgc *containerGC) enforceMaxContainersPerEvictUnit(evictUnits containersByEvictUnit, MaxContainers int) { for key := range evictUnits { @@ -118,7 +122,7 @@ func (cgc *containerGC) enforceMaxContainersPerEvictUnit(evictUnits containersBy func (cgc *containerGC) removeOldestN(containers []containerGCInfo, toRemove int) []containerGCInfo { // Remove from oldest to newest (last to first). numToKeep := len(containers) - toRemove - for i := numToKeep; i < len(containers); i++ { + for i := len(containers) - 1; i >= numToKeep; i-- { if err := cgc.manager.removeContainer(containers[i].id); err != nil { glog.Errorf("Failed to remove container %q: %v", containers[i].id, err) } @@ -128,6 +132,18 @@ func (cgc *containerGC) removeOldestN(containers []containerGCInfo, toRemove int return containers[:numToKeep] } +// removeOldestNSandboxes removes the oldest inactive toRemove sandboxes and +// returns the resulting slice. +func (cgc *containerGC) removeOldestNSandboxes(sandboxes []sandboxGCInfo, toRemove int) { + // Remove from oldest to newest (last to first). + numToKeep := len(sandboxes) - toRemove + for i := len(sandboxes) - 1; i >= numToKeep; i-- { + if !sandboxes[i].active { + cgc.removeSandbox(sandboxes[i].id) + } + } +} + // removeSandbox removes the sandbox by sandboxID. func (cgc *containerGC) removeSandbox(sandboxID string) { glog.V(4).Infof("Removing sandbox %q", sandboxID) @@ -239,9 +255,13 @@ func (cgc *containerGC) evictContainers(gcPolicy kubecontainer.ContainerGCPolicy return nil } -// evictSandboxes evicts all sandboxes that are evictable. Evictable sandboxes are: not running -// and contains no containers at all. -func (cgc *containerGC) evictSandboxes(minAge time.Duration) error { +// evictSandboxes remove all evictable sandboxes. An evictable sandbox must +// meet the following requirements: +// 1. not in ready state +// 2. contains no containers. +// 3. belong to a non-existent (i.e., already removed) pod, or is not the +// most recently created sandbox for the pod. +func (cgc *containerGC) evictSandboxes() error { containers, err := cgc.manager.getKubeletContainers(true) if err != nil { return err @@ -252,38 +272,50 @@ func (cgc *containerGC) evictSandboxes(minAge time.Duration) error { return err } - evictSandboxes := make([]string, 0) - newestGCTime := time.Now().Add(-minAge) + sandboxesByPod := make(sandboxesByPodUID) for _, sandbox := range sandboxes { - // Prune out ready sandboxes. - if sandbox.State == runtimeapi.PodSandboxState_SANDBOX_READY { - continue + podUID := types.UID(sandbox.Metadata.Uid) + sandboxInfo := sandboxGCInfo{ + id: sandbox.Id, + createTime: time.Unix(0, sandbox.CreatedAt), } - // Prune out sandboxes that still have containers. - found := false + // Set ready sandboxes to be active. + if sandbox.State == runtimeapi.PodSandboxState_SANDBOX_READY { + sandboxInfo.active = true + } + + // Set sandboxes that still have containers to be active. + hasContainers := false sandboxID := sandbox.Id for _, container := range containers { if container.PodSandboxId == sandboxID { - found = true + hasContainers = true break } } - if found { - continue + if hasContainers { + sandboxInfo.active = true } - // Only garbage collect sandboxes older than sandboxMinGCAge. - createdAt := time.Unix(0, sandbox.CreatedAt) - if createdAt.After(newestGCTime) { - continue - } - glog.V(4).Infof("PodSandbox %q is eligible for garbage collection since it was created before %v: %+v", sandboxID, newestGCTime, sandbox) - evictSandboxes = append(evictSandboxes, sandboxID) + sandboxesByPod[podUID] = append(sandboxesByPod[podUID], sandboxInfo) } - for _, sandbox := range evictSandboxes { - cgc.removeSandbox(sandbox) + // Sort the sandboxes by age. + for uid := range sandboxesByPod { + sort.Sort(sandboxByCreated(sandboxesByPod[uid])) + } + + for podUID, sandboxes := range sandboxesByPod { + if cgc.isPodDeleted(podUID) { + // Remove all evictable sandboxes if the pod has been removed. + // Note that the latest dead sandbox is also removed if there is + // already an active one. + cgc.removeOldestNSandboxes(sandboxes, len(sandboxes)) + } else { + // Keep latest one if the pod still exists. + cgc.removeOldestNSandboxes(sandboxes, len(sandboxes)-1) + } } return nil } @@ -342,7 +374,7 @@ func (cgc *containerGC) GarbageCollect(gcPolicy kubecontainer.ContainerGCPolicy, } // Remove sandboxes with zero containers - if err := cgc.evictSandboxes(sandboxMinGCAge); err != nil { + if err := cgc.evictSandboxes(); err != nil { return err } From 1c593bd62caf9f8e67be0c4790913c66f7a06f80 Mon Sep 17 00:00:00 2001 From: Pengfei Ni Date: Wed, 3 May 2017 14:29:07 +0800 Subject: [PATCH 2/2] Update test for sandbox gc --- .../kuberuntime/kuberuntime_gc_test.go | 61 ++++++++++++------- 1 file changed, 40 insertions(+), 21 deletions(-) diff --git a/pkg/kubelet/kuberuntime/kuberuntime_gc_test.go b/pkg/kubelet/kuberuntime/kuberuntime_gc_test.go index 8b3d0ebaae..811b0d2e8b 100644 --- a/pkg/kubelet/kuberuntime/kuberuntime_gc_test.go +++ b/pkg/kubelet/kuberuntime/kuberuntime_gc_test.go @@ -34,6 +34,19 @@ func TestSandboxGC(t *testing.T) { fakeRuntime, _, m, err := createTestRuntimeManager() assert.NoError(t, err) + fakePodGetter := m.containerGC.podGetter.(*fakePodGetter) + makeGCSandbox := func(pod *v1.Pod, attempt uint32, state runtimeapi.PodSandboxState, withPodGetter bool) sandboxTemplate { + if withPodGetter { + // initialize the pod getter + fakePodGetter.pods[pod.UID] = pod + } + return sandboxTemplate{ + pod: pod, + state: state, + attempt: attempt, + } + } + pods := []*v1.Pod{ makeTestPod("foo1", "new", "1234", []v1.Container{ makeTestContainer("bar1", "busybox"), @@ -42,6 +55,9 @@ func TestSandboxGC(t *testing.T) { makeTestPod("foo2", "new", "5678", []v1.Container{ makeTestContainer("bar3", "busybox"), }), + makeTestPod("deleted", "new", "9012", []v1.Container{ + makeTestContainer("bar4", "busybox"), + }), } for c, test := range []struct { @@ -52,55 +68,58 @@ func TestSandboxGC(t *testing.T) { remain []int // template indexes of remaining sandboxes }{ { - description: "sandbox with no containers should be garbage collected.", + description: "notready sandboxes without containers for deleted pods should be garbage collected.", sandboxes: []sandboxTemplate{ - {pod: pods[0], state: runtimeapi.PodSandboxState_SANDBOX_NOTREADY}, + makeGCSandbox(pods[2], 0, runtimeapi.PodSandboxState_SANDBOX_NOTREADY, false), }, containers: []containerTemplate{}, remain: []int{}, }, { - description: "running sandbox should not be garbage collected.", + description: "ready sandboxes without containers for deleted pods should not be garbage collected.", sandboxes: []sandboxTemplate{ - {pod: pods[0], state: runtimeapi.PodSandboxState_SANDBOX_READY}, + makeGCSandbox(pods[2], 0, runtimeapi.PodSandboxState_SANDBOX_READY, false), }, containers: []containerTemplate{}, remain: []int{0}, }, + { + description: "sandboxes for existing pods should not be garbage collected.", + sandboxes: []sandboxTemplate{ + makeGCSandbox(pods[0], 0, runtimeapi.PodSandboxState_SANDBOX_READY, true), + makeGCSandbox(pods[1], 0, runtimeapi.PodSandboxState_SANDBOX_NOTREADY, true), + }, + containers: []containerTemplate{}, + remain: []int{0, 1}, + }, { description: "sandbox with containers should not be garbage collected.", sandboxes: []sandboxTemplate{ - {pod: pods[0], state: runtimeapi.PodSandboxState_SANDBOX_NOTREADY}, + makeGCSandbox(pods[0], 0, runtimeapi.PodSandboxState_SANDBOX_NOTREADY, false), }, containers: []containerTemplate{ {pod: pods[0], container: &pods[0].Spec.Containers[0], state: runtimeapi.ContainerState_CONTAINER_EXITED}, }, remain: []int{0}, }, - { - description: "sandbox within min age should not be garbage collected.", - sandboxes: []sandboxTemplate{ - {pod: pods[0], createdAt: time.Now().UnixNano(), state: runtimeapi.PodSandboxState_SANDBOX_NOTREADY}, - {pod: pods[1], createdAt: time.Now().Add(-2 * time.Hour).UnixNano(), state: runtimeapi.PodSandboxState_SANDBOX_NOTREADY}, - }, - containers: []containerTemplate{}, - minAge: time.Hour, // assume the test won't take an hour - remain: []int{0}, - }, { description: "multiple sandboxes should be handled properly.", sandboxes: []sandboxTemplate{ // running sandbox. - {pod: pods[0], attempt: 1, state: runtimeapi.PodSandboxState_SANDBOX_READY}, - // exited sandbox with containers. - {pod: pods[1], attempt: 1, state: runtimeapi.PodSandboxState_SANDBOX_NOTREADY}, + makeGCSandbox(pods[0], 1, runtimeapi.PodSandboxState_SANDBOX_READY, true), // exited sandbox without containers. - {pod: pods[1], attempt: 0, state: runtimeapi.PodSandboxState_SANDBOX_NOTREADY}, + makeGCSandbox(pods[0], 0, runtimeapi.PodSandboxState_SANDBOX_NOTREADY, false), + // exited sandbox with containers. + makeGCSandbox(pods[1], 1, runtimeapi.PodSandboxState_SANDBOX_NOTREADY, true), + // exited sandbox without containers. + makeGCSandbox(pods[1], 0, runtimeapi.PodSandboxState_SANDBOX_NOTREADY, false), + // exited sandbox without containers for deleted pods. + makeGCSandbox(pods[2], 0, runtimeapi.PodSandboxState_SANDBOX_NOTREADY, false), }, containers: []containerTemplate{ {pod: pods[1], container: &pods[1].Spec.Containers[0], sandboxAttempt: 1, state: runtimeapi.ContainerState_CONTAINER_EXITED}, }, - remain: []int{0, 1}, + remain: []int{0, 2}, }, } { t.Logf("TestCase #%d: %+v", c, test) @@ -109,7 +128,7 @@ func TestSandboxGC(t *testing.T) { fakeRuntime.SetFakeSandboxes(fakeSandboxes) fakeRuntime.SetFakeContainers(fakeContainers) - err := m.containerGC.evictSandboxes(test.minAge) + err := m.containerGC.evictSandboxes() assert.NoError(t, err) realRemain, err := fakeRuntime.ListPodSandbox(nil) assert.NoError(t, err)