Merge pull request #43053 from feiskyer/sandbox_gc

Automatic merge from submit-queue (batch tested with PRs 45005, 43053)

kubelet: fix sandbox garbage collection

**What this PR does / why we need it**:

Currently, kuberuntime garbage collection can't distinguish just-created sandboxes from failed sandboxes. Especially when the time from sandbox creation to ready is longer than GC's minAge. In such cases, those sandboxes may be garbage collected early before they are ready.

This PR removes `sandboxMinGCAge` and only garbage collect sandboxes when 

* they are containing no containers at all
* and not the latest sandbox if it is belonging to an existing pod.

**Which issue this PR fixes** 

Fixes #42856.

**Release note**:

```release-note
NONE
```

cc @yujuhong @Random-Liu
pull/6/head
Kubernetes Submit Queue 2017-05-03 01:07:07 -07:00 committed by GitHub
commit 451b7ca7f7
2 changed files with 110 additions and 59 deletions

View File

@ -30,20 +30,6 @@ import (
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
)
// sandboxMinGCAge is the minimum age for an empty sandbox before it is garbage collected.
// This is introduced to avoid a sandbox being garbage collected before its containers are
// created.
// Notice that if the first container of a sandbox is created too late (exceeds sandboxMinGCAge),
// the sandbox could still be garbaged collected. In that case, SyncPod will recreate the
// sandbox and make sure old containers are all stopped.
// In the following figure, 'o' is a stopped sandbox, 'x' is a removed sandbox. It shows
// that, approximately if a sandbox keeps crashing and MinAge = 1/n GC Period, there will
// be 1/n more sandboxes not garbage collected.
// oooooo|xxxxxx|xxxxxx| <--- MinAge = 0
// gc gc gc gc
// oooooo|oooxxx|xxxxxx| <--- MinAge = 1/2 GC Perod
const sandboxMinGCAge time.Duration = 30 * time.Second
// containerGC is the manager of garbage collection.
type containerGC struct {
client internalapi.RuntimeService
@ -72,6 +58,16 @@ type containerGCInfo struct {
createTime time.Time
}
// sandboxGCInfo is the internal information kept for sandboxes being considered for GC.
type sandboxGCInfo struct {
// The ID of the sandbox.
id string
// Creation time for the sandbox.
createTime time.Time
// If true, the sandbox is ready or still has containers.
active bool
}
// evictUnit is considered for eviction as units of (UID, container name) pair.
type evictUnit struct {
// UID of the pod.
@ -81,6 +77,7 @@ type evictUnit struct {
}
type containersByEvictUnit map[evictUnit][]containerGCInfo
type sandboxesByPodUID map[types.UID][]sandboxGCInfo
// NumContainers returns the number of containers in this map.
func (cu containersByEvictUnit) NumContainers() int {
@ -103,6 +100,13 @@ func (a byCreated) Len() int { return len(a) }
func (a byCreated) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
func (a byCreated) Less(i, j int) bool { return a[i].createTime.After(a[j].createTime) }
// Newest first.
type sandboxByCreated []sandboxGCInfo
func (a sandboxByCreated) Len() int { return len(a) }
func (a sandboxByCreated) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
func (a sandboxByCreated) Less(i, j int) bool { return a[i].createTime.After(a[j].createTime) }
// enforceMaxContainersPerEvictUnit enforces MaxPerPodContainer for each evictUnit.
func (cgc *containerGC) enforceMaxContainersPerEvictUnit(evictUnits containersByEvictUnit, MaxContainers int) {
for key := range evictUnits {
@ -118,7 +122,7 @@ func (cgc *containerGC) enforceMaxContainersPerEvictUnit(evictUnits containersBy
func (cgc *containerGC) removeOldestN(containers []containerGCInfo, toRemove int) []containerGCInfo {
// Remove from oldest to newest (last to first).
numToKeep := len(containers) - toRemove
for i := numToKeep; i < len(containers); i++ {
for i := len(containers) - 1; i >= numToKeep; i-- {
if err := cgc.manager.removeContainer(containers[i].id); err != nil {
glog.Errorf("Failed to remove container %q: %v", containers[i].id, err)
}
@ -128,6 +132,18 @@ func (cgc *containerGC) removeOldestN(containers []containerGCInfo, toRemove int
return containers[:numToKeep]
}
// removeOldestNSandboxes removes the oldest inactive toRemove sandboxes and
// returns the resulting slice.
func (cgc *containerGC) removeOldestNSandboxes(sandboxes []sandboxGCInfo, toRemove int) {
// Remove from oldest to newest (last to first).
numToKeep := len(sandboxes) - toRemove
for i := len(sandboxes) - 1; i >= numToKeep; i-- {
if !sandboxes[i].active {
cgc.removeSandbox(sandboxes[i].id)
}
}
}
// removeSandbox removes the sandbox by sandboxID.
func (cgc *containerGC) removeSandbox(sandboxID string) {
glog.V(4).Infof("Removing sandbox %q", sandboxID)
@ -239,9 +255,13 @@ func (cgc *containerGC) evictContainers(gcPolicy kubecontainer.ContainerGCPolicy
return nil
}
// evictSandboxes evicts all sandboxes that are evictable. Evictable sandboxes are: not running
// and contains no containers at all.
func (cgc *containerGC) evictSandboxes(minAge time.Duration) error {
// evictSandboxes remove all evictable sandboxes. An evictable sandbox must
// meet the following requirements:
// 1. not in ready state
// 2. contains no containers.
// 3. belong to a non-existent (i.e., already removed) pod, or is not the
// most recently created sandbox for the pod.
func (cgc *containerGC) evictSandboxes() error {
containers, err := cgc.manager.getKubeletContainers(true)
if err != nil {
return err
@ -252,38 +272,50 @@ func (cgc *containerGC) evictSandboxes(minAge time.Duration) error {
return err
}
evictSandboxes := make([]string, 0)
newestGCTime := time.Now().Add(-minAge)
sandboxesByPod := make(sandboxesByPodUID)
for _, sandbox := range sandboxes {
// Prune out ready sandboxes.
if sandbox.State == runtimeapi.PodSandboxState_SANDBOX_READY {
continue
podUID := types.UID(sandbox.Metadata.Uid)
sandboxInfo := sandboxGCInfo{
id: sandbox.Id,
createTime: time.Unix(0, sandbox.CreatedAt),
}
// Prune out sandboxes that still have containers.
found := false
// Set ready sandboxes to be active.
if sandbox.State == runtimeapi.PodSandboxState_SANDBOX_READY {
sandboxInfo.active = true
}
// Set sandboxes that still have containers to be active.
hasContainers := false
sandboxID := sandbox.Id
for _, container := range containers {
if container.PodSandboxId == sandboxID {
found = true
hasContainers = true
break
}
}
if found {
continue
if hasContainers {
sandboxInfo.active = true
}
// Only garbage collect sandboxes older than sandboxMinGCAge.
createdAt := time.Unix(0, sandbox.CreatedAt)
if createdAt.After(newestGCTime) {
continue
}
glog.V(4).Infof("PodSandbox %q is eligible for garbage collection since it was created before %v: %+v", sandboxID, newestGCTime, sandbox)
evictSandboxes = append(evictSandboxes, sandboxID)
sandboxesByPod[podUID] = append(sandboxesByPod[podUID], sandboxInfo)
}
for _, sandbox := range evictSandboxes {
cgc.removeSandbox(sandbox)
// Sort the sandboxes by age.
for uid := range sandboxesByPod {
sort.Sort(sandboxByCreated(sandboxesByPod[uid]))
}
for podUID, sandboxes := range sandboxesByPod {
if cgc.isPodDeleted(podUID) {
// Remove all evictable sandboxes if the pod has been removed.
// Note that the latest dead sandbox is also removed if there is
// already an active one.
cgc.removeOldestNSandboxes(sandboxes, len(sandboxes))
} else {
// Keep latest one if the pod still exists.
cgc.removeOldestNSandboxes(sandboxes, len(sandboxes)-1)
}
}
return nil
}
@ -342,7 +374,7 @@ func (cgc *containerGC) GarbageCollect(gcPolicy kubecontainer.ContainerGCPolicy,
}
// Remove sandboxes with zero containers
if err := cgc.evictSandboxes(sandboxMinGCAge); err != nil {
if err := cgc.evictSandboxes(); err != nil {
return err
}

View File

@ -34,6 +34,19 @@ func TestSandboxGC(t *testing.T) {
fakeRuntime, _, m, err := createTestRuntimeManager()
assert.NoError(t, err)
fakePodGetter := m.containerGC.podGetter.(*fakePodGetter)
makeGCSandbox := func(pod *v1.Pod, attempt uint32, state runtimeapi.PodSandboxState, withPodGetter bool) sandboxTemplate {
if withPodGetter {
// initialize the pod getter
fakePodGetter.pods[pod.UID] = pod
}
return sandboxTemplate{
pod: pod,
state: state,
attempt: attempt,
}
}
pods := []*v1.Pod{
makeTestPod("foo1", "new", "1234", []v1.Container{
makeTestContainer("bar1", "busybox"),
@ -42,6 +55,9 @@ func TestSandboxGC(t *testing.T) {
makeTestPod("foo2", "new", "5678", []v1.Container{
makeTestContainer("bar3", "busybox"),
}),
makeTestPod("deleted", "new", "9012", []v1.Container{
makeTestContainer("bar4", "busybox"),
}),
}
for c, test := range []struct {
@ -52,55 +68,58 @@ func TestSandboxGC(t *testing.T) {
remain []int // template indexes of remaining sandboxes
}{
{
description: "sandbox with no containers should be garbage collected.",
description: "notready sandboxes without containers for deleted pods should be garbage collected.",
sandboxes: []sandboxTemplate{
{pod: pods[0], state: runtimeapi.PodSandboxState_SANDBOX_NOTREADY},
makeGCSandbox(pods[2], 0, runtimeapi.PodSandboxState_SANDBOX_NOTREADY, false),
},
containers: []containerTemplate{},
remain: []int{},
},
{
description: "running sandbox should not be garbage collected.",
description: "ready sandboxes without containers for deleted pods should not be garbage collected.",
sandboxes: []sandboxTemplate{
{pod: pods[0], state: runtimeapi.PodSandboxState_SANDBOX_READY},
makeGCSandbox(pods[2], 0, runtimeapi.PodSandboxState_SANDBOX_READY, false),
},
containers: []containerTemplate{},
remain: []int{0},
},
{
description: "sandboxes for existing pods should not be garbage collected.",
sandboxes: []sandboxTemplate{
makeGCSandbox(pods[0], 0, runtimeapi.PodSandboxState_SANDBOX_READY, true),
makeGCSandbox(pods[1], 0, runtimeapi.PodSandboxState_SANDBOX_NOTREADY, true),
},
containers: []containerTemplate{},
remain: []int{0, 1},
},
{
description: "sandbox with containers should not be garbage collected.",
sandboxes: []sandboxTemplate{
{pod: pods[0], state: runtimeapi.PodSandboxState_SANDBOX_NOTREADY},
makeGCSandbox(pods[0], 0, runtimeapi.PodSandboxState_SANDBOX_NOTREADY, false),
},
containers: []containerTemplate{
{pod: pods[0], container: &pods[0].Spec.Containers[0], state: runtimeapi.ContainerState_CONTAINER_EXITED},
},
remain: []int{0},
},
{
description: "sandbox within min age should not be garbage collected.",
sandboxes: []sandboxTemplate{
{pod: pods[0], createdAt: time.Now().UnixNano(), state: runtimeapi.PodSandboxState_SANDBOX_NOTREADY},
{pod: pods[1], createdAt: time.Now().Add(-2 * time.Hour).UnixNano(), state: runtimeapi.PodSandboxState_SANDBOX_NOTREADY},
},
containers: []containerTemplate{},
minAge: time.Hour, // assume the test won't take an hour
remain: []int{0},
},
{
description: "multiple sandboxes should be handled properly.",
sandboxes: []sandboxTemplate{
// running sandbox.
{pod: pods[0], attempt: 1, state: runtimeapi.PodSandboxState_SANDBOX_READY},
// exited sandbox with containers.
{pod: pods[1], attempt: 1, state: runtimeapi.PodSandboxState_SANDBOX_NOTREADY},
makeGCSandbox(pods[0], 1, runtimeapi.PodSandboxState_SANDBOX_READY, true),
// exited sandbox without containers.
{pod: pods[1], attempt: 0, state: runtimeapi.PodSandboxState_SANDBOX_NOTREADY},
makeGCSandbox(pods[0], 0, runtimeapi.PodSandboxState_SANDBOX_NOTREADY, false),
// exited sandbox with containers.
makeGCSandbox(pods[1], 1, runtimeapi.PodSandboxState_SANDBOX_NOTREADY, true),
// exited sandbox without containers.
makeGCSandbox(pods[1], 0, runtimeapi.PodSandboxState_SANDBOX_NOTREADY, false),
// exited sandbox without containers for deleted pods.
makeGCSandbox(pods[2], 0, runtimeapi.PodSandboxState_SANDBOX_NOTREADY, false),
},
containers: []containerTemplate{
{pod: pods[1], container: &pods[1].Spec.Containers[0], sandboxAttempt: 1, state: runtimeapi.ContainerState_CONTAINER_EXITED},
},
remain: []int{0, 1},
remain: []int{0, 2},
},
} {
t.Logf("TestCase #%d: %+v", c, test)
@ -109,7 +128,7 @@ func TestSandboxGC(t *testing.T) {
fakeRuntime.SetFakeSandboxes(fakeSandboxes)
fakeRuntime.SetFakeContainers(fakeContainers)
err := m.containerGC.evictSandboxes(test.minAge)
err := m.containerGC.evictSandboxes()
assert.NoError(t, err)
realRemain, err := fakeRuntime.ListPodSandbox(nil)
assert.NoError(t, err)