Merge pull request #70694 from mborsz/rcconfig

Don't fail RCConfig.start on node restart.
pull/58/head
k8s-ci-robot 2018-11-07 11:36:59 -08:00 committed by GitHub
commit 46413e7958
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 23 additions and 12 deletions

View File

@ -21,6 +21,7 @@ import (
"fmt" "fmt"
"math" "math"
"os" "os"
"strings"
"sync" "sync"
"time" "time"
@ -242,6 +243,18 @@ func (p PodDiff) String(ignorePhases sets.String) string {
return ret return ret
} }
// DeletedPods collects the names of every pod in the diff whose recorded
// hostname equals nonExist, i.e. pods that were present at the beginning
// and have since disappeared.
//
// The result is nil when no pod has disappeared. Names are appended in the
// order the diff is ranged over, so callers should not rely on any
// particular ordering.
func (p PodDiff) DeletedPods() []string {
	var gone []string
	for name, info := range p {
		// Skip pods that still have a real hostname; only vanished ones count.
		if info.hostname != nonExist {
			continue
		}
		gone = append(gone, name)
	}
	return gone
}
// Diff computes a PodDiff given 2 lists of pods. // Diff computes a PodDiff given 2 lists of pods.
func Diff(oldPods []*v1.Pod, curPods []*v1.Pod) PodDiff { func Diff(oldPods []*v1.Pod, curPods []*v1.Pod) PodDiff {
podInfoMap := PodDiff{} podInfoMap := PodDiff{}
@ -765,9 +778,8 @@ func (config *RCConfig) start() error {
pods := ps.List() pods := ps.List()
startupStatus := ComputeRCStartupStatus(pods, config.Replicas) startupStatus := ComputeRCStartupStatus(pods, config.Replicas)
pods = startupStatus.Created
if config.CreatedPods != nil { if config.CreatedPods != nil {
*config.CreatedPods = pods *config.CreatedPods = startupStatus.Created
} }
if !config.Silent { if !config.Silent {
config.RCConfigLog(startupStatus.String(config.Name)) config.RCConfigLog(startupStatus.String(config.Name))
@ -787,16 +799,15 @@ func (config *RCConfig) start() error {
} }
return fmt.Errorf("%d containers failed which is more than allowed %d", startupStatus.FailedContainers, maxContainerFailures) return fmt.Errorf("%d containers failed which is more than allowed %d", startupStatus.FailedContainers, maxContainerFailures)
} }
if len(pods) < len(oldPods) || len(pods) > config.Replicas {
// This failure mode includes: diff := Diff(oldPods, pods)
// kubelet is dead, so node controller deleted pods and rc creates more deletedPods := diff.DeletedPods()
// - diagnose by noting the pod diff below. if len(deletedPods) != 0 {
// pod is unhealthy, so replication controller creates another to take its place // There are some pods that have disappeared.
// - diagnose by comparing the previous "2 Pod states" lines for inactive pods err := fmt.Errorf("%d pods disappeared for %s: %v", len(deletedPods), config.Name, strings.Join(deletedPods, ", "))
errorStr := fmt.Sprintf("Number of reported pods for %s changed: %d vs %d", config.Name, len(pods), len(oldPods)) config.RCConfigLog(err.Error())
config.RCConfigLog("%v, pods that changed since the last iteration:", errorStr) config.RCConfigLog(diff.String(sets.NewString()))
config.RCConfigLog(Diff(oldPods, pods).String(sets.NewString())) return err
return fmt.Errorf(errorStr)
} }
if len(pods) > len(oldPods) || startupStatus.Running > oldRunning { if len(pods) > len(oldPods) || startupStatus.Running > oldRunning {