mirror of https://github.com/k3s-io/k3s
Merge pull request #70694 from mborsz/rcconfig
Don't fail RCConfig.start on node restart.
pull/58/head
commit
46413e7958
|
@@ -21,6 +21,7 @@ import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"math"
|
"math"
|
||||||
"os"
|
"os"
|
||||||
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
@@ -242,6 +243,18 @@ func (p PodDiff) String(ignorePhases sets.String) string {
|
||||||
return ret
|
return ret
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// DeletedPods returns a slice of pods that were present at the beginning
|
||||||
|
// and then disappeared.
|
||||||
|
func (p PodDiff) DeletedPods() []string {
|
||||||
|
var deletedPods []string
|
||||||
|
for podName, podInfo := range p {
|
||||||
|
if podInfo.hostname == nonExist {
|
||||||
|
deletedPods = append(deletedPods, podName)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return deletedPods
|
||||||
|
}
|
||||||
|
|
||||||
// Diff computes a PodDiff given 2 lists of pods.
|
// Diff computes a PodDiff given 2 lists of pods.
|
||||||
func Diff(oldPods []*v1.Pod, curPods []*v1.Pod) PodDiff {
|
func Diff(oldPods []*v1.Pod, curPods []*v1.Pod) PodDiff {
|
||||||
podInfoMap := PodDiff{}
|
podInfoMap := PodDiff{}
|
||||||
|
@@ -765,9 +778,8 @@ func (config *RCConfig) start() error {
|
||||||
pods := ps.List()
|
pods := ps.List()
|
||||||
startupStatus := ComputeRCStartupStatus(pods, config.Replicas)
|
startupStatus := ComputeRCStartupStatus(pods, config.Replicas)
|
||||||
|
|
||||||
pods = startupStatus.Created
|
|
||||||
if config.CreatedPods != nil {
|
if config.CreatedPods != nil {
|
||||||
*config.CreatedPods = pods
|
*config.CreatedPods = startupStatus.Created
|
||||||
}
|
}
|
||||||
if !config.Silent {
|
if !config.Silent {
|
||||||
config.RCConfigLog(startupStatus.String(config.Name))
|
config.RCConfigLog(startupStatus.String(config.Name))
|
||||||
|
@@ -787,16 +799,15 @@ func (config *RCConfig) start() error {
|
||||||
}
|
}
|
||||||
return fmt.Errorf("%d containers failed which is more than allowed %d", startupStatus.FailedContainers, maxContainerFailures)
|
return fmt.Errorf("%d containers failed which is more than allowed %d", startupStatus.FailedContainers, maxContainerFailures)
|
||||||
}
|
}
|
||||||
if len(pods) < len(oldPods) || len(pods) > config.Replicas {
|
|
||||||
// This failure mode includes:
|
diff := Diff(oldPods, pods)
|
||||||
// kubelet is dead, so node controller deleted pods and rc creates more
|
deletedPods := diff.DeletedPods()
|
||||||
// - diagnose by noting the pod diff below.
|
if len(deletedPods) != 0 {
|
||||||
// pod is unhealthy, so replication controller creates another to take its place
|
// There are some pods that have disappeared.
|
||||||
// - diagnose by comparing the previous "2 Pod states" lines for inactive pods
|
err := fmt.Errorf("%d pods disappeared for %s: %v", len(deletedPods), config.Name, strings.Join(deletedPods, ", "))
|
||||||
errorStr := fmt.Sprintf("Number of reported pods for %s changed: %d vs %d", config.Name, len(pods), len(oldPods))
|
config.RCConfigLog(err.Error())
|
||||||
config.RCConfigLog("%v, pods that changed since the last iteration:", errorStr)
|
config.RCConfigLog(diff.String(sets.NewString()))
|
||||||
config.RCConfigLog(Diff(oldPods, pods).String(sets.NewString()))
|
return err
|
||||||
return fmt.Errorf(errorStr)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(pods) > len(oldPods) || startupStatus.Running > oldRunning {
|
if len(pods) > len(oldPods) || startupStatus.Running > oldRunning {
|
||||||
|
|
Loading…
Reference in New Issue