Merge pull request #31699 from gmarek/allnodesready

Automatic merge from submit-queue

AllNodesReady waits for all system pods to be running

Fix #29820

cc @pwittrock @dchen1107
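
For context, a rough sketch of how the new helper is meant to be called from a suite's setup, mirroring the SchedulerPredicates change in the diff below. The wrapper name and the import paths are assumptions based on the 1.4-era tree, not part of this PR:

// Usage sketch only, not part of the PR. WaitForAllNodesHealthy and its
// signature come from the diff below; the import paths are assumed to match
// the 1.4-era repository layout.
package example

import (
	"time"

	client "k8s.io/kubernetes/pkg/client/unversioned"
	"k8s.io/kubernetes/test/e2e/framework"
)

// waitForHealthyCluster blocks until every registered node is Ready and each
// non-master node runs the required system pods (kube-proxy,
// fluentd-elasticsearch, node-problem-detector), or fails after one minute.
func waitForHealthyCluster(c *client.Client) error {
	return framework.WaitForAllNodesHealthy(c, time.Minute)
}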
Kubernetes Submit Queue 2016-08-31 07:46:45 -07:00 committed by GitHub
commit a93b979881
2 changed files with 77 additions and 1 deletion


@@ -167,6 +167,13 @@ var (
	// For parsing Kubectl version for version-skewed testing.
	gitVersionRegexp = regexp.MustCompile("GitVersion:\"(v.+?)\"")
	// Slice of regexps for names of pods that have to be running to consider a Node "healthy"
	requiredPerNodePods = []*regexp.Regexp{
		regexp.MustCompile(".*kube-proxy.*"),
		regexp.MustCompile(".*fluentd-elasticsearch.*"),
		regexp.MustCompile(".*node-problem-detector.*"),
	}
)
// GetServerArchitecture fetches the architecture of the cluster's apiserver.
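
Not part of the diff, but as an illustration of how these patterns get used: each required regexp is matched against the names of the running system pods found on a node, and a node counts as healthy only if every pattern finds a match. A small standalone sketch with made-up pod names:

// Standalone illustration; the pod names are hypothetical and the patterns are
// copied from requiredPerNodePods above.
package main

import (
	"fmt"
	"regexp"
)

func main() {
	required := []*regexp.Regexp{
		regexp.MustCompile(".*kube-proxy.*"),
		regexp.MustCompile(".*fluentd-elasticsearch.*"),
		regexp.MustCompile(".*node-problem-detector.*"),
	}
	// Hypothetical running pods on one node; real names depend on the cluster add-ons.
	present := []string{"kube-proxy-e2e-minion-1", "fluentd-elasticsearch-abc12"}

	for _, req := range required {
		found := false
		for _, name := range present {
			if req.MatchString(name) {
				found = true
				break
			}
		}
		fmt.Printf("%s -> running: %v\n", req.String(), found)
	}
	// Prints that kube-proxy and fluentd-elasticsearch are found, while
	// node-problem-detector would be reported as missing.
}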
@@ -4003,7 +4010,10 @@ func WaitForNodeToBe(c *client.Client, name string, conditionType api.NodeCondit
	return false
}
// checks whether all registered nodes are ready
// Checks whether all registered nodes are ready.
// TODO: we should change the AllNodesReady call in AfterEach to WaitForAllNodesHealthy,
// and figure out how to do it in a configurable way, as we can't expect all setups to run
// default test add-ons.
func AllNodesReady(c *client.Client, timeout time.Duration) error {
	Logf("Waiting up to %v for all nodes to be ready", timeout)
@@ -4033,6 +4043,71 @@ func AllNodesReady(c *client.Client, timeout time.Duration) error {
	return nil
}
// checks whether all registered nodes are ready and all required Pods are running on them.
func WaitForAllNodesHealthy(c *client.Client, timeout time.Duration) error {
	Logf("Waiting up to %v for all nodes to be ready", timeout)
	var notReady []api.Node
	var missingPodsPerNode map[string][]string
	err := wait.PollImmediate(Poll, timeout, func() (bool, error) {
		notReady = nil
		// It should be OK to list unschedulable Nodes here.
		nodes, err := c.Nodes().List(api.ListOptions{ResourceVersion: "0"})
		if err != nil {
			return false, err
		}
		for _, node := range nodes.Items {
			if !IsNodeConditionSetAsExpected(&node, api.NodeReady, true) {
				notReady = append(notReady, node)
			}
		}
		pods, err := c.Pods(api.NamespaceAll).List(api.ListOptions{ResourceVersion: "0"})
		if err != nil {
			return false, err
		}
		systemPodsPerNode := make(map[string][]string)
		for _, pod := range pods.Items {
			if pod.Namespace == api.NamespaceSystem && pod.Status.Phase == api.PodRunning {
				if pod.Spec.NodeName != "" {
					systemPodsPerNode[pod.Spec.NodeName] = append(systemPodsPerNode[pod.Spec.NodeName], pod.Name)
				}
			}
		}
		missingPodsPerNode = make(map[string][]string)
		for _, node := range nodes.Items {
			if !system.IsMasterNode(&node) {
				for _, requiredPod := range requiredPerNodePods {
					foundRequired := false
					for _, presentPod := range systemPodsPerNode[node.Name] {
						if requiredPod.MatchString(presentPod) {
							foundRequired = true
							break
						}
					}
					if !foundRequired {
						missingPodsPerNode[node.Name] = append(missingPodsPerNode[node.Name], requiredPod.String())
					}
				}
			}
		}
		return len(notReady) == 0 && len(missingPodsPerNode) == 0, nil
	})
	if err != nil && err != wait.ErrWaitTimeout {
		return err
	}
	if len(notReady) > 0 {
		return fmt.Errorf("Not ready nodes: %v", notReady)
	}
	if len(missingPodsPerNode) > 0 {
		return fmt.Errorf("Not running system Pods: %v", missingPodsPerNode)
	}
	return nil
}
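
The new helper is built around wait.PollImmediate, which evaluates its condition immediately, then once per Poll interval, until the condition returns true, returns an error, or the timeout expires (in which case the poll returns wait.ErrWaitTimeout, which WaitForAllNodesHealthy translates into the descriptive errors above). A minimal standalone sketch of that pattern with a stub condition; the 2-second interval stands in for the framework's Poll constant, and the import path is assumed to match the 1.4-era tree:

// Pattern sketch only; the real helper polls the apiserver instead of a counter.
package main

import (
	"fmt"
	"time"

	"k8s.io/kubernetes/pkg/util/wait"
)

func main() {
	attempts := 0
	// The condition runs immediately, then every 2s, until it reports done,
	// returns an error, or 10s elapse (err would then be wait.ErrWaitTimeout).
	err := wait.PollImmediate(2*time.Second, 10*time.Second, func() (bool, error) {
		attempts++
		return attempts >= 3, nil
	})
	fmt.Printf("attempts=%d err=%v\n", attempts, err)
}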
// Filters nodes in NodeList in place, removing nodes that do not
// satisfy the given condition
// TODO: consider merging with pkg/client/cache.NodeLister


@@ -198,6 +198,7 @@ var _ = framework.KubeDescribe("SchedulerPredicates [Serial]", func() {
		ns = f.Namespace.Name
		nodeList = &api.NodeList{}
		framework.WaitForAllNodesHealthy(c, time.Minute)
		masterNodes, nodeList = framework.GetMasterAndWorkerNodesOrDie(c)
		err := framework.CheckTestingNSDeletedExcept(c, ns)