/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package e2e

import (
	"fmt"
	"time"

	"k8s.io/kubernetes/pkg/api"
	"k8s.io/kubernetes/pkg/fields"
	"k8s.io/kubernetes/pkg/labels"
	"k8s.io/kubernetes/pkg/util/wait"
	"k8s.io/kubernetes/test/e2e/framework"

	. "github.com/onsi/ginkgo"
	. "github.com/onsi/gomega"
)
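
// This suite restarts every node in the cluster and verifies that the cluster
// returns to a healthy state: the same number of nodes become ready again,
// and the same number of system pods end up running and ready.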
var _ = framework.KubeDescribe("Restart [Disruptive]", func() {
	f := framework.NewDefaultFramework("restart")
	var ps *framework.PodStore

	BeforeEach(func() {
		// This test requires the ability to restart all nodes, so the provider
		// check must be identical to that call.
		framework.SkipUnlessProviderIs("gce", "gke")
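
		// Watch all pods in the system namespace so that the set of pods seen
		// before the restart can be compared with the set seen afterwards.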
		ps = framework.NewPodStore(f.Client, api.NamespaceSystem, labels.Everything(), fields.Everything())
	})

	AfterEach(func() {
		if ps != nil {
			ps.Stop()
		}
	})

	It("should restart all nodes and ensure all nodes and pods recover", func() {
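		// The number of nodes the cluster is configured with; both readiness
		// checks below expect to see exactly this many nodes.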
		nn := framework.TestContext.CloudConfig.NumNodes

		By("ensuring all nodes are ready")
		nodeNamesBefore, err := framework.CheckNodesReady(f.Client, framework.NodeReadyInitialTimeout, nn)
		Expect(err).NotTo(HaveOccurred())
		framework.Logf("Got the following nodes before restart: %v", nodeNamesBefore)

		By("ensuring all pods are running and ready")
		pods := ps.List()
		podNamesBefore := make([]string, len(pods))
		for i, p := range pods {
			podNamesBefore[i] = p.ObjectMeta.Name
		}
		ns := api.NamespaceSystem
		if !framework.CheckPodsRunningReadyOrSucceeded(f.Client, ns, podNamesBefore, framework.PodReadyBeforeTimeout) {
			framework.Failf("At least one pod wasn't running and ready or succeeded at test start.")
		}

		By("restarting all of the nodes")
		err = restartNodes(framework.TestContext.Provider, framework.RestartPerNodeTimeout)
		Expect(err).NotTo(HaveOccurred())

		By("ensuring all nodes are ready after the restart")
		nodeNamesAfter, err := framework.CheckNodesReady(f.Client, framework.RestartNodeReadyAgainTimeout, nn)
		Expect(err).NotTo(HaveOccurred())
		framework.Logf("Got the following nodes after restart: %v", nodeNamesAfter)

		// Make sure that we have the same number of nodes. We're not checking
		// that the names match because that's implementation specific.
		By("ensuring the same number of nodes exist after the restart")
		if len(nodeNamesBefore) != len(nodeNamesAfter) {
			framework.Failf("Had %d nodes before nodes were restarted, but now only have %d",
				len(nodeNamesBefore), len(nodeNamesAfter))
		}

		// Make sure that we have the same number of pods. We're not checking
		// that the names match because they are recreated with different names
		// across node restarts.
		By("ensuring the same number of pods are running and ready after restart")
		podCheckStart := time.Now()
		podNamesAfter, err := waitForNPods(ps, len(podNamesBefore), framework.RestartPodReadyAgainTimeout)
		Expect(err).NotTo(HaveOccurred())
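		// waitForNPods has already consumed part of the pod-ready budget, so
		// only the remainder is available for the readiness check itself.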
		remaining := framework.RestartPodReadyAgainTimeout - time.Since(podCheckStart)
		if !framework.CheckPodsRunningReadyOrSucceeded(f.Client, ns, podNamesAfter, remaining) {
			framework.Failf("At least one pod wasn't running and ready after the restart.")
		}
	})
})

// waitForNPods tries to list pods using ps until it finds expect of them,
// returning their names if it can do so before timeout.
func waitForNPods(ps *framework.PodStore, expect int, timeout time.Duration) ([]string, error) {
	// Loop until we find expect pods or timeout is passed.
	var pods []*api.Pod
	var errLast error
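	// wait.Poll returns nil only if the condition function reports success
	// before the timeout, so comparing against nil tells us whether the
	// expected pod count was ever observed.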
	found := wait.Poll(framework.Poll, timeout, func() (bool, error) {
		pods = ps.List()
		if len(pods) != expect {
			errLast = fmt.Errorf("expected to find %d pods but found only %d", expect, len(pods))
			framework.Logf("Error getting pods: %v", errLast)
			return false, nil
		}
		return true, nil
	}) == nil
	// Extract the names of all found pods.
	podNames := make([]string, len(pods))
	for i, p := range pods {
		podNames[i] = p.ObjectMeta.Name
	}
	if !found {
		return podNames, fmt.Errorf("couldn't find %d pods within %v; last error: %v",
			expect, timeout, errLast)
	}
	return podNames, nil
}

// restartNodes uses provider to do a restart of all nodes in the cluster,
// allowing up to nt per node.
func restartNodes(provider string, nt time.Duration) error {
	switch provider {
	case "gce", "gke":
		return migRollingUpdateSelf(nt)
	default:
		return fmt.Errorf("restartNodes(...) not implemented for %s", provider)
	}
}

// TODO(marekbiskup): Switch this to MIG recreate-instances. This can be done
// with the following bash, but needs to be written in Go:
//
// # Step 1: Get instance names.
// list=$(gcloud compute instance-groups --project=${PROJECT} --zone=${ZONE} instances --group=${GROUP} list)
// i=""
// for l in $list; do
//   i="${l##*/},${i}"
// done
//
// # Step 2: Start the recreate.
// output=$(gcloud compute instance-groups managed --project=${PROJECT} --zone=${ZONE} recreate-instances ${GROUP} --instance="${i}")
// op=${output##*:}
//
// # Step 3: Wait until it's complete.
// status=""
// while [[ "${status}" != "DONE" ]]; do
//   output=$(gcloud compute instance-groups managed --zone="${ZONE}" get-operation ${op} | grep status)
//   status=${output##*:}
// done
func migRollingUpdateSelf(nt time.Duration) error {
	By("getting the name of the template for the managed instance group")
	tmpl, err := framework.MigTemplate()
	if err != nil {
		return fmt.Errorf("couldn't get MIG template name: %v", err)
	}
	return framework.MigRollingUpdate(tmpl, nt)
}
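
// migRecreateInstancesSketch is a rough, hedged sketch of the Go translation
// requested by the TODO above migRollingUpdateSelf. It shells out to the same
// gcloud commands as the bash in that comment. The function name, its
// project/zone/group parameters, and the DONE substring check are
// illustrative assumptions, not part of the original test.
func migRecreateInstancesSketch(project, zone, group string, timeout time.Duration) error {
	// Step 1: Get instance names, keeping only the last path component of
	// each listed URL (the bash ${l##*/} expansion).
	out, err := exec.Command("gcloud", "compute", "instance-groups",
		"--project="+project, "--zone="+zone, "instances", "--group="+group, "list").CombinedOutput()
	if err != nil {
		return fmt.Errorf("listing instances: %v (output: %q)", err, string(out))
	}
	var names []string
	for _, l := range strings.Fields(string(out)) {
		names = append(names, l[strings.LastIndex(l, "/")+1:])
	}

	// Step 2: Start the recreate and keep the operation name after the last
	// colon, mirroring ${output##*:}.
	out, err = exec.Command("gcloud", "compute", "instance-groups", "managed",
		"--project="+project, "--zone="+zone, "recreate-instances", group,
		"--instance="+strings.Join(names, ",")).CombinedOutput()
	if err != nil {
		return fmt.Errorf("starting recreate: %v (output: %q)", err, string(out))
	}
	s := strings.TrimSpace(string(out))
	op := strings.TrimSpace(s[strings.LastIndex(s, ":")+1:])

	// Step 3: Poll get-operation until its status reports DONE.
	return wait.Poll(framework.Poll, timeout, func() (bool, error) {
		out, err := exec.Command("gcloud", "compute", "instance-groups", "managed",
			"--zone="+zone, "get-operation", op).CombinedOutput()
		if err != nil {
			return false, nil
		}
		return strings.Contains(string(out), "DONE"), nil
	})
}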