2015-06-03 23:18:26 +00:00
|
|
|
/*
|
|
|
|
Copyright 2015 The Kubernetes Authors All rights reserved.
|
|
|
|
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
you may not use this file except in compliance with the License.
|
|
|
|
You may obtain a copy of the License at
|
|
|
|
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
See the License for the specific language governing permissions and
|
|
|
|
limitations under the License.
|
|
|
|
*/
|
|
|
|
|
|
|
|
package e2e
|
|
|
|
|
|
|
|
import (
|
|
|
|
"time"
|
|
|
|
|
2015-08-05 22:03:47 +00:00
|
|
|
"k8s.io/kubernetes/pkg/api"
|
|
|
|
"k8s.io/kubernetes/pkg/fields"
|
|
|
|
"k8s.io/kubernetes/pkg/labels"
|
|
|
|
"k8s.io/kubernetes/pkg/util/wait"
|
2015-06-03 23:18:26 +00:00
|
|
|
|
|
|
|
. "github.com/onsi/ginkgo"
|
|
|
|
. "github.com/onsi/gomega"
|
|
|
|
)
|
|
|
|
|
|
|
|
var _ = Describe("Etcd failure", func() {
|
|
|
|
|
2015-06-22 21:14:54 +00:00
|
|
|
var skipped bool
|
2015-10-27 12:14:57 +00:00
|
|
|
framework := Framework{BaseName: "etcd-failure"}
|
2015-06-03 23:18:26 +00:00
|
|
|
|
|
|
|
BeforeEach(func() {
|
2015-07-20 21:53:39 +00:00
|
|
|
// This test requires:
|
|
|
|
// - SSH
|
|
|
|
// - master access
|
|
|
|
// ... so the provider check should be identical to the intersection of
|
|
|
|
// providers that provide those capabilities.
|
2015-06-22 21:14:54 +00:00
|
|
|
skipped = true
|
|
|
|
SkipUnlessProviderIs("gce")
|
|
|
|
skipped = false
|
|
|
|
|
2015-10-27 12:14:57 +00:00
|
|
|
framework.beforeEach()
|
|
|
|
|
2015-06-03 23:18:26 +00:00
|
|
|
Expect(RunRC(RCConfig{
|
|
|
|
Client: framework.Client,
|
|
|
|
Name: "baz",
|
|
|
|
Namespace: framework.Namespace.Name,
|
2015-10-12 22:14:59 +00:00
|
|
|
Image: "beta.gcr.io/google_containers/pause:2.0",
|
2015-06-03 23:18:26 +00:00
|
|
|
Replicas: 1,
|
|
|
|
})).NotTo(HaveOccurred())
|
|
|
|
})
|
|
|
|
|
2015-10-27 12:14:57 +00:00
|
|
|
AfterEach(func() {
|
|
|
|
if skipped {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
framework.afterEach()
|
|
|
|
})
|
|
|
|
|
2015-06-03 23:18:26 +00:00
|
|
|
It("should recover from network partition with master", func() {
|
|
|
|
etcdFailTest(
|
|
|
|
framework,
|
|
|
|
"sudo iptables -A INPUT -p tcp --destination-port 4001 -j DROP",
|
|
|
|
"sudo iptables -D INPUT -p tcp --destination-port 4001 -j DROP",
|
|
|
|
)
|
|
|
|
})
|
|
|
|
|
|
|
|
It("should recover from SIGKILL", func() {
|
|
|
|
etcdFailTest(
|
|
|
|
framework,
|
|
|
|
"pgrep etcd | xargs -I {} sudo kill -9 {}",
|
|
|
|
"echo 'do nothing. monit should restart etcd.'",
|
|
|
|
)
|
|
|
|
})
|
|
|
|
})
|
|
|
|
|
2015-10-27 12:14:57 +00:00
|
|
|
func etcdFailTest(framework Framework, failCommand, fixCommand string) {
|
2015-06-24 21:54:51 +00:00
|
|
|
doEtcdFailure(failCommand, fixCommand)
|
2015-06-03 23:18:26 +00:00
|
|
|
|
|
|
|
checkExistingRCRecovers(framework)
|
|
|
|
|
2015-10-27 12:14:57 +00:00
|
|
|
ServeImageOrFail(&framework, "basic", "gcr.io/google_containers/serve_hostname:1.1")
|
2015-06-03 23:18:26 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// etcdFailureDuration is how long etcd is left in the failed state.
// doEtcdFailure executes the failCommand once on the master at the beginning
// of this window and sleeps for the duration. After this duration, it executes
// the fixCommand on the master, and the test goes on to assert that etcd and
// the kubernetes components recover.
const etcdFailureDuration = 20 * time.Second
|
|
|
|
|
2015-06-24 21:54:51 +00:00
|
|
|
func doEtcdFailure(failCommand, fixCommand string) {
|
2015-06-03 23:18:26 +00:00
|
|
|
By("failing etcd")
|
|
|
|
|
2015-06-24 21:54:51 +00:00
|
|
|
masterExec(failCommand)
|
|
|
|
time.Sleep(etcdFailureDuration)
|
2015-06-03 23:18:26 +00:00
|
|
|
masterExec(fixCommand)
|
|
|
|
}
|
|
|
|
|
|
|
|
func masterExec(cmd string) {
|
2015-07-24 11:36:16 +00:00
|
|
|
stdout, stderr, code, err := SSH(cmd, getMasterHost()+":22", testContext.Provider)
|
2015-06-03 23:18:26 +00:00
|
|
|
Expect(err).NotTo(HaveOccurred())
|
|
|
|
if code != 0 {
|
|
|
|
Failf("master exec command, '%v' failed with exitcode %v: \n\tstdout: %v\n\tstderr: %v", cmd, code, stdout, stderr)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-10-27 12:14:57 +00:00
|
|
|
func checkExistingRCRecovers(f Framework) {
|
2015-06-03 23:18:26 +00:00
|
|
|
By("assert that the pre-existing replication controller recovers")
|
|
|
|
podClient := f.Client.Pods(f.Namespace.Name)
|
|
|
|
rcSelector := labels.Set{"name": "baz"}.AsSelector()
|
|
|
|
|
|
|
|
By("deleting pods from existing replication controller")
|
2015-06-24 21:54:51 +00:00
|
|
|
expectNoError(wait.Poll(time.Millisecond*500, time.Second*60, func() (bool, error) {
|
2015-06-09 16:37:43 +00:00
|
|
|
pods, err := podClient.List(rcSelector, fields.Everything())
|
2015-06-22 22:44:30 +00:00
|
|
|
if err != nil {
|
|
|
|
Logf("apiserver returned error, as expected before recovery: %v", err)
|
|
|
|
return false, nil
|
|
|
|
}
|
2015-06-09 16:37:43 +00:00
|
|
|
if len(pods.Items) == 0 {
|
|
|
|
return false, nil
|
|
|
|
}
|
|
|
|
for _, pod := range pods.Items {
|
|
|
|
err = podClient.Delete(pod.Name, api.NewDeleteOptions(0))
|
|
|
|
Expect(err).NotTo(HaveOccurred())
|
|
|
|
}
|
2015-06-22 22:44:30 +00:00
|
|
|
Logf("apiserver has recovered")
|
2015-06-09 16:37:43 +00:00
|
|
|
return true, nil
|
|
|
|
}))
|
2015-06-03 23:18:26 +00:00
|
|
|
|
|
|
|
By("waiting for replication controller to recover")
|
2015-06-24 21:54:51 +00:00
|
|
|
expectNoError(wait.Poll(time.Millisecond*500, time.Second*60, func() (bool, error) {
|
2015-06-03 23:18:26 +00:00
|
|
|
pods, err := podClient.List(rcSelector, fields.Everything())
|
|
|
|
Expect(err).NotTo(HaveOccurred())
|
|
|
|
for _, pod := range pods.Items {
|
2015-08-19 15:35:00 +00:00
|
|
|
if pod.DeletionTimestamp == nil && api.IsPodReady(&pod) {
|
2015-06-03 23:18:26 +00:00
|
|
|
return true, nil
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return false, nil
|
|
|
|
}))
|
|
|
|
}
|