mirror of https://github.com/k3s-io/k3s

E2E test to verify pod failover during node power-off

parent 2d64ce5e8e
commit 3db4f2b843
@@ -25,6 +25,7 @@ import (
    "k8s.io/api/core/v1"
    extensions "k8s.io/api/extensions/v1beta1"
    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    "k8s.io/apimachinery/pkg/util/uuid"
    "k8s.io/apimachinery/pkg/util/wait"
    "k8s.io/apimachinery/pkg/watch"
    clientset "k8s.io/client-go/kubernetes"
@@ -211,3 +212,86 @@ func WaitForDeploymentRevision(c clientset.Interface, d *extensions.Deployment,
func CheckDeploymentRevisionAndImage(c clientset.Interface, ns, deploymentName, revision, image string) error {
    return testutils.CheckDeploymentRevisionAndImage(c, ns, deploymentName, revision, image)
}

func CreateDeployment(client clientset.Interface, replicas int32, podLabels map[string]string, namespace string, pvclaims []*v1.PersistentVolumeClaim, command string) (*extensions.Deployment, error) {
    deploymentSpec := MakeDeployment(replicas, podLabels, namespace, pvclaims, false, command)
    deployment, err := client.Extensions().Deployments(namespace).Create(deploymentSpec)
    if err != nil {
        return nil, fmt.Errorf("deployment %q Create API error: %v", deploymentSpec.Name, err)
    }
    Logf("Waiting for deployment %q to complete", deploymentSpec.Name)
    err = WaitForDeploymentComplete(client, deployment)
    if err != nil {
        return nil, fmt.Errorf("deployment %q failed to complete: %v", deploymentSpec.Name, err)
    }
    return deployment, nil
}

// MakeDeployment creates a deployment definition based on the namespace. The deployment references the PVC's
// name. A slice of BASH commands can be supplied as args to be run by the pod.
func MakeDeployment(replicas int32, podLabels map[string]string, namespace string, pvclaims []*v1.PersistentVolumeClaim, isPrivileged bool, command string) *extensions.Deployment {
    if len(command) == 0 {
        command = "while true; do sleep 1; done"
    }
    zero := int64(0)
    deploymentName := "deployment-" + string(uuid.NewUUID())
    deploymentSpec := &extensions.Deployment{
        ObjectMeta: metav1.ObjectMeta{
            Name:      deploymentName,
            Namespace: namespace,
        },
        Spec: extensions.DeploymentSpec{
            Replicas: &replicas,
            Template: v1.PodTemplateSpec{
                ObjectMeta: metav1.ObjectMeta{
                    Labels: podLabels,
                },
                Spec: v1.PodSpec{
                    TerminationGracePeriodSeconds: &zero,
                    Containers: []v1.Container{
                        {
                            Name:    "write-pod",
                            Image:   "busybox",
                            Command: []string{"/bin/sh"},
                            Args:    []string{"-c", command},
                            SecurityContext: &v1.SecurityContext{
                                Privileged: &isPrivileged,
                            },
                        },
                    },
                    RestartPolicy: v1.RestartPolicyAlways,
                },
            },
        },
    }
    var volumeMounts = make([]v1.VolumeMount, len(pvclaims))
    var volumes = make([]v1.Volume, len(pvclaims))
    for index, pvclaim := range pvclaims {
        volumename := fmt.Sprintf("volume%v", index+1)
        volumeMounts[index] = v1.VolumeMount{Name: volumename, MountPath: "/mnt/" + volumename}
        volumes[index] = v1.Volume{Name: volumename, VolumeSource: v1.VolumeSource{PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{ClaimName: pvclaim.Name, ReadOnly: false}}}
    }
    deploymentSpec.Spec.Template.Spec.Containers[0].VolumeMounts = volumeMounts
    deploymentSpec.Spec.Template.Spec.Volumes = volumes
    return deploymentSpec
}

// GetPodsForDeployment gets pods for the given deployment
func GetPodsForDeployment(client clientset.Interface, deployment *extensions.Deployment) (*v1.PodList, error) {
    replicaSet, err := deploymentutil.GetNewReplicaSet(deployment, client.ExtensionsV1beta1())
    if err != nil {
        return nil, fmt.Errorf("Failed to get new replica set for deployment %q: %v", deployment.Name, err)
    }
    if replicaSet == nil {
        return nil, fmt.Errorf("expected a new replica set for deployment %q, found none", deployment.Name)
    }
    podListFunc := func(namespace string, options metav1.ListOptions) (*v1.PodList, error) {
        return client.Core().Pods(namespace).List(options)
    }
    rsList := []*extensions.ReplicaSet{replicaSet}
    podList, err := deploymentutil.ListPods(deployment, rsList, podListFunc)
    if err != nil {
        return nil, fmt.Errorf("Failed to list Pods of Deployment %q: %v", deployment.Name, err)
    }
    return podList, nil
}
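CreateDeployment and GetPodsForDeployment are meant to be used together from a storage e2e test: the first provisions a deployment whose pod mounts the given PVCs, the second looks up the pods that deployment produced. A minimal caller-side sketch, mirroring how the new node-poweroff test below uses these helpers (client, namespace, and pvclaims are assumed to come from the surrounding test):

    // Create a single-replica deployment backed by the PVCs, then find the node its pod landed on.
    deployment, err := framework.CreateDeployment(client, int32(1), map[string]string{"test": "app"}, namespace, pvclaims, "")
    Expect(err).NotTo(HaveOccurred())
    defer client.Extensions().Deployments(namespace).Delete(deployment.Name, &metav1.DeleteOptions{})

    podList, err := framework.GetPodsForDeployment(client, deployment)
    Expect(err).NotTo(HaveOccurred())
    Expect(podList.Items).NotTo(BeEmpty())
    // The node name is what the poweroff test later uses to locate the VM to power off.
    nodeName := types.NodeName(podList.Items[0].Spec.NodeName)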

@@ -33,6 +33,7 @@ go_library(
        "vsphere_volume_disksize.go",
        "vsphere_volume_fstype.go",
        "vsphere_volume_master_restart.go",
        "vsphere_volume_node_poweroff.go",
        "vsphere_volume_ops_storm.go",
        "vsphere_volume_perf.go",
        "vsphere_volume_placement.go",

@@ -63,6 +64,7 @@ go_library(
        "//vendor/google.golang.org/api/googleapi:go_default_library",
        "//vendor/k8s.io/api/batch/v1:go_default_library",
        "//vendor/k8s.io/api/core/v1:go_default_library",
        "//vendor/k8s.io/api/extensions/v1beta1:go_default_library",
        "//vendor/k8s.io/api/policy/v1beta1:go_default_library",
        "//vendor/k8s.io/api/rbac/v1beta1:go_default_library",
        "//vendor/k8s.io/api/storage/v1:go_default_library",

@@ -46,6 +46,14 @@ const (
    storageclass4 = "sc-user-specified-ds"
)

// volumeState represents the state of a volume.
type volumeState int32

const (
    volumeStateDetached volumeState = 1
    volumeStateAttached volumeState = 2
)

// Sanity check for vSphere testing. Verify the persistent disk attached to the node.
func verifyVSphereDiskAttached(vsp *vsphere.VSphere, volumePath string, nodeName types.NodeName) (bool, error) {
    var (
@@ -101,40 +109,58 @@ func waitForVSphereDisksToDetach(vsp *vsphere.VSphere, nodeVolumes map[k8stype.N
    return nil
}

// Wait until vsphere vmdk is detached from the given node or time out after 5 minutes
func waitForVSphereDiskToDetach(vsp *vsphere.VSphere, volumePath string, nodeName types.NodeName) error {
// Wait until vsphere vmdk moves to expected state on the given node, or time out after 6 minutes
func waitForVSphereDiskStatus(vsp *vsphere.VSphere, volumePath string, nodeName types.NodeName, expectedState volumeState) error {
    var (
        err            error
        diskAttached   = true
        detachTimeout  = 5 * time.Minute
        detachPollTime = 10 * time.Second
        err          error
        diskAttached bool
        currentState volumeState
        timeout      = 6 * time.Minute
        pollTime     = 10 * time.Second
    )
    if vsp == nil {
        vsp, err = vsphere.GetVSphere()
        if err != nil {
            return err
        }

    var attachedState = map[bool]volumeState{
        true:  volumeStateAttached,
        false: volumeStateDetached,
    }
    err = wait.Poll(detachPollTime, detachTimeout, func() (bool, error) {

    var attachedStateMsg = map[volumeState]string{
        volumeStateAttached: "attached to",
        volumeStateDetached: "detached from",
    }

    err = wait.Poll(pollTime, timeout, func() (bool, error) {
        diskAttached, err = verifyVSphereDiskAttached(vsp, volumePath, nodeName)
        if err != nil {
            return true, err
        }
        if !diskAttached {
            framework.Logf("Volume %q appears to have successfully detached from %q.",
                volumePath, nodeName)

        currentState = attachedState[diskAttached]
        if currentState == expectedState {
            framework.Logf("Volume %q has successfully %s %q", volumePath, attachedStateMsg[currentState], nodeName)
            return true, nil
        }
        framework.Logf("Waiting for Volume %q to detach from %q.", volumePath, nodeName)
        framework.Logf("Waiting for Volume %q to be %s %q.", volumePath, attachedStateMsg[expectedState], nodeName)
        return false, nil
    })
    if err != nil {
        return err
    }
    if diskAttached {
        return fmt.Errorf("Gave up waiting for Volume %q to detach from %q after %v", volumePath, nodeName, detachTimeout)

    if currentState != expectedState {
        err = fmt.Errorf("Gave up waiting for Volume %q to be %s %q after %v", volumePath, attachedStateMsg[expectedState], nodeName, timeout)
    }
    return nil
    return err
}

// Wait until vsphere vmdk is attached to the given node or time out after 6 minutes
func waitForVSphereDiskToAttach(vsp *vsphere.VSphere, volumePath string, nodeName types.NodeName) error {
    return waitForVSphereDiskStatus(vsp, volumePath, nodeName, volumeStateAttached)
}

// Wait until vsphere vmdk is detached from the given node or time out after 6 minutes
func waitForVSphereDiskToDetach(vsp *vsphere.VSphere, volumePath string, nodeName types.NodeName) error {
    return waitForVSphereDiskStatus(vsp, volumePath, nodeName, volumeStateDetached)
}

// function to create vsphere volume spec with given VMDK volume path, Reclaim Policy and labels
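The generic poller is reached through the two thin wrappers above, typically on either side of a failover event. A caller-side sketch, assuming vsp, volumePath, node1, and node2 are already known (this is essentially what the node-poweroff test below does):

    // After the pod fails over from node1 to node2, the volume should follow it:
    // first attach to the new node, then detach from the powered-off one.
    err = waitForVSphereDiskToAttach(vsp, volumePath, node2)
    Expect(err).NotTo(HaveOccurred(), "Disk is not attached to the new node")

    err = waitForVSphereDiskToDetach(vsp, volumePath, node1)
    Expect(err).NotTo(HaveOccurred(), "Disk is not detached from the old node")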

@@ -0,0 +1,197 @@
/*
Copyright 2017 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package storage

import (
    "fmt"
    "os"
    "path/filepath"
    "time"

    . "github.com/onsi/ginkgo"
    . "github.com/onsi/gomega"
    "github.com/vmware/govmomi/find"
    "golang.org/x/net/context"

    vimtypes "github.com/vmware/govmomi/vim25/types"
    "k8s.io/api/core/v1"
    extensions "k8s.io/api/extensions/v1beta1"
    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    "k8s.io/apimachinery/pkg/types"
    "k8s.io/apimachinery/pkg/util/wait"
    clientset "k8s.io/client-go/kubernetes"
    "k8s.io/kubernetes/pkg/cloudprovider/providers/vsphere"
    "k8s.io/kubernetes/test/e2e/framework"
)

/*
Test to verify volume status after node power off:
1. Verify the pod got provisioned on a different node with volume attached to it
2. Verify the volume is detached from the powered off node
*/
var _ = SIGDescribe("Node Poweroff [Feature:vsphere] [Slow] [Disruptive]", func() {
    f := framework.NewDefaultFramework("node-poweroff")
    var (
        client     clientset.Interface
        namespace  string
        vsp        *vsphere.VSphere
        workingDir string
        err        error
    )

    BeforeEach(func() {
        framework.SkipUnlessProviderIs("vsphere")
        client = f.ClientSet
        namespace = f.Namespace.Name
        framework.ExpectNoError(framework.WaitForAllNodesSchedulable(client, framework.TestContext.NodeSchedulableTimeout))
        nodeList := framework.GetReadySchedulableNodesOrDie(f.ClientSet)
        Expect(nodeList.Items).NotTo(BeEmpty(), "Unable to find ready and schedulable Node")
        Expect(len(nodeList.Items) > 1).To(BeTrue(), "At least 2 nodes are required for this test")
        vsp, err = vsphere.GetVSphere()
        Expect(err).NotTo(HaveOccurred())
        workingDir = os.Getenv("VSPHERE_WORKING_DIR")
        Expect(workingDir).NotTo(BeEmpty())
    })

    /*
        Steps:
        1. Create a StorageClass
        2. Create a PVC with the StorageClass
        3. Create a Deployment with 1 replica, using the PVC
        4. Verify the pod got provisioned on a node
        5. Verify the volume is attached to the node
        6. Power off the node where the pod got provisioned
        7. Verify the pod got provisioned on a different node
        8. Verify the volume is attached to the new node
        9. Verify the volume is detached from the old node
        10. Delete the Deployment and wait for the volume to be detached
        11. Delete the PVC
        12. Delete the StorageClass
    */
    It("verify volume status after node power off", func() {
        By("Creating a Storage Class")
        storageClassSpec := getVSphereStorageClassSpec("test-sc", nil)
        storageclass, err := client.StorageV1().StorageClasses().Create(storageClassSpec)
        Expect(err).NotTo(HaveOccurred(), fmt.Sprintf("Failed to create storage class with err: %v", err))
        defer client.StorageV1().StorageClasses().Delete(storageclass.Name, nil)

        By("Creating PVC using the Storage Class")
        pvclaimSpec := getVSphereClaimSpecWithStorageClassAnnotation(namespace, "1Gi", storageclass)
        pvclaim, err := framework.CreatePVC(client, namespace, pvclaimSpec)
        Expect(err).NotTo(HaveOccurred(), fmt.Sprintf("Failed to create PVC with err: %v", err))
        defer framework.DeletePersistentVolumeClaim(client, pvclaim.Name, namespace)

        By("Waiting for PVC to be in bound phase")
        pvclaims := []*v1.PersistentVolumeClaim{pvclaim}
        pvs, err := framework.WaitForPVClaimBoundPhase(client, pvclaims, framework.ClaimProvisionTimeout)
        Expect(err).NotTo(HaveOccurred(), fmt.Sprintf("Failed to wait until PVC phase set to bound: %v", err))
        volumePath := pvs[0].Spec.VsphereVolume.VolumePath

        By("Creating a Deployment")
        deployment, err := framework.CreateDeployment(client, int32(1), map[string]string{"test": "app"}, namespace, pvclaims, "")
        defer client.Extensions().Deployments(namespace).Delete(deployment.Name, &metav1.DeleteOptions{})

        By("Get pod from the deployment")
        podList, err := framework.GetPodsForDeployment(client, deployment)
        Expect(podList.Items).NotTo(BeEmpty())
        pod := podList.Items[0]
        node1 := types.NodeName(pod.Spec.NodeName)

        By(fmt.Sprintf("Verify disk is attached to the node: %v", node1))
        isAttached, err := verifyVSphereDiskAttached(vsp, volumePath, node1)
        Expect(err).NotTo(HaveOccurred())
        Expect(isAttached).To(BeTrue(), "Disk is not attached to the node")

        By(fmt.Sprintf("Power off the node: %v", node1))
        govMoMiClient, err := vsphere.GetgovmomiClient(nil)
        Expect(err).NotTo(HaveOccurred())

        f := find.NewFinder(govMoMiClient.Client, true)
        ctx, _ := context.WithCancel(context.Background())

        vmPath := filepath.Join(workingDir, string(node1))
        vm, err := f.VirtualMachine(ctx, vmPath)
        Expect(err).NotTo(HaveOccurred())

        _, err = vm.PowerOff(ctx)
        Expect(err).NotTo(HaveOccurred())
        defer vm.PowerOn(ctx)

        err = vm.WaitForPowerState(ctx, vimtypes.VirtualMachinePowerStatePoweredOff)
        Expect(err).NotTo(HaveOccurred(), "Unable to power off the node")

        // Waiting for the pod to be failed over to a different node
        node2, err := waitForPodToFailover(client, deployment, node1)
        Expect(err).NotTo(HaveOccurred(), "Pod did not fail over to a different node")

        By(fmt.Sprintf("Waiting for disk to be attached to the new node: %v", node2))
        err = waitForVSphereDiskToAttach(vsp, volumePath, node2)
        Expect(err).NotTo(HaveOccurred(), "Disk is not attached to the node")

        By(fmt.Sprintf("Waiting for disk to be detached from the previous node: %v", node1))
        err = waitForVSphereDiskToDetach(vsp, volumePath, node1)
        Expect(err).NotTo(HaveOccurred(), "Disk is not detached from the node")

        By(fmt.Sprintf("Power on the previous node: %v", node1))
        vm.PowerOn(ctx)
        err = vm.WaitForPowerState(ctx, vimtypes.VirtualMachinePowerStatePoweredOn)
        Expect(err).NotTo(HaveOccurred(), "Unable to power on the node")
    })
})

// Wait until the pod has failed over to a different node, or time out after 3 minutes
func waitForPodToFailover(client clientset.Interface, deployment *extensions.Deployment, oldNode types.NodeName) (types.NodeName, error) {
    var (
        err      error
        newNode  types.NodeName
        timeout  = 3 * time.Minute
        pollTime = 10 * time.Second
    )

    err = wait.Poll(pollTime, timeout, func() (bool, error) {
        newNode, err = getNodeForDeployment(client, deployment)
        if err != nil {
            return true, err
        }

        if newNode != oldNode {
            framework.Logf("The pod has been failed over from %q to %q", oldNode, newNode)
            return true, nil
        }

        framework.Logf("Waiting for pod to be failed over from %q", oldNode)
        return false, nil
    })

    if err != nil {
        if err == wait.ErrWaitTimeout {
            framework.Logf("Time out after waiting for %v", timeout)
        }
        framework.Logf("Pod did not fail over from %q with error: %v", oldNode, err)
        return "", err
    }

    return getNodeForDeployment(client, deployment)
}

func getNodeForDeployment(client clientset.Interface, deployment *extensions.Deployment) (types.NodeName, error) {
    podList, err := framework.GetPodsForDeployment(client, deployment)
    if err != nil {
        return "", err
    }
    return types.NodeName(podList.Items[0].Spec.NodeName), nil
}
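Note that getNodeForDeployment indexes podList.Items[0] without checking that the deployment currently has any pods, and during a failover there can be a brief window with no pod scheduled. A more defensive variant could guard the lookup; a sketch (the Safe suffix and the length check are illustrative, not part of the commit):

    func getNodeForDeploymentSafe(client clientset.Interface, deployment *extensions.Deployment) (types.NodeName, error) {
        podList, err := framework.GetPodsForDeployment(client, deployment)
        if err != nil {
            return "", err
        }
        // Guard against an empty pod list, which the variant above would panic on.
        if len(podList.Items) == 0 {
            return "", fmt.Errorf("no pods found for deployment %q", deployment.Name)
        }
        return types.NodeName(podList.Items[0].Spec.NodeName), nil
    }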