mirror of https://github.com/k3s-io/k3s
Merge pull request #44923 from copejon/adjust-kubelet-restart-to-os
Automatic merge from submit-queue

De-flaking E2E: PersistentVolumes[Disruptive] - restart kubelet via service manager CLI

The current process for restarting the kubelet accesses `/etc/init.d/kubelet`, which is not a common path across OSes. This PR changes `kubeletCommand()` to restart the kubelet service through the system manager CLI instead. It first attempts a systemd call, then falls back to sysVinit. If neither call works, the test exits.

This PR _does not_ remove `[Flaky]`. Right now, the test fails consistently in the gci-gce-flaky suite. After merge, I'll monitor the test grid for ~1 week. If no flakes appear, I'll submit a PR to remove the `[Flaky]` tag.

Additional changes:
- Improved error messages
- Cleanup on failure in `initTestCase`

Fixes #44933

**Release note**:
```release-note
NONE
```
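The fallback described above boils down to: try `systemctl`, and if its stderr reports `command not found`, retry through the sysVinit `service` CLI; a non-zero exit from whichever call ran is treated as failure. Below is a minimal Go sketch of that flow, not the test's actual code; `runSSH`, `sshResult`, and `kubeletServiceCmd` are hypothetical stand-ins for the e2e framework helpers (the real change lives in `kubeletCommand()` in the diff that follows).

```go
package main

import (
    "fmt"
    "strings"
)

// sshResult captures just the fields the fallback logic inspects.
type sshResult struct {
    Stderr string
    Code   int
}

// runSSH is a hypothetical stand-in for "run this command on the node over SSH".
func runSSH(cmd, host string) (sshResult, error) {
    fmt.Printf("ssh %s -- %s\n", host, cmd)
    return sshResult{}, nil
}

// kubeletServiceCmd issues `start`, `stop`, or `restart` for the kubelet via
// systemctl, falling back to the sysVinit `service` CLI when systemctl is absent.
func kubeletServiceCmd(op, host string) error {
    res, err := runSSH(fmt.Sprintf("sudo systemctl %s kubelet", op), host)
    if err != nil {
        return err
    }
    if strings.Contains(res.Stderr, "command not found") {
        // systemctl is unavailable on this OS; try the service manager CLI instead.
        res, err = runSSH(fmt.Sprintf("sudo service kubelet %s", op), host)
        if err != nil {
            return err
        }
    }
    if res.Code != 0 {
        // Neither call produced a clean exit; callers should abort the test here.
        return fmt.Errorf("failed to %s kubelet: %+v", op, res)
    }
    return nil
}

func main() {
    if err := kubeletServiceCmd("restart", "10.0.0.1:22"); err != nil {
        fmt.Println(err)
    }
}
```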
commit 97295740e3
@@ -18,10 +18,12 @@ package storage
 
 import (
     "fmt"
+    "strings"
     "time"
 
     . "github.com/onsi/ginkgo"
     . "github.com/onsi/gomega"
+    apierrs "k8s.io/apimachinery/pkg/api/errors"
     metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
     "k8s.io/apimachinery/pkg/labels"
     "k8s.io/kubernetes/pkg/api/v1"
@@ -58,10 +60,11 @@ var _ = framework.KubeDescribe("PersistentVolumes [Volume][Disruptive][Flaky]",
         volLabel labels.Set
         selector *metav1.LabelSelector
     )
 
     BeforeEach(func() {
         // To protect the NFS volume pod from the kubelet restart, we isolate it on its own node.
         framework.SkipUnlessNodeCountIsAtLeast(MinNodes)
+        framework.SkipIfProviderIs("local")
 
         c = f.ClientSet
         ns = f.Namespace.Name
         volLabel = labels.Set{framework.VolumeSelectorKey: ns}
@@ -70,7 +73,7 @@ var _ = framework.KubeDescribe("PersistentVolumes [Volume][Disruptive][Flaky]",
         // Start the NFS server pod.
         framework.Logf("[BeforeEach] Creating NFS Server Pod")
         nfsServerPod = initNFSserverPod(c, ns)
-
+        framework.Logf("NFS server Pod %q created on Node %q", nfsServerPod.Name, nfsServerPod.Spec.NodeName)
         framework.Logf("[BeforeEach] Configuring PersistentVolume")
         nfsServerIP = nfsServerPod.Status.PodIP
         Expect(nfsServerIP).NotTo(BeEmpty())
@@ -105,11 +108,9 @@ var _ = framework.KubeDescribe("PersistentVolumes [Volume][Disruptive][Flaky]",
             Expect(clientNodeIP).NotTo(BeEmpty())
         }
     })
 
     AfterEach(func() {
         framework.DeletePodWithWait(f, c, nfsServerPod)
     })
 
     Context("when kubelet restarts", func() {
 
         var (
@@ -117,24 +118,21 @@ var _ = framework.KubeDescribe("PersistentVolumes [Volume][Disruptive][Flaky]",
             pv  *v1.PersistentVolume
             pvc *v1.PersistentVolumeClaim
         )
 
         BeforeEach(func() {
             framework.Logf("Initializing test spec")
             clientPod, pv, pvc = initTestCase(f, c, nfsPVconfig, pvcConfig, ns, clientNode.Name)
         })
 
         AfterEach(func() {
             framework.Logf("Tearing down test spec")
             tearDownTestCase(c, f, ns, clientPod, pvc, pv)
             pv, pvc, clientPod = nil, nil, nil
         })
 
         // Test table housing the It() title string and test spec. runTest is type testBody, defined at
         // the start of this file. To add tests, define a function mirroring the testBody signature and assign
         // to runTest.
         disruptiveTestTable := []disruptiveTest{
             {
-                testItStmt: "Should test that a file written to the mount before kubelet restart can be read after restart.",
+                testItStmt: "Should test that a file written to the mount before kubelet restart is readable after restart.",
                 runTest:    testKubeletRestartsAndRestoresMount,
             },
             {
@@ -142,7 +140,6 @@ var _ = framework.KubeDescribe("PersistentVolumes [Volume][Disruptive][Flaky]",
                 runTest: testVolumeUnmountsFromDeletedPod,
             },
         }
 
         // Test loop executes each disruptiveTest iteratively.
         for _, test := range disruptiveTestTable {
             func(t disruptiveTest) {
@@ -159,14 +156,16 @@ var _ = framework.KubeDescribe("PersistentVolumes [Volume][Disruptive][Flaky]",
 func testKubeletRestartsAndRestoresMount(c clientset.Interface, f *framework.Framework, clientPod *v1.Pod, pvc *v1.PersistentVolumeClaim, pv *v1.PersistentVolume) {
     By("Writing to the volume.")
     file := "/mnt/_SUCCESS"
-    _, err := podExec(clientPod, fmt.Sprintf("touch %s", file))
+    out, err := podExec(clientPod, fmt.Sprintf("touch %s", file))
+    framework.Logf(out)
     Expect(err).NotTo(HaveOccurred())
 
     By("Restarting kubelet")
     kubeletCommand(kRestart, c, clientPod)
 
     By("Testing that written file is accessible.")
-    _, err = podExec(clientPod, fmt.Sprintf("cat %s", file))
+    out, err = podExec(clientPod, fmt.Sprintf("cat %s", file))
+    framework.Logf(out)
     Expect(err).NotTo(HaveOccurred())
     framework.Logf("Volume mount detected on pod %s and written file %s is readable post-restart.", clientPod.Name, file)
 }
@@ -178,36 +177,61 @@ func testVolumeUnmountsFromDeletedPod(c clientset.Interface, f *framework.Framew
     nodeIP = nodeIP + ":22"
 
     By("Expecting the volume mount to be found.")
-    result, err := framework.SSH(fmt.Sprintf("mount| grep %s", string(clientPod.UID)), nodeIP, framework.TestContext.Provider)
-    Expect(err).NotTo(HaveOccurred())
-    Expect(result.Code).To(BeZero())
+    result, err := framework.SSH(fmt.Sprintf("mount | grep %s", clientPod.UID), nodeIP, framework.TestContext.Provider)
+    framework.LogSSHResult(result)
+    Expect(err).NotTo(HaveOccurred(), "Encountered SSH error.")
+    Expect(result.Code).To(BeZero(), fmt.Sprintf("Expected grep exit code of 0, got %s", result.Code))
 
-    By("Restarting the kubelet.")
+    By("Stopping the kubelet.")
     kubeletCommand(kStop, c, clientPod)
-    framework.ExpectNoError(framework.DeletePodWithWait(f, c, clientPod), "Failed to delete pod ", clientPod.Name)
-    kubeletCommand(kStart, c, clientPod)
+    defer func() {
+        if err != nil {
+            kubeletCommand(kStart, c, clientPod)
+        }
+    }()
+    By(fmt.Sprintf("Deleting Pod %q", clientPod.Name))
+    err = c.Core().Pods(clientPod.Namespace).Delete(clientPod.Name, &metav1.DeleteOptions{})
+    Expect(err).NotTo(HaveOccurred())
+    By("Starting the kubelet and waiting for pod to delete.")
+    kubeletCommand(kStart, c, clientPod)
+    err = f.WaitForPodTerminated(clientPod.Name, "")
+    if !apierrs.IsNotFound(err) && err != nil {
+        Expect(err).NotTo(HaveOccurred(), "Expected pod to terminate.")
+    }
 
     By("Expecting the volume mount not to be found.")
-    result, err = framework.SSH(fmt.Sprintf("mount| grep %s", string(clientPod.UID)), nodeIP, framework.TestContext.Provider)
-    Expect(err).NotTo(HaveOccurred())
-    Expect(result.Code).NotTo(BeZero())
+    result, err = framework.SSH(fmt.Sprintf("mount | grep %s", clientPod.UID), nodeIP, framework.TestContext.Provider)
+    framework.LogSSHResult(result)
+    Expect(err).NotTo(HaveOccurred(), "Encountered SSH error.")
+    Expect(result.Stdout).To(BeEmpty(), "Expected grep stdout to be empty (i.e. no mount found).")
     framework.Logf("Volume unmounted on node %s", clientPod.Spec.NodeName)
 }
 
 // initTestCase initializes spec resources (pv, pvc, and pod) and returns pointers to be consumed
 // by the test.
 func initTestCase(f *framework.Framework, c clientset.Interface, pvConfig framework.PersistentVolumeConfig, pvcConfig framework.PersistentVolumeClaimConfig, ns, nodeName string) (*v1.Pod, *v1.PersistentVolume, *v1.PersistentVolumeClaim) {
 
     pv, pvc, err := framework.CreatePVPVC(c, pvConfig, pvcConfig, ns, false)
+    defer func() {
+        if err != nil {
+            framework.DeletePersistentVolumeClaim(c, pvc.Name, ns)
+            framework.DeletePersistentVolume(c, pv.Name)
+        }
+    }()
     Expect(err).NotTo(HaveOccurred())
     pod := framework.MakePod(ns, []*v1.PersistentVolumeClaim{pvc}, true, "")
     pod.Spec.NodeName = nodeName
-    framework.Logf("Creating nfs client Pod %s on node %s", pod.Name, nodeName)
+    framework.Logf("Creating NFS client pod.")
     pod, err = c.CoreV1().Pods(ns).Create(pod)
+    framework.Logf("NFS client Pod %q created on Node %q", pod.Name, nodeName)
     Expect(err).NotTo(HaveOccurred())
+    defer func() {
+        if err != nil {
+            framework.DeletePodWithWait(f, c, pod)
+        }
+    }()
     err = framework.WaitForPodRunningInNamespace(c, pod)
-    Expect(err).NotTo(HaveOccurred())
+    Expect(err).NotTo(HaveOccurred(), fmt.Sprintf("Pod %q timed out waiting for phase: Running", pod.Name))
+    // Return created api objects
     pod, err = c.CoreV1().Pods(ns).Get(pod.Name, metav1.GetOptions{})
     Expect(err).NotTo(HaveOccurred())
     pvc, err = c.CoreV1().PersistentVolumeClaims(ns).Get(pvc.Name, metav1.GetOptions{})
@@ -218,22 +242,36 @@ func initTestCase(f *framework.Framework, c clientset.Interface, pvConfig framew
 }
 
 // tearDownTestCase destroy resources created by initTestCase.
-func tearDownTestCase(c clientset.Interface, f *framework.Framework, ns string, pod *v1.Pod, pvc *v1.PersistentVolumeClaim, pv *v1.PersistentVolume) {
-    framework.ExpectNoError(framework.DeletePodWithWait(f, c, pod), "tearDown: Failed to delete pod ", pod.Name)
-    framework.ExpectNoError(framework.DeletePersistentVolumeClaim(c, pvc.Name, ns), "tearDown: Failed to delete PVC ", pvc.Name)
-    framework.ExpectNoError(framework.DeletePersistentVolume(c, pv.Name), "tearDown: Failed to delete PV ", pv.Name)
+func tearDownTestCase(c clientset.Interface, f *framework.Framework, ns string, client *v1.Pod, pvc *v1.PersistentVolumeClaim, pv *v1.PersistentVolume) {
+    // Ignore deletion errors. Failing on them will interrupt test cleanup.
+    framework.DeletePodWithWait(f, c, client)
+    framework.DeletePersistentVolumeClaim(c, pvc.Name, ns)
+    framework.DeletePersistentVolume(c, pv.Name)
 }
 
-// kubeletCommand performs `start`, `restart`, or `stop` on the kubelet running on the node of the target pod.
+// kubeletCommand performs `start`, `restart`, or `stop` on the kubelet running on the node of the target pod and waits
+// for the desired statues..
+// - First issues the command via `systemctl`
+// - If `systemctl` returns stderr "command not found, issues the command via `service`
+// - If `service` also returns stderr "command not found", the test is aborted.
 // Allowed kubeletOps are `kStart`, `kStop`, and `kRestart`
 func kubeletCommand(kOp kubeletOpt, c clientset.Interface, pod *v1.Pod) {
     nodeIP, err := framework.GetHostExternalAddress(c, pod)
     Expect(err).NotTo(HaveOccurred())
     nodeIP = nodeIP + ":22"
-    sshResult, err := framework.SSH("sudo /etc/init.d/kubelet "+string(kOp), nodeIP, framework.TestContext.Provider)
-    Expect(err).NotTo(HaveOccurred())
+    systemctlCmd := fmt.Sprintf("sudo systemctl %s kubelet", string(kOp))
+    framework.Logf("Attempting `%s`", systemctlCmd)
+    sshResult, err := framework.SSH(systemctlCmd, nodeIP, framework.TestContext.Provider)
+    Expect(err).NotTo(HaveOccurred(), fmt.Sprintf("SSH to Node %q errored.", pod.Spec.NodeName))
     framework.LogSSHResult(sshResult)
+    if strings.Contains(sshResult.Stderr, "command not found") {
+        serviceCmd := fmt.Sprintf("sudo service kubelet %s", string(kOp))
+        framework.Logf("Attempting `%s`", serviceCmd)
+        sshResult, err = framework.SSH(serviceCmd, nodeIP, framework.TestContext.Provider)
+        Expect(err).NotTo(HaveOccurred(), fmt.Sprintf("SSH to Node %q errored.", pod.Spec.NodeName))
+        framework.LogSSHResult(sshResult)
+    }
+    Expect(sshResult.Code).To(BeZero(), "Failed to [%s] kubelet:\n%#v", string(kOp), sshResult)
     // On restart, waiting for node NotReady prevents a race condition where the node takes a few moments to leave the
     // Ready state which in turn short circuits WaitForNodeToBeReady()
     if kOp == kStop || kOp == kRestart {