From f64c508e2e85393c5f59d26abd630e7c11791b81 Mon Sep 17 00:00:00 2001
From: Lantao Liu
Date: Mon, 8 Jan 2018 18:46:51 +0000
Subject: [PATCH 1/2] Add getCRIClient and set default values for CRI related flags

---
 hack/make-rules/test-e2e-node.sh   | 20 ++++++++++----------
 test/e2e/framework/test_context.go | 22 +++++++++++++---------
 test/e2e_node/image_list.go        | 16 +---------------
 test/e2e_node/services/kubelet.go  | 12 ++++++++++++
 test/e2e_node/util.go              | 24 ++++++++++++++++++++++++
 5 files changed, 60 insertions(+), 34 deletions(-)

diff --git a/hack/make-rules/test-e2e-node.sh b/hack/make-rules/test-e2e-node.sh
index 1cff4c85d8..5733f1eb44 100755
--- a/hack/make-rules/test-e2e-node.sh
+++ b/hack/make-rules/test-e2e-node.sh
@@ -60,6 +60,16 @@ if [ ! -d "${artifacts}" ]; then
 fi
 echo "Test artifacts will be written to ${artifacts}"
 
+if [[ $runtime == "remote" ]] ; then
+  if [[ ! -z $container_runtime_endpoint ]] ; then
+    test_args="--container-runtime-endpoint=${container_runtime_endpoint} $test_args"
+  fi
+  if [[ ! -z $image_service_endpoint ]] ; then
+    test_args="--image-service-endpoint=$image_service_endpoint $test_args"
+  fi
+fi
+
+
 if [ $remote = true ] ; then
   # The following options are only valid in remote run.
   images=${IMAGES:-""}
@@ -153,22 +163,12 @@ else
 
   # Runtime flags
   test_args='--kubelet-flags="--container-runtime='$runtime'" '$test_args
-  if [[ $runtime == "remote" ]] ; then
-    if [[ ! -z $container_runtime_endpoint ]] ; then
-      test_args='--kubelet-flags="--container-runtime-endpoint='$container_runtime_endpoint'" '$test_args
-    fi
-    if [[ ! -z $image_service_endpoint ]] ; then
-      test_args='--kubelet-flags="--image-service-endpoint='$image_service_endpoint'" '$test_args
-    fi
-  fi
 
   # Test using the host the script was run on
   # Provided for backwards compatibility
   go run test/e2e_node/runner/local/run_local.go \
     --system-spec-name="$system_spec_name" --ginkgo-flags="$ginkgoflags" \
     --test-flags="--container-runtime=${runtime} \
-    --container-runtime-endpoint=${container_runtime_endpoint} \
-    --image-service-endpoint=${image_service_endpoint} \
     --alsologtostderr --v 4 --report-dir=${artifacts} --node-name $(hostname) \
     $test_args" --build-dependencies=true 2>&1 | tee -i "${artifacts}/build-log.txt"
   exit $?
diff --git a/test/e2e/framework/test_context.go b/test/e2e/framework/test_context.go
index 54e261abff..31b2dde185 100644
--- a/test/e2e/framework/test_context.go
+++ b/test/e2e/framework/test_context.go
@@ -57,14 +57,16 @@ type TestContextType struct {
 	Prefix         string
 	MinStartupPods int
 	// Timeout for waiting for system pods to be running
-	SystemPodsStartupTimeout time.Duration
-	UpgradeTarget            string
-	EtcdUpgradeStorage       string
-	EtcdUpgradeVersion       string
-	UpgradeImage             string
-	GCEUpgradeScript         string
-	ContainerRuntime         string
-	ContainerRuntimeEndpoint string
+	SystemPodsStartupTimeout    time.Duration
+	UpgradeTarget               string
+	EtcdUpgradeStorage          string
+	EtcdUpgradeVersion          string
+	UpgradeImage                string
+	GCEUpgradeScript            string
+	ContainerRuntime            string
+	ContainerRuntimeEndpoint    string
+	ContainerRuntimeProcessName string
+	ContainerRuntimePidFile     string
 	// SystemdServices are comma separated list of systemd services the test framework
 	// will dump logs for.
 	SystemdServices string
@@ -203,7 +205,9 @@ func RegisterCommonFlags() {
 	flag.StringVar(&TestContext.FeatureGates, "feature-gates", "", "A set of key=value pairs that describe feature gates for alpha/experimental features.")
 	flag.StringVar(&TestContext.Viper, "viper-config", "e2e", "The name of the viper config i.e. 'e2e' will read values from 'e2e.json' locally. All e2e parameters are meant to be configurable by viper.")
 	flag.StringVar(&TestContext.ContainerRuntime, "container-runtime", "docker", "The container runtime of cluster VM instances (docker/rkt/remote).")
-	flag.StringVar(&TestContext.ContainerRuntimeEndpoint, "container-runtime-endpoint", "", "The container runtime endpoint of cluster VM instances.")
+	flag.StringVar(&TestContext.ContainerRuntimeEndpoint, "container-runtime-endpoint", "unix:///var/run/dockershim.sock", "The container runtime endpoint of cluster VM instances.")
+	flag.StringVar(&TestContext.ContainerRuntimeProcessName, "container-runtime-process-name", "dockerd", "The name of the container runtime process.")
+	flag.StringVar(&TestContext.ContainerRuntimePidFile, "container-runtime-pid-file", "/var/run/docker.pid", "The pid file of the container runtime.")
 	flag.StringVar(&TestContext.SystemdServices, "systemd-services", "docker", "The comma separated list of systemd services the framework will dump logs for.")
 	flag.StringVar(&TestContext.ImageServiceEndpoint, "image-service-endpoint", "", "The image service endpoint of cluster VM instances.")
 	flag.StringVar(&TestContext.DockershimCheckpointDir, "dockershim-checkpoint-dir", "/var/lib/dockershim/sandbox", "The directory for dockershim to store sandbox checkpoints.")
diff --git a/test/e2e_node/image_list.go b/test/e2e_node/image_list.go
index 01360451fc..0c37c88752 100644
--- a/test/e2e_node/image_list.go
+++ b/test/e2e_node/image_list.go
@@ -17,7 +17,6 @@ limitations under the License.
 package e2e_node
 
 import (
-	"errors"
 	"fmt"
 	"os/exec"
 	"os/user"
@@ -28,7 +27,6 @@ import (
 	"k8s.io/apimachinery/pkg/util/sets"
 	internalapi "k8s.io/kubernetes/pkg/kubelet/apis/cri"
 	runtimeapi "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime"
-	"k8s.io/kubernetes/pkg/kubelet/remote"
 	commontest "k8s.io/kubernetes/test/e2e/common"
 	"k8s.io/kubernetes/test/e2e/framework"
 	imageutils "k8s.io/kubernetes/test/utils/image"
@@ -39,8 +37,6 @@ const (
 	maxImagePullRetries = 5
 	// Sleep duration between image pull retry attempts.
 	imagePullRetryDelay = time.Second
-	// connection timeout for gRPC image service connection
-	imageServiceConnectionTimeout = 15 * time.Minute
 )
 
 // NodeImageWhiteList is a list of images used in node e2e test. These images will be prepulled
@@ -107,17 +103,7 @@ func getPuller() (puller, error) {
 	case "docker":
 		return &dockerPuller{}, nil
 	case "remote":
-		endpoint := framework.TestContext.ContainerRuntimeEndpoint
-		if framework.TestContext.ImageServiceEndpoint != "" {
-			//ImageServiceEndpoint is the same as ContainerRuntimeEndpoint if not
-			//explicitly specified
-			//https://github.com/kubernetes/kubernetes/blob/master/pkg/kubelet/kubelet.go#L517
-			endpoint = framework.TestContext.ImageServiceEndpoint
-		}
-		if endpoint == "" {
-			return nil, errors.New("can't prepull images, no remote endpoint provided")
-		}
-		is, err := remote.NewRemoteImageService(endpoint, imageServiceConnectionTimeout)
+		_, is, err := getCRIClient()
 		if err != nil {
 			return nil, err
 		}
diff --git a/test/e2e_node/services/kubelet.go b/test/e2e_node/services/kubelet.go
index aa803859a3..1fee35acfd 100644
--- a/test/e2e_node/services/kubelet.go
+++ b/test/e2e_node/services/kubelet.go
@@ -303,6 +303,18 @@ func (e *E2EServices) startKubelet() (*server, error) {
 		cmdArgs = append(cmdArgs, "--hostname-override", framework.TestContext.NodeName)
 	}
 
+	if framework.TestContext.ContainerRuntime != "" {
+		cmdArgs = append(cmdArgs, "--container-runtime", framework.TestContext.ContainerRuntime)
+	}
+
+	if framework.TestContext.ContainerRuntimeEndpoint != "" {
+		cmdArgs = append(cmdArgs, "--container-runtime-endpoint", framework.TestContext.ContainerRuntimeEndpoint)
+	}
+
+	if framework.TestContext.ImageServiceEndpoint != "" {
+		cmdArgs = append(cmdArgs, "--image-service-endpoint", framework.TestContext.ImageServiceEndpoint)
+	}
+
 	// Write config file or flags, depending on whether --generate-kubelet-config-file was provided
 	if genKubeletConfigFile {
 		if err := writeKubeletConfigFile(kc, kubeletConfigPath); err != nil {
diff --git a/test/e2e_node/util.go b/test/e2e_node/util.go
index 0d00f88944..bf1914c5e7 100644
--- a/test/e2e_node/util.go
+++ b/test/e2e_node/util.go
@@ -35,11 +35,13 @@ import (
 	"k8s.io/apimachinery/pkg/util/sets"
 	"k8s.io/client-go/kubernetes/scheme"
 	"k8s.io/kubernetes/pkg/features"
+	internalapi "k8s.io/kubernetes/pkg/kubelet/apis/cri"
 	"k8s.io/kubernetes/pkg/kubelet/apis/kubeletconfig"
 	kubeletscheme "k8s.io/kubernetes/pkg/kubelet/apis/kubeletconfig/scheme"
 	kubeletconfigv1alpha1 "k8s.io/kubernetes/pkg/kubelet/apis/kubeletconfig/v1alpha1"
 	stats "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1"
 	kubeletmetrics "k8s.io/kubernetes/pkg/kubelet/metrics"
+	"k8s.io/kubernetes/pkg/kubelet/remote"
 	"k8s.io/kubernetes/test/e2e/framework"
 	"k8s.io/kubernetes/test/e2e/framework/metrics"
 
@@ -365,3 +367,25 @@ func runCommand(cmd ...string) (string, error) {
 	}
 	return string(output), nil
 }
+
+// getCRIClient connects to CRI and returns the CRI runtime service and image service clients.
+func getCRIClient() (internalapi.RuntimeService, internalapi.ImageManagerService, error) {
+	// connection timeout for CRI service connection
+	const connectionTimeout = 2 * time.Minute
+	runtimeEndpoint := framework.TestContext.ContainerRuntimeEndpoint
+	r, err := remote.NewRemoteRuntimeService(runtimeEndpoint, connectionTimeout)
+	if err != nil {
+		return nil, nil, err
+	}
+	imageManagerEndpoint := runtimeEndpoint
+	if framework.TestContext.ImageServiceEndpoint != "" {
+		//ImageServiceEndpoint is the same as ContainerRuntimeEndpoint if not
+		//explicitly specified
+		imageManagerEndpoint = framework.TestContext.ImageServiceEndpoint
+	}
+	i, err := remote.NewRemoteImageService(imageManagerEndpoint, connectionTimeout)
+	if err != nil {
+		return nil, nil, err
+	}
+	return r, i, nil
+}

From e05a5b9f7a9723c12a652a7e1ff7fb069d60f623 Mon Sep 17 00:00:00 2001
From: Lantao Liu
Date: Mon, 8 Jan 2018 18:47:54 +0000
Subject: [PATCH 2/2] Remove unnecessary docker specific logic in node e2e test.

---
 test/e2e_node/BUILD                         |  3 +-
 test/e2e_node/container_manager_test.go     | 27 +++++---
 test/e2e_node/cpu_manager_test.go           | 42 +++++++-----
 test/e2e_node/dockershim_checkpoint_test.go |  4 ++
 test/e2e_node/garbage_collector_test.go     | 71 ++++++++++-----------
 test/e2e_node/restart_test.go               | 40 ++++++++----
 6 files changed, 108 insertions(+), 79 deletions(-)

diff --git a/test/e2e_node/BUILD b/test/e2e_node/BUILD
index e6431d05de..e70755c536 100644
--- a/test/e2e_node/BUILD
+++ b/test/e2e_node/BUILD
@@ -120,13 +120,14 @@ go_test(
         "//pkg/apis/core:go_default_library",
         "//pkg/features:go_default_library",
         "//pkg/kubelet:go_default_library",
+        "//pkg/kubelet/apis/cri:go_default_library",
+        "//pkg/kubelet/apis/cri/v1alpha1/runtime:go_default_library",
         "//pkg/kubelet/apis/kubeletconfig:go_default_library",
         "//pkg/kubelet/apis/stats/v1alpha1:go_default_library",
         "//pkg/kubelet/cm:go_default_library",
         "//pkg/kubelet/cm/cpumanager:go_default_library",
         "//pkg/kubelet/cm/cpuset:go_default_library",
         "//pkg/kubelet/container:go_default_library",
-        "//pkg/kubelet/dockershim/libdocker:go_default_library",
         "//pkg/kubelet/images:go_default_library",
         "//pkg/kubelet/kubeletconfig:go_default_library",
         "//pkg/kubelet/kubeletconfig/status:go_default_library",
diff --git a/test/e2e_node/container_manager_test.go b/test/e2e_node/container_manager_test.go
index 54187d73a5..fa123c4d54 100644
--- a/test/e2e_node/container_manager_test.go
+++ b/test/e2e_node/container_manager_test.go
@@ -31,6 +31,7 @@ import (
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/util/sets"
 	"k8s.io/apimachinery/pkg/util/uuid"
+	runtimeapi "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime"
 	"k8s.io/kubernetes/test/e2e/framework"
 
 	. "github.com/onsi/ginkgo"
@@ -76,10 +77,10 @@ var _ = framework.KubeDescribe("Container Manager Misc [Serial]", func() {
 	f := framework.NewDefaultFramework("kubelet-container-manager")
 	Describe("Validate OOM score adjustments", func() {
 		Context("once the node is setup", func() {
-			It("docker daemon's oom-score-adj should be -999", func() {
-				dockerPids, err := getPidsForProcess(dockerProcessName, dockerPidFile)
-				Expect(err).To(BeNil(), "failed to get list of docker daemon pids")
-				for _, pid := range dockerPids {
+			It("container runtime's oom-score-adj should be -999", func() {
+				runtimePids, err := getPidsForProcess(framework.TestContext.ContainerRuntimeProcessName, framework.TestContext.ContainerRuntimePidFile)
+				Expect(err).To(BeNil(), "failed to get list of container runtime pids")
+				for _, pid := range runtimePids {
 					Eventually(func() error {
 						return validateOOMScoreAdjSetting(pid, -999)
 					}, 5*time.Minute, 30*time.Second).Should(BeNil())
@@ -148,14 +149,22 @@ var _ = framework.KubeDescribe("Container Manager Misc [Serial]", func() {
 					return validateOOMScoreAdjSetting(shPids[0], 1000)
 				}, 2*time.Minute, time.Second*4).Should(BeNil())
 			})
-			// Log the running containers here to help debugging. Use `docker ps`
-			// directly for now because the test is already docker specific.
+			// Log the running containers here to help debugging.
 			AfterEach(func() {
 				if CurrentGinkgoTestDescription().Failed {
-					By("Dump all running docker containers")
-					output, err := exec.Command("docker", "ps").CombinedOutput()
+					By("Dump all running containers")
+					runtime, _, err := getCRIClient()
 					Expect(err).NotTo(HaveOccurred())
-					framework.Logf("Running docker containers:\n%s", string(output))
+					containers, err := runtime.ListContainers(&runtimeapi.ContainerFilter{
+						State: &runtimeapi.ContainerStateValue{
+							State: runtimeapi.ContainerState_CONTAINER_RUNNING,
+						},
+					})
+					Expect(err).NotTo(HaveOccurred())
+					framework.Logf("Running containers:\n")
+					for _, c := range containers {
+						framework.Logf("%+v\n", c)
+					}
 				}
 			})
 		})
diff --git a/test/e2e_node/cpu_manager_test.go b/test/e2e_node/cpu_manager_test.go
index 4186d78cb4..9ac0bfca4a 100644
--- a/test/e2e_node/cpu_manager_test.go
+++ b/test/e2e_node/cpu_manager_test.go
@@ -27,9 +27,11 @@ import (
 	"k8s.io/apimachinery/pkg/api/resource"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/kubernetes/pkg/features"
+	runtimeapi "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime"
 	"k8s.io/kubernetes/pkg/kubelet/apis/kubeletconfig"
 	"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager"
 	"k8s.io/kubernetes/pkg/kubelet/cm/cpuset"
+	"k8s.io/kubernetes/pkg/kubelet/types"
 	"k8s.io/kubernetes/test/e2e/framework"
 
 	. "github.com/onsi/ginkgo"
@@ -101,14 +103,21 @@ func getLocalNodeCPUDetails(f *framework.Framework) (cpuCapVal int64, cpuAllocVa
 	return cpuCap.Value(), (cpuCap.Value() - cpuRes.Value()), cpuRes.Value()
 }
 
-// TODO(balajismaniam): Make this func generic to all container runtimes.
-func waitForContainerRemoval(ctnPartName string) {
+func waitForContainerRemoval(containerName, podName, podNS string) {
+	rs, _, err := getCRIClient()
+	Expect(err).NotTo(HaveOccurred())
 	Eventually(func() bool {
-		err := exec.Command("/bin/sh", "-c", fmt.Sprintf("if [ -n \"$(docker ps -a | grep -i %s)\" ]; then exit 1; fi", ctnPartName)).Run()
+		containers, err := rs.ListContainers(&runtimeapi.ContainerFilter{
+			LabelSelector: map[string]string{
+				types.KubernetesPodNameLabel:       podName,
+				types.KubernetesPodNamespaceLabel:  podNS,
+				types.KubernetesContainerNameLabel: containerName,
+			},
+		})
 		if err != nil {
 			return false
 		}
-		return true
+		return len(containers) == 0
 	}, 2*time.Minute, 1*time.Second).Should(BeTrue())
 }
 
@@ -135,9 +144,8 @@ func setOldKubeletConfig(f *framework.Framework, oldCfg *kubeletconfig.KubeletCo
 }
 
 func enableCPUManagerInKubelet(f *framework.Framework) (oldCfg *kubeletconfig.KubeletConfiguration) {
-	// Run only if the container runtime is Docker.
-	// TODO(balajismaniam): Make this test generic to all container runtimes.
-	framework.RunIfContainerRuntimeIs("docker")
+	// Run only if the container runtime is docker or remote (not rkt).
+	framework.RunIfContainerRuntimeIs("docker", "remote")
 
 	// Enable CPU Manager in Kubelet with static policy.
 	oldCfg, err := getCurrentKubeletConfig()
@@ -219,7 +227,7 @@ func runCPUManagerTests(f *framework.Framework) {
 
 	By("by deleting the pods and waiting for container removal")
 	deletePods(f, []string{pod.Name})
-	waitForContainerRemoval(fmt.Sprintf("%s_%s", pod.Spec.Containers[0].Name, pod.Name))
+	waitForContainerRemoval(pod.Spec.Containers[0].Name, pod.Name, pod.Namespace)
 
 	By("running a Gu pod")
 	ctnAttrs = []ctnAttribute{
@@ -245,7 +253,7 @@ func runCPUManagerTests(f *framework.Framework) {
 
 	By("by deleting the pods and waiting for container removal")
 	deletePods(f, []string{pod.Name})
-	waitForContainerRemoval(fmt.Sprintf("%s_%s", pod.Spec.Containers[0].Name, pod.Name))
+	waitForContainerRemoval(pod.Spec.Containers[0].Name, pod.Name, pod.Namespace)
 
 	By("running multiple Gu and non-Gu pods")
 	ctnAttrs = []ctnAttribute{
@@ -291,8 +299,8 @@ func runCPUManagerTests(f *framework.Framework) {
 
 	By("by deleting the pods and waiting for container removal")
 	deletePods(f, []string{pod1.Name, pod2.Name})
-	waitForContainerRemoval(fmt.Sprintf("%s_%s", pod1.Spec.Containers[0].Name, pod1.Name))
-	waitForContainerRemoval(fmt.Sprintf("%s_%s", pod2.Spec.Containers[0].Name, pod2.Name))
+	waitForContainerRemoval(pod1.Spec.Containers[0].Name, pod1.Name, pod1.Namespace)
+	waitForContainerRemoval(pod2.Spec.Containers[0].Name, pod2.Name, pod2.Namespace)
 
 	// Skip rest of the tests if CPU capacity < 3.
 	if cpuCap < 3 {
@@ -327,7 +335,7 @@ func runCPUManagerTests(f *framework.Framework) {
 
 	By("by deleting the pods and waiting for container removal")
 	deletePods(f, []string{pod.Name})
-	waitForContainerRemoval(fmt.Sprintf("%s_%s", pod.Spec.Containers[0].Name, pod.Name))
+	waitForContainerRemoval(pod.Spec.Containers[0].Name, pod.Name, pod.Namespace)
 
 	By("running a Gu pod with multiple containers requesting integer CPUs")
 	ctnAttrs = []ctnAttribute{
@@ -365,8 +373,8 @@ func runCPUManagerTests(f *framework.Framework) {
 
 	By("by deleting the pods and waiting for container removal")
 	deletePods(f, []string{pod.Name})
-	waitForContainerRemoval(fmt.Sprintf("%s_%s", pod.Spec.Containers[0].Name, pod.Name))
-	waitForContainerRemoval(fmt.Sprintf("%s_%s", pod.Spec.Containers[1].Name, pod.Name))
+	waitForContainerRemoval(pod.Spec.Containers[0].Name, pod.Name, pod.Namespace)
+	waitForContainerRemoval(pod.Spec.Containers[1].Name, pod.Name, pod.Namespace)
 
 	By("running multiple Gu pods")
 	ctnAttrs = []ctnAttribute{
@@ -410,15 +418,15 @@ func runCPUManagerTests(f *framework.Framework) {
 
 	By("by deleting the pods and waiting for container removal")
 	deletePods(f, []string{pod1.Name, pod2.Name})
-	waitForContainerRemoval(fmt.Sprintf("%s_%s", pod1.Spec.Containers[0].Name, pod1.Name))
-	waitForContainerRemoval(fmt.Sprintf("%s_%s", pod2.Spec.Containers[0].Name, pod2.Name))
+	waitForContainerRemoval(pod1.Spec.Containers[0].Name, pod1.Name, pod1.Namespace)
+	waitForContainerRemoval(pod2.Spec.Containers[0].Name, pod2.Name, pod2.Namespace)
 
 	setOldKubeletConfig(f, oldCfg)
 }
 
 // Serial because the test updates kubelet configuration.
-var _ = SIGDescribe("CPU Manager [Feature:CPUManager]", func() {
+var _ = SIGDescribe("CPU Manager [Serial] [Feature:CPUManager]", func() {
 	f := framework.NewDefaultFramework("cpu-manager-test")
 
 	Context("With kubeconfig updated with static CPU Manager policy run the CPU Manager tests", func() {
diff --git a/test/e2e_node/dockershim_checkpoint_test.go b/test/e2e_node/dockershim_checkpoint_test.go
index 0dfdfbfc11..018213d1b2 100644
--- a/test/e2e_node/dockershim_checkpoint_test.go
+++ b/test/e2e_node/dockershim_checkpoint_test.go
@@ -45,6 +45,10 @@ const (
 var _ = SIGDescribe("Dockershim [Serial] [Disruptive] [Feature:Docker]", func() {
 	f := framework.NewDefaultFramework("dockerhism-checkpoint-test")
 
+	BeforeEach(func() {
+		framework.RunIfContainerRuntimeIs("docker")
+	})
+
 	It("should clean up pod sandbox checkpoint after pod deletion", func() {
 		podName := "pod-checkpoint-no-disrupt"
 		runPodCheckpointTest(f, podName, func() {
diff --git a/test/e2e_node/garbage_collector_test.go b/test/e2e_node/garbage_collector_test.go
index dd4578ef03..04010cd6ac 100644
--- a/test/e2e_node/garbage_collector_test.go
+++ b/test/e2e_node/garbage_collector_test.go
@@ -19,12 +19,13 @@ package e2e_node
 import (
 	"fmt"
 	"strconv"
-	"strings"
 	"time"
 
 	"k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
-	"k8s.io/kubernetes/pkg/kubelet/dockershim/libdocker"
+	internalapi "k8s.io/kubernetes/pkg/kubelet/apis/cri"
+	runtimeapi "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime"
+	"k8s.io/kubernetes/pkg/kubelet/types"
 	"k8s.io/kubernetes/test/e2e/framework"
 
 	. "github.com/onsi/ginkgo"
@@ -130,8 +131,7 @@ var _ = framework.KubeDescribe("GarbageCollect [Serial]", func() {
 		},
 	}
 	for _, test := range tests {
-		// TODO (dashpole): Once the Container Runtime Interface (CRI) is complete, generalize run on other runtimes (other than docker)
-		dockerContainerGCTest(f, test)
+		containerGCTest(f, test)
 	}
 })
 
@@ -142,6 +142,32 @@ var _ = framework.KubeDescribe("GarbageCollect [Serial]", func() {
 // while containers are running, if not constrained by maxPerPodContainer or maxTotalContainers, keep an extra copy of each container
 // once pods are killed, all containers are eventually cleaned up
 func containerGCTest(f *framework.Framework, test testRun) {
+	var runtime internalapi.RuntimeService
+	BeforeEach(func() {
+		var err error
+		runtime, _, err = getCRIClient()
+		Expect(err).NotTo(HaveOccurred())
+	})
+	for _, pod := range test.testPods {
+		pod := pod // capture the range variable, then initialize getContainerNames to use the CRI runtime client
+		pod.getContainerNames = func() ([]string, error) {
+			relevantContainers := []string{}
+			containers, err := runtime.ListContainers(&runtimeapi.ContainerFilter{
+				LabelSelector: map[string]string{
+					types.KubernetesPodNameLabel:      pod.podName,
+					types.KubernetesPodNamespaceLabel: f.Namespace.Name,
+				},
+			})
+			if err != nil {
+				return relevantContainers, err
+			}
+			for _, container := range containers {
+				relevantContainers = append(relevantContainers, container.Labels[types.KubernetesContainerNameLabel])
+			}
+			return relevantContainers, nil
+		}
+	}
+
 	Context(fmt.Sprintf("Garbage Collection Test: %s", test.testName), func() {
 		BeforeEach(func() {
 			realPods := getPods(test.testPods)
@@ -175,7 +201,7 @@ func containerGCTest(f *framework.Framework, test testRun) {
 				for i := 0; i < pod.numContainers; i++ {
 					containerCount := 0
 					for _, containerName := range containerNames {
-						if strings.Contains(containerName, pod.getContainerName(i)) {
+						if containerName == pod.getContainerName(i) {
 							containerCount += 1
 						}
 					}
@@ -203,7 +229,7 @@ func containerGCTest(f *framework.Framework, test testRun) {
 				for i := 0; i < pod.numContainers; i++ {
 					containerCount := 0
 					for _, containerName := range containerNames {
-						if strings.Contains(containerName, pod.getContainerName(i)) {
+						if containerName == pod.getContainerName(i) {
 							containerCount += 1
 						}
 					}
@@ -245,39 +271,6 @@ func containerGCTest(f *framework.Framework, test testRun) {
 	})
 }
 
-// Runs containerGCTest using the docker runtime.
-func dockerContainerGCTest(f *framework.Framework, test testRun) {
-	var runtime libdocker.Interface
-	BeforeEach(func() {
-		runtime = libdocker.ConnectToDockerOrDie(
-			defaultDockerEndpoint,
-			defaultRuntimeRequestTimeoutDuration,
-			defaultImagePullProgressDeadline,
-			false,
-			false,
-		)
-	})
-	for _, pod := range test.testPods {
-		// Initialize the getContainerNames function to use the libdocker api
-		thisPrefix := pod.containerPrefix
-		pod.getContainerNames = func() ([]string, error) {
-			relevantContainers := []string{}
-			dockerContainers, err := libdocker.GetKubeletDockerContainers(runtime, true)
-			if err != nil {
-				return relevantContainers, err
-			}
-			for _, container := range dockerContainers {
-				// only look for containers from this testspec
-				if strings.Contains(container.Names[0], thisPrefix) {
-					relevantContainers = append(relevantContainers, container.Names[0])
-				}
-			}
-			return relevantContainers, nil
-		}
-	}
-	containerGCTest(f, test)
-}
-
 func getPods(specs []*testPodSpec) (pods []*v1.Pod) {
 	for _, spec := range specs {
 		By(fmt.Sprintf("Creating %v containers with restartCount: %v", spec.numContainers, spec.restartCount))
diff --git a/test/e2e_node/restart_test.go b/test/e2e_node/restart_test.go
index 747d0b9799..ac37cf606f 100644
--- a/test/e2e_node/restart_test.go
+++ b/test/e2e_node/restart_test.go
@@ -28,6 +28,7 @@ import (
 	"os/exec"
 
 	. "github.com/onsi/ginkgo"
+	. "github.com/onsi/gomega"
 	"k8s.io/api/core/v1"
 	testutils "k8s.io/kubernetes/test/utils"
 )
@@ -75,11 +76,11 @@ var _ = framework.KubeDescribe("Restart [Serial] [Slow] [Disruptive]", func() {
 	)
 
 	f := framework.NewDefaultFramework("restart-test")
-	Context("Docker Daemon", func() {
+	Context("Container Runtime", func() {
 		Context("Network", func() {
 			It("should recover from ip leak", func() {
 
-				pods := newTestPods(podCount, false, framework.GetPauseImageNameForHostArch(), "restart-docker-test")
+				pods := newTestPods(podCount, false, framework.GetPauseImageNameForHostArch(), "restart-container-runtime-test")
 				By(fmt.Sprintf("Trying to create %d pods on node", len(pods)))
 				createBatchPodWithRateControl(f, pods, podCreationInterval)
 				defer deletePodsSync(f, pods)
@@ -88,34 +89,47 @@ var _ = framework.KubeDescribe("Restart [Serial] [Slow] [Disruptive]", func() {
 				// startTimeout fit on the node and the node is now saturated.
 				runningPods := waitForPods(f, podCount, startTimeout)
 				if len(runningPods) < minPods {
-					framework.Failf("Failed to start %d pods, cannot test that restarting docker doesn't leak IPs", minPods)
+					framework.Failf("Failed to start %d pods, cannot test that restarting container runtime doesn't leak IPs", minPods)
 				}
 
 				for i := 0; i < restartCount; i += 1 {
-					By(fmt.Sprintf("Restarting Docker Daemon iteration %d", i))
-
-					// TODO: Find a uniform way to deal with systemctl/initctl/service operations. #34494
-					if stdout, err := exec.Command("sudo", "systemctl", "restart", "docker").CombinedOutput(); err != nil {
-						framework.Logf("Failed to trigger docker restart with systemd/systemctl: %v, stdout: %q", err, string(stdout))
-						if stdout, err = exec.Command("sudo", "service", "docker", "restart").CombinedOutput(); err != nil {
-							framework.Failf("Failed to trigger docker restart with upstart/service: %v, stdout: %q", err, string(stdout))
+					By(fmt.Sprintf("Killing container runtime iteration %d", i))
+					// Wait for the container runtime to be running.
+					var pid int
+					Eventually(func() error {
+						runtimePids, err := getPidsForProcess(framework.TestContext.ContainerRuntimeProcessName, framework.TestContext.ContainerRuntimePidFile)
+						if err != nil {
+							return err
 						}
+						if len(runtimePids) != 1 {
+							return fmt.Errorf("unexpected container runtime pid list: %+v", runtimePids)
+						}
+						// Make sure the container runtime is actually running; the pid read from the pid file may no longer be running.
+						pid = runtimePids[0]
+						if _, err := exec.Command("sudo", "ps", "-p", fmt.Sprintf("%d", pid)).CombinedOutput(); err != nil {
+							return err
+						}
+						return nil
+					}, 1*time.Minute, 2*time.Second).Should(BeNil())
+					if stdout, err := exec.Command("sudo", "kill", fmt.Sprintf("%d", pid)).CombinedOutput(); err != nil {
+						framework.Failf("Failed to kill container runtime (pid=%d): %v, stdout: %q", pid, err, string(stdout))
 					}
+					// Assume that the container runtime will be restarted by systemd/supervisord etc.
 					time.Sleep(20 * time.Second)
 				}
 
 				By("Checking currently Running/Ready pods")
 				postRestartRunningPods := waitForPods(f, len(runningPods), recoverTimeout)
 				if len(postRestartRunningPods) == 0 {
-					framework.Failf("Failed to start *any* pods after docker restart, this might indicate an IP leak")
+					framework.Failf("Failed to start *any* pods after container runtime restart, this might indicate an IP leak")
 				}
 				By("Confirm no containers have terminated")
 				for _, pod := range postRestartRunningPods {
					if c := testutils.TerminatedContainers(pod); len(c) != 0 {
-						framework.Failf("Pod %q has failed containers %+v after docker restart, this might indicate an IP leak", pod.Name, c)
+						framework.Failf("Pod %q has failed containers %+v after container runtime restart, this might indicate an IP leak", pod.Name, c)
 					}
 				}
-				By(fmt.Sprintf("Docker restart test passed with %d pods", len(postRestartRunningPods)))
+				By(fmt.Sprintf("Container runtime restart test passed with %d pods", len(postRestartRunningPods)))
 			})
 		})
 	})
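
Editor's note (not part of the patch series): the sketch below illustrates the pattern both commits converge on, talking to the runtime through CRI rather than shelling out to `docker ps`. It uses only APIs the series itself introduces or exercises (getCRIClient, ListContainers, ContainerFilter); the helper name dumpRunningContainers is hypothetical and not something the patches add.

package e2e_node

import (
	runtimeapi "k8s.io/kubernetes/pkg/kubelet/apis/cri/v1alpha1/runtime"
	"k8s.io/kubernetes/test/e2e/framework"
)

// dumpRunningContainers logs every running container known to the CRI
// runtime, mirroring the AfterEach debug dump in container_manager_test.go.
// Illustrative only; not part of this patch series.
func dumpRunningContainers() error {
	// getCRIClient (added in patch 1/2) returns the runtime and image
	// service clients; the image service client is unused here.
	rs, _, err := getCRIClient()
	if err != nil {
		return err
	}
	// Filter server-side so only running containers come back.
	containers, err := rs.ListContainers(&runtimeapi.ContainerFilter{
		State: &runtimeapi.ContainerStateValue{
			State: runtimeapi.ContainerState_CONTAINER_RUNNING,
		},
	})
	if err != nil {
		return err
	}
	for _, c := range containers {
		framework.Logf("%+v", c)
	}
	return nil
}

This works across runtimes because the kubelet attaches the io.kubernetes.pod.name, io.kubernetes.pod.namespace, and io.kubernetes.container.name labels to every container it creates. That is the same property the garbage-collector and CPU-manager tests rely on when they filter by the types.Kubernetes*Label constants instead of parsing docker container names.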