From ae0dde5c7dae6052eb5691366f2dbc3c45d3b892 Mon Sep 17 00:00:00 2001
From: Davanum Srinivas
Date: Tue, 28 Aug 2018 08:32:54 -0400
Subject: [PATCH] Remove e2e-image-puller

A long time ago, we added image prepulling as a workaround for the
overwhelming amount of flakiness caused by pulling images during the
tests. This functionality has been broken since we switched to a COS
image, where mounting the `docker` binary into `busybox` stopped
working. What is left is dead code that we should clean up.

Change-Id: I538171a5c1d9361eee7f9e0a99655b88b1721e3e
---
 build/lib/release.sh                            |   1 -
 cluster/gce/config-test.sh                      |   4 -
 cluster/gce/gci/configure-helper.sh             |  11 --
 cluster/gce/manifests/BUILD                     |   1 -
 .../gce/manifests/e2e-image-puller.manifest     | 117 ------------------
 cluster/gce/util.sh                             |   1 -
 test/e2e/apps/network_partition.go              |   3 +-
 test/e2e/e2e.go                                 |  32 +----
 test/e2e/framework/framework.go                 |  20 ---
 test/e2e/framework/util.go                      |  11 +-
 test/e2e/lifecycle/resize_nodes.go              |   7 +-
 test/e2e/node/kubelet_perf.go                   |   7 --
 .../equivalence_cache_predicates.go             |   5 +-
 test/e2e/scheduling/priorities.go               |   3 +-
 14 files changed, 8 insertions(+), 215 deletions(-)
 delete mode 100644 cluster/gce/manifests/e2e-image-puller.manifest

diff --git a/build/lib/release.sh b/build/lib/release.sh
index dd5edef2b1..1275e6e1d9 100644
--- a/build/lib/release.sh
+++ b/build/lib/release.sh
@@ -415,7 +415,6 @@ function kube::release::package_kube_manifests_tarball() {
   cp "${src_dir}/kube-controller-manager.manifest" "${dst_dir}"
   cp "${src_dir}/kube-addon-manager.yaml" "${dst_dir}"
   cp "${src_dir}/glbc.manifest" "${dst_dir}"
-  cp "${src_dir}/e2e-image-puller.manifest" "${dst_dir}/"
   cp "${src_dir}/etcd-empty-dir-cleanup.yaml" "${dst_dir}/"
   local internal_manifest
   for internal_manifest in $(ls "${src_dir}" | grep "^internal-*"); do
diff --git a/cluster/gce/config-test.sh b/cluster/gce/config-test.sh
index e3a8165b30..0290f41878 100755
--- a/cluster/gce/config-test.sh
+++ b/cluster/gce/config-test.sh
@@ -386,10 +386,6 @@ HAIRPIN_MODE="${HAIRPIN_MODE:-hairpin-veth}" # promiscuous-bridge, hairpin-veth,
 # Optional: if set to true, kube-up will configure the cluster to run e2e tests.
 E2E_STORAGE_TEST_ENVIRONMENT=${KUBE_E2E_STORAGE_TEST_ENVIRONMENT:-false}
 
-# Optional: if set to true, a image puller is deployed. Only for use in e2e clusters.
-# TODO: Pipe this through GKE e2e clusters once we know it helps.
-PREPULL_E2E_IMAGES="${PREPULL_E2E_IMAGES:-true}"
-
 # Evict pods whenever compute resource availability on the nodes gets below a threshold.
 EVICTION_HARD="${EVICTION_HARD:-memory.available<250Mi,nodefs.available<10%,nodefs.inodesFree<5%}"
diff --git a/cluster/gce/gci/configure-helper.sh b/cluster/gce/gci/configure-helper.sh
index 331213b150..d4b89ddabd 100644
--- a/cluster/gce/gci/configure-helper.sh
+++ b/cluster/gce/gci/configure-helper.sh
@@ -2526,14 +2526,6 @@ function setup-node-termination-handler-manifest {
   fi
 }
 
-# Starts an image-puller - used in test clusters.
-function start-image-puller {
-  echo "Start image-puller"
-  local -r e2e_image_puller_manifest="${KUBE_HOME}/kube-manifests/kubernetes/gci-trusty/e2e-image-puller.manifest"
-  update-container-runtime "${e2e_image_puller_manifest}"
-  cp "${e2e_image_puller_manifest}" /etc/kubernetes/manifests/
-}
-
 # Setups manifests for ingress controller and gce-specific policies for service controller.
 function start-lb-controller {
   setup-addon-manifests "addons" "loadbalancing"
@@ -2745,9 +2737,6 @@ function main() {
   if [[ "${KUBE_PROXY_DAEMONSET:-}" != "true" ]]; then
     start-kube-proxy
   fi
-  if [[ "${PREPULL_E2E_IMAGES:-}" == "true" ]]; then
-    start-image-puller
-  fi
   if [[ "${ENABLE_NODE_PROBLEM_DETECTOR:-}" == "standalone" ]]; then
     start-node-problem-detector
   fi
diff --git a/cluster/gce/manifests/BUILD b/cluster/gce/manifests/BUILD
index f2c3c279b7..0530be7531 100644
--- a/cluster/gce/manifests/BUILD
+++ b/cluster/gce/manifests/BUILD
@@ -15,7 +15,6 @@ filegroup(
     srcs = [
         "abac-authz-policy.jsonl",
        "cluster-autoscaler.manifest",
-        "e2e-image-puller.manifest",
        "etcd.manifest",
        "etcd-empty-dir-cleanup.yaml",
        "glbc.manifest",
diff --git a/cluster/gce/manifests/e2e-image-puller.manifest b/cluster/gce/manifests/e2e-image-puller.manifest
deleted file mode 100644
index cf2c8f2825..0000000000
--- a/cluster/gce/manifests/e2e-image-puller.manifest
+++ /dev/null
@@ -1,117 +0,0 @@
-# e2e-image-puller seeds nodes in an e2e cluster with test images.
-apiVersion: v1
-kind: Pod
-metadata:
-  name: e2e-image-puller
-  namespace: kube-system
-  labels:
-    name: e2e-image-puller
-spec:
-  containers:
-  - name: image-puller
-    resources:
-      requests:
-        cpu: 50m
-      limits:
-        cpu: 50m
-    image: k8s.gcr.io/busybox:1.24
-    # TODO: Replace this with a go script that pulls in parallel?
-    # Currently it takes ~5m to pull all e2e images, so this is OK, and
-    # fewer moving parts is always better.
-    # TODO: Replace the hardcoded image list with an autogen list; the list is
-    # currently hard-coded for static verification. It was generated via:
-    # grep -Iiroh "gcr.io/.*" "${KUBE_ROOT}/test/e2e" | \
-    #   sed -e "s/[,\")}]//g" | awk '{print $1}' | sort | uniq | tr '\n' ' '
-    # We always want the subshell to exit 0 so this pod doesn't end up
-    # blocking tests in an Error state.
-    command:
-    - /bin/sh
-    - -c
-    - >
-      for i in
-      k8s.gcr.io/alpine-with-bash:1.0
-      k8s.gcr.io/apparmor-loader:0.1
-      k8s.gcr.io/busybox:1.24
-      k8s.gcr.io/dnsutils:e2e
-      k8s.gcr.io/e2e-net-amd64:1.0
-      k8s.gcr.io/echoserver:1.10
-      k8s.gcr.io/eptest:0.1
-      k8s.gcr.io/fakegitserver:0.1
-      k8s.gcr.io/galera-install:0.1
-      k8s.gcr.io/invalid-image:invalid-tag
-      k8s.gcr.io/iperf:e2e
-      k8s.gcr.io/jessie-dnsutils:e2e
-      k8s.gcr.io/k8s-dns-dnsmasq-amd64:1.14.5
-      k8s.gcr.io/liveness:e2e
-      k8s.gcr.io/logs-generator:v0.1.0
-      k8s.gcr.io/mounttest:0.8
-      k8s.gcr.io/mounttest-user:0.5
-      k8s.gcr.io/mysql-galera:e2e
-      k8s.gcr.io/mysql-healthz:1.0
-      k8s.gcr.io/netexec:1.4
-      k8s.gcr.io/netexec:1.5
-      k8s.gcr.io/netexec:1.7
-      k8s.gcr.io/nettest:1.7
-      k8s.gcr.io/nginx:1.7.9
-      k8s.gcr.io/nginx-ingress-controller:0.9.0-beta.1
-      k8s.gcr.io/nginx-slim:0.7
-      k8s.gcr.io/nginx-slim:0.8
-      k8s.gcr.io/node-problem-detector:v0.3.0
-      k8s.gcr.io/pause
-      k8s.gcr.io/porter:4524579c0eb935c056c8e75563b4e1eda31587e0
-      k8s.gcr.io/portforwardtester:1.2
-      k8s.gcr.io/redis-install-3.2.0:e2e
-      k8s.gcr.io/resource_consumer:beta4
-      k8s.gcr.io/resource_consumer/controller:beta4
-      gcr.io/kubernetes-e2e-test-images/serve-hostname-amd64:1.1
-      gcr.io/kubernetes-e2e-test-images/hostexec-amd64:1.1
-      k8s.gcr.io/servicelb:0.1
-      k8s.gcr.io/test-webserver:e2e
-      k8s.gcr.io/update-demo:kitten
-      k8s.gcr.io/update-demo:nautilus
-      gcr.io/kubernetes-e2e-test-images/volume-ceph:0.1
-      gcr.io/kubernetes-e2e-test-images/volume-gluster:0.2
-      gcr.io/kubernetes-e2e-test-images/volume-iscsi:0.1
-      gcr.io/kubernetes-e2e-test-images/volume-nfs:0.8
-      gcr.io/kubernetes-e2e-test-images/volume-rbd:0.1
-      k8s.gcr.io/zookeeper-install-3.5.0-alpha:e2e
-      gcr.io/google_samples/gb-redisslave:nonexistent
-      ; do echo $(date '+%X') pulling $i; crictl pull $i 1>/dev/null; done; exit 0;
-    securityContext:
-      privileged: true
-    volumeMounts:
-    - mountPath: {{ container_runtime_endpoint }}
-      name: socket
-    - mountPath: /usr/bin/crictl
-      name: crictl
-    - mountPath: /etc/crictl.yaml
-      name: config
-  # Add a container that runs a health-check
-  - name: nethealth-check
-    resources:
-      requests:
-        cpu: 50m
-      limits:
-        cpu: 50m
-    image: k8s.gcr.io/kube-nethealth-amd64:1.0
-    command:
-    - /bin/sh
-    - -c
-    - "/usr/bin/nethealth || true"
-  volumes:
-  - hostPath:
-      path: {{ container_runtime_endpoint }}
-      type: Socket
-    name: socket
-  - hostPath:
-      path: /home/kubernetes/bin/crictl
-      type: File
-    name: crictl
-  - hostPath:
-      path: /etc/crictl.yaml
-      type: File
-    name: config
-  # This pod is really fire-and-forget.
-  restartPolicy: OnFailure
-  # This pod needs hostNetworking for true VM perf measurement as well as avoiding cbr0 issues
-  hostNetwork: true
diff --git a/cluster/gce/util.sh b/cluster/gce/util.sh
index 155a282c9f..a031bd0851 100755
--- a/cluster/gce/util.sh
+++ b/cluster/gce/util.sh
@@ -864,7 +864,6 @@ KUBELET_CERT: $(yaml-quote ${KUBELET_CERT_BASE64:-})
 KUBELET_KEY: $(yaml-quote ${KUBELET_KEY_BASE64:-})
 NETWORK_PROVIDER: $(yaml-quote ${NETWORK_PROVIDER:-})
 NETWORK_POLICY_PROVIDER: $(yaml-quote ${NETWORK_POLICY_PROVIDER:-})
-PREPULL_E2E_IMAGES: $(yaml-quote ${PREPULL_E2E_IMAGES:-})
 HAIRPIN_MODE: $(yaml-quote ${HAIRPIN_MODE:-})
 E2E_STORAGE_TEST_ENVIRONMENT: $(yaml-quote ${E2E_STORAGE_TEST_ENVIRONMENT:-})
 KUBE_DOCKER_REGISTRY: $(yaml-quote ${KUBE_DOCKER_REGISTRY:-})
diff --git a/test/e2e/apps/network_partition.go b/test/e2e/apps/network_partition.go
index 140ca97c06..8fbebc9680 100644
--- a/test/e2e/apps/network_partition.go
+++ b/test/e2e/apps/network_partition.go
@@ -106,12 +106,11 @@ var _ = SIGDescribe("Network Partition [Disruptive] [Slow]", func() {
 	f := framework.NewDefaultFramework("network-partition")
 	var c clientset.Interface
 	var ns string
-	ignoreLabels := framework.ImagePullerLabels
 
 	BeforeEach(func() {
 		c = f.ClientSet
 		ns = f.Namespace.Name
-		_, err := framework.GetPodsInNamespace(c, ns, ignoreLabels)
+		_, err := framework.GetPodsInNamespace(c, ns, map[string]string{})
 		Expect(err).NotTo(HaveOccurred())
 
 		// TODO(foxish): Re-enable testing on gce after kubernetes#56787 is fixed.
diff --git a/test/e2e/e2e.go b/test/e2e/e2e.go
index 11fef7d0b9..8b20070c53 100644
--- a/test/e2e/e2e.go
+++ b/test/e2e/e2e.go
@@ -183,7 +183,7 @@ var _ = ginkgo.SynchronizedBeforeSuite(func() []byte {
 	// #41007. To avoid those pods preventing the whole test runs (and just
 	// wasting the whole run), we allow for some not-ready pods (with the
 	// number equal to the number of allowed not-ready nodes).
-	if err := framework.WaitForPodsRunningReady(c, metav1.NamespaceSystem, int32(framework.TestContext.MinStartupPods), int32(framework.TestContext.AllowedNotReadyNodes), podStartupTimeout, framework.ImagePullerLabels); err != nil {
+	if err := framework.WaitForPodsRunningReady(c, metav1.NamespaceSystem, int32(framework.TestContext.MinStartupPods), int32(framework.TestContext.AllowedNotReadyNodes), podStartupTimeout, map[string]string{}); err != nil {
 		framework.DumpAllNamespaceInfo(c, metav1.NamespaceSystem)
 		framework.LogFailedContainers(c, metav1.NamespaceSystem, framework.Logf)
 		runKubernetesServiceTestContainer(c, metav1.NamespaceDefault)
@@ -194,36 +194,6 @@ var _ = ginkgo.SynchronizedBeforeSuite(func() []byte {
 		framework.Logf("WARNING: Waiting for all daemonsets to be ready failed: %v", err)
 	}
 
-	if err := framework.WaitForPodsSuccess(c, metav1.NamespaceSystem, framework.ImagePullerLabels, framework.ImagePrePullingTimeout); err != nil {
-		// There is no guarantee that the image pulling will succeed in 3 minutes
-		// and we don't even run the image puller on all platforms (including GKE).
-		// We wait for it so we get an indication of failures in the logs, and to
-		// maximize benefit of image pre-pulling.
- framework.Logf("WARNING: Image pulling pods failed to enter success in %v: %v", framework.ImagePrePullingTimeout, err) - } - - // Dump the output of the nethealth containers only once per run - if framework.TestContext.DumpLogsOnFailure { - logFunc := framework.Logf - if framework.TestContext.ReportDir != "" { - filePath := path.Join(framework.TestContext.ReportDir, "nethealth.txt") - file, err := os.Create(filePath) - if err != nil { - framework.Logf("Failed to create a file with network health data %v: %v\nPrinting to stdout", filePath, err) - } else { - defer file.Close() - if err = file.Chmod(0644); err != nil { - framework.Logf("Failed to chmod to 644 of %v: %v", filePath, err) - } - logFunc = framework.GetLogToFileFunc(file) - framework.Logf("Dumping network health container logs from all nodes to file %v", filePath) - } - } else { - framework.Logf("Dumping network health container logs from all nodes...") - } - framework.LogContainersInPodsWithLabels(c, metav1.NamespaceSystem, framework.ImagePullerLabels, "nethealth", logFunc) - } - // Log the version of the server and this client. framework.Logf("e2e test version: %s", version.Get().GitVersion) diff --git a/test/e2e/framework/framework.go b/test/e2e/framework/framework.go index fcb1959b41..2e4ca93a70 100644 --- a/test/e2e/framework/framework.go +++ b/test/e2e/framework/framework.go @@ -21,7 +21,6 @@ import ( "bytes" "fmt" "os" - "path" "strings" "sync" "time" @@ -340,25 +339,6 @@ func (f *Framework) AfterEach() { if !f.SkipNamespaceCreation { DumpAllNamespaceInfo(f.ClientSet, f.Namespace.Name) } - - logFunc := Logf - if TestContext.ReportDir != "" { - filePath := path.Join(TestContext.ReportDir, "image-puller.txt") - file, err := os.Create(filePath) - if err != nil { - By(fmt.Sprintf("Failed to create a file with image-puller data %v: %v\nPrinting to stdout", filePath, err)) - } else { - By(fmt.Sprintf("Dumping a list of prepulled images on each node to file %v", filePath)) - defer file.Close() - if err = file.Chmod(0644); err != nil { - Logf("Failed to chmod to 644 of %v: %v", filePath, err) - } - logFunc = GetLogToFileFunc(file) - } - } else { - By("Dumping a list of prepulled images on each node...") - } - LogContainersInPodsWithLabels(f.ClientSet, metav1.NamespaceSystem, ImagePullerLabels, "image-puller", logFunc) } if TestContext.GatherKubeSystemResourceUsageData != "false" && TestContext.GatherKubeSystemResourceUsageData != "none" && f.gatherer != nil { diff --git a/test/e2e/framework/util.go b/test/e2e/framework/util.go index 2b8786459c..1ef4a3e683 100644 --- a/test/e2e/framework/util.go +++ b/test/e2e/framework/util.go @@ -201,19 +201,10 @@ const ( // ssh port sshPort = "22" - - // ImagePrePullingTimeout is the time we wait for the e2e-image-puller - // static pods to pull the list of seeded images. If they don't pull - // images within this time we simply log their output and carry on - // with the tests. - ImagePrePullingTimeout = 5 * time.Minute ) var ( BusyBoxImage = imageutils.GetE2EImage(imageutils.BusyBox) - // Label allocated to the image puller static pod that runs on each node - // before e2es. - ImagePullerLabels = map[string]string{"name": "e2e-image-puller"} // For parsing Kubectl version for version-skewed testing. gitVersionRegexp = regexp.MustCompile("GitVersion:\"(v.+?)\"") @@ -634,7 +625,7 @@ func WaitForPodsSuccess(c clientset.Interface, ns string, successPodLabels map[s // // If ignoreLabels is not empty, pods matching this selector are ignored. 
 func WaitForPodsRunningReady(c clientset.Interface, ns string, minPods, allowedNotReadyPods int32, timeout time.Duration, ignoreLabels map[string]string) error {
-	ignoreSelector := labels.SelectorFromSet(ignoreLabels)
+	ignoreSelector := labels.SelectorFromSet(map[string]string{})
 	start := time.Now()
 	Logf("Waiting up to %v for all pods (need at least %d) in namespace '%s' to be running and ready", timeout, minPods, ns)
diff --git a/test/e2e/lifecycle/resize_nodes.go b/test/e2e/lifecycle/resize_nodes.go
index fe03db808b..99323a0473 100644
--- a/test/e2e/lifecycle/resize_nodes.go
+++ b/test/e2e/lifecycle/resize_nodes.go
@@ -45,13 +45,12 @@ var _ = SIGDescribe("Nodes [Disruptive]", func() {
 	var systemPodsNo int32
 	var c clientset.Interface
 	var ns string
-	ignoreLabels := framework.ImagePullerLabels
 	var group string
 
 	BeforeEach(func() {
 		c = f.ClientSet
 		ns = f.Namespace.Name
-		systemPods, err := framework.GetPodsInNamespace(c, ns, ignoreLabels)
+		systemPods, err := framework.GetPodsInNamespace(c, ns, map[string]string{})
 		Expect(err).NotTo(HaveOccurred())
 		systemPodsNo = int32(len(systemPods))
 		if strings.Index(framework.TestContext.CloudConfig.NodeInstanceGroup, ",") >= 0 {
@@ -104,10 +103,8 @@ var _ = SIGDescribe("Nodes [Disruptive]", func() {
 			// Many e2e tests assume that the cluster is fully healthy before they start. Wait until
 			// the cluster is restored to health.
 			By("waiting for system pods to successfully restart")
-			err := framework.WaitForPodsRunningReady(c, metav1.NamespaceSystem, systemPodsNo, 0, framework.PodReadyBeforeTimeout, ignoreLabels)
+			err := framework.WaitForPodsRunningReady(c, metav1.NamespaceSystem, systemPodsNo, 0, framework.PodReadyBeforeTimeout, map[string]string{})
 			Expect(err).NotTo(HaveOccurred())
-			By("waiting for image prepulling pods to complete")
-			framework.WaitForPodsSuccess(c, metav1.NamespaceSystem, framework.ImagePullerLabels, framework.ImagePrePullingTimeout)
 		})
 
 		It("should be able to delete nodes", func() {
diff --git a/test/e2e/node/kubelet_perf.go b/test/e2e/node/kubelet_perf.go
index 029155da6c..98b8a9cd6b 100644
--- a/test/e2e/node/kubelet_perf.go
+++ b/test/e2e/node/kubelet_perf.go
@@ -21,7 +21,6 @@ import (
 	"strings"
 	"time"
 
-	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/util/sets"
 	"k8s.io/apimachinery/pkg/util/uuid"
 	clientset "k8s.io/client-go/kubernetes"
@@ -199,12 +198,6 @@ var _ = SIGDescribe("Kubelet [Serial] [Slow]", func() {
 	var rm *framework.ResourceMonitor
 
 	BeforeEach(func() {
-		// Wait until image prepull pod has completed so that they wouldn't
-		// affect the runtime cpu usage. Fail the test if prepulling cannot
-		// finish in time.
-		if err := framework.WaitForPodsSuccess(f.ClientSet, metav1.NamespaceSystem, framework.ImagePullerLabels, imagePrePullingLongTimeout); err != nil {
-			framework.Failf("Image puller didn't complete in %v, not running resource usage test since the metrics might be adultrated", imagePrePullingLongTimeout)
-		}
 		nodes := framework.GetReadySchedulableNodesOrDie(f.ClientSet)
 		nodeNames = sets.NewString()
 		for _, node := range nodes.Items {
diff --git a/test/e2e/scheduling/equivalence_cache_predicates.go b/test/e2e/scheduling/equivalence_cache_predicates.go
index 3c52b9c953..bd705fb53c 100644
--- a/test/e2e/scheduling/equivalence_cache_predicates.go
+++ b/test/e2e/scheduling/equivalence_cache_predicates.go
@@ -46,7 +46,6 @@ var _ = framework.KubeDescribe("EquivalenceCache [Serial]", func() {
 	var systemPodsNo int
 	var ns string
 	f := framework.NewDefaultFramework("equivalence-cache")
-	ignoreLabels := framework.ImagePullerLabels
 
 	BeforeEach(func() {
 		cs = f.ClientSet
@@ -60,7 +59,7 @@ var _ = framework.KubeDescribe("EquivalenceCache [Serial]", func() {
 		// Every test case in this suite assumes that cluster add-on pods stay stable and
 		// cannot be run in parallel with any other test that touches Nodes or Pods.
 		// It is so because we need to have precise control on what's running in the cluster.
-		systemPods, err := framework.GetPodsInNamespace(cs, ns, ignoreLabels)
+		systemPods, err := framework.GetPodsInNamespace(cs, ns, map[string]string{})
 		Expect(err).NotTo(HaveOccurred())
 		systemPodsNo = 0
 		for _, pod := range systemPods {
@@ -69,7 +68,7 @@ var _ = framework.KubeDescribe("EquivalenceCache [Serial]", func() {
 			}
 		}
 
-		err = framework.WaitForPodsRunningReady(cs, api.NamespaceSystem, int32(systemPodsNo), int32(systemPodsNo), framework.PodReadyBeforeTimeout, ignoreLabels)
+		err = framework.WaitForPodsRunningReady(cs, api.NamespaceSystem, int32(systemPodsNo), int32(systemPodsNo), framework.PodReadyBeforeTimeout, map[string]string{})
 		Expect(err).NotTo(HaveOccurred())
 
 		for _, node := range nodeList.Items {
diff --git a/test/e2e/scheduling/priorities.go b/test/e2e/scheduling/priorities.go
index 484745460d..bc1636cbfd 100644
--- a/test/e2e/scheduling/priorities.go
+++ b/test/e2e/scheduling/priorities.go
@@ -63,7 +63,6 @@ var _ = SIGDescribe("SchedulerPriorities [Serial]", func() {
 	var systemPodsNo int
 	var ns string
 	f := framework.NewDefaultFramework("sched-priority")
-	ignoreLabels := framework.ImagePullerLabels
 
 	AfterEach(func() {
 	})
@@ -78,7 +77,7 @@ var _ = SIGDescribe("SchedulerPriorities [Serial]", func() {
 		err := framework.CheckTestingNSDeletedExcept(cs, ns)
 		framework.ExpectNoError(err)
 
-		err = framework.WaitForPodsRunningReady(cs, metav1.NamespaceSystem, int32(systemPodsNo), 0, framework.PodReadyBeforeTimeout, ignoreLabels)
+		err = framework.WaitForPodsRunningReady(cs, metav1.NamespaceSystem, int32(systemPodsNo), 0, framework.PodReadyBeforeTimeout, map[string]string{})
 		Expect(err).NotTo(HaveOccurred())
 	})
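
---

Note for clusters that still want warm image caches: with the static pod gone,
images are pulled on first use. A possible replacement, sketched below and not
part of this patch, is a DaemonSet whose init containers pull one image each.
The package name, DaemonSet name, labels, image list, and pause tag here are
illustrative assumptions, and the Create call uses the context-free client-go
signature contemporary with this change (pre-1.17).

// Hypothetical sketch, not part of this patch: prepull e2e images with a
// DaemonSet instead of the removed e2e-image-puller static pod.
package prepull

import (
	"fmt"

	appsv1 "k8s.io/api/apps/v1"
	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	clientset "k8s.io/client-go/kubernetes"
)

// NewImagePullerDaemonSet builds a DaemonSet whose init containers pull one
// image each on every node, sequentially (mirroring the old shell loop). The
// pause container then keeps the pod Running so readiness checks count it.
func NewImagePullerDaemonSet(images []string) *appsv1.DaemonSet {
	podLabels := map[string]string{"name": "img-prepull"} // illustrative label
	var pullers []v1.Container
	for i, image := range images {
		pullers = append(pullers, v1.Container{
			Name:  fmt.Sprintf("pull-%d", i),
			Image: image,
			// Pulling the image is the point; this assumes the image ships a
			// /bin/true. Shell-less images would need a different no-op.
			Command: []string{"/bin/true"},
		})
	}
	return &appsv1.DaemonSet{
		ObjectMeta: metav1.ObjectMeta{Name: "img-prepull", Labels: podLabels},
		Spec: appsv1.DaemonSetSpec{
			Selector: &metav1.LabelSelector{MatchLabels: podLabels},
			Template: v1.PodTemplateSpec{
				ObjectMeta: metav1.ObjectMeta{Labels: podLabels},
				Spec: v1.PodSpec{
					InitContainers: pullers,
					Containers: []v1.Container{{
						Name:  "pause",
						Image: "k8s.gcr.io/pause:3.1", // illustrative tag
					}},
				},
			},
		},
	}
}

// CreateImagePuller submits the DaemonSet to the given namespace.
func CreateImagePuller(c clientset.Interface, ns string, images []string) error {
	_, err := c.AppsV1().DaemonSets(ns).Create(NewImagePullerDaemonSet(images))
	return err
}

Because the pod only becomes Ready after every init container succeeds, a
suite can gate on the DaemonSet's ready count instead of the removed
WaitForPodsSuccess/ImagePullerLabels pair.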