Add an e2e image puller static pod

pull/6/head
Prashanth B 2016-05-18 02:16:32 +00:00
parent 4215fe57a5
commit 730555b09b
13 changed files with 172 additions and 19 deletions

View File

@@ -505,6 +505,7 @@ CA_CERT: $(yaml-quote ${CA_CERT_BASE64:-})
KUBELET_CERT: $(yaml-quote ${KUBELET_CERT_BASE64:-})
KUBELET_KEY: $(yaml-quote ${KUBELET_KEY_BASE64:-})
NETWORK_PROVIDER: $(yaml-quote ${NETWORK_PROVIDER:-})
PREPULL_E2E_IMAGES: $(yaml-quote ${PREPULL_E2E_IMAGES:-})
HAIRPIN_MODE: $(yaml-quote ${HAIRPIN_MODE:-})
OPENCONTRAIL_TAG: $(yaml-quote ${OPENCONTRAIL_TAG:-})
OPENCONTRAIL_KUBERNETES_TAG: $(yaml-quote ${OPENCONTRAIL_KUBERNETES_TAG:-})

View File

@@ -158,3 +158,7 @@ HAIRPIN_MODE="${HAIRPIN_MODE:-promiscuous-bridge}" # promiscuous-bridge, hairpin
# Optional: if set to true, kube-up will configure the cluster to run e2e tests.
E2E_STORAGE_TEST_ENVIRONMENT=${KUBE_E2E_STORAGE_TEST_ENVIRONMENT:-false}
# Optional: if set to true, an image puller is deployed. Only for use in e2e clusters.
# TODO: Pipe this through GKE e2e clusters once we know it helps.
PREPULL_E2E_IMAGES="${PREPULL_E2E_IMAGES:-true}"

View File

@@ -440,6 +440,7 @@ dns_server: '$(echo "$DNS_SERVER_IP" | sed -e "s/'/''/g")'
dns_domain: '$(echo "$DNS_DOMAIN" | sed -e "s/'/''/g")'
admission_control: '$(echo "$ADMISSION_CONTROL" | sed -e "s/'/''/g")'
network_provider: '$(echo "$NETWORK_PROVIDER" | sed -e "s/'/''/g")'
prepull_e2e_images: '$(echo "$PREPULL_E2E_IMAGES" | sed -e "s/'/''/g")'
hairpin_mode: '$(echo "$HAIRPIN_MODE" | sed -e "s/'/''/g")'
opencontrail_tag: '$(echo "$OPENCONTRAIL_TAG" | sed -e "s/'/''/g")'
opencontrail_kubernetes_tag: '$(echo "$OPENCONTRAIL_KUBERNETES_TAG")'

View File

@@ -0,0 +1,44 @@
# e2e-image-puller seeds nodes in an e2e cluster with test images.
apiVersion: v1
kind: Pod
metadata:
  name: e2e-image-puller
  namespace: kube-system
  labels:
    name: e2e-image-puller
spec:
  containers:
  - name: image-puller
    resources:
      requests:
        cpu: 100m
      limits:
        cpu: 100m
    image: gcr.io/google_containers/busybox:1.24
    # TODO: Replace this with a go script that pulls in parallel?
    # Currently it takes ~5m to pull all e2e images, so this is OK, and
    # fewer moving parts is always better.
    # TODO: Replace the hardcoded image list with an autogen list; the list is
    # currently hard-coded for static verification. It was generated via:
    # grep -Iiroh "gcr.io/google_.*" "${KUBE_ROOT}/test/e2e" | \
    #   sed -e "s/[,\")}]//g" | awk '{print $1}' | sort | uniq | tr '\n' ' '
    command:
    - /bin/sh
    - -c
    - "for i in gcr.io/google_containers/busybox gcr.io/google_containers/busybox:1.24 gcr.io/google_containers/dnsutils:e2e gcr.io/google_containers/eptest:0.1 gcr.io/google_containers/fakegitserver:0.1 gcr.io/google_containers/hostexec:1.2 gcr.io/google_containers/iperf:e2e gcr.io/google_containers/jessie-dnsutils:e2e gcr.io/google_containers/liveness:e2e gcr.io/google_containers/mounttest:0.2 gcr.io/google_containers/mounttest:0.5 gcr.io/google_containers/mounttest:0.6 gcr.io/google_containers/mounttest-user:0.3 gcr.io/google_containers/netexec:1.4 gcr.io/google_containers/netexec:1.5 gcr.io/google_containers/nettest:1.7 gcr.io/google_containers/nettest:1.8 gcr.io/google_containers/nginx:1.7.9 gcr.io/google_containers/nginx-slim:0.5 gcr.io/google_containers/n-way-http:1.0 gcr.io/google_containers/pause:2.0 gcr.io/google_containers/pause-amd64:3.0 gcr.io/google_containers/porter:cd5cb5791ebaa8641955f0e8c2a9bed669b1eaab gcr.io/google_containers/portforwardtester:1.0 gcr.io/google_containers/redis:e2e gcr.io/google_containers/resource_consumer:beta2 gcr.io/google_containers/serve_hostname:v1.4 gcr.io/google_containers/servicelb:0.1 gcr.io/google_containers/test-webserver:e2e gcr.io/google_containers/ubuntu:14.04 gcr.io/google_containers/update-demo:kitten gcr.io/google_containers/update-demo:nautilus gcr.io/google_containers/volume-ceph:0.1 gcr.io/google_containers/volume-gluster:0.2 gcr.io/google_containers/volume-iscsi:0.1 gcr.io/google_containers/volume-nfs:0.6 gcr.io/google_containers/volume-rbd:0.1 gcr.io/google_samples/gb-redisslave:v1; do echo $(date '+%X') pulling $i; docker pull $i 1>/dev/null; done;"
    securityContext:
      privileged: true
    volumeMounts:
    - mountPath: /var/run/docker.sock
      name: socket
    - mountPath: /usr/bin/docker
      name: docker
  volumes:
  - hostPath:
      path: /var/run/docker.sock
    name: socket
  - hostPath:
      path: /usr/bin/docker
    name: docker
  # This pod is really fire-and-forget.
  restartPolicy: Never
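The first TODO in the manifest above suggests replacing the shell loop with a Go program that pulls images in parallel. A minimal sketch of that idea (not part of this commit; the image list is abbreviated, and the docker CLI that this pod already mounts from the host is assumed) could look like:

// Hypothetical parallel puller, sketched from the TODO above; not part of this
// commit. It shells out to the same docker binary the pod mounts from the host.
package main

import (
	"log"
	"os/exec"
	"sync"
)

func main() {
	// Abbreviated; the real list is the one hard-coded in the manifest above.
	images := []string{
		"gcr.io/google_containers/busybox:1.24",
		"gcr.io/google_containers/pause:2.0",
		"gcr.io/google_containers/nettest:1.8",
	}
	const workers = 4 // pull a few images concurrently

	var wg sync.WaitGroup
	ch := make(chan string)
	for i := 0; i < workers; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			for img := range ch {
				if out, err := exec.Command("docker", "pull", img).CombinedOutput(); err != nil {
					// Failures are logged but not fatal: pre-pulling is best effort.
					log.Printf("failed to pull %s: %v\n%s", img, err, out)
					continue
				}
				log.Printf("pulled %s", img)
			}
		}()
	}
	for _, img := range images {
		ch <- img
	}
	close(ch)
	wg.Wait()
}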

View File

@@ -0,0 +1,12 @@
/etc/kubernetes/manifests/e2e-image-puller.manifest:
  file.managed:
    - source: salt://e2e-image-puller/e2e-image-puller.manifest
    - template: jinja
    - user: root
    - group: root
    - mode: 644
    - makedirs: true
    - dir_mode: 755
    - require:
      - service: docker
      - service: kubelet

View File

@@ -38,6 +38,9 @@ base:
{% endif %}
{% if pillar.get('enable_cluster_registry', '').lower() == 'true' %}
    - kube-registry-proxy
{% endif %}
{% if pillar['prepull_e2e_images'] is defined and pillar['prepull_e2e_images'].lower() == 'true' %}
    - e2e-image-puller
{% endif %}
    - logrotate
    - supervisor

View File

@@ -44,6 +44,12 @@ const (
// running and ready before any e2e tests run. It includes pulling all of
// the pods (as of 5/18/15 this is 8 pods).
podStartupTimeout = 10 * time.Minute
// imagePrePullingTimeout is the time we wait for the e2e-image-puller
// static pods to pull the list of seeded images. If they don't pull
// images within this time we simply log their output and carry on
// with the tests.
imagePrePullingTimeout = 5 * time.Minute
)
var (
@@ -119,7 +125,7 @@ var _ = ginkgo.SynchronizedBeforeSuite(func() []byte {
// cluster infrastructure pods that are being pulled or started can block
// test pods from running, and tests that ensure all pods are running and
// ready will fail).
if err := framework.WaitForPodsRunningReady(api.NamespaceSystem, int32(framework.TestContext.MinStartupPods), podStartupTimeout); err != nil {
if err := framework.WaitForPodsRunningReady(api.NamespaceSystem, int32(framework.TestContext.MinStartupPods), podStartupTimeout, framework.ImagePullerLabels); err != nil {
if c, errClient := framework.LoadClient(); errClient != nil {
framework.Logf("Unable to dump cluster information because: %v", errClient)
} else {
@@ -130,6 +136,14 @@ var _ = ginkgo.SynchronizedBeforeSuite(func() []byte {
framework.Failf("Error waiting for all pods to be running and ready: %v", err)
}
if err := framework.WaitForPodsSuccess(api.NamespaceSystem, framework.ImagePullerLabels, imagePrePullingTimeout); err != nil {
// There is no guarantee that the image pulling will succeed within
// imagePrePullingTimeout, and we don't even run the image puller on all
// platforms (including GKE). We wait for it so we get an indication of
// failures in the logs, and to maximize the benefit of image pre-pulling.
framework.Logf("WARNING: Image pulling pods failed to enter success in %v: %v", imagePrePullingTimeout, err)
}
return nil
}, func(data []byte) {

View File

@@ -209,6 +209,8 @@ func (f *Framework) AfterEach() {
// Print events if the test failed.
if CurrentGinkgoTestDescription().Failed && TestContext.DumpLogsOnFailure {
DumpAllNamespaceInfo(f.Client, f.Namespace.Name)
By(fmt.Sprintf("Dumping a list of prepulled images on each node"))
LogPodsWithLabels(f.Client, api.NamespaceSystem, ImagePullerLabels)
}
summaries := make([]TestDataSummary, 0)

View File

@@ -130,6 +130,10 @@ const (
ClaimProvisionTimeout = 5 * time.Minute
)
// Label allocated to the image puller static pod that runs on each node
// before e2es.
var ImagePullerLabels = map[string]string{"name": "e2e-image-puller"}
// SubResource proxy should have been functional in v1.0.0, but SubResource
// proxy via tunneling is known to be broken in v1.0. See
// https://github.com/kubernetes/kubernetes/pull/15224#issuecomment-146769463
@@ -413,6 +417,48 @@ func hasReplicationControllersForPod(rcs *api.ReplicationControllerList, pod api
return false
}
// WaitForPodsSuccess waits till all pods matching the given label selector
// enter the Success state. The caller is expected to only invoke this method
// once the pods have been created.
func WaitForPodsSuccess(ns string, successPodLabels map[string]string, timeout time.Duration) error {
	c, err := LoadClient()
	if err != nil {
		return err
	}
	successPodSelector := labels.SelectorFromSet(successPodLabels)
	start, badPods := time.Now(), []api.Pod{}
	if wait.PollImmediate(30*time.Second, timeout, func() (bool, error) {
		podList, err := c.Pods(ns).List(api.ListOptions{LabelSelector: successPodSelector})
		if err != nil {
			Logf("Error getting pods in namespace %q: %v", ns, err)
			return false, nil
		}
		if len(podList.Items) == 0 {
			Logf("Waiting for pods to enter Success, but no pods in %q match label %v", ns, successPodLabels)
			return true, nil
		}
		badPods = []api.Pod{}
		for _, pod := range podList.Items {
			if pod.Status.Phase != api.PodSucceeded {
				badPods = append(badPods, pod)
			}
		}
		successPods := len(podList.Items) - len(badPods)
		Logf("%d / %d pods in namespace %q are in Success state (%d seconds elapsed)",
			successPods, len(podList.Items), ns, int(time.Since(start).Seconds()))
		if len(badPods) == 0 {
			return true, nil
		}
		return false, nil
	}) != nil {
		logPodStates(badPods)
		LogPodsWithLabels(c, ns, successPodLabels)
		return fmt.Errorf("Not all pods in namespace %q are successful within %v", ns, timeout)
	}
	return nil
}
// WaitForPodsRunningReady waits up to timeout to ensure that all pods in
// namespace ns are either running and ready, or failed but controlled by a
// replication controller. Also, it ensures that at least minPods are running
@@ -420,11 +466,17 @@ func hasReplicationControllersForPod(rcs *api.ReplicationControllerList, pod api
// that it requires the list of pods on every iteration. This is useful, for
// example, in cluster startup, because the number of pods increases while
// waiting.
func WaitForPodsRunningReady(ns string, minPods int32, timeout time.Duration) error {
// Pods whose labels match ignoreLabels are skipped entirely: they count
// neither towards minPods nor as bad pods. This lets the caller decide
// whether pods that are expected to end in the "Success" state (such as the
// image puller) should be treated as "Ready" or not; callers with nothing to
// exclude pass an empty map.
func WaitForPodsRunningReady(ns string, minPods int32, timeout time.Duration, ignoreLabels map[string]string) error {
c, err := LoadClient()
if err != nil {
return err
}
ignoreSelector := labels.SelectorFromSet(ignoreLabels)
start := time.Now()
Logf("Waiting up to %v for all pods (need at least %d) in namespace '%s' to be running and ready",
timeout, minPods, ns)
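To make the new ignoreLabels parameter concrete, the two call styles used in this commit can be contrasted in a small, hypothetical helper (waitForSystemPods is illustrative only; framework.WaitForPodsRunningReady, framework.ImagePullerLabels, and api.NamespaceSystem are the identifiers used in the change above):

package example

import (
	"time"

	"k8s.io/kubernetes/pkg/api"
	"k8s.io/kubernetes/test/e2e/framework"
)

// waitForSystemPods is a hypothetical wrapper contrasting the two call styles:
// e2e.go ignores the image puller pods (they end in Success, not Running/Ready),
// while the Mesos, Nodes, and SchedulerPredicates tests pass an empty map.
func waitForSystemPods(minPods int32, timeout time.Duration, skipImagePuller bool) error {
	ignore := map[string]string{}
	if skipImagePuller {
		ignore = framework.ImagePullerLabels
	}
	return framework.WaitForPodsRunningReady(api.NamespaceSystem, minPods, timeout, ignore)
}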
@@ -449,6 +501,10 @@ func WaitForPodsRunningReady(ns string, minPods int32, timeout time.Duration) er
}
nOk, replicaOk, badPods := int32(0), int32(0), []api.Pod{}
for _, pod := range podList.Items {
if len(ignoreLabels) != 0 && ignoreSelector.Matches(labels.Set(pod.Labels)) {
Logf("%v in state %v, ignoring", pod.Name, pod.Status.Phase)
continue
}
if res, err := PodRunningReady(&pod); res && err == nil {
nOk++
if hasReplicationControllersForPod(rcList, pod) {
@@ -535,6 +591,20 @@ func RunKubernetesServiceTestContainer(repoRoot string, ns string) {
}
}
func kubectlLogPod(c *client.Client, pod api.Pod) {
	for _, container := range pod.Spec.Containers {
		logs, err := GetPodLogs(c, pod.Namespace, pod.Name, container.Name)
		if err != nil {
			logs, err = getPreviousPodLogs(c, pod.Namespace, pod.Name, container.Name)
			if err != nil {
				Logf("Failed to get logs of pod %v, container %v, err: %v", pod.Name, container.Name, err)
			}
		}
		By(fmt.Sprintf("Logs of %v/%v:%v on node %v", pod.Namespace, pod.Name, container.Name, pod.Spec.NodeName))
		Logf(logs)
	}
}
func LogFailedContainers(ns string) {
c, err := LoadClient()
if err != nil {
@@ -549,21 +619,23 @@ func LogFailedContainers(ns string) {
Logf("Running kubectl logs on non-ready containers in %v", ns)
for _, pod := range podList.Items {
if res, err := PodRunningReady(&pod); !res || err != nil {
for _, container := range pod.Spec.Containers {
logs, err := GetPodLogs(c, ns, pod.Name, container.Name)
if err != nil {
logs, err = getPreviousPodLogs(c, ns, pod.Name, container.Name)
if err != nil {
Logf("Failed to get logs of pod %v, container %v, err: %v", pod.Name, container.Name, err)
}
}
By(fmt.Sprintf("Logs of %v/%v:%v on node %v", ns, pod.Name, container.Name, pod.Spec.NodeName))
Logf(logs)
}
kubectlLogPod(c, pod)
}
}
}
func LogPodsWithLabels(c *client.Client, ns string, match map[string]string) {
	podList, err := c.Pods(ns).List(api.ListOptions{LabelSelector: labels.SelectorFromSet(match)})
	if err != nil {
		Logf("Error getting pods in namespace %q: %v", ns, err)
		return
	}
	Logf("Running kubectl logs on pods with labels %v in %v", match, ns)
	for _, pod := range podList.Items {
		kubectlLogPod(c, pod)
	}
}
// DeleteNamespaces deletes all namespaces that match the given delete and skip filters.
// Filter is by simple strings.Contains; first skip filter, then delete filter.
// Returns the list of deleted namespaces or an error.

View File

@@ -69,7 +69,7 @@ var _ = framework.KubeDescribe("Mesos", func() {
const ns = "static-pods"
numpods := int32(len(nodelist.Items))
framework.ExpectNoError(framework.WaitForPodsRunningReady(ns, numpods, wait.ForeverTestTimeout),
framework.ExpectNoError(framework.WaitForPodsRunningReady(ns, numpods, wait.ForeverTestTimeout, map[string]string{}),
fmt.Sprintf("number of static pods in namespace %s is %d", ns, numpods))
})

View File

@@ -124,7 +124,7 @@ var _ = framework.KubeDescribe("Networking", func() {
"Rerun it with at least two nodes to get complete coverage.")
}
podNames := LaunchNetTestPodPerNode(f, nodes, svcname, "1.8")
podNames := LaunchNetTestPodPerNode(f, nodes, svcname)
// Clean up the pods
defer func() {
@@ -256,7 +256,7 @@ var _ = framework.KubeDescribe("Networking", func() {
})
})
func LaunchNetTestPodPerNode(f *framework.Framework, nodes *api.NodeList, name, version string) []string {
func LaunchNetTestPodPerNode(f *framework.Framework, nodes *api.NodeList, name string) []string {
podNames := []string{}
totalPods := len(nodes.Items)
@@ -275,7 +275,7 @@ func LaunchNetTestPodPerNode(f *framework.Framework, nodes *api.NodeList, name,
Containers: []api.Container{
{
Name: "webserver",
Image: "gcr.io/google_containers/nettest:" + version,
Image: "gcr.io/google_containers/nettest:1.8",
Args: []string{
"-service=" + name,
//peers >= totalPods should be asserted by the container.

View File

@@ -396,7 +396,7 @@ var _ = framework.KubeDescribe("Nodes [Disruptive]", func() {
// the cluster is restored to health.
By("waiting for system pods to successfully restart")
err := framework.WaitForPodsRunningReady(api.NamespaceSystem, systemPodsNo, framework.PodReadyBeforeTimeout)
err := framework.WaitForPodsRunningReady(api.NamespaceSystem, systemPodsNo, framework.PodReadyBeforeTimeout, map[string]string{})
Expect(err).NotTo(HaveOccurred())
})

View File

@@ -196,7 +196,7 @@ var _ = framework.KubeDescribe("SchedulerPredicates [Serial]", func() {
}
}
err = framework.WaitForPodsRunningReady(api.NamespaceSystem, int32(systemPodsNo), framework.PodReadyBeforeTimeout)
err = framework.WaitForPodsRunningReady(api.NamespaceSystem, int32(systemPodsNo), framework.PodReadyBeforeTimeout, map[string]string{})
Expect(err).NotTo(HaveOccurred())
for _, node := range nodeList.Items {