Delete orphaned namespaces at e2e start for soak tests

This will help prevent soak clusters getting hosed if a test is
interrupted without cleaning up resources.
Jeff Lowdermilk 2015-11-17 10:44:15 -08:00
parent ebe5649939
commit e95cf1d109
6 changed files with 116 additions and 34 deletions
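
The change threads a single switch from the soak job configuration through the test harness into the Go test binary: setting E2E_CLEAN_START in a soak job block is forwarded as --clean-start=true on the e2e invocation, which sets testContext.CleanStart and triggers a namespace purge at the top of TestE2E. Roughly, using only names introduced in the hunks below (error handling abbreviated; the TestE2E hunk has the real code):

if testContext.CleanStart {
	// Purge every namespace except default and kube-system, then wait up to
	// namespaceCleanupTimeout for the deletions to finish before any spec runs.
	deleted, err := deleteNamespaces(c, nil /* deleteFilter */, []string{api.NamespaceSystem, api.NamespaceDefault})
	if err == nil {
		err = waitForNamespacesDeleted(c, deleted, namespaceCleanupTimeout)
	}
	if err != nil {
		glog.Fatalf("clean-start namespace cleanup failed: %v", err)
	}
}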

@@ -104,6 +104,7 @@ export PATH=$(dirname "${e2e_test}"):"${PATH}"
--node-instance-group="${NODE_INSTANCE_GROUP:-}" \
--num-nodes="${NUM_MINIONS:-}" \
--prefix="${KUBE_GCE_INSTANCE_PREFIX:-e2e}" \
${E2E_CLEAN_START:+"--clean-start=true"} \
${E2E_MIN_STARTUP_PODS:+"--minStartupPods=${E2E_MIN_STARTUP_PODS}"} \
${E2E_REPORT_DIR:+"--report-dir=${E2E_REPORT_DIR}"} \
"${@:-}"

@@ -453,6 +453,8 @@ case ${JOB_NAME} in
: ${E2E_DOWN:="false"}
: ${E2E_NETWORK:="gce-soak-weekly"}
: ${E2E_UP:="false"}
# Clear out any orphaned namespaces in case the previous run was interrupted.
: ${E2E_CLEAN_START:="true"}
: ${GINKGO_TEST_ARGS:="--ginkgo.skip=$(join_regex_allow_empty \
${GCE_DEFAULT_SKIP_TESTS[@]:+${GCE_DEFAULT_SKIP_TESTS[@]}} \
${GCE_FLAKY_TESTS[@]:+${GCE_FLAKY_TESTS[@]}} \
@@ -481,6 +483,8 @@ case ${JOB_NAME} in
: ${E2E_DOWN:="false"}
: ${E2E_NETWORK:="gce-soak-weekly-1-1"}
: ${E2E_UP:="false"}
# Clear out any orphaned namespaces in case the previous run was interrupted.
: ${E2E_CLEAN_START:="true"}
: ${GINKGO_TEST_ARGS:="--ginkgo.skip=$(join_regex_allow_empty \
${GCE_DEFAULT_SKIP_TESTS[@]:+${GCE_DEFAULT_SKIP_TESTS[@]}} \
${GCE_FLAKY_TESTS[@]:+${GCE_FLAKY_TESTS[@]}} \
@@ -749,6 +753,8 @@ case ${JOB_NAME} in
: ${E2E_NETWORK:="gke-soak-weekly"}
: ${E2E_DOWN:="false"}
: ${E2E_UP:="false"}
# Clear out any orphaned namespaces in case the previous run was interrupted.
: ${E2E_CLEAN_START:="true"}
: ${PROJECT:="kubernetes-jenkins"}
: ${E2E_OPT:="--check_version_skew=false"}
: ${GINKGO_TEST_ARGS:="--ginkgo.skip=$(join_regex_allow_empty \
@@ -1448,6 +1454,7 @@ export KUBE_SKIP_CONFIRMATIONS=y
export E2E_UP="${E2E_UP:-true}"
export E2E_TEST="${E2E_TEST:-true}"
export E2E_DOWN="${E2E_DOWN:-true}"
export E2E_CLEAN_START="${E2E_CLEAN_START:-}"
# Used by hack/ginkgo-e2e.sh to enable ginkgo's parallel test runner.
export GINKGO_PARALLEL=${GINKGO_PARALLEL:-}

@@ -33,6 +33,7 @@ cert-dir
certificate-authority
cgroup-root
chaos-chance
clean-start
cleanup-iptables
client-ca-file
client-certificate

@@ -83,6 +83,7 @@ func init() {
flag.StringVar(&testContext.PrometheusPushGateway, "prom-push-gateway", "", "The URL to prometheus gateway, so that metrics can be pushed during e2es and scraped by prometheus. Typically something like 127.0.0.1:9091.")
flag.BoolVar(&testContext.VerifyServiceAccount, "e2e-verify-service-account", true, "If true tests will verify the service account before running.")
flag.BoolVar(&testContext.DeleteNamespace, "delete-namespace", true, "If true tests will delete namespace after completion. It is only designed to make debugging easier, DO NOT turn it off by default.")
flag.BoolVar(&testContext.CleanStart, "clean-start", false, "If true, purge all namespaces except default and system before running tests. This serves to clean up test namespaces from failed/interrupted e2e runs in a long-lived cluster.")
flag.BoolVar(&testContext.GatherKubeSystemResourceUsageData, "gather-resource-usage", true, "If set to true framework will be monitoring resource usage of system add-ons in (some) e2e tests.")
}
@@ -126,6 +127,24 @@ func TestE2E(t *testing.T) {
}
gomega.RegisterFailHandler(ginkgo.Fail)
c, err := loadClient()
if err != nil {
glog.Fatal("Error loading client: ", err)
}
// Delete any namespaces except default and kube-system. This ensures no
// lingering resources are left over from a previous test run.
if testContext.CleanStart {
deleted, err := deleteNamespaces(c, nil /* deleteFilter */, []string{api.NamespaceSystem, api.NamespaceDefault})
if err != nil {
t.Errorf("Error deleting orphaned namespaces: %v", err)
}
glog.Infof("Waiting for deletion of the following namespaces: %v", deleted)
if err := waitForNamespacesDeleted(c, deleted, namespaceCleanupTimeout); err != nil {
glog.Fatalf("Failed to delete orphaned namespaces %v: %v", deleted, err)
}
}
// Ensure all pods are running and ready before starting tests (otherwise,
// cluster infrastructure pods that are being pulled or started can block
// test pods from running, and tests that ensure all pods are running and

@@ -32,17 +32,6 @@ import (
. "github.com/onsi/gomega"
)
func countRemaining(c *client.Client, withName string) (int, error) {
var cnt = 0
nsList, err := c.Namespaces().List(labels.Everything(), fields.Everything())
for _, item := range nsList.Items {
if strings.Contains(item.Name, "nslifetest") {
cnt++
}
}
return cnt, err
}
func extinguish(c *client.Client, totalNS int, maxAllowedAfterDel int, maxSeconds int) {
var err error
@@ -59,40 +48,33 @@ func extinguish(c *client.Client, totalNS int, maxAllowedAfterDel int, maxSeconds int) {
}
wg.Wait()
By("Waiting 10 seconds")
//Wait 10 seconds, then SEND delete requests for all the namespaces.
By("Waiting 10 seconds")
time.Sleep(time.Duration(10 * time.Second))
By("Deleting namespaces")
nsList, err := c.Namespaces().List(labels.Everything(), fields.Everything())
deleted, err := deleteNamespaces(c, []string{"nslifetest"}, nil /* skipFilter */)
Expect(err).NotTo(HaveOccurred())
var nsCount = 0
for _, item := range nsList.Items {
if strings.Contains(item.Name, "nslifetest") {
wg.Add(1)
nsCount++
go func(nsName string) {
defer wg.Done()
defer GinkgoRecover()
Expect(c.Namespaces().Delete(nsName)).To(Succeed())
Logf("namespace : %v api call to delete is complete ", nsName)
}(item.Name)
}
}
Expect(nsCount).To(Equal(totalNS))
wg.Wait()
Expect(len(deleted)).To(Equal(totalNS))
By("Waiting for namespaces to vanish")
//Now POLL until all namespaces have been eradicated.
expectNoError(wait.Poll(2*time.Second, time.Duration(maxSeconds)*time.Second,
func() (bool, error) {
if rem, err := countRemaining(c, "nslifetest"); err != nil || rem > maxAllowedAfterDel {
Logf("Remaining namespaces : %v", rem)
var cnt = 0
nsList, err := c.Namespaces().List(labels.Everything(), fields.Everything())
if err != nil {
return false, err
} else {
return true, nil
}
for _, item := range nsList.Items {
if strings.Contains(item.Name, "nslifetest") {
cnt++
}
}
if cnt > maxAllowedAfterDel {
Logf("Remaining namespaces : %v", cnt)
return false, nil
}
return true, nil
}))
}
var _ = Describe("Namespaces", func() {

@@ -29,6 +29,7 @@ import (
"path/filepath"
"strconv"
"strings"
"sync"
"time"
"k8s.io/kubernetes/pkg/api"
@@ -67,6 +68,11 @@ const (
// TODO: Make this 30 seconds once #4566 is resolved.
podStartTimeout = 5 * time.Minute
// If there are any orphaned namespaces to clean up, this test is running
// on a long-lived cluster. A long wait here is preferable to spurious test
// failures caused by leaked resources from a previous test run.
namespaceCleanupTimeout = 15 * time.Minute
// Some pods can take much longer to get ready due to volume attach/detach latency.
slowPodStartTimeout = 15 * time.Minute
@@ -127,6 +133,7 @@ type TestContextType struct {
PrometheusPushGateway string
VerifyServiceAccount bool
DeleteNamespace bool
CleanStart bool
GatherKubeSystemResourceUsageData bool
}
@@ -401,6 +408,71 @@ func waitForPodsRunningReady(ns string, minPods int, timeout time.Duration) error {
return nil
}
// deleteNamespaces deletes all namespaces that match the given delete and skip filters.
// Filter is by simple strings.Contains; first skip filter, then delete filter.
// Returns the list of deleted namespaces or an error.
func deleteNamespaces(c *client.Client, deleteFilter, skipFilter []string) ([]string, error) {
By("Deleting namespaces")
nsList, err := c.Namespaces().List(labels.Everything(), fields.Everything())
Expect(err).NotTo(HaveOccurred())
var deleted []string
var wg sync.WaitGroup
OUTER:
for _, item := range nsList.Items {
if skipFilter != nil {
for _, pattern := range skipFilter {
if strings.Contains(item.Name, pattern) {
continue OUTER
}
}
}
if deleteFilter != nil {
var shouldDelete bool
for _, pattern := range deleteFilter {
if strings.Contains(item.Name, pattern) {
shouldDelete = true
break
}
}
if !shouldDelete {
continue OUTER
}
}
wg.Add(1)
deleted = append(deleted, item.Name)
go func(nsName string) {
defer wg.Done()
defer GinkgoRecover()
Expect(c.Namespaces().Delete(nsName)).To(Succeed())
Logf("namespace : %v api call to delete is complete ", nsName)
}(item.Name)
}
wg.Wait()
return deleted, nil
}
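// Illustration, not part of this change: deleteNamespaces has two call sites in
// this commit, one per filter mode. The skip filter is checked before the delete
// filter, so a namespace matching both is kept rather than deleted. The function
// name below is hypothetical; c is a configured *client.Client as elsewhere here.
func exampleDeleteNamespaceFilters(c *client.Client) {
	// Delete-filter mode (namespace lifecycle test): delete only namespaces
	// whose names contain "nslifetest".
	deleted, err := deleteNamespaces(c, []string{"nslifetest"}, nil /* skipFilter */)
	expectNoError(err)
	Logf("deleted by name filter: %v", deleted)
	// Skip-filter mode (clean-start pass in TestE2E): delete everything except
	// the default and kube-system namespaces.
	deleted, err = deleteNamespaces(c, nil /* deleteFilter */, []string{api.NamespaceSystem, api.NamespaceDefault})
	expectNoError(err)
	Logf("deleted all but skipped: %v", deleted)
}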
func waitForNamespacesDeleted(c *client.Client, namespaces []string, timeout time.Duration) error {
By("Waiting for namespaces to vanish")
nsMap := map[string]bool{}
for _, ns := range namespaces {
nsMap[ns] = true
}
//Now POLL until all namespaces have been eradicated.
return wait.Poll(2*time.Second, timeout,
func() (bool, error) {
nsList, err := c.Namespaces().List(labels.Everything(), fields.Everything())
if err != nil {
return false, err
}
for _, item := range nsList.Items {
if _, ok := nsMap[item.Name]; ok {
return false, nil
}
}
return true, nil
})
}
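// Illustration, not part of this change: callers pair the two helpers, issuing
// the deletes and then polling until the listed namespaces are gone. TestE2E
// uses namespaceCleanupTimeout; the shorter timeout and the function name here
// are illustrative only.
func exampleCleanupTestNamespaces(c *client.Client) {
	deleted, err := deleteNamespaces(c, []string{"nslifetest"}, nil /* skipFilter */)
	expectNoError(err)
	// waitForNamespacesDeleted re-lists namespaces every 2 seconds until none of
	// the deleted ones remain, or the timeout expires.
	expectNoError(waitForNamespacesDeleted(c, deleted, 5*time.Minute))
}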
func waitForServiceAccountInNamespace(c *client.Client, ns, serviceAccountName string, timeout time.Duration) error {
Logf("Waiting up to %v for service account %s to be provisioned in ns %s", timeout, serviceAccountName, ns)
for start := time.Now(); time.Since(start) < timeout; time.Sleep(poll) {