Delete orphaned namespaces at e2e start for soak tests
This will help prevent soak clusters from getting hosed if a test is interrupted without cleaning up resources.
parent ebe5649939
commit e95cf1d109

@@ -104,6 +104,7 @@ export PATH=$(dirname "${e2e_test}"):"${PATH}"
--node-instance-group="${NODE_INSTANCE_GROUP:-}" \
--num-nodes="${NUM_MINIONS:-}" \
--prefix="${KUBE_GCE_INSTANCE_PREFIX:-e2e}" \
${E2E_CLEAN_START:+"--clean-start=true"} \
${E2E_MIN_STARTUP_PODS:+"--minStartupPods=${E2E_MIN_STARTUP_PODS}"} \
${E2E_REPORT_DIR:+"--report-dir=${E2E_REPORT_DIR}"} \
"${@:-}"

@@ -453,6 +453,8 @@ case ${JOB_NAME} in
: ${E2E_DOWN:="false"}
: ${E2E_NETWORK:="gce-soak-weekly"}
: ${E2E_UP:="false"}
# Clear out any orphaned namespaces in case previous run was interrupted.
: ${E2E_CLEAN_START:="true"}
: ${GINKGO_TEST_ARGS:="--ginkgo.skip=$(join_regex_allow_empty \
${GCE_DEFAULT_SKIP_TESTS[@]:+${GCE_DEFAULT_SKIP_TESTS[@]}} \
${GCE_FLAKY_TESTS[@]:+${GCE_FLAKY_TESTS[@]}} \

@@ -481,6 +483,8 @@ case ${JOB_NAME} in
: ${E2E_DOWN:="false"}
: ${E2E_NETWORK:="gce-soak-weekly-1-1"}
: ${E2E_UP:="false"}
# Clear out any orphaned namespaces in case previous run was interrupted.
: ${E2E_CLEAN_START:="true"}
: ${GINKGO_TEST_ARGS:="--ginkgo.skip=$(join_regex_allow_empty \
${GCE_DEFAULT_SKIP_TESTS[@]:+${GCE_DEFAULT_SKIP_TESTS[@]}} \
${GCE_FLAKY_TESTS[@]:+${GCE_FLAKY_TESTS[@]}} \

@@ -749,6 +753,8 @@ case ${JOB_NAME} in
: ${E2E_NETWORK:="gke-soak-weekly"}
: ${E2E_DOWN:="false"}
: ${E2E_UP:="false"}
# Clear out any orphaned namespaces in case previous run was interrupted.
: ${E2E_CLEAN_START:="true"}
: ${PROJECT:="kubernetes-jenkins"}
: ${E2E_OPT:="--check_version_skew=false"}
: ${GINKGO_TEST_ARGS:="--ginkgo.skip=$(join_regex_allow_empty \

@@ -1448,6 +1454,7 @@ export KUBE_SKIP_CONFIRMATIONS=y
export E2E_UP="${E2E_UP:-true}"
export E2E_TEST="${E2E_TEST:-true}"
export E2E_DOWN="${E2E_DOWN:-true}"
export E2E_CLEAN_START="${E2E_CLEAN_START:-}"
# Used by hack/ginkgo-e2e.sh to enable ginkgo's parallel test runner.
export GINKGO_PARALLEL=${GINKGO_PARALLEL:-}

@@ -33,6 +33,7 @@ cert-dir
certificate-authority
cgroup-root
chaos-chance
clean-start
cleanup-iptables
client-ca-file
client-certificate

@@ -83,6 +83,7 @@ func init() {
	flag.StringVar(&testContext.PrometheusPushGateway, "prom-push-gateway", "", "The URL to prometheus gateway, so that metrics can be pushed during e2es and scraped by prometheus. Typically something like 127.0.0.1:9091.")
	flag.BoolVar(&testContext.VerifyServiceAccount, "e2e-verify-service-account", true, "If true tests will verify the service account before running.")
	flag.BoolVar(&testContext.DeleteNamespace, "delete-namespace", true, "If true tests will delete namespace after completion. It is only designed to make debugging easier, DO NOT turn it off by default.")
	flag.BoolVar(&testContext.CleanStart, "clean-start", false, "If true, purge all namespaces except default and system before running tests. This serves to cleanup test namespaces from failed/interrupted e2e runs in a long-lived cluster.")
	flag.BoolVar(&testContext.GatherKubeSystemResourceUsageData, "gather-resource-usage", true, "If set to true framework will be monitoring resource usage of system add-ons in (some) e2e tests.")
}

@@ -126,6 +127,24 @@ func TestE2E(t *testing.T) {
	}
	gomega.RegisterFailHandler(ginkgo.Fail)

	c, err := loadClient()
	if err != nil {
		glog.Fatal("Error loading client: ", err)
	}

	// Delete any namespaces except default and kube-system. This ensures no
	// lingering resources are left over from a previous test run.
	if testContext.CleanStart {
		deleted, err := deleteNamespaces(c, nil /* deleteFilter */, []string{api.NamespaceSystem, api.NamespaceDefault})
		if err != nil {
			t.Errorf("Error deleting orphaned namespaces: %v", err)
		}
		glog.Infof("Waiting for deletion of the following namespaces: %v", deleted)
		if err := waitForNamespacesDeleted(c, deleted, namespaceCleanupTimeout); err != nil {
			glog.Fatalf("Failed to delete orphaned namespaces %v: %v", deleted, err)
		}
	}

	// Ensure all pods are running and ready before starting tests (otherwise,
	// cluster infrastructure pods that are being pulled or started can block
	// test pods from running, and tests that ensure all pods are running and

@@ -32,17 +32,6 @@ import (
	. "github.com/onsi/gomega"
)

func countRemaining(c *client.Client, withName string) (int, error) {
	var cnt = 0
	nsList, err := c.Namespaces().List(labels.Everything(), fields.Everything())
	for _, item := range nsList.Items {
		if strings.Contains(item.Name, "nslifetest") {
			cnt++
		}
	}
	return cnt, err
}

func extinguish(c *client.Client, totalNS int, maxAllowedAfterDel int, maxSeconds int) {
	var err error

@@ -59,40 +48,33 @@ func extinguish(c *client.Client, totalNS int, maxAllowedAfterDel int, maxSeconds int)
	}
	wg.Wait()

	By("Waiting 10 seconds")
	//Wait 10 seconds, then SEND delete requests for all the namespaces.
	By("Waiting 10 seconds")
	time.Sleep(time.Duration(10 * time.Second))
	By("Deleting namespaces")
	nsList, err := c.Namespaces().List(labels.Everything(), fields.Everything())
	deleted, err := deleteNamespaces(c, []string{"nslifetest"}, nil /* skipFilter */)
	Expect(err).NotTo(HaveOccurred())
	var nsCount = 0
	for _, item := range nsList.Items {
		if strings.Contains(item.Name, "nslifetest") {
			wg.Add(1)
			nsCount++
			go func(nsName string) {
				defer wg.Done()
				defer GinkgoRecover()
				Expect(c.Namespaces().Delete(nsName)).To(Succeed())
				Logf("namespace : %v api call to delete is complete ", nsName)
			}(item.Name)
		}
	}
	Expect(nsCount).To(Equal(totalNS))
	wg.Wait()
	Expect(len(deleted)).To(Equal(totalNS))

	By("Waiting for namespaces to vanish")
	//Now POLL until all namespaces have been eradicated.
	expectNoError(wait.Poll(2*time.Second, time.Duration(maxSeconds)*time.Second,
		func() (bool, error) {
			if rem, err := countRemaining(c, "nslifetest"); err != nil || rem > maxAllowedAfterDel {
				Logf("Remaining namespaces : %v", rem)
			var cnt = 0
			nsList, err := c.Namespaces().List(labels.Everything(), fields.Everything())
			if err != nil {
				return false, err
			} else {
				return true, nil
			}
			for _, item := range nsList.Items {
				if strings.Contains(item.Name, "nslifetest") {
					cnt++
				}
			}
			if cnt > maxAllowedAfterDel {
				Logf("Remaining namespaces : %v", cnt)
				return false, nil
			}
			return true, nil
		}))

}

var _ = Describe("Namespaces", func() {
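
Both the inline loop in the extinguish diff above and the new deleteNamespaces helper further down send the per-namespace delete requests concurrently and then wait for all of them with a sync.WaitGroup. The following is a minimal, standard-library-only sketch of that pattern; the deleteNamespace function and the namespace names are hypothetical stand-ins for the real client call, which the test code additionally wraps with GinkgoRecover and a gomega assertion.

package main

import (
	"fmt"
	"sync"
	"time"
)

// deleteNamespace is a stand-in for the real API call (c.Namespaces().Delete).
func deleteNamespace(name string) error {
	time.Sleep(50 * time.Millisecond) // simulate request latency
	fmt.Printf("namespace %v: delete request complete\n", name)
	return nil
}

func main() {
	namespaces := []string{"nslifetest-0", "nslifetest-1", "nslifetest-2"}

	var wg sync.WaitGroup
	for _, ns := range namespaces {
		wg.Add(1)
		// The name is passed as an argument so each goroutine gets its own
		// copy instead of sharing the loop variable.
		go func(nsName string) {
			defer wg.Done()
			if err := deleteNamespace(nsName); err != nil {
				fmt.Printf("namespace %v: delete failed: %v\n", nsName, err)
			}
		}(ns)
	}
	// Block until every delete request has been issued, mirroring wg.Wait() above.
	wg.Wait()
}

Passing the name into the goroutine as an argument is why the diff uses go func(nsName string){...}(item.Name): each goroutine deletes the namespace it was handed rather than whatever the loop variable points to by the time it runs.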

@@ -29,6 +29,7 @@ import (
	"path/filepath"
	"strconv"
	"strings"
	"sync"
	"time"

	"k8s.io/kubernetes/pkg/api"

@@ -67,6 +68,11 @@ const (
	// TODO: Make this 30 seconds once #4566 is resolved.
	podStartTimeout = 5 * time.Minute

	// If there are any orphaned namespaces to clean up, this test is running
	// on a long lived cluster. A long wait here is preferable to spurious test
	// failures caused by leaked resources from a previous test run.
	namespaceCleanupTimeout = 15 * time.Minute

	// Some pods can take much longer to get ready due to volume attach/detach latency.
	slowPodStartTimeout = 15 * time.Minute

@@ -127,6 +133,7 @@ type TestContextType struct {
	PrometheusPushGateway string
	VerifyServiceAccount bool
	DeleteNamespace bool
	CleanStart bool
	GatherKubeSystemResourceUsageData bool
}

@@ -401,6 +408,71 @@ func waitForPodsRunningReady(ns string, minPods int, timeout time.Duration) error
	return nil
}

// deleteNamespaces deletes all namespaces that match the given delete and skip filters.
// Filter is by simple strings.Contains; first skip filter, then delete filter.
// Returns the list of deleted namespaces or an error.
func deleteNamespaces(c *client.Client, deleteFilter, skipFilter []string) ([]string, error) {
	By("Deleting namespaces")
	nsList, err := c.Namespaces().List(labels.Everything(), fields.Everything())
	Expect(err).NotTo(HaveOccurred())
	var deleted []string
	var wg sync.WaitGroup
OUTER:
	for _, item := range nsList.Items {
		if skipFilter != nil {
			for _, pattern := range skipFilter {
				if strings.Contains(item.Name, pattern) {
					continue OUTER
				}
			}
		}
		if deleteFilter != nil {
			var shouldDelete bool
			for _, pattern := range deleteFilter {
				if strings.Contains(item.Name, pattern) {
					shouldDelete = true
					break
				}
			}
			if !shouldDelete {
				continue OUTER
			}
		}
		wg.Add(1)
		deleted = append(deleted, item.Name)
		go func(nsName string) {
			defer wg.Done()
			defer GinkgoRecover()
			Expect(c.Namespaces().Delete(nsName)).To(Succeed())
			Logf("namespace : %v api call to delete is complete ", nsName)
		}(item.Name)
	}
	wg.Wait()
	return deleted, nil
}
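
The doc comment above fixes the matching rule for deleteNamespaces: a namespace matching any skip-filter pattern is kept; otherwise it is deleted when the delete filter is nil or when it matches any delete-filter pattern, with all matching done by strings.Contains. Below is a small self-contained sketch of just that rule, using only the standard library; the names and the shouldDelete helper are hypothetical illustrations, not part of the diff.

package main

import (
	"fmt"
	"strings"
)

// shouldDelete mirrors the selection logic of deleteNamespaces: the skip filter
// is checked first and wins; a nil delete filter means "delete everything else".
func shouldDelete(name string, deleteFilter, skipFilter []string) bool {
	for _, pattern := range skipFilter {
		if strings.Contains(name, pattern) {
			return false
		}
	}
	if deleteFilter == nil {
		return true
	}
	for _, pattern := range deleteFilter {
		if strings.Contains(name, pattern) {
			return true
		}
	}
	return false
}

func main() {
	names := []string{"default", "kube-system", "e2e-tests-pods-x1z9", "nslifetest-7"}

	// The clean-start call in TestE2E: nil delete filter, skip default and kube-system.
	for _, n := range names {
		fmt.Printf("clean start deletes %-20s %v\n", n, shouldDelete(n, nil, []string{"kube-system", "default"}))
	}

	// The extinguish test: delete only namespaces containing "nslifetest".
	for _, n := range names {
		fmt.Printf("extinguish deletes  %-20s %v\n", n, shouldDelete(n, []string{"nslifetest"}, nil))
	}
}

Because matching is substring-based, a skip entry such as "default" also protects any namespace whose name merely contains "default"; the clean-start call relies on the skip list holding only the two well-known names.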

func waitForNamespacesDeleted(c *client.Client, namespaces []string, timeout time.Duration) error {
	By("Waiting for namespaces to vanish")
	nsMap := map[string]bool{}
	for _, ns := range namespaces {
		nsMap[ns] = true
	}
	//Now POLL until all namespaces have been eradicated.
	return wait.Poll(2*time.Second, timeout,
		func() (bool, error) {
			nsList, err := c.Namespaces().List(labels.Everything(), fields.Everything())
			if err != nil {
				return false, err
			}
			for _, item := range nsList.Items {
				if _, ok := nsMap[item.Name]; ok {
					return false, nil
				}
			}
			return true, nil
		})
}
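
waitForNamespacesDeleted polls the namespace list every two seconds through wait.Poll until none of the tracked names remains, or the timeout elapses. The sketch below shows the same polling shape with only the standard library; the waitUntilGone helper and the fake listFn are hypothetical, whereas the real code lists namespaces through the client.

package main

import (
	"errors"
	"fmt"
	"time"
)

// waitUntilGone polls listFn at the given interval until none of the tracked
// names is present, returning an error if the timeout elapses first.
func waitUntilGone(listFn func() []string, names []string, interval, timeout time.Duration) error {
	tracked := make(map[string]bool, len(names))
	for _, n := range names {
		tracked[n] = true
	}
	deadline := time.Now().Add(timeout)
	for {
		remaining := 0
		for _, n := range listFn() {
			if tracked[n] {
				remaining++
			}
		}
		if remaining == 0 {
			return nil
		}
		if time.Now().After(deadline) {
			return errors.New("timed out waiting for namespaces to be deleted")
		}
		time.Sleep(interval)
	}
}

func main() {
	// Fake cluster state: the deleted namespace disappears a short while later.
	gone := time.Now().Add(300 * time.Millisecond)
	list := func() []string {
		if time.Now().Before(gone) {
			return []string{"default", "kube-system", "nslifetest-1"}
		}
		return []string{"default", "kube-system"}
	}
	err := waitUntilGone(list, []string{"nslifetest-1"}, 100*time.Millisecond, 2*time.Second)
	fmt.Println("wait result:", err)
}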

func waitForServiceAccountInNamespace(c *client.Client, ns, serviceAccountName string, timeout time.Duration) error {
	Logf("Waiting up to %v for service account %s to be provisioned in ns %s", timeout, serviceAccountName, ns)
	for start := time.Now(); time.Since(start) < timeout; time.Sleep(poll) {