Merge pull request #60209 from shyamjvs/retry-rc-creation-in-test-framework

Automatic merge from submit-queue (batch tested with PRs 60377, 60209). If you want to cherry-pick this change to another branch, please follow the instructions <a href="https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md">here</a>.

Add smart retries to resource creations in testing framework

Fix https://github.com/kubernetes/kubernetes/issues/55860

/cc @wojtek-t 

```release-note
NONE
```
pull/6/head
Kubernetes Submit Queue 2018-02-26 03:12:31 -08:00 committed by GitHub
commit ab1c3dcce6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 250 additions and 70 deletions

View File

@ -5110,6 +5110,7 @@ func DumpDebugInfo(c clientset.Interface, ns string) {
}
}
// TODO: Get rid of this duplicate function in favour of the one in test/utils.
func IsRetryableAPIError(err error) bool {
return apierrs.IsTimeout(err) || apierrs.IsServerTimeout(err) || apierrs.IsTooManyRequests(err)
}

View File

@ -22,7 +22,6 @@ go_library(
"//vendor/github.com/onsi/ginkgo:go_default_library",
"//vendor/github.com/onsi/gomega:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/errors:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/meta:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",

View File

@ -26,7 +26,6 @@ import (
"time"
"k8s.io/api/core/v1"
apierrs "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/fields"
@ -55,7 +54,6 @@ const (
MinSaturationThreshold = 2 * time.Minute
MinPodsPerSecondThroughput = 8
DensityPollInterval = 10 * time.Second
MaxLatencyPodCreationTries = 5
)
// Maximum container failures this test tolerates before failing.
@ -901,13 +899,7 @@ func createRunningPodFromRC(wg *sync.WaitGroup, c clientset.Interface, name, ns,
},
},
}
for attempt := 1; attempt <= MaxLatencyPodCreationTries; attempt++ {
_, err := c.CoreV1().ReplicationControllers(ns).Create(rc)
if err == nil || apierrs.IsAlreadyExists(err) {
break
}
Expect(attempt < MaxLatencyPodCreationTries && framework.IsRetryableAPIError(err)).To(Equal(true))
}
framework.ExpectNoError(testutils.CreateRCWithRetries(c, ns, rc))
framework.ExpectNoError(framework.WaitForControlledPodsRunning(c, ns, name, api.Kind("ReplicationController")))
framework.Logf("Found pod '%s' running", name)
}

View File

@ -28,7 +28,6 @@ import (
"time"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/api/meta"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@ -229,8 +228,7 @@ var _ = SIGDescribe("Load capacity", func() {
configs, secretConfigs, configMapConfigs = generateConfigs(totalPods, itArg.image, itArg.command, namespaces, itArg.kind, itArg.secretsPerPod, itArg.configMapsPerPod)
if itArg.quotas {
err := CreateQuotas(f, namespaces, 2*totalPods, testPhaseDurations.StartPhase(115, "quota creation"))
framework.ExpectNoError(err)
framework.ExpectNoError(CreateQuotas(f, namespaces, 2*totalPods, testPhaseDurations.StartPhase(115, "quota creation")))
}
serviceCreationPhase := testPhaseDurations.StartPhase(120, "services creation")
@ -240,8 +238,7 @@ var _ = SIGDescribe("Load capacity", func() {
services := generateServicesForConfigs(configs)
createService := func(i int) {
defer GinkgoRecover()
_, err := clientset.CoreV1().Services(services[i].Namespace).Create(services[i])
framework.ExpectNoError(err)
framework.ExpectNoError(testutils.CreateServiceWithRetries(clientset, services[i].Namespace, services[i]))
}
workqueue.Parallelize(serviceOperationsParallelism, len(services), createService)
framework.Logf("%v Services created.", len(services))
@ -251,8 +248,7 @@ var _ = SIGDescribe("Load capacity", func() {
framework.Logf("Starting to delete services...")
deleteService := func(i int) {
defer GinkgoRecover()
err := clientset.CoreV1().Services(services[i].Namespace).Delete(services[i].Name, nil)
framework.ExpectNoError(err)
framework.ExpectNoError(clientset.CoreV1().Services(services[i].Namespace).Delete(services[i].Name, nil))
}
workqueue.Parallelize(serviceOperationsParallelism, len(services), deleteService)
framework.Logf("Services deleted")
@ -729,20 +725,11 @@ func CreateQuotas(f *framework.Framework, namespaces []*v1.Namespace, podCount i
Hard: v1.ResourceList{"pods": *resource.NewQuantity(int64(podCount), resource.DecimalSI)},
},
}
for _, ns := range namespaces {
if err := wait.PollImmediate(2*time.Second, 30*time.Second, func() (bool, error) {
quotaTemplate.Name = ns.Name + "-quota"
_, err := f.ClientSet.CoreV1().ResourceQuotas(ns.Name).Create(quotaTemplate)
if err != nil && !errors.IsAlreadyExists(err) {
framework.Logf("Unexpected error while creating resource quota: %v", err)
return false, nil
}
return true, nil
}); err != nil {
return fmt.Errorf("Failed to create quota: %v", err)
quotaTemplate.Name = ns.Name + "-quota"
if err := testutils.CreateResourceQuotaWithRetries(f.ClientSet, ns.Name, quotaTemplate); err != nil {
return fmt.Errorf("Error creating quota: %v", err)
}
}
return nil
}

View File

@ -9,6 +9,7 @@ go_library(
name = "go_default_library",
srcs = [
"conditions.go",
"create_resources.go",
"density_utils.go",
"deployment.go",
"pod_store.go",

View File

@ -0,0 +1,224 @@
/*
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// TODO: Refactor common part of functions in this file for generic object kinds.
package utils
import (
"fmt"
"time"
batch "k8s.io/api/batch/v1"
"k8s.io/api/core/v1"
extensions "k8s.io/api/extensions/v1beta1"
apierrs "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/util/wait"
clientset "k8s.io/client-go/kubernetes"
)
const (
// Parameters for retrying with exponential backoff.
retryBackoffInitialDuration = 100 * time.Millisecond
retryBackoffFactor = 3
retryBackoffJitter = 0
retryBackoffSteps = 6
)
// Utility for retrying the given function with exponential backoff.
func RetryWithExponentialBackOff(fn wait.ConditionFunc) error {
backoff := wait.Backoff{
Duration: retryBackoffInitialDuration,
Factor: retryBackoffFactor,
Jitter: retryBackoffJitter,
Steps: retryBackoffSteps,
}
return wait.ExponentialBackoff(backoff, fn)
}
func IsRetryableAPIError(err error) bool {
return apierrs.IsTimeout(err) || apierrs.IsServerTimeout(err) || apierrs.IsTooManyRequests(err)
}
func CreatePodWithRetries(c clientset.Interface, namespace string, obj *v1.Pod) error {
if obj == nil {
return fmt.Errorf("Object provided to create is empty")
}
createFunc := func() (bool, error) {
_, err := c.CoreV1().Pods(namespace).Create(obj)
if err == nil || apierrs.IsAlreadyExists(err) {
return true, nil
}
if IsRetryableAPIError(err) {
return false, nil
}
return false, fmt.Errorf("Failed to create object with non-retriable error: %v", err)
}
return RetryWithExponentialBackOff(createFunc)
}
func CreateRCWithRetries(c clientset.Interface, namespace string, obj *v1.ReplicationController) error {
if obj == nil {
return fmt.Errorf("Object provided to create is empty")
}
createFunc := func() (bool, error) {
_, err := c.CoreV1().ReplicationControllers(namespace).Create(obj)
if err == nil || apierrs.IsAlreadyExists(err) {
return true, nil
}
if IsRetryableAPIError(err) {
return false, nil
}
return false, fmt.Errorf("Failed to create object with non-retriable error: %v", err)
}
return RetryWithExponentialBackOff(createFunc)
}
func CreateReplicaSetWithRetries(c clientset.Interface, namespace string, obj *extensions.ReplicaSet) error {
if obj == nil {
return fmt.Errorf("Object provided to create is empty")
}
createFunc := func() (bool, error) {
_, err := c.ExtensionsV1beta1().ReplicaSets(namespace).Create(obj)
if err == nil || apierrs.IsAlreadyExists(err) {
return true, nil
}
if IsRetryableAPIError(err) {
return false, nil
}
return false, fmt.Errorf("Failed to create object with non-retriable error: %v", err)
}
return RetryWithExponentialBackOff(createFunc)
}
func CreateDeploymentWithRetries(c clientset.Interface, namespace string, obj *extensions.Deployment) error {
if obj == nil {
return fmt.Errorf("Object provided to create is empty")
}
createFunc := func() (bool, error) {
_, err := c.ExtensionsV1beta1().Deployments(namespace).Create(obj)
if err == nil || apierrs.IsAlreadyExists(err) {
return true, nil
}
if IsRetryableAPIError(err) {
return false, nil
}
return false, fmt.Errorf("Failed to create object with non-retriable error: %v", err)
}
return RetryWithExponentialBackOff(createFunc)
}
func CreateDaemonSetWithRetries(c clientset.Interface, namespace string, obj *extensions.DaemonSet) error {
if obj == nil {
return fmt.Errorf("Object provided to create is empty")
}
createFunc := func() (bool, error) {
_, err := c.ExtensionsV1beta1().DaemonSets(namespace).Create(obj)
if err == nil || apierrs.IsAlreadyExists(err) {
return true, nil
}
if IsRetryableAPIError(err) {
return false, nil
}
return false, fmt.Errorf("Failed to create object with non-retriable error: %v", err)
}
return RetryWithExponentialBackOff(createFunc)
}
func CreateJobWithRetries(c clientset.Interface, namespace string, obj *batch.Job) error {
if obj == nil {
return fmt.Errorf("Object provided to create is empty")
}
createFunc := func() (bool, error) {
_, err := c.BatchV1().Jobs(namespace).Create(obj)
if err == nil || apierrs.IsAlreadyExists(err) {
return true, nil
}
if IsRetryableAPIError(err) {
return false, nil
}
return false, fmt.Errorf("Failed to create object with non-retriable error: %v", err)
}
return RetryWithExponentialBackOff(createFunc)
}
func CreateSecretWithRetries(c clientset.Interface, namespace string, obj *v1.Secret) error {
if obj == nil {
return fmt.Errorf("Object provided to create is empty")
}
createFunc := func() (bool, error) {
_, err := c.CoreV1().Secrets(namespace).Create(obj)
if err == nil || apierrs.IsAlreadyExists(err) {
return true, nil
}
if IsRetryableAPIError(err) {
return false, nil
}
return false, fmt.Errorf("Failed to create object with non-retriable error: %v", err)
}
return RetryWithExponentialBackOff(createFunc)
}
func CreateConfigMapWithRetries(c clientset.Interface, namespace string, obj *v1.ConfigMap) error {
if obj == nil {
return fmt.Errorf("Object provided to create is empty")
}
createFunc := func() (bool, error) {
_, err := c.CoreV1().ConfigMaps(namespace).Create(obj)
if err == nil || apierrs.IsAlreadyExists(err) {
return true, nil
}
if IsRetryableAPIError(err) {
return false, nil
}
return false, fmt.Errorf("Failed to create object with non-retriable error: %v", err)
}
return RetryWithExponentialBackOff(createFunc)
}
func CreateServiceWithRetries(c clientset.Interface, namespace string, obj *v1.Service) error {
if obj == nil {
return fmt.Errorf("Object provided to create is empty")
}
createFunc := func() (bool, error) {
_, err := c.CoreV1().Services(namespace).Create(obj)
if err == nil || apierrs.IsAlreadyExists(err) {
return true, nil
}
if IsRetryableAPIError(err) {
return false, nil
}
return false, fmt.Errorf("Failed to create object with non-retriable error: %v", err)
}
return RetryWithExponentialBackOff(createFunc)
}
func CreateResourceQuotaWithRetries(c clientset.Interface, namespace string, obj *v1.ResourceQuota) error {
if obj == nil {
return fmt.Errorf("Object provided to create is empty")
}
createFunc := func() (bool, error) {
_, err := c.CoreV1().ResourceQuotas(namespace).Create(obj)
if err == nil || apierrs.IsAlreadyExists(err) {
return true, nil
}
if IsRetryableAPIError(err) {
return false, nil
}
return false, fmt.Errorf("Failed to create object with non-retriable error: %v", err)
}
return RetryWithExponentialBackOff(createFunc)
}

View File

@ -81,15 +81,7 @@ func WaitUntilPodIsScheduled(c clientset.Interface, name, namespace string, time
func RunPodAndGetNodeName(c clientset.Interface, pod *v1.Pod, timeout time.Duration) (string, error) {
name := pod.Name
namespace := pod.Namespace
var err error
// Create a Pod
for i := 0; i < retries; i++ {
_, err = c.CoreV1().Pods(namespace).Create(pod)
if err == nil || apierrs.IsAlreadyExists(err) {
break
}
}
if err != nil && !apierrs.IsAlreadyExists(err) {
if err := CreatePodWithRetries(c, namespace, pod); err != nil {
return "", err
}
p, err := WaitUntilPodIsScheduled(c, name, namespace, timeout)
@ -325,8 +317,7 @@ func (config *DeploymentConfig) create() error {
config.applyTo(&deployment.Spec.Template)
_, err := config.Client.ExtensionsV1beta1().Deployments(config.Namespace).Create(deployment)
if err != nil {
if err := CreateDeploymentWithRetries(config.Client, config.Namespace, deployment); err != nil {
return fmt.Errorf("Error creating deployment: %v", err)
}
config.RCConfigLog("Created deployment with name: %v, namespace: %v, replica count: %v", deployment.Name, config.Namespace, removePtr(deployment.Spec.Replicas))
@ -396,8 +387,7 @@ func (config *ReplicaSetConfig) create() error {
config.applyTo(&rs.Spec.Template)
_, err := config.Client.ExtensionsV1beta1().ReplicaSets(config.Namespace).Create(rs)
if err != nil {
if err := CreateReplicaSetWithRetries(config.Client, config.Namespace, rs); err != nil {
return fmt.Errorf("Error creating replica set: %v", err)
}
config.RCConfigLog("Created replica set with name: %v, namespace: %v, replica count: %v", rs.Name, config.Namespace, removePtr(rs.Spec.Replicas))
@ -463,8 +453,7 @@ func (config *JobConfig) create() error {
config.applyTo(&job.Spec.Template)
_, err := config.Client.BatchV1().Jobs(config.Namespace).Create(job)
if err != nil {
if err := CreateJobWithRetries(config.Client, config.Namespace, job); err != nil {
return fmt.Errorf("Error creating job: %v", err)
}
config.RCConfigLog("Created job with name: %v, namespace: %v, parallelism/completions: %v", job.Name, config.Namespace, job.Spec.Parallelism)
@ -584,8 +573,7 @@ func (config *RCConfig) create() error {
config.applyTo(rc.Spec.Template)
_, err := config.Client.CoreV1().ReplicationControllers(config.Namespace).Create(rc)
if err != nil {
if err := CreateRCWithRetries(config.Client, config.Namespace, rc); err != nil {
return fmt.Errorf("Error creating replication controller: %v", err)
}
config.RCConfigLog("Created replication controller with name: %v, namespace: %v, replica count: %v", rc.Name, config.Namespace, removePtr(rc.Spec.Replicas))
@ -795,7 +783,6 @@ func (config *RCConfig) start() error {
// List only pods from a given replication controller.
options := metav1.ListOptions{LabelSelector: label.String()}
if pods, err := config.Client.CoreV1().Pods(metav1.NamespaceAll).List(options); err == nil {
for _, pod := range pods.Items {
config.RCConfigLog("Pod %s\t%s\t%s\t%s", pod.Name, pod.Spec.NodeName, pod.Status.Phase, pod.DeletionTimestamp)
}
@ -823,8 +810,7 @@ func StartPods(c clientset.Interface, replicas int, namespace string, podNamePre
pod.ObjectMeta.Labels["name"] = podName
pod.ObjectMeta.Labels["startPodsID"] = startPodsID
pod.Spec.Containers[0].Name = podName
_, err := c.CoreV1().Pods(namespace).Create(&pod)
if err != nil {
if err := CreatePodWithRetries(c, namespace, &pod); err != nil {
return err
}
}
@ -1020,14 +1006,10 @@ func MakePodSpec() v1.PodSpec {
}
func makeCreatePod(client clientset.Interface, namespace string, podTemplate *v1.Pod) error {
var err error
for attempt := 0; attempt < retries; attempt++ {
if _, err := client.CoreV1().Pods(namespace).Create(podTemplate); err == nil {
return nil
}
glog.Errorf("Error while creating pod, maybe retry: %v", err)
if err := CreatePodWithRetries(client, namespace, podTemplate); err != nil {
return fmt.Errorf("Error creating pod: %v", err)
}
return fmt.Errorf("Terminal error while creating pod, won't retry: %v", err)
return nil
}
func CreatePod(client clientset.Interface, namespace string, podCount int, podTemplate *v1.Pod) error {
@ -1065,14 +1047,10 @@ func createController(client clientset.Interface, controllerName, namespace stri
},
},
}
var err error
for attempt := 0; attempt < retries; attempt++ {
if _, err := client.CoreV1().ReplicationControllers(namespace).Create(rc); err == nil {
return nil
}
glog.Errorf("Error while creating rc, maybe retry: %v", err)
if err := CreateRCWithRetries(client, namespace, rc); err != nil {
return fmt.Errorf("Error creating replication controller: %v", err)
}
return fmt.Errorf("Terminal error while creating rc, won't retry: %v", err)
return nil
}
func NewCustomCreatePodStrategy(podTemplate *v1.Pod) TestPodCreateStrategy {
@ -1127,8 +1105,7 @@ func (config *SecretConfig) Run() error {
secret.StringData[k] = v
}
_, err := config.Client.CoreV1().Secrets(config.Namespace).Create(secret)
if err != nil {
if err := CreateSecretWithRetries(config.Client, config.Namespace, secret); err != nil {
return fmt.Errorf("Error creating secret: %v", err)
}
config.LogFunc("Created secret %v/%v", config.Namespace, config.Name)
@ -1186,8 +1163,7 @@ func (config *ConfigMapConfig) Run() error {
configMap.Data[k] = v
}
_, err := config.Client.CoreV1().ConfigMaps(config.Namespace).Create(configMap)
if err != nil {
if err := CreateConfigMapWithRetries(config.Client, config.Namespace, configMap); err != nil {
return fmt.Errorf("Error creating configmap: %v", err)
}
config.LogFunc("Created configmap %v/%v", config.Namespace, config.Name)
@ -1266,12 +1242,12 @@ func (config *DaemonConfig) Run() error {
},
}
_, err := config.Client.ExtensionsV1beta1().DaemonSets(config.Namespace).Create(daemon)
if err != nil {
return fmt.Errorf("Error creating DaemonSet %v: %v", config.Name, err)
if err := CreateDaemonSetWithRetries(config.Client, config.Namespace, daemon); err != nil {
return fmt.Errorf("Error creating daemonset: %v", err)
}
var nodes *v1.NodeList
var err error
for i := 0; i < retries; i++ {
// Wait for all daemons to be running
nodes, err = config.Client.CoreV1().Nodes().List(metav1.ListOptions{ResourceVersion: "0"})