Merge pull request #70866 from feiskyer/backoff

Reduce Azure API calls by replacing the current backoff retry with SDK's backoff
pull/564/head
Kubernetes Prow Robot 2018-12-20 19:24:14 -08:00 committed by GitHub
commit e1552d8ef9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 480 additions and 225 deletions

View File

@ -59,6 +59,9 @@ const (
vmTypeVMSS = "vmss" vmTypeVMSS = "vmss"
vmTypeStandard = "standard" vmTypeStandard = "standard"
backoffModeDefault = "default"
backoffModeV2 = "v2"
loadBalancerSkuBasic = "basic" loadBalancerSkuBasic = "basic"
loadBalancerSkuStandard = "standard" loadBalancerSkuStandard = "standard"
@ -115,6 +118,12 @@ type Config struct {
CloudProviderBackoffDuration int `json:"cloudProviderBackoffDuration" yaml:"cloudProviderBackoffDuration"` CloudProviderBackoffDuration int `json:"cloudProviderBackoffDuration" yaml:"cloudProviderBackoffDuration"`
// Backoff jitter // Backoff jitter
CloudProviderBackoffJitter float64 `json:"cloudProviderBackoffJitter" yaml:"cloudProviderBackoffJitter"` CloudProviderBackoffJitter float64 `json:"cloudProviderBackoffJitter" yaml:"cloudProviderBackoffJitter"`
// Backoff mode, options are v2 and default.
// * default means two-layer backoff retrying, one in the cloud provider and the other in the Azure SDK.
// * v2 means only backoff in the Azure SDK is used. In such mode, CloudProviderBackoffDuration and
// CloudProviderBackoffJitter are omitted.
// "default" will be used if not specified.
CloudProviderBackoffMode string `json:"cloudProviderBackoffMode" yaml:"cloudProviderBackoffMode"`
// Enable rate limiting // Enable rate limiting
CloudProviderRateLimit bool `json:"cloudProviderRateLimit" yaml:"cloudProviderRateLimit"` CloudProviderRateLimit bool `json:"cloudProviderRateLimit" yaml:"cloudProviderRateLimit"`
// Rate limit QPS (Read) // Rate limit QPS (Read)
@ -268,6 +277,48 @@ func NewCloud(configReader io.Reader) (cloudprovider.Interface, error) {
config.CloudProviderRateLimitBucketWrite) config.CloudProviderRateLimitBucketWrite)
} }
// Conditionally configure resource request backoff
resourceRequestBackoff := wait.Backoff{
Steps: 1,
}
if config.CloudProviderBackoff {
// Assign backoff defaults if no configuration was passed in
if config.CloudProviderBackoffRetries == 0 {
config.CloudProviderBackoffRetries = backoffRetriesDefault
}
if config.CloudProviderBackoffDuration == 0 {
config.CloudProviderBackoffDuration = backoffDurationDefault
}
if config.CloudProviderBackoffExponent == 0 {
config.CloudProviderBackoffExponent = backoffExponentDefault
} else if config.shouldOmitCloudProviderBackoff() {
klog.Warning("Azure cloud provider config 'cloudProviderBackoffExponent' has been deprecated for 'v2' backoff mode. 2 is always used as the backoff exponent.")
}
if config.CloudProviderBackoffJitter == 0 {
config.CloudProviderBackoffJitter = backoffJitterDefault
} else if config.shouldOmitCloudProviderBackoff() {
klog.Warning("Azure cloud provider config 'cloudProviderBackoffJitter' has been deprecated for 'v2' backoff mode.")
}
if !config.shouldOmitCloudProviderBackoff() {
resourceRequestBackoff = wait.Backoff{
Steps: config.CloudProviderBackoffRetries,
Factor: config.CloudProviderBackoffExponent,
Duration: time.Duration(config.CloudProviderBackoffDuration) * time.Second,
Jitter: config.CloudProviderBackoffJitter,
}
}
klog.V(2).Infof("Azure cloudprovider using try backoff: retries=%d, exponent=%f, duration=%d, jitter=%f",
config.CloudProviderBackoffRetries,
config.CloudProviderBackoffExponent,
config.CloudProviderBackoffDuration,
config.CloudProviderBackoffJitter)
} else {
// CloudProviderBackoffRetries will be set to 1 by default as the requirements of Azure SDK.
config.CloudProviderBackoffRetries = 1
config.CloudProviderBackoffDuration = backoffDurationDefault
}
// Do not add master nodes to standard LB by default. // Do not add master nodes to standard LB by default.
if config.ExcludeMasterFromStandardLB == nil { if config.ExcludeMasterFromStandardLB == nil {
config.ExcludeMasterFromStandardLB = &defaultExcludeMasterFromStandardLB config.ExcludeMasterFromStandardLB = &defaultExcludeMasterFromStandardLB
@ -279,6 +330,9 @@ func NewCloud(configReader io.Reader) (cloudprovider.Interface, error) {
servicePrincipalToken: servicePrincipalToken, servicePrincipalToken: servicePrincipalToken,
rateLimiterReader: operationPollRateLimiter, rateLimiterReader: operationPollRateLimiter,
rateLimiterWriter: operationPollRateLimiterWrite, rateLimiterWriter: operationPollRateLimiterWrite,
CloudProviderBackoffRetries: config.CloudProviderBackoffRetries,
CloudProviderBackoffDuration: config.CloudProviderBackoffDuration,
ShouldOmitCloudProviderBackoff: config.shouldOmitCloudProviderBackoff(),
} }
az := Cloud{ az := Cloud{
Config: *config, Config: *config,
@ -287,6 +341,7 @@ func NewCloud(configReader io.Reader) (cloudprovider.Interface, error) {
nodeResourceGroups: map[string]string{}, nodeResourceGroups: map[string]string{},
unmanagedNodes: sets.NewString(), unmanagedNodes: sets.NewString(),
routeCIDRs: map[string]string{}, routeCIDRs: map[string]string{},
resourceRequestBackoff: resourceRequestBackoff,
DisksClient: newAzDisksClient(azClientConfig), DisksClient: newAzDisksClient(azClientConfig),
RoutesClient: newAzRoutesClient(azClientConfig), RoutesClient: newAzRoutesClient(azClientConfig),
@ -304,34 +359,6 @@ func NewCloud(configReader io.Reader) (cloudprovider.Interface, error) {
FileClient: &azureFileClient{env: *env}, FileClient: &azureFileClient{env: *env},
} }
// Conditionally configure resource request backoff
if az.CloudProviderBackoff {
// Assign backoff defaults if no configuration was passed in
if az.CloudProviderBackoffRetries == 0 {
az.CloudProviderBackoffRetries = backoffRetriesDefault
}
if az.CloudProviderBackoffExponent == 0 {
az.CloudProviderBackoffExponent = backoffExponentDefault
}
if az.CloudProviderBackoffDuration == 0 {
az.CloudProviderBackoffDuration = backoffDurationDefault
}
if az.CloudProviderBackoffJitter == 0 {
az.CloudProviderBackoffJitter = backoffJitterDefault
}
az.resourceRequestBackoff = wait.Backoff{
Steps: az.CloudProviderBackoffRetries,
Factor: az.CloudProviderBackoffExponent,
Duration: time.Duration(az.CloudProviderBackoffDuration) * time.Second,
Jitter: az.CloudProviderBackoffJitter,
}
klog.V(2).Infof("Azure cloudprovider using try backoff: retries=%d, exponent=%f, duration=%d, jitter=%f",
az.CloudProviderBackoffRetries,
az.CloudProviderBackoffExponent,
az.CloudProviderBackoffDuration,
az.CloudProviderBackoffJitter)
}
az.metadata, err = NewInstanceMetadataService(metadataURL) az.metadata, err = NewInstanceMetadataService(metadataURL)
if err != nil { if err != nil {
return nil, err return nil, err

View File

@ -17,19 +17,18 @@ limitations under the License.
package azure package azure
import ( import (
"context"
"fmt" "fmt"
"net/http" "net/http"
"github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2018-10-01/compute" "github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2018-10-01/compute"
"github.com/Azure/azure-sdk-for-go/services/network/mgmt/2017-09-01/network" "github.com/Azure/azure-sdk-for-go/services/network/mgmt/2017-09-01/network"
"k8s.io/klog"
"k8s.io/api/core/v1" "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/wait" "k8s.io/apimachinery/pkg/util/wait"
cloudprovider "k8s.io/cloud-provider" cloudprovider "k8s.io/cloud-provider"
"k8s.io/klog"
) )
// requestBackoff if backoff is disabled in cloud provider it // requestBackoff if backoff is disabled in cloud provider it
@ -43,7 +42,6 @@ func (az *Cloud) requestBackoff() (resourceRequestBackoff wait.Backoff) {
resourceRequestBackoff = wait.Backoff{ resourceRequestBackoff = wait.Backoff{
Steps: 1, Steps: 1,
} }
return resourceRequestBackoff return resourceRequestBackoff
} }
@ -73,12 +71,11 @@ func (az *Cloud) GetVirtualMachineWithRetry(name types.NodeName) (compute.Virtua
if err == wait.ErrWaitTimeout { if err == wait.ErrWaitTimeout {
err = retryErr err = retryErr
} }
return machine, err return machine, err
} }
// VirtualMachineClientListWithRetry invokes az.VirtualMachinesClient.List with exponential backoff retry // ListVirtualMachinesWithRetry invokes az.VirtualMachinesClient.List with exponential backoff retry
func (az *Cloud) VirtualMachineClientListWithRetry(resourceGroup string) ([]compute.VirtualMachine, error) { func (az *Cloud) ListVirtualMachinesWithRetry(resourceGroup string) ([]compute.VirtualMachine, error) {
allNodes := []compute.VirtualMachine{} allNodes := []compute.VirtualMachine{}
err := wait.ExponentialBackoff(az.requestBackoff(), func() (bool, error) { err := wait.ExponentialBackoff(az.requestBackoff(), func() (bool, error) {
var retryErr error var retryErr error
@ -101,12 +98,38 @@ func (az *Cloud) VirtualMachineClientListWithRetry(resourceGroup string) ([]comp
return allNodes, err return allNodes, err
} }
// ListVirtualMachines invokes az.VirtualMachinesClient.List with exponential backoff retry
func (az *Cloud) ListVirtualMachines(resourceGroup string) ([]compute.VirtualMachine, error) {
if az.Config.shouldOmitCloudProviderBackoff() {
ctx, cancel := getContextWithCancel()
defer cancel()
allNodes, err := az.VirtualMachinesClient.List(ctx, resourceGroup)
if err != nil {
klog.Errorf("VirtualMachinesClient.List(%v) failure with err=%v", resourceGroup, err)
return nil, err
}
klog.V(2).Infof("VirtualMachinesClient.List(%v) success", resourceGroup)
return allNodes, nil
}
return az.ListVirtualMachinesWithRetry(resourceGroup)
}
func (az *Cloud) getIPForMachine(nodeName types.NodeName) (string, string, error) {
if az.Config.shouldOmitCloudProviderBackoff() {
return az.vmSet.GetIPByNodeName(string(nodeName))
}
return az.GetIPForMachineWithRetry(nodeName)
}
// GetIPForMachineWithRetry invokes az.getIPForMachine with exponential backoff retry // GetIPForMachineWithRetry invokes az.getIPForMachine with exponential backoff retry
func (az *Cloud) GetIPForMachineWithRetry(name types.NodeName) (string, string, error) { func (az *Cloud) GetIPForMachineWithRetry(name types.NodeName) (string, string, error) {
var ip, publicIP string var ip, publicIP string
err := wait.ExponentialBackoff(az.requestBackoff(), func() (bool, error) { err := wait.ExponentialBackoff(az.requestBackoff(), func() (bool, error) {
var retryErr error var retryErr error
ip, publicIP, retryErr = az.getIPForMachine(name) ip, publicIP, retryErr = az.vmSet.GetIPByNodeName(string(name))
if retryErr != nil { if retryErr != nil {
klog.Errorf("GetIPForMachineWithRetry(%s): backoff failure, will retry,err=%v", name, retryErr) klog.Errorf("GetIPForMachineWithRetry(%s): backoff failure, will retry,err=%v", name, retryErr)
return false, nil return false, nil
@ -117,6 +140,28 @@ func (az *Cloud) GetIPForMachineWithRetry(name types.NodeName) (string, string,
return ip, publicIP, err return ip, publicIP, err
} }
// CreateOrUpdateSecurityGroup invokes az.SecurityGroupsClient.CreateOrUpdate with exponential backoff retry
func (az *Cloud) CreateOrUpdateSecurityGroup(service *v1.Service, sg network.SecurityGroup) error {
if az.Config.shouldOmitCloudProviderBackoff() {
ctx, cancel := getContextWithCancel()
defer cancel()
resp, err := az.SecurityGroupsClient.CreateOrUpdate(ctx, az.ResourceGroup, *sg.Name, sg)
klog.V(10).Infof("SecurityGroupsClient.CreateOrUpdate(%s): end", *sg.Name)
if err == nil {
if isSuccessHTTPResponse(resp) {
// Invalidate the cache right after updating
az.nsgCache.Delete(*sg.Name)
} else if resp != nil {
return fmt.Errorf("HTTP response %q", resp.Status)
}
}
return err
}
return az.CreateOrUpdateSGWithRetry(service, sg)
}
// CreateOrUpdateSGWithRetry invokes az.SecurityGroupsClient.CreateOrUpdate with exponential backoff retry // CreateOrUpdateSGWithRetry invokes az.SecurityGroupsClient.CreateOrUpdate with exponential backoff retry
func (az *Cloud) CreateOrUpdateSGWithRetry(service *v1.Service, sg network.SecurityGroup) error { func (az *Cloud) CreateOrUpdateSGWithRetry(service *v1.Service, sg network.SecurityGroup) error {
return wait.ExponentialBackoff(az.requestBackoff(), func() (bool, error) { return wait.ExponentialBackoff(az.requestBackoff(), func() (bool, error) {
@ -134,8 +179,30 @@ func (az *Cloud) CreateOrUpdateSGWithRetry(service *v1.Service, sg network.Secur
}) })
} }
// CreateOrUpdateLBWithRetry invokes az.LoadBalancerClient.CreateOrUpdate with exponential backoff retry // CreateOrUpdateLB invokes az.LoadBalancerClient.CreateOrUpdate with exponential backoff retry
func (az *Cloud) CreateOrUpdateLBWithRetry(service *v1.Service, lb network.LoadBalancer) error { func (az *Cloud) CreateOrUpdateLB(service *v1.Service, lb network.LoadBalancer) error {
if az.Config.shouldOmitCloudProviderBackoff() {
ctx, cancel := getContextWithCancel()
defer cancel()
resp, err := az.LoadBalancerClient.CreateOrUpdate(ctx, az.ResourceGroup, *lb.Name, lb)
klog.V(10).Infof("LoadBalancerClient.CreateOrUpdate(%s): end", *lb.Name)
if err == nil {
if isSuccessHTTPResponse(resp) {
// Invalidate the cache right after updating
az.lbCache.Delete(*lb.Name)
} else if resp != nil {
return fmt.Errorf("HTTP response %q", resp.Status)
}
}
return err
}
return az.createOrUpdateLBWithRetry(service, lb)
}
// createOrUpdateLBWithRetry invokes az.LoadBalancerClient.CreateOrUpdate with exponential backoff retry
func (az *Cloud) createOrUpdateLBWithRetry(service *v1.Service, lb network.LoadBalancer) error {
return wait.ExponentialBackoff(az.requestBackoff(), func() (bool, error) { return wait.ExponentialBackoff(az.requestBackoff(), func() (bool, error) {
ctx, cancel := getContextWithCancel() ctx, cancel := getContextWithCancel()
defer cancel() defer cancel()
@ -151,8 +218,27 @@ func (az *Cloud) CreateOrUpdateLBWithRetry(service *v1.Service, lb network.LoadB
}) })
} }
// ListLBWithRetry invokes az.LoadBalancerClient.List with exponential backoff retry // ListLB invokes az.LoadBalancerClient.List with exponential backoff retry
func (az *Cloud) ListLBWithRetry(service *v1.Service) ([]network.LoadBalancer, error) { func (az *Cloud) ListLB(service *v1.Service) ([]network.LoadBalancer, error) {
if az.Config.shouldOmitCloudProviderBackoff() {
ctx, cancel := getContextWithCancel()
defer cancel()
allLBs, err := az.LoadBalancerClient.List(ctx, az.ResourceGroup)
if err != nil {
az.Event(service, v1.EventTypeWarning, "ListLoadBalancers", err.Error())
klog.Errorf("LoadBalancerClient.List(%v) failure with err=%v", az.ResourceGroup, err)
return nil, err
}
klog.V(2).Infof("LoadBalancerClient.List(%v) success", az.ResourceGroup)
return allLBs, nil
}
return az.listLBWithRetry(service)
}
// listLBWithRetry invokes az.LoadBalancerClient.List with exponential backoff retry
func (az *Cloud) listLBWithRetry(service *v1.Service) ([]network.LoadBalancer, error) {
var allLBs []network.LoadBalancer var allLBs []network.LoadBalancer
err := wait.ExponentialBackoff(az.requestBackoff(), func() (bool, error) { err := wait.ExponentialBackoff(az.requestBackoff(), func() (bool, error) {
@ -178,8 +264,27 @@ func (az *Cloud) ListLBWithRetry(service *v1.Service) ([]network.LoadBalancer, e
return allLBs, nil return allLBs, nil
} }
// ListPIPWithRetry list the PIP resources in the given resource group // ListPIP list the PIP resources in the given resource group
func (az *Cloud) ListPIPWithRetry(service *v1.Service, pipResourceGroup string) ([]network.PublicIPAddress, error) { func (az *Cloud) ListPIP(service *v1.Service, pipResourceGroup string) ([]network.PublicIPAddress, error) {
if az.Config.shouldOmitCloudProviderBackoff() {
ctx, cancel := getContextWithCancel()
defer cancel()
allPIPs, err := az.PublicIPAddressesClient.List(ctx, pipResourceGroup)
if err != nil {
az.Event(service, v1.EventTypeWarning, "ListPublicIPs", err.Error())
klog.Errorf("PublicIPAddressesClient.List(%v) failure with err=%v", pipResourceGroup, err)
return nil, err
}
klog.V(2).Infof("PublicIPAddressesClient.List(%v) success", pipResourceGroup)
return allPIPs, nil
}
return az.listPIPWithRetry(service, pipResourceGroup)
}
// listPIPWithRetry list the PIP resources in the given resource group
func (az *Cloud) listPIPWithRetry(service *v1.Service, pipResourceGroup string) ([]network.PublicIPAddress, error) {
var allPIPs []network.PublicIPAddress var allPIPs []network.PublicIPAddress
err := wait.ExponentialBackoff(az.requestBackoff(), func() (bool, error) { err := wait.ExponentialBackoff(az.requestBackoff(), func() (bool, error) {
@ -205,8 +310,22 @@ func (az *Cloud) ListPIPWithRetry(service *v1.Service, pipResourceGroup string)
return allPIPs, nil return allPIPs, nil
} }
// CreateOrUpdatePIPWithRetry invokes az.PublicIPAddressesClient.CreateOrUpdate with exponential backoff retry // CreateOrUpdatePIP invokes az.PublicIPAddressesClient.CreateOrUpdate with exponential backoff retry
func (az *Cloud) CreateOrUpdatePIPWithRetry(service *v1.Service, pipResourceGroup string, pip network.PublicIPAddress) error { func (az *Cloud) CreateOrUpdatePIP(service *v1.Service, pipResourceGroup string, pip network.PublicIPAddress) error {
if az.Config.shouldOmitCloudProviderBackoff() {
ctx, cancel := getContextWithCancel()
defer cancel()
resp, err := az.PublicIPAddressesClient.CreateOrUpdate(ctx, pipResourceGroup, *pip.Name, pip)
klog.V(10).Infof("PublicIPAddressesClient.CreateOrUpdate(%s, %s): end", pipResourceGroup, *pip.Name)
return az.processHTTPResponse(service, "CreateOrUpdatePublicIPAddress", resp, err)
}
return az.createOrUpdatePIPWithRetry(service, pipResourceGroup, pip)
}
// createOrUpdatePIPWithRetry invokes az.PublicIPAddressesClient.CreateOrUpdate with exponential backoff retry
func (az *Cloud) createOrUpdatePIPWithRetry(service *v1.Service, pipResourceGroup string, pip network.PublicIPAddress) error {
return wait.ExponentialBackoff(az.requestBackoff(), func() (bool, error) { return wait.ExponentialBackoff(az.requestBackoff(), func() (bool, error) {
ctx, cancel := getContextWithCancel() ctx, cancel := getContextWithCancel()
defer cancel() defer cancel()
@ -217,8 +336,22 @@ func (az *Cloud) CreateOrUpdatePIPWithRetry(service *v1.Service, pipResourceGrou
}) })
} }
// CreateOrUpdateInterfaceWithRetry invokes az.PublicIPAddressesClient.CreateOrUpdate with exponential backoff retry // CreateOrUpdateInterface invokes az.PublicIPAddressesClient.CreateOrUpdate with exponential backoff retry
func (az *Cloud) CreateOrUpdateInterfaceWithRetry(service *v1.Service, nic network.Interface) error { func (az *Cloud) CreateOrUpdateInterface(service *v1.Service, nic network.Interface) error {
if az.Config.shouldOmitCloudProviderBackoff() {
ctx, cancel := getContextWithCancel()
defer cancel()
resp, err := az.InterfacesClient.CreateOrUpdate(ctx, az.ResourceGroup, *nic.Name, nic)
klog.V(10).Infof("InterfacesClient.CreateOrUpdate(%s): end", *nic.Name)
return az.processHTTPResponse(service, "CreateOrUpdateInterface", resp, err)
}
return az.createOrUpdateInterfaceWithRetry(service, nic)
}
// createOrUpdateInterfaceWithRetry invokes az.PublicIPAddressesClient.CreateOrUpdate with exponential backoff retry
func (az *Cloud) createOrUpdateInterfaceWithRetry(service *v1.Service, nic network.Interface) error {
return wait.ExponentialBackoff(az.requestBackoff(), func() (bool, error) { return wait.ExponentialBackoff(az.requestBackoff(), func() (bool, error) {
ctx, cancel := getContextWithCancel() ctx, cancel := getContextWithCancel()
defer cancel() defer cancel()
@ -229,8 +362,21 @@ func (az *Cloud) CreateOrUpdateInterfaceWithRetry(service *v1.Service, nic netwo
}) })
} }
// DeletePublicIPWithRetry invokes az.PublicIPAddressesClient.Delete with exponential backoff retry // DeletePublicIP invokes az.PublicIPAddressesClient.Delete with exponential backoff retry
func (az *Cloud) DeletePublicIPWithRetry(service *v1.Service, pipResourceGroup string, pipName string) error { func (az *Cloud) DeletePublicIP(service *v1.Service, pipResourceGroup string, pipName string) error {
if az.Config.shouldOmitCloudProviderBackoff() {
ctx, cancel := getContextWithCancel()
defer cancel()
resp, err := az.PublicIPAddressesClient.Delete(ctx, pipResourceGroup, pipName)
return az.processHTTPResponse(service, "DeletePublicIPAddress", resp, err)
}
return az.deletePublicIPWithRetry(service, pipResourceGroup, pipName)
}
// deletePublicIPWithRetry invokes az.PublicIPAddressesClient.Delete with exponential backoff retry
func (az *Cloud) deletePublicIPWithRetry(service *v1.Service, pipResourceGroup string, pipName string) error {
return wait.ExponentialBackoff(az.requestBackoff(), func() (bool, error) { return wait.ExponentialBackoff(az.requestBackoff(), func() (bool, error) {
ctx, cancel := getContextWithCancel() ctx, cancel := getContextWithCancel()
defer cancel() defer cancel()
@ -240,8 +386,29 @@ func (az *Cloud) DeletePublicIPWithRetry(service *v1.Service, pipResourceGroup s
}) })
} }
// DeleteLBWithRetry invokes az.LoadBalancerClient.Delete with exponential backoff retry // DeleteLB invokes az.LoadBalancerClient.Delete with exponential backoff retry
func (az *Cloud) DeleteLBWithRetry(service *v1.Service, lbName string) error { func (az *Cloud) DeleteLB(service *v1.Service, lbName string) error {
if az.Config.shouldOmitCloudProviderBackoff() {
ctx, cancel := getContextWithCancel()
defer cancel()
resp, err := az.LoadBalancerClient.Delete(ctx, az.ResourceGroup, lbName)
if err == nil {
if isSuccessHTTPResponse(resp) {
// Invalidate the cache right after updating
az.lbCache.Delete(lbName)
} else if resp != nil {
return fmt.Errorf("HTTP response %q", resp.Status)
}
}
return err
}
return az.deleteLBWithRetry(service, lbName)
}
// deleteLBWithRetry invokes az.LoadBalancerClient.Delete with exponential backoff retry
func (az *Cloud) deleteLBWithRetry(service *v1.Service, lbName string) error {
return wait.ExponentialBackoff(az.requestBackoff(), func() (bool, error) { return wait.ExponentialBackoff(az.requestBackoff(), func() (bool, error) {
ctx, cancel := getContextWithCancel() ctx, cancel := getContextWithCancel()
defer cancel() defer cancel()
@ -256,8 +423,21 @@ func (az *Cloud) DeleteLBWithRetry(service *v1.Service, lbName string) error {
}) })
} }
// CreateOrUpdateRouteTableWithRetry invokes az.RouteTablesClient.CreateOrUpdate with exponential backoff retry // CreateOrUpdateRouteTable invokes az.RouteTablesClient.CreateOrUpdate with exponential backoff retry
func (az *Cloud) CreateOrUpdateRouteTableWithRetry(routeTable network.RouteTable) error { func (az *Cloud) CreateOrUpdateRouteTable(routeTable network.RouteTable) error {
if az.Config.shouldOmitCloudProviderBackoff() {
ctx, cancel := getContextWithCancel()
defer cancel()
resp, err := az.RouteTablesClient.CreateOrUpdate(ctx, az.ResourceGroup, az.RouteTableName, routeTable)
return az.processHTTPResponse(nil, "", resp, err)
}
return az.createOrUpdateRouteTableWithRetry(routeTable)
}
// createOrUpdateRouteTableWithRetry invokes az.RouteTablesClient.CreateOrUpdate with exponential backoff retry
func (az *Cloud) createOrUpdateRouteTableWithRetry(routeTable network.RouteTable) error {
return wait.ExponentialBackoff(az.requestBackoff(), func() (bool, error) { return wait.ExponentialBackoff(az.requestBackoff(), func() (bool, error) {
ctx, cancel := getContextWithCancel() ctx, cancel := getContextWithCancel()
defer cancel() defer cancel()
@ -267,8 +447,22 @@ func (az *Cloud) CreateOrUpdateRouteTableWithRetry(routeTable network.RouteTable
}) })
} }
// CreateOrUpdateRouteWithRetry invokes az.RoutesClient.CreateOrUpdate with exponential backoff retry // CreateOrUpdateRoute invokes az.RoutesClient.CreateOrUpdate with exponential backoff retry
func (az *Cloud) CreateOrUpdateRouteWithRetry(route network.Route) error { func (az *Cloud) CreateOrUpdateRoute(route network.Route) error {
if az.Config.shouldOmitCloudProviderBackoff() {
ctx, cancel := getContextWithCancel()
defer cancel()
resp, err := az.RoutesClient.CreateOrUpdate(ctx, az.ResourceGroup, az.RouteTableName, *route.Name, route)
klog.V(10).Infof("RoutesClient.CreateOrUpdate(%s): end", *route.Name)
return az.processHTTPResponse(nil, "", resp, err)
}
return az.createOrUpdateRouteWithRetry(route)
}
// createOrUpdateRouteWithRetry invokes az.RoutesClient.CreateOrUpdate with exponential backoff retry
func (az *Cloud) createOrUpdateRouteWithRetry(route network.Route) error {
return wait.ExponentialBackoff(az.requestBackoff(), func() (bool, error) { return wait.ExponentialBackoff(az.requestBackoff(), func() (bool, error) {
ctx, cancel := getContextWithCancel() ctx, cancel := getContextWithCancel()
defer cancel() defer cancel()
@ -279,14 +473,28 @@ func (az *Cloud) CreateOrUpdateRouteWithRetry(route network.Route) error {
}) })
} }
// DeleteRouteWithRetry invokes az.RoutesClient.Delete with exponential backoff retry // DeleteRouteWithName invokes az.RoutesClient.CreateOrUpdate with exponential backoff retry
func (az *Cloud) DeleteRouteWithRetry(routeName string) error { func (az *Cloud) DeleteRouteWithName(routeName string) error {
if az.Config.shouldOmitCloudProviderBackoff() {
ctx, cancel := getContextWithCancel()
defer cancel()
resp, err := az.RoutesClient.Delete(ctx, az.ResourceGroup, az.RouteTableName, routeName)
klog.V(10).Infof("RoutesClient.Delete(%s,%s): end", az.RouteTableName, routeName)
return az.processHTTPResponse(nil, "", resp, err)
}
return az.deleteRouteWithRetry(routeName)
}
// deleteRouteWithRetry invokes az.RoutesClient.Delete with exponential backoff retry
func (az *Cloud) deleteRouteWithRetry(routeName string) error {
return wait.ExponentialBackoff(az.requestBackoff(), func() (bool, error) { return wait.ExponentialBackoff(az.requestBackoff(), func() (bool, error) {
ctx, cancel := getContextWithCancel() ctx, cancel := getContextWithCancel()
defer cancel() defer cancel()
resp, err := az.RoutesClient.Delete(ctx, az.ResourceGroup, az.RouteTableName, routeName) resp, err := az.RoutesClient.Delete(ctx, az.ResourceGroup, az.RouteTableName, routeName)
klog.V(10).Infof("RoutesClient.Delete(%s): end", az.RouteTableName) klog.V(10).Infof("RoutesClient.Delete(%s,%s): end", az.RouteTableName, routeName)
return az.processHTTPRetryResponse(nil, "", resp, err) return az.processHTTPRetryResponse(nil, "", resp, err)
}) })
} }
@ -303,21 +511,17 @@ func (az *Cloud) CreateOrUpdateVMWithRetry(resourceGroup, vmName string, newVM c
}) })
} }
// UpdateVmssVMWithRetry invokes az.VirtualMachineScaleSetVMsClient.Update with exponential backoff retry // isSuccessHTTPResponse determines if the response from an HTTP request suggests success
func (az *Cloud) UpdateVmssVMWithRetry(ctx context.Context, resourceGroupName string, VMScaleSetName string, instanceID string, parameters compute.VirtualMachineScaleSetVM) error { func isSuccessHTTPResponse(resp *http.Response) bool {
return wait.ExponentialBackoff(az.requestBackoff(), func() (bool, error) { if resp == nil {
resp, err := az.VirtualMachineScaleSetVMsClient.Update(ctx, resourceGroupName, VMScaleSetName, instanceID, parameters) return false
klog.V(10).Infof("VirtualMachinesClient.CreateOrUpdate(%s,%s): end", VMScaleSetName, instanceID)
return az.processHTTPRetryResponse(nil, "", resp, err)
})
} }
// isSuccessHTTPResponse determines if the response from an HTTP request suggests success
func isSuccessHTTPResponse(resp http.Response) bool {
// HTTP 2xx suggests a successful response // HTTP 2xx suggests a successful response
if 199 < resp.StatusCode && resp.StatusCode < 300 { if 199 < resp.StatusCode && resp.StatusCode < 300 {
return true return true
} }
return false return false
} }
@ -337,7 +541,7 @@ func shouldRetryHTTPRequest(resp *http.Response, err error) bool {
} }
func (az *Cloud) processHTTPRetryResponse(service *v1.Service, reason string, resp *http.Response, err error) (bool, error) { func (az *Cloud) processHTTPRetryResponse(service *v1.Service, reason string, resp *http.Response, err error) (bool, error) {
if resp != nil && isSuccessHTTPResponse(*resp) { if resp != nil && isSuccessHTTPResponse(resp) {
// HTTP 2xx suggests a successful response // HTTP 2xx suggests a successful response
return true, nil return true, nil
} }
@ -358,3 +562,24 @@ func (az *Cloud) processHTTPRetryResponse(service *v1.Service, reason string, re
// Fall-through: stop periodic backoff // Fall-through: stop periodic backoff
return true, nil return true, nil
} }
func (az *Cloud) processHTTPResponse(service *v1.Service, reason string, resp *http.Response, err error) error {
if isSuccessHTTPResponse(resp) {
// HTTP 2xx suggests a successful response
return nil
}
if err != nil {
az.Event(service, v1.EventTypeWarning, reason, err.Error())
klog.Errorf("processHTTPRetryResponse failure with err: %v", err)
} else if resp != nil {
az.Event(service, v1.EventTypeWarning, reason, fmt.Sprintf("Azure HTTP response %d", resp.StatusCode))
klog.Errorf("processHTTPRetryResponse failure with HTTP response %q", resp.Status)
}
return err
}
func (cfg *Config) shouldOmitCloudProviderBackoff() bool {
return cfg.CloudProviderBackoffMode == backoffModeV2
}

View File

@ -50,7 +50,6 @@ func TestShouldRetryHTTPRequest(t *testing.T) {
expected: false, expected: false,
}, },
} }
for _, test := range tests { for _, test := range tests {
resp := &http.Response{ resp := &http.Response{
StatusCode: test.code, StatusCode: test.code,
@ -85,7 +84,7 @@ func TestIsSuccessResponse(t *testing.T) {
resp := http.Response{ resp := http.Response{
StatusCode: test.code, StatusCode: test.code,
} }
res := isSuccessHTTPResponse(resp) res := isSuccessHTTPResponse(&resp)
if res != test.expected { if res != test.expected {
t.Errorf("expected: %v, saw: %v", test.expected, res) t.Errorf("expected: %v, saw: %v", test.expected, res)
} }

View File

@ -145,6 +145,10 @@ type azClientConfig struct {
//Details: https://docs.microsoft.com/en-us/azure/azure-resource-manager/resource-manager-request-limits //Details: https://docs.microsoft.com/en-us/azure/azure-resource-manager/resource-manager-request-limits
rateLimiterReader flowcontrol.RateLimiter rateLimiterReader flowcontrol.RateLimiter
rateLimiterWriter flowcontrol.RateLimiter rateLimiterWriter flowcontrol.RateLimiter
CloudProviderBackoffRetries int
CloudProviderBackoffDuration int
ShouldOmitCloudProviderBackoff bool
} }
// azVirtualMachinesClient implements VirtualMachinesClient. // azVirtualMachinesClient implements VirtualMachinesClient.
@ -163,6 +167,10 @@ func newAzVirtualMachinesClient(config *azClientConfig) *azVirtualMachinesClient
virtualMachinesClient.BaseURI = config.resourceManagerEndpoint virtualMachinesClient.BaseURI = config.resourceManagerEndpoint
virtualMachinesClient.Authorizer = autorest.NewBearerAuthorizer(config.servicePrincipalToken) virtualMachinesClient.Authorizer = autorest.NewBearerAuthorizer(config.servicePrincipalToken)
virtualMachinesClient.PollingDelay = 5 * time.Second virtualMachinesClient.PollingDelay = 5 * time.Second
if config.ShouldOmitCloudProviderBackoff {
virtualMachinesClient.RetryAttempts = config.CloudProviderBackoffRetries
virtualMachinesClient.RetryDuration = time.Duration(config.CloudProviderBackoffDuration) * time.Second
}
configureUserAgent(&virtualMachinesClient.Client) configureUserAgent(&virtualMachinesClient.Client)
return &azVirtualMachinesClient{ return &azVirtualMachinesClient{
@ -254,6 +262,10 @@ func newAzInterfacesClient(config *azClientConfig) *azInterfacesClient {
interfacesClient.BaseURI = config.resourceManagerEndpoint interfacesClient.BaseURI = config.resourceManagerEndpoint
interfacesClient.Authorizer = autorest.NewBearerAuthorizer(config.servicePrincipalToken) interfacesClient.Authorizer = autorest.NewBearerAuthorizer(config.servicePrincipalToken)
interfacesClient.PollingDelay = 5 * time.Second interfacesClient.PollingDelay = 5 * time.Second
if config.ShouldOmitCloudProviderBackoff {
interfacesClient.RetryAttempts = config.CloudProviderBackoffRetries
interfacesClient.RetryDuration = time.Duration(config.CloudProviderBackoffDuration) * time.Second
}
configureUserAgent(&interfacesClient.Client) configureUserAgent(&interfacesClient.Client)
return &azInterfacesClient{ return &azInterfacesClient{
@ -333,6 +345,10 @@ func newAzLoadBalancersClient(config *azClientConfig) *azLoadBalancersClient {
loadBalancerClient.BaseURI = config.resourceManagerEndpoint loadBalancerClient.BaseURI = config.resourceManagerEndpoint
loadBalancerClient.Authorizer = autorest.NewBearerAuthorizer(config.servicePrincipalToken) loadBalancerClient.Authorizer = autorest.NewBearerAuthorizer(config.servicePrincipalToken)
loadBalancerClient.PollingDelay = 5 * time.Second loadBalancerClient.PollingDelay = 5 * time.Second
if config.ShouldOmitCloudProviderBackoff {
loadBalancerClient.RetryAttempts = config.CloudProviderBackoffRetries
loadBalancerClient.RetryDuration = time.Duration(config.CloudProviderBackoffDuration) * time.Second
}
configureUserAgent(&loadBalancerClient.Client) configureUserAgent(&loadBalancerClient.Client)
return &azLoadBalancersClient{ return &azLoadBalancersClient{
@ -449,6 +465,10 @@ func newAzPublicIPAddressesClient(config *azClientConfig) *azPublicIPAddressesCl
publicIPAddressClient.BaseURI = config.resourceManagerEndpoint publicIPAddressClient.BaseURI = config.resourceManagerEndpoint
publicIPAddressClient.Authorizer = autorest.NewBearerAuthorizer(config.servicePrincipalToken) publicIPAddressClient.Authorizer = autorest.NewBearerAuthorizer(config.servicePrincipalToken)
publicIPAddressClient.PollingDelay = 5 * time.Second publicIPAddressClient.PollingDelay = 5 * time.Second
if config.ShouldOmitCloudProviderBackoff {
publicIPAddressClient.RetryAttempts = config.CloudProviderBackoffRetries
publicIPAddressClient.RetryDuration = time.Duration(config.CloudProviderBackoffDuration) * time.Second
}
configureUserAgent(&publicIPAddressClient.Client) configureUserAgent(&publicIPAddressClient.Client)
return &azPublicIPAddressesClient{ return &azPublicIPAddressesClient{
@ -564,6 +584,10 @@ func newAzSubnetsClient(config *azClientConfig) *azSubnetsClient {
subnetsClient.BaseURI = config.resourceManagerEndpoint subnetsClient.BaseURI = config.resourceManagerEndpoint
subnetsClient.Authorizer = autorest.NewBearerAuthorizer(config.servicePrincipalToken) subnetsClient.Authorizer = autorest.NewBearerAuthorizer(config.servicePrincipalToken)
subnetsClient.PollingDelay = 5 * time.Second subnetsClient.PollingDelay = 5 * time.Second
if config.ShouldOmitCloudProviderBackoff {
subnetsClient.RetryAttempts = config.CloudProviderBackoffRetries
subnetsClient.RetryDuration = time.Duration(config.CloudProviderBackoffDuration) * time.Second
}
configureUserAgent(&subnetsClient.Client) configureUserAgent(&subnetsClient.Client)
return &azSubnetsClient{ return &azSubnetsClient{
@ -679,6 +703,10 @@ func newAzSecurityGroupsClient(config *azClientConfig) *azSecurityGroupsClient {
securityGroupsClient.BaseURI = config.resourceManagerEndpoint securityGroupsClient.BaseURI = config.resourceManagerEndpoint
securityGroupsClient.Authorizer = autorest.NewBearerAuthorizer(config.servicePrincipalToken) securityGroupsClient.Authorizer = autorest.NewBearerAuthorizer(config.servicePrincipalToken)
securityGroupsClient.PollingDelay = 5 * time.Second securityGroupsClient.PollingDelay = 5 * time.Second
if config.ShouldOmitCloudProviderBackoff {
securityGroupsClient.RetryAttempts = config.CloudProviderBackoffRetries
securityGroupsClient.RetryDuration = time.Duration(config.CloudProviderBackoffDuration) * time.Second
}
configureUserAgent(&securityGroupsClient.Client) configureUserAgent(&securityGroupsClient.Client)
return &azSecurityGroupsClient{ return &azSecurityGroupsClient{
@ -794,6 +822,10 @@ func newAzVirtualMachineScaleSetsClient(config *azClientConfig) *azVirtualMachin
virtualMachineScaleSetsClient.BaseURI = config.resourceManagerEndpoint virtualMachineScaleSetsClient.BaseURI = config.resourceManagerEndpoint
virtualMachineScaleSetsClient.Authorizer = autorest.NewBearerAuthorizer(config.servicePrincipalToken) virtualMachineScaleSetsClient.Authorizer = autorest.NewBearerAuthorizer(config.servicePrincipalToken)
virtualMachineScaleSetsClient.PollingDelay = 5 * time.Second virtualMachineScaleSetsClient.PollingDelay = 5 * time.Second
if config.ShouldOmitCloudProviderBackoff {
virtualMachineScaleSetsClient.RetryAttempts = config.CloudProviderBackoffRetries
virtualMachineScaleSetsClient.RetryDuration = time.Duration(config.CloudProviderBackoffDuration) * time.Second
}
configureUserAgent(&virtualMachineScaleSetsClient.Client) configureUserAgent(&virtualMachineScaleSetsClient.Client)
return &azVirtualMachineScaleSetsClient{ return &azVirtualMachineScaleSetsClient{
@ -910,6 +942,10 @@ func newAzVirtualMachineScaleSetVMsClient(config *azClientConfig) *azVirtualMach
virtualMachineScaleSetVMsClient.BaseURI = config.resourceManagerEndpoint virtualMachineScaleSetVMsClient.BaseURI = config.resourceManagerEndpoint
virtualMachineScaleSetVMsClient.Authorizer = autorest.NewBearerAuthorizer(config.servicePrincipalToken) virtualMachineScaleSetVMsClient.Authorizer = autorest.NewBearerAuthorizer(config.servicePrincipalToken)
virtualMachineScaleSetVMsClient.PollingDelay = 5 * time.Second virtualMachineScaleSetVMsClient.PollingDelay = 5 * time.Second
if config.ShouldOmitCloudProviderBackoff {
virtualMachineScaleSetVMsClient.RetryAttempts = config.CloudProviderBackoffRetries
virtualMachineScaleSetVMsClient.RetryDuration = time.Duration(config.CloudProviderBackoffDuration) * time.Second
}
configureUserAgent(&virtualMachineScaleSetVMsClient.Client) configureUserAgent(&virtualMachineScaleSetVMsClient.Client)
return &azVirtualMachineScaleSetVMsClient{ return &azVirtualMachineScaleSetVMsClient{
@ -1018,6 +1054,10 @@ func newAzRoutesClient(config *azClientConfig) *azRoutesClient {
routesClient.BaseURI = config.resourceManagerEndpoint routesClient.BaseURI = config.resourceManagerEndpoint
routesClient.Authorizer = autorest.NewBearerAuthorizer(config.servicePrincipalToken) routesClient.Authorizer = autorest.NewBearerAuthorizer(config.servicePrincipalToken)
routesClient.PollingDelay = 5 * time.Second routesClient.PollingDelay = 5 * time.Second
if config.ShouldOmitCloudProviderBackoff {
routesClient.RetryAttempts = config.CloudProviderBackoffRetries
routesClient.RetryDuration = time.Duration(config.CloudProviderBackoffDuration) * time.Second
}
configureUserAgent(&routesClient.Client) configureUserAgent(&routesClient.Client)
return &azRoutesClient{ return &azRoutesClient{
@ -1087,6 +1127,10 @@ func newAzRouteTablesClient(config *azClientConfig) *azRouteTablesClient {
routeTablesClient.BaseURI = config.resourceManagerEndpoint routeTablesClient.BaseURI = config.resourceManagerEndpoint
routeTablesClient.Authorizer = autorest.NewBearerAuthorizer(config.servicePrincipalToken) routeTablesClient.Authorizer = autorest.NewBearerAuthorizer(config.servicePrincipalToken)
routeTablesClient.PollingDelay = 5 * time.Second routeTablesClient.PollingDelay = 5 * time.Second
if config.ShouldOmitCloudProviderBackoff {
routeTablesClient.RetryAttempts = config.CloudProviderBackoffRetries
routeTablesClient.RetryDuration = time.Duration(config.CloudProviderBackoffDuration) * time.Second
}
configureUserAgent(&routeTablesClient.Client) configureUserAgent(&routeTablesClient.Client)
return &azRouteTablesClient{ return &azRouteTablesClient{
@ -1148,6 +1192,10 @@ func newAzStorageAccountClient(config *azClientConfig) *azStorageAccountClient {
storageAccountClient := storage.NewAccountsClientWithBaseURI(config.resourceManagerEndpoint, config.subscriptionID) storageAccountClient := storage.NewAccountsClientWithBaseURI(config.resourceManagerEndpoint, config.subscriptionID)
storageAccountClient.Authorizer = autorest.NewBearerAuthorizer(config.servicePrincipalToken) storageAccountClient.Authorizer = autorest.NewBearerAuthorizer(config.servicePrincipalToken)
storageAccountClient.PollingDelay = 5 * time.Second storageAccountClient.PollingDelay = 5 * time.Second
if config.ShouldOmitCloudProviderBackoff {
storageAccountClient.RetryAttempts = config.CloudProviderBackoffRetries
storageAccountClient.RetryDuration = time.Duration(config.CloudProviderBackoffDuration) * time.Second
}
configureUserAgent(&storageAccountClient.Client) configureUserAgent(&storageAccountClient.Client)
return &azStorageAccountClient{ return &azStorageAccountClient{
@ -1259,6 +1307,10 @@ func newAzDisksClient(config *azClientConfig) *azDisksClient {
disksClient := compute.NewDisksClientWithBaseURI(config.resourceManagerEndpoint, config.subscriptionID) disksClient := compute.NewDisksClientWithBaseURI(config.resourceManagerEndpoint, config.subscriptionID)
disksClient.Authorizer = autorest.NewBearerAuthorizer(config.servicePrincipalToken) disksClient.Authorizer = autorest.NewBearerAuthorizer(config.servicePrincipalToken)
disksClient.PollingDelay = 5 * time.Second disksClient.PollingDelay = 5 * time.Second
if config.ShouldOmitCloudProviderBackoff {
disksClient.RetryAttempts = config.CloudProviderBackoffRetries
disksClient.RetryDuration = time.Duration(config.CloudProviderBackoffDuration) * time.Second
}
configureUserAgent(&disksClient.Client) configureUserAgent(&disksClient.Client)
return &azDisksClient{ return &azDisksClient{
@ -1345,6 +1397,10 @@ func newAzVirtualMachineSizesClient(config *azClientConfig) *azVirtualMachineSiz
VirtualMachineSizesClient.BaseURI = config.resourceManagerEndpoint VirtualMachineSizesClient.BaseURI = config.resourceManagerEndpoint
VirtualMachineSizesClient.Authorizer = autorest.NewBearerAuthorizer(config.servicePrincipalToken) VirtualMachineSizesClient.Authorizer = autorest.NewBearerAuthorizer(config.servicePrincipalToken)
VirtualMachineSizesClient.PollingDelay = 5 * time.Second VirtualMachineSizesClient.PollingDelay = 5 * time.Second
if config.ShouldOmitCloudProviderBackoff {
VirtualMachineSizesClient.RetryAttempts = config.CloudProviderBackoffRetries
VirtualMachineSizesClient.RetryDuration = time.Duration(config.CloudProviderBackoffDuration) * time.Second
}
configureUserAgent(&VirtualMachineSizesClient.Client) configureUserAgent(&VirtualMachineSizesClient.Client)
return &azVirtualMachineSizesClient{ return &azVirtualMachineSizesClient{

View File

@ -47,7 +47,7 @@ func (az *Cloud) NodeAddresses(ctx context.Context, name types.NodeName) ([]v1.N
} }
addressGetter := func(nodeName types.NodeName) ([]v1.NodeAddress, error) { addressGetter := func(nodeName types.NodeName) ([]v1.NodeAddress, error) {
ip, publicIP, err := az.GetIPForMachineWithRetry(nodeName) ip, publicIP, err := az.getIPForMachine(nodeName)
if err != nil { if err != nil {
klog.V(2).Infof("NodeAddresses(%s) abort backoff: %v", nodeName, err) klog.V(2).Infof("NodeAddresses(%s) abort backoff: %v", nodeName, err)
return nil, err return nil, err

View File

@ -27,11 +27,11 @@ import (
"k8s.io/api/core/v1" "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/util/sets" "k8s.io/apimachinery/pkg/util/sets"
cloudprovider "k8s.io/cloud-provider" cloudprovider "k8s.io/cloud-provider"
"k8s.io/klog"
serviceapi "k8s.io/kubernetes/pkg/api/v1/service" serviceapi "k8s.io/kubernetes/pkg/api/v1/service"
"github.com/Azure/azure-sdk-for-go/services/network/mgmt/2017-09-01/network" "github.com/Azure/azure-sdk-for-go/services/network/mgmt/2017-09-01/network"
"github.com/Azure/go-autorest/autorest/to" "github.com/Azure/go-autorest/autorest/to"
"k8s.io/klog"
) )
const ( const (
@ -210,7 +210,7 @@ func (az *Cloud) getServiceLoadBalancer(service *v1.Service, clusterName string,
primaryVMSetName := az.vmSet.GetPrimaryVMSetName() primaryVMSetName := az.vmSet.GetPrimaryVMSetName()
defaultLBName := az.getAzureLoadBalancerName(clusterName, primaryVMSetName, isInternal) defaultLBName := az.getAzureLoadBalancerName(clusterName, primaryVMSetName, isInternal)
existingLBs, err := az.ListLBWithRetry(service) existingLBs, err := az.ListLB(service)
if err != nil { if err != nil {
return nil, nil, false, err return nil, nil, false, err
} }
@ -387,7 +387,7 @@ func (az *Cloud) determinePublicIPName(clusterName string, service *v1.Service)
pipResourceGroup := az.getPublicIPAddressResourceGroup(service) pipResourceGroup := az.getPublicIPAddressResourceGroup(service)
pips, err := az.ListPIPWithRetry(service, pipResourceGroup) pips, err := az.ListPIP(service, pipResourceGroup)
if err != nil { if err != nil {
return "", err return "", err
} }
@ -474,13 +474,13 @@ func (az *Cloud) ensurePublicIPExists(service *v1.Service, pipName string, domai
} }
klog.V(2).Infof("ensurePublicIPExists for service(%s): pip(%s) - creating", serviceName, *pip.Name) klog.V(2).Infof("ensurePublicIPExists for service(%s): pip(%s) - creating", serviceName, *pip.Name)
klog.V(10).Infof("CreateOrUpdatePIPWithRetry(%s, %q): start", pipResourceGroup, *pip.Name) klog.V(10).Infof("CreateOrUpdatePIP(%s, %q): start", pipResourceGroup, *pip.Name)
err = az.CreateOrUpdatePIPWithRetry(service, pipResourceGroup, pip) err = az.CreateOrUpdatePIP(service, pipResourceGroup, pip)
if err != nil { if err != nil {
klog.V(2).Infof("ensure(%s) abort backoff: pip(%s) - creating", serviceName, *pip.Name) klog.V(2).Infof("ensure(%s) abort backoff: pip(%s) - creating", serviceName, *pip.Name)
return nil, err return nil, err
} }
klog.V(10).Infof("CreateOrUpdatePIPWithRetry(%s, %q): end", pipResourceGroup, *pip.Name) klog.V(10).Infof("CreateOrUpdatePIP(%s, %q): end", pipResourceGroup, *pip.Name)
ctx, cancel := getContextWithCancel() ctx, cancel := getContextWithCancel()
defer cancel() defer cancel()
@ -818,16 +818,16 @@ func (az *Cloud) reconcileLoadBalancer(clusterName string, service *v1.Service,
klog.V(10).Infof("EnsureBackendPoolDeleted(%s, %s): end", lbBackendPoolID, vmSetName) klog.V(10).Infof("EnsureBackendPoolDeleted(%s, %s): end", lbBackendPoolID, vmSetName)
// Remove the LB. // Remove the LB.
klog.V(10).Infof("reconcileLoadBalancer: az.DeleteLBWithRetry(%q): start", lbName) klog.V(10).Infof("reconcileLoadBalancer: az.DeleteLB(%q): start", lbName)
err = az.DeleteLBWithRetry(service, lbName) err = az.DeleteLB(service, lbName)
if err != nil { if err != nil {
klog.V(2).Infof("reconcileLoadBalancer for service(%s) abort backoff: lb(%s) - deleting; no remaining frontendIPConfigurations", serviceName, lbName) klog.V(2).Infof("reconcileLoadBalancer for service(%s) abort backoff: lb(%s) - deleting; no remaining frontendIPConfigurations", serviceName, lbName)
return nil, err return nil, err
} }
klog.V(10).Infof("az.DeleteLBWithRetry(%q): end", lbName) klog.V(10).Infof("az.DeleteLB(%q): end", lbName)
} else { } else {
klog.V(2).Infof("reconcileLoadBalancer: reconcileLoadBalancer for service(%s): lb(%s) - updating", serviceName, lbName) klog.V(2).Infof("reconcileLoadBalancer: reconcileLoadBalancer for service(%s): lb(%s) - updating", serviceName, lbName)
err := az.CreateOrUpdateLBWithRetry(service, *lb) err := az.CreateOrUpdateLB(service, *lb)
if err != nil { if err != nil {
klog.V(2).Infof("reconcileLoadBalancer for service(%s) abort backoff: lb(%s) - updating", serviceName, lbName) klog.V(2).Infof("reconcileLoadBalancer for service(%s) abort backoff: lb(%s) - updating", serviceName, lbName)
return nil, err return nil, err
@ -1143,8 +1143,8 @@ func (az *Cloud) reconcileSecurityGroup(clusterName string, service *v1.Service,
if dirtySg { if dirtySg {
sg.SecurityRules = &updatedRules sg.SecurityRules = &updatedRules
klog.V(2).Infof("reconcileSecurityGroup for service(%s): sg(%s) - updating", serviceName, *sg.Name) klog.V(2).Infof("reconcileSecurityGroup for service(%s): sg(%s) - updating", serviceName, *sg.Name)
klog.V(10).Infof("CreateOrUpdateSGWithRetry(%q): start", *sg.Name) klog.V(10).Infof("CreateOrUpdateSecurityGroup(%q): start", *sg.Name)
err := az.CreateOrUpdateSGWithRetry(service, sg) err := az.CreateOrUpdateSecurityGroup(service, sg)
if err != nil { if err != nil {
klog.V(2).Infof("ensure(%s) abort backoff: sg(%s) - updating", serviceName, *sg.Name) klog.V(2).Infof("ensure(%s) abort backoff: sg(%s) - updating", serviceName, *sg.Name)
// TODO (Nov 2017): remove when augmented security rules are out of preview // TODO (Nov 2017): remove when augmented security rules are out of preview
@ -1157,7 +1157,7 @@ func (az *Cloud) reconcileSecurityGroup(clusterName string, service *v1.Service,
// END TODO // END TODO
return nil, err return nil, err
} }
klog.V(10).Infof("CreateOrUpdateSGWithRetry(%q): end", *sg.Name) klog.V(10).Infof("CreateOrUpdateSecurityGroup(%q): end", *sg.Name)
} }
return &sg, nil return &sg, nil
} }
@ -1315,7 +1315,7 @@ func (az *Cloud) reconcilePublicIP(clusterName string, service *v1.Service, lb *
pipResourceGroup := az.getPublicIPAddressResourceGroup(service) pipResourceGroup := az.getPublicIPAddressResourceGroup(service)
pips, err := az.ListPIPWithRetry(service, pipResourceGroup) pips, err := az.ListPIP(service, pipResourceGroup)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -1414,7 +1414,7 @@ func (az *Cloud) safeDeletePublicIP(service *v1.Service, pipResourceGroup string
// Update load balancer when frontendIPConfigUpdated or loadBalancerRuleUpdated. // Update load balancer when frontendIPConfigUpdated or loadBalancerRuleUpdated.
if frontendIPConfigUpdated || loadBalancerRuleUpdated { if frontendIPConfigUpdated || loadBalancerRuleUpdated {
err := az.CreateOrUpdateLBWithRetry(service, *lb) err := az.CreateOrUpdateLB(service, *lb)
if err != nil { if err != nil {
klog.Errorf("safeDeletePublicIP for service(%s) failed with error: %v", getServiceName(service), err) klog.Errorf("safeDeletePublicIP for service(%s) failed with error: %v", getServiceName(service), err)
return err return err
@ -1423,14 +1423,14 @@ func (az *Cloud) safeDeletePublicIP(service *v1.Service, pipResourceGroup string
} }
pipName := to.String(pip.Name) pipName := to.String(pip.Name)
klog.V(10).Infof("DeletePublicIPWithRetry(%s, %q): start", pipResourceGroup, pipName) klog.V(10).Infof("DeletePublicIP(%s, %q): start", pipResourceGroup, pipName)
err := az.DeletePublicIPWithRetry(service, pipResourceGroup, pipName) err := az.DeletePublicIP(service, pipResourceGroup, pipName)
if err != nil { if err != nil {
if err = ignoreStatusNotFoundFromError(err); err != nil { if err = ignoreStatusNotFoundFromError(err); err != nil {
return err return err
} }
} }
klog.V(10).Infof("DeletePublicIPWithRetry(%s, %q): end", pipResourceGroup, pipName) klog.V(10).Infof("DeletePublicIP(%s, %q): end", pipResourceGroup, pipName)
return nil return nil
} }

View File

@ -20,11 +20,11 @@ import (
"context" "context"
"fmt" "fmt"
cloudprovider "k8s.io/cloud-provider"
"github.com/Azure/azure-sdk-for-go/services/network/mgmt/2017-09-01/network" "github.com/Azure/azure-sdk-for-go/services/network/mgmt/2017-09-01/network"
"github.com/Azure/go-autorest/autorest/to" "github.com/Azure/go-autorest/autorest/to"
"k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/types"
cloudprovider "k8s.io/cloud-provider"
"k8s.io/klog" "k8s.io/klog"
) )
@ -104,18 +104,7 @@ func (az *Cloud) createRouteTable() error {
} }
klog.V(3).Infof("createRouteTableIfNotExists: creating routetable. routeTableName=%q", az.RouteTableName) klog.V(3).Infof("createRouteTableIfNotExists: creating routetable. routeTableName=%q", az.RouteTableName)
ctx, cancel := getContextWithCancel() err := az.CreateOrUpdateRouteTable(routeTable)
defer cancel()
resp, err := az.RouteTablesClient.CreateOrUpdate(ctx, az.ResourceGroup, az.RouteTableName, routeTable)
klog.V(10).Infof("RouteTablesClient.CreateOrUpdate(%q): end", az.RouteTableName)
if az.CloudProviderBackoff && shouldRetryHTTPRequest(resp, err) {
klog.V(2).Infof("createRouteTableIfNotExists backing off: creating routetable. routeTableName=%q", az.RouteTableName)
retryErr := az.CreateOrUpdateRouteTableWithRetry(routeTable)
if retryErr != nil {
err = retryErr
klog.V(2).Infof("createRouteTableIfNotExists abort backoff: creating routetable. routeTableName=%q", az.RouteTableName)
}
}
if err != nil { if err != nil {
return err return err
} }
@ -163,18 +152,7 @@ func (az *Cloud) CreateRoute(ctx context.Context, clusterName string, nameHint s
} }
klog.V(3).Infof("CreateRoute: creating route: instance=%q cidr=%q", kubeRoute.TargetNode, kubeRoute.DestinationCIDR) klog.V(3).Infof("CreateRoute: creating route: instance=%q cidr=%q", kubeRoute.TargetNode, kubeRoute.DestinationCIDR)
ctx, cancel := getContextWithCancel() err = az.CreateOrUpdateRoute(route)
defer cancel()
resp, err := az.RoutesClient.CreateOrUpdate(ctx, az.ResourceGroup, az.RouteTableName, *route.Name, route)
klog.V(10).Infof("RoutesClient.CreateOrUpdate(%q): end", az.RouteTableName)
if az.CloudProviderBackoff && shouldRetryHTTPRequest(resp, err) {
klog.V(2).Infof("CreateRoute backing off: creating route: instance=%q cidr=%q", kubeRoute.TargetNode, kubeRoute.DestinationCIDR)
retryErr := az.CreateOrUpdateRouteWithRetry(route)
if retryErr != nil {
err = retryErr
klog.V(2).Infof("CreateRoute abort backoff: creating route: instance=%q cidr=%q", kubeRoute.TargetNode, kubeRoute.DestinationCIDR)
}
}
if err != nil { if err != nil {
return err return err
} }
@ -202,20 +180,8 @@ func (az *Cloud) DeleteRoute(ctx context.Context, clusterName string, kubeRoute
klog.V(2).Infof("DeleteRoute: deleting route. clusterName=%q instance=%q cidr=%q", clusterName, kubeRoute.TargetNode, kubeRoute.DestinationCIDR) klog.V(2).Infof("DeleteRoute: deleting route. clusterName=%q instance=%q cidr=%q", clusterName, kubeRoute.TargetNode, kubeRoute.DestinationCIDR)
ctx, cancel := getContextWithCancel()
defer cancel()
routeName := mapNodeNameToRouteName(kubeRoute.TargetNode) routeName := mapNodeNameToRouteName(kubeRoute.TargetNode)
resp, err := az.RoutesClient.Delete(ctx, az.ResourceGroup, az.RouteTableName, routeName) err = az.DeleteRouteWithName(routeName)
klog.V(10).Infof("RoutesClient.Delete(%q): end", az.RouteTableName)
if az.CloudProviderBackoff && shouldRetryHTTPRequest(resp, err) {
klog.V(2).Infof("DeleteRoute backing off: deleting route. clusterName=%q instance=%q cidr=%q", clusterName, kubeRoute.TargetNode, kubeRoute.DestinationCIDR)
retryErr := az.DeleteRouteWithRetry(routeName)
if retryErr != nil {
err = retryErr
klog.V(2).Infof("DeleteRoute abort backoff: deleting route. clusterName=%q instance=%q cidr=%q", clusterName, kubeRoute.TargetNode, kubeRoute.DestinationCIDR)
}
}
if err != nil { if err != nil {
return err return err
} }

View File

@ -26,16 +26,16 @@ import (
"strconv" "strconv"
"strings" "strings"
"k8s.io/api/core/v1"
cloudprovider "k8s.io/cloud-provider"
"github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2018-10-01/compute" "github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2018-10-01/compute"
"github.com/Azure/azure-sdk-for-go/services/network/mgmt/2017-09-01/network" "github.com/Azure/azure-sdk-for-go/services/network/mgmt/2017-09-01/network"
"github.com/Azure/go-autorest/autorest/to" "github.com/Azure/go-autorest/autorest/to"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/types"
utilerrors "k8s.io/apimachinery/pkg/util/errors" utilerrors "k8s.io/apimachinery/pkg/util/errors"
"k8s.io/apimachinery/pkg/util/sets" "k8s.io/apimachinery/pkg/util/sets"
"k8s.io/apimachinery/pkg/util/uuid" "k8s.io/apimachinery/pkg/util/uuid"
cloudprovider "k8s.io/cloud-provider"
"k8s.io/klog" "k8s.io/klog"
) )
@ -294,10 +294,6 @@ outer:
return -1, fmt.Errorf("securityGroup priorities are exhausted") return -1, fmt.Errorf("securityGroup priorities are exhausted")
} }
func (az *Cloud) getIPForMachine(nodeName types.NodeName) (string, string, error) {
return az.vmSet.GetIPByNodeName(string(nodeName))
}
var polyTable = crc32.MakeTable(crc32.Koopman) var polyTable = crc32.MakeTable(crc32.Koopman)
//MakeCRC32 : convert string to CRC32 format //MakeCRC32 : convert string to CRC32 format
@ -460,9 +456,9 @@ func (as *availabilitySet) GetIPByNodeName(name string) (string, string, error)
// getAgentPoolAvailabiliySets lists the virtual machines for the resource group and then builds // getAgentPoolAvailabiliySets lists the virtual machines for the resource group and then builds
// a list of availability sets that match the nodes available to k8s. // a list of availability sets that match the nodes available to k8s.
func (as *availabilitySet) getAgentPoolAvailabiliySets(nodes []*v1.Node) (agentPoolAvailabilitySets *[]string, err error) { func (as *availabilitySet) getAgentPoolAvailabiliySets(nodes []*v1.Node) (agentPoolAvailabilitySets *[]string, err error) {
vms, err := as.VirtualMachineClientListWithRetry(as.ResourceGroup) vms, err := as.ListVirtualMachines(as.ResourceGroup)
if err != nil { if err != nil {
klog.Errorf("as.getNodeAvailabilitySet - VirtualMachineClientListWithRetry failed, err=%v", err) klog.Errorf("as.getNodeAvailabilitySet - ListVirtualMachines failed, err=%v", err)
return nil, err return nil, err
} }
vmNameToAvailabilitySetID := make(map[string]string, len(vms)) vmNameToAvailabilitySetID := make(map[string]string, len(vms))
@ -695,18 +691,7 @@ func (as *availabilitySet) ensureHostInPool(service *v1.Service, nodeName types.
nicName := *nic.Name nicName := *nic.Name
klog.V(3).Infof("nicupdate(%s): nic(%s) - updating", serviceName, nicName) klog.V(3).Infof("nicupdate(%s): nic(%s) - updating", serviceName, nicName)
ctx, cancel := getContextWithCancel() err := as.CreateOrUpdateInterface(service, nic)
defer cancel()
resp, err := as.InterfacesClient.CreateOrUpdate(ctx, as.ResourceGroup, *nic.Name, nic)
klog.V(10).Infof("InterfacesClient.CreateOrUpdate(%q): end", *nic.Name)
if as.CloudProviderBackoff && shouldRetryHTTPRequest(resp, err) {
klog.V(2).Infof("nicupdate(%s) backing off: nic(%s) - updating, err=%v", serviceName, nicName, err)
retryErr := as.CreateOrUpdateInterfaceWithRetry(service, nic)
if retryErr != nil {
err = retryErr
klog.V(2).Infof("nicupdate(%s) abort backoff: nic(%s) - updating", serviceName, nicName)
}
}
if err != nil { if err != nil {
return err return err
} }

View File

@ -27,13 +27,13 @@ import (
"github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2018-10-01/compute" "github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2018-10-01/compute"
"github.com/Azure/azure-sdk-for-go/services/network/mgmt/2017-09-01/network" "github.com/Azure/azure-sdk-for-go/services/network/mgmt/2017-09-01/network"
"github.com/Azure/go-autorest/autorest/to" "github.com/Azure/go-autorest/autorest/to"
"k8s.io/klog"
"k8s.io/api/core/v1" "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/sets" "k8s.io/apimachinery/pkg/util/sets"
"k8s.io/apimachinery/pkg/util/wait" "k8s.io/apimachinery/pkg/util/wait"
cloudprovider "k8s.io/cloud-provider" cloudprovider "k8s.io/cloud-provider"
"k8s.io/klog"
) )
var ( var (
@ -563,6 +563,30 @@ func (ss *scaleSet) GetPrimaryInterface(nodeName string) (network.Interface, err
return nic, nil return nic, nil
} }
// getScaleSet gets scale set with exponential backoff retry
func (ss *scaleSet) getScaleSet(service *v1.Service, name string) (compute.VirtualMachineScaleSet, bool, error) {
if ss.Config.shouldOmitCloudProviderBackoff() {
var result compute.VirtualMachineScaleSet
var exists bool
cached, err := ss.vmssCache.Get(name)
if err != nil {
ss.Event(service, v1.EventTypeWarning, "GetVirtualMachineScaleSet", err.Error())
klog.Errorf("backoff: failure for scale set %q, will retry,err=%v", name, err)
return result, false, nil
}
if cached != nil {
exists = true
result = *(cached.(*compute.VirtualMachineScaleSet))
}
return result, exists, err
}
return ss.getScaleSetWithRetry(service, name)
}
// getScaleSetWithRetry gets scale set with exponential backoff retry // getScaleSetWithRetry gets scale set with exponential backoff retry
func (ss *scaleSet) getScaleSetWithRetry(service *v1.Service, name string) (compute.VirtualMachineScaleSet, bool, error) { func (ss *scaleSet) getScaleSetWithRetry(service *v1.Service, name string) (compute.VirtualMachineScaleSet, bool, error) {
var result compute.VirtualMachineScaleSet var result compute.VirtualMachineScaleSet
@ -621,6 +645,19 @@ func (ss *scaleSet) getPrimaryIPConfigForScaleSet(config *compute.VirtualMachine
return nil, fmt.Errorf("failed to find a primary IP configuration for the scale set %q", scaleSetName) return nil, fmt.Errorf("failed to find a primary IP configuration for the scale set %q", scaleSetName)
} }
// createOrUpdateVMSS invokes ss.VirtualMachineScaleSetsClient.CreateOrUpdate with exponential backoff retry.
func (ss *scaleSet) createOrUpdateVMSS(service *v1.Service, virtualMachineScaleSet compute.VirtualMachineScaleSet) error {
if ss.Config.shouldOmitCloudProviderBackoff() {
ctx, cancel := getContextWithCancel()
defer cancel()
resp, err := ss.VirtualMachineScaleSetsClient.CreateOrUpdate(ctx, ss.ResourceGroup, *virtualMachineScaleSet.Name, virtualMachineScaleSet)
klog.V(10).Infof("VirtualMachineScaleSetsClient.CreateOrUpdate(%s): end", *virtualMachineScaleSet.Name)
return ss.processHTTPResponse(service, "CreateOrUpdateVMSS", resp, err)
}
return ss.createOrUpdateVMSSWithRetry(service, virtualMachineScaleSet)
}
// createOrUpdateVMSSWithRetry invokes ss.VirtualMachineScaleSetsClient.CreateOrUpdate with exponential backoff retry. // createOrUpdateVMSSWithRetry invokes ss.VirtualMachineScaleSetsClient.CreateOrUpdate with exponential backoff retry.
func (ss *scaleSet) createOrUpdateVMSSWithRetry(service *v1.Service, virtualMachineScaleSet compute.VirtualMachineScaleSet) error { func (ss *scaleSet) createOrUpdateVMSSWithRetry(service *v1.Service, virtualMachineScaleSet compute.VirtualMachineScaleSet) error {
return wait.ExponentialBackoff(ss.requestBackoff(), func() (bool, error) { return wait.ExponentialBackoff(ss.requestBackoff(), func() (bool, error) {
@ -632,6 +669,19 @@ func (ss *scaleSet) createOrUpdateVMSSWithRetry(service *v1.Service, virtualMach
}) })
} }
// updateVMSSInstances invokes ss.VirtualMachineScaleSetsClient.UpdateInstances with exponential backoff retry.
func (ss *scaleSet) updateVMSSInstances(service *v1.Service, scaleSetName string, vmInstanceIDs compute.VirtualMachineScaleSetVMInstanceRequiredIDs) error {
if ss.Config.shouldOmitCloudProviderBackoff() {
ctx, cancel := getContextWithCancel()
defer cancel()
resp, err := ss.VirtualMachineScaleSetsClient.UpdateInstances(ctx, ss.ResourceGroup, scaleSetName, vmInstanceIDs)
klog.V(10).Infof("VirtualMachineScaleSetsClient.UpdateInstances(%s): end", scaleSetName)
return ss.processHTTPResponse(service, "CreateOrUpdateVMSSInstance", resp, err)
}
return ss.updateVMSSInstancesWithRetry(service, scaleSetName, vmInstanceIDs)
}
// updateVMSSInstancesWithRetry invokes ss.VirtualMachineScaleSetsClient.UpdateInstances with exponential backoff retry. // updateVMSSInstancesWithRetry invokes ss.VirtualMachineScaleSetsClient.UpdateInstances with exponential backoff retry.
func (ss *scaleSet) updateVMSSInstancesWithRetry(service *v1.Service, scaleSetName string, vmInstanceIDs compute.VirtualMachineScaleSetVMInstanceRequiredIDs) error { func (ss *scaleSet) updateVMSSInstancesWithRetry(service *v1.Service, scaleSetName string, vmInstanceIDs compute.VirtualMachineScaleSetVMInstanceRequiredIDs) error {
return wait.ExponentialBackoff(ss.requestBackoff(), func() (bool, error) { return wait.ExponentialBackoff(ss.requestBackoff(), func() (bool, error) {
@ -687,9 +737,9 @@ func (ss *scaleSet) getNodesScaleSets(nodes []*v1.Node) (map[string]sets.String,
func (ss *scaleSet) ensureHostsInVMSetPool(service *v1.Service, backendPoolID string, vmSetName string, instanceIDs []string, isInternal bool) error { func (ss *scaleSet) ensureHostsInVMSetPool(service *v1.Service, backendPoolID string, vmSetName string, instanceIDs []string, isInternal bool) error {
klog.V(3).Infof("ensuring hosts %q of scaleset %q in LB backendpool %q", instanceIDs, vmSetName, backendPoolID) klog.V(3).Infof("ensuring hosts %q of scaleset %q in LB backendpool %q", instanceIDs, vmSetName, backendPoolID)
serviceName := getServiceName(service) serviceName := getServiceName(service)
virtualMachineScaleSet, exists, err := ss.getScaleSetWithRetry(service, vmSetName) virtualMachineScaleSet, exists, err := ss.getScaleSet(service, vmSetName)
if err != nil { if err != nil {
klog.Errorf("ss.getScaleSetWithRetry(%s) for service %q failed: %v", vmSetName, serviceName, err) klog.Errorf("ss.getScaleSet(%s) for service %q failed: %v", vmSetName, serviceName, err)
return err return err
} }
if !exists { if !exists {
@ -748,19 +798,7 @@ func (ss *scaleSet) ensureHostsInVMSetPool(service *v1.Service, backendPoolID st
}) })
primaryIPConfiguration.LoadBalancerBackendAddressPools = &newBackendPools primaryIPConfiguration.LoadBalancerBackendAddressPools = &newBackendPools
ctx, cancel := getContextWithCancel() err := ss.createOrUpdateVMSS(service, virtualMachineScaleSet)
defer cancel()
klog.V(3).Infof("VirtualMachineScaleSetsClient.CreateOrUpdate for service (%s): scale set (%s) - updating", serviceName, vmSetName)
resp, err := ss.VirtualMachineScaleSetsClient.CreateOrUpdate(ctx, ss.ResourceGroup, vmSetName, virtualMachineScaleSet)
klog.V(10).Infof("VirtualMachineScaleSetsClient.CreateOrUpdate(%q): end", vmSetName)
if ss.CloudProviderBackoff && shouldRetryHTTPRequest(resp, err) {
klog.V(2).Infof("VirtualMachineScaleSetsClient.CreateOrUpdate for service (%s): scale set (%s) - updating, err=%v", serviceName, vmSetName, err)
retryErr := ss.createOrUpdateVMSSWithRetry(service, virtualMachineScaleSet)
if retryErr != nil {
err = retryErr
klog.V(2).Infof("VirtualMachineScaleSetsClient.CreateOrUpdate for service (%s) abort backoff: scale set (%s) - updating", serviceName, vmSetName)
}
}
if err != nil { if err != nil {
return err return err
} }
@ -770,18 +808,7 @@ func (ss *scaleSet) ensureHostsInVMSetPool(service *v1.Service, backendPoolID st
vmInstanceIDs := compute.VirtualMachineScaleSetVMInstanceRequiredIDs{ vmInstanceIDs := compute.VirtualMachineScaleSetVMInstanceRequiredIDs{
InstanceIds: &instanceIDs, InstanceIds: &instanceIDs,
} }
ctx, cancel := getContextWithCancel() err = ss.updateVMSSInstances(service, vmSetName, vmInstanceIDs)
defer cancel()
instanceResp, err := ss.VirtualMachineScaleSetsClient.UpdateInstances(ctx, ss.ResourceGroup, vmSetName, vmInstanceIDs)
klog.V(10).Infof("VirtualMachineScaleSetsClient.CreateOrUpdate(%q): end", vmSetName)
if ss.CloudProviderBackoff && shouldRetryHTTPRequest(instanceResp, err) {
klog.V(2).Infof("VirtualMachineScaleSetsClient.UpdateInstances for service (%s): scale set (%s) - updating, err=%v", serviceName, vmSetName, err)
retryErr := ss.updateVMSSInstancesWithRetry(service, vmSetName, vmInstanceIDs)
if retryErr != nil {
err = retryErr
klog.V(2).Infof("VirtualMachineScaleSetsClient.UpdateInstances for service (%s) abort backoff: scale set (%s) - updating", serviceName, vmSetName)
}
}
if err != nil { if err != nil {
return err return err
} }
@ -833,9 +860,9 @@ func (ss *scaleSet) EnsureHostsInPool(service *v1.Service, nodes []*v1.Node, bac
// ensureScaleSetBackendPoolDeleted ensures the loadBalancer backendAddressPools deleted from the specified scaleset. // ensureScaleSetBackendPoolDeleted ensures the loadBalancer backendAddressPools deleted from the specified scaleset.
func (ss *scaleSet) ensureScaleSetBackendPoolDeleted(service *v1.Service, poolID, ssName string) error { func (ss *scaleSet) ensureScaleSetBackendPoolDeleted(service *v1.Service, poolID, ssName string) error {
klog.V(3).Infof("ensuring backend pool %q deleted from scaleset %q", poolID, ssName) klog.V(3).Infof("ensuring backend pool %q deleted from scaleset %q", poolID, ssName)
virtualMachineScaleSet, exists, err := ss.getScaleSetWithRetry(service, ssName) virtualMachineScaleSet, exists, err := ss.getScaleSet(service, ssName)
if err != nil { if err != nil {
klog.Errorf("ss.ensureScaleSetBackendPoolDeleted(%s, %s) getScaleSetWithRetry(%s) failed: %v", poolID, ssName, ssName, err) klog.Errorf("ss.ensureScaleSetBackendPoolDeleted(%s, %s) getScaleSet(%s) failed: %v", poolID, ssName, ssName, err)
return err return err
} }
if !exists { if !exists {
@ -879,18 +906,7 @@ func (ss *scaleSet) ensureScaleSetBackendPoolDeleted(service *v1.Service, poolID
// Update scale set with backoff. // Update scale set with backoff.
primaryIPConfiguration.LoadBalancerBackendAddressPools = &newBackendPools primaryIPConfiguration.LoadBalancerBackendAddressPools = &newBackendPools
klog.V(3).Infof("VirtualMachineScaleSetsClient.CreateOrUpdate: scale set (%s) - updating", ssName) klog.V(3).Infof("VirtualMachineScaleSetsClient.CreateOrUpdate: scale set (%s) - updating", ssName)
ctx, cancel := getContextWithCancel() err = ss.createOrUpdateVMSS(service, virtualMachineScaleSet)
defer cancel()
resp, err := ss.VirtualMachineScaleSetsClient.CreateOrUpdate(ctx, ss.ResourceGroup, ssName, virtualMachineScaleSet)
klog.V(10).Infof("VirtualMachineScaleSetsClient.CreateOrUpdate(%q): end", ssName)
if ss.CloudProviderBackoff && shouldRetryHTTPRequest(resp, err) {
klog.V(2).Infof("VirtualMachineScaleSetsClient.CreateOrUpdate: scale set (%s) - updating, err=%v", ssName, err)
retryErr := ss.createOrUpdateVMSSWithRetry(service, virtualMachineScaleSet)
if retryErr != nil {
err = retryErr
klog.V(2).Infof("VirtualMachineScaleSetsClient.CreateOrUpdate abort backoff: scale set (%s) - updating", ssName)
}
}
if err != nil { if err != nil {
return err return err
} }
@ -900,18 +916,7 @@ func (ss *scaleSet) ensureScaleSetBackendPoolDeleted(service *v1.Service, poolID
vmInstanceIDs := compute.VirtualMachineScaleSetVMInstanceRequiredIDs{ vmInstanceIDs := compute.VirtualMachineScaleSetVMInstanceRequiredIDs{
InstanceIds: &instanceIDs, InstanceIds: &instanceIDs,
} }
instanceCtx, instanceCancel := getContextWithCancel() err = ss.updateVMSSInstances(service, ssName, vmInstanceIDs)
defer instanceCancel()
instanceResp, err := ss.VirtualMachineScaleSetsClient.UpdateInstances(instanceCtx, ss.ResourceGroup, ssName, vmInstanceIDs)
klog.V(10).Infof("VirtualMachineScaleSetsClient.UpdateInstances(%q): end", ssName)
if ss.CloudProviderBackoff && shouldRetryHTTPRequest(instanceResp, err) {
klog.V(2).Infof("VirtualMachineScaleSetsClient.UpdateInstances scale set (%s) - updating, err=%v", ssName, err)
retryErr := ss.updateVMSSInstancesWithRetry(service, ssName, vmInstanceIDs)
if retryErr != nil {
err = retryErr
klog.V(2).Infof("VirtualMachineScaleSetsClient.UpdateInstances abort backoff: scale set (%s) - updating", ssName)
}
}
if err != nil { if err != nil {
return err return err
} }
@ -919,19 +924,11 @@ func (ss *scaleSet) ensureScaleSetBackendPoolDeleted(service *v1.Service, poolID
// Update virtualMachineScaleSet again. This is a workaround for removing VMSS reference from LB. // Update virtualMachineScaleSet again. This is a workaround for removing VMSS reference from LB.
// TODO: remove this workaround when figuring out the root cause. // TODO: remove this workaround when figuring out the root cause.
if len(newBackendPools) == 0 { if len(newBackendPools) == 0 {
updateCtx, updateCancel := getContextWithCancel() err = ss.createOrUpdateVMSS(service, virtualMachineScaleSet)
defer updateCancel() if err != nil {
klog.V(3).Infof("VirtualMachineScaleSetsClient.CreateOrUpdate: scale set (%s) - updating second time", ssName)
resp, err = ss.VirtualMachineScaleSetsClient.CreateOrUpdate(updateCtx, ss.ResourceGroup, ssName, virtualMachineScaleSet)
klog.V(10).Infof("VirtualMachineScaleSetsClient.CreateOrUpdate(%q): end", ssName)
if ss.CloudProviderBackoff && shouldRetryHTTPRequest(resp, err) {
klog.V(2).Infof("VirtualMachineScaleSetsClient.CreateOrUpdate: scale set (%s) - updating, err=%v", ssName, err)
retryErr := ss.createOrUpdateVMSSWithRetry(service, virtualMachineScaleSet)
if retryErr != nil {
klog.V(2).Infof("VirtualMachineScaleSetsClient.CreateOrUpdate abort backoff: scale set (%s) - updating", ssName) klog.V(2).Infof("VirtualMachineScaleSetsClient.CreateOrUpdate abort backoff: scale set (%s) - updating", ssName)
} }
} }
}
return nil return nil
} }

View File

@ -132,7 +132,7 @@ func (ss *scaleSet) newAvailabilitySetNodesCache() (*timedCache, error) {
} }
for _, resourceGroup := range resourceGroups.List() { for _, resourceGroup := range resourceGroups.List() {
vmList, err := ss.Cloud.VirtualMachineClientListWithRetry(resourceGroup) vmList, err := ss.Cloud.ListVirtualMachines(resourceGroup)
if err != nil { if err != nil {
return nil, err return nil, err
} }