Get VirtualMachineScaleSets and VirtualMachineScaleSetVMs from cached resource groups

pull/8/head
Pengfei Ni 2018-08-20 14:15:02 +08:00
parent f83fb14452
commit 8aaeafa166
5 changed files with 164 additions and 57 deletions

View File

@ -69,20 +69,20 @@ func (az *Cloud) GetVirtualMachineWithRetry(name types.NodeName) (compute.Virtua
}
// VirtualMachineClientListWithRetry invokes az.VirtualMachinesClient.List with exponential backoff retry
func (az *Cloud) VirtualMachineClientListWithRetry() ([]compute.VirtualMachine, error) {
func (az *Cloud) VirtualMachineClientListWithRetry(resourceGroup string) ([]compute.VirtualMachine, error) {
allNodes := []compute.VirtualMachine{}
err := wait.ExponentialBackoff(az.requestBackoff(), func() (bool, error) {
var retryErr error
ctx, cancel := getContextWithCancel()
defer cancel()
allNodes, retryErr = az.VirtualMachinesClient.List(ctx, az.ResourceGroup)
allNodes, retryErr = az.VirtualMachinesClient.List(ctx, resourceGroup)
if retryErr != nil {
glog.Errorf("VirtualMachinesClient.List(%v) - backoff: failure, will retry,err=%v",
az.ResourceGroup,
resourceGroup,
retryErr)
return false, retryErr
}
glog.V(2).Infof("VirtualMachinesClient.List(%v) - backoff: success", az.ResourceGroup)
glog.V(2).Infof("VirtualMachinesClient.List(%v) - backoff: success", resourceGroup)
return true, nil
})
if err != nil {

View File

@ -29,7 +29,13 @@ import (
// AttachDisk attaches a vhd to vm
// the vhd must exist, can be identified by diskName, diskURI, and lun.
func (ss *scaleSet) AttachDisk(isManagedDisk bool, diskName, diskURI string, nodeName types.NodeName, lun int32, cachingMode compute.CachingTypes) error {
ssName, instanceID, vm, err := ss.getVmssVM(string(nodeName))
vmName := mapNodeNameToVMName(nodeName)
ssName, instanceID, vm, err := ss.getVmssVM(vmName)
if err != nil {
return err
}
nodeResourceGroup, err := ss.GetNodeResourceGroup(vmName)
if err != nil {
return err
}
@ -65,14 +71,14 @@ func (ss *scaleSet) AttachDisk(isManagedDisk bool, diskName, diskURI string, nod
ctx, cancel := getContextWithCancel()
defer cancel()
glog.V(2).Infof("azureDisk - update(%s): vm(%s) - attach disk", ss.resourceGroup, nodeName)
resp, err := ss.VirtualMachineScaleSetVMsClient.Update(ctx, ss.resourceGroup, ssName, instanceID, vm)
glog.V(2).Infof("azureDisk - update(%s): vm(%s) - attach disk", nodeResourceGroup, nodeName)
resp, err := ss.VirtualMachineScaleSetVMsClient.Update(ctx, nodeResourceGroup, ssName, instanceID, vm)
if ss.CloudProviderBackoff && shouldRetryHTTPRequest(resp, err) {
glog.V(2).Infof("azureDisk - update(%s) backing off: vm(%s)", ss.resourceGroup, nodeName)
retryErr := ss.UpdateVmssVMWithRetry(ctx, ss.resourceGroup, ssName, instanceID, vm)
glog.V(2).Infof("azureDisk - update(%s) backing off: vm(%s)", nodeResourceGroup, nodeName)
retryErr := ss.UpdateVmssVMWithRetry(ctx, nodeResourceGroup, ssName, instanceID, vm)
if retryErr != nil {
err = retryErr
glog.V(2).Infof("azureDisk - update(%s) abort backoff: vm(%s)", ss.resourceGroup, nodeName)
glog.V(2).Infof("azureDisk - update(%s) abort backoff: vm(%s)", nodeResourceGroup, nodeName)
}
}
if err != nil {
@ -85,7 +91,8 @@ func (ss *scaleSet) AttachDisk(isManagedDisk bool, diskName, diskURI string, nod
} else {
glog.V(4).Info("azureDisk - azure attach succeeded")
// Invalidate the cache right after updating
ss.vmssVMCache.Delete(ss.makeVmssVMName(ssName, instanceID))
key := buildVmssCacheKey(nodeResourceGroup, ss.makeVmssVMName(ssName, instanceID))
ss.vmssVMCache.Delete(key)
}
return err
}
@ -93,7 +100,13 @@ func (ss *scaleSet) AttachDisk(isManagedDisk bool, diskName, diskURI string, nod
// DetachDiskByName detaches a vhd from host
// the vhd can be identified by diskName or diskURI
func (ss *scaleSet) DetachDiskByName(diskName, diskURI string, nodeName types.NodeName) error {
ssName, instanceID, vm, err := ss.getVmssVM(string(nodeName))
vmName := mapNodeNameToVMName(nodeName)
ssName, instanceID, vm, err := ss.getVmssVM(vmName)
if err != nil {
return err
}
nodeResourceGroup, err := ss.GetNodeResourceGroup(vmName)
if err != nil {
return err
}
@ -122,14 +135,14 @@ func (ss *scaleSet) DetachDiskByName(diskName, diskURI string, nodeName types.No
vm.StorageProfile.DataDisks = &disks
ctx, cancel := getContextWithCancel()
defer cancel()
glog.V(2).Infof("azureDisk - update(%s): vm(%s) - detach disk", ss.resourceGroup, nodeName)
resp, err := ss.VirtualMachineScaleSetVMsClient.Update(ctx, ss.resourceGroup, ssName, instanceID, vm)
glog.V(2).Infof("azureDisk - update(%s): vm(%s) - detach disk", nodeResourceGroup, nodeName)
resp, err := ss.VirtualMachineScaleSetVMsClient.Update(ctx, nodeResourceGroup, ssName, instanceID, vm)
if ss.CloudProviderBackoff && shouldRetryHTTPRequest(resp, err) {
glog.V(2).Infof("azureDisk - update(%s) backing off: vm(%s)", ss.resourceGroup, nodeName)
retryErr := ss.UpdateVmssVMWithRetry(ctx, ss.resourceGroup, ssName, instanceID, vm)
glog.V(2).Infof("azureDisk - update(%s) backing off: vm(%s)", nodeResourceGroup, nodeName)
retryErr := ss.UpdateVmssVMWithRetry(ctx, nodeResourceGroup, ssName, instanceID, vm)
if retryErr != nil {
err = retryErr
glog.V(2).Infof("azureDisk - update(%s) abort backoff: vm(%s)", ss.resourceGroup, nodeName)
glog.V(2).Infof("azureDisk - update(%s) abort backoff: vm(%s)", nodeResourceGroup, nodeName)
}
}
if err != nil {
@ -137,7 +150,8 @@ func (ss *scaleSet) DetachDiskByName(diskName, diskURI string, nodeName types.No
} else {
glog.V(4).Info("azureDisk - azure detach succeeded")
// Invalidate the cache right after updating
ss.vmssVMCache.Delete(ss.makeVmssVMName(ssName, instanceID))
key := buildVmssCacheKey(nodeResourceGroup, ss.makeVmssVMName(ssName, instanceID))
ss.vmssVMCache.Delete(key)
}
return err

View File

@ -440,7 +440,7 @@ func (as *availabilitySet) GetIPByNodeName(name string) (string, string, error)
// getAgentPoolAvailabiliySets lists the virtual machines for the resource group and then builds
// a list of availability sets that match the nodes available to k8s.
func (as *availabilitySet) getAgentPoolAvailabiliySets(nodes []*v1.Node) (agentPoolAvailabilitySets *[]string, err error) {
vms, err := as.VirtualMachineClientListWithRetry()
vms, err := as.VirtualMachineClientListWithRetry(as.ResourceGroup)
if err != nil {
glog.Errorf("as.getNodeAvailabilitySet - VirtualMachineClientListWithRetry failed, err=%v", err)
return nil, err

View File

@ -40,8 +40,10 @@ var (
// ErrorNotVmssInstance indicates an instance is not belongint to any vmss.
ErrorNotVmssInstance = errors.New("not a vmss instance")
scaleSetNameRE = regexp.MustCompile(`.*/subscriptions/(?:.*)/Microsoft.Compute/virtualMachineScaleSets/(.+)/virtualMachines(?:.*)`)
vmssMachineIDTemplate = "/subscriptions/%s/resourceGroups/%s/providers/Microsoft.Compute/virtualMachineScaleSets/%s/virtualMachines/%s"
scaleSetNameRE = regexp.MustCompile(`.*/subscriptions/(?:.*)/Microsoft.Compute/virtualMachineScaleSets/(.+)/virtualMachines(?:.*)`)
resourceGroupRE = regexp.MustCompile(`.*/subscriptions/(?:.*)/resourceGroups/(.+)/providers/Microsoft.Compute/virtualMachineScaleSets/(?:.*)/virtualMachines(?:.*)`)
vmssNicResourceGroupRE = regexp.MustCompile(`.*/subscriptions/(?:.*)/resourceGroups/(.+)/providers/Microsoft.Compute/virtualMachineScaleSets/(?:.*)/virtualMachines/(?:.*)/networkInterfaces/(?:.*)`)
vmssMachineIDTemplate = "/subscriptions/%s/resourceGroups/%s/providers/Microsoft.Compute/virtualMachineScaleSets/%s/virtualMachines/%s"
)
// scaleSet implements VMSet interface for Azure scale set.
@ -106,8 +108,14 @@ func (ss *scaleSet) getVmssVM(nodeName string) (ssName, instanceID string, vm co
return "", "", vm, cloudprovider.InstanceNotFound
}
resourceGroup, err := ss.GetNodeResourceGroup(nodeName)
if err != nil {
return "", "", vm, err
}
glog.V(4).Infof("getVmssVM gets scaleSetName (%q) and instanceID (%q) for node %q", ssName, instanceID, nodeName)
cachedVM, err := ss.vmssVMCache.Get(ss.makeVmssVMName(ssName, instanceID))
key := buildVmssCacheKey(resourceGroup, ss.makeVmssVMName(ssName, instanceID))
cachedVM, err := ss.vmssVMCache.Get(key)
if err != nil {
return ssName, instanceID, vm, err
}
@ -122,9 +130,10 @@ func (ss *scaleSet) getVmssVM(nodeName string) (ssName, instanceID string, vm co
// getCachedVirtualMachineByInstanceID gets scaleSetVMInfo from cache.
// The node must belong to one of scale sets.
func (ss *scaleSet) getVmssVMByInstanceID(scaleSetName, instanceID string) (vm compute.VirtualMachineScaleSetVM, err error) {
func (ss *scaleSet) getVmssVMByInstanceID(resourceGroup, scaleSetName, instanceID string) (vm compute.VirtualMachineScaleSetVM, err error) {
vmName := ss.makeVmssVMName(scaleSetName, instanceID)
cachedVM, err := ss.vmssVMCache.Get(vmName)
key := buildVmssCacheKey(resourceGroup, vmName)
cachedVM, err := ss.vmssVMCache.Get(key)
if err != nil {
return vm, err
}
@ -168,13 +177,18 @@ func (ss *scaleSet) GetNodeNameByProviderID(providerID string) (types.NodeName,
return ss.availabilitySet.GetNodeNameByProviderID(providerID)
}
resourceGroup, err := extractResourceGroupByProviderID(providerID)
if err != nil {
return "", fmt.Errorf("error of extracting resource group for node %q", providerID)
}
instanceID, err := getLastSegment(providerID)
if err != nil {
glog.V(4).Infof("Can not extract instanceID from providerID (%s), assuming it is mananaged by availability set: %v", providerID, err)
return ss.availabilitySet.GetNodeNameByProviderID(providerID)
}
vm, err := ss.getVmssVMByInstanceID(scaleSetName, instanceID)
vm, err := ss.getVmssVMByInstanceID(resourceGroup, scaleSetName, instanceID)
if err != nil {
return "", err
}
@ -308,7 +322,7 @@ func getScaleSetVMInstanceID(machineName string) (string, error) {
return fmt.Sprintf("%d", instanceID), nil
}
// extractScaleSetNameByProviderID extracts the scaleset name by node's ProviderID.
// extractScaleSetNameByProviderID extracts the scaleset name by vmss node's ProviderID.
func extractScaleSetNameByProviderID(providerID string) (string, error) {
matches := scaleSetNameRE.FindStringSubmatch(providerID)
if len(matches) != 2 {
@ -318,13 +332,23 @@ func extractScaleSetNameByProviderID(providerID string) (string, error) {
return matches[1], nil
}
// extractResourceGroupByProviderID extracts the resource group name by vmss node's ProviderID.
func extractResourceGroupByProviderID(providerID string) (string, error) {
matches := resourceGroupRE.FindStringSubmatch(providerID)
if len(matches) != 2 {
return "", ErrorNotVmssInstance
}
return matches[1], nil
}
// listScaleSets lists all scale sets.
func (ss *scaleSet) listScaleSets() ([]string, error) {
func (ss *scaleSet) listScaleSets(resourceGroup string) ([]string, error) {
var err error
ctx, cancel := getContextWithCancel()
defer cancel()
allScaleSets, err := ss.VirtualMachineScaleSetsClient.List(ctx, ss.ResourceGroup)
allScaleSets, err := ss.VirtualMachineScaleSetsClient.List(ctx, resourceGroup)
if err != nil {
glog.Errorf("VirtualMachineScaleSetsClient.List failed: %v", err)
return nil, err
@ -339,12 +363,12 @@ func (ss *scaleSet) listScaleSets() ([]string, error) {
}
// listScaleSetVMs lists VMs belonging to the specified scale set.
func (ss *scaleSet) listScaleSetVMs(scaleSetName string) ([]compute.VirtualMachineScaleSetVM, error) {
func (ss *scaleSet) listScaleSetVMs(scaleSetName, resourceGroup string) ([]compute.VirtualMachineScaleSetVM, error) {
var err error
ctx, cancel := getContextWithCancel()
defer cancel()
allVMs, err := ss.VirtualMachineScaleSetVMsClient.List(ctx, ss.ResourceGroup, scaleSetName, "", "", string(compute.InstanceView))
allVMs, err := ss.VirtualMachineScaleSetVMsClient.List(ctx, resourceGroup, scaleSetName, "", "", string(compute.InstanceView))
if err != nil {
glog.Errorf("VirtualMachineScaleSetVMsClient.List failed: %v", err)
return nil, err
@ -362,6 +386,10 @@ func (ss *scaleSet) getAgentPoolScaleSets(nodes []*v1.Node) (*[]string, error) {
continue
}
if ss.ShouldNodeExcludedFromLoadBalancer(nodes[nx]) {
continue
}
nodeName := nodes[nx].Name
ssName, err := ss.getScaleSetNameByNodeName(nodeName)
if err != nil {
@ -429,6 +457,16 @@ func (ss *scaleSet) GetVMSetNames(service *v1.Service, nodes []*v1.Node) (vmSetN
return vmSetNames, nil
}
// extractResourceGroupByVMSSNicID extracts the resource group name by vmss nicID.
func extractResourceGroupByVMSSNicID(nicID string) (string, error) {
matches := vmssNicResourceGroupRE.FindStringSubmatch(nicID)
if len(matches) != 2 {
return "", fmt.Errorf("error of extracting resourceGroup from nicID %q", nicID)
}
return matches[1], nil
}
// GetPrimaryInterface gets machine primary network interface by node name and vmSet.
func (ss *scaleSet) GetPrimaryInterface(nodeName string) (network.Interface, error) {
managedByAS, err := ss.isNodeManagedByAvailabilitySet(nodeName)
@ -443,6 +481,11 @@ func (ss *scaleSet) GetPrimaryInterface(nodeName string) (network.Interface, err
ssName, instanceID, vm, err := ss.getVmssVM(nodeName)
if err != nil {
// VM is availability set, but not cached yet in availabilitySetNodesCache.
if err == ErrorNotVmssInstance {
return ss.availabilitySet.GetPrimaryInterface(nodeName)
}
glog.Errorf("error: ss.GetPrimaryInterface(%s), ss.getVmssVM(%s), err=%v", nodeName, nodeName, err)
return network.Interface{}, err
}
@ -458,12 +501,16 @@ func (ss *scaleSet) GetPrimaryInterface(nodeName string) (network.Interface, err
glog.Errorf("error: ss.GetPrimaryInterface(%s), getLastSegment(%s), err=%v", nodeName, primaryInterfaceID, err)
return network.Interface{}, err
}
resourceGroup, err := extractResourceGroupByVMSSNicID(primaryInterfaceID)
if err != nil {
return network.Interface{}, err
}
ctx, cancel := getContextWithCancel()
defer cancel()
nic, err := ss.InterfacesClient.GetVirtualMachineScaleSetNetworkInterface(ctx, ss.ResourceGroup, ssName, instanceID, nicName, "")
nic, err := ss.InterfacesClient.GetVirtualMachineScaleSetNetworkInterface(ctx, resourceGroup, ssName, instanceID, nicName, "")
if err != nil {
glog.Errorf("error: ss.GetPrimaryInterface(%s), ss.GetVirtualMachineScaleSetNetworkInterface.Get(%s, %s, %s), err=%v", nodeName, ss.ResourceGroup, ssName, nicName, err)
glog.Errorf("error: ss.GetPrimaryInterface(%s), ss.GetVirtualMachineScaleSetNetworkInterface.Get(%s, %s, %s), err=%v", nodeName, resourceGroup, ssName, nicName, err)
return network.Interface{}, err
}
@ -566,6 +613,11 @@ func (ss *scaleSet) getNodesScaleSets(nodes []*v1.Node) (map[string]sets.String,
continue
}
if ss.ShouldNodeExcludedFromLoadBalancer(curNode) {
glog.V(4).Infof("Excluding unmanaged/external-resource-group node %q", curNode.Name)
continue
}
curScaleSetName, err := extractScaleSetNameByProviderID(curNode.Spec.ProviderID)
if err != nil {
glog.V(4).Infof("Node %q is not belonging to any scale sets, assuming it is belong to availability sets", curNode.Name)
@ -884,11 +936,11 @@ func (ss *scaleSet) EnsureBackendPoolDeleted(poolID, vmSetName string, backendAd
}
// getVmssMachineID returns the full identifier of a vmss virtual machine.
func (az *Cloud) getVmssMachineID(scaleSetName, instanceID string) string {
func (az *Cloud) getVmssMachineID(resourceGroup, scaleSetName, instanceID string) string {
return fmt.Sprintf(
vmssMachineIDTemplate,
az.SubscriptionID,
az.ResourceGroup,
resourceGroup,
scaleSetName,
instanceID)
}

View File

@ -27,15 +27,16 @@ import (
)
var (
vmssNameSeparator = "_"
vmssNameSeparator = "_"
vmssCacheSeparator = "#"
nodeNameToScaleSetMappingKey = "k8sNodeNameToScaleSetMappingKey"
availabilitySetNodesKey = "k8sAvailabilitySetNodesKey"
vmssCacheTTL = time.Minute
vmssVMCacheTTL = time.Minute
availabilitySetNodesCacheTTL = 15 * time.Minute
nodeNameToScaleSetMappingCacheTTL = 15 * time.Minute
availabilitySetNodesCacheTTL = 5 * time.Minute
nodeNameToScaleSetMappingCacheTTL = 5 * time.Minute
)
// nodeNameToScaleSetMapping maps nodeName to scaleSet name.
@ -49,19 +50,19 @@ func (ss *scaleSet) makeVmssVMName(scaleSetName, instanceID string) string {
func extractVmssVMName(name string) (string, string, error) {
split := strings.SplitAfter(name, vmssNameSeparator)
if len(split) < 2 {
glog.Errorf("Failed to extract vmssVMName %q", name)
glog.V(3).Infof("Failed to extract vmssVMName %q", name)
return "", "", ErrorNotVmssInstance
}
ssName := strings.Join(split[0:len(split)-1], "")
// removing the trailing `vmssNameSeparator` since we used SplitAfter
ssName = ssName[:len(ssName)-1]
instanceID := split[len(split)-1]
return ssName, instanceID, nil
}
// vmssCache only holds vmss from ss.ResourceGroup because nodes from other resourceGroups
// will be excluded from LB backends.
func (ss *scaleSet) newVmssCache() (*timedCache, error) {
getter := func(key string) (interface{}, error) {
ctx, cancel := getContextWithCancel()
@ -85,26 +86,34 @@ func (ss *scaleSet) newVmssCache() (*timedCache, error) {
func (ss *scaleSet) newNodeNameToScaleSetMappingCache() (*timedCache, error) {
getter := func(key string) (interface{}, error) {
scaleSetNames, err := ss.listScaleSets()
localCache := make(nodeNameToScaleSetMapping)
allResourceGroups, err := ss.GetResourceGroups()
if err != nil {
return nil, err
}
localCache := make(nodeNameToScaleSetMapping)
for _, ssName := range scaleSetNames {
vms, err := ss.listScaleSetVMs(ssName)
for _, resourceGroup := range allResourceGroups.List() {
scaleSetNames, err := ss.listScaleSets(resourceGroup)
if err != nil {
return nil, err
}
for _, vm := range vms {
if vm.OsProfile == nil || vm.OsProfile.ComputerName == nil {
glog.Warningf("failed to get computerName for vmssVM (%q)", ssName)
continue
for _, ssName := range scaleSetNames {
vms, err := ss.listScaleSetVMs(ssName, resourceGroup)
if err != nil {
return nil, err
}
computerName := strings.ToLower(*vm.OsProfile.ComputerName)
localCache[computerName] = ssName
for _, vm := range vms {
if vm.OsProfile == nil || vm.OsProfile.ComputerName == nil {
glog.Warningf("failed to get computerName for vmssVM (%q)", ssName)
continue
}
computerName := strings.ToLower(*vm.OsProfile.ComputerName)
localCache[computerName] = ssName
}
}
}
@ -116,14 +125,23 @@ func (ss *scaleSet) newNodeNameToScaleSetMappingCache() (*timedCache, error) {
func (ss *scaleSet) newAvailabilitySetNodesCache() (*timedCache, error) {
getter := func(key string) (interface{}, error) {
vmList, err := ss.Cloud.VirtualMachineClientListWithRetry()
localCache := sets.NewString()
resourceGroups, err := ss.GetResourceGroups()
if err != nil {
return nil, err
}
localCache := sets.NewString()
for _, vm := range vmList {
localCache.Insert(*vm.Name)
for _, resourceGroup := range resourceGroups.List() {
vmList, err := ss.Cloud.VirtualMachineClientListWithRetry(resourceGroup)
if err != nil {
return nil, err
}
for _, vm := range vmList {
if vm.Name != nil {
localCache.Insert(*vm.Name)
}
}
}
return localCache, nil
@ -132,10 +150,33 @@ func (ss *scaleSet) newAvailabilitySetNodesCache() (*timedCache, error) {
return newTimedcache(availabilitySetNodesCacheTTL, getter)
}
func buildVmssCacheKey(resourceGroup, name string) string {
// key is composed of <resourceGroup>#<vmName>
return fmt.Sprintf("%s%s%s", resourceGroup, vmssCacheSeparator, name)
}
func extractVmssCacheKey(key string) (string, string, error) {
// key is composed of <resourceGroup>#<vmName>
keyItems := strings.Split(key, vmssCacheSeparator)
if len(keyItems) != 2 {
return "", "", fmt.Errorf("key %q is not in format '<resouceGroup>#<vmName>'", key)
}
resourceGroup := keyItems[0]
vmName := keyItems[1]
return resourceGroup, vmName, nil
}
func (ss *scaleSet) newVmssVMCache() (*timedCache, error) {
getter := func(key string) (interface{}, error) {
// vmssVM name's format is 'scaleSetName_instanceID'
ssName, instanceID, err := extractVmssVMName(key)
// key is composed of <resourceGroup>#<vmName>
resourceGroup, vmName, err := extractVmssCacheKey(key)
if err != nil {
return nil, err
}
// vmName's format is 'scaleSetName_instanceID'
ssName, instanceID, err := extractVmssVMName(vmName)
if err != nil {
return nil, err
}
@ -147,7 +188,7 @@ func (ss *scaleSet) newVmssVMCache() (*timedCache, error) {
ctx, cancel := getContextWithCancel()
defer cancel()
result, err := ss.VirtualMachineScaleSetVMsClient.Get(ctx, ss.ResourceGroup, ssName, instanceID)
result, err := ss.VirtualMachineScaleSetVMsClient.Get(ctx, resourceGroup, ssName, instanceID)
exists, message, realErr := checkResourceExistsFromError(err)
if realErr != nil {
return nil, realErr