k3s/vendor/k8s.io/legacy-cloud-providers/azure/azure_controller_common.go

458 lines
16 KiB
Go

// +build !providerless
/*
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package azure
import (
"context"
"fmt"
"net/http"
"path"
"regexp"
"strings"
"sync"
"time"
"github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2019-12-01/compute"
"k8s.io/apimachinery/pkg/types"
kwait "k8s.io/apimachinery/pkg/util/wait"
cloudprovider "k8s.io/cloud-provider"
volerr "k8s.io/cloud-provider/volume/errors"
"k8s.io/klog/v2"
azcache "k8s.io/legacy-cloud-providers/azure/cache"
"k8s.io/legacy-cloud-providers/azure/retry"
)
const (
// for limits check https://docs.microsoft.com/en-us/azure/azure-subscription-service-limits#storage-limits
maxStorageAccounts = 100 // max # is 200 (250 with special request). this allows 100 for everything else including stand alone disks
maxDisksPerStorageAccounts = 60
storageAccountUtilizationBeforeGrowing = 0.5
// Disk Caching is not supported for disks 4 TiB and larger
// https://docs.microsoft.com/en-us/azure/virtual-machines/premium-storage-performance#disk-caching
diskCachingLimit = 4096 // GiB
maxLUN = 64 // max number of LUNs per VM
errLeaseFailed = "AcquireDiskLeaseFailed"
errLeaseIDMissing = "LeaseIdMissing"
errContainerNotFound = "ContainerNotFound"
errStatusCode400 = "statuscode=400"
errInvalidParameter = `code="invalidparameter"`
errTargetInstanceIds = `target="instanceids"`
sourceSnapshot = "snapshot"
sourceVolume = "volume"
// WriteAcceleratorEnabled support for Azure Write Accelerator on Azure Disks
// https://docs.microsoft.com/azure/virtual-machines/windows/how-to-enable-write-accelerator
WriteAcceleratorEnabled = "writeacceleratorenabled"
// see https://docs.microsoft.com/en-us/rest/api/compute/disks/createorupdate#create-a-managed-disk-by-copying-a-snapshot.
diskSnapshotPath = "/subscriptions/%s/resourceGroups/%s/providers/Microsoft.Compute/snapshots/%s"
// see https://docs.microsoft.com/en-us/rest/api/compute/disks/createorupdate#create-a-managed-disk-from-an-existing-managed-disk-in-the-same-or-different-subscription.
managedDiskPath = "/subscriptions/%s/resourceGroups/%s/providers/Microsoft.Compute/disks/%s"
)
var defaultBackOff = kwait.Backoff{
Steps: 20,
Duration: 2 * time.Second,
Factor: 1.5,
Jitter: 0.0,
}
var (
managedDiskPathRE = regexp.MustCompile(`.*/subscriptions/(?:.*)/resourceGroups/(?:.*)/providers/Microsoft.Compute/disks/(.+)`)
diskSnapshotPathRE = regexp.MustCompile(`.*/subscriptions/(?:.*)/resourceGroups/(?:.*)/providers/Microsoft.Compute/snapshots/(.+)`)
)
type controllerCommon struct {
subscriptionID string
location string
storageEndpointSuffix string
resourceGroup string
// store disk URI when disk is in attaching or detaching process
diskAttachDetachMap sync.Map
// vm disk map used to lock per vm update calls
vmLockMap *lockMap
cloud *Cloud
}
// getNodeVMSet gets the VMSet interface based on config.VMType and the real virtual machine type.
func (c *controllerCommon) getNodeVMSet(nodeName types.NodeName, crt azcache.AzureCacheReadType) (VMSet, error) {
// 1. vmType is standard, return cloud.VMSet directly.
if c.cloud.VMType == vmTypeStandard {
return c.cloud.VMSet, nil
}
// 2. vmType is Virtual Machine Scale Set (vmss), convert vmSet to scaleSet.
ss, ok := c.cloud.VMSet.(*scaleSet)
if !ok {
return nil, fmt.Errorf("error of converting vmSet (%q) to scaleSet with vmType %q", c.cloud.VMSet, c.cloud.VMType)
}
// 3. If the node is managed by availability set, then return ss.availabilitySet.
managedByAS, err := ss.isNodeManagedByAvailabilitySet(mapNodeNameToVMName(nodeName), crt)
if err != nil {
return nil, err
}
if managedByAS {
// vm is managed by availability set.
return ss.availabilitySet, nil
}
// 4. Node is managed by vmss
return ss, nil
}
// AttachDisk attaches a vhd to vm. The vhd must exist, can be identified by diskName, diskURI.
// return (lun, error)
func (c *controllerCommon) AttachDisk(isManagedDisk bool, diskName, diskURI string, nodeName types.NodeName, cachingMode compute.CachingTypes) (int32, error) {
diskEncryptionSetID := ""
writeAcceleratorEnabled := false
vmset, err := c.getNodeVMSet(nodeName, azcache.CacheReadTypeUnsafe)
if err != nil {
return -1, err
}
if isManagedDisk {
diskName := path.Base(diskURI)
resourceGroup, err := getResourceGroupFromDiskURI(diskURI)
if err != nil {
return -1, err
}
ctx, cancel := getContextWithCancel()
defer cancel()
disk, rerr := c.cloud.DisksClient.Get(ctx, resourceGroup, diskName)
if rerr != nil {
return -1, rerr.Error()
}
if disk.ManagedBy != nil && (disk.MaxShares == nil || *disk.MaxShares <= 1) {
attachErr := fmt.Sprintf(
"disk(%s) already attached to node(%s), could not be attached to node(%s)",
diskURI, *disk.ManagedBy, nodeName)
attachedNode, err := vmset.GetNodeNameByProviderID(*disk.ManagedBy)
if err != nil {
return -1, err
}
klog.V(2).Infof("found dangling volume %s attached to node %s", diskURI, attachedNode)
danglingErr := volerr.NewDanglingError(attachErr, attachedNode, "")
return -1, danglingErr
}
if disk.DiskProperties != nil {
if disk.DiskProperties.DiskSizeGB != nil && *disk.DiskProperties.DiskSizeGB >= diskCachingLimit && cachingMode != compute.CachingTypesNone {
// Disk Caching is not supported for disks 4 TiB and larger
// https://docs.microsoft.com/en-us/azure/virtual-machines/premium-storage-performance#disk-caching
cachingMode = compute.CachingTypesNone
klog.Warningf("size of disk(%s) is %dGB which is bigger than limit(%dGB), set cacheMode as None",
diskURI, *disk.DiskProperties.DiskSizeGB, diskCachingLimit)
}
if disk.DiskProperties.Encryption != nil &&
disk.DiskProperties.Encryption.DiskEncryptionSetID != nil {
diskEncryptionSetID = *disk.DiskProperties.Encryption.DiskEncryptionSetID
}
}
if v, ok := disk.Tags[WriteAcceleratorEnabled]; ok {
if v != nil && strings.EqualFold(*v, "true") {
writeAcceleratorEnabled = true
}
}
}
instanceid, err := c.cloud.InstanceID(context.TODO(), nodeName)
if err != nil {
klog.Warningf("failed to get azure instance id (%v) for node %s", err, nodeName)
return -1, fmt.Errorf("failed to get azure instance id for node %q (%v)", nodeName, err)
}
c.vmLockMap.LockEntry(strings.ToLower(string(nodeName)))
defer c.vmLockMap.UnlockEntry(strings.ToLower(string(nodeName)))
lun, err := c.GetNextDiskLun(nodeName)
if err != nil {
klog.Warningf("no LUN available for instance %q (%v)", nodeName, err)
return -1, fmt.Errorf("all LUNs are used, cannot attach volume (%s, %s) to instance %q (%v)", diskName, diskURI, instanceid, err)
}
klog.V(2).Infof("Trying to attach volume %q lun %d to node %q.", diskURI, lun, nodeName)
c.diskAttachDetachMap.Store(strings.ToLower(diskURI), "attaching")
defer c.diskAttachDetachMap.Delete(strings.ToLower(diskURI))
return lun, vmset.AttachDisk(isManagedDisk, diskName, diskURI, nodeName, lun, cachingMode, diskEncryptionSetID, writeAcceleratorEnabled)
}
// DetachDisk detaches a disk from host. The vhd can be identified by diskName or diskURI.
func (c *controllerCommon) DetachDisk(diskName, diskURI string, nodeName types.NodeName) error {
_, err := c.cloud.InstanceID(context.TODO(), nodeName)
if err != nil {
if err == cloudprovider.InstanceNotFound {
// if host doesn't exist, no need to detach
klog.Warningf("azureDisk - failed to get azure instance id(%q), DetachDisk(%s) will assume disk is already detached",
nodeName, diskURI)
return nil
}
klog.Warningf("failed to get azure instance id (%v)", err)
return fmt.Errorf("failed to get azure instance id for node %q (%v)", nodeName, err)
}
vmset, err := c.getNodeVMSet(nodeName, azcache.CacheReadTypeUnsafe)
if err != nil {
return err
}
klog.V(2).Infof("detach %v from node %q", diskURI, nodeName)
// make the lock here as small as possible
c.vmLockMap.LockEntry(strings.ToLower(string(nodeName)))
c.diskAttachDetachMap.Store(strings.ToLower(diskURI), "detaching")
err = vmset.DetachDisk(diskName, diskURI, nodeName)
c.diskAttachDetachMap.Delete(strings.ToLower(diskURI))
c.vmLockMap.UnlockEntry(strings.ToLower(string(nodeName)))
if err != nil {
if isInstanceNotFoundError(err) {
// if host doesn't exist, no need to detach
klog.Warningf("azureDisk - got InstanceNotFoundError(%v), DetachDisk(%s) will assume disk is already detached",
err, diskURI)
return nil
}
if retry.IsErrorRetriable(err) && c.cloud.CloudProviderBackoff {
klog.Warningf("azureDisk - update backing off: detach disk(%s, %s), err: %v", diskName, diskURI, err)
retryErr := kwait.ExponentialBackoff(c.cloud.RequestBackoff(), func() (bool, error) {
c.vmLockMap.LockEntry(strings.ToLower(string(nodeName)))
c.diskAttachDetachMap.Store(strings.ToLower(diskURI), "detaching")
err := vmset.DetachDisk(diskName, diskURI, nodeName)
c.diskAttachDetachMap.Delete(strings.ToLower(diskURI))
c.vmLockMap.UnlockEntry(strings.ToLower(string(nodeName)))
retriable := false
if err != nil && retry.IsErrorRetriable(err) {
retriable = true
}
return !retriable, err
})
if retryErr != nil {
err = retryErr
klog.V(2).Infof("azureDisk - update abort backoff: detach disk(%s, %s), err: %v", diskName, diskURI, err)
}
}
}
if err != nil {
klog.Errorf("azureDisk - detach disk(%s, %s) failed, err: %v", diskName, diskURI, err)
return err
}
klog.V(2).Infof("azureDisk - detach disk(%s, %s) succeeded", diskName, diskURI)
return nil
}
// getNodeDataDisks invokes vmSet interfaces to get data disks for the node.
func (c *controllerCommon) getNodeDataDisks(nodeName types.NodeName, crt azcache.AzureCacheReadType) ([]compute.DataDisk, error) {
vmset, err := c.getNodeVMSet(nodeName, crt)
if err != nil {
return nil, err
}
return vmset.GetDataDisks(nodeName, crt)
}
// GetDiskLun finds the lun on the host that the vhd is attached to, given a vhd's diskName and diskURI.
func (c *controllerCommon) GetDiskLun(diskName, diskURI string, nodeName types.NodeName) (int32, error) {
// getNodeDataDisks need to fetch the cached data/fresh data if cache expired here
// to ensure we get LUN based on latest entry.
disks, err := c.getNodeDataDisks(nodeName, azcache.CacheReadTypeDefault)
if err != nil {
klog.Errorf("error of getting data disks for node %q: %v", nodeName, err)
return -1, err
}
for _, disk := range disks {
if disk.Lun != nil && (disk.Name != nil && diskName != "" && strings.EqualFold(*disk.Name, diskName)) ||
(disk.Vhd != nil && disk.Vhd.URI != nil && diskURI != "" && strings.EqualFold(*disk.Vhd.URI, diskURI)) ||
(disk.ManagedDisk != nil && strings.EqualFold(*disk.ManagedDisk.ID, diskURI)) {
if disk.ToBeDetached != nil && *disk.ToBeDetached {
klog.Warningf("azureDisk - find disk(ToBeDetached): lun %d name %q uri %q", *disk.Lun, diskName, diskURI)
} else {
// found the disk
klog.V(2).Infof("azureDisk - find disk: lun %d name %q uri %q", *disk.Lun, diskName, diskURI)
return *disk.Lun, nil
}
}
}
return -1, fmt.Errorf("cannot find Lun for disk %s", diskName)
}
// GetNextDiskLun searches all vhd attachment on the host and find unused lun. Return -1 if all luns are used.
func (c *controllerCommon) GetNextDiskLun(nodeName types.NodeName) (int32, error) {
disks, err := c.getNodeDataDisks(nodeName, azcache.CacheReadTypeDefault)
if err != nil {
klog.Errorf("error of getting data disks for node %q: %v", nodeName, err)
return -1, err
}
used := make([]bool, maxLUN)
for _, disk := range disks {
if disk.Lun != nil {
used[*disk.Lun] = true
}
}
for k, v := range used {
if !v {
return int32(k), nil
}
}
return -1, fmt.Errorf("all luns are used")
}
// DisksAreAttached checks if a list of volumes are attached to the node with the specified NodeName.
func (c *controllerCommon) DisksAreAttached(diskNames []string, nodeName types.NodeName) (map[string]bool, error) {
attached := make(map[string]bool)
for _, diskName := range diskNames {
attached[diskName] = false
}
// doing stalled read for getNodeDataDisks to ensure we don't call ARM
// for every reconcile call. The cache is invalidated after Attach/Detach
// disk. So the new entry will be fetched and cached the first time reconcile
// loop runs after the Attach/Disk OP which will reflect the latest model.
disks, err := c.getNodeDataDisks(nodeName, azcache.CacheReadTypeUnsafe)
if err != nil {
if err == cloudprovider.InstanceNotFound {
// if host doesn't exist, no need to detach
klog.Warningf("azureDisk - Cannot find node %q, DisksAreAttached will assume disks %v are not attached to it.",
nodeName, diskNames)
return attached, nil
}
return attached, err
}
for _, disk := range disks {
for _, diskName := range diskNames {
if disk.Name != nil && diskName != "" && strings.EqualFold(*disk.Name, diskName) {
attached[diskName] = true
}
}
}
return attached, nil
}
func filterDetachingDisks(unfilteredDisks []compute.DataDisk) []compute.DataDisk {
filteredDisks := []compute.DataDisk{}
for _, disk := range unfilteredDisks {
if disk.ToBeDetached != nil && *disk.ToBeDetached {
if disk.Name != nil {
klog.V(2).Infof("Filtering disk: %s with ToBeDetached flag set.", *disk.Name)
}
} else {
filteredDisks = append(filteredDisks, disk)
}
}
return filteredDisks
}
func (c *controllerCommon) filterNonExistingDisks(ctx context.Context, unfilteredDisks []compute.DataDisk) []compute.DataDisk {
filteredDisks := []compute.DataDisk{}
for _, disk := range unfilteredDisks {
filter := false
if disk.ManagedDisk != nil && disk.ManagedDisk.ID != nil {
diskURI := *disk.ManagedDisk.ID
exist, err := c.cloud.checkDiskExists(ctx, diskURI)
if err != nil {
klog.Errorf("checkDiskExists(%s) failed with error: %v", diskURI, err)
} else {
// only filter disk when checkDiskExists returns <false, nil>
filter = !exist
if filter {
klog.Errorf("disk(%s) does not exist, removed from data disk list", diskURI)
}
}
}
if !filter {
filteredDisks = append(filteredDisks, disk)
}
}
return filteredDisks
}
func (c *controllerCommon) checkDiskExists(ctx context.Context, diskURI string) (bool, error) {
diskName := path.Base(diskURI)
resourceGroup, err := getResourceGroupFromDiskURI(diskURI)
if err != nil {
return false, err
}
if _, rerr := c.cloud.DisksClient.Get(ctx, resourceGroup, diskName); rerr != nil {
if rerr.HTTPStatusCode == http.StatusNotFound {
return false, nil
}
return false, rerr.Error()
}
return true, nil
}
func getValidCreationData(subscriptionID, resourceGroup, sourceResourceID, sourceType string) (compute.CreationData, error) {
if sourceResourceID == "" {
return compute.CreationData{
CreateOption: compute.Empty,
}, nil
}
switch sourceType {
case sourceSnapshot:
if match := diskSnapshotPathRE.FindString(sourceResourceID); match == "" {
sourceResourceID = fmt.Sprintf(diskSnapshotPath, subscriptionID, resourceGroup, sourceResourceID)
}
case sourceVolume:
if match := managedDiskPathRE.FindString(sourceResourceID); match == "" {
sourceResourceID = fmt.Sprintf(managedDiskPath, subscriptionID, resourceGroup, sourceResourceID)
}
default:
return compute.CreationData{
CreateOption: compute.Empty,
}, nil
}
splits := strings.Split(sourceResourceID, "/")
if len(splits) > 9 {
if sourceType == sourceSnapshot {
return compute.CreationData{}, fmt.Errorf("sourceResourceID(%s) is invalid, correct format: %s", sourceResourceID, diskSnapshotPathRE)
}
return compute.CreationData{}, fmt.Errorf("sourceResourceID(%s) is invalid, correct format: %s", sourceResourceID, managedDiskPathRE)
}
return compute.CreationData{
CreateOption: compute.Copy,
SourceResourceID: &sourceResourceID,
}, nil
}
func isInstanceNotFoundError(err error) bool {
errMsg := strings.ToLower(err.Error())
return strings.Contains(errMsg, errStatusCode400) && strings.Contains(errMsg, errInvalidParameter) && strings.Contains(errMsg, errTargetInstanceIds)
}