Merge pull request #67984 from feiskyer/on-prem

Automatic merge from submit-queue. If you want to cherry-pick this change to another branch, please follow the instructions here: https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md.

Add support for unmanaged nodes for Azure cloud provider

**What this PR does / why we need it**:

Continue of [Azure cross resource groups feature](https://github.com/kubernetes/features/issues/604).

This PR adds support for unmanaged nodes (such as on-prem or on other clouds) that are labeled with `alpha.service-controller.kubernetes.io/exclude-balancer=true` and `kubernetes.azure.com/managed=false`. Azure cloud provider would exclude such nodes from LoadBalancer backends and always assumes they are existing.

**Which issue(s) this PR fixes** *(optional, in `fixes #<issue number>(, fixes #<issue_number>, ...)` format, will close the issue(s) when PR gets merged)*:

See KEP [here](https://github.com/kubernetes/community/blob/master/keps/sig-azure/0025-20180809-cross-resource-group-nodes.md).

**Special notes for your reviewer**:

Azure cloud provider won't provision network routes for on-prem nodes, so cluster admins should ensure the network (including pod-to-pod, pod-to-node and node-to-node connectivity) has been set up properly.

**Release note**:

```release-note
Azure cloud provider now supports unmanaged nodes (such as on-prem) that are labeled with `kubernetes.azure.com/managed=false` and `alpha.service-controller.kubernetes.io/exclude-balancer=true`
```

/assign @khenidak @andyzhangx
/sig azure
/kind feature
/milestone v1.12
pull/8/head
Kubernetes Submit Queue 2018-08-29 21:41:54 -07:00 committed by GitHub
commit 8aea674681
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 250 additions and 3 deletions

View File

@ -159,7 +159,7 @@ type Cloud struct {
metadata *InstanceMetadata
vmSet VMSet
// Lock for access to node caches
// Lock for access to node caches, includes nodeZones, nodeResourceGroups, and unmanagedNodes.
nodeCachesLock sync.Mutex
// nodeZones is a mapping from Zone to a sets.String of Node's names in the Zone
// it is updated by the nodeInformer
@ -171,6 +171,11 @@ type Cloud struct {
// nodeInformerSynced is for determining if the informer has synced.
nodeInformerSynced cache.InformerSynced
// routeCIDRsLock holds lock for routeCIDRs cache.
routeCIDRsLock sync.Mutex
// routeCIDRs holds cache for route CIDRs.
routeCIDRs map[string]string
// Clients for vmss.
VirtualMachineScaleSetsClient VirtualMachineScaleSetsClient
VirtualMachineScaleSetVMsClient VirtualMachineScaleSetVMsClient
@ -270,6 +275,7 @@ func NewCloud(configReader io.Reader) (cloudprovider.Interface, error) {
nodeZones: map[string]sets.String{},
nodeResourceGroups: map[string]string{},
unmanagedNodes: sets.NewString(),
routeCIDRs: map[string]string{},
DisksClient: newAzDisksClient(azClientConfig),
RoutesClient: newAzRoutesClient(azClientConfig),

View File

@ -30,6 +30,16 @@ import (
// NodeAddresses returns the addresses of the specified instance.
func (az *Cloud) NodeAddresses(ctx context.Context, name types.NodeName) ([]v1.NodeAddress, error) {
// Returns nil for unmanaged nodes because azure cloud provider couldn't fetch information for them.
unmanaged, err := az.IsNodeUnmanaged(string(name))
if err != nil {
return nil, err
}
if unmanaged {
glog.V(4).Infof("NodeAddresses: omitting unmanaged node %q", name)
return nil, nil
}
addressGetter := func(nodeName types.NodeName) ([]v1.NodeAddress, error) {
ip, publicIP, err := az.GetIPForMachineWithRetry(nodeName)
if err != nil {
@ -92,6 +102,12 @@ func (az *Cloud) NodeAddresses(ctx context.Context, name types.NodeName) ([]v1.N
// This method will not be called from the node that is requesting this ID. i.e. metadata service
// and other local methods cannot be used here
func (az *Cloud) NodeAddressesByProviderID(ctx context.Context, providerID string) ([]v1.NodeAddress, error) {
// Returns nil for unmanaged nodes because azure cloud provider couldn't fetch information for them.
if az.IsNodeUnmanagedByProviderID(providerID) {
glog.V(4).Infof("NodeAddressesByProviderID: omitting unmanaged node %q", providerID)
return nil, nil
}
name, err := az.vmSet.GetNodeNameByProviderID(providerID)
if err != nil {
return nil, err
@ -103,6 +119,12 @@ func (az *Cloud) NodeAddressesByProviderID(ctx context.Context, providerID strin
// InstanceExistsByProviderID returns true if the instance with the given provider id still exists and is running.
// If false is returned with no error, the instance will be immediately deleted by the cloud controller manager.
func (az *Cloud) InstanceExistsByProviderID(ctx context.Context, providerID string) (bool, error) {
// Returns true for unmanaged nodes because azure cloud provider always assumes them exists.
if az.IsNodeUnmanagedByProviderID(providerID) {
glog.V(4).Infof("InstanceExistsByProviderID: assuming unmanaged node %q exists", providerID)
return true, nil
}
name, err := az.vmSet.GetNodeNameByProviderID(providerID)
if err != nil {
return false, err
@ -154,6 +176,15 @@ func (az *Cloud) isCurrentInstance(name types.NodeName, metadataVMName string) (
// Note that if the instance does not exist or is no longer running, we must return ("", cloudprovider.InstanceNotFound)
func (az *Cloud) InstanceID(ctx context.Context, name types.NodeName) (string, error) {
nodeName := mapNodeNameToVMName(name)
unmanaged, err := az.IsNodeUnmanaged(nodeName)
if err != nil {
return "", err
}
if unmanaged {
// InstanceID is same with nodeName for unmanaged nodes.
glog.V(4).Infof("InstanceID: getting ID %q for unmanaged node %q", name, name)
return nodeName, nil
}
if az.UseInstanceMetadata {
computeMetadata, err := az.getComputeMetadata()
@ -202,6 +233,12 @@ func (az *Cloud) InstanceID(ctx context.Context, name types.NodeName) (string, e
// This method will not be called from the node that is requesting this ID. i.e. metadata service
// and other local methods cannot be used here
func (az *Cloud) InstanceTypeByProviderID(ctx context.Context, providerID string) (string, error) {
// Returns "" for unmanaged nodes because azure cloud provider couldn't fetch information for them.
if az.IsNodeUnmanagedByProviderID(providerID) {
glog.V(4).Infof("InstanceTypeByProviderID: omitting unmanaged node %q", providerID)
return "", nil
}
name, err := az.vmSet.GetNodeNameByProviderID(providerID)
if err != nil {
return "", err
@ -215,6 +252,16 @@ func (az *Cloud) InstanceTypeByProviderID(ctx context.Context, providerID string
// (Implementer Note): This is used by kubelet. Kubelet will label the node. Real log from kubelet:
// Adding node label from cloud provider: beta.kubernetes.io/instance-type=[value]
func (az *Cloud) InstanceType(ctx context.Context, name types.NodeName) (string, error) {
// Returns "" for unmanaged nodes because azure cloud provider couldn't fetch information for them.
unmanaged, err := az.IsNodeUnmanaged(string(name))
if err != nil {
return "", err
}
if unmanaged {
glog.V(4).Infof("InstanceType: omitting unmanaged node %q", name)
return "", nil
}
if az.UseInstanceMetadata {
computeMetadata, err := az.getComputeMetadata()
if err != nil {

View File

@ -32,7 +32,29 @@ import (
func (az *Cloud) ListRoutes(ctx context.Context, clusterName string) ([]*cloudprovider.Route, error) {
glog.V(10).Infof("ListRoutes: START clusterName=%q", clusterName)
routeTable, existsRouteTable, err := az.getRouteTable()
return processRoutes(routeTable, existsRouteTable, err)
routes, err := processRoutes(routeTable, existsRouteTable, err)
if err != nil {
return nil, err
}
// Compose routes for unmanaged routes so that node controller won't retry creating routes for them.
unmanagedNodes, err := az.GetUnmanagedNodes()
if err != nil {
return nil, err
}
az.routeCIDRsLock.Lock()
defer az.routeCIDRsLock.Unlock()
for _, nodeName := range unmanagedNodes.List() {
if cidr, ok := az.routeCIDRs[nodeName]; ok {
routes = append(routes, &cloudprovider.Route{
Name: nodeName,
TargetNode: mapRouteNameToNodeName(nodeName),
DestinationCIDR: cidr,
})
}
}
return routes, nil
}
// Injectable for testing
@ -107,6 +129,20 @@ func (az *Cloud) createRouteTable() error {
// route.Name will be ignored, although the cloud-provider may use nameHint
// to create a more user-meaningful name.
func (az *Cloud) CreateRoute(ctx context.Context, clusterName string, nameHint string, kubeRoute *cloudprovider.Route) error {
// Returns for unmanaged nodes because azure cloud provider couldn't fetch information for them.
nodeName := string(kubeRoute.TargetNode)
unmanaged, err := az.IsNodeUnmanaged(nodeName)
if err != nil {
return err
}
if unmanaged {
glog.V(2).Infof("CreateRoute: omitting unmanaged node %q", kubeRoute.TargetNode)
az.routeCIDRsLock.Lock()
defer az.routeCIDRsLock.Unlock()
az.routeCIDRs[nodeName] = kubeRoute.DestinationCIDR
return nil
}
glog.V(2).Infof("CreateRoute: creating route. clusterName=%q instance=%q cidr=%q", clusterName, kubeRoute.TargetNode, kubeRoute.DestinationCIDR)
if err := az.createRouteTableIfNotExists(clusterName, kubeRoute); err != nil {
return err
@ -150,6 +186,20 @@ func (az *Cloud) CreateRoute(ctx context.Context, clusterName string, nameHint s
// DeleteRoute deletes the specified managed route
// Route should be as returned by ListRoutes
func (az *Cloud) DeleteRoute(ctx context.Context, clusterName string, kubeRoute *cloudprovider.Route) error {
// Returns for unmanaged nodes because azure cloud provider couldn't fetch information for them.
nodeName := string(kubeRoute.TargetNode)
unmanaged, err := az.IsNodeUnmanaged(nodeName)
if err != nil {
return err
}
if unmanaged {
glog.V(2).Infof("DeleteRoute: omitting unmanaged node %q", kubeRoute.TargetNode)
az.routeCIDRsLock.Lock()
defer az.routeCIDRsLock.Unlock()
delete(az.routeCIDRs, nodeName)
return nil
}
glog.V(2).Infof("DeleteRoute: deleting route. clusterName=%q instance=%q cidr=%q", clusterName, kubeRoute.TargetNode, kubeRoute.DestinationCIDR)
ctx, cancel := getContextWithCancel()

View File

@ -22,6 +22,7 @@ import (
"reflect"
"testing"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/kubernetes/pkg/cloudprovider"
"github.com/Azure/azure-sdk-for-go/services/network/mgmt/2017-09-01/network"
@ -38,6 +39,8 @@ func TestDeleteRoute(t *testing.T) {
RouteTableName: "bar",
Location: "location",
},
unmanagedNodes: sets.NewString(),
nodeInformerSynced: func() bool { return true },
}
route := cloudprovider.Route{TargetNode: "node", DestinationCIDR: "1.2.3.4/24"}
routeName := mapNodeNameToRouteName(route.TargetNode)
@ -62,6 +65,28 @@ func TestDeleteRoute(t *testing.T) {
ob, found := mp[routeName]
if found {
t.Errorf("unexpectedly found: %v that should have been deleted.", ob)
t.FailNow()
}
// test delete route for unmanaged nodes.
nodeName := "node1"
nodeCIDR := "4.3.2.1/24"
cloud.unmanagedNodes.Insert(nodeName)
cloud.routeCIDRs = map[string]string{
nodeName: nodeCIDR,
}
route1 := cloudprovider.Route{
TargetNode: mapRouteNameToNodeName(nodeName),
DestinationCIDR: nodeCIDR,
}
err = cloud.DeleteRoute(context.TODO(), "cluster", &route1)
if err != nil {
t.Errorf("unexpected error deleting route: %v", err)
t.FailNow()
}
cidr, found := cloud.routeCIDRs[nodeName]
if found {
t.Errorf("unexpected CIDR item (%q) for %s", cidr, nodeName)
}
}
@ -79,6 +104,8 @@ func TestCreateRoute(t *testing.T) {
RouteTableName: "bar",
Location: "location",
},
unmanagedNodes: sets.NewString(),
nodeInformerSynced: func() bool { return true },
}
cache, _ := cloud.newRouteTableCache()
cloud.rtCache = cache
@ -122,6 +149,29 @@ func TestCreateRoute(t *testing.T) {
if *routeInfo.NextHopIPAddress != nodeIP {
t.Errorf("Expected IP address: %s, saw %s", nodeIP, *routeInfo.NextHopIPAddress)
}
// test create route for unmanaged nodes.
nodeName := "node1"
nodeCIDR := "4.3.2.1/24"
cloud.unmanagedNodes.Insert(nodeName)
cloud.routeCIDRs = map[string]string{}
route1 := cloudprovider.Route{
TargetNode: mapRouteNameToNodeName(nodeName),
DestinationCIDR: nodeCIDR,
}
err = cloud.CreateRoute(context.TODO(), "cluster", "unused", &route1)
if err != nil {
t.Errorf("unexpected error creating route: %v", err)
t.FailNow()
}
cidr, found := cloud.routeCIDRs[nodeName]
if !found {
t.Errorf("unexpected missing item for %s", nodeName)
t.FailNow()
}
if cidr != nodeCIDR {
t.Errorf("unexpected cidr %s, saw %s", nodeCIDR, cidr)
}
}
func TestCreateRouteTableIfNotExists_Exists(t *testing.T) {

View File

@ -625,7 +625,7 @@ func (as *availabilitySet) ensureHostInPool(serviceName string, nodeName types.N
}
if nic.ProvisioningState != nil && *nic.ProvisioningState == nicFailedState {
glog.V(3).Infof("ensureHostInPool skips node %s because its primdary nic %s is in Failed state", nodeName, nic.Name)
glog.V(3).Infof("ensureHostInPool skips node %s because its primary nic %s is in Failed state", nodeName, *nic.Name)
return nil
}

View File

@ -958,6 +958,7 @@ func getTestCloud() (az *Cloud) {
nodeInformerSynced: func() bool { return true },
nodeResourceGroups: map[string]string{},
unmanagedNodes: sets.NewString(),
routeCIDRs: map[string]string{},
}
az.DisksClient = newFakeDisksClient()
az.InterfacesClient = newFakeAzureInterfacesClient()

View File

@ -19,6 +19,7 @@ package azure
import (
"fmt"
"net/http"
"regexp"
"strings"
"time"
@ -36,6 +37,8 @@ var (
lbCacheTTL = 2 * time.Minute
nsgCacheTTL = 2 * time.Minute
rtCacheTTL = 2 * time.Minute
azureNodeProviderIDRE = regexp.MustCompile(`^azure:///subscriptions/(?:.*)/resourceGroups/(?:.*)/providers/Microsoft.Compute/(?:.*)`)
)
// checkExistsFromError inspects an error and returns a true if err is nil,
@ -283,3 +286,21 @@ func (az *Cloud) useStandardLoadBalancer() bool {
func (az *Cloud) excludeMasterNodesFromStandardLB() bool {
return az.ExcludeMasterFromStandardLB != nil && *az.ExcludeMasterFromStandardLB
}
// IsNodeUnmanaged returns true if the node is not managed by Azure cloud provider.
// Those nodes includes on-prem or VMs from other clouds. They will not be added to load balancer
// backends. Azure routes and managed disks are also not supported for them.
func (az *Cloud) IsNodeUnmanaged(nodeName string) (bool, error) {
unmanagedNodes, err := az.GetUnmanagedNodes()
if err != nil {
return false, err
}
return unmanagedNodes.Has(nodeName), nil
}
// IsNodeUnmanagedByProviderID returns true if the node is not managed by Azure cloud provider.
// All managed node's providerIDs are in format 'azure:///subscriptions/<id>/resourceGroups/<rg>/providers/Microsoft.Compute/.*'
func (az *Cloud) IsNodeUnmanagedByProviderID(providerID string) bool {
return azureNodeProviderIDRE.Match([]byte(providerID))
}

View File

@ -23,6 +23,8 @@ import (
"testing"
"github.com/Azure/go-autorest/autorest"
"github.com/stretchr/testify/assert"
"k8s.io/apimachinery/pkg/util/sets"
)
func TestExtractNotFound(t *testing.T) {
@ -51,3 +53,57 @@ func TestExtractNotFound(t *testing.T) {
}
}
}
func TestIsNodeUnmanaged(t *testing.T) {
tests := []struct {
name string
unmanagedNodes sets.String
node string
expected bool
expectErr bool
}{
{
name: "unmanaged node should return true",
unmanagedNodes: sets.NewString("node1", "node2"),
node: "node1",
expected: true,
},
{
name: "managed node should return false",
unmanagedNodes: sets.NewString("node1", "node2"),
node: "node3",
expected: false,
},
{
name: "empty unmanagedNodes should return true",
unmanagedNodes: sets.NewString(),
node: "node3",
expected: false,
},
{
name: "no synced informer should report error",
unmanagedNodes: sets.NewString(),
node: "node1",
expectErr: true,
},
}
az := getTestCloud()
for _, test := range tests {
az.unmanagedNodes = test.unmanagedNodes
if test.expectErr {
az.nodeInformerSynced = func() bool {
return false
}
}
real, err := az.IsNodeUnmanaged(test.node)
if test.expectErr {
assert.Error(t, err, test.name)
continue
}
assert.NoError(t, err, test.name)
assert.Equal(t, test.expected, real, test.name)
}
}

View File

@ -103,6 +103,12 @@ func (az *Cloud) getZoneFromFaultDomain() (cloudprovider.Zone, error) {
// This is particularly useful in external cloud providers where the kubelet
// does not initialize node data.
func (az *Cloud) GetZoneByProviderID(ctx context.Context, providerID string) (cloudprovider.Zone, error) {
// Returns nil for unmanaged nodes because azure cloud provider couldn't fetch information for them.
if az.IsNodeUnmanagedByProviderID(providerID) {
glog.V(2).Infof("GetZoneByProviderID: omitting unmanaged node %q", providerID)
return cloudprovider.Zone{}, nil
}
nodeName, err := az.vmSet.GetNodeNameByProviderID(providerID)
if err != nil {
return cloudprovider.Zone{}, err
@ -115,6 +121,16 @@ func (az *Cloud) GetZoneByProviderID(ctx context.Context, providerID string) (cl
// This is particularly useful in external cloud providers where the kubelet
// does not initialize node data.
func (az *Cloud) GetZoneByNodeName(ctx context.Context, nodeName types.NodeName) (cloudprovider.Zone, error) {
// Returns "" for unmanaged nodes because azure cloud provider couldn't fetch information for them.
unmanaged, err := az.IsNodeUnmanaged(string(nodeName))
if err != nil {
return cloudprovider.Zone{}, err
}
if unmanaged {
glog.V(2).Infof("GetZoneByNodeName: omitting unmanaged node %q", nodeName)
return cloudprovider.Zone{}, nil
}
return az.vmSet.GetZoneByNodeName(string(nodeName))
}