mirror of https://github.com/k3s-io/k3s
557 lines
18 KiB
Go
557 lines
18 KiB
Go
// +build !providerless
|
|
|
|
/*
|
|
Copyright 2016 The Kubernetes Authors.
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
you may not use this file except in compliance with the License.
|
|
You may obtain a copy of the License at
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
See the License for the specific language governing permissions and
|
|
limitations under the License.
|
|
*/
|
|
|
|
package azure
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"strings"
|
|
"sync"
|
|
"time"
|
|
|
|
"github.com/Azure/azure-sdk-for-go/services/network/mgmt/2019-06-01/network"
|
|
"github.com/Azure/go-autorest/autorest/to"
|
|
|
|
"k8s.io/apimachinery/pkg/types"
|
|
"k8s.io/apimachinery/pkg/util/wait"
|
|
cloudprovider "k8s.io/cloud-provider"
|
|
"k8s.io/klog/v2"
|
|
azcache "k8s.io/legacy-cloud-providers/azure/cache"
|
|
"k8s.io/legacy-cloud-providers/azure/metrics"
|
|
utilnet "k8s.io/utils/net"
|
|
)
|
|
|
|
var (
|
|
// routeUpdateInterval defines the route reconciling interval.
|
|
routeUpdateInterval = 30 * time.Second
|
|
)
|
|
|
|
// routeOperation defines the allowed operations for route updating.
|
|
type routeOperation string
|
|
|
|
// copied to minimize the number of cross reference
|
|
// and exceptions in publishing and allowed imports.
|
|
const (
|
|
routeNameFmt = "%s____%s"
|
|
routeNameSeparator = "____"
|
|
|
|
// Route operations.
|
|
routeOperationAdd routeOperation = "add"
|
|
routeOperationDelete routeOperation = "delete"
|
|
routeTableOperationUpdateTags routeOperation = "updateRouteTableTags"
|
|
)
|
|
|
|
// delayedRouteOperation defines a delayed route operation which is used in delayedRouteUpdater.
|
|
type delayedRouteOperation struct {
|
|
route network.Route
|
|
routeTableTags map[string]*string
|
|
operation routeOperation
|
|
result chan error
|
|
}
|
|
|
|
// wait waits for the operation completion and returns the result.
|
|
func (op *delayedRouteOperation) wait() error {
|
|
return <-op.result
|
|
}
|
|
|
|
// delayedRouteUpdater defines a delayed route updater, which batches all the
|
|
// route updating operations within "interval" period.
|
|
// Example usage:
|
|
// op, err := updater.addRouteOperation(routeOperationAdd, route)
|
|
// err = op.wait()
|
|
type delayedRouteUpdater struct {
|
|
az *Cloud
|
|
interval time.Duration
|
|
|
|
lock sync.Mutex
|
|
routesToUpdate []*delayedRouteOperation
|
|
}
|
|
|
|
// newDelayedRouteUpdater creates a new delayedRouteUpdater.
|
|
func newDelayedRouteUpdater(az *Cloud, interval time.Duration) *delayedRouteUpdater {
|
|
return &delayedRouteUpdater{
|
|
az: az,
|
|
interval: interval,
|
|
routesToUpdate: make([]*delayedRouteOperation, 0),
|
|
}
|
|
}
|
|
|
|
// run starts the updater reconciling loop.
|
|
func (d *delayedRouteUpdater) run() {
|
|
err := wait.PollImmediateInfinite(d.interval, func() (bool, error) {
|
|
d.updateRoutes()
|
|
return false, nil
|
|
})
|
|
if err != nil { // this should never happen, if it does, panic
|
|
panic(err)
|
|
}
|
|
}
|
|
|
|
// updateRoutes invokes route table client to update all routes.
|
|
func (d *delayedRouteUpdater) updateRoutes() {
|
|
d.lock.Lock()
|
|
defer d.lock.Unlock()
|
|
|
|
// No need to do any updating.
|
|
if len(d.routesToUpdate) == 0 {
|
|
return
|
|
}
|
|
|
|
var err error
|
|
defer func() {
|
|
// Notify all the goroutines.
|
|
for _, rt := range d.routesToUpdate {
|
|
rt.result <- err
|
|
}
|
|
// Clear all the jobs.
|
|
d.routesToUpdate = make([]*delayedRouteOperation, 0)
|
|
}()
|
|
|
|
var (
|
|
routeTable network.RouteTable
|
|
existsRouteTable bool
|
|
)
|
|
routeTable, existsRouteTable, err = d.az.getRouteTable(azcache.CacheReadTypeDefault)
|
|
if err != nil {
|
|
klog.Errorf("getRouteTable() failed with error: %v", err)
|
|
return
|
|
}
|
|
|
|
// create route table if it doesn't exists yet.
|
|
if !existsRouteTable {
|
|
err = d.az.createRouteTable()
|
|
if err != nil {
|
|
klog.Errorf("createRouteTable() failed with error: %v", err)
|
|
return
|
|
}
|
|
|
|
routeTable, _, err = d.az.getRouteTable(azcache.CacheReadTypeDefault)
|
|
if err != nil {
|
|
klog.Errorf("getRouteTable() failed with error: %v", err)
|
|
return
|
|
}
|
|
}
|
|
|
|
// reconcile routes.
|
|
dirty, onlyUpdateTags := false, true
|
|
routes := []network.Route{}
|
|
if routeTable.Routes != nil {
|
|
routes = *routeTable.Routes
|
|
}
|
|
|
|
routes, dirty = d.cleanupOutdatedRoutes(routes)
|
|
if dirty {
|
|
onlyUpdateTags = false
|
|
}
|
|
|
|
for _, rt := range d.routesToUpdate {
|
|
if rt.operation == routeTableOperationUpdateTags {
|
|
routeTable.Tags = rt.routeTableTags
|
|
dirty = true
|
|
continue
|
|
}
|
|
|
|
routeMatch := false
|
|
onlyUpdateTags = false
|
|
for i, existingRoute := range routes {
|
|
if strings.EqualFold(to.String(existingRoute.Name), to.String(rt.route.Name)) {
|
|
// delete the name-matched routes here (missing routes would be added later if the operation is add).
|
|
routes = append(routes[:i], routes[i+1:]...)
|
|
if existingRoute.RoutePropertiesFormat != nil &&
|
|
rt.route.RoutePropertiesFormat != nil &&
|
|
strings.EqualFold(to.String(existingRoute.AddressPrefix), to.String(rt.route.AddressPrefix)) &&
|
|
strings.EqualFold(to.String(existingRoute.NextHopIPAddress), to.String(rt.route.NextHopIPAddress)) {
|
|
routeMatch = true
|
|
}
|
|
if rt.operation == routeOperationDelete {
|
|
dirty = true
|
|
}
|
|
break
|
|
}
|
|
}
|
|
|
|
// Add missing routes if the operation is add.
|
|
if rt.operation == routeOperationAdd {
|
|
routes = append(routes, rt.route)
|
|
if !routeMatch {
|
|
dirty = true
|
|
}
|
|
continue
|
|
}
|
|
}
|
|
|
|
if dirty {
|
|
if !onlyUpdateTags {
|
|
klog.V(2).Infof("updateRoutes: updating routes")
|
|
routeTable.Routes = &routes
|
|
}
|
|
err = d.az.CreateOrUpdateRouteTable(routeTable)
|
|
if err != nil {
|
|
klog.Errorf("CreateOrUpdateRouteTable() failed with error: %v", err)
|
|
return
|
|
}
|
|
}
|
|
}
|
|
|
|
// cleanupOutdatedRoutes deletes all non-dualstack routes when dualstack is enabled,
|
|
// and deletes all dualstack routes when dualstack is not enabled.
|
|
func (d *delayedRouteUpdater) cleanupOutdatedRoutes(existingRoutes []network.Route) (routes []network.Route, changed bool) {
|
|
for i := len(existingRoutes) - 1; i >= 0; i-- {
|
|
existingRouteName := to.String(existingRoutes[i].Name)
|
|
split := strings.Split(existingRouteName, routeNameSeparator)
|
|
|
|
// filter out unmanaged routes
|
|
deleteRoute := false
|
|
if d.az.nodeNames.Has(split[0]) {
|
|
if d.az.ipv6DualStackEnabled && len(split) == 1 {
|
|
klog.V(2).Infof("cleanupOutdatedRoutes: deleting outdated non-dualstack route %s", existingRouteName)
|
|
deleteRoute = true
|
|
} else if !d.az.ipv6DualStackEnabled && len(split) == 2 {
|
|
klog.V(2).Infof("cleanupOutdatedRoutes: deleting outdated dualstack route %s", existingRouteName)
|
|
deleteRoute = true
|
|
}
|
|
|
|
if deleteRoute {
|
|
existingRoutes = append(existingRoutes[:i], existingRoutes[i+1:]...)
|
|
changed = true
|
|
}
|
|
}
|
|
}
|
|
|
|
return existingRoutes, changed
|
|
}
|
|
|
|
// addRouteOperation adds the routeOperation to delayedRouteUpdater and returns a delayedRouteOperation.
|
|
func (d *delayedRouteUpdater) addRouteOperation(operation routeOperation, route network.Route) (*delayedRouteOperation, error) {
|
|
d.lock.Lock()
|
|
defer d.lock.Unlock()
|
|
|
|
op := &delayedRouteOperation{
|
|
route: route,
|
|
operation: operation,
|
|
result: make(chan error),
|
|
}
|
|
d.routesToUpdate = append(d.routesToUpdate, op)
|
|
return op, nil
|
|
}
|
|
|
|
// addUpdateRouteTableTagsOperation adds a update route table tags operation to delayedRouteUpdater and returns a delayedRouteOperation.
|
|
func (d *delayedRouteUpdater) addUpdateRouteTableTagsOperation(operation routeOperation, tags map[string]*string) (*delayedRouteOperation, error) {
|
|
d.lock.Lock()
|
|
defer d.lock.Unlock()
|
|
|
|
op := &delayedRouteOperation{
|
|
routeTableTags: tags,
|
|
operation: operation,
|
|
result: make(chan error),
|
|
}
|
|
d.routesToUpdate = append(d.routesToUpdate, op)
|
|
return op, nil
|
|
}
|
|
|
|
// ListRoutes lists all managed routes that belong to the specified clusterName
|
|
func (az *Cloud) ListRoutes(ctx context.Context, clusterName string) ([]*cloudprovider.Route, error) {
|
|
klog.V(10).Infof("ListRoutes: START clusterName=%q", clusterName)
|
|
routeTable, existsRouteTable, err := az.getRouteTable(azcache.CacheReadTypeDefault)
|
|
routes, err := processRoutes(az.ipv6DualStackEnabled, routeTable, existsRouteTable, err)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// Compose routes for unmanaged routes so that node controller won't retry creating routes for them.
|
|
unmanagedNodes, err := az.GetUnmanagedNodes()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
az.routeCIDRsLock.Lock()
|
|
defer az.routeCIDRsLock.Unlock()
|
|
for _, nodeName := range unmanagedNodes.List() {
|
|
if cidr, ok := az.routeCIDRs[nodeName]; ok {
|
|
routes = append(routes, &cloudprovider.Route{
|
|
Name: nodeName,
|
|
TargetNode: mapRouteNameToNodeName(az.ipv6DualStackEnabled, nodeName),
|
|
DestinationCIDR: cidr,
|
|
})
|
|
}
|
|
}
|
|
|
|
// ensure the route table is tagged as configured
|
|
tags, changed := az.ensureRouteTableTagged(&routeTable)
|
|
if changed {
|
|
klog.V(2).Infof("ListRoutes: updating tags on route table %s", to.String(routeTable.Name))
|
|
op, err := az.routeUpdater.addUpdateRouteTableTagsOperation(routeTableOperationUpdateTags, tags)
|
|
if err != nil {
|
|
klog.Errorf("ListRoutes: failed to add route table operation with error: %v", err)
|
|
return nil, err
|
|
}
|
|
|
|
// Wait for operation complete.
|
|
err = op.wait()
|
|
if err != nil {
|
|
klog.Errorf("ListRoutes: failed to update route table tags with error: %v", err)
|
|
return nil, err
|
|
}
|
|
}
|
|
|
|
return routes, nil
|
|
}
|
|
|
|
// Injectable for testing
|
|
func processRoutes(ipv6DualStackEnabled bool, routeTable network.RouteTable, exists bool, err error) ([]*cloudprovider.Route, error) {
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if !exists {
|
|
return []*cloudprovider.Route{}, nil
|
|
}
|
|
|
|
var kubeRoutes []*cloudprovider.Route
|
|
if routeTable.RouteTablePropertiesFormat != nil && routeTable.Routes != nil {
|
|
kubeRoutes = make([]*cloudprovider.Route, len(*routeTable.Routes))
|
|
for i, route := range *routeTable.Routes {
|
|
instance := mapRouteNameToNodeName(ipv6DualStackEnabled, *route.Name)
|
|
cidr := *route.AddressPrefix
|
|
klog.V(10).Infof("ListRoutes: * instance=%q, cidr=%q", instance, cidr)
|
|
|
|
kubeRoutes[i] = &cloudprovider.Route{
|
|
Name: *route.Name,
|
|
TargetNode: instance,
|
|
DestinationCIDR: cidr,
|
|
}
|
|
}
|
|
}
|
|
|
|
klog.V(10).Info("ListRoutes: FINISH")
|
|
return kubeRoutes, nil
|
|
}
|
|
|
|
func (az *Cloud) createRouteTable() error {
|
|
routeTable := network.RouteTable{
|
|
Name: to.StringPtr(az.RouteTableName),
|
|
Location: to.StringPtr(az.Location),
|
|
RouteTablePropertiesFormat: &network.RouteTablePropertiesFormat{},
|
|
}
|
|
|
|
klog.V(3).Infof("createRouteTableIfNotExists: creating routetable. routeTableName=%q", az.RouteTableName)
|
|
err := az.CreateOrUpdateRouteTable(routeTable)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
// Invalidate the cache right after updating
|
|
az.rtCache.Delete(az.RouteTableName)
|
|
return nil
|
|
}
|
|
|
|
// CreateRoute creates the described managed route
|
|
// route.Name will be ignored, although the cloud-provider may use nameHint
|
|
// to create a more user-meaningful name.
|
|
func (az *Cloud) CreateRoute(ctx context.Context, clusterName string, nameHint string, kubeRoute *cloudprovider.Route) error {
|
|
mc := metrics.NewMetricContext("routes", "create_route", az.ResourceGroup, az.SubscriptionID, "")
|
|
isOperationSucceeded := false
|
|
defer func() {
|
|
mc.ObserveOperationWithResult(isOperationSucceeded)
|
|
}()
|
|
|
|
// Returns for unmanaged nodes because azure cloud provider couldn't fetch information for them.
|
|
var targetIP string
|
|
nodeName := string(kubeRoute.TargetNode)
|
|
unmanaged, err := az.IsNodeUnmanaged(nodeName)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if unmanaged {
|
|
if az.ipv6DualStackEnabled {
|
|
//TODO (khenidak) add support for unmanaged nodes when the feature reaches beta
|
|
return fmt.Errorf("unmanaged nodes are not supported in dual stack mode")
|
|
}
|
|
klog.V(2).Infof("CreateRoute: omitting unmanaged node %q", kubeRoute.TargetNode)
|
|
az.routeCIDRsLock.Lock()
|
|
defer az.routeCIDRsLock.Unlock()
|
|
az.routeCIDRs[nodeName] = kubeRoute.DestinationCIDR
|
|
return nil
|
|
}
|
|
|
|
CIDRv6 := utilnet.IsIPv6CIDRString(string(kubeRoute.DestinationCIDR))
|
|
// if single stack IPv4 then get the IP for the primary ip config
|
|
// single stack IPv6 is supported on dual stack host. So the IPv6 IP is secondary IP for both single stack IPv6 and dual stack
|
|
// Get all private IPs for the machine and find the first one that matches the IPv6 family
|
|
if !az.ipv6DualStackEnabled && !CIDRv6 {
|
|
targetIP, _, err = az.getIPForMachine(kubeRoute.TargetNode)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
} else {
|
|
// for dual stack and single stack IPv6 we need to select
|
|
// a private ip that matches family of the cidr
|
|
klog.V(4).Infof("CreateRoute: create route instance=%q cidr=%q is in dual stack mode", kubeRoute.TargetNode, kubeRoute.DestinationCIDR)
|
|
nodePrivateIPs, err := az.getPrivateIPsForMachine(kubeRoute.TargetNode)
|
|
if nil != err {
|
|
klog.V(3).Infof("CreateRoute: create route: failed(GetPrivateIPsByNodeName) instance=%q cidr=%q with error=%v", kubeRoute.TargetNode, kubeRoute.DestinationCIDR, err)
|
|
return err
|
|
}
|
|
|
|
targetIP, err = findFirstIPByFamily(nodePrivateIPs, CIDRv6)
|
|
if nil != err {
|
|
klog.V(3).Infof("CreateRoute: create route: failed(findFirstIpByFamily) instance=%q cidr=%q with error=%v", kubeRoute.TargetNode, kubeRoute.DestinationCIDR, err)
|
|
return err
|
|
}
|
|
}
|
|
routeName := mapNodeNameToRouteName(az.ipv6DualStackEnabled, kubeRoute.TargetNode, string(kubeRoute.DestinationCIDR))
|
|
route := network.Route{
|
|
Name: to.StringPtr(routeName),
|
|
RoutePropertiesFormat: &network.RoutePropertiesFormat{
|
|
AddressPrefix: to.StringPtr(kubeRoute.DestinationCIDR),
|
|
NextHopType: network.RouteNextHopTypeVirtualAppliance,
|
|
NextHopIPAddress: to.StringPtr(targetIP),
|
|
},
|
|
}
|
|
|
|
klog.V(2).Infof("CreateRoute: creating route for clusterName=%q instance=%q cidr=%q", clusterName, kubeRoute.TargetNode, kubeRoute.DestinationCIDR)
|
|
op, err := az.routeUpdater.addRouteOperation(routeOperationAdd, route)
|
|
if err != nil {
|
|
klog.Errorf("CreateRoute failed for node %q with error: %v", kubeRoute.TargetNode, err)
|
|
return err
|
|
}
|
|
|
|
// Wait for operation complete.
|
|
err = op.wait()
|
|
if err != nil {
|
|
klog.Errorf("CreateRoute failed for node %q with error: %v", kubeRoute.TargetNode, err)
|
|
return err
|
|
}
|
|
|
|
klog.V(2).Infof("CreateRoute: route created. clusterName=%q instance=%q cidr=%q", clusterName, kubeRoute.TargetNode, kubeRoute.DestinationCIDR)
|
|
isOperationSucceeded = true
|
|
|
|
return nil
|
|
}
|
|
|
|
// DeleteRoute deletes the specified managed route
|
|
// Route should be as returned by ListRoutes
|
|
func (az *Cloud) DeleteRoute(ctx context.Context, clusterName string, kubeRoute *cloudprovider.Route) error {
|
|
mc := metrics.NewMetricContext("routes", "delete_route", az.ResourceGroup, az.SubscriptionID, "")
|
|
isOperationSucceeded := false
|
|
defer func() {
|
|
mc.ObserveOperationWithResult(isOperationSucceeded)
|
|
}()
|
|
|
|
// Returns for unmanaged nodes because azure cloud provider couldn't fetch information for them.
|
|
nodeName := string(kubeRoute.TargetNode)
|
|
unmanaged, err := az.IsNodeUnmanaged(nodeName)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if unmanaged {
|
|
klog.V(2).Infof("DeleteRoute: omitting unmanaged node %q", kubeRoute.TargetNode)
|
|
az.routeCIDRsLock.Lock()
|
|
defer az.routeCIDRsLock.Unlock()
|
|
delete(az.routeCIDRs, nodeName)
|
|
return nil
|
|
}
|
|
|
|
klog.V(2).Infof("DeleteRoute: deleting route. clusterName=%q instance=%q cidr=%q", clusterName, kubeRoute.TargetNode, kubeRoute.DestinationCIDR)
|
|
|
|
routeName := mapNodeNameToRouteName(az.ipv6DualStackEnabled, kubeRoute.TargetNode, string(kubeRoute.DestinationCIDR))
|
|
route := network.Route{
|
|
Name: to.StringPtr(routeName),
|
|
RoutePropertiesFormat: &network.RoutePropertiesFormat{},
|
|
}
|
|
op, err := az.routeUpdater.addRouteOperation(routeOperationDelete, route)
|
|
if err != nil {
|
|
klog.Errorf("DeleteRoute failed for node %q with error: %v", kubeRoute.TargetNode, err)
|
|
return err
|
|
}
|
|
|
|
// Wait for operation complete.
|
|
err = op.wait()
|
|
if err != nil {
|
|
klog.Errorf("DeleteRoute failed for node %q with error: %v", kubeRoute.TargetNode, err)
|
|
return err
|
|
}
|
|
|
|
klog.V(2).Infof("DeleteRoute: route deleted. clusterName=%q instance=%q cidr=%q", clusterName, kubeRoute.TargetNode, kubeRoute.DestinationCIDR)
|
|
isOperationSucceeded = true
|
|
|
|
return nil
|
|
}
|
|
|
|
// This must be kept in sync with mapRouteNameToNodeName.
|
|
// These two functions enable stashing the instance name in the route
|
|
// and then retrieving it later when listing. This is needed because
|
|
// Azure does not let you put tags/descriptions on the Route itself.
|
|
func mapNodeNameToRouteName(ipv6DualStackEnabled bool, nodeName types.NodeName, cidr string) string {
|
|
if !ipv6DualStackEnabled {
|
|
return fmt.Sprintf("%s", nodeName)
|
|
}
|
|
return fmt.Sprintf(routeNameFmt, nodeName, cidrtoRfc1035(cidr))
|
|
}
|
|
|
|
// Used with mapNodeNameToRouteName. See comment on mapNodeNameToRouteName.
|
|
func mapRouteNameToNodeName(ipv6DualStackEnabled bool, routeName string) types.NodeName {
|
|
if !ipv6DualStackEnabled {
|
|
return types.NodeName(routeName)
|
|
}
|
|
parts := strings.Split(routeName, routeNameSeparator)
|
|
nodeName := parts[0]
|
|
return types.NodeName(nodeName)
|
|
|
|
}
|
|
|
|
// given a list of ips, return the first one
|
|
// that matches the family requested
|
|
// error if no match, or failure to parse
|
|
// any of the ips
|
|
func findFirstIPByFamily(ips []string, v6 bool) (string, error) {
|
|
for _, ip := range ips {
|
|
bIPv6 := utilnet.IsIPv6String(ip)
|
|
if v6 == bIPv6 {
|
|
return ip, nil
|
|
}
|
|
}
|
|
return "", fmt.Errorf("no match found matching the ipfamily requested")
|
|
}
|
|
|
|
//strips : . /
|
|
func cidrtoRfc1035(cidr string) string {
|
|
cidr = strings.ReplaceAll(cidr, ":", "")
|
|
cidr = strings.ReplaceAll(cidr, ".", "")
|
|
cidr = strings.ReplaceAll(cidr, "/", "")
|
|
return cidr
|
|
}
|
|
|
|
// ensureRouteTableTagged ensures the route table is tagged as configured
|
|
func (az *Cloud) ensureRouteTableTagged(rt *network.RouteTable) (map[string]*string, bool) {
|
|
if az.Tags == "" {
|
|
return nil, false
|
|
}
|
|
changed := false
|
|
tags := parseTags(az.Tags)
|
|
if rt.Tags == nil {
|
|
rt.Tags = make(map[string]*string)
|
|
}
|
|
for k, v := range tags {
|
|
if vv, ok := rt.Tags[k]; !ok || !strings.EqualFold(to.String(v), to.String(vv)) {
|
|
rt.Tags[k] = v
|
|
changed = true
|
|
}
|
|
}
|
|
return rt.Tags, changed
|
|
}
|