2019-08-30 18:33:25 +00:00
|
|
|
/*
|
|
|
|
Copyright 2016 The Kubernetes Authors.
|
|
|
|
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
you may not use this file except in compliance with the License.
|
|
|
|
You may obtain a copy of the License at
|
|
|
|
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
See the License for the specific language governing permissions and
|
|
|
|
limitations under the License.
|
|
|
|
*/
|
|
|
|
|
|
|
|
package cloud
|
|
|
|
|
|
|
|
import (
|
|
|
|
"context"
|
|
|
|
"errors"
|
|
|
|
"fmt"
|
|
|
|
"time"
|
|
|
|
|
2020-03-26 21:07:15 +00:00
|
|
|
"k8s.io/api/core/v1"
|
2019-12-12 01:27:03 +00:00
|
|
|
apierrors "k8s.io/apimachinery/pkg/api/errors"
|
2019-08-30 18:33:25 +00:00
|
|
|
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
|
|
|
"k8s.io/apimachinery/pkg/types"
|
|
|
|
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
|
|
|
|
"k8s.io/apimachinery/pkg/util/wait"
|
|
|
|
coreinformers "k8s.io/client-go/informers/core/v1"
|
|
|
|
clientset "k8s.io/client-go/kubernetes"
|
|
|
|
"k8s.io/client-go/kubernetes/scheme"
|
|
|
|
v1core "k8s.io/client-go/kubernetes/typed/core/v1"
|
|
|
|
"k8s.io/client-go/tools/cache"
|
|
|
|
"k8s.io/client-go/tools/record"
|
|
|
|
clientretry "k8s.io/client-go/util/retry"
|
|
|
|
cloudprovider "k8s.io/cloud-provider"
|
2020-03-26 21:07:15 +00:00
|
|
|
cloudproviderapi "k8s.io/cloud-provider/api"
|
2019-12-12 01:27:03 +00:00
|
|
|
cloudnodeutil "k8s.io/cloud-provider/node/helpers"
|
2020-08-10 17:43:49 +00:00
|
|
|
"k8s.io/klog/v2"
|
2019-08-30 18:33:25 +00:00
|
|
|
)
|
|
|
|
|
2019-12-12 01:27:03 +00:00
|
|
|
// labelReconcileInfo lists Node labels to reconcile, and how to reconcile them.
|
|
|
|
// primaryKey and secondaryKey are keys of labels to reconcile.
|
|
|
|
// - If both keys exist, but their values don't match. Use the value from the
|
|
|
|
// primaryKey as the source of truth to reconcile.
|
|
|
|
// - If ensureSecondaryExists is true, and the secondaryKey does not
|
|
|
|
// exist, secondaryKey will be added with the value of the primaryKey.
|
|
|
|
var labelReconcileInfo = []struct {
|
|
|
|
primaryKey string
|
|
|
|
secondaryKey string
|
|
|
|
ensureSecondaryExists bool
|
|
|
|
}{
|
|
|
|
{
|
|
|
|
// Reconcile the beta and the GA zone label using the beta label as
|
|
|
|
// the source of truth
|
|
|
|
// TODO: switch the primary key to GA labels in v1.21
|
|
|
|
primaryKey: v1.LabelZoneFailureDomain,
|
|
|
|
secondaryKey: v1.LabelZoneFailureDomainStable,
|
|
|
|
ensureSecondaryExists: true,
|
|
|
|
},
|
|
|
|
{
|
|
|
|
// Reconcile the beta and the stable region label using the beta label as
|
|
|
|
// the source of truth
|
|
|
|
// TODO: switch the primary key to GA labels in v1.21
|
|
|
|
primaryKey: v1.LabelZoneRegion,
|
|
|
|
secondaryKey: v1.LabelZoneRegionStable,
|
|
|
|
ensureSecondaryExists: true,
|
|
|
|
},
|
|
|
|
{
|
|
|
|
// Reconcile the beta and the stable instance-type label using the beta label as
|
|
|
|
// the source of truth
|
|
|
|
// TODO: switch the primary key to GA labels in v1.21
|
|
|
|
primaryKey: v1.LabelInstanceType,
|
|
|
|
secondaryKey: v1.LabelInstanceTypeStable,
|
|
|
|
ensureSecondaryExists: true,
|
|
|
|
},
|
|
|
|
}
|
|
|
|
|
2019-08-30 18:33:25 +00:00
|
|
|
var UpdateNodeSpecBackoff = wait.Backoff{
|
|
|
|
Steps: 20,
|
|
|
|
Duration: 50 * time.Millisecond,
|
|
|
|
Jitter: 1.0,
|
|
|
|
}
|
|
|
|
|
|
|
|
type CloudNodeController struct {
|
|
|
|
nodeInformer coreinformers.NodeInformer
|
|
|
|
kubeClient clientset.Interface
|
|
|
|
recorder record.EventRecorder
|
|
|
|
|
|
|
|
cloud cloudprovider.Interface
|
|
|
|
|
|
|
|
nodeStatusUpdateFrequency time.Duration
|
|
|
|
}
|
|
|
|
|
|
|
|
// NewCloudNodeController creates a CloudNodeController object
|
|
|
|
func NewCloudNodeController(
|
|
|
|
nodeInformer coreinformers.NodeInformer,
|
|
|
|
kubeClient clientset.Interface,
|
|
|
|
cloud cloudprovider.Interface,
|
2019-12-12 01:27:03 +00:00
|
|
|
nodeStatusUpdateFrequency time.Duration) (*CloudNodeController, error) {
|
2019-08-30 18:33:25 +00:00
|
|
|
|
|
|
|
eventBroadcaster := record.NewBroadcaster()
|
|
|
|
recorder := eventBroadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "cloud-node-controller"})
|
2020-08-10 17:43:49 +00:00
|
|
|
eventBroadcaster.StartStructuredLogging(0)
|
2019-12-12 01:27:03 +00:00
|
|
|
|
|
|
|
klog.Infof("Sending events to api server.")
|
|
|
|
eventBroadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: kubeClient.CoreV1().Events("")})
|
|
|
|
|
2020-08-10 17:43:49 +00:00
|
|
|
_, instancesSupported := cloud.Instances()
|
|
|
|
_, instancesV2Supported := cloud.InstancesV2()
|
|
|
|
if !instancesSupported && !instancesV2Supported {
|
2019-12-12 01:27:03 +00:00
|
|
|
return nil, errors.New("cloud provider does not support instances")
|
2019-08-30 18:33:25 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
cnc := &CloudNodeController{
|
|
|
|
nodeInformer: nodeInformer,
|
|
|
|
kubeClient: kubeClient,
|
|
|
|
recorder: recorder,
|
|
|
|
cloud: cloud,
|
|
|
|
nodeStatusUpdateFrequency: nodeStatusUpdateFrequency,
|
|
|
|
}
|
|
|
|
|
|
|
|
// Use shared informer to listen to add/update of nodes. Note that any nodes
|
|
|
|
// that exist before node controller starts will show up in the update method
|
|
|
|
cnc.nodeInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
|
2019-12-12 01:27:03 +00:00
|
|
|
AddFunc: func(obj interface{}) { cnc.AddCloudNode(context.TODO(), obj) },
|
|
|
|
UpdateFunc: func(oldObj, newObj interface{}) { cnc.UpdateCloudNode(context.TODO(), oldObj, newObj) },
|
2019-08-30 18:33:25 +00:00
|
|
|
})
|
|
|
|
|
2019-12-12 01:27:03 +00:00
|
|
|
return cnc, nil
|
2019-08-30 18:33:25 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// This controller updates newly registered nodes with information
|
|
|
|
// from the cloud provider. This call is blocking so should be called
|
|
|
|
// via a goroutine
|
|
|
|
func (cnc *CloudNodeController) Run(stopCh <-chan struct{}) {
|
|
|
|
defer utilruntime.HandleCrash()
|
|
|
|
|
|
|
|
// The following loops run communicate with the APIServer with a worst case complexity
|
|
|
|
// of O(num_nodes) per cycle. These functions are justified here because these events fire
|
|
|
|
// very infrequently. DO NOT MODIFY this to perform frequent operations.
|
|
|
|
|
|
|
|
// Start a loop to periodically update the node addresses obtained from the cloud
|
2019-12-12 01:27:03 +00:00
|
|
|
wait.Until(func() { cnc.UpdateNodeStatus(context.TODO()) }, cnc.nodeStatusUpdateFrequency, stopCh)
|
2019-08-30 18:33:25 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// UpdateNodeStatus updates the node status, such as node addresses
|
2019-12-12 01:27:03 +00:00
|
|
|
func (cnc *CloudNodeController) UpdateNodeStatus(ctx context.Context) {
|
2020-03-26 21:07:15 +00:00
|
|
|
nodes, err := cnc.kubeClient.CoreV1().Nodes().List(context.TODO(), metav1.ListOptions{ResourceVersion: "0"})
|
2019-08-30 18:33:25 +00:00
|
|
|
if err != nil {
|
|
|
|
klog.Errorf("Error monitoring node status: %v", err)
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
for i := range nodes.Items {
|
2020-08-10 17:43:49 +00:00
|
|
|
cnc.updateNodeAddress(ctx, &nodes.Items[i])
|
2019-12-12 01:27:03 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
for _, node := range nodes.Items {
|
|
|
|
err = cnc.reconcileNodeLabels(node.Name)
|
|
|
|
if err != nil {
|
|
|
|
klog.Errorf("Error reconciling node labels for node %q, err: %v", node.Name, err)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// reconcileNodeLabels reconciles node labels transitioning from beta to GA
|
|
|
|
func (cnc *CloudNodeController) reconcileNodeLabels(nodeName string) error {
|
|
|
|
node, err := cnc.nodeInformer.Lister().Get(nodeName)
|
|
|
|
if err != nil {
|
|
|
|
// If node not found, just ignore it.
|
|
|
|
if apierrors.IsNotFound(err) {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
if node.Labels == nil {
|
|
|
|
// Nothing to reconcile.
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
labelsToUpdate := map[string]string{}
|
|
|
|
for _, r := range labelReconcileInfo {
|
|
|
|
primaryValue, primaryExists := node.Labels[r.primaryKey]
|
|
|
|
secondaryValue, secondaryExists := node.Labels[r.secondaryKey]
|
|
|
|
|
|
|
|
if !primaryExists {
|
|
|
|
// The primary label key does not exist. This should not happen
|
|
|
|
// within our supported version skew range, when no external
|
|
|
|
// components/factors modifying the node object. Ignore this case.
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
if secondaryExists && primaryValue != secondaryValue {
|
|
|
|
// Secondary label exists, but not consistent with the primary
|
|
|
|
// label. Need to reconcile.
|
|
|
|
labelsToUpdate[r.secondaryKey] = primaryValue
|
|
|
|
|
|
|
|
} else if !secondaryExists && r.ensureSecondaryExists {
|
|
|
|
// Apply secondary label based on primary label.
|
|
|
|
labelsToUpdate[r.secondaryKey] = primaryValue
|
|
|
|
}
|
2019-08-30 18:33:25 +00:00
|
|
|
}
|
2019-12-12 01:27:03 +00:00
|
|
|
|
|
|
|
if len(labelsToUpdate) == 0 {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
if !cloudnodeutil.AddOrUpdateLabelsOnNode(cnc.kubeClient, labelsToUpdate, node) {
|
|
|
|
return fmt.Errorf("failed update labels for node %+v", node)
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
2019-08-30 18:33:25 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// UpdateNodeAddress updates the nodeAddress of a single node
|
2020-08-10 17:43:49 +00:00
|
|
|
func (cnc *CloudNodeController) updateNodeAddress(ctx context.Context, node *v1.Node) {
|
2019-08-30 18:33:25 +00:00
|
|
|
// Do not process nodes that are still tainted
|
|
|
|
cloudTaint := getCloudTaint(node.Spec.Taints)
|
|
|
|
if cloudTaint != nil {
|
|
|
|
klog.V(5).Infof("This node %s is still tainted. Will not process.", node.Name)
|
|
|
|
return
|
|
|
|
}
|
2020-08-10 17:43:49 +00:00
|
|
|
|
|
|
|
instanceMetadataGetter := func(providerID string, node *v1.Node) (*cloudprovider.InstanceMetadata, error) {
|
|
|
|
if instancesV2, ok := cnc.cloud.InstancesV2(); instancesV2 != nil && ok {
|
|
|
|
return instancesV2.InstanceMetadata(ctx, node)
|
|
|
|
}
|
|
|
|
|
|
|
|
// If InstancesV2 not implement, use Instances.
|
|
|
|
instances, ok := cnc.cloud.Instances()
|
|
|
|
if !ok {
|
|
|
|
return nil, fmt.Errorf("failed to get instances from cloud provider")
|
|
|
|
}
|
|
|
|
|
|
|
|
nodeAddresses, err := getNodeAddressesByProviderIDOrName(ctx, instances, node.Spec.ProviderID, node.Name)
|
|
|
|
if err != nil {
|
|
|
|
klog.Errorf("Error getting node addresses for node %q: %v", node.Name, err)
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
return &cloudprovider.InstanceMetadata{
|
|
|
|
NodeAddresses: nodeAddresses,
|
|
|
|
}, nil
|
2019-08-30 18:33:25 +00:00
|
|
|
}
|
|
|
|
|
2020-08-10 17:43:49 +00:00
|
|
|
instanceMeta, err := instanceMetadataGetter(node.Spec.ProviderID, node)
|
2019-08-30 18:33:25 +00:00
|
|
|
if err != nil {
|
2020-08-10 17:43:49 +00:00
|
|
|
utilruntime.HandleError(err)
|
2019-08-30 18:33:25 +00:00
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2020-08-10 17:43:49 +00:00
|
|
|
nodeAddresses := instanceMeta.NodeAddresses
|
2019-08-30 18:33:25 +00:00
|
|
|
if len(nodeAddresses) == 0 {
|
|
|
|
klog.V(5).Infof("Skipping node address update for node %q since cloud provider did not return any", node.Name)
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
// Check if a hostname address exists in the cloud provided addresses
|
|
|
|
hostnameExists := false
|
|
|
|
for i := range nodeAddresses {
|
|
|
|
if nodeAddresses[i].Type == v1.NodeHostName {
|
|
|
|
hostnameExists = true
|
2019-12-12 01:27:03 +00:00
|
|
|
break
|
2019-08-30 18:33:25 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
// If hostname was not present in cloud provided addresses, use the hostname
|
|
|
|
// from the existing node (populated by kubelet)
|
|
|
|
if !hostnameExists {
|
|
|
|
for _, addr := range node.Status.Addresses {
|
|
|
|
if addr.Type == v1.NodeHostName {
|
|
|
|
nodeAddresses = append(nodeAddresses, addr)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// If nodeIP was suggested by user, ensure that
|
|
|
|
// it can be found in the cloud as well (consistent with the behaviour in kubelet)
|
2020-08-10 17:43:49 +00:00
|
|
|
if nodeIP, ok := ensureNodeProvidedIPExists(node, nodeAddresses); ok && nodeIP == nil {
|
|
|
|
klog.Errorf("Specified Node IP not found in cloudprovider for node %q", node.Name)
|
|
|
|
return
|
2019-08-30 18:33:25 +00:00
|
|
|
}
|
|
|
|
if !nodeAddressesChangeDetected(node.Status.Addresses, nodeAddresses) {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
newNode := node.DeepCopy()
|
|
|
|
newNode.Status.Addresses = nodeAddresses
|
2020-08-10 17:43:49 +00:00
|
|
|
if _, _, err := cloudnodeutil.PatchNodeStatus(cnc.kubeClient.CoreV1(), types.NodeName(node.Name), node, newNode); err != nil {
|
2019-08-30 18:33:25 +00:00
|
|
|
klog.Errorf("Error patching node with cloud ip addresses = [%v]", err)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-03-26 21:07:15 +00:00
|
|
|
// nodeModifier is used to carry changes to node objects across multiple attempts to update them
|
|
|
|
// in a retry-if-conflict loop.
|
|
|
|
type nodeModifier func(*v1.Node)
|
|
|
|
|
2019-12-12 01:27:03 +00:00
|
|
|
func (cnc *CloudNodeController) UpdateCloudNode(ctx context.Context, _, newObj interface{}) {
|
2019-08-30 18:33:25 +00:00
|
|
|
node, ok := newObj.(*v1.Node)
|
|
|
|
if !ok {
|
|
|
|
utilruntime.HandleError(fmt.Errorf("unexpected object type: %v", newObj))
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
cloudTaint := getCloudTaint(node.Spec.Taints)
|
|
|
|
if cloudTaint == nil {
|
|
|
|
// The node has already been initialized so nothing to do.
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2019-12-12 01:27:03 +00:00
|
|
|
cnc.initializeNode(ctx, node)
|
2019-08-30 18:33:25 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// AddCloudNode handles initializing new nodes registered with the cloud taint.
|
2019-12-12 01:27:03 +00:00
|
|
|
func (cnc *CloudNodeController) AddCloudNode(ctx context.Context, obj interface{}) {
|
2019-08-30 18:33:25 +00:00
|
|
|
node := obj.(*v1.Node)
|
|
|
|
|
|
|
|
cloudTaint := getCloudTaint(node.Spec.Taints)
|
|
|
|
if cloudTaint == nil {
|
|
|
|
klog.V(2).Infof("This node %s is registered without the cloud taint. Will not process.", node.Name)
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2019-12-12 01:27:03 +00:00
|
|
|
cnc.initializeNode(ctx, node)
|
2019-08-30 18:33:25 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// This processes nodes that were added into the cluster, and cloud initialize them if appropriate
|
2019-12-12 01:27:03 +00:00
|
|
|
func (cnc *CloudNodeController) initializeNode(ctx context.Context, node *v1.Node) {
|
2020-03-26 21:07:15 +00:00
|
|
|
klog.Infof("Initializing node %s with cloud provider", node.Name)
|
2019-08-30 18:33:25 +00:00
|
|
|
|
|
|
|
err := clientretry.RetryOnConflict(UpdateNodeSpecBackoff, func() error {
|
|
|
|
// TODO(wlan0): Move this logic to the route controller using the node taint instead of condition
|
|
|
|
// Since there are node taints, do we still need this?
|
|
|
|
// This condition marks the node as unusable until routes are initialized in the cloud provider
|
|
|
|
if cnc.cloud.ProviderName() == "gce" {
|
2019-12-12 01:27:03 +00:00
|
|
|
if err := cloudnodeutil.SetNodeCondition(cnc.kubeClient, types.NodeName(node.Name), v1.NodeCondition{
|
2019-08-30 18:33:25 +00:00
|
|
|
Type: v1.NodeNetworkUnavailable,
|
|
|
|
Status: v1.ConditionTrue,
|
|
|
|
Reason: "NoRouteCreated",
|
|
|
|
Message: "Node created without a route",
|
|
|
|
LastTransitionTime: metav1.Now(),
|
|
|
|
}); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
2020-03-26 21:07:15 +00:00
|
|
|
return nil
|
|
|
|
})
|
|
|
|
if err != nil {
|
|
|
|
utilruntime.HandleError(err)
|
|
|
|
return
|
|
|
|
}
|
2019-08-30 18:33:25 +00:00
|
|
|
|
2020-03-26 21:07:15 +00:00
|
|
|
curNode, err := cnc.kubeClient.CoreV1().Nodes().Get(context.TODO(), node.Name, metav1.GetOptions{})
|
|
|
|
if err != nil {
|
|
|
|
utilruntime.HandleError(fmt.Errorf("failed to get node %s: %v", node.Name, err))
|
|
|
|
return
|
|
|
|
}
|
2019-08-30 18:33:25 +00:00
|
|
|
|
2020-03-26 21:07:15 +00:00
|
|
|
cloudTaint := getCloudTaint(curNode.Spec.Taints)
|
|
|
|
if cloudTaint == nil {
|
|
|
|
// Node object received from event had the cloud taint but was outdated,
|
|
|
|
// the node has actually already been initialized.
|
|
|
|
return
|
|
|
|
}
|
2019-08-30 18:33:25 +00:00
|
|
|
|
2020-08-10 17:43:49 +00:00
|
|
|
// TODO: getNodeModifiersFromCloudProvider and updateNodeAddress both call cloud api to get instanceMetadata,
|
|
|
|
// get instanceMetadata and pass it to getNodeModifiersFromCloudProvider and updateNodeAddress which reduces api calls.
|
|
|
|
nodeModifiers, err := cnc.getNodeModifiersFromCloudProvider(ctx, curNode)
|
2020-03-26 21:07:15 +00:00
|
|
|
if err != nil {
|
|
|
|
utilruntime.HandleError(fmt.Errorf("failed to initialize node %s at cloudprovider: %v", node.Name, err))
|
|
|
|
return
|
|
|
|
}
|
2019-08-30 18:33:25 +00:00
|
|
|
|
2020-03-26 21:07:15 +00:00
|
|
|
nodeModifiers = append(nodeModifiers, func(n *v1.Node) {
|
|
|
|
n.Spec.Taints = excludeCloudTaint(n.Spec.Taints)
|
|
|
|
})
|
2019-08-30 18:33:25 +00:00
|
|
|
|
2020-03-26 21:07:15 +00:00
|
|
|
err = clientretry.RetryOnConflict(UpdateNodeSpecBackoff, func() error {
|
|
|
|
curNode, err := cnc.kubeClient.CoreV1().Nodes().Get(context.TODO(), node.Name, metav1.GetOptions{})
|
|
|
|
if err != nil {
|
2019-08-30 18:33:25 +00:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2020-03-26 21:07:15 +00:00
|
|
|
for _, modify := range nodeModifiers {
|
|
|
|
modify(curNode)
|
2019-08-30 18:33:25 +00:00
|
|
|
}
|
|
|
|
|
2020-03-26 21:07:15 +00:00
|
|
|
_, err = cnc.kubeClient.CoreV1().Nodes().Update(context.TODO(), curNode, metav1.UpdateOptions{})
|
2019-08-30 18:33:25 +00:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2020-03-26 21:07:15 +00:00
|
|
|
|
2019-08-30 18:33:25 +00:00
|
|
|
// After adding, call UpdateNodeAddress to set the CloudProvider provided IPAddresses
|
|
|
|
// So that users do not see any significant delay in IP addresses being filled into the node
|
2020-08-10 17:43:49 +00:00
|
|
|
cnc.updateNodeAddress(ctx, curNode)
|
2019-08-30 18:33:25 +00:00
|
|
|
|
|
|
|
klog.Infof("Successfully initialized node %s with cloud provider", node.Name)
|
|
|
|
return nil
|
|
|
|
})
|
|
|
|
if err != nil {
|
|
|
|
utilruntime.HandleError(err)
|
|
|
|
return
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-03-26 21:07:15 +00:00
|
|
|
// getNodeModifiersFromCloudProvider returns a slice of nodeModifiers that update
|
|
|
|
// a node object with provider-specific information.
|
|
|
|
// All of the returned functions are idempotent, because they are used in a retry-if-conflict
|
|
|
|
// loop, meaning they could get called multiple times.
|
2020-08-10 17:43:49 +00:00
|
|
|
func (cnc *CloudNodeController) getNodeModifiersFromCloudProvider(ctx context.Context, node *v1.Node) ([]nodeModifier, error) {
|
2020-05-26 22:59:35 +00:00
|
|
|
var (
|
|
|
|
nodeModifiers []nodeModifier
|
|
|
|
providerID string
|
|
|
|
err error
|
|
|
|
)
|
2020-03-26 21:07:15 +00:00
|
|
|
|
2020-11-14 08:06:46 +00:00
|
|
|
// skip the provider ID check for InstancesV2, GetInstanceProviderID only builds a ProviderID for Instances
|
|
|
|
// we will set up providerID for InstancesV2 if instanceMeta has non-empty providerID
|
|
|
|
_, instancesV2Enabled := cnc.cloud.InstancesV2()
|
|
|
|
if node.Spec.ProviderID == "" && !instancesV2Enabled {
|
2020-05-26 22:59:35 +00:00
|
|
|
providerID, err = cloudprovider.GetInstanceProviderID(ctx, cnc.cloud, types.NodeName(node.Name))
|
2020-03-26 21:07:15 +00:00
|
|
|
if err == nil {
|
|
|
|
nodeModifiers = append(nodeModifiers, func(n *v1.Node) {
|
|
|
|
if n.Spec.ProviderID == "" {
|
|
|
|
n.Spec.ProviderID = providerID
|
|
|
|
}
|
|
|
|
})
|
|
|
|
} else if err == cloudprovider.NotImplemented {
|
|
|
|
// if the cloud provider being used does not support provider IDs,
|
|
|
|
// we can safely continue since we will attempt to set node
|
|
|
|
// addresses given the node name in getNodeAddressesByProviderIDOrName
|
|
|
|
klog.Warningf("cloud provider does not set node provider ID, using node name to discover node %s", node.Name)
|
|
|
|
} else {
|
|
|
|
// if the cloud provider being used supports provider IDs, we want
|
|
|
|
// to propagate the error so that we re-try in the future; if we
|
|
|
|
// do not, the taint will be removed, and this will not be retried
|
|
|
|
return nil, err
|
|
|
|
}
|
2020-05-26 22:59:35 +00:00
|
|
|
} else {
|
|
|
|
providerID = node.Spec.ProviderID
|
2020-03-26 21:07:15 +00:00
|
|
|
}
|
|
|
|
|
2020-08-10 17:43:49 +00:00
|
|
|
instanceMetadataGetter := func(providerID string, nodeName string, node *v1.Node) (*cloudprovider.InstanceMetadata, error) {
|
|
|
|
if instancesV2, ok := cnc.cloud.InstancesV2(); instancesV2 != nil && ok {
|
|
|
|
return instancesV2.InstanceMetadata(ctx, node)
|
|
|
|
}
|
|
|
|
|
|
|
|
// If InstancesV2 not implement, use Instances.
|
|
|
|
instances, ok := cnc.cloud.Instances()
|
|
|
|
if !ok {
|
|
|
|
return nil, fmt.Errorf("failed to get instances from cloud provider")
|
|
|
|
}
|
|
|
|
nodeAddresses, err := getNodeAddressesByProviderIDOrName(ctx, instances, providerID, nodeName)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
instanceType, err := getInstanceTypeByProviderIDOrName(ctx, instances, providerID, nodeName)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
return &cloudprovider.InstanceMetadata{
|
|
|
|
InstanceType: instanceType,
|
|
|
|
NodeAddresses: nodeAddresses,
|
|
|
|
}, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
instanceMeta, err := instanceMetadataGetter(providerID, node.Name, node)
|
2020-03-26 21:07:15 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
2020-11-14 08:06:46 +00:00
|
|
|
if node.Spec.ProviderID == "" && instanceMeta.ProviderID != "" {
|
|
|
|
nodeModifiers = append(nodeModifiers, func(n *v1.Node) { n.Spec.ProviderID = instanceMeta.ProviderID })
|
|
|
|
}
|
|
|
|
|
2020-03-26 21:07:15 +00:00
|
|
|
// If user provided an IP address, ensure that IP address is found
|
|
|
|
// in the cloud provider before removing the taint on the node
|
2020-08-10 17:43:49 +00:00
|
|
|
if nodeIP, ok := ensureNodeProvidedIPExists(node, instanceMeta.NodeAddresses); ok && nodeIP == nil {
|
|
|
|
return nil, errors.New("failed to find kubelet node IP from cloud provider")
|
2020-03-26 21:07:15 +00:00
|
|
|
}
|
|
|
|
|
2020-08-10 17:43:49 +00:00
|
|
|
if instanceMeta.InstanceType != "" {
|
|
|
|
klog.V(2).Infof("Adding node label from cloud provider: %s=%s", v1.LabelInstanceType, instanceMeta.InstanceType)
|
|
|
|
klog.V(2).Infof("Adding node label from cloud provider: %s=%s", v1.LabelInstanceTypeStable, instanceMeta.InstanceType)
|
2020-03-26 21:07:15 +00:00
|
|
|
nodeModifiers = append(nodeModifiers, func(n *v1.Node) {
|
|
|
|
if n.Labels == nil {
|
|
|
|
n.Labels = map[string]string{}
|
|
|
|
}
|
2020-08-10 17:43:49 +00:00
|
|
|
n.Labels[v1.LabelInstanceType] = instanceMeta.InstanceType
|
|
|
|
n.Labels[v1.LabelInstanceTypeStable] = instanceMeta.InstanceType
|
2020-03-26 21:07:15 +00:00
|
|
|
})
|
|
|
|
}
|
|
|
|
|
|
|
|
if zones, ok := cnc.cloud.Zones(); ok {
|
2020-05-26 22:59:35 +00:00
|
|
|
zone, err := getZoneByProviderIDOrName(ctx, zones, providerID, node.Name)
|
2020-03-26 21:07:15 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("failed to get zone from cloud provider: %v", err)
|
|
|
|
}
|
|
|
|
if zone.FailureDomain != "" {
|
|
|
|
klog.V(2).Infof("Adding node label from cloud provider: %s=%s", v1.LabelZoneFailureDomain, zone.FailureDomain)
|
|
|
|
klog.V(2).Infof("Adding node label from cloud provider: %s=%s", v1.LabelZoneFailureDomainStable, zone.FailureDomain)
|
|
|
|
nodeModifiers = append(nodeModifiers, func(n *v1.Node) {
|
|
|
|
if n.Labels == nil {
|
|
|
|
n.Labels = map[string]string{}
|
|
|
|
}
|
|
|
|
n.Labels[v1.LabelZoneFailureDomain] = zone.FailureDomain
|
|
|
|
n.Labels[v1.LabelZoneFailureDomainStable] = zone.FailureDomain
|
|
|
|
})
|
|
|
|
}
|
|
|
|
if zone.Region != "" {
|
|
|
|
klog.V(2).Infof("Adding node label from cloud provider: %s=%s", v1.LabelZoneRegion, zone.Region)
|
|
|
|
klog.V(2).Infof("Adding node label from cloud provider: %s=%s", v1.LabelZoneRegionStable, zone.Region)
|
|
|
|
nodeModifiers = append(nodeModifiers, func(n *v1.Node) {
|
|
|
|
if n.Labels == nil {
|
|
|
|
n.Labels = map[string]string{}
|
|
|
|
}
|
|
|
|
n.Labels[v1.LabelZoneRegion] = zone.Region
|
|
|
|
n.Labels[v1.LabelZoneRegionStable] = zone.Region
|
|
|
|
})
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return nodeModifiers, nil
|
|
|
|
}
|
|
|
|
|
2019-08-30 18:33:25 +00:00
|
|
|
func getCloudTaint(taints []v1.Taint) *v1.Taint {
|
|
|
|
for _, taint := range taints {
|
2020-03-26 21:07:15 +00:00
|
|
|
if taint.Key == cloudproviderapi.TaintExternalCloudProvider {
|
2019-08-30 18:33:25 +00:00
|
|
|
return &taint
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func excludeCloudTaint(taints []v1.Taint) []v1.Taint {
|
|
|
|
newTaints := []v1.Taint{}
|
|
|
|
for _, taint := range taints {
|
2020-03-26 21:07:15 +00:00
|
|
|
if taint.Key == cloudproviderapi.TaintExternalCloudProvider {
|
2019-08-30 18:33:25 +00:00
|
|
|
continue
|
|
|
|
}
|
|
|
|
newTaints = append(newTaints, taint)
|
|
|
|
}
|
|
|
|
return newTaints
|
|
|
|
}
|
|
|
|
|
|
|
|
// ensureNodeExistsByProviderID checks if the instance exists by the provider id,
|
|
|
|
// If provider id in spec is empty it calls instanceId with node name to get provider id
|
2019-12-12 01:27:03 +00:00
|
|
|
func ensureNodeExistsByProviderID(ctx context.Context, instances cloudprovider.Instances, node *v1.Node) (bool, error) {
|
2019-08-30 18:33:25 +00:00
|
|
|
providerID := node.Spec.ProviderID
|
|
|
|
if providerID == "" {
|
|
|
|
var err error
|
2019-12-12 01:27:03 +00:00
|
|
|
providerID, err = instances.InstanceID(ctx, types.NodeName(node.Name))
|
2019-08-30 18:33:25 +00:00
|
|
|
if err != nil {
|
|
|
|
if err == cloudprovider.InstanceNotFound {
|
|
|
|
return false, nil
|
|
|
|
}
|
|
|
|
return false, err
|
|
|
|
}
|
|
|
|
|
|
|
|
if providerID == "" {
|
|
|
|
klog.Warningf("Cannot find valid providerID for node name %q, assuming non existence", node.Name)
|
|
|
|
return false, nil
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-12-12 01:27:03 +00:00
|
|
|
return instances.InstanceExistsByProviderID(ctx, providerID)
|
2019-08-30 18:33:25 +00:00
|
|
|
}
|
|
|
|
|
2020-05-26 22:59:35 +00:00
|
|
|
func getNodeAddressesByProviderIDOrName(ctx context.Context, instances cloudprovider.Instances, providerID, nodeName string) ([]v1.NodeAddress, error) {
|
|
|
|
nodeAddresses, err := instances.NodeAddressesByProviderID(ctx, providerID)
|
2019-08-30 18:33:25 +00:00
|
|
|
if err != nil {
|
|
|
|
providerIDErr := err
|
2020-05-26 22:59:35 +00:00
|
|
|
nodeAddresses, err = instances.NodeAddresses(ctx, types.NodeName(nodeName))
|
2019-08-30 18:33:25 +00:00
|
|
|
if err != nil {
|
2019-12-12 01:27:03 +00:00
|
|
|
return nil, fmt.Errorf("error fetching node by provider ID: %v, and error by node name: %v", providerIDErr, err)
|
2019-08-30 18:33:25 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
return nodeAddresses, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func nodeAddressesChangeDetected(addressSet1, addressSet2 []v1.NodeAddress) bool {
|
|
|
|
if len(addressSet1) != len(addressSet2) {
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
addressMap1 := map[v1.NodeAddressType]string{}
|
|
|
|
|
|
|
|
for i := range addressSet1 {
|
|
|
|
addressMap1[addressSet1[i].Type] = addressSet1[i].Address
|
|
|
|
}
|
|
|
|
|
|
|
|
for _, v := range addressSet2 {
|
|
|
|
if addressMap1[v.Type] != v.Address {
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
|
|
|
func ensureNodeProvidedIPExists(node *v1.Node, nodeAddresses []v1.NodeAddress) (*v1.NodeAddress, bool) {
|
|
|
|
var nodeIP *v1.NodeAddress
|
|
|
|
nodeIPExists := false
|
2020-08-10 17:43:49 +00:00
|
|
|
if providedIP, ok := node.ObjectMeta.Annotations[cloudproviderapi.AnnotationAlphaProvidedIPAddr]; ok {
|
2019-08-30 18:33:25 +00:00
|
|
|
nodeIPExists = true
|
|
|
|
for i := range nodeAddresses {
|
|
|
|
if nodeAddresses[i].Address == providedIP {
|
|
|
|
nodeIP = &nodeAddresses[i]
|
|
|
|
break
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return nodeIP, nodeIPExists
|
|
|
|
}
|
|
|
|
|
2020-05-26 22:59:35 +00:00
|
|
|
// getInstanceTypeByProviderIDOrName will attempt to get the instance type of node using its providerID
|
|
|
|
// then it's name. If both attempts fail, an error is returned.
|
|
|
|
func getInstanceTypeByProviderIDOrName(ctx context.Context, instances cloudprovider.Instances, providerID, nodeName string) (string, error) {
|
|
|
|
instanceType, err := instances.InstanceTypeByProviderID(ctx, providerID)
|
2019-08-30 18:33:25 +00:00
|
|
|
if err != nil {
|
|
|
|
providerIDErr := err
|
2020-05-26 22:59:35 +00:00
|
|
|
instanceType, err = instances.InstanceType(ctx, types.NodeName(nodeName))
|
2019-08-30 18:33:25 +00:00
|
|
|
if err != nil {
|
|
|
|
return "", fmt.Errorf("InstanceType: Error fetching by providerID: %v Error fetching by NodeName: %v", providerIDErr, err)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return instanceType, err
|
|
|
|
}
|
|
|
|
|
|
|
|
// getZoneByProviderIDorName will attempt to get the zone of node using its providerID
|
2020-05-26 22:59:35 +00:00
|
|
|
// then it's name. If both attempts fail, an error is returned.
|
|
|
|
func getZoneByProviderIDOrName(ctx context.Context, zones cloudprovider.Zones, providerID, nodeName string) (cloudprovider.Zone, error) {
|
|
|
|
zone, err := zones.GetZoneByProviderID(ctx, providerID)
|
2019-08-30 18:33:25 +00:00
|
|
|
if err != nil {
|
|
|
|
providerIDErr := err
|
2020-05-26 22:59:35 +00:00
|
|
|
zone, err = zones.GetZoneByNodeName(ctx, types.NodeName(nodeName))
|
2019-08-30 18:33:25 +00:00
|
|
|
if err != nil {
|
|
|
|
return cloudprovider.Zone{}, fmt.Errorf("Zone: Error fetching by providerID: %v Error fetching by NodeName: %v", providerIDErr, err)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return zone, nil
|
|
|
|
}
|