mirror of https://github.com/k3s-io/k3s
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
810 lines
25 KiB
810 lines
25 KiB
// +build !providerless
|
|
|
|
/*
|
|
Copyright 2017 The Kubernetes Authors.
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
you may not use this file except in compliance with the License.
|
|
You may obtain a copy of the License at
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
See the License for the specific language governing permissions and
|
|
limitations under the License.
|
|
*/
|
|
|
|
package gce
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"net"
|
|
"net/http"
|
|
"strings"
|
|
"time"
|
|
|
|
"cloud.google.com/go/compute/metadata"
|
|
computebeta "google.golang.org/api/compute/v0.beta"
|
|
compute "google.golang.org/api/compute/v1"
|
|
"k8s.io/klog/v2"
|
|
|
|
"github.com/GoogleCloudPlatform/k8s-cloud-provider/pkg/cloud"
|
|
"github.com/GoogleCloudPlatform/k8s-cloud-provider/pkg/cloud/filter"
|
|
"github.com/GoogleCloudPlatform/k8s-cloud-provider/pkg/cloud/meta"
|
|
"k8s.io/api/core/v1"
|
|
"k8s.io/apimachinery/pkg/types"
|
|
"k8s.io/apimachinery/pkg/util/sets"
|
|
"k8s.io/apimachinery/pkg/util/wait"
|
|
cloudprovider "k8s.io/cloud-provider"
|
|
)
|
|
|
|
// Zone default and metadata-server path templates used by the instance helpers.
const (
	// defaultZone is what getZone reports for a node that carries no zone label.
	defaultZone = ""

	// networkInterfaceAccessConfigs is the metadata path template listing the
	// access configs of one network interface; %s is the interface entry.
	networkInterfaceAccessConfigs = "instance/network-interfaces/%s/access-configs"
	// networkInterfaceExternalIP is the metadata path template for the external
	// IP of one access config; the verbs are the interface entry and the
	// access-config entry, in that order.
	networkInterfaceExternalIP = "instance/network-interfaces/%s/access-configs/%s/external-ip"
	// networkInterfaceIP is the metadata path template for the internal IP of
	// one network interface; %s is the interface entry.
	networkInterfaceIP = "instance/network-interfaces/%s/ip"
)
|
|
|
|
func newInstancesMetricContext(request, zone string) *metricContext {
|
|
return newGenericMetricContext("instances", request, unusedMetricLabel, zone, computeV1Version)
|
|
}
|
|
|
|
func splitNodesByZone(nodes []*v1.Node) map[string][]*v1.Node {
|
|
zones := make(map[string][]*v1.Node)
|
|
for _, n := range nodes {
|
|
z := getZone(n)
|
|
if z != defaultZone {
|
|
zones[z] = append(zones[z], n)
|
|
}
|
|
}
|
|
return zones
|
|
}
|
|
|
|
func getZone(n *v1.Node) string {
|
|
zone, ok := n.Labels[v1.LabelFailureDomainBetaZone]
|
|
if !ok {
|
|
return defaultZone
|
|
}
|
|
return zone
|
|
}
|
|
|
|
func makeHostURL(projectsAPIEndpoint, projectID, zone, host string) string {
|
|
host = canonicalizeInstanceName(host)
|
|
return projectsAPIEndpoint + strings.Join([]string{projectID, "zones", zone, "instances", host}, "/")
|
|
}
|
|
|
|
// ToInstanceReferences returns instance references by links
|
|
func (g *Cloud) ToInstanceReferences(zone string, instanceNames []string) (refs []*compute.InstanceReference) {
|
|
for _, ins := range instanceNames {
|
|
instanceLink := makeHostURL(g.service.BasePath, g.projectID, zone, ins)
|
|
refs = append(refs, &compute.InstanceReference{Instance: instanceLink})
|
|
}
|
|
return refs
|
|
}
|
|
|
|
// NodeAddresses is an implementation of Instances.NodeAddresses.
|
|
func (g *Cloud) NodeAddresses(ctx context.Context, nodeName types.NodeName) ([]v1.NodeAddress, error) {
|
|
timeoutCtx, cancel := context.WithTimeout(ctx, 1*time.Hour)
|
|
defer cancel()
|
|
|
|
instanceName := string(nodeName)
|
|
|
|
if g.useMetadataServer {
|
|
// Use metadata server if possible
|
|
if g.isCurrentInstance(instanceName) {
|
|
|
|
nics, err := metadata.Get("instance/network-interfaces/")
|
|
if err != nil {
|
|
return nil, fmt.Errorf("couldn't get network interfaces: %v", err)
|
|
}
|
|
|
|
nicsArr := strings.Split(nics, "/\n")
|
|
nodeAddresses := []v1.NodeAddress{}
|
|
|
|
for _, nic := range nicsArr {
|
|
|
|
if nic == "" {
|
|
continue
|
|
}
|
|
|
|
internalIP, err := metadata.Get(fmt.Sprintf(networkInterfaceIP, nic))
|
|
if err != nil {
|
|
return nil, fmt.Errorf("couldn't get internal IP: %v", err)
|
|
}
|
|
nodeAddresses = append(nodeAddresses, v1.NodeAddress{Type: v1.NodeInternalIP, Address: internalIP})
|
|
|
|
acs, err := metadata.Get(fmt.Sprintf(networkInterfaceAccessConfigs, nic))
|
|
if err != nil {
|
|
return nil, fmt.Errorf("couldn't get access configs: %v", err)
|
|
}
|
|
|
|
acsArr := strings.Split(acs, "/\n")
|
|
|
|
for _, ac := range acsArr {
|
|
|
|
if ac == "" {
|
|
continue
|
|
}
|
|
|
|
externalIP, err := metadata.Get(fmt.Sprintf(networkInterfaceExternalIP, nic, ac))
|
|
if err != nil {
|
|
return nil, fmt.Errorf("couldn't get external IP: %v", err)
|
|
}
|
|
|
|
if externalIP != "" {
|
|
nodeAddresses = append(nodeAddresses, v1.NodeAddress{Type: v1.NodeExternalIP, Address: externalIP})
|
|
}
|
|
}
|
|
}
|
|
|
|
internalDNSFull, err := metadata.Get("instance/hostname")
|
|
if err != nil {
|
|
klog.Warningf("couldn't get full internal DNS name: %v", err)
|
|
} else {
|
|
nodeAddresses = append(nodeAddresses,
|
|
v1.NodeAddress{Type: v1.NodeInternalDNS, Address: internalDNSFull},
|
|
v1.NodeAddress{Type: v1.NodeHostName, Address: internalDNSFull},
|
|
)
|
|
}
|
|
return nodeAddresses, nil
|
|
}
|
|
}
|
|
|
|
// Use GCE API
|
|
instanceObj, err := g.getInstanceByName(instanceName)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("couldn't get instance details: %v", err)
|
|
}
|
|
|
|
instance, err := g.c.Instances().Get(timeoutCtx, meta.ZonalKey(canonicalizeInstanceName(instanceObj.Name), instanceObj.Zone))
|
|
if err != nil {
|
|
return []v1.NodeAddress{}, fmt.Errorf("error while querying for instance: %v", err)
|
|
}
|
|
|
|
return nodeAddressesFromInstance(instance)
|
|
}
|
|
|
|
// NodeAddressesByProviderID will not be called from the node that is requesting this ID.
|
|
// i.e. metadata service and other local methods cannot be used here
|
|
func (g *Cloud) NodeAddressesByProviderID(ctx context.Context, providerID string) ([]v1.NodeAddress, error) {
|
|
timeoutCtx, cancel := context.WithTimeout(ctx, 1*time.Hour)
|
|
defer cancel()
|
|
|
|
_, zone, name, err := splitProviderID(providerID)
|
|
if err != nil {
|
|
return []v1.NodeAddress{}, err
|
|
}
|
|
|
|
instance, err := g.c.Instances().Get(timeoutCtx, meta.ZonalKey(canonicalizeInstanceName(name), zone))
|
|
if err != nil {
|
|
return []v1.NodeAddress{}, fmt.Errorf("error while querying for providerID %q: %v", providerID, err)
|
|
}
|
|
|
|
return nodeAddressesFromInstance(instance)
|
|
}
|
|
|
|
// instanceByProviderID returns the cloudprovider instance of the node
|
|
// with the specified unique providerID
|
|
func (g *Cloud) instanceByProviderID(providerID string) (*gceInstance, error) {
|
|
project, zone, name, err := splitProviderID(providerID)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
instance, err := g.getInstanceFromProjectInZoneByName(project, zone, name)
|
|
if err != nil {
|
|
if isHTTPErrorCode(err, http.StatusNotFound) {
|
|
return nil, cloudprovider.InstanceNotFound
|
|
}
|
|
return nil, err
|
|
}
|
|
|
|
return instance, nil
|
|
}
|
|
|
|
// InstanceShutdownByProviderID returns true if the instance is in safe state to detach volumes
|
|
func (g *Cloud) InstanceShutdownByProviderID(ctx context.Context, providerID string) (bool, error) {
|
|
return false, cloudprovider.NotImplemented
|
|
}
|
|
|
|
// InstanceShutdown returns true if the instance is in safe state to detach volumes
|
|
func (g *Cloud) InstanceShutdown(ctx context.Context, node *v1.Node) (bool, error) {
|
|
return false, cloudprovider.NotImplemented
|
|
}
|
|
|
|
func nodeAddressesFromInstance(instance *compute.Instance) ([]v1.NodeAddress, error) {
|
|
if len(instance.NetworkInterfaces) < 1 {
|
|
return nil, fmt.Errorf("could not find network interfaces for instanceID %q", instance.Id)
|
|
}
|
|
nodeAddresses := []v1.NodeAddress{}
|
|
|
|
for _, nic := range instance.NetworkInterfaces {
|
|
nodeAddresses = append(nodeAddresses, v1.NodeAddress{Type: v1.NodeInternalIP, Address: nic.NetworkIP})
|
|
for _, config := range nic.AccessConfigs {
|
|
nodeAddresses = append(nodeAddresses, v1.NodeAddress{Type: v1.NodeExternalIP, Address: config.NatIP})
|
|
}
|
|
}
|
|
|
|
return nodeAddresses, nil
|
|
}
|
|
|
|
// InstanceTypeByProviderID returns the cloudprovider instance type of the node
|
|
// with the specified unique providerID This method will not be called from the
|
|
// node that is requesting this ID. i.e. metadata service and other local
|
|
// methods cannot be used here
|
|
func (g *Cloud) InstanceTypeByProviderID(ctx context.Context, providerID string) (string, error) {
|
|
instance, err := g.instanceByProviderID(providerID)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
|
|
return instance.Type, nil
|
|
}
|
|
|
|
// InstanceExistsByProviderID returns true if the instance with the given provider id still exists and is running.
|
|
// If false is returned with no error, the instance will be immediately deleted by the cloud controller manager.
|
|
func (g *Cloud) InstanceExistsByProviderID(ctx context.Context, providerID string) (bool, error) {
|
|
_, err := g.instanceByProviderID(providerID)
|
|
if err != nil {
|
|
if err == cloudprovider.InstanceNotFound {
|
|
return false, nil
|
|
}
|
|
return false, err
|
|
}
|
|
|
|
return true, nil
|
|
}
|
|
|
|
// InstanceExists returns true if the instance with the given provider id still exists and is running.
|
|
// If false is returned with no error, the instance will be immediately deleted by the cloud controller manager.
|
|
func (g *Cloud) InstanceExists(ctx context.Context, node *v1.Node) (bool, error) {
|
|
providerID := node.Spec.ProviderID
|
|
if providerID == "" {
|
|
var err error
|
|
if providerID, err = cloudprovider.GetInstanceProviderID(ctx, g, types.NodeName(node.Name)); err != nil {
|
|
if err == cloudprovider.InstanceNotFound {
|
|
return false, nil
|
|
}
|
|
return false, err
|
|
}
|
|
}
|
|
return g.InstanceExistsByProviderID(ctx, providerID)
|
|
}
|
|
|
|
// InstanceMetadata returns metadata of the specified instance.
|
|
func (g *Cloud) InstanceMetadata(ctx context.Context, node *v1.Node) (*cloudprovider.InstanceMetadata, error) {
|
|
timeoutCtx, cancel := context.WithTimeout(ctx, 1*time.Hour)
|
|
defer cancel()
|
|
|
|
providerID := node.Spec.ProviderID
|
|
if providerID == "" {
|
|
var err error
|
|
if providerID, err = cloudprovider.GetInstanceProviderID(ctx, g, types.NodeName(node.Name)); err != nil {
|
|
return nil, err
|
|
}
|
|
}
|
|
|
|
_, zone, name, err := splitProviderID(providerID)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
region, err := GetGCERegion(zone)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
instance, err := g.c.Instances().Get(timeoutCtx, meta.ZonalKey(canonicalizeInstanceName(name), zone))
|
|
if err != nil {
|
|
return nil, fmt.Errorf("error while querying for providerID %q: %v", providerID, err)
|
|
}
|
|
|
|
addresses, err := nodeAddressesFromInstance(instance)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return &cloudprovider.InstanceMetadata{
|
|
ProviderID: providerID,
|
|
InstanceType: lastComponent(instance.MachineType),
|
|
NodeAddresses: addresses,
|
|
Zone: zone,
|
|
Region: region,
|
|
}, nil
|
|
}
|
|
|
|
// InstanceID returns the cloud provider ID of the node with the specified NodeName.
|
|
func (g *Cloud) InstanceID(ctx context.Context, nodeName types.NodeName) (string, error) {
|
|
instanceName := mapNodeNameToInstanceName(nodeName)
|
|
if g.useMetadataServer {
|
|
// Use metadata, if possible, to fetch ID. See issue #12000
|
|
if g.isCurrentInstance(instanceName) {
|
|
projectID, zone, err := getProjectAndZone()
|
|
if err == nil {
|
|
return projectID + "/" + zone + "/" + canonicalizeInstanceName(instanceName), nil
|
|
}
|
|
}
|
|
}
|
|
instance, err := g.getInstanceByName(instanceName)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
return g.projectID + "/" + instance.Zone + "/" + instance.Name, nil
|
|
}
|
|
|
|
// InstanceType returns the type of the specified node with the specified NodeName.
|
|
func (g *Cloud) InstanceType(ctx context.Context, nodeName types.NodeName) (string, error) {
|
|
instanceName := mapNodeNameToInstanceName(nodeName)
|
|
if g.useMetadataServer {
|
|
// Use metadata, if possible, to fetch ID. See issue #12000
|
|
if g.isCurrentInstance(instanceName) {
|
|
mType, err := getCurrentMachineTypeViaMetadata()
|
|
if err == nil {
|
|
return mType, nil
|
|
}
|
|
}
|
|
}
|
|
instance, err := g.getInstanceByName(instanceName)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
return instance.Type, nil
|
|
}
|
|
|
|
// AddSSHKeyToAllInstances adds an SSH public key as a legal identity for all instances
|
|
// expected format for the key is standard ssh-keygen format: <protocol> <blob>
|
|
func (g *Cloud) AddSSHKeyToAllInstances(ctx context.Context, user string, keyData []byte) error {
|
|
ctx, cancel := context.WithTimeout(ctx, 1*time.Hour)
|
|
defer cancel()
|
|
|
|
return wait.Poll(2*time.Second, 30*time.Second, func() (bool, error) {
|
|
project, err := g.c.Projects().Get(ctx, g.projectID)
|
|
if err != nil {
|
|
klog.Errorf("Could not get project: %v", err)
|
|
return false, nil
|
|
}
|
|
keyString := fmt.Sprintf("%s:%s %s@%s", user, strings.TrimSpace(string(keyData)), user, user)
|
|
found := false
|
|
for _, item := range project.CommonInstanceMetadata.Items {
|
|
if item.Key == "sshKeys" {
|
|
if strings.Contains(*item.Value, keyString) {
|
|
// We've already added the key
|
|
klog.Info("SSHKey already in project metadata")
|
|
return true, nil
|
|
}
|
|
value := *item.Value + "\n" + keyString
|
|
item.Value = &value
|
|
found = true
|
|
break
|
|
}
|
|
}
|
|
if !found {
|
|
// This is super unlikely, so log.
|
|
klog.Infof("Failed to find sshKeys metadata, creating a new item")
|
|
project.CommonInstanceMetadata.Items = append(project.CommonInstanceMetadata.Items,
|
|
&compute.MetadataItems{
|
|
Key: "sshKeys",
|
|
Value: &keyString,
|
|
})
|
|
}
|
|
|
|
mc := newInstancesMetricContext("add_ssh_key", "")
|
|
err = g.c.Projects().SetCommonInstanceMetadata(ctx, g.projectID, project.CommonInstanceMetadata)
|
|
mc.Observe(err)
|
|
|
|
if err != nil {
|
|
klog.Errorf("Could not Set Metadata: %v", err)
|
|
return false, nil
|
|
}
|
|
klog.Infof("Successfully added sshKey to project metadata")
|
|
return true, nil
|
|
})
|
|
}
|
|
|
|
// GetAllCurrentZones returns all the zones in which k8s nodes are currently running
|
|
func (g *Cloud) GetAllCurrentZones() (sets.String, error) {
|
|
if g.nodeInformerSynced == nil {
|
|
klog.Warningf("Cloud object does not have informers set, should only happen in E2E binary.")
|
|
return g.GetAllZonesFromCloudProvider()
|
|
}
|
|
g.nodeZonesLock.Lock()
|
|
defer g.nodeZonesLock.Unlock()
|
|
if !g.nodeInformerSynced() {
|
|
return nil, fmt.Errorf("node informer is not synced when trying to GetAllCurrentZones")
|
|
}
|
|
zones := sets.NewString()
|
|
for zone, nodes := range g.nodeZones {
|
|
if len(nodes) > 0 {
|
|
zones.Insert(zone)
|
|
}
|
|
}
|
|
return zones, nil
|
|
}
|
|
|
|
// GetAllZonesFromCloudProvider returns all the zones in which nodes are running
|
|
// Only use this in E2E tests to get zones, on real clusters this will
|
|
// get all zones with compute instances in them even if not k8s instances!!!
|
|
// ex. I have k8s nodes in us-central1-c and us-central1-b. I also have
|
|
// a non-k8s compute in us-central1-a. This func will return a,b, and c.
|
|
//
|
|
// TODO: this should be removed from the cloud provider.
|
|
func (g *Cloud) GetAllZonesFromCloudProvider() (sets.String, error) {
|
|
ctx, cancel := cloud.ContextWithCallTimeout()
|
|
defer cancel()
|
|
|
|
zones := sets.NewString()
|
|
for _, zone := range g.managedZones {
|
|
instances, err := g.c.Instances().List(ctx, zone, filter.None)
|
|
if err != nil {
|
|
return sets.NewString(), err
|
|
}
|
|
if len(instances) > 0 {
|
|
zones.Insert(zone)
|
|
}
|
|
}
|
|
return zones, nil
|
|
}
|
|
|
|
// InsertInstance creates a new instance on GCP
|
|
func (g *Cloud) InsertInstance(project string, zone string, i *compute.Instance) error {
|
|
ctx, cancel := cloud.ContextWithCallTimeout()
|
|
defer cancel()
|
|
|
|
mc := newInstancesMetricContext("create", zone)
|
|
return mc.Observe(g.c.Instances().Insert(ctx, meta.ZonalKey(i.Name, zone), i))
|
|
}
|
|
|
|
// ListInstanceNames returns a string of instance names separated by spaces.
|
|
// This method should only be used for e2e testing.
|
|
// TODO: remove this method.
|
|
func (g *Cloud) ListInstanceNames(project, zone string) (string, error) {
|
|
ctx, cancel := cloud.ContextWithCallTimeout()
|
|
defer cancel()
|
|
|
|
l, err := g.c.Instances().List(ctx, zone, filter.None)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
var names []string
|
|
for _, i := range l {
|
|
names = append(names, i.Name)
|
|
}
|
|
return strings.Join(names, " "), nil
|
|
}
|
|
|
|
// DeleteInstance deletes an instance specified by project, zone, and name
|
|
func (g *Cloud) DeleteInstance(project, zone, name string) error {
|
|
ctx, cancel := cloud.ContextWithCallTimeout()
|
|
defer cancel()
|
|
|
|
return g.c.Instances().Delete(ctx, meta.ZonalKey(name, zone))
|
|
}
|
|
|
|
// CurrentNodeName returns the name of the node we are currently running on
|
|
// On most clouds (e.g. GCE) this is the hostname, so we provide the hostname
|
|
func (g *Cloud) CurrentNodeName(ctx context.Context, hostname string) (types.NodeName, error) {
|
|
return types.NodeName(hostname), nil
|
|
}
|
|
|
|
// AliasRangesByProviderID returns a list of CIDR ranges that are assigned to the
|
|
// `node` for allocation to pods. Returns a list of the form
|
|
// "<ip>/<netmask>".
|
|
func (g *Cloud) AliasRangesByProviderID(providerID string) (cidrs []string, err error) {
|
|
ctx, cancel := cloud.ContextWithCallTimeout()
|
|
defer cancel()
|
|
|
|
_, zone, name, err := splitProviderID(providerID)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
var res *computebeta.Instance
|
|
res, err = g.c.BetaInstances().Get(ctx, meta.ZonalKey(canonicalizeInstanceName(name), zone))
|
|
if err != nil {
|
|
return
|
|
}
|
|
|
|
for _, networkInterface := range res.NetworkInterfaces {
|
|
for _, r := range networkInterface.AliasIpRanges {
|
|
cidrs = append(cidrs, r.IpCidrRange)
|
|
}
|
|
}
|
|
return
|
|
}
|
|
|
|
// AddAliasToInstanceByProviderID adds an alias to the given instance from the named
|
|
// secondary range.
|
|
func (g *Cloud) AddAliasToInstanceByProviderID(providerID string, alias *net.IPNet) error {
|
|
ctx, cancel := cloud.ContextWithCallTimeout()
|
|
defer cancel()
|
|
|
|
_, zone, name, err := splitProviderID(providerID)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
instance, err := g.c.BetaInstances().Get(ctx, meta.ZonalKey(canonicalizeInstanceName(name), zone))
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
switch len(instance.NetworkInterfaces) {
|
|
case 0:
|
|
return fmt.Errorf("instance %q has no network interfaces", providerID)
|
|
case 1:
|
|
default:
|
|
klog.Warningf("Instance %q has more than one network interface, using only the first (%v)",
|
|
providerID, instance.NetworkInterfaces)
|
|
}
|
|
|
|
iface := &computebeta.NetworkInterface{}
|
|
iface.Name = instance.NetworkInterfaces[0].Name
|
|
iface.Fingerprint = instance.NetworkInterfaces[0].Fingerprint
|
|
iface.AliasIpRanges = append(iface.AliasIpRanges, &computebeta.AliasIpRange{
|
|
IpCidrRange: alias.String(),
|
|
SubnetworkRangeName: g.secondaryRangeName,
|
|
})
|
|
|
|
mc := newInstancesMetricContext("add_alias", zone)
|
|
err = g.c.BetaInstances().UpdateNetworkInterface(ctx, meta.ZonalKey(instance.Name, lastComponent(instance.Zone)), iface.Name, iface)
|
|
return mc.Observe(err)
|
|
}
|
|
|
|
// Gets the named instances, returning cloudprovider.InstanceNotFound if any
|
|
// instance is not found
|
|
func (g *Cloud) getInstancesByNames(names []string) ([]*gceInstance, error) {
|
|
foundInstances, err := g.getFoundInstanceByNames(names)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if len(foundInstances) != len(names) {
|
|
if len(foundInstances) == 0 {
|
|
// return error so the TargetPool nodecount does not drop to 0 unexpectedly.
|
|
return nil, cloudprovider.InstanceNotFound
|
|
}
|
|
klog.Warningf("getFoundInstanceByNames - input instances %d, found %d. Continuing LoadBalancer Update", len(names), len(foundInstances))
|
|
}
|
|
return foundInstances, nil
|
|
}
|
|
|
|
// Gets the named instances, returning a list of gceInstances it was able to find from the provided
|
|
// list of names.
|
|
func (g *Cloud) getFoundInstanceByNames(names []string) ([]*gceInstance, error) {
|
|
ctx, cancel := cloud.ContextWithCallTimeout()
|
|
defer cancel()
|
|
|
|
found := map[string]*gceInstance{}
|
|
remaining := len(names)
|
|
|
|
nodeInstancePrefix := g.nodeInstancePrefix
|
|
for _, name := range names {
|
|
name = canonicalizeInstanceName(name)
|
|
if !strings.HasPrefix(name, g.nodeInstancePrefix) {
|
|
klog.Warningf("Instance %q does not conform to prefix %q, removing filter", name, g.nodeInstancePrefix)
|
|
nodeInstancePrefix = ""
|
|
}
|
|
found[name] = nil
|
|
}
|
|
|
|
for _, zone := range g.managedZones {
|
|
if remaining == 0 {
|
|
break
|
|
}
|
|
instances, err := g.c.Instances().List(ctx, zone, filter.Regexp("name", nodeInstancePrefix+".*"))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
for _, inst := range instances {
|
|
if remaining == 0 {
|
|
break
|
|
}
|
|
if _, ok := found[inst.Name]; !ok {
|
|
continue
|
|
}
|
|
if found[inst.Name] != nil {
|
|
klog.Errorf("Instance name %q was duplicated (in zone %q and %q)", inst.Name, zone, found[inst.Name].Zone)
|
|
continue
|
|
}
|
|
found[inst.Name] = &gceInstance{
|
|
Zone: zone,
|
|
Name: inst.Name,
|
|
ID: inst.Id,
|
|
Disks: inst.Disks,
|
|
Type: lastComponent(inst.MachineType),
|
|
}
|
|
remaining--
|
|
}
|
|
}
|
|
|
|
var ret []*gceInstance
|
|
var failed []string
|
|
for name, instance := range found {
|
|
if instance != nil {
|
|
ret = append(ret, instance)
|
|
} else {
|
|
failed = append(failed, name)
|
|
}
|
|
}
|
|
if len(failed) > 0 {
|
|
klog.Errorf("Failed to retrieve instances: %v", failed)
|
|
}
|
|
|
|
return ret, nil
|
|
}
|
|
|
|
// Gets the named instance, returning cloudprovider.InstanceNotFound if the instance is not found
|
|
func (g *Cloud) getInstanceByName(name string) (*gceInstance, error) {
|
|
// Avoid changing behaviour when not managing multiple zones
|
|
for _, zone := range g.managedZones {
|
|
instance, err := g.getInstanceFromProjectInZoneByName(g.projectID, zone, name)
|
|
if err != nil {
|
|
if isHTTPErrorCode(err, http.StatusNotFound) {
|
|
continue
|
|
}
|
|
klog.Errorf("getInstanceByName: failed to get instance %s in zone %s; err: %v", name, zone, err)
|
|
return nil, err
|
|
}
|
|
return instance, nil
|
|
}
|
|
|
|
return nil, cloudprovider.InstanceNotFound
|
|
}
|
|
|
|
func (g *Cloud) getInstanceFromProjectInZoneByName(project, zone, name string) (*gceInstance, error) {
|
|
ctx, cancel := cloud.ContextWithCallTimeout()
|
|
defer cancel()
|
|
|
|
name = canonicalizeInstanceName(name)
|
|
mc := newInstancesMetricContext("get", zone)
|
|
res, err := g.c.Instances().Get(ctx, meta.ZonalKey(name, zone))
|
|
mc.Observe(err)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return &gceInstance{
|
|
Zone: lastComponent(res.Zone),
|
|
Name: res.Name,
|
|
ID: res.Id,
|
|
Disks: res.Disks,
|
|
Type: lastComponent(res.MachineType),
|
|
}, nil
|
|
}
|
|
|
|
func getInstanceIDViaMetadata() (string, error) {
|
|
result, err := metadata.Get("instance/hostname")
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
parts := strings.Split(result, ".")
|
|
if len(parts) == 0 {
|
|
return "", fmt.Errorf("unexpected response: %s", result)
|
|
}
|
|
return parts[0], nil
|
|
}
|
|
|
|
func getCurrentMachineTypeViaMetadata() (string, error) {
|
|
mType, err := metadata.Get("instance/machine-type")
|
|
if err != nil {
|
|
return "", fmt.Errorf("couldn't get machine type: %v", err)
|
|
}
|
|
parts := strings.Split(mType, "/")
|
|
if len(parts) != 4 {
|
|
return "", fmt.Errorf("unexpected response for machine type: %s", mType)
|
|
}
|
|
|
|
return parts[3], nil
|
|
}
|
|
|
|
// isCurrentInstance uses metadata server to check if specified
|
|
// instanceID matches current machine's instanceID
|
|
func (g *Cloud) isCurrentInstance(instanceID string) bool {
|
|
currentInstanceID, err := getInstanceIDViaMetadata()
|
|
if err != nil {
|
|
// Log and swallow error
|
|
klog.Errorf("Failed to fetch instanceID via Metadata: %v", err)
|
|
return false
|
|
}
|
|
|
|
return currentInstanceID == canonicalizeInstanceName(instanceID)
|
|
}
|
|
|
|
// ComputeHostTags grabs all tags from all instances being added to the pool.
|
|
// * The longest tag that is a prefix of the instance name is used
|
|
// * If any instance has no matching prefix tag, return error
|
|
// Invoking this method to get host tags is risky since it depends on the
|
|
// format of the host names in the cluster. Only use it as a fallback if
|
|
// gce.nodeTags is unspecified
|
|
func (g *Cloud) computeHostTags(hosts []*gceInstance) ([]string, error) {
|
|
ctx, cancel := cloud.ContextWithCallTimeout()
|
|
defer cancel()
|
|
|
|
// TODO: We could store the tags in gceInstance, so we could have already fetched it
|
|
hostNamesByZone := make(map[string]map[string]bool) // map of zones -> map of names -> bool (for easy lookup)
|
|
nodeInstancePrefix := g.nodeInstancePrefix
|
|
for _, host := range hosts {
|
|
if !strings.HasPrefix(host.Name, g.nodeInstancePrefix) {
|
|
klog.Warningf("instance %v does not conform to prefix '%s', ignoring filter", host, g.nodeInstancePrefix)
|
|
nodeInstancePrefix = ""
|
|
}
|
|
|
|
z, ok := hostNamesByZone[host.Zone]
|
|
if !ok {
|
|
z = make(map[string]bool)
|
|
hostNamesByZone[host.Zone] = z
|
|
}
|
|
z[host.Name] = true
|
|
}
|
|
|
|
tags := sets.NewString()
|
|
|
|
filt := filter.None
|
|
if nodeInstancePrefix != "" {
|
|
filt = filter.Regexp("name", nodeInstancePrefix+".*")
|
|
}
|
|
for zone, hostNames := range hostNamesByZone {
|
|
instances, err := g.c.Instances().List(ctx, zone, filt)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
for _, instance := range instances {
|
|
if !hostNames[instance.Name] {
|
|
continue
|
|
}
|
|
longestTag := ""
|
|
for _, tag := range instance.Tags.Items {
|
|
if strings.HasPrefix(instance.Name, tag) && len(tag) > len(longestTag) {
|
|
longestTag = tag
|
|
}
|
|
}
|
|
if len(longestTag) > 0 {
|
|
tags.Insert(longestTag)
|
|
} else {
|
|
return nil, fmt.Errorf("could not find any tag that is a prefix of instance name for instance %s", instance.Name)
|
|
}
|
|
}
|
|
}
|
|
if len(tags) == 0 {
|
|
return nil, fmt.Errorf("no instances found")
|
|
}
|
|
return tags.List(), nil
|
|
}
|
|
|
|
// GetNodeTags will first try returning the list of tags specified in GCE cloud Configuration.
|
|
// If they weren't provided, it'll compute the host tags with the given hostnames. If the list
|
|
// of hostnames has not changed, a cached set of nodetags are returned.
|
|
func (g *Cloud) GetNodeTags(nodeNames []string) ([]string, error) {
|
|
// If nodeTags were specified through configuration, use them
|
|
if len(g.nodeTags) > 0 {
|
|
return g.nodeTags, nil
|
|
}
|
|
|
|
g.computeNodeTagLock.Lock()
|
|
defer g.computeNodeTagLock.Unlock()
|
|
|
|
// Early return if hosts have not changed
|
|
hosts := sets.NewString(nodeNames...)
|
|
if hosts.Equal(g.lastKnownNodeNames) {
|
|
return g.lastComputedNodeTags, nil
|
|
}
|
|
|
|
// Get GCE instance data by hostname
|
|
instances, err := g.getInstancesByNames(nodeNames)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// Determine list of host tags
|
|
tags, err := g.computeHostTags(instances)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// Save the list of tags
|
|
g.lastKnownNodeNames = hosts
|
|
g.lastComputedNodeTags = tags
|
|
return tags, nil
|
|
}
|