Merge pull request #65373 from multi-io/openstack_lbaas_node_secgroup_fix

Automatic merge from submit-queue (batch tested with PRs 65449, 65373, 49410). If you want to cherry-pick this change to another branch, please follow the instructions <a href="https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md">here</a>.

OpenStack LBaaS fix: must use ID, not name, of the node security group

This is a bugfix for the OpenStack LBaaS cloud provider security group management.

A bit of context: When creating a load balancer for a given `type: LoadBalancer` service, the provider will try to:

(see `pkg/cloudprovider/providers/openstack/openstack_loadbalancer.go`/`EnsureLoadBalancer`)

1. create a load balancer (LB) in Openstack with listeners corresponding to the service's ports
2. attach a floating IP to the LB's network port

If `manage-security-groups` is enabled in controller-manager's cloud.conf:

3. create a security group with ingress rules corresponding to the LB's listeners, and attach it to the LB's network port
4. for all nodes of the cluster, pick an existing security group for the nodes ("node security group") and add ingress rules to it exposing the service's NodePorts to the security group created in step 3.

In the current upstream master, steps 1 through 3 work fine, step 4 fails, leading to a service that's not accessible via the LB without further manual intervention.

The bug is in the "pick an existing security group" operation (func `getNodeSecurityGroupIDForLB`), which, contrary to its name, will return the security group's name rather than its ID (actually it returns a list of names rather than IDs, apparently to cover some corner cases where you might have more than one node security group, but anyway). This will then be used when trying to add the ingress rules to the group, which the Openstack API will reject with a 404 (at least on our (fairly standard) Openstack Ocata installation) because we're giving it a name where it expects an ID.

The PR adds a "get ID given a name" lookup to the `getNodeSecurityGroupIDForLB` function, so it actually returns IDs. That's it. I'm not sure if the upstream code wasn't really tested, or maybe other people use other Openstacks with more lenient APIs. The bug and the fix is always reproducible on our installation.

**Which issue(s) this PR fixes** *(optional, in `fixes #<issue number>(, fixes #<issue_number>, ...)` format, will close the issue(s) when PR gets merged)*:

Fixes #58145


**Special notes for your reviewer:**

Should we turn `getNodeSecurityGroupIDForLB` into a method with the lbaas as its receiver because it now requires two of the lbaas's attributes? I'm not sure what the conventions are here, if any. 

**Release note**:
```release-note
NONE
```
pull/8/head
Kubernetes Submit Queue 2018-06-26 02:52:06 -07:00 committed by GitHub
commit 3d694993d0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 16 additions and 8 deletions

View File

@ -554,14 +554,14 @@ func getSubnetIDForLB(compute *gophercloud.ServiceClient, node v1.Node) (string,
}
// getNodeSecurityGroupIDForLB lists node-security-groups for specific nodes
func getNodeSecurityGroupIDForLB(compute *gophercloud.ServiceClient, nodes []*v1.Node) ([]string, error) {
nodeSecurityGroupIDs := sets.NewString()
func getNodeSecurityGroupIDForLB(compute *gophercloud.ServiceClient, network *gophercloud.ServiceClient, nodes []*v1.Node) ([]string, error) {
secGroupNames := sets.NewString()
for _, node := range nodes {
nodeName := types.NodeName(node.Name)
srv, err := getServerByName(compute, nodeName)
if err != nil {
return nodeSecurityGroupIDs.List(), err
return []string{}, err
}
// use the first node-security-groups
@ -569,11 +569,19 @@ func getNodeSecurityGroupIDForLB(compute *gophercloud.ServiceClient, nodes []*v1
// case 1: node1:SG1 node2:SG2 return SG1,SG2
// case 2: node1:SG1,SG2 node2:SG3,SG4 return SG1,SG3
// case 3: node1:SG1,SG2 node2:SG2,SG3 return SG1,SG2
securityGroupName := srv.SecurityGroups[0]["name"]
nodeSecurityGroupIDs.Insert(securityGroupName.(string))
secGroupNames.Insert(srv.SecurityGroups[0]["name"].(string))
}
return nodeSecurityGroupIDs.List(), nil
secGroupIDs := make([]string, secGroupNames.Len())
for i, name := range secGroupNames.List() {
secGroupID, err := groups.IDFromName(network, name)
if err != nil {
return []string{}, err
}
secGroupIDs[i] = secGroupID
}
return secGroupIDs, nil
}
// isSecurityGroupNotFound return true while 'err' is object of gophercloud.ErrResourceNotFound
@ -997,7 +1005,7 @@ func (lbaas *LbaasV2) ensureSecurityGroup(clusterName string, apiService *v1.Ser
// find node-security-group for service
var err error
if len(lbaas.opts.NodeSecurityGroupIDs) == 0 {
lbaas.opts.NodeSecurityGroupIDs, err = getNodeSecurityGroupIDForLB(lbaas.compute, nodes)
lbaas.opts.NodeSecurityGroupIDs, err = getNodeSecurityGroupIDForLB(lbaas.compute, lbaas.network, nodes)
if err != nil {
return fmt.Errorf("failed to find node-security-group for loadbalancer service %s/%s: %v", apiService.Namespace, apiService.Name, err)
}
@ -1311,7 +1319,7 @@ func (lbaas *LbaasV2) updateSecurityGroup(clusterName string, apiService *v1.Ser
originalNodeSecurityGroupIDs := lbaas.opts.NodeSecurityGroupIDs
var err error
lbaas.opts.NodeSecurityGroupIDs, err = getNodeSecurityGroupIDForLB(lbaas.compute, nodes)
lbaas.opts.NodeSecurityGroupIDs, err = getNodeSecurityGroupIDForLB(lbaas.compute, lbaas.network, nodes)
if err != nil {
return fmt.Errorf("failed to find node-security-group for loadbalancer service %s/%s: %v", apiService.Namespace, apiService.Name, err)
}