From a9b67a4b52cab6514755a573c2599c06f7a1844a Mon Sep 17 00:00:00 2001 From: m1093782566 Date: Wed, 29 Nov 2017 17:11:35 +0800 Subject: [PATCH] fix ipvs proxier nodeport --- pkg/proxy/ipvs/netlink.go | 11 +++- pkg/proxy/ipvs/netlink_linux.go | 66 +++++++++++++++++++++++- pkg/proxy/ipvs/netlink_unsupported.go | 7 +++ pkg/proxy/ipvs/proxier.go | 74 +++++++++++++++++---------- pkg/proxy/ipvs/testing/fake.go | 55 ++++++++++++++++++-- 5 files changed, 178 insertions(+), 35 deletions(-) diff --git a/pkg/proxy/ipvs/netlink.go b/pkg/proxy/ipvs/netlink.go index 4f66f706ee..45551da36a 100644 --- a/pkg/proxy/ipvs/netlink.go +++ b/pkg/proxy/ipvs/netlink.go @@ -16,14 +16,21 @@ limitations under the License. package ipvs +import ( + "k8s.io/apimachinery/pkg/util/sets" +) + // NetLinkHandle for revoke netlink interface type NetLinkHandle interface { - // EnsureAddressBind checks if address is bound to the interface and, if not, binds it. If the address is already bound, return true. + // EnsureAddressBind checks if address is bound to the interface and, if not, binds it. If the address is already bound, return true. EnsureAddressBind(address, devName string) (exist bool, err error) // UnbindAddress unbind address from the interface UnbindAddress(address, devName string) error - // EnsureDummyDevice checks if dummy device is exist and, if not, create one. If the dummy device is already exist, return true. + // EnsureDummyDevice checks if dummy device is exist and, if not, create one. If the dummy device is already exist, return true. EnsureDummyDevice(devName string) (exist bool, err error) // DeleteDummyDevice deletes the given dummy device by name. DeleteDummyDevice(devName string) error + // GetLocalAddresses returns all unique local type IP addresses based on filter device interface. If filter device is not given, + // it will list all unique local type addresses. + GetLocalAddresses(filterDev string) (sets.String, error) } diff --git a/pkg/proxy/ipvs/netlink_linux.go b/pkg/proxy/ipvs/netlink_linux.go index e709afafab..2553e4c624 100644 --- a/pkg/proxy/ipvs/netlink_linux.go +++ b/pkg/proxy/ipvs/netlink_linux.go @@ -22,6 +22,14 @@ import ( "fmt" "net" "syscall" + // TODO: replace syscall with golang.org/x/sys/unix? + // The Go doc for syscall says: + // NOTE: This package is locked down. + // Code outside the standard Go repository should be migrated to use the corresponding package in the golang.org/x/sys repository. + // That is also where updates required by new systems or versions should be applied. + // See https://golang.org/s/go1.4-syscall for more information. + + "k8s.io/apimachinery/pkg/util/sets" "github.com/vishvananda/netlink" ) @@ -30,7 +38,7 @@ type netlinkHandle struct { netlink.Handle } -// NewNetLinkHandle will crate a new netlinkHandle +// NewNetLinkHandle will crate a new NetLinkHandle func NewNetLinkHandle() NetLinkHandle { return &netlinkHandle{netlink.Handle{}} } @@ -96,3 +104,59 @@ func (h *netlinkHandle) DeleteDummyDevice(devName string) error { } return h.LinkDel(dummy) } + +// GetLocalAddresses lists all LOCAL type IP addresses from host based on filter device. +// If filter device is not specified, it's equivalent to exec: +// $ ip route show table local type local proto kernel +// 10.0.0.1 dev kube-ipvs0 scope host src 10.0.0.1 +// 10.0.0.10 dev kube-ipvs0 scope host src 10.0.0.10 +// 10.0.0.252 dev kube-ipvs0 scope host src 10.0.0.252 +// 100.106.89.164 dev eth0 scope host src 100.106.89.164 +// 127.0.0.0/8 dev lo scope host src 127.0.0.1 +// 127.0.0.1 dev lo scope host src 127.0.0.1 +// 172.17.0.1 dev docker0 scope host src 172.17.0.1 +// 192.168.122.1 dev virbr0 scope host src 192.168.122.1 +// Then cut the unique src IP fields, +// --> result set: [10.0.0.1, 10.0.0.10, 10.0.0.252, 100.106.89.164, 127.0.0.1, 192.168.122.1] + +// If filter device is specified, it's equivalent to exec: +// $ ip route show table local type local proto kernel dev kube-ipvs0 +// 10.0.0.1 scope host src 10.0.0.1 +// 10.0.0.10 scope host src 10.0.0.10 +// Then cut the unique src IP fields, +// --> result set: [10.0.0.1, 10.0.0.10] +func (h *netlinkHandle) GetLocalAddresses(filterDev string) (sets.String, error) { + linkIndex := -1 + if len(filterDev) != 0 { + link, err := h.LinkByName(filterDev) + if err != nil { + return nil, fmt.Errorf("error get filter device %s, err: %v", filterDev, err) + } + linkIndex = link.Attrs().Index + } + + routeFilter := &netlink.Route{ + Table: syscall.RT_TABLE_LOCAL, + Type: syscall.RTN_LOCAL, + Protocol: syscall.RTPROT_KERNEL, + } + filterMask := netlink.RT_FILTER_TABLE | netlink.RT_FILTER_TYPE | netlink.RT_FILTER_PROTOCOL + + // find filter device + if linkIndex != -1 { + routeFilter.LinkIndex = linkIndex + filterMask |= netlink.RT_FILTER_OIF + } + + routes, err := h.RouteListFiltered(netlink.FAMILY_ALL, routeFilter, filterMask) + if err != nil { + return nil, fmt.Errorf("error list route table, err: %v", err) + } + res := sets.NewString() + for _, route := range routes { + if route.Src != nil { + res.Insert(route.Src.String()) + } + } + return res, nil +} diff --git a/pkg/proxy/ipvs/netlink_unsupported.go b/pkg/proxy/ipvs/netlink_unsupported.go index 1e22685b27..b70550387a 100644 --- a/pkg/proxy/ipvs/netlink_unsupported.go +++ b/pkg/proxy/ipvs/netlink_unsupported.go @@ -20,6 +20,8 @@ package ipvs import ( "fmt" + + "k8s.io/apimachinery/pkg/util/sets" ) type emptyHandle struct { @@ -49,3 +51,8 @@ func (h *emptyHandle) EnsureDummyDevice(devName string) (bool, error) { func (h *emptyHandle) DeleteDummyDevice(devName string) error { return fmt.Errorf("netlink is not supported in this platform") } + +// GetLocalAddresses is part of interface. +func (h *emptyHandle) GetLocalAddresses(filterDev string) (sets.String, error) { + return nil, fmt.Errorf("netlink is not supported in this platform") +} diff --git a/pkg/proxy/ipvs/proxier.go b/pkg/proxy/ipvs/proxier.go index 7e15fd8ac9..9f0def1e1d 100644 --- a/pkg/proxy/ipvs/proxier.go +++ b/pkg/proxy/ipvs/proxier.go @@ -35,7 +35,6 @@ import ( clientv1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/types" - utilruntime "k8s.io/apimachinery/pkg/util/runtime" "k8s.io/apimachinery/pkg/util/sets" "k8s.io/apimachinery/pkg/util/wait" utilfeature "k8s.io/apiserver/pkg/util/feature" @@ -171,36 +170,57 @@ type IPGetter interface { NodeIPs() ([]net.IP, error) } -type realIPGetter struct{} +// realIPGetter is a real NodeIP handler, it implements IPGetter. +type realIPGetter struct { + // nl is a handle for revoking netlink interface + nl NetLinkHandle +} +// NodeIPs returns all LOCAL type IP addresses from host which are taken as the Node IPs of NodePort service. +// Firstly, it will list source IP exists in local route table with `kernel` protocol type. For example, +// $ ip route show table local type local proto kernel +// 10.0.0.1 dev kube-ipvs0 scope host src 10.0.0.1 +// 10.0.0.10 dev kube-ipvs0 scope host src 10.0.0.10 +// 10.0.0.252 dev kube-ipvs0 scope host src 10.0.0.252 +// 100.106.89.164 dev eth0 scope host src 100.106.89.164 +// 127.0.0.0/8 dev lo scope host src 127.0.0.1 +// 127.0.0.1 dev lo scope host src 127.0.0.1 +// 172.17.0.1 dev docker0 scope host src 172.17.0.1 +// 192.168.122.1 dev virbr0 scope host src 192.168.122.1 +// Then cut the unique src IP fields, +// --> result set1: [10.0.0.1, 10.0.0.10, 10.0.0.252, 100.106.89.164, 127.0.0.1, 192.168.122.1] + +// NOTE: For cases where an LB acts as a VIP (e.g. Google cloud), the VIP IP is considered LOCAL, but the protocol +// of the entry is 66, e.g. `10.128.0.6 dev ens4 proto 66 scope host`. Therefore, the rule mentioned above will +// filter these entries out. + +// Secondly, as we bind Cluster IPs to the dummy interface in IPVS proxier, we need to filter the them out so that +// we can eventually get the Node IPs. Fortunately, the dummy interface created by IPVS proxier is known as `kube-ipvs0`, +// so we just need to specify the `dev kube-ipvs0` argument in ip route command, for example, +// $ ip route show table local type local proto kernel dev kube-ipvs0 +// 10.0.0.1 scope host src 10.0.0.1 +// 10.0.0.10 scope host src 10.0.0.10 +// Then cut the unique src IP fields, +// --> result set2: [10.0.0.1, 10.0.0.10] + +// Finally, Node IP set = set1 - set2 func (r *realIPGetter) NodeIPs() (ips []net.IP, err error) { - interfaces, err := net.Interfaces() + // Pass in empty filter device name for list all LOCAL type addresses. + allAddress, err := r.nl.GetLocalAddresses("") if err != nil { - return nil, err + return nil, fmt.Errorf("error listing LOCAL type addresses from host, error: %v", err) } - for i := range interfaces { - name := interfaces[i].Name - // We assume node ip bind to eth{x} - if !strings.HasPrefix(name, "eth") { - continue - } - intf, err := net.InterfaceByName(name) - if err != nil { - utilruntime.HandleError(fmt.Errorf("Failed to get interface by name: %s, error: %v", name, err)) - continue - } - addrs, err := intf.Addrs() - if err != nil { - utilruntime.HandleError(fmt.Errorf("Failed to get addresses from interface: %s, error: %v", name, err)) - continue - } - for _, a := range addrs { - if ipnet, ok := a.(*net.IPNet); ok { - ips = append(ips, ipnet.IP) - } - } + dummyAddress, err := r.nl.GetLocalAddresses(DefaultDummyDevice) + if err != nil { + return nil, fmt.Errorf("error listing LOCAL type addresses from device: %s, error: %v", DefaultDummyDevice, err) } - return + // exclude ip address from dummy interface created by IPVS proxier - they are all Cluster IPs. + nodeAddress := allAddress.Difference(dummyAddress) + // translate ip string to IP + for _, ipStr := range nodeAddress.UnsortedList() { + ips = append(ips, net.ParseIP(ipStr)) + } + return ips, nil } // Proxier implements ProxyProvider @@ -294,7 +314,7 @@ func NewProxier(ipt utiliptables.Interface, healthzServer: healthzServer, ipvs: ipvs, ipvsScheduler: scheduler, - ipGetter: &realIPGetter{}, + ipGetter: &realIPGetter{nl: NewNetLinkHandle()}, iptablesData: bytes.NewBuffer(nil), natChains: bytes.NewBuffer(nil), natRules: bytes.NewBuffer(nil), diff --git a/pkg/proxy/ipvs/testing/fake.go b/pkg/proxy/ipvs/testing/fake.go index 6b3f0ddc1b..561e8b3c6b 100644 --- a/pkg/proxy/ipvs/testing/fake.go +++ b/pkg/proxy/ipvs/testing/fake.go @@ -16,21 +16,33 @@ limitations under the License. package testing -//FakeNetlinkHandle mock implementation of proxy NetlinkHandle +import ( + "fmt" + + "k8s.io/apimachinery/pkg/util/sets" +) + +// FakeNetlinkHandle mock implementation of proxy NetlinkHandle type FakeNetlinkHandle struct { + // localAddresses is a network interface name to all of its IP addresses map, e.g. + // eth0 -> [1.2.3.4, 10.20.30.40] + localAddresses map[string][]string } -//NewFakeNetlinkHandle will create a new FakeNetlinkHandle +// NewFakeNetlinkHandle will create a new FakeNetlinkHandle func NewFakeNetlinkHandle() *FakeNetlinkHandle { - return &FakeNetlinkHandle{} + fake := &FakeNetlinkHandle{ + localAddresses: make(map[string][]string), + } + return fake } -//EnsureAddressBind is a mock implementation +// EnsureAddressBind is a mock implementation func (h *FakeNetlinkHandle) EnsureAddressBind(address, devName string) (exist bool, err error) { return false, nil } -//UnbindAddress is a mock implementation +// UnbindAddress is a mock implementation func (h *FakeNetlinkHandle) UnbindAddress(address, devName string) error { return nil } @@ -44,3 +56,36 @@ func (h *FakeNetlinkHandle) EnsureDummyDevice(devName string) (bool, error) { func (h *FakeNetlinkHandle) DeleteDummyDevice(devName string) error { return nil } + +// GetLocalAddresses is a mock implementation +func (h *FakeNetlinkHandle) GetLocalAddresses(filterDev string) (sets.String, error) { + res := sets.NewString() + if len(filterDev) != 0 { + // list all addresses from a given network interface. + for _, addr := range h.localAddresses[filterDev] { + res.Insert(addr) + } + return res, nil + } + // If filterDev is not given, will list all addresses from all available network interface. + for linkName := range h.localAddresses { + // list all addresses from a given network interface. + for _, addr := range h.localAddresses[linkName] { + res.Insert(addr) + } + } + return res, nil +} + +// SetLocalAddresses set IP addresses to the given interface device. It's not part of interface. +func (h *FakeNetlinkHandle) SetLocalAddresses(dev string, ips ...string) error { + if h.localAddresses == nil { + h.localAddresses = make(map[string][]string) + } + if len(dev) == 0 { + return fmt.Errorf("device name can't be empty") + } + h.localAddresses[dev] = make([]string, 0) + h.localAddresses[dev] = append(h.localAddresses[dev], ips...) + return nil +}