Merge pull request #56685 from m1093782566/fix-nodeport

Automatic merge from submit-queue (batch tested with PRs 54379, 56593, 56685, 54174, 57309). If you want to cherry-pick this change to another branch, please follow the instructions <a href="https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md">here</a>.

Fix ipvs proxier nodeport

**What this PR does / why we need it**:

Fix ipvs proxier nodeport.

**Which issue(s) this PR fixes**:
Fixes #55923 

**Special notes for your reviewer**:

We bump the netlink version in the 1st commit because:

* We call `netlink.RouteListFiltered()` to filter LOCAL type addresses from the kernel route table. `netlink.RouteListFiltered()` only exists in newer versions of the netlink package.


* Newer versions of the netlink package migrate from `syscall` to `golang.org/x/sys/unix`, so k8s cross-builds can benefit from it as well. The Go doc for syscall says:

> NOTE: This package is locked down. Code outside the standard Go repository should be migrated to use the corresponding package in the golang.org/x/sys repository. That is also where updates required by new systems or versions should be applied. See https://golang.org/s/go1.4-syscall for more information.



**Release note**:

```release-note
Fix ipvs proxier nodeport eth* assumption
```
pull/6/head
Kubernetes Submit Queue 2017-12-18 14:45:42 -08:00 committed by GitHub
commit 6719e7ad9c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
51 changed files with 2269 additions and 726 deletions

6
Godeps/Godeps.json generated
View File

@ -2530,15 +2530,15 @@
},
{
"ImportPath": "github.com/vishvananda/netlink",
"Rev": "f5a6f697a596c788d474984a38a0ac4ba0719e93"
"Rev": "f67b75edbf5e3bb7dfe70bb788610693a71be3d1"
},
{
"ImportPath": "github.com/vishvananda/netlink/nl",
"Rev": "f5a6f697a596c788d474984a38a0ac4ba0719e93"
"Rev": "f67b75edbf5e3bb7dfe70bb788610693a71be3d1"
},
{
"ImportPath": "github.com/vishvananda/netns",
"Rev": "86bef332bfc3b59b7624a600bd53009ce91a9829"
"Rev": "be1fbeda19366dea804f00efff2dd73a1642fdcc"
},
{
"ImportPath": "github.com/vmware/govmomi",

View File

@ -67,7 +67,6 @@ go_library(
"//vendor/github.com/golang/glog:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/types:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/runtime:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/util/wait:go_default_library",
"//vendor/k8s.io/apiserver/pkg/util/feature:go_default_library",

View File

@ -16,14 +16,21 @@ limitations under the License.
package ipvs
import (
"k8s.io/apimachinery/pkg/util/sets"
)
// NetLinkHandle is the interface for invoking netlink operations.
type NetLinkHandle interface {
// EnsureAddressBind checks if address is bound to the interface and, if not, binds it. If the address is already bound, return true.
// EnsureAddressBind checks if address is bound to the interface and, if not, binds it. If the address is already bound, return true.
EnsureAddressBind(address, devName string) (exist bool, err error)
// UnbindAddress unbind address from the interface
UnbindAddress(address, devName string) error
// EnsureDummyDevice checks if dummy device is exist and, if not, create one. If the dummy device is already exist, return true.
// EnsureDummyDevice checks if dummy device is exist and, if not, create one. If the dummy device is already exist, return true.
EnsureDummyDevice(devName string) (exist bool, err error)
// DeleteDummyDevice deletes the given dummy device by name.
DeleteDummyDevice(devName string) error
// GetLocalAddresses returns all unique local type IP addresses based on filter device interface. If filter device is not given,
// it will list all unique local type addresses.
GetLocalAddresses(filterDev string) (sets.String, error)
}

View File

@ -22,6 +22,14 @@ import (
"fmt"
"net"
"syscall"
// TODO: replace syscall with golang.org/x/sys/unix?
// The Go doc for syscall says:
// NOTE: This package is locked down.
// Code outside the standard Go repository should be migrated to use the corresponding package in the golang.org/x/sys repository.
// That is also where updates required by new systems or versions should be applied.
// See https://golang.org/s/go1.4-syscall for more information.
"k8s.io/apimachinery/pkg/util/sets"
"github.com/vishvananda/netlink"
)
@ -30,7 +38,7 @@ type netlinkHandle struct {
netlink.Handle
}
// NewNetLinkHandle will crate a new netlinkHandle
// NewNetLinkHandle will create a new NetLinkHandle
func NewNetLinkHandle() NetLinkHandle {
return &netlinkHandle{netlink.Handle{}}
}
@ -96,3 +104,59 @@ func (h *netlinkHandle) DeleteDummyDevice(devName string) error {
}
return h.LinkDel(dummy)
}
// GetLocalAddresses returns the unique source IPs of all LOCAL type routes in
// the kernel's local route table that were installed with the `kernel`
// protocol, optionally restricted to a single device.
//
// With an empty filterDev it is equivalent to:
//   $ ip route show table local type local proto kernel
//   10.0.0.1 dev kube-ipvs0  scope host  src 10.0.0.1
//   10.0.0.10 dev kube-ipvs0  scope host  src 10.0.0.10
//   10.0.0.252 dev kube-ipvs0  scope host  src 10.0.0.252
//   100.106.89.164 dev eth0  scope host  src 100.106.89.164
//   127.0.0.0/8 dev lo  scope host  src 127.0.0.1
//   127.0.0.1 dev lo  scope host  src 127.0.0.1
//   172.17.0.1 dev docker0  scope host  src 172.17.0.1
//   192.168.122.1 dev virbr0  scope host  src 192.168.122.1
// followed by collecting the distinct `src` fields:
//   [10.0.0.1, 10.0.0.10, 10.0.0.252, 100.106.89.164, 127.0.0.1, 172.17.0.1, 192.168.122.1]
//
// With filterDev set (e.g. "kube-ipvs0") it is equivalent to:
//   $ ip route show table local type local proto kernel dev kube-ipvs0
//   10.0.0.1  scope host  src 10.0.0.1
//   10.0.0.10  scope host  src 10.0.0.10
//   10.0.0.252  scope host  src 10.0.0.252
// followed by collecting the distinct `src` fields:
//   [10.0.0.1, 10.0.0.10, 10.0.0.252]
//
// An error is returned if filterDev cannot be looked up or the route table
// cannot be listed. Routes without a source address are skipped.
func (h *netlinkHandle) GetLocalAddresses(filterDev string) (sets.String, error) {
	// noDevice marks "no device filter requested".
	const noDevice = -1

	devIndex := noDevice
	if filterDev != "" {
		dev, err := h.LinkByName(filterDev)
		if err != nil {
			return nil, fmt.Errorf("error get filter device %s, err: %v", filterDev, err)
		}
		devIndex = dev.Attrs().Index
	}

	// Always match on table, route type and protocol.
	filter := &netlink.Route{
		Table:    syscall.RT_TABLE_LOCAL,
		Type:     syscall.RTN_LOCAL,
		Protocol: syscall.RTPROT_KERNEL,
	}
	mask := netlink.RT_FILTER_TABLE | netlink.RT_FILTER_TYPE | netlink.RT_FILTER_PROTOCOL

	// Additionally match on the output interface when a device was given.
	if devIndex != noDevice {
		filter.LinkIndex = devIndex
		mask |= netlink.RT_FILTER_OIF
	}

	routes, err := h.RouteListFiltered(netlink.FAMILY_ALL, filter, mask)
	if err != nil {
		return nil, fmt.Errorf("error list route table, err: %v", err)
	}

	addrs := sets.NewString()
	for i := range routes {
		if src := routes[i].Src; src != nil {
			addrs.Insert(src.String())
		}
	}
	return addrs, nil
}

View File

@ -20,6 +20,8 @@ package ipvs
import (
"fmt"
"k8s.io/apimachinery/pkg/util/sets"
)
type emptyHandle struct {
@ -49,3 +51,8 @@ func (h *emptyHandle) EnsureDummyDevice(devName string) (bool, error) {
func (h *emptyHandle) DeleteDummyDevice(devName string) error {
return fmt.Errorf("netlink is not supported in this platform")
}
// GetLocalAddresses is part of the NetLinkHandle interface. This handle has
// no netlink support, so the call always fails and no addresses are returned.
func (h *emptyHandle) GetLocalAddresses(filterDev string) (sets.String, error) {
	err := fmt.Errorf("netlink is not supported in this platform")
	return nil, err
}

View File

@ -35,7 +35,6 @@ import (
clientv1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/types"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/apimachinery/pkg/util/wait"
utilfeature "k8s.io/apiserver/pkg/util/feature"
@ -171,36 +170,57 @@ type IPGetter interface {
NodeIPs() ([]net.IP, error)
}
type realIPGetter struct{}
// realIPGetter is the real NodeIP handler; it implements IPGetter by listing
// the host's local addresses through a NetLinkHandle.
type realIPGetter struct {
// nl is the handle used to invoke the netlink interface.
nl NetLinkHandle
}
// NodeIPs returns all LOCAL type IP addresses from host which are taken as the Node IPs of NodePort service.
// Firstly, it will list source IP exists in local route table with `kernel` protocol type. For example,
// $ ip route show table local type local proto kernel
// 10.0.0.1 dev kube-ipvs0 scope host src 10.0.0.1
// 10.0.0.10 dev kube-ipvs0 scope host src 10.0.0.10
// 10.0.0.252 dev kube-ipvs0 scope host src 10.0.0.252
// 100.106.89.164 dev eth0 scope host src 100.106.89.164
// 127.0.0.0/8 dev lo scope host src 127.0.0.1
// 127.0.0.1 dev lo scope host src 127.0.0.1
// 172.17.0.1 dev docker0 scope host src 172.17.0.1
// 192.168.122.1 dev virbr0 scope host src 192.168.122.1
// Then cut the unique src IP fields,
// --> result set1: [10.0.0.1, 10.0.0.10, 10.0.0.252, 100.106.89.164, 127.0.0.1, 172.17.0.1, 192.168.122.1]
// NOTE: For cases where an LB acts as a VIP (e.g. Google cloud), the VIP IP is considered LOCAL, but the protocol
// of the entry is 66, e.g. `10.128.0.6 dev ens4 proto 66 scope host`. Therefore, the rule mentioned above will
// filter these entries out.
// Secondly, as we bind Cluster IPs to the dummy interface in IPVS proxier, we need to filter them out so that
// we can eventually get the Node IPs. Fortunately, the dummy interface created by IPVS proxier is known as `kube-ipvs0`,
// so we just need to specify the `dev kube-ipvs0` argument in ip route command, for example,
// $ ip route show table local type local proto kernel dev kube-ipvs0
// 10.0.0.1 scope host src 10.0.0.1
// 10.0.0.10 scope host src 10.0.0.10
// 10.0.0.252 scope host src 10.0.0.252
// Then cut the unique src IP fields,
// --> result set2: [10.0.0.1, 10.0.0.10, 10.0.0.252]
// Finally, Node IP set = set1 - set2
func (r *realIPGetter) NodeIPs() (ips []net.IP, err error) {
interfaces, err := net.Interfaces()
// Pass in empty filter device name for list all LOCAL type addresses.
allAddress, err := r.nl.GetLocalAddresses("")
if err != nil {
return nil, err
return nil, fmt.Errorf("error listing LOCAL type addresses from host, error: %v", err)
}
for i := range interfaces {
name := interfaces[i].Name
// We assume node ip bind to eth{x}
if !strings.HasPrefix(name, "eth") {
continue
}
intf, err := net.InterfaceByName(name)
if err != nil {
utilruntime.HandleError(fmt.Errorf("Failed to get interface by name: %s, error: %v", name, err))
continue
}
addrs, err := intf.Addrs()
if err != nil {
utilruntime.HandleError(fmt.Errorf("Failed to get addresses from interface: %s, error: %v", name, err))
continue
}
for _, a := range addrs {
if ipnet, ok := a.(*net.IPNet); ok {
ips = append(ips, ipnet.IP)
}
}
dummyAddress, err := r.nl.GetLocalAddresses(DefaultDummyDevice)
if err != nil {
return nil, fmt.Errorf("error listing LOCAL type addresses from device: %s, error: %v", DefaultDummyDevice, err)
}
return
// exclude ip address from dummy interface created by IPVS proxier - they are all Cluster IPs.
nodeAddress := allAddress.Difference(dummyAddress)
// translate ip string to IP
for _, ipStr := range nodeAddress.UnsortedList() {
ips = append(ips, net.ParseIP(ipStr))
}
return ips, nil
}
// Proxier implements ProxyProvider
@ -294,7 +314,7 @@ func NewProxier(ipt utiliptables.Interface,
healthzServer: healthzServer,
ipvs: ipvs,
ipvsScheduler: scheduler,
ipGetter: &realIPGetter{},
ipGetter: &realIPGetter{nl: NewNetLinkHandle()},
iptablesData: bytes.NewBuffer(nil),
natChains: bytes.NewBuffer(nil),
natRules: bytes.NewBuffer(nil),

View File

@ -264,6 +264,83 @@ func TestCanUseIPVSProxier(t *testing.T) {
}
}
// TestGetNodeIPs feeds per-device address tables into a fake netlink handle
// and checks that realIPGetter.NodeIPs returns every address except those
// registered on the kube-ipvs0 device.
func TestGetNodeIPs(t *testing.T) {
	type nodeIPCase struct {
		// devAddresses maps a device name to the addresses registered on it.
		devAddresses map[string][]string
		// expectIPs is the set of addresses NodeIPs is expected to return.
		expectIPs []string
	}

	cases := []nodeIPCase{
		// case 0
		{
			devAddresses: map[string][]string{"eth0": {"1.2.3.4"}, "lo": {"127.0.0.1"}},
			expectIPs:    []string{"1.2.3.4", "127.0.0.1"},
		},
		// case 1
		{
			devAddresses: map[string][]string{"lo": {"127.0.0.1"}},
			expectIPs:    []string{"127.0.0.1"},
		},
		// case 2
		{
			devAddresses: map[string][]string{},
			expectIPs:    []string{},
		},
		// case 3
		{
			devAddresses: map[string][]string{"encap0": {"10.20.30.40"}, "lo": {"127.0.0.1"}, "docker0": {"172.17.0.1"}},
			expectIPs:    []string{"10.20.30.40", "127.0.0.1", "172.17.0.1"},
		},
		// case 4
		{
			devAddresses: map[string][]string{"encaps9": {"10.20.30.40"}, "lo": {"127.0.0.1"}, "encap7": {"10.20.30.31"}},
			expectIPs:    []string{"10.20.30.40", "127.0.0.1", "10.20.30.31"},
		},
		// case 5
		{
			devAddresses: map[string][]string{"kube-ipvs0": {"1.2.3.4"}, "lo": {"127.0.0.1"}, "encap7": {"10.20.30.31"}},
			expectIPs:    []string{"127.0.0.1", "10.20.30.31"},
		},
		// case 6
		{
			devAddresses: map[string][]string{"kube-ipvs0": {"1.2.3.4", "2.3.4.5"}, "lo": {"127.0.0.1"}},
			expectIPs:    []string{"127.0.0.1"},
		},
		// case 7
		{
			devAddresses: map[string][]string{"kube-ipvs0": {"1.2.3.4", "2.3.4.5"}},
			expectIPs:    []string{},
		},
		// case 8
		{
			devAddresses: map[string][]string{"kube-ipvs0": {"1.2.3.4", "2.3.4.5"}, "eth5": {"3.4.5.6"}, "lo": {"127.0.0.1"}},
			expectIPs:    []string{"127.0.0.1", "3.4.5.6"},
		},
		// case 9
		{
			devAddresses: map[string][]string{"ipvs0": {"1.2.3.4"}, "lo": {"127.0.0.1"}, "encap7": {"10.20.30.31"}},
			expectIPs:    []string{"127.0.0.1", "10.20.30.31", "1.2.3.4"},
		},
	}

	for idx, tc := range cases {
		handle := netlinktest.NewFakeNetlinkHandle()
		for dev, addrs := range tc.devAddresses {
			handle.SetLocalAddresses(dev, addrs...)
		}

		getter := realIPGetter{nl: handle}
		ips, err := getter.NodeIPs()
		if err != nil {
			t.Errorf("Unexpected error: %v", err)
		}

		// Compare as sets: NodeIPs makes no ordering guarantee.
		got := sets.NewString()
		for _, ip := range ips {
			got.Insert(ip.String())
		}
		want := sets.NewString(tc.expectIPs...)
		if !got.Equal(want) {
			t.Errorf("case[%d], unexpected mismatch, expected: %v, got: %v", idx, tc.expectIPs, ips)
		}
	}
}
func TestNodePort(t *testing.T) {
ipt := iptablestest.NewFake()
ipvs := ipvstest.NewFake()

View File

@ -5,6 +5,7 @@ licenses(["notice"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
"go_test",
)
go_library(
@ -12,6 +13,7 @@ go_library(
srcs = ["fake.go"],
importpath = "k8s.io/kubernetes/pkg/proxy/ipvs/testing",
tags = ["automanaged"],
deps = ["//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library"],
)
filegroup(
@ -26,3 +28,11 @@ filegroup(
srcs = [":package-srcs"],
tags = ["automanaged"],
)
go_test(
name = "go_default_test",
srcs = ["fake_test.go"],
importpath = "k8s.io/kubernetes/pkg/proxy/ipvs/testing",
library = ":go_default_library",
deps = ["//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library"],
)

View File

@ -16,21 +16,33 @@ limitations under the License.
package testing
//FakeNetlinkHandle mock implementation of proxy NetlinkHandle
import (
"fmt"
"k8s.io/apimachinery/pkg/util/sets"
)
// FakeNetlinkHandle is a mock implementation of the proxy NetLinkHandle interface.
type FakeNetlinkHandle struct {
// localAddresses maps a network interface name to all of its IP addresses, e.g.
// eth0 -> [1.2.3.4, 10.20.30.40]
// It is populated by SetLocalAddresses and read by GetLocalAddresses.
localAddresses map[string][]string
}
//NewFakeNetlinkHandle will create a new FakeNetlinkHandle
// NewFakeNetlinkHandle will create a new FakeNetlinkHandle
func NewFakeNetlinkHandle() *FakeNetlinkHandle {
return &FakeNetlinkHandle{}
fake := &FakeNetlinkHandle{
localAddresses: make(map[string][]string),
}
return fake
}
//EnsureAddressBind is a mock implementation
// EnsureAddressBind is a mock implementation
func (h *FakeNetlinkHandle) EnsureAddressBind(address, devName string) (exist bool, err error) {
return false, nil
}
//UnbindAddress is a mock implementation
// UnbindAddress is a mock implementation
func (h *FakeNetlinkHandle) UnbindAddress(address, devName string) error {
return nil
}
@ -44,3 +56,36 @@ func (h *FakeNetlinkHandle) EnsureDummyDevice(devName string) (bool, error) {
func (h *FakeNetlinkHandle) DeleteDummyDevice(devName string) error {
return nil
}
// GetLocalAddresses is a mock implementation. It returns the addresses
// registered for filterDev or, when filterDev is empty, the union of the
// addresses registered for every device. It never returns an error.
func (h *FakeNetlinkHandle) GetLocalAddresses(filterDev string) (sets.String, error) {
	if filterDev != "" {
		// A device with no registered addresses yields an empty set.
		return sets.NewString(h.localAddresses[filterDev]...), nil
	}

	// No filter: merge the addresses of all known devices.
	all := sets.NewString()
	for _, addrs := range h.localAddresses {
		all.Insert(addrs...)
	}
	return all, nil
}
// SetLocalAddresses replaces the IP addresses recorded for the given device
// with ips. It is a test helper and not part of the NetLinkHandle interface.
// An empty device name is rejected with an error.
func (h *FakeNetlinkHandle) SetLocalAddresses(dev string, ips ...string) error {
	// Tolerate a zero-value FakeNetlinkHandle: writing to a nil map panics.
	if h.localAddresses == nil {
		h.localAddresses = map[string][]string{}
	}
	if dev == "" {
		return fmt.Errorf("device name can't be empty")
	}
	// Store a fresh copy so the caller's slice is never aliased.
	fresh := make([]string, 0, len(ips))
	h.localAddresses[dev] = append(fresh, ips...)
	return nil
}

View File

@ -0,0 +1,49 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package testing
import (
"reflect"
"testing"
"k8s.io/apimachinery/pkg/util/sets"
)
// TestSetGetLocalAddresses verifies that addresses registered through
// SetLocalAddresses are returned by GetLocalAddresses, both for a single
// device and, with an empty device filter, across all devices. It also checks
// that an empty device name is rejected.
func TestSetGetLocalAddresses(t *testing.T) {
	fake := NewFakeNetlinkHandle()

	// mustSet registers addresses and aborts the test on an unexpected error,
	// since every later assertion depends on the registration succeeding.
	mustSet := func(dev string, ips ...string) {
		if err := fake.SetLocalAddresses(dev, ips...); err != nil {
			t.Fatalf("SetLocalAddresses(%q, %v): unexpected error: %v", dev, ips, err)
		}
	}
	// expectAddresses compares the addresses listed for dev ("" means all
	// devices) against expected, reporting any error instead of dropping it.
	expectAddresses := func(dev string, expected sets.String) {
		got, err := fake.GetLocalAddresses(dev)
		if err != nil {
			t.Errorf("GetLocalAddresses(%q): unexpected error: %v", dev, err)
			return
		}
		if !reflect.DeepEqual(expected, got) {
			t.Errorf("Unexpected mismatch, expected: %v, got: %v", expected, got)
		}
	}

	// An empty device name must be rejected and must not register anything.
	if err := fake.SetLocalAddresses("", "1.2.3.4"); err == nil {
		t.Errorf("SetLocalAddresses with empty device name: expected an error, got nil")
	}
	expectAddresses("", sets.NewString())

	mustSet("eth0", "1.2.3.4")
	expectAddresses("eth0", sets.NewString("1.2.3.4"))
	expectAddresses("", sets.NewString("1.2.3.4"))

	mustSet("lo", "127.0.0.1")
	expectAddresses("lo", sets.NewString("127.0.0.1"))
	expectAddresses("", sets.NewString("1.2.3.4", "127.0.0.1"))
}

View File

@ -7,6 +7,8 @@ go_library(
"class.go",
"conntrack_unspecified.go",
"filter.go",
"fou.go",
"fou_unspecified.go",
"genetlink_unspecified.go",
"handle_unspecified.go",
"link.go",
@ -31,6 +33,7 @@ go_library(
"class_linux.go",
"conntrack_linux.go",
"filter_linux.go",
"fou_linux.go",
"genetlink_linux.go",
"gtp_linux.go",
"handle_linux.go",
@ -55,6 +58,7 @@ go_library(
deps = [
"//vendor/github.com/vishvananda/netlink/nl:go_default_library",
"//vendor/github.com/vishvananda/netns:go_default_library",
"//vendor/golang.org/x/sys/unix:go_default_library",
],
)

View File

@ -3,7 +3,8 @@ DIRS := \
nl
DEPS = \
github.com/vishvananda/netns
github.com/vishvananda/netns \
golang.org/x/sys/unix
uniq = $(if $1,$(firstword $1) $(call uniq,$(filter-out $(firstword $1),$1)))
testdirs = $(call uniq,$(foreach d,$(1),$(dir $(wildcard $(d)/*_test.go))))
@ -18,7 +19,7 @@ $(call goroot,$(DEPS)):
.PHONY: $(call testdirs,$(DIRS))
$(call testdirs,$(DIRS)):
sudo -E go test -test.parallel 4 -timeout 60s -v github.com/vishvananda/netlink/$@
go test -test.exec sudo -test.parallel 4 -timeout 60s -test.v github.com/vishvananda/netlink/$@
$(call fmt,$(call testdirs,$(DIRS))):
! gofmt -l $(subst fmt-,,$@)/*.go | grep -q .

View File

@ -89,3 +89,4 @@ There are also a few pieces of low level netlink functionality that still
need to be implemented. Routing rules are not in place and some of the
more advanced link types. Hopefully there is decent structure and testing
in place to make these fairly straightforward to add.

View File

@ -2,13 +2,12 @@ package netlink
import (
"fmt"
"log"
"net"
"strings"
"syscall"
"github.com/vishvananda/netlink/nl"
"github.com/vishvananda/netns"
"golang.org/x/sys/unix"
)
// IFA_FLAGS is a u32 attribute.
@ -23,7 +22,7 @@ func AddrAdd(link Link, addr *Addr) error {
// AddrAdd will add an IP address to a link device.
// Equivalent to: `ip addr add $addr dev $link`
func (h *Handle) AddrAdd(link Link, addr *Addr) error {
req := h.newNetlinkRequest(syscall.RTM_NEWADDR, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK)
req := h.newNetlinkRequest(unix.RTM_NEWADDR, unix.NLM_F_CREATE|unix.NLM_F_EXCL|unix.NLM_F_ACK)
return h.addrHandle(link, addr, req)
}
@ -36,7 +35,7 @@ func AddrReplace(link Link, addr *Addr) error {
// AddrReplace will replace (or, if not present, add) an IP address on a link device.
// Equivalent to: `ip addr replace $addr dev $link`
func (h *Handle) AddrReplace(link Link, addr *Addr) error {
req := h.newNetlinkRequest(syscall.RTM_NEWADDR, syscall.NLM_F_CREATE|syscall.NLM_F_REPLACE|syscall.NLM_F_ACK)
req := h.newNetlinkRequest(unix.RTM_NEWADDR, unix.NLM_F_CREATE|unix.NLM_F_REPLACE|unix.NLM_F_ACK)
return h.addrHandle(link, addr, req)
}
@ -49,7 +48,7 @@ func AddrDel(link Link, addr *Addr) error {
// AddrDel will delete an IP address from a link device.
// Equivalent to: `ip addr del $addr dev $link`
func (h *Handle) AddrDel(link Link, addr *Addr) error {
req := h.newNetlinkRequest(syscall.RTM_DELADDR, syscall.NLM_F_ACK)
req := h.newNetlinkRequest(unix.RTM_DELADDR, unix.NLM_F_ACK)
return h.addrHandle(link, addr, req)
}
@ -76,7 +75,7 @@ func (h *Handle) addrHandle(link Link, addr *Addr, req *nl.NetlinkRequest) error
localAddrData = addr.IP.To16()
}
localData := nl.NewRtAttr(syscall.IFA_LOCAL, localAddrData)
localData := nl.NewRtAttr(unix.IFA_LOCAL, localAddrData)
req.AddData(localData)
var peerAddrData []byte
if addr.Peer != nil {
@ -89,7 +88,7 @@ func (h *Handle) addrHandle(link Link, addr *Addr, req *nl.NetlinkRequest) error
peerAddrData = localAddrData
}
addressData := nl.NewRtAttr(syscall.IFA_ADDRESS, peerAddrData)
addressData := nl.NewRtAttr(unix.IFA_ADDRESS, peerAddrData)
req.AddData(addressData)
if addr.Flags != 0 {
@ -110,14 +109,14 @@ func (h *Handle) addrHandle(link Link, addr *Addr, req *nl.NetlinkRequest) error
}
addr.Broadcast = calcBroadcast
}
req.AddData(nl.NewRtAttr(syscall.IFA_BROADCAST, addr.Broadcast))
req.AddData(nl.NewRtAttr(unix.IFA_BROADCAST, addr.Broadcast))
if addr.Label != "" {
labelData := nl.NewRtAttr(syscall.IFA_LABEL, nl.ZeroTerminated(addr.Label))
labelData := nl.NewRtAttr(unix.IFA_LABEL, nl.ZeroTerminated(addr.Label))
req.AddData(labelData)
}
_, err := req.Execute(syscall.NETLINK_ROUTE, 0)
_, err := req.Execute(unix.NETLINK_ROUTE, 0)
return err
}
@ -132,11 +131,11 @@ func AddrList(link Link, family int) ([]Addr, error) {
// Equivalent to: `ip addr show`.
// The list can be filtered by link and ip family.
func (h *Handle) AddrList(link Link, family int) ([]Addr, error) {
req := h.newNetlinkRequest(syscall.RTM_GETADDR, syscall.NLM_F_DUMP)
req := h.newNetlinkRequest(unix.RTM_GETADDR, unix.NLM_F_DUMP)
msg := nl.NewIfInfomsg(family)
req.AddData(msg)
msgs, err := req.Execute(syscall.NETLINK_ROUTE, syscall.RTM_NEWADDR)
msgs, err := req.Execute(unix.NETLINK_ROUTE, unix.RTM_NEWADDR)
if err != nil {
return nil, err
}
@ -188,21 +187,21 @@ func parseAddr(m []byte) (addr Addr, family, index int, err error) {
var local, dst *net.IPNet
for _, attr := range attrs {
switch attr.Attr.Type {
case syscall.IFA_ADDRESS:
case unix.IFA_ADDRESS:
dst = &net.IPNet{
IP: attr.Value,
Mask: net.CIDRMask(int(msg.Prefixlen), 8*len(attr.Value)),
}
addr.Peer = dst
case syscall.IFA_LOCAL:
case unix.IFA_LOCAL:
local = &net.IPNet{
IP: attr.Value,
Mask: net.CIDRMask(int(msg.Prefixlen), 8*len(attr.Value)),
}
addr.IPNet = local
case syscall.IFA_BROADCAST:
case unix.IFA_BROADCAST:
addr.Broadcast = attr.Value
case syscall.IFA_LABEL:
case unix.IFA_LABEL:
addr.Label = string(attr.Value[:len(attr.Value)-1])
case IFA_FLAGS:
addr.Flags = int(native.Uint32(attr.Value[0:4]))
@ -237,17 +236,35 @@ type AddrUpdate struct {
// AddrSubscribe takes a chan down which notifications will be sent
// when addresses change. Close the 'done' chan to stop subscription.
func AddrSubscribe(ch chan<- AddrUpdate, done <-chan struct{}) error {
return addrSubscribe(netns.None(), netns.None(), ch, done)
return addrSubscribeAt(netns.None(), netns.None(), ch, done, nil)
}
// AddrSubscribeAt works like AddrSubscribe plus it allows the caller
// to choose the network namespace in which to subscribe (ns).
func AddrSubscribeAt(ns netns.NsHandle, ch chan<- AddrUpdate, done <-chan struct{}) error {
return addrSubscribe(ns, netns.None(), ch, done)
return addrSubscribeAt(ns, netns.None(), ch, done, nil)
}
func addrSubscribe(newNs, curNs netns.NsHandle, ch chan<- AddrUpdate, done <-chan struct{}) error {
s, err := nl.SubscribeAt(newNs, curNs, syscall.NETLINK_ROUTE, syscall.RTNLGRP_IPV4_IFADDR, syscall.RTNLGRP_IPV6_IFADDR)
// AddrSubscribeOptions contains a set of options to use with
// AddrSubscribeWithOptions.
type AddrSubscribeOptions struct {
Namespace *netns.NsHandle
ErrorCallback func(error)
}
// AddrSubscribeWithOptions work like AddrSubscribe but enable to
// provide additional options to modify the behavior. Currently, the
// namespace can be provided as well as an error callback.
func AddrSubscribeWithOptions(ch chan<- AddrUpdate, done <-chan struct{}, options AddrSubscribeOptions) error {
if options.Namespace == nil {
none := netns.None()
options.Namespace = &none
}
return addrSubscribeAt(*options.Namespace, netns.None(), ch, done, options.ErrorCallback)
}
func addrSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- AddrUpdate, done <-chan struct{}, cberr func(error)) error {
s, err := nl.SubscribeAt(newNs, curNs, unix.NETLINK_ROUTE, unix.RTNLGRP_IPV4_IFADDR, unix.RTNLGRP_IPV6_IFADDR)
if err != nil {
return err
}
@ -262,25 +279,31 @@ func addrSubscribe(newNs, curNs netns.NsHandle, ch chan<- AddrUpdate, done <-cha
for {
msgs, err := s.Receive()
if err != nil {
log.Printf("netlink.AddrSubscribe: Receive() error: %v", err)
if cberr != nil {
cberr(err)
}
return
}
for _, m := range msgs {
msgType := m.Header.Type
if msgType != syscall.RTM_NEWADDR && msgType != syscall.RTM_DELADDR {
log.Printf("netlink.AddrSubscribe: bad message type: %d", msgType)
continue
if msgType != unix.RTM_NEWADDR && msgType != unix.RTM_DELADDR {
if cberr != nil {
cberr(fmt.Errorf("bad message type: %d", msgType))
}
return
}
addr, _, ifindex, err := parseAddr(m.Data)
if err != nil {
log.Printf("netlink.AddrSubscribe: could not parse address: %v", err)
continue
if cberr != nil {
cberr(fmt.Errorf("could not parse address: %v", err))
}
return
}
ch <- AddrUpdate{LinkAddress: *addr.IPNet,
LinkIndex: ifindex,
NewAddr: msgType == syscall.RTM_NEWADDR,
NewAddr: msgType == unix.RTM_NEWADDR,
Flags: addr.Flags,
Scope: addr.Scope,
PreferedLft: addr.PreferedLft,

View File

@ -2,9 +2,9 @@ package netlink
import (
"fmt"
"syscall"
"github.com/vishvananda/netlink/nl"
"golang.org/x/sys/unix"
)
// BridgeVlanList gets a map of device id to bridge vlan infos.
@ -16,12 +16,12 @@ func BridgeVlanList() (map[int32][]*nl.BridgeVlanInfo, error) {
// BridgeVlanList gets a map of device id to bridge vlan infos.
// Equivalent to: `bridge vlan show`
func (h *Handle) BridgeVlanList() (map[int32][]*nl.BridgeVlanInfo, error) {
req := h.newNetlinkRequest(syscall.RTM_GETLINK, syscall.NLM_F_DUMP)
msg := nl.NewIfInfomsg(syscall.AF_BRIDGE)
req := h.newNetlinkRequest(unix.RTM_GETLINK, unix.NLM_F_DUMP)
msg := nl.NewIfInfomsg(unix.AF_BRIDGE)
req.AddData(msg)
req.AddData(nl.NewRtAttr(nl.IFLA_EXT_MASK, nl.Uint32Attr(uint32(nl.RTEXT_FILTER_BRVLAN))))
msgs, err := req.Execute(syscall.NETLINK_ROUTE, syscall.RTM_NEWLINK)
msgs, err := req.Execute(unix.NETLINK_ROUTE, unix.RTM_NEWLINK)
if err != nil {
return nil, err
}
@ -63,7 +63,7 @@ func BridgeVlanAdd(link Link, vid uint16, pvid, untagged, self, master bool) err
// BridgeVlanAdd adds a new vlan filter entry
// Equivalent to: `bridge vlan add dev DEV vid VID [ pvid ] [ untagged ] [ self ] [ master ]`
func (h *Handle) BridgeVlanAdd(link Link, vid uint16, pvid, untagged, self, master bool) error {
return h.bridgeVlanModify(syscall.RTM_SETLINK, link, vid, pvid, untagged, self, master)
return h.bridgeVlanModify(unix.RTM_SETLINK, link, vid, pvid, untagged, self, master)
}
// BridgeVlanDel adds a new vlan filter entry
@ -75,15 +75,15 @@ func BridgeVlanDel(link Link, vid uint16, pvid, untagged, self, master bool) err
// BridgeVlanDel adds a new vlan filter entry
// Equivalent to: `bridge vlan del dev DEV vid VID [ pvid ] [ untagged ] [ self ] [ master ]`
func (h *Handle) BridgeVlanDel(link Link, vid uint16, pvid, untagged, self, master bool) error {
return h.bridgeVlanModify(syscall.RTM_DELLINK, link, vid, pvid, untagged, self, master)
return h.bridgeVlanModify(unix.RTM_DELLINK, link, vid, pvid, untagged, self, master)
}
func (h *Handle) bridgeVlanModify(cmd int, link Link, vid uint16, pvid, untagged, self, master bool) error {
base := link.Attrs()
h.ensureIndex(base)
req := h.newNetlinkRequest(cmd, syscall.NLM_F_ACK)
req := h.newNetlinkRequest(cmd, unix.NLM_F_ACK)
msg := nl.NewIfInfomsg(syscall.AF_BRIDGE)
msg := nl.NewIfInfomsg(unix.AF_BRIDGE)
msg.Index = int32(base.Index)
req.AddData(msg)
@ -107,7 +107,7 @@ func (h *Handle) bridgeVlanModify(cmd int, link Link, vid uint16, pvid, untagged
}
nl.NewRtAttrChild(br, nl.IFLA_BRIDGE_VLAN_INFO, vlanInfo.Serialize())
req.AddData(br)
_, err := req.Execute(syscall.NETLINK_ROUTE, 0)
_, err := req.Execute(unix.NETLINK_ROUTE, 0)
if err != nil {
return err
}

View File

@ -5,6 +5,7 @@ import (
"syscall"
"github.com/vishvananda/netlink/nl"
"golang.org/x/sys/unix"
)
// NOTE: function is in here because it uses other linux functions
@ -50,7 +51,7 @@ func ClassDel(class Class) error {
// ClassDel will delete a class from the system.
// Equivalent to: `tc class del $class`
func (h *Handle) ClassDel(class Class) error {
return h.classModify(syscall.RTM_DELTCLASS, 0, class)
return h.classModify(unix.RTM_DELTCLASS, 0, class)
}
// ClassChange will change a class in place
@ -64,7 +65,7 @@ func ClassChange(class Class) error {
// Equivalent to: `tc class change $class`
// The parent and handle MUST NOT be changed.
func (h *Handle) ClassChange(class Class) error {
return h.classModify(syscall.RTM_NEWTCLASS, 0, class)
return h.classModify(unix.RTM_NEWTCLASS, 0, class)
}
// ClassReplace will replace a class to the system.
@ -82,7 +83,7 @@ func ClassReplace(class Class) error {
// If a class already exist with this parent/handle pair, the class is changed.
// If a class does not already exist with this parent/handle, a new class is created.
func (h *Handle) ClassReplace(class Class) error {
return h.classModify(syscall.RTM_NEWTCLASS, syscall.NLM_F_CREATE, class)
return h.classModify(unix.RTM_NEWTCLASS, unix.NLM_F_CREATE, class)
}
// ClassAdd will add a class to the system.
@ -95,14 +96,14 @@ func ClassAdd(class Class) error {
// Equivalent to: `tc class add $class`
func (h *Handle) ClassAdd(class Class) error {
return h.classModify(
syscall.RTM_NEWTCLASS,
syscall.NLM_F_CREATE|syscall.NLM_F_EXCL,
unix.RTM_NEWTCLASS,
unix.NLM_F_CREATE|unix.NLM_F_EXCL,
class,
)
}
func (h *Handle) classModify(cmd, flags int, class Class) error {
req := h.newNetlinkRequest(cmd, flags|syscall.NLM_F_ACK)
req := h.newNetlinkRequest(cmd, flags|unix.NLM_F_ACK)
base := class.Attrs()
msg := &nl.TcMsg{
Family: nl.FAMILY_ALL,
@ -112,12 +113,12 @@ func (h *Handle) classModify(cmd, flags int, class Class) error {
}
req.AddData(msg)
if cmd != syscall.RTM_DELTCLASS {
if cmd != unix.RTM_DELTCLASS {
if err := classPayload(req, class); err != nil {
return err
}
}
_, err := req.Execute(syscall.NETLINK_ROUTE, 0)
_, err := req.Execute(unix.NETLINK_ROUTE, 0)
return err
}
@ -141,12 +142,12 @@ func classPayload(req *nl.NetlinkRequest, class Class) error {
var rtab [256]uint32
var ctab [256]uint32
tcrate := nl.TcRateSpec{Rate: uint32(htb.Rate)}
if CalcRtable(&tcrate, rtab, cellLog, uint32(mtu), linklayer) < 0 {
if CalcRtable(&tcrate, rtab[:], cellLog, uint32(mtu), linklayer) < 0 {
return errors.New("HTB: failed to calculate rate table")
}
opt.Rate = tcrate
tcceil := nl.TcRateSpec{Rate: uint32(htb.Ceil)}
if CalcRtable(&tcceil, ctab, ccellLog, uint32(mtu), linklayer) < 0 {
if CalcRtable(&tcceil, ctab[:], ccellLog, uint32(mtu), linklayer) < 0 {
return errors.New("HTB: failed to calculate ceil rate table")
}
opt.Ceil = tcceil
@ -169,7 +170,7 @@ func ClassList(link Link, parent uint32) ([]Class, error) {
// Equivalent to: `tc class show`.
// Generally returns nothing if link and parent are not specified.
func (h *Handle) ClassList(link Link, parent uint32) ([]Class, error) {
req := h.newNetlinkRequest(syscall.RTM_GETTCLASS, syscall.NLM_F_DUMP)
req := h.newNetlinkRequest(unix.RTM_GETTCLASS, unix.NLM_F_DUMP)
msg := &nl.TcMsg{
Family: nl.FAMILY_ALL,
Parent: parent,
@ -181,7 +182,7 @@ func (h *Handle) ClassList(link Link, parent uint32) ([]Class, error) {
}
req.AddData(msg)
msgs, err := req.Execute(syscall.NETLINK_ROUTE, syscall.RTM_NEWTCLASS)
msgs, err := req.Execute(unix.NETLINK_ROUTE, unix.RTM_NEWTCLASS)
if err != nil {
return nil, err
}

View File

@ -6,9 +6,9 @@ import (
"errors"
"fmt"
"net"
"syscall"
"github.com/vishvananda/netlink/nl"
"golang.org/x/sys/unix"
)
// ConntrackTableType Conntrack table for the netlink operation
@ -85,8 +85,8 @@ func (h *Handle) ConntrackTableList(table ConntrackTableType, family InetFamily)
// conntrack -F [table] Flush table
// The flush operation applies to all the family types
func (h *Handle) ConntrackTableFlush(table ConntrackTableType) error {
req := h.newConntrackRequest(table, syscall.AF_INET, nl.IPCTNL_MSG_CT_DELETE, syscall.NLM_F_ACK)
_, err := req.Execute(syscall.NETLINK_NETFILTER, 0)
req := h.newConntrackRequest(table, unix.AF_INET, nl.IPCTNL_MSG_CT_DELETE, unix.NLM_F_ACK)
_, err := req.Execute(unix.NETLINK_NETFILTER, 0)
return err
}
@ -102,10 +102,10 @@ func (h *Handle) ConntrackDeleteFilter(table ConntrackTableType, family InetFami
for _, dataRaw := range res {
flow := parseRawData(dataRaw)
if match := filter.MatchConntrackFlow(flow); match {
req2 := h.newConntrackRequest(table, family, nl.IPCTNL_MSG_CT_DELETE, syscall.NLM_F_ACK)
req2 := h.newConntrackRequest(table, family, nl.IPCTNL_MSG_CT_DELETE, unix.NLM_F_ACK)
// skip the first 4 byte that are the netfilter header, the newConntrackRequest is adding it already
req2.AddRawData(dataRaw[4:])
req2.Execute(syscall.NETLINK_NETFILTER, 0)
req2.Execute(unix.NETLINK_NETFILTER, 0)
matched++
}
}
@ -127,8 +127,8 @@ func (h *Handle) newConntrackRequest(table ConntrackTableType, family InetFamily
}
func (h *Handle) dumpConntrackTable(table ConntrackTableType, family InetFamily) ([][]byte, error) {
req := h.newConntrackRequest(table, family, nl.IPCTNL_MSG_CT_GET, syscall.NLM_F_DUMP)
return req.Execute(syscall.NETLINK_NETFILTER, 0)
req := h.newConntrackRequest(table, family, nl.IPCTNL_MSG_CT_GET, unix.NLM_F_DUMP)
return req.Execute(unix.NETLINK_NETFILTER, 0)
}
// The full conntrack flow structure is very complicated and can be found in the file:

View File

@ -2,8 +2,6 @@ package netlink
import (
"fmt"
"github.com/vishvananda/netlink/nl"
)
type Filter interface {
@ -19,7 +17,7 @@ type FilterAttrs struct {
Handle uint32
Parent uint32
Priority uint16 // lower is higher priority
Protocol uint16 // syscall.ETH_P_*
Protocol uint16 // unix.ETH_P_*
}
func (q FilterAttrs) String() string {
@ -184,14 +182,6 @@ func NewMirredAction(redirIndex int) *MirredAction {
}
}
// Constants used in TcU32Sel.Flags.
const (
TC_U32_TERMINAL = nl.TC_U32_TERMINAL
TC_U32_OFFSET = nl.TC_U32_OFFSET
TC_U32_VAROFFSET = nl.TC_U32_VAROFFSET
TC_U32_EAT = nl.TC_U32_EAT
)
// Sel of the U32 filters that contains multiple TcU32Key. This is the copy
// and the frontend representation of nl.TcU32Sel. It is serialized into canonical
// nl.TcU32Sel with the appropriate endianness.

View File

@ -9,6 +9,15 @@ import (
"unsafe"
"github.com/vishvananda/netlink/nl"
"golang.org/x/sys/unix"
)
// Constants used in TcU32Sel.Flags.
// Each one is an alias for the identically named constant in package nl.
const (
	TC_U32_TERMINAL  = nl.TC_U32_TERMINAL
	TC_U32_OFFSET    = nl.TC_U32_OFFSET
	TC_U32_VAROFFSET = nl.TC_U32_VAROFFSET
	TC_U32_EAT       = nl.TC_U32_EAT
)
// Fw filter filters on firewall marks
@ -47,7 +56,7 @@ func NewFw(attrs FilterAttrs, fattrs FilterFwAttrs) (*Fw, error) {
if police.Rate.Rate != 0 {
police.Rate.Mpu = fattrs.Mpu
police.Rate.Overhead = fattrs.Overhead
if CalcRtable(&police.Rate, rtab, rcellLog, fattrs.Mtu, linklayer) < 0 {
if CalcRtable(&police.Rate, rtab[:], rcellLog, fattrs.Mtu, linklayer) < 0 {
return nil, errors.New("TBF: failed to calculate rate table")
}
police.Burst = uint32(Xmittime(uint64(police.Rate.Rate), uint32(buffer)))
@ -56,7 +65,7 @@ func NewFw(attrs FilterAttrs, fattrs FilterFwAttrs) (*Fw, error) {
if police.PeakRate.Rate != 0 {
police.PeakRate.Mpu = fattrs.Mpu
police.PeakRate.Overhead = fattrs.Overhead
if CalcRtable(&police.PeakRate, ptab, pcellLog, fattrs.Mtu, linklayer) < 0 {
if CalcRtable(&police.PeakRate, ptab[:], pcellLog, fattrs.Mtu, linklayer) < 0 {
return nil, errors.New("POLICE: failed to calculate peak rate table")
}
}
@ -90,7 +99,7 @@ func FilterDel(filter Filter) error {
// FilterDel will delete a filter from the system.
// Equivalent to: `tc filter del $filter`
func (h *Handle) FilterDel(filter Filter) error {
req := h.newNetlinkRequest(syscall.RTM_DELTFILTER, syscall.NLM_F_ACK)
req := h.newNetlinkRequest(unix.RTM_DELTFILTER, unix.NLM_F_ACK)
base := filter.Attrs()
msg := &nl.TcMsg{
Family: nl.FAMILY_ALL,
@ -101,7 +110,7 @@ func (h *Handle) FilterDel(filter Filter) error {
}
req.AddData(msg)
_, err := req.Execute(syscall.NETLINK_ROUTE, 0)
_, err := req.Execute(unix.NETLINK_ROUTE, 0)
return err
}
@ -115,7 +124,7 @@ func FilterAdd(filter Filter) error {
// Equivalent to: `tc filter add $filter`
func (h *Handle) FilterAdd(filter Filter) error {
native = nl.NativeEndian()
req := h.newNetlinkRequest(syscall.RTM_NEWTFILTER, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK)
req := h.newNetlinkRequest(unix.RTM_NEWTFILTER, unix.NLM_F_CREATE|unix.NLM_F_EXCL|unix.NLM_F_ACK)
base := filter.Attrs()
msg := &nl.TcMsg{
Family: nl.FAMILY_ALL,
@ -128,9 +137,11 @@ func (h *Handle) FilterAdd(filter Filter) error {
req.AddData(nl.NewRtAttr(nl.TCA_KIND, nl.ZeroTerminated(filter.Type())))
options := nl.NewRtAttr(nl.TCA_OPTIONS, nil)
if u32, ok := filter.(*U32); ok {
switch filter := filter.(type) {
case *U32:
// Convert TcU32Sel into nl.TcU32Sel as it is without copy.
sel := (*nl.TcU32Sel)(unsafe.Pointer(u32.Sel))
sel := (*nl.TcU32Sel)(unsafe.Pointer(filter.Sel))
if sel == nil {
// match all
sel = &nl.TcU32Sel{
@ -158,63 +169,63 @@ func (h *Handle) FilterAdd(filter Filter) error {
}
sel.Nkeys = uint8(len(sel.Keys))
nl.NewRtAttrChild(options, nl.TCA_U32_SEL, sel.Serialize())
if u32.ClassId != 0 {
nl.NewRtAttrChild(options, nl.TCA_U32_CLASSID, nl.Uint32Attr(u32.ClassId))
if filter.ClassId != 0 {
nl.NewRtAttrChild(options, nl.TCA_U32_CLASSID, nl.Uint32Attr(filter.ClassId))
}
actionsAttr := nl.NewRtAttrChild(options, nl.TCA_U32_ACT, nil)
// backwards compatibility
if u32.RedirIndex != 0 {
u32.Actions = append([]Action{NewMirredAction(u32.RedirIndex)}, u32.Actions...)
if filter.RedirIndex != 0 {
filter.Actions = append([]Action{NewMirredAction(filter.RedirIndex)}, filter.Actions...)
}
if err := EncodeActions(actionsAttr, u32.Actions); err != nil {
if err := EncodeActions(actionsAttr, filter.Actions); err != nil {
return err
}
} else if fw, ok := filter.(*Fw); ok {
if fw.Mask != 0 {
case *Fw:
if filter.Mask != 0 {
b := make([]byte, 4)
native.PutUint32(b, fw.Mask)
native.PutUint32(b, filter.Mask)
nl.NewRtAttrChild(options, nl.TCA_FW_MASK, b)
}
if fw.InDev != "" {
nl.NewRtAttrChild(options, nl.TCA_FW_INDEV, nl.ZeroTerminated(fw.InDev))
if filter.InDev != "" {
nl.NewRtAttrChild(options, nl.TCA_FW_INDEV, nl.ZeroTerminated(filter.InDev))
}
if (fw.Police != nl.TcPolice{}) {
if (filter.Police != nl.TcPolice{}) {
police := nl.NewRtAttrChild(options, nl.TCA_FW_POLICE, nil)
nl.NewRtAttrChild(police, nl.TCA_POLICE_TBF, fw.Police.Serialize())
if (fw.Police.Rate != nl.TcRateSpec{}) {
payload := SerializeRtab(fw.Rtab)
nl.NewRtAttrChild(police, nl.TCA_POLICE_TBF, filter.Police.Serialize())
if (filter.Police.Rate != nl.TcRateSpec{}) {
payload := SerializeRtab(filter.Rtab)
nl.NewRtAttrChild(police, nl.TCA_POLICE_RATE, payload)
}
if (fw.Police.PeakRate != nl.TcRateSpec{}) {
payload := SerializeRtab(fw.Ptab)
if (filter.Police.PeakRate != nl.TcRateSpec{}) {
payload := SerializeRtab(filter.Ptab)
nl.NewRtAttrChild(police, nl.TCA_POLICE_PEAKRATE, payload)
}
}
if fw.ClassId != 0 {
if filter.ClassId != 0 {
b := make([]byte, 4)
native.PutUint32(b, fw.ClassId)
native.PutUint32(b, filter.ClassId)
nl.NewRtAttrChild(options, nl.TCA_FW_CLASSID, b)
}
} else if bpf, ok := filter.(*BpfFilter); ok {
case *BpfFilter:
var bpfFlags uint32
if bpf.ClassId != 0 {
nl.NewRtAttrChild(options, nl.TCA_BPF_CLASSID, nl.Uint32Attr(bpf.ClassId))
if filter.ClassId != 0 {
nl.NewRtAttrChild(options, nl.TCA_BPF_CLASSID, nl.Uint32Attr(filter.ClassId))
}
if bpf.Fd >= 0 {
nl.NewRtAttrChild(options, nl.TCA_BPF_FD, nl.Uint32Attr((uint32(bpf.Fd))))
if filter.Fd >= 0 {
nl.NewRtAttrChild(options, nl.TCA_BPF_FD, nl.Uint32Attr((uint32(filter.Fd))))
}
if bpf.Name != "" {
nl.NewRtAttrChild(options, nl.TCA_BPF_NAME, nl.ZeroTerminated(bpf.Name))
if filter.Name != "" {
nl.NewRtAttrChild(options, nl.TCA_BPF_NAME, nl.ZeroTerminated(filter.Name))
}
if bpf.DirectAction {
if filter.DirectAction {
bpfFlags |= nl.TCA_BPF_FLAG_ACT_DIRECT
}
nl.NewRtAttrChild(options, nl.TCA_BPF_FLAGS, nl.Uint32Attr(bpfFlags))
}
req.AddData(options)
_, err := req.Execute(syscall.NETLINK_ROUTE, 0)
_, err := req.Execute(unix.NETLINK_ROUTE, 0)
return err
}
@ -229,7 +240,7 @@ func FilterList(link Link, parent uint32) ([]Filter, error) {
// Equivalent to: `tc filter show`.
// Generally returns nothing if link and parent are not specified.
func (h *Handle) FilterList(link Link, parent uint32) ([]Filter, error) {
req := h.newNetlinkRequest(syscall.RTM_GETTFILTER, syscall.NLM_F_DUMP)
req := h.newNetlinkRequest(unix.RTM_GETTFILTER, unix.NLM_F_DUMP)
msg := &nl.TcMsg{
Family: nl.FAMILY_ALL,
Parent: parent,
@ -241,7 +252,7 @@ func (h *Handle) FilterList(link Link, parent uint32) ([]Filter, error) {
}
req.AddData(msg)
msgs, err := req.Execute(syscall.NETLINK_ROUTE, syscall.RTM_NEWTFILTER)
msgs, err := req.Execute(unix.NETLINK_ROUTE, unix.RTM_NEWTFILTER)
if err != nil {
return nil, err
}
@ -552,7 +563,7 @@ func AdjustSize(sz uint, mpu uint, linklayer int) uint {
}
}
func CalcRtable(rate *nl.TcRateSpec, rtab [256]uint32, cellLog int, mtu uint32, linklayer int) int {
func CalcRtable(rate *nl.TcRateSpec, rtab []uint32, cellLog int, mtu uint32, linklayer int) int {
bps := rate.Rate
mpu := rate.Mpu
var sz uint

21
vendor/github.com/vishvananda/netlink/fou.go generated vendored Normal file
View File

@ -0,0 +1,21 @@
package netlink
import (
"errors"
)
var (
// ErrAttrHeaderTruncated is returned when a netlink attribute's header is
// truncated.
ErrAttrHeaderTruncated = errors.New("attribute header truncated")
// ErrAttrBodyTruncated is returned when a netlink attribute's body is
// truncated.
ErrAttrBodyTruncated = errors.New("attribute body truncated")
)
// Fou holds the parameters of one entry of the "fou" generic netlink family,
// as passed to FouAdd/FouDel and returned by FouList.
type Fou struct {
	// Family is sent as the single-byte FOU_ATTR_AF attribute.
	Family int
	// Port is sent as the two-byte, big-endian FOU_ATTR_PORT attribute.
	Port int
	// Protocol is sent as the single-byte FOU_ATTR_IPPROTO attribute.
	// The linux FouAdd rejects a non-zero Protocol together with
	// FOU_ENCAP_GUE.
	Protocol int
	// EncapType is sent as the single-byte FOU_ATTR_TYPE attribute; the
	// linux implementation defines the FOU_ENCAP_* values for it.
	EncapType int
}

215
vendor/github.com/vishvananda/netlink/fou_linux.go generated vendored Normal file
View File

@ -0,0 +1,215 @@
// +build linux
package netlink
import (
"encoding/binary"
"errors"
"github.com/vishvananda/netlink/nl"
"golang.org/x/sys/unix"
)
// FOU_GENL_NAME is the generic netlink family name that FouFamilyId passes to
// GenlFamilyGet.
const (
	FOU_GENL_NAME = "fou"
)

// Commands of the fou family. FouAdd, FouDel and FouList place the command in
// the first byte of the raw request payload. FOU_CMD_MAX aliases the highest
// defined command.
const (
	FOU_CMD_UNSPEC uint8 = iota
	FOU_CMD_ADD
	FOU_CMD_DEL
	FOU_CMD_GET
	FOU_CMD_MAX = FOU_CMD_GET
)

// Attribute types of the fou family. PORT, AF, IPPROTO and TYPE are the ones
// written with nl.NewRtAttr and recognised by deserializeFouMsg in this file.
// FOU_ATTR_MAX aliases the highest defined attribute.
const (
	FOU_ATTR_UNSPEC = iota
	FOU_ATTR_PORT
	FOU_ATTR_AF
	FOU_ATTR_IPPROTO
	FOU_ATTR_TYPE
	FOU_ATTR_REMCSUM_NOPARTIAL
	FOU_ATTR_MAX = FOU_ATTR_REMCSUM_NOPARTIAL
)

// Values for Fou.EncapType. FouAdd refuses FOU_ENCAP_GUE combined with a
// non-zero Fou.Protocol. FOU_ENCAP_MAX aliases the highest defined value.
const (
	FOU_ENCAP_UNSPEC = iota
	FOU_ENCAP_DIRECT
	FOU_ENCAP_GUE
	FOU_ENCAP_MAX = FOU_ENCAP_GUE
)
// fouFamilyId caches the id resolved by FouFamilyId; zero means "not yet
// resolved".
var fouFamilyId int

// FouFamilyId returns the generic netlink family id registered under
// FOU_GENL_NAME. The id is looked up through GenlFamilyGet the first time it
// is needed and served from the package-level cache afterwards. When the
// lookup fails, -1 and the lookup error are returned and nothing is cached.
func FouFamilyId() (int, error) {
	if fouFamilyId == 0 {
		family, err := GenlFamilyGet(FOU_GENL_NAME)
		if err != nil {
			return -1, err
		}
		fouFamilyId = int(family.ID)
	}

	return fouFamilyId, nil
}
// FouAdd calls FouAdd on the package-level handle pkgHandle.
func FouAdd(f Fou) error {
	return pkgHandle.FouAdd(f)
}
// FouAdd sends a FOU_CMD_ADD request for f to the fou generic netlink family,
// with NLM_F_ACK set on the request.
//
// An error is returned when the family id cannot be resolved, when f combines
// FOU_ENCAP_GUE with a non-zero Protocol, or when executing the request fails.
func (h *Handle) FouAdd(f Fou) error {
	familyID, err := FouFamilyId()
	if err != nil {
		return err
	}

	// setting ip protocol conflicts with encapsulation type GUE
	if f.EncapType == FOU_ENCAP_GUE && f.Protocol != 0 {
		return errors.New("GUE encapsulation doesn't specify an IP protocol")
	}

	req := h.newNetlinkRequest(familyID, unix.NLM_F_ACK)

	// The port travels as two bytes in big-endian order.
	port := make([]byte, 2)
	binary.BigEndian.PutUint16(port, uint16(f.Port))

	// The payload is four leading bytes {command, 1, 0, 0} followed by the
	// serialized attributes, in this order.
	payload := []byte{FOU_CMD_ADD, 1, 0, 0}
	for _, attr := range []*nl.RtAttr{
		nl.NewRtAttr(FOU_ATTR_PORT, port),
		nl.NewRtAttr(FOU_ATTR_TYPE, []byte{uint8(f.EncapType)}),
		nl.NewRtAttr(FOU_ATTR_AF, []byte{uint8(f.Family)}),
		nl.NewRtAttr(FOU_ATTR_IPPROTO, []byte{uint8(f.Protocol)}),
	} {
		payload = append(payload, attr.Serialize()...)
	}
	req.AddRawData(payload)

	_, err = req.Execute(unix.NETLINK_GENERIC, 0)
	return err
}
// FouDel calls FouDel on the package-level handle pkgHandle.
func FouDel(f Fou) error {
	return pkgHandle.FouDel(f)
}
// FouDel sends a FOU_CMD_DEL request to the fou generic netlink family, with
// NLM_F_ACK set on the request. Only f.Port and f.Family are put into the
// request; the other fields of f are not used.
//
// An error is returned when the family id cannot be resolved or when
// executing the request fails.
func (h *Handle) FouDel(f Fou) error {
	familyID, err := FouFamilyId()
	if err != nil {
		return err
	}

	req := h.newNetlinkRequest(familyID, unix.NLM_F_ACK)

	// The port travels as two bytes in big-endian order.
	port := make([]byte, 2)
	binary.BigEndian.PutUint16(port, uint16(f.Port))

	// The payload is four leading bytes {command, 1, 0, 0} followed by the
	// serialized attributes, in this order.
	payload := []byte{FOU_CMD_DEL, 1, 0, 0}
	for _, attr := range []*nl.RtAttr{
		nl.NewRtAttr(FOU_ATTR_PORT, port),
		nl.NewRtAttr(FOU_ATTR_AF, []byte{uint8(f.Family)}),
	} {
		payload = append(payload, attr.Serialize()...)
	}
	req.AddRawData(payload)

	_, err = req.Execute(unix.NETLINK_GENERIC, 0)
	return err
}
// FouList calls FouList on the package-level handle pkgHandle.
func FouList(fam int) ([]Fou, error) {
	return pkgHandle.FouList(fam)
}
// FouList sends a FOU_CMD_GET dump request carrying fam as FOU_ATTR_AF and
// decodes every reply message with deserializeFouMsg.
//
// If resolving the family id or executing the request fails, nil and the
// error are returned. If decoding a reply fails, the entries decoded so far
// are returned together with the decoding error.
func (h *Handle) FouList(fam int) ([]Fou, error) {
	familyID, err := FouFamilyId()
	if err != nil {
		return nil, err
	}

	req := h.newNetlinkRequest(familyID, unix.NLM_F_DUMP)

	// The payload is four leading bytes {command, 1, 0, 0} followed by the
	// single serialized address-family attribute.
	payload := []byte{FOU_CMD_GET, 1, 0, 0}
	for _, attr := range []*nl.RtAttr{
		nl.NewRtAttr(FOU_ATTR_AF, []byte{uint8(fam)}),
	} {
		payload = append(payload, attr.Serialize()...)
	}
	req.AddRawData(payload)

	replies, err := req.Execute(unix.NETLINK_GENERIC, 0)
	if err != nil {
		return nil, err
	}

	entries := make([]Fou, 0, len(replies))
	for _, reply := range replies {
		entry, err := deserializeFouMsg(reply)
		if err != nil {
			return entries, err
		}
		entries = append(entries, entry)
	}

	return entries, nil
}
// deserializeFouMsg decodes one reply message of the fou generic netlink
// family into a Fou.
//
// The first three bytes of msg are skipped and the rest is walked attribute
// by attribute. At each step the big-endian uint16 at offset 0 is taken as the
// attribute length, the big-endian uint16 at offset 2 as the attribute type,
// and the value is read starting at offset 5. Attribute types other than
// FOU_ATTR_AF, FOU_ATTR_PORT, FOU_ATTR_IPPROTO and FOU_ATTR_TYPE are skipped.
//
// NOTE(review): skipping three bytes instead of four and then reading
// big-endian looks like a way of decoding little-endian 16-bit fields that sit
// one byte further along. If so, this only works on little-endian hosts;
// confirm before reusing the approach.
//
// It returns ErrAttrHeaderTruncated when fewer than four bytes are available
// where an attribute header is expected (including a msg too short for the
// initial skip), and ErrAttrBodyTruncated when the buffer is shorter than the
// declared attribute length requires or than the value about to be read.
// Malformed input yields one of these errors rather than a panic. On error
// the returned Fou holds whatever fields were decoded before the failure.
func deserializeFouMsg(msg []byte) (Fou, error) {
	fou := Fou{}

	// Slicing a buffer shorter than the skip would panic, so check first.
	if len(msg) < 3 {
		return fou, ErrAttrHeaderTruncated
	}
	// we'll skip to byte 4 to first attribute
	msg = msg[3:]

	for {
		// the length and type fields occupy the first 4 bytes
		if len(msg) < 4 {
			return fou, ErrAttrHeaderTruncated
		}

		lgt := int(binary.BigEndian.Uint16(msg[0:2]))
		if len(msg) < lgt+4 {
			return fou, ErrAttrBodyTruncated
		}
		attr := binary.BigEndian.Uint16(msg[2:4])

		// The check above does not guarantee that the value bytes exist
		// when the declared length is too small, so each read below
		// verifies its own bounds.
		shift := lgt + 3
		switch attr {
		case FOU_ATTR_AF:
			if len(msg) < 6 {
				return fou, ErrAttrBodyTruncated
			}
			fou.Family = int(msg[5])
		case FOU_ATTR_PORT:
			if len(msg) < 7 {
				return fou, ErrAttrBodyTruncated
			}
			fou.Port = int(binary.BigEndian.Uint16(msg[5:7]))
			// port is 2 bytes
			shift = lgt + 2
		case FOU_ATTR_IPPROTO:
			if len(msg) < 6 {
				return fou, ErrAttrBodyTruncated
			}
			fou.Protocol = int(msg[5])
		case FOU_ATTR_TYPE:
			if len(msg) < 6 {
				return fou, ErrAttrBodyTruncated
			}
			fou.EncapType = int(msg[5])
		}

		// shift <= lgt+3 < len(msg), so this slice is always in range.
		msg = msg[shift:]

		if len(msg) < 4 {
			break
		}
	}

	return fou, nil
}

View File

@ -0,0 +1,15 @@
// +build !linux
package netlink
// FouAdd is the variant built when the target is not linux (see the build
// constraint at the top of this file); it always returns ErrNotImplemented.
func FouAdd(f Fou) error {
	return ErrNotImplemented
}
// FouDel is the variant built when the target is not linux (see the build
// constraint at the top of this file); it always returns ErrNotImplemented.
func FouDel(f Fou) error {
	return ErrNotImplemented
}
// FouList is the variant built when the target is not linux (see the build
// constraint at the top of this file); it always returns a nil slice and
// ErrNotImplemented.
func FouList(fam int) ([]Fou, error) {
	return nil, ErrNotImplemented
}

View File

@ -5,6 +5,7 @@ import (
"syscall"
"github.com/vishvananda/netlink/nl"
"golang.org/x/sys/unix"
)
type GenlOp struct {
@ -130,9 +131,9 @@ func (h *Handle) GenlFamilyList() ([]*GenlFamily, error) {
Command: nl.GENL_CTRL_CMD_GETFAMILY,
Version: nl.GENL_CTRL_VERSION,
}
req := h.newNetlinkRequest(nl.GENL_ID_CTRL, syscall.NLM_F_DUMP)
req := h.newNetlinkRequest(nl.GENL_ID_CTRL, unix.NLM_F_DUMP)
req.AddData(msg)
msgs, err := req.Execute(syscall.NETLINK_GENERIC, 0)
msgs, err := req.Execute(unix.NETLINK_GENERIC, 0)
if err != nil {
return nil, err
}
@ -151,7 +152,7 @@ func (h *Handle) GenlFamilyGet(name string) (*GenlFamily, error) {
req := h.newNetlinkRequest(nl.GENL_ID_CTRL, 0)
req.AddData(msg)
req.AddData(nl.NewRtAttr(nl.GENL_CTRL_ATTR_FAMILY_NAME, nl.ZeroTerminated(name)))
msgs, err := req.Execute(syscall.NETLINK_GENERIC, 0)
msgs, err := req.Execute(unix.NETLINK_GENERIC, 0)
if err != nil {
return nil, err
}

View File

@ -7,6 +7,7 @@ import (
"syscall"
"github.com/vishvananda/netlink/nl"
"golang.org/x/sys/unix"
)
type PDP struct {
@ -82,9 +83,9 @@ func (h *Handle) GTPPDPList() ([]*PDP, error) {
Command: nl.GENL_GTP_CMD_GETPDP,
Version: nl.GENL_GTP_VERSION,
}
req := h.newNetlinkRequest(int(f.ID), syscall.NLM_F_DUMP)
req := h.newNetlinkRequest(int(f.ID), unix.NLM_F_DUMP)
req.AddData(msg)
msgs, err := req.Execute(syscall.NETLINK_GENERIC, 0)
msgs, err := req.Execute(unix.NETLINK_GENERIC, 0)
if err != nil {
return nil, err
}
@ -96,7 +97,7 @@ func GTPPDPList() ([]*PDP, error) {
}
func gtpPDPGet(req *nl.NetlinkRequest) (*PDP, error) {
msgs, err := req.Execute(syscall.NETLINK_GENERIC, 0)
msgs, err := req.Execute(unix.NETLINK_GENERIC, 0)
if err != nil {
return nil, err
}
@ -182,7 +183,7 @@ func (h *Handle) GTPPDPAdd(link Link, pdp *PDP) error {
Command: nl.GENL_GTP_CMD_NEWPDP,
Version: nl.GENL_GTP_VERSION,
}
req := h.newNetlinkRequest(int(f.ID), syscall.NLM_F_EXCL|syscall.NLM_F_ACK)
req := h.newNetlinkRequest(int(f.ID), unix.NLM_F_EXCL|unix.NLM_F_ACK)
req.AddData(msg)
req.AddData(nl.NewRtAttr(nl.GENL_GTP_ATTR_VERSION, nl.Uint32Attr(pdp.Version)))
req.AddData(nl.NewRtAttr(nl.GENL_GTP_ATTR_LINK, nl.Uint32Attr(uint32(link.Attrs().Index))))
@ -199,7 +200,7 @@ func (h *Handle) GTPPDPAdd(link Link, pdp *PDP) error {
default:
return fmt.Errorf("unsupported GTP version: %d", pdp.Version)
}
_, err = req.Execute(syscall.NETLINK_GENERIC, 0)
_, err = req.Execute(unix.NETLINK_GENERIC, 0)
return err
}
@ -216,7 +217,7 @@ func (h *Handle) GTPPDPDel(link Link, pdp *PDP) error {
Command: nl.GENL_GTP_CMD_DELPDP,
Version: nl.GENL_GTP_VERSION,
}
req := h.newNetlinkRequest(int(f.ID), syscall.NLM_F_EXCL|syscall.NLM_F_ACK)
req := h.newNetlinkRequest(int(f.ID), unix.NLM_F_EXCL|unix.NLM_F_ACK)
req.AddData(msg)
req.AddData(nl.NewRtAttr(nl.GENL_GTP_ATTR_VERSION, nl.Uint32Attr(pdp.Version)))
req.AddData(nl.NewRtAttr(nl.GENL_GTP_ATTR_LINK, nl.Uint32Attr(uint32(link.Attrs().Index))))
@ -229,7 +230,7 @@ func (h *Handle) GTPPDPDel(link Link, pdp *PDP) error {
default:
return fmt.Errorf("unsupported GTP version: %d", pdp.Version)
}
_, err = req.Execute(syscall.NETLINK_GENERIC, 0)
_, err = req.Execute(unix.NETLINK_GENERIC, 0)
return err
}

View File

@ -2,11 +2,11 @@ package netlink
import (
"fmt"
"syscall"
"time"
"github.com/vishvananda/netlink/nl"
"github.com/vishvananda/netns"
"golang.org/x/sys/unix"
)
// Empty handle used by the netlink package methods
@ -43,14 +43,29 @@ func (h *Handle) SetSocketTimeout(to time.Duration) error {
if to < time.Microsecond {
return fmt.Errorf("invalid timeout, minimul value is %s", time.Microsecond)
}
tv := syscall.NsecToTimeval(to.Nanoseconds())
tv := unix.NsecToTimeval(to.Nanoseconds())
for _, sh := range h.sockets {
fd := sh.Socket.GetFd()
err := syscall.SetsockoptTimeval(fd, syscall.SOL_SOCKET, syscall.SO_RCVTIMEO, &tv)
if err != nil {
if err := sh.Socket.SetSendTimeout(&tv); err != nil {
return err
}
err = syscall.SetsockoptTimeval(fd, syscall.SOL_SOCKET, syscall.SO_SNDTIMEO, &tv)
if err := sh.Socket.SetReceiveTimeout(&tv); err != nil {
return err
}
}
return nil
}
// SetSocketReceiveBufferSize sets the receive buffer size for each
// socket in the netlink handle. The maximum value is capped by
// /proc/sys/net/core/rmem_max.
func (h *Handle) SetSocketReceiveBufferSize(size int, force bool) error {
opt := unix.SO_RCVBUF
if force {
opt = unix.SO_RCVBUFFORCE
}
for _, sh := range h.sockets {
fd := sh.Socket.GetFd()
err := unix.SetsockoptInt(fd, unix.SOL_SOCKET, opt, size)
if err != nil {
return err
}
@ -58,6 +73,24 @@ func (h *Handle) SetSocketTimeout(to time.Duration) error {
return nil
}
// GetSocketReceiveBufferSize reads SO_RCVBUF from every socket held by the
// netlink handle and returns one value per socket, in the iteration order of
// h.sockets. Each value is expected to be double the size that was passed to
// SetSocketReceiveBufferSize. The first failing getsockopt aborts the scan and
// its error is returned with a nil slice.
func (h *Handle) GetSocketReceiveBufferSize() ([]int, error) {
	sizes := make([]int, 0, len(h.sockets))
	for _, sh := range h.sockets {
		size, err := unix.GetsockoptInt(sh.Socket.GetFd(), unix.SOL_SOCKET, unix.SO_RCVBUF)
		if err != nil {
			return nil, err
		}
		sizes = append(sizes, size)
	}
	return sizes, nil
}
// NewHandle returns a netlink handle on the network namespace
// specified by ns. If ns=netns.None(), current network namespace
// will be assumed
@ -101,10 +134,10 @@ func (h *Handle) newNetlinkRequest(proto, flags int) *nl.NetlinkRequest {
return nl.NewNetlinkRequest(proto, flags)
}
return &nl.NetlinkRequest{
NlMsghdr: syscall.NlMsghdr{
Len: uint32(syscall.SizeofNlMsghdr),
NlMsghdr: unix.NlMsghdr{
Len: uint32(unix.SizeofNlMsghdr),
Type: uint16(proto),
Flags: syscall.NLM_F_REQUEST | uint16(flags),
Flags: unix.NLM_F_REQUEST | uint16(flags),
},
Sockets: h.sockets,
}

View File

@ -145,6 +145,10 @@ func (h *Handle) LinkSetFlood(link Link, mode bool) error {
return ErrNotImplemented
}
// LinkSetTxQLen is not implemented in this file's variant of Handle; link and
// qlen are ignored and ErrNotImplemented is always returned.
func (h *Handle) LinkSetTxQLen(link Link, qlen int) error {
	return ErrNotImplemented
}
// setProtinfoAttr is not implemented in this file's variant of Handle; its
// arguments are ignored and ErrNotImplemented is always returned.
func (h *Handle) setProtinfoAttr(link Link, mode bool, attr int) error {
	return ErrNotImplemented
}

View File

@ -3,6 +3,7 @@ package netlink
import (
"fmt"
"net"
"os"
)
// Link represents a link device from netlink. Shared link attributes
@ -37,6 +38,9 @@ type LinkAttrs struct {
EncapType string
Protinfo *Protinfo
OperState LinkOperState
NetNsID int
NumTxQueues int
NumRxQueues int
}
// LinkOperState represents the values of the IFLA_OPERSTATE link
@ -258,6 +262,9 @@ const (
type Macvlan struct {
LinkAttrs
Mode MacvlanMode
// MACAddrs is only populated for Macvlan SOURCE links
MACAddrs []net.HardwareAddr
}
func (macvlan *Macvlan) Attrs() *LinkAttrs {
@ -283,8 +290,10 @@ type TuntapFlag uint16
// Tuntap links created via /dev/tun/tap, but can be destroyed via netlink
type Tuntap struct {
LinkAttrs
Mode TuntapMode
Flags TuntapFlag
Mode TuntapMode
Flags TuntapFlag
Queues int
Fds []*os.File
}
func (tuntap *Tuntap) Attrs() *LinkAttrs {
@ -326,26 +335,28 @@ func (generic *GenericLink) Type() string {
type Vxlan struct {
LinkAttrs
VxlanId int
VtepDevIndex int
SrcAddr net.IP
Group net.IP
TTL int
TOS int
Learning bool
Proxy bool
RSC bool
L2miss bool
L3miss bool
UDPCSum bool
NoAge bool
GBP bool
FlowBased bool
Age int
Limit int
Port int
PortLow int
PortHigh int
VxlanId int
VtepDevIndex int
SrcAddr net.IP
Group net.IP
TTL int
TOS int
Learning bool
Proxy bool
RSC bool
L2miss bool
L3miss bool
UDPCSum bool
UDP6ZeroCSumTx bool
UDP6ZeroCSumRx bool
NoAge bool
GBP bool
FlowBased bool
Age int
Limit int
Port int
PortLow int
PortHigh int
}
func (vxlan *Vxlan) Attrs() *LinkAttrs {
@ -699,12 +710,17 @@ func (gretap *Gretap) Type() string {
type Iptun struct {
LinkAttrs
Ttl uint8
Tos uint8
PMtuDisc uint8
Link uint32
Local net.IP
Remote net.IP
Ttl uint8
Tos uint8
PMtuDisc uint8
Link uint32
Local net.IP
Remote net.IP
EncapSport uint16
EncapDport uint16
EncapType uint16
EncapFlags uint16
FlowBased bool
}
func (iptun *Iptun) Attrs() *LinkAttrs {
@ -715,6 +731,28 @@ func (iptun *Iptun) Type() string {
return "ipip"
}
// Sittun embeds LinkAttrs and reports the link type "sit".
//
// NOTE(review): the meaning of the individual fields is not visible in this
// file. The Encap* names mirror the IFLA_IPTUN_ENCAP_* attributes; confirm
// against the linux link implementation.
type Sittun struct {
	LinkAttrs
	Link       uint32
	Local      net.IP
	Remote     net.IP
	Ttl        uint8
	Tos        uint8
	PMtuDisc   uint8
	EncapType  uint16
	EncapFlags uint16
	EncapSport uint16
	EncapDport uint16
}

// Attrs returns a pointer to the embedded LinkAttrs.
func (sittun *Sittun) Attrs() *LinkAttrs {
	return &sittun.LinkAttrs
}

// Type returns the link type name, "sit".
func (sittun *Sittun) Type() string {
	return "sit"
}
type Vti struct {
LinkAttrs
IKey uint32
@ -732,6 +770,32 @@ func (iptun *Vti) Type() string {
return "vti"
}
// Gretun embeds LinkAttrs and reports the link type "gre".
//
// NOTE(review): the meaning of the individual fields is not visible in this
// file; confirm against the linux link implementation before documenting them
// further.
type Gretun struct {
	LinkAttrs
	Link       uint32
	IFlags     uint16
	OFlags     uint16
	IKey       uint32
	OKey       uint32
	Local      net.IP
	Remote     net.IP
	Ttl        uint8
	Tos        uint8
	PMtuDisc   uint8
	EncapType  uint16
	EncapFlags uint16
	EncapSport uint16
	EncapDport uint16
}

// Attrs returns a pointer to the embedded LinkAttrs.
func (gretun *Gretun) Attrs() *LinkAttrs {
	return &gretun.LinkAttrs
}

// Type returns the link type name, "gre".
func (gretun *Gretun) Type() string {
	return "gre"
}
type Vrf struct {
LinkAttrs
Table uint32

File diff suppressed because it is too large Load Diff

View File

@ -14,6 +14,9 @@ type Neigh struct {
Flags int
IP net.IP
HardwareAddr net.HardwareAddr
LLIPAddr net.IP //Used in the case of NHRP
Vlan int
VNI int
}
// String returns $ip/$hwaddr $label

View File

@ -2,10 +2,10 @@ package netlink
import (
"net"
"syscall"
"unsafe"
"github.com/vishvananda/netlink/nl"
"golang.org/x/sys/unix"
)
const (
@ -73,7 +73,7 @@ func NeighAdd(neigh *Neigh) error {
// NeighAdd will add an IP to MAC mapping to the ARP table
// Equivalent to: `ip neigh add ....`
func (h *Handle) NeighAdd(neigh *Neigh) error {
return h.neighAdd(neigh, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL)
return h.neighAdd(neigh, unix.NLM_F_CREATE|unix.NLM_F_EXCL)
}
// NeighSet will add or replace an IP to MAC mapping to the ARP table
@ -85,7 +85,7 @@ func NeighSet(neigh *Neigh) error {
// NeighSet will add or replace an IP to MAC mapping to the ARP table
// Equivalent to: `ip neigh replace....`
func (h *Handle) NeighSet(neigh *Neigh) error {
return h.neighAdd(neigh, syscall.NLM_F_CREATE|syscall.NLM_F_REPLACE)
return h.neighAdd(neigh, unix.NLM_F_CREATE|unix.NLM_F_REPLACE)
}
// NeighAppend will append an entry to FDB
@ -97,7 +97,7 @@ func NeighAppend(neigh *Neigh) error {
// NeighAppend will append an entry to FDB
// Equivalent to: `bridge fdb append...`
func (h *Handle) NeighAppend(neigh *Neigh) error {
return h.neighAdd(neigh, syscall.NLM_F_CREATE|syscall.NLM_F_APPEND)
return h.neighAdd(neigh, unix.NLM_F_CREATE|unix.NLM_F_APPEND)
}
// NeighAppend will append an entry to FDB
@ -109,7 +109,7 @@ func neighAdd(neigh *Neigh, mode int) error {
// neighAdd sends an RTM_NEWNEIGH request for neigh, using mode as the
// request flags together with NLM_F_ACK.
func (h *Handle) neighAdd(neigh *Neigh, mode int) error {
req := h.newNetlinkRequest(syscall.RTM_NEWNEIGH, mode|syscall.NLM_F_ACK)
req := h.newNetlinkRequest(unix.RTM_NEWNEIGH, mode|unix.NLM_F_ACK)
return neighHandle(neigh, req)
}
@ -122,12 +122,13 @@ func NeighDel(neigh *Neigh) error {
// NeighDel will delete a neighbor entry by sending an RTM_DELNEIGH request.
// Equivalent to: `ip neigh del ....`
func (h *Handle) NeighDel(neigh *Neigh) error {
req := h.newNetlinkRequest(syscall.RTM_DELNEIGH, syscall.NLM_F_ACK)
req := h.newNetlinkRequest(unix.RTM_DELNEIGH, unix.NLM_F_ACK)
return neighHandle(neigh, req)
}
func neighHandle(neigh *Neigh, req *nl.NetlinkRequest) error {
var family int
if neigh.Family > 0 {
family = neigh.Family
} else {
@ -151,12 +152,25 @@ func neighHandle(neigh *Neigh, req *nl.NetlinkRequest) error {
dstData := nl.NewRtAttr(NDA_DST, ipData)
req.AddData(dstData)
if neigh.Flags != NTF_PROXY || neigh.HardwareAddr != nil {
if neigh.LLIPAddr != nil {
llIPData := nl.NewRtAttr(NDA_LLADDR, neigh.LLIPAddr.To4())
req.AddData(llIPData)
} else if neigh.Flags != NTF_PROXY || neigh.HardwareAddr != nil {
hwData := nl.NewRtAttr(NDA_LLADDR, []byte(neigh.HardwareAddr))
req.AddData(hwData)
}
_, err := req.Execute(syscall.NETLINK_ROUTE, 0)
if neigh.Vlan != 0 {
vlanData := nl.NewRtAttr(NDA_VLAN, nl.Uint16Attr(uint16(neigh.Vlan)))
req.AddData(vlanData)
}
if neigh.VNI != 0 {
vniData := nl.NewRtAttr(NDA_VNI, nl.Uint32Attr(uint32(neigh.VNI)))
req.AddData(vniData)
}
_, err := req.Execute(unix.NETLINK_ROUTE, 0)
return err
}
@ -189,7 +203,7 @@ func (h *Handle) NeighProxyList(linkIndex, family int) ([]Neigh, error) {
}
func (h *Handle) neighList(linkIndex, family, flags int) ([]Neigh, error) {
req := h.newNetlinkRequest(syscall.RTM_GETNEIGH, syscall.NLM_F_DUMP)
req := h.newNetlinkRequest(unix.RTM_GETNEIGH, unix.NLM_F_DUMP)
msg := Ndmsg{
Family: uint8(family),
Index: uint32(linkIndex),
@ -197,7 +211,7 @@ func (h *Handle) neighList(linkIndex, family, flags int) ([]Neigh, error) {
}
req.AddData(&msg)
msgs, err := req.Execute(syscall.NETLINK_ROUTE, syscall.RTM_NEWNEIGH)
msgs, err := req.Execute(unix.NETLINK_ROUTE, unix.RTM_NEWNEIGH)
if err != nil {
return nil, err
}
@ -237,12 +251,37 @@ func NeighDeserialize(m []byte) (*Neigh, error) {
return nil, err
}
// This should be cached for performance
// once per table dump
link, err := LinkByIndex(neigh.LinkIndex)
if err != nil {
return nil, err
}
encapType := link.Attrs().EncapType
for _, attr := range attrs {
switch attr.Attr.Type {
case NDA_DST:
neigh.IP = net.IP(attr.Value)
case NDA_LLADDR:
neigh.HardwareAddr = net.HardwareAddr(attr.Value)
// BUG: Is this a bug in the netlink library?
// #define RTA_LENGTH(len) (RTA_ALIGN(sizeof(struct rtattr)) + (len))
// #define RTA_PAYLOAD(rta) ((int)((rta)->rta_len) - RTA_LENGTH(0))
attrLen := attr.Attr.Len - unix.SizeofRtAttr
if attrLen == 4 && (encapType == "ipip" ||
encapType == "sit" ||
encapType == "gre") {
neigh.LLIPAddr = net.IP(attr.Value)
} else if attrLen == 16 &&
encapType == "tunnel6" {
neigh.IP = net.IP(attr.Value)
} else {
neigh.HardwareAddr = net.HardwareAddr(attr.Value)
}
case NDA_VLAN:
neigh.Vlan = int(native.Uint16(attr.Value[0:2]))
case NDA_VNI:
neigh.VNI = int(native.Uint32(attr.Value[0:4]))
}
}

View File

@ -108,6 +108,10 @@ func LinkSetFlood(link Link, mode bool) error {
return ErrNotImplemented
}
// LinkSetTxQLen is not implemented in this file's variant of the package; link
// and qlen are ignored and ErrNotImplemented is always returned.
func LinkSetTxQLen(link Link, qlen int) error {
	return ErrNotImplemented
}
// LinkAdd is not implemented in this file's variant of the package; link is
// ignored and ErrNotImplemented is always returned.
func LinkAdd(link Link) error {
	return ErrNotImplemented
}

View File

@ -15,6 +15,7 @@ go_library(
"mpls_linux.go",
"nl_linux.go",
"route_linux.go",
"seg6_linux.go",
"tc_linux.go",
"xfrm_linux.go",
"xfrm_monitor_linux.go",
@ -28,6 +29,7 @@ go_library(
deps = select({
"@io_bazel_rules_go//go/platform:linux_amd64": [
"//vendor/github.com/vishvananda/netns:go_default_library",
"//vendor/golang.org/x/sys/unix:go_default_library",
],
"//conditions:default": [],
}),

View File

@ -1,17 +1,18 @@
package nl
import (
"syscall"
"unsafe"
"golang.org/x/sys/unix"
)
type IfAddrmsg struct {
syscall.IfAddrmsg
unix.IfAddrmsg
}
func NewIfAddrmsg(family int) *IfAddrmsg {
return &IfAddrmsg{
IfAddrmsg: syscall.IfAddrmsg{
IfAddrmsg: unix.IfAddrmsg{
Family: uint8(family),
},
}
@ -35,15 +36,15 @@ func NewIfAddrmsg(family int) *IfAddrmsg {
// SizeofIfAddrmsg = 0x8
func DeserializeIfAddrmsg(b []byte) *IfAddrmsg {
return (*IfAddrmsg)(unsafe.Pointer(&b[0:syscall.SizeofIfAddrmsg][0]))
return (*IfAddrmsg)(unsafe.Pointer(&b[0:unix.SizeofIfAddrmsg][0]))
}
func (msg *IfAddrmsg) Serialize() []byte {
return (*(*[syscall.SizeofIfAddrmsg]byte)(unsafe.Pointer(msg)))[:]
return (*(*[unix.SizeofIfAddrmsg]byte)(unsafe.Pointer(msg)))[:]
}
func (msg *IfAddrmsg) Len() int {
return syscall.SizeofIfAddrmsg
return unix.SizeofIfAddrmsg
}
// struct ifa_cacheinfo {

View File

@ -1,14 +1,15 @@
package nl
import (
"syscall"
"unsafe"
"golang.org/x/sys/unix"
)
const (
DEFAULT_CHANGE = 0xFFFFFFFF
// doesn't exist in syscall
IFLA_VFINFO_LIST = syscall.IFLA_IFALIAS + 1 + iota
IFLA_VFINFO_LIST = unix.IFLA_IFALIAS + 1 + iota
IFLA_STATS64
IFLA_VF_PORTS
IFLA_PORT_SELF
@ -118,6 +119,10 @@ const (
IFLA_MACVLAN_UNSPEC = iota
IFLA_MACVLAN_MODE
IFLA_MACVLAN_FLAGS
IFLA_MACVLAN_MACADDR_MODE
IFLA_MACVLAN_MACADDR
IFLA_MACVLAN_MACADDR_DATA
IFLA_MACVLAN_MACADDR_COUNT
IFLA_MACVLAN_MAX = IFLA_MACVLAN_FLAGS
)
@ -129,6 +134,13 @@ const (
MACVLAN_MODE_SOURCE = 16
)
// MACVLAN_MACADDR_* take the values 0 through 3, assigned by iota.
// NOTE(review): presumably these are the operations carried in the
// IFLA_MACVLAN_MACADDR_MODE attribute; confirm against the kernel's if_link.h.
const (
	MACVLAN_MACADDR_ADD = iota
	MACVLAN_MACADDR_DEL
	MACVLAN_MACADDR_FLUSH
	MACVLAN_MACADDR_SET
)
const (
IFLA_BOND_UNSPEC = iota
IFLA_BOND_MODE
@ -443,6 +455,13 @@ func (msg *VfTrust) Serialize() []byte {
return (*(*[SizeofVfTrust]byte)(unsafe.Pointer(msg)))[:]
}
// XDP flag bits. The first three constants occupy one bit each (1<<0, 1<<1,
// 1<<2); XDP_FLAGS_MASK is the bitwise OR of those three.
const (
	XDP_FLAGS_UPDATE_IF_NOEXIST = 1 << iota
	XDP_FLAGS_SKB_MODE
	XDP_FLAGS_DRV_MODE
	XDP_FLAGS_MASK = XDP_FLAGS_UPDATE_IF_NOEXIST | XDP_FLAGS_SKB_MODE | XDP_FLAGS_DRV_MODE
)
const (
IFLA_XDP_UNSPEC = iota
IFLA_XDP_FD /* fd of xdp program to attach, or -1 to remove */
@ -468,7 +487,12 @@ const (
IFLA_IPTUN_6RD_RELAY_PREFIX
IFLA_IPTUN_6RD_PREFIXLEN
IFLA_IPTUN_6RD_RELAY_PREFIXLEN
IFLA_IPTUN_MAX = IFLA_IPTUN_6RD_RELAY_PREFIXLEN
IFLA_IPTUN_ENCAP_TYPE
IFLA_IPTUN_ENCAP_FLAGS
IFLA_IPTUN_ENCAP_SPORT
IFLA_IPTUN_ENCAP_DPORT
IFLA_IPTUN_COLLECT_METADATA
IFLA_IPTUN_MAX = IFLA_IPTUN_COLLECT_METADATA
)
const (

View File

@ -13,18 +13,19 @@ import (
"unsafe"
"github.com/vishvananda/netns"
"golang.org/x/sys/unix"
)
const (
// Family type definitions
FAMILY_ALL = syscall.AF_UNSPEC
FAMILY_V4 = syscall.AF_INET
FAMILY_V6 = syscall.AF_INET6
FAMILY_ALL = unix.AF_UNSPEC
FAMILY_V4 = unix.AF_INET
FAMILY_V6 = unix.AF_INET6
FAMILY_MPLS = AF_MPLS
)
// SupportedNlFamilies contains the list of netlink families this netlink package supports
var SupportedNlFamilies = []int{syscall.NETLINK_ROUTE, syscall.NETLINK_XFRM, syscall.NETLINK_NETFILTER}
var SupportedNlFamilies = []int{unix.NETLINK_ROUTE, unix.NETLINK_XFRM, unix.NETLINK_NETFILTER}
var nextSeqNr uint32
@ -77,161 +78,161 @@ type NetlinkRequestData interface {
// IfInfomsg is related to links, but it is used for list requests as well
type IfInfomsg struct {
syscall.IfInfomsg
unix.IfInfomsg
}
// Create an IfInfomsg with family specified
func NewIfInfomsg(family int) *IfInfomsg {
return &IfInfomsg{
IfInfomsg: syscall.IfInfomsg{
IfInfomsg: unix.IfInfomsg{
Family: uint8(family),
},
}
}
func DeserializeIfInfomsg(b []byte) *IfInfomsg {
return (*IfInfomsg)(unsafe.Pointer(&b[0:syscall.SizeofIfInfomsg][0]))
return (*IfInfomsg)(unsafe.Pointer(&b[0:unix.SizeofIfInfomsg][0]))
}
func (msg *IfInfomsg) Serialize() []byte {
return (*(*[syscall.SizeofIfInfomsg]byte)(unsafe.Pointer(msg)))[:]
return (*(*[unix.SizeofIfInfomsg]byte)(unsafe.Pointer(msg)))[:]
}
func (msg *IfInfomsg) Len() int {
return syscall.SizeofIfInfomsg
return unix.SizeofIfInfomsg
}
func (msg *IfInfomsg) EncapType() string {
switch msg.Type {
case 0:
return "generic"
case syscall.ARPHRD_ETHER:
case unix.ARPHRD_ETHER:
return "ether"
case syscall.ARPHRD_EETHER:
case unix.ARPHRD_EETHER:
return "eether"
case syscall.ARPHRD_AX25:
case unix.ARPHRD_AX25:
return "ax25"
case syscall.ARPHRD_PRONET:
case unix.ARPHRD_PRONET:
return "pronet"
case syscall.ARPHRD_CHAOS:
case unix.ARPHRD_CHAOS:
return "chaos"
case syscall.ARPHRD_IEEE802:
case unix.ARPHRD_IEEE802:
return "ieee802"
case syscall.ARPHRD_ARCNET:
case unix.ARPHRD_ARCNET:
return "arcnet"
case syscall.ARPHRD_APPLETLK:
case unix.ARPHRD_APPLETLK:
return "atalk"
case syscall.ARPHRD_DLCI:
case unix.ARPHRD_DLCI:
return "dlci"
case syscall.ARPHRD_ATM:
case unix.ARPHRD_ATM:
return "atm"
case syscall.ARPHRD_METRICOM:
case unix.ARPHRD_METRICOM:
return "metricom"
case syscall.ARPHRD_IEEE1394:
case unix.ARPHRD_IEEE1394:
return "ieee1394"
case syscall.ARPHRD_INFINIBAND:
case unix.ARPHRD_INFINIBAND:
return "infiniband"
case syscall.ARPHRD_SLIP:
case unix.ARPHRD_SLIP:
return "slip"
case syscall.ARPHRD_CSLIP:
case unix.ARPHRD_CSLIP:
return "cslip"
case syscall.ARPHRD_SLIP6:
case unix.ARPHRD_SLIP6:
return "slip6"
case syscall.ARPHRD_CSLIP6:
case unix.ARPHRD_CSLIP6:
return "cslip6"
case syscall.ARPHRD_RSRVD:
case unix.ARPHRD_RSRVD:
return "rsrvd"
case syscall.ARPHRD_ADAPT:
case unix.ARPHRD_ADAPT:
return "adapt"
case syscall.ARPHRD_ROSE:
case unix.ARPHRD_ROSE:
return "rose"
case syscall.ARPHRD_X25:
case unix.ARPHRD_X25:
return "x25"
case syscall.ARPHRD_HWX25:
case unix.ARPHRD_HWX25:
return "hwx25"
case syscall.ARPHRD_PPP:
case unix.ARPHRD_PPP:
return "ppp"
case syscall.ARPHRD_HDLC:
case unix.ARPHRD_HDLC:
return "hdlc"
case syscall.ARPHRD_LAPB:
case unix.ARPHRD_LAPB:
return "lapb"
case syscall.ARPHRD_DDCMP:
case unix.ARPHRD_DDCMP:
return "ddcmp"
case syscall.ARPHRD_RAWHDLC:
case unix.ARPHRD_RAWHDLC:
return "rawhdlc"
case syscall.ARPHRD_TUNNEL:
case unix.ARPHRD_TUNNEL:
return "ipip"
case syscall.ARPHRD_TUNNEL6:
case unix.ARPHRD_TUNNEL6:
return "tunnel6"
case syscall.ARPHRD_FRAD:
case unix.ARPHRD_FRAD:
return "frad"
case syscall.ARPHRD_SKIP:
case unix.ARPHRD_SKIP:
return "skip"
case syscall.ARPHRD_LOOPBACK:
case unix.ARPHRD_LOOPBACK:
return "loopback"
case syscall.ARPHRD_LOCALTLK:
case unix.ARPHRD_LOCALTLK:
return "ltalk"
case syscall.ARPHRD_FDDI:
case unix.ARPHRD_FDDI:
return "fddi"
case syscall.ARPHRD_BIF:
case unix.ARPHRD_BIF:
return "bif"
case syscall.ARPHRD_SIT:
case unix.ARPHRD_SIT:
return "sit"
case syscall.ARPHRD_IPDDP:
case unix.ARPHRD_IPDDP:
return "ip/ddp"
case syscall.ARPHRD_IPGRE:
case unix.ARPHRD_IPGRE:
return "gre"
case syscall.ARPHRD_PIMREG:
case unix.ARPHRD_PIMREG:
return "pimreg"
case syscall.ARPHRD_HIPPI:
case unix.ARPHRD_HIPPI:
return "hippi"
case syscall.ARPHRD_ASH:
case unix.ARPHRD_ASH:
return "ash"
case syscall.ARPHRD_ECONET:
case unix.ARPHRD_ECONET:
return "econet"
case syscall.ARPHRD_IRDA:
case unix.ARPHRD_IRDA:
return "irda"
case syscall.ARPHRD_FCPP:
case unix.ARPHRD_FCPP:
return "fcpp"
case syscall.ARPHRD_FCAL:
case unix.ARPHRD_FCAL:
return "fcal"
case syscall.ARPHRD_FCPL:
case unix.ARPHRD_FCPL:
return "fcpl"
case syscall.ARPHRD_FCFABRIC:
case unix.ARPHRD_FCFABRIC:
return "fcfb0"
case syscall.ARPHRD_FCFABRIC + 1:
case unix.ARPHRD_FCFABRIC + 1:
return "fcfb1"
case syscall.ARPHRD_FCFABRIC + 2:
case unix.ARPHRD_FCFABRIC + 2:
return "fcfb2"
case syscall.ARPHRD_FCFABRIC + 3:
case unix.ARPHRD_FCFABRIC + 3:
return "fcfb3"
case syscall.ARPHRD_FCFABRIC + 4:
case unix.ARPHRD_FCFABRIC + 4:
return "fcfb4"
case syscall.ARPHRD_FCFABRIC + 5:
case unix.ARPHRD_FCFABRIC + 5:
return "fcfb5"
case syscall.ARPHRD_FCFABRIC + 6:
case unix.ARPHRD_FCFABRIC + 6:
return "fcfb6"
case syscall.ARPHRD_FCFABRIC + 7:
case unix.ARPHRD_FCFABRIC + 7:
return "fcfb7"
case syscall.ARPHRD_FCFABRIC + 8:
case unix.ARPHRD_FCFABRIC + 8:
return "fcfb8"
case syscall.ARPHRD_FCFABRIC + 9:
case unix.ARPHRD_FCFABRIC + 9:
return "fcfb9"
case syscall.ARPHRD_FCFABRIC + 10:
case unix.ARPHRD_FCFABRIC + 10:
return "fcfb10"
case syscall.ARPHRD_FCFABRIC + 11:
case unix.ARPHRD_FCFABRIC + 11:
return "fcfb11"
case syscall.ARPHRD_FCFABRIC + 12:
case unix.ARPHRD_FCFABRIC + 12:
return "fcfb12"
case syscall.ARPHRD_IEEE802_TR:
case unix.ARPHRD_IEEE802_TR:
return "tr"
case syscall.ARPHRD_IEEE80211:
case unix.ARPHRD_IEEE80211:
return "ieee802.11"
case syscall.ARPHRD_IEEE80211_PRISM:
case unix.ARPHRD_IEEE80211_PRISM:
return "ieee802.11/prism"
case syscall.ARPHRD_IEEE80211_RADIOTAP:
case unix.ARPHRD_IEEE80211_RADIOTAP:
return "ieee802.11/radiotap"
case syscall.ARPHRD_IEEE802154:
case unix.ARPHRD_IEEE802154:
return "ieee802.15.4"
case 65534:
@ -243,7 +244,7 @@ func (msg *IfInfomsg) EncapType() string {
}
func rtaAlignOf(attrlen int) int {
return (attrlen + syscall.RTA_ALIGNTO - 1) & ^(syscall.RTA_ALIGNTO - 1)
return (attrlen + unix.RTA_ALIGNTO - 1) & ^(unix.RTA_ALIGNTO - 1)
}
func NewIfInfomsgChild(parent *RtAttr, family int) *IfInfomsg {
@ -254,7 +255,7 @@ func NewIfInfomsgChild(parent *RtAttr, family int) *IfInfomsg {
// Extend RtAttr to handle data and children
type RtAttr struct {
syscall.RtAttr
unix.RtAttr
Data []byte
children []NetlinkRequestData
}
@ -262,7 +263,7 @@ type RtAttr struct {
// Create a new Extended RtAttr object
func NewRtAttr(attrType int, data []byte) *RtAttr {
return &RtAttr{
RtAttr: syscall.RtAttr{
RtAttr: unix.RtAttr{
Type: uint16(attrType),
},
children: []NetlinkRequestData{},
@ -277,16 +278,21 @@ func NewRtAttrChild(parent *RtAttr, attrType int, data []byte) *RtAttr {
return attr
}
// AddChild adds an existing RtAttr as a child.
// The child is appended after any children already attached to a; the same
// pointer is stored, so later changes to attr remain visible through a.
func (a *RtAttr) AddChild(attr *RtAttr) {
a.children = append(a.children, attr)
}
func (a *RtAttr) Len() int {
if len(a.children) == 0 {
return (syscall.SizeofRtAttr + len(a.Data))
return (unix.SizeofRtAttr + len(a.Data))
}
l := 0
for _, child := range a.children {
l += rtaAlignOf(child.Len())
}
l += syscall.SizeofRtAttr
l += unix.SizeofRtAttr
return rtaAlignOf(l + len(a.Data))
}
@ -319,7 +325,7 @@ func (a *RtAttr) Serialize() []byte {
}
type NetlinkRequest struct {
syscall.NlMsghdr
unix.NlMsghdr
Data []NetlinkRequestData
RawData []byte
Sockets map[int]*SocketHandle
@ -327,7 +333,7 @@ type NetlinkRequest struct {
// Serialize the Netlink Request into a byte array
func (req *NetlinkRequest) Serialize() []byte {
length := syscall.SizeofNlMsghdr
length := unix.SizeofNlMsghdr
dataBytes := make([][]byte, len(req.Data))
for i, data := range req.Data {
dataBytes[i] = data.Serialize()
@ -337,8 +343,8 @@ func (req *NetlinkRequest) Serialize() []byte {
req.Len = uint32(length)
b := make([]byte, length)
hdr := (*(*[syscall.SizeofNlMsghdr]byte)(unsafe.Pointer(req)))[:]
next := syscall.SizeofNlMsghdr
hdr := (*(*[unix.SizeofNlMsghdr]byte)(unsafe.Pointer(req)))[:]
next := unix.SizeofNlMsghdr
copy(b[0:next], hdr)
for _, data := range dataBytes {
for _, dataByte := range data {
@ -421,10 +427,10 @@ done:
if m.Header.Pid != pid {
return nil, fmt.Errorf("Wrong pid %d, expected %d", m.Header.Pid, pid)
}
if m.Header.Type == syscall.NLMSG_DONE {
if m.Header.Type == unix.NLMSG_DONE {
break done
}
if m.Header.Type == syscall.NLMSG_ERROR {
if m.Header.Type == unix.NLMSG_ERROR {
native := NativeEndian()
error := int32(native.Uint32(m.Data[0:4]))
if error == 0 {
@ -436,7 +442,7 @@ done:
continue
}
res = append(res, m.Data)
if m.Header.Flags&syscall.NLM_F_MULTI == 0 {
if m.Header.Flags&unix.NLM_F_MULTI == 0 {
break done
}
}
@ -449,10 +455,10 @@ done:
// the message is serialized
func NewNetlinkRequest(proto, flags int) *NetlinkRequest {
return &NetlinkRequest{
NlMsghdr: syscall.NlMsghdr{
Len: uint32(syscall.SizeofNlMsghdr),
NlMsghdr: unix.NlMsghdr{
Len: uint32(unix.SizeofNlMsghdr),
Type: uint16(proto),
Flags: syscall.NLM_F_REQUEST | uint16(flags),
Flags: unix.NLM_F_REQUEST | uint16(flags),
Seq: atomic.AddUint32(&nextSeqNr, 1),
},
}
@ -460,21 +466,21 @@ func NewNetlinkRequest(proto, flags int) *NetlinkRequest {
type NetlinkSocket struct {
fd int32
lsa syscall.SockaddrNetlink
lsa unix.SockaddrNetlink
sync.Mutex
}
func getNetlinkSocket(protocol int) (*NetlinkSocket, error) {
fd, err := syscall.Socket(syscall.AF_NETLINK, syscall.SOCK_RAW|syscall.SOCK_CLOEXEC, protocol)
fd, err := unix.Socket(unix.AF_NETLINK, unix.SOCK_RAW|unix.SOCK_CLOEXEC, protocol)
if err != nil {
return nil, err
}
s := &NetlinkSocket{
fd: int32(fd),
}
s.lsa.Family = syscall.AF_NETLINK
if err := syscall.Bind(fd, &s.lsa); err != nil {
syscall.Close(fd)
s.lsa.Family = unix.AF_NETLINK
if err := unix.Bind(fd, &s.lsa); err != nil {
unix.Close(fd)
return nil, err
}
@ -551,21 +557,21 @@ func executeInNetns(newNs, curNs netns.NsHandle) (func(), error) {
// Returns the netlink socket on which Receive() method can be called
// to retrieve the messages from the kernel.
func Subscribe(protocol int, groups ...uint) (*NetlinkSocket, error) {
fd, err := syscall.Socket(syscall.AF_NETLINK, syscall.SOCK_RAW, protocol)
fd, err := unix.Socket(unix.AF_NETLINK, unix.SOCK_RAW, protocol)
if err != nil {
return nil, err
}
s := &NetlinkSocket{
fd: int32(fd),
}
s.lsa.Family = syscall.AF_NETLINK
s.lsa.Family = unix.AF_NETLINK
for _, g := range groups {
s.lsa.Groups |= (1 << (g - 1))
}
if err := syscall.Bind(fd, &s.lsa); err != nil {
syscall.Close(fd)
if err := unix.Bind(fd, &s.lsa); err != nil {
unix.Close(fd)
return nil, err
}
@ -586,7 +592,7 @@ func SubscribeAt(newNs, curNs netns.NsHandle, protocol int, groups ...uint) (*Ne
func (s *NetlinkSocket) Close() {
fd := int(atomic.SwapInt32(&s.fd, -1))
syscall.Close(fd)
unix.Close(fd)
}
func (s *NetlinkSocket) GetFd() int {
@ -598,7 +604,7 @@ func (s *NetlinkSocket) Send(request *NetlinkRequest) error {
if fd < 0 {
return fmt.Errorf("Send called on a closed socket")
}
if err := syscall.Sendto(fd, request.Serialize(), 0, &s.lsa); err != nil {
if err := unix.Sendto(fd, request.Serialize(), 0, &s.lsa); err != nil {
return err
}
return nil
@ -609,26 +615,40 @@ func (s *NetlinkSocket) Receive() ([]syscall.NetlinkMessage, error) {
if fd < 0 {
return nil, fmt.Errorf("Receive called on a closed socket")
}
rb := make([]byte, syscall.Getpagesize())
nr, _, err := syscall.Recvfrom(fd, rb, 0)
rb := make([]byte, unix.Getpagesize())
nr, _, err := unix.Recvfrom(fd, rb, 0)
if err != nil {
return nil, err
}
if nr < syscall.NLMSG_HDRLEN {
if nr < unix.NLMSG_HDRLEN {
return nil, fmt.Errorf("Got short response from netlink")
}
rb = rb[:nr]
return syscall.ParseNetlinkMessage(rb)
}
// SetSendTimeout sets the SO_SNDTIMEO option on the socket so that a blocked
// send returns after timeout instead of waiting indefinitely (for example
// when the socket is closed by another goroutine).
//
// The descriptor is read atomically, like every other accessor on
// NetlinkSocket, because Close swaps it to -1 concurrently. On a closed
// socket the setsockopt call fails and its error is returned.
func (s *NetlinkSocket) SetSendTimeout(timeout *unix.Timeval) error {
	fd := int(atomic.LoadInt32(&s.fd))
	return unix.SetsockoptTimeval(fd, unix.SOL_SOCKET, unix.SO_SNDTIMEO, timeout)
}
// SetReceiveTimeout sets the SO_RCVTIMEO option on the socket so that a
// blocked receive returns after timeout instead of waiting indefinitely (for
// example when the socket is closed by another goroutine).
//
// The descriptor is read atomically, like every other accessor on
// NetlinkSocket, because Close swaps it to -1 concurrently. On a closed
// socket the setsockopt call fails and its error is returned.
func (s *NetlinkSocket) SetReceiveTimeout(timeout *unix.Timeval) error {
	fd := int(atomic.LoadInt32(&s.fd))
	return unix.SetsockoptTimeval(fd, unix.SOL_SOCKET, unix.SO_RCVTIMEO, timeout)
}
func (s *NetlinkSocket) GetPid() (uint32, error) {
fd := int(atomic.LoadInt32(&s.fd))
lsa, err := syscall.Getsockname(fd)
lsa, err := unix.Getsockname(fd)
if err != nil {
return 0, err
}
switch v := lsa.(type) {
case *syscall.SockaddrNetlink:
case *unix.SockaddrNetlink:
return v.Pid, nil
}
return 0, fmt.Errorf("Wrong socket type")
@ -683,24 +703,24 @@ func Uint64Attr(v uint64) []byte {
func ParseRouteAttr(b []byte) ([]syscall.NetlinkRouteAttr, error) {
var attrs []syscall.NetlinkRouteAttr
for len(b) >= syscall.SizeofRtAttr {
for len(b) >= unix.SizeofRtAttr {
a, vbuf, alen, err := netlinkRouteAttrAndValue(b)
if err != nil {
return nil, err
}
ra := syscall.NetlinkRouteAttr{Attr: *a, Value: vbuf[:int(a.Len)-syscall.SizeofRtAttr]}
ra := syscall.NetlinkRouteAttr{Attr: syscall.RtAttr(*a), Value: vbuf[:int(a.Len)-unix.SizeofRtAttr]}
attrs = append(attrs, ra)
b = b[alen:]
}
return attrs, nil
}
func netlinkRouteAttrAndValue(b []byte) (*syscall.RtAttr, []byte, int, error) {
a := (*syscall.RtAttr)(unsafe.Pointer(&b[0]))
if int(a.Len) < syscall.SizeofRtAttr || int(a.Len) > len(b) {
return nil, nil, 0, syscall.EINVAL
func netlinkRouteAttrAndValue(b []byte) (*unix.RtAttr, []byte, int, error) {
a := (*unix.RtAttr)(unsafe.Pointer(&b[0]))
if int(a.Len) < unix.SizeofRtAttr || int(a.Len) > len(b) {
return nil, nil, 0, unix.EINVAL
}
return a, b[syscall.SizeofRtAttr:], rtaAlignOf(int(a.Len)), nil
return a, b[unix.SizeofRtAttr:], rtaAlignOf(int(a.Len)), nil
}
// SocketHandle contains the netlink socket and the associated

View File

@ -1,65 +1,66 @@
package nl
import (
"syscall"
"unsafe"
"golang.org/x/sys/unix"
)
type RtMsg struct {
syscall.RtMsg
unix.RtMsg
}
func NewRtMsg() *RtMsg {
return &RtMsg{
RtMsg: syscall.RtMsg{
Table: syscall.RT_TABLE_MAIN,
Scope: syscall.RT_SCOPE_UNIVERSE,
Protocol: syscall.RTPROT_BOOT,
Type: syscall.RTN_UNICAST,
RtMsg: unix.RtMsg{
Table: unix.RT_TABLE_MAIN,
Scope: unix.RT_SCOPE_UNIVERSE,
Protocol: unix.RTPROT_BOOT,
Type: unix.RTN_UNICAST,
},
}
}
func NewRtDelMsg() *RtMsg {
return &RtMsg{
RtMsg: syscall.RtMsg{
Table: syscall.RT_TABLE_MAIN,
Scope: syscall.RT_SCOPE_NOWHERE,
RtMsg: unix.RtMsg{
Table: unix.RT_TABLE_MAIN,
Scope: unix.RT_SCOPE_NOWHERE,
},
}
}
func (msg *RtMsg) Len() int {
return syscall.SizeofRtMsg
return unix.SizeofRtMsg
}
func DeserializeRtMsg(b []byte) *RtMsg {
return (*RtMsg)(unsafe.Pointer(&b[0:syscall.SizeofRtMsg][0]))
return (*RtMsg)(unsafe.Pointer(&b[0:unix.SizeofRtMsg][0]))
}
func (msg *RtMsg) Serialize() []byte {
return (*(*[syscall.SizeofRtMsg]byte)(unsafe.Pointer(msg)))[:]
return (*(*[unix.SizeofRtMsg]byte)(unsafe.Pointer(msg)))[:]
}
type RtNexthop struct {
syscall.RtNexthop
unix.RtNexthop
Children []NetlinkRequestData
}
func DeserializeRtNexthop(b []byte) *RtNexthop {
return (*RtNexthop)(unsafe.Pointer(&b[0:syscall.SizeofRtNexthop][0]))
return (*RtNexthop)(unsafe.Pointer(&b[0:unix.SizeofRtNexthop][0]))
}
func (msg *RtNexthop) Len() int {
if len(msg.Children) == 0 {
return syscall.SizeofRtNexthop
return unix.SizeofRtNexthop
}
l := 0
for _, child := range msg.Children {
l += rtaAlignOf(child.Len())
}
l += syscall.SizeofRtNexthop
l += unix.SizeofRtNexthop
return rtaAlignOf(l)
}
@ -67,8 +68,8 @@ func (msg *RtNexthop) Serialize() []byte {
length := msg.Len()
msg.RtNexthop.Len = uint16(length)
buf := make([]byte, length)
copy(buf, (*(*[syscall.SizeofRtNexthop]byte)(unsafe.Pointer(msg)))[:])
next := rtaAlignOf(syscall.SizeofRtNexthop)
copy(buf, (*(*[unix.SizeofRtNexthop]byte)(unsafe.Pointer(msg)))[:])
next := rtaAlignOf(unix.SizeofRtNexthop)
if len(msg.Children) > 0 {
for _, child := range msg.Children {
childBuf := child.Serialize()

111
vendor/github.com/vishvananda/netlink/nl/seg6_linux.go generated vendored Normal file
View File

@ -0,0 +1,111 @@
package nl
import (
"errors"
"fmt"
"net"
)
// IPv6SrHdr holds the fixed fields of a segment routing header together with
// its list of segments.
type IPv6SrHdr struct {
	nextHdr      uint8
	hdrLen       uint8
	routingType  uint8
	segmentsLeft uint8
	firstSegment uint8
	flags        uint8
	reserved     uint16

	Segments []net.IP
}

// Equal reports whether s1 and s2 describe the same header: identical fixed
// fields and the same segments in the same order. The reserved field is
// deliberately left out of the comparison.
func (s1 *IPv6SrHdr) Equal(s2 IPv6SrHdr) bool {
	fixedFieldsMatch := s1.nextHdr == s2.nextHdr &&
		s1.hdrLen == s2.hdrLen &&
		s1.routingType == s2.routingType &&
		s1.segmentsLeft == s2.segmentsLeft &&
		s1.firstSegment == s2.firstSegment &&
		s1.flags == s2.flags
	if !fixedFieldsMatch || len(s1.Segments) != len(s2.Segments) {
		return false
	}
	for idx, seg := range s1.Segments {
		if !seg.Equal(s2.Segments[idx]) {
			return false
		}
	}
	return true
}
// seg6 encap mode
// These are the mode values written into the first four bytes of the payload
// built by EncodeSEG6Encap and rendered as text by SEG6EncapModeString.
const (
SEG6_IPTUN_MODE_INLINE = iota
SEG6_IPTUN_MODE_ENCAP
)
// number of nested RTATTR
// from include/uapi/linux/seg6_iptunnel.h
const (
SEG6_IPTUNNEL_UNSPEC = iota
SEG6_IPTUNNEL_SRH
// __SEG6_IPTUNNEL_MAX is a sentinel one past the last attribute type.
__SEG6_IPTUNNEL_MAX
)
// SEG6_IPTUNNEL_MAX is the highest attribute type in the group above.
const (
SEG6_IPTUNNEL_MAX = __SEG6_IPTUNNEL_MAX - 1
)
// EncodeSEG6Encap serializes a seg6 encapsulation description.
//
// The result is laid out as a 4-byte mode in native byte order, followed by
// the 8-byte fixed part of the segment routing header, followed by 16 bytes
// per segment in the order given.
//
// An error is returned when segments is empty, when the list is too long for
// the header's 8-bit length field, or when any segment is not in 16-byte
// form. Such segments were previously appended verbatim, which produced a
// header whose declared length did not match its contents.
func EncodeSEG6Encap(mode int, segments []net.IP) ([]byte, error) {
	nsegs := len(segments) // nsegs: number of segments
	if nsegs == 0 {
		return nil, errors.New("EncodeSEG6Encap: No Segment in srh")
	}
	// hdrLen is an 8-bit count of 8-octet units; every segment takes two.
	const maxSegments = 255 / 2
	if nsegs > maxSegments {
		return nil, fmt.Errorf("EncodeSEG6Encap: too many segments (%d, max %d)", nsegs, maxSegments)
	}
	for i, netIP := range segments {
		if len(netIP) != net.IPv6len {
			return nil, fmt.Errorf("EncodeSEG6Encap: segment %d is not a 16-byte IPv6 address (len %d)", i, len(netIP))
		}
	}

	b := make([]byte, 12, 12+nsegs*net.IPv6len)
	native := NativeEndian()
	native.PutUint32(b, uint32(mode))
	b[4] = 0                      // srh.nextHdr (0 when calling netlink)
	b[5] = uint8(16 * nsegs >> 3) // srh.hdrLen (in 8-octets unit)
	b[6] = IPV6_SRCRT_TYPE_4      // srh.routingType (assigned by IANA)
	b[7] = uint8(nsegs - 1)       // srh.segmentsLeft
	b[8] = uint8(nsegs - 1)       // srh.firstSegment
	b[9] = 0                      // srh.flags (SR6_FLAG1_HMAC for srh_hmac)
	// srh.reserved: Defined as "Tag" in draft-ietf-6man-segment-routing-header-07
	native.PutUint16(b[10:], 0) // srh.reserved
	for _, netIP := range segments {
		b = append(b, netIP...) // srh.Segments
	}
	return b, nil
}
// DecodeSEG6Encap parses a buffer in the layout produced by EncodeSEG6Encap:
// a 4-byte native-endian mode, 8 bytes of fixed segment routing header, and
// then zero or more 16-byte segments.
//
// It returns the mode and the segments in wire order. The returned IPs are
// views into buf rather than copies, so the caller must not reuse buf while
// they are still needed.
//
// An error is returned when buf is shorter than the 12-byte fixed part
// (previously this caused an index-out-of-range panic) or when the segment
// list is not a whole number of 16-byte entries.
func DecodeSEG6Encap(buf []byte) (int, []net.IP, error) {
	// 4 bytes of mode plus the 8-byte fixed part of the header.
	const fixedLen = 12
	if len(buf) < fixedLen {
		return 0, nil, fmt.Errorf("DecodeSEG6Encap: buffer too short (buf len: %d)", len(buf))
	}
	native := NativeEndian()
	mode := int(native.Uint32(buf))

	buf = buf[fixedLen:]
	if len(buf)%net.IPv6len != 0 {
		return mode, nil, fmt.Errorf("DecodeSEG6Encap: error parsing Segment List (buf len: %d)", len(buf))
	}
	var segments []net.IP
	for len(buf) > 0 {
		// Cap each view at 16 bytes so an append on one segment cannot
		// overwrite the next one in the shared buffer.
		segments = append(segments, net.IP(buf[:net.IPv6len:net.IPv6len]))
		buf = buf[net.IPv6len:]
	}
	return mode, segments, nil
}
// Helper functions

// SEG6EncapModeString returns the textual name of a seg6 encap mode:
// "inline", "encap", or "unknown" for any other value.
func SEG6EncapModeString(mode int) string {
	if mode == SEG6_IPTUN_MODE_INLINE {
		return "inline"
	}
	if mode == SEG6_IPTUN_MODE_ENCAP {
		return "encap"
	}
	return "unknown"
}

View File

@ -65,4 +65,14 @@ const (
LWTUNNEL_ENCAP_IP
LWTUNNEL_ENCAP_ILA
LWTUNNEL_ENCAP_IP6
LWTUNNEL_ENCAP_SEG6
LWTUNNEL_ENCAP_BPF
)
// routing header types
const (
IPV6_SRCRT_STRICT = 0x01 // Deprecated; will be removed
IPV6_SRCRT_TYPE_0 = 0 // Deprecated; will be removed
IPV6_SRCRT_TYPE_2 = 2 // IPv6 type 2 Routing Header
IPV6_SRCRT_TYPE_4 = 4 // Segment Routing with IPv6
)

View File

@ -5,6 +5,7 @@ import (
"syscall"
"github.com/vishvananda/netlink/nl"
"golang.org/x/sys/unix"
)
func LinkGetProtinfo(link Link) (Protinfo, error) {
@ -15,10 +16,10 @@ func (h *Handle) LinkGetProtinfo(link Link) (Protinfo, error) {
base := link.Attrs()
h.ensureIndex(base)
var pi Protinfo
req := h.newNetlinkRequest(syscall.RTM_GETLINK, syscall.NLM_F_DUMP)
msg := nl.NewIfInfomsg(syscall.AF_BRIDGE)
req := h.newNetlinkRequest(unix.RTM_GETLINK, unix.NLM_F_DUMP)
msg := nl.NewIfInfomsg(unix.AF_BRIDGE)
req.AddData(msg)
msgs, err := req.Execute(syscall.NETLINK_ROUTE, 0)
msgs, err := req.Execute(unix.NETLINK_ROUTE, 0)
if err != nil {
return pi, err
}
@ -33,7 +34,7 @@ func (h *Handle) LinkGetProtinfo(link Link) (Protinfo, error) {
return pi, err
}
for _, attr := range attrs {
if attr.Attr.Type != syscall.IFLA_PROTINFO|syscall.NLA_F_NESTED {
if attr.Attr.Type != unix.IFLA_PROTINFO|unix.NLA_F_NESTED {
continue
}
infos, err := nl.ParseRouteAttr(attr.Value)

View File

@ -8,6 +8,7 @@ import (
"syscall"
"github.com/vishvananda/netlink/nl"
"golang.org/x/sys/unix"
)
// NOTE function is here because it uses other linux functions
@ -84,7 +85,7 @@ func QdiscDel(qdisc Qdisc) error {
// QdiscDel will delete a qdisc from the system.
// Equivalent to: `tc qdisc del $qdisc`
func (h *Handle) QdiscDel(qdisc Qdisc) error {
return h.qdiscModify(syscall.RTM_DELQDISC, 0, qdisc)
return h.qdiscModify(unix.RTM_DELQDISC, 0, qdisc)
}
// QdiscChange will change a qdisc in place
@ -98,7 +99,7 @@ func QdiscChange(qdisc Qdisc) error {
// Equivalent to: `tc qdisc change $qdisc`
// The parent and handle MUST NOT be changed.
func (h *Handle) QdiscChange(qdisc Qdisc) error {
return h.qdiscModify(syscall.RTM_NEWQDISC, 0, qdisc)
return h.qdiscModify(unix.RTM_NEWQDISC, 0, qdisc)
}
// QdiscReplace will replace a qdisc to the system.
@ -113,8 +114,8 @@ func QdiscReplace(qdisc Qdisc) error {
// The handle MUST change.
func (h *Handle) QdiscReplace(qdisc Qdisc) error {
return h.qdiscModify(
syscall.RTM_NEWQDISC,
syscall.NLM_F_CREATE|syscall.NLM_F_REPLACE,
unix.RTM_NEWQDISC,
unix.NLM_F_CREATE|unix.NLM_F_REPLACE,
qdisc)
}
@ -128,13 +129,13 @@ func QdiscAdd(qdisc Qdisc) error {
// Equivalent to: `tc qdisc add $qdisc`
func (h *Handle) QdiscAdd(qdisc Qdisc) error {
return h.qdiscModify(
syscall.RTM_NEWQDISC,
syscall.NLM_F_CREATE|syscall.NLM_F_EXCL,
unix.RTM_NEWQDISC,
unix.NLM_F_CREATE|unix.NLM_F_EXCL,
qdisc)
}
func (h *Handle) qdiscModify(cmd, flags int, qdisc Qdisc) error {
req := h.newNetlinkRequest(cmd, flags|syscall.NLM_F_ACK)
req := h.newNetlinkRequest(cmd, flags|unix.NLM_F_ACK)
base := qdisc.Attrs()
msg := &nl.TcMsg{
Family: nl.FAMILY_ALL,
@ -145,13 +146,13 @@ func (h *Handle) qdiscModify(cmd, flags int, qdisc Qdisc) error {
req.AddData(msg)
// When deleting don't bother building the rest of the netlink payload
if cmd != syscall.RTM_DELQDISC {
if cmd != unix.RTM_DELQDISC {
if err := qdiscPayload(req, qdisc); err != nil {
return err
}
}
_, err := req.Execute(syscall.NETLINK_ROUTE, 0)
_, err := req.Execute(unix.NETLINK_ROUTE, 0)
return err
}
@ -160,71 +161,73 @@ func qdiscPayload(req *nl.NetlinkRequest, qdisc Qdisc) error {
req.AddData(nl.NewRtAttr(nl.TCA_KIND, nl.ZeroTerminated(qdisc.Type())))
options := nl.NewRtAttr(nl.TCA_OPTIONS, nil)
if prio, ok := qdisc.(*Prio); ok {
switch qdisc := qdisc.(type) {
case *Prio:
tcmap := nl.TcPrioMap{
Bands: int32(prio.Bands),
Priomap: prio.PriorityMap,
Bands: int32(qdisc.Bands),
Priomap: qdisc.PriorityMap,
}
options = nl.NewRtAttr(nl.TCA_OPTIONS, tcmap.Serialize())
} else if tbf, ok := qdisc.(*Tbf); ok {
case *Tbf:
opt := nl.TcTbfQopt{}
opt.Rate.Rate = uint32(tbf.Rate)
opt.Peakrate.Rate = uint32(tbf.Peakrate)
opt.Limit = tbf.Limit
opt.Buffer = tbf.Buffer
opt.Rate.Rate = uint32(qdisc.Rate)
opt.Peakrate.Rate = uint32(qdisc.Peakrate)
opt.Limit = qdisc.Limit
opt.Buffer = qdisc.Buffer
nl.NewRtAttrChild(options, nl.TCA_TBF_PARMS, opt.Serialize())
if tbf.Rate >= uint64(1<<32) {
nl.NewRtAttrChild(options, nl.TCA_TBF_RATE64, nl.Uint64Attr(tbf.Rate))
if qdisc.Rate >= uint64(1<<32) {
nl.NewRtAttrChild(options, nl.TCA_TBF_RATE64, nl.Uint64Attr(qdisc.Rate))
}
if tbf.Peakrate >= uint64(1<<32) {
nl.NewRtAttrChild(options, nl.TCA_TBF_PRATE64, nl.Uint64Attr(tbf.Peakrate))
if qdisc.Peakrate >= uint64(1<<32) {
nl.NewRtAttrChild(options, nl.TCA_TBF_PRATE64, nl.Uint64Attr(qdisc.Peakrate))
}
if tbf.Peakrate > 0 {
nl.NewRtAttrChild(options, nl.TCA_TBF_PBURST, nl.Uint32Attr(tbf.Minburst))
if qdisc.Peakrate > 0 {
nl.NewRtAttrChild(options, nl.TCA_TBF_PBURST, nl.Uint32Attr(qdisc.Minburst))
}
} else if htb, ok := qdisc.(*Htb); ok {
case *Htb:
opt := nl.TcHtbGlob{}
opt.Version = htb.Version
opt.Rate2Quantum = htb.Rate2Quantum
opt.Defcls = htb.Defcls
opt.Version = qdisc.Version
opt.Rate2Quantum = qdisc.Rate2Quantum
opt.Defcls = qdisc.Defcls
// TODO: Handle Debug properly. For now default to 0
opt.Debug = htb.Debug
opt.DirectPkts = htb.DirectPkts
opt.Debug = qdisc.Debug
opt.DirectPkts = qdisc.DirectPkts
nl.NewRtAttrChild(options, nl.TCA_HTB_INIT, opt.Serialize())
// nl.NewRtAttrChild(options, nl.TCA_HTB_DIRECT_QLEN, opt.Serialize())
} else if netem, ok := qdisc.(*Netem); ok {
case *Netem:
opt := nl.TcNetemQopt{}
opt.Latency = netem.Latency
opt.Limit = netem.Limit
opt.Loss = netem.Loss
opt.Gap = netem.Gap
opt.Duplicate = netem.Duplicate
opt.Jitter = netem.Jitter
opt.Latency = qdisc.Latency
opt.Limit = qdisc.Limit
opt.Loss = qdisc.Loss
opt.Gap = qdisc.Gap
opt.Duplicate = qdisc.Duplicate
opt.Jitter = qdisc.Jitter
options = nl.NewRtAttr(nl.TCA_OPTIONS, opt.Serialize())
// Correlation
corr := nl.TcNetemCorr{}
corr.DelayCorr = netem.DelayCorr
corr.LossCorr = netem.LossCorr
corr.DupCorr = netem.DuplicateCorr
corr.DelayCorr = qdisc.DelayCorr
corr.LossCorr = qdisc.LossCorr
corr.DupCorr = qdisc.DuplicateCorr
if corr.DelayCorr > 0 || corr.LossCorr > 0 || corr.DupCorr > 0 {
nl.NewRtAttrChild(options, nl.TCA_NETEM_CORR, corr.Serialize())
}
// Corruption
corruption := nl.TcNetemCorrupt{}
corruption.Probability = netem.CorruptProb
corruption.Correlation = netem.CorruptCorr
corruption.Probability = qdisc.CorruptProb
corruption.Correlation = qdisc.CorruptCorr
if corruption.Probability > 0 {
nl.NewRtAttrChild(options, nl.TCA_NETEM_CORRUPT, corruption.Serialize())
}
// Reorder
reorder := nl.TcNetemReorder{}
reorder.Probability = netem.ReorderProb
reorder.Correlation = netem.ReorderCorr
reorder.Probability = qdisc.ReorderProb
reorder.Correlation = qdisc.ReorderCorr
if reorder.Probability > 0 {
nl.NewRtAttrChild(options, nl.TCA_NETEM_REORDER, reorder.Serialize())
}
} else if _, ok := qdisc.(*Ingress); ok {
case *Ingress:
// ingress filters must use the proper handle
if qdisc.Attrs().Parent != HANDLE_INGRESS {
return fmt.Errorf("Ingress filters must set Parent to HANDLE_INGRESS")
@ -246,7 +249,7 @@ func QdiscList(link Link) ([]Qdisc, error) {
// Equivalent to: `tc qdisc show`.
// The list can be filtered by link.
func (h *Handle) QdiscList(link Link) ([]Qdisc, error) {
req := h.newNetlinkRequest(syscall.RTM_GETQDISC, syscall.NLM_F_DUMP)
req := h.newNetlinkRequest(unix.RTM_GETQDISC, unix.NLM_F_DUMP)
index := int32(0)
if link != nil {
base := link.Attrs()
@ -259,7 +262,7 @@ func (h *Handle) QdiscList(link Link) ([]Qdisc, error) {
}
req.AddData(msg)
msgs, err := req.Execute(syscall.NETLINK_ROUTE, syscall.RTM_NEWQDISC)
msgs, err := req.Execute(unix.NETLINK_ROUTE, unix.RTM_NEWQDISC)
if err != nil {
return nil, err
}

View File

@ -16,6 +16,7 @@ type Destination interface {
Decode([]byte) error
Encode() ([]byte, error)
String() string
Equal(Destination) bool
}
type Encap interface {
@ -23,6 +24,7 @@ type Encap interface {
Decode([]byte) error
Encode() ([]byte, error)
String() string
Equal(Encap) bool
}
// Route represents a netlink route.
@ -43,6 +45,8 @@ type Route struct {
MPLSDst *int
NewDst Destination
Encap Encap
MTU int
AdvMSS int
}
func (r Route) String() string {
@ -72,6 +76,25 @@ func (r Route) String() string {
return fmt.Sprintf("{%s}", strings.Join(elems, " "))
}
// Equal reports whether r and x describe the same route. It compares the
// link indexes, scope, destination, source, gateway, multipath next hops,
// protocol, priority, table, type, TOS, flags, MPLS destination, new
// destination and encapsulation. Fields not listed here (such as MTU and
// AdvMSS) do not take part in the comparison.
func (r Route) Equal(x Route) bool {
	if r.LinkIndex != x.LinkIndex || r.ILinkIndex != x.ILinkIndex || r.Scope != x.Scope {
		return false
	}
	if !ipNetEqual(r.Dst, x.Dst) || !r.Src.Equal(x.Src) || !r.Gw.Equal(x.Gw) {
		return false
	}
	if !nexthopInfoSlice(r.MultiPath).Equal(x.MultiPath) {
		return false
	}
	if r.Protocol != x.Protocol ||
		r.Priority != x.Priority ||
		r.Table != x.Table ||
		r.Type != x.Type ||
		r.Tos != x.Tos ||
		r.Flags != x.Flags {
		return false
	}
	// Pointer and interface fields match when identical, or when both are
	// set and hold equal values.
	if r.MPLSDst != x.MPLSDst && (r.MPLSDst == nil || x.MPLSDst == nil || *r.MPLSDst != *x.MPLSDst) {
		return false
	}
	if r.NewDst != x.NewDst && (r.NewDst == nil || !r.NewDst.Equal(x.NewDst)) {
		return false
	}
	if r.Encap != x.Encap && (r.Encap == nil || !r.Encap.Equal(x.Encap)) {
		return false
	}
	return true
}
func (r *Route) SetFlag(flag NextHopFlag) {
r.Flags |= int(flag)
}
@ -110,7 +133,46 @@ func (n *NexthopInfo) String() string {
elems = append(elems, fmt.Sprintf("Encap: %s", n.Encap))
}
elems = append(elems, fmt.Sprintf("Weight: %d", n.Hops+1))
elems = append(elems, fmt.Sprintf("Gw: %d", n.Gw))
elems = append(elems, fmt.Sprintf("Gw: %s", n.Gw))
elems = append(elems, fmt.Sprintf("Flags: %s", n.ListFlags()))
return fmt.Sprintf("{%s}", strings.Join(elems, " "))
}
// Equal reports whether n and x describe the same next hop: same link index,
// hop count, gateway, flags, new destination and encapsulation.
func (n NexthopInfo) Equal(x NexthopInfo) bool {
	if n.LinkIndex != x.LinkIndex || n.Hops != x.Hops {
		return false
	}
	if !n.Gw.Equal(x.Gw) || n.Flags != x.Flags {
		return false
	}
	// Interface fields match when identical, or when n's value is set and
	// reports equality with x's.
	if n.NewDst != x.NewDst && (n.NewDst == nil || !n.NewDst.Equal(x.NewDst)) {
		return false
	}
	if n.Encap != x.Encap && (n.Encap == nil || !n.Encap.Equal(x.Encap)) {
		return false
	}
	return true
}
// nexthopInfoSlice gives a list of next hops an element-wise Equal method.
type nexthopInfoSlice []*NexthopInfo

// Equal reports whether n and x have the same length and hold pairwise equal
// next hops in the same order. A nil entry on either side never matches,
// not even another nil entry.
func (n nexthopInfoSlice) Equal(x []*NexthopInfo) bool {
	if len(n) != len(x) {
		return false
	}
	for idx, mine := range n {
		theirs := x[idx]
		if mine == nil || theirs == nil || !mine.Equal(*theirs) {
			return false
		}
	}
	return true
}
// ipNetEqual reports whether two networks are the same. Two nil pointers (or
// the same pointer) are equal and a nil pointer never equals a non-nil one;
// otherwise the networks are equal when their masks have the same number of
// leading ones and their addresses are equal.
func ipNetEqual(ipn1 *net.IPNet, ipn2 *net.IPNet) bool {
	switch {
	case ipn1 == ipn2:
		return true
	case ipn1 == nil, ipn2 == nil:
		return false
	}
	ones1, _ := ipn1.Mask.Size()
	ones2, _ := ipn2.Mask.Size()
	if ones1 != ones2 {
		return false
	}
	return ipn1.IP.Equal(ipn2.IP)
}

View File

@ -8,16 +8,17 @@ import (
"github.com/vishvananda/netlink/nl"
"github.com/vishvananda/netns"
"golang.org/x/sys/unix"
)
// RtAttr is shared so it is in netlink_linux.go
const (
SCOPE_UNIVERSE Scope = syscall.RT_SCOPE_UNIVERSE
SCOPE_SITE Scope = syscall.RT_SCOPE_SITE
SCOPE_LINK Scope = syscall.RT_SCOPE_LINK
SCOPE_HOST Scope = syscall.RT_SCOPE_HOST
SCOPE_NOWHERE Scope = syscall.RT_SCOPE_NOWHERE
SCOPE_UNIVERSE Scope = unix.RT_SCOPE_UNIVERSE
SCOPE_SITE Scope = unix.RT_SCOPE_SITE
SCOPE_LINK Scope = unix.RT_SCOPE_LINK
SCOPE_HOST Scope = unix.RT_SCOPE_HOST
SCOPE_NOWHERE Scope = unix.RT_SCOPE_NOWHERE
)
const (
@ -34,8 +35,8 @@ const (
)
const (
FLAG_ONLINK NextHopFlag = syscall.RTNH_F_ONLINK
FLAG_PERVASIVE NextHopFlag = syscall.RTNH_F_PERVASIVE
FLAG_ONLINK NextHopFlag = unix.RTNH_F_ONLINK
FLAG_PERVASIVE NextHopFlag = unix.RTNH_F_PERVASIVE
)
var testFlags = []flagString{
@ -86,6 +87,34 @@ func (d *MPLSDestination) String() string {
return strings.Join(s, "/")
}
// Equal reports whether x is an *MPLSDestination carrying the same labels as
// d in the same order. Two nil destinations are equal; a nil label list only
// equals another nil label list, not an empty one.
func (d *MPLSDestination) Equal(x Destination) bool {
	o, ok := x.(*MPLSDestination)
	switch {
	case !ok:
		return false
	case d == nil || o == nil:
		return d == nil && o == nil
	case d.Labels == nil || o.Labels == nil:
		return d.Labels == nil && o.Labels == nil
	case len(d.Labels) != len(o.Labels):
		return false
	}
	for idx, label := range d.Labels {
		if label != o.Labels[idx] {
			return false
		}
	}
	return true
}
type MPLSEncap struct {
Labels []int
}
@ -96,17 +125,17 @@ func (e *MPLSEncap) Type() int {
func (e *MPLSEncap) Decode(buf []byte) error {
if len(buf) < 4 {
return fmt.Errorf("Lack of bytes")
return fmt.Errorf("lack of bytes")
}
native := nl.NativeEndian()
l := native.Uint16(buf)
if len(buf) < int(l) {
return fmt.Errorf("Lack of bytes")
return fmt.Errorf("lack of bytes")
}
buf = buf[:l]
typ := native.Uint16(buf[2:])
if typ != nl.MPLS_IPTUNNEL_DST {
return fmt.Errorf("Unknown MPLS Encap Type: %d", typ)
return fmt.Errorf("unknown MPLS Encap Type: %d", typ)
}
e.Labels = nl.DecodeMPLSStack(buf[4:])
return nil
@ -129,6 +158,107 @@ func (e *MPLSEncap) String() string {
return strings.Join(s, "/")
}
// Equal reports whether x is an *MPLSEncap carrying the same labels as e in
// the same order. Two nil encaps are equal; a nil label list only equals
// another nil label list, not an empty one.
func (e *MPLSEncap) Equal(x Encap) bool {
	o, ok := x.(*MPLSEncap)
	switch {
	case !ok:
		return false
	case e == nil || o == nil:
		return e == nil && o == nil
	case e.Labels == nil || o.Labels == nil:
		return e.Labels == nil && o.Labels == nil
	case len(e.Labels) != len(o.Labels):
		return false
	}
	for idx, label := range e.Labels {
		if label != o.Labels[idx] {
			return false
		}
	}
	return true
}
// SEG6 definitions

// SEG6Encap describes a seg6 encapsulation: an encap mode plus a list of
// segments. Mode is rendered through nl.SEG6EncapModeString by String.
type SEG6Encap struct {
Mode int
Segments []net.IP
}
// Type returns the lightweight-tunnel encap type for seg6,
// nl.LWTUNNEL_ENCAP_SEG6.
func (e *SEG6Encap) Type() int {
return nl.LWTUNNEL_ENCAP_SEG6
}
// Decode parses a nested attribute of the form written by Encode: a 2-byte
// length and 2-byte type in native byte order, followed by the seg6 payload.
//
// It returns an error when buf is shorter than the 4-byte header or than the
// length it declares, when the declared length is smaller than the header
// itself (previously this caused a slice-bounds panic), when the type is not
// nl.SEG6_IPTUNNEL_SRH, or when the payload cannot be parsed. The receiver
// is only modified on success.
func (e *SEG6Encap) Decode(buf []byte) error {
	const hdrLen = 4
	if len(buf) < hdrLen {
		return fmt.Errorf("lack of bytes")
	}
	native := nl.NativeEndian()
	// Get Length(l) & Type(typ) : 2 + 2 bytes
	l := native.Uint16(buf)
	if int(l) < hdrLen {
		return fmt.Errorf("invalid SEG6 attribute length: %d", l)
	}
	if len(buf) < int(l) {
		return fmt.Errorf("lack of bytes")
	}
	buf = buf[:l] // make sure buf size upper limit is Length
	typ := native.Uint16(buf[2:])
	if typ != nl.SEG6_IPTUNNEL_SRH {
		return fmt.Errorf("unknown SEG6 Type: %d", typ)
	}

	mode, segments, err := nl.DecodeSEG6Encap(buf[hdrLen:])
	if err != nil {
		return err
	}
	e.Mode, e.Segments = mode, segments
	return nil
}
// Encode serializes e as a nested attribute: a 2-byte total length and a
// 2-byte type (nl.SEG6_IPTUNNEL_SRH) in native byte order, followed by the
// payload produced by nl.EncodeSEG6Encap.
//
// On failure it returns a nil slice together with the error. Previously the
// 4-byte header was returned alongside the error. It also fails when the
// attribute would not fit the 16-bit length field.
func (e *SEG6Encap) Encode() ([]byte, error) {
	s, err := nl.EncodeSEG6Encap(e.Mode, e.Segments)
	if err != nil {
		return nil, err
	}
	const hdrLen = 4
	total := len(s) + hdrLen
	if total > 0xffff {
		return nil, fmt.Errorf("SEG6 encap too large: %d bytes", total)
	}
	native := nl.NativeEndian()
	hdr := make([]byte, hdrLen, total)
	native.PutUint16(hdr, uint16(total))
	native.PutUint16(hdr[2:], nl.SEG6_IPTUNNEL_SRH)
	return append(hdr, s...), nil
}
// String renders e as "mode <mode> segs <count> [ <segments> ]", listing
// the segments from the highest index down to index 0.
func (e *SEG6Encap) String() string {
	count := len(e.Segments)
	// Fill the slice back to front: seg#0 is the last segment, so it must
	// be printed last.
	parts := make([]string, count)
	for idx, seg := range e.Segments {
		parts[count-1-idx] = seg.String()
	}
	return fmt.Sprintf("mode %s segs %d [ %s ]",
		nl.SEG6EncapModeString(e.Mode), count, strings.Join(parts, " "))
}
// Equal reports whether x is a *SEG6Encap with the same mode and the same
// segments, in the same order, as e. Identical pointers (including two nil
// pointers) are equal; a nil encap never equals a non-nil one.
func (e *SEG6Encap) Equal(x Encap) bool {
	other, isSEG6 := x.(*SEG6Encap)
	switch {
	case !isSEG6:
		return false
	case e == other:
		return true
	case e == nil || other == nil:
		return false
	case e.Mode != other.Mode, len(e.Segments) != len(other.Segments):
		return false
	}
	for idx, seg := range e.Segments {
		if !seg.Equal(other.Segments[idx]) {
			return false
		}
	}
	return true
}
// RouteAdd will add a route to the system.
// Equivalent to: `ip route add $route`
func RouteAdd(route *Route) error {
@ -138,8 +268,8 @@ func RouteAdd(route *Route) error {
// RouteAdd will add a route to the system.
// Equivalent to: `ip route add $route`
func (h *Handle) RouteAdd(route *Route) error {
flags := syscall.NLM_F_CREATE | syscall.NLM_F_EXCL | syscall.NLM_F_ACK
req := h.newNetlinkRequest(syscall.RTM_NEWROUTE, flags)
flags := unix.NLM_F_CREATE | unix.NLM_F_EXCL | unix.NLM_F_ACK
req := h.newNetlinkRequest(unix.RTM_NEWROUTE, flags)
return h.routeHandle(route, req, nl.NewRtMsg())
}
@ -152,8 +282,8 @@ func RouteReplace(route *Route) error {
// RouteReplace will add a route to the system.
// Equivalent to: `ip route replace $route`
func (h *Handle) RouteReplace(route *Route) error {
flags := syscall.NLM_F_CREATE | syscall.NLM_F_REPLACE | syscall.NLM_F_ACK
req := h.newNetlinkRequest(syscall.RTM_NEWROUTE, flags)
flags := unix.NLM_F_CREATE | unix.NLM_F_REPLACE | unix.NLM_F_ACK
req := h.newNetlinkRequest(unix.RTM_NEWROUTE, flags)
return h.routeHandle(route, req, nl.NewRtMsg())
}
@ -166,7 +296,7 @@ func RouteDel(route *Route) error {
// RouteDel will delete a route from the system.
// Equivalent to: `ip route del $route`
func (h *Handle) RouteDel(route *Route) error {
req := h.newNetlinkRequest(syscall.RTM_DELROUTE, syscall.NLM_F_ACK)
req := h.newNetlinkRequest(unix.RTM_DELROUTE, unix.NLM_F_ACK)
return h.routeHandle(route, req, nl.NewRtDelMsg())
}
@ -189,12 +319,12 @@ func (h *Handle) routeHandle(route *Route, req *nl.NetlinkRequest, msg *nl.RtMsg
} else {
dstData = route.Dst.IP.To16()
}
rtAttrs = append(rtAttrs, nl.NewRtAttr(syscall.RTA_DST, dstData))
rtAttrs = append(rtAttrs, nl.NewRtAttr(unix.RTA_DST, dstData))
} else if route.MPLSDst != nil {
family = nl.FAMILY_MPLS
msg.Dst_len = uint8(20)
msg.Type = syscall.RTN_UNICAST
rtAttrs = append(rtAttrs, nl.NewRtAttr(syscall.RTA_DST, nl.EncodeMPLSStack(*route.MPLSDst)))
msg.Type = unix.RTN_UNICAST
rtAttrs = append(rtAttrs, nl.NewRtAttr(unix.RTA_DST, nl.EncodeMPLSStack(*route.MPLSDst)))
}
if route.NewDst != nil {
@ -232,7 +362,7 @@ func (h *Handle) routeHandle(route *Route, req *nl.NetlinkRequest, msg *nl.RtMsg
srcData = route.Src.To16()
}
// The commonly used src ip for routes is actually PREFSRC
rtAttrs = append(rtAttrs, nl.NewRtAttr(syscall.RTA_PREFSRC, srcData))
rtAttrs = append(rtAttrs, nl.NewRtAttr(unix.RTA_PREFSRC, srcData))
}
if route.Gw != nil {
@ -247,14 +377,14 @@ func (h *Handle) routeHandle(route *Route, req *nl.NetlinkRequest, msg *nl.RtMsg
} else {
gwData = route.Gw.To16()
}
rtAttrs = append(rtAttrs, nl.NewRtAttr(syscall.RTA_GATEWAY, gwData))
rtAttrs = append(rtAttrs, nl.NewRtAttr(unix.RTA_GATEWAY, gwData))
}
if len(route.MultiPath) > 0 {
buf := []byte{}
for _, nh := range route.MultiPath {
rtnh := &nl.RtNexthop{
RtNexthop: syscall.RtNexthop{
RtNexthop: unix.RtNexthop{
Hops: uint8(nh.Hops),
Ifindex: int32(nh.LinkIndex),
Flags: uint8(nh.Flags),
@ -267,9 +397,9 @@ func (h *Handle) routeHandle(route *Route, req *nl.NetlinkRequest, msg *nl.RtMsg
return fmt.Errorf("gateway, source, and destination ip are not the same IP family")
}
if gwFamily == FAMILY_V4 {
children = append(children, nl.NewRtAttr(syscall.RTA_GATEWAY, []byte(nh.Gw.To4())))
children = append(children, nl.NewRtAttr(unix.RTA_GATEWAY, []byte(nh.Gw.To4())))
} else {
children = append(children, nl.NewRtAttr(syscall.RTA_GATEWAY, []byte(nh.Gw.To16())))
children = append(children, nl.NewRtAttr(unix.RTA_GATEWAY, []byte(nh.Gw.To16())))
}
}
if nh.NewDst != nil {
@ -295,15 +425,15 @@ func (h *Handle) routeHandle(route *Route, req *nl.NetlinkRequest, msg *nl.RtMsg
rtnh.Children = children
buf = append(buf, rtnh.Serialize()...)
}
rtAttrs = append(rtAttrs, nl.NewRtAttr(syscall.RTA_MULTIPATH, buf))
rtAttrs = append(rtAttrs, nl.NewRtAttr(unix.RTA_MULTIPATH, buf))
}
if route.Table > 0 {
if route.Table >= 256 {
msg.Table = syscall.RT_TABLE_UNSPEC
msg.Table = unix.RT_TABLE_UNSPEC
b := make([]byte, 4)
native.PutUint32(b, uint32(route.Table))
rtAttrs = append(rtAttrs, nl.NewRtAttr(syscall.RTA_TABLE, b))
rtAttrs = append(rtAttrs, nl.NewRtAttr(unix.RTA_TABLE, b))
} else {
msg.Table = uint8(route.Table)
}
@ -312,7 +442,7 @@ func (h *Handle) routeHandle(route *Route, req *nl.NetlinkRequest, msg *nl.RtMsg
if route.Priority > 0 {
b := make([]byte, 4)
native.PutUint32(b, uint32(route.Priority))
rtAttrs = append(rtAttrs, nl.NewRtAttr(syscall.RTA_PRIORITY, b))
rtAttrs = append(rtAttrs, nl.NewRtAttr(unix.RTA_PRIORITY, b))
}
if route.Tos > 0 {
msg.Tos = uint8(route.Tos)
@ -324,6 +454,25 @@ func (h *Handle) routeHandle(route *Route, req *nl.NetlinkRequest, msg *nl.RtMsg
msg.Type = uint8(route.Type)
}
var metrics []*nl.RtAttr
// TODO: support other rta_metric values
if route.MTU > 0 {
b := nl.Uint32Attr(uint32(route.MTU))
metrics = append(metrics, nl.NewRtAttr(unix.RTAX_MTU, b))
}
if route.AdvMSS > 0 {
b := nl.Uint32Attr(uint32(route.AdvMSS))
metrics = append(metrics, nl.NewRtAttr(unix.RTAX_ADVMSS, b))
}
if metrics != nil {
attr := nl.NewRtAttr(unix.RTA_METRICS, nil)
for _, metric := range metrics {
attr.AddChild(metric)
}
rtAttrs = append(rtAttrs, attr)
}
msg.Flags = uint32(route.Flags)
msg.Scope = uint8(route.Scope)
msg.Family = uint8(family)
@ -338,9 +487,9 @@ func (h *Handle) routeHandle(route *Route, req *nl.NetlinkRequest, msg *nl.RtMsg
)
native.PutUint32(b, uint32(route.LinkIndex))
req.AddData(nl.NewRtAttr(syscall.RTA_OIF, b))
req.AddData(nl.NewRtAttr(unix.RTA_OIF, b))
_, err := req.Execute(syscall.NETLINK_ROUTE, 0)
_, err := req.Execute(unix.NETLINK_ROUTE, 0)
return err
}
@ -373,11 +522,11 @@ func RouteListFiltered(family int, filter *Route, filterMask uint64) ([]Route, e
// RouteListFiltered gets a list of routes in the system filtered with specified rules.
// All rules must be defined in RouteFilter struct
func (h *Handle) RouteListFiltered(family int, filter *Route, filterMask uint64) ([]Route, error) {
req := h.newNetlinkRequest(syscall.RTM_GETROUTE, syscall.NLM_F_DUMP)
req := h.newNetlinkRequest(unix.RTM_GETROUTE, unix.NLM_F_DUMP)
infmsg := nl.NewIfInfomsg(family)
req.AddData(infmsg)
msgs, err := req.Execute(syscall.NETLINK_ROUTE, syscall.RTM_NEWROUTE)
msgs, err := req.Execute(unix.NETLINK_ROUTE, unix.RTM_NEWROUTE)
if err != nil {
return nil, err
}
@ -385,11 +534,11 @@ func (h *Handle) RouteListFiltered(family int, filter *Route, filterMask uint64)
var res []Route
for _, m := range msgs {
msg := nl.DeserializeRtMsg(m)
if msg.Flags&syscall.RTM_F_CLONED != 0 {
if msg.Flags&unix.RTM_F_CLONED != 0 {
// Ignore cloned routes
continue
}
if msg.Table != syscall.RT_TABLE_MAIN {
if msg.Table != unix.RT_TABLE_MAIN {
if filter == nil || filter != nil && filterMask&RT_FILTER_TABLE == 0 {
// Ignore non-main tables
continue
@ -401,7 +550,7 @@ func (h *Handle) RouteListFiltered(family int, filter *Route, filterMask uint64)
}
if filter != nil {
switch {
case filterMask&RT_FILTER_TABLE != 0 && filter.Table != syscall.RT_TABLE_UNSPEC && route.Table != filter.Table:
case filterMask&RT_FILTER_TABLE != 0 && filter.Table != unix.RT_TABLE_UNSPEC && route.Table != filter.Table:
continue
case filterMask&RT_FILTER_PROTOCOL != 0 && route.Protocol != filter.Protocol:
continue
@ -421,19 +570,8 @@ func (h *Handle) RouteListFiltered(family int, filter *Route, filterMask uint64)
continue
case filterMask&RT_FILTER_DST != 0:
if filter.MPLSDst == nil || route.MPLSDst == nil || (*filter.MPLSDst) != (*route.MPLSDst) {
if filter.Dst == nil {
if route.Dst != nil {
continue
}
} else {
if route.Dst == nil {
continue
}
aMaskLen, aMaskBits := route.Dst.Mask.Size()
bMaskLen, bMaskBits := filter.Dst.Mask.Size()
if !(route.Dst.IP.Equal(filter.Dst.IP) && aMaskLen == bMaskLen && aMaskBits == bMaskBits) {
continue
}
if !ipNetEqual(route.Dst, filter.Dst) {
continue
}
}
}
@ -463,11 +601,11 @@ func deserializeRoute(m []byte) (Route, error) {
var encap, encapType syscall.NetlinkRouteAttr
for _, attr := range attrs {
switch attr.Attr.Type {
case syscall.RTA_GATEWAY:
case unix.RTA_GATEWAY:
route.Gw = net.IP(attr.Value)
case syscall.RTA_PREFSRC:
case unix.RTA_PREFSRC:
route.Src = net.IP(attr.Value)
case syscall.RTA_DST:
case unix.RTA_DST:
if msg.Family == nl.FAMILY_MPLS {
stack := nl.DecodeMPLSStack(attr.Value)
if len(stack) == 0 || len(stack) > 1 {
@ -480,36 +618,36 @@ func deserializeRoute(m []byte) (Route, error) {
Mask: net.CIDRMask(int(msg.Dst_len), 8*len(attr.Value)),
}
}
case syscall.RTA_OIF:
case unix.RTA_OIF:
route.LinkIndex = int(native.Uint32(attr.Value[0:4]))
case syscall.RTA_IIF:
case unix.RTA_IIF:
route.ILinkIndex = int(native.Uint32(attr.Value[0:4]))
case syscall.RTA_PRIORITY:
case unix.RTA_PRIORITY:
route.Priority = int(native.Uint32(attr.Value[0:4]))
case syscall.RTA_TABLE:
case unix.RTA_TABLE:
route.Table = int(native.Uint32(attr.Value[0:4]))
case syscall.RTA_MULTIPATH:
case unix.RTA_MULTIPATH:
parseRtNexthop := func(value []byte) (*NexthopInfo, []byte, error) {
if len(value) < syscall.SizeofRtNexthop {
return nil, nil, fmt.Errorf("Lack of bytes")
if len(value) < unix.SizeofRtNexthop {
return nil, nil, fmt.Errorf("lack of bytes")
}
nh := nl.DeserializeRtNexthop(value)
if len(value) < int(nh.RtNexthop.Len) {
return nil, nil, fmt.Errorf("Lack of bytes")
return nil, nil, fmt.Errorf("lack of bytes")
}
info := &NexthopInfo{
LinkIndex: int(nh.RtNexthop.Ifindex),
Hops: int(nh.RtNexthop.Hops),
Flags: int(nh.RtNexthop.Flags),
}
attrs, err := nl.ParseRouteAttr(value[syscall.SizeofRtNexthop:int(nh.RtNexthop.Len)])
attrs, err := nl.ParseRouteAttr(value[unix.SizeofRtNexthop:int(nh.RtNexthop.Len)])
if err != nil {
return nil, nil, err
}
var encap, encapType syscall.NetlinkRouteAttr
for _, attr := range attrs {
switch attr.Attr.Type {
case syscall.RTA_GATEWAY:
case unix.RTA_GATEWAY:
info.Gw = net.IP(attr.Value)
case nl.RTA_NEWDST:
var d Destination
@ -566,6 +704,19 @@ func deserializeRoute(m []byte) (Route, error) {
encapType = attr
case nl.RTA_ENCAP:
encap = attr
case unix.RTA_METRICS:
metrics, err := nl.ParseRouteAttr(attr.Value)
if err != nil {
return route, err
}
for _, metric := range metrics {
switch metric.Attr.Type {
case unix.RTAX_MTU:
route.MTU = int(native.Uint32(metric.Value[0:4]))
case unix.RTAX_ADVMSS:
route.AdvMSS = int(native.Uint32(metric.Value[0:4]))
}
}
}
}
@ -578,6 +729,11 @@ func deserializeRoute(m []byte) (Route, error) {
if err := e.Decode(encap.Value); err != nil {
return route, err
}
case nl.LWTUNNEL_ENCAP_SEG6:
e = &SEG6Encap{}
if err := e.Decode(encap.Value); err != nil {
return route, err
}
}
route.Encap = e
}
@ -594,7 +750,7 @@ func RouteGet(destination net.IP) ([]Route, error) {
// RouteGet gets a route to a specific destination from the host system.
// Equivalent to: 'ip route get'.
func (h *Handle) RouteGet(destination net.IP) ([]Route, error) {
req := h.newNetlinkRequest(syscall.RTM_GETROUTE, syscall.NLM_F_REQUEST)
req := h.newNetlinkRequest(unix.RTM_GETROUTE, unix.NLM_F_REQUEST)
family := nl.GetIPFamily(destination)
var destinationData []byte
var bitlen uint8
@ -610,10 +766,10 @@ func (h *Handle) RouteGet(destination net.IP) ([]Route, error) {
msg.Dst_len = bitlen
req.AddData(msg)
rtaDst := nl.NewRtAttr(syscall.RTA_DST, destinationData)
rtaDst := nl.NewRtAttr(unix.RTA_DST, destinationData)
req.AddData(rtaDst)
msgs, err := req.Execute(syscall.NETLINK_ROUTE, syscall.RTM_NEWROUTE)
msgs, err := req.Execute(unix.NETLINK_ROUTE, unix.RTM_NEWROUTE)
if err != nil {
return nil, err
}
@ -633,17 +789,35 @@ func (h *Handle) RouteGet(destination net.IP) ([]Route, error) {
// RouteSubscribe takes a chan down which notifications will be sent
// when routes are added or deleted. Close the 'done' chan to stop subscription.
func RouteSubscribe(ch chan<- RouteUpdate, done <-chan struct{}) error {
return routeSubscribeAt(netns.None(), netns.None(), ch, done)
return routeSubscribeAt(netns.None(), netns.None(), ch, done, nil)
}
// RouteSubscribeAt works like RouteSubscribe plus it allows the caller
// to choose the network namespace in which to subscribe (ns).
func RouteSubscribeAt(ns netns.NsHandle, ch chan<- RouteUpdate, done <-chan struct{}) error {
return routeSubscribeAt(ns, netns.None(), ch, done)
return routeSubscribeAt(ns, netns.None(), ch, done, nil)
}
func routeSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- RouteUpdate, done <-chan struct{}) error {
s, err := nl.SubscribeAt(newNs, curNs, syscall.NETLINK_ROUTE, syscall.RTNLGRP_IPV4_ROUTE, syscall.RTNLGRP_IPV6_ROUTE)
// RouteSubscribeOptions contains a set of options to use with
// RouteSubscribeWithOptions.
type RouteSubscribeOptions struct {
// Namespace is the network namespace to subscribe in. When nil,
// RouteSubscribeWithOptions substitutes netns.None().
Namespace *netns.NsHandle
// ErrorCallback, when non-nil, is called with the error that makes the
// subscription loop stop (a failed receive or a route that cannot be
// deserialized).
ErrorCallback func(error)
}
// RouteSubscribeWithOptions works like RouteSubscribe but accepts
// additional options that modify its behavior. Currently the network
// namespace to subscribe in and an error callback can be provided.
func RouteSubscribeWithOptions(ch chan<- RouteUpdate, done <-chan struct{}, options RouteSubscribeOptions) error {
	var ns netns.NsHandle
	if options.Namespace != nil {
		ns = *options.Namespace
	} else {
		// No namespace requested: fall back to the "none" handle.
		ns = netns.None()
	}
	return routeSubscribeAt(ns, netns.None(), ch, done, options.ErrorCallback)
}
func routeSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- RouteUpdate, done <-chan struct{}, cberr func(error)) error {
s, err := nl.SubscribeAt(newNs, curNs, unix.NETLINK_ROUTE, unix.RTNLGRP_IPV4_ROUTE, unix.RTNLGRP_IPV6_ROUTE)
if err != nil {
return err
}
@ -658,11 +832,17 @@ func routeSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- RouteUpdate, done <
for {
msgs, err := s.Receive()
if err != nil {
if cberr != nil {
cberr(err)
}
return
}
for _, m := range msgs {
route, err := deserializeRoute(m.Data)
if err != nil {
if cberr != nil {
cberr(err)
}
return
}
ch <- RouteUpdate{Type: m.Header.Type, Route: route}

View File

@ -8,6 +8,7 @@ import (
// Rule represents a netlink rule.
type Rule struct {
Priority int
Family int
Table int
Mark int
Mask int
@ -20,6 +21,7 @@ type Rule struct {
OifName string
SuppressIfgroup int
SuppressPrefixlen int
Invert bool
}
func (r Rule) String() string {

View File

@ -3,11 +3,13 @@ package netlink
import (
"fmt"
"net"
"syscall"
"github.com/vishvananda/netlink/nl"
"golang.org/x/sys/unix"
)
// FibRuleInvert is the rule message flag bit that ruleHandle sets when
// Rule.Invert is true and that RuleList tests to populate Rule.Invert.
// NOTE(review): presumably mirrors the kernel's FIB_RULE_INVERT — confirm.
const FibRuleInvert = 0x2
// RuleAdd adds a rule to the system.
// Equivalent to: ip rule add
func RuleAdd(rule *Rule) error {
@ -17,7 +19,7 @@ func RuleAdd(rule *Rule) error {
// RuleAdd adds a rule to the system.
// Equivalent to: ip rule add
func (h *Handle) RuleAdd(rule *Rule) error {
req := h.newNetlinkRequest(syscall.RTM_NEWRULE, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK)
req := h.newNetlinkRequest(unix.RTM_NEWRULE, unix.NLM_F_CREATE|unix.NLM_F_EXCL|unix.NLM_F_ACK)
return ruleHandle(rule, req)
}
@ -30,15 +32,31 @@ func RuleDel(rule *Rule) error {
// RuleDel deletes a rule from the system.
// Equivalent to: ip rule del
func (h *Handle) RuleDel(rule *Rule) error {
req := h.newNetlinkRequest(syscall.RTM_DELRULE, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK)
req := h.newNetlinkRequest(unix.RTM_DELRULE, unix.NLM_F_ACK)
return ruleHandle(rule, req)
}
func ruleHandle(rule *Rule, req *nl.NetlinkRequest) error {
msg := nl.NewRtMsg()
msg.Family = syscall.AF_INET
var dstFamily uint8
msg.Family = unix.AF_INET
msg.Protocol = unix.RTPROT_BOOT
msg.Scope = unix.RT_SCOPE_UNIVERSE
msg.Table = unix.RT_TABLE_UNSPEC
msg.Type = unix.RTN_UNSPEC
if req.NlMsghdr.Flags&unix.NLM_F_CREATE > 0 {
msg.Type = unix.RTN_UNICAST
}
if rule.Invert {
msg.Flags |= FibRuleInvert
}
if rule.Family != 0 {
msg.Family = uint8(rule.Family)
}
if rule.Table >= 0 && rule.Table < 256 {
msg.Table = uint8(rule.Table)
}
var dstFamily uint8
var rtAttrs []*nl.RtAttr
if rule.Dst != nil && rule.Dst.IP != nil {
dstLen, _ := rule.Dst.Mask.Size()
@ -46,12 +64,12 @@ func ruleHandle(rule *Rule, req *nl.NetlinkRequest) error {
msg.Family = uint8(nl.GetIPFamily(rule.Dst.IP))
dstFamily = msg.Family
var dstData []byte
if msg.Family == syscall.AF_INET {
if msg.Family == unix.AF_INET {
dstData = rule.Dst.IP.To4()
} else {
dstData = rule.Dst.IP.To16()
}
rtAttrs = append(rtAttrs, nl.NewRtAttr(syscall.RTA_DST, dstData))
rtAttrs = append(rtAttrs, nl.NewRtAttr(unix.RTA_DST, dstData))
}
if rule.Src != nil && rule.Src.IP != nil {
@ -62,19 +80,12 @@ func ruleHandle(rule *Rule, req *nl.NetlinkRequest) error {
srcLen, _ := rule.Src.Mask.Size()
msg.Src_len = uint8(srcLen)
var srcData []byte
if msg.Family == syscall.AF_INET {
if msg.Family == unix.AF_INET {
srcData = rule.Src.IP.To4()
} else {
srcData = rule.Src.IP.To16()
}
rtAttrs = append(rtAttrs, nl.NewRtAttr(syscall.RTA_SRC, srcData))
}
if rule.Table >= 0 {
msg.Table = uint8(rule.Table)
if rule.Table >= 256 {
msg.Table = syscall.RT_TABLE_UNSPEC
}
rtAttrs = append(rtAttrs, nl.NewRtAttr(unix.RTA_SRC, srcData))
}
req.AddData(msg)
@ -139,7 +150,7 @@ func ruleHandle(rule *Rule, req *nl.NetlinkRequest) error {
req.AddData(nl.NewRtAttr(nl.FRA_GOTO, b))
}
_, err := req.Execute(syscall.NETLINK_ROUTE, 0)
_, err := req.Execute(unix.NETLINK_ROUTE, 0)
return err
}
@ -152,11 +163,11 @@ func RuleList(family int) ([]Rule, error) {
// RuleList lists rules in the system.
// Equivalent to: ip rule list
func (h *Handle) RuleList(family int) ([]Rule, error) {
req := h.newNetlinkRequest(syscall.RTM_GETRULE, syscall.NLM_F_DUMP|syscall.NLM_F_REQUEST)
req := h.newNetlinkRequest(unix.RTM_GETRULE, unix.NLM_F_DUMP|unix.NLM_F_REQUEST)
msg := nl.NewIfInfomsg(family)
req.AddData(msg)
msgs, err := req.Execute(syscall.NETLINK_ROUTE, syscall.RTM_NEWRULE)
msgs, err := req.Execute(unix.NETLINK_ROUTE, unix.RTM_NEWRULE)
if err != nil {
return nil, err
}
@ -172,9 +183,11 @@ func (h *Handle) RuleList(family int) ([]Rule, error) {
rule := NewRule()
rule.Invert = msg.Flags&FibRuleInvert > 0
for j := range attrs {
switch attrs[j].Attr.Type {
case syscall.RTA_TABLE:
case unix.RTA_TABLE:
rule.Table = int(native.Uint32(attrs[j].Value[0:4]))
case nl.FRA_SRC:
rule.Src = &net.IPNet{

View File

@ -4,9 +4,9 @@ import (
"errors"
"fmt"
"net"
"syscall"
"github.com/vishvananda/netlink/nl"
"golang.org/x/sys/unix"
)
const (
@ -123,15 +123,15 @@ func SocketGet(local, remote net.Addr) (*Socket, error) {
return nil, ErrNotImplemented
}
s, err := nl.Subscribe(syscall.NETLINK_INET_DIAG)
s, err := nl.Subscribe(unix.NETLINK_INET_DIAG)
if err != nil {
return nil, err
}
defer s.Close()
req := nl.NewNetlinkRequest(nl.SOCK_DIAG_BY_FAMILY, 0)
req.AddData(&socketRequest{
Family: syscall.AF_INET,
Protocol: syscall.IPPROTO_TCP,
Family: unix.AF_INET,
Protocol: unix.IPPROTO_TCP,
ID: SocketID{
SourcePort: uint16(localTCP.Port),
DestinationPort: uint16(remoteTCP.Port),

View File

@ -2,19 +2,20 @@ package netlink
import (
"fmt"
"syscall"
"golang.org/x/sys/unix"
)
// Proto is an enum representing an ipsec protocol.
type Proto uint8
const (
XFRM_PROTO_ROUTE2 Proto = syscall.IPPROTO_ROUTING
XFRM_PROTO_ESP Proto = syscall.IPPROTO_ESP
XFRM_PROTO_AH Proto = syscall.IPPROTO_AH
XFRM_PROTO_HAO Proto = syscall.IPPROTO_DSTOPTS
XFRM_PROTO_ROUTE2 Proto = unix.IPPROTO_ROUTING
XFRM_PROTO_ESP Proto = unix.IPPROTO_ESP
XFRM_PROTO_AH Proto = unix.IPPROTO_AH
XFRM_PROTO_HAO Proto = unix.IPPROTO_DSTOPTS
XFRM_PROTO_COMP Proto = 0x6c // NOTE not defined on darwin
XFRM_PROTO_IPSEC_ANY Proto = syscall.IPPROTO_RAW
XFRM_PROTO_IPSEC_ANY Proto = unix.IPPROTO_RAW
)
func (p Proto) String() string {

View File

@ -2,11 +2,10 @@ package netlink
import (
"fmt"
"syscall"
"github.com/vishvananda/netns"
"github.com/vishvananda/netlink/nl"
"github.com/vishvananda/netns"
"golang.org/x/sys/unix"
)
type XfrmMsg interface {
@ -39,7 +38,7 @@ func XfrmMonitor(ch chan<- XfrmMsg, done <-chan struct{}, errorChan chan<- error
if err != nil {
return nil
}
s, err := nl.SubscribeAt(netns.None(), netns.None(), syscall.NETLINK_XFRM, groups...)
s, err := nl.SubscribeAt(netns.None(), netns.None(), unix.NETLINK_XFRM, groups...)
if err != nil {
return err
}

View File

@ -1,9 +1,8 @@
package netlink
import (
"syscall"
"github.com/vishvananda/netlink/nl"
"golang.org/x/sys/unix"
)
func selFromPolicy(sel *nl.XfrmSelector, policy *XfrmPolicy) {
@ -55,7 +54,7 @@ func (h *Handle) XfrmPolicyUpdate(policy *XfrmPolicy) error {
}
func (h *Handle) xfrmPolicyAddOrUpdate(policy *XfrmPolicy, nlProto int) error {
req := h.newNetlinkRequest(nlProto, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK)
req := h.newNetlinkRequest(nlProto, unix.NLM_F_CREATE|unix.NLM_F_EXCL|unix.NLM_F_ACK)
msg := &nl.XfrmUserpolicyInfo{}
selFromPolicy(&msg.Sel, policy)
@ -91,7 +90,7 @@ func (h *Handle) xfrmPolicyAddOrUpdate(policy *XfrmPolicy, nlProto int) error {
req.AddData(out)
}
_, err := req.Execute(syscall.NETLINK_XFRM, 0)
_, err := req.Execute(unix.NETLINK_XFRM, 0)
return err
}
@ -121,12 +120,12 @@ func XfrmPolicyList(family int) ([]XfrmPolicy, error) {
// Equivalent to: `ip xfrm policy show`.
// The list can be filtered by ip family.
func (h *Handle) XfrmPolicyList(family int) ([]XfrmPolicy, error) {
req := h.newNetlinkRequest(nl.XFRM_MSG_GETPOLICY, syscall.NLM_F_DUMP)
req := h.newNetlinkRequest(nl.XFRM_MSG_GETPOLICY, unix.NLM_F_DUMP)
msg := nl.NewIfInfomsg(family)
req.AddData(msg)
msgs, err := req.Execute(syscall.NETLINK_XFRM, nl.XFRM_MSG_NEWPOLICY)
msgs, err := req.Execute(unix.NETLINK_XFRM, nl.XFRM_MSG_NEWPOLICY)
if err != nil {
return nil, err
}
@ -165,13 +164,13 @@ func XfrmPolicyFlush() error {
// XfrmPolicyFlush will flush the policies on the system.
// Equivalent to: `ip xfrm policy flush`
func (h *Handle) XfrmPolicyFlush() error {
req := h.newNetlinkRequest(nl.XFRM_MSG_FLUSHPOLICY, syscall.NLM_F_ACK)
_, err := req.Execute(syscall.NETLINK_XFRM, 0)
req := h.newNetlinkRequest(nl.XFRM_MSG_FLUSHPOLICY, unix.NLM_F_ACK)
_, err := req.Execute(unix.NETLINK_XFRM, 0)
return err
}
func (h *Handle) xfrmPolicyGetOrDelete(policy *XfrmPolicy, nlProto int) (*XfrmPolicy, error) {
req := h.newNetlinkRequest(nlProto, syscall.NLM_F_ACK)
req := h.newNetlinkRequest(nlProto, unix.NLM_F_ACK)
msg := &nl.XfrmUserpolicyId{}
selFromPolicy(&msg.Sel, policy)
@ -189,7 +188,7 @@ func (h *Handle) xfrmPolicyGetOrDelete(policy *XfrmPolicy, nlProto int) (*XfrmPo
resType = 0
}
msgs, err := req.Execute(syscall.NETLINK_XFRM, uint16(resType))
msgs, err := req.Execute(unix.NETLINK_XFRM, uint16(resType))
if err != nil {
return nil, err
}

View File

@ -3,6 +3,7 @@ package netlink
import (
"fmt"
"net"
"time"
)
// XfrmStateAlgo represents the algorithm to use for the ipsec encryption.
@ -67,6 +68,19 @@ type XfrmStateLimits struct {
TimeUseHard uint64
}
// XfrmStateStats represents the current number of bytes/packets
// processed by this State, the State's installation and first use
// time and the replay window counters.
type XfrmStateStats struct {
// ReplayWindow is copied from nl.XfrmStats.ReplayWindow.
ReplayWindow uint32
// Replay is copied from nl.XfrmStats.Replay.
Replay uint32
// Failed is copied from nl.XfrmStats.IntegrityFailed.
Failed uint32
// Bytes is copied from nl.XfrmLifetimeCur.Bytes.
Bytes uint64
// Packets is copied from nl.XfrmLifetimeCur.Packets.
Packets uint64
// AddTime is copied from nl.XfrmLifetimeCur.AddTime; XfrmState.Print
// formats it as seconds since the Unix epoch.
AddTime uint64
// UseTime is copied from nl.XfrmLifetimeCur.UseTime; XfrmState.Print
// formats it as seconds since the Unix epoch and prints "-" when it is 0.
UseTime uint64
}
// XfrmState represents the state of an ipsec policy. It optionally
// contains an XfrmStateAlgo for encryption and one for authentication.
type XfrmState struct {
@ -78,6 +92,7 @@ type XfrmState struct {
Reqid int
ReplayWindow int
Limits XfrmStateLimits
Statistics XfrmStateStats
Mark *XfrmMark
Auth *XfrmStateAlgo
Crypt *XfrmStateAlgo
@ -94,10 +109,16 @@ func (sa XfrmState) Print(stats bool) string {
if !stats {
return sa.String()
}
return fmt.Sprintf("%s, ByteSoft: %s, ByteHard: %s, PacketSoft: %s, PacketHard: %s, TimeSoft: %d, TimeHard: %d, TimeUseSoft: %d, TimeUseHard: %d",
at := time.Unix(int64(sa.Statistics.AddTime), 0).Format(time.UnixDate)
ut := "-"
if sa.Statistics.UseTime > 0 {
ut = time.Unix(int64(sa.Statistics.UseTime), 0).Format(time.UnixDate)
}
return fmt.Sprintf("%s, ByteSoft: %s, ByteHard: %s, PacketSoft: %s, PacketHard: %s, TimeSoft: %d, TimeHard: %d, TimeUseSoft: %d, TimeUseHard: %d, Bytes: %d, Packets: %d, "+
"AddTime: %s, UseTime: %s, ReplayWindow: %d, Replay: %d, Failed: %d",
sa.String(), printLimit(sa.Limits.ByteSoft), printLimit(sa.Limits.ByteHard), printLimit(sa.Limits.PacketSoft), printLimit(sa.Limits.PacketHard),
sa.Limits.TimeSoft, sa.Limits.TimeHard, sa.Limits.TimeUseSoft, sa.Limits.TimeUseHard)
sa.Limits.TimeSoft, sa.Limits.TimeHard, sa.Limits.TimeUseSoft, sa.Limits.TimeUseHard, sa.Statistics.Bytes, sa.Statistics.Packets, at, ut,
sa.Statistics.ReplayWindow, sa.Statistics.Replay, sa.Statistics.Failed)
}
func printLimit(lmt uint64) string {

View File

@ -2,10 +2,10 @@ package netlink
import (
"fmt"
"syscall"
"unsafe"
"github.com/vishvananda/netlink/nl"
"golang.org/x/sys/unix"
)
func writeStateAlgo(a *XfrmStateAlgo) []byte {
@ -111,7 +111,7 @@ func (h *Handle) xfrmStateAddOrUpdate(state *XfrmState, nlProto int) error {
if state.Spi == 0 {
return fmt.Errorf("Spi must be set when adding xfrm state.")
}
req := h.newNetlinkRequest(nlProto, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK)
req := h.newNetlinkRequest(nlProto, unix.NLM_F_CREATE|unix.NLM_F_EXCL|unix.NLM_F_ACK)
msg := xfrmUsersaInfoFromXfrmState(state)
@ -157,13 +157,13 @@ func (h *Handle) xfrmStateAddOrUpdate(state *XfrmState, nlProto int) error {
req.AddData(out)
}
_, err := req.Execute(syscall.NETLINK_XFRM, 0)
_, err := req.Execute(unix.NETLINK_XFRM, 0)
return err
}
func (h *Handle) xfrmStateAllocSpi(state *XfrmState) (*XfrmState, error) {
req := h.newNetlinkRequest(nl.XFRM_MSG_ALLOCSPI,
syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK)
unix.NLM_F_CREATE|unix.NLM_F_EXCL|unix.NLM_F_ACK)
msg := &nl.XfrmUserSpiInfo{}
msg.XfrmUsersaInfo = *(xfrmUsersaInfoFromXfrmState(state))
@ -177,7 +177,7 @@ func (h *Handle) xfrmStateAllocSpi(state *XfrmState) (*XfrmState, error) {
req.AddData(out)
}
msgs, err := req.Execute(syscall.NETLINK_XFRM, 0)
msgs, err := req.Execute(unix.NETLINK_XFRM, 0)
if err != nil {
return nil, err
}
@ -216,9 +216,9 @@ func XfrmStateList(family int) ([]XfrmState, error) {
// Equivalent to: `ip xfrm state show`.
// The list can be filtered by ip family.
func (h *Handle) XfrmStateList(family int) ([]XfrmState, error) {
req := h.newNetlinkRequest(nl.XFRM_MSG_GETSA, syscall.NLM_F_DUMP)
req := h.newNetlinkRequest(nl.XFRM_MSG_GETSA, unix.NLM_F_DUMP)
msgs, err := req.Execute(syscall.NETLINK_XFRM, nl.XFRM_MSG_NEWSA)
msgs, err := req.Execute(unix.NETLINK_XFRM, nl.XFRM_MSG_NEWSA)
if err != nil {
return nil, err
}
@ -255,7 +255,7 @@ func (h *Handle) XfrmStateGet(state *XfrmState) (*XfrmState, error) {
}
func (h *Handle) xfrmStateGetOrDelete(state *XfrmState, nlProto int) (*XfrmState, error) {
req := h.newNetlinkRequest(nlProto, syscall.NLM_F_ACK)
req := h.newNetlinkRequest(nlProto, unix.NLM_F_ACK)
msg := &nl.XfrmUsersaId{}
msg.Family = uint16(nl.GetIPFamily(state.Dst))
@ -278,7 +278,7 @@ func (h *Handle) xfrmStateGetOrDelete(state *XfrmState, nlProto int) (*XfrmState
resType = 0
}
msgs, err := req.Execute(syscall.NETLINK_XFRM, uint16(resType))
msgs, err := req.Execute(unix.NETLINK_XFRM, uint16(resType))
if err != nil {
return nil, err
}
@ -308,6 +308,7 @@ func xfrmStateFromXfrmUsersaInfo(msg *nl.XfrmUsersaInfo) *XfrmState {
state.Reqid = int(msg.Reqid)
state.ReplayWindow = int(msg.ReplayWindow)
lftToLimits(&msg.Lft, &state.Limits)
curToStats(&msg.Curlft, &msg.Stats, &state.Statistics)
return &state
}
@ -386,11 +387,11 @@ func XfrmStateFlush(proto Proto) error {
// proto = 0 means any transformation protocols
// Equivalent to: `ip xfrm state flush [ proto XFRM-PROTO ]`
func (h *Handle) XfrmStateFlush(proto Proto) error {
req := h.newNetlinkRequest(nl.XFRM_MSG_FLUSHSA, syscall.NLM_F_ACK)
req := h.newNetlinkRequest(nl.XFRM_MSG_FLUSHSA, unix.NLM_F_ACK)
req.AddData(&nl.XfrmUsersaFlush{Proto: uint8(proto)})
_, err := req.Execute(syscall.NETLINK_XFRM, 0)
_, err := req.Execute(unix.NETLINK_XFRM, 0)
if err != nil {
return err
}
@ -429,6 +430,16 @@ func lftToLimits(lft *nl.XfrmLifetimeCfg, lmts *XfrmStateLimits) {
*lmts = *(*XfrmStateLimits)(unsafe.Pointer(lft))
}
// curToStats overwrites *stats with the lifetime counters from cur and the
// replay/integrity counters from wstats.
func curToStats(cur *nl.XfrmLifetimeCur, wstats *nl.XfrmStats, stats *XfrmStateStats) {
	*stats = XfrmStateStats{
		ReplayWindow: wstats.ReplayWindow,
		Replay:       wstats.Replay,
		Failed:       wstats.IntegrityFailed,
		Bytes:        cur.Bytes,
		Packets:      cur.Packets,
		AddTime:      cur.AddTime,
		UseTime:      cur.UseTime,
	}
}
func xfrmUsersaInfoFromXfrmState(state *XfrmState) *nl.XfrmUsersaInfo {
msg := &nl.XfrmUsersaInfo{}
msg.Family = uint16(nl.GetIPFamily(state.Dst))

View File

@ -188,6 +188,8 @@ func getPidForContainer(id string) (int, error) {
filepath.Join(cgroupRoot, "system.slice", "docker-"+id+".scope", "tasks"),
// Even more recent docker versions under cgroup/systemd/docker/<id>/
filepath.Join(cgroupRoot, "..", "systemd", "docker", id, "tasks"),
// Kubernetes with docker and CNI is even more different
filepath.Join(cgroupRoot, "..", "systemd", "kubepods", "*", "pod*", id, "tasks"),
}
var filename string