2015-12-16 23:31:10 +00:00
// +build linux
/ *
2016-06-03 00:25:58 +00:00
Copyright 2014 The Kubernetes Authors .
2015-12-16 23:31:10 +00:00
Licensed under the Apache License , Version 2.0 ( the "License" ) ;
you may not use this file except in compliance with the License .
You may obtain a copy of the License at
http : //www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing , software
distributed under the License is distributed on an "AS IS" BASIS ,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND , either express or implied .
See the License for the specific language governing permissions and
limitations under the License .
* /
package kubenet
import (
"fmt"
"net"
2016-08-22 08:28:11 +00:00
"strings"
2016-04-26 20:56:46 +00:00
"sync"
2015-12-16 23:31:10 +00:00
"syscall"
2016-05-09 21:54:15 +00:00
"time"
2015-12-16 23:31:10 +00:00
2016-07-12 20:43:19 +00:00
"github.com/containernetworking/cni/libcni"
cnitypes "github.com/containernetworking/cni/pkg/types"
2015-12-16 23:31:10 +00:00
"github.com/golang/glog"
2016-05-11 20:25:14 +00:00
"github.com/vishvananda/netlink"
"github.com/vishvananda/netlink/nl"
2016-06-13 22:19:04 +00:00
"k8s.io/kubernetes/pkg/api"
2016-03-31 22:20:04 +00:00
"k8s.io/kubernetes/pkg/apis/componentconfig"
2015-12-16 23:31:10 +00:00
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
"k8s.io/kubernetes/pkg/kubelet/network"
"k8s.io/kubernetes/pkg/util/bandwidth"
2016-05-11 20:25:14 +00:00
utildbus "k8s.io/kubernetes/pkg/util/dbus"
2016-08-19 22:15:02 +00:00
utilebtables "k8s.io/kubernetes/pkg/util/ebtables"
2016-06-07 02:45:46 +00:00
utilerrors "k8s.io/kubernetes/pkg/util/errors"
2016-04-11 19:06:26 +00:00
utilexec "k8s.io/kubernetes/pkg/util/exec"
2016-05-11 20:25:14 +00:00
utiliptables "k8s.io/kubernetes/pkg/util/iptables"
2016-06-22 22:57:01 +00:00
utilnet "k8s.io/kubernetes/pkg/util/net"
2016-04-01 17:00:05 +00:00
utilsets "k8s.io/kubernetes/pkg/util/sets"
2016-04-11 19:06:26 +00:00
utilsysctl "k8s.io/kubernetes/pkg/util/sysctl"
2016-06-07 02:45:46 +00:00
"k8s.io/kubernetes/pkg/kubelet/network/hostport"
2016-08-19 17:18:00 +00:00
"strconv"
2015-12-16 23:31:10 +00:00
)
const (
2016-03-02 00:17:04 +00:00
KubenetPluginName = "kubenet"
BridgeName = "cbr0"
DefaultCNIDir = "/opt/cni/bin"
2016-04-11 19:06:26 +00:00
sysctlBridgeCallIptables = "net/bridge/bridge-nf-call-iptables"
2016-08-10 16:09:43 +00:00
// fallbackMTU is used if an MTU is not specified, and we cannot determine the MTU
fallbackMTU = 1460
2016-08-19 17:18:00 +00:00
// private mac prefix safe to use
2016-08-19 22:15:02 +00:00
// Universally administered and locally administered addresses are distinguished by setting the second-least-significant
// bit of the first octet of the address. If it is 1, the address is locally administered. For example, for address 0a:00:00:00:00:00,
// the first cotet is 0a(hex), the binary form of which is 00001010, where the second-least-significant bit is 1.
2016-08-19 17:18:00 +00:00
privateMACPrefix = "0a:58"
2016-08-19 22:15:02 +00:00
// ebtables Chain to store dedup rules
dedupChain = utilebtables . Chain ( "KUBE-DEDUP" )
2015-12-16 23:31:10 +00:00
)
type kubenetNetworkPlugin struct {
2016-03-22 16:38:21 +00:00
network . NoopNetworkPlugin
2016-05-27 02:07:20 +00:00
host network . Host
netConfig * libcni . NetworkConfig
loConfig * libcni . NetworkConfig
cniConfig libcni . CNI
bandwidthShaper bandwidth . BandwidthShaper
mu sync . Mutex //Mutex for protecting podIPs map, netConfig, and shaper initialization
podIPs map [ kubecontainer . ContainerID ] string
2016-08-10 16:09:43 +00:00
mtu int
2016-05-27 02:07:20 +00:00
execer utilexec . Interface
nsenterPath string
hairpinMode componentconfig . HairpinMode
2016-06-07 02:45:46 +00:00
hostportHandler hostport . HostportHandler
2016-05-27 02:07:20 +00:00
iptables utiliptables . Interface
2016-08-10 15:38:44 +00:00
sysctl utilsysctl . Interface
2016-08-19 22:15:02 +00:00
ebtables utilebtables . Interface
2016-06-02 22:49:21 +00:00
// vendorDir is passed by kubelet network-plugin-dir parameter.
// kubenet will search for cni binaries in DefaultCNIDir first, then continue to vendorDir.
2016-06-09 17:32:28 +00:00
vendorDir string
nonMasqueradeCIDR string
2016-08-19 22:15:02 +00:00
podCidr string
gateway net . IP
2015-12-16 23:31:10 +00:00
}
2016-06-02 22:49:21 +00:00
func NewPlugin ( networkPluginDir string ) network . NetworkPlugin {
2016-05-11 20:25:14 +00:00
protocol := utiliptables . ProtocolIpv4
execer := utilexec . New ( )
dbus := utildbus . New ( )
2016-08-10 16:09:43 +00:00
sysctl := utilsysctl . New ( )
2016-05-11 20:25:14 +00:00
iptInterface := utiliptables . New ( execer , dbus , protocol )
2015-12-16 23:31:10 +00:00
return & kubenetNetworkPlugin {
2016-06-09 17:32:28 +00:00
podIPs : make ( map [ kubecontainer . ContainerID ] string ) ,
execer : utilexec . New ( ) ,
iptables : iptInterface ,
2016-08-10 16:09:43 +00:00
sysctl : sysctl ,
2016-06-09 17:32:28 +00:00
vendorDir : networkPluginDir ,
2016-06-07 02:45:46 +00:00
hostportHandler : hostport . NewHostportHandler ( ) ,
2016-06-09 17:32:28 +00:00
nonMasqueradeCIDR : "10.0.0.0/8" ,
2015-12-16 23:31:10 +00:00
}
}
2016-08-10 16:09:43 +00:00
func ( plugin * kubenetNetworkPlugin ) Init ( host network . Host , hairpinMode componentconfig . HairpinMode , nonMasqueradeCIDR string , mtu int ) error {
2015-12-16 23:31:10 +00:00
plugin . host = host
2016-03-31 22:20:04 +00:00
plugin . hairpinMode = hairpinMode
2016-06-22 19:26:11 +00:00
plugin . nonMasqueradeCIDR = nonMasqueradeCIDR
2015-12-16 23:31:10 +00:00
plugin . cniConfig = & libcni . CNIConfig {
2016-06-02 22:49:21 +00:00
Path : [ ] string { DefaultCNIDir , plugin . vendorDir } ,
2015-12-16 23:31:10 +00:00
}
2016-08-10 16:09:43 +00:00
if mtu == network . UseDefaultMTU {
if link , err := findMinMTU ( ) ; err == nil {
plugin . mtu = link . MTU
glog . V ( 5 ) . Infof ( "Using interface %s MTU %d as bridge MTU" , link . Name , link . MTU )
} else {
plugin . mtu = fallbackMTU
glog . Warningf ( "Failed to find default bridge MTU, using %d: %v" , fallbackMTU , err )
}
2015-12-16 23:31:10 +00:00
} else {
2016-08-10 16:09:43 +00:00
plugin . mtu = mtu
2015-12-16 23:31:10 +00:00
}
2016-04-11 19:06:26 +00:00
// Since this plugin uses a Linux bridge, set bridge-nf-call-iptables=1
// is necessary to ensure kube-proxy functions correctly.
//
// This will return an error on older kernel version (< 3.18) as the module
// was built-in, we simply ignore the error here. A better thing to do is
// to check the kernel version in the future.
2016-05-05 03:39:41 +00:00
plugin . execer . Command ( "modprobe" , "br-netfilter" ) . CombinedOutput ( )
2016-08-10 15:38:44 +00:00
err := plugin . sysctl . SetSysctl ( sysctlBridgeCallIptables , 1 )
2016-05-09 21:19:41 +00:00
if err != nil {
2016-04-11 19:06:26 +00:00
glog . Warningf ( "can't set sysctl %s: %v" , sysctlBridgeCallIptables , err )
}
2016-05-09 21:19:41 +00:00
plugin . loConfig , err = libcni . ConfFromBytes ( [ ] byte ( ` {
"cniVersion" : "0.1.0" ,
"name" : "kubenet-loopback" ,
"type" : "loopback"
} ` ) )
if err != nil {
return fmt . Errorf ( "Failed to generate loopback config: %v" , err )
}
2016-06-22 19:26:11 +00:00
plugin . nsenterPath , err = plugin . execer . LookPath ( "nsenter" )
if err != nil {
return fmt . Errorf ( "Failed to find nsenter binary: %v" , err )
}
2016-06-09 17:32:28 +00:00
// Need to SNAT outbound traffic from cluster
if err = plugin . ensureMasqRule ( ) ; err != nil {
return err
}
return nil
}
// TODO: move thic logic into cni bridge plugin and remove this from kubenet
func ( plugin * kubenetNetworkPlugin ) ensureMasqRule ( ) error {
if _ , err := plugin . iptables . EnsureRule ( utiliptables . Append , utiliptables . TableNAT , utiliptables . ChainPostrouting ,
"-m" , "comment" , "--comment" , "kubenet: SNAT for outbound traffic from cluster" ,
"-m" , "addrtype" , "!" , "--dst-type" , "LOCAL" ,
"!" , "-d" , plugin . nonMasqueradeCIDR ,
"-j" , "MASQUERADE" ) ; err != nil {
return fmt . Errorf ( "Failed to ensure that %s chain %s jumps to MASQUERADE: %v" , utiliptables . TableNAT , utiliptables . ChainPostrouting , err )
}
2015-12-16 23:31:10 +00:00
return nil
}
func findMinMTU ( ) ( * net . Interface , error ) {
intfs , err := net . Interfaces ( )
if err != nil {
return nil , err
}
mtu := 999999
defIntfIndex := - 1
for i , intf := range intfs {
if ( ( intf . Flags & net . FlagUp ) != 0 ) && ( intf . Flags & ( net . FlagLoopback | net . FlagPointToPoint ) == 0 ) {
if intf . MTU < mtu {
mtu = intf . MTU
defIntfIndex = i
}
}
}
if mtu >= 999999 || mtu < 576 || defIntfIndex < 0 {
2016-03-23 00:26:50 +00:00
return nil , fmt . Errorf ( "no suitable interface: %v" , BridgeName )
2015-12-16 23:31:10 +00:00
}
return & intfs [ defIntfIndex ] , nil
}
// NET_CONFIG_TEMPLATE is the fmt template of the CNI bridge network config.
// Its verbs are filled, in order, with: the bridge name (%s), the MTU (%d),
// the container interface name (%s), whether hairpin mode is enabled (%t),
// the pod subnet (%s) and the gateway address (%s).
//
// Hairpin is emitted as the boolean "hairpinMode" option understood by the
// CNI bridge plugin; a quoted string under a different key would be ignored.
const NET_CONFIG_TEMPLATE = `{
  "cniVersion": "0.1.0",
  "name": "kubenet",
  "type": "bridge",
  "bridge": "%s",
  "mtu": %d,
  "addIf": "%s",
  "isGateway": true,
  "ipMasq": false,
  "hairpinMode": %t,
  "ipam": {
    "type": "host-local",
    "subnet": "%s",
    "gateway": "%s",
    "routes": [
      { "dst": "0.0.0.0/0" }
    ]
  }
}`
func ( plugin * kubenetNetworkPlugin ) Event ( name string , details map [ string ] interface { } ) {
if name != network . NET_PLUGIN_EVENT_POD_CIDR_CHANGE {
return
}
2016-04-26 20:56:46 +00:00
plugin . mu . Lock ( )
defer plugin . mu . Unlock ( )
2015-12-16 23:31:10 +00:00
podCIDR , ok := details [ network . NET_PLUGIN_EVENT_POD_CIDR_CHANGE_DETAIL_CIDR ] . ( string )
if ! ok {
glog . Warningf ( "%s event didn't contain pod CIDR" , network . NET_PLUGIN_EVENT_POD_CIDR_CHANGE )
return
}
if plugin . netConfig != nil {
glog . V ( 5 ) . Infof ( "Ignoring subsequent pod CIDR update to %s" , podCIDR )
return
}
glog . V ( 5 ) . Infof ( "PodCIDR is set to %q" , podCIDR )
_ , cidr , err := net . ParseCIDR ( podCIDR )
if err == nil {
2016-07-14 16:52:51 +00:00
setHairpin := plugin . hairpinMode == componentconfig . HairpinVeth
2015-12-16 23:31:10 +00:00
// Set bridge address to first address in IPNet
cidr . IP . To4 ( ) [ 3 ] += 1
2016-08-10 16:09:43 +00:00
json := fmt . Sprintf ( NET_CONFIG_TEMPLATE , BridgeName , plugin . mtu , network . DefaultInterfaceName , setHairpin , podCIDR , cidr . IP . String ( ) )
2016-03-31 20:54:45 +00:00
glog . V ( 2 ) . Infof ( "CNI network config set to %v" , json )
2015-12-16 23:31:10 +00:00
plugin . netConfig , err = libcni . ConfFromBytes ( [ ] byte ( json ) )
if err == nil {
glog . V ( 5 ) . Infof ( "CNI network config:\n%s" , json )
// Ensure cbr0 has no conflicting addresses; CNI's 'bridge'
// plugin will bail out if the bridge has an unexpected one
2016-06-22 22:57:01 +00:00
plugin . clearBridgeAddressesExcept ( cidr )
2015-12-16 23:31:10 +00:00
}
2016-08-19 17:18:00 +00:00
plugin . podCidr = podCIDR
plugin . gateway = cidr . IP
2015-12-16 23:31:10 +00:00
}
if err != nil {
glog . Warningf ( "Failed to generate CNI network config: %v" , err )
}
}
2016-06-22 22:57:01 +00:00
func ( plugin * kubenetNetworkPlugin ) clearBridgeAddressesExcept ( keep * net . IPNet ) {
2015-12-16 23:31:10 +00:00
bridge , err := netlink . LinkByName ( BridgeName )
if err != nil {
return
}
addrs , err := netlink . AddrList ( bridge , syscall . AF_INET )
if err != nil {
return
}
for _ , addr := range addrs {
2016-06-22 22:57:01 +00:00
if ! utilnet . IPNetEqual ( addr . IPNet , keep ) {
glog . V ( 2 ) . Infof ( "Removing old address %s from %s" , addr . IPNet . String ( ) , BridgeName )
2015-12-16 23:31:10 +00:00
netlink . AddrDel ( bridge , & addr )
}
}
}
2016-05-04 14:48:12 +00:00
// ensureBridgeTxQueueLen makes sure the bridge interface has a TX queue
// length greater than zero. Because of a bug in the CNI <= 0.3.0 'bridge'
// plugin the bridge starts out with a TX queue length of 0; that value is
// used as the packet limit for FIFO traffic shapers, which then drop packets.
// When the length is zero it is set to 1000 with a raw netlink request.
// Nothing is done if the bridge cannot be looked up.
// TODO: remove when we can depend on a fixed CNI
func (plugin *kubenetNetworkPlugin) ensureBridgeTxQueueLen() {
	bridge, err := netlink.LinkByName(BridgeName)
	if err != nil || bridge.Attrs().TxQLen > 0 {
		return
	}

	// Build an RTM_NEWLINK request addressing the bridge by name.
	req := nl.NewNetlinkRequest(syscall.RTM_NEWLINK, syscall.NLM_F_ACK)
	req.AddData(nl.NewIfInfomsg(syscall.AF_UNSPEC))
	req.AddData(nl.NewRtAttr(syscall.IFLA_IFNAME, nl.ZeroTerminated(BridgeName)))
	req.AddData(nl.NewRtAttr(syscall.IFLA_TXQLEN, nl.Uint32Attr(1000)))

	if _, execErr := req.Execute(syscall.NETLINK_ROUTE, 0); execErr != nil {
		glog.V(5).Infof("Failed to set bridge tx queue length: %v", execErr)
	}
}
2015-12-16 23:31:10 +00:00
// Name returns the name of this network plugin, KubenetPluginName.
func (plugin *kubenetNetworkPlugin) Name() string {
	return KubenetPluginName
}
2016-04-01 17:00:05 +00:00
// Capabilities returns the set of plugin capabilities, which contains only
// network.NET_PLUGIN_CAPABILITY_SHAPING.
func (plugin *kubenetNetworkPlugin) Capabilities() utilsets.Int {
	return utilsets.NewInt(network.NET_PLUGIN_CAPABILITY_SHAPING)
}
2016-06-13 22:19:04 +00:00
func ( plugin * kubenetNetworkPlugin ) setup ( namespace string , name string , id kubecontainer . ContainerID , pod * api . Pod ) error {
2016-05-09 21:19:41 +00:00
// Bring up container loopback interface
if _ , err := plugin . addContainerToNetwork ( plugin . loConfig , "lo" , namespace , name , id ) ; err != nil {
return err
2015-12-16 23:31:10 +00:00
}
2016-05-09 21:19:41 +00:00
// Hook container up with our bridge
res , err := plugin . addContainerToNetwork ( plugin . netConfig , network . DefaultInterfaceName , namespace , name , id )
2015-12-16 23:31:10 +00:00
if err != nil {
2016-04-26 20:56:46 +00:00
return err
2015-12-16 23:31:10 +00:00
}
2016-05-27 23:25:14 +00:00
if res . IP4 == nil {
2016-05-09 21:19:41 +00:00
return fmt . Errorf ( "CNI plugin reported no IPv4 address for container %v." , id )
}
2016-05-27 23:25:14 +00:00
ip4 := res . IP4 . IP . IP . To4 ( )
if ip4 == nil {
return fmt . Errorf ( "CNI plugin reported an invalid IPv4 address for container %v: %+v." , id , res . IP4 )
}
2015-12-16 23:31:10 +00:00
2016-08-19 17:18:00 +00:00
// Explicitly assign mac address to cbr0. If bridge mac address is not explicitly set will adopt the lowest MAC address of the attached veths.
// TODO: Remove this once upstream cni bridge plugin handles this
link , err := netlink . LinkByName ( BridgeName )
if err != nil {
return fmt . Errorf ( "failed to lookup %q: %v" , BridgeName , err )
}
macAddr , err := generateHardwareAddr ( plugin . gateway )
if err != nil {
return err
}
glog . V ( 3 ) . Infof ( "Configure %q mac address to %v" , BridgeName , macAddr )
err = netlink . LinkSetHardwareAddr ( link , macAddr )
if err != nil {
return fmt . Errorf ( "Failed to configure %q mac address to %q: %v" , BridgeName , macAddr , err )
}
2016-03-31 22:20:04 +00:00
// Put the container bridge into promiscuous mode to force it to accept hairpin packets.
// TODO: Remove this once the kernel bug (#20096) is fixed.
2016-08-22 08:28:11 +00:00
// TODO: check and set promiscuous mode with netlink once vishvananda/netlink supports it
2016-03-31 22:20:04 +00:00
if plugin . hairpinMode == componentconfig . PromiscuousBridge {
2016-08-22 08:28:11 +00:00
output , err := plugin . execer . Command ( "ip" , "link" , "show" , "dev" , BridgeName ) . CombinedOutput ( )
if err != nil || strings . Index ( string ( output ) , "PROMISC" ) < 0 {
_ , err := plugin . execer . Command ( "ip" , "link" , "set" , BridgeName , "promisc" , "on" ) . CombinedOutput ( )
2016-03-31 22:20:04 +00:00
if err != nil {
return fmt . Errorf ( "Error setting promiscuous mode on %s: %v" , BridgeName , err )
}
}
2016-08-19 22:15:02 +00:00
// configure the ebtables rules to eliminate duplicate packets by best effort
plugin . syncEbtablesDedupRules ( macAddr )
2016-03-31 22:20:04 +00:00
}
2016-05-27 02:07:20 +00:00
// The first SetUpPod call creates the bridge; get a shaper for the sake of
// initialization
shaper := plugin . shaper ( )
2015-12-16 23:31:10 +00:00
2016-06-13 22:19:04 +00:00
ingress , egress , err := bandwidth . ExtractPodBandwidthResources ( pod . Annotations )
if err != nil {
return fmt . Errorf ( "Error reading pod bandwidth annotations: %v" , err )
}
2016-04-01 17:00:05 +00:00
if egress != nil || ingress != nil {
2016-06-07 02:45:46 +00:00
if err := shaper . ReconcileCIDR ( fmt . Sprintf ( "%s/32" , ip4 . String ( ) ) , egress , ingress ) ; err != nil {
2016-04-01 17:00:05 +00:00
return fmt . Errorf ( "Failed to add pod to shaper: %v" , err )
}
}
2015-12-16 23:31:10 +00:00
2016-06-07 02:45:46 +00:00
plugin . podIPs [ id ] = ip4 . String ( )
// Open any hostports the pod's containers want
runningPods , err := plugin . getRunningPods ( )
2016-06-13 22:19:04 +00:00
if err != nil {
return err
2016-06-07 02:45:46 +00:00
}
2016-06-23 00:52:12 +00:00
newPod := & hostport . RunningPod { Pod : pod , IP : ip4 }
if err := plugin . hostportHandler . OpenPodHostportsAndSync ( newPod , BridgeName , runningPods ) ; err != nil {
2016-06-13 22:19:04 +00:00
return err
2016-06-09 17:32:28 +00:00
}
2016-06-07 02:45:46 +00:00
2016-06-13 22:19:04 +00:00
return nil
2015-12-16 23:31:10 +00:00
}
2016-06-13 22:19:04 +00:00
func ( plugin * kubenetNetworkPlugin ) SetUpPod ( namespace string , name string , id kubecontainer . ContainerID ) error {
2016-05-09 21:54:15 +00:00
plugin . mu . Lock ( )
defer plugin . mu . Unlock ( )
start := time . Now ( )
defer func ( ) {
2016-06-13 22:19:04 +00:00
glog . V ( 4 ) . Infof ( "SetUpPod took %v for %s/%s" , time . Since ( start ) , namespace , name )
2016-05-09 21:54:15 +00:00
} ( )
2016-06-13 22:19:04 +00:00
pod , ok := plugin . host . GetPodByName ( namespace , name )
if ! ok {
return fmt . Errorf ( "pod %q cannot be found" , name )
2015-12-16 23:31:10 +00:00
}
2016-06-13 22:19:04 +00:00
if err := plugin . Status ( ) ; err != nil {
return fmt . Errorf ( "Kubenet cannot SetUpPod: %v" , err )
}
if err := plugin . setup ( namespace , name , id , pod ) ; err != nil {
// Make sure everything gets cleaned up on errors
podIP , _ := plugin . podIPs [ id ]
if err := plugin . teardown ( namespace , name , id , podIP ) ; err != nil {
// Not a hard error or warning
glog . V ( 4 ) . Infof ( "Failed to clean up %s/%s after SetUpPod failure: %v" , namespace , name , err )
}
return err
}
// Need to SNAT outbound traffic from cluster
if err := plugin . ensureMasqRule ( ) ; err != nil {
glog . Errorf ( "Failed to ensure MASQ rule: %v" , err )
}
return nil
}
// Tears down as much of a pod's network as it can even if errors occur. Returns
// an aggregate error composed of all errors encountered during the teardown.
func ( plugin * kubenetNetworkPlugin ) teardown ( namespace string , name string , id kubecontainer . ContainerID , podIP string ) error {
errList := [ ] error { }
if podIP != "" {
2016-05-27 01:47:22 +00:00
glog . V ( 5 ) . Infof ( "Removing pod IP %s from shaper" , podIP )
2015-12-16 23:31:10 +00:00
// shaper wants /32
2016-05-27 02:07:20 +00:00
if err := plugin . shaper ( ) . Reset ( fmt . Sprintf ( "%s/32" , podIP ) ) ; err != nil {
2016-05-27 02:42:56 +00:00
// Possible bandwidth shaping wasn't enabled for this pod anyways
glog . V ( 4 ) . Infof ( "Failed to remove pod IP %s from shaper: %v" , podIP , err )
2015-12-16 23:31:10 +00:00
}
2016-06-13 22:19:04 +00:00
delete ( plugin . podIPs , id )
2015-12-16 23:31:10 +00:00
}
2016-06-13 22:19:04 +00:00
2016-05-09 21:19:41 +00:00
if err := plugin . delContainerFromNetwork ( plugin . netConfig , network . DefaultInterfaceName , namespace , name , id ) ; err != nil {
2016-05-24 21:18:28 +00:00
// This is to prevent returning error when TearDownPod is called twice on the same pod. This helps to reduce event pollution.
2016-06-13 22:19:04 +00:00
if podIP != "" {
2016-05-24 21:18:28 +00:00
glog . Warningf ( "Failed to delete container from kubenet: %v" , err )
2016-06-13 22:19:04 +00:00
} else {
errList = append ( errList , err )
2016-05-24 21:18:28 +00:00
}
2015-12-16 23:31:10 +00:00
}
2016-06-07 02:45:46 +00:00
runningPods , err := plugin . getRunningPods ( )
if err == nil {
err = plugin . hostportHandler . SyncHostports ( BridgeName , runningPods )
}
2016-06-13 22:19:04 +00:00
if err != nil {
errList = append ( errList , err )
}
return utilerrors . NewAggregate ( errList )
}
func ( plugin * kubenetNetworkPlugin ) TearDownPod ( namespace string , name string , id kubecontainer . ContainerID ) error {
plugin . mu . Lock ( )
defer plugin . mu . Unlock ( )
start := time . Now ( )
defer func ( ) {
glog . V ( 4 ) . Infof ( "TearDownPod took %v for %s/%s" , time . Since ( start ) , namespace , name )
} ( )
if plugin . netConfig == nil {
return fmt . Errorf ( "Kubenet needs a PodCIDR to tear down pods" )
}
// no cached IP is Ok during teardown
podIP , _ := plugin . podIPs [ id ]
if err := plugin . teardown ( namespace , name , id , podIP ) ; err != nil {
return err
}
2016-06-09 17:32:28 +00:00
// Need to SNAT outbound traffic from cluster
if err := plugin . ensureMasqRule ( ) ; err != nil {
glog . Errorf ( "Failed to ensure MASQ rule: %v" , err )
}
2016-06-07 02:45:46 +00:00
2016-06-13 22:19:04 +00:00
return nil
2015-12-16 23:31:10 +00:00
}
// TODO: Use the addToNetwork function to obtain the IP of the Pod. That will assume idempotent ADD call to the plugin.
// Also fix the runtime's call to Status function to be done only in the case that the IP is lost, no need to do periodic calls
2016-04-26 23:10:07 +00:00
func ( plugin * kubenetNetworkPlugin ) GetPodNetworkStatus ( namespace string , name string , id kubecontainer . ContainerID ) ( * network . PodNetworkStatus , error ) {
2016-04-26 20:56:46 +00:00
plugin . mu . Lock ( )
defer plugin . mu . Unlock ( )
2016-05-05 03:39:41 +00:00
// Assuming the ip of pod does not change. Try to retrieve ip from kubenet map first.
2016-05-27 01:47:22 +00:00
if podIP , ok := plugin . podIPs [ id ] ; ok {
return & network . PodNetworkStatus { IP : net . ParseIP ( podIP ) } , nil
2016-05-05 03:39:41 +00:00
}
2016-05-03 00:49:02 +00:00
netnsPath , err := plugin . host . GetRuntime ( ) . GetNetNS ( id )
2016-05-05 03:39:41 +00:00
if err != nil {
return nil , fmt . Errorf ( "Kubenet failed to retrieve network namespace path: %v" , err )
}
2016-06-22 19:26:11 +00:00
ip , err := network . GetPodIP ( plugin . execer , plugin . nsenterPath , netnsPath , network . DefaultInterfaceName )
2016-05-05 03:39:41 +00:00
if err != nil {
2016-06-21 21:58:30 +00:00
return nil , err
2016-05-05 03:39:41 +00:00
}
2016-06-21 21:58:30 +00:00
2016-05-27 01:47:22 +00:00
plugin . podIPs [ id ] = ip . String ( )
2015-12-16 23:31:10 +00:00
return & network . PodNetworkStatus { IP : ip } , nil
}
2016-04-26 23:10:07 +00:00
func ( plugin * kubenetNetworkPlugin ) Status ( ) error {
2016-04-22 22:23:03 +00:00
// Can't set up pods if we don't have a PodCIDR yet
if plugin . netConfig == nil {
return fmt . Errorf ( "Kubenet does not have netConfig. This is most likely due to lack of PodCIDR" )
}
return nil
}
2016-06-07 02:45:46 +00:00
// Returns a list of pods running on this node and each pod's IP address. Assumes
// PodSpecs retrieved from the runtime include the name and ID of containers in
// each pod.
func ( plugin * kubenetNetworkPlugin ) getRunningPods ( ) ( [ ] * hostport . RunningPod , error ) {
pods , err := plugin . host . GetRuntime ( ) . GetPods ( false )
if err != nil {
return nil , fmt . Errorf ( "Failed to retrieve pods from runtime: %v" , err )
}
runningPods := make ( [ ] * hostport . RunningPod , 0 )
for _ , p := range pods {
2016-06-22 14:44:33 +00:00
containerID , err := plugin . host . GetRuntime ( ) . GetPodContainerID ( p )
if err != nil {
continue
}
ipString , ok := plugin . podIPs [ containerID ]
if ! ok {
continue
}
podIP := net . ParseIP ( ipString )
if podIP == nil {
continue
}
if pod , ok := plugin . host . GetPodByName ( p . Namespace , p . Name ) ; ok {
runningPods = append ( runningPods , & hostport . RunningPod {
Pod : pod ,
IP : podIP ,
} )
2016-06-07 02:45:46 +00:00
}
}
return runningPods , nil
}
2016-05-09 21:19:41 +00:00
func ( plugin * kubenetNetworkPlugin ) buildCNIRuntimeConf ( ifName string , id kubecontainer . ContainerID ) ( * libcni . RuntimeConf , error ) {
netnsPath , err := plugin . host . GetRuntime ( ) . GetNetNS ( id )
if err != nil {
return nil , fmt . Errorf ( "Kubenet failed to retrieve network namespace path: %v" , err )
}
2015-12-16 23:31:10 +00:00
return & libcni . RuntimeConf {
2016-05-09 21:19:41 +00:00
ContainerID : id . ID ,
NetNS : netnsPath ,
IfName : ifName ,
} , nil
2015-12-16 23:31:10 +00:00
}
2016-04-26 20:56:46 +00:00
2016-05-09 21:19:41 +00:00
func ( plugin * kubenetNetworkPlugin ) addContainerToNetwork ( config * libcni . NetworkConfig , ifName , namespace , name string , id kubecontainer . ContainerID ) ( * cnitypes . Result , error ) {
rt , err := plugin . buildCNIRuntimeConf ( ifName , id )
2016-04-26 20:56:46 +00:00
if err != nil {
2016-05-09 21:19:41 +00:00
return nil , fmt . Errorf ( "Error building CNI config: %v" , err )
2016-04-26 20:56:46 +00:00
}
2016-05-09 21:19:41 +00:00
glog . V ( 3 ) . Infof ( "Adding %s/%s to '%s' with CNI '%s' plugin and runtime: %+v" , namespace , name , config . Network . Name , config . Network . Type , rt )
res , err := plugin . cniConfig . AddNetwork ( config , rt )
if err != nil {
return nil , fmt . Errorf ( "Error adding container to network: %v" , err )
}
return res , nil
2016-04-26 20:56:46 +00:00
}
2016-05-09 21:19:41 +00:00
func ( plugin * kubenetNetworkPlugin ) delContainerFromNetwork ( config * libcni . NetworkConfig , ifName , namespace , name string , id kubecontainer . ContainerID ) error {
rt , err := plugin . buildCNIRuntimeConf ( ifName , id )
if err != nil {
return fmt . Errorf ( "Error building CNI config: %v" , err )
}
glog . V ( 3 ) . Infof ( "Removing %s/%s from '%s' with CNI '%s' plugin and runtime: %+v" , namespace , name , config . Network . Name , config . Network . Type , rt )
if err := plugin . cniConfig . DelNetwork ( config , rt ) ; err != nil {
2016-04-26 20:56:46 +00:00
return fmt . Errorf ( "Error removing container from network: %v" , err )
}
return nil
}
2016-05-05 03:39:41 +00:00
2016-05-27 02:07:20 +00:00
// shaper returns the bandwidth shaper. On first use it creates the shaper for
// the bridge, makes sure the bridge TX queue length is usable and reconciles
// the shaper's interface.
// This function should only be called while holding the `plugin.mu` lock
func (plugin *kubenetNetworkPlugin) shaper() bandwidth.BandwidthShaper {
	if plugin.bandwidthShaper != nil {
		return plugin.bandwidthShaper
	}

	plugin.bandwidthShaper = bandwidth.NewTCShaper(BridgeName)
	plugin.ensureBridgeTxQueueLen()
	plugin.bandwidthShaper.ReconcileInterface()
	return plugin.bandwidthShaper
}
2016-08-19 17:18:00 +00:00
2016-08-19 22:15:02 +00:00
// syncEbtablesDedupRules installs, on a best-effort basis, the ebtables rules
// that eliminate duplicate packets: on veth outputs, IPv4 frames carrying the
// bridge MAC address (macAddr) as source are accepted when they come from the
// gateway IP and dropped when they come from anywhere else in the pod CIDR.
// The ebtables interface is created, and the dedup chain flushed, on the first
// call. Failures are logged together with their cause and end the sync early.
// TODO: make this into a goroutine and rectify the dedup rules periodically
func (plugin *kubenetNetworkPlugin) syncEbtablesDedupRules(macAddr net.HardwareAddr) {
	if plugin.ebtables == nil {
		plugin.ebtables = utilebtables.New(plugin.execer)
		glog.V(3).Infof("Flushing dedup chain")
		if err := plugin.ebtables.FlushChain(utilebtables.TableFilter, dedupChain); err != nil {
			glog.Errorf("Failed to flush dedup chain: %v", err)
		}
	}

	if _, err := plugin.ebtables.GetVersion(); err != nil {
		glog.Warningf("Failed to get ebtables version. Skip syncing ebtables dedup rules: %v", err)
		return
	}

	glog.V(3).Infof("Filtering packets with ebtables on mac address: %v, gateway: %v, pod CIDR: %v", macAddr.String(), plugin.gateway.String(), plugin.podCidr)
	if _, err := plugin.ebtables.EnsureChain(utilebtables.TableFilter, dedupChain); err != nil {
		glog.Errorf("Failed to ensure %v chain %v: %v", utilebtables.TableFilter, dedupChain, err)
		return
	}

	if _, err := plugin.ebtables.EnsureRule(utilebtables.Append, utilebtables.TableFilter, utilebtables.ChainOutput, "-j", string(dedupChain)); err != nil {
		glog.Errorf("Failed to ensure %v chain %v jump to %v chain: %v", utilebtables.TableFilter, utilebtables.ChainOutput, dedupChain, err)
		return
	}

	// Match arguments shared by both dedup rules.
	commonArgs := []string{"-p", "IPv4", "-s", macAddr.String(), "-o", "veth+"}

	// The accept rule is prepended so it is evaluated before the drop rule.
	acceptArgs := append(commonArgs, "--ip-src", plugin.gateway.String(), "-j", "ACCEPT")
	if _, err := plugin.ebtables.EnsureRule(utilebtables.Prepend, utilebtables.TableFilter, dedupChain, acceptArgs...); err != nil {
		glog.Errorf("Failed to ensure packets from cbr0 gateway to be accepted: %v", err)
		return
	}

	dropArgs := append(commonArgs, "--ip-src", plugin.podCidr, "-j", "DROP")
	if _, err := plugin.ebtables.EnsureRule(utilebtables.Append, utilebtables.TableFilter, dedupChain, dropArgs...); err != nil {
		glog.Errorf("Failed to ensure packets from podCidr but has mac address of cbr0 to get dropped: %v", err)
		return
	}
}
2016-08-19 17:18:00 +00:00
// generateHardwareAddr generates 48 bit virtual mac addresses based on the IP input.
func generateHardwareAddr ( ip net . IP ) ( net . HardwareAddr , error ) {
if ip . To4 ( ) == nil {
return nil , fmt . Errorf ( "generateHardwareAddr only support valid ipv4 address as input" )
}
mac := privateMACPrefix
sections := strings . Split ( ip . String ( ) , "." )
for _ , s := range sections {
i , _ := strconv . Atoi ( s )
mac = mac + ":" + fmt . Sprintf ( "%02x" , i )
}
hwAddr , err := net . ParseMAC ( mac )
if err != nil {
return nil , fmt . Errorf ( "Failed to parse mac address %s generated based on ip %s due to: %v" , mac , ip , err )
}
return hwAddr , nil
2016-08-19 22:15:02 +00:00
}