Merge pull request #30486 from freehan/lbsrcfix

Automatic merge from submit-queue

syncNetworkUtil in kubelet and fix loadbalancerSourceRange on GCE

fixes: #29997 #29039

@yujuhong Can you take a look at the kubelet part?

@girishkalele KUBE-MARK-DROP is the chain for dropping connections. Marked connection will be drop  in INPUT/OUTPUT chain of filter table. Let me know if this is good enough for your use case.
pull/6/head
Kubernetes Submit Queue 2016-08-16 15:22:34 -07:00 committed by GitHub
commit d412d5721d
15 changed files with 3013 additions and 2558 deletions

View File

@ -204,8 +204,7 @@ func NewProxyServerDefault(config *options.ProxyServerConfig) (*ProxyServer, err
// IPTablesMasqueradeBit must be specified or defaulted.
return nil, fmt.Errorf("Unable to read IPTablesMasqueradeBit from config")
}
proxierIptables, err := iptables.NewProxier(iptInterface, execer, config.IPTablesSyncPeriod.Duration, config.MasqueradeAll, int(*config.IPTablesMasqueradeBit), config.ClusterCIDR, hostname)
proxierIptables, err := iptables.NewProxier(iptInterface, execer, config.IPTablesSyncPeriod.Duration, config.MasqueradeAll, int(*config.IPTablesMasqueradeBit), config.ClusterCIDR, hostname, getNodeIP(client, hostname))
if err != nil {
glog.Fatalf("Unable to create proxier: %v", err)
}
@ -409,3 +408,18 @@ func tryIptablesProxy(iptver iptables.IptablesVersioner, kcompat iptables.Kernel
func (s *ProxyServer) birthCry() {
s.Recorder.Eventf(s.Config.NodeRef, api.EventTypeNormal, "Starting", "Starting kube-proxy.")
}
func getNodeIP(client *kubeclient.Client, hostname string) net.IP {
var nodeIP net.IP
node, err := client.Nodes().Get(hostname)
if err != nil {
glog.Warningf("Failed to retrieve node info: %v", err)
return nil
}
nodeIP, err = nodeutil.GetNodeHostIP(node)
if err != nil {
glog.Warningf("Failed to retrieve node IP: %v", err)
return nil
}
return nodeIP
}

View File

@ -164,6 +164,9 @@ func (s *KubeletServer) AddFlags(fs *pflag.FlagSet) {
fs.StringVar(&s.ResolverConfig, "resolv-conf", s.ResolverConfig, "Resolver configuration file used as the basis for the container DNS resolution configuration.")
fs.BoolVar(&s.CPUCFSQuota, "cpu-cfs-quota", s.CPUCFSQuota, "Enable CPU CFS quota enforcement for containers that specify CPU limits")
fs.BoolVar(&s.EnableControllerAttachDetach, "enable-controller-attach-detach", s.EnableControllerAttachDetach, "Enables the Attach/Detach controller to manage attachment/detachment of volumes scheduled to this node, and disables kubelet from executing any attach/detach operations")
fs.BoolVar(&s.MakeIPTablesUtilChains, "make-iptables-util-chains", s.MakeIPTablesUtilChains, "If true, kubelet will ensure iptables utility rules are present on host.")
fs.Int32Var(&s.IPTablesMasqueradeBit, "iptables-masquerade-bit", s.IPTablesMasqueradeBit, "The bit of the fwmark space to mark packets for SNAT. Must be within the range [0, 31]. Please match this parameter with corresponding parameter in kube-proxy.")
fs.Int32Var(&s.IPTablesDropBit, "iptables-drop-bit", s.IPTablesDropBit, "The bit of the fwmark space to mark packets for dropping. Must be within the range [0, 31].")
// Flags intended for testing, not recommended used in production environments.
fs.BoolVar(&s.ReallyCrashForTesting, "really-crash-for-testing", s.ReallyCrashForTesting, "If true, when panics occur crash. Intended for testing.")

View File

@ -199,6 +199,18 @@ func UnsecuredKubeletConfig(s *options.KubeletServer) (*KubeletConfig, error) {
Thresholds: thresholds,
}
if s.MakeIPTablesUtilChains {
if s.IPTablesMasqueradeBit > 31 || s.IPTablesMasqueradeBit < 0 {
return nil, fmt.Errorf("iptables-masquerade-bit is not valid. Must be within [0, 31]")
}
if s.IPTablesDropBit > 31 || s.IPTablesDropBit < 0 {
return nil, fmt.Errorf("iptables-drop-bit is not valid. Must be within [0, 31]")
}
if s.IPTablesDropBit == s.IPTablesMasqueradeBit {
return nil, fmt.Errorf("iptables-masquerade-bit and iptables-drop-bit must be different")
}
}
return &KubeletConfig{
Address: net.ParseIP(s.Address),
AllowPrivileged: s.AllowPrivileged,
@ -281,10 +293,13 @@ func UnsecuredKubeletConfig(s *options.KubeletServer) (*KubeletConfig, error) {
HairpinMode: s.HairpinMode,
BabysitDaemons: s.BabysitDaemons,
ExperimentalFlannelOverlay: s.ExperimentalFlannelOverlay,
NodeIP: net.ParseIP(s.NodeIP),
EvictionConfig: evictionConfig,
PodsPerCore: int(s.PodsPerCore),
ProtectKernelDefaults: s.ProtectKernelDefaults,
NodeIP: net.ParseIP(s.NodeIP),
EvictionConfig: evictionConfig,
PodsPerCore: int(s.PodsPerCore),
ProtectKernelDefaults: s.ProtectKernelDefaults,
MakeIPTablesUtilChains: s.MakeIPTablesUtilChains,
iptablesMasqueradeBit: int(s.IPTablesMasqueradeBit),
iptablesDropBit: int(s.IPTablesDropBit),
}, nil
}
@ -893,8 +908,10 @@ type KubeletConfig struct {
HairpinMode string
BabysitDaemons bool
Options []kubelet.Option
ProtectKernelDefaults bool
ProtectKernelDefaults bool
MakeIPTablesUtilChains bool
iptablesMasqueradeBit int
iptablesDropBit int
}
func CreateAndInitKubelet(kc *KubeletConfig) (k KubeletBootstrap, pc *config.PodConfig, err error) {
@ -992,6 +1009,9 @@ func CreateAndInitKubelet(kc *KubeletConfig) (k KubeletBootstrap, pc *config.Pod
kc.EvictionConfig,
kc.Options,
kc.EnableControllerAttachDetach,
kc.MakeIPTablesUtilChains,
kc.iptablesMasqueradeBit,
kc.iptablesDropBit,
)
if err != nil {

View File

@ -230,6 +230,7 @@ insecure-port
insecure-skip-tls-verify
instance-metadata
instance-name-prefix
iptables-drop-bit
iptables-masquerade-bit
iptables-sync-period
ir-data-source
@ -288,6 +289,7 @@ lock-file
log-flush-frequency
long-running-request-regexp
low-diskspace-threshold-mb
make-iptables-util-chains
make-symlinks
manifest-url
manifest-url-header

File diff suppressed because it is too large Load Diff

View File

@ -399,6 +399,18 @@ type KubeletConfiguration struct {
KubeReserved utilconfig.ConfigurationMap `json:"kubeReserved"`
// Default behaviour for kernel tuning
ProtectKernelDefaults bool `json:"protectKernelDefaults"`
// If true, Kubelet ensures a set of iptables rules are present on host.
// These rules will serve as utility for various components, e.g. kube-proxy.
// The rules will be created based on IPTablesMasqueradeBit and IPTablesDropBit.
MakeIPTablesUtilChains bool `json:"makeIPTablesUtilChains"`
// iptablesMasqueradeBit is the bit of the iptables fwmark space to use for SNAT
// Values must be within the range [0, 31].
// Warning: Please match the value of corresponding parameter in kube-proxy
// TODO: clean up IPTablesMasqueradeBit in kube-proxy
IPTablesMasqueradeBit int32 `json:"iptablesMasqueradeBit"`
// iptablesDropBit is the bit of the iptables fwmark space to use for dropping packets. Kubelet will ensure iptables mark and drop rules.
// Values must be within the range [0, 31]. Must be different from IPTablesMasqueradeBit
IPTablesDropBit int32 `json:"iptablesDropBit"`
}
type KubeSchedulerConfiguration struct {

View File

@ -43,6 +43,9 @@ const (
defaultRktAPIServiceEndpoint = "localhost:15441"
AutoDetectCloudProvider = "auto-detect"
defaultIPTablesMasqueradeBit = 14
defaultIPTablesDropBit = 15
)
var zeroDuration = unversioned.Duration{}
@ -337,6 +340,17 @@ func SetDefaults_KubeletConfiguration(obj *KubeletConfiguration) {
if obj.KubeReserved == nil {
obj.KubeReserved = make(map[string]string)
}
if obj.MakeIPTablesUtilChains == nil {
obj.MakeIPTablesUtilChains = boolVar(true)
}
if obj.IPTablesMasqueradeBit == nil {
temp := int32(defaultIPTablesMasqueradeBit)
obj.IPTablesMasqueradeBit = &temp
}
if obj.IPTablesDropBit == nil {
temp := int32(defaultIPTablesDropBit)
obj.IPTablesDropBit = &temp
}
}
func boolVar(b bool) *bool {

View File

@ -454,4 +454,16 @@ type KubeletConfiguration struct {
KubeReserved map[string]string `json:"kubeReserved"`
// Default behaviour for kernel tuning
ProtectKernelDefaults bool `json:"protectKernelDefaults"`
// If true, Kubelet ensures a set of iptables rules are present on host.
// These rules will serve as utility rules for various components, e.g. KubeProxy.
// The rules will be created based on IPTablesMasqueradeBit and IPTablesDropBit.
MakeIPTablesUtilChains *bool `json:"makeIPTablesUtilChains"`
// iptablesMasqueradeBit is the bit of the iptables fwmark space to mark for SNAT
// Values must be within the range [0, 31]. Must be different from other mark bits.
// Warning: Please match the value of corresponding parameter in kube-proxy
// TODO: clean up IPTablesMasqueradeBit in kube-proxy
IPTablesMasqueradeBit *int32 `json:"iptablesMasqueradeBit"`
// iptablesDropBit is the bit of the iptables fwmark space to mark for dropping packets.
// Values must be within the range [0, 31]. Must be different from other mark bits.
IPTablesDropBit *int32 `json:"iptablesDropBit"`
}

View File

@ -320,6 +320,15 @@ func autoConvert_v1alpha1_KubeletConfiguration_To_componentconfig_KubeletConfigu
out.KubeReserved = nil
}
out.ProtectKernelDefaults = in.ProtectKernelDefaults
if err := api.Convert_Pointer_bool_To_bool(&in.MakeIPTablesUtilChains, &out.MakeIPTablesUtilChains, s); err != nil {
return err
}
if err := api.Convert_Pointer_int32_To_int32(&in.IPTablesMasqueradeBit, &out.IPTablesMasqueradeBit, s); err != nil {
return err
}
if err := api.Convert_Pointer_int32_To_int32(&in.IPTablesDropBit, &out.IPTablesDropBit, s); err != nil {
return err
}
return nil
}
@ -485,6 +494,15 @@ func autoConvert_componentconfig_KubeletConfiguration_To_v1alpha1_KubeletConfigu
out.KubeReserved = nil
}
out.ProtectKernelDefaults = in.ProtectKernelDefaults
if err := api.Convert_bool_To_Pointer_bool(&in.MakeIPTablesUtilChains, &out.MakeIPTablesUtilChains, s); err != nil {
return err
}
if err := api.Convert_int32_To_Pointer_int32(&in.IPTablesMasqueradeBit, &out.IPTablesMasqueradeBit, s); err != nil {
return err
}
if err := api.Convert_int32_To_Pointer_int32(&in.IPTablesDropBit, &out.IPTablesDropBit, s); err != nil {
return err
}
return nil
}

View File

@ -378,6 +378,27 @@ func DeepCopy_v1alpha1_KubeletConfiguration(in interface{}, out interface{}, c *
out.KubeReserved = nil
}
out.ProtectKernelDefaults = in.ProtectKernelDefaults
if in.MakeIPTablesUtilChains != nil {
in, out := &in.MakeIPTablesUtilChains, &out.MakeIPTablesUtilChains
*out = new(bool)
**out = **in
} else {
out.MakeIPTablesUtilChains = nil
}
if in.IPTablesMasqueradeBit != nil {
in, out := &in.IPTablesMasqueradeBit, &out.IPTablesMasqueradeBit
*out = new(int32)
**out = **in
} else {
out.IPTablesMasqueradeBit = nil
}
if in.IPTablesDropBit != nil {
in, out := &in.IPTablesDropBit, &out.IPTablesDropBit
*out = new(int32)
**out = **in
} else {
out.IPTablesDropBit = nil
}
return nil
}
}

View File

@ -326,6 +326,9 @@ func DeepCopy_componentconfig_KubeletConfiguration(in interface{}, out interface
out.KubeReserved = nil
}
out.ProtectKernelDefaults = in.ProtectKernelDefaults
out.MakeIPTablesUtilChains = in.MakeIPTablesUtilChains
out.IPTablesMasqueradeBit = in.IPTablesMasqueradeBit
out.IPTablesDropBit = in.IPTablesDropBit
return nil
}
}

View File

@ -244,6 +244,9 @@ func NewMainKubelet(
evictionConfig eviction.Config,
kubeOptions []Option,
enableControllerAttachDetach bool,
makeIPTablesUtilChains bool,
iptablesMasqueradeBit int,
iptablesDropBit int,
) (*Kubelet, error) {
if rootDirectory == "" {
return nil, fmt.Errorf("invalid root directory %q", rootDirectory)
@ -368,6 +371,9 @@ func NewMainKubelet(
babysitDaemons: babysitDaemons,
enableControllerAttachDetach: enableControllerAttachDetach,
iptClient: utilipt.New(utilexec.New(), utildbus.New(), utilipt.ProtocolIpv4),
makeIPTablesUtilChains: makeIPTablesUtilChains,
iptablesMasqueradeBit: iptablesMasqueradeBit,
iptablesDropBit: iptablesDropBit,
}
if klet.flannelExperimentalOverlay {
@ -845,6 +851,15 @@ type Kubelet struct {
// trigger deleting containers in a pod
containerDeletor *podContainerDeletor
// config iptables util rules
makeIPTablesUtilChains bool
// The bit of the fwmark space to mark packets for SNAT.
iptablesMasqueradeBit int
// The bit of the fwmark space to mark packets for dropping.
iptablesDropBit int
}
// setupDataDirs creates:
@ -971,6 +986,11 @@ func (kl *Kubelet) Run(updates <-chan kubetypes.PodUpdate) {
go wait.Until(kl.syncNetworkStatus, 30*time.Second, wait.NeverStop)
go wait.Until(kl.updateRuntimeUp, 5*time.Second, wait.NeverStop)
// Start loop to sync iptables util rules
if kl.makeIPTablesUtilChains {
go wait.Until(kl.syncNetworkUtil, 1*time.Minute, wait.NeverStop)
}
// Start a goroutine responsible for killing pods (that are not properly
// handled by pod workers).
go wait.Until(kl.podKiller, 1*time.Second, wait.NeverStop)

View File

@ -28,9 +28,25 @@ import (
"k8s.io/kubernetes/pkg/apis/componentconfig"
"k8s.io/kubernetes/pkg/kubelet/network"
"k8s.io/kubernetes/pkg/util/bandwidth"
utiliptables "k8s.io/kubernetes/pkg/util/iptables"
"k8s.io/kubernetes/pkg/util/sets"
)
const (
// the mark-for-masquerade chain
// TODO: clean up this logic in kube-proxy
KubeMarkMasqChain utiliptables.Chain = "KUBE-MARK-MASQ"
// the mark-for-drop chain
KubeMarkDropChain utiliptables.Chain = "KUBE-MARK-DROP"
// kubernetes postrouting rules
KubePostroutingChain utiliptables.Chain = "KUBE-POSTROUTING"
// kubernetes postrouting rules
KubeFirewallChain utiliptables.Chain = "KUBE-FIREWALL"
)
// effectiveHairpinMode determines the effective hairpin mode given the
// configured mode, container runtime, and whether cbr0 should be configured.
func effectiveHairpinMode(hairpinMode componentconfig.HairpinMode, containerRuntime string, configureCBR0 bool, networkPlugin string) (componentconfig.HairpinMode, error) {
@ -303,3 +319,88 @@ func (kl *Kubelet) shapingEnabled() bool {
}
return true
}
// syncNetworkUtil ensures the network utility are present on host.
// Network util includes:
// 1. In nat table, KUBE-MARK-DROP rule to mark connections for dropping
// Marked connection will be drop on INPUT/OUTPUT Chain in filter table
// 2. In nat table, KUBE-MARK-MASQ rule to mark connections for SNAT
// Marked connection will get SNAT on POSTROUTING Chain in nat table
func (kl *Kubelet) syncNetworkUtil() {
if kl.iptablesMasqueradeBit < 0 || kl.iptablesMasqueradeBit > 31 {
glog.Errorf("invalid iptables-masquerade-bit %v not in [0, 31]", kl.iptablesMasqueradeBit)
return
}
if kl.iptablesDropBit < 0 || kl.iptablesDropBit > 31 {
glog.Errorf("invalid iptables-drop-bit %v not in [0, 31]", kl.iptablesDropBit)
return
}
if kl.iptablesDropBit == kl.iptablesMasqueradeBit {
glog.Errorf("iptables-masquerade-bit %v and iptables-drop-bit %v must be different", kl.iptablesMasqueradeBit, kl.iptablesDropBit)
return
}
// Setup KUBE-MARK-DROP rules
dropMark := getIPTablesMark(kl.iptablesDropBit)
if _, err := kl.iptClient.EnsureChain(utiliptables.TableNAT, KubeMarkDropChain); err != nil {
glog.Errorf("Failed to ensure that %s chain %s exists: %v", utiliptables.TableNAT, KubeMarkDropChain, err)
return
}
if _, err := kl.iptClient.EnsureRule(utiliptables.Append, utiliptables.TableNAT, KubeMarkDropChain, "-j", "MARK", "--set-xmark", dropMark); err != nil {
glog.Errorf("Failed to ensure marking rule for %v: %v", KubeMarkDropChain, err)
return
}
if _, err := kl.iptClient.EnsureChain(utiliptables.TableFilter, KubeFirewallChain); err != nil {
glog.Errorf("Failed to ensure that %s chain %s exists: %v", utiliptables.TableFilter, KubeFirewallChain, err)
return
}
if _, err := kl.iptClient.EnsureRule(utiliptables.Append, utiliptables.TableFilter, KubeFirewallChain,
"-m", "comment", "--comment", "kubernetes firewall for dropping marked packets",
"-m", "mark", "--mark", dropMark,
"-j", "DROP"); err != nil {
glog.Errorf("Failed to ensure rule to drop packet marked by %v in %v chain %v: %v", KubeMarkDropChain, utiliptables.TableFilter, KubeFirewallChain, err)
return
}
if _, err := kl.iptClient.EnsureRule(utiliptables.Prepend, utiliptables.TableFilter, utiliptables.ChainOutput, "-j", string(KubeFirewallChain)); err != nil {
glog.Errorf("Failed to ensure that %s chain %s jumps to %s: %v", utiliptables.TableFilter, utiliptables.ChainOutput, KubeFirewallChain, err)
return
}
if _, err := kl.iptClient.EnsureRule(utiliptables.Prepend, utiliptables.TableFilter, utiliptables.ChainInput, "-j", string(KubeFirewallChain)); err != nil {
glog.Errorf("Failed to ensure that %s chain %s jumps to %s: %v", utiliptables.TableFilter, utiliptables.ChainInput, KubeFirewallChain, err)
return
}
// Setup KUBE-MARK-MASQ rules
masqueradeMark := getIPTablesMark(kl.iptablesMasqueradeBit)
if _, err := kl.iptClient.EnsureChain(utiliptables.TableNAT, KubeMarkMasqChain); err != nil {
glog.Errorf("Failed to ensure that %s chain %s exists: %v", utiliptables.TableNAT, KubeMarkMasqChain, err)
return
}
if _, err := kl.iptClient.EnsureChain(utiliptables.TableNAT, KubePostroutingChain); err != nil {
glog.Errorf("Failed to ensure that %s chain %s exists: %v", utiliptables.TableNAT, KubePostroutingChain, err)
return
}
if _, err := kl.iptClient.EnsureRule(utiliptables.Append, utiliptables.TableNAT, KubeMarkMasqChain, "-j", "MARK", "--set-xmark", masqueradeMark); err != nil {
glog.Errorf("Failed to ensure marking rule for %v: %v", KubeMarkMasqChain, err)
return
}
if _, err := kl.iptClient.EnsureRule(utiliptables.Prepend, utiliptables.TableNAT, utiliptables.ChainPostrouting,
"-m", "comment", "--comment", "kubernetes postrouting rules", "-j", string(KubePostroutingChain)); err != nil {
glog.Errorf("Failed to ensure that %s chain %s jumps to %s: %v", utiliptables.TableNAT, utiliptables.ChainPostrouting, KubePostroutingChain, err)
return
}
if _, err := kl.iptClient.EnsureRule(utiliptables.Append, utiliptables.TableNAT, KubePostroutingChain,
"-m", "comment", "--comment", "kubernetes service traffic requiring SNAT",
"-m", "mark", "--mark", masqueradeMark, "-j", "MASQUERADE"); err != nil {
glog.Errorf("Failed to ensure SNAT rule for packets marked by %v in %v chain %v: %v", KubeMarkMasqChain, utiliptables.TableNAT, KubePostroutingChain, err)
return
}
}
// getIPTablesMark returns the fwmark given the bit
func getIPTablesMark(bit int) string {
value := 1 << uint(bit)
return fmt.Sprintf("%#08x/%#08x", value, value)
}

View File

@ -217,3 +217,25 @@ func TestCleanupBandwidthLimits(t *testing.T) {
}
}
}
func TestGetIPTablesMark(t *testing.T) {
tests := []struct {
bit int
expect string
}{
{
14,
"0x00004000/0x00004000",
},
{
15,
"0x00008000/0x00008000",
},
}
for _, tc := range tests {
res := getIPTablesMark(tc.bit)
if res != tc.expect {
t.Errorf("getIPTablesMark output unexpected result: %v when input bit is %d. Expect result: %v", res, tc.bit, tc.expect)
}
}
}

View File

@ -45,31 +45,35 @@ import (
utilsysctl "k8s.io/kubernetes/pkg/util/sysctl"
)
// iptablesMinVersion is the minimum version of iptables for which we will use the Proxier
// from this package instead of the userspace Proxier. While most of the
// features we need were available earlier, the '-C' flag was added more
// recently. We use that indirectly in Ensure* functions, and if we don't
// have it, we have to be extra careful about the exact args we feed in being
// the same as the args we read back (iptables itself normalizes some args).
// This is the "new" Proxier, so we require "new" versions of tools.
const iptablesMinVersion = utiliptables.MinCheckVersion
const (
// iptablesMinVersion is the minimum version of iptables for which we will use the Proxier
// from this package instead of the userspace Proxier. While most of the
// features we need were available earlier, the '-C' flag was added more
// recently. We use that indirectly in Ensure* functions, and if we don't
// have it, we have to be extra careful about the exact args we feed in being
// the same as the args we read back (iptables itself normalizes some args).
// This is the "new" Proxier, so we require "new" versions of tools.
iptablesMinVersion = utiliptables.MinCheckVersion
// the services chain
const kubeServicesChain utiliptables.Chain = "KUBE-SERVICES"
// the services chain
kubeServicesChain utiliptables.Chain = "KUBE-SERVICES"
// the nodeports chain
const kubeNodePortsChain utiliptables.Chain = "KUBE-NODEPORTS"
// the nodeports chain
kubeNodePortsChain utiliptables.Chain = "KUBE-NODEPORTS"
// the kubernetes postrouting chain
const kubePostroutingChain utiliptables.Chain = "KUBE-POSTROUTING"
// the kubernetes postrouting chain
kubePostroutingChain utiliptables.Chain = "KUBE-POSTROUTING"
// the mark-for-masquerade chain
// TODO: let kubelet manage this chain. Other component should just assume it exists and use it.
const KubeMarkMasqChain utiliptables.Chain = "KUBE-MARK-MASQ"
// the mark-for-masquerade chain
KubeMarkMasqChain utiliptables.Chain = "KUBE-MARK-MASQ"
// the mark we apply to traffic needing SNAT
// TODO(thockin): Remove this for v1.3 or v1.4.
const oldIptablesMasqueradeMark = "0x4d415351"
// the mark we apply to traffic needing SNAT
// TODO(thockin): Remove this for v1.3 or v1.4.
oldIptablesMasqueradeMark = "0x4d415351"
// the mark-for-drop chain
KubeMarkDropChain utiliptables.Chain = "KUBE-MARK-DROP"
)
// IptablesVersioner can query the current iptables version.
type IptablesVersioner interface {
@ -128,14 +132,15 @@ const sysctlBridgeCallIptables = "net/bridge/bridge-nf-call-iptables"
// internal struct for string service information
type serviceInfo struct {
clusterIP net.IP
port int
protocol api.Protocol
nodePort int
loadBalancerStatus api.LoadBalancerStatus
sessionAffinityType api.ServiceAffinity
stickyMaxAgeSeconds int
externalIPs []string
clusterIP net.IP
port int
protocol api.Protocol
nodePort int
loadBalancerStatus api.LoadBalancerStatus
sessionAffinityType api.ServiceAffinity
stickyMaxAgeSeconds int
externalIPs []string
loadBalancerSourceRanges []string
}
// returns a new serviceInfo struct
@ -164,6 +169,7 @@ type Proxier struct {
exec utilexec.Interface
clusterCIDR string
hostname string
nodeIP net.IP
}
type localPort struct {
@ -189,7 +195,7 @@ var _ proxy.ProxyProvider = &Proxier{}
// An error will be returned if iptables fails to update or acquire the initial lock.
// Once a proxier is created, it will keep iptables up to date in the background and
// will not terminate if a particular iptables call fails.
func NewProxier(ipt utiliptables.Interface, exec utilexec.Interface, syncPeriod time.Duration, masqueradeAll bool, masqueradeBit int, clusterCIDR string, hostname string) (*Proxier, error) {
func NewProxier(ipt utiliptables.Interface, exec utilexec.Interface, syncPeriod time.Duration, masqueradeAll bool, masqueradeBit int, clusterCIDR string, hostname string, nodeIP net.IP) (*Proxier, error) {
// Set the route_localnet sysctl we need for
if err := utilsysctl.SetSysctl(sysctlRouteLocalnet, 1); err != nil {
return nil, fmt.Errorf("can't set sysctl %s: %v", sysctlRouteLocalnet, err)
@ -209,6 +215,10 @@ func NewProxier(ipt utiliptables.Interface, exec utilexec.Interface, syncPeriod
masqueradeValue := 1 << uint(masqueradeBit)
masqueradeMark := fmt.Sprintf("%#08x/%#08x", masqueradeValue, masqueradeValue)
if nodeIP == nil {
glog.Warningf("invalid nodeIP, initialize kube-proxy with 127.0.0.1 as nodeIP")
nodeIP = net.ParseIP("127.0.0.1")
}
return &Proxier{
serviceMap: make(map[proxy.ServicePortName]*serviceInfo),
endpointsMap: make(map[proxy.ServicePortName][]string),
@ -220,6 +230,7 @@ func NewProxier(ipt utiliptables.Interface, exec utilexec.Interface, syncPeriod
exec: exec,
clusterCIDR: clusterCIDR,
hostname: hostname,
nodeIP: nodeIP,
}, nil
}
@ -428,6 +439,7 @@ func (proxier *Proxier) OnServiceUpdate(allServices []api.Service) {
// Deep-copy in case the service instance changes
info.loadBalancerStatus = *api.LoadBalancerStatusDeepCopy(&service.Status.LoadBalancer)
info.sessionAffinityType = service.Spec.SessionAffinity
info.loadBalancerSourceRanges = service.Spec.LoadBalancerSourceRanges
proxier.serviceMap[serviceName] = info
glog.V(4).Infof("added serviceInfo(%s): %s", serviceName, spew.Sdump(info))
@ -552,15 +564,28 @@ func flattenValidEndpoints(endpoints []hostPortPair) []string {
return result
}
// servicePortChainName takes the ServicePortName for a service and
// returns the associated iptables chain. This is computed by hashing (sha256)
// then encoding to base32 and truncating with the prefix "KUBE-SVC-". We do
// this because Iptables Chain Names must be <= 28 chars long, and the longer
// they are the harder they are to read.
func servicePortChainName(s proxy.ServicePortName, protocol string) utiliptables.Chain {
// portProtoHash takes the ServicePortName and protocol for a service
// returns the associated 16 character hash. This is computed by hashing (sha256)
// then encoding to base32 and truncating to 16 chars. We do this because Iptables
// Chain Names must be <= 28 chars long, and the longer they are the harder they are to read.
func portProtoHash(s proxy.ServicePortName, protocol string) string {
hash := sha256.Sum256([]byte(s.String() + protocol))
encoded := base32.StdEncoding.EncodeToString(hash[:])
return utiliptables.Chain("KUBE-SVC-" + encoded[:16])
return encoded[:16]
}
// servicePortChainName takes the ServicePortName for a service and
// returns the associated iptables chain. This is computed by hashing (sha256)
// then encoding to base32 and truncating with the prefix "KUBE-SVC-".
func servicePortChainName(s proxy.ServicePortName, protocol string) utiliptables.Chain {
return utiliptables.Chain("KUBE-SVC-" + portProtoHash(s, protocol))
}
// servicePortChainName takes the ServicePortName for a service and
// returns the associated iptables chain. This is computed by hashing (sha256)
// then encoding to base32 and truncating with the prefix "KUBE-FW-".
func serviceFirewallChainName(s proxy.ServicePortName, protocol string) utiliptables.Chain {
return utiliptables.Chain("KUBE-FW-" + portProtoHash(s, protocol))
}
// This is the same as servicePortChainName but with the endpoint included.
@ -854,9 +879,54 @@ func (proxier *Proxier) syncProxyRules() {
"-d", fmt.Sprintf("%s/32", ingress.IP),
"--dport", fmt.Sprintf("%d", svcInfo.port),
}
// create service firewall chain
fwChain := serviceFirewallChainName(svcName, protocol)
if chain, ok := existingNATChains[fwChain]; ok {
writeLine(natChains, chain)
} else {
writeLine(natChains, utiliptables.MakeChainLine(fwChain))
}
// jump to service firewall chain
// The service firewall rules are created based on ServiceSpec.loadBalancerSourceRanges field.
// This currently works for loadbalancers that preserves source ips.
// For loadbalancers which direct traffic to service NodePort, the firewall rules will not apply.
writeLine(natRules, append(args, "-j", string(fwChain))...)
args = []string{
"-A", string(fwChain),
"-m", "comment", "--comment", fmt.Sprintf(`"%s loadbalancer IP"`, svcName.String()),
"-m", protocol, "-p", protocol,
"-d", fmt.Sprintf("%s/32", ingress.IP),
"--dport", fmt.Sprintf("%d", svcInfo.port),
}
// We have to SNAT packets from external IPs.
writeLine(natRules, append(args, "-j", string(KubeMarkMasqChain))...)
writeLine(natRules, append(args, "-j", string(svcChain))...)
if len(svcInfo.loadBalancerSourceRanges) == 0 {
// allow all sources, so jump directly to KUBE-SVC chain
writeLine(natRules, append(args, "-j", string(svcChain))...)
} else {
// firewall filter based on each source range
allowFromNode := false
for _, src := range svcInfo.loadBalancerSourceRanges {
writeLine(natRules, append(args, "-s", src, "-j", string(svcChain))...)
// ignore error because it has been validated
_, cidr, _ := net.ParseCIDR(src)
if cidr.Contains(proxier.nodeIP) {
allowFromNode = true
}
}
// generally, ip route rule was added to intercept request to loadbalancer vip from the
// loadbalancer's backend hosts. In this case, request will not hit the loadbalancer but loop back directly.
// Need to add the following rule to allow request on host.
if allowFromNode {
writeLine(natRules, append(args, "-s", fmt.Sprintf("%s/32", ingress.IP), "-j", string(svcChain))...)
}
}
// If the packet was able to reach the end of firewall chain, then it did not get DNATed.
// It means the packet cannot go thru the firewall, then mark it for DROP
writeLine(natRules, append(args, "-j", string(KubeMarkDropChain))...)
}
}