mirror of https://github.com/k3s-io/k3s
Merge pull request #30486 from freehan/lbsrcfix
Automatic merge from submit-queue syncNetworkUtil in kubelet and fix loadbalancerSourceRange on GCE fixes: #29997 #29039 @yujuhong Can you take a look at the kubelet part? @girishkalele KUBE-MARK-DROP is the chain for dropping connections. Marked connection will be drop in INPUT/OUTPUT chain of filter table. Let me know if this is good enough for your use case.pull/6/head
commit
d412d5721d
|
@ -204,8 +204,7 @@ func NewProxyServerDefault(config *options.ProxyServerConfig) (*ProxyServer, err
|
|||
// IPTablesMasqueradeBit must be specified or defaulted.
|
||||
return nil, fmt.Errorf("Unable to read IPTablesMasqueradeBit from config")
|
||||
}
|
||||
|
||||
proxierIptables, err := iptables.NewProxier(iptInterface, execer, config.IPTablesSyncPeriod.Duration, config.MasqueradeAll, int(*config.IPTablesMasqueradeBit), config.ClusterCIDR, hostname)
|
||||
proxierIptables, err := iptables.NewProxier(iptInterface, execer, config.IPTablesSyncPeriod.Duration, config.MasqueradeAll, int(*config.IPTablesMasqueradeBit), config.ClusterCIDR, hostname, getNodeIP(client, hostname))
|
||||
if err != nil {
|
||||
glog.Fatalf("Unable to create proxier: %v", err)
|
||||
}
|
||||
|
@ -409,3 +408,18 @@ func tryIptablesProxy(iptver iptables.IptablesVersioner, kcompat iptables.Kernel
|
|||
func (s *ProxyServer) birthCry() {
|
||||
s.Recorder.Eventf(s.Config.NodeRef, api.EventTypeNormal, "Starting", "Starting kube-proxy.")
|
||||
}
|
||||
|
||||
func getNodeIP(client *kubeclient.Client, hostname string) net.IP {
|
||||
var nodeIP net.IP
|
||||
node, err := client.Nodes().Get(hostname)
|
||||
if err != nil {
|
||||
glog.Warningf("Failed to retrieve node info: %v", err)
|
||||
return nil
|
||||
}
|
||||
nodeIP, err = nodeutil.GetNodeHostIP(node)
|
||||
if err != nil {
|
||||
glog.Warningf("Failed to retrieve node IP: %v", err)
|
||||
return nil
|
||||
}
|
||||
return nodeIP
|
||||
}
|
||||
|
|
|
@ -164,6 +164,9 @@ func (s *KubeletServer) AddFlags(fs *pflag.FlagSet) {
|
|||
fs.StringVar(&s.ResolverConfig, "resolv-conf", s.ResolverConfig, "Resolver configuration file used as the basis for the container DNS resolution configuration.")
|
||||
fs.BoolVar(&s.CPUCFSQuota, "cpu-cfs-quota", s.CPUCFSQuota, "Enable CPU CFS quota enforcement for containers that specify CPU limits")
|
||||
fs.BoolVar(&s.EnableControllerAttachDetach, "enable-controller-attach-detach", s.EnableControllerAttachDetach, "Enables the Attach/Detach controller to manage attachment/detachment of volumes scheduled to this node, and disables kubelet from executing any attach/detach operations")
|
||||
fs.BoolVar(&s.MakeIPTablesUtilChains, "make-iptables-util-chains", s.MakeIPTablesUtilChains, "If true, kubelet will ensure iptables utility rules are present on host.")
|
||||
fs.Int32Var(&s.IPTablesMasqueradeBit, "iptables-masquerade-bit", s.IPTablesMasqueradeBit, "The bit of the fwmark space to mark packets for SNAT. Must be within the range [0, 31]. Please match this parameter with corresponding parameter in kube-proxy.")
|
||||
fs.Int32Var(&s.IPTablesDropBit, "iptables-drop-bit", s.IPTablesDropBit, "The bit of the fwmark space to mark packets for dropping. Must be within the range [0, 31].")
|
||||
|
||||
// Flags intended for testing, not recommended used in production environments.
|
||||
fs.BoolVar(&s.ReallyCrashForTesting, "really-crash-for-testing", s.ReallyCrashForTesting, "If true, when panics occur crash. Intended for testing.")
|
||||
|
|
|
@ -199,6 +199,18 @@ func UnsecuredKubeletConfig(s *options.KubeletServer) (*KubeletConfig, error) {
|
|||
Thresholds: thresholds,
|
||||
}
|
||||
|
||||
if s.MakeIPTablesUtilChains {
|
||||
if s.IPTablesMasqueradeBit > 31 || s.IPTablesMasqueradeBit < 0 {
|
||||
return nil, fmt.Errorf("iptables-masquerade-bit is not valid. Must be within [0, 31]")
|
||||
}
|
||||
if s.IPTablesDropBit > 31 || s.IPTablesDropBit < 0 {
|
||||
return nil, fmt.Errorf("iptables-drop-bit is not valid. Must be within [0, 31]")
|
||||
}
|
||||
if s.IPTablesDropBit == s.IPTablesMasqueradeBit {
|
||||
return nil, fmt.Errorf("iptables-masquerade-bit and iptables-drop-bit must be different")
|
||||
}
|
||||
}
|
||||
|
||||
return &KubeletConfig{
|
||||
Address: net.ParseIP(s.Address),
|
||||
AllowPrivileged: s.AllowPrivileged,
|
||||
|
@ -281,10 +293,13 @@ func UnsecuredKubeletConfig(s *options.KubeletServer) (*KubeletConfig, error) {
|
|||
HairpinMode: s.HairpinMode,
|
||||
BabysitDaemons: s.BabysitDaemons,
|
||||
ExperimentalFlannelOverlay: s.ExperimentalFlannelOverlay,
|
||||
NodeIP: net.ParseIP(s.NodeIP),
|
||||
EvictionConfig: evictionConfig,
|
||||
PodsPerCore: int(s.PodsPerCore),
|
||||
ProtectKernelDefaults: s.ProtectKernelDefaults,
|
||||
NodeIP: net.ParseIP(s.NodeIP),
|
||||
EvictionConfig: evictionConfig,
|
||||
PodsPerCore: int(s.PodsPerCore),
|
||||
ProtectKernelDefaults: s.ProtectKernelDefaults,
|
||||
MakeIPTablesUtilChains: s.MakeIPTablesUtilChains,
|
||||
iptablesMasqueradeBit: int(s.IPTablesMasqueradeBit),
|
||||
iptablesDropBit: int(s.IPTablesDropBit),
|
||||
}, nil
|
||||
}
|
||||
|
||||
|
@ -893,8 +908,10 @@ type KubeletConfig struct {
|
|||
HairpinMode string
|
||||
BabysitDaemons bool
|
||||
Options []kubelet.Option
|
||||
|
||||
ProtectKernelDefaults bool
|
||||
ProtectKernelDefaults bool
|
||||
MakeIPTablesUtilChains bool
|
||||
iptablesMasqueradeBit int
|
||||
iptablesDropBit int
|
||||
}
|
||||
|
||||
func CreateAndInitKubelet(kc *KubeletConfig) (k KubeletBootstrap, pc *config.PodConfig, err error) {
|
||||
|
@ -992,6 +1009,9 @@ func CreateAndInitKubelet(kc *KubeletConfig) (k KubeletBootstrap, pc *config.Pod
|
|||
kc.EvictionConfig,
|
||||
kc.Options,
|
||||
kc.EnableControllerAttachDetach,
|
||||
kc.MakeIPTablesUtilChains,
|
||||
kc.iptablesMasqueradeBit,
|
||||
kc.iptablesDropBit,
|
||||
)
|
||||
|
||||
if err != nil {
|
||||
|
|
|
@ -230,6 +230,7 @@ insecure-port
|
|||
insecure-skip-tls-verify
|
||||
instance-metadata
|
||||
instance-name-prefix
|
||||
iptables-drop-bit
|
||||
iptables-masquerade-bit
|
||||
iptables-sync-period
|
||||
ir-data-source
|
||||
|
@ -288,6 +289,7 @@ lock-file
|
|||
log-flush-frequency
|
||||
long-running-request-regexp
|
||||
low-diskspace-threshold-mb
|
||||
make-iptables-util-chains
|
||||
make-symlinks
|
||||
manifest-url
|
||||
manifest-url-header
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -399,6 +399,18 @@ type KubeletConfiguration struct {
|
|||
KubeReserved utilconfig.ConfigurationMap `json:"kubeReserved"`
|
||||
// Default behaviour for kernel tuning
|
||||
ProtectKernelDefaults bool `json:"protectKernelDefaults"`
|
||||
// If true, Kubelet ensures a set of iptables rules are present on host.
|
||||
// These rules will serve as utility for various components, e.g. kube-proxy.
|
||||
// The rules will be created based on IPTablesMasqueradeBit and IPTablesDropBit.
|
||||
MakeIPTablesUtilChains bool `json:"makeIPTablesUtilChains"`
|
||||
// iptablesMasqueradeBit is the bit of the iptables fwmark space to use for SNAT
|
||||
// Values must be within the range [0, 31].
|
||||
// Warning: Please match the value of corresponding parameter in kube-proxy
|
||||
// TODO: clean up IPTablesMasqueradeBit in kube-proxy
|
||||
IPTablesMasqueradeBit int32 `json:"iptablesMasqueradeBit"`
|
||||
// iptablesDropBit is the bit of the iptables fwmark space to use for dropping packets. Kubelet will ensure iptables mark and drop rules.
|
||||
// Values must be within the range [0, 31]. Must be different from IPTablesMasqueradeBit
|
||||
IPTablesDropBit int32 `json:"iptablesDropBit"`
|
||||
}
|
||||
|
||||
type KubeSchedulerConfiguration struct {
|
||||
|
|
|
@ -43,6 +43,9 @@ const (
|
|||
defaultRktAPIServiceEndpoint = "localhost:15441"
|
||||
|
||||
AutoDetectCloudProvider = "auto-detect"
|
||||
|
||||
defaultIPTablesMasqueradeBit = 14
|
||||
defaultIPTablesDropBit = 15
|
||||
)
|
||||
|
||||
var zeroDuration = unversioned.Duration{}
|
||||
|
@ -337,6 +340,17 @@ func SetDefaults_KubeletConfiguration(obj *KubeletConfiguration) {
|
|||
if obj.KubeReserved == nil {
|
||||
obj.KubeReserved = make(map[string]string)
|
||||
}
|
||||
if obj.MakeIPTablesUtilChains == nil {
|
||||
obj.MakeIPTablesUtilChains = boolVar(true)
|
||||
}
|
||||
if obj.IPTablesMasqueradeBit == nil {
|
||||
temp := int32(defaultIPTablesMasqueradeBit)
|
||||
obj.IPTablesMasqueradeBit = &temp
|
||||
}
|
||||
if obj.IPTablesDropBit == nil {
|
||||
temp := int32(defaultIPTablesDropBit)
|
||||
obj.IPTablesDropBit = &temp
|
||||
}
|
||||
}
|
||||
|
||||
func boolVar(b bool) *bool {
|
||||
|
|
|
@ -454,4 +454,16 @@ type KubeletConfiguration struct {
|
|||
KubeReserved map[string]string `json:"kubeReserved"`
|
||||
// Default behaviour for kernel tuning
|
||||
ProtectKernelDefaults bool `json:"protectKernelDefaults"`
|
||||
// If true, Kubelet ensures a set of iptables rules are present on host.
|
||||
// These rules will serve as utility rules for various components, e.g. KubeProxy.
|
||||
// The rules will be created based on IPTablesMasqueradeBit and IPTablesDropBit.
|
||||
MakeIPTablesUtilChains *bool `json:"makeIPTablesUtilChains"`
|
||||
// iptablesMasqueradeBit is the bit of the iptables fwmark space to mark for SNAT
|
||||
// Values must be within the range [0, 31]. Must be different from other mark bits.
|
||||
// Warning: Please match the value of corresponding parameter in kube-proxy
|
||||
// TODO: clean up IPTablesMasqueradeBit in kube-proxy
|
||||
IPTablesMasqueradeBit *int32 `json:"iptablesMasqueradeBit"`
|
||||
// iptablesDropBit is the bit of the iptables fwmark space to mark for dropping packets.
|
||||
// Values must be within the range [0, 31]. Must be different from other mark bits.
|
||||
IPTablesDropBit *int32 `json:"iptablesDropBit"`
|
||||
}
|
||||
|
|
|
@ -320,6 +320,15 @@ func autoConvert_v1alpha1_KubeletConfiguration_To_componentconfig_KubeletConfigu
|
|||
out.KubeReserved = nil
|
||||
}
|
||||
out.ProtectKernelDefaults = in.ProtectKernelDefaults
|
||||
if err := api.Convert_Pointer_bool_To_bool(&in.MakeIPTablesUtilChains, &out.MakeIPTablesUtilChains, s); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := api.Convert_Pointer_int32_To_int32(&in.IPTablesMasqueradeBit, &out.IPTablesMasqueradeBit, s); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := api.Convert_Pointer_int32_To_int32(&in.IPTablesDropBit, &out.IPTablesDropBit, s); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
|
@ -485,6 +494,15 @@ func autoConvert_componentconfig_KubeletConfiguration_To_v1alpha1_KubeletConfigu
|
|||
out.KubeReserved = nil
|
||||
}
|
||||
out.ProtectKernelDefaults = in.ProtectKernelDefaults
|
||||
if err := api.Convert_bool_To_Pointer_bool(&in.MakeIPTablesUtilChains, &out.MakeIPTablesUtilChains, s); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := api.Convert_int32_To_Pointer_int32(&in.IPTablesMasqueradeBit, &out.IPTablesMasqueradeBit, s); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := api.Convert_int32_To_Pointer_int32(&in.IPTablesDropBit, &out.IPTablesDropBit, s); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
|
|
|
@ -378,6 +378,27 @@ func DeepCopy_v1alpha1_KubeletConfiguration(in interface{}, out interface{}, c *
|
|||
out.KubeReserved = nil
|
||||
}
|
||||
out.ProtectKernelDefaults = in.ProtectKernelDefaults
|
||||
if in.MakeIPTablesUtilChains != nil {
|
||||
in, out := &in.MakeIPTablesUtilChains, &out.MakeIPTablesUtilChains
|
||||
*out = new(bool)
|
||||
**out = **in
|
||||
} else {
|
||||
out.MakeIPTablesUtilChains = nil
|
||||
}
|
||||
if in.IPTablesMasqueradeBit != nil {
|
||||
in, out := &in.IPTablesMasqueradeBit, &out.IPTablesMasqueradeBit
|
||||
*out = new(int32)
|
||||
**out = **in
|
||||
} else {
|
||||
out.IPTablesMasqueradeBit = nil
|
||||
}
|
||||
if in.IPTablesDropBit != nil {
|
||||
in, out := &in.IPTablesDropBit, &out.IPTablesDropBit
|
||||
*out = new(int32)
|
||||
**out = **in
|
||||
} else {
|
||||
out.IPTablesDropBit = nil
|
||||
}
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
|
|
@ -326,6 +326,9 @@ func DeepCopy_componentconfig_KubeletConfiguration(in interface{}, out interface
|
|||
out.KubeReserved = nil
|
||||
}
|
||||
out.ProtectKernelDefaults = in.ProtectKernelDefaults
|
||||
out.MakeIPTablesUtilChains = in.MakeIPTablesUtilChains
|
||||
out.IPTablesMasqueradeBit = in.IPTablesMasqueradeBit
|
||||
out.IPTablesDropBit = in.IPTablesDropBit
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
|
|
@ -244,6 +244,9 @@ func NewMainKubelet(
|
|||
evictionConfig eviction.Config,
|
||||
kubeOptions []Option,
|
||||
enableControllerAttachDetach bool,
|
||||
makeIPTablesUtilChains bool,
|
||||
iptablesMasqueradeBit int,
|
||||
iptablesDropBit int,
|
||||
) (*Kubelet, error) {
|
||||
if rootDirectory == "" {
|
||||
return nil, fmt.Errorf("invalid root directory %q", rootDirectory)
|
||||
|
@ -368,6 +371,9 @@ func NewMainKubelet(
|
|||
babysitDaemons: babysitDaemons,
|
||||
enableControllerAttachDetach: enableControllerAttachDetach,
|
||||
iptClient: utilipt.New(utilexec.New(), utildbus.New(), utilipt.ProtocolIpv4),
|
||||
makeIPTablesUtilChains: makeIPTablesUtilChains,
|
||||
iptablesMasqueradeBit: iptablesMasqueradeBit,
|
||||
iptablesDropBit: iptablesDropBit,
|
||||
}
|
||||
|
||||
if klet.flannelExperimentalOverlay {
|
||||
|
@ -845,6 +851,15 @@ type Kubelet struct {
|
|||
|
||||
// trigger deleting containers in a pod
|
||||
containerDeletor *podContainerDeletor
|
||||
|
||||
// config iptables util rules
|
||||
makeIPTablesUtilChains bool
|
||||
|
||||
// The bit of the fwmark space to mark packets for SNAT.
|
||||
iptablesMasqueradeBit int
|
||||
|
||||
// The bit of the fwmark space to mark packets for dropping.
|
||||
iptablesDropBit int
|
||||
}
|
||||
|
||||
// setupDataDirs creates:
|
||||
|
@ -971,6 +986,11 @@ func (kl *Kubelet) Run(updates <-chan kubetypes.PodUpdate) {
|
|||
go wait.Until(kl.syncNetworkStatus, 30*time.Second, wait.NeverStop)
|
||||
go wait.Until(kl.updateRuntimeUp, 5*time.Second, wait.NeverStop)
|
||||
|
||||
// Start loop to sync iptables util rules
|
||||
if kl.makeIPTablesUtilChains {
|
||||
go wait.Until(kl.syncNetworkUtil, 1*time.Minute, wait.NeverStop)
|
||||
}
|
||||
|
||||
// Start a goroutine responsible for killing pods (that are not properly
|
||||
// handled by pod workers).
|
||||
go wait.Until(kl.podKiller, 1*time.Second, wait.NeverStop)
|
||||
|
|
|
@ -28,9 +28,25 @@ import (
|
|||
"k8s.io/kubernetes/pkg/apis/componentconfig"
|
||||
"k8s.io/kubernetes/pkg/kubelet/network"
|
||||
"k8s.io/kubernetes/pkg/util/bandwidth"
|
||||
utiliptables "k8s.io/kubernetes/pkg/util/iptables"
|
||||
"k8s.io/kubernetes/pkg/util/sets"
|
||||
)
|
||||
|
||||
const (
|
||||
// the mark-for-masquerade chain
|
||||
// TODO: clean up this logic in kube-proxy
|
||||
KubeMarkMasqChain utiliptables.Chain = "KUBE-MARK-MASQ"
|
||||
|
||||
// the mark-for-drop chain
|
||||
KubeMarkDropChain utiliptables.Chain = "KUBE-MARK-DROP"
|
||||
|
||||
// kubernetes postrouting rules
|
||||
KubePostroutingChain utiliptables.Chain = "KUBE-POSTROUTING"
|
||||
|
||||
// kubernetes postrouting rules
|
||||
KubeFirewallChain utiliptables.Chain = "KUBE-FIREWALL"
|
||||
)
|
||||
|
||||
// effectiveHairpinMode determines the effective hairpin mode given the
|
||||
// configured mode, container runtime, and whether cbr0 should be configured.
|
||||
func effectiveHairpinMode(hairpinMode componentconfig.HairpinMode, containerRuntime string, configureCBR0 bool, networkPlugin string) (componentconfig.HairpinMode, error) {
|
||||
|
@ -303,3 +319,88 @@ func (kl *Kubelet) shapingEnabled() bool {
|
|||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// syncNetworkUtil ensures the network utility are present on host.
|
||||
// Network util includes:
|
||||
// 1. In nat table, KUBE-MARK-DROP rule to mark connections for dropping
|
||||
// Marked connection will be drop on INPUT/OUTPUT Chain in filter table
|
||||
// 2. In nat table, KUBE-MARK-MASQ rule to mark connections for SNAT
|
||||
// Marked connection will get SNAT on POSTROUTING Chain in nat table
|
||||
func (kl *Kubelet) syncNetworkUtil() {
|
||||
if kl.iptablesMasqueradeBit < 0 || kl.iptablesMasqueradeBit > 31 {
|
||||
glog.Errorf("invalid iptables-masquerade-bit %v not in [0, 31]", kl.iptablesMasqueradeBit)
|
||||
return
|
||||
}
|
||||
|
||||
if kl.iptablesDropBit < 0 || kl.iptablesDropBit > 31 {
|
||||
glog.Errorf("invalid iptables-drop-bit %v not in [0, 31]", kl.iptablesDropBit)
|
||||
return
|
||||
}
|
||||
|
||||
if kl.iptablesDropBit == kl.iptablesMasqueradeBit {
|
||||
glog.Errorf("iptables-masquerade-bit %v and iptables-drop-bit %v must be different", kl.iptablesMasqueradeBit, kl.iptablesDropBit)
|
||||
return
|
||||
}
|
||||
|
||||
// Setup KUBE-MARK-DROP rules
|
||||
dropMark := getIPTablesMark(kl.iptablesDropBit)
|
||||
if _, err := kl.iptClient.EnsureChain(utiliptables.TableNAT, KubeMarkDropChain); err != nil {
|
||||
glog.Errorf("Failed to ensure that %s chain %s exists: %v", utiliptables.TableNAT, KubeMarkDropChain, err)
|
||||
return
|
||||
}
|
||||
if _, err := kl.iptClient.EnsureRule(utiliptables.Append, utiliptables.TableNAT, KubeMarkDropChain, "-j", "MARK", "--set-xmark", dropMark); err != nil {
|
||||
glog.Errorf("Failed to ensure marking rule for %v: %v", KubeMarkDropChain, err)
|
||||
return
|
||||
}
|
||||
if _, err := kl.iptClient.EnsureChain(utiliptables.TableFilter, KubeFirewallChain); err != nil {
|
||||
glog.Errorf("Failed to ensure that %s chain %s exists: %v", utiliptables.TableFilter, KubeFirewallChain, err)
|
||||
return
|
||||
}
|
||||
if _, err := kl.iptClient.EnsureRule(utiliptables.Append, utiliptables.TableFilter, KubeFirewallChain,
|
||||
"-m", "comment", "--comment", "kubernetes firewall for dropping marked packets",
|
||||
"-m", "mark", "--mark", dropMark,
|
||||
"-j", "DROP"); err != nil {
|
||||
glog.Errorf("Failed to ensure rule to drop packet marked by %v in %v chain %v: %v", KubeMarkDropChain, utiliptables.TableFilter, KubeFirewallChain, err)
|
||||
return
|
||||
}
|
||||
if _, err := kl.iptClient.EnsureRule(utiliptables.Prepend, utiliptables.TableFilter, utiliptables.ChainOutput, "-j", string(KubeFirewallChain)); err != nil {
|
||||
glog.Errorf("Failed to ensure that %s chain %s jumps to %s: %v", utiliptables.TableFilter, utiliptables.ChainOutput, KubeFirewallChain, err)
|
||||
return
|
||||
}
|
||||
if _, err := kl.iptClient.EnsureRule(utiliptables.Prepend, utiliptables.TableFilter, utiliptables.ChainInput, "-j", string(KubeFirewallChain)); err != nil {
|
||||
glog.Errorf("Failed to ensure that %s chain %s jumps to %s: %v", utiliptables.TableFilter, utiliptables.ChainInput, KubeFirewallChain, err)
|
||||
return
|
||||
}
|
||||
|
||||
// Setup KUBE-MARK-MASQ rules
|
||||
masqueradeMark := getIPTablesMark(kl.iptablesMasqueradeBit)
|
||||
if _, err := kl.iptClient.EnsureChain(utiliptables.TableNAT, KubeMarkMasqChain); err != nil {
|
||||
glog.Errorf("Failed to ensure that %s chain %s exists: %v", utiliptables.TableNAT, KubeMarkMasqChain, err)
|
||||
return
|
||||
}
|
||||
if _, err := kl.iptClient.EnsureChain(utiliptables.TableNAT, KubePostroutingChain); err != nil {
|
||||
glog.Errorf("Failed to ensure that %s chain %s exists: %v", utiliptables.TableNAT, KubePostroutingChain, err)
|
||||
return
|
||||
}
|
||||
if _, err := kl.iptClient.EnsureRule(utiliptables.Append, utiliptables.TableNAT, KubeMarkMasqChain, "-j", "MARK", "--set-xmark", masqueradeMark); err != nil {
|
||||
glog.Errorf("Failed to ensure marking rule for %v: %v", KubeMarkMasqChain, err)
|
||||
return
|
||||
}
|
||||
if _, err := kl.iptClient.EnsureRule(utiliptables.Prepend, utiliptables.TableNAT, utiliptables.ChainPostrouting,
|
||||
"-m", "comment", "--comment", "kubernetes postrouting rules", "-j", string(KubePostroutingChain)); err != nil {
|
||||
glog.Errorf("Failed to ensure that %s chain %s jumps to %s: %v", utiliptables.TableNAT, utiliptables.ChainPostrouting, KubePostroutingChain, err)
|
||||
return
|
||||
}
|
||||
if _, err := kl.iptClient.EnsureRule(utiliptables.Append, utiliptables.TableNAT, KubePostroutingChain,
|
||||
"-m", "comment", "--comment", "kubernetes service traffic requiring SNAT",
|
||||
"-m", "mark", "--mark", masqueradeMark, "-j", "MASQUERADE"); err != nil {
|
||||
glog.Errorf("Failed to ensure SNAT rule for packets marked by %v in %v chain %v: %v", KubeMarkMasqChain, utiliptables.TableNAT, KubePostroutingChain, err)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// getIPTablesMark returns the fwmark given the bit
|
||||
func getIPTablesMark(bit int) string {
|
||||
value := 1 << uint(bit)
|
||||
return fmt.Sprintf("%#08x/%#08x", value, value)
|
||||
}
|
||||
|
|
|
@ -217,3 +217,25 @@ func TestCleanupBandwidthLimits(t *testing.T) {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetIPTablesMark(t *testing.T) {
|
||||
tests := []struct {
|
||||
bit int
|
||||
expect string
|
||||
}{
|
||||
{
|
||||
14,
|
||||
"0x00004000/0x00004000",
|
||||
},
|
||||
{
|
||||
15,
|
||||
"0x00008000/0x00008000",
|
||||
},
|
||||
}
|
||||
for _, tc := range tests {
|
||||
res := getIPTablesMark(tc.bit)
|
||||
if res != tc.expect {
|
||||
t.Errorf("getIPTablesMark output unexpected result: %v when input bit is %d. Expect result: %v", res, tc.bit, tc.expect)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -45,31 +45,35 @@ import (
|
|||
utilsysctl "k8s.io/kubernetes/pkg/util/sysctl"
|
||||
)
|
||||
|
||||
// iptablesMinVersion is the minimum version of iptables for which we will use the Proxier
|
||||
// from this package instead of the userspace Proxier. While most of the
|
||||
// features we need were available earlier, the '-C' flag was added more
|
||||
// recently. We use that indirectly in Ensure* functions, and if we don't
|
||||
// have it, we have to be extra careful about the exact args we feed in being
|
||||
// the same as the args we read back (iptables itself normalizes some args).
|
||||
// This is the "new" Proxier, so we require "new" versions of tools.
|
||||
const iptablesMinVersion = utiliptables.MinCheckVersion
|
||||
const (
|
||||
// iptablesMinVersion is the minimum version of iptables for which we will use the Proxier
|
||||
// from this package instead of the userspace Proxier. While most of the
|
||||
// features we need were available earlier, the '-C' flag was added more
|
||||
// recently. We use that indirectly in Ensure* functions, and if we don't
|
||||
// have it, we have to be extra careful about the exact args we feed in being
|
||||
// the same as the args we read back (iptables itself normalizes some args).
|
||||
// This is the "new" Proxier, so we require "new" versions of tools.
|
||||
iptablesMinVersion = utiliptables.MinCheckVersion
|
||||
|
||||
// the services chain
|
||||
const kubeServicesChain utiliptables.Chain = "KUBE-SERVICES"
|
||||
// the services chain
|
||||
kubeServicesChain utiliptables.Chain = "KUBE-SERVICES"
|
||||
|
||||
// the nodeports chain
|
||||
const kubeNodePortsChain utiliptables.Chain = "KUBE-NODEPORTS"
|
||||
// the nodeports chain
|
||||
kubeNodePortsChain utiliptables.Chain = "KUBE-NODEPORTS"
|
||||
|
||||
// the kubernetes postrouting chain
|
||||
const kubePostroutingChain utiliptables.Chain = "KUBE-POSTROUTING"
|
||||
// the kubernetes postrouting chain
|
||||
kubePostroutingChain utiliptables.Chain = "KUBE-POSTROUTING"
|
||||
|
||||
// the mark-for-masquerade chain
|
||||
// TODO: let kubelet manage this chain. Other component should just assume it exists and use it.
|
||||
const KubeMarkMasqChain utiliptables.Chain = "KUBE-MARK-MASQ"
|
||||
// the mark-for-masquerade chain
|
||||
KubeMarkMasqChain utiliptables.Chain = "KUBE-MARK-MASQ"
|
||||
|
||||
// the mark we apply to traffic needing SNAT
|
||||
// TODO(thockin): Remove this for v1.3 or v1.4.
|
||||
const oldIptablesMasqueradeMark = "0x4d415351"
|
||||
// the mark we apply to traffic needing SNAT
|
||||
// TODO(thockin): Remove this for v1.3 or v1.4.
|
||||
oldIptablesMasqueradeMark = "0x4d415351"
|
||||
|
||||
// the mark-for-drop chain
|
||||
KubeMarkDropChain utiliptables.Chain = "KUBE-MARK-DROP"
|
||||
)
|
||||
|
||||
// IptablesVersioner can query the current iptables version.
|
||||
type IptablesVersioner interface {
|
||||
|
@ -128,14 +132,15 @@ const sysctlBridgeCallIptables = "net/bridge/bridge-nf-call-iptables"
|
|||
|
||||
// internal struct for string service information
|
||||
type serviceInfo struct {
|
||||
clusterIP net.IP
|
||||
port int
|
||||
protocol api.Protocol
|
||||
nodePort int
|
||||
loadBalancerStatus api.LoadBalancerStatus
|
||||
sessionAffinityType api.ServiceAffinity
|
||||
stickyMaxAgeSeconds int
|
||||
externalIPs []string
|
||||
clusterIP net.IP
|
||||
port int
|
||||
protocol api.Protocol
|
||||
nodePort int
|
||||
loadBalancerStatus api.LoadBalancerStatus
|
||||
sessionAffinityType api.ServiceAffinity
|
||||
stickyMaxAgeSeconds int
|
||||
externalIPs []string
|
||||
loadBalancerSourceRanges []string
|
||||
}
|
||||
|
||||
// returns a new serviceInfo struct
|
||||
|
@ -164,6 +169,7 @@ type Proxier struct {
|
|||
exec utilexec.Interface
|
||||
clusterCIDR string
|
||||
hostname string
|
||||
nodeIP net.IP
|
||||
}
|
||||
|
||||
type localPort struct {
|
||||
|
@ -189,7 +195,7 @@ var _ proxy.ProxyProvider = &Proxier{}
|
|||
// An error will be returned if iptables fails to update or acquire the initial lock.
|
||||
// Once a proxier is created, it will keep iptables up to date in the background and
|
||||
// will not terminate if a particular iptables call fails.
|
||||
func NewProxier(ipt utiliptables.Interface, exec utilexec.Interface, syncPeriod time.Duration, masqueradeAll bool, masqueradeBit int, clusterCIDR string, hostname string) (*Proxier, error) {
|
||||
func NewProxier(ipt utiliptables.Interface, exec utilexec.Interface, syncPeriod time.Duration, masqueradeAll bool, masqueradeBit int, clusterCIDR string, hostname string, nodeIP net.IP) (*Proxier, error) {
|
||||
// Set the route_localnet sysctl we need for
|
||||
if err := utilsysctl.SetSysctl(sysctlRouteLocalnet, 1); err != nil {
|
||||
return nil, fmt.Errorf("can't set sysctl %s: %v", sysctlRouteLocalnet, err)
|
||||
|
@ -209,6 +215,10 @@ func NewProxier(ipt utiliptables.Interface, exec utilexec.Interface, syncPeriod
|
|||
masqueradeValue := 1 << uint(masqueradeBit)
|
||||
masqueradeMark := fmt.Sprintf("%#08x/%#08x", masqueradeValue, masqueradeValue)
|
||||
|
||||
if nodeIP == nil {
|
||||
glog.Warningf("invalid nodeIP, initialize kube-proxy with 127.0.0.1 as nodeIP")
|
||||
nodeIP = net.ParseIP("127.0.0.1")
|
||||
}
|
||||
return &Proxier{
|
||||
serviceMap: make(map[proxy.ServicePortName]*serviceInfo),
|
||||
endpointsMap: make(map[proxy.ServicePortName][]string),
|
||||
|
@ -220,6 +230,7 @@ func NewProxier(ipt utiliptables.Interface, exec utilexec.Interface, syncPeriod
|
|||
exec: exec,
|
||||
clusterCIDR: clusterCIDR,
|
||||
hostname: hostname,
|
||||
nodeIP: nodeIP,
|
||||
}, nil
|
||||
}
|
||||
|
||||
|
@ -428,6 +439,7 @@ func (proxier *Proxier) OnServiceUpdate(allServices []api.Service) {
|
|||
// Deep-copy in case the service instance changes
|
||||
info.loadBalancerStatus = *api.LoadBalancerStatusDeepCopy(&service.Status.LoadBalancer)
|
||||
info.sessionAffinityType = service.Spec.SessionAffinity
|
||||
info.loadBalancerSourceRanges = service.Spec.LoadBalancerSourceRanges
|
||||
proxier.serviceMap[serviceName] = info
|
||||
|
||||
glog.V(4).Infof("added serviceInfo(%s): %s", serviceName, spew.Sdump(info))
|
||||
|
@ -552,15 +564,28 @@ func flattenValidEndpoints(endpoints []hostPortPair) []string {
|
|||
return result
|
||||
}
|
||||
|
||||
// servicePortChainName takes the ServicePortName for a service and
|
||||
// returns the associated iptables chain. This is computed by hashing (sha256)
|
||||
// then encoding to base32 and truncating with the prefix "KUBE-SVC-". We do
|
||||
// this because Iptables Chain Names must be <= 28 chars long, and the longer
|
||||
// they are the harder they are to read.
|
||||
func servicePortChainName(s proxy.ServicePortName, protocol string) utiliptables.Chain {
|
||||
// portProtoHash takes the ServicePortName and protocol for a service
|
||||
// returns the associated 16 character hash. This is computed by hashing (sha256)
|
||||
// then encoding to base32 and truncating to 16 chars. We do this because Iptables
|
||||
// Chain Names must be <= 28 chars long, and the longer they are the harder they are to read.
|
||||
func portProtoHash(s proxy.ServicePortName, protocol string) string {
|
||||
hash := sha256.Sum256([]byte(s.String() + protocol))
|
||||
encoded := base32.StdEncoding.EncodeToString(hash[:])
|
||||
return utiliptables.Chain("KUBE-SVC-" + encoded[:16])
|
||||
return encoded[:16]
|
||||
}
|
||||
|
||||
// servicePortChainName takes the ServicePortName for a service and
|
||||
// returns the associated iptables chain. This is computed by hashing (sha256)
|
||||
// then encoding to base32 and truncating with the prefix "KUBE-SVC-".
|
||||
func servicePortChainName(s proxy.ServicePortName, protocol string) utiliptables.Chain {
|
||||
return utiliptables.Chain("KUBE-SVC-" + portProtoHash(s, protocol))
|
||||
}
|
||||
|
||||
// servicePortChainName takes the ServicePortName for a service and
|
||||
// returns the associated iptables chain. This is computed by hashing (sha256)
|
||||
// then encoding to base32 and truncating with the prefix "KUBE-FW-".
|
||||
func serviceFirewallChainName(s proxy.ServicePortName, protocol string) utiliptables.Chain {
|
||||
return utiliptables.Chain("KUBE-FW-" + portProtoHash(s, protocol))
|
||||
}
|
||||
|
||||
// This is the same as servicePortChainName but with the endpoint included.
|
||||
|
@ -854,9 +879,54 @@ func (proxier *Proxier) syncProxyRules() {
|
|||
"-d", fmt.Sprintf("%s/32", ingress.IP),
|
||||
"--dport", fmt.Sprintf("%d", svcInfo.port),
|
||||
}
|
||||
// create service firewall chain
|
||||
fwChain := serviceFirewallChainName(svcName, protocol)
|
||||
if chain, ok := existingNATChains[fwChain]; ok {
|
||||
writeLine(natChains, chain)
|
||||
} else {
|
||||
writeLine(natChains, utiliptables.MakeChainLine(fwChain))
|
||||
}
|
||||
// jump to service firewall chain
|
||||
// The service firewall rules are created based on ServiceSpec.loadBalancerSourceRanges field.
|
||||
// This currently works for loadbalancers that preserves source ips.
|
||||
// For loadbalancers which direct traffic to service NodePort, the firewall rules will not apply.
|
||||
writeLine(natRules, append(args, "-j", string(fwChain))...)
|
||||
|
||||
args = []string{
|
||||
"-A", string(fwChain),
|
||||
"-m", "comment", "--comment", fmt.Sprintf(`"%s loadbalancer IP"`, svcName.String()),
|
||||
"-m", protocol, "-p", protocol,
|
||||
"-d", fmt.Sprintf("%s/32", ingress.IP),
|
||||
"--dport", fmt.Sprintf("%d", svcInfo.port),
|
||||
}
|
||||
// We have to SNAT packets from external IPs.
|
||||
writeLine(natRules, append(args, "-j", string(KubeMarkMasqChain))...)
|
||||
writeLine(natRules, append(args, "-j", string(svcChain))...)
|
||||
|
||||
if len(svcInfo.loadBalancerSourceRanges) == 0 {
|
||||
// allow all sources, so jump directly to KUBE-SVC chain
|
||||
writeLine(natRules, append(args, "-j", string(svcChain))...)
|
||||
} else {
|
||||
// firewall filter based on each source range
|
||||
allowFromNode := false
|
||||
for _, src := range svcInfo.loadBalancerSourceRanges {
|
||||
writeLine(natRules, append(args, "-s", src, "-j", string(svcChain))...)
|
||||
// ignore error because it has been validated
|
||||
_, cidr, _ := net.ParseCIDR(src)
|
||||
if cidr.Contains(proxier.nodeIP) {
|
||||
allowFromNode = true
|
||||
}
|
||||
}
|
||||
// generally, ip route rule was added to intercept request to loadbalancer vip from the
|
||||
// loadbalancer's backend hosts. In this case, request will not hit the loadbalancer but loop back directly.
|
||||
// Need to add the following rule to allow request on host.
|
||||
if allowFromNode {
|
||||
writeLine(natRules, append(args, "-s", fmt.Sprintf("%s/32", ingress.IP), "-j", string(svcChain))...)
|
||||
}
|
||||
}
|
||||
|
||||
// If the packet was able to reach the end of firewall chain, then it did not get DNATed.
|
||||
// It means the packet cannot go thru the firewall, then mark it for DROP
|
||||
writeLine(natRules, append(args, "-j", string(KubeMarkDropChain))...)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue