diff --git a/cluster/gce/config-default.sh b/cluster/gce/config-default.sh index 0bb09a7356..1477784aa5 100755 --- a/cluster/gce/config-default.sh +++ b/cluster/gce/config-default.sh @@ -116,8 +116,8 @@ OPENCONTRAIL_TAG="${OPENCONTRAIL_TAG:-R2.20}" OPENCONTRAIL_KUBERNETES_TAG="${OPENCONTRAIL_KUBERNETES_TAG:-master}" OPENCONTRAIL_PUBLIC_SUBNET="${OPENCONTRAIL_PUBLIC_SUBNET:-10.1.0.0/16}" -# Should the kubelet configure hairpin mode on the bridge? -HAIRPIN_MODE="${HAIRPIN_MODE:-true}" # true, false +# How should the kubelet configure hairpin mode? +HAIRPIN_MODE="${HAIRPIN_MODE:-promiscuous-bridge}" # promiscuous-bridge, hairpin-veth, none # Optional: if set to true, kube-up will configure the cluster to run e2e tests. E2E_STORAGE_TEST_ENVIRONMENT=${KUBE_E2E_STORAGE_TEST_ENVIRONMENT:-false} diff --git a/cluster/gce/config-test.sh b/cluster/gce/config-test.sh index 312d9037ff..958a000e1d 100755 --- a/cluster/gce/config-test.sh +++ b/cluster/gce/config-test.sh @@ -134,8 +134,8 @@ OPENCONTRAIL_TAG="${OPENCONTRAIL_TAG:-R2.20}" OPENCONTRAIL_KUBERNETES_TAG="${OPENCONTRAIL_KUBERNETES_TAG:-master}" OPENCONTRAIL_PUBLIC_SUBNET="${OPENCONTRAIL_PUBLIC_SUBNET:-10.1.0.0/16}" -# Should the kubelet configure hairpin mode on the bridge? -HAIRPIN_MODE="${HAIRPIN_MODE:-true}" # true, false +# How should the kubelet configure hairpin mode? +HAIRPIN_MODE="${HAIRPIN_MODE:-promiscuous-bridge}" # promiscuous-bridge, hairpin-veth, none # Optional: if set to true, kube-up will configure the cluster to run e2e tests. 
E2E_STORAGE_TEST_ENVIRONMENT=${KUBE_E2E_STORAGE_TEST_ENVIRONMENT:-false} diff --git a/cluster/saltbase/salt/kubelet/default b/cluster/saltbase/salt/kubelet/default index 4eeb3f730a..1f7d8fe9f2 100644 --- a/cluster/saltbase/salt/kubelet/default +++ b/cluster/saltbase/salt/kubelet/default @@ -156,7 +156,6 @@ {% set configure_hairpin_mode = "--configure-hairpin-mode=false" %} {% endif -%} - {% set kubelet_port = "" -%} {% if pillar['kubelet_port'] is defined -%} {% set kubelet_port="--port=" + pillar['kubelet_port'] %} @@ -173,4 +172,4 @@ {% endif -%} # test_args has to be kept at the end, so they'll overwrite any prior configuration -DAEMON_ARGS="{{daemon_args}} {{api_servers_with_port}} {{debugging_handlers}} {{hostname_override}} {{cloud_provider}} {{config}} {{manifest_url}} --allow-privileged={{pillar['allow_privileged']}} {{log_level}} {{cluster_dns}} {{cluster_domain}} {{docker_root}} {{kubelet_root}} {{configure_cbr0}} {{non_masquerade_cidr}} {{cgroup_root}} {{system_container}} {{pod_cidr}} {{ master_kubelet_args }} {{cpu_cfs_quota}} {{network_plugin}} {{kubelet_port}} {{experimental_flannel_overlay}} {{ reconcile_cidr_args }} {{ configure_hairpin_mode }} {{enable_custom_metrics}} {{runtime_container}} {{kubelet_container}} {{test_args}}" +DAEMON_ARGS="{{daemon_args}} {{api_servers_with_port}} {{debugging_handlers}} {{hostname_override}} {{cloud_provider}} {{config}} {{manifest_url}} --allow-privileged={{pillar['allow_privileged']}} {{log_level}} {{cluster_dns}} {{cluster_domain}} {{docker_root}} {{kubelet_root}} {{configure_cbr0}} {{non_masquerade_cidr}} {{cgroup_root}} {{system_container}} {{pod_cidr}} {{ master_kubelet_args }} {{cpu_cfs_quota}} {{network_plugin}} {{kubelet_port}} {{experimental_flannel_overlay}} {{ reconcile_cidr_args }} {{ hairpin_mode }} {{enable_custom_metrics}} {{runtime_container}} {{kubelet_container}} {{test_args}}" diff --git a/cmd/kubelet/app/options/options.go b/cmd/kubelet/app/options/options.go index f834956c47..8ac554e971 
100644 --- a/cmd/kubelet/app/options/options.go +++ b/cmd/kubelet/app/options/options.go @@ -35,7 +35,6 @@ import ( const ( defaultRootDir = "/var/lib/kubelet" experimentalFlannelOverlay = false - hairpinMode = true ) // KubeletServer encapsulates all of the parameters necessary for starting up @@ -126,7 +125,7 @@ func NewKubeletServer() *KubeletServer { KubeAPIBurst: 10, ExperimentalFlannelOverlay: experimentalFlannelOverlay, OutOfDiskTransitionFrequency: unversioned.Duration{5 * time.Minute}, - HairpinMode: hairpinMode, + HairpinMode: componentconfig.PromiscuousBridge, }, } } @@ -208,7 +207,7 @@ func (s *KubeletServer) AddFlags(fs *pflag.FlagSet) { fs.StringVar(&s.RktPath, "rkt-path", s.RktPath, "Path of rkt binary. Leave empty to use the first rkt in $PATH. Only used if --container-runtime='rkt'") fs.StringVar(&s.RktStage1Image, "rkt-stage1-image", s.RktStage1Image, "image to use as stage1. Local paths and http/https URLs are supported. If empty, the 'stage1.aci' in the same directory as '--rkt-path' will be used") fs.BoolVar(&s.ConfigureCBR0, "configure-cbr0", s.ConfigureCBR0, "If true, kubelet will configure cbr0 based on Node.Spec.PodCIDR.") - fs.BoolVar(&s.HairpinMode, "configure-hairpin-mode", s.HairpinMode, "If true, kubelet will set the hairpin mode flag on container interfaces. This allows endpoints of a Service to loadbalance back to themselves if they should try to access their own Service.") + fs.StringVar(&s.HairpinMode, "hairpin-mode", s.HairpinMode, "How should the kubelet setup hairpin NAT. This allows endpoints of a Service to loadbalance back to themselves if they should try to access their own Service. Valid values are \"promiscuous-bridge\", \"hairpin-veth\" and \"none\".") fs.IntVar(&s.MaxPods, "max-pods", s.MaxPods, "Number of Pods that can run on this Kubelet.") fs.StringVar(&s.DockerExecHandlerName, "docker-exec-handler", s.DockerExecHandlerName, "Handler to use when executing a command in a container. 
Valid values are 'native' and 'nsenter'. Defaults to 'native'.") fs.StringVar(&s.NonMasqueradeCIDR, "non-masquerade-cidr", s.NonMasqueradeCIDR, "Traffic to IPs outside this range will use IP masquerade.") diff --git a/cmd/kubelet/app/server.go b/cmd/kubelet/app/server.go index f9ecdb3060..0cfc99c4a9 100644 --- a/cmd/kubelet/app/server.go +++ b/cmd/kubelet/app/server.go @@ -752,7 +752,7 @@ type KubeletConfig struct { ExperimentalFlannelOverlay bool NodeIP net.IP ContainerRuntimeOptions []kubecontainer.Option - HairpinMode bool + HairpinMode string } func CreateAndInitKubelet(kc *KubeletConfig) (k KubeletBootstrap, pc *config.PodConfig, err error) { diff --git a/docs/admin/kubelet.md b/docs/admin/kubelet.md index cd5f478d25..e5b738127a 100644 --- a/docs/admin/kubelet.md +++ b/docs/admin/kubelet.md @@ -77,7 +77,6 @@ kubelet --cluster-domain="": Domain for this cluster. If set, kubelet will configure all containers to search this domain in addition to the host's search domains --config="": Path to the config file or directory of files --configure-cbr0[=false]: If true, kubelet will configure cbr0 based on Node.Spec.PodCIDR. - --configure-hairpin-mode[=true]: If true, kubelet will set the hairpin mode flag on container interfaces. This allows endpoints of a Service to loadbalance back to themselves if they should try to access their own Service. --container-runtime="docker": The container runtime to use. Possible values: 'docker', 'rkt'. Default: 'docker'. --containerized[=false]: Experimental support for running kubelet in a container. Intended for testing. [default=false] --cpu-cfs-quota[=true]: Enable CPU CFS quota enforcement for containers that specify CPU limits @@ -91,6 +90,7 @@ kubelet --experimental-flannel-overlay[=false]: Experimental support for starting the kubelet with the default overlay network (flannel). Assumes flanneld is already running in client mode. 
[default=false] --file-check-frequency=20s: Duration between checking config files for new data --google-json-key="": The Google Cloud Platform Service Account JSON Key to use for authentication. + --hairpin-mode="promiscuous-bridge": How should the kubelet setup hairpin NAT. This allows endpoints of a Service to loadbalance back to themselves if they should try to access their own Service. Valid values are "promiscuous-bridge", "hairpin-veth" and "none". --healthz-bind-address=127.0.0.1: The IP address for the healthz server to serve on, defaulting to 127.0.0.1 (set to 0.0.0.0 for all interfaces) --healthz-port=10248: The port of the localhost healthz endpoint --host-ipc-sources="*": Comma-separated list of sources from which the Kubelet allows pods to use the host ipc namespace. [default="*"] @@ -152,7 +152,7 @@ kubelet --volume-stats-agg-period=1m0s: Specifies interval for kubelet to calculate and cache the volume disk usage for all pods and volumes. To disable volume calculations, set to 0. Default: '1m' ``` -###### Auto generated by spf13/cobra on 17-Feb-2016 +###### Auto generated by spf13/cobra on 18-Feb-2016 diff --git a/hack/jenkins/job-configs/kubernetes-soak.yaml b/hack/jenkins/job-configs/kubernetes-soak.yaml index e234005232..3176cc9542 100644 --- a/hack/jenkins/job-configs/kubernetes-soak.yaml +++ b/hack/jenkins/job-configs/kubernetes-soak.yaml @@ -96,7 +96,6 @@ branch: 'master' provider-env: '{gce-provider-env}' job-env: | - export HAIRPIN_MODE="false" export PROJECT="k8s-jkns-gce-soak" - 'gce-2': deploy-description: Clone of kubernetes-soak-weekly-deploy-gce. 
@@ -104,6 +103,7 @@ branch: 'master' provider-env: '{gce-provider-env}' job-env: | + export HAIRPIN_MODE="hairpin-veth" export PROJECT="k8s-jkns-gce-soak-2" - 'gce-1.1': deploy-description: | diff --git a/hack/verify-flags/exceptions.txt b/hack/verify-flags/exceptions.txt index 93fcac7a84..8156866921 100644 --- a/hack/verify-flags/exceptions.txt +++ b/hack/verify-flags/exceptions.txt @@ -108,3 +108,4 @@ test/e2e/host_path.go: fmt.Sprintf("--retry_time=%d", retryDuration), test/images/mount-tester/mt.go: flag.BoolVar(&breakOnExpectedContent, "break_on_expected_content", true, "Break out of loop on expected content, (use with --file_content_in_loop flag only)") test/images/mount-tester/mt.go: flag.IntVar(&retryDuration, "retry_time", 180, "Retry time during the loop") test/images/mount-tester/mt.go: flag.StringVar(&readFileContentInLoopPath, "file_content_in_loop", "", "Path to read the file content in loop from") +pkg/kubelet/network/hairpin/hairpin.go: hairpinModeRelativePath = "hairpin_mode" diff --git a/hack/verify-flags/known-flags.txt b/hack/verify-flags/known-flags.txt index 00f66eadd6..1981750fb5 100644 --- a/hack/verify-flags/known-flags.txt +++ b/hack/verify-flags/known-flags.txt @@ -390,5 +390,5 @@ leader-elect-lease-duration leader-elect-renew-deadline leader-elect-retry-period watch-cache-sizes -configure-hairpin-mode lock-file +hairpin-mode diff --git a/pkg/apis/componentconfig/types.generated.go b/pkg/apis/componentconfig/types.generated.go index 445449a2c8..cbd1aee7e4 100644 --- a/pkg/apis/componentconfig/types.generated.go +++ b/pkg/apis/componentconfig/types.generated.go @@ -1094,6 +1094,32 @@ func (x *ProxyMode) CodecDecodeSelf(d *codec1978.Decoder) { } } +func (x HairpinMode) CodecEncodeSelf(e *codec1978.Encoder) { + var h codecSelfer1234 + z, r := codec1978.GenHelperEncoder(e) + _, _, _ = h, z, r + yym1 := z.EncBinary() + _ = yym1 + if false { + } else if z.HasExtensions() && z.EncExt(x) { + } else { + r.EncodeString(codecSelferC_UTF81234, 
string(x)) + } +} + +func (x *HairpinMode) CodecDecodeSelf(d *codec1978.Decoder) { + var h codecSelfer1234 + z, r := codec1978.GenHelperDecoder(d) + _, _, _ = h, z, r + yym1 := z.DecBinary() + _ = yym1 + if false { + } else if z.HasExtensions() && z.DecExt(x) { + } else { + *((*string)(x)) = r.DecodeString() + } +} + func (x *KubeletConfiguration) CodecEncodeSelf(e *codec1978.Encoder) { var h codecSelfer1234 z, r := codec1978.GenHelperEncoder(e) @@ -2354,17 +2380,17 @@ func (x *KubeletConfiguration) CodecEncodeSelf(e *codec1978.Encoder) { _ = yym194 if false { } else { - r.EncodeBool(bool(x.HairpinMode)) + r.EncodeString(codecSelferC_UTF81234, string(x.HairpinMode)) } } else { z.EncSendContainerState(codecSelfer_containerMapKey1234) - r.EncodeString(codecSelferC_UTF81234, string("configureHairpinMode")) + r.EncodeString(codecSelferC_UTF81234, string("hairpinMode")) z.EncSendContainerState(codecSelfer_containerMapValue1234) yym195 := z.EncBinary() _ = yym195 if false { } else { - r.EncodeBool(bool(x.HairpinMode)) + r.EncodeString(codecSelferC_UTF81234, string(x.HairpinMode)) } } if yyr2 || yy2arr2 { @@ -3218,11 +3244,11 @@ func (x *KubeletConfiguration) codecDecodeSelfFromMap(l int, d *codec1978.Decode } else { x.ConfigureCBR0 = bool(r.DecodeBool()) } - case "configureHairpinMode": + case "hairpinMode": if r.TryDecodeAsNil() { - x.HairpinMode = false + x.HairpinMode = "" } else { - x.HairpinMode = bool(r.DecodeBool()) + x.HairpinMode = string(r.DecodeString()) } case "maxPods": if r.TryDecodeAsNil() { @@ -4373,9 +4399,9 @@ func (x *KubeletConfiguration) codecDecodeSelfFromArray(l int, d *codec1978.Deco } z.DecSendContainerState(codecSelfer_containerArrayElem1234) if r.TryDecodeAsNil() { - x.HairpinMode = false + x.HairpinMode = "" } else { - x.HairpinMode = bool(r.DecodeBool()) + x.HairpinMode = string(r.DecodeString()) } yyj91++ if yyhl91 { diff --git a/pkg/apis/componentconfig/types.go b/pkg/apis/componentconfig/types.go index 5f15c7a401..79def9a1cc 100644 --- 
a/pkg/apis/componentconfig/types.go +++ b/pkg/apis/componentconfig/types.go @@ -78,6 +78,24 @@ const ( ProxyModeIPTables ProxyMode = "iptables" ) +// HairpinMode denotes how the kubelet should configure networking to handle +// hairpin packets. +type HairpinMode string + +// Enum settings for different ways to handle hairpin packets. +const ( + // Set the hairpin flag on the veth of containers in the respective + // container runtime. + HairpinVeth = "hairpin-veth" + // Make the container bridge promiscuous. This will force it to accept + // hairpin packets, even if the flag isn't set on ports of the bridge. + PromiscuousBridge = "promiscuous-bridge" + // Neither of the above. If the kubelet is started in this hairpin mode + // and kube-proxy is running in iptables mode, hairpin packets will be + // dropped by the container bridge. + HairpinNone = "none" +) + // TODO: curate the ordering and structure of this config object type KubeletConfiguration struct { // config is the path to the config file or directory of files @@ -252,11 +270,16 @@ type KubeletConfiguration struct { // configureCBR0 enables the kublet to configure cbr0 based on // Node.Spec.PodCIDR. ConfigureCBR0 bool `json:"configureCbr0"` - // Should the kubelet set the hairpin flag on veth interfaces for containers - // it creates? Setting this flag allows endpoints in a Service to - // loadbalance back to themselves if they should try to access their own - // Service. - HairpinMode bool `json:"configureHairpinMode"` + // How should the kubelet configure the container bridge for hairpin packets. + // Setting this flag allows endpoints in a Service to loadbalance back to + // themselves if they should try to access their own Service. Values: + // "promiscuous-bridge": make the container bridge promiscuous. + // "hairpin-veth": set the hairpin flag on container veth interfaces. + // "none": do nothing. 
+ // Setting --configure-cbr0 to false implies that to achieve hairpin NAT + // one must set --hairpin-mode=hairpin-veth, because promiscuous-bridge + // assumes the existence of a container bridge named cbr0. + HairpinMode string `json:"hairpinMode"` // maxPods is the number of pods that can run on this Kubelet. MaxPods int `json:"maxPods"` // dockerExecHandlerName is the handler to use when executing a command diff --git a/pkg/kubelet/container_bridge.go b/pkg/kubelet/container_bridge.go index 6f25f8e886..fb1f17b106 100644 --- a/pkg/kubelet/container_bridge.go +++ b/pkg/kubelet/container_bridge.go @@ -59,7 +59,7 @@ func createCBR0(wantCIDR *net.IPNet) error { return nil } -func ensureCbr0(wantCIDR *net.IPNet) error { +func ensureCbr0(wantCIDR *net.IPNet, promiscuous bool) error { exists, err := cbr0Exists() if err != nil { return err @@ -80,7 +80,20 @@ func ensureCbr0(wantCIDR *net.IPNet) error { glog.Error(err) return err } - return createCBR0(wantCIDR) + if err := createCBR0(wantCIDR); err != nil { + glog.Error(err) + return err + } + } + // Put the container bridge into promiscuous mode to force it to accept hairpin packets. + // TODO: Remove this once the kernel bug (#20096) is fixed. + if promiscuous { + // Checking if the bridge is in promiscuous mode is as expensive and more brittle than + // simply setting the flag every time. 
+ if err := exec.Command("ip", "link", "set", "cbr0", "promisc", "on").Run(); err != nil { + glog.Error(err) + return err + } } return nil } diff --git a/pkg/kubelet/kubelet.go b/pkg/kubelet/kubelet.go index 383d1a1aed..93e4d665eb 100644 --- a/pkg/kubelet/kubelet.go +++ b/pkg/kubelet/kubelet.go @@ -40,6 +40,7 @@ import ( "k8s.io/kubernetes/pkg/api/resource" "k8s.io/kubernetes/pkg/api/unversioned" "k8s.io/kubernetes/pkg/api/validation" + "k8s.io/kubernetes/pkg/apis/componentconfig" "k8s.io/kubernetes/pkg/client/cache" clientset "k8s.io/kubernetes/pkg/client/clientset_generated/internalclientset" "k8s.io/kubernetes/pkg/client/record" @@ -204,7 +205,7 @@ func NewMainKubelet( enableCustomMetrics bool, volumeStatsAggPeriod time.Duration, containerRuntimeOptions []kubecontainer.Option, - hairpinMode bool, + hairpinMode string, ) (*Kubelet, error) { if rootDirectory == "" { return nil, fmt.Errorf("invalid root directory %q", rootDirectory) @@ -326,6 +327,7 @@ func NewMainKubelet( outOfDiskTransitionFrequency: outOfDiskTransitionFrequency, reservation: reservation, enableCustomMetrics: enableCustomMetrics, + hairpinMode: componentconfig.HairpinMode(hairpinMode), } // TODO: Factor out "StatsProvider" from Kubelet so we don't have a cyclic dependency klet.resourceAnalyzer = stats.NewResourceAnalyzer(klet, volumeStatsAggPeriod) @@ -383,7 +385,7 @@ func NewMainKubelet( imageBackOff, serializeImagePulls, enableCustomMetrics, - hairpinMode, + hairpinMode == componentconfig.HairpinVeth, containerRuntimeOptions..., ) case "rkt": @@ -683,6 +685,11 @@ type Kubelet struct { // support gathering custom metrics. enableCustomMetrics bool + + // How the Kubelet should setup hairpin NAT. Can take the values: "promiscuous-bridge" + // (make cbr0 promiscuous), "hairpin-veth" (set the hairpin flag on veth interfaces) + // or "none" (do nothing). 
+ hairpinMode componentconfig.HairpinMode } // Validate given node IP belongs to the current host @@ -2633,7 +2640,7 @@ func (kl *Kubelet) reconcileCBR0(podCIDR string) error { } // Set cbr0 interface address to first address in IPNet cidr.IP.To4()[3] += 1 - if err := ensureCbr0(cidr); err != nil { + if err := ensureCbr0(cidr, kl.hairpinMode == componentconfig.PromiscuousBridge); err != nil { return err } if kl.shaper == nil {