diff --git a/cmd/kube-proxy/app/server.go b/cmd/kube-proxy/app/server.go index 044c3268d2..2b66fbea95 100644 --- a/cmd/kube-proxy/app/server.go +++ b/cmd/kube-proxy/app/server.go @@ -35,8 +35,6 @@ import ( "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/runtime/serializer" "k8s.io/apimachinery/pkg/runtime/serializer/json" - "k8s.io/apimachinery/pkg/types" - utilnet "k8s.io/apimachinery/pkg/util/net" utilruntime "k8s.io/apimachinery/pkg/util/runtime" "k8s.io/apimachinery/pkg/util/wait" "k8s.io/apiserver/pkg/server/healthz" @@ -60,17 +58,13 @@ import ( "k8s.io/kubernetes/pkg/proxy/iptables" "k8s.io/kubernetes/pkg/proxy/ipvs" "k8s.io/kubernetes/pkg/proxy/userspace" - "k8s.io/kubernetes/pkg/proxy/winuserspace" "k8s.io/kubernetes/pkg/util/configz" - utildbus "k8s.io/kubernetes/pkg/util/dbus" utiliptables "k8s.io/kubernetes/pkg/util/iptables" utilipvs "k8s.io/kubernetes/pkg/util/ipvs" - utilnetsh "k8s.io/kubernetes/pkg/util/netsh" utilnode "k8s.io/kubernetes/pkg/util/node" "k8s.io/kubernetes/pkg/util/oom" utilpointer "k8s.io/kubernetes/pkg/util/pointer" "k8s.io/kubernetes/pkg/util/resourcecontainer" - utilsysctl "k8s.io/kubernetes/pkg/util/sysctl" "k8s.io/kubernetes/pkg/version/verflag" "k8s.io/utils/exec" @@ -78,19 +72,19 @@ import ( "github.com/prometheus/client_golang/prometheus" "github.com/spf13/cobra" "github.com/spf13/pflag" - "k8s.io/kubernetes/pkg/features" ) const ( - proxyModeUserspace = "userspace" - proxyModeIPTables = "iptables" - proxyModeIPVS = "ipvs" + proxyModeUserspace = "userspace" + proxyModeIPTables = "iptables" + proxyModeIPVS = "ipvs" + proxyModeKernelspace = "kernelspace" ) // checkKnownProxyMode returns true if proxyMode is valid. func checkKnownProxyMode(proxyMode string) bool { switch proxyMode { - case "", proxyModeUserspace, proxyModeIPTables, proxyModeIPVS: + case "", proxyModeUserspace, proxyModeIPTables, proxyModeIPVS, proxyModeKernelspace: return true } return false @@ -142,7 +136,7 @@ func AddFlags(options *Options, fs *pflag.FlagSet) { fs.StringVar(&options.config.ClientConnection.KubeConfigFile, "kubeconfig", options.config.ClientConnection.KubeConfigFile, "Path to kubeconfig file with authorization information (the master location is set by the master flag).") fs.Var(componentconfig.PortRangeVar{Val: &options.config.PortRange}, "proxy-port-range", "Range of host ports (beginPort-endPort, inclusive) that may be consumed in order to proxy service traffic. If unspecified (0-0) then ports will be randomly chosen.") fs.StringVar(&options.config.HostnameOverride, "hostname-override", options.config.HostnameOverride, "If non-empty, will use this string as identification instead of the actual hostname.") - fs.Var(&options.config.Mode, "proxy-mode", "Which proxy mode to use: 'userspace' (older) or 'iptables' (faster) or 'ipvs'(experimental). If blank, use the best-available proxy (currently iptables). If the iptables proxy is selected, regardless of how, but the system's kernel or iptables versions are insufficient, this always falls back to the userspace proxy.") + fs.Var(&options.config.Mode, "proxy-mode", "Which proxy mode to use: 'userspace' (older) or 'iptables' (faster) or 'ipvs'(experimental)'. If blank, use the best-available proxy (currently iptables). 
If the iptables proxy is selected, regardless of how, but the system's kernel or iptables versions are insufficient, this always falls back to the userspace proxy.") fs.Int32Var(options.config.IPTables.MasqueradeBit, "iptables-masquerade-bit", utilpointer.Int32PtrDerefOr(options.config.IPTables.MasqueradeBit, 14), "If using the pure iptables proxy, the bit of the fwmark space to mark packets requiring SNAT with. Must be within the range [0, 31].") fs.DurationVar(&options.config.IPTables.SyncPeriod.Duration, "iptables-sync-period", options.config.IPTables.SyncPeriod.Duration, "The maximum interval of how often iptables rules are refreshed (e.g. '5s', '1m', '2h22m'). Must be greater than 0.") fs.DurationVar(&options.config.IPTables.MinSyncPeriod.Duration, "iptables-min-sync-period", options.config.IPTables.MinSyncPeriod.Duration, "The minimum interval of how often the iptables rules can be refreshed as endpoints and services change (e.g. '5s', '1m', '2h22m').") @@ -425,229 +419,6 @@ func createClients(config componentconfig.ClientConnectionConfiguration, masterO return client, eventClient.CoreV1(), nil } -// NewProxyServer returns a new ProxyServer. -func NewProxyServer(config *componentconfig.KubeProxyConfiguration, cleanupAndExit bool, scheme *runtime.Scheme, master string) (*ProxyServer, error) { - if config == nil { - return nil, errors.New("config is required") - } - - if c, err := configz.New("componentconfig"); err == nil { - c.Set(config) - } else { - return nil, fmt.Errorf("unable to register configz: %s", err) - } - - protocol := utiliptables.ProtocolIpv4 - if net.ParseIP(config.BindAddress).To4() == nil { - protocol = utiliptables.ProtocolIpv6 - } - - var netshInterface utilnetsh.Interface - var iptInterface utiliptables.Interface - var ipvsInterface utilipvs.Interface - var dbus utildbus.Interface - - // Create a iptables utils. 
- execer := exec.New() - - if goruntime.GOOS == "windows" { - netshInterface = utilnetsh.New(execer) - } else { - dbus = utildbus.New() - iptInterface = utiliptables.New(execer, dbus, protocol) - ipvsInterface = utilipvs.New(execer) - } - - // We omit creation of pretty much everything if we run in cleanup mode - if cleanupAndExit { - return &ProxyServer{IptInterface: iptInterface, IpvsInterface: ipvsInterface, CleanupAndExit: cleanupAndExit}, nil - } - - client, eventClient, err := createClients(config.ClientConnection, master) - if err != nil { - return nil, err - } - - // Create event recorder - hostname := utilnode.GetHostname(config.HostnameOverride) - eventBroadcaster := record.NewBroadcaster() - recorder := eventBroadcaster.NewRecorder(scheme, v1.EventSource{Component: "kube-proxy", Host: hostname}) - - nodeRef := &v1.ObjectReference{ - Kind: "Node", - Name: hostname, - UID: types.UID(hostname), - Namespace: "", - } - - var healthzServer *healthcheck.HealthzServer - var healthzUpdater healthcheck.HealthzUpdater - if len(config.HealthzBindAddress) > 0 { - healthzServer = healthcheck.NewDefaultHealthzServer(config.HealthzBindAddress, 2*config.IPTables.SyncPeriod.Duration, recorder, nodeRef) - healthzUpdater = healthzServer - } - - var proxier proxy.ProxyProvider - var serviceEventHandler proxyconfig.ServiceHandler - var endpointsEventHandler proxyconfig.EndpointsHandler - - proxyMode := getProxyMode(string(config.Mode), iptInterface, iptables.LinuxKernelCompatTester{}) - if proxyMode == proxyModeIPTables { - glog.V(0).Info("Using iptables Proxier.") - var nodeIP net.IP - if config.BindAddress != "0.0.0.0" { - nodeIP = net.ParseIP(config.BindAddress) - } else { - nodeIP = getNodeIP(client, hostname) - } - if config.IPTables.MasqueradeBit == nil { - // MasqueradeBit must be specified or defaulted. - return nil, fmt.Errorf("unable to read IPTables MasqueradeBit from config") - } - - // TODO this has side effects that should only happen when Run() is invoked. - proxierIPTables, err := iptables.NewProxier( - iptInterface, - utilsysctl.New(), - execer, - config.IPTables.SyncPeriod.Duration, - config.IPTables.MinSyncPeriod.Duration, - config.IPTables.MasqueradeAll, - int(*config.IPTables.MasqueradeBit), - config.ClusterCIDR, - hostname, - nodeIP, - recorder, - healthzUpdater, - ) - if err != nil { - return nil, fmt.Errorf("unable to create proxier: %v", err) - } - iptables.RegisterMetrics() - proxier = proxierIPTables - serviceEventHandler = proxierIPTables - endpointsEventHandler = proxierIPTables - // No turning back. Remove artifacts that might still exist from the userspace Proxier. - glog.V(0).Info("Tearing down inactive rules.") - // TODO this has side effects that should only happen when Run() is invoked. - userspace.CleanupLeftovers(iptInterface) - // IPVS Proxier will generate some iptables rules, - // need to clean them before switching to other proxy mode. 
- ipvs.CleanupLeftovers(execer, ipvsInterface, iptInterface) - } else if proxyMode == proxyModeIPVS { - glog.V(0).Info("Using ipvs Proxier.") - proxierIPVS, err := ipvs.NewProxier( - iptInterface, - ipvsInterface, - utilsysctl.New(), - execer, - config.IPVS.SyncPeriod.Duration, - config.IPVS.MinSyncPeriod.Duration, - config.IPTables.MasqueradeAll, - int(*config.IPTables.MasqueradeBit), - config.ClusterCIDR, - hostname, - getNodeIP(client, hostname), - recorder, - healthzServer, - config.IPVS.Scheduler, - ) - if err != nil { - return nil, fmt.Errorf("unable to create proxier: %v", err) - } - proxier = proxierIPVS - serviceEventHandler = proxierIPVS - endpointsEventHandler = proxierIPVS - glog.V(0).Info("Tearing down inactive rules.") - // TODO this has side effects that should only happen when Run() is invoked. - userspace.CleanupLeftovers(iptInterface) - iptables.CleanupLeftovers(iptInterface) - } else { - glog.V(0).Info("Using userspace Proxier.") - if goruntime.GOOS == "windows" { - // This is a proxy.LoadBalancer which NewProxier needs but has methods we don't need for - // our config.EndpointsConfigHandler. - loadBalancer := winuserspace.NewLoadBalancerRR() - // set EndpointsHandler to our loadBalancer - endpointsEventHandler = loadBalancer - proxierUserspace, err := winuserspace.NewProxier( - loadBalancer, - net.ParseIP(config.BindAddress), - netshInterface, - *utilnet.ParsePortRangeOrDie(config.PortRange), - // TODO @pires replace below with default values, if applicable - config.IPTables.SyncPeriod.Duration, - config.UDPIdleTimeout.Duration, - ) - if err != nil { - return nil, fmt.Errorf("unable to create proxier: %v", err) - } - serviceEventHandler = proxierUserspace - proxier = proxierUserspace - } else { - // This is a proxy.LoadBalancer which NewProxier needs but has methods we don't need for - // our config.EndpointsConfigHandler. - loadBalancer := userspace.NewLoadBalancerRR() - // set EndpointsConfigHandler to our loadBalancer - endpointsEventHandler = loadBalancer - - // TODO this has side effects that should only happen when Run() is invoked. - proxierUserspace, err := userspace.NewProxier( - loadBalancer, - net.ParseIP(config.BindAddress), - iptInterface, - execer, - *utilnet.ParsePortRangeOrDie(config.PortRange), - config.IPTables.SyncPeriod.Duration, - config.IPTables.MinSyncPeriod.Duration, - config.UDPIdleTimeout.Duration, - ) - if err != nil { - return nil, fmt.Errorf("unable to create proxier: %v", err) - } - serviceEventHandler = proxierUserspace - proxier = proxierUserspace - } - // Remove artifacts from the iptables and ipvs Proxier, if not on Windows. - if goruntime.GOOS != "windows" { - glog.V(0).Info("Tearing down inactive rules.") - // TODO this has side effects that should only happen when Run() is invoked. - iptables.CleanupLeftovers(iptInterface) - // IPVS Proxier will generate some iptables rules, - // need to clean them before switching to other proxy mode. - ipvs.CleanupLeftovers(execer, ipvsInterface, iptInterface) - } - } - - // Add iptables reload function, if not on Windows. 
- if goruntime.GOOS != "windows" { - iptInterface.AddReloadFunc(proxier.Sync) - } - - return &ProxyServer{ - Client: client, - EventClient: eventClient, - IptInterface: iptInterface, - IpvsInterface: ipvsInterface, - execer: execer, - Proxier: proxier, - Broadcaster: eventBroadcaster, - Recorder: recorder, - ConntrackConfiguration: config.Conntrack, - Conntracker: &realConntracker{}, - ProxyMode: proxyMode, - NodeRef: nodeRef, - MetricsBindAddress: config.MetricsBindAddress, - EnableProfiling: config.EnableProfiling, - OOMScoreAdj: config.OOMScoreAdj, - ResourceContainer: config.ResourceContainer, - ConfigSyncPeriod: config.ConfigSyncPeriod.Duration, - ServiceEventHandler: serviceEventHandler, - EndpointsEventHandler: endpointsEventHandler, - HealthzServer: healthzServer, - }, nil -} - // Run runs the specified ProxyServer. This should never exit (unless CleanupAndExit is set). func (s *ProxyServer) Run() error { // remove iptables rules and exit @@ -712,7 +483,8 @@ func (s *ProxyServer) Run() error { } // Tune conntrack, if requested - if s.Conntracker != nil && goruntime.GOOS != "windows" { + // Conntracker is always nil for windows + if s.Conntracker != nil { max, err := getConntrackMax(s.ConntrackConfiguration) if err != nil { return err @@ -776,6 +548,10 @@ func (s *ProxyServer) Run() error { return nil } +func (s *ProxyServer) birthCry() { + s.Recorder.Eventf(s.NodeRef, api.EventTypeNormal, "Starting", "Starting kube-proxy.") +} + func getConntrackMax(config componentconfig.KubeProxyConntrackConfiguration) (int, error) { if config.Max > 0 { if config.MaxPerCore > 0 { @@ -797,65 +573,6 @@ func getConntrackMax(config componentconfig.KubeProxyConntrackConfiguration) (in return 0, nil } -func getProxyMode(proxyMode string, iptver iptables.IPTablesVersioner, kcompat iptables.KernelCompatTester) string { - if proxyMode == proxyModeUserspace { - return proxyModeUserspace - } - - if len(proxyMode) > 0 && proxyMode == proxyModeIPTables { - return tryIPTablesProxy(iptver, kcompat) - } - - if utilfeature.DefaultFeatureGate.Enabled(features.SupportIPVSProxyMode) { - if proxyMode == proxyModeIPVS { - return tryIPVSProxy(iptver, kcompat) - } else { - glog.Warningf("Can't use ipvs proxier, trying iptables proxier") - return tryIPTablesProxy(iptver, kcompat) - } - } - glog.Warningf("Flag proxy-mode=%q unknown, assuming iptables proxy", proxyMode) - return tryIPTablesProxy(iptver, kcompat) -} - -func tryIPVSProxy(iptver iptables.IPTablesVersioner, kcompat iptables.KernelCompatTester) string { - // guaranteed false on error, error only necessary for debugging - // IPVS Proxier relies on iptables - useIPVSProxy, err := ipvs.CanUseIPVSProxier() - if err != nil { - utilruntime.HandleError(fmt.Errorf("can't determine whether to use ipvs proxy, using userspace proxier: %v", err)) - return proxyModeUserspace - } - if useIPVSProxy { - return proxyModeIPVS - } - - // TODO: Check ipvs version - - // Try to fallback to iptables before falling back to userspace - glog.V(1).Infof("Can't use ipvs proxier, trying iptables proxier") - return tryIPTablesProxy(iptver, kcompat) -} - -func tryIPTablesProxy(iptver iptables.IPTablesVersioner, kcompat iptables.KernelCompatTester) string { - // guaranteed false on error, error only necessary for debugging - useIPTablesProxy, err := iptables.CanUseIPTablesProxier(iptver, kcompat) - if err != nil { - utilruntime.HandleError(fmt.Errorf("can't determine whether to use iptables proxy, using userspace proxier: %v", err)) - return proxyModeUserspace - } - if useIPTablesProxy { - 
return proxyModeIPTables - } - // Fallback. - glog.V(1).Infof("Can't use iptables proxy, using userspace proxier") - return proxyModeUserspace -} - -func (s *ProxyServer) birthCry() { - s.Recorder.Eventf(s.NodeRef, api.EventTypeNormal, "Starting", "Starting kube-proxy.") -} - func getNodeIP(client clientset.Interface, hostname string) net.IP { var nodeIP net.IP node, err := client.Core().Nodes().Get(hostname, metav1.GetOptions{}) diff --git a/cmd/kube-proxy/app/server_linux.go b/cmd/kube-proxy/app/server_linux.go new file mode 100644 index 0000000000..c92e2f1fee --- /dev/null +++ b/cmd/kube-proxy/app/server_linux.go @@ -0,0 +1,298 @@ +// +build !windows + +/* +Copyright 2014 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Package app does all of the work necessary to configure and run a +// Kubernetes app process. +package app + +import ( + "errors" + "fmt" + "net" + + "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + utilnet "k8s.io/apimachinery/pkg/util/net" + utilruntime "k8s.io/apimachinery/pkg/util/runtime" + utilfeature "k8s.io/apiserver/pkg/util/feature" + "k8s.io/client-go/tools/record" + "k8s.io/kubernetes/pkg/apis/componentconfig" + "k8s.io/kubernetes/pkg/features" + "k8s.io/kubernetes/pkg/proxy" + proxyconfig "k8s.io/kubernetes/pkg/proxy/config" + "k8s.io/kubernetes/pkg/proxy/healthcheck" + "k8s.io/kubernetes/pkg/proxy/iptables" + "k8s.io/kubernetes/pkg/proxy/ipvs" + "k8s.io/kubernetes/pkg/proxy/userspace" + "k8s.io/kubernetes/pkg/util/configz" + utildbus "k8s.io/kubernetes/pkg/util/dbus" + utiliptables "k8s.io/kubernetes/pkg/util/iptables" + utilipvs "k8s.io/kubernetes/pkg/util/ipvs" + utilnode "k8s.io/kubernetes/pkg/util/node" + utilsysctl "k8s.io/kubernetes/pkg/util/sysctl" + "k8s.io/utils/exec" + + "github.com/golang/glog" +) + +func NewProxyServer(config *componentconfig.KubeProxyConfiguration, cleanupAndExit bool, scheme *runtime.Scheme, master string) (*ProxyServer, error) { + if config == nil { + return nil, errors.New("config is required") + } + + if c, err := configz.New("componentconfig"); err == nil { + c.Set(config) + } else { + return nil, fmt.Errorf("unable to register configz: %s", err) + } + + protocol := utiliptables.ProtocolIpv4 + if net.ParseIP(config.BindAddress).To4() == nil { + protocol = utiliptables.ProtocolIpv6 + } + + var iptInterface utiliptables.Interface + var ipvsInterface utilipvs.Interface + var dbus utildbus.Interface + + // Create a iptables utils. 
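// Illustrative sketch, not part of this patch: the protocol detection above treats any
// bind address whose To4() is nil as IPv6. A standalone reproduction of that check,
// using only the standard library (detectProtocol is a hypothetical helper name):
package main

import (
	"fmt"
	"net"
)

// detectProtocol mirrors the BindAddress check in NewProxyServer.
func detectProtocol(bindAddress string) string {
	if net.ParseIP(bindAddress).To4() == nil {
		return "IPv6"
	}
	return "IPv4"
}

func main() {
	for _, addr := range []string{"0.0.0.0", "10.0.0.1", "::", "fd00::1"} {
		fmt.Printf("%-10s -> %s\n", addr, detectProtocol(addr)) // first two report IPv4, last two IPv6
	}
}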
+ execer := exec.New() + + dbus = utildbus.New() + iptInterface = utiliptables.New(execer, dbus, protocol) + ipvsInterface = utilipvs.New(execer) + + // We omit creation of pretty much everything if we run in cleanup mode + if cleanupAndExit { + return &ProxyServer{IptInterface: iptInterface, IpvsInterface: ipvsInterface, CleanupAndExit: cleanupAndExit}, nil + } + + client, eventClient, err := createClients(config.ClientConnection, master) + if err != nil { + return nil, err + } + + // Create event recorder + hostname := utilnode.GetHostname(config.HostnameOverride) + eventBroadcaster := record.NewBroadcaster() + recorder := eventBroadcaster.NewRecorder(scheme, v1.EventSource{Component: "kube-proxy", Host: hostname}) + + nodeRef := &v1.ObjectReference{ + Kind: "Node", + Name: hostname, + UID: types.UID(hostname), + Namespace: "", + } + + var healthzServer *healthcheck.HealthzServer + var healthzUpdater healthcheck.HealthzUpdater + if len(config.HealthzBindAddress) > 0 { + healthzServer = healthcheck.NewDefaultHealthzServer(config.HealthzBindAddress, 2*config.IPTables.SyncPeriod.Duration, recorder, nodeRef) + healthzUpdater = healthzServer + } + + var proxier proxy.ProxyProvider + var serviceEventHandler proxyconfig.ServiceHandler + var endpointsEventHandler proxyconfig.EndpointsHandler + + proxyMode := getProxyMode(string(config.Mode), iptInterface, iptables.LinuxKernelCompatTester{}) + if proxyMode == proxyModeIPTables { + glog.V(0).Info("Using iptables Proxier.") + var nodeIP net.IP + if config.BindAddress != "0.0.0.0" { + nodeIP = net.ParseIP(config.BindAddress) + } else { + nodeIP = getNodeIP(client, hostname) + } + if config.IPTables.MasqueradeBit == nil { + // MasqueradeBit must be specified or defaulted. + return nil, fmt.Errorf("unable to read IPTables MasqueradeBit from config") + } + + // TODO this has side effects that should only happen when Run() is invoked. + proxierIPTables, err := iptables.NewProxier( + iptInterface, + utilsysctl.New(), + execer, + config.IPTables.SyncPeriod.Duration, + config.IPTables.MinSyncPeriod.Duration, + config.IPTables.MasqueradeAll, + int(*config.IPTables.MasqueradeBit), + config.ClusterCIDR, + hostname, + nodeIP, + recorder, + healthzUpdater, + ) + if err != nil { + return nil, fmt.Errorf("unable to create proxier: %v", err) + } + iptables.RegisterMetrics() + proxier = proxierIPTables + serviceEventHandler = proxierIPTables + endpointsEventHandler = proxierIPTables + // No turning back. Remove artifacts that might still exist from the userspace Proxier. + glog.V(0).Info("Tearing down inactive rules.") + // TODO this has side effects that should only happen when Run() is invoked. + userspace.CleanupLeftovers(iptInterface) + // IPVS Proxier will generate some iptables rules, + // need to clean them before switching to other proxy mode. 
+ ipvs.CleanupLeftovers(execer, ipvsInterface, iptInterface) + } else if proxyMode == proxyModeIPVS { + glog.V(0).Info("Using ipvs Proxier.") + proxierIPVS, err := ipvs.NewProxier( + iptInterface, + ipvsInterface, + utilsysctl.New(), + execer, + config.IPVS.SyncPeriod.Duration, + config.IPVS.MinSyncPeriod.Duration, + config.IPTables.MasqueradeAll, + int(*config.IPTables.MasqueradeBit), + config.ClusterCIDR, + hostname, + getNodeIP(client, hostname), + recorder, + healthzServer, + config.IPVS.Scheduler, + ) + if err != nil { + return nil, fmt.Errorf("unable to create proxier: %v", err) + } + proxier = proxierIPVS + serviceEventHandler = proxierIPVS + endpointsEventHandler = proxierIPVS + glog.V(0).Info("Tearing down inactive rules.") + // TODO this has side effects that should only happen when Run() is invoked. + userspace.CleanupLeftovers(iptInterface) + iptables.CleanupLeftovers(iptInterface) + } else { + glog.V(0).Info("Using userspace Proxier.") + // This is a proxy.LoadBalancer which NewProxier needs but has methods we don't need for + // our config.EndpointsConfigHandler. + loadBalancer := userspace.NewLoadBalancerRR() + // set EndpointsConfigHandler to our loadBalancer + endpointsEventHandler = loadBalancer + + // TODO this has side effects that should only happen when Run() is invoked. + proxierUserspace, err := userspace.NewProxier( + loadBalancer, + net.ParseIP(config.BindAddress), + iptInterface, + execer, + *utilnet.ParsePortRangeOrDie(config.PortRange), + config.IPTables.SyncPeriod.Duration, + config.IPTables.MinSyncPeriod.Duration, + config.UDPIdleTimeout.Duration, + ) + if err != nil { + return nil, fmt.Errorf("unable to create proxier: %v", err) + } + serviceEventHandler = proxierUserspace + proxier = proxierUserspace + + // Remove artifacts from the iptables and ipvs Proxier, if not on Windows. + glog.V(0).Info("Tearing down inactive rules.") + // TODO this has side effects that should only happen when Run() is invoked. + iptables.CleanupLeftovers(iptInterface) + // IPVS Proxier will generate some iptables rules, + // need to clean them before switching to other proxy mode. 
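// Sketch, not part of this patch: a standalone model of the Linux mode-selection order
// implemented by getProxyMode/tryIPVSProxy/tryIPTablesProxy further below. The booleans
// canUseIPVS/canUseIPTables stand in for the real capability probes
// (ipvs.CanUseIPVSProxier / iptables.CanUseIPTablesProxier); names here are hypothetical.
package main

import "fmt"

func chooseMode(requested string, ipvsGateEnabled, canUseIPVS, canUseIPTables bool) string {
	fallbackIPTables := func() string {
		if canUseIPTables {
			return "iptables"
		}
		return "userspace" // kernel or iptables version too old
	}
	switch {
	case requested == "userspace":
		return "userspace"
	case requested == "iptables":
		return fallbackIPTables()
	case ipvsGateEnabled && requested == "ipvs" && canUseIPVS:
		return "ipvs"
	case ipvsGateEnabled && requested == "ipvs": // ipvs requested but not usable
		return fallbackIPTables()
	default: // empty or unknown mode, or the IPVS feature gate is off
		return fallbackIPTables()
	}
}

func main() {
	fmt.Println(chooseMode("", false, false, true))        // iptables
	fmt.Println(chooseMode("ipvs", false, true, true))     // iptables (feature gate off)
	fmt.Println(chooseMode("ipvs", true, false, true))     // iptables (ipvs probe failed)
	fmt.Println(chooseMode("iptables", true, true, false)) // userspace (iptables probe failed)
}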
+ ipvs.CleanupLeftovers(execer, ipvsInterface, iptInterface) + } + + iptInterface.AddReloadFunc(proxier.Sync) + + return &ProxyServer{ + Client: client, + EventClient: eventClient, + IptInterface: iptInterface, + IpvsInterface: ipvsInterface, + execer: execer, + Proxier: proxier, + Broadcaster: eventBroadcaster, + Recorder: recorder, + ConntrackConfiguration: config.Conntrack, + Conntracker: &realConntracker{}, + ProxyMode: proxyMode, + NodeRef: nodeRef, + MetricsBindAddress: config.MetricsBindAddress, + EnableProfiling: config.EnableProfiling, + OOMScoreAdj: config.OOMScoreAdj, + ResourceContainer: config.ResourceContainer, + ConfigSyncPeriod: config.ConfigSyncPeriod.Duration, + ServiceEventHandler: serviceEventHandler, + EndpointsEventHandler: endpointsEventHandler, + HealthzServer: healthzServer, + }, nil +} + +func getProxyMode(proxyMode string, iptver iptables.IPTablesVersioner, kcompat iptables.KernelCompatTester) string { + if proxyMode == proxyModeUserspace { + return proxyModeUserspace + } + + if len(proxyMode) > 0 && proxyMode == proxyModeIPTables { + return tryIPTablesProxy(iptver, kcompat) + } + + if utilfeature.DefaultFeatureGate.Enabled(features.SupportIPVSProxyMode) { + if proxyMode == proxyModeIPVS { + return tryIPVSProxy(iptver, kcompat) + } else { + glog.Warningf("Can't use ipvs proxier, trying iptables proxier") + return tryIPTablesProxy(iptver, kcompat) + } + } + glog.Warningf("Flag proxy-mode=%q unknown, assuming iptables proxy", proxyMode) + return tryIPTablesProxy(iptver, kcompat) +} + +func tryIPVSProxy(iptver iptables.IPTablesVersioner, kcompat iptables.KernelCompatTester) string { + // guaranteed false on error, error only necessary for debugging + // IPVS Proxier relies on iptables + useIPVSProxy, err := ipvs.CanUseIPVSProxier() + if err != nil { + utilruntime.HandleError(fmt.Errorf("can't determine whether to use ipvs proxy, using userspace proxier: %v", err)) + return proxyModeUserspace + } + if useIPVSProxy { + return proxyModeIPVS + } + + // TODO: Check ipvs version + + // Try to fallback to iptables before falling back to userspace + glog.V(1).Infof("Can't use ipvs proxier, trying iptables proxier") + return tryIPTablesProxy(iptver, kcompat) +} + +func tryIPTablesProxy(iptver iptables.IPTablesVersioner, kcompat iptables.KernelCompatTester) string { + // guaranteed false on error, error only necessary for debugging + useIPTablesProxy, err := iptables.CanUseIPTablesProxier(iptver, kcompat) + if err != nil { + utilruntime.HandleError(fmt.Errorf("can't determine whether to use iptables proxy, using userspace proxier: %v", err)) + return proxyModeUserspace + } + if useIPTablesProxy { + return proxyModeIPTables + } + // Fallback. + glog.V(1).Infof("Can't use iptables proxy, using userspace proxier") + return proxyModeUserspace +} diff --git a/cmd/kube-proxy/app/server_windows.go b/cmd/kube-proxy/app/server_windows.go new file mode 100644 index 0000000000..f4f46386a7 --- /dev/null +++ b/cmd/kube-proxy/app/server_windows.go @@ -0,0 +1,187 @@ +// +build windows + +/* +Copyright 2014 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Package app does all of the work necessary to configure and run a +// Kubernetes app process. +package app + +import ( + "errors" + "fmt" + "net" + _ "net/http/pprof" + + "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + utilnet "k8s.io/apimachinery/pkg/util/net" + "k8s.io/client-go/tools/record" + "k8s.io/kubernetes/pkg/apis/componentconfig" + "k8s.io/kubernetes/pkg/proxy" + proxyconfig "k8s.io/kubernetes/pkg/proxy/config" + "k8s.io/kubernetes/pkg/proxy/healthcheck" + "k8s.io/kubernetes/pkg/proxy/winkernel" + "k8s.io/kubernetes/pkg/proxy/winuserspace" + "k8s.io/kubernetes/pkg/util/configz" + utilnetsh "k8s.io/kubernetes/pkg/util/netsh" + utilnode "k8s.io/kubernetes/pkg/util/node" + "k8s.io/utils/exec" + + "github.com/golang/glog" +) + +// NewProxyServer returns a new ProxyServer. +func NewProxyServer(config *componentconfig.KubeProxyConfiguration, cleanupAndExit bool, scheme *runtime.Scheme, master string) (*ProxyServer, error) { + if config == nil { + return nil, errors.New("config is required") + } + + if c, err := configz.New("componentconfig"); err == nil { + c.Set(config) + } else { + return nil, fmt.Errorf("unable to register configz: %s", err) + } + + // We omit creation of pretty much everything if we run in cleanup mode + if cleanupAndExit { + return &ProxyServer{CleanupAndExit: cleanupAndExit}, nil + } + + client, eventClient, err := createClients(config.ClientConnection, master) + if err != nil { + return nil, err + } + + // Create event recorder + hostname := utilnode.GetHostname(config.HostnameOverride) + eventBroadcaster := record.NewBroadcaster() + recorder := eventBroadcaster.NewRecorder(scheme, v1.EventSource{Component: "kube-proxy", Host: hostname}) + + nodeRef := &v1.ObjectReference{ + Kind: "Node", + Name: hostname, + UID: types.UID(hostname), + Namespace: "", + } + + var healthzServer *healthcheck.HealthzServer + var healthzUpdater healthcheck.HealthzUpdater + if len(config.HealthzBindAddress) > 0 { + healthzServer = healthcheck.NewDefaultHealthzServer(config.HealthzBindAddress, 2*config.IPTables.SyncPeriod.Duration, recorder, nodeRef) + healthzUpdater = healthzServer + } + + var proxier proxy.ProxyProvider + var serviceEventHandler proxyconfig.ServiceHandler + var endpointsEventHandler proxyconfig.EndpointsHandler + + proxyMode := getProxyMode(string(config.Mode), winkernel.WindowsKernelCompatTester{}) + if proxyMode == proxyModeKernelspace { + glog.V(0).Info("Using Kernelspace Proxier.") + proxierKernelspace, err := winkernel.NewProxier( + config.IPTables.SyncPeriod.Duration, + config.IPTables.MinSyncPeriod.Duration, + config.IPTables.MasqueradeAll, + int(*config.IPTables.MasqueradeBit), + config.ClusterCIDR, + hostname, + getNodeIP(client, hostname), + recorder, + healthzUpdater, + ) + if err != nil { + return nil, fmt.Errorf("unable to create proxier: %v", err) + } + proxier = proxierKernelspace + endpointsEventHandler = proxierKernelspace + serviceEventHandler = proxierKernelspace + } else { + glog.V(0).Info("Using userspace Proxier.") + execer := exec.New() + var netshInterface utilnetsh.Interface + netshInterface = utilnetsh.New(execer) + + // This is a proxy.LoadBalancer which NewProxier needs but has methods we don't need for + // our config.EndpointsConfigHandler. 
+		loadBalancer := winuserspace.NewLoadBalancerRR()
+
+		// set EndpointsConfigHandler to our loadBalancer
+		endpointsEventHandler = loadBalancer
+		proxierUserspace, err := winuserspace.NewProxier(
+			loadBalancer,
+			net.ParseIP(config.BindAddress),
+			netshInterface,
+			*utilnet.ParsePortRangeOrDie(config.PortRange),
+			// TODO @pires replace below with default values, if applicable
+			config.IPTables.SyncPeriod.Duration,
+			config.UDPIdleTimeout.Duration,
+		)
+		if err != nil {
+			return nil, fmt.Errorf("unable to create proxier: %v", err)
+		}
+		proxier = proxierUserspace
+		serviceEventHandler = proxierUserspace
+		glog.V(0).Info("Tearing down pure-winkernel proxy rules.")
+		winkernel.CleanupLeftovers()
+	}
+
+	return &ProxyServer{
+		Client:                client,
+		EventClient:           eventClient,
+		Proxier:               proxier,
+		Broadcaster:           eventBroadcaster,
+		Recorder:              recorder,
+		ProxyMode:             proxyMode,
+		NodeRef:               nodeRef,
+		MetricsBindAddress:    config.MetricsBindAddress,
+		EnableProfiling:       config.EnableProfiling,
+		OOMScoreAdj:           config.OOMScoreAdj,
+		ResourceContainer:     config.ResourceContainer,
+		ConfigSyncPeriod:      config.ConfigSyncPeriod.Duration,
+		ServiceEventHandler:   serviceEventHandler,
+		EndpointsEventHandler: endpointsEventHandler,
+		HealthzServer:         healthzServer,
+	}, nil
+}
+
+func getProxyMode(proxyMode string, kcompat winkernel.KernelCompatTester) string {
+	if proxyMode == proxyModeUserspace {
+		return proxyModeUserspace
+	} else if proxyMode == proxyModeKernelspace {
+		return tryWinKernelSpaceProxy(kcompat)
+	}
+	return proxyModeUserspace
+}
+
+func tryWinKernelSpaceProxy(kcompat winkernel.KernelCompatTester) string {
+	// Check the Windows kernel version to determine whether the kernelspace proxy is supported.
+
+	// guaranteed false on error, error only necessary for debugging
+	useWinKernelProxy, err := winkernel.CanUseWinKernelProxier(kcompat)
+	if err != nil {
+		glog.Errorf("Can't determine whether to use windows kernel proxy, using userspace proxier: %v", err)
+		return proxyModeUserspace
+	}
+	if useWinKernelProxy {
+		return proxyModeKernelspace
+	}
+	// Fallback.
+	glog.V(1).Infof("Can't use winkernel proxy, using userspace proxier")
+	return proxyModeUserspace
+}
diff --git a/pkg/proxy/winkernel/metrics.go b/pkg/proxy/winkernel/metrics.go
new file mode 100644
index 0000000000..100b6abba8
--- /dev/null
+++ b/pkg/proxy/winkernel/metrics.go
@@ -0,0 +1,50 @@
+/*
+Copyright 2017 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/ + +package winkernel + +import ( + "sync" + "time" + + "github.com/prometheus/client_golang/prometheus" +) + +const kubeProxySubsystem = "kubeproxy" + +var ( + SyncProxyRulesLatency = prometheus.NewHistogram( + prometheus.HistogramOpts{ + Subsystem: kubeProxySubsystem, + Name: "sync_proxy_rules_latency_microseconds", + Help: "SyncProxyRules latency", + Buckets: prometheus.ExponentialBuckets(1000, 2, 15), + }, + ) +) + +var registerMetricsOnce sync.Once + +func RegisterMetrics() { + registerMetricsOnce.Do(func() { + prometheus.MustRegister(SyncProxyRulesLatency) + }) +} + +// Gets the time since the specified start in microseconds. +func sinceInMicroseconds(start time.Time) float64 { + return float64(time.Since(start).Nanoseconds() / time.Microsecond.Nanoseconds()) +} diff --git a/pkg/proxy/winkernel/proxier.go b/pkg/proxy/winkernel/proxier.go new file mode 100644 index 0000000000..d440dc410b --- /dev/null +++ b/pkg/proxy/winkernel/proxier.go @@ -0,0 +1,1129 @@ +/* +Copyright 2015 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package winkernel + +import ( + "encoding/json" + "fmt" + "net" + "os" + "reflect" + "sync" + "sync/atomic" + "time" + + "github.com/Microsoft/hcsshim" + "github.com/davecgh/go-spew/spew" + "github.com/golang/glog" + + "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/sets" + "k8s.io/apimachinery/pkg/util/wait" + utilfeature "k8s.io/apiserver/pkg/util/feature" + "k8s.io/client-go/tools/record" + "k8s.io/kubernetes/pkg/api" + "k8s.io/kubernetes/pkg/api/helper" + apiservice "k8s.io/kubernetes/pkg/api/service" + "k8s.io/kubernetes/pkg/features" + "k8s.io/kubernetes/pkg/proxy" + "k8s.io/kubernetes/pkg/proxy/healthcheck" + "k8s.io/kubernetes/pkg/util/async" +) + +// KernelCompatTester tests whether the required kernel capabilities are +// present to run the windows kernel proxier. +type KernelCompatTester interface { + IsCompatible() error +} + +// CanUseWinKernelProxier returns true if we should use the Kernel Proxier +// instead of the "classic" userspace Proxier. This is determined by checking +// the windows kernel version and for the existence of kernel features. +func CanUseWinKernelProxier(kcompat KernelCompatTester) (bool, error) { + // Check that the kernel supports what we need. 
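// Sketch, not part of this patch: because CanUseWinKernelProxier depends only on the
// KernelCompatTester interface defined above, both branches can be exercised with a fake.
// A hypothetical test file for package winkernel:
package winkernel

import (
	"errors"
	"testing"
)

// fakeKernelCompat reports a fixed compatibility result (nil error means compatible).
type fakeKernelCompat struct{ err error }

func (f fakeKernelCompat) IsCompatible() error { return f.err }

func TestCanUseWinKernelProxier(t *testing.T) {
	if ok, err := CanUseWinKernelProxier(fakeKernelCompat{}); !ok || err != nil {
		t.Fatalf("expected a compatible kernel to enable the winkernel proxier, got ok=%v err=%v", ok, err)
	}
	if ok, err := CanUseWinKernelProxier(fakeKernelCompat{err: errors.New("kernel too old")}); ok || err == nil {
		t.Fatalf("expected an incompatible kernel to disable the winkernel proxier, got ok=%v err=%v", ok, err)
	}
}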
+ if err := kcompat.IsCompatible(); err != nil { + return false, err + } + return true, nil +} + +type WindowsKernelCompatTester struct{} + +// TODO : Fix the below API to query the OS version +func (lkct WindowsKernelCompatTester) IsCompatible() error { + return nil +} + +type externalIPInfo struct { + ip string + hnsID string +} + +type loadBalancerIngressInfo struct { + ip string + hnsID string +} + +// internal struct for string service information +type serviceInfo struct { + clusterIP net.IP + port int + protocol api.Protocol + nodePort int + targetPort int + loadBalancerStatus api.LoadBalancerStatus + sessionAffinityType api.ServiceAffinity + stickyMaxAgeMinutes int + externalIPs []*externalIPInfo + loadBalancerIngressIPs []*loadBalancerIngressInfo + loadBalancerSourceRanges []string + onlyNodeLocalEndpoints bool + healthCheckNodePort int + hnsID string + nodePorthnsID string + policyApplied bool +} + +type hnsNetworkInfo struct { + name string + id string +} + +func Log(v interface{}, message string, level glog.Level) { + glog.V(level).Infof("%s, %s", message, spew.Sdump(v)) +} + +func LogJson(v interface{}, message string, level glog.Level) { + jsonString, err := json.Marshal(v) + if err == nil { + glog.V(level).Infof("%s, %s", message, string(jsonString)) + } +} + +// internal struct for endpoints information +type endpointsInfo struct { + ip string + port uint16 + isLocal bool + macAddress string + hnsID string + refCount uint16 +} + +func newEndpointInfo(ip string, port uint16, isLocal bool) *endpointsInfo { + info := &endpointsInfo{ + ip: ip, + port: port, + isLocal: isLocal, + macAddress: "00:11:22:33:44:55", // Hardcoding to some Random Mac + refCount: 0, + hnsID: "", + } + + return info +} + +func (ep *endpointsInfo) Cleanup() { + Log(ep, "Endpoint Cleanup", 3) + ep.refCount-- + // Remove the remote hns endpoint, if no service is referring it + // Never delete a Local Endpoint. Local Endpoints are already created by other entities. + // Remove only remote endpoints created by this service + if ep.refCount <= 0 && !ep.isLocal { + glog.V(4).Infof("Removing endpoints for %v, since no one is referencing it", ep) + deleteHnsEndpoint(ep.hnsID) + ep.hnsID = "" + } + +} + +// returns a new serviceInfo struct +func newServiceInfo(svcPortName proxy.ServicePortName, port *api.ServicePort, service *api.Service) *serviceInfo { + onlyNodeLocalEndpoints := false + if utilfeature.DefaultFeatureGate.Enabled(features.ExternalTrafficLocalOnly) && + apiservice.RequestsOnlyLocalTraffic(service) { + onlyNodeLocalEndpoints = true + } + + info := &serviceInfo{ + clusterIP: net.ParseIP(service.Spec.ClusterIP), + port: int(port.Port), + protocol: port.Protocol, + nodePort: int(port.NodePort), + targetPort: port.TargetPort.IntValue(), + // Deep-copy in case the service instance changes + loadBalancerStatus: *helper.LoadBalancerStatusDeepCopy(&service.Status.LoadBalancer), + sessionAffinityType: service.Spec.SessionAffinity, + stickyMaxAgeMinutes: 180, // TODO: paramaterize this in the API. 
+ loadBalancerSourceRanges: make([]string, len(service.Spec.LoadBalancerSourceRanges)), + onlyNodeLocalEndpoints: onlyNodeLocalEndpoints, + } + + copy(info.loadBalancerSourceRanges, service.Spec.LoadBalancerSourceRanges) + for _, eip := range service.Spec.ExternalIPs { + info.externalIPs = append(info.externalIPs, &externalIPInfo{ip: eip}) + } + for _, ingress := range service.Status.LoadBalancer.Ingress { + info.loadBalancerIngressIPs = append(info.loadBalancerIngressIPs, &loadBalancerIngressInfo{ip: ingress.IP}) + } + + if apiservice.NeedsHealthCheck(service) { + p := service.Spec.HealthCheckNodePort + if p == 0 { + glog.Errorf("Service %q has no healthcheck nodeport", svcPortName.NamespacedName.String()) + } else { + info.healthCheckNodePort = int(p) + } + } + + return info +} + +type endpointsChange struct { + previous proxyEndpointsMap + current proxyEndpointsMap +} + +type endpointsChangeMap struct { + lock sync.Mutex + hostname string + items map[types.NamespacedName]*endpointsChange +} + +type serviceChange struct { + previous proxyServiceMap + current proxyServiceMap +} + +type serviceChangeMap struct { + lock sync.Mutex + items map[types.NamespacedName]*serviceChange +} + +type updateEndpointMapResult struct { + hcEndpoints map[types.NamespacedName]int + staleEndpoints map[endpointServicePair]bool + staleServiceNames map[proxy.ServicePortName]bool +} + +type updateServiceMapResult struct { + hcServices map[types.NamespacedName]uint16 + staleServices sets.String +} +type proxyServiceMap map[proxy.ServicePortName]*serviceInfo +type proxyEndpointsMap map[proxy.ServicePortName][]*endpointsInfo + +func newEndpointsChangeMap(hostname string) endpointsChangeMap { + return endpointsChangeMap{ + hostname: hostname, + items: make(map[types.NamespacedName]*endpointsChange), + } +} + +func (ecm *endpointsChangeMap) update(namespacedName *types.NamespacedName, previous, current *api.Endpoints) bool { + ecm.lock.Lock() + defer ecm.lock.Unlock() + + change, exists := ecm.items[*namespacedName] + if !exists { + change = &endpointsChange{} + change.previous = endpointsToEndpointsMap(previous, ecm.hostname) + ecm.items[*namespacedName] = change + } + change.current = endpointsToEndpointsMap(current, ecm.hostname) + if reflect.DeepEqual(change.previous, change.current) { + delete(ecm.items, *namespacedName) + } + return len(ecm.items) > 0 +} + +func newServiceChangeMap() serviceChangeMap { + return serviceChangeMap{ + items: make(map[types.NamespacedName]*serviceChange), + } +} + +func (scm *serviceChangeMap) update(namespacedName *types.NamespacedName, previous, current *api.Service) bool { + scm.lock.Lock() + defer scm.lock.Unlock() + + change, exists := scm.items[*namespacedName] + if !exists { + // Service is Added + change = &serviceChange{} + change.previous = serviceToServiceMap(previous) + scm.items[*namespacedName] = change + } + change.current = serviceToServiceMap(current) + if reflect.DeepEqual(change.previous, change.current) { + delete(scm.items, *namespacedName) + } + return len(scm.items) > 0 +} + +func (sm *proxyServiceMap) merge(other proxyServiceMap, curEndpoints proxyEndpointsMap) sets.String { + existingPorts := sets.NewString() + for svcPortName, info := range other { + existingPorts.Insert(svcPortName.Port) + svcInfo, exists := (*sm)[svcPortName] + if !exists { + glog.V(1).Infof("Adding new service port %q at %s:%d/%s", svcPortName, info.clusterIP, info.port, info.protocol) + } else { + glog.V(1).Infof("Updating existing service port %q at %s:%d/%s", svcPortName, 
info.clusterIP, info.port, info.protocol) + svcInfo.cleanupAllPolicies(curEndpoints[svcPortName]) + delete(*sm, svcPortName) + } + (*sm)[svcPortName] = info + } + return existingPorts +} + +func (sm *proxyServiceMap) unmerge(other proxyServiceMap, existingPorts, staleServices sets.String, curEndpoints proxyEndpointsMap) { + for svcPortName := range other { + if existingPorts.Has(svcPortName.Port) { + continue + } + info, exists := (*sm)[svcPortName] + if exists { + glog.V(1).Infof("Removing service port %q", svcPortName) + if info.protocol == api.ProtocolUDP { + staleServices.Insert(info.clusterIP.String()) + } + info.cleanupAllPolicies(curEndpoints[svcPortName]) + delete(*sm, svcPortName) + } else { + glog.Errorf("Service port %q removed, but doesn't exists", svcPortName) + } + } +} + +func (em proxyEndpointsMap) merge(other proxyEndpointsMap, curServices proxyServiceMap) { + // Endpoint Update/Add + for svcPortName := range other { + epInfos, exists := em[svcPortName] + if exists { + // + info, exists := curServices[svcPortName] + glog.V(1).Infof("Updating existing service port %q at %s:%d/%s", svcPortName, info.clusterIP, info.port, info.protocol) + if exists { + glog.V(2).Infof("Endpoints are modified. Service [%v] is stale", svcPortName) + info.cleanupAllPolicies(epInfos) + } else { + // If no service exists, just cleanup the remote endpoints + glog.V(2).Infof("Endpoints are orphaned. Cleaning up") + // Cleanup Endpoints references + for _, ep := range epInfos { + ep.Cleanup() + } + + } + + delete(em, svcPortName) + } + em[svcPortName] = other[svcPortName] + } +} + +func (em proxyEndpointsMap) unmerge(other proxyEndpointsMap, curServices proxyServiceMap) { + // Endpoint Update/Removal + for svcPortName := range other { + info, exists := curServices[svcPortName] + if exists { + glog.V(2).Infof("Service [%v] is stale", info) + info.cleanupAllPolicies(em[svcPortName]) + } else { + // If no service exists, just cleanup the remote endpoints + glog.V(2).Infof("Endpoints are orphaned. Cleaning up") + // Cleanup Endpoints references + epInfos, exists := em[svcPortName] + if exists { + for _, ep := range epInfos { + ep.Cleanup() + } + } + } + + delete(em, svcPortName) + } +} + +// Proxier is an hns based proxy for connections between a localhost:lport +// and services that provide the actual backends. +type Proxier struct { + // endpointsChanges and serviceChanges contains all changes to endpoints and + // services that happened since policies were synced. For a single object, + // changes are accumulated, i.e. previous is state from before all of them, + // current is state after applying all of those. + endpointsChanges endpointsChangeMap + serviceChanges serviceChangeMap + + mu sync.Mutex // protects the following fields + serviceMap proxyServiceMap + endpointsMap proxyEndpointsMap + portsMap map[localPort]closeable + // endpointsSynced and servicesSynced are set to true when corresponding + // objects are synced after startup. This is used to avoid updating hns policies + // with some partial data after kube-proxy restart. + endpointsSynced bool + servicesSynced bool + initialized int32 + syncRunner *async.BoundedFrequencyRunner // governs calls to syncProxyRules + + // These are effectively const and do not need the mutex to be held. 
+ masqueradeAll bool + masqueradeMark string + clusterCIDR string + hostname string + nodeIP net.IP + recorder record.EventRecorder + healthChecker healthcheck.Server + healthzServer healthcheck.HealthzUpdater + + // Since converting probabilities (floats) to strings is expensive + // and we are using only probabilities in the format of 1/n, we are + // precomputing some number of those and cache for future reuse. + precomputedProbabilities []string + + network hnsNetworkInfo +} + +type localPort struct { + desc string + ip string + port int + protocol string +} + +func (lp *localPort) String() string { + return fmt.Sprintf("%q (%s:%d/%s)", lp.desc, lp.ip, lp.port, lp.protocol) +} + +func Enum(p api.Protocol) uint16 { + if p == api.ProtocolTCP { + return 6 + } + if p == api.ProtocolUDP { + return 17 + } + return 0 +} + +type closeable interface { + Close() error +} + +// Proxier implements ProxyProvider +var _ proxy.ProxyProvider = &Proxier{} + +// NewProxier returns a new Proxier +func NewProxier( + syncPeriod time.Duration, + minSyncPeriod time.Duration, + masqueradeAll bool, + masqueradeBit int, + clusterCIDR string, + hostname string, + nodeIP net.IP, + recorder record.EventRecorder, + healthzServer healthcheck.HealthzUpdater, +) (*Proxier, error) { + // check valid user input + if minSyncPeriod > syncPeriod { + return nil, fmt.Errorf("min-sync (%v) must be < sync(%v)", minSyncPeriod, syncPeriod) + } + + // Generate the masquerade mark to use for SNAT rules. + if masqueradeBit < 0 || masqueradeBit > 31 { + return nil, fmt.Errorf("invalid iptables-masquerade-bit %v not in [0, 31]", masqueradeBit) + } + masqueradeValue := 1 << uint(masqueradeBit) + masqueradeMark := fmt.Sprintf("%#08x/%#08x", masqueradeValue, masqueradeValue) + + if nodeIP == nil { + glog.Warningf("invalid nodeIP, initializing kube-proxy with 127.0.0.1 as nodeIP") + nodeIP = net.ParseIP("127.0.0.1") + } + + if len(clusterCIDR) == 0 { + glog.Warningf("clusterCIDR not specified, unable to distinguish between internal and external traffic") + } + + healthChecker := healthcheck.NewServer(hostname, recorder, nil, nil) // use default implementations of deps + + // TODO : Make this a param + hnsNetworkName := os.Getenv("KUBE_NETWORK") + if len(hnsNetworkName) == 0 { + return nil, fmt.Errorf("Environment variable KUBE_NETWORK not initialized") + } + hnsNetwork, err := getHnsNetworkInfo(hnsNetworkName) + if err != nil { + glog.Fatalf("Unable to find Hns Network speficied by %s. Please check environment variable KUBE_NETWORK", hnsNetworkName) + return nil, err + } + + glog.V(1).Infof("Hns Network loaded with info = %v", hnsNetwork) + + proxier := &Proxier{ + portsMap: make(map[localPort]closeable), + serviceMap: make(proxyServiceMap), + serviceChanges: newServiceChangeMap(), + endpointsMap: make(proxyEndpointsMap), + endpointsChanges: newEndpointsChangeMap(hostname), + masqueradeAll: masqueradeAll, + masqueradeMark: masqueradeMark, + clusterCIDR: clusterCIDR, + hostname: hostname, + nodeIP: nodeIP, + recorder: recorder, + healthChecker: healthChecker, + healthzServer: healthzServer, + network: *hnsNetwork, + } + + burstSyncs := 2 + glog.V(3).Infof("minSyncPeriod: %v, syncPeriod: %v, burstSyncs: %d", minSyncPeriod, syncPeriod, burstSyncs) + proxier.syncRunner = async.NewBoundedFrequencyRunner("sync-runner", proxier.syncProxyRules, minSyncPeriod, syncPeriod, burstSyncs) + return proxier, nil + +} + +// CleanupLeftovers removes all hns rules created by the Proxier +// It returns true if an error was encountered. Errors are logged. 
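// Worked example, not part of this patch: how NewProxier below turns the
// --iptables-masquerade-bit setting into the fwmark/mask string it stores.
// masqueradeMark here is a hypothetical standalone helper mirroring that code.
package main

import "fmt"

func masqueradeMark(bit int) (string, error) {
	if bit < 0 || bit > 31 {
		return "", fmt.Errorf("invalid iptables-masquerade-bit %v not in [0, 31]", bit)
	}
	value := 1 << uint(bit)
	return fmt.Sprintf("%#08x/%#08x", value, value), nil
}

func main() {
	// Bit 14 (the kube-proxy default) selects value 1<<14 = 16384 = 0x4000, so the
	// mark and the mask each carry only that single bit.
	mark, _ := masqueradeMark(14)
	fmt.Println(mark)
}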
+func CleanupLeftovers() (encounteredError bool) { + // Delete all Hns Load Balancer Policies + deleteAllHnsLoadBalancerPolicy() + // TODO + // Delete all Hns Remote endpoints + + return encounteredError +} + +func (svcInfo *serviceInfo) cleanupAllPolicies(endpoints []*endpointsInfo) { + Log(svcInfo, "Service Cleanup", 3) + if svcInfo.policyApplied { + svcInfo.deleteAllHnsLoadBalancerPolicy() + // Cleanup Endpoints references + for _, ep := range endpoints { + ep.Cleanup() + } + + svcInfo.policyApplied = false + } +} + +func (svcInfo *serviceInfo) deleteAllHnsLoadBalancerPolicy() { + // Remove the Hns Policy corresponding to this service + deleteHnsLoadBalancerPolicy(svcInfo.hnsID) + svcInfo.hnsID = "" + for _, externalIp := range svcInfo.externalIPs { + deleteHnsLoadBalancerPolicy(externalIp.hnsID) + externalIp.hnsID = "" + } + for _, lbIngressIp := range svcInfo.loadBalancerIngressIPs { + deleteHnsLoadBalancerPolicy(lbIngressIp.hnsID) + lbIngressIp.hnsID = "" + } + +} + +func deleteAllHnsLoadBalancerPolicy() { + plists, err := hcsshim.HNSListPolicyListRequest() + if err != nil { + return + } + for _, plist := range plists { + LogJson(plist, "Remove Policy", 3) + _, err = plist.Delete() + if err != nil { + glog.Errorf("%v", err) + } + } + +} + +// getHnsLoadBalancer returns the LoadBalancer policy resource, if already found. +// If not, it would create one and return +func getHnsLoadBalancer(endpoints []hcsshim.HNSEndpoint, isILB bool, vip string, protocol uint16, internalPort uint16, externalPort uint16) (*hcsshim.PolicyList, error) { + plists, err := hcsshim.HNSListPolicyListRequest() + if err != nil { + return nil, err + } + + for _, plist := range plists { + if len(plist.EndpointReferences) != len(endpoints) { + continue + } + // Validate if input meets any of the policy lists + elbPolicy := hcsshim.ELBPolicy{} + if err = json.Unmarshal(plist.Policies[0], &elbPolicy); err != nil { + continue + } + if elbPolicy.Protocol == protocol && elbPolicy.InternalPort == internalPort && elbPolicy.ExternalPort == externalPort && elbPolicy.ILB == isILB { + if len(vip) > 0 { + if len(elbPolicy.VIPs) > 0 && elbPolicy.VIPs[0] != vip { + continue + } + } + LogJson(plist, "Found existing Hns loadbalancer policy resource", 1) + return &plist, nil + + } + } + //TODO: sourceVip is not used. 
If required, expose this as a param + var sourceVip string + lb, err := hcsshim.AddLoadBalancer( + endpoints, + isILB, + sourceVip, + vip, + protocol, + internalPort, + externalPort, + ) + + if err == nil { + LogJson(lb, "Hns loadbalancer policy resource", 1) + } + return lb, err +} + +func deleteHnsLoadBalancerPolicy(hnsID string) { + if len(hnsID) == 0 { + // Return silently + return + } + + // Cleanup HNS policies + hnsloadBalancer, err := hcsshim.GetPolicyListByID(hnsID) + if err != nil { + glog.Errorf("%v", err) + return + } + LogJson(hnsloadBalancer, "Removing Policy", 2) + + _, err = hnsloadBalancer.Delete() + if err != nil { + glog.Errorf("%v", err) + } +} + +func deleteHnsEndpoint(hnsID string) { + hnsendpoint, err := hcsshim.GetHNSEndpointByID(hnsID) + if err != nil { + glog.Errorf("%v", err) + return + } + + _, err = hnsendpoint.Delete() + if err != nil { + glog.Errorf("%v", err) + } + + glog.V(3).Infof("Remote endpoint resource deleted id %s", hnsID) +} + +func getHnsNetworkInfo(hnsNetworkName string) (*hnsNetworkInfo, error) { + hnsnetwork, err := hcsshim.GetHNSNetworkByName(hnsNetworkName) + if err != nil { + glog.Errorf("%v", err) + return nil, err + } + + return &hnsNetworkInfo{ + id: hnsnetwork.Id, + name: hnsnetwork.Name, + }, nil +} + +func getHnsEndpointByIpAddress(ip net.IP, networkName string) (*hcsshim.HNSEndpoint, error) { + hnsnetwork, err := hcsshim.GetHNSNetworkByName(networkName) + if err != nil { + glog.Errorf("%v", err) + return nil, err + } + + endpoints, err := hcsshim.HNSListEndpointRequest() + for _, endpoint := range endpoints { + equal := reflect.DeepEqual(endpoint.IPAddress, ip) + if equal && endpoint.VirtualNetwork == hnsnetwork.Id { + return &endpoint, nil + } + } + + return nil, fmt.Errorf("Endpoint %v not found on network %s", ip, networkName) +} + +// Sync is called to synchronize the proxier state to hns as soon as possible. +func (proxier *Proxier) Sync() { + proxier.syncRunner.Run() +} + +// SyncLoop runs periodic work. This is expected to run as a goroutine or as the main loop of the app. It does not return. +func (proxier *Proxier) SyncLoop() { + // Update healthz timestamp at beginning in case Sync() never succeeds. 
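// Sketch, not part of this patch: a toy model of what the syncRunner gives Sync/SyncLoop
// here — event-driven requests are coalesced, and a periodic timer guarantees a sync even
// when no events arrive. The real implementation is async.BoundedFrequencyRunner; this
// standalone version only illustrates the shape, not the min-interval bounding.
package main

import (
	"fmt"
	"time"
)

func main() {
	requests := make(chan struct{}, 1) // at most one pending request is remembered

	sync := func() { fmt.Println("syncing proxy rules at", time.Now().Format(time.StampMilli)) }

	// Like Sync(): request work without blocking; duplicate requests coalesce.
	requestSync := func() {
		select {
		case requests <- struct{}{}:
		default: // a sync is already pending
		}
	}

	// Like SyncLoop(): serve requests, but never wait longer than the period.
	go func() {
		ticker := time.NewTicker(500 * time.Millisecond)
		defer ticker.Stop()
		for {
			select {
			case <-requests:
			case <-ticker.C:
			}
			sync()
		}
	}()

	for i := 0; i < 3; i++ { // a burst of service/endpoint events triggers one sync
		requestSync()
	}
	time.Sleep(1200 * time.Millisecond)
}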
+ if proxier.healthzServer != nil { + proxier.healthzServer.UpdateTimestamp() + } + proxier.syncRunner.Loop(wait.NeverStop) +} + +func (proxier *Proxier) setInitialized(value bool) { + var initialized int32 + if value { + initialized = 1 + } + atomic.StoreInt32(&proxier.initialized, initialized) +} + +func (proxier *Proxier) isInitialized() bool { + return atomic.LoadInt32(&proxier.initialized) > 0 +} + +func (proxier *Proxier) OnServiceAdd(service *api.Service) { + namespacedName := types.NamespacedName{Namespace: service.Namespace, Name: service.Name} + if proxier.serviceChanges.update(&namespacedName, nil, service) && proxier.isInitialized() { + proxier.syncRunner.Run() + } +} + +func (proxier *Proxier) OnServiceUpdate(oldService, service *api.Service) { + namespacedName := types.NamespacedName{Namespace: service.Namespace, Name: service.Name} + if proxier.serviceChanges.update(&namespacedName, oldService, service) && proxier.isInitialized() { + proxier.syncRunner.Run() + } +} + +func (proxier *Proxier) OnServiceDelete(service *api.Service) { + namespacedName := types.NamespacedName{Namespace: service.Namespace, Name: service.Name} + if proxier.serviceChanges.update(&namespacedName, service, nil) && proxier.isInitialized() { + proxier.syncRunner.Run() + } +} + +func (proxier *Proxier) OnServiceSynced() { + proxier.mu.Lock() + proxier.servicesSynced = true + proxier.setInitialized(proxier.servicesSynced && proxier.endpointsSynced) + proxier.mu.Unlock() + + // Sync unconditionally - this is called once per lifetime. + proxier.syncProxyRules() +} + +func shouldSkipService(svcName types.NamespacedName, service *api.Service) bool { + // if ClusterIP is "None" or empty, skip proxying + if !helper.IsServiceIPSet(service) { + glog.V(3).Infof("Skipping service %s due to clusterIP = %q", svcName, service.Spec.ClusterIP) + return true + } + // Even if ClusterIP is set, ServiceTypeExternalName services don't get proxied + if service.Spec.Type == api.ServiceTypeExternalName { + glog.V(3).Infof("Skipping service %s due to Type=ExternalName", svcName) + return true + } + return false +} + +// is updated by this function (based on the given changes). +// map is cleared after applying them. +func (proxier *Proxier) updateServiceMap() (result updateServiceMapResult) { + result.staleServices = sets.NewString() + + var serviceMap proxyServiceMap = proxier.serviceMap + var changes *serviceChangeMap = &proxier.serviceChanges + + func() { + changes.lock.Lock() + defer changes.lock.Unlock() + for _, change := range changes.items { + existingPorts := serviceMap.merge(change.current, proxier.endpointsMap) + serviceMap.unmerge(change.previous, existingPorts, result.staleServices, proxier.endpointsMap) + } + changes.items = make(map[types.NamespacedName]*serviceChange) + }() + + // TODO: If this will appear to be computationally expensive, consider + // computing this incrementally similarly to serviceMap. 
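// Sketch, not part of this patch: the serviceChanges/endpointsChanges maps drained here
// follow one pattern — remember the state before the first un-synced change, always record
// the latest state, and drop the entry if the two become equal (a net no-op). A minimal
// standalone model with string-valued "state" and hypothetical names:
package main

import "fmt"

type change struct{ previous, current string }

type changeTracker struct{ items map[string]*change }

func (ct *changeTracker) update(name, previous, current string) bool {
	c, exists := ct.items[name]
	if !exists {
		c = &change{previous: previous}
		ct.items[name] = c
	}
	c.current = current
	if c.previous == c.current {
		delete(ct.items, name) // the object is back to its pre-sync state
	}
	return len(ct.items) > 0 // true means a sync is still needed
}

func main() {
	ct := &changeTracker{items: map[string]*change{}}
	fmt.Println(ct.update("svc-a", "v1", "v2")) // true: pending change
	fmt.Println(ct.update("svc-a", "v2", "v1")) // false: net effect is a no-op
}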
+ result.hcServices = make(map[types.NamespacedName]uint16) + for svcPortName, info := range serviceMap { + if info.healthCheckNodePort != 0 { + result.hcServices[svcPortName.NamespacedName] = uint16(info.healthCheckNodePort) + } + } + + return result +} + +func (proxier *Proxier) OnEndpointsAdd(endpoints *api.Endpoints) { + namespacedName := types.NamespacedName{Namespace: endpoints.Namespace, Name: endpoints.Name} + if proxier.endpointsChanges.update(&namespacedName, nil, endpoints) && proxier.isInitialized() { + proxier.syncRunner.Run() + } +} + +func (proxier *Proxier) OnEndpointsUpdate(oldEndpoints, endpoints *api.Endpoints) { + namespacedName := types.NamespacedName{Namespace: endpoints.Namespace, Name: endpoints.Name} + if proxier.endpointsChanges.update(&namespacedName, oldEndpoints, endpoints) && proxier.isInitialized() { + proxier.syncRunner.Run() + } +} + +func (proxier *Proxier) OnEndpointsDelete(endpoints *api.Endpoints) { + namespacedName := types.NamespacedName{Namespace: endpoints.Namespace, Name: endpoints.Name} + if proxier.endpointsChanges.update(&namespacedName, endpoints, nil) && proxier.isInitialized() { + proxier.syncRunner.Run() + } +} + +func (proxier *Proxier) OnEndpointsSynced() { + proxier.mu.Lock() + proxier.endpointsSynced = true + proxier.setInitialized(proxier.servicesSynced && proxier.endpointsSynced) + proxier.mu.Unlock() + + // Sync unconditionally - this is called once per lifetime. + proxier.syncProxyRules() +} + +// is updated by this function (based on the given changes). +// map is cleared after applying them. +func (proxier *Proxier) updateEndpointsMap() (result updateEndpointMapResult) { + result.staleEndpoints = make(map[endpointServicePair]bool) + result.staleServiceNames = make(map[proxy.ServicePortName]bool) + + var endpointsMap proxyEndpointsMap = proxier.endpointsMap + var changes *endpointsChangeMap = &proxier.endpointsChanges + + func() { + changes.lock.Lock() + defer changes.lock.Unlock() + for _, change := range changes.items { + endpointsMap.unmerge(change.previous, proxier.serviceMap) + endpointsMap.merge(change.current, proxier.serviceMap) + } + changes.items = make(map[types.NamespacedName]*endpointsChange) + }() + + if !utilfeature.DefaultFeatureGate.Enabled(features.ExternalTrafficLocalOnly) { + return + } + + // TODO: If this will appear to be computationally expensive, consider + // computing this incrementally similarly to endpointsMap. + result.hcEndpoints = make(map[types.NamespacedName]int) + localIPs := getLocalIPs(endpointsMap) + for nsn, ips := range localIPs { + result.hcEndpoints[nsn] = len(ips) + } + + return result +} +func getLocalIPs(endpointsMap proxyEndpointsMap) map[types.NamespacedName]sets.String { + localIPs := make(map[types.NamespacedName]sets.String) + for svcPortName := range endpointsMap { + for _, ep := range endpointsMap[svcPortName] { + if ep.isLocal { + nsn := svcPortName.NamespacedName + if localIPs[nsn] == nil { + localIPs[nsn] = sets.NewString() + } + localIPs[nsn].Insert(ep.ip) // just the IP part + } + } + } + return localIPs +} + +// Translates single Endpoints object to proxyEndpointsMap. +// This function is used for incremental updated of endpointsMap. +// +// NOTE: endpoints object should NOT be modified. +func endpointsToEndpointsMap(endpoints *api.Endpoints, hostname string) proxyEndpointsMap { + if endpoints == nil { + return nil + } + + endpointsMap := make(proxyEndpointsMap) + // We need to build a map of portname -> all ip:ports for that + // portname. 
+	for i := range endpoints.Subsets {
+		ss := &endpoints.Subsets[i]
+		for i := range ss.Ports {
+			port := &ss.Ports[i]
+			if port.Port == 0 {
+				glog.Warningf("ignoring invalid endpoint port %s", port.Name)
+				continue
+			}
+			svcPortName := proxy.ServicePortName{
+				NamespacedName: types.NamespacedName{Namespace: endpoints.Namespace, Name: endpoints.Name},
+				Port:           port.Name,
+			}
+			for i := range ss.Addresses {
+				addr := &ss.Addresses[i]
+				if addr.IP == "" {
+					glog.Warningf("ignoring invalid endpoint port %s with empty host", port.Name)
+					continue
+				}
+				isLocal := addr.NodeName != nil && *addr.NodeName == hostname
+				epInfo := newEndpointInfo(addr.IP, uint16(port.Port), isLocal)
+				endpointsMap[svcPortName] = append(endpointsMap[svcPortName], epInfo)
+			}
+			if glog.V(3) {
+				newEPList := []*endpointsInfo{}
+				for _, ep := range endpointsMap[svcPortName] {
+					newEPList = append(newEPList, ep)
+				}
+				glog.Infof("Setting endpoints for %q to %+v", svcPortName, newEPList)
+			}
+		}
+	}
+	return endpointsMap
+}
+
+// Translates a single Service object to a proxyServiceMap.
+//
+// NOTE: service object should NOT be modified.
+func serviceToServiceMap(service *api.Service) proxyServiceMap {
+	if service == nil {
+		return nil
+	}
+	svcName := types.NamespacedName{Namespace: service.Namespace, Name: service.Name}
+	if shouldSkipService(svcName, service) {
+		return nil
+	}
+
+	serviceMap := make(proxyServiceMap)
+	for i := range service.Spec.Ports {
+		servicePort := &service.Spec.Ports[i]
+		svcPortName := proxy.ServicePortName{NamespacedName: svcName, Port: servicePort.Name}
+		serviceMap[svcPortName] = newServiceInfo(svcPortName, servicePort, service)
+	}
+	return serviceMap
+}
+
+// This is where the HNS load balancer policies for Services and their
+// backing endpoints are created and applied.
+// This assumes proxier.mu is NOT held (the function takes the lock itself).
+func (proxier *Proxier) syncProxyRules() {
+	proxier.mu.Lock()
+	defer proxier.mu.Unlock()
+
+	start := time.Now()
+	defer func() {
+		SyncProxyRulesLatency.Observe(sinceInMicroseconds(start))
+		glog.V(4).Infof("syncProxyRules took %v", time.Since(start))
+	}()
+	// don't sync rules till we've received services and endpoints
+	if !proxier.endpointsSynced || !proxier.servicesSynced {
+		glog.V(2).Info("Not syncing hns until Services and Endpoints have been received from master")
+		return
+	}
+
+	// We assume that if this was called, we really want to sync them,
+	// even if nothing changed in the meantime. In other words, callers are
+	// responsible for detecting no-op changes and not calling this function.
+	serviceUpdateResult := proxier.updateServiceMap()
+	endpointUpdateResult := proxier.updateEndpointsMap()
+
+	staleServices := serviceUpdateResult.staleServices
+	// merge stale services gathered from updateEndpointsMap
+	for svcPortName := range endpointUpdateResult.staleServiceNames {
+		if svcInfo, ok := proxier.serviceMap[svcPortName]; ok && svcInfo != nil && svcInfo.protocol == api.ProtocolUDP {
+			glog.V(2).Infof("Stale udp service %v -> %s", svcPortName, svcInfo.clusterIP.String())
+			staleServices.Insert(svcInfo.clusterIP.String())
+		}
+	}
+
+	glog.V(3).Infof("Syncing Policies")
+
+	// Program HNS by adding corresponding policies for each service.
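+	// Rough shape of the per-backend HNS calls made in the loop below
+	// (illustrative only; the network name and addresses are made up):
+	//
+	//	network, _ := hcsshim.GetHNSNetworkByName("cbr0")
+	//	ep, _ := network.CreateRemoteEndpoint(&hcsshim.HNSEndpoint{
+	//		IPAddress:  net.ParseIP("10.244.1.5"),
+	//		MacAddress: "00-15-5d-00-00-01",
+	//	})
+	//
+	// The resulting endpoints are then grouped into HNS load balancer policies,
+	// one per service IP/port, via getHnsLoadBalancer.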
+ for svcName, svcInfo := range proxier.serviceMap { + if svcInfo.policyApplied { + glog.V(4).Infof("Policy already applied for %s", spew.Sdump(svcInfo)) + continue + } + + var hnsEndpoints []hcsshim.HNSEndpoint + glog.V(4).Infof("====Applying Policy for %s====", svcName) + // Create Remote endpoints for every endpoint, corresponding to the service + if len(proxier.endpointsMap[svcName]) > 0 { + for _, ep := range proxier.endpointsMap[svcName] { + var newHnsEndpoint *hcsshim.HNSEndpoint + hnsNetworkName := proxier.network.name + var err error + if len(ep.hnsID) > 0 { + newHnsEndpoint, err = hcsshim.GetHNSEndpointByID(ep.hnsID) + } + + if newHnsEndpoint == nil { + // First check if an endpoint resource exists for this IP, on the current host + // A Local endpoint could exist here already + // A remote endpoint was already created and proxy was restarted + newHnsEndpoint, err = getHnsEndpointByIpAddress(net.ParseIP(ep.ip), hnsNetworkName) + } + + if newHnsEndpoint == nil { + if ep.isLocal { + glog.Errorf("Local endpoint not found for %v: err : %v on network %s", ep.ip, err, hnsNetworkName) + continue + } + // hns Endpoint resource was not found, create one + hnsnetwork, err := hcsshim.GetHNSNetworkByName(hnsNetworkName) + if err != nil { + glog.Errorf("%v", err) + continue + } + + hnsEndpoint := &hcsshim.HNSEndpoint{ + MacAddress: ep.macAddress, + IPAddress: net.ParseIP(ep.ip), + } + + newHnsEndpoint, err = hnsnetwork.CreateRemoteEndpoint(hnsEndpoint) + if err != nil { + glog.Errorf("Remote endpoint creation failed: %v", err) + continue + } + } + + // Save the hnsId for reference + LogJson(newHnsEndpoint, "Hns Endpoint resource", 1) + hnsEndpoints = append(hnsEndpoints, *newHnsEndpoint) + ep.hnsID = newHnsEndpoint.Id + ep.refCount++ + Log(ep, "Endpoint resource found", 3) + } + } + + glog.V(3).Infof("Associated endpoints [%s] for service [%s]", spew.Sdump(hnsEndpoints), svcName) + + if len(svcInfo.hnsID) > 0 { + // This should not happen + glog.Warningf("Load Balancer already exists %s -- Debug ", svcInfo.hnsID) + } + + if len(hnsEndpoints) == 0 { + glog.Errorf("Endpoint information not available for service %s. 
Not applying any policy", svcName)
+			continue
+		}
+
+		glog.V(4).Infof("Trying to Apply Policies for service %s", spew.Sdump(svcInfo))
+		var hnsLoadBalancer *hcsshim.PolicyList
+
+		hnsLoadBalancer, err := getHnsLoadBalancer(
+			hnsEndpoints,
+			false,
+			svcInfo.clusterIP.String(),
+			Enum(svcInfo.protocol),
+			uint16(svcInfo.port),
+			uint16(svcInfo.targetPort),
+		)
+		if err != nil {
+			glog.Errorf("Policy creation failed: %v", err)
+			continue
+		}
+
+		svcInfo.hnsID = hnsLoadBalancer.ID
+		glog.V(3).Infof("Hns LoadBalancer resource created for cluster ip resources %v, Id [%s]", svcInfo.clusterIP, hnsLoadBalancer.ID)
+
+		// If nodePort is specified, the user should be able to use nodeIP:nodePort to reach the backend endpoints
+		if svcInfo.nodePort > 0 {
+			hnsLoadBalancer, err := getHnsLoadBalancer(
+				hnsEndpoints,
+				false,
+				"", // VIP has to be empty to automatically select the nodeIP
+				Enum(svcInfo.protocol),
+				uint16(svcInfo.port),
+				uint16(svcInfo.nodePort),
+			)
+			if err != nil {
+				glog.Errorf("Policy creation failed: %v", err)
+				continue
+			}
+
+			svcInfo.nodePorthnsID = hnsLoadBalancer.ID
+			glog.V(3).Infof("Hns LoadBalancer resource created for cluster ip resources %v, Id [%s]", svcInfo.clusterIP, hnsLoadBalancer.ID)
+		}
+
+		// Create a Load Balancer Policy for each external IP
+		for _, externalIp := range svcInfo.externalIPs {
+			// Try loading existing policies, if already available
+			hnsLoadBalancer, err := getHnsLoadBalancer(
+				hnsEndpoints,
+				false,
+				externalIp.ip,
+				Enum(svcInfo.protocol),
+				uint16(svcInfo.port),
+				uint16(svcInfo.targetPort),
+			)
+			if err != nil {
+				glog.Errorf("Policy creation failed: %v", err)
+				continue
+			}
+			externalIp.hnsID = hnsLoadBalancer.ID
+			glog.V(3).Infof("Hns LoadBalancer resource created for externalIp resources %v, Id[%s]", externalIp, hnsLoadBalancer.ID)
+		}
+		// Create a Load Balancer Policy for each loadbalancer ingress
+		for _, lbIngressIp := range svcInfo.loadBalancerIngressIPs {
+			// Try loading existing policies, if already available
+			hnsLoadBalancer, err := getHnsLoadBalancer(
+				hnsEndpoints,
+				false,
+				lbIngressIp.ip,
+				Enum(svcInfo.protocol),
+				uint16(svcInfo.port),
+				uint16(svcInfo.targetPort),
+			)
+			if err != nil {
+				glog.Errorf("Policy creation failed: %v", err)
+				continue
+			}
+			lbIngressIp.hnsID = hnsLoadBalancer.ID
+			glog.V(3).Infof("Hns LoadBalancer resource created for loadBalancer Ingress resources %v", lbIngressIp)
+		}
+		svcInfo.policyApplied = true
+		Log(svcInfo, "+++Policy Successfully applied for service +++", 2)
+	}
+
+	// Update healthz timestamp.
+	if proxier.healthzServer != nil {
+		proxier.healthzServer.UpdateTimestamp()
+	}
+
+	// Update healthchecks. The endpoints list might include services that are
+	// not "OnlyLocal", but the services list will not, and the healthChecker
+	// will just drop those endpoints.
+	if err := proxier.healthChecker.SyncServices(serviceUpdateResult.hcServices); err != nil {
+		glog.Errorf("Error syncing healthcheck services: %v", err)
+	}
+	if err := proxier.healthChecker.SyncEndpoints(endpointUpdateResult.hcEndpoints); err != nil {
+		glog.Errorf("Error syncing healthcheck endpoints: %v", err)
+	}
+
+	// Finish housekeeping.
+	// TODO: these could be made more consistent.
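+	// The Linux proxiers clear stale UDP conntrack entries at this point, e.g.
+	// `conntrack -D --orig-dst 10.20.30.40 --dst-nat 10.240.0.3 -p udp` (the
+	// form exercised by TestDeleteEndpointConnections in proxier_test.go, with
+	// that test's addresses). An equivalent Windows cleanup is still a TODO,
+	// so the loop below only logs the pending deletions.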
+ for _, svcIP := range staleServices.List() { + // TODO : Check if this is required to cleanup stale services here + glog.V(5).Infof("Pending delete stale service IP %s connections", svcIP) + } + +} + +type endpointServicePair struct { + endpoint string + servicePortName proxy.ServicePortName +} diff --git a/pkg/proxy/winkernel/proxier_test.go b/pkg/proxy/winkernel/proxier_test.go new file mode 100644 index 0000000000..7ccce60022 --- /dev/null +++ b/pkg/proxy/winkernel/proxier_test.go @@ -0,0 +1,2474 @@ +/* +Copyright 2015 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package winkernel + +import ( + "bytes" + "reflect" + "strconv" + "testing" + "time" + + "github.com/davecgh/go-spew/spew" + + "fmt" + "net" + "strings" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/intstr" + "k8s.io/apimachinery/pkg/util/sets" + "k8s.io/kubernetes/pkg/api" + "k8s.io/kubernetes/pkg/proxy" + "k8s.io/kubernetes/pkg/util/async" + utiliptables "k8s.io/kubernetes/pkg/util/iptables" + iptablestest "k8s.io/kubernetes/pkg/util/iptables/testing" + "k8s.io/utils/exec" + fakeexec "k8s.io/utils/exec/testing" +) + +func checkAllLines(t *testing.T, table utiliptables.Table, save []byte, expectedLines map[utiliptables.Chain]string) { + chainLines := utiliptables.GetChainLines(table, save) + for chain, line := range chainLines { + if expected, exists := expectedLines[chain]; exists { + if expected != line { + t.Errorf("getChainLines expected chain line not present. For chain: %s Expected: %s Got: %s", chain, expected, line) + } + } else { + t.Errorf("getChainLines expected chain not present: %s", chain) + } + } +} + +func TestReadLinesFromByteBuffer(t *testing.T) { + testFn := func(byteArray []byte, expected []string) { + index := 0 + readIndex := 0 + for ; readIndex < len(byteArray); index++ { + line, n := utiliptables.ReadLine(readIndex, byteArray) + readIndex = n + if expected[index] != line { + t.Errorf("expected:%q, actual:%q", expected[index], line) + } + } // for + if readIndex < len(byteArray) { + t.Errorf("Byte buffer was only partially read. Buffer length is:%d, readIndex is:%d", len(byteArray), readIndex) + } + if index < len(expected) { + t.Errorf("All expected strings were not compared. 
expected arr length:%d, matched count:%d", len(expected), index-1) + } + } + + byteArray1 := []byte("\n Line 1 \n\n\n L ine4 \nLine 5 \n \n") + expected1 := []string{"", "Line 1", "", "", "L ine4", "Line 5", ""} + testFn(byteArray1, expected1) + + byteArray1 = []byte("") + expected1 = []string{} + testFn(byteArray1, expected1) + + byteArray1 = []byte("\n\n") + expected1 = []string{"", ""} + testFn(byteArray1, expected1) +} + +func TestGetChainLines(t *testing.T) { + iptables_save := `# Generated by iptables-save v1.4.7 on Wed Oct 29 14:56:01 2014 + *nat + :PREROUTING ACCEPT [2136997:197881818] + :POSTROUTING ACCEPT [4284525:258542680] + :OUTPUT ACCEPT [5901660:357267963] + -A PREROUTING -m addrtype --dst-type LOCAL -j DOCKER + COMMIT + # Completed on Wed Oct 29 14:56:01 2014` + expected := map[utiliptables.Chain]string{ + utiliptables.ChainPrerouting: ":PREROUTING ACCEPT [2136997:197881818]", + utiliptables.ChainPostrouting: ":POSTROUTING ACCEPT [4284525:258542680]", + utiliptables.ChainOutput: ":OUTPUT ACCEPT [5901660:357267963]", + } + checkAllLines(t, utiliptables.TableNAT, []byte(iptables_save), expected) +} + +func TestGetChainLinesMultipleTables(t *testing.T) { + iptables_save := `# Generated by iptables-save v1.4.21 on Fri Aug 7 14:47:37 2015 + *nat + :PREROUTING ACCEPT [2:138] + :INPUT ACCEPT [0:0] + :OUTPUT ACCEPT [0:0] + :POSTROUTING ACCEPT [0:0] + :DOCKER - [0:0] + :KUBE-NODEPORT-CONTAINER - [0:0] + :KUBE-NODEPORT-HOST - [0:0] + :KUBE-PORTALS-CONTAINER - [0:0] + :KUBE-PORTALS-HOST - [0:0] + :KUBE-SVC-1111111111111111 - [0:0] + :KUBE-SVC-2222222222222222 - [0:0] + :KUBE-SVC-3333333333333333 - [0:0] + :KUBE-SVC-4444444444444444 - [0:0] + :KUBE-SVC-5555555555555555 - [0:0] + :KUBE-SVC-6666666666666666 - [0:0] + -A PREROUTING -m comment --comment "handle ClusterIPs; NOTE: this must be before the NodePort rules" -j KUBE-PORTALS-CONTAINER + -A PREROUTING -m addrtype --dst-type LOCAL -j DOCKER + -A PREROUTING -m addrtype --dst-type LOCAL -m comment --comment "handle service NodePorts; NOTE: this must be the last rule in the chain" -j KUBE-NODEPORT-CONTAINER + -A OUTPUT -m comment --comment "handle ClusterIPs; NOTE: this must be before the NodePort rules" -j KUBE-PORTALS-HOST + -A OUTPUT ! -d 127.0.0.0/8 -m addrtype --dst-type LOCAL -j DOCKER + -A OUTPUT -m addrtype --dst-type LOCAL -m comment --comment "handle service NodePorts; NOTE: this must be the last rule in the chain" -j KUBE-NODEPORT-HOST + -A POSTROUTING -s 10.246.1.0/24 ! 
-o cbr0 -j MASQUERADE + -A POSTROUTING -s 10.0.2.15/32 -d 10.0.2.15/32 -m comment --comment "handle pod connecting to self" -j MASQUERADE + -A KUBE-PORTALS-CONTAINER -d 10.247.0.1/32 -p tcp -m comment --comment "portal for default/kubernetes:" -m state --state NEW -m tcp --dport 443 -j KUBE-SVC-5555555555555555 + -A KUBE-PORTALS-CONTAINER -d 10.247.0.10/32 -p udp -m comment --comment "portal for kube-system/kube-dns:dns" -m state --state NEW -m udp --dport 53 -j KUBE-SVC-6666666666666666 + -A KUBE-PORTALS-CONTAINER -d 10.247.0.10/32 -p tcp -m comment --comment "portal for kube-system/kube-dns:dns-tcp" -m state --state NEW -m tcp --dport 53 -j KUBE-SVC-2222222222222222 + -A KUBE-PORTALS-HOST -d 10.247.0.1/32 -p tcp -m comment --comment "portal for default/kubernetes:" -m state --state NEW -m tcp --dport 443 -j KUBE-SVC-5555555555555555 + -A KUBE-PORTALS-HOST -d 10.247.0.10/32 -p udp -m comment --comment "portal for kube-system/kube-dns:dns" -m state --state NEW -m udp --dport 53 -j KUBE-SVC-6666666666666666 + -A KUBE-PORTALS-HOST -d 10.247.0.10/32 -p tcp -m comment --comment "portal for kube-system/kube-dns:dns-tcp" -m state --state NEW -m tcp --dport 53 -j KUBE-SVC-2222222222222222 + -A KUBE-SVC-1111111111111111 -p udp -m comment --comment "kube-system/kube-dns:dns" -m recent --set --name KUBE-SVC-1111111111111111 --mask 255.255.255.255 --rsource -j DNAT --to-destination 10.246.1.2:53 + -A KUBE-SVC-2222222222222222 -m comment --comment "kube-system/kube-dns:dns-tcp" -j KUBE-SVC-3333333333333333 + -A KUBE-SVC-3333333333333333 -p tcp -m comment --comment "kube-system/kube-dns:dns-tcp" -m recent --set --name KUBE-SVC-3333333333333333 --mask 255.255.255.255 --rsource -j DNAT --to-destination 10.246.1.2:53 + -A KUBE-SVC-4444444444444444 -p tcp -m comment --comment "default/kubernetes:" -m recent --set --name KUBE-SVC-4444444444444444 --mask 255.255.255.255 --rsource -j DNAT --to-destination 10.245.1.2:443 + -A KUBE-SVC-5555555555555555 -m comment --comment "default/kubernetes:" -j KUBE-SVC-4444444444444444 + -A KUBE-SVC-6666666666666666 -m comment --comment "kube-system/kube-dns:dns" -j KUBE-SVC-1111111111111111 + COMMIT + # Completed on Fri Aug 7 14:47:37 2015 + # Generated by iptables-save v1.4.21 on Fri Aug 7 14:47:37 2015 + *filter + :INPUT ACCEPT [17514:83115836] + :FORWARD ACCEPT [0:0] + :OUTPUT ACCEPT [8909:688225] + :DOCKER - [0:0] + -A FORWARD -o cbr0 -j DOCKER + -A FORWARD -o cbr0 -m conntrack --ctstate RELATED,ESTABLISHED -j ACCEPT + -A FORWARD -i cbr0 ! 
-o cbr0 -j ACCEPT + -A FORWARD -i cbr0 -o cbr0 -j ACCEPT + COMMIT + ` + expected := map[utiliptables.Chain]string{ + utiliptables.ChainPrerouting: ":PREROUTING ACCEPT [2:138]", + utiliptables.Chain("INPUT"): ":INPUT ACCEPT [0:0]", + utiliptables.Chain("OUTPUT"): ":OUTPUT ACCEPT [0:0]", + utiliptables.ChainPostrouting: ":POSTROUTING ACCEPT [0:0]", + utiliptables.Chain("DOCKER"): ":DOCKER - [0:0]", + utiliptables.Chain("KUBE-NODEPORT-CONTAINER"): ":KUBE-NODEPORT-CONTAINER - [0:0]", + utiliptables.Chain("KUBE-NODEPORT-HOST"): ":KUBE-NODEPORT-HOST - [0:0]", + utiliptables.Chain("KUBE-PORTALS-CONTAINER"): ":KUBE-PORTALS-CONTAINER - [0:0]", + utiliptables.Chain("KUBE-PORTALS-HOST"): ":KUBE-PORTALS-HOST - [0:0]", + utiliptables.Chain("KUBE-SVC-1111111111111111"): ":KUBE-SVC-1111111111111111 - [0:0]", + utiliptables.Chain("KUBE-SVC-2222222222222222"): ":KUBE-SVC-2222222222222222 - [0:0]", + utiliptables.Chain("KUBE-SVC-3333333333333333"): ":KUBE-SVC-3333333333333333 - [0:0]", + utiliptables.Chain("KUBE-SVC-4444444444444444"): ":KUBE-SVC-4444444444444444 - [0:0]", + utiliptables.Chain("KUBE-SVC-5555555555555555"): ":KUBE-SVC-5555555555555555 - [0:0]", + utiliptables.Chain("KUBE-SVC-6666666666666666"): ":KUBE-SVC-6666666666666666 - [0:0]", + } + checkAllLines(t, utiliptables.TableNAT, []byte(iptables_save), expected) +} + +func newFakeServiceInfo(service proxy.ServicePortName, ip net.IP, port int, protocol api.Protocol, onlyNodeLocalEndpoints bool) *serviceInfo { + return &serviceInfo{ + sessionAffinityType: api.ServiceAffinityNone, // default + stickyMaxAgeMinutes: 180, // TODO: paramaterize this in the API. + clusterIP: ip, + port: port, + protocol: protocol, + onlyNodeLocalEndpoints: onlyNodeLocalEndpoints, + } +} + +func TestDeleteEndpointConnections(t *testing.T) { + fcmd := fakeexec.FakeCmd{ + CombinedOutputScript: []fakeexec.FakeCombinedOutputAction{ + func() ([]byte, error) { return []byte("1 flow entries have been deleted"), nil }, + func() ([]byte, error) { + return []byte(""), fmt.Errorf("conntrack v1.4.2 (conntrack-tools): 0 flow entries have been deleted.") + }, + }, + } + fexec := fakeexec.FakeExec{ + CommandScript: []fakeexec.FakeCommandAction{ + func(cmd string, args ...string) exec.Cmd { return fakeexec.InitFakeCmd(&fcmd, cmd, args...) }, + func(cmd string, args ...string) exec.Cmd { return fakeexec.InitFakeCmd(&fcmd, cmd, args...) 
},
+		},
+		LookPathFunc: func(cmd string) (string, error) { return cmd, nil },
+	}
+
+	serviceMap := make(map[proxy.ServicePortName]*serviceInfo)
+	svc1 := proxy.ServicePortName{NamespacedName: types.NamespacedName{Namespace: "ns1", Name: "svc1"}, Port: "p80"}
+	svc2 := proxy.ServicePortName{NamespacedName: types.NamespacedName{Namespace: "ns1", Name: "svc2"}, Port: "p80"}
+	serviceMap[svc1] = newFakeServiceInfo(svc1, net.IPv4(10, 20, 30, 40), 80, api.ProtocolUDP, false)
+	serviceMap[svc2] = newFakeServiceInfo(svc1, net.IPv4(10, 20, 30, 41), 80, api.ProtocolTCP, false)
+
+	fakeProxier := Proxier{exec: &fexec, serviceMap: serviceMap}
+
+	testCases := []endpointServicePair{
+		{
+			endpoint:        "10.240.0.3:80",
+			servicePortName: svc1,
+		},
+		{
+			endpoint:        "10.240.0.4:80",
+			servicePortName: svc1,
+		},
+		{
+			endpoint:        "10.240.0.5:80",
+			servicePortName: svc2,
+		},
+	}
+
+	expectCommandExecCount := 0
+	for i := range testCases {
+		input := map[endpointServicePair]bool{testCases[i]: true}
+		fakeProxier.deleteEndpointConnections(input)
+		svcInfo := fakeProxier.serviceMap[testCases[i].servicePortName]
+		if svcInfo.protocol == api.ProtocolUDP {
+			svcIp := svcInfo.clusterIP.String()
+			endpointIp := strings.Split(testCases[i].endpoint, ":")[0]
+			expectCommand := fmt.Sprintf("conntrack -D --orig-dst %s --dst-nat %s -p udp", svcIp, endpointIp)
+			execCommand := strings.Join(fcmd.CombinedOutputLog[expectCommandExecCount], " ")
+			if expectCommand != execCommand {
+				t.Errorf("Expected command: %s, but executed %s", expectCommand, execCommand)
+			}
+			expectCommandExecCount += 1
+		}
+
+		if expectCommandExecCount != fexec.CommandCalls {
+			t.Errorf("Expected command executed %d times, but got %d", expectCommandExecCount, fexec.CommandCalls)
+		}
+	}
+}
+
+type fakeClosable struct {
+	closed bool
+}
+
+func (c *fakeClosable) Close() error {
+	c.closed = true
+	return nil
+}
+
+func TestRevertPorts(t *testing.T) {
+	testCases := []struct {
+		replacementPorts []localPort
+		existingPorts    []localPort
+		expectToBeClose  []bool
+	}{
+		{
+			replacementPorts: []localPort{
+				{port: 5001},
+				{port: 5002},
+				{port: 5003},
+			},
+			existingPorts:   []localPort{},
+			expectToBeClose: []bool{true, true, true},
+		},
+		{
+			replacementPorts: []localPort{},
+			existingPorts: []localPort{
+				{port: 5001},
+				{port: 5002},
+				{port: 5003},
+			},
+			expectToBeClose: []bool{},
+		},
+		{
+			replacementPorts: []localPort{
+				{port: 5001},
+				{port: 5002},
+				{port: 5003},
+			},
+			existingPorts: []localPort{
+				{port: 5001},
+				{port: 5002},
+				{port: 5003},
+			},
+			expectToBeClose: []bool{false, false, false},
+		},
+		{
+			replacementPorts: []localPort{
+				{port: 5001},
+				{port: 5002},
+				{port: 5003},
+			},
+			existingPorts: []localPort{
+				{port: 5001},
+				{port: 5003},
+			},
+			expectToBeClose: []bool{false, true, false},
+		},
+		{
+			replacementPorts: []localPort{
+				{port: 5001},
+				{port: 5002},
+				{port: 5003},
+			},
+			existingPorts: []localPort{
+				{port: 5001},
+				{port: 5002},
+				{port: 5003},
+				{port: 5004},
+			},
+			expectToBeClose: []bool{false, false, false},
+		},
+	}
+
+	for i, tc := range testCases {
+		replacementPortsMap := make(map[localPort]closeable)
+		for _, lp := range tc.replacementPorts {
+			replacementPortsMap[lp] = &fakeClosable{}
+		}
+		existingPortsMap := make(map[localPort]closeable)
+		for _, lp := range tc.existingPorts {
+			existingPortsMap[lp] = &fakeClosable{}
+		}
+		revertPorts(replacementPortsMap, existingPortsMap)
+		for j, expectation := range tc.expectToBeClose {
+			if replacementPortsMap[tc.replacementPorts[j]].(*fakeClosable).closed != expectation {
t.Errorf("Expect replacement localport %v to be %v in test case %v", tc.replacementPorts[j], expectation, i) + } + } + for _, lp := range tc.existingPorts { + if existingPortsMap[lp].(*fakeClosable).closed == true { + t.Errorf("Expect existing localport %v to be false in test case %v", lp, i) + } + } + } + +} + +// fakePortOpener implements portOpener. +type fakePortOpener struct { + openPorts []*localPort +} + +// OpenLocalPort fakes out the listen() and bind() used by syncProxyRules +// to lock a local port. +func (f *fakePortOpener) OpenLocalPort(lp *localPort) (closeable, error) { + f.openPorts = append(f.openPorts, lp) + return nil, nil +} + +type fakeHealthChecker struct { + services map[types.NamespacedName]uint16 + endpoints map[types.NamespacedName]int +} + +func newFakeHealthChecker() *fakeHealthChecker { + return &fakeHealthChecker{ + services: map[types.NamespacedName]uint16{}, + endpoints: map[types.NamespacedName]int{}, + } +} + +func (fake *fakeHealthChecker) SyncServices(newServices map[types.NamespacedName]uint16) error { + fake.services = newServices + return nil +} + +func (fake *fakeHealthChecker) SyncEndpoints(newEndpoints map[types.NamespacedName]int) error { + fake.endpoints = newEndpoints + return nil +} + +const testHostname = "test-hostname" + +func NewFakeProxier(ipt utiliptables.Interface) *Proxier { + // TODO: Call NewProxier after refactoring out the goroutine + // invocation into a Run() method. + p := &Proxier{ + exec: &fakeexec.FakeExec{}, + serviceMap: make(proxyServiceMap), + serviceChanges: newServiceChangeMap(), + endpointsMap: make(proxyEndpointsMap), + endpointsChanges: newEndpointsChangeMap(testHostname), + iptables: ipt, + clusterCIDR: "10.0.0.0/24", + hostname: testHostname, + portsMap: make(map[localPort]closeable), + portMapper: &fakePortOpener{[]*localPort{}}, + healthChecker: newFakeHealthChecker(), + precomputedProbabilities: make([]string, 0, 1001), + iptablesData: bytes.NewBuffer(nil), + filterChains: bytes.NewBuffer(nil), + filterRules: bytes.NewBuffer(nil), + natChains: bytes.NewBuffer(nil), + natRules: bytes.NewBuffer(nil), + } + p.syncRunner = async.NewBoundedFrequencyRunner("test-sync-runner", p.syncProxyRules, 0, time.Minute, 1) + return p +} + +func hasJump(rules []iptablestest.Rule, destChain, destIP string, destPort int) bool { + destPortStr := strconv.Itoa(destPort) + match := false + for _, r := range rules { + if r[iptablestest.Jump] == destChain { + match = true + if destIP != "" { + if strings.Contains(r[iptablestest.Destination], destIP) && (strings.Contains(r[iptablestest.DPort], destPortStr) || r[iptablestest.DPort] == "") { + return true + } + match = false + } + if destPort != 0 { + if strings.Contains(r[iptablestest.DPort], destPortStr) && (strings.Contains(r[iptablestest.Destination], destIP) || r[iptablestest.Destination] == "") { + return true + } + match = false + } + } + } + return match +} + +func TestHasJump(t *testing.T) { + testCases := map[string]struct { + rules []iptablestest.Rule + destChain string + destIP string + destPort int + expected bool + }{ + "case 1": { + // Match the 1st rule(both dest IP and dest Port) + rules: []iptablestest.Rule{ + {"-d ": "10.20.30.41/32", "--dport ": "80", "-p ": "tcp", "-j ": "REJECT"}, + {"--dport ": "3001", "-p ": "tcp", "-j ": "KUBE-MARK-MASQ"}, + }, + destChain: "REJECT", + destIP: "10.20.30.41", + destPort: 80, + expected: true, + }, + "case 2": { + // Match the 2nd rule(dest Port) + rules: []iptablestest.Rule{ + {"-d ": "10.20.30.41/32", "-p ": "tcp", "-j ": "REJECT"}, 
+ {"--dport ": "3001", "-p ": "tcp", "-j ": "REJECT"}, + }, + destChain: "REJECT", + destIP: "", + destPort: 3001, + expected: true, + }, + "case 3": { + // Match both dest IP and dest Port + rules: []iptablestest.Rule{ + {"-d ": "1.2.3.4/32", "--dport ": "80", "-p ": "tcp", "-j ": "KUBE-XLB-GF53O3C2HZEXL2XN"}, + }, + destChain: "KUBE-XLB-GF53O3C2HZEXL2XN", + destIP: "1.2.3.4", + destPort: 80, + expected: true, + }, + "case 4": { + // Match dest IP but doesn't match dest Port + rules: []iptablestest.Rule{ + {"-d ": "1.2.3.4/32", "--dport ": "80", "-p ": "tcp", "-j ": "KUBE-XLB-GF53O3C2HZEXL2XN"}, + }, + destChain: "KUBE-XLB-GF53O3C2HZEXL2XN", + destIP: "1.2.3.4", + destPort: 8080, + expected: false, + }, + "case 5": { + // Match dest Port but doesn't match dest IP + rules: []iptablestest.Rule{ + {"-d ": "1.2.3.4/32", "--dport ": "80", "-p ": "tcp", "-j ": "KUBE-XLB-GF53O3C2HZEXL2XN"}, + }, + destChain: "KUBE-XLB-GF53O3C2HZEXL2XN", + destIP: "10.20.30.40", + destPort: 80, + expected: false, + }, + "case 6": { + // Match the 2nd rule(dest IP) + rules: []iptablestest.Rule{ + {"-d ": "10.20.30.41/32", "-p ": "tcp", "-j ": "REJECT"}, + {"-d ": "1.2.3.4/32", "-p ": "tcp", "-j ": "REJECT"}, + {"--dport ": "3001", "-p ": "tcp", "-j ": "REJECT"}, + }, + destChain: "REJECT", + destIP: "1.2.3.4", + destPort: 8080, + expected: true, + }, + "case 7": { + // Match the 2nd rule(dest Port) + rules: []iptablestest.Rule{ + {"-d ": "10.20.30.41/32", "-p ": "tcp", "-j ": "REJECT"}, + {"--dport ": "3001", "-p ": "tcp", "-j ": "REJECT"}, + }, + destChain: "REJECT", + destIP: "1.2.3.4", + destPort: 3001, + expected: true, + }, + "case 8": { + // Match the 1st rule(dest IP) + rules: []iptablestest.Rule{ + {"-d ": "10.20.30.41/32", "-p ": "tcp", "-j ": "REJECT"}, + {"--dport ": "3001", "-p ": "tcp", "-j ": "REJECT"}, + }, + destChain: "REJECT", + destIP: "10.20.30.41", + destPort: 8080, + expected: true, + }, + "case 9": { + rules: []iptablestest.Rule{ + {"-j ": "KUBE-SEP-LWSOSDSHMKPJHHJV"}, + }, + destChain: "KUBE-SEP-LWSOSDSHMKPJHHJV", + destIP: "", + destPort: 0, + expected: true, + }, + "case 10": { + rules: []iptablestest.Rule{ + {"-j ": "KUBE-SEP-FOO"}, + }, + destChain: "KUBE-SEP-BAR", + destIP: "", + destPort: 0, + expected: false, + }, + } + + for k, tc := range testCases { + if got := hasJump(tc.rules, tc.destChain, tc.destIP, tc.destPort); got != tc.expected { + t.Errorf("%v: expected %v, got %v", k, tc.expected, got) + } + } +} + +func hasDNAT(rules []iptablestest.Rule, endpoint string) bool { + for _, r := range rules { + if r[iptablestest.ToDest] == endpoint { + return true + } + } + return false +} + +func errorf(msg string, rules []iptablestest.Rule, t *testing.T) { + for _, r := range rules { + t.Logf("%q", r) + } + t.Errorf("%v", msg) +} + +func TestClusterIPReject(t *testing.T) { + ipt := iptablestest.NewFake() + fp := NewFakeProxier(ipt) + svcIP := "10.20.30.41" + svcPort := 80 + svcPortName := proxy.ServicePortName{ + NamespacedName: makeNSN("ns1", "svc1"), + Port: "p80", + } + + makeServiceMap(fp, + makeTestService(svcPortName.Namespace, svcPortName.Namespace, func(svc *api.Service) { + svc.Spec.ClusterIP = svcIP + svc.Spec.Ports = []api.ServicePort{{ + Name: svcPortName.Port, + Port: int32(svcPort), + Protocol: api.ProtocolTCP, + }} + }), + ) + makeEndpointsMap(fp) + fp.syncProxyRules() + + svcChain := string(servicePortChainName(svcPortName.String(), strings.ToLower(string(api.ProtocolTCP)))) + svcRules := ipt.GetRules(svcChain) + if len(svcRules) != 0 { + errorf(fmt.Sprintf("Unexpected 
rule for chain %v service %v without endpoints", svcChain, svcPortName), svcRules, t) + } + kubeSvcRules := ipt.GetRules(string(kubeServicesChain)) + if !hasJump(kubeSvcRules, iptablestest.Reject, svcIP, svcPort) { + errorf(fmt.Sprintf("Failed to find a %v rule for service %v with no endpoints", iptablestest.Reject, svcPortName), kubeSvcRules, t) + } +} + +func TestClusterIPEndpointsJump(t *testing.T) { + ipt := iptablestest.NewFake() + fp := NewFakeProxier(ipt) + svcIP := "10.20.30.41" + svcPort := 80 + svcPortName := proxy.ServicePortName{ + NamespacedName: makeNSN("ns1", "svc1"), + Port: "p80", + } + + makeServiceMap(fp, + makeTestService(svcPortName.Namespace, svcPortName.Name, func(svc *api.Service) { + svc.Spec.ClusterIP = svcIP + svc.Spec.Ports = []api.ServicePort{{ + Name: svcPortName.Port, + Port: int32(svcPort), + Protocol: api.ProtocolTCP, + }} + }), + ) + + epIP := "10.180.0.1" + makeEndpointsMap(fp, + makeTestEndpoints(svcPortName.Namespace, svcPortName.Name, func(ept *api.Endpoints) { + ept.Subsets = []api.EndpointSubset{{ + Addresses: []api.EndpointAddress{{ + IP: epIP, + }}, + Ports: []api.EndpointPort{{ + Name: svcPortName.Port, + Port: int32(svcPort), + }}, + }} + }), + ) + + fp.syncProxyRules() + + epStr := fmt.Sprintf("%s:%d", epIP, svcPort) + svcChain := string(servicePortChainName(svcPortName.String(), strings.ToLower(string(api.ProtocolTCP)))) + epChain := string(servicePortEndpointChainName(svcPortName.String(), strings.ToLower(string(api.ProtocolTCP)), epStr)) + + kubeSvcRules := ipt.GetRules(string(kubeServicesChain)) + if !hasJump(kubeSvcRules, svcChain, svcIP, svcPort) { + errorf(fmt.Sprintf("Failed to find jump from KUBE-SERVICES to %v chain", svcChain), kubeSvcRules, t) + } + + svcRules := ipt.GetRules(svcChain) + if !hasJump(svcRules, epChain, "", 0) { + errorf(fmt.Sprintf("Failed to jump to ep chain %v", epChain), svcRules, t) + } + epRules := ipt.GetRules(epChain) + if !hasDNAT(epRules, epStr) { + errorf(fmt.Sprintf("Endpoint chain %v lacks DNAT to %v", epChain, epStr), epRules, t) + } +} + +func TestLoadBalancer(t *testing.T) { + ipt := iptablestest.NewFake() + fp := NewFakeProxier(ipt) + svcIP := "10.20.30.41" + svcPort := 80 + svcNodePort := 3001 + svcLBIP := "1.2.3.4" + svcPortName := proxy.ServicePortName{ + NamespacedName: makeNSN("ns1", "svc1"), + Port: "p80", + } + + makeServiceMap(fp, + makeTestService(svcPortName.Namespace, svcPortName.Name, func(svc *api.Service) { + svc.Spec.Type = "LoadBalancer" + svc.Spec.ClusterIP = svcIP + svc.Spec.Ports = []api.ServicePort{{ + Name: svcPortName.Port, + Port: int32(svcPort), + Protocol: api.ProtocolTCP, + NodePort: int32(svcNodePort), + }} + svc.Status.LoadBalancer.Ingress = []api.LoadBalancerIngress{{ + IP: svcLBIP, + }} + }), + ) + + epIP := "10.180.0.1" + makeEndpointsMap(fp, + makeTestEndpoints(svcPortName.Namespace, svcPortName.Name, func(ept *api.Endpoints) { + ept.Subsets = []api.EndpointSubset{{ + Addresses: []api.EndpointAddress{{ + IP: epIP, + }}, + Ports: []api.EndpointPort{{ + Name: svcPortName.Port, + Port: int32(svcPort), + }}, + }} + }), + ) + + fp.syncProxyRules() + + proto := strings.ToLower(string(api.ProtocolTCP)) + fwChain := string(serviceFirewallChainName(svcPortName.String(), proto)) + svcChain := string(servicePortChainName(svcPortName.String(), proto)) + //lbChain := string(serviceLBChainName(svcPortName.String(), proto)) + + kubeSvcRules := ipt.GetRules(string(kubeServicesChain)) + if !hasJump(kubeSvcRules, fwChain, svcLBIP, svcPort) { + errorf(fmt.Sprintf("Failed to find jump to 
firewall chain %v", fwChain), kubeSvcRules, t) + } + + fwRules := ipt.GetRules(fwChain) + if !hasJump(fwRules, svcChain, "", 0) || !hasJump(fwRules, string(KubeMarkMasqChain), "", 0) { + errorf(fmt.Sprintf("Failed to find jump from firewall chain %v to svc chain %v", fwChain, svcChain), fwRules, t) + } +} + +func TestNodePort(t *testing.T) { + ipt := iptablestest.NewFake() + fp := NewFakeProxier(ipt) + svcIP := "10.20.30.41" + svcPort := 80 + svcNodePort := 3001 + svcPortName := proxy.ServicePortName{ + NamespacedName: makeNSN("ns1", "svc1"), + Port: "p80", + } + + makeServiceMap(fp, + makeTestService(svcPortName.Namespace, svcPortName.Name, func(svc *api.Service) { + svc.Spec.Type = "NodePort" + svc.Spec.ClusterIP = svcIP + svc.Spec.Ports = []api.ServicePort{{ + Name: svcPortName.Port, + Port: int32(svcPort), + Protocol: api.ProtocolTCP, + NodePort: int32(svcNodePort), + }} + }), + ) + + epIP := "10.180.0.1" + makeEndpointsMap(fp, + makeTestEndpoints(svcPortName.Namespace, svcPortName.Name, func(ept *api.Endpoints) { + ept.Subsets = []api.EndpointSubset{{ + Addresses: []api.EndpointAddress{{ + IP: epIP, + }}, + Ports: []api.EndpointPort{{ + Name: svcPortName.Port, + Port: int32(svcPort), + }}, + }} + }), + ) + + fp.syncProxyRules() + + proto := strings.ToLower(string(api.ProtocolTCP)) + svcChain := string(servicePortChainName(svcPortName.String(), proto)) + + kubeNodePortRules := ipt.GetRules(string(kubeNodePortsChain)) + if !hasJump(kubeNodePortRules, svcChain, "", svcNodePort) { + errorf(fmt.Sprintf("Failed to find jump to svc chain %v", svcChain), kubeNodePortRules, t) + } +} + +func TestExternalIPsReject(t *testing.T) { + ipt := iptablestest.NewFake() + fp := NewFakeProxier(ipt) + svcIP := "10.20.30.41" + svcPort := 80 + svcExternalIPs := "50.60.70.81" + svcPortName := proxy.ServicePortName{ + NamespacedName: makeNSN("ns1", "svc1"), + Port: "p80", + } + + makeServiceMap(fp, + makeTestService(svcPortName.Namespace, svcPortName.Name, func(svc *api.Service) { + svc.Spec.Type = "ClusterIP" + svc.Spec.ClusterIP = svcIP + svc.Spec.ExternalIPs = []string{svcExternalIPs} + svc.Spec.Ports = []api.ServicePort{{ + Name: svcPortName.Port, + Port: int32(svcPort), + Protocol: api.ProtocolTCP, + TargetPort: intstr.FromInt(svcPort), + }} + }), + ) + makeEndpointsMap(fp) + + fp.syncProxyRules() + + kubeSvcRules := ipt.GetRules(string(kubeServicesChain)) + if !hasJump(kubeSvcRules, iptablestest.Reject, svcExternalIPs, svcPort) { + errorf(fmt.Sprintf("Failed to a %v rule for externalIP %v with no endpoints", iptablestest.Reject, svcPortName), kubeSvcRules, t) + } +} + +func TestNodePortReject(t *testing.T) { + ipt := iptablestest.NewFake() + fp := NewFakeProxier(ipt) + svcIP := "10.20.30.41" + svcPort := 80 + svcNodePort := 3001 + svcPortName := proxy.ServicePortName{ + NamespacedName: makeNSN("ns1", "svc1"), + Port: "p80", + } + + makeServiceMap(fp, + makeTestService(svcPortName.Namespace, svcPortName.Name, func(svc *api.Service) { + svc.Spec.Type = "NodePort" + svc.Spec.ClusterIP = svcIP + svc.Spec.Ports = []api.ServicePort{{ + Name: svcPortName.Port, + Port: int32(svcPort), + Protocol: api.ProtocolTCP, + NodePort: int32(svcNodePort), + }} + }), + ) + makeEndpointsMap(fp) + + fp.syncProxyRules() + + kubeSvcRules := ipt.GetRules(string(kubeServicesChain)) + if !hasJump(kubeSvcRules, iptablestest.Reject, svcIP, svcNodePort) { + errorf(fmt.Sprintf("Failed to find a %v rule for service %v with no endpoints", iptablestest.Reject, svcPortName), kubeSvcRules, t) + } +} + +func strPtr(s string) *string { + 
return &s +} + +func TestOnlyLocalLoadBalancing(t *testing.T) { + ipt := iptablestest.NewFake() + fp := NewFakeProxier(ipt) + svcIP := "10.20.30.41" + svcPort := 80 + svcNodePort := 3001 + svcLBIP := "1.2.3.4" + svcPortName := proxy.ServicePortName{ + NamespacedName: makeNSN("ns1", "svc1"), + Port: "p80", + } + + makeServiceMap(fp, + makeTestService(svcPortName.Namespace, svcPortName.Name, func(svc *api.Service) { + svc.Spec.Type = "LoadBalancer" + svc.Spec.ClusterIP = svcIP + svc.Spec.Ports = []api.ServicePort{{ + Name: svcPortName.Port, + Port: int32(svcPort), + Protocol: api.ProtocolTCP, + NodePort: int32(svcNodePort), + }} + svc.Status.LoadBalancer.Ingress = []api.LoadBalancerIngress{{ + IP: svcLBIP, + }} + svc.Annotations[api.BetaAnnotationExternalTraffic] = api.AnnotationValueExternalTrafficLocal + }), + ) + + epIP1 := "10.180.0.1" + epIP2 := "10.180.2.1" + epStrLocal := fmt.Sprintf("%s:%d", epIP1, svcPort) + epStrNonLocal := fmt.Sprintf("%s:%d", epIP2, svcPort) + makeEndpointsMap(fp, + makeTestEndpoints(svcPortName.Namespace, svcPortName.Name, func(ept *api.Endpoints) { + ept.Subsets = []api.EndpointSubset{{ + Addresses: []api.EndpointAddress{{ + IP: epIP1, + NodeName: nil, + }, { + IP: epIP2, + NodeName: strPtr(testHostname), + }}, + Ports: []api.EndpointPort{{ + Name: svcPortName.Port, + Port: int32(svcPort), + }}, + }} + }), + ) + + fp.syncProxyRules() + + proto := strings.ToLower(string(api.ProtocolTCP)) + fwChain := string(serviceFirewallChainName(svcPortName.String(), proto)) + lbChain := string(serviceLBChainName(svcPortName.String(), proto)) + + nonLocalEpChain := string(servicePortEndpointChainName(svcPortName.String(), strings.ToLower(string(api.ProtocolTCP)), epStrLocal)) + localEpChain := string(servicePortEndpointChainName(svcPortName.String(), strings.ToLower(string(api.ProtocolTCP)), epStrNonLocal)) + + kubeSvcRules := ipt.GetRules(string(kubeServicesChain)) + if !hasJump(kubeSvcRules, fwChain, svcLBIP, svcPort) { + errorf(fmt.Sprintf("Failed to find jump to firewall chain %v", fwChain), kubeSvcRules, t) + } + + fwRules := ipt.GetRules(fwChain) + if !hasJump(fwRules, lbChain, "", 0) { + errorf(fmt.Sprintf("Failed to find jump from firewall chain %v to svc chain %v", fwChain, lbChain), fwRules, t) + } + if hasJump(fwRules, string(KubeMarkMasqChain), "", 0) { + errorf(fmt.Sprintf("Found jump from fw chain %v to MASQUERADE", fwChain), fwRules, t) + } + + lbRules := ipt.GetRules(lbChain) + if hasJump(lbRules, nonLocalEpChain, "", 0) { + errorf(fmt.Sprintf("Found jump from lb chain %v to non-local ep %v", lbChain, epStrLocal), lbRules, t) + } + if !hasJump(lbRules, localEpChain, "", 0) { + errorf(fmt.Sprintf("Didn't find jump from lb chain %v to local ep %v", lbChain, epStrNonLocal), lbRules, t) + } +} + +func TestOnlyLocalNodePortsNoClusterCIDR(t *testing.T) { + ipt := iptablestest.NewFake() + fp := NewFakeProxier(ipt) + // set cluster CIDR to empty before test + fp.clusterCIDR = "" + onlyLocalNodePorts(t, fp, ipt) +} + +func TestOnlyLocalNodePorts(t *testing.T) { + ipt := iptablestest.NewFake() + fp := NewFakeProxier(ipt) + onlyLocalNodePorts(t, fp, ipt) +} + +func onlyLocalNodePorts(t *testing.T, fp *Proxier, ipt *iptablestest.FakeIPTables) { + shouldLBTOSVCRuleExist := len(fp.clusterCIDR) > 0 + svcIP := "10.20.30.41" + svcPort := 80 + svcNodePort := 3001 + svcPortName := proxy.ServicePortName{ + NamespacedName: makeNSN("ns1", "svc1"), + Port: "p80", + } + + makeServiceMap(fp, + makeTestService(svcPortName.Namespace, svcPortName.Name, func(svc *api.Service) { + 
svc.Spec.Type = "NodePort" + svc.Spec.ClusterIP = svcIP + svc.Spec.Ports = []api.ServicePort{{ + Name: svcPortName.Port, + Port: int32(svcPort), + Protocol: api.ProtocolTCP, + NodePort: int32(svcNodePort), + }} + svc.Annotations[api.BetaAnnotationExternalTraffic] = api.AnnotationValueExternalTrafficLocal + }), + ) + + epIP1 := "10.180.0.1" + epIP2 := "10.180.2.1" + epStrLocal := fmt.Sprintf("%s:%d", epIP1, svcPort) + epStrNonLocal := fmt.Sprintf("%s:%d", epIP2, svcPort) + makeEndpointsMap(fp, + makeTestEndpoints(svcPortName.Namespace, svcPortName.Name, func(ept *api.Endpoints) { + ept.Subsets = []api.EndpointSubset{{ + Addresses: []api.EndpointAddress{{ + IP: epIP1, + NodeName: nil, + }, { + IP: epIP2, + NodeName: strPtr(testHostname), + }}, + Ports: []api.EndpointPort{{ + Name: svcPortName.Port, + Port: int32(svcPort), + }}, + }} + }), + ) + + fp.syncProxyRules() + + proto := strings.ToLower(string(api.ProtocolTCP)) + lbChain := string(serviceLBChainName(svcPortName.String(), proto)) + + nonLocalEpChain := string(servicePortEndpointChainName(svcPortName.String(), proto, epStrLocal)) + localEpChain := string(servicePortEndpointChainName(svcPortName.String(), proto, epStrNonLocal)) + + kubeNodePortRules := ipt.GetRules(string(kubeNodePortsChain)) + if !hasJump(kubeNodePortRules, lbChain, "", svcNodePort) { + errorf(fmt.Sprintf("Failed to find jump to lb chain %v", lbChain), kubeNodePortRules, t) + } + + svcChain := string(servicePortChainName(svcPortName.String(), proto)) + lbRules := ipt.GetRules(lbChain) + if hasJump(lbRules, nonLocalEpChain, "", 0) { + errorf(fmt.Sprintf("Found jump from lb chain %v to non-local ep %v", lbChain, epStrLocal), lbRules, t) + } + if hasJump(lbRules, svcChain, "", 0) != shouldLBTOSVCRuleExist { + prefix := "Did not find " + if !shouldLBTOSVCRuleExist { + prefix = "Found " + } + errorf(fmt.Sprintf("%s jump from lb chain %v to svc %v", prefix, lbChain, svcChain), lbRules, t) + } + if !hasJump(lbRules, localEpChain, "", 0) { + errorf(fmt.Sprintf("Didn't find jump from lb chain %v to local ep %v", lbChain, epStrLocal), lbRules, t) + } +} + +func makeTestService(namespace, name string, svcFunc func(*api.Service)) *api.Service { + svc := &api.Service{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: namespace, + Annotations: map[string]string{}, + }, + Spec: api.ServiceSpec{}, + Status: api.ServiceStatus{}, + } + svcFunc(svc) + return svc +} + +func addTestPort(array []api.ServicePort, name string, protocol api.Protocol, port, nodeport int32, targetPort int) []api.ServicePort { + svcPort := api.ServicePort{ + Name: name, + Protocol: protocol, + Port: port, + NodePort: nodeport, + TargetPort: intstr.FromInt(targetPort), + } + return append(array, svcPort) +} + +func TestBuildServiceMapAddRemove(t *testing.T) { + ipt := iptablestest.NewFake() + fp := NewFakeProxier(ipt) + + services := []*api.Service{ + makeTestService("somewhere-else", "cluster-ip", func(svc *api.Service) { + svc.Spec.Type = api.ServiceTypeClusterIP + svc.Spec.ClusterIP = "172.16.55.4" + svc.Spec.Ports = addTestPort(svc.Spec.Ports, "something", "UDP", 1234, 4321, 0) + svc.Spec.Ports = addTestPort(svc.Spec.Ports, "somethingelse", "UDP", 1235, 5321, 0) + }), + makeTestService("somewhere-else", "node-port", func(svc *api.Service) { + svc.Spec.Type = api.ServiceTypeNodePort + svc.Spec.ClusterIP = "172.16.55.10" + svc.Spec.Ports = addTestPort(svc.Spec.Ports, "blahblah", "UDP", 345, 678, 0) + svc.Spec.Ports = addTestPort(svc.Spec.Ports, "moreblahblah", "TCP", 344, 677, 0) + }), + 
makeTestService("somewhere", "load-balancer", func(svc *api.Service) { + svc.Spec.Type = api.ServiceTypeLoadBalancer + svc.Spec.ClusterIP = "172.16.55.11" + svc.Spec.LoadBalancerIP = "5.6.7.8" + svc.Spec.Ports = addTestPort(svc.Spec.Ports, "foobar", "UDP", 8675, 30061, 7000) + svc.Spec.Ports = addTestPort(svc.Spec.Ports, "baz", "UDP", 8676, 30062, 7001) + svc.Status.LoadBalancer = api.LoadBalancerStatus{ + Ingress: []api.LoadBalancerIngress{ + {IP: "10.1.2.4"}, + }, + } + }), + makeTestService("somewhere", "only-local-load-balancer", func(svc *api.Service) { + svc.ObjectMeta.Annotations = map[string]string{ + api.BetaAnnotationExternalTraffic: api.AnnotationValueExternalTrafficLocal, + api.BetaAnnotationHealthCheckNodePort: "345", + } + svc.Spec.Type = api.ServiceTypeLoadBalancer + svc.Spec.ClusterIP = "172.16.55.12" + svc.Spec.LoadBalancerIP = "5.6.7.8" + svc.Spec.Ports = addTestPort(svc.Spec.Ports, "foobar2", "UDP", 8677, 30063, 7002) + svc.Spec.Ports = addTestPort(svc.Spec.Ports, "baz", "UDP", 8678, 30064, 7003) + svc.Status.LoadBalancer = api.LoadBalancerStatus{ + Ingress: []api.LoadBalancerIngress{ + {IP: "10.1.2.3"}, + }, + } + }), + } + + for i := range services { + fp.OnServiceAdd(services[i]) + } + result := updateServiceMap(fp.serviceMap, &fp.serviceChanges) + if len(fp.serviceMap) != 8 { + t.Errorf("expected service map length 8, got %v", fp.serviceMap) + } + + // The only-local-loadbalancer ones get added + if len(result.hcServices) != 1 { + t.Errorf("expected 1 healthcheck port, got %v", result.hcServices) + } else { + nsn := makeNSN("somewhere", "only-local-load-balancer") + if port, found := result.hcServices[nsn]; !found || port != 345 { + t.Errorf("expected healthcheck port [%q]=345: got %v", nsn, result.hcServices) + } + } + + if len(result.staleServices) != 0 { + // Services only added, so nothing stale yet + t.Errorf("expected stale UDP services length 0, got %d", len(result.staleServices)) + } + + // Remove some stuff + // oneService is a modification of services[0] with removed first port. + oneService := makeTestService("somewhere-else", "cluster-ip", func(svc *api.Service) { + svc.Spec.Type = api.ServiceTypeClusterIP + svc.Spec.ClusterIP = "172.16.55.4" + svc.Spec.Ports = addTestPort(svc.Spec.Ports, "somethingelse", "UDP", 1235, 5321, 0) + }) + + fp.OnServiceUpdate(services[0], oneService) + fp.OnServiceDelete(services[1]) + fp.OnServiceDelete(services[2]) + fp.OnServiceDelete(services[3]) + + result = updateServiceMap(fp.serviceMap, &fp.serviceChanges) + if len(fp.serviceMap) != 1 { + t.Errorf("expected service map length 1, got %v", fp.serviceMap) + } + + if len(result.hcServices) != 0 { + t.Errorf("expected 0 healthcheck ports, got %v", result.hcServices) + } + + // All services but one were deleted. While you'd expect only the ClusterIPs + // from the three deleted services here, we still have the ClusterIP for + // the not-deleted service, because one of it's ServicePorts was deleted. 
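+	// For reference (illustrative shorthand, derived from the services defined
+	// above), where each expected stale ClusterIP comes from:
+	//
+	//	somewhere-else/cluster-ip          -> 172.16.55.4  (UDP port "something" removed by the update)
+	//	somewhere-else/node-port           -> 172.16.55.10 (service deleted; had a UDP port)
+	//	somewhere/load-balancer            -> 172.16.55.11 (service deleted; UDP ports)
+	//	somewhere/only-local-load-balancer -> 172.16.55.12 (service deleted; UDP ports)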
+ expectedStaleUDPServices := []string{"172.16.55.10", "172.16.55.4", "172.16.55.11", "172.16.55.12"} + if len(result.staleServices) != len(expectedStaleUDPServices) { + t.Errorf("expected stale UDP services length %d, got %v", len(expectedStaleUDPServices), result.staleServices.List()) + } + for _, ip := range expectedStaleUDPServices { + if !result.staleServices.Has(ip) { + t.Errorf("expected stale UDP service service %s", ip) + } + } +} + +func TestBuildServiceMapServiceHeadless(t *testing.T) { + ipt := iptablestest.NewFake() + fp := NewFakeProxier(ipt) + + makeServiceMap(fp, + makeTestService("somewhere-else", "headless", func(svc *api.Service) { + svc.Spec.Type = api.ServiceTypeClusterIP + svc.Spec.ClusterIP = api.ClusterIPNone + svc.Spec.Ports = addTestPort(svc.Spec.Ports, "rpc", "UDP", 1234, 0, 0) + }), + makeTestService("somewhere-else", "headless-without-port", func(svc *api.Service) { + svc.Spec.Type = api.ServiceTypeClusterIP + svc.Spec.ClusterIP = api.ClusterIPNone + }), + ) + + // Headless service should be ignored + result := updateServiceMap(fp.serviceMap, &fp.serviceChanges) + if len(fp.serviceMap) != 0 { + t.Errorf("expected service map length 0, got %d", len(fp.serviceMap)) + } + + // No proxied services, so no healthchecks + if len(result.hcServices) != 0 { + t.Errorf("expected healthcheck ports length 0, got %d", len(result.hcServices)) + } + + if len(result.staleServices) != 0 { + t.Errorf("expected stale UDP services length 0, got %d", len(result.staleServices)) + } +} + +func TestBuildServiceMapServiceTypeExternalName(t *testing.T) { + ipt := iptablestest.NewFake() + fp := NewFakeProxier(ipt) + + makeServiceMap(fp, + makeTestService("somewhere-else", "external-name", func(svc *api.Service) { + svc.Spec.Type = api.ServiceTypeExternalName + svc.Spec.ClusterIP = "172.16.55.4" // Should be ignored + svc.Spec.ExternalName = "foo2.bar.com" + svc.Spec.Ports = addTestPort(svc.Spec.Ports, "blah", "UDP", 1235, 5321, 0) + }), + ) + + result := updateServiceMap(fp.serviceMap, &fp.serviceChanges) + if len(fp.serviceMap) != 0 { + t.Errorf("expected service map length 0, got %v", fp.serviceMap) + } + // No proxied services, so no healthchecks + if len(result.hcServices) != 0 { + t.Errorf("expected healthcheck ports length 0, got %v", result.hcServices) + } + if len(result.staleServices) != 0 { + t.Errorf("expected stale UDP services length 0, got %v", result.staleServices) + } +} + +func TestBuildServiceMapServiceUpdate(t *testing.T) { + ipt := iptablestest.NewFake() + fp := NewFakeProxier(ipt) + + servicev1 := makeTestService("somewhere", "some-service", func(svc *api.Service) { + svc.Spec.Type = api.ServiceTypeClusterIP + svc.Spec.ClusterIP = "172.16.55.4" + svc.Spec.Ports = addTestPort(svc.Spec.Ports, "something", "UDP", 1234, 4321, 0) + svc.Spec.Ports = addTestPort(svc.Spec.Ports, "somethingelse", "TCP", 1235, 5321, 0) + }) + servicev2 := makeTestService("somewhere", "some-service", func(svc *api.Service) { + svc.ObjectMeta.Annotations = map[string]string{ + api.BetaAnnotationExternalTraffic: api.AnnotationValueExternalTrafficLocal, + api.BetaAnnotationHealthCheckNodePort: "345", + } + svc.Spec.Type = api.ServiceTypeLoadBalancer + svc.Spec.ClusterIP = "172.16.55.4" + svc.Spec.LoadBalancerIP = "5.6.7.8" + svc.Spec.Ports = addTestPort(svc.Spec.Ports, "something", "UDP", 1234, 4321, 7002) + svc.Spec.Ports = addTestPort(svc.Spec.Ports, "somethingelse", "TCP", 1235, 5321, 7003) + svc.Status.LoadBalancer = api.LoadBalancerStatus{ + Ingress: []api.LoadBalancerIngress{ + {IP: 
"10.1.2.3"}, + }, + } + }) + + fp.OnServiceAdd(servicev1) + + result := updateServiceMap(fp.serviceMap, &fp.serviceChanges) + if len(fp.serviceMap) != 2 { + t.Errorf("expected service map length 2, got %v", fp.serviceMap) + } + if len(result.hcServices) != 0 { + t.Errorf("expected healthcheck ports length 0, got %v", result.hcServices) + } + if len(result.staleServices) != 0 { + // Services only added, so nothing stale yet + t.Errorf("expected stale UDP services length 0, got %d", len(result.staleServices)) + } + + // Change service to load-balancer + fp.OnServiceUpdate(servicev1, servicev2) + result = updateServiceMap(fp.serviceMap, &fp.serviceChanges) + if len(fp.serviceMap) != 2 { + t.Errorf("expected service map length 2, got %v", fp.serviceMap) + } + if len(result.hcServices) != 1 { + t.Errorf("expected healthcheck ports length 1, got %v", result.hcServices) + } + if len(result.staleServices) != 0 { + t.Errorf("expected stale UDP services length 0, got %v", result.staleServices.List()) + } + + // No change; make sure the service map stays the same and there are + // no health-check changes + fp.OnServiceUpdate(servicev2, servicev2) + result = updateServiceMap(fp.serviceMap, &fp.serviceChanges) + if len(fp.serviceMap) != 2 { + t.Errorf("expected service map length 2, got %v", fp.serviceMap) + } + if len(result.hcServices) != 1 { + t.Errorf("expected healthcheck ports length 1, got %v", result.hcServices) + } + if len(result.staleServices) != 0 { + t.Errorf("expected stale UDP services length 0, got %v", result.staleServices.List()) + } + + // And back to ClusterIP + fp.OnServiceUpdate(servicev2, servicev1) + result = updateServiceMap(fp.serviceMap, &fp.serviceChanges) + if len(fp.serviceMap) != 2 { + t.Errorf("expected service map length 2, got %v", fp.serviceMap) + } + if len(result.hcServices) != 0 { + t.Errorf("expected healthcheck ports length 0, got %v", result.hcServices) + } + if len(result.staleServices) != 0 { + // Services only added, so nothing stale yet + t.Errorf("expected stale UDP services length 0, got %d", len(result.staleServices)) + } +} + +func Test_getLocalIPs(t *testing.T) { + testCases := []struct { + endpointsMap map[proxy.ServicePortName][]*endpointsInfo + expected map[types.NamespacedName]sets.String + }{{ + // Case[0]: nothing + endpointsMap: map[proxy.ServicePortName][]*endpointsInfo{}, + expected: map[types.NamespacedName]sets.String{}, + }, { + // Case[1]: unnamed port + endpointsMap: map[proxy.ServicePortName][]*endpointsInfo{ + makeServicePortName("ns1", "ep1", ""): { + {endpoint: "1.1.1.1:11", isLocal: false}, + }, + }, + expected: map[types.NamespacedName]sets.String{}, + }, { + // Case[2]: unnamed port local + endpointsMap: map[proxy.ServicePortName][]*endpointsInfo{ + makeServicePortName("ns1", "ep1", ""): { + {endpoint: "1.1.1.1:11", isLocal: true}, + }, + }, + expected: map[types.NamespacedName]sets.String{ + {Namespace: "ns1", Name: "ep1"}: sets.NewString("1.1.1.1"), + }, + }, { + // Case[3]: named local and non-local ports for the same IP. 
+ endpointsMap: map[proxy.ServicePortName][]*endpointsInfo{ + makeServicePortName("ns1", "ep1", "p11"): { + {endpoint: "1.1.1.1:11", isLocal: false}, + {endpoint: "1.1.1.2:11", isLocal: true}, + }, + makeServicePortName("ns1", "ep1", "p12"): { + {endpoint: "1.1.1.1:12", isLocal: false}, + {endpoint: "1.1.1.2:12", isLocal: true}, + }, + }, + expected: map[types.NamespacedName]sets.String{ + {Namespace: "ns1", Name: "ep1"}: sets.NewString("1.1.1.2"), + }, + }, { + // Case[4]: named local and non-local ports for different IPs. + endpointsMap: map[proxy.ServicePortName][]*endpointsInfo{ + makeServicePortName("ns1", "ep1", "p11"): { + {endpoint: "1.1.1.1:11", isLocal: false}, + }, + makeServicePortName("ns2", "ep2", "p22"): { + {endpoint: "2.2.2.2:22", isLocal: true}, + {endpoint: "2.2.2.22:22", isLocal: true}, + }, + makeServicePortName("ns2", "ep2", "p23"): { + {endpoint: "2.2.2.3:23", isLocal: true}, + }, + makeServicePortName("ns4", "ep4", "p44"): { + {endpoint: "4.4.4.4:44", isLocal: true}, + {endpoint: "4.4.4.5:44", isLocal: false}, + }, + makeServicePortName("ns4", "ep4", "p45"): { + {endpoint: "4.4.4.6:45", isLocal: true}, + }, + }, + expected: map[types.NamespacedName]sets.String{ + {Namespace: "ns2", Name: "ep2"}: sets.NewString("2.2.2.2", "2.2.2.22", "2.2.2.3"), + {Namespace: "ns4", Name: "ep4"}: sets.NewString("4.4.4.4", "4.4.4.6"), + }, + }} + + for tci, tc := range testCases { + // outputs + localIPs := getLocalIPs(tc.endpointsMap) + + if !reflect.DeepEqual(localIPs, tc.expected) { + t.Errorf("[%d] expected %#v, got %#v", tci, tc.expected, localIPs) + } + } +} + +// This is a coarse test, but it offers some modicum of confidence as the code is evolved. +func Test_endpointsToEndpointsMap(t *testing.T) { + testCases := []struct { + newEndpoints *api.Endpoints + expected map[proxy.ServicePortName][]*endpointsInfo + }{{ + // Case[0]: nothing + newEndpoints: makeTestEndpoints("ns1", "ep1", func(ept *api.Endpoints) {}), + expected: map[proxy.ServicePortName][]*endpointsInfo{}, + }, { + // Case[1]: no changes, unnamed port + newEndpoints: makeTestEndpoints("ns1", "ep1", func(ept *api.Endpoints) { + ept.Subsets = []api.EndpointSubset{ + { + Addresses: []api.EndpointAddress{{ + IP: "1.1.1.1", + }}, + Ports: []api.EndpointPort{{ + Name: "", + Port: 11, + }}, + }, + } + }), + expected: map[proxy.ServicePortName][]*endpointsInfo{ + makeServicePortName("ns1", "ep1", ""): { + {endpoint: "1.1.1.1:11", isLocal: false}, + }, + }, + }, { + // Case[2]: no changes, named port + newEndpoints: makeTestEndpoints("ns1", "ep1", func(ept *api.Endpoints) { + ept.Subsets = []api.EndpointSubset{ + { + Addresses: []api.EndpointAddress{{ + IP: "1.1.1.1", + }}, + Ports: []api.EndpointPort{{ + Name: "port", + Port: 11, + }}, + }, + } + }), + expected: map[proxy.ServicePortName][]*endpointsInfo{ + makeServicePortName("ns1", "ep1", "port"): { + {endpoint: "1.1.1.1:11", isLocal: false}, + }, + }, + }, { + // Case[3]: new port + newEndpoints: makeTestEndpoints("ns1", "ep1", func(ept *api.Endpoints) { + ept.Subsets = []api.EndpointSubset{ + { + Addresses: []api.EndpointAddress{{ + IP: "1.1.1.1", + }}, + Ports: []api.EndpointPort{{ + Port: 11, + }}, + }, + } + }), + expected: map[proxy.ServicePortName][]*endpointsInfo{ + makeServicePortName("ns1", "ep1", ""): { + {endpoint: "1.1.1.1:11", isLocal: false}, + }, + }, + }, { + // Case[4]: remove port + newEndpoints: makeTestEndpoints("ns1", "ep1", func(ept *api.Endpoints) {}), + expected: map[proxy.ServicePortName][]*endpointsInfo{}, + }, { + // Case[5]: new IP and port + 
newEndpoints: makeTestEndpoints("ns1", "ep1", func(ept *api.Endpoints) { + ept.Subsets = []api.EndpointSubset{ + { + Addresses: []api.EndpointAddress{{ + IP: "1.1.1.1", + }, { + IP: "2.2.2.2", + }}, + Ports: []api.EndpointPort{{ + Name: "p1", + Port: 11, + }, { + Name: "p2", + Port: 22, + }}, + }, + } + }), + expected: map[proxy.ServicePortName][]*endpointsInfo{ + makeServicePortName("ns1", "ep1", "p1"): { + {endpoint: "1.1.1.1:11", isLocal: false}, + {endpoint: "2.2.2.2:11", isLocal: false}, + }, + makeServicePortName("ns1", "ep1", "p2"): { + {endpoint: "1.1.1.1:22", isLocal: false}, + {endpoint: "2.2.2.2:22", isLocal: false}, + }, + }, + }, { + // Case[6]: remove IP and port + newEndpoints: makeTestEndpoints("ns1", "ep1", func(ept *api.Endpoints) { + ept.Subsets = []api.EndpointSubset{ + { + Addresses: []api.EndpointAddress{{ + IP: "1.1.1.1", + }}, + Ports: []api.EndpointPort{{ + Name: "p1", + Port: 11, + }}, + }, + } + }), + expected: map[proxy.ServicePortName][]*endpointsInfo{ + makeServicePortName("ns1", "ep1", "p1"): { + {endpoint: "1.1.1.1:11", isLocal: false}, + }, + }, + }, { + // Case[7]: rename port + newEndpoints: makeTestEndpoints("ns1", "ep1", func(ept *api.Endpoints) { + ept.Subsets = []api.EndpointSubset{ + { + Addresses: []api.EndpointAddress{{ + IP: "1.1.1.1", + }}, + Ports: []api.EndpointPort{{ + Name: "p2", + Port: 11, + }}, + }, + } + }), + expected: map[proxy.ServicePortName][]*endpointsInfo{ + makeServicePortName("ns1", "ep1", "p2"): { + {endpoint: "1.1.1.1:11", isLocal: false}, + }, + }, + }, { + // Case[8]: renumber port + newEndpoints: makeTestEndpoints("ns1", "ep1", func(ept *api.Endpoints) { + ept.Subsets = []api.EndpointSubset{ + { + Addresses: []api.EndpointAddress{{ + IP: "1.1.1.1", + }}, + Ports: []api.EndpointPort{{ + Name: "p1", + Port: 22, + }}, + }, + } + }), + expected: map[proxy.ServicePortName][]*endpointsInfo{ + makeServicePortName("ns1", "ep1", "p1"): { + {endpoint: "1.1.1.1:22", isLocal: false}, + }, + }, + }} + + for tci, tc := range testCases { + // outputs + newEndpoints := endpointsToEndpointsMap(tc.newEndpoints, "host") + + if len(newEndpoints) != len(tc.expected) { + t.Errorf("[%d] expected %d new, got %d: %v", tci, len(tc.expected), len(newEndpoints), spew.Sdump(newEndpoints)) + } + for x := range tc.expected { + if len(newEndpoints[x]) != len(tc.expected[x]) { + t.Errorf("[%d] expected %d endpoints for %v, got %d", tci, len(tc.expected[x]), x, len(newEndpoints[x])) + } else { + for i := range newEndpoints[x] { + if *(newEndpoints[x][i]) != *(tc.expected[x][i]) { + t.Errorf("[%d] expected new[%v][%d] to be %v, got %v", tci, x, i, tc.expected[x][i], *(newEndpoints[x][i])) + } + } + } + } + } +} + +func makeTestEndpoints(namespace, name string, eptFunc func(*api.Endpoints)) *api.Endpoints { + ept := &api.Endpoints{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: namespace, + }, + } + eptFunc(ept) + return ept +} + +func makeEndpointsMap(proxier *Proxier, allEndpoints ...*api.Endpoints) { + for i := range allEndpoints { + proxier.OnEndpointsAdd(allEndpoints[i]) + } + + proxier.mu.Lock() + defer proxier.mu.Unlock() + proxier.endpointsSynced = true +} + +func makeNSN(namespace, name string) types.NamespacedName { + return types.NamespacedName{Namespace: namespace, Name: name} +} + +func makeServicePortName(ns, name, port string) proxy.ServicePortName { + return proxy.ServicePortName{ + NamespacedName: makeNSN(ns, name), + Port: port, + } +} + +func makeServiceMap(proxier *Proxier, allServices ...*api.Service) { + for i := range 
allServices { + proxier.OnServiceAdd(allServices[i]) + } + + proxier.mu.Lock() + defer proxier.mu.Unlock() + proxier.servicesSynced = true +} + +func compareEndpointsMaps(t *testing.T, tci int, newMap, expected map[proxy.ServicePortName][]*endpointsInfo) { + if len(newMap) != len(expected) { + t.Errorf("[%d] expected %d results, got %d: %v", tci, len(expected), len(newMap), newMap) + } + for x := range expected { + if len(newMap[x]) != len(expected[x]) { + t.Errorf("[%d] expected %d endpoints for %v, got %d", tci, len(expected[x]), x, len(newMap[x])) + } else { + for i := range expected[x] { + if *(newMap[x][i]) != *(expected[x][i]) { + t.Errorf("[%d] expected new[%v][%d] to be %v, got %v", tci, x, i, expected[x][i], newMap[x][i]) + } + } + } + } +} + +func Test_updateEndpointsMap(t *testing.T) { + var nodeName = testHostname + + emptyEndpoint := func(ept *api.Endpoints) { + ept.Subsets = []api.EndpointSubset{} + } + unnamedPort := func(ept *api.Endpoints) { + ept.Subsets = []api.EndpointSubset{{ + Addresses: []api.EndpointAddress{{ + IP: "1.1.1.1", + }}, + Ports: []api.EndpointPort{{ + Port: 11, + }}, + }} + } + unnamedPortLocal := func(ept *api.Endpoints) { + ept.Subsets = []api.EndpointSubset{{ + Addresses: []api.EndpointAddress{{ + IP: "1.1.1.1", + NodeName: &nodeName, + }}, + Ports: []api.EndpointPort{{ + Port: 11, + }}, + }} + } + namedPortLocal := func(ept *api.Endpoints) { + ept.Subsets = []api.EndpointSubset{{ + Addresses: []api.EndpointAddress{{ + IP: "1.1.1.1", + NodeName: &nodeName, + }}, + Ports: []api.EndpointPort{{ + Name: "p11", + Port: 11, + }}, + }} + } + namedPort := func(ept *api.Endpoints) { + ept.Subsets = []api.EndpointSubset{{ + Addresses: []api.EndpointAddress{{ + IP: "1.1.1.1", + }}, + Ports: []api.EndpointPort{{ + Name: "p11", + Port: 11, + }}, + }} + } + namedPortRenamed := func(ept *api.Endpoints) { + ept.Subsets = []api.EndpointSubset{{ + Addresses: []api.EndpointAddress{{ + IP: "1.1.1.1", + }}, + Ports: []api.EndpointPort{{ + Name: "p11-2", + Port: 11, + }}, + }} + } + namedPortRenumbered := func(ept *api.Endpoints) { + ept.Subsets = []api.EndpointSubset{{ + Addresses: []api.EndpointAddress{{ + IP: "1.1.1.1", + }}, + Ports: []api.EndpointPort{{ + Name: "p11", + Port: 22, + }}, + }} + } + namedPortsLocalNoLocal := func(ept *api.Endpoints) { + ept.Subsets = []api.EndpointSubset{{ + Addresses: []api.EndpointAddress{{ + IP: "1.1.1.1", + }, { + IP: "1.1.1.2", + NodeName: &nodeName, + }}, + Ports: []api.EndpointPort{{ + Name: "p11", + Port: 11, + }, { + Name: "p12", + Port: 12, + }}, + }} + } + multipleSubsets := func(ept *api.Endpoints) { + ept.Subsets = []api.EndpointSubset{{ + Addresses: []api.EndpointAddress{{ + IP: "1.1.1.1", + }}, + Ports: []api.EndpointPort{{ + Name: "p11", + Port: 11, + }}, + }, { + Addresses: []api.EndpointAddress{{ + IP: "1.1.1.2", + }}, + Ports: []api.EndpointPort{{ + Name: "p12", + Port: 12, + }}, + }} + } + multipleSubsetsWithLocal := func(ept *api.Endpoints) { + ept.Subsets = []api.EndpointSubset{{ + Addresses: []api.EndpointAddress{{ + IP: "1.1.1.1", + }}, + Ports: []api.EndpointPort{{ + Name: "p11", + Port: 11, + }}, + }, { + Addresses: []api.EndpointAddress{{ + IP: "1.1.1.2", + NodeName: &nodeName, + }}, + Ports: []api.EndpointPort{{ + Name: "p12", + Port: 12, + }}, + }} + } + multipleSubsetsMultiplePortsLocal := func(ept *api.Endpoints) { + ept.Subsets = []api.EndpointSubset{{ + Addresses: []api.EndpointAddress{{ + IP: "1.1.1.1", + NodeName: &nodeName, + }}, + Ports: []api.EndpointPort{{ + Name: "p11", + Port: 11, + }, { + Name: 
"p12", + Port: 12, + }}, + }, { + Addresses: []api.EndpointAddress{{ + IP: "1.1.1.3", + }}, + Ports: []api.EndpointPort{{ + Name: "p13", + Port: 13, + }}, + }} + } + multipleSubsetsIPsPorts1 := func(ept *api.Endpoints) { + ept.Subsets = []api.EndpointSubset{{ + Addresses: []api.EndpointAddress{{ + IP: "1.1.1.1", + }, { + IP: "1.1.1.2", + NodeName: &nodeName, + }}, + Ports: []api.EndpointPort{{ + Name: "p11", + Port: 11, + }, { + Name: "p12", + Port: 12, + }}, + }, { + Addresses: []api.EndpointAddress{{ + IP: "1.1.1.3", + }, { + IP: "1.1.1.4", + NodeName: &nodeName, + }}, + Ports: []api.EndpointPort{{ + Name: "p13", + Port: 13, + }, { + Name: "p14", + Port: 14, + }}, + }} + } + multipleSubsetsIPsPorts2 := func(ept *api.Endpoints) { + ept.Subsets = []api.EndpointSubset{{ + Addresses: []api.EndpointAddress{{ + IP: "2.2.2.1", + }, { + IP: "2.2.2.2", + NodeName: &nodeName, + }}, + Ports: []api.EndpointPort{{ + Name: "p21", + Port: 21, + }, { + Name: "p22", + Port: 22, + }}, + }} + } + complexBefore1 := func(ept *api.Endpoints) { + ept.Subsets = []api.EndpointSubset{{ + Addresses: []api.EndpointAddress{{ + IP: "1.1.1.1", + }}, + Ports: []api.EndpointPort{{ + Name: "p11", + Port: 11, + }}, + }} + } + complexBefore2 := func(ept *api.Endpoints) { + ept.Subsets = []api.EndpointSubset{{ + Addresses: []api.EndpointAddress{{ + IP: "2.2.2.2", + NodeName: &nodeName, + }, { + IP: "2.2.2.22", + NodeName: &nodeName, + }}, + Ports: []api.EndpointPort{{ + Name: "p22", + Port: 22, + }}, + }, { + Addresses: []api.EndpointAddress{{ + IP: "2.2.2.3", + NodeName: &nodeName, + }}, + Ports: []api.EndpointPort{{ + Name: "p23", + Port: 23, + }}, + }} + } + complexBefore4 := func(ept *api.Endpoints) { + ept.Subsets = []api.EndpointSubset{{ + Addresses: []api.EndpointAddress{{ + IP: "4.4.4.4", + NodeName: &nodeName, + }, { + IP: "4.4.4.5", + NodeName: &nodeName, + }}, + Ports: []api.EndpointPort{{ + Name: "p44", + Port: 44, + }}, + }, { + Addresses: []api.EndpointAddress{{ + IP: "4.4.4.6", + NodeName: &nodeName, + }}, + Ports: []api.EndpointPort{{ + Name: "p45", + Port: 45, + }}, + }} + } + complexAfter1 := func(ept *api.Endpoints) { + ept.Subsets = []api.EndpointSubset{{ + Addresses: []api.EndpointAddress{{ + IP: "1.1.1.1", + }, { + IP: "1.1.1.11", + }}, + Ports: []api.EndpointPort{{ + Name: "p11", + Port: 11, + }}, + }, { + Addresses: []api.EndpointAddress{{ + IP: "1.1.1.2", + }}, + Ports: []api.EndpointPort{{ + Name: "p12", + Port: 12, + }, { + Name: "p122", + Port: 122, + }}, + }} + } + complexAfter3 := func(ept *api.Endpoints) { + ept.Subsets = []api.EndpointSubset{{ + Addresses: []api.EndpointAddress{{ + IP: "3.3.3.3", + }}, + Ports: []api.EndpointPort{{ + Name: "p33", + Port: 33, + }}, + }} + } + complexAfter4 := func(ept *api.Endpoints) { + ept.Subsets = []api.EndpointSubset{{ + Addresses: []api.EndpointAddress{{ + IP: "4.4.4.4", + NodeName: &nodeName, + }}, + Ports: []api.EndpointPort{{ + Name: "p44", + Port: 44, + }}, + }} + } + + testCases := []struct { + // previousEndpoints and currentEndpoints are used to call appropriate + // handlers OnEndpoints* (based on whether corresponding values are nil + // or non-nil) and must be of equal length. 
+ previousEndpoints []*api.Endpoints + currentEndpoints []*api.Endpoints + oldEndpoints map[proxy.ServicePortName][]*endpointsInfo + expectedResult map[proxy.ServicePortName][]*endpointsInfo + expectedStaleEndpoints []endpointServicePair + expectedStaleServiceNames map[proxy.ServicePortName]bool + expectedHealthchecks map[types.NamespacedName]int + }{{ + // Case[0]: nothing + oldEndpoints: map[proxy.ServicePortName][]*endpointsInfo{}, + expectedResult: map[proxy.ServicePortName][]*endpointsInfo{}, + expectedStaleEndpoints: []endpointServicePair{}, + expectedStaleServiceNames: map[proxy.ServicePortName]bool{}, + expectedHealthchecks: map[types.NamespacedName]int{}, + }, { + // Case[1]: no change, unnamed port + previousEndpoints: []*api.Endpoints{ + makeTestEndpoints("ns1", "ep1", unnamedPort), + }, + currentEndpoints: []*api.Endpoints{ + makeTestEndpoints("ns1", "ep1", unnamedPort), + }, + oldEndpoints: map[proxy.ServicePortName][]*endpointsInfo{ + makeServicePortName("ns1", "ep1", ""): { + {endpoint: "1.1.1.1:11", isLocal: false}, + }, + }, + expectedResult: map[proxy.ServicePortName][]*endpointsInfo{ + makeServicePortName("ns1", "ep1", ""): { + {endpoint: "1.1.1.1:11", isLocal: false}, + }, + }, + expectedStaleEndpoints: []endpointServicePair{}, + expectedStaleServiceNames: map[proxy.ServicePortName]bool{}, + expectedHealthchecks: map[types.NamespacedName]int{}, + }, { + // Case[2]: no change, named port, local + previousEndpoints: []*api.Endpoints{ + makeTestEndpoints("ns1", "ep1", namedPortLocal), + }, + currentEndpoints: []*api.Endpoints{ + makeTestEndpoints("ns1", "ep1", namedPortLocal), + }, + oldEndpoints: map[proxy.ServicePortName][]*endpointsInfo{ + makeServicePortName("ns1", "ep1", "p11"): { + {endpoint: "1.1.1.1:11", isLocal: true}, + }, + }, + expectedResult: map[proxy.ServicePortName][]*endpointsInfo{ + makeServicePortName("ns1", "ep1", "p11"): { + {endpoint: "1.1.1.1:11", isLocal: true}, + }, + }, + expectedStaleEndpoints: []endpointServicePair{}, + expectedStaleServiceNames: map[proxy.ServicePortName]bool{}, + expectedHealthchecks: map[types.NamespacedName]int{ + makeNSN("ns1", "ep1"): 1, + }, + }, { + // Case[3]: no change, multiple subsets + previousEndpoints: []*api.Endpoints{ + makeTestEndpoints("ns1", "ep1", multipleSubsets), + }, + currentEndpoints: []*api.Endpoints{ + makeTestEndpoints("ns1", "ep1", multipleSubsets), + }, + oldEndpoints: map[proxy.ServicePortName][]*endpointsInfo{ + makeServicePortName("ns1", "ep1", "p11"): { + {endpoint: "1.1.1.1:11", isLocal: false}, + }, + makeServicePortName("ns1", "ep1", "p12"): { + {endpoint: "1.1.1.2:12", isLocal: false}, + }, + }, + expectedResult: map[proxy.ServicePortName][]*endpointsInfo{ + makeServicePortName("ns1", "ep1", "p11"): { + {endpoint: "1.1.1.1:11", isLocal: false}, + }, + makeServicePortName("ns1", "ep1", "p12"): { + {endpoint: "1.1.1.2:12", isLocal: false}, + }, + }, + expectedStaleEndpoints: []endpointServicePair{}, + expectedStaleServiceNames: map[proxy.ServicePortName]bool{}, + expectedHealthchecks: map[types.NamespacedName]int{}, + }, { + // Case[4]: no change, multiple subsets, multiple ports, local + previousEndpoints: []*api.Endpoints{ + makeTestEndpoints("ns1", "ep1", multipleSubsetsMultiplePortsLocal), + }, + currentEndpoints: []*api.Endpoints{ + makeTestEndpoints("ns1", "ep1", multipleSubsetsMultiplePortsLocal), + }, + oldEndpoints: map[proxy.ServicePortName][]*endpointsInfo{ + makeServicePortName("ns1", "ep1", "p11"): { + {endpoint: "1.1.1.1:11", isLocal: true}, + }, + makeServicePortName("ns1", 
"ep1", "p12"): { + {endpoint: "1.1.1.1:12", isLocal: true}, + }, + makeServicePortName("ns1", "ep1", "p13"): { + {endpoint: "1.1.1.3:13", isLocal: false}, + }, + }, + expectedResult: map[proxy.ServicePortName][]*endpointsInfo{ + makeServicePortName("ns1", "ep1", "p11"): { + {endpoint: "1.1.1.1:11", isLocal: true}, + }, + makeServicePortName("ns1", "ep1", "p12"): { + {endpoint: "1.1.1.1:12", isLocal: true}, + }, + makeServicePortName("ns1", "ep1", "p13"): { + {endpoint: "1.1.1.3:13", isLocal: false}, + }, + }, + expectedStaleEndpoints: []endpointServicePair{}, + expectedStaleServiceNames: map[proxy.ServicePortName]bool{}, + expectedHealthchecks: map[types.NamespacedName]int{ + makeNSN("ns1", "ep1"): 1, + }, + }, { + // Case[5]: no change, multiple endpoints, subsets, IPs, and ports + previousEndpoints: []*api.Endpoints{ + makeTestEndpoints("ns1", "ep1", multipleSubsetsIPsPorts1), + makeTestEndpoints("ns2", "ep2", multipleSubsetsIPsPorts2), + }, + currentEndpoints: []*api.Endpoints{ + makeTestEndpoints("ns1", "ep1", multipleSubsetsIPsPorts1), + makeTestEndpoints("ns2", "ep2", multipleSubsetsIPsPorts2), + }, + oldEndpoints: map[proxy.ServicePortName][]*endpointsInfo{ + makeServicePortName("ns1", "ep1", "p11"): { + {endpoint: "1.1.1.1:11", isLocal: false}, + {endpoint: "1.1.1.2:11", isLocal: true}, + }, + makeServicePortName("ns1", "ep1", "p12"): { + {endpoint: "1.1.1.1:12", isLocal: false}, + {endpoint: "1.1.1.2:12", isLocal: true}, + }, + makeServicePortName("ns1", "ep1", "p13"): { + {endpoint: "1.1.1.3:13", isLocal: false}, + {endpoint: "1.1.1.4:13", isLocal: true}, + }, + makeServicePortName("ns1", "ep1", "p14"): { + {endpoint: "1.1.1.3:14", isLocal: false}, + {endpoint: "1.1.1.4:14", isLocal: true}, + }, + makeServicePortName("ns2", "ep2", "p21"): { + {endpoint: "2.2.2.1:21", isLocal: false}, + {endpoint: "2.2.2.2:21", isLocal: true}, + }, + makeServicePortName("ns2", "ep2", "p22"): { + {endpoint: "2.2.2.1:22", isLocal: false}, + {endpoint: "2.2.2.2:22", isLocal: true}, + }, + }, + expectedResult: map[proxy.ServicePortName][]*endpointsInfo{ + makeServicePortName("ns1", "ep1", "p11"): { + {endpoint: "1.1.1.1:11", isLocal: false}, + {endpoint: "1.1.1.2:11", isLocal: true}, + }, + makeServicePortName("ns1", "ep1", "p12"): { + {endpoint: "1.1.1.1:12", isLocal: false}, + {endpoint: "1.1.1.2:12", isLocal: true}, + }, + makeServicePortName("ns1", "ep1", "p13"): { + {endpoint: "1.1.1.3:13", isLocal: false}, + {endpoint: "1.1.1.4:13", isLocal: true}, + }, + makeServicePortName("ns1", "ep1", "p14"): { + {endpoint: "1.1.1.3:14", isLocal: false}, + {endpoint: "1.1.1.4:14", isLocal: true}, + }, + makeServicePortName("ns2", "ep2", "p21"): { + {endpoint: "2.2.2.1:21", isLocal: false}, + {endpoint: "2.2.2.2:21", isLocal: true}, + }, + makeServicePortName("ns2", "ep2", "p22"): { + {endpoint: "2.2.2.1:22", isLocal: false}, + {endpoint: "2.2.2.2:22", isLocal: true}, + }, + }, + expectedStaleEndpoints: []endpointServicePair{}, + expectedStaleServiceNames: map[proxy.ServicePortName]bool{}, + expectedHealthchecks: map[types.NamespacedName]int{ + makeNSN("ns1", "ep1"): 2, + makeNSN("ns2", "ep2"): 1, + }, + }, { + // Case[6]: add an Endpoints + previousEndpoints: []*api.Endpoints{ + nil, + }, + currentEndpoints: []*api.Endpoints{ + makeTestEndpoints("ns1", "ep1", unnamedPortLocal), + }, + oldEndpoints: map[proxy.ServicePortName][]*endpointsInfo{}, + expectedResult: map[proxy.ServicePortName][]*endpointsInfo{ + makeServicePortName("ns1", "ep1", ""): { + {endpoint: "1.1.1.1:11", isLocal: true}, + }, + }, + 
expectedStaleEndpoints: []endpointServicePair{}, + expectedStaleServiceNames: map[proxy.ServicePortName]bool{ + makeServicePortName("ns1", "ep1", ""): true, + }, + expectedHealthchecks: map[types.NamespacedName]int{ + makeNSN("ns1", "ep1"): 1, + }, + }, { + // Case[7]: remove an Endpoints + previousEndpoints: []*api.Endpoints{ + makeTestEndpoints("ns1", "ep1", unnamedPortLocal), + }, + currentEndpoints: []*api.Endpoints{ + nil, + }, + oldEndpoints: map[proxy.ServicePortName][]*endpointsInfo{ + makeServicePortName("ns1", "ep1", ""): { + {endpoint: "1.1.1.1:11", isLocal: true}, + }, + }, + expectedResult: map[proxy.ServicePortName][]*endpointsInfo{}, + expectedStaleEndpoints: []endpointServicePair{{ + endpoint: "1.1.1.1:11", + servicePortName: makeServicePortName("ns1", "ep1", ""), + }}, + expectedStaleServiceNames: map[proxy.ServicePortName]bool{}, + expectedHealthchecks: map[types.NamespacedName]int{}, + }, { + // Case[8]: add an IP and port + previousEndpoints: []*api.Endpoints{ + makeTestEndpoints("ns1", "ep1", namedPort), + }, + currentEndpoints: []*api.Endpoints{ + makeTestEndpoints("ns1", "ep1", namedPortsLocalNoLocal), + }, + oldEndpoints: map[proxy.ServicePortName][]*endpointsInfo{ + makeServicePortName("ns1", "ep1", "p11"): { + {endpoint: "1.1.1.1:11", isLocal: false}, + }, + }, + expectedResult: map[proxy.ServicePortName][]*endpointsInfo{ + makeServicePortName("ns1", "ep1", "p11"): { + {endpoint: "1.1.1.1:11", isLocal: false}, + {endpoint: "1.1.1.2:11", isLocal: true}, + }, + makeServicePortName("ns1", "ep1", "p12"): { + {endpoint: "1.1.1.1:12", isLocal: false}, + {endpoint: "1.1.1.2:12", isLocal: true}, + }, + }, + expectedStaleEndpoints: []endpointServicePair{}, + expectedStaleServiceNames: map[proxy.ServicePortName]bool{ + makeServicePortName("ns1", "ep1", "p12"): true, + }, + expectedHealthchecks: map[types.NamespacedName]int{ + makeNSN("ns1", "ep1"): 1, + }, + }, { + // Case[9]: remove an IP and port + previousEndpoints: []*api.Endpoints{ + makeTestEndpoints("ns1", "ep1", namedPortsLocalNoLocal), + }, + currentEndpoints: []*api.Endpoints{ + makeTestEndpoints("ns1", "ep1", namedPort), + }, + oldEndpoints: map[proxy.ServicePortName][]*endpointsInfo{ + makeServicePortName("ns1", "ep1", "p11"): { + {endpoint: "1.1.1.1:11", isLocal: false}, + {endpoint: "1.1.1.2:11", isLocal: true}, + }, + makeServicePortName("ns1", "ep1", "p12"): { + {endpoint: "1.1.1.1:12", isLocal: false}, + {endpoint: "1.1.1.2:12", isLocal: true}, + }, + }, + expectedResult: map[proxy.ServicePortName][]*endpointsInfo{ + makeServicePortName("ns1", "ep1", "p11"): { + {endpoint: "1.1.1.1:11", isLocal: false}, + }, + }, + expectedStaleEndpoints: []endpointServicePair{{ + endpoint: "1.1.1.2:11", + servicePortName: makeServicePortName("ns1", "ep1", "p11"), + }, { + endpoint: "1.1.1.1:12", + servicePortName: makeServicePortName("ns1", "ep1", "p12"), + }, { + endpoint: "1.1.1.2:12", + servicePortName: makeServicePortName("ns1", "ep1", "p12"), + }}, + expectedStaleServiceNames: map[proxy.ServicePortName]bool{}, + expectedHealthchecks: map[types.NamespacedName]int{}, + }, { + // Case[10]: add a subset + previousEndpoints: []*api.Endpoints{ + makeTestEndpoints("ns1", "ep1", namedPort), + }, + currentEndpoints: []*api.Endpoints{ + makeTestEndpoints("ns1", "ep1", multipleSubsetsWithLocal), + }, + oldEndpoints: map[proxy.ServicePortName][]*endpointsInfo{ + makeServicePortName("ns1", "ep1", "p11"): { + {endpoint: "1.1.1.1:11", isLocal: false}, + }, + }, + expectedResult: map[proxy.ServicePortName][]*endpointsInfo{ + 
makeServicePortName("ns1", "ep1", "p11"): { + {endpoint: "1.1.1.1:11", isLocal: false}, + }, + makeServicePortName("ns1", "ep1", "p12"): { + {endpoint: "1.1.1.2:12", isLocal: true}, + }, + }, + expectedStaleEndpoints: []endpointServicePair{}, + expectedStaleServiceNames: map[proxy.ServicePortName]bool{ + makeServicePortName("ns1", "ep1", "p12"): true, + }, + expectedHealthchecks: map[types.NamespacedName]int{ + makeNSN("ns1", "ep1"): 1, + }, + }, { + // Case[11]: remove a subset + previousEndpoints: []*api.Endpoints{ + makeTestEndpoints("ns1", "ep1", multipleSubsets), + }, + currentEndpoints: []*api.Endpoints{ + makeTestEndpoints("ns1", "ep1", namedPort), + }, + oldEndpoints: map[proxy.ServicePortName][]*endpointsInfo{ + makeServicePortName("ns1", "ep1", "p11"): { + {endpoint: "1.1.1.1:11", isLocal: false}, + }, + makeServicePortName("ns1", "ep1", "p12"): { + {endpoint: "1.1.1.2:12", isLocal: false}, + }, + }, + expectedResult: map[proxy.ServicePortName][]*endpointsInfo{ + makeServicePortName("ns1", "ep1", "p11"): { + {endpoint: "1.1.1.1:11", isLocal: false}, + }, + }, + expectedStaleEndpoints: []endpointServicePair{{ + endpoint: "1.1.1.2:12", + servicePortName: makeServicePortName("ns1", "ep1", "p12"), + }}, + expectedStaleServiceNames: map[proxy.ServicePortName]bool{}, + expectedHealthchecks: map[types.NamespacedName]int{}, + }, { + // Case[12]: rename a port + previousEndpoints: []*api.Endpoints{ + makeTestEndpoints("ns1", "ep1", namedPort), + }, + currentEndpoints: []*api.Endpoints{ + makeTestEndpoints("ns1", "ep1", namedPortRenamed), + }, + oldEndpoints: map[proxy.ServicePortName][]*endpointsInfo{ + makeServicePortName("ns1", "ep1", "p11"): { + {endpoint: "1.1.1.1:11", isLocal: false}, + }, + }, + expectedResult: map[proxy.ServicePortName][]*endpointsInfo{ + makeServicePortName("ns1", "ep1", "p11-2"): { + {endpoint: "1.1.1.1:11", isLocal: false}, + }, + }, + expectedStaleEndpoints: []endpointServicePair{{ + endpoint: "1.1.1.1:11", + servicePortName: makeServicePortName("ns1", "ep1", "p11"), + }}, + expectedStaleServiceNames: map[proxy.ServicePortName]bool{ + makeServicePortName("ns1", "ep1", "p11-2"): true, + }, + expectedHealthchecks: map[types.NamespacedName]int{}, + }, { + // Case[13]: renumber a port + previousEndpoints: []*api.Endpoints{ + makeTestEndpoints("ns1", "ep1", namedPort), + }, + currentEndpoints: []*api.Endpoints{ + makeTestEndpoints("ns1", "ep1", namedPortRenumbered), + }, + oldEndpoints: map[proxy.ServicePortName][]*endpointsInfo{ + makeServicePortName("ns1", "ep1", "p11"): { + {endpoint: "1.1.1.1:11", isLocal: false}, + }, + }, + expectedResult: map[proxy.ServicePortName][]*endpointsInfo{ + makeServicePortName("ns1", "ep1", "p11"): { + {endpoint: "1.1.1.1:22", isLocal: false}, + }, + }, + expectedStaleEndpoints: []endpointServicePair{{ + endpoint: "1.1.1.1:11", + servicePortName: makeServicePortName("ns1", "ep1", "p11"), + }}, + expectedStaleServiceNames: map[proxy.ServicePortName]bool{}, + expectedHealthchecks: map[types.NamespacedName]int{}, + }, { + // Case[14]: complex add and remove + previousEndpoints: []*api.Endpoints{ + makeTestEndpoints("ns1", "ep1", complexBefore1), + makeTestEndpoints("ns2", "ep2", complexBefore2), + nil, + makeTestEndpoints("ns4", "ep4", complexBefore4), + }, + currentEndpoints: []*api.Endpoints{ + makeTestEndpoints("ns1", "ep1", complexAfter1), + nil, + makeTestEndpoints("ns3", "ep3", complexAfter3), + makeTestEndpoints("ns4", "ep4", complexAfter4), + }, + oldEndpoints: map[proxy.ServicePortName][]*endpointsInfo{ + 
makeServicePortName("ns1", "ep1", "p11"): { + {endpoint: "1.1.1.1:11", isLocal: false}, + }, + makeServicePortName("ns2", "ep2", "p22"): { + {endpoint: "2.2.2.2:22", isLocal: true}, + {endpoint: "2.2.2.22:22", isLocal: true}, + }, + makeServicePortName("ns2", "ep2", "p23"): { + {endpoint: "2.2.2.3:23", isLocal: true}, + }, + makeServicePortName("ns4", "ep4", "p44"): { + {endpoint: "4.4.4.4:44", isLocal: true}, + {endpoint: "4.4.4.5:44", isLocal: true}, + }, + makeServicePortName("ns4", "ep4", "p45"): { + {endpoint: "4.4.4.6:45", isLocal: true}, + }, + }, + expectedResult: map[proxy.ServicePortName][]*endpointsInfo{ + makeServicePortName("ns1", "ep1", "p11"): { + {endpoint: "1.1.1.1:11", isLocal: false}, + {endpoint: "1.1.1.11:11", isLocal: false}, + }, + makeServicePortName("ns1", "ep1", "p12"): { + {endpoint: "1.1.1.2:12", isLocal: false}, + }, + makeServicePortName("ns1", "ep1", "p122"): { + {endpoint: "1.1.1.2:122", isLocal: false}, + }, + makeServicePortName("ns3", "ep3", "p33"): { + {endpoint: "3.3.3.3:33", isLocal: false}, + }, + makeServicePortName("ns4", "ep4", "p44"): { + {endpoint: "4.4.4.4:44", isLocal: true}, + }, + }, + expectedStaleEndpoints: []endpointServicePair{{ + endpoint: "2.2.2.2:22", + servicePortName: makeServicePortName("ns2", "ep2", "p22"), + }, { + endpoint: "2.2.2.22:22", + servicePortName: makeServicePortName("ns2", "ep2", "p22"), + }, { + endpoint: "2.2.2.3:23", + servicePortName: makeServicePortName("ns2", "ep2", "p23"), + }, { + endpoint: "4.4.4.5:44", + servicePortName: makeServicePortName("ns4", "ep4", "p44"), + }, { + endpoint: "4.4.4.6:45", + servicePortName: makeServicePortName("ns4", "ep4", "p45"), + }}, + expectedStaleServiceNames: map[proxy.ServicePortName]bool{ + makeServicePortName("ns1", "ep1", "p12"): true, + makeServicePortName("ns1", "ep1", "p122"): true, + makeServicePortName("ns3", "ep3", "p33"): true, + }, + expectedHealthchecks: map[types.NamespacedName]int{ + makeNSN("ns4", "ep4"): 1, + }, + }, { + // Case[15]: change from 0 endpoint address to 1 unnamed port + previousEndpoints: []*api.Endpoints{ + makeTestEndpoints("ns1", "ep1", emptyEndpoint), + }, + currentEndpoints: []*api.Endpoints{ + makeTestEndpoints("ns1", "ep1", unnamedPort), + }, + oldEndpoints: map[proxy.ServicePortName][]*endpointsInfo{}, + expectedResult: map[proxy.ServicePortName][]*endpointsInfo{ + makeServicePortName("ns1", "ep1", ""): { + {endpoint: "1.1.1.1:11", isLocal: false}, + }, + }, + expectedStaleEndpoints: []endpointServicePair{}, + expectedStaleServiceNames: map[proxy.ServicePortName]bool{ + makeServicePortName("ns1", "ep1", ""): true, + }, + expectedHealthchecks: map[types.NamespacedName]int{}, + }, + } + + for tci, tc := range testCases { + ipt := iptablestest.NewFake() + fp := NewFakeProxier(ipt) + fp.hostname = nodeName + + // First check that after adding all previous versions of endpoints, + // the fp.oldEndpoints is as we expect. + for i := range tc.previousEndpoints { + if tc.previousEndpoints[i] != nil { + fp.OnEndpointsAdd(tc.previousEndpoints[i]) + } + } + updateEndpointsMap(fp.endpointsMap, &fp.endpointsChanges, fp.hostname) + compareEndpointsMaps(t, tci, fp.endpointsMap, tc.oldEndpoints) + + // Now let's call appropriate handlers to get to state we want to be. 
+		if len(tc.previousEndpoints) != len(tc.currentEndpoints) {
+			t.Fatalf("[%d] different lengths of previous and current endpoints", tci)
+		}
+
+		for i := range tc.previousEndpoints {
+			prev, curr := tc.previousEndpoints[i], tc.currentEndpoints[i]
+			switch {
+			case prev == nil:
+				fp.OnEndpointsAdd(curr)
+			case curr == nil:
+				fp.OnEndpointsDelete(prev)
+			default:
+				fp.OnEndpointsUpdate(prev, curr)
+			}
+		}
+		result := updateEndpointsMap(fp.endpointsMap, &fp.endpointsChanges, fp.hostname)
+		newMap := fp.endpointsMap
+		compareEndpointsMaps(t, tci, newMap, tc.expectedResult)
+		if len(result.staleEndpoints) != len(tc.expectedStaleEndpoints) {
+			t.Errorf("[%d] expected %d staleEndpoints, got %d: %v", tci, len(tc.expectedStaleEndpoints), len(result.staleEndpoints), result.staleEndpoints)
+		}
+		for _, x := range tc.expectedStaleEndpoints {
+			if result.staleEndpoints[x] != true {
+				t.Errorf("[%d] expected staleEndpoints[%v], but didn't find it: %v", tci, x, result.staleEndpoints)
+			}
+		}
+		if len(result.staleServiceNames) != len(tc.expectedStaleServiceNames) {
+			t.Errorf("[%d] expected %d staleServiceNames, got %d: %v", tci, len(tc.expectedStaleServiceNames), len(result.staleServiceNames), result.staleServiceNames)
+		}
+		for svcName := range tc.expectedStaleServiceNames {
+			if result.staleServiceNames[svcName] != true {
+				t.Errorf("[%d] expected staleServiceNames[%v], but didn't find it: %v", tci, svcName, result.staleServiceNames)
+			}
+		}
+		if !reflect.DeepEqual(result.hcEndpoints, tc.expectedHealthchecks) {
+			t.Errorf("[%d] expected healthchecks %v, got %v", tci, tc.expectedHealthchecks, result.hcEndpoints)
+		}
+	}
+}
+
+// TODO(thockin): add *more* tests for syncProxyRules() or break it down further and test the pieces.
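Reviewer note on the semantics these cases exercise: Test_updateEndpointsMap asserts two pieces of bookkeeping. An endpoint that was present for a service port in the old map but is gone from the new map is reported as a stale endpoint (so the proxier can clean up after it, e.g. lingering UDP conntrack entries), and a service port that goes from zero endpoints to one or more is reported as a stale service name; hcEndpoints additionally counts distinct node-local endpoint IPs per service for healthcheck reporting (see Case[5]). The sketch below only illustrates that diffing rule with simplified stand-in types; svcPortName, epInfo, and diffEndpointsMaps are hypothetical names for illustration, not the proxier's API, and this is not part of the patch.

package main

import "fmt"

// Simplified stand-ins for the proxier's types; the real code keys the map
// by proxy.ServicePortName and stores []*endpointsInfo values.
type svcPortName struct{ Namespace, Name, Port string }

type epInfo struct {
	endpoint string // "IP:port"
	isLocal  bool
}

// diffEndpointsMaps mirrors the bookkeeping asserted above: endpoints that
// disappear from a service port are reported as stale endpoints, and service
// ports that gain their first endpoints are reported as stale service names.
func diffEndpointsMaps(oldMap, newMap map[svcPortName][]epInfo) (map[string]svcPortName, map[svcPortName]bool) {
	staleEndpoints := map[string]svcPortName{}
	staleServices := map[svcPortName]bool{}
	for spn, oldEps := range oldMap {
		for _, old := range oldEps {
			found := false
			for _, cur := range newMap[spn] {
				if cur.endpoint == old.endpoint {
					found = true
					break
				}
			}
			if !found {
				staleEndpoints[old.endpoint] = spn
			}
		}
	}
	for spn, newEps := range newMap {
		if len(newEps) > 0 && len(oldMap[spn]) == 0 {
			staleServices[spn] = true
		}
	}
	return staleEndpoints, staleServices
}

func main() {
	// Shapes borrowed from Case[12] (rename a port).
	oldMap := map[svcPortName][]epInfo{
		{"ns1", "ep1", "p11"}: {{endpoint: "1.1.1.1:11"}},
	}
	newMap := map[svcPortName][]epInfo{
		{"ns1", "ep1", "p11-2"}: {{endpoint: "1.1.1.1:11"}},
	}
	stale, fresh := diffEndpointsMaps(oldMap, newMap)
	fmt.Println(stale) // map[1.1.1.1:11:{ns1 ep1 p11}]
	fmt.Println(fresh) // map[{ns1 ep1 p11-2}:true]
}

Run against the Case[12] shapes, the sketch reports the old p11 endpoint as stale and p11-2 as a newly populated service port, which matches that case's expectedStaleEndpoints and expectedStaleServiceNames.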