// +build windows /* Copyright 2017 The Kubernetes Authors. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ package winkernel import ( "encoding/json" "fmt" "net" "os" "strconv" "strings" "sync" "sync/atomic" "time" "github.com/Microsoft/hcsshim" "github.com/Microsoft/hcsshim/hcn" "github.com/davecgh/go-spew/spew" v1 "k8s.io/api/core/v1" discovery "k8s.io/api/discovery/v1" "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/intstr" apiutil "k8s.io/apimachinery/pkg/util/net" "k8s.io/apimachinery/pkg/util/sets" "k8s.io/apimachinery/pkg/util/version" "k8s.io/apimachinery/pkg/util/wait" utilfeature "k8s.io/apiserver/pkg/util/feature" "k8s.io/client-go/tools/events" "k8s.io/klog/v2" "k8s.io/kubernetes/pkg/apis/core/v1/helper" kubefeatures "k8s.io/kubernetes/pkg/features" "k8s.io/kubernetes/pkg/proxy" "k8s.io/kubernetes/pkg/proxy/apis/config" proxyconfig "k8s.io/kubernetes/pkg/proxy/config" "k8s.io/kubernetes/pkg/proxy/healthcheck" "k8s.io/kubernetes/pkg/proxy/metaproxier" "k8s.io/kubernetes/pkg/proxy/metrics" "k8s.io/kubernetes/pkg/util/async" utilnet "k8s.io/utils/net" ) // KernelCompatTester tests whether the required kernel capabilities are // present to run the windows kernel proxier. type KernelCompatTester interface { IsCompatible() error } // CanUseWinKernelProxier returns true if we should use the Kernel Proxier // instead of the "classic" userspace Proxier. This is determined by checking // the windows kernel version and for the existence of kernel features. func CanUseWinKernelProxier(kcompat KernelCompatTester) (bool, error) { // Check that the kernel supports what we need. if err := kcompat.IsCompatible(); err != nil { return false, err } return true, nil } type WindowsKernelCompatTester struct{} // IsCompatible returns true if winkernel can support this mode of proxy func (lkct WindowsKernelCompatTester) IsCompatible() error { _, err := hcsshim.HNSListPolicyListRequest() if err != nil { return fmt.Errorf("Windows kernel is not compatible for Kernel mode") } return nil } type externalIPInfo struct { ip string hnsID string } type loadBalancerIngressInfo struct { ip string hnsID string } type loadBalancerInfo struct { hnsID string } type loadBalancerFlags struct { isILB bool isDSR bool localRoutedVIP bool useMUX bool preserveDIP bool sessionAffinity bool isIPv6 bool } // internal struct for string service information type serviceInfo struct { *proxy.BaseServiceInfo targetPort int externalIPs []*externalIPInfo loadBalancerIngressIPs []*loadBalancerIngressInfo hnsID string nodePorthnsID string policyApplied bool remoteEndpoint *endpointsInfo hns HostNetworkService preserveDIP bool localTrafficDSR bool } type hnsNetworkInfo struct { name string id string networkType string remoteSubnets []*remoteSubnetInfo } type remoteSubnetInfo struct { destinationPrefix string isolationID uint16 providerAddress string drMacAddress string } const NETWORK_TYPE_OVERLAY = "overlay" func newHostNetworkService() (HostNetworkService, hcn.SupportedFeatures) { var hns HostNetworkService hns = hnsV1{} supportedFeatures := hcn.GetSupportedFeatures() if supportedFeatures.Api.V2 { hns = hnsV2{} } return hns, supportedFeatures } func getNetworkName(hnsNetworkName string) (string, error) { if len(hnsNetworkName) == 0 { klog.V(3).InfoS("network-name flag not set. Checking environment variable") hnsNetworkName = os.Getenv("KUBE_NETWORK") if len(hnsNetworkName) == 0 { return "", fmt.Errorf("Environment variable KUBE_NETWORK and network-flag not initialized") } } return hnsNetworkName, nil } func getNetworkInfo(hns HostNetworkService, hnsNetworkName string) (*hnsNetworkInfo, error) { hnsNetworkInfo, err := hns.getNetworkByName(hnsNetworkName) for err != nil { klog.ErrorS(err, "Unable to find HNS Network specified. Please check network name and CNI deployment", "hnsNetworkName", hnsNetworkName) time.Sleep(1 * time.Second) hnsNetworkInfo, err = hns.getNetworkByName(hnsNetworkName) } return hnsNetworkInfo, err } func isOverlay(hnsNetworkInfo *hnsNetworkInfo) bool { return strings.EqualFold(hnsNetworkInfo.networkType, NETWORK_TYPE_OVERLAY) } // StackCompatTester tests whether the required kernel and network are dualstack capable type StackCompatTester interface { DualStackCompatible(networkName string) bool } type DualStackCompatTester struct{} func (t DualStackCompatTester) DualStackCompatible(networkName string) bool { dualStackFeatureEnabled := utilfeature.DefaultFeatureGate.Enabled(kubefeatures.IPv6DualStack) if !dualStackFeatureEnabled { return false } globals, err := hcn.GetGlobals() if err != nil { klog.ErrorS(err, "Unable to determine networking stack version. Falling back to single-stack") return false } if !kernelSupportsDualstack(globals.Version) { klog.InfoS("This version of Windows does not support dual-stack. Falling back to single-stack") return false } // check if network is using overlay hns, _ := newHostNetworkService() networkName, err = getNetworkName(networkName) if err != nil { klog.ErrorS(err, "unable to determine dual-stack status %v. Falling back to single-stack") return false } networkInfo, err := getNetworkInfo(hns, networkName) if err != nil { klog.ErrorS(err, "unable to determine dual-stack status %v. Falling back to single-stack") return false } if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.WinOverlay) && isOverlay(networkInfo) { // Overlay (VXLAN) networks on Windows do not support dual-stack networking today klog.InfoS("Winoverlay does not support dual-stack. Falling back to single-stack") return false } return true } // The hcsshim version logic has a bug that did not calculate the versioning of DualStack correctly. // DualStack is supported in WS 2004+ (10.0.19041+) where HCN component version is 11.10+ // https://github.com/microsoft/hcsshim/pull/1003#issuecomment-827930358 func kernelSupportsDualstack(currentVersion hcn.Version) bool { hnsVersion := fmt.Sprintf("%d.%d.0", currentVersion.Major, currentVersion.Minor) v, err := version.ParseSemantic(hnsVersion) if err != nil { return false } return v.AtLeast(version.MustParseSemantic("11.10.0")) } func Log(v interface{}, message string, level klog.Level) { klog.V(level).InfoS("%s", message, "spewConfig", spewSdump(v)) } func LogJson(interfaceName string, v interface{}, message string, level klog.Level) { jsonString, err := json.Marshal(v) if err == nil { klog.V(level).InfoS("%s", message, interfaceName, string(jsonString)) } } func spewSdump(v interface{}) string { scs := spew.NewDefaultConfig() scs.DisableMethods = true return scs.Sdump(v) } // internal struct for endpoints information type endpointsInfo struct { ip string port uint16 isLocal bool macAddress string hnsID string refCount *uint16 providerAddress string hns HostNetworkService // conditions ready bool serving bool terminating bool } // String is part of proxy.Endpoint interface. func (info *endpointsInfo) String() string { return net.JoinHostPort(info.ip, strconv.Itoa(int(info.port))) } // GetIsLocal is part of proxy.Endpoint interface. func (info *endpointsInfo) GetIsLocal() bool { return info.isLocal } // IsReady returns true if an endpoint is ready and not terminating. func (info *endpointsInfo) IsReady() bool { return info.ready } // IsServing returns true if an endpoint is ready, regardless of it's terminating state. func (info *endpointsInfo) IsServing() bool { return info.serving } // IsTerminating returns true if an endpoint is terminating. func (info *endpointsInfo) IsTerminating() bool { return info.terminating } // GetZoneHint returns the zone hint for the endpoint. func (info *endpointsInfo) GetZoneHints() sets.String { return sets.String{} } // IP returns just the IP part of the endpoint, it's a part of proxy.Endpoint interface. func (info *endpointsInfo) IP() string { return info.ip } // Port returns just the Port part of the endpoint. func (info *endpointsInfo) Port() (int, error) { return int(info.port), nil } // Equal is part of proxy.Endpoint interface. func (info *endpointsInfo) Equal(other proxy.Endpoint) bool { return info.String() == other.String() && info.GetIsLocal() == other.GetIsLocal() } // GetNodeName returns the NodeName for this endpoint. func (info *endpointsInfo) GetNodeName() string { return "" } // GetZone returns the Zone for this endpoint. func (info *endpointsInfo) GetZone() string { return "" } //Uses mac prefix and IPv4 address to return a mac address //This ensures mac addresses are unique for proper load balancing //There is a possibility of MAC collisions but this Mac address is used for remote endpoints only //and not sent on the wire. func conjureMac(macPrefix string, ip net.IP) string { if ip4 := ip.To4(); ip4 != nil { a, b, c, d := ip4[0], ip4[1], ip4[2], ip4[3] return fmt.Sprintf("%v-%02x-%02x-%02x-%02x", macPrefix, a, b, c, d) } else if ip6 := ip.To16(); ip6 != nil { a, b, c, d := ip6[15], ip6[14], ip6[13], ip6[12] return fmt.Sprintf("%v-%02x-%02x-%02x-%02x", macPrefix, a, b, c, d) } return "02-11-22-33-44-55" } func (proxier *Proxier) endpointsMapChange(oldEndpointsMap, newEndpointsMap proxy.EndpointsMap) { for svcPortName := range oldEndpointsMap { proxier.onEndpointsMapChange(&svcPortName) } for svcPortName := range newEndpointsMap { proxier.onEndpointsMapChange(&svcPortName) } } func (proxier *Proxier) onEndpointsMapChange(svcPortName *proxy.ServicePortName) { svc, exists := proxier.serviceMap[*svcPortName] if exists { svcInfo, ok := svc.(*serviceInfo) if !ok { klog.ErrorS(nil, "Failed to cast serviceInfo", "svcPortName", svcPortName.String()) return } klog.V(3).InfoS("Endpoints are modified. Service is stale", "svcPortName", svcPortName.String()) svcInfo.cleanupAllPolicies(proxier.endpointsMap[*svcPortName]) } else { // If no service exists, just cleanup the remote endpoints klog.V(3).InfoS("Endpoints are orphaned. Cleaning up") // Cleanup Endpoints references epInfos, exists := proxier.endpointsMap[*svcPortName] if exists { // Cleanup Endpoints references for _, ep := range epInfos { epInfo, ok := ep.(*endpointsInfo) if ok { epInfo.Cleanup() } } } } } func (proxier *Proxier) serviceMapChange(previous, current proxy.ServiceMap) { for svcPortName := range current { proxier.onServiceMapChange(&svcPortName) } for svcPortName := range previous { if _, ok := current[svcPortName]; ok { continue } proxier.onServiceMapChange(&svcPortName) } } func (proxier *Proxier) onServiceMapChange(svcPortName *proxy.ServicePortName) { svc, exists := proxier.serviceMap[*svcPortName] if exists { svcInfo, ok := svc.(*serviceInfo) if !ok { klog.ErrorS(nil, "Failed to cast serviceInfo", "svcPortName", svcPortName.String()) return } klog.V(3).InfoS("Updating existing service port", "svcPortName", svcPortName.String(), "clusterIP", svcInfo.ClusterIP(), "port", svcInfo.Port(), "protocol", svcInfo.Protocol()) svcInfo.cleanupAllPolicies(proxier.endpointsMap[*svcPortName]) } } // returns a new proxy.Endpoint which abstracts a endpointsInfo func (proxier *Proxier) newEndpointInfo(baseInfo *proxy.BaseEndpointInfo) proxy.Endpoint { portNumber, err := baseInfo.Port() if err != nil { portNumber = 0 } info := &endpointsInfo{ ip: baseInfo.IP(), port: uint16(portNumber), isLocal: baseInfo.GetIsLocal(), macAddress: conjureMac("02-11", net.ParseIP(baseInfo.IP())), refCount: new(uint16), hnsID: "", hns: proxier.hns, ready: baseInfo.Ready, serving: baseInfo.Serving, terminating: baseInfo.Terminating, } return info } func newSourceVIP(hns HostNetworkService, network string, ip string, mac string, providerAddress string) (*endpointsInfo, error) { hnsEndpoint := &endpointsInfo{ ip: ip, isLocal: true, macAddress: mac, providerAddress: providerAddress, ready: true, serving: true, terminating: false, } ep, err := hns.createEndpoint(hnsEndpoint, network) return ep, err } func (ep *endpointsInfo) Cleanup() { Log(ep, "Endpoint Cleanup", 3) if !ep.GetIsLocal() && ep.refCount != nil { *ep.refCount-- // Remove the remote hns endpoint, if no service is referring it // Never delete a Local Endpoint. Local Endpoints are already created by other entities. // Remove only remote endpoints created by this service if *ep.refCount <= 0 && !ep.GetIsLocal() { klog.V(4).InfoS("Removing endpoints, since no one is referencing it", "endpoint", ep.String()) err := ep.hns.deleteEndpoint(ep.hnsID) if err == nil { ep.hnsID = "" } else { klog.ErrorS(err, "Endpoint deletion failed", "ip", ep.IP()) } } ep.refCount = nil } } func (refCountMap endPointsReferenceCountMap) getRefCount(hnsID string) *uint16 { refCount, exists := refCountMap[hnsID] if !exists { refCountMap[hnsID] = new(uint16) refCount = refCountMap[hnsID] } return refCount } // returns a new proxy.ServicePort which abstracts a serviceInfo func (proxier *Proxier) newServiceInfo(port *v1.ServicePort, service *v1.Service, baseInfo *proxy.BaseServiceInfo) proxy.ServicePort { info := &serviceInfo{BaseServiceInfo: baseInfo} preserveDIP := service.Annotations["preserve-destination"] == "true" localTrafficDSR := service.Spec.ExternalTrafficPolicy == v1.ServiceExternalTrafficPolicyTypeLocal err := hcn.DSRSupported() if err != nil { preserveDIP = false localTrafficDSR = false } // targetPort is zero if it is specified as a name in port.TargetPort. // Its real value would be got later from endpoints. targetPort := 0 if port.TargetPort.Type == intstr.Int { targetPort = port.TargetPort.IntValue() } info.preserveDIP = preserveDIP info.targetPort = targetPort info.hns = proxier.hns info.localTrafficDSR = localTrafficDSR for _, eip := range service.Spec.ExternalIPs { info.externalIPs = append(info.externalIPs, &externalIPInfo{ip: eip}) } for _, ingress := range service.Status.LoadBalancer.Ingress { if net.ParseIP(ingress.IP) != nil { info.loadBalancerIngressIPs = append(info.loadBalancerIngressIPs, &loadBalancerIngressInfo{ip: ingress.IP}) } } return info } func (network hnsNetworkInfo) findRemoteSubnetProviderAddress(ip string) string { var providerAddress string for _, rs := range network.remoteSubnets { _, ipNet, err := net.ParseCIDR(rs.destinationPrefix) if err != nil { klog.ErrorS(err, "Failed to parse CIDR") } if ipNet.Contains(net.ParseIP(ip)) { providerAddress = rs.providerAddress } if ip == rs.providerAddress { providerAddress = rs.providerAddress } } return providerAddress } type endPointsReferenceCountMap map[string]*uint16 // Proxier is an hns based proxy for connections between a localhost:lport // and services that provide the actual backends. type Proxier struct { // TODO(imroc): implement node handler for winkernel proxier. proxyconfig.NoopNodeHandler // endpointsChanges and serviceChanges contains all changes to endpoints and // services that happened since policies were synced. For a single object, // changes are accumulated, i.e. previous is state from before all of them, // current is state after applying all of those. endpointsChanges *proxy.EndpointChangeTracker serviceChanges *proxy.ServiceChangeTracker endPointsRefCount endPointsReferenceCountMap mu sync.Mutex // protects the following fields serviceMap proxy.ServiceMap endpointsMap proxy.EndpointsMap // endpointSlicesSynced and servicesSynced are set to true when corresponding // objects are synced after startup. This is used to avoid updating hns policies // with some partial data after kube-proxy restart. endpointSlicesSynced bool servicesSynced bool isIPv6Mode bool initialized int32 syncRunner *async.BoundedFrequencyRunner // governs calls to syncProxyRules // These are effectively const and do not need the mutex to be held. masqueradeAll bool masqueradeMark string clusterCIDR string hostname string nodeIP net.IP recorder events.EventRecorder serviceHealthServer healthcheck.ServiceHealthServer healthzServer healthcheck.ProxierHealthUpdater // Since converting probabilities (floats) to strings is expensive // and we are using only probabilities in the format of 1/n, we are // precomputing some number of those and cache for future reuse. precomputedProbabilities []string hns HostNetworkService network hnsNetworkInfo sourceVip string hostMac string isDSR bool supportedFeatures hcn.SupportedFeatures } type localPort struct { desc string ip string port int protocol string } func (lp *localPort) String() string { return fmt.Sprintf("%q (%s:%d/%s)", lp.desc, lp.ip, lp.port, lp.protocol) } func Enum(p v1.Protocol) uint16 { if p == v1.ProtocolTCP { return 6 } if p == v1.ProtocolUDP { return 17 } if p == v1.ProtocolSCTP { return 132 } return 0 } type closeable interface { Close() error } // Proxier implements proxy.Provider var _ proxy.Provider = &Proxier{} // NewProxier returns a new Proxier func NewProxier( syncPeriod time.Duration, minSyncPeriod time.Duration, masqueradeAll bool, masqueradeBit int, clusterCIDR string, hostname string, nodeIP net.IP, recorder events.EventRecorder, healthzServer healthcheck.ProxierHealthUpdater, config config.KubeProxyWinkernelConfiguration, ) (*Proxier, error) { masqueradeValue := 1 << uint(masqueradeBit) masqueradeMark := fmt.Sprintf("%#08x/%#08x", masqueradeValue, masqueradeValue) if nodeIP == nil { klog.InfoS("invalid nodeIP, initializing kube-proxy with 127.0.0.1 as nodeIP") nodeIP = net.ParseIP("127.0.0.1") } if len(clusterCIDR) == 0 { klog.InfoS("clusterCIDR not specified, unable to distinguish between internal and external traffic") } serviceHealthServer := healthcheck.NewServiceHealthServer(hostname, recorder) hns, supportedFeatures := newHostNetworkService() hnsNetworkName, err := getNetworkName(config.NetworkName) if err != nil { return nil, err } klog.V(3).InfoS("Cleaning up old HNS policy lists") deleteAllHnsLoadBalancerPolicy() // Get HNS network information hnsNetworkInfo, err := getNetworkInfo(hns, hnsNetworkName) if err != nil { return nil, err } // Network could have been detected before Remote Subnet Routes are applied or ManagementIP is updated // Sleep and update the network to include new information if isOverlay(hnsNetworkInfo) { time.Sleep(10 * time.Second) hnsNetworkInfo, err = hns.getNetworkByName(hnsNetworkName) if err != nil { return nil, fmt.Errorf("Could not find HNS network %s", hnsNetworkName) } } klog.V(1).InfoS("Hns Network loaded", "hnsNetworkInfo", hnsNetworkInfo) isDSR := config.EnableDSR if isDSR && !utilfeature.DefaultFeatureGate.Enabled(kubefeatures.WinDSR) { return nil, fmt.Errorf("WinDSR feature gate not enabled") } err = hcn.DSRSupported() if isDSR && err != nil { return nil, err } var sourceVip string var hostMac string if isOverlay(hnsNetworkInfo) { if !utilfeature.DefaultFeatureGate.Enabled(kubefeatures.WinOverlay) { return nil, fmt.Errorf("WinOverlay feature gate not enabled") } err = hcn.RemoteSubnetSupported() if err != nil { return nil, err } sourceVip = config.SourceVip if len(sourceVip) == 0 { return nil, fmt.Errorf("source-vip flag not set") } if nodeIP.IsUnspecified() { // attempt to get the correct ip address klog.V(2).InfoS("node ip was unspecified. Attempting to find node ip") nodeIP, err = apiutil.ResolveBindAddress(nodeIP) if err != nil { klog.InfoS("failed to find an ip. You may need set the --bind-address flag", "err", err) } } interfaces, _ := net.Interfaces() //TODO create interfaces for _, inter := range interfaces { addresses, _ := inter.Addrs() for _, addr := range addresses { addrIP, _, _ := net.ParseCIDR(addr.String()) if addrIP.String() == nodeIP.String() { klog.V(2).InfoS("record Host MAC address", "addr", inter.HardwareAddr.String()) hostMac = inter.HardwareAddr.String() } } } if len(hostMac) == 0 { return nil, fmt.Errorf("Could not find host mac address for %s", nodeIP) } } isIPv6 := utilnet.IsIPv6(nodeIP) proxier := &Proxier{ endPointsRefCount: make(endPointsReferenceCountMap), serviceMap: make(proxy.ServiceMap), endpointsMap: make(proxy.EndpointsMap), masqueradeAll: masqueradeAll, masqueradeMark: masqueradeMark, clusterCIDR: clusterCIDR, hostname: hostname, nodeIP: nodeIP, recorder: recorder, serviceHealthServer: serviceHealthServer, healthzServer: healthzServer, hns: hns, network: *hnsNetworkInfo, sourceVip: sourceVip, hostMac: hostMac, isDSR: isDSR, supportedFeatures: supportedFeatures, isIPv6Mode: isIPv6, } ipFamily := v1.IPv4Protocol if isIPv6 { ipFamily = v1.IPv6Protocol } serviceChanges := proxy.NewServiceChangeTracker(proxier.newServiceInfo, ipFamily, recorder, proxier.serviceMapChange) endPointChangeTracker := proxy.NewEndpointChangeTracker(hostname, proxier.newEndpointInfo, ipFamily, recorder, proxier.endpointsMapChange) proxier.endpointsChanges = endPointChangeTracker proxier.serviceChanges = serviceChanges burstSyncs := 2 klog.V(3).InfoS("record sync param", "minSyncPeriod", minSyncPeriod, "syncPeriod", syncPeriod, "burstSyncs", burstSyncs) proxier.syncRunner = async.NewBoundedFrequencyRunner("sync-runner", proxier.syncProxyRules, minSyncPeriod, syncPeriod, burstSyncs) return proxier, nil } func NewDualStackProxier( syncPeriod time.Duration, minSyncPeriod time.Duration, masqueradeAll bool, masqueradeBit int, clusterCIDR string, hostname string, nodeIP [2]net.IP, recorder events.EventRecorder, healthzServer healthcheck.ProxierHealthUpdater, config config.KubeProxyWinkernelConfiguration, ) (proxy.Provider, error) { // Create an ipv4 instance of the single-stack proxier ipv4Proxier, err := NewProxier(syncPeriod, minSyncPeriod, masqueradeAll, masqueradeBit, clusterCIDR, hostname, nodeIP[0], recorder, healthzServer, config) if err != nil { return nil, fmt.Errorf("unable to create ipv4 proxier: %v, hostname: %s, clusterCIDR : %s, nodeIP:%v", err, hostname, clusterCIDR, nodeIP[0]) } ipv6Proxier, err := NewProxier(syncPeriod, minSyncPeriod, masqueradeAll, masqueradeBit, clusterCIDR, hostname, nodeIP[1], recorder, healthzServer, config) if err != nil { return nil, fmt.Errorf("unable to create ipv6 proxier: %v, hostname: %s, clusterCIDR : %s, nodeIP:%v", err, hostname, clusterCIDR, nodeIP[1]) } // Return a meta-proxier that dispatch calls between the two // single-stack proxier instances return metaproxier.NewMetaProxier(ipv4Proxier, ipv6Proxier), nil } // CleanupLeftovers removes all hns rules created by the Proxier // It returns true if an error was encountered. Errors are logged. func CleanupLeftovers() (encounteredError bool) { // Delete all Hns Load Balancer Policies deleteAllHnsLoadBalancerPolicy() // TODO // Delete all Hns Remote endpoints return encounteredError } func (svcInfo *serviceInfo) cleanupAllPolicies(endpoints []proxy.Endpoint) { Log(svcInfo, "Service Cleanup", 3) // Skip the svcInfo.policyApplied check to remove all the policies svcInfo.deleteAllHnsLoadBalancerPolicy() // Cleanup Endpoints references for _, ep := range endpoints { epInfo, ok := ep.(*endpointsInfo) if ok { epInfo.Cleanup() } } if svcInfo.remoteEndpoint != nil { svcInfo.remoteEndpoint.Cleanup() } svcInfo.policyApplied = false } func (svcInfo *serviceInfo) deleteAllHnsLoadBalancerPolicy() { // Remove the Hns Policy corresponding to this service hns := svcInfo.hns hns.deleteLoadBalancer(svcInfo.hnsID) svcInfo.hnsID = "" hns.deleteLoadBalancer(svcInfo.nodePorthnsID) svcInfo.nodePorthnsID = "" for _, externalIP := range svcInfo.externalIPs { hns.deleteLoadBalancer(externalIP.hnsID) externalIP.hnsID = "" } for _, lbIngressIP := range svcInfo.loadBalancerIngressIPs { hns.deleteLoadBalancer(lbIngressIP.hnsID) lbIngressIP.hnsID = "" } } func deleteAllHnsLoadBalancerPolicy() { plists, err := hcsshim.HNSListPolicyListRequest() if err != nil { return } for _, plist := range plists { LogJson("policyList", plist, "Remove Policy", 3) _, err = plist.Delete() if err != nil { klog.ErrorS(err, "Failed to delete policy list") } } } func getHnsNetworkInfo(hnsNetworkName string) (*hnsNetworkInfo, error) { hnsnetwork, err := hcsshim.GetHNSNetworkByName(hnsNetworkName) if err != nil { klog.ErrorS(err, "Failed to get HNS Network by name") return nil, err } return &hnsNetworkInfo{ id: hnsnetwork.Id, name: hnsnetwork.Name, networkType: hnsnetwork.Type, }, nil } // Sync is called to synchronize the proxier state to hns as soon as possible. func (proxier *Proxier) Sync() { if proxier.healthzServer != nil { proxier.healthzServer.QueuedUpdate() } metrics.SyncProxyRulesLastQueuedTimestamp.SetToCurrentTime() proxier.syncRunner.Run() } // SyncLoop runs periodic work. This is expected to run as a goroutine or as the main loop of the app. It does not return. func (proxier *Proxier) SyncLoop() { // Update healthz timestamp at beginning in case Sync() never succeeds. if proxier.healthzServer != nil { proxier.healthzServer.Updated() } // synthesize "last change queued" time as the informers are syncing. metrics.SyncProxyRulesLastQueuedTimestamp.SetToCurrentTime() proxier.syncRunner.Loop(wait.NeverStop) } func (proxier *Proxier) setInitialized(value bool) { var initialized int32 if value { initialized = 1 } atomic.StoreInt32(&proxier.initialized, initialized) } func (proxier *Proxier) isInitialized() bool { return atomic.LoadInt32(&proxier.initialized) > 0 } // OnServiceAdd is called whenever creation of new service object // is observed. func (proxier *Proxier) OnServiceAdd(service *v1.Service) { proxier.OnServiceUpdate(nil, service) } // OnServiceUpdate is called whenever modification of an existing // service object is observed. func (proxier *Proxier) OnServiceUpdate(oldService, service *v1.Service) { if proxier.serviceChanges.Update(oldService, service) && proxier.isInitialized() { proxier.Sync() } } // OnServiceDelete is called whenever deletion of an existing service // object is observed. func (proxier *Proxier) OnServiceDelete(service *v1.Service) { proxier.OnServiceUpdate(service, nil) } // OnServiceSynced is called once all the initial event handlers were // called and the state is fully propagated to local cache. func (proxier *Proxier) OnServiceSynced() { proxier.mu.Lock() proxier.servicesSynced = true proxier.setInitialized(proxier.endpointSlicesSynced) proxier.mu.Unlock() // Sync unconditionally - this is called once per lifetime. proxier.syncProxyRules() } func shouldSkipService(svcName types.NamespacedName, service *v1.Service) bool { // if ClusterIP is "None" or empty, skip proxying if !helper.IsServiceIPSet(service) { klog.V(3).InfoS("Skipping service due to clusterIP", "svcName", svcName.String(), "clusterIP", service.Spec.ClusterIP) return true } // Even if ClusterIP is set, ServiceTypeExternalName services don't get proxied if service.Spec.Type == v1.ServiceTypeExternalName { klog.V(3).InfoS("Skipping service due to Type=ExternalName", "svcName", svcName.String()) return true } return false } // The following methods exist to implement the proxier interface, however // winkernel proxier only uses EndpointSlice, so the following are noops. // OnEndpointsAdd is called whenever creation of new endpoints object // is observed. func (proxier *Proxier) OnEndpointsAdd(endpoints *v1.Endpoints) {} // OnEndpointsUpdate is called whenever modification of an existing // endpoints object is observed. func (proxier *Proxier) OnEndpointsUpdate(oldEndpoints, endpoints *v1.Endpoints) {} // OnEndpointsDelete is called whenever deletion of an existing endpoints // object is observed. func (proxier *Proxier) OnEndpointsDelete(endpoints *v1.Endpoints) {} // OnEndpointsSynced is called once all the initial event handlers were // called and the state is fully propagated to local cache. func (proxier *Proxier) OnEndpointsSynced() {} // OnEndpointSliceAdd is called whenever creation of a new endpoint slice object // is observed. func (proxier *Proxier) OnEndpointSliceAdd(endpointSlice *discovery.EndpointSlice) { if proxier.endpointsChanges.EndpointSliceUpdate(endpointSlice, false) && proxier.isInitialized() { proxier.Sync() } } // OnEndpointSliceUpdate is called whenever modification of an existing endpoint // slice object is observed. func (proxier *Proxier) OnEndpointSliceUpdate(_, endpointSlice *discovery.EndpointSlice) { if proxier.endpointsChanges.EndpointSliceUpdate(endpointSlice, false) && proxier.isInitialized() { proxier.Sync() } } // OnEndpointSliceDelete is called whenever deletion of an existing endpoint slice // object is observed. func (proxier *Proxier) OnEndpointSliceDelete(endpointSlice *discovery.EndpointSlice) { if proxier.endpointsChanges.EndpointSliceUpdate(endpointSlice, true) && proxier.isInitialized() { proxier.Sync() } } // OnEndpointSlicesSynced is called once all the initial event handlers were // called and the state is fully propagated to local cache. func (proxier *Proxier) OnEndpointSlicesSynced() { proxier.mu.Lock() proxier.endpointSlicesSynced = true proxier.setInitialized(proxier.servicesSynced) proxier.mu.Unlock() // Sync unconditionally - this is called once per lifetime. proxier.syncProxyRules() } func (proxier *Proxier) cleanupAllPolicies() { for svcName, svc := range proxier.serviceMap { svcInfo, ok := svc.(*serviceInfo) if !ok { klog.ErrorS(nil, "Failed to cast serviceInfo", "svcName", svcName.String()) continue } svcInfo.cleanupAllPolicies(proxier.endpointsMap[svcName]) } } func isNetworkNotFoundError(err error) bool { if err == nil { return false } if _, ok := err.(hcn.NetworkNotFoundError); ok { return true } if _, ok := err.(hcsshim.NetworkNotFoundError); ok { return true } return false } // This is where all of the hns save/restore calls happen. // assumes proxier.mu is held func (proxier *Proxier) syncProxyRules() { proxier.mu.Lock() defer proxier.mu.Unlock() start := time.Now() defer func() { SyncProxyRulesLatency.Observe(metrics.SinceInSeconds(start)) klog.V(4).InfoS("syncProxyRules complete", "elapsed", time.Since(start)) }() // don't sync rules till we've received services and endpoints if !proxier.isInitialized() { klog.V(2).InfoS("Not syncing hns until Services and Endpoints have been received from master") return } hnsNetworkName := proxier.network.name hns := proxier.hns prevNetworkID := proxier.network.id updatedNetwork, err := hns.getNetworkByName(hnsNetworkName) if updatedNetwork == nil || updatedNetwork.id != prevNetworkID || isNetworkNotFoundError(err) { klog.InfoS("The HNS network %s is not present or has changed since the last sync. Please check the CNI deployment", "hnsNetworkName", hnsNetworkName) proxier.cleanupAllPolicies() if updatedNetwork != nil { proxier.network = *updatedNetwork } return } // We assume that if this was called, we really want to sync them, // even if nothing changed in the meantime. In other words, callers are // responsible for detecting no-op changes and not calling this function. serviceUpdateResult := proxier.serviceMap.Update(proxier.serviceChanges) endpointUpdateResult := proxier.endpointsMap.Update(proxier.endpointsChanges) staleServices := serviceUpdateResult.UDPStaleClusterIP // merge stale services gathered from updateEndpointsMap for _, svcPortName := range endpointUpdateResult.StaleServiceNames { if svcInfo, ok := proxier.serviceMap[svcPortName]; ok && svcInfo != nil && svcInfo.Protocol() == v1.ProtocolUDP { klog.V(2).InfoS("Stale udp service", "svcPortName", svcPortName.String(), "clusterIP", svcInfo.ClusterIP().String()) staleServices.Insert(svcInfo.ClusterIP().String()) } } if strings.EqualFold(proxier.network.networkType, NETWORK_TYPE_OVERLAY) { existingSourceVip, err := hns.getEndpointByIpAddress(proxier.sourceVip, hnsNetworkName) if existingSourceVip == nil { _, err = newSourceVIP(hns, hnsNetworkName, proxier.sourceVip, proxier.hostMac, proxier.nodeIP.String()) } if err != nil { klog.ErrorS(err, "Source Vip endpoint creation failed") return } } klog.V(3).InfoS("Syncing Policies") // Program HNS by adding corresponding policies for each service. for svcName, svc := range proxier.serviceMap { svcInfo, ok := svc.(*serviceInfo) if !ok { klog.ErrorS(nil, "Failed to cast serviceInfo", "svcName", svcName.String()) continue } if svcInfo.policyApplied { klog.V(4).InfoS("Policy already applied", "spewConfig", spewSdump(svcInfo)) continue } if strings.EqualFold(proxier.network.networkType, NETWORK_TYPE_OVERLAY) { serviceVipEndpoint, _ := hns.getEndpointByIpAddress(svcInfo.ClusterIP().String(), hnsNetworkName) if serviceVipEndpoint == nil { klog.V(4).InfoS("No existing remote endpoint", "ip", svcInfo.ClusterIP().String()) hnsEndpoint := &endpointsInfo{ ip: svcInfo.ClusterIP().String(), isLocal: false, macAddress: proxier.hostMac, providerAddress: proxier.nodeIP.String(), } newHnsEndpoint, err := hns.createEndpoint(hnsEndpoint, hnsNetworkName) if err != nil { klog.ErrorS(err, "Remote endpoint creation failed for service VIP") continue } newHnsEndpoint.refCount = proxier.endPointsRefCount.getRefCount(newHnsEndpoint.hnsID) *newHnsEndpoint.refCount++ svcInfo.remoteEndpoint = newHnsEndpoint } } var hnsEndpoints []endpointsInfo var hnsLocalEndpoints []endpointsInfo klog.V(4).InfoS("Applying Policy", "serviceInfo", svcName.String()) // Create Remote endpoints for every endpoint, corresponding to the service containsPublicIP := false containsNodeIP := false for _, epInfo := range proxier.endpointsMap[svcName] { ep, ok := epInfo.(*endpointsInfo) if !ok { klog.ErrorS(nil, "Failed to cast endpointsInfo", "svcName", svcName.String()) continue } if !ep.IsReady() { continue } var newHnsEndpoint *endpointsInfo hnsNetworkName := proxier.network.name var err error // targetPort is zero if it is specified as a name in port.TargetPort, so the real port should be got from endpoints. // Note that hcsshim.AddLoadBalancer() doesn't support endpoints with different ports, so only port from first endpoint is used. // TODO(feiskyer): add support of different endpoint ports after hcsshim.AddLoadBalancer() add that. if svcInfo.targetPort == 0 { svcInfo.targetPort = int(ep.port) } if len(ep.hnsID) > 0 { newHnsEndpoint, err = hns.getEndpointByID(ep.hnsID) } if newHnsEndpoint == nil { // First check if an endpoint resource exists for this IP, on the current host // A Local endpoint could exist here already // A remote endpoint was already created and proxy was restarted newHnsEndpoint, err = hns.getEndpointByIpAddress(ep.IP(), hnsNetworkName) } if newHnsEndpoint == nil { if ep.GetIsLocal() { klog.ErrorS(err, "Local endpoint not found: on network", "ip", ep.IP(), "hnsNetworkName", hnsNetworkName) continue } if strings.EqualFold(proxier.network.networkType, NETWORK_TYPE_OVERLAY) { klog.InfoS("Updating network to check for new remote subnet policies", "networkName", proxier.network.name) networkName := proxier.network.name updatedNetwork, err := hns.getNetworkByName(networkName) if err != nil { klog.ErrorS(err, "Unable to find HNS Network specified. Please check network name and CNI deployment", "hnsNetworkName", hnsNetworkName) proxier.cleanupAllPolicies() return } proxier.network = *updatedNetwork providerAddress := proxier.network.findRemoteSubnetProviderAddress(ep.IP()) if len(providerAddress) == 0 { klog.InfoS("Could not find provider address. Assuming it is a public IP", "ip", ep.IP()) providerAddress = proxier.nodeIP.String() } hnsEndpoint := &endpointsInfo{ ip: ep.ip, isLocal: false, macAddress: conjureMac("02-11", net.ParseIP(ep.ip)), providerAddress: providerAddress, } newHnsEndpoint, err = hns.createEndpoint(hnsEndpoint, hnsNetworkName) if err != nil { klog.ErrorS(err, "Remote endpoint creation failed", "spewConfig", spewSdump(hnsEndpoint)) continue } } else { hnsEndpoint := &endpointsInfo{ ip: ep.ip, isLocal: false, macAddress: ep.macAddress, } newHnsEndpoint, err = hns.createEndpoint(hnsEndpoint, hnsNetworkName) if err != nil { klog.ErrorS(err, "Remote endpoint creation failed") continue } } } // For Overlay networks 'SourceVIP' on an Load balancer Policy can either be chosen as // a) Source VIP configured on kube-proxy (or) // b) Node IP of the current node // // For L2Bridge network the Source VIP is always the NodeIP of the current node and the same // would be configured on kube-proxy as SourceVIP // // The logic for choosing the SourceVIP in Overlay networks is based on the backend endpoints: // a) Endpoints are any IP's outside the cluster ==> Choose NodeIP as the SourceVIP // b) Endpoints are IP addresses of a remote node => Choose NodeIP as the SourceVIP // c) Everything else (Local POD's, Remote POD's, Node IP of current node) ==> Choose the configured SourceVIP if strings.EqualFold(proxier.network.networkType, NETWORK_TYPE_OVERLAY) && !ep.GetIsLocal() { providerAddress := proxier.network.findRemoteSubnetProviderAddress(ep.IP()) isNodeIP := (ep.IP() == providerAddress) isPublicIP := (len(providerAddress) == 0) klog.InfoS("Endpoint on overlay network", "ip", ep.IP(), "hnsNetworkName", hnsNetworkName, "isNodeIP", isNodeIP, "isPublicIP", isPublicIP) containsNodeIP = containsNodeIP || isNodeIP containsPublicIP = containsPublicIP || isPublicIP } // Save the hnsId for reference LogJson("endpointInfo", newHnsEndpoint, "Hns Endpoint resource", 1) hnsEndpoints = append(hnsEndpoints, *newHnsEndpoint) if newHnsEndpoint.GetIsLocal() { hnsLocalEndpoints = append(hnsLocalEndpoints, *newHnsEndpoint) } else { // We only share the refCounts for remote endpoints ep.refCount = proxier.endPointsRefCount.getRefCount(newHnsEndpoint.hnsID) *ep.refCount++ } ep.hnsID = newHnsEndpoint.hnsID Log(ep, "Endpoint resource found", 3) } klog.V(3).InfoS("Associated endpoints for service", "spewConfig", spewSdump(hnsEndpoints), "svcName", svcName.String()) if len(svcInfo.hnsID) > 0 { // This should not happen klog.InfoS("Load Balancer already exists -- Debug ", "hnsID", svcInfo.hnsID) } if len(hnsEndpoints) == 0 { klog.ErrorS(nil, "Endpoint information not available for service. Not applying any policy", "svcName", svcName.String()) continue } klog.V(4).Infof("Trying to Apply Policies for service", "spewConfig", spewSdump(svcInfo)) var hnsLoadBalancer *loadBalancerInfo var sourceVip = proxier.sourceVip if containsPublicIP || containsNodeIP { sourceVip = proxier.nodeIP.String() } sessionAffinityClientIP := svcInfo.SessionAffinityType() == v1.ServiceAffinityClientIP if sessionAffinityClientIP && !proxier.supportedFeatures.SessionAffinity { klog.InfoS("Session Affinity is not supported on this version of Windows.") } hnsLoadBalancer, err := hns.getLoadBalancer( hnsEndpoints, loadBalancerFlags{isDSR: proxier.isDSR, isIPv6: proxier.isIPv6Mode, sessionAffinity: sessionAffinityClientIP}, sourceVip, svcInfo.ClusterIP().String(), Enum(svcInfo.Protocol()), uint16(svcInfo.targetPort), uint16(svcInfo.Port()), ) if err != nil { klog.ErrorS(err, "Policy creation failed") continue } svcInfo.hnsID = hnsLoadBalancer.hnsID klog.V(3).InfoS("Hns LoadBalancer resource created for cluster ip resources", "clusterIP", svcInfo.ClusterIP(), "hnsID", hnsLoadBalancer.hnsID) // If nodePort is specified, user should be able to use nodeIP:nodePort to reach the backend endpoints if svcInfo.NodePort() > 0 { // If the preserve-destination service annotation is present, we will disable routing mesh for NodePort. // This means that health services can use Node Port without falsely getting results from a different node. nodePortEndpoints := hnsEndpoints if svcInfo.preserveDIP || svcInfo.localTrafficDSR { nodePortEndpoints = hnsLocalEndpoints } hnsLoadBalancer, err := hns.getLoadBalancer( nodePortEndpoints, loadBalancerFlags{isDSR: svcInfo.localTrafficDSR, localRoutedVIP: true, sessionAffinity: sessionAffinityClientIP, isIPv6: proxier.isIPv6Mode}, sourceVip, "", Enum(svcInfo.Protocol()), uint16(svcInfo.targetPort), uint16(svcInfo.NodePort()), ) if err != nil { klog.ErrorS(err, "Policy creation failed") continue } svcInfo.nodePorthnsID = hnsLoadBalancer.hnsID klog.V(3).InfoS("Hns LoadBalancer resource created for nodePort resources", "clusterIP", svcInfo.ClusterIP(), "hnsID", hnsLoadBalancer.hnsID) } // Create a Load Balancer Policy for each external IP for _, externalIP := range svcInfo.externalIPs { // Disable routing mesh if ExternalTrafficPolicy is set to local externalIPEndpoints := hnsEndpoints if svcInfo.localTrafficDSR { externalIPEndpoints = hnsLocalEndpoints } // Try loading existing policies, if already available hnsLoadBalancer, err = hns.getLoadBalancer( externalIPEndpoints, loadBalancerFlags{isDSR: svcInfo.localTrafficDSR, sessionAffinity: sessionAffinityClientIP, isIPv6: proxier.isIPv6Mode}, sourceVip, externalIP.ip, Enum(svcInfo.Protocol()), uint16(svcInfo.targetPort), uint16(svcInfo.Port()), ) if err != nil { klog.ErrorS(err, "Policy creation failed") continue } externalIP.hnsID = hnsLoadBalancer.hnsID klog.V(3).InfoS("Hns LoadBalancer resource created for externalIP resources", "externalIP", externalIP, "hnsID", hnsLoadBalancer.hnsID) } // Create a Load Balancer Policy for each loadbalancer ingress for _, lbIngressIP := range svcInfo.loadBalancerIngressIPs { // Try loading existing policies, if already available lbIngressEndpoints := hnsEndpoints if svcInfo.preserveDIP || svcInfo.localTrafficDSR { lbIngressEndpoints = hnsLocalEndpoints } hnsLoadBalancer, err := hns.getLoadBalancer( lbIngressEndpoints, loadBalancerFlags{isDSR: svcInfo.preserveDIP || svcInfo.localTrafficDSR, useMUX: svcInfo.preserveDIP, preserveDIP: svcInfo.preserveDIP, sessionAffinity: sessionAffinityClientIP, isIPv6: proxier.isIPv6Mode}, sourceVip, lbIngressIP.ip, Enum(svcInfo.Protocol()), uint16(svcInfo.targetPort), uint16(svcInfo.Port()), ) if err != nil { klog.ErrorS(err, "Policy creation failed") continue } lbIngressIP.hnsID = hnsLoadBalancer.hnsID klog.V(3).InfoS("Hns LoadBalancer resource created for loadBalancer Ingress resources", "lbIngressIP", lbIngressIP) } svcInfo.policyApplied = true Log(svcInfo, "+++Policy Successfully applied for service +++", 2) } if proxier.healthzServer != nil { proxier.healthzServer.Updated() } SyncProxyRulesLastTimestamp.SetToCurrentTime() // Update service healthchecks. The endpoints list might include services that are // not "OnlyLocal", but the services list will not, and the serviceHealthServer // will just drop those endpoints. if err := proxier.serviceHealthServer.SyncServices(serviceUpdateResult.HCServiceNodePorts); err != nil { klog.ErrorS(err, "Error syncing healthcheck services") } if err := proxier.serviceHealthServer.SyncEndpoints(endpointUpdateResult.HCEndpointsLocalIPSize); err != nil { klog.ErrorS(err, "Error syncing healthcheck endpoints") } // Finish housekeeping. // TODO: these could be made more consistent. for _, svcIP := range staleServices.UnsortedList() { // TODO : Check if this is required to cleanup stale services here klog.V(5).InfoS("Pending delete stale service IP connections", "ip", svcIP) } // remove stale endpoint refcount entries for hnsID, referenceCount := range proxier.endPointsRefCount { if *referenceCount <= 0 { delete(proxier.endPointsRefCount, hnsID) } } }