diff --git a/pkg/agent/netpol/namespace.go b/pkg/agent/netpol/namespace.go
new file mode 100644
index 0000000000..4c342092f2
--- /dev/null
+++ b/pkg/agent/netpol/namespace.go
@@ -0,0 +1,80 @@
+package netpol
+
+import (
+	"reflect"
+
+	"github.com/golang/glog"
+	api "k8s.io/api/core/v1"
+	"k8s.io/client-go/tools/cache"
+)
+
+// newNamespaceEventHandler returns the informer callbacks for namespaces. Each
+// callback validates the object it receives and forwards it to the matching
+// handleNamespace* method; objects of an unexpected type are logged and dropped.
+func (npc *NetworkPolicyController) newNamespaceEventHandler() cache.ResourceEventHandler {
+	return cache.ResourceEventHandlerFuncs{
+		AddFunc: func(obj interface{}) {
+			namespace, ok := obj.(*api.Namespace)
+			if !ok {
+				glog.Errorf("unexpected object type: %T", obj)
+				return
+			}
+			npc.handleNamespaceAdd(namespace)
+		},
+		UpdateFunc: func(oldObj, newObj interface{}) {
+			oldNamespace, oldOK := oldObj.(*api.Namespace)
+			newNamespace, newOK := newObj.(*api.Namespace)
+			if !oldOK || !newOK {
+				glog.Errorf("unexpected object types: %T, %T", oldObj, newObj)
+				return
+			}
+			npc.handleNamespaceUpdate(oldNamespace, newNamespace)
+		},
+		DeleteFunc: func(obj interface{}) {
+			// A delete missed by the watch arrives wrapped in a tombstone; unwrap
+			// it so both cases share the same type check below.
+			if tombstone, ok := obj.(cache.DeletedFinalStateUnknown); ok {
+				obj = tombstone.Obj
+			}
+			namespace, ok := obj.(*api.Namespace)
+			if !ok {
+				glog.Errorf("unexpected object type: %T", obj)
+				return
+			}
+			npc.handleNamespaceDelete(namespace)
+		},
+	}
+}
+
+// handleNamespaceAdd requests a full sync when a namespace with labels is added.
+// A namespace whose Labels map is nil is ignored.
+func (npc *NetworkPolicyController) handleNamespaceAdd(obj *api.Namespace) {
+	if obj.Labels == nil {
+		return
+	}
+	glog.V(2).Infof("Received namespace: %s add event", obj.Name)
+
+	npc.RequestFullSync()
+}
+
+// handleNamespaceUpdate requests a full sync only when the namespace labels
+// differ between the old and new object.
+func (npc *NetworkPolicyController) handleNamespaceUpdate(oldObj, newObj *api.Namespace) {
+	if reflect.DeepEqual(oldObj.Labels, newObj.Labels) {
+		return
+	}
+	glog.V(2).Infof("Received update for namespace: %s", newObj.Name)
+
+	npc.RequestFullSync()
+}
+
+// handleNamespaceDelete requests a full sync when a namespace with labels is
+// deleted. A namespace whose Labels map is nil is ignored.
+func (npc *NetworkPolicyController) handleNamespaceDelete(obj *api.Namespace) {
+	if obj.Labels == nil {
+		return
+	}
+	glog.V(2).Infof("Received namespace: %s delete event", obj.Name)
+
+	npc.RequestFullSync()
+}
diff --git a/pkg/agent/netpol/network_policy_controller.go b/pkg/agent/netpol/network_policy_controller.go
index 2aefd33667..d7a21908c6 100644
--- a/pkg/agent/netpol/network_policy_controller.go
+++ b/pkg/agent/netpol/network_policy_controller.go
@@ -1,15 +1,8 @@
-// Apache
License v2.0 (copyright Cloud Native Labs & Rancher Labs) -// - modified from https://github.com/cloudnativelabs/kube-router/blob/d6f9f31a7b/pkg/controllers/netpol/network_policy_controller.go - -// +build !windows - package netpol import ( - "context" "crypto/sha256" "encoding/base32" - "errors" "fmt" "net" "regexp" @@ -18,31 +11,27 @@ import ( "sync" "time" - // "github.com/cloudnativelabs/kube-router/pkg/healthcheck" - // "github.com/cloudnativelabs/kube-router/pkg/metrics" - // "github.com/cloudnativelabs/kube-router/pkg/options" - // "github.com/cloudnativelabs/kube-router/pkg/utils" - + "github.com/cloudnativelabs/kube-router/pkg/healthcheck" + "github.com/cloudnativelabs/kube-router/pkg/metrics" + "github.com/cloudnativelabs/kube-router/pkg/options" + "github.com/cloudnativelabs/kube-router/pkg/utils" "github.com/coreos/go-iptables/iptables" - api "k8s.io/api/core/v1" - apiextensions "k8s.io/api/extensions/v1beta1" - networking "k8s.io/api/networking/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "github.com/golang/glog" + "github.com/prometheus/client_golang/prometheus" + "k8s.io/apimachinery/pkg/labels" - "k8s.io/apimachinery/pkg/util/intstr" - "k8s.io/client-go/informers" "k8s.io/client-go/kubernetes" - listers "k8s.io/client-go/listers/core/v1" "k8s.io/client-go/tools/cache" - log "k8s.io/klog" ) const ( - networkPolicyAnnotation = "net.beta.kubernetes.io/network-policy" kubePodFirewallChainPrefix = "KUBE-POD-FW-" kubeNetworkPolicyChainPrefix = "KUBE-NWPLCY-" kubeSourceIPSetPrefix = "KUBE-SRC-" kubeDestinationIPSetPrefix = "KUBE-DST-" + kubeInputChainName = "KUBE-ROUTER-INPUT" + kubeForwardChainName = "KUBE-ROUTER-FORWARD" + kubeOutputChainName = "KUBE-ROUTER-OUTPUT" ) // Network policy controller provides both ingress and egress filtering for the pods as per the defined network @@ -52,24 +41,24 @@ const ( // filter table a rule is added to jump the traffic originating (in case of egress network policy) from the pod // or destined (in case of 
ingress network policy) to the pod specific iptables chain. Each // pod specific iptables chain has rules to jump to the network polices chains, that pod matches. So packet -// originating/destined from/to pod goes through filter table's, FORWARD chain, followed by pod specific chain, +// originating/destined from/to pod goes through filter table's, FORWARD chain, followed by pod specific chain, // followed by one or more network policy chains, till there is a match which will accept the packet, or gets // dropped by the rule in the pod chain, if there is no match. -// NetworkPolicyController struct to hold information required by NetworkPolicyController +// NetworkPolicyController struct to hold information required by NetworkPolicyController type NetworkPolicyController struct { - nodeIP net.IP - nodeHostName string - mu sync.Mutex - syncPeriod time.Duration - MetricsEnabled bool - v1NetworkPolicy bool - readyForUpdates bool - // healthChan chan<- *healthcheck.ControllerHeartbeat + nodeIP net.IP + nodeHostName string + serviceClusterIPRange net.IPNet + serviceExternalIPRanges []net.IPNet + serviceNodePortRange string + mu sync.Mutex + syncPeriod time.Duration + MetricsEnabled bool + healthChan chan<- *healthcheck.ControllerHeartbeat + fullSyncRequestChan chan struct{} - // list of all active network policies expressed as networkPolicyInfo - networkPoliciesInfo *[]networkPolicyInfo - ipSetHandler *IPSet + ipSetHandler *utils.IPSet podLister cache.Indexer npLister cache.Indexer @@ -82,9 +71,9 @@ type NetworkPolicyController struct { // internal structure to represent a network policy type networkPolicyInfo struct { - name string - namespace string - labels map[string]string + name string + namespace string + podSelector labels.Selector // set of pods matching network policy spec podselector label selector targetPods map[string]podInfo @@ -141,809 +130,257 @@ type numericPort2eps map[string]*endPoints type protocol2eps map[string]numericPort2eps type namedPort2eps 
map[string]protocol2eps -// Run runs forever till we receive notification on stopCh -func (npc *NetworkPolicyController) Run(stopCh <-chan struct{}) { +// Run runs forever till we receive notification on stopCh +func (npc *NetworkPolicyController) Run(healthChan chan<- *healthcheck.ControllerHeartbeat, stopCh <-chan struct{}, wg *sync.WaitGroup) { t := time.NewTicker(npc.syncPeriod) defer t.Stop() + defer wg.Done() - log.Info("Starting network policy controller") - // npc.healthChan = healthChan + glog.Info("Starting network policy controller") + npc.healthChan = healthChan + + // setup kube-router specific top level custom chains + npc.ensureTopLevelChains() + + // Full syncs of the network policy controller take a lot of time and can only be processed one at a time, + // therefore, we start it in its own goroutine and request a sync through a single item channel + glog.Info("Starting network policy controller full sync goroutine") + wg.Add(1) + go func(fullSyncRequest <-chan struct{}, stopCh <-chan struct{}, wg *sync.WaitGroup) { + defer wg.Done() + for { + // Add an additional non-blocking select to ensure that if the stopCh channel is closed it is handled first + select { + case <-stopCh: + glog.Info("Shutting down network policies full sync goroutine") + return + default: + } + select { + case <-stopCh: + glog.Info("Shutting down network policies full sync goroutine") + return + case <-fullSyncRequest: + glog.V(3).Info("Received request for a full sync, processing") + npc.fullPolicySync() // fullPolicySync() is a blocking request here + } + } + }(npc.fullSyncRequestChan, stopCh, wg) // loop forever till notified to stop on stopCh for { + glog.V(1).Info("Requesting periodic sync of iptables to reflect network policies") + npc.RequestFullSync() select { case <-stopCh: - log.Info("Shutting down network policies controller") - return - default: - } - - log.V(1).Info("Performing periodic sync of iptables to reflect network policies") - err := npc.Sync() - if err != 
nil { - log.Errorf("Error during periodic sync of network policies in network policy controller. Error: " + err.Error()) - log.Errorf("Skipping sending heartbeat from network policy controller as periodic sync failed.") - } - // else { - // healthcheck.SendHeartBeat(healthChan, "NPC") - // } - npc.readyForUpdates = true - select { - case <-stopCh: - log.Infof("Shutting down network policies controller") + glog.Infof("Shutting down network policies controller") return case <-t.C: } } } -// OnPodUpdate handles updates to pods from the Kubernetes api server -func (npc *NetworkPolicyController) OnPodUpdate(obj interface{}) { - pod := obj.(*api.Pod) - log.V(2).Infof("Received update to pod: %s/%s", pod.Namespace, pod.Name) - - if !npc.readyForUpdates { - log.V(3).Infof("Skipping update to pod: %s/%s, controller still performing bootup full-sync", pod.Namespace, pod.Name) - return - } - - err := npc.Sync() - if err != nil { - log.Errorf("Error syncing network policy for the update to pod: %s/%s Error: %s", pod.Namespace, pod.Name, err) - } -} - -// OnNetworkPolicyUpdate handles updates to network policy from the kubernetes api server -func (npc *NetworkPolicyController) OnNetworkPolicyUpdate(obj interface{}) { - netpol := obj.(*networking.NetworkPolicy) - log.V(2).Infof("Received update for network policy: %s/%s", netpol.Namespace, netpol.Name) - - if !npc.readyForUpdates { - log.V(3).Infof("Skipping update to network policy: %s/%s, controller still performing bootup full-sync", netpol.Namespace, netpol.Name) - return - } - - err := npc.Sync() - if err != nil { - log.Errorf("Error syncing network policy for the update to network policy: %s/%s Error: %s", netpol.Namespace, netpol.Name, err) - } -} - -// OnNamespaceUpdate handles updates to namespace from kubernetes api server -func (npc *NetworkPolicyController) OnNamespaceUpdate(obj interface{}) { - namespace := obj.(*api.Namespace) - // namespace (and annotations on it) has no significance in GA ver of network policy - 
if npc.v1NetworkPolicy { - return - } - log.V(2).Infof("Received update for namespace: %s", namespace.Name) - - err := npc.Sync() - if err != nil { - log.Errorf("Error syncing on namespace update: %s", err) +// RequestFullSync allows the request of a full network policy sync without blocking the caller +func (npc *NetworkPolicyController) RequestFullSync() { + select { + case npc.fullSyncRequestChan <- struct{}{}: + glog.V(3).Info("Full sync request queue was empty so a full sync request was successfully sent") + default: // Don't block if the buffered channel is full, return quickly so that we don't block caller execution + glog.V(1).Info("Full sync request queue was full, skipping...") } } // Sync synchronizes iptables to desired state of network policies -func (npc *NetworkPolicyController) Sync() error { +func (npc *NetworkPolicyController) fullPolicySync() { var err error + var networkPoliciesInfo []networkPolicyInfo npc.mu.Lock() defer npc.mu.Unlock() - // healthcheck.SendHeartBeat(npc.healthChan, "NPC") + healthcheck.SendHeartBeat(npc.healthChan, "NPC") start := time.Now() syncVersion := strconv.FormatInt(start.UnixNano(), 10) defer func() { endTime := time.Since(start) - // if npc.MetricsEnabled { - // metrics.ControllerIPtablesSyncTime.Observe(endTime.Seconds()) - // } - log.V(1).Infof("sync iptables took %v", endTime) + if npc.MetricsEnabled { + metrics.ControllerIptablesSyncTime.Observe(endTime.Seconds()) + } + glog.V(1).Infof("sync iptables took %v", endTime) }() - log.V(1).Infof("Starting sync of iptables with version: %s", syncVersion) - if npc.v1NetworkPolicy { - npc.networkPoliciesInfo, err = npc.buildNetworkPoliciesInfo() - if err != nil { - return errors.New("Aborting sync. Failed to build network policies: " + err.Error()) - } - } else { - // TODO remove the Beta support - npc.networkPoliciesInfo, err = npc.buildBetaNetworkPoliciesInfo() - if err != nil { - return errors.New("Aborting sync. 
Failed to build network policies: " + err.Error()) - } + glog.V(1).Infof("Starting sync of iptables with version: %s", syncVersion) + + // ensure kube-router specific top level chains and corresponding rules exist + npc.ensureTopLevelChains() + + networkPoliciesInfo, err = npc.buildNetworkPoliciesInfo() + if err != nil { + glog.Errorf("Aborting sync. Failed to build network policies: %v", err.Error()) + return } - activePolicyChains, activePolicyIPSets, err := npc.syncNetworkPolicyChains(syncVersion) + activePolicyChains, activePolicyIPSets, err := npc.syncNetworkPolicyChains(networkPoliciesInfo, syncVersion) if err != nil { - return errors.New("Aborting sync. Failed to sync network policy chains: " + err.Error()) + glog.Errorf("Aborting sync. Failed to sync network policy chains: %v", err.Error()) + return } - activePodFwChains, err := npc.syncPodFirewallChains(syncVersion) + activePodFwChains, err := npc.syncPodFirewallChains(networkPoliciesInfo, syncVersion) if err != nil { - return errors.New("Aborting sync. Failed to sync pod firewalls: " + err.Error()) + glog.Errorf("Aborting sync. Failed to sync pod firewalls: %v", err.Error()) + return } err = cleanupStaleRules(activePolicyChains, activePodFwChains, activePolicyIPSets) if err != nil { - return errors.New("Aborting sync. Failed to cleanup stale iptables rules: " + err.Error()) + glog.Errorf("Aborting sync. Failed to cleanup stale iptables rules: %v", err.Error()) + return } - - return nil } -// Configure iptables rules representing each network policy. All pod's matched by -// network policy spec podselector labels are grouped together in one ipset which -// is used for matching destination ip address. Each ingress rule in the network -// policyspec is evaluated to set of matching pods, which are grouped in to a -// ipset used for source ip addr matching. 
-func (npc *NetworkPolicyController) syncNetworkPolicyChains(version string) (map[string]bool, map[string]bool, error) { - start := time.Now() - defer func() { - endTime := time.Since(start) - // metrics.ControllerPolicyChainsSyncTime.Observe(endTime.Seconds()) - log.V(2).Infof("Syncing network policy chains took %v", endTime) - }() - activePolicyChains := make(map[string]bool) - activePolicyIPSets := make(map[string]bool) +// Creates custom chains KUBE-ROUTER-INPUT, KUBE-ROUTER-FORWARD, KUBE-ROUTER-OUTPUT +// and following rules in the filter table to jump from builtin chain to custom chain +// -A INPUT -m comment --comment "kube-router netpol" -j KUBE-ROUTER-INPUT +// -A FORWARD -m comment --comment "kube-router netpol" -j KUBE-ROUTER-FORWARD +// -A OUTPUT -m comment --comment "kube-router netpol" -j KUBE-ROUTER-OUTPUT +func (npc *NetworkPolicyController) ensureTopLevelChains() { iptablesCmdHandler, err := iptables.New() if err != nil { - log.Fatalf("Failed to initialize iptables executor due to: %s", err.Error()) + glog.Fatalf("Failed to initialize iptables executor due to %s", err.Error()) } - // run through all network policies - for _, policy := range *npc.networkPoliciesInfo { + addUUIDForRuleSpec := func(chain string, ruleSpec *[]string) (string, error) { + hash := sha256.Sum256([]byte(chain + strings.Join(*ruleSpec, ""))) + encoded := base32.StdEncoding.EncodeToString(hash[:])[:16] + for idx, part := range *ruleSpec { + if "--comment" == part { + (*ruleSpec)[idx+1] = (*ruleSpec)[idx+1] + " - " + encoded + return encoded, nil + } + } + return "", fmt.Errorf("could not find a comment in the ruleSpec string given: %s", strings.Join(*ruleSpec, " ")) + } - // ensure there is a unique chain per network policy in filter table - policyChainName := networkPolicyChainName(policy.namespace, policy.name, version) - err := iptablesCmdHandler.NewChain("filter", policyChainName) + ensureRuleAtPosition := func(chain string, ruleSpec []string, uuid string, position int) { 
+ exists, err := iptablesCmdHandler.Exists("filter", chain, ruleSpec...) + if err != nil { + glog.Fatalf("Failed to verify rule exists in %s chain due to %s", chain, err.Error()) + } + if !exists { + err := iptablesCmdHandler.Insert("filter", chain, position, ruleSpec...) + if err != nil { + glog.Fatalf("Failed to run iptables command to insert in %s chain %s", chain, err.Error()) + } + return + } + rules, err := iptablesCmdHandler.List("filter", chain) + if err != nil { + glog.Fatalf("failed to list rules in filter table %s chain due to %s", chain, err.Error()) + } + + var ruleNo, ruleIndexOffset int + for i, rule := range rules { + rule = strings.Replace(rule, "\"", "", 2) //removes quote from comment string + if strings.HasPrefix(rule, "-P") || strings.HasPrefix(rule, "-N") { + // if this chain has a default policy, then it will show as rule #1 from iptablesCmdHandler.List so we + // need to account for this offset + ruleIndexOffset++ + continue + } + if strings.Contains(rule, uuid) { + // range uses a 0 index, but iptables uses a 1 index so we need to increase ruleNo by 1 + ruleNo = i + 1 - ruleIndexOffset + break + } + } + if ruleNo != position { + err = iptablesCmdHandler.Insert("filter", chain, position, ruleSpec...) 
+ if err != nil { + glog.Fatalf("Failed to run iptables command to insert in %s chain %s", chain, err.Error()) + } + err = iptablesCmdHandler.Delete("filter", chain, strconv.Itoa(ruleNo+1)) + if err != nil { + glog.Fatalf("Failed to delete incorrect rule in %s chain due to %s", chain, err.Error()) + } + } + } + + chains := map[string]string{"INPUT": kubeInputChainName, "FORWARD": kubeForwardChainName, "OUTPUT": kubeOutputChainName} + + for builtinChain, customChain := range chains { + err = iptablesCmdHandler.NewChain("filter", customChain) if err != nil && err.(*iptables.Error).ExitStatus() != 1 { - return nil, nil, fmt.Errorf("Failed to run iptables command: %s", err.Error()) + glog.Fatalf("Failed to run iptables command to create %s chain due to %s", customChain, err.Error()) } - - activePolicyChains[policyChainName] = true - - // create a ipset for all destination pod ip's matched by the policy spec PodSelector - targetDestPodIPSetName := policyDestinationPodIPSetName(policy.namespace, policy.name) - targetDestPodIPSet, err := npc.ipSetHandler.Create(targetDestPodIPSetName, TypeHashIP, OptionTimeout, "0") + args := []string{"-m", "comment", "--comment", "kube-router netpol", "-j", customChain} + uuid, err := addUUIDForRuleSpec(builtinChain, &args) if err != nil { - return nil, nil, fmt.Errorf("failed to create ipset: %s", err.Error()) - } - - // create a ipset for all source pod ip's matched by the policy spec PodSelector - targetSourcePodIPSetName := policySourcePodIPSetName(policy.namespace, policy.name) - targetSourcePodIPSet, err := npc.ipSetHandler.Create(targetSourcePodIPSetName, TypeHashIP, OptionTimeout, "0") - if err != nil { - return nil, nil, fmt.Errorf("failed to create ipset: %s", err.Error()) - } - - activePolicyIPSets[targetDestPodIPSet.Name] = true - activePolicyIPSets[targetSourcePodIPSet.Name] = true - - currentPodIPs := make([]string, 0, len(policy.targetPods)) - for ip := range policy.targetPods { - currentPodIPs = append(currentPodIPs, ip) 
- } - - err = targetSourcePodIPSet.Refresh(currentPodIPs, OptionTimeout, "0") - if err != nil { - log.Errorf("failed to refresh targetSourcePodIPSet: " + err.Error()) - } - err = targetDestPodIPSet.Refresh(currentPodIPs, OptionTimeout, "0") - if err != nil { - log.Errorf("failed to refresh targetDestPodIPSet: " + err.Error()) - } - - err = npc.processIngressRules(policy, targetDestPodIPSetName, activePolicyIPSets, version) - if err != nil { - return nil, nil, err - } - - err = npc.processEgressRules(policy, targetSourcePodIPSetName, activePolicyIPSets, version) - if err != nil { - return nil, nil, err + glog.Fatalf("Failed to get uuid for rule: %s", err.Error()) } + ensureRuleAtPosition(builtinChain, args, uuid, 1) } - log.V(2).Infof("IPtables chains in the filter table are synchronized with the network policies.") - - return activePolicyChains, activePolicyIPSets, nil -} - -func (npc *NetworkPolicyController) processIngressRules(policy networkPolicyInfo, - targetDestPodIPSetName string, activePolicyIPSets map[string]bool, version string) error { - - // From network policy spec: "If field 'Ingress' is empty then this NetworkPolicy does not allow any traffic " - // so no whitelist rules to be added to the network policy - if policy.ingressRules == nil { - return nil - } - - iptablesCmdHandler, err := iptables.New() + whitelistServiceVips := []string{"-m", "comment", "--comment", "allow traffic to cluster IP", "-d", npc.serviceClusterIPRange.String(), "-j", "RETURN"} + uuid, err := addUUIDForRuleSpec(kubeInputChainName, &whitelistServiceVips) if err != nil { - return fmt.Errorf("Failed to initialize iptables executor due to: %s", err.Error()) + glog.Fatalf("Failed to get uuid for rule: %s", err.Error()) } + ensureRuleAtPosition(kubeInputChainName, whitelistServiceVips, uuid, 1) - policyChainName := networkPolicyChainName(policy.namespace, policy.name, version) - - // run through all the ingress rules in the spec and create iptables rules - // in the chain for the 
network policy - for i, ingressRule := range policy.ingressRules { - - if len(ingressRule.srcPods) != 0 { - srcPodIPSetName := policyIndexedSourcePodIPSetName(policy.namespace, policy.name, i) - srcPodIPSet, err := npc.ipSetHandler.Create(srcPodIPSetName, TypeHashIP, OptionTimeout, "0") - if err != nil { - return fmt.Errorf("failed to create ipset: %s", err.Error()) - } - - activePolicyIPSets[srcPodIPSet.Name] = true - - ingressRuleSrcPodIPs := make([]string, 0, len(ingressRule.srcPods)) - for _, pod := range ingressRule.srcPods { - ingressRuleSrcPodIPs = append(ingressRuleSrcPodIPs, pod.ip) - } - err = srcPodIPSet.Refresh(ingressRuleSrcPodIPs, OptionTimeout, "0") - if err != nil { - log.Errorf("failed to refresh srcPodIPSet: " + err.Error()) - } - - if len(ingressRule.ports) != 0 { - // case where 'ports' details and 'from' details specified in the ingress rule - // so match on specified source and destination ip's and specified port (if any) and protocol - for _, portProtocol := range ingressRule.ports { - comment := "rule to ACCEPT traffic from source pods to dest pods selected by policy name " + - policy.name + " namespace " + policy.namespace - if err := npc.appendRuleToPolicyChain(iptablesCmdHandler, policyChainName, comment, srcPodIPSetName, targetDestPodIPSetName, portProtocol.protocol, portProtocol.port); err != nil { - return err - } - } - } - - if len(ingressRule.namedPorts) != 0 { - for j, endPoints := range ingressRule.namedPorts { - namedPortIPSetName := policyIndexedIngressNamedPortIPSetName(policy.namespace, policy.name, i, j) - namedPortIPSet, err := npc.ipSetHandler.Create(namedPortIPSetName, TypeHashIP, OptionTimeout, "0") - if err != nil { - return fmt.Errorf("failed to create ipset: %s", err.Error()) - } - activePolicyIPSets[namedPortIPSet.Name] = true - err = namedPortIPSet.Refresh(endPoints.ips, OptionTimeout, "0") - if err != nil { - log.Errorf("failed to refresh namedPortIPSet: " + err.Error()) - } - comment := "rule to ACCEPT traffic from 
source pods to dest pods selected by policy name " + - policy.name + " namespace " + policy.namespace - if err := npc.appendRuleToPolicyChain(iptablesCmdHandler, policyChainName, comment, srcPodIPSetName, namedPortIPSetName, endPoints.protocol, endPoints.port); err != nil { - return err - } - } - } - - if len(ingressRule.ports) == 0 && len(ingressRule.namedPorts) == 0 { - // case where no 'ports' details specified in the ingress rule but 'from' details specified - // so match on specified source and destination ip with all port and protocol - comment := "rule to ACCEPT traffic from source pods to dest pods selected by policy name " + - policy.name + " namespace " + policy.namespace - if err := npc.appendRuleToPolicyChain(iptablesCmdHandler, policyChainName, comment, srcPodIPSetName, targetDestPodIPSetName, "", ""); err != nil { - return err - } - } - } - - // case where only 'ports' details specified but no 'from' details in the ingress rule - // so match on all sources, with specified port (if any) and protocol - if ingressRule.matchAllSource && !ingressRule.matchAllPorts { - for _, portProtocol := range ingressRule.ports { - comment := "rule to ACCEPT traffic from all sources to dest pods selected by policy name: " + - policy.name + " namespace " + policy.namespace - if err := npc.appendRuleToPolicyChain(iptablesCmdHandler, policyChainName, comment, "", targetDestPodIPSetName, portProtocol.protocol, portProtocol.port); err != nil { - return err - } - } - - for j, endPoints := range ingressRule.namedPorts { - namedPortIPSetName := policyIndexedIngressNamedPortIPSetName(policy.namespace, policy.name, i, j) - namedPortIPSet, err := npc.ipSetHandler.Create(namedPortIPSetName, TypeHashIP, OptionTimeout, "0") - if err != nil { - return fmt.Errorf("failed to create ipset: %s", err.Error()) - } - - activePolicyIPSets[namedPortIPSet.Name] = true - - err = namedPortIPSet.Refresh(endPoints.ips, OptionTimeout, "0") - if err != nil { - log.Errorf("failed to refresh 
namedPortIPSet: " + err.Error()) - } - comment := "rule to ACCEPT traffic from all sources to dest pods selected by policy name: " + - policy.name + " namespace " + policy.namespace - if err := npc.appendRuleToPolicyChain(iptablesCmdHandler, policyChainName, comment, "", namedPortIPSetName, endPoints.protocol, endPoints.port); err != nil { - return err - } - } - } - - // case where nether ports nor from details are specified in the ingress rule - // so match on all ports, protocol, source IP's - if ingressRule.matchAllSource && ingressRule.matchAllPorts { - comment := "rule to ACCEPT traffic from all sources to dest pods selected by policy name: " + - policy.name + " namespace " + policy.namespace - if err := npc.appendRuleToPolicyChain(iptablesCmdHandler, policyChainName, comment, "", targetDestPodIPSetName, "", ""); err != nil { - return err - } - } - - if len(ingressRule.srcIPBlocks) != 0 { - srcIPBlockIPSetName := policyIndexedSourceIPBlockIPSetName(policy.namespace, policy.name, i) - srcIPBlockIPSet, err := npc.ipSetHandler.Create(srcIPBlockIPSetName, TypeHashNet, OptionTimeout, "0") - if err != nil { - return fmt.Errorf("failed to create ipset: %s", err.Error()) - } - activePolicyIPSets[srcIPBlockIPSet.Name] = true - err = srcIPBlockIPSet.RefreshWithBuiltinOptions(ingressRule.srcIPBlocks) - if err != nil { - log.Errorf("failed to refresh srcIPBlockIPSet: " + err.Error()) - } - if !ingressRule.matchAllPorts { - for _, portProtocol := range ingressRule.ports { - comment := "rule to ACCEPT traffic from specified ipBlocks to dest pods selected by policy name: " + - policy.name + " namespace " + policy.namespace - if err := npc.appendRuleToPolicyChain(iptablesCmdHandler, policyChainName, comment, srcIPBlockIPSetName, targetDestPodIPSetName, portProtocol.protocol, portProtocol.port); err != nil { - return err - } - } - - for j, endPoints := range ingressRule.namedPorts { - namedPortIPSetName := policyIndexedIngressNamedPortIPSetName(policy.namespace, policy.name, 
i, j) - namedPortIPSet, err := npc.ipSetHandler.Create(namedPortIPSetName, TypeHashIP, OptionTimeout, "0") - if err != nil { - return fmt.Errorf("failed to create ipset: %s", err.Error()) - } - - activePolicyIPSets[namedPortIPSet.Name] = true - - err = namedPortIPSet.Refresh(endPoints.ips, OptionTimeout, "0") - if err != nil { - log.Errorf("failed to refresh namedPortIPSet: " + err.Error()) - } - comment := "rule to ACCEPT traffic from specified ipBlocks to dest pods selected by policy name: " + - policy.name + " namespace " + policy.namespace - if err := npc.appendRuleToPolicyChain(iptablesCmdHandler, policyChainName, comment, srcIPBlockIPSetName, namedPortIPSetName, endPoints.protocol, endPoints.port); err != nil { - return err - } - } - } - if ingressRule.matchAllPorts { - comment := "rule to ACCEPT traffic from specified ipBlocks to dest pods selected by policy name: " + - policy.name + " namespace " + policy.namespace - if err := npc.appendRuleToPolicyChain(iptablesCmdHandler, policyChainName, comment, srcIPBlockIPSetName, targetDestPodIPSetName, "", ""); err != nil { - return err - } - } - } - } - - return nil -} - -func (npc *NetworkPolicyController) processEgressRules(policy networkPolicyInfo, - targetSourcePodIPSetName string, activePolicyIPSets map[string]bool, version string) error { - - // From network policy spec: "If field 'Ingress' is empty then this NetworkPolicy does not allow any traffic " - // so no whitelist rules to be added to the network policy - if policy.egressRules == nil { - return nil - } - - iptablesCmdHandler, err := iptables.New() + whitelistTCPNodeports := []string{"-p", "tcp", "-m", "comment", "--comment", "allow LOCAL TCP traffic to node ports", "-m", "addrtype", "--dst-type", "LOCAL", + "-m", "multiport", "--dports", npc.serviceNodePortRange, "-j", "RETURN"} + uuid, err = addUUIDForRuleSpec(kubeInputChainName, &whitelistTCPNodeports) if err != nil { - return fmt.Errorf("Failed to initialize iptables executor due to: %s", 
err.Error()) + glog.Fatalf("Failed to get uuid for rule: %s", err.Error()) } + ensureRuleAtPosition(kubeInputChainName, whitelistTCPNodeports, uuid, 2) - policyChainName := networkPolicyChainName(policy.namespace, policy.name, version) - - // run through all the egress rules in the spec and create iptables rules - // in the chain for the network policy - for i, egressRule := range policy.egressRules { - - if len(egressRule.dstPods) != 0 { - dstPodIPSetName := policyIndexedDestinationPodIPSetName(policy.namespace, policy.name, i) - dstPodIPSet, err := npc.ipSetHandler.Create(dstPodIPSetName, TypeHashIP, OptionTimeout, "0") - if err != nil { - return fmt.Errorf("failed to create ipset: %s", err.Error()) - } - - activePolicyIPSets[dstPodIPSet.Name] = true - - egressRuleDstPodIPs := make([]string, 0, len(egressRule.dstPods)) - for _, pod := range egressRule.dstPods { - egressRuleDstPodIPs = append(egressRuleDstPodIPs, pod.ip) - } - err = dstPodIPSet.Refresh(egressRuleDstPodIPs, OptionTimeout, "0") - if err != nil { - log.Errorf("failed to refresh dstPodIPSet: " + err.Error()) - } - if len(egressRule.ports) != 0 { - // case where 'ports' details and 'from' details specified in the egress rule - // so match on specified source and destination ip's and specified port (if any) and protocol - for _, portProtocol := range egressRule.ports { - comment := "rule to ACCEPT traffic from source pods to dest pods selected by policy name " + - policy.name + " namespace " + policy.namespace - if err := npc.appendRuleToPolicyChain(iptablesCmdHandler, policyChainName, comment, targetSourcePodIPSetName, dstPodIPSetName, portProtocol.protocol, portProtocol.port); err != nil { - return err - } - } - } - - if len(egressRule.namedPorts) != 0 { - for j, endPoints := range egressRule.namedPorts { - namedPortIPSetName := policyIndexedEgressNamedPortIPSetName(policy.namespace, policy.name, i, j) - namedPortIPSet, err := npc.ipSetHandler.Create(namedPortIPSetName, TypeHashIP, OptionTimeout, "0") 
- if err != nil { - return fmt.Errorf("failed to create ipset: %s", err.Error()) - } - - activePolicyIPSets[namedPortIPSet.Name] = true - - err = namedPortIPSet.Refresh(endPoints.ips, OptionTimeout, "0") - if err != nil { - log.Errorf("failed to refresh namedPortIPSet: " + err.Error()) - } - comment := "rule to ACCEPT traffic from source pods to dest pods selected by policy name " + - policy.name + " namespace " + policy.namespace - if err := npc.appendRuleToPolicyChain(iptablesCmdHandler, policyChainName, comment, targetSourcePodIPSetName, namedPortIPSetName, endPoints.protocol, endPoints.port); err != nil { - return err - } - } - - } - - if len(egressRule.ports) == 0 && len(egressRule.namedPorts) == 0 { - // case where no 'ports' details specified in the ingress rule but 'from' details specified - // so match on specified source and destination ip with all port and protocol - comment := "rule to ACCEPT traffic from source pods to dest pods selected by policy name " + - policy.name + " namespace " + policy.namespace - if err := npc.appendRuleToPolicyChain(iptablesCmdHandler, policyChainName, comment, targetSourcePodIPSetName, dstPodIPSetName, "", ""); err != nil { - return err - } - } - } - - // case where only 'ports' details specified but no 'to' details in the egress rule - // so match on all sources, with specified port (if any) and protocol - if egressRule.matchAllDestinations && !egressRule.matchAllPorts { - for _, portProtocol := range egressRule.ports { - comment := "rule to ACCEPT traffic from source pods to all destinations selected by policy name: " + - policy.name + " namespace " + policy.namespace - if err := npc.appendRuleToPolicyChain(iptablesCmdHandler, policyChainName, comment, targetSourcePodIPSetName, "", portProtocol.protocol, portProtocol.port); err != nil { - return err - } - } - } - - // case where nether ports nor from details are specified in the egress rule - // so match on all ports, protocol, source IP's - if 
egressRule.matchAllDestinations && egressRule.matchAllPorts { - comment := "rule to ACCEPT traffic from source pods to all destinations selected by policy name: " + - policy.name + " namespace " + policy.namespace - if err := npc.appendRuleToPolicyChain(iptablesCmdHandler, policyChainName, comment, targetSourcePodIPSetName, "", "", ""); err != nil { - return err - } - } - if len(egressRule.dstIPBlocks) != 0 { - dstIPBlockIPSetName := policyIndexedDestinationIPBlockIPSetName(policy.namespace, policy.name, i) - dstIPBlockIPSet, err := npc.ipSetHandler.Create(dstIPBlockIPSetName, TypeHashNet, OptionTimeout, "0") - if err != nil { - return fmt.Errorf("failed to create ipset: %s", err.Error()) - } - activePolicyIPSets[dstIPBlockIPSet.Name] = true - err = dstIPBlockIPSet.RefreshWithBuiltinOptions(egressRule.dstIPBlocks) - if err != nil { - log.Errorf("failed to refresh dstIPBlockIPSet: " + err.Error()) - } - if !egressRule.matchAllPorts { - for _, portProtocol := range egressRule.ports { - comment := "rule to ACCEPT traffic from source pods to specified ipBlocks selected by policy name: " + - policy.name + " namespace " + policy.namespace - if err := npc.appendRuleToPolicyChain(iptablesCmdHandler, policyChainName, comment, targetSourcePodIPSetName, dstIPBlockIPSetName, portProtocol.protocol, portProtocol.port); err != nil { - return err - } - } - } - if egressRule.matchAllPorts { - comment := "rule to ACCEPT traffic from source pods to specified ipBlocks selected by policy name: " + - policy.name + " namespace " + policy.namespace - if err := npc.appendRuleToPolicyChain(iptablesCmdHandler, policyChainName, comment, targetSourcePodIPSetName, dstIPBlockIPSetName, "", ""); err != nil { - return err - } - } - } - } - return nil -} - -func (npc *NetworkPolicyController) appendRuleToPolicyChain(iptablesCmdHandler *iptables.IPTables, policyChainName, comment, srcIPSetName, dstIPSetName, protocol, dPort string) error { - if iptablesCmdHandler == nil { - return fmt.Errorf("Failed 
to run iptables command: iptablesCmdHandler is nil") - } - args := make([]string, 0) - if comment != "" { - args = append(args, "-m", "comment", "--comment", comment) - } - if srcIPSetName != "" { - args = append(args, "-m", "set", "--set", srcIPSetName, "src") - } - if dstIPSetName != "" { - args = append(args, "-m", "set", "--set", dstIPSetName, "dst") - } - if protocol != "" { - args = append(args, "-p", protocol) - } - if dPort != "" { - args = append(args, "--dport", dPort) - } - args = append(args, "-j", "ACCEPT") - err := iptablesCmdHandler.AppendUnique("filter", policyChainName, args...) + whitelistUDPNodeports := []string{"-p", "udp", "-m", "comment", "--comment", "allow LOCAL UDP traffic to node ports", "-m", "addrtype", "--dst-type", "LOCAL", + "-m", "multiport", "--dports", npc.serviceNodePortRange, "-j", "RETURN"} + uuid, err = addUUIDForRuleSpec(kubeInputChainName, &whitelistUDPNodeports) if err != nil { - return fmt.Errorf("Failed to run iptables command: %s", err.Error()) + glog.Fatalf("Failed to get uuid for rule: %s", err.Error()) } - return nil -} + ensureRuleAtPosition(kubeInputChainName, whitelistUDPNodeports, uuid, 3) -func (npc *NetworkPolicyController) syncPodFirewallChains(version string) (map[string]bool, error) { - - activePodFwChains := make(map[string]bool) - - iptablesCmdHandler, err := iptables.New() - if err != nil { - log.Fatalf("Failed to initialize iptables executor: %s", err.Error()) + for externalIPIndex, externalIPRange := range npc.serviceExternalIPRanges { + whitelistServiceVips := []string{"-m", "comment", "--comment", "allow traffic to external IP range: " + externalIPRange.String(), "-d", externalIPRange.String(), "-j", "RETURN"} + uuid, err = addUUIDForRuleSpec(kubeInputChainName, &whitelistServiceVips) + if err != nil { + glog.Fatalf("Failed to get uuid for rule: %s", err.Error()) + } + ensureRuleAtPosition(kubeInputChainName, whitelistServiceVips, uuid, externalIPIndex+4) } - // loop through the pods running on the node 
which to which ingress network policies to be applied - ingressNetworkPolicyEnabledPods, err := npc.getIngressNetworkPolicyEnabledPods(npc.nodeIP.String()) - if err != nil { - return nil, err - } - for _, pod := range *ingressNetworkPolicyEnabledPods { - - // below condition occurs when we get trasient update while removing or adding pod - // subsequent update will do the correct action - if len(pod.ip) == 0 || pod.ip == "" { - continue - } - - // ensure pod specific firewall chain exist for all the pods that need ingress firewall - podFwChainName := podFirewallChainName(pod.namespace, pod.name, version) - err = iptablesCmdHandler.NewChain("filter", podFwChainName) - if err != nil && err.(*iptables.Error).ExitStatus() != 1 { - return nil, fmt.Errorf("Failed to run iptables command: %s", err.Error()) - } - activePodFwChains[podFwChainName] = true - - // add entries in pod firewall to run through required network policies - for _, policy := range *npc.networkPoliciesInfo { - if _, ok := policy.targetPods[pod.ip]; ok { - comment := "run through nw policy " + policy.name - policyChainName := networkPolicyChainName(policy.namespace, policy.name, version) - args := []string{"-m", "comment", "--comment", comment, "-j", policyChainName} - exists, err := iptablesCmdHandler.Exists("filter", podFwChainName, args...) - if err != nil { - return nil, fmt.Errorf("Failed to run iptables command: %s", err.Error()) - } - if !exists { - err := iptablesCmdHandler.Insert("filter", podFwChainName, 1, args...) - if err != nil && err.(*iptables.Error).ExitStatus() != 1 { - return nil, fmt.Errorf("Failed to run iptables command: %s", err.Error()) - } - } - } - } - - comment := "rule to permit the traffic traffic to pods when source is the pod's local node" - args := []string{"-m", "comment", "--comment", comment, "-m", "addrtype", "--src-type", "LOCAL", "-d", pod.ip, "-j", "ACCEPT"} - exists, err := iptablesCmdHandler.Exists("filter", podFwChainName, args...) 
- if err != nil { - return nil, fmt.Errorf("Failed to run iptables command: %s", err.Error()) - } - if !exists { - err := iptablesCmdHandler.Insert("filter", podFwChainName, 1, args...) - if err != nil { - return nil, fmt.Errorf("Failed to run iptables command: %s", err.Error()) - } - } - - // ensure there is rule in filter table and FORWARD chain to jump to pod specific firewall chain - // this rule applies to the traffic getting routed (coming for other node pods) - comment = "rule to jump traffic destined to POD name:" + pod.name + " namespace: " + pod.namespace + - " to chain " + podFwChainName - args = []string{"-m", "comment", "--comment", comment, "-d", pod.ip, "-j", podFwChainName} - exists, err = iptablesCmdHandler.Exists("filter", "FORWARD", args...) - if err != nil { - return nil, fmt.Errorf("Failed to run iptables command: %s", err.Error()) - } - if !exists { - err := iptablesCmdHandler.Insert("filter", "FORWARD", 1, args...) - if err != nil { - return nil, fmt.Errorf("Failed to run iptables command: %s", err.Error()) - } - } - - // ensure there is rule in filter table and OUTPUT chain to jump to pod specific firewall chain - // this rule applies to the traffic from a pod getting routed back to another pod on same node by service proxy - exists, err = iptablesCmdHandler.Exists("filter", "OUTPUT", args...) - if err != nil { - return nil, fmt.Errorf("Failed to run iptables command: %s", err.Error()) - } - if !exists { - err := iptablesCmdHandler.Insert("filter", "OUTPUT", 1, args...) 
- if err != nil { - return nil, fmt.Errorf("Failed to run iptables command: %s", err.Error()) - } - } - - // ensure there is rule in filter table and forward chain to jump to pod specific firewall chain - // this rule applies to the traffic getting switched (coming for same node pods) - comment = "rule to jump traffic destined to POD name:" + pod.name + " namespace: " + pod.namespace + - " to chain " + podFwChainName - args = []string{"-m", "physdev", "--physdev-is-bridged", - "-m", "comment", "--comment", comment, - "-d", pod.ip, - "-j", podFwChainName} - exists, err = iptablesCmdHandler.Exists("filter", "FORWARD", args...) - if err != nil { - return nil, fmt.Errorf("Failed to run iptables command: %s", err.Error()) - } - if !exists { - err = iptablesCmdHandler.Insert("filter", "FORWARD", 1, args...) - if err != nil { - return nil, fmt.Errorf("Failed to run iptables command: %s", err.Error()) - } - } - - // add default DROP rule at the end of chain - comment = "default rule to REJECT traffic destined for POD name:" + pod.name + " namespace: " + pod.namespace - args = []string{"-m", "comment", "--comment", comment, "-j", "REJECT"} - err = iptablesCmdHandler.AppendUnique("filter", podFwChainName, args...) - if err != nil { - return nil, fmt.Errorf("Failed to run iptables command: %s", err.Error()) - } - - // ensure stateful firewall, that permits return traffic for the traffic originated by the pod - comment = "rule for stateful firewall for pod" - args = []string{"-m", "comment", "--comment", comment, "-m", "conntrack", "--ctstate", "RELATED,ESTABLISHED", "-j", "ACCEPT"} - exists, err = iptablesCmdHandler.Exists("filter", podFwChainName, args...) - if err != nil { - return nil, fmt.Errorf("Failed to run iptables command: %s", err.Error()) - } - if !exists { - err := iptablesCmdHandler.Insert("filter", podFwChainName, 1, args...) 
- if err != nil { - return nil, fmt.Errorf("Failed to run iptables command: %s", err.Error()) - } - } - } - - // loop through the pods running on the node which egress network policies to be applied - egressNetworkPolicyEnabledPods, err := npc.getEgressNetworkPolicyEnabledPods(npc.nodeIP.String()) - if err != nil { - return nil, err - } - for _, pod := range *egressNetworkPolicyEnabledPods { - - // below condition occurs when we get trasient update while removing or adding pod - // subsequent update will do the correct action - if len(pod.ip) == 0 || pod.ip == "" { - continue - } - - // ensure pod specific firewall chain exist for all the pods that need egress firewall - podFwChainName := podFirewallChainName(pod.namespace, pod.name, version) - err = iptablesCmdHandler.NewChain("filter", podFwChainName) - if err != nil && err.(*iptables.Error).ExitStatus() != 1 { - return nil, fmt.Errorf("Failed to run iptables command: %s", err.Error()) - } - activePodFwChains[podFwChainName] = true - - // add entries in pod firewall to run through required network policies - for _, policy := range *npc.networkPoliciesInfo { - if _, ok := policy.targetPods[pod.ip]; ok { - comment := "run through nw policy " + policy.name - policyChainName := networkPolicyChainName(policy.namespace, policy.name, version) - args := []string{"-m", "comment", "--comment", comment, "-j", policyChainName} - exists, err := iptablesCmdHandler.Exists("filter", podFwChainName, args...) - if err != nil { - return nil, fmt.Errorf("Failed to run iptables command: %s", err.Error()) - } - if !exists { - err := iptablesCmdHandler.Insert("filter", podFwChainName, 1, args...) 
- if err != nil && err.(*iptables.Error).ExitStatus() != 1 { - return nil, fmt.Errorf("Failed to run iptables command: %s", err.Error()) - } - } - } - } - - // ensure there is rule in filter table and FORWARD chain to jump to pod specific firewall chain - // this rule applies to the traffic getting routed (coming for other node pods) - comment := "rule to jump traffic from POD name:" + pod.name + " namespace: " + pod.namespace + - " to chain " + podFwChainName - args := []string{"-m", "comment", "--comment", comment, "-s", pod.ip, "-j", podFwChainName} - exists, err := iptablesCmdHandler.Exists("filter", "FORWARD", args...) - if err != nil { - return nil, fmt.Errorf("Failed to run iptables command: %s", err.Error()) - } - if !exists { - err := iptablesCmdHandler.Insert("filter", "FORWARD", 1, args...) - if err != nil { - return nil, fmt.Errorf("Failed to run iptables command: %s", err.Error()) - } - } - - // ensure there is rule in filter table and forward chain to jump to pod specific firewall chain - // this rule applies to the traffic getting switched (coming for same node pods) - comment = "rule to jump traffic from POD name:" + pod.name + " namespace: " + pod.namespace + - " to chain " + podFwChainName - args = []string{"-m", "physdev", "--physdev-is-bridged", - "-m", "comment", "--comment", comment, - "-s", pod.ip, - "-j", podFwChainName} - exists, err = iptablesCmdHandler.Exists("filter", "FORWARD", args...) - if err != nil { - return nil, fmt.Errorf("Failed to run iptables command: %s", err.Error()) - } - if !exists { - err = iptablesCmdHandler.Insert("filter", "FORWARD", 1, args...) 
- if err != nil { - return nil, fmt.Errorf("Failed to run iptables command: %s", err.Error()) - } - } - - // add default DROP rule at the end of chain - comment = "default rule to REJECT traffic destined for POD name:" + pod.name + " namespace: " + pod.namespace - args = []string{"-m", "comment", "--comment", comment, "-j", "REJECT"} - err = iptablesCmdHandler.AppendUnique("filter", podFwChainName, args...) - if err != nil { - return nil, fmt.Errorf("Failed to run iptables command: %s", err.Error()) - } - - // ensure stateful firewall, that permits return traffic for the traffic originated by the pod - comment = "rule for stateful firewall for pod" - args = []string{"-m", "comment", "--comment", comment, "-m", "conntrack", "--ctstate", "RELATED,ESTABLISHED", "-j", "ACCEPT"} - exists, err = iptablesCmdHandler.Exists("filter", podFwChainName, args...) - if err != nil { - return nil, fmt.Errorf("Failed to run iptables command: %s", err.Error()) - } - if !exists { - err := iptablesCmdHandler.Insert("filter", podFwChainName, 1, args...) 
- if err != nil { - return nil, fmt.Errorf("Failed to run iptables command: %s", err.Error()) - } - } - } - - return activePodFwChains, nil } func cleanupStaleRules(activePolicyChains, activePodFwChains, activePolicyIPSets map[string]bool) error { cleanupPodFwChains := make([]string, 0) cleanupPolicyChains := make([]string, 0) - cleanupPolicyIPSets := make([]*Set, 0) + cleanupPolicyIPSets := make([]*utils.Set, 0) + // initialize tool sets for working with iptables and ipset iptablesCmdHandler, err := iptables.New() if err != nil { - log.Fatalf("failed to initialize iptables command executor due to %s", err.Error()) + glog.Fatalf("failed to initialize iptables command executor due to %s", err.Error()) } - ipset, err := NewSavedIPSet(false) + ipsets, err := utils.NewIPSet(false) if err != nil { - log.Fatalf("failed to create ipset command executor due to %s", err.Error()) + glog.Fatalf("failed to create ipsets command executor due to %s", err.Error()) + } + err = ipsets.Save() + if err != nil { + glog.Fatalf("failed to initialize ipsets command executor due to %s", err.Error()) } - // get the list of chains created for pod firewall and network policies + // find iptables chains and ipsets that are no longer used by comparing current to the active maps we were passed chains, err := iptablesCmdHandler.ListChains("filter") + if err != nil { + return fmt.Errorf("Unable to list chains: %s", err) + } for _, chain := range chains { if strings.HasPrefix(chain, kubeNetworkPolicyChainPrefix) { if _, ok := activePolicyChains[chain]; !ok { @@ -956,7 +393,7 @@ func cleanupStaleRules(activePolicyChains, activePodFwChains, activePolicyIPSets } } } - for _, set := range ipset.Sets { + for _, set := range ipsets.Sets { if strings.HasPrefix(set.Name, kubeSourceIPSetPrefix) || strings.HasPrefix(set.Name, kubeDestinationIPSetPrefix) { if _, ok := activePolicyIPSets[set.Name]; !ok { @@ -965,44 +402,33 @@ func cleanupStaleRules(activePolicyChains, activePodFwChains, activePolicyIPSets } } 
- // cleanup FORWARD chain rules to jump to pod firewall - for _, chain := range cleanupPodFwChains { + // remove stale iptables podFwChain references from the filter table chains + for _, podFwChain := range cleanupPodFwChains { - forwardChainRules, err := iptablesCmdHandler.List("filter", "FORWARD") - if err != nil { - return fmt.Errorf("failed to list rules in filter table, FORWARD chain due to %s", err.Error()) - } - outputChainRules, err := iptablesCmdHandler.List("filter", "OUTPUT") - if err != nil { - return fmt.Errorf("failed to list rules in filter table, OUTPUT chain due to %s", err.Error()) - } - - // TODO delete rule by spec, than rule number to avoid extra loop - var realRuleNo int - for i, rule := range forwardChainRules { - if strings.Contains(rule, chain) { - err = iptablesCmdHandler.Delete("filter", "FORWARD", strconv.Itoa(i-realRuleNo)) - if err != nil { - return fmt.Errorf("failed to delete rule: %s from the FORWARD chain of filter table due to %s", rule, err.Error()) - } - realRuleNo++ + primaryChains := []string{kubeInputChainName, kubeForwardChainName, kubeOutputChainName} + for _, egressChain := range primaryChains { + forwardChainRules, err := iptablesCmdHandler.List("filter", egressChain) + if err != nil { + return fmt.Errorf("failed to list rules in filter table, %s podFwChain due to %s", egressChain, err.Error()) } - } - realRuleNo = 0 - for i, rule := range outputChainRules { - if strings.Contains(rule, chain) { - err = iptablesCmdHandler.Delete("filter", "OUTPUT", strconv.Itoa(i-realRuleNo)) - if err != nil { - return fmt.Errorf("failed to delete rule: %s from the OUTPUT chain of filter table due to %s", rule, err.Error()) + + // TODO delete rule by spec, than rule number to avoid extra loop + var realRuleNo int + for i, rule := range forwardChainRules { + if strings.Contains(rule, podFwChain) { + err = iptablesCmdHandler.Delete("filter", egressChain, strconv.Itoa(i-realRuleNo)) + if err != nil { + return fmt.Errorf("failed to delete 
rule: %s from the %s podFwChain of filter table due to %s", rule, egressChain, err.Error()) + } + realRuleNo++ } - realRuleNo++ } } } // cleanup pod firewall chain for _, chain := range cleanupPodFwChains { - log.V(2).Infof("Found pod fw chain to cleanup: %s", chain) + glog.V(2).Infof("Found pod fw chain to cleanup: %s", chain) err = iptablesCmdHandler.ClearChain("filter", chain) if err != nil { return fmt.Errorf("Failed to flush the rules in chain %s due to %s", chain, err.Error()) @@ -1011,18 +437,18 @@ func cleanupStaleRules(activePolicyChains, activePodFwChains, activePolicyIPSets if err != nil { return fmt.Errorf("Failed to delete the chain %s due to %s", chain, err.Error()) } - log.V(2).Infof("Deleted pod specific firewall chain: %s from the filter table", chain) + glog.V(2).Infof("Deleted pod specific firewall chain: %s from the filter table", chain) } // cleanup network policy chains for _, policyChain := range cleanupPolicyChains { - log.V(2).Infof("Found policy chain to cleanup %s", policyChain) + glog.V(2).Infof("Found policy chain to cleanup %s", policyChain) - // first clean up any references from pod firewall chain + // first clean up any references from active pod firewall chains for podFwChain := range activePodFwChains { podFwChainRules, err := iptablesCmdHandler.List("filter", podFwChain) if err != nil { - + return fmt.Errorf("Unable to list rules from the chain %s: %s", podFwChain, err) } for i, rule := range podFwChainRules { if strings.Contains(rule, policyChain) { @@ -1035,6 +461,7 @@ func cleanupStaleRules(activePolicyChains, activePodFwChains, activePolicyIPSets } } + // now that all stale and active references to the network policy chain have been removed, delete the chain err = iptablesCmdHandler.ClearChain("filter", policyChain) if err != nil { return fmt.Errorf("Failed to flush the rules in chain %s due to %s", policyChain, err) @@ -1043,7 +470,7 @@ func cleanupStaleRules(activePolicyChains, activePodFwChains, activePolicyIPSets if err 
!= nil { return fmt.Errorf("Failed to flush the rules in chain %s due to %s", policyChain, err) } - log.V(2).Infof("Deleted network policy chain: %s from the filter table", policyChain) + glog.V(2).Infof("Deleted network policy chain: %s from the filter table", policyChain) } // cleanup network policy ipsets @@ -1056,531 +483,70 @@ func cleanupStaleRules(activePolicyChains, activePodFwChains, activePolicyIPSets return nil } -func (npc *NetworkPolicyController) getIngressNetworkPolicyEnabledPods(nodeIP string) (*map[string]podInfo, error) { - nodePods := make(map[string]podInfo) - - for _, obj := range npc.podLister.List() { - pod := obj.(*api.Pod) - - if strings.Compare(pod.Status.HostIP, nodeIP) != 0 { - continue - } - for _, policy := range *npc.networkPoliciesInfo { - if policy.namespace != pod.ObjectMeta.Namespace { - continue - } - _, ok := policy.targetPods[pod.Status.PodIP] - if ok && (policy.policyType == "both" || policy.policyType == "ingress") { - log.V(2).Infof("Found pod name: " + pod.ObjectMeta.Name + " namespace: " + pod.ObjectMeta.Namespace + " for which network policies need to be applied.") - nodePods[pod.Status.PodIP] = podInfo{ip: pod.Status.PodIP, - name: pod.ObjectMeta.Name, - namespace: pod.ObjectMeta.Namespace, - labels: pod.ObjectMeta.Labels} - break - } - } - } - return &nodePods, nil - -} - -func (npc *NetworkPolicyController) getEgressNetworkPolicyEnabledPods(nodeIP string) (*map[string]podInfo, error) { - - nodePods := make(map[string]podInfo) - - for _, obj := range npc.podLister.List() { - pod := obj.(*api.Pod) - - if strings.Compare(pod.Status.HostIP, nodeIP) != 0 { - continue - } - for _, policy := range *npc.networkPoliciesInfo { - if policy.namespace != pod.ObjectMeta.Namespace { - continue - } - _, ok := policy.targetPods[pod.Status.PodIP] - if ok && (policy.policyType == "both" || policy.policyType == "egress") { - log.V(2).Infof("Found pod name: " + pod.ObjectMeta.Name + " namespace: " + pod.ObjectMeta.Namespace + " for which 
network policies need to be applied.") - nodePods[pod.Status.PodIP] = podInfo{ip: pod.Status.PodIP, - name: pod.ObjectMeta.Name, - namespace: pod.ObjectMeta.Namespace, - labels: pod.ObjectMeta.Labels} - break - } - } - } - return &nodePods, nil -} - -func (npc *NetworkPolicyController) processNetworkPolicyPorts(npPorts []networking.NetworkPolicyPort, namedPort2eps namedPort2eps) (numericPorts []protocolAndPort, namedPorts []endPoints) { - numericPorts, namedPorts = make([]protocolAndPort, 0), make([]endPoints, 0) - for _, npPort := range npPorts { - if npPort.Port == nil { - numericPorts = append(numericPorts, protocolAndPort{port: "", protocol: string(*npPort.Protocol)}) - } else if npPort.Port.Type == intstr.Int { - numericPorts = append(numericPorts, protocolAndPort{port: npPort.Port.String(), protocol: string(*npPort.Protocol)}) - } else { - if protocol2eps, ok := namedPort2eps[npPort.Port.String()]; ok { - if numericPort2eps, ok := protocol2eps[string(*npPort.Protocol)]; ok { - for _, eps := range numericPort2eps { - namedPorts = append(namedPorts, *eps) - } - } - } - } - } - return -} - -func (npc *NetworkPolicyController) processBetaNetworkPolicyPorts(npPorts []apiextensions.NetworkPolicyPort, namedPort2eps namedPort2eps) (numericPorts []protocolAndPort, namedPorts []endPoints) { - numericPorts, namedPorts = make([]protocolAndPort, 0), make([]endPoints, 0) - for _, npPort := range npPorts { - if npPort.Port == nil { - numericPorts = append(numericPorts, protocolAndPort{port: "", protocol: string(*npPort.Protocol)}) - } else if npPort.Port.Type == intstr.Int { - numericPorts = append(numericPorts, protocolAndPort{port: npPort.Port.String(), protocol: string(*npPort.Protocol)}) - } else { - if protocol2eps, ok := namedPort2eps[npPort.Port.String()]; ok { - if numericPort2eps, ok := protocol2eps[string(*npPort.Protocol)]; ok { - for _, eps := range numericPort2eps { - namedPorts = append(namedPorts, *eps) - } - } - } - } - } - return -} - -func (npc 
*NetworkPolicyController) buildNetworkPoliciesInfo() (*[]networkPolicyInfo, error) { - - NetworkPolicies := make([]networkPolicyInfo, 0) - - for _, policyObj := range npc.npLister.List() { - - policy, ok := policyObj.(*networking.NetworkPolicy) - if !ok { - return nil, fmt.Errorf("Failed to convert") - } - newPolicy := networkPolicyInfo{ - name: policy.Name, - namespace: policy.Namespace, - labels: policy.Spec.PodSelector.MatchLabels, - policyType: "ingress", - } - - // check if there is explicitly specified PolicyTypes in the spec - if len(policy.Spec.PolicyTypes) > 0 { - ingressType, egressType := false, false - for _, policyType := range policy.Spec.PolicyTypes { - if policyType == networking.PolicyTypeIngress { - ingressType = true - } - if policyType == networking.PolicyTypeEgress { - egressType = true - } - } - if ingressType && egressType { - newPolicy.policyType = "both" - } else if egressType { - newPolicy.policyType = "egress" - } else if ingressType { - newPolicy.policyType = "ingress" - } - } else { - if policy.Spec.Egress != nil && policy.Spec.Ingress != nil { - newPolicy.policyType = "both" - } else if policy.Spec.Egress != nil { - newPolicy.policyType = "egress" - } else if policy.Spec.Ingress != nil { - newPolicy.policyType = "ingress" - } - } - - matchingPods, err := npc.ListPodsByNamespaceAndLabels(policy.Namespace, policy.Spec.PodSelector.MatchLabels) - newPolicy.targetPods = make(map[string]podInfo) - namedPort2IngressEps := make(namedPort2eps) - if err == nil { - for _, matchingPod := range matchingPods { - if matchingPod.Status.PodIP == "" { - continue - } - newPolicy.targetPods[matchingPod.Status.PodIP] = podInfo{ip: matchingPod.Status.PodIP, - name: matchingPod.ObjectMeta.Name, - namespace: matchingPod.ObjectMeta.Namespace, - labels: matchingPod.ObjectMeta.Labels} - npc.grabNamedPortFromPod(matchingPod, &namedPort2IngressEps) - } - } - - if policy.Spec.Ingress == nil { - newPolicy.ingressRules = nil - } else { - newPolicy.ingressRules = 
make([]ingressRule, 0) - } - - if policy.Spec.Egress == nil { - newPolicy.egressRules = nil - } else { - newPolicy.egressRules = make([]egressRule, 0) - } - - for _, specIngressRule := range policy.Spec.Ingress { - ingressRule := ingressRule{} - ingressRule.srcPods = make([]podInfo, 0) - ingressRule.srcIPBlocks = make([][]string, 0) - - // If this field is empty or missing in the spec, this rule matches all sources - if len(specIngressRule.From) == 0 { - ingressRule.matchAllSource = true - } else { - ingressRule.matchAllSource = false - for _, peer := range specIngressRule.From { - if peerPods, err := npc.evalPodPeer(policy, peer); err == nil { - for _, peerPod := range peerPods { - if peerPod.Status.PodIP == "" { - continue - } - ingressRule.srcPods = append(ingressRule.srcPods, - podInfo{ip: peerPod.Status.PodIP, - name: peerPod.ObjectMeta.Name, - namespace: peerPod.ObjectMeta.Namespace, - labels: peerPod.ObjectMeta.Labels}) - } - } - ingressRule.srcIPBlocks = append(ingressRule.srcIPBlocks, npc.evalIPBlockPeer(peer)...) 
- } - } - - ingressRule.ports = make([]protocolAndPort, 0) - ingressRule.namedPorts = make([]endPoints, 0) - // If this field is empty or missing in the spec, this rule matches all ports - if len(specIngressRule.Ports) == 0 { - ingressRule.matchAllPorts = true - } else { - ingressRule.matchAllPorts = false - ingressRule.ports, ingressRule.namedPorts = npc.processNetworkPolicyPorts(specIngressRule.Ports, namedPort2IngressEps) - } - - newPolicy.ingressRules = append(newPolicy.ingressRules, ingressRule) - } - - for _, specEgressRule := range policy.Spec.Egress { - egressRule := egressRule{} - egressRule.dstPods = make([]podInfo, 0) - egressRule.dstIPBlocks = make([][]string, 0) - namedPort2EgressEps := make(namedPort2eps) - - // If this field is empty or missing in the spec, this rule matches all sources - if len(specEgressRule.To) == 0 { - egressRule.matchAllDestinations = true - } else { - egressRule.matchAllDestinations = false - for _, peer := range specEgressRule.To { - if peerPods, err := npc.evalPodPeer(policy, peer); err == nil { - for _, peerPod := range peerPods { - if peerPod.Status.PodIP == "" { - continue - } - egressRule.dstPods = append(egressRule.dstPods, - podInfo{ip: peerPod.Status.PodIP, - name: peerPod.ObjectMeta.Name, - namespace: peerPod.ObjectMeta.Namespace, - labels: peerPod.ObjectMeta.Labels}) - npc.grabNamedPortFromPod(peerPod, &namedPort2EgressEps) - } - - } - egressRule.dstIPBlocks = append(egressRule.dstIPBlocks, npc.evalIPBlockPeer(peer)...) 
- } - } - - egressRule.ports = make([]protocolAndPort, 0) - egressRule.namedPorts = make([]endPoints, 0) - // If this field is empty or missing in the spec, this rule matches all ports - if len(specEgressRule.Ports) == 0 { - egressRule.matchAllPorts = true - } else { - egressRule.matchAllPorts = false - egressRule.ports, egressRule.namedPorts = npc.processNetworkPolicyPorts(specEgressRule.Ports, namedPort2EgressEps) - } - - newPolicy.egressRules = append(newPolicy.egressRules, egressRule) - } - NetworkPolicies = append(NetworkPolicies, newPolicy) - } - - return &NetworkPolicies, nil -} - -func (npc *NetworkPolicyController) evalPodPeer(policy *networking.NetworkPolicy, peer networking.NetworkPolicyPeer) ([]*api.Pod, error) { - - var matchingPods []*api.Pod - matchingPods = make([]*api.Pod, 0) - var err error - // spec can have both PodSelector AND NamespaceSelector - if peer.NamespaceSelector != nil { - namespaces, err := npc.ListNamespaceByLabels(peer.NamespaceSelector.MatchLabels) - if err != nil { - return nil, errors.New("Failed to build network policies info due to " + err.Error()) - } - - var podSelectorLabels map[string]string - if peer.PodSelector != nil { - podSelectorLabels = peer.PodSelector.MatchLabels - } - for _, namespace := range namespaces { - namespacePods, err := npc.ListPodsByNamespaceAndLabels(namespace.Name, podSelectorLabels) - if err != nil { - return nil, errors.New("Failed to build network policies info due to " + err.Error()) - } - matchingPods = append(matchingPods, namespacePods...) 
- } - } else if peer.PodSelector != nil { - matchingPods, err = npc.ListPodsByNamespaceAndLabels(policy.Namespace, peer.PodSelector.MatchLabels) - } - - return matchingPods, err -} - -func (npc *NetworkPolicyController) ListPodsByNamespaceAndLabels(namespace string, labelsToMatch labels.Set) (ret []*api.Pod, err error) { - podLister := listers.NewPodLister(npc.podLister) - allMatchedNameSpacePods, err := podLister.Pods(namespace).List(labelsToMatch.AsSelector()) - if err != nil { - return nil, err - } - return allMatchedNameSpacePods, nil -} - -func (npc *NetworkPolicyController) ListNamespaceByLabels(set labels.Set) ([]*api.Namespace, error) { - namespaceLister := listers.NewNamespaceLister(npc.nsLister) - matchedNamespaces, err := namespaceLister.List(set.AsSelector()) - if err != nil { - return nil, err - } - return matchedNamespaces, nil -} - -func (npc *NetworkPolicyController) evalIPBlockPeer(peer networking.NetworkPolicyPeer) [][]string { - ipBlock := make([][]string, 0) - if peer.PodSelector == nil && peer.NamespaceSelector == nil && peer.IPBlock != nil { - if cidr := peer.IPBlock.CIDR; strings.HasSuffix(cidr, "/0") { - ipBlock = append(ipBlock, []string{"0.0.0.0/1", OptionTimeout, "0"}, []string{"128.0.0.0/1", OptionTimeout, "0"}) - } else { - ipBlock = append(ipBlock, []string{cidr, OptionTimeout, "0"}) - } - for _, except := range peer.IPBlock.Except { - if strings.HasSuffix(except, "/0") { - ipBlock = append(ipBlock, []string{"0.0.0.0/1", OptionTimeout, "0", OptionNoMatch}, []string{"128.0.0.0/1", OptionTimeout, "0", OptionNoMatch}) - } else { - ipBlock = append(ipBlock, []string{except, OptionTimeout, "0", OptionNoMatch}) - } - } - } - return ipBlock -} - -func (npc *NetworkPolicyController) grabNamedPortFromPod(pod *api.Pod, namedPort2eps *namedPort2eps) { - if pod == nil || namedPort2eps == nil { - return - } - for k := range pod.Spec.Containers { - for _, port := range pod.Spec.Containers[k].Ports { - name := port.Name - protocol := 
string(port.Protocol) - containerPort := strconv.Itoa(int(port.ContainerPort)) - - if (*namedPort2eps)[name] == nil { - (*namedPort2eps)[name] = make(protocol2eps) - } - if (*namedPort2eps)[name][protocol] == nil { - (*namedPort2eps)[name][protocol] = make(numericPort2eps) - } - if eps, ok := (*namedPort2eps)[name][protocol][containerPort]; !ok { - (*namedPort2eps)[name][protocol][containerPort] = &endPoints{ - ips: []string{pod.Status.PodIP}, - protocolAndPort: protocolAndPort{port: containerPort, protocol: protocol}, - } - } else { - eps.ips = append(eps.ips, pod.Status.PodIP) - } - } - } -} - -func (npc *NetworkPolicyController) buildBetaNetworkPoliciesInfo() (*[]networkPolicyInfo, error) { - - NetworkPolicies := make([]networkPolicyInfo, 0) - - for _, policyObj := range npc.npLister.List() { - - policy, _ := policyObj.(*apiextensions.NetworkPolicy) - newPolicy := networkPolicyInfo{ - name: policy.Name, - namespace: policy.Namespace, - labels: policy.Spec.PodSelector.MatchLabels, - } - matchingPods, err := npc.ListPodsByNamespaceAndLabels(policy.Namespace, policy.Spec.PodSelector.MatchLabels) - newPolicy.targetPods = make(map[string]podInfo) - newPolicy.ingressRules = make([]ingressRule, 0) - namedPort2IngressEps := make(namedPort2eps) - if err == nil { - for _, matchingPod := range matchingPods { - if matchingPod.Status.PodIP == "" { - continue - } - newPolicy.targetPods[matchingPod.Status.PodIP] = podInfo{ip: matchingPod.Status.PodIP, - name: matchingPod.ObjectMeta.Name, - namespace: matchingPod.ObjectMeta.Namespace, - labels: matchingPod.ObjectMeta.Labels} - npc.grabNamedPortFromPod(matchingPod, &namedPort2IngressEps) - } - } - - for _, specIngressRule := range policy.Spec.Ingress { - ingressRule := ingressRule{} - - ingressRule.ports = make([]protocolAndPort, 0) - ingressRule.namedPorts = make([]endPoints, 0) - ingressRule.ports, ingressRule.namedPorts = npc.processBetaNetworkPolicyPorts(specIngressRule.Ports, namedPort2IngressEps) - ingressRule.srcPods = 
make([]podInfo, 0) - for _, peer := range specIngressRule.From { - matchingPods, err := npc.ListPodsByNamespaceAndLabels(policy.Namespace, peer.PodSelector.MatchLabels) - if err == nil { - for _, matchingPod := range matchingPods { - if matchingPod.Status.PodIP == "" { - continue - } - ingressRule.srcPods = append(ingressRule.srcPods, - podInfo{ip: matchingPod.Status.PodIP, - name: matchingPod.ObjectMeta.Name, - namespace: matchingPod.ObjectMeta.Namespace, - labels: matchingPod.ObjectMeta.Labels}) - } - } - } - newPolicy.ingressRules = append(newPolicy.ingressRules, ingressRule) - } - NetworkPolicies = append(NetworkPolicies, newPolicy) - } - - return &NetworkPolicies, nil -} - -func podFirewallChainName(namespace, podName string, version string) string { - hash := sha256.Sum256([]byte(namespace + podName + version)) - encoded := base32.StdEncoding.EncodeToString(hash[:]) - return kubePodFirewallChainPrefix + encoded[:16] -} - -func networkPolicyChainName(namespace, policyName string, version string) string { - hash := sha256.Sum256([]byte(namespace + policyName + version)) - encoded := base32.StdEncoding.EncodeToString(hash[:]) - return kubeNetworkPolicyChainPrefix + encoded[:16] -} - -func policySourcePodIPSetName(namespace, policyName string) string { - hash := sha256.Sum256([]byte(namespace + policyName)) - encoded := base32.StdEncoding.EncodeToString(hash[:]) - return kubeSourceIPSetPrefix + encoded[:16] -} - -func policyDestinationPodIPSetName(namespace, policyName string) string { - hash := sha256.Sum256([]byte(namespace + policyName)) - encoded := base32.StdEncoding.EncodeToString(hash[:]) - return kubeDestinationIPSetPrefix + encoded[:16] -} - -func policyIndexedSourcePodIPSetName(namespace, policyName string, ingressRuleNo int) string { - hash := sha256.Sum256([]byte(namespace + policyName + "ingressrule" + strconv.Itoa(ingressRuleNo) + "pod")) - encoded := base32.StdEncoding.EncodeToString(hash[:]) - return kubeSourceIPSetPrefix + encoded[:16] -} - -func 
policyIndexedDestinationPodIPSetName(namespace, policyName string, egressRuleNo int) string { - hash := sha256.Sum256([]byte(namespace + policyName + "egressrule" + strconv.Itoa(egressRuleNo) + "pod")) - encoded := base32.StdEncoding.EncodeToString(hash[:]) - return kubeDestinationIPSetPrefix + encoded[:16] -} - -func policyIndexedSourceIPBlockIPSetName(namespace, policyName string, ingressRuleNo int) string { - hash := sha256.Sum256([]byte(namespace + policyName + "ingressrule" + strconv.Itoa(ingressRuleNo) + "ipblock")) - encoded := base32.StdEncoding.EncodeToString(hash[:]) - return kubeSourceIPSetPrefix + encoded[:16] -} - -func policyIndexedDestinationIPBlockIPSetName(namespace, policyName string, egressRuleNo int) string { - hash := sha256.Sum256([]byte(namespace + policyName + "egressrule" + strconv.Itoa(egressRuleNo) + "ipblock")) - encoded := base32.StdEncoding.EncodeToString(hash[:]) - return kubeDestinationIPSetPrefix + encoded[:16] -} - -func policyIndexedIngressNamedPortIPSetName(namespace, policyName string, ingressRuleNo, namedPortNo int) string { - hash := sha256.Sum256([]byte(namespace + policyName + "ingressrule" + strconv.Itoa(ingressRuleNo) + strconv.Itoa(namedPortNo) + "namedport")) - encoded := base32.StdEncoding.EncodeToString(hash[:]) - return kubeDestinationIPSetPrefix + encoded[:16] -} - -func policyIndexedEgressNamedPortIPSetName(namespace, policyName string, egressRuleNo, namedPortNo int) string { - hash := sha256.Sum256([]byte(namespace + policyName + "egressrule" + strconv.Itoa(egressRuleNo) + strconv.Itoa(namedPortNo) + "namedport")) - encoded := base32.StdEncoding.EncodeToString(hash[:]) - return kubeDestinationIPSetPrefix + encoded[:16] -} - // Cleanup cleanup configurations done func (npc *NetworkPolicyController) Cleanup() { - log.Info("Cleaning up iptables configuration permanently done by kube-router") + glog.Info("Cleaning up iptables configuration permanently done by kube-router") iptablesCmdHandler, err := iptables.New() if 
err != nil { - log.Errorf("Failed to initialize iptables executor: %s", err.Error()) + glog.Errorf("Failed to initialize iptables executor: %s", err.Error()) } // delete jump rules in FORWARD chain to pod specific firewall chain - forwardChainRules, err := iptablesCmdHandler.List("filter", "FORWARD") + forwardChainRules, err := iptablesCmdHandler.List("filter", kubeForwardChainName) if err != nil { - log.Errorf("Failed to delete iptables rules as part of cleanup") + glog.Errorf("Failed to delete iptables rules as part of cleanup") return } - // TODO: need a better way to delete rule with out using number + // TODO: need a better way to delte rule with out using number var realRuleNo int for i, rule := range forwardChainRules { if strings.Contains(rule, kubePodFirewallChainPrefix) { - err = iptablesCmdHandler.Delete("filter", "FORWARD", strconv.Itoa(i-realRuleNo)) + err = iptablesCmdHandler.Delete("filter", kubeForwardChainName, strconv.Itoa(i-realRuleNo)) + if err != nil { + glog.Errorf("Failed to delete iptables rule as part of cleanup: %s", err) + } realRuleNo++ } } // delete jump rules in OUTPUT chain to pod specific firewall chain - forwardChainRules, err = iptablesCmdHandler.List("filter", "OUTPUT") + forwardChainRules, err = iptablesCmdHandler.List("filter", kubeOutputChainName) if err != nil { - log.Errorf("Failed to delete iptables rules as part of cleanup") + glog.Errorf("Failed to delete iptables rules as part of cleanup") return } - // TODO: need a better way to delete rule with out using number + // TODO: need a better way to delte rule with out using number realRuleNo = 0 for i, rule := range forwardChainRules { if strings.Contains(rule, kubePodFirewallChainPrefix) { - err = iptablesCmdHandler.Delete("filter", "OUTPUT", strconv.Itoa(i-realRuleNo)) + err = iptablesCmdHandler.Delete("filter", kubeOutputChainName, strconv.Itoa(i-realRuleNo)) + if err != nil { + glog.Errorf("Failed to delete iptables rule as part of cleanup: %s", err) + } realRuleNo++ } } 
// flush and delete pod specific firewall chain chains, err := iptablesCmdHandler.ListChains("filter") + if err != nil { + glog.Errorf("Unable to list chains: %s", err) + return + } for _, chain := range chains { if strings.HasPrefix(chain, kubePodFirewallChainPrefix) { err = iptablesCmdHandler.ClearChain("filter", chain) if err != nil { - log.Errorf("Failed to cleanup iptables rules: " + err.Error()) + glog.Errorf("Failed to cleanup iptables rules: " + err.Error()) return } err = iptablesCmdHandler.DeleteChain("filter", chain) if err != nil { - log.Errorf("Failed to cleanup iptables rules: " + err.Error()) + glog.Errorf("Failed to cleanup iptables rules: " + err.Error()) return } } @@ -1588,141 +554,117 @@ func (npc *NetworkPolicyController) Cleanup() { // flush and delete per network policy specific chain chains, err = iptablesCmdHandler.ListChains("filter") + if err != nil { + glog.Errorf("Unable to list chains: %s", err) + return + } for _, chain := range chains { if strings.HasPrefix(chain, kubeNetworkPolicyChainPrefix) { err = iptablesCmdHandler.ClearChain("filter", chain) if err != nil { - log.Errorf("Failed to cleanup iptables rules: " + err.Error()) + glog.Errorf("Failed to cleanup iptables rules: " + err.Error()) return } err = iptablesCmdHandler.DeleteChain("filter", chain) if err != nil { - log.Errorf("Failed to cleanup iptables rules: " + err.Error()) + glog.Errorf("Failed to cleanup iptables rules: " + err.Error()) return } } } // delete all ipsets - ipset, err := NewSavedIPSet(false) + ipset, err := utils.NewIPSet(false) if err != nil { - log.Errorf("Failed to clean up ipsets: " + err.Error()) + glog.Errorf("Failed to clean up ipsets: " + err.Error()) + } + err = ipset.Save() + if err != nil { + glog.Errorf("Failed to clean up ipsets: " + err.Error()) } err = ipset.DestroyAllWithin() if err != nil { - log.Errorf("Failed to clean up ipsets: " + err.Error()) - } - log.Infof("Successfully cleaned the iptables configuration done by kube-router") -} - 
-func (npc *NetworkPolicyController) newPodEventHandler() cache.ResourceEventHandler { - return cache.ResourceEventHandlerFuncs{ - AddFunc: func(obj interface{}) { - npc.OnPodUpdate(obj) - - }, - UpdateFunc: func(oldObj, newObj interface{}) { - newPoObj := newObj.(*api.Pod) - oldPoObj := oldObj.(*api.Pod) - if newPoObj.Status.Phase != oldPoObj.Status.Phase || newPoObj.Status.PodIP != oldPoObj.Status.PodIP { - // for the network policies, we are only interested in pod status phase change or IP change - npc.OnPodUpdate(newObj) - } - }, - DeleteFunc: func(obj interface{}) { - npc.OnPodUpdate(obj) - }, - } -} - -func (npc *NetworkPolicyController) newNamespaceEventHandler() cache.ResourceEventHandler { - return cache.ResourceEventHandlerFuncs{ - AddFunc: func(obj interface{}) { - npc.OnNamespaceUpdate(obj) - - }, - UpdateFunc: func(oldObj, newObj interface{}) { - npc.OnNamespaceUpdate(newObj) - - }, - DeleteFunc: func(obj interface{}) { - npc.OnNamespaceUpdate(obj) - - }, - } -} - -func (npc *NetworkPolicyController) newNetworkPolicyEventHandler() cache.ResourceEventHandler { - return cache.ResourceEventHandlerFuncs{ - AddFunc: func(obj interface{}) { - npc.OnNetworkPolicyUpdate(obj) - - }, - UpdateFunc: func(oldObj, newObj interface{}) { - npc.OnNetworkPolicyUpdate(newObj) - }, - DeleteFunc: func(obj interface{}) { - npc.OnNetworkPolicyUpdate(obj) - - }, + glog.Errorf("Failed to clean up ipsets: " + err.Error()) } + glog.Infof("Successfully cleaned the iptables configuration done by kube-router") } // NewNetworkPolicyController returns new NetworkPolicyController object -func NewNetworkPolicyController( - stopCh <-chan struct{}, - clientset kubernetes.Interface, - ipTablesSyncPeriod time.Duration, - hostnameOverride string) (*NetworkPolicyController, error) { - +func NewNetworkPolicyController(clientset kubernetes.Interface, + config *options.KubeRouterConfig, podInformer cache.SharedIndexInformer, + npInformer cache.SharedIndexInformer, nsInformer 
cache.SharedIndexInformer) (*NetworkPolicyController, error) { npc := NetworkPolicyController{} - informerFactory := informers.NewSharedInformerFactory(clientset, 0) - podInformer := informerFactory.Core().V1().Pods().Informer() - nsInformer := informerFactory.Core().V1().Namespaces().Informer() - npInformer := informerFactory.Networking().V1().NetworkPolicies().Informer() - informerFactory.Start(stopCh) + // Creating a single-item buffered channel to ensure that we only keep a single full sync request at a time, + // additional requests would be pointless to queue since after the first one was processed the system would already + // be up to date with all of the policy changes from any enqueued request after that + npc.fullSyncRequestChan = make(chan struct{}, 1) - if err := CacheSyncOrTimeout(informerFactory, stopCh, 1*time.Minute); err != nil { - return nil, errors.New("Failed to synchronize cache: " + err.Error()) - } - - // if config.MetricsEnabled { - // //Register the metrics for this controller - // prometheus.MustRegister(metrics.ControllerIPtablesSyncTime) - // prometheus.MustRegister(metrics.ControllerPolicyChainsSyncTime) - // npc.MetricsEnabled = true - // } - - npc.syncPeriod = ipTablesSyncPeriod - - npc.v1NetworkPolicy = true - v, err := clientset.Discovery().ServerVersion() + // Validate and parse ClusterIP service range + _, ipnet, err := net.ParseCIDR(config.ClusterIPCIDR) if err != nil { - return nil, err + return nil, fmt.Errorf("failed to get parse --service-cluster-ip-range parameter: %s", err.Error()) + } + npc.serviceClusterIPRange = *ipnet + + // Validate and parse NodePort range + nodePortValidator := regexp.MustCompile(`^([0-9]+)[:-]{1}([0-9]+)$`) + if matched := nodePortValidator.MatchString(config.NodePortRange); !matched { + return nil, fmt.Errorf("failed to parse node port range given: '%s' please see specification in help text", config.NodePortRange) + } + matches := nodePortValidator.FindStringSubmatch(config.NodePortRange) + if 
len(matches) != 3 { + return nil, fmt.Errorf("could not parse port number from range given: '%s'", config.NodePortRange) + } + port1, err := strconv.ParseInt(matches[1], 10, 16) + if err != nil { + return nil, fmt.Errorf("could not parse first port number from range given: '%s'", config.NodePortRange) + } + port2, err := strconv.ParseInt(matches[2], 10, 16) + if err != nil { + return nil, fmt.Errorf("could not parse second port number from range given: '%s'", config.NodePortRange) + } + if port1 >= port2 { + return nil, fmt.Errorf("port 1 is greater than or equal to port 2 in range given: '%s'", config.NodePortRange) + } + npc.serviceNodePortRange = fmt.Sprintf("%d:%d", port1, port2) + + // Validate and parse ExternalIP service range + for _, externalIPRange := range config.ExternalIPCIDRs { + _, ipnet, err := net.ParseCIDR(externalIPRange) + if err != nil { + return nil, fmt.Errorf("failed to get parse --service-external-ip-range parameter: '%s'. Error: %s", externalIPRange, err.Error()) + } + npc.serviceExternalIPRanges = append(npc.serviceExternalIPRanges, *ipnet) } - valid := regexp.MustCompile("[0-9]") - v.Minor = strings.Join(valid.FindAllString(v.Minor, -1), "") - minorVer, _ := strconv.Atoi(v.Minor) - if v.Major == "1" && minorVer < 7 { - npc.v1NetworkPolicy = false + if config.MetricsEnabled { + //Register the metrics for this controller + prometheus.MustRegister(metrics.ControllerIptablesSyncTime) + prometheus.MustRegister(metrics.ControllerPolicyChainsSyncTime) + npc.MetricsEnabled = true } - node, err := clientset.CoreV1().Nodes().Get(context.TODO(), hostnameOverride, metav1.GetOptions{}) + npc.syncPeriod = config.IPTablesSyncPeriod + + node, err := utils.GetNodeObject(clientset, config.HostnameOverride) if err != nil { return nil, err } npc.nodeHostName = node.Name - nodeIP, err := GetNodeIP(node) + nodeIP, err := utils.GetNodeIP(node) if err != nil { return nil, err } npc.nodeIP = nodeIP - ipset, err := NewSavedIPSet(false) + ipset, err := 
utils.NewIPSet(false) + if err != nil { + return nil, err + } + err = ipset.Save() if err != nil { return nil, err } @@ -1730,15 +672,12 @@ func NewNetworkPolicyController( npc.podLister = podInformer.GetIndexer() npc.PodEventHandler = npc.newPodEventHandler() - podInformer.AddEventHandler(npc.PodEventHandler) npc.nsLister = nsInformer.GetIndexer() npc.NamespaceEventHandler = npc.newNamespaceEventHandler() - nsInformer.AddEventHandler(npc.NamespaceEventHandler) npc.npLister = npInformer.GetIndexer() npc.NetworkPolicyEventHandler = npc.newNetworkPolicyEventHandler() - npInformer.AddEventHandler(npc.NetworkPolicyEventHandler) return &npc, nil } diff --git a/pkg/agent/netpol/network_policy_controller_test.go b/pkg/agent/netpol/network_policy_controller_test.go new file mode 100644 index 0000000000..d74d87f1ab --- /dev/null +++ b/pkg/agent/netpol/network_policy_controller_test.go @@ -0,0 +1,545 @@ +package netpol + +import ( + "context" + "net" + "strings" + "testing" + "time" + + netv1 "k8s.io/api/networking/v1" + "k8s.io/apimachinery/pkg/api/resource" + "k8s.io/client-go/tools/cache" + + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/labels" + "k8s.io/client-go/informers" + clientset "k8s.io/client-go/kubernetes" + "k8s.io/client-go/kubernetes/fake" + + "github.com/cloudnativelabs/kube-router/pkg/options" +) + +// newFakeInformersFromClient creates the different informers used in the uneventful network policy controller +func newFakeInformersFromClient(kubeClient clientset.Interface) (informers.SharedInformerFactory, cache.SharedIndexInformer, cache.SharedIndexInformer, cache.SharedIndexInformer) { + informerFactory := informers.NewSharedInformerFactory(kubeClient, 0) + podInformer := informerFactory.Core().V1().Pods().Informer() + npInformer := informerFactory.Networking().V1().NetworkPolicies().Informer() + nsInformer := informerFactory.Core().V1().Namespaces().Informer() + return informerFactory, podInformer, 
nsInformer, npInformer +} + +type tNamespaceMeta struct { + name string + labels labels.Set +} + +// Add resources to Informer Store object to simulate updating the Informer +func tAddToInformerStore(t *testing.T, informer cache.SharedIndexInformer, obj interface{}) { + err := informer.GetStore().Add(obj) + if err != nil { + t.Fatalf("error injecting object to Informer Store: %v", err) + } +} + +type tNetpol struct { + name string + namespace string + podSelector metav1.LabelSelector + ingress []netv1.NetworkPolicyIngressRule + egress []netv1.NetworkPolicyEgressRule +} + +// createFakeNetpol is a helper to create the network policy from the tNetpol struct +func (ns *tNetpol) createFakeNetpol(t *testing.T, informer cache.SharedIndexInformer) { + polTypes := make([]netv1.PolicyType, 0) + if len(ns.ingress) != 0 { + polTypes = append(polTypes, netv1.PolicyTypeIngress) + } + if len(ns.egress) != 0 { + polTypes = append(polTypes, netv1.PolicyTypeEgress) + } + tAddToInformerStore(t, informer, + &netv1.NetworkPolicy{ObjectMeta: metav1.ObjectMeta{Name: ns.name, Namespace: ns.namespace}, + Spec: netv1.NetworkPolicySpec{ + PodSelector: ns.podSelector, + PolicyTypes: polTypes, + Ingress: ns.ingress, + Egress: ns.egress, + }}) +} + +func (ns *tNetpol) findNetpolMatch(netpols *[]networkPolicyInfo) *networkPolicyInfo { + for _, netpol := range *netpols { + if netpol.namespace == ns.namespace && netpol.name == ns.name { + return &netpol + } + } + return nil +} + +// tPodNamespaceMap is a helper to create sets of namespace,pod names +type tPodNamespaceMap map[string]map[string]bool + +func (t tPodNamespaceMap) addPod(pod podInfo) { + if _, ok := t[pod.namespace]; !ok { + t[pod.namespace] = make(map[string]bool) + } + t[pod.namespace][pod.name] = true +} +func (t tPodNamespaceMap) delPod(pod podInfo) { + delete(t[pod.namespace], pod.name) + if len(t[pod.namespace]) == 0 { + delete(t, pod.namespace) + } +} +func (t tPodNamespaceMap) addNSPodInfo(ns, podname string) { + if _, ok := 
t[ns]; !ok { + t[ns] = make(map[string]bool) + } + t[ns][podname] = true +} +func (t tPodNamespaceMap) copy() tPodNamespaceMap { + newMap := make(tPodNamespaceMap) + for ns, pods := range t { + for p := range pods { + newMap.addNSPodInfo(ns, p) + } + } + return newMap +} +func (t tPodNamespaceMap) toStrSlice() (r []string) { + for ns, pods := range t { + for pod := range pods { + r = append(r, ns+":"+pod) + } + } + return +} + +// tNewPodNamespaceMapFromTC creates a new tPodNamespaceMap from the info of the testcase +func tNewPodNamespaceMapFromTC(target map[string]string) tPodNamespaceMap { + newMap := make(tPodNamespaceMap) + for ns, pods := range target { + for _, pod := range strings.Split(pods, ",") { + newMap.addNSPodInfo(ns, pod) + } + } + return newMap +} + +// tCreateFakePods creates the Pods and Namespaces that will be affected by the network policies +// returns a map like map[Namespace]map[PodName]bool +func tCreateFakePods(t *testing.T, podInformer cache.SharedIndexInformer, nsInformer cache.SharedIndexInformer) tPodNamespaceMap { + podNamespaceMap := make(tPodNamespaceMap) + pods := []podInfo{ + {name: "Aa", labels: labels.Set{"app": "a"}, namespace: "nsA", ip: "1.1"}, + {name: "Aaa", labels: labels.Set{"app": "a", "component": "a"}, namespace: "nsA", ip: "1.2"}, + {name: "Aab", labels: labels.Set{"app": "a", "component": "b"}, namespace: "nsA", ip: "1.3"}, + {name: "Aac", labels: labels.Set{"app": "a", "component": "c"}, namespace: "nsA", ip: "1.4"}, + {name: "Ba", labels: labels.Set{"app": "a"}, namespace: "nsB", ip: "2.1"}, + {name: "Baa", labels: labels.Set{"app": "a", "component": "a"}, namespace: "nsB", ip: "2.2"}, + {name: "Bab", labels: labels.Set{"app": "a", "component": "b"}, namespace: "nsB", ip: "2.3"}, + {name: "Ca", labels: labels.Set{"app": "a"}, namespace: "nsC", ip: "3.1"}, + } + namespaces := []tNamespaceMeta{ + {name: "nsA", labels: labels.Set{"name": "a", "team": "a"}}, + {name: "nsB", labels: labels.Set{"name": "b", "team": "a"}}, 
+ {name: "nsC", labels: labels.Set{"name": "c"}}, + {name: "nsD", labels: labels.Set{"name": "d"}}, + } + ipsUsed := make(map[string]bool) + for _, pod := range pods { + podNamespaceMap.addPod(pod) + ipaddr := "1.1." + pod.ip + if ipsUsed[ipaddr] { + t.Fatalf("there is another pod with the same Ip address %s as this pod %s namespace %s", + ipaddr, pod.name, pod.name) + } + ipsUsed[ipaddr] = true + tAddToInformerStore(t, podInformer, + &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: pod.name, Labels: pod.labels, Namespace: pod.namespace}, + Status: v1.PodStatus{PodIP: ipaddr}}) + } + for _, ns := range namespaces { + tAddToInformerStore(t, nsInformer, &v1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: ns.name, Labels: ns.labels}}) + } + return podNamespaceMap +} + +// newFakeNode is a helper function for creating Nodes for testing. +func newFakeNode(name string, addr string) *v1.Node { + return &v1.Node{ + ObjectMeta: metav1.ObjectMeta{Name: name}, + Status: v1.NodeStatus{ + Capacity: v1.ResourceList{ + v1.ResourceName(v1.ResourceCPU): resource.MustParse("1"), + v1.ResourceName(v1.ResourceMemory): resource.MustParse("1G"), + }, + Addresses: []v1.NodeAddress{{Type: v1.NodeExternalIP, Address: addr}}, + }, + } +} + +// newUneventfulNetworkPolicyController returns new NetworkPolicyController object without any event handler +func newUneventfulNetworkPolicyController(podInformer cache.SharedIndexInformer, + npInformer cache.SharedIndexInformer, nsInformer cache.SharedIndexInformer) (*NetworkPolicyController, error) { + + npc := NetworkPolicyController{} + npc.syncPeriod = time.Hour + + npc.nodeHostName = "node" + npc.nodeIP = net.IPv4(10, 10, 10, 10) + npc.podLister = podInformer.GetIndexer() + npc.nsLister = nsInformer.GetIndexer() + npc.npLister = npInformer.GetIndexer() + + return &npc, nil +} + +// tNetpolTestCase helper struct to define the inputs to the test case (netpols) and +// the expected selected targets (targetPods, inSourcePods for ingress targets, and 
outDestPods +// for egress targets) as maps with key being the namespace and a csv of pod names +type tNetpolTestCase struct { + name string + netpol tNetpol + targetPods tPodNamespaceMap + inSourcePods tPodNamespaceMap + outDestPods tPodNamespaceMap +} + +// tGetNotTargetedPods finds set of pods that should not be targeted by netpol selectors +func tGetNotTargetedPods(podsGot []podInfo, wanted tPodNamespaceMap) []string { + unwanted := make(tPodNamespaceMap) + for _, pod := range podsGot { + if !wanted[pod.namespace][pod.name] { + unwanted.addPod(pod) + } + } + return unwanted.toStrSlice() +} + +// tGetTargetPodsMissing returns the set of pods that should have been targeted but were missing by netpol selectors +func tGetTargetPodsMissing(podsGot []podInfo, wanted tPodNamespaceMap) []string { + missing := wanted.copy() + for _, pod := range podsGot { + if wanted[pod.namespace][pod.name] { + missing.delPod(pod) + } + } + return missing.toStrSlice() +} + +func tListOfPodsFromTargets(target map[string]podInfo) (r []podInfo) { + for _, pod := range target { + r = append(r, pod) + } + return +} + +func testForMissingOrUnwanted(t *testing.T, targetMsg string, got []podInfo, wanted tPodNamespaceMap) { + if missing := tGetTargetPodsMissing(got, wanted); len(missing) != 0 { + t.Errorf("Some Pods were not selected %s: %s", targetMsg, strings.Join(missing, ", ")) + } + if missing := tGetNotTargetedPods(got, wanted); len(missing) != 0 { + t.Errorf("Some Pods NOT expected were selected on %s: %s", targetMsg, strings.Join(missing, ", ")) + } +} + +func newMinimalKubeRouterConfig(clusterIPCIDR string, nodePortRange string, hostNameOverride string, externalIPs []string) *options.KubeRouterConfig { + kubeConfig := options.NewKubeRouterConfig() + if clusterIPCIDR != "" { + kubeConfig.ClusterIPCIDR = clusterIPCIDR + } + if nodePortRange != "" { + kubeConfig.NodePortRange = nodePortRange + } + if hostNameOverride != "" { + kubeConfig.HostnameOverride = hostNameOverride + } + if 
externalIPs != nil { + kubeConfig.ExternalIPCIDRs = externalIPs + } + return kubeConfig +} + +type tNetPolConfigTestCase struct { + name string + config *options.KubeRouterConfig + expectError bool + errorText string +} + +func TestNewNetworkPolicySelectors(t *testing.T) { + testCases := []tNetpolTestCase{ + { + name: "Non-Existent Namespace", + netpol: tNetpol{name: "nsXX", podSelector: metav1.LabelSelector{}, namespace: "nsXX"}, + targetPods: nil, + }, + { + name: "Empty Namespace", + netpol: tNetpol{name: "nsD", podSelector: metav1.LabelSelector{}, namespace: "nsD"}, + targetPods: nil, + }, + { + name: "All pods in nsA", + netpol: tNetpol{name: "nsA", podSelector: metav1.LabelSelector{}, namespace: "nsA"}, + targetPods: tNewPodNamespaceMapFromTC(map[string]string{"nsA": "Aa,Aaa,Aab,Aac"}), + }, + { + name: "All pods in nsB", + netpol: tNetpol{name: "nsB", podSelector: metav1.LabelSelector{}, namespace: "nsB"}, + targetPods: tNewPodNamespaceMapFromTC(map[string]string{"nsB": "Ba,Baa,Bab"}), + }, + { + name: "All pods in nsC", + netpol: tNetpol{name: "nsC", podSelector: metav1.LabelSelector{}, namespace: "nsC"}, + targetPods: tNewPodNamespaceMapFromTC(map[string]string{"nsC": "Ca"}), + }, + { + name: "All pods app=a in nsA using matchExpressions", + netpol: tNetpol{ + name: "nsA-app-a-matchExpression", + namespace: "nsA", + podSelector: metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{{ + Key: "app", + Operator: "In", + Values: []string{"a"}, + }}}}, + targetPods: tNewPodNamespaceMapFromTC(map[string]string{"nsA": "Aa,Aaa,Aab,Aac"}), + }, + { + name: "All pods app=a in nsA using matchLabels", + netpol: tNetpol{name: "nsA-app-a-matchLabels", namespace: "nsA", + podSelector: metav1.LabelSelector{ + MatchLabels: map[string]string{"app": "a"}}}, + targetPods: tNewPodNamespaceMapFromTC(map[string]string{"nsA": "Aa,Aaa,Aab,Aac"}), + }, + { + name: "All pods app=a in nsA using matchLabels ingress allow from any pod in nsB", + netpol: 
tNetpol{name: "nsA-app-a-matchLabels-2", namespace: "nsA", + podSelector: metav1.LabelSelector{MatchLabels: map[string]string{"app": "a"}}, + ingress: []netv1.NetworkPolicyIngressRule{{From: []netv1.NetworkPolicyPeer{{NamespaceSelector: &metav1.LabelSelector{MatchLabels: map[string]string{"name": "b"}}}}}}, + }, + targetPods: tNewPodNamespaceMapFromTC(map[string]string{"nsA": "Aa,Aaa,Aab,Aac"}), + inSourcePods: tNewPodNamespaceMapFromTC(map[string]string{"nsB": "Ba,Baa,Bab"}), + }, + { + name: "All pods app=a in nsA using matchLabels ingress allow from pod in nsB with component = b", + netpol: tNetpol{name: "nsA-app-a-matchExpression-2", namespace: "nsA", + podSelector: metav1.LabelSelector{MatchLabels: map[string]string{"app": "a"}}, + ingress: []netv1.NetworkPolicyIngressRule{{From: []netv1.NetworkPolicyPeer{ + { + NamespaceSelector: &metav1.LabelSelector{MatchLabels: map[string]string{"name": "b"}}, + PodSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{{ + Key: "component", + Operator: "In", + Values: []string{"b"}, + }}}, + }, + }}}}, + targetPods: tNewPodNamespaceMapFromTC(map[string]string{"nsA": "Aa,Aaa,Aab,Aac"}), + inSourcePods: tNewPodNamespaceMapFromTC(map[string]string{"nsB": "Bab"}), + }, + { + name: "All pods app=a,component=b or c in nsA", + netpol: tNetpol{name: "nsA-app-a-matchExpression-3", namespace: "nsA", + podSelector: metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "app", + Operator: "In", + Values: []string{"a"}, + }, + { + Key: "component", + Operator: "In", + Values: []string{"b", "c"}, + }}}, + }, + targetPods: tNewPodNamespaceMapFromTC(map[string]string{"nsA": "Aab,Aac"}), + }, + } + + client := fake.NewSimpleClientset(&v1.NodeList{Items: []v1.Node{*newFakeNode("node", "10.10.10.10")}}) + informerFactory, podInformer, nsInformer, netpolInformer := newFakeInformersFromClient(client) + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + 
informerFactory.Start(ctx.Done()) + cache.WaitForCacheSync(ctx.Done(), podInformer.HasSynced) + krNetPol, _ := newUneventfulNetworkPolicyController(podInformer, netpolInformer, nsInformer) + tCreateFakePods(t, podInformer, nsInformer) + for _, test := range testCases { + test.netpol.createFakeNetpol(t, netpolInformer) + } + netpols, err := krNetPol.buildNetworkPoliciesInfo() + if err != nil { + t.Errorf("Problems building policies") + } + + for _, test := range testCases { + t.Run(test.name, func(t *testing.T) { + np := test.netpol.findNetpolMatch(&netpols) + testForMissingOrUnwanted(t, "targetPods", tListOfPodsFromTargets(np.targetPods), test.targetPods) + for _, ingress := range np.ingressRules { + testForMissingOrUnwanted(t, "ingress srcPods", ingress.srcPods, test.inSourcePods) + } + for _, egress := range np.egressRules { + testForMissingOrUnwanted(t, "egress dstPods", egress.dstPods, test.outDestPods) + } + }) + } +} + +func TestNetworkPolicyController(t *testing.T) { + testCases := []tNetPolConfigTestCase{ + { + "Default options are successful", + newMinimalKubeRouterConfig("", "", "node", nil), + false, + "", + }, + { + "Missing nodename fails appropriately", + newMinimalKubeRouterConfig("", "", "", nil), + true, + "Failed to identify the node by NODE_NAME, hostname or --hostname-override", + }, + { + "Test bad cluster CIDR (not properly formatting ip address)", + newMinimalKubeRouterConfig("10.10.10", "", "node", nil), + true, + "failed to get parse --service-cluster-ip-range parameter: invalid CIDR address: 10.10.10", + }, + { + "Test bad cluster CIDR (not using an ip address)", + newMinimalKubeRouterConfig("foo", "", "node", nil), + true, + "failed to get parse --service-cluster-ip-range parameter: invalid CIDR address: foo", + }, + { + "Test bad cluster CIDR (using an ip address that is not a CIDR)", + newMinimalKubeRouterConfig("10.10.10.10", "", "node", nil), + true, + "failed to get parse --service-cluster-ip-range parameter: invalid CIDR address: 
10.10.10.10", + }, + { + "Test good cluster CIDR (using single IP with a /32)", + newMinimalKubeRouterConfig("10.10.10.10/32", "", "node", nil), + false, + "", + }, + { + "Test good cluster CIDR (using normal range with /24)", + newMinimalKubeRouterConfig("10.10.10.0/24", "", "node", nil), + false, + "", + }, + { + "Test bad node port specification (using commas)", + newMinimalKubeRouterConfig("", "8080,8081", "node", nil), + true, + "failed to parse node port range given: '8080,8081' please see specification in help text", + }, + { + "Test bad node port specification (not using numbers)", + newMinimalKubeRouterConfig("", "foo:bar", "node", nil), + true, + "failed to parse node port range given: 'foo:bar' please see specification in help text", + }, + { + "Test bad node port specification (using anything in addition to range)", + newMinimalKubeRouterConfig("", "8080,8081-8090", "node", nil), + true, + "failed to parse node port range given: '8080,8081-8090' please see specification in help text", + }, + { + "Test bad node port specification (using reversed range)", + newMinimalKubeRouterConfig("", "8090-8080", "node", nil), + true, + "port 1 is greater than or equal to port 2 in range given: '8090-8080'", + }, + { + "Test bad node port specification (port out of available range)", + newMinimalKubeRouterConfig("", "132000-132001", "node", nil), + true, + "could not parse first port number from range given: '132000-132001'", + }, + { + "Test good node port specification (using colon separator)", + newMinimalKubeRouterConfig("", "8080:8090", "node", nil), + false, + "", + }, + { + "Test good node port specification (using hyphen separator)", + newMinimalKubeRouterConfig("", "8080-8090", "node", nil), + false, + "", + }, + { + "Test bad external IP CIDR (not properly formatting ip address)", + newMinimalKubeRouterConfig("", "", "node", []string{"199.10.10"}), + true, + "failed to get parse --service-external-ip-range parameter: '199.10.10'. 
Error: invalid CIDR address: 199.10.10", + }, + { + "Test bad external IP CIDR (not using an ip address)", + newMinimalKubeRouterConfig("", "", "node", []string{"foo"}), + true, + "failed to get parse --service-external-ip-range parameter: 'foo'. Error: invalid CIDR address: foo", + }, + { + "Test bad external IP CIDR (using an ip address that is not a CIDR)", + newMinimalKubeRouterConfig("", "", "node", []string{"199.10.10.10"}), + true, + "failed to get parse --service-external-ip-range parameter: '199.10.10.10'. Error: invalid CIDR address: 199.10.10.10", + }, + { + "Test bad external IP CIDR (making sure that it processes all items in the list)", + newMinimalKubeRouterConfig("", "", "node", []string{"199.10.10.10/32", "199.10.10.11"}), + true, + "failed to get parse --service-external-ip-range parameter: '199.10.10.11'. Error: invalid CIDR address: 199.10.10.11", + }, + { + "Test good external IP CIDR (using single IP with a /32)", + newMinimalKubeRouterConfig("", "", "node", []string{"199.10.10.10/32"}), + false, + "", + }, + { + "Test good external IP CIDR (using normal range with /24)", + newMinimalKubeRouterConfig("", "", "node", []string{"199.10.10.10/24"}), + false, + "", + }, + } + client := fake.NewSimpleClientset(&v1.NodeList{Items: []v1.Node{*newFakeNode("node", "10.10.10.10")}}) + _, podInformer, nsInformer, netpolInformer := newFakeInformersFromClient(client) + for _, test := range testCases { + t.Run(test.name, func(t *testing.T) { + _, err := NewNetworkPolicyController(client, test.config, podInformer, netpolInformer, nsInformer) + if err == nil && test.expectError { + t.Error("This config should have failed, but it was successful instead") + } else if err != nil { + // Unfortunately without doing a ton of extra refactoring work, we can't remove this reference easily + // from the controllers start up. 
Luckily it's one of the last items to be processed in the controller + // so for now we'll consider that if we hit this error that we essentially didn't hit an error at all + // TODO: refactor NPC to use an injectable interface for ipset operations + if !test.expectError && err.Error() != "Ipset utility not found" { + t.Errorf("This config should have been successful, but it failed instead. Error: %s", err) + } else if test.expectError && err.Error() != test.errorText { + t.Errorf("Expected error: '%s' but instead got: '%s'", test.errorText, err) + } + } + }) + } +} + +// Ref: +// https://github.com/kubernetes/kubernetes/blob/master/pkg/controller/podgc/gc_controller_test.go +// https://github.com/kubernetes/kubernetes/blob/master/pkg/controller/testutil/test_utils.go diff --git a/pkg/agent/netpol/pod.go b/pkg/agent/netpol/pod.go new file mode 100644 index 0000000000..b4d37ea7cc --- /dev/null +++ b/pkg/agent/netpol/pod.go @@ -0,0 +1,380 @@ +package netpol + +import ( + "crypto/sha256" + "encoding/base32" + "fmt" + "strings" + + "github.com/coreos/go-iptables/iptables" + "github.com/golang/glog" + api "k8s.io/api/core/v1" + "k8s.io/client-go/tools/cache" +) + +func (npc *NetworkPolicyController) newPodEventHandler() cache.ResourceEventHandler { + return cache.ResourceEventHandlerFuncs{ + AddFunc: func(obj interface{}) { + npc.OnPodUpdate(obj) + + }, + UpdateFunc: func(oldObj, newObj interface{}) { + newPoObj := newObj.(*api.Pod) + oldPoObj := oldObj.(*api.Pod) + if newPoObj.Status.Phase != oldPoObj.Status.Phase || newPoObj.Status.PodIP != oldPoObj.Status.PodIP { + // for the network policies, we are only interested in pod status phase change or IP change + npc.OnPodUpdate(newObj) + } + }, + DeleteFunc: func(obj interface{}) { + npc.handlePodDelete(obj) + }, + } +} + +// OnPodUpdate handles updates to pods from the Kubernetes api server +func (npc *NetworkPolicyController) OnPodUpdate(obj interface{}) { + pod := obj.(*api.Pod) + glog.V(2).Infof("Received update to 
pod: %s/%s", pod.Namespace, pod.Name) + + npc.RequestFullSync() +} + +func (npc *NetworkPolicyController) handlePodDelete(obj interface{}) { + pod, ok := obj.(*api.Pod) + if !ok { + tombstone, ok := obj.(cache.DeletedFinalStateUnknown) + if !ok { + glog.Errorf("unexpected object type: %v", obj) + return + } + if pod, ok = tombstone.Obj.(*api.Pod); !ok { + glog.Errorf("unexpected object type: %v", obj) + return + } + } + glog.V(2).Infof("Received pod: %s/%s delete event", pod.Namespace, pod.Name) + + npc.RequestFullSync() +} + +func (npc *NetworkPolicyController) syncPodFirewallChains(networkPoliciesInfo []networkPolicyInfo, version string) (map[string]bool, error) { + + activePodFwChains := make(map[string]bool) + + iptablesCmdHandler, err := iptables.New() + if err != nil { + glog.Fatalf("Failed to initialize iptables executor: %s", err.Error()) + } + + dropUnmarkedTrafficRules := func(podName, podNamespace, podFwChainName string) error { + // add rule to log the packets that will be dropped due to network policy enforcement + comment := "rule to log dropped traffic POD name:" + podName + " namespace: " + podNamespace + args := []string{"-m", "comment", "--comment", comment, "-m", "mark", "!", "--mark", "0x10000/0x10000", "-j", "NFLOG", "--nflog-group", "100", "-m", "limit", "--limit", "10/minute", "--limit-burst", "10"} + err = iptablesCmdHandler.AppendUnique("filter", podFwChainName, args...) + if err != nil { + return fmt.Errorf("Failed to run iptables command: %s", err.Error()) + } + + // add rule to DROP if no applicable network policy permits the traffic + comment = "rule to REJECT traffic destined for POD name:" + podName + " namespace: " + podNamespace + args = []string{"-m", "comment", "--comment", comment, "-m", "mark", "!", "--mark", "0x10000/0x10000", "-j", "REJECT"} + err = iptablesCmdHandler.AppendUnique("filter", podFwChainName, args...) 
+ if err != nil { + return fmt.Errorf("Failed to run iptables command: %s", err.Error()) + } + + // reset mark to let traffic pass through rest of the chains + args = []string{"-j", "MARK", "--set-mark", "0/0x10000"} + err = iptablesCmdHandler.AppendUnique("filter", podFwChainName, args...) + if err != nil { + return fmt.Errorf("Failed to run iptables command: %s", err.Error()) + } + + return nil + } + + // loop through the pods running on the node which to which ingress network policies to be applied + ingressNetworkPolicyEnabledPods, err := npc.getIngressNetworkPolicyEnabledPods(networkPoliciesInfo, npc.nodeIP.String()) + if err != nil { + return nil, err + } + for _, pod := range *ingressNetworkPolicyEnabledPods { + + // below condition occurs when we get trasient update while removing or adding pod + // subseqent update will do the correct action + if len(pod.ip) == 0 || pod.ip == "" { + continue + } + + // ensure pod specific firewall chain exist for all the pods that need ingress firewall + podFwChainName := podFirewallChainName(pod.namespace, pod.name, version) + err = iptablesCmdHandler.NewChain("filter", podFwChainName) + if err != nil && err.(*iptables.Error).ExitStatus() != 1 { + return nil, fmt.Errorf("Failed to run iptables command: %s", err.Error()) + } + activePodFwChains[podFwChainName] = true + + // add entries in pod firewall to run through required network policies + for _, policy := range networkPoliciesInfo { + if _, ok := policy.targetPods[pod.ip]; ok { + comment := "run through nw policy " + policy.name + policyChainName := networkPolicyChainName(policy.namespace, policy.name, version) + args := []string{"-m", "comment", "--comment", comment, "-j", policyChainName} + exists, err := iptablesCmdHandler.Exists("filter", podFwChainName, args...) + if err != nil { + return nil, fmt.Errorf("Failed to run iptables command: %s", err.Error()) + } + if !exists { + err := iptablesCmdHandler.Insert("filter", podFwChainName, 1, args...) 
+ if err != nil && err.(*iptables.Error).ExitStatus() != 1 { + return nil, fmt.Errorf("Failed to run iptables command: %s", err.Error()) + } + } + } + } + + comment := "rule to permit the traffic traffic to pods when source is the pod's local node" + args := []string{"-m", "comment", "--comment", comment, "-m", "addrtype", "--src-type", "LOCAL", "-d", pod.ip, "-j", "ACCEPT"} + exists, err := iptablesCmdHandler.Exists("filter", podFwChainName, args...) + if err != nil { + return nil, fmt.Errorf("Failed to run iptables command: %s", err.Error()) + } + if !exists { + err := iptablesCmdHandler.Insert("filter", podFwChainName, 1, args...) + if err != nil { + return nil, fmt.Errorf("Failed to run iptables command: %s", err.Error()) + } + } + + // ensure statefull firewall, that permits return traffic for the traffic originated by the pod + comment = "rule for stateful firewall for pod" + args = []string{"-m", "comment", "--comment", comment, "-m", "conntrack", "--ctstate", "RELATED,ESTABLISHED", "-j", "ACCEPT"} + exists, err = iptablesCmdHandler.Exists("filter", podFwChainName, args...) + if err != nil { + return nil, fmt.Errorf("Failed to run iptables command: %s", err.Error()) + } + if !exists { + err := iptablesCmdHandler.Insert("filter", podFwChainName, 1, args...) + if err != nil { + return nil, fmt.Errorf("Failed to run iptables command: %s", err.Error()) + } + } + + // ensure there is rule in filter table and FORWARD chain to jump to pod specific firewall chain + // this rule applies to the traffic getting routed (coming for other node pods) + comment = "rule to jump traffic destined to POD name:" + pod.name + " namespace: " + pod.namespace + + " to chain " + podFwChainName + args = []string{"-m", "comment", "--comment", comment, "-d", pod.ip, "-j", podFwChainName} + exists, err = iptablesCmdHandler.Exists("filter", kubeForwardChainName, args...) 
+ if err != nil { + return nil, fmt.Errorf("Failed to run iptables command: %s", err.Error()) + } + if !exists { + err := iptablesCmdHandler.Insert("filter", kubeForwardChainName, 1, args...) + if err != nil { + return nil, fmt.Errorf("Failed to run iptables command: %s", err.Error()) + } + } + + // ensure there is rule in filter table and OUTPUT chain to jump to pod specific firewall chain + // this rule applies to the traffic from a pod getting routed back to another pod on same node by service proxy + exists, err = iptablesCmdHandler.Exists("filter", kubeOutputChainName, args...) + if err != nil { + return nil, fmt.Errorf("Failed to run iptables command: %s", err.Error()) + } + if !exists { + err := iptablesCmdHandler.Insert("filter", kubeOutputChainName, 1, args...) + if err != nil { + return nil, fmt.Errorf("Failed to run iptables command: %s", err.Error()) + } + } + + // ensure there is rule in filter table and forward chain to jump to pod specific firewall chain + // this rule applies to the traffic getting switched (coming for same node pods) + comment = "rule to jump traffic destined to POD name:" + pod.name + " namespace: " + pod.namespace + + " to chain " + podFwChainName + args = []string{"-m", "physdev", "--physdev-is-bridged", + "-m", "comment", "--comment", comment, + "-d", pod.ip, + "-j", podFwChainName} + exists, err = iptablesCmdHandler.Exists("filter", kubeForwardChainName, args...) + if err != nil { + return nil, fmt.Errorf("Failed to run iptables command: %s", err.Error()) + } + if !exists { + err = iptablesCmdHandler.Insert("filter", kubeForwardChainName, 1, args...) 
+ if err != nil { + return nil, fmt.Errorf("Failed to run iptables command: %s", err.Error()) + } + } + + err = dropUnmarkedTrafficRules(pod.name, pod.namespace, podFwChainName) + if err != nil { + return nil, err + } + } + + // loop through the pods running on the node which egress network policies to be applied + egressNetworkPolicyEnabledPods, err := npc.getEgressNetworkPolicyEnabledPods(networkPoliciesInfo, npc.nodeIP.String()) + if err != nil { + return nil, err + } + for _, pod := range *egressNetworkPolicyEnabledPods { + + // below condition occurs when we get trasient update while removing or adding pod + // subseqent update will do the correct action + if len(pod.ip) == 0 || pod.ip == "" { + continue + } + + // ensure pod specific firewall chain exist for all the pods that need egress firewall + podFwChainName := podFirewallChainName(pod.namespace, pod.name, version) + err = iptablesCmdHandler.NewChain("filter", podFwChainName) + if err != nil && err.(*iptables.Error).ExitStatus() != 1 { + return nil, fmt.Errorf("Failed to run iptables command: %s", err.Error()) + } + activePodFwChains[podFwChainName] = true + + // add entries in pod firewall to run through required network policies + for _, policy := range networkPoliciesInfo { + if _, ok := policy.targetPods[pod.ip]; ok { + comment := "run through nw policy " + policy.name + policyChainName := networkPolicyChainName(policy.namespace, policy.name, version) + args := []string{"-m", "comment", "--comment", comment, "-j", policyChainName} + exists, err := iptablesCmdHandler.Exists("filter", podFwChainName, args...) + if err != nil { + return nil, fmt.Errorf("Failed to run iptables command: %s", err.Error()) + } + if !exists { + err := iptablesCmdHandler.Insert("filter", podFwChainName, 1, args...) 
+ if err != nil && err.(*iptables.Error).ExitStatus() != 1 { + return nil, fmt.Errorf("Failed to run iptables command: %s", err.Error()) + } + } + } + } + + // ensure statefull firewall, that permits return traffic for the traffic originated by the pod + comment := "rule for stateful firewall for pod" + args := []string{"-m", "comment", "--comment", comment, "-m", "conntrack", "--ctstate", "RELATED,ESTABLISHED", "-j", "ACCEPT"} + exists, err := iptablesCmdHandler.Exists("filter", podFwChainName, args...) + if err != nil { + return nil, fmt.Errorf("Failed to run iptables command: %s", err.Error()) + } + if !exists { + err := iptablesCmdHandler.Insert("filter", podFwChainName, 1, args...) + if err != nil { + return nil, fmt.Errorf("Failed to run iptables command: %s", err.Error()) + } + } + + egressFilterChains := []string{kubeInputChainName, kubeForwardChainName, kubeOutputChainName} + for _, chain := range egressFilterChains { + // ensure there is rule in filter table and FORWARD chain to jump to pod specific firewall chain + // this rule applies to the traffic getting forwarded/routed (traffic from the pod destinted + // to pod on a different node) + comment = "rule to jump traffic from POD name:" + pod.name + " namespace: " + pod.namespace + + " to chain " + podFwChainName + args = []string{"-m", "comment", "--comment", comment, "-s", pod.ip, "-j", podFwChainName} + exists, err = iptablesCmdHandler.Exists("filter", chain, args...) + if err != nil { + return nil, fmt.Errorf("Failed to run iptables command: %s", err.Error()) + } + if !exists { + err := iptablesCmdHandler.AppendUnique("filter", chain, args...) 
+ if err != nil { + return nil, fmt.Errorf("Failed to run iptables command: %s", err.Error()) + } + } + } + + // ensure there is rule in filter table and forward chain to jump to pod specific firewall chain + // this rule applies to the traffic getting switched (coming for same node pods) + comment = "rule to jump traffic from POD name:" + pod.name + " namespace: " + pod.namespace + + " to chain " + podFwChainName + args = []string{"-m", "physdev", "--physdev-is-bridged", + "-m", "comment", "--comment", comment, + "-s", pod.ip, + "-j", podFwChainName} + exists, err = iptablesCmdHandler.Exists("filter", kubeForwardChainName, args...) + if err != nil { + return nil, fmt.Errorf("Failed to run iptables command: %s", err.Error()) + } + if !exists { + err = iptablesCmdHandler.Insert("filter", kubeForwardChainName, 1, args...) + if err != nil { + return nil, fmt.Errorf("Failed to run iptables command: %s", err.Error()) + } + } + + err = dropUnmarkedTrafficRules(pod.name, pod.namespace, podFwChainName) + if err != nil { + return nil, err + } + } + + return activePodFwChains, nil +} + +func (npc *NetworkPolicyController) getIngressNetworkPolicyEnabledPods(networkPoliciesInfo []networkPolicyInfo, nodeIP string) (*map[string]podInfo, error) { + nodePods := make(map[string]podInfo) + + for _, obj := range npc.podLister.List() { + pod := obj.(*api.Pod) + + if strings.Compare(pod.Status.HostIP, nodeIP) != 0 { + continue + } + for _, policy := range networkPoliciesInfo { + if policy.namespace != pod.ObjectMeta.Namespace { + continue + } + _, ok := policy.targetPods[pod.Status.PodIP] + if ok && (policy.policyType == "both" || policy.policyType == "ingress") { + glog.V(2).Infof("Found pod name: " + pod.ObjectMeta.Name + " namespace: " + pod.ObjectMeta.Namespace + " for which network policies need to be applied.") + nodePods[pod.Status.PodIP] = podInfo{ip: pod.Status.PodIP, + name: pod.ObjectMeta.Name, + namespace: pod.ObjectMeta.Namespace, + labels: pod.ObjectMeta.Labels} + break + 
} + } + } + return &nodePods, nil + +} + +func (npc *NetworkPolicyController) getEgressNetworkPolicyEnabledPods(networkPoliciesInfo []networkPolicyInfo, nodeIP string) (*map[string]podInfo, error) { + + nodePods := make(map[string]podInfo) + + for _, obj := range npc.podLister.List() { + pod := obj.(*api.Pod) + + if strings.Compare(pod.Status.HostIP, nodeIP) != 0 { + continue + } + for _, policy := range networkPoliciesInfo { + if policy.namespace != pod.ObjectMeta.Namespace { + continue + } + _, ok := policy.targetPods[pod.Status.PodIP] + if ok && (policy.policyType == "both" || policy.policyType == "egress") { + glog.V(2).Infof("Found pod name: " + pod.ObjectMeta.Name + " namespace: " + pod.ObjectMeta.Namespace + " for which network policies need to be applied.") + nodePods[pod.Status.PodIP] = podInfo{ip: pod.Status.PodIP, + name: pod.ObjectMeta.Name, + namespace: pod.ObjectMeta.Namespace, + labels: pod.ObjectMeta.Labels} + break + } + } + } + return &nodePods, nil +} + +func podFirewallChainName(namespace, podName string, version string) string { + hash := sha256.Sum256([]byte(namespace + podName + version)) + encoded := base32.StdEncoding.EncodeToString(hash[:]) + return kubePodFirewallChainPrefix + encoded[:16] +} diff --git a/pkg/agent/netpol/policy.go b/pkg/agent/netpol/policy.go new file mode 100644 index 0000000000..627966200d --- /dev/null +++ b/pkg/agent/netpol/policy.go @@ -0,0 +1,817 @@ +package netpol + +import ( + "crypto/sha256" + "encoding/base32" + "errors" + "fmt" + "strconv" + "strings" + "time" + + "github.com/cloudnativelabs/kube-router/pkg/metrics" + "github.com/cloudnativelabs/kube-router/pkg/utils" + "github.com/coreos/go-iptables/iptables" + "github.com/golang/glog" + api "k8s.io/api/core/v1" + networking "k8s.io/api/networking/v1" + v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/labels" + "k8s.io/apimachinery/pkg/util/intstr" + listers "k8s.io/client-go/listers/core/v1" + "k8s.io/client-go/tools/cache" +) + +func 
(npc *NetworkPolicyController) newNetworkPolicyEventHandler() cache.ResourceEventHandler { + return cache.ResourceEventHandlerFuncs{ + AddFunc: func(obj interface{}) { + npc.OnNetworkPolicyUpdate(obj) + + }, + UpdateFunc: func(oldObj, newObj interface{}) { + npc.OnNetworkPolicyUpdate(newObj) + }, + DeleteFunc: func(obj interface{}) { + npc.handleNetworkPolicyDelete(obj) + + }, + } +} + +// OnNetworkPolicyUpdate handles updates to network policy from the kubernetes api server +func (npc *NetworkPolicyController) OnNetworkPolicyUpdate(obj interface{}) { + netpol := obj.(*networking.NetworkPolicy) + glog.V(2).Infof("Received update for network policy: %s/%s", netpol.Namespace, netpol.Name) + + npc.RequestFullSync() +} + +func (npc *NetworkPolicyController) handleNetworkPolicyDelete(obj interface{}) { + netpol, ok := obj.(*networking.NetworkPolicy) + if !ok { + tombstone, ok := obj.(cache.DeletedFinalStateUnknown) + if !ok { + glog.Errorf("unexpected object type: %v", obj) + return + } + if netpol, ok = tombstone.Obj.(*networking.NetworkPolicy); !ok { + glog.Errorf("unexpected object type: %v", obj) + return + } + } + glog.V(2).Infof("Received network policy: %s/%s delete event", netpol.Namespace, netpol.Name) + + npc.RequestFullSync() +} + +// Configure iptables rules representing each network policy. All pod's matched by +// network policy spec podselector labels are grouped together in one ipset which +// is used for matching destination ip address. Each ingress rule in the network +// policyspec is evaluated to set of matching pods, which are grouped in to a +// ipset used for source ip addr matching. 
+func (npc *NetworkPolicyController) syncNetworkPolicyChains(networkPoliciesInfo []networkPolicyInfo, version string) (map[string]bool, map[string]bool, error) { + start := time.Now() + defer func() { + endTime := time.Since(start) + metrics.ControllerPolicyChainsSyncTime.Observe(endTime.Seconds()) + glog.V(2).Infof("Syncing network policy chains took %v", endTime) + }() + activePolicyChains := make(map[string]bool) + activePolicyIPSets := make(map[string]bool) + + iptablesCmdHandler, err := iptables.New() + if err != nil { + glog.Fatalf("Failed to initialize iptables executor due to: %s", err.Error()) + } + + // run through all network policies + for _, policy := range networkPoliciesInfo { + + // ensure there is a unique chain per network policy in filter table + policyChainName := networkPolicyChainName(policy.namespace, policy.name, version) + err := iptablesCmdHandler.NewChain("filter", policyChainName) + if err != nil && err.(*iptables.Error).ExitStatus() != 1 { + return nil, nil, fmt.Errorf("Failed to run iptables command: %s", err.Error()) + } + + activePolicyChains[policyChainName] = true + + currnetPodIps := make([]string, 0, len(policy.targetPods)) + for ip := range policy.targetPods { + currnetPodIps = append(currnetPodIps, ip) + } + + if policy.policyType == "both" || policy.policyType == "ingress" { + // create a ipset for all destination pod ip's matched by the policy spec PodSelector + targetDestPodIPSetName := policyDestinationPodIPSetName(policy.namespace, policy.name) + targetDestPodIPSet, err := npc.ipSetHandler.Create(targetDestPodIPSetName, utils.TypeHashIP, utils.OptionTimeout, "0") + if err != nil { + return nil, nil, fmt.Errorf("failed to create ipset: %s", err.Error()) + } + err = targetDestPodIPSet.Refresh(currnetPodIps) + if err != nil { + glog.Errorf("failed to refresh targetDestPodIPSet,: " + err.Error()) + } + err = npc.processIngressRules(policy, targetDestPodIPSetName, activePolicyIPSets, version) + if err != nil { + return nil, 
nil, err + } + activePolicyIPSets[targetDestPodIPSet.Name] = true + } + + if policy.policyType == "both" || policy.policyType == "egress" { + // create a ipset for all source pod ip's matched by the policy spec PodSelector + targetSourcePodIPSetName := policySourcePodIPSetName(policy.namespace, policy.name) + targetSourcePodIPSet, err := npc.ipSetHandler.Create(targetSourcePodIPSetName, utils.TypeHashIP, utils.OptionTimeout, "0") + if err != nil { + return nil, nil, fmt.Errorf("failed to create ipset: %s", err.Error()) + } + err = targetSourcePodIPSet.Refresh(currnetPodIps) + if err != nil { + glog.Errorf("failed to refresh targetSourcePodIPSet: " + err.Error()) + } + err = npc.processEgressRules(policy, targetSourcePodIPSetName, activePolicyIPSets, version) + if err != nil { + return nil, nil, err + } + activePolicyIPSets[targetSourcePodIPSet.Name] = true + } + + } + + glog.V(2).Infof("Iptables chains in the filter table are synchronized with the network policies.") + + return activePolicyChains, activePolicyIPSets, nil +} + +func (npc *NetworkPolicyController) processIngressRules(policy networkPolicyInfo, + targetDestPodIPSetName string, activePolicyIPSets map[string]bool, version string) error { + + // From network policy spec: "If field 'Ingress' is empty then this NetworkPolicy does not allow any traffic " + // so no whitelist rules to be added to the network policy + if policy.ingressRules == nil { + return nil + } + + iptablesCmdHandler, err := iptables.New() + if err != nil { + return fmt.Errorf("Failed to initialize iptables executor due to: %s", err.Error()) + } + + policyChainName := networkPolicyChainName(policy.namespace, policy.name, version) + + // run through all the ingress rules in the spec and create iptables rules + // in the chain for the network policy + for i, ingressRule := range policy.ingressRules { + + if len(ingressRule.srcPods) != 0 { + srcPodIPSetName := policyIndexedSourcePodIPSetName(policy.namespace, policy.name, i) + srcPodIPSet, 
err := npc.ipSetHandler.Create(srcPodIPSetName, utils.TypeHashIP, utils.OptionTimeout, "0") + if err != nil { + return fmt.Errorf("failed to create ipset: %s", err.Error()) + } + + activePolicyIPSets[srcPodIPSet.Name] = true + + ingressRuleSrcPodIPs := make([]string, 0, len(ingressRule.srcPods)) + for _, pod := range ingressRule.srcPods { + ingressRuleSrcPodIPs = append(ingressRuleSrcPodIPs, pod.ip) + } + err = srcPodIPSet.Refresh(ingressRuleSrcPodIPs) + if err != nil { + glog.Errorf("failed to refresh srcPodIPSet: " + err.Error()) + } + + if len(ingressRule.ports) != 0 { + // case where 'ports' details and 'from' details specified in the ingress rule + // so match on specified source and destination ip's and specified port (if any) and protocol + for _, portProtocol := range ingressRule.ports { + comment := "rule to ACCEPT traffic from source pods to dest pods selected by policy name " + + policy.name + " namespace " + policy.namespace + if err := npc.appendRuleToPolicyChain(iptablesCmdHandler, policyChainName, comment, srcPodIPSetName, targetDestPodIPSetName, portProtocol.protocol, portProtocol.port); err != nil { + return err + } + } + } + + if len(ingressRule.namedPorts) != 0 { + for j, endPoints := range ingressRule.namedPorts { + namedPortIPSetName := policyIndexedIngressNamedPortIPSetName(policy.namespace, policy.name, i, j) + namedPortIPSet, err := npc.ipSetHandler.Create(namedPortIPSetName, utils.TypeHashIP, utils.OptionTimeout, "0") + if err != nil { + return fmt.Errorf("failed to create ipset: %s", err.Error()) + } + activePolicyIPSets[namedPortIPSet.Name] = true + err = namedPortIPSet.Refresh(endPoints.ips) + if err != nil { + glog.Errorf("failed to refresh namedPortIPSet: " + err.Error()) + } + comment := "rule to ACCEPT traffic from source pods to dest pods selected by policy name " + + policy.name + " namespace " + policy.namespace + if err := npc.appendRuleToPolicyChain(iptablesCmdHandler, policyChainName, comment, srcPodIPSetName, 
namedPortIPSetName, endPoints.protocol, endPoints.port); err != nil { + return err + } + } + } + + if len(ingressRule.ports) == 0 && len(ingressRule.namedPorts) == 0 { + // case where no 'ports' details specified in the ingress rule but 'from' details specified + // so match on specified source and destination ip with all port and protocol + comment := "rule to ACCEPT traffic from source pods to dest pods selected by policy name " + + policy.name + " namespace " + policy.namespace + if err := npc.appendRuleToPolicyChain(iptablesCmdHandler, policyChainName, comment, srcPodIPSetName, targetDestPodIPSetName, "", ""); err != nil { + return err + } + } + } + + // case where only 'ports' details specified but no 'from' details in the ingress rule + // so match on all sources, with specified port (if any) and protocol + if ingressRule.matchAllSource && !ingressRule.matchAllPorts { + for _, portProtocol := range ingressRule.ports { + comment := "rule to ACCEPT traffic from all sources to dest pods selected by policy name: " + + policy.name + " namespace " + policy.namespace + if err := npc.appendRuleToPolicyChain(iptablesCmdHandler, policyChainName, comment, "", targetDestPodIPSetName, portProtocol.protocol, portProtocol.port); err != nil { + return err + } + } + + for j, endPoints := range ingressRule.namedPorts { + namedPortIPSetName := policyIndexedIngressNamedPortIPSetName(policy.namespace, policy.name, i, j) + namedPortIPSet, err := npc.ipSetHandler.Create(namedPortIPSetName, utils.TypeHashIP, utils.OptionTimeout, "0") + if err != nil { + return fmt.Errorf("failed to create ipset: %s", err.Error()) + } + + activePolicyIPSets[namedPortIPSet.Name] = true + + err = namedPortIPSet.Refresh(endPoints.ips) + if err != nil { + glog.Errorf("failed to refresh namedPortIPSet: " + err.Error()) + } + comment := "rule to ACCEPT traffic from all sources to dest pods selected by policy name: " + + policy.name + " namespace " + policy.namespace + if err := 
npc.appendRuleToPolicyChain(iptablesCmdHandler, policyChainName, comment, "", namedPortIPSetName, endPoints.protocol, endPoints.port); err != nil { + return err + } + } + } + + // case where nether ports nor from details are speified in the ingress rule + // so match on all ports, protocol, source IP's + if ingressRule.matchAllSource && ingressRule.matchAllPorts { + comment := "rule to ACCEPT traffic from all sources to dest pods selected by policy name: " + + policy.name + " namespace " + policy.namespace + if err := npc.appendRuleToPolicyChain(iptablesCmdHandler, policyChainName, comment, "", targetDestPodIPSetName, "", ""); err != nil { + return err + } + } + + if len(ingressRule.srcIPBlocks) != 0 { + srcIPBlockIPSetName := policyIndexedSourceIPBlockIPSetName(policy.namespace, policy.name, i) + srcIPBlockIPSet, err := npc.ipSetHandler.Create(srcIPBlockIPSetName, utils.TypeHashNet, utils.OptionTimeout, "0") + if err != nil { + return fmt.Errorf("failed to create ipset: %s", err.Error()) + } + activePolicyIPSets[srcIPBlockIPSet.Name] = true + err = srcIPBlockIPSet.RefreshWithBuiltinOptions(ingressRule.srcIPBlocks) + if err != nil { + glog.Errorf("failed to refresh srcIPBlockIPSet: " + err.Error()) + } + if !ingressRule.matchAllPorts { + for _, portProtocol := range ingressRule.ports { + comment := "rule to ACCEPT traffic from specified ipBlocks to dest pods selected by policy name: " + + policy.name + " namespace " + policy.namespace + if err := npc.appendRuleToPolicyChain(iptablesCmdHandler, policyChainName, comment, srcIPBlockIPSetName, targetDestPodIPSetName, portProtocol.protocol, portProtocol.port); err != nil { + return err + } + } + + for j, endPoints := range ingressRule.namedPorts { + namedPortIPSetName := policyIndexedIngressNamedPortIPSetName(policy.namespace, policy.name, i, j) + namedPortIPSet, err := npc.ipSetHandler.Create(namedPortIPSetName, utils.TypeHashIP, utils.OptionTimeout, "0") + if err != nil { + return fmt.Errorf("failed to create ipset: 
%s", err.Error()) + } + + activePolicyIPSets[namedPortIPSet.Name] = true + + err = namedPortIPSet.Refresh(endPoints.ips) + if err != nil { + glog.Errorf("failed to refresh namedPortIPSet: " + err.Error()) + } + comment := "rule to ACCEPT traffic from specified ipBlocks to dest pods selected by policy name: " + + policy.name + " namespace " + policy.namespace + if err := npc.appendRuleToPolicyChain(iptablesCmdHandler, policyChainName, comment, srcIPBlockIPSetName, namedPortIPSetName, endPoints.protocol, endPoints.port); err != nil { + return err + } + } + } + if ingressRule.matchAllPorts { + comment := "rule to ACCEPT traffic from specified ipBlocks to dest pods selected by policy name: " + + policy.name + " namespace " + policy.namespace + if err := npc.appendRuleToPolicyChain(iptablesCmdHandler, policyChainName, comment, srcIPBlockIPSetName, targetDestPodIPSetName, "", ""); err != nil { + return err + } + } + } + } + + return nil +} + +func (npc *NetworkPolicyController) processEgressRules(policy networkPolicyInfo, + targetSourcePodIPSetName string, activePolicyIPSets map[string]bool, version string) error { + + // From network policy spec: "If field 'Ingress' is empty then this NetworkPolicy does not allow any traffic " + // so no whitelist rules to be added to the network policy + if policy.egressRules == nil { + return nil + } + + iptablesCmdHandler, err := iptables.New() + if err != nil { + return fmt.Errorf("Failed to initialize iptables executor due to: %s", err.Error()) + } + + policyChainName := networkPolicyChainName(policy.namespace, policy.name, version) + + // run through all the egress rules in the spec and create iptables rules + // in the chain for the network policy + for i, egressRule := range policy.egressRules { + + if len(egressRule.dstPods) != 0 { + dstPodIPSetName := policyIndexedDestinationPodIPSetName(policy.namespace, policy.name, i) + dstPodIPSet, err := npc.ipSetHandler.Create(dstPodIPSetName, utils.TypeHashIP, utils.OptionTimeout, "0") 
+ if err != nil { + return fmt.Errorf("failed to create ipset: %s", err.Error()) + } + + activePolicyIPSets[dstPodIPSet.Name] = true + + egressRuleDstPodIps := make([]string, 0, len(egressRule.dstPods)) + for _, pod := range egressRule.dstPods { + egressRuleDstPodIps = append(egressRuleDstPodIps, pod.ip) + } + err = dstPodIPSet.Refresh(egressRuleDstPodIps) + if err != nil { + glog.Errorf("failed to refresh dstPodIPSet: " + err.Error()) + } + if len(egressRule.ports) != 0 { + // case where 'ports' details and 'from' details specified in the egress rule + // so match on specified source and destination ip's and specified port (if any) and protocol + for _, portProtocol := range egressRule.ports { + comment := "rule to ACCEPT traffic from source pods to dest pods selected by policy name " + + policy.name + " namespace " + policy.namespace + if err := npc.appendRuleToPolicyChain(iptablesCmdHandler, policyChainName, comment, targetSourcePodIPSetName, dstPodIPSetName, portProtocol.protocol, portProtocol.port); err != nil { + return err + } + } + } + + if len(egressRule.namedPorts) != 0 { + for j, endPoints := range egressRule.namedPorts { + namedPortIPSetName := policyIndexedEgressNamedPortIPSetName(policy.namespace, policy.name, i, j) + namedPortIPSet, err := npc.ipSetHandler.Create(namedPortIPSetName, utils.TypeHashIP, utils.OptionTimeout, "0") + if err != nil { + return fmt.Errorf("failed to create ipset: %s", err.Error()) + } + + activePolicyIPSets[namedPortIPSet.Name] = true + + err = namedPortIPSet.Refresh(endPoints.ips) + if err != nil { + glog.Errorf("failed to refresh namedPortIPSet: " + err.Error()) + } + comment := "rule to ACCEPT traffic from source pods to dest pods selected by policy name " + + policy.name + " namespace " + policy.namespace + if err := npc.appendRuleToPolicyChain(iptablesCmdHandler, policyChainName, comment, targetSourcePodIPSetName, namedPortIPSetName, endPoints.protocol, endPoints.port); err != nil { + return err + } + } + + } + + if 
len(egressRule.ports) == 0 && len(egressRule.namedPorts) == 0 { + // case where no 'ports' details specified in the ingress rule but 'from' details specified + // so match on specified source and destination ip with all port and protocol + comment := "rule to ACCEPT traffic from source pods to dest pods selected by policy name " + + policy.name + " namespace " + policy.namespace + if err := npc.appendRuleToPolicyChain(iptablesCmdHandler, policyChainName, comment, targetSourcePodIPSetName, dstPodIPSetName, "", ""); err != nil { + return err + } + } + } + + // case where only 'ports' details specified but no 'to' details in the egress rule + // so match on all sources, with specified port (if any) and protocol + if egressRule.matchAllDestinations && !egressRule.matchAllPorts { + for _, portProtocol := range egressRule.ports { + comment := "rule to ACCEPT traffic from source pods to all destinations selected by policy name: " + + policy.name + " namespace " + policy.namespace + if err := npc.appendRuleToPolicyChain(iptablesCmdHandler, policyChainName, comment, targetSourcePodIPSetName, "", portProtocol.protocol, portProtocol.port); err != nil { + return err + } + } + } + + // case where nether ports nor from details are speified in the egress rule + // so match on all ports, protocol, source IP's + if egressRule.matchAllDestinations && egressRule.matchAllPorts { + comment := "rule to ACCEPT traffic from source pods to all destinations selected by policy name: " + + policy.name + " namespace " + policy.namespace + if err := npc.appendRuleToPolicyChain(iptablesCmdHandler, policyChainName, comment, targetSourcePodIPSetName, "", "", ""); err != nil { + return err + } + } + if len(egressRule.dstIPBlocks) != 0 { + dstIPBlockIPSetName := policyIndexedDestinationIPBlockIPSetName(policy.namespace, policy.name, i) + dstIPBlockIPSet, err := npc.ipSetHandler.Create(dstIPBlockIPSetName, utils.TypeHashNet, utils.OptionTimeout, "0") + if err != nil { + return fmt.Errorf("failed to 
create ipset: %s", err.Error()) + } + activePolicyIPSets[dstIPBlockIPSet.Name] = true + err = dstIPBlockIPSet.RefreshWithBuiltinOptions(egressRule.dstIPBlocks) + if err != nil { + glog.Errorf("failed to refresh dstIPBlockIPSet: " + err.Error()) + } + if !egressRule.matchAllPorts { + for _, portProtocol := range egressRule.ports { + comment := "rule to ACCEPT traffic from source pods to specified ipBlocks selected by policy name: " + + policy.name + " namespace " + policy.namespace + if err := npc.appendRuleToPolicyChain(iptablesCmdHandler, policyChainName, comment, targetSourcePodIPSetName, dstIPBlockIPSetName, portProtocol.protocol, portProtocol.port); err != nil { + return err + } + } + } + if egressRule.matchAllPorts { + comment := "rule to ACCEPT traffic from source pods to specified ipBlocks selected by policy name: " + + policy.name + " namespace " + policy.namespace + if err := npc.appendRuleToPolicyChain(iptablesCmdHandler, policyChainName, comment, targetSourcePodIPSetName, dstIPBlockIPSetName, "", ""); err != nil { + return err + } + } + } + } + return nil +} + +func (npc *NetworkPolicyController) appendRuleToPolicyChain(iptablesCmdHandler *iptables.IPTables, policyChainName, comment, srcIPSetName, dstIPSetName, protocol, dPort string) error { + if iptablesCmdHandler == nil { + return fmt.Errorf("Failed to run iptables command: iptablesCmdHandler is nil") + } + args := make([]string, 0) + if comment != "" { + args = append(args, "-m", "comment", "--comment", comment) + } + if srcIPSetName != "" { + args = append(args, "-m", "set", "--match-set", srcIPSetName, "src") + } + if dstIPSetName != "" { + args = append(args, "-m", "set", "--match-set", dstIPSetName, "dst") + } + if protocol != "" { + args = append(args, "-p", protocol) + } + if dPort != "" { + args = append(args, "--dport", dPort) + } + + markComment := "rule to mark traffic matching a network policy" + markArgs := append(args, "-j", "MARK", "-m", "comment", "--comment", markComment, 
"--set-xmark", "0x10000/0x10000") + err := iptablesCmdHandler.AppendUnique("filter", policyChainName, markArgs...) + if err != nil { + return fmt.Errorf("Failed to run iptables command: %s", err.Error()) + } + + returnComment := "rule to RETURN traffic matching a network policy" + returnArgs := append(args, "-m", "comment", "--comment", returnComment, "-m", "mark", "--mark", "0x10000/0x10000", "-j", "RETURN") + err = iptablesCmdHandler.AppendUnique("filter", policyChainName, returnArgs...) + if err != nil { + return fmt.Errorf("Failed to run iptables command: %s", err.Error()) + } + + return nil +} + +func (npc *NetworkPolicyController) buildNetworkPoliciesInfo() ([]networkPolicyInfo, error) { + + NetworkPolicies := make([]networkPolicyInfo, 0) + + for _, policyObj := range npc.npLister.List() { + + policy, ok := policyObj.(*networking.NetworkPolicy) + podSelector, _ := v1.LabelSelectorAsSelector(&policy.Spec.PodSelector) + if !ok { + return nil, fmt.Errorf("Failed to convert") + } + newPolicy := networkPolicyInfo{ + name: policy.Name, + namespace: policy.Namespace, + podSelector: podSelector, + policyType: "ingress", + } + + ingressType, egressType := false, false + for _, policyType := range policy.Spec.PolicyTypes { + if policyType == networking.PolicyTypeIngress { + ingressType = true + } + if policyType == networking.PolicyTypeEgress { + egressType = true + } + } + if ingressType && egressType { + newPolicy.policyType = "both" + } else if egressType { + newPolicy.policyType = "egress" + } else if ingressType { + newPolicy.policyType = "ingress" + } + + matchingPods, err := npc.ListPodsByNamespaceAndLabels(policy.Namespace, podSelector) + newPolicy.targetPods = make(map[string]podInfo) + namedPort2IngressEps := make(namedPort2eps) + if err == nil { + for _, matchingPod := range matchingPods { + if matchingPod.Status.PodIP == "" { + continue + } + newPolicy.targetPods[matchingPod.Status.PodIP] = podInfo{ip: matchingPod.Status.PodIP, + name: 
matchingPod.ObjectMeta.Name, + namespace: matchingPod.ObjectMeta.Namespace, + labels: matchingPod.ObjectMeta.Labels} + npc.grabNamedPortFromPod(matchingPod, &namedPort2IngressEps) + } + } + + if policy.Spec.Ingress == nil { + newPolicy.ingressRules = nil + } else { + newPolicy.ingressRules = make([]ingressRule, 0) + } + + if policy.Spec.Egress == nil { + newPolicy.egressRules = nil + } else { + newPolicy.egressRules = make([]egressRule, 0) + } + + for _, specIngressRule := range policy.Spec.Ingress { + ingressRule := ingressRule{} + ingressRule.srcPods = make([]podInfo, 0) + ingressRule.srcIPBlocks = make([][]string, 0) + + // If this field is empty or missing in the spec, this rule matches all sources + if len(specIngressRule.From) == 0 { + ingressRule.matchAllSource = true + } else { + ingressRule.matchAllSource = false + for _, peer := range specIngressRule.From { + if peerPods, err := npc.evalPodPeer(policy, peer); err == nil { + for _, peerPod := range peerPods { + if peerPod.Status.PodIP == "" { + continue + } + ingressRule.srcPods = append(ingressRule.srcPods, + podInfo{ip: peerPod.Status.PodIP, + name: peerPod.ObjectMeta.Name, + namespace: peerPod.ObjectMeta.Namespace, + labels: peerPod.ObjectMeta.Labels}) + } + } + ingressRule.srcIPBlocks = append(ingressRule.srcIPBlocks, npc.evalIPBlockPeer(peer)...) 
+ } + } + + ingressRule.ports = make([]protocolAndPort, 0) + ingressRule.namedPorts = make([]endPoints, 0) + // If this field is empty or missing in the spec, this rule matches all ports + if len(specIngressRule.Ports) == 0 { + ingressRule.matchAllPorts = true + } else { + ingressRule.matchAllPorts = false + ingressRule.ports, ingressRule.namedPorts = npc.processNetworkPolicyPorts(specIngressRule.Ports, namedPort2IngressEps) + } + + newPolicy.ingressRules = append(newPolicy.ingressRules, ingressRule) + } + + for _, specEgressRule := range policy.Spec.Egress { + egressRule := egressRule{} + egressRule.dstPods = make([]podInfo, 0) + egressRule.dstIPBlocks = make([][]string, 0) + namedPort2EgressEps := make(namedPort2eps) + + // If this field is empty or missing in the spec, this rule matches all sources + if len(specEgressRule.To) == 0 { + egressRule.matchAllDestinations = true + } else { + egressRule.matchAllDestinations = false + for _, peer := range specEgressRule.To { + if peerPods, err := npc.evalPodPeer(policy, peer); err == nil { + for _, peerPod := range peerPods { + if peerPod.Status.PodIP == "" { + continue + } + egressRule.dstPods = append(egressRule.dstPods, + podInfo{ip: peerPod.Status.PodIP, + name: peerPod.ObjectMeta.Name, + namespace: peerPod.ObjectMeta.Namespace, + labels: peerPod.ObjectMeta.Labels}) + npc.grabNamedPortFromPod(peerPod, &namedPort2EgressEps) + } + + } + egressRule.dstIPBlocks = append(egressRule.dstIPBlocks, npc.evalIPBlockPeer(peer)...) 
+ } + } + + egressRule.ports = make([]protocolAndPort, 0) + egressRule.namedPorts = make([]endPoints, 0) + // If this field is empty or missing in the spec, this rule matches all ports + if len(specEgressRule.Ports) == 0 { + egressRule.matchAllPorts = true + } else { + egressRule.matchAllPorts = false + egressRule.ports, egressRule.namedPorts = npc.processNetworkPolicyPorts(specEgressRule.Ports, namedPort2EgressEps) + } + + newPolicy.egressRules = append(newPolicy.egressRules, egressRule) + } + NetworkPolicies = append(NetworkPolicies, newPolicy) + } + + return NetworkPolicies, nil +} + +func (npc *NetworkPolicyController) evalPodPeer(policy *networking.NetworkPolicy, peer networking.NetworkPolicyPeer) ([]*api.Pod, error) { + + var matchingPods []*api.Pod + matchingPods = make([]*api.Pod, 0) + var err error + // spec can have both PodSelector AND NamespaceSelector + if peer.NamespaceSelector != nil { + namespaceSelector, _ := v1.LabelSelectorAsSelector(peer.NamespaceSelector) + namespaces, err := npc.ListNamespaceByLabels(namespaceSelector) + if err != nil { + return nil, errors.New("Failed to build network policies info due to " + err.Error()) + } + + podSelector := labels.Everything() + if peer.PodSelector != nil { + podSelector, _ = v1.LabelSelectorAsSelector(peer.PodSelector) + } + for _, namespace := range namespaces { + namespacePods, err := npc.ListPodsByNamespaceAndLabels(namespace.Name, podSelector) + if err != nil { + return nil, errors.New("Failed to build network policies info due to " + err.Error()) + } + matchingPods = append(matchingPods, namespacePods...) 
+ } + } else if peer.PodSelector != nil { + podSelector, _ := v1.LabelSelectorAsSelector(peer.PodSelector) + matchingPods, err = npc.ListPodsByNamespaceAndLabels(policy.Namespace, podSelector) + } + + return matchingPods, err +} + +func (npc *NetworkPolicyController) processNetworkPolicyPorts(npPorts []networking.NetworkPolicyPort, namedPort2eps namedPort2eps) (numericPorts []protocolAndPort, namedPorts []endPoints) { + numericPorts, namedPorts = make([]protocolAndPort, 0), make([]endPoints, 0) + for _, npPort := range npPorts { + if npPort.Port == nil { + numericPorts = append(numericPorts, protocolAndPort{port: "", protocol: string(*npPort.Protocol)}) + } else if npPort.Port.Type == intstr.Int { + numericPorts = append(numericPorts, protocolAndPort{port: npPort.Port.String(), protocol: string(*npPort.Protocol)}) + } else { + if protocol2eps, ok := namedPort2eps[npPort.Port.String()]; ok { + if numericPort2eps, ok := protocol2eps[string(*npPort.Protocol)]; ok { + for _, eps := range numericPort2eps { + namedPorts = append(namedPorts, *eps) + } + } + } + } + } + return +} + +func (npc *NetworkPolicyController) ListPodsByNamespaceAndLabels(namespace string, podSelector labels.Selector) (ret []*api.Pod, err error) { + podLister := listers.NewPodLister(npc.podLister) + allMatchedNameSpacePods, err := podLister.Pods(namespace).List(podSelector) + if err != nil { + return nil, err + } + return allMatchedNameSpacePods, nil +} + +func (npc *NetworkPolicyController) ListNamespaceByLabels(namespaceSelector labels.Selector) ([]*api.Namespace, error) { + namespaceLister := listers.NewNamespaceLister(npc.nsLister) + matchedNamespaces, err := namespaceLister.List(namespaceSelector) + if err != nil { + return nil, err + } + return matchedNamespaces, nil +} + +func (npc *NetworkPolicyController) evalIPBlockPeer(peer networking.NetworkPolicyPeer) [][]string { + ipBlock := make([][]string, 0) + if peer.PodSelector == nil && peer.NamespaceSelector == nil && peer.IPBlock != nil { + 
if cidr := peer.IPBlock.CIDR; strings.HasSuffix(cidr, "/0") { + ipBlock = append(ipBlock, []string{"0.0.0.0/1", utils.OptionTimeout, "0"}, []string{"128.0.0.0/1", utils.OptionTimeout, "0"}) + } else { + ipBlock = append(ipBlock, []string{cidr, utils.OptionTimeout, "0"}) + } + for _, except := range peer.IPBlock.Except { + if strings.HasSuffix(except, "/0") { + ipBlock = append(ipBlock, []string{"0.0.0.0/1", utils.OptionTimeout, "0", utils.OptionNoMatch}, []string{"128.0.0.0/1", utils.OptionTimeout, "0", utils.OptionNoMatch}) + } else { + ipBlock = append(ipBlock, []string{except, utils.OptionTimeout, "0", utils.OptionNoMatch}) + } + } + } + return ipBlock +} + +func (npc *NetworkPolicyController) grabNamedPortFromPod(pod *api.Pod, namedPort2eps *namedPort2eps) { + if pod == nil || namedPort2eps == nil { + return + } + for k := range pod.Spec.Containers { + for _, port := range pod.Spec.Containers[k].Ports { + name := port.Name + protocol := string(port.Protocol) + containerPort := strconv.Itoa(int(port.ContainerPort)) + + if (*namedPort2eps)[name] == nil { + (*namedPort2eps)[name] = make(protocol2eps) + } + if (*namedPort2eps)[name][protocol] == nil { + (*namedPort2eps)[name][protocol] = make(numericPort2eps) + } + if eps, ok := (*namedPort2eps)[name][protocol][containerPort]; !ok { + (*namedPort2eps)[name][protocol][containerPort] = &endPoints{ + ips: []string{pod.Status.PodIP}, + protocolAndPort: protocolAndPort{port: containerPort, protocol: protocol}, + } + } else { + eps.ips = append(eps.ips, pod.Status.PodIP) + } + } + } +} + +func networkPolicyChainName(namespace, policyName string, version string) string { + hash := sha256.Sum256([]byte(namespace + policyName + version)) + encoded := base32.StdEncoding.EncodeToString(hash[:]) + return kubeNetworkPolicyChainPrefix + encoded[:16] +} + +func policySourcePodIPSetName(namespace, policyName string) string { + hash := sha256.Sum256([]byte(namespace + policyName)) + encoded := 
base32.StdEncoding.EncodeToString(hash[:]) + return kubeSourceIPSetPrefix + encoded[:16] +} + +func policyDestinationPodIPSetName(namespace, policyName string) string { + hash := sha256.Sum256([]byte(namespace + policyName)) + encoded := base32.StdEncoding.EncodeToString(hash[:]) + return kubeDestinationIPSetPrefix + encoded[:16] +} + +func policyIndexedSourcePodIPSetName(namespace, policyName string, ingressRuleNo int) string { + hash := sha256.Sum256([]byte(namespace + policyName + "ingressrule" + strconv.Itoa(ingressRuleNo) + "pod")) + encoded := base32.StdEncoding.EncodeToString(hash[:]) + return kubeSourceIPSetPrefix + encoded[:16] +} + +func policyIndexedDestinationPodIPSetName(namespace, policyName string, egressRuleNo int) string { + hash := sha256.Sum256([]byte(namespace + policyName + "egressrule" + strconv.Itoa(egressRuleNo) + "pod")) + encoded := base32.StdEncoding.EncodeToString(hash[:]) + return kubeDestinationIPSetPrefix + encoded[:16] +} + +func policyIndexedSourceIPBlockIPSetName(namespace, policyName string, ingressRuleNo int) string { + hash := sha256.Sum256([]byte(namespace + policyName + "ingressrule" + strconv.Itoa(ingressRuleNo) + "ipblock")) + encoded := base32.StdEncoding.EncodeToString(hash[:]) + return kubeSourceIPSetPrefix + encoded[:16] +} + +func policyIndexedDestinationIPBlockIPSetName(namespace, policyName string, egressRuleNo int) string { + hash := sha256.Sum256([]byte(namespace + policyName + "egressrule" + strconv.Itoa(egressRuleNo) + "ipblock")) + encoded := base32.StdEncoding.EncodeToString(hash[:]) + return kubeDestinationIPSetPrefix + encoded[:16] +} + +func policyIndexedIngressNamedPortIPSetName(namespace, policyName string, ingressRuleNo, namedPortNo int) string { + hash := sha256.Sum256([]byte(namespace + policyName + "ingressrule" + strconv.Itoa(ingressRuleNo) + strconv.Itoa(namedPortNo) + "namedport")) + encoded := base32.StdEncoding.EncodeToString(hash[:]) + return kubeDestinationIPSetPrefix + encoded[:16] +} + +func 
policyIndexedEgressNamedPortIPSetName(namespace, policyName string, egressRuleNo, namedPortNo int) string { + hash := sha256.Sum256([]byte(namespace + policyName + "egressrule" + strconv.Itoa(egressRuleNo) + strconv.Itoa(namedPortNo) + "namedport")) + encoded := base32.StdEncoding.EncodeToString(hash[:]) + return kubeDestinationIPSetPrefix + encoded[:16] +} diff --git a/pkg/agent/netpol/utils.go b/pkg/agent/netpol/utils/ipset.go similarity index 88% rename from pkg/agent/netpol/utils.go rename to pkg/agent/netpol/utils/ipset.go index 79ab0f3575..48812b0896 100644 --- a/pkg/agent/netpol/utils.go +++ b/pkg/agent/netpol/utils/ipset.go @@ -1,21 +1,11 @@ -// Apache License v2.0 (copyright Cloud Native Labs & Rancher Labs) -// - modified from https://github.com/cloudnativelabs/kube-router/tree/d6f9f31a7b/pkg/utils - -// +build !windows - -package netpol +package utils import ( "bytes" "errors" "fmt" - "net" "os/exec" "strings" - "time" - - apiv1 "k8s.io/api/core/v1" - "k8s.io/client-go/informers" ) var ( @@ -31,7 +21,7 @@ const ( // DefaultMaxElem Default OptionMaxElem value. DefaultMaxElem = "65536" - // DefaultHasSize Default OptionHashSize value. + // DefaultHasSize Defaul OptionHashSize value. DefaultHasSize = "1024" // TypeHashIP The hash:ip set type uses a hash to store IP host addresses (default) or network addresses. Zero valued IP address cannot be stored in a hash:ip type of set. @@ -96,7 +86,7 @@ type IPSet struct { isIpv6 bool } -// Set represent a ipset set entry. +// Set reprensent a ipset set entry. type Set struct { Parent *IPSet Name string @@ -156,8 +146,8 @@ func (ipset *IPSet) runWithStdin(stdin *bytes.Buffer, args ...string) (string, e return stdout.String(), nil } -// NewSavedIPSet create a new IPSet with ipSetPath initialized. -func NewSavedIPSet(isIpv6 bool) (*IPSet, error) { +// NewIPSet create a new IPSet with ipSetPath initialized. 
+func NewIPSet(isIpv6 bool) (*IPSet, error) { ipSetPath, err := getIPSetPath() if err != nil { return nil, err @@ -167,9 +157,6 @@ func NewSavedIPSet(isIpv6 bool) (*IPSet, error) { Sets: make(map[string]*Set), isIpv6: isIpv6, } - if err := ipSet.Save(); err != nil { - return nil, err - } return ipSet, nil } @@ -221,11 +208,14 @@ func (ipset *IPSet) Add(set *Set) error { return err } - for _, entry := range set.Entries { - _, err := ipset.Get(set.Name).Add(entry.Options...) - if err != nil { - return err - } + options := make([][]string, len(set.Entries)) + for index, entry := range set.Entries { + options[index] = entry.Options + } + + err = ipset.Get(set.Name).BatchAdd(options) + if err != nil { + return err } return nil @@ -233,6 +223,8 @@ func (ipset *IPSet) Add(set *Set) error { // Add a given entry to the set. If the -exist option is specified, ipset // ignores if the entry already added to the set. +// Note: if you need to add multiple entries (e.g., in a loop), use BatchAdd instead, +// as it’s much more performant. func (set *Set) Add(addOptions ...string) (*Entry, error) { entry := &Entry{ Set: set, @@ -246,6 +238,35 @@ func (set *Set) Add(addOptions ...string) (*Entry, error) { return entry, nil } +// Adds given entries (with their options) to the set. +// For multiple items, this is much faster than Add(). +func (set *Set) BatchAdd(addOptions [][]string) error { + newEntries := make([]*Entry, len(addOptions)) + for index, options := range addOptions { + entry := &Entry{ + Set: set, + Options: options, + } + newEntries[index] = entry + } + set.Entries = append(set.Entries, newEntries...) 
+ + // Build the `restore` command contents + var builder strings.Builder + for _, options := range addOptions { + line := strings.Join(append([]string{"add", "-exist", set.name()}, options...), " ") + builder.WriteString(line + "\n") + } + restoreContents := builder.String() + + // Invoke the command + _, err := set.Parent.runWithStdin(bytes.NewBufferString(restoreContents), "restore") + if err != nil { + return err + } + return nil +} + // Del an entry from a set. If the -exist option is specified and the entry is // not in the set (maybe already expired), then the command is ignored. func (entry *Entry) Del() error { @@ -253,11 +274,14 @@ func (entry *Entry) Del() error { if err != nil { return err } - - return entry.Set.Parent.Save() + err = entry.Set.Parent.Save() + if err != nil { + return err + } + return nil } -// Test whether an entry is in a set or not. Exit status number is zero if the +// Test wether an entry is in a set or not. Exit status number is zero if the // tested entry is in the set and nonzero if it is missing from the set. func (set *Set) Test(testOptions ...string) (bool, error) { _, err := set.Parent.run(append([]string{"test", set.name()}, testOptions...)...) @@ -388,7 +412,7 @@ func (ipset *IPSet) Save() error { // stdin. Please note, existing sets and elements are not erased by restore // unless specified so in the restore file. All commands are allowed in restore // mode except list, help, version, interactive mode and restore itself. -// Send formatted ipset.sets into stdin of "ipset restore" command. +// Send formated ipset.sets into stdin of "ipset restore" command. func (ipset *IPSet) Restore() error { stdin := bytes.NewBufferString(buildIPSetRestore(ipset)) _, err := ipset.runWithStdin(stdin, "restore", "-exist") @@ -451,7 +475,19 @@ func (set *Set) Swap(setTo *Set) error { // Refresh a Set with new entries. 
func (set *Set) Refresh(entries []string, extraOptions ...string) error { + entriesWithOptions := make([][]string, len(entries)) + + for index, entry := range entries { + entriesWithOptions[index] = append([]string{entry}, extraOptions...) + } + + return set.RefreshWithBuiltinOptions(entriesWithOptions) +} + +// Refresh a Set with new entries with built-in options. +func (set *Set) RefreshWithBuiltinOptions(entries [][]string) error { var err error + // The set-name must be < 32 characters! tempName := set.Name + "-" @@ -466,11 +502,9 @@ func (set *Set) Refresh(entries []string, extraOptions ...string) error { return err } - for _, entry := range entries { - _, err = newSet.Add(entry) - if err != nil { - return err - } + err = newSet.BatchAdd(entries) + if err != nil { + return err } err = set.Swap(newSet) @@ -485,73 +519,3 @@ func (set *Set) Refresh(entries []string, extraOptions ...string) error { return nil } - -// Refresh a Set with new entries with built-in options. -func (set *Set) RefreshWithBuiltinOptions(entries [][]string) error { - var err error - tempName := set.Name + "-temp" - newSet := &Set{ - Parent: set.Parent, - Name: tempName, - Options: set.Options, - } - - err = set.Parent.Add(newSet) - if err != nil { - return err - } - - for _, entry := range entries { - _, err = newSet.Add(entry...) - if err != nil { - return err - } - } - - err = set.Swap(newSet) - if err != nil { - return err - } - - err = set.Parent.Destroy(tempName) - if err != nil { - return err - } - - return nil -} - -// GetNodeIP returns the most valid external facing IP address for a node. -// Order of preference: -// 1. NodeInternalIP -// 2. 
NodeExternalIP (Only set on cloud providers usually) -func GetNodeIP(node *apiv1.Node) (net.IP, error) { - addresses := node.Status.Addresses - addressMap := make(map[apiv1.NodeAddressType][]apiv1.NodeAddress) - for i := range addresses { - addressMap[addresses[i].Type] = append(addressMap[addresses[i].Type], addresses[i]) - } - if addresses, ok := addressMap[apiv1.NodeInternalIP]; ok { - return net.ParseIP(addresses[0].Address), nil - } - if addresses, ok := addressMap[apiv1.NodeExternalIP]; ok { - return net.ParseIP(addresses[0].Address), nil - } - return nil, errors.New("host IP unknown") -} - -// CacheSync performs cache synchronization under timeout limit -func CacheSyncOrTimeout(informerFactory informers.SharedInformerFactory, stopCh <-chan struct{}, cacheSyncTimeout time.Duration) error { - syncOverCh := make(chan struct{}) - go func() { - informerFactory.WaitForCacheSync(stopCh) - close(syncOverCh) - }() - - select { - case <-time.After(cacheSyncTimeout): - return errors.New(cacheSyncTimeout.String() + " timeout") - case <-syncOverCh: - return nil - } -} diff --git a/pkg/agent/netpol/utils/node.go b/pkg/agent/netpol/utils/node.go new file mode 100644 index 0000000000..6321437807 --- /dev/null +++ b/pkg/agent/netpol/utils/node.go @@ -0,0 +1,61 @@ +package utils + +import ( + "context" + "errors" + "fmt" + "net" + "os" + + apiv1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes" +) + +// GetNodeObject returns the node API object for the node +func GetNodeObject(clientset kubernetes.Interface, hostnameOverride string) (*apiv1.Node, error) { + // assuming kube-router is running as pod, first check env NODE_NAME + nodeName := os.Getenv("NODE_NAME") + if nodeName != "" { + node, err := clientset.CoreV1().Nodes().Get(context.Background(), nodeName, metav1.GetOptions{}) + if err == nil { + return node, nil + } + } + + // if env NODE_NAME is not set then check if node is register with hostname + hostName, _ := 
os.Hostname() + node, err := clientset.CoreV1().Nodes().Get(context.Background(), hostName, metav1.GetOptions{}) + if err == nil { + return node, nil + } + + // if env NODE_NAME is not set and node is not registered with hostname, then use host name override + if hostnameOverride != "" { + node, err = clientset.CoreV1().Nodes().Get(context.Background(), hostnameOverride, metav1.GetOptions{}) + if err == nil { + return node, nil + } + } + + return nil, fmt.Errorf("Failed to identify the node by NODE_NAME, hostname or --hostname-override") +} + +// GetNodeIP returns the most valid external facing IP address for a node. +// Order of preference: +// 1. NodeInternalIP +// 2. NodeExternalIP (Only set on cloud providers usually) +func GetNodeIP(node *apiv1.Node) (net.IP, error) { + addresses := node.Status.Addresses + addressMap := make(map[apiv1.NodeAddressType][]apiv1.NodeAddress) + for i := range addresses { + addressMap[addresses[i].Type] = append(addressMap[addresses[i].Type], addresses[i]) + } + if addresses, ok := addressMap[apiv1.NodeInternalIP]; ok { + return net.ParseIP(addresses[0].Address), nil + } + if addresses, ok := addressMap[apiv1.NodeExternalIP]; ok { + return net.ParseIP(addresses[0].Address), nil + } + return nil, errors.New("host IP unknown") +}