2015-08-11 02:47:13 +00:00
|
|
|
/*
|
|
|
|
Copyright 2015 The Kubernetes Authors All rights reserved.
|
|
|
|
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
you may not use this file except in compliance with the License.
|
|
|
|
You may obtain a copy of the License at
|
|
|
|
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
See the License for the specific language governing permissions and
|
|
|
|
limitations under the License.
|
|
|
|
*/
|
|
|
|
|
|
|
|
package iptables
|
|
|
|
|
2015-08-16 04:16:04 +00:00
|
|
|
//
|
|
|
|
// NOTE: this needs to be tested in e2e since it uses iptables for everything.
|
|
|
|
//
|
2015-08-11 02:47:13 +00:00
|
|
|
|
|
|
|
import (
|
|
|
|
"bytes"
|
|
|
|
"crypto/sha256"
|
|
|
|
"encoding/base32"
|
|
|
|
"fmt"
|
|
|
|
"net"
|
|
|
|
"reflect"
|
|
|
|
"strconv"
|
|
|
|
"strings"
|
|
|
|
"sync"
|
|
|
|
"time"
|
|
|
|
|
|
|
|
"github.com/coreos/go-semver/semver"
|
2015-08-15 04:53:52 +00:00
|
|
|
"github.com/davecgh/go-spew/spew"
|
2015-08-11 02:47:13 +00:00
|
|
|
"github.com/golang/glog"
|
|
|
|
"k8s.io/kubernetes/pkg/api"
|
|
|
|
"k8s.io/kubernetes/pkg/proxy"
|
|
|
|
"k8s.io/kubernetes/pkg/types"
|
|
|
|
utilexec "k8s.io/kubernetes/pkg/util/exec"
|
|
|
|
utiliptables "k8s.io/kubernetes/pkg/util/iptables"
|
|
|
|
"k8s.io/kubernetes/pkg/util/slice"
|
2015-10-05 17:28:53 +00:00
|
|
|
utilsysctl "k8s.io/kubernetes/pkg/util/sysctl"
|
2015-08-11 02:47:13 +00:00
|
|
|
)
|
|
|
|
|
2015-08-16 04:16:04 +00:00
|
|
|
// iptablesMinVersion is the minimum version of iptables for which we will use the Proxier
// from this package instead of the userspace Proxier. While most of the
// features we need were available earlier, the '-C' flag was added more
// recently. We use that indirectly in Ensure* functions, and if we don't
// have it, we have to be extra careful about the exact args we feed in being
// the same as the args we read back (iptables itself normalizes some args).
// This is the "new" Proxier, so we require "new" versions of tools.
const iptablesMinVersion = utiliptables.MinCheckVersion
|
2015-08-11 02:47:13 +00:00
|
|
|
|
|
|
|
// iptablesServicesChain is the top-level chain that holds the per-service
// match rules (the services chain).
const iptablesServicesChain utiliptables.Chain = "KUBE-SERVICES"

// iptablesNodePortsChain is the chain that holds rules for services exposed
// on node ports (the nodeports chain).
const iptablesNodePortsChain utiliptables.Chain = "KUBE-NODEPORTS"

// iptablesMasqueradeMark is the mark we apply to traffic needing SNAT.
// The value is the hex encoding of the ASCII string "MASQ".
const iptablesMasqueradeMark = "0x4d415351"
|
2015-08-11 02:47:13 +00:00
|
|
|
|
2015-10-27 06:08:37 +00:00
|
|
|
// IptablesVersioner can query the current iptables version.
type IptablesVersioner interface {
	// GetVersion returns the version string in "X.Y.Z" form.
	GetVersion() (string, error)
}
|
|
|
|
|
|
|
|
// CanUseIptablesProxier returns true if we should use the iptables Proxier
|
2015-08-15 05:15:12 +00:00
|
|
|
// instead of the "classic" userspace Proxier. This is determined by checking
|
|
|
|
// the iptables version and for the existence of kernel features. It may return
|
2015-10-02 13:52:01 +00:00
|
|
|
// an error if it fails to get the iptables version without error, in which
|
2015-08-15 05:15:12 +00:00
|
|
|
// case it will also return false.
|
2015-10-27 06:08:37 +00:00
|
|
|
func CanUseIptablesProxier(iptver IptablesVersioner) (bool, error) {
|
2015-08-16 04:16:04 +00:00
|
|
|
minVersion, err := semver.NewVersion(iptablesMinVersion)
|
2015-08-11 02:47:13 +00:00
|
|
|
if err != nil {
|
|
|
|
return false, err
|
|
|
|
}
|
2015-10-27 06:08:37 +00:00
|
|
|
// returns "X.Y.Z"
|
|
|
|
versionString, err := iptver.GetVersion()
|
2015-08-11 02:47:13 +00:00
|
|
|
if err != nil {
|
|
|
|
return false, err
|
|
|
|
}
|
2015-08-26 13:57:31 +00:00
|
|
|
version, err := semver.NewVersion(versionString)
|
2015-08-11 02:47:13 +00:00
|
|
|
if err != nil {
|
|
|
|
return false, err
|
|
|
|
}
|
2015-08-15 05:15:12 +00:00
|
|
|
if version.LessThan(*minVersion) {
|
|
|
|
return false, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Check for the required sysctls. We don't care about the value, just
|
|
|
|
// that it exists. If this Proxier is chosen, we'll iniialize it as we
|
|
|
|
// need.
|
2015-10-27 06:08:37 +00:00
|
|
|
// TODO: we should inject a sysctl.Interface like we do for iptables
|
2015-10-05 17:28:53 +00:00
|
|
|
_, err = utilsysctl.GetSysctl(sysctlRouteLocalnet)
|
2015-08-15 05:15:12 +00:00
|
|
|
if err != nil {
|
|
|
|
return false, err
|
|
|
|
}
|
|
|
|
|
|
|
|
return true, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// sysctlRouteLocalnet is the sysctl this proxier requires; its existence is
// probed by CanUseIptablesProxier and it is enabled by NewProxier.
const sysctlRouteLocalnet = "net/ipv4/conf/all/route_localnet"

// sysctlBridgeCallIptables is enabled (best-effort) by NewProxier so that
// bridged traffic is seen by iptables — NOTE(review): failure is only warned.
const sysctlBridgeCallIptables = "net/bridge/bridge-nf-call-iptables"
|
2015-08-15 05:15:12 +00:00
|
|
|
|
2015-08-11 02:47:13 +00:00
|
|
|
// serviceInfo is an internal struct for storing service information.
type serviceInfo struct {
	clusterIP           net.IP
	port                int
	protocol            api.Protocol
	nodePort            int
	loadBalancerStatus  api.LoadBalancerStatus
	sessionAffinityType api.ServiceAffinity
	// stickyMaxAgeSeconds is the session-affinity timeout (defaulted in
	// newServiceInfo).
	stickyMaxAgeSeconds int
	// Deprecated, but required for back-compat (including e2e)
	externalIPs []string
}
|
|
|
|
|
|
|
|
// returns a new serviceInfo struct
|
|
|
|
func newServiceInfo(service proxy.ServicePortName) *serviceInfo {
|
|
|
|
return &serviceInfo{
|
|
|
|
sessionAffinityType: api.ServiceAffinityNone, // default
|
2015-08-15 03:50:29 +00:00
|
|
|
stickyMaxAgeSeconds: 180, // TODO: paramaterize this in the API.
|
2015-08-11 02:47:13 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Proxier is an iptables based proxy for connections between a localhost:lport
// and services that provide the actual backends.
type Proxier struct {
	mu           sync.Mutex // protects the following fields
	serviceMap   map[proxy.ServicePortName]*serviceInfo
	endpointsMap map[proxy.ServicePortName][]string
	// portsMap tracks local ports we hold open (e.g. for local externalIPs)
	// so they can be closed when no longer needed.
	portsMap                    map[localPort]closeable
	haveReceivedServiceUpdate   bool // true once we've seen an OnServiceUpdate event
	haveReceivedEndpointsUpdate bool // true once we've seen an OnEndpointsUpdate event

	// These are effectively const and do not need the mutex to be held.
	syncPeriod    time.Duration
	iptables      utiliptables.Interface
	masqueradeAll bool
}
|
|
|
|
|
|
|
|
// localPort identifies a port held open on this host on behalf of a service
// (for example an externalIP that happens to be local). It is used as a map
// key, so it contains only comparable fields.
type localPort struct {
	desc     string
	ip       string
	port     int
	protocol string
}
|
|
|
|
|
|
|
|
func (lp *localPort) String() string {
|
|
|
|
return fmt.Sprintf("%q (%s:%d/%s)", lp.desc, lp.ip, lp.port, lp.protocol)
|
|
|
|
}
|
|
|
|
|
|
|
|
// closeable abstracts anything that can be Close()d — here, the sockets we
// hold open for local ports (see Proxier.portsMap).
type closeable interface {
	Close() error
}
|
|
|
|
|
|
|
|
// Proxier implements ProxyProvider (compile-time interface check).
var _ proxy.ProxyProvider = &Proxier{}
|
|
|
|
|
|
|
|
// NewProxier returns a new Proxier given an iptables Interface instance.
// Because of the iptables logic, it is assumed that there is only a single Proxier active on a machine.
// An error will be returned if iptables fails to update or acquire the initial lock.
// Once a proxier is created, it will keep iptables up to date in the background and
// will not terminate if a particular iptables call fails.
func NewProxier(ipt utiliptables.Interface, exec utilexec.Interface, syncPeriod time.Duration, masqueradeAll bool) (*Proxier, error) {
	// Set the route_localnet sysctl we need for this proxier to work;
	// failing to set it is fatal (unlike the bridge sysctl below).
	if err := utilsysctl.SetSysctl(sysctlRouteLocalnet, 1); err != nil {
		return nil, fmt.Errorf("can't set sysctl %s: %v", sysctlRouteLocalnet, err)
	}

	// Load the module. It's OK if this fails (e.g. the module is not present)
	// because we'll catch the error on the sysctl, which is what we actually
	// care about.
	exec.Command("modprobe", "br-netfilter").CombinedOutput()
	if err := utilsysctl.SetSysctl(sysctlBridgeCallIptables, 1); err != nil {
		// Best-effort: only warn, since not all environments have the bridge
		// module available.
		glog.Warningf("can't set sysctl %s: %v", sysctlBridgeCallIptables, err)
	}

	return &Proxier{
		serviceMap:    make(map[proxy.ServicePortName]*serviceInfo),
		endpointsMap:  make(map[proxy.ServicePortName][]string),
		portsMap:      make(map[localPort]closeable),
		syncPeriod:    syncPeriod,
		iptables:      ipt,
		masqueradeAll: masqueradeAll,
	}, nil
}
|
|
|
|
|
2015-08-20 17:01:37 +00:00
|
|
|
// CleanupLeftovers removes all iptables rules and chains created by the Proxier
|
|
|
|
// It returns true if an error was encountered. Errors are logged.
|
|
|
|
func CleanupLeftovers(ipt utiliptables.Interface) (encounteredError bool) {
|
|
|
|
//TODO: actually tear down all rules and chains.
|
2015-10-27 17:08:16 +00:00
|
|
|
args := []string{"-m", "comment", "--comment", "kubernetes service portals", "-j", string(iptablesServicesChain)}
|
2015-08-20 17:01:37 +00:00
|
|
|
if err := ipt.DeleteRule(utiliptables.TableNAT, utiliptables.ChainOutput, args...); err != nil {
|
|
|
|
glog.Errorf("Error removing pure-iptables proxy rule: %v", err)
|
|
|
|
encounteredError = true
|
2015-08-11 02:47:13 +00:00
|
|
|
}
|
2015-08-20 17:01:37 +00:00
|
|
|
if err := ipt.DeleteRule(utiliptables.TableNAT, utiliptables.ChainPrerouting, args...); err != nil {
|
|
|
|
glog.Errorf("Error removing pure-iptables proxy rule: %v", err)
|
|
|
|
encounteredError = true
|
2015-08-11 02:47:13 +00:00
|
|
|
}
|
2015-10-27 17:08:16 +00:00
|
|
|
|
|
|
|
args = []string{"-m", "comment", "--comment", "kubernetes service traffic requiring SNAT", "-m", "mark", "--mark", iptablesMasqueradeMark, "-j", "MASQUERADE"}
|
|
|
|
if err := ipt.DeleteRule(utiliptables.TableNAT, utiliptables.ChainPostrouting, args...); err != nil {
|
|
|
|
glog.Errorf("Error removing pure-iptables proxy rule: %v", err)
|
|
|
|
encounteredError = true
|
|
|
|
}
|
|
|
|
|
|
|
|
// flush and delete chains.
|
|
|
|
chains := []utiliptables.Chain{iptablesServicesChain, iptablesNodePortsChain}
|
|
|
|
for _, c := range chains {
|
|
|
|
// flush chain, then if sucessful delete, delete will fail if flush fails.
|
|
|
|
if err := ipt.FlushChain(utiliptables.TableNAT, c); err != nil {
|
|
|
|
glog.Errorf("Error flushing pure-iptables proxy chain: %v", err)
|
|
|
|
encounteredError = true
|
|
|
|
} else {
|
|
|
|
if err = ipt.DeleteChain(utiliptables.TableNAT, c); err != nil {
|
|
|
|
glog.Errorf("Error deleting pure-iptables proxy chain: %v", err)
|
|
|
|
encounteredError = true
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2015-08-20 17:01:37 +00:00
|
|
|
return encounteredError
|
2015-08-11 02:47:13 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func (proxier *Proxier) sameConfig(info *serviceInfo, service *api.Service, port *api.ServicePort) bool {
|
2015-08-15 03:50:29 +00:00
|
|
|
if info.protocol != port.Protocol || info.port != port.Port || info.nodePort != port.NodePort {
|
2015-08-11 02:47:13 +00:00
|
|
|
return false
|
|
|
|
}
|
2015-08-15 03:50:29 +00:00
|
|
|
if !info.clusterIP.Equal(net.ParseIP(service.Spec.ClusterIP)) {
|
2015-08-11 02:47:13 +00:00
|
|
|
return false
|
|
|
|
}
|
2015-08-12 00:18:21 +00:00
|
|
|
if !ipsEqual(info.externalIPs, service.Spec.ExternalIPs) {
|
2015-08-11 02:47:13 +00:00
|
|
|
return false
|
|
|
|
}
|
|
|
|
if !api.LoadBalancerStatusEqual(&info.loadBalancerStatus, &service.Status.LoadBalancer) {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
if info.sessionAffinityType != service.Spec.SessionAffinity {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
|
|
|
|
// ipsEqual reports whether lhs and rhs contain exactly the same strings in
// the same order.
func ipsEqual(lhs, rhs []string) bool {
	if len(lhs) != len(rhs) {
		return false
	}
	for i, ip := range lhs {
		if ip != rhs[i] {
			return false
		}
	}
	return true
}
|
|
|
|
|
2015-08-14 16:38:43 +00:00
|
|
|
// Sync is called to immediately synchronize the proxier state to iptables.
// It takes the proxier lock, since syncProxyRules assumes it is held.
func (proxier *Proxier) Sync() {
	proxier.mu.Lock()
	defer proxier.mu.Unlock()
	proxier.syncProxyRules()
}
|
|
|
|
|
2015-08-11 02:47:13 +00:00
|
|
|
// SyncLoop runs periodic work. This is expected to run as a goroutine or as the main loop of the app. It does not return.
|
|
|
|
func (proxier *Proxier) SyncLoop() {
|
2015-08-13 00:33:05 +00:00
|
|
|
t := time.NewTicker(proxier.syncPeriod)
|
2015-08-11 02:47:13 +00:00
|
|
|
defer t.Stop()
|
|
|
|
for {
|
|
|
|
<-t.C
|
|
|
|
glog.V(6).Infof("Periodic sync")
|
2015-08-14 16:38:43 +00:00
|
|
|
proxier.Sync()
|
2015-08-11 02:47:13 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// OnServiceUpdate tracks the active set of service proxies.
// They will be synchronized using syncProxyRules()
func (proxier *Proxier) OnServiceUpdate(allServices []api.Service) {
	start := time.Now()
	defer func() {
		glog.V(4).Infof("OnServiceUpdate took %v for %d services", time.Since(start), len(allServices))
	}()
	proxier.mu.Lock()
	defer proxier.mu.Unlock()
	proxier.haveReceivedServiceUpdate = true

	activeServices := make(map[proxy.ServicePortName]bool) // use a map as a set

	for i := range allServices {
		// Index (rather than range value) to avoid copying the struct.
		service := &allServices[i]
		svcName := types.NamespacedName{
			Namespace: service.Namespace,
			Name:      service.Name,
		}

		// if ClusterIP is "None" or empty, skip proxying
		if !api.IsServiceIPSet(service) {
			glog.V(3).Infof("Skipping service %s due to clusterIP = %q", svcName, service.Spec.ClusterIP)
			continue
		}

		for i := range service.Spec.Ports {
			servicePort := &service.Spec.Ports[i]

			serviceName := proxy.ServicePortName{
				NamespacedName: svcName,
				Port:           servicePort.Name,
			}
			activeServices[serviceName] = true
			info, exists := proxier.serviceMap[serviceName]
			if exists && proxier.sameConfig(info, service, servicePort) {
				// Nothing changed.
				continue
			}
			if exists {
				// Something changed: drop the stale entry and rebuild below.
				glog.V(3).Infof("Something changed for service %q: removing it", serviceName)
				delete(proxier.serviceMap, serviceName)
			}
			serviceIP := net.ParseIP(service.Spec.ClusterIP)
			glog.V(1).Infof("Adding new service %q at %s:%d/%s", serviceName, serviceIP, servicePort.Port, servicePort.Protocol)
			info = newServiceInfo(serviceName)
			info.clusterIP = serviceIP
			info.port = servicePort.Port
			info.protocol = servicePort.Protocol
			info.nodePort = servicePort.NodePort
			info.externalIPs = service.Spec.ExternalIPs
			// Deep-copy in case the service instance changes
			info.loadBalancerStatus = *api.LoadBalancerStatusDeepCopy(&service.Status.LoadBalancer)
			info.sessionAffinityType = service.Spec.SessionAffinity
			proxier.serviceMap[serviceName] = info

			glog.V(4).Infof("added serviceInfo(%s): %s", serviceName, spew.Sdump(info))
		}
	}

	// Remove services missing from the update.
	for name := range proxier.serviceMap {
		if !activeServices[name] {
			glog.V(1).Infof("Removing service %q", name)
			delete(proxier.serviceMap, name)
		}
	}

	proxier.syncProxyRules()
}
|
|
|
|
|
|
|
|
// OnEndpointsUpdate takes in a slice of updated endpoints.
func (proxier *Proxier) OnEndpointsUpdate(allEndpoints []api.Endpoints) {
	start := time.Now()
	defer func() {
		glog.V(4).Infof("OnEndpointsUpdate took %v for %d endpoints", time.Since(start), len(allEndpoints))
	}()

	proxier.mu.Lock()
	defer proxier.mu.Unlock()
	proxier.haveReceivedEndpointsUpdate = true

	activeEndpoints := make(map[proxy.ServicePortName]bool) // use a map as a set

	// Update endpoints for services.
	for i := range allEndpoints {
		svcEndpoints := &allEndpoints[i]

		// We need to build a map of portname -> all ip:ports for that
		// portname. Explode Endpoints.Subsets[*] into this structure.
		portsToEndpoints := map[string][]hostPortPair{}
		for i := range svcEndpoints.Subsets {
			ss := &svcEndpoints.Subsets[i]
			for i := range ss.Ports {
				port := &ss.Ports[i]
				for i := range ss.Addresses {
					addr := &ss.Addresses[i]
					portsToEndpoints[port.Name] = append(portsToEndpoints[port.Name], hostPortPair{addr.IP, port.Port})
				}
			}
		}

		for portname := range portsToEndpoints {
			svcPort := proxy.ServicePortName{NamespacedName: types.NamespacedName{Namespace: svcEndpoints.Namespace, Name: svcEndpoints.Name}, Port: portname}
			curEndpoints := proxier.endpointsMap[svcPort]
			newEndpoints := flattenValidEndpoints(portsToEndpoints[portname])
			// Only write (and log) when something changed. The copy keeps
			// slicesEquiv's in-place sort from disturbing the stored slice.
			if len(curEndpoints) != len(newEndpoints) || !slicesEquiv(slice.CopyStrings(curEndpoints), newEndpoints) {
				glog.V(1).Infof("Setting endpoints for %q to %+v", svcPort, newEndpoints)
				proxier.endpointsMap[svcPort] = newEndpoints
			}
			activeEndpoints[svcPort] = true
		}
	}

	// Remove endpoints missing from the update.
	for name := range proxier.endpointsMap {
		if !activeEndpoints[name] {
			glog.V(2).Infof("Removing endpoints for %q", name)
			delete(proxier.endpointsMap, name)
		}
	}

	proxier.syncProxyRules()
}
|
|
|
|
|
|
|
|
// hostPortPair is a (host IP, port) tuple; used in OnEndpointsUpdate to
// collect endpoint addresses before flattening them into "host:port" strings.
type hostPortPair struct {
	host string
	port int
}
|
|
|
|
|
|
|
|
func isValidEndpoint(hpp *hostPortPair) bool {
|
|
|
|
return hpp.host != "" && hpp.port > 0
|
|
|
|
}
|
|
|
|
|
|
|
|
// Tests whether two slices are equivalent. This sorts both slices in-place.
|
|
|
|
func slicesEquiv(lhs, rhs []string) bool {
|
|
|
|
if len(lhs) != len(rhs) {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
if reflect.DeepEqual(slice.SortStrings(lhs), slice.SortStrings(rhs)) {
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
|
|
|
func flattenValidEndpoints(endpoints []hostPortPair) []string {
|
|
|
|
// Convert Endpoint objects into strings for easier use later.
|
|
|
|
var result []string
|
|
|
|
for i := range endpoints {
|
|
|
|
hpp := &endpoints[i]
|
|
|
|
if isValidEndpoint(hpp) {
|
|
|
|
result = append(result, net.JoinHostPort(hpp.host, strconv.Itoa(hpp.port)))
|
2015-08-20 06:11:56 +00:00
|
|
|
} else {
|
|
|
|
glog.Warningf("got invalid endpoint: %+v", *hpp)
|
2015-08-11 02:47:13 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
return result
|
|
|
|
}
|
|
|
|
|
2015-08-20 06:11:56 +00:00
|
|
|
// servicePortChainName takes the ServicePortName for a service and
|
2015-08-15 04:02:53 +00:00
|
|
|
// returns the associated iptables chain. This is computed by hashing (sha256)
|
|
|
|
// then encoding to base32 and truncating with the prefix "KUBE-SVC-". We do
|
|
|
|
// this because Iptables Chain Names must be <= 28 chars long, and the longer
|
|
|
|
// they are the harder they are to read.
|
2015-08-20 06:11:56 +00:00
|
|
|
func servicePortChainName(s proxy.ServicePortName, protocol string) utiliptables.Chain {
|
2015-08-15 04:02:53 +00:00
|
|
|
hash := sha256.Sum256([]byte(s.String() + protocol))
|
2015-08-11 02:47:13 +00:00
|
|
|
encoded := base32.StdEncoding.EncodeToString(hash[:])
|
2015-08-15 04:02:53 +00:00
|
|
|
return utiliptables.Chain("KUBE-SVC-" + encoded[:16])
|
2015-08-11 02:47:13 +00:00
|
|
|
}
|
|
|
|
|
2015-08-20 06:11:56 +00:00
|
|
|
// This is the same as servicePortChainName but with the endpoint included.
|
|
|
|
func servicePortEndpointChainName(s proxy.ServicePortName, protocol string, endpoint string) utiliptables.Chain {
|
2015-08-15 04:02:53 +00:00
|
|
|
hash := sha256.Sum256([]byte(s.String() + protocol + endpoint))
|
2015-08-11 02:47:13 +00:00
|
|
|
encoded := base32.StdEncoding.EncodeToString(hash[:])
|
2015-08-15 04:02:53 +00:00
|
|
|
return utiliptables.Chain("KUBE-SEP-" + encoded[:16])
|
2015-08-11 02:47:13 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// This is where all of the iptables-save/restore calls happen.
|
|
|
|
// The only other iptables rules are those that are setup in iptablesInit()
|
|
|
|
// assumes proxier.mu is held
|
2015-08-20 06:11:56 +00:00
|
|
|
func (proxier *Proxier) syncProxyRules() {
|
2016-02-02 03:02:23 +00:00
|
|
|
start := time.Now()
|
|
|
|
defer func() {
|
|
|
|
glog.V(4).Infof("syncProxyRules took %v", time.Since(start))
|
|
|
|
}()
|
2015-08-11 02:47:13 +00:00
|
|
|
// don't sync rules till we've received services and endpoints
|
|
|
|
if !proxier.haveReceivedEndpointsUpdate || !proxier.haveReceivedServiceUpdate {
|
2015-08-15 04:53:52 +00:00
|
|
|
glog.V(2).Info("Not syncing iptables until Services and Endpoints have been received from master")
|
2015-08-20 06:11:56 +00:00
|
|
|
return
|
2015-08-11 02:47:13 +00:00
|
|
|
}
|
2015-08-15 04:53:52 +00:00
|
|
|
glog.V(3).Infof("Syncing iptables rules")
|
2015-08-11 02:47:13 +00:00
|
|
|
|
2015-08-14 22:20:19 +00:00
|
|
|
// Ensure main chains and rules are installed.
|
2016-01-13 00:13:29 +00:00
|
|
|
tablesNeedServicesChain := []utiliptables.Table{utiliptables.TableFilter, utiliptables.TableNAT}
|
|
|
|
for _, table := range tablesNeedServicesChain {
|
|
|
|
if _, err := proxier.iptables.EnsureChain(table, iptablesServicesChain); err != nil {
|
|
|
|
glog.Errorf("Failed to ensure that %s chain %s exists: %v", table, iptablesServicesChain, err)
|
2015-08-20 06:11:56 +00:00
|
|
|
return
|
2015-08-14 22:20:19 +00:00
|
|
|
}
|
2016-01-13 00:13:29 +00:00
|
|
|
}
|
|
|
|
// Link the services chain.
|
|
|
|
tableChainsNeedJumpServices := []struct {
|
|
|
|
table utiliptables.Table
|
|
|
|
chain utiliptables.Chain
|
|
|
|
}{
|
|
|
|
{utiliptables.TableFilter, utiliptables.ChainOutput},
|
|
|
|
{utiliptables.TableNAT, utiliptables.ChainOutput},
|
|
|
|
{utiliptables.TableNAT, utiliptables.ChainPrerouting},
|
|
|
|
}
|
|
|
|
comment := "kubernetes service portals"
|
|
|
|
args := []string{"-m", "comment", "--comment", comment, "-j", string(iptablesServicesChain)}
|
|
|
|
for _, tc := range tableChainsNeedJumpServices {
|
|
|
|
if _, err := proxier.iptables.EnsureRule(utiliptables.Prepend, tc.table, tc.chain, args...); err != nil {
|
|
|
|
glog.Errorf("Failed to ensure that %s chain %s jumps to %s: %v", tc.table, tc.chain, iptablesServicesChain, err)
|
2015-08-20 06:11:56 +00:00
|
|
|
return
|
2015-08-14 22:20:19 +00:00
|
|
|
}
|
2015-08-11 02:47:13 +00:00
|
|
|
}
|
2015-08-14 22:20:19 +00:00
|
|
|
// Link the output rules.
|
|
|
|
{
|
|
|
|
comment := "kubernetes service traffic requiring SNAT"
|
|
|
|
args := []string{"-m", "comment", "--comment", comment, "-m", "mark", "--mark", iptablesMasqueradeMark, "-j", "MASQUERADE"}
|
|
|
|
if _, err := proxier.iptables.EnsureRule(utiliptables.Append, utiliptables.TableNAT, utiliptables.ChainPostrouting, args...); err != nil {
|
2015-08-20 06:11:56 +00:00
|
|
|
glog.Errorf("Failed to ensure that chain %s obeys MASQUERADE mark: %v", utiliptables.ChainPostrouting, err)
|
|
|
|
return
|
2015-08-14 22:20:19 +00:00
|
|
|
}
|
2015-08-11 02:47:13 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Get iptables-save output so we can check for existing chains and rules.
|
|
|
|
// This will be a map of chain name to chain with rules as stored in iptables-save/iptables-restore
|
2016-01-13 00:13:29 +00:00
|
|
|
existingFilterChains := make(map[utiliptables.Chain]string)
|
|
|
|
iptablesSaveRaw, err := proxier.iptables.Save(utiliptables.TableFilter)
|
2015-08-11 02:47:13 +00:00
|
|
|
if err != nil { // if we failed to get any rules
|
2015-10-02 13:52:01 +00:00
|
|
|
glog.Errorf("Failed to execute iptables-save, syncing all rules. %s", err.Error())
|
2015-08-11 02:47:13 +00:00
|
|
|
} else { // otherwise parse the output
|
2016-01-13 00:13:29 +00:00
|
|
|
existingFilterChains = getChainLines(utiliptables.TableFilter, iptablesSaveRaw)
|
2015-08-11 02:47:13 +00:00
|
|
|
}
|
|
|
|
|
2016-01-13 00:13:29 +00:00
|
|
|
existingNATChains := make(map[utiliptables.Chain]string)
|
|
|
|
iptablesSaveRaw, err = proxier.iptables.Save(utiliptables.TableNAT)
|
|
|
|
if err != nil { // if we failed to get any rules
|
|
|
|
glog.Errorf("Failed to execute iptables-save, syncing all rules. %s", err.Error())
|
|
|
|
} else { // otherwise parse the output
|
|
|
|
existingNATChains = getChainLines(utiliptables.TableNAT, iptablesSaveRaw)
|
|
|
|
}
|
2015-08-11 02:47:13 +00:00
|
|
|
|
2016-01-13 00:13:29 +00:00
|
|
|
filterChains := bytes.NewBuffer(nil)
|
|
|
|
filterRules := bytes.NewBuffer(nil)
|
|
|
|
natChains := bytes.NewBuffer(nil)
|
|
|
|
natRules := bytes.NewBuffer(nil)
|
|
|
|
|
|
|
|
// Write table headers.
|
|
|
|
writeLine(filterChains, "*filter")
|
|
|
|
writeLine(natChains, "*nat")
|
2015-08-11 02:47:13 +00:00
|
|
|
|
2015-08-15 03:50:29 +00:00
|
|
|
// Make sure we keep stats for the top-level chains, if they existed
|
|
|
|
// (which they should have because we created them above).
|
2016-01-13 00:13:29 +00:00
|
|
|
if chain, ok := existingFilterChains[iptablesServicesChain]; ok {
|
|
|
|
writeLine(filterChains, chain)
|
2015-08-11 02:47:13 +00:00
|
|
|
} else {
|
2016-01-13 00:13:29 +00:00
|
|
|
writeLine(filterChains, makeChainLine(iptablesServicesChain))
|
2015-08-15 03:50:29 +00:00
|
|
|
}
|
2016-01-13 00:13:29 +00:00
|
|
|
if chain, ok := existingNATChains[iptablesServicesChain]; ok {
|
|
|
|
writeLine(natChains, chain)
|
2015-08-15 03:50:29 +00:00
|
|
|
} else {
|
2016-01-13 00:13:29 +00:00
|
|
|
writeLine(natChains, makeChainLine(iptablesServicesChain))
|
|
|
|
}
|
|
|
|
if chain, ok := existingNATChains[iptablesNodePortsChain]; ok {
|
|
|
|
writeLine(natChains, chain)
|
|
|
|
} else {
|
|
|
|
writeLine(natChains, makeChainLine(iptablesNodePortsChain))
|
2015-08-11 02:47:13 +00:00
|
|
|
}
|
|
|
|
|
2016-01-13 00:13:29 +00:00
|
|
|
// Accumulate nat chains to keep.
|
|
|
|
activeNATChains := map[utiliptables.Chain]bool{} // use a map as a set
|
2015-08-20 06:11:56 +00:00
|
|
|
|
|
|
|
// Accumulate new local ports that we have opened.
|
|
|
|
newLocalPorts := map[localPort]closeable{}
|
2015-08-11 02:47:13 +00:00
|
|
|
|
2015-08-15 03:50:29 +00:00
|
|
|
// Build rules for each service.
|
2015-08-20 06:11:56 +00:00
|
|
|
for svcName, svcInfo := range proxier.serviceMap {
|
|
|
|
protocol := strings.ToLower(string(svcInfo.protocol))
|
2015-08-15 03:50:29 +00:00
|
|
|
|
|
|
|
// Create the per-service chain, retaining counters if possible.
|
2015-08-20 06:11:56 +00:00
|
|
|
svcChain := servicePortChainName(svcName, protocol)
|
2016-01-13 00:13:29 +00:00
|
|
|
if chain, ok := existingNATChains[svcChain]; ok {
|
|
|
|
writeLine(natChains, chain)
|
2015-08-11 02:47:13 +00:00
|
|
|
} else {
|
2016-01-13 00:13:29 +00:00
|
|
|
writeLine(natChains, makeChainLine(svcChain))
|
2015-08-11 02:47:13 +00:00
|
|
|
}
|
2016-01-13 00:13:29 +00:00
|
|
|
activeNATChains[svcChain] = true
|
2015-08-15 03:50:29 +00:00
|
|
|
|
|
|
|
// Capture the clusterIP.
|
2015-08-20 18:39:01 +00:00
|
|
|
args := []string{
|
2015-08-15 03:50:29 +00:00
|
|
|
"-A", string(iptablesServicesChain),
|
2015-08-20 06:11:56 +00:00
|
|
|
"-m", "comment", "--comment", fmt.Sprintf("\"%s cluster IP\"", svcName.String()),
|
2015-08-15 03:50:29 +00:00
|
|
|
"-m", protocol, "-p", protocol,
|
2015-08-20 06:11:56 +00:00
|
|
|
"-d", fmt.Sprintf("%s/32", svcInfo.clusterIP.String()),
|
|
|
|
"--dport", fmt.Sprintf("%d", svcInfo.port),
|
2015-08-20 18:39:01 +00:00
|
|
|
}
|
2015-08-20 06:11:56 +00:00
|
|
|
if proxier.masqueradeAll {
|
2016-01-13 00:13:29 +00:00
|
|
|
writeLine(natRules, append(args,
|
2015-08-20 18:39:01 +00:00
|
|
|
"-j", "MARK", "--set-xmark", fmt.Sprintf("%s/0xffffffff", iptablesMasqueradeMark))...)
|
|
|
|
}
|
2016-01-13 00:13:29 +00:00
|
|
|
writeLine(natRules, append(args,
|
2015-08-20 18:39:01 +00:00
|
|
|
"-j", string(svcChain))...)
|
2015-08-15 03:50:29 +00:00
|
|
|
|
|
|
|
// Capture externalIPs.
|
2015-08-20 06:11:56 +00:00
|
|
|
for _, externalIP := range svcInfo.externalIPs {
|
|
|
|
// If the "external" IP happens to be an IP that is local to this
|
|
|
|
// machine, hold the local port open so no other process can open it
|
|
|
|
// (because the socket might open but it would never work).
|
|
|
|
if local, err := isLocalIP(externalIP); err != nil {
|
|
|
|
glog.Errorf("can't determine if IP is local, assuming not: %v", err)
|
|
|
|
} else if local {
|
|
|
|
lp := localPort{
|
|
|
|
desc: "externalIP for " + svcName.String(),
|
|
|
|
ip: externalIP,
|
|
|
|
port: svcInfo.port,
|
|
|
|
protocol: protocol,
|
|
|
|
}
|
|
|
|
if proxier.portsMap[lp] != nil {
|
|
|
|
newLocalPorts[lp] = proxier.portsMap[lp]
|
|
|
|
} else {
|
|
|
|
socket, err := openLocalPort(&lp)
|
|
|
|
if err != nil {
|
|
|
|
glog.Errorf("can't open %s, skipping this externalIP: %v", lp.String(), err)
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
newLocalPorts[lp] = socket
|
|
|
|
}
|
|
|
|
} // We're holding the port, so it's OK to install iptables rules.
|
2015-08-15 03:50:29 +00:00
|
|
|
args := []string{
|
|
|
|
"-A", string(iptablesServicesChain),
|
2015-08-20 06:11:56 +00:00
|
|
|
"-m", "comment", "--comment", fmt.Sprintf("\"%s external IP\"", svcName.String()),
|
2015-08-15 03:50:29 +00:00
|
|
|
"-m", protocol, "-p", protocol,
|
|
|
|
"-d", fmt.Sprintf("%s/32", externalIP),
|
2015-08-20 06:11:56 +00:00
|
|
|
"--dport", fmt.Sprintf("%d", svcInfo.port),
|
2015-08-15 03:50:29 +00:00
|
|
|
}
|
2015-08-12 00:18:21 +00:00
|
|
|
// We have to SNAT packets to external IPs.
|
2016-01-13 00:13:29 +00:00
|
|
|
writeLine(natRules, append(args,
|
2015-08-15 03:50:29 +00:00
|
|
|
"-j", "MARK", "--set-xmark", fmt.Sprintf("%s/0xffffffff", iptablesMasqueradeMark))...)
|
2015-08-12 00:18:21 +00:00
|
|
|
|
|
|
|
// Allow traffic for external IPs that does not come from a bridge (i.e. not from a container)
|
|
|
|
// nor from a local process to be forwarded to the service.
|
|
|
|
// This rule roughly translates to "all traffic from off-machine".
|
|
|
|
// This is imperfect in the face of network plugins that might not use a bridge, but we can revisit that later.
|
|
|
|
externalTrafficOnlyArgs := append(args,
|
|
|
|
"-m", "physdev", "!", "--physdev-is-in",
|
|
|
|
"-m", "addrtype", "!", "--src-type", "LOCAL")
|
2016-01-13 00:13:29 +00:00
|
|
|
writeLine(natRules, append(externalTrafficOnlyArgs,
|
2015-08-12 00:18:21 +00:00
|
|
|
"-j", string(svcChain))...)
|
|
|
|
dstLocalOnlyArgs := append(args, "-m", "addrtype", "--dst-type", "LOCAL")
|
|
|
|
// Allow traffic bound for external IPs that happen to be recognized as local IPs to stay local.
|
|
|
|
// This covers cases like GCE load-balancers which get added to the local routing table.
|
2016-01-13 00:13:29 +00:00
|
|
|
writeLine(natRules, append(dstLocalOnlyArgs,
|
2015-08-15 03:50:29 +00:00
|
|
|
"-j", string(svcChain))...)
|
2015-08-11 02:47:13 +00:00
|
|
|
}
|
|
|
|
|
2015-08-15 03:50:29 +00:00
|
|
|
// Capture load-balancer ingress.
|
2015-08-20 06:11:56 +00:00
|
|
|
for _, ingress := range svcInfo.loadBalancerStatus.Ingress {
|
2015-08-15 03:50:29 +00:00
|
|
|
if ingress.IP != "" {
|
|
|
|
args := []string{
|
|
|
|
"-A", string(iptablesServicesChain),
|
2015-08-20 06:11:56 +00:00
|
|
|
"-m", "comment", "--comment", fmt.Sprintf("\"%s loadbalancer IP\"", svcName.String()),
|
2015-08-15 03:50:29 +00:00
|
|
|
"-m", protocol, "-p", protocol,
|
|
|
|
"-d", fmt.Sprintf("%s/32", ingress.IP),
|
2015-08-20 06:11:56 +00:00
|
|
|
"--dport", fmt.Sprintf("%d", svcInfo.port),
|
2015-08-15 03:50:29 +00:00
|
|
|
}
|
|
|
|
// We have to SNAT packets from external IPs.
|
2016-01-13 00:13:29 +00:00
|
|
|
writeLine(natRules, append(args,
|
2015-08-15 03:50:29 +00:00
|
|
|
"-j", "MARK", "--set-xmark", fmt.Sprintf("%s/0xffffffff", iptablesMasqueradeMark))...)
|
2016-01-13 00:13:29 +00:00
|
|
|
writeLine(natRules, append(args,
|
2015-08-15 03:50:29 +00:00
|
|
|
"-j", string(svcChain))...)
|
2015-08-11 02:47:13 +00:00
|
|
|
}
|
2015-08-15 03:50:29 +00:00
|
|
|
}
|
2015-08-11 02:47:13 +00:00
|
|
|
|
2015-08-15 03:50:29 +00:00
|
|
|
// Capture nodeports. If we had more than 2 rules it might be
|
|
|
|
// worthwhile to make a new per-service chain for nodeport rules, but
|
|
|
|
// with just 2 rules it ends up being a waste and a cognitive burden.
|
2015-08-20 06:11:56 +00:00
|
|
|
if svcInfo.nodePort != 0 {
|
|
|
|
// Hold the local port open so no other process can open it
|
|
|
|
// (because the socket might open but it would never work).
|
|
|
|
lp := localPort{
|
|
|
|
desc: "nodePort for " + svcName.String(),
|
|
|
|
ip: "",
|
|
|
|
port: svcInfo.nodePort,
|
|
|
|
protocol: protocol,
|
|
|
|
}
|
|
|
|
if proxier.portsMap[lp] != nil {
|
|
|
|
newLocalPorts[lp] = proxier.portsMap[lp]
|
|
|
|
} else {
|
|
|
|
socket, err := openLocalPort(&lp)
|
|
|
|
if err != nil {
|
|
|
|
glog.Errorf("can't open %s, skipping this nodePort: %v", lp.String(), err)
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
newLocalPorts[lp] = socket
|
|
|
|
} // We're holding the port, so it's OK to install iptables rules.
|
2015-08-15 03:50:29 +00:00
|
|
|
// Nodeports need SNAT.
|
2016-01-13 00:13:29 +00:00
|
|
|
writeLine(natRules,
|
2015-08-15 03:50:29 +00:00
|
|
|
"-A", string(iptablesNodePortsChain),
|
2015-08-20 06:11:56 +00:00
|
|
|
"-m", "comment", "--comment", svcName.String(),
|
2015-08-15 03:50:29 +00:00
|
|
|
"-m", protocol, "-p", protocol,
|
2015-08-20 06:11:56 +00:00
|
|
|
"--dport", fmt.Sprintf("%d", svcInfo.nodePort),
|
2015-08-15 03:50:29 +00:00
|
|
|
"-j", "MARK", "--set-xmark", fmt.Sprintf("%s/0xffffffff", iptablesMasqueradeMark))
|
|
|
|
// Jump to the service chain.
|
2016-01-13 00:13:29 +00:00
|
|
|
writeLine(natRules,
|
2015-08-15 03:50:29 +00:00
|
|
|
"-A", string(iptablesNodePortsChain),
|
2015-08-20 06:11:56 +00:00
|
|
|
"-m", "comment", "--comment", svcName.String(),
|
2015-08-15 03:50:29 +00:00
|
|
|
"-m", protocol, "-p", protocol,
|
2015-08-20 06:11:56 +00:00
|
|
|
"--dport", fmt.Sprintf("%d", svcInfo.nodePort),
|
2015-08-15 03:50:29 +00:00
|
|
|
"-j", string(svcChain))
|
2015-08-11 02:47:13 +00:00
|
|
|
}
|
|
|
|
|
2016-01-13 00:13:29 +00:00
|
|
|
// If the service has no endpoints then reject packets.
|
|
|
|
if len(proxier.endpointsMap[svcName]) == 0 {
|
|
|
|
writeLine(filterRules,
|
|
|
|
"-A", string(iptablesServicesChain),
|
|
|
|
"-m", "comment", "--comment", fmt.Sprintf("\"%s has no endpoints\"", svcName.String()),
|
|
|
|
"-m", protocol, "-p", protocol,
|
|
|
|
"-d", fmt.Sprintf("%s/32", svcInfo.clusterIP.String()),
|
|
|
|
"--dport", fmt.Sprintf("%d", svcInfo.port),
|
|
|
|
"-j", "REJECT",
|
|
|
|
)
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
2015-08-15 03:50:29 +00:00
|
|
|
// Generate the per-endpoint chains. We do this in multiple passes so we
|
|
|
|
// can group rules together.
|
|
|
|
endpoints := make([]string, 0)
|
|
|
|
endpointChains := make([]utiliptables.Chain, 0)
|
2015-09-15 20:07:33 +00:00
|
|
|
for _, ep := range proxier.endpointsMap[svcName] {
|
2015-08-15 03:50:29 +00:00
|
|
|
endpoints = append(endpoints, ep)
|
2015-08-20 06:11:56 +00:00
|
|
|
endpointChain := servicePortEndpointChainName(svcName, protocol, ep)
|
2015-08-15 03:50:29 +00:00
|
|
|
endpointChains = append(endpointChains, endpointChain)
|
|
|
|
|
|
|
|
// Create the endpoint chain, retaining counters if possible.
|
2016-01-13 00:13:29 +00:00
|
|
|
if chain, ok := existingNATChains[utiliptables.Chain(endpointChain)]; ok {
|
|
|
|
writeLine(natChains, chain)
|
2015-08-11 02:47:13 +00:00
|
|
|
} else {
|
2016-01-13 00:13:29 +00:00
|
|
|
writeLine(natChains, makeChainLine(endpointChain))
|
2015-08-11 02:47:13 +00:00
|
|
|
}
|
2016-01-13 00:13:29 +00:00
|
|
|
activeNATChains[endpointChain] = true
|
2015-08-11 02:47:13 +00:00
|
|
|
}
|
|
|
|
|
2015-08-15 03:50:29 +00:00
|
|
|
// First write session affinity rules, if applicable.
|
2015-08-20 06:11:56 +00:00
|
|
|
if svcInfo.sessionAffinityType == api.ServiceAffinityClientIP {
|
2015-08-15 03:50:29 +00:00
|
|
|
for _, endpointChain := range endpointChains {
|
2016-01-13 00:13:29 +00:00
|
|
|
writeLine(natRules,
|
2015-08-15 03:50:29 +00:00
|
|
|
"-A", string(svcChain),
|
2015-08-20 06:11:56 +00:00
|
|
|
"-m", "comment", "--comment", svcName.String(),
|
2015-08-15 03:50:29 +00:00
|
|
|
"-m", "recent", "--name", string(endpointChain),
|
2015-08-20 06:11:56 +00:00
|
|
|
"--rcheck", "--seconds", fmt.Sprintf("%d", svcInfo.stickyMaxAgeSeconds), "--reap",
|
2015-08-15 03:50:29 +00:00
|
|
|
"-j", string(endpointChain))
|
|
|
|
}
|
2015-08-11 02:47:13 +00:00
|
|
|
}
|
|
|
|
|
2015-08-15 03:50:29 +00:00
|
|
|
// Now write loadbalancing & DNAT rules.
|
|
|
|
n := len(endpointChains)
|
|
|
|
for i, endpointChain := range endpointChains {
|
|
|
|
// Balancing rules in the per-service chain.
|
|
|
|
args := []string{
|
|
|
|
"-A", string(svcChain),
|
2015-08-20 06:11:56 +00:00
|
|
|
"-m", "comment", "--comment", svcName.String(),
|
2015-08-15 03:50:29 +00:00
|
|
|
}
|
|
|
|
if i < (n - 1) {
|
|
|
|
// Each rule is a probabilistic match.
|
|
|
|
args = append(args,
|
|
|
|
"-m", "statistic",
|
|
|
|
"--mode", "random",
|
2015-08-18 03:54:05 +00:00
|
|
|
"--probability", fmt.Sprintf("%0.5f", 1.0/float64(n-i)))
|
2015-08-15 03:50:29 +00:00
|
|
|
}
|
|
|
|
// The final (or only if n == 1) rule is a guaranteed match.
|
|
|
|
args = append(args, "-j", string(endpointChain))
|
2016-01-13 00:13:29 +00:00
|
|
|
writeLine(natRules, args...)
|
2015-08-15 03:50:29 +00:00
|
|
|
|
|
|
|
// Rules in the per-endpoint chain.
|
|
|
|
args = []string{
|
|
|
|
"-A", string(endpointChain),
|
2015-08-20 06:11:56 +00:00
|
|
|
"-m", "comment", "--comment", svcName.String(),
|
2015-08-11 02:47:13 +00:00
|
|
|
}
|
2015-08-15 03:50:29 +00:00
|
|
|
// Handle traffic that loops back to the originator with SNAT.
|
|
|
|
// Technically we only need to do this if the endpoint is on this
|
|
|
|
// host, but we don't have that information, so we just do this for
|
|
|
|
// all endpoints.
|
|
|
|
// TODO: if we grow logic to get this node's pod CIDR, we can use it.
|
2016-01-13 00:13:29 +00:00
|
|
|
writeLine(natRules, append(args,
|
2015-08-15 03:50:29 +00:00
|
|
|
"-s", fmt.Sprintf("%s/32", strings.Split(endpoints[i], ":")[0]),
|
|
|
|
"-j", "MARK", "--set-xmark", fmt.Sprintf("%s/0xffffffff", iptablesMasqueradeMark))...)
|
|
|
|
|
|
|
|
// Update client-affinity lists.
|
2015-08-20 06:11:56 +00:00
|
|
|
if svcInfo.sessionAffinityType == api.ServiceAffinityClientIP {
|
2015-08-15 03:50:29 +00:00
|
|
|
args = append(args, "-m", "recent", "--name", string(endpointChain), "--set")
|
|
|
|
}
|
|
|
|
// DNAT to final destination.
|
|
|
|
args = append(args,
|
|
|
|
"-m", protocol, "-p", protocol,
|
|
|
|
"-j", "DNAT", "--to-destination", endpoints[i])
|
2016-01-13 00:13:29 +00:00
|
|
|
writeLine(natRules, args...)
|
2015-08-11 02:47:13 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-08-15 03:50:29 +00:00
|
|
|
// Delete chains no longer in use.
|
2016-01-13 00:13:29 +00:00
|
|
|
for chain := range existingNATChains {
|
|
|
|
if !activeNATChains[chain] {
|
2015-08-11 02:47:13 +00:00
|
|
|
chainString := string(chain)
|
|
|
|
if !strings.HasPrefix(chainString, "KUBE-SVC-") && !strings.HasPrefix(chainString, "KUBE-SEP-") {
|
2015-08-15 03:50:29 +00:00
|
|
|
// Ignore chains that aren't ours.
|
2015-08-11 02:47:13 +00:00
|
|
|
continue
|
|
|
|
}
|
2015-08-15 03:50:29 +00:00
|
|
|
// We must (as per iptables) write a chain-line for it, which has
|
|
|
|
// the nice effect of flushing the chain. Then we can remove the
|
|
|
|
// chain.
|
2016-01-13 00:13:29 +00:00
|
|
|
writeLine(natChains, existingNATChains[chain])
|
|
|
|
writeLine(natRules, "-X", chainString)
|
2015-08-11 02:47:13 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-08-19 05:12:42 +00:00
|
|
|
// Finally, tail-call to the nodeports chain. This needs to be after all
|
|
|
|
// other service portal rules.
|
2016-01-13 00:13:29 +00:00
|
|
|
writeLine(natRules,
|
2015-08-19 05:12:42 +00:00
|
|
|
"-A", string(iptablesServicesChain),
|
|
|
|
"-m", "comment", "--comment", "\"kubernetes service nodeports; NOTE: this must be the last rule in this chain\"",
|
|
|
|
"-m", "addrtype", "--dst-type", "LOCAL",
|
|
|
|
"-j", string(iptablesNodePortsChain))
|
|
|
|
|
2016-01-13 00:13:29 +00:00
|
|
|
// Write the end-of-table markers.
|
|
|
|
writeLine(filterRules, "COMMIT")
|
|
|
|
writeLine(natRules, "COMMIT")
|
2015-08-11 02:47:13 +00:00
|
|
|
|
2015-08-15 03:50:29 +00:00
|
|
|
// Sync rules.
|
|
|
|
// NOTE: NoFlushTables is used so we don't flush non-kubernetes chains in the table.
|
2016-01-13 00:13:29 +00:00
|
|
|
filterLines := append(filterChains.Bytes(), filterRules.Bytes()...)
|
|
|
|
natLines := append(natChains.Bytes(), natRules.Bytes()...)
|
|
|
|
lines := append(filterLines, natLines...)
|
|
|
|
|
|
|
|
glog.V(3).Infof("Syncing iptables rules: %s", lines)
|
|
|
|
err = proxier.iptables.RestoreAll(lines, utiliptables.NoFlushTables, utiliptables.RestoreCounters)
|
2015-08-20 06:11:56 +00:00
|
|
|
if err != nil {
|
|
|
|
glog.Errorf("Failed to sync iptables rules: %v", err)
|
|
|
|
// Revert new local ports.
|
|
|
|
for k, v := range newLocalPorts {
|
|
|
|
glog.Errorf("Closing local port %s", k.String())
|
|
|
|
v.Close()
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
// Close old local ports and save new ones.
|
|
|
|
for k, v := range proxier.portsMap {
|
|
|
|
if newLocalPorts[k] == nil {
|
|
|
|
v.Close()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
proxier.portsMap = newLocalPorts
|
|
|
|
}
|
2015-08-15 03:50:29 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Join all words with spaces, terminate with newline and write to buf.
|
|
|
|
func writeLine(buf *bytes.Buffer, words ...string) {
|
|
|
|
buf.WriteString(strings.Join(words, " ") + "\n")
|
2015-08-11 02:47:13 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// return an iptables-save/restore formatted chain line given a Chain
|
|
|
|
func makeChainLine(chain utiliptables.Chain) string {
|
2015-08-15 03:50:29 +00:00
|
|
|
return fmt.Sprintf(":%s - [0:0]", chain)
|
2015-08-11 02:47:13 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// getChainLines parses a table's iptables-save data to find chains in the table.
|
|
|
|
// It returns a map of iptables.Chain to string where the string is the chain line from the save (with counters etc).
|
|
|
|
func getChainLines(table utiliptables.Table, save []byte) map[utiliptables.Chain]string {
|
|
|
|
chainsMap := make(map[utiliptables.Chain]string)
|
|
|
|
tablePrefix := "*" + string(table)
|
2015-10-29 21:45:29 +00:00
|
|
|
readIndex := 0
|
2015-08-11 02:47:13 +00:00
|
|
|
// find beginning of table
|
2015-10-29 21:45:29 +00:00
|
|
|
for readIndex < len(save) {
|
|
|
|
line, n := readLine(readIndex, save)
|
|
|
|
readIndex = n
|
|
|
|
if strings.HasPrefix(line, tablePrefix) {
|
2015-08-11 02:47:13 +00:00
|
|
|
break
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// parse table lines
|
2015-10-29 21:45:29 +00:00
|
|
|
for readIndex < len(save) {
|
|
|
|
line, n := readLine(readIndex, save)
|
|
|
|
readIndex = n
|
|
|
|
if len(line) == 0 {
|
|
|
|
continue
|
|
|
|
}
|
2015-08-11 02:47:13 +00:00
|
|
|
if strings.HasPrefix(line, "COMMIT") || strings.HasPrefix(line, "*") {
|
|
|
|
break
|
2015-10-29 21:45:29 +00:00
|
|
|
} else if strings.HasPrefix(line, "#") {
|
2015-08-11 02:47:13 +00:00
|
|
|
continue
|
|
|
|
} else if strings.HasPrefix(line, ":") && len(line) > 1 {
|
|
|
|
chain := utiliptables.Chain(strings.SplitN(line[1:], " ", 2)[0])
|
2015-10-29 21:45:29 +00:00
|
|
|
chainsMap[chain] = line
|
2015-08-11 02:47:13 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
return chainsMap
|
|
|
|
}
|
2015-08-20 06:11:56 +00:00
|
|
|
|
2015-10-29 21:45:29 +00:00
|
|
|
// readLine returns the line of byteArray that starts at readIndex, trimmed
// of leading and trailing spaces, together with the index just past that
// line (past the terminating '\n', clamped to len(byteArray) when the
// buffer ends without one). A line containing only spaces yields "".
//
// Bug fix vs. the previous implementation: a buffer whose final line ended
// with a trailing space and no newline (e.g. "abc ") used to fall through
// the scan loop and return "", silently dropping the line's content; it now
// correctly returns the trimmed line ("abc").
func readLine(readIndex int, byteArray []byte) (string, int) {
	if readIndex >= len(byteArray) {
		return "", readIndex
	}
	// Find the end of the current line (the '\n', or end of buffer).
	end := readIndex
	for end < len(byteArray) && byteArray[end] != '\n' {
		end++
	}
	line := strings.Trim(string(byteArray[readIndex:end]), " ")
	// Resume after the newline; clamp when the buffer has no trailing '\n'
	// so the returned index never exceeds len(byteArray).
	next := end + 1
	if next > len(byteArray) {
		next = len(byteArray)
	}
	return line, next
}
|
|
|
|
|
2015-08-20 06:11:56 +00:00
|
|
|
// isLocalIP reports whether ip is assigned to one of this machine's
// network interfaces. It returns an error if the interface addresses
// cannot be listed or cannot be parsed; in that case the boolean result
// is meaningless.
func isLocalIP(ip string) (bool, error) {
	addrs, err := net.InterfaceAddrs()
	if err != nil {
		return false, err
	}
	// Parse once outside the loop - the previous version re-parsed the
	// same string on every iteration.
	parsedIP := net.ParseIP(ip)
	for i := range addrs {
		intf, _, err := net.ParseCIDR(addrs[i].String())
		if err != nil {
			return false, err
		}
		if parsedIP.Equal(intf) {
			return true, nil
		}
	}
	return false, nil
}
|
|
|
|
|
|
|
|
func openLocalPort(lp *localPort) (closeable, error) {
|
|
|
|
// For ports on node IPs, open the actual port and hold it, even though we
|
|
|
|
// use iptables to redirect traffic.
|
|
|
|
// This ensures a) that it's safe to use that port and b) that (a) stays
|
|
|
|
// true. The risk is that some process on the node (e.g. sshd or kubelet)
|
|
|
|
// is using a port and we give that same port out to a Service. That would
|
|
|
|
// be bad because iptables would silently claim the traffic but the process
|
|
|
|
// would never know.
|
|
|
|
// NOTE: We should not need to have a real listen()ing socket - bind()
|
|
|
|
// should be enough, but I can't figure out a way to e2e test without
|
|
|
|
// it. Tools like 'ss' and 'netstat' do not show sockets that are
|
|
|
|
// bind()ed but not listen()ed, and at least the default debian netcat
|
|
|
|
// has no way to avoid about 10 seconds of retries.
|
|
|
|
var socket closeable
|
|
|
|
switch lp.protocol {
|
|
|
|
case "tcp":
|
|
|
|
listener, err := net.Listen("tcp", net.JoinHostPort(lp.ip, strconv.Itoa(lp.port)))
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
socket = listener
|
|
|
|
case "udp":
|
|
|
|
addr, err := net.ResolveUDPAddr("udp", net.JoinHostPort(lp.ip, strconv.Itoa(lp.port)))
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
conn, err := net.ListenUDP("udp", addr)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
socket = conn
|
|
|
|
default:
|
|
|
|
return nil, fmt.Errorf("unknown protocol %q", lp.protocol)
|
|
|
|
}
|
|
|
|
glog.V(2).Infof("Opened local port %s", lp.String())
|
|
|
|
return socket, nil
|
|
|
|
}
|