Merge pull request #65755 from wojtek-t/optimize_kube_proxy

Automatic merge from submit-queue (batch tested with PRs 65882, 65896, 65755, 60549, 65927). If you want to cherry-pick this change to another branch, please follow the instructions <a href="https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md">here</a>.

Avoid printing some service comments in iptables rules

According to some profiles, with a large number of endpoints in the system, the comments mentioning the service in the relevant iptables rules may be responsible for 40% of all iptables contents.

Given that ~70% of memory usage of kube-proxy seems to be because of generated iptables rules, the overall saving may be at the level of 30% or so.

OTOH, we sacrifice a bit of the understandability of iptables, but this PR only changes the subset of iptables rules that are the most painful.

@thockin @danwinship @dcbw - thoughts?

Ref #65441
pull/8/head
Kubernetes Submit Queue 2018-07-07 18:41:09 -07:00 committed by GitHub
commit 28e78ec987
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 36 additions and 11 deletions

View File

@ -249,6 +249,12 @@ type Proxier struct {
natChains *bytes.Buffer natChains *bytes.Buffer
natRules *bytes.Buffer natRules *bytes.Buffer
// endpointChainsNumber is the total amount of endpointChains across all
// services that we will generate (it is computed at the beginning of
// syncProxyRules method). If that is large enough, comments in some
// iptables rules are dropped to improve performance.
endpointChainsNumber int
// Values are as a parameter to select the interfaces where nodeport works. // Values are as a parameter to select the interfaces where nodeport works.
nodePortAddresses []string nodePortAddresses []string
// networkInterfacer defines an interface for several net library functions. // networkInterfacer defines an interface for several net library functions.
@ -606,6 +612,19 @@ func (proxier *Proxier) deleteEndpointConnections(connectionMap []proxy.ServiceE
} }
} }
// endpointChainsNumberThreshold is the total number of endpoint chains above
// which service comments are omitted from the generated iptables rules.
const endpointChainsNumberThreshold = 1000

// appendServiceCommentLocked appends an iptables comment match carrying
// svcName ("-m comment --comment <svcName>") to args and returns the
// resulting slice. When proxier.endpointChainsNumber exceeds
// endpointChainsNumberThreshold, args is returned unchanged.
//
// As with the built-in append, callers must use the returned slice
// (args = proxier.appendServiceCommentLocked(args, name)): the slice header
// is passed by value, so an append made here is not visible through the
// caller's own variable, and the caller's next append would overwrite the
// comment tokens.
//
// Assumes proxier.mu is held.
func (proxier *Proxier) appendServiceCommentLocked(args []string, svcName string) []string {
	// Not printing these comments can reduce the size of iptables (in case of
	// a large number of endpoints) even by 40%+. So if the total number of
	// endpoint chains is large enough, we simply drop those comments.
	if proxier.endpointChainsNumber > endpointChainsNumberThreshold {
		return args
	}
	return append(args, "-m", "comment", "--comment", svcName)
}
// This is where all of the iptables-save/restore calls happen. // This is where all of the iptables-save/restore calls happen.
// The only other iptables rules are those that are setup in iptablesInit() // The only other iptables rules are those that are setup in iptablesInit()
// This assumes proxier.mu is NOT held // This assumes proxier.mu is NOT held
@ -747,6 +766,12 @@ func (proxier *Proxier) syncProxyRules() {
// is just for efficiency, not correctness. // is just for efficiency, not correctness.
args := make([]string, 64) args := make([]string, 64)
// Compute total number of endpoint chains across all services.
proxier.endpointChainsNumber = 0
for svcName := range proxier.serviceMap {
proxier.endpointChainsNumber += len(proxier.endpointsMap[svcName])
}
// Build rules for each service. // Build rules for each service.
for svcName, svc := range proxier.serviceMap { for svcName, svc := range proxier.serviceMap {
svcInfo, ok := svc.(*serviceInfo) svcInfo, ok := svc.(*serviceInfo)
@ -1077,12 +1102,16 @@ func (proxier *Proxier) syncProxyRules() {
// First write session affinity rules, if applicable. // First write session affinity rules, if applicable.
if svcInfo.SessionAffinityType == api.ServiceAffinityClientIP { if svcInfo.SessionAffinityType == api.ServiceAffinityClientIP {
for _, endpointChain := range endpointChains { for _, endpointChain := range endpointChains {
writeLine(proxier.natRules, args = append(args[:0],
"-A", string(svcChain), "-A", string(svcChain),
"-m", "comment", "--comment", svcNameString, )
proxier.appendServiceCommentLocked(args, svcNameString)
args = append(args,
"-m", "recent", "--name", string(endpointChain), "-m", "recent", "--name", string(endpointChain),
"--rcheck", "--seconds", strconv.Itoa(svcInfo.StickyMaxAgeSeconds), "--reap", "--rcheck", "--seconds", strconv.Itoa(svcInfo.StickyMaxAgeSeconds), "--reap",
"-j", string(endpointChain)) "-j", string(endpointChain),
)
writeLine(proxier.natRules, args...)
} }
} }
@ -1095,10 +1124,8 @@ func (proxier *Proxier) syncProxyRules() {
continue continue
} }
// Balancing rules in the per-service chain. // Balancing rules in the per-service chain.
args = append(args[:0], []string{ args = append(args[:0], "-A", string(svcChain))
"-A", string(svcChain), proxier.appendServiceCommentLocked(args, svcNameString)
"-m", "comment", "--comment", svcNameString,
}...)
if i < (n - 1) { if i < (n - 1) {
// Each rule is a probabilistic match. // Each rule is a probabilistic match.
args = append(args, args = append(args,
@ -1111,10 +1138,8 @@ func (proxier *Proxier) syncProxyRules() {
writeLine(proxier.natRules, args...) writeLine(proxier.natRules, args...)
// Rules in the per-endpoint chain. // Rules in the per-endpoint chain.
args = append(args[:0], args = append(args[:0], "-A", string(endpointChain))
"-A", string(endpointChain), proxier.appendServiceCommentLocked(args, svcNameString)
"-m", "comment", "--comment", svcNameString,
)
// Handle traffic that loops back to the originator with SNAT. // Handle traffic that loops back to the originator with SNAT.
writeLine(proxier.natRules, append(args, writeLine(proxier.natRules, append(args,
"-s", utilproxy.ToCIDR(net.ParseIP(epIP)), "-s", utilproxy.ToCIDR(net.ParseIP(epIP)),