Reduce lock contention in watchcache.

pull/564/head
wojtekt 2019-02-12 14:26:45 +01:00
parent 3c2a4f0362
commit 9452adc5c8
2 changed files with 95 additions and 42 deletions

View File

@@ -98,14 +98,17 @@ func (wm watchersMap) addWatcher(w *cacheWatcher, number int) {
 	wm[number] = w
 }
 
-func (wm watchersMap) deleteWatcher(number int) {
-	delete(wm, number)
+func (wm watchersMap) deleteWatcher(number int, done func(*cacheWatcher)) {
+	if watcher, ok := wm[number]; ok {
+		delete(wm, number)
+		done(watcher)
+	}
 }
 
-func (wm watchersMap) terminateAll() {
+func (wm watchersMap) terminateAll(done func(*cacheWatcher)) {
 	for key, watcher := range wm {
 		delete(wm, key)
-		watcher.stop()
+		done(watcher)
 	}
 }
@@ -125,24 +128,24 @@ func (i *indexedWatchers) addWatcher(w *cacheWatcher, number int, value string,
 	}
 }
 
-func (i *indexedWatchers) deleteWatcher(number int, value string, supported bool) {
+func (i *indexedWatchers) deleteWatcher(number int, value string, supported bool, done func(*cacheWatcher)) {
 	if supported {
-		i.valueWatchers[value].deleteWatcher(number)
+		i.valueWatchers[value].deleteWatcher(number, done)
 		if len(i.valueWatchers[value]) == 0 {
 			delete(i.valueWatchers, value)
 		}
 	} else {
-		i.allWatchers.deleteWatcher(number)
+		i.allWatchers.deleteWatcher(number, done)
 	}
 }
 
-func (i *indexedWatchers) terminateAll(objectType reflect.Type) {
+func (i *indexedWatchers) terminateAll(objectType reflect.Type, done func(*cacheWatcher)) {
 	if len(i.allWatchers) > 0 || len(i.valueWatchers) > 0 {
 		klog.Warningf("Terminating all watchers from cacher %v", objectType)
 	}
-	i.allWatchers.terminateAll()
+	i.allWatchers.terminateAll(done)
 	for index, watchers := range i.valueWatchers {
-		watchers.terminateAll()
+		watchers.terminateAll(done)
 		delete(i.valueWatchers, index)
 	}
 }
@@ -203,8 +206,19 @@ type Cacher struct {
 	stopCh chan struct{}
 	stopWg sync.WaitGroup
 
-	// Used to avoid unnecessary allocations in underlying watchers.
+	// timer is used to avoid unnecessary allocations in underlying watchers.
 	timer *time.Timer
+
+	// dispatching determines whether there is currently an event being
+	// dispatched.
+	dispatching bool
+	// watchersBuffer is a list of watchers potentially interested in the
+	// currently dispatched event.
+	watchersBuffer []*cacheWatcher
+	// watchersToStop is a list of watchers that were supposed to be stopped
+	// during the current dispatching, but whose stopping was deferred to the
+	// end of dispatching that event, to avoid racing with the closing of
+	// their input channels.
+	watchersToStop []*cacheWatcher
 }
 
 // NewCacherFromConfig creates a new Cacher responsible for servicing WATCH and LIST requests from
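The deferred stopping encoded in watchersToStop guards against a Go channel hazard: closing a watcher's input channel while the dispatching goroutine may still be sending to it panics with "send on closed channel". A minimal standalone sketch of that hazard, with hypothetical names rather than the actual Cacher types:

    package main

    import "time"

    func main() {
        input := make(chan int) // stands in for cacheWatcher.input
        done := make(chan struct{})
        go func() {
            defer close(done)
            input <- 1 // dispatching goroutine blocked in a send, like add()
        }()
        time.Sleep(10 * time.Millisecond) // let the sender block
        close(input)                      // stop() closing the channel underneath it
        <-done                            // the blocked send panics: "send on closed channel"
    }

Deferring stop() until dispatching of the event finishes, after every add() has returned, closes exactly this window.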
@@ -650,19 +664,41 @@ func (c *Cacher) dispatchEvents() {
 }
 
 func (c *Cacher) dispatchEvent(event *watchCacheEvent) {
+	c.startDispatching(event)
+	// Since add() can block, we explicitly add when the cacher is unlocked.
+	for _, watcher := range c.watchersBuffer {
+		watcher.add(event, c.timer, c.dispatchTimeoutBudget)
+	}
+	c.finishDispatching()
+}
+
+// startDispatching chooses the watchers potentially interested in a given
+// event and marks dispatching as true.
+func (c *Cacher) startDispatching(event *watchCacheEvent) {
 	triggerValues, supported := c.triggerValues(event)
 
 	c.Lock()
 	defer c.Unlock()
+
+	c.dispatching = true
+	// We are reusing the slice to avoid memory reallocations in every
+	// dispatchEvent() call. That may prevent the Go GC from freeing items
+	// from previous events that are sitting behind the current length
+	// of the slice, but there is only a limited number of those and the
+	// gain from avoiding memory allocations is much bigger.
+	c.watchersBuffer = c.watchersBuffer[:0]
+
 	// Iterate over "allWatchers" no matter what the trigger function is.
 	for _, watcher := range c.watchers.allWatchers {
-		watcher.add(event, c.timer, c.dispatchTimeoutBudget)
+		c.watchersBuffer = append(c.watchersBuffer, watcher)
 	}
 	if supported {
 		// Iterate over watchers interested in the given values of the trigger.
 		for _, triggerValue := range triggerValues {
 			for _, watcher := range c.watchers.valueWatchers[triggerValue] {
-				watcher.add(event, c.timer, c.dispatchTimeoutBudget)
+				c.watchersBuffer = append(c.watchersBuffer, watcher)
 			}
 		}
 	} else {
@@ -675,16 +711,38 @@ func (c *Cacher) dispatchEvent(event *watchCacheEvent) {
 		// Iterate over watchers interested in exact values for all values.
 		for _, watchers := range c.watchers.valueWatchers {
 			for _, watcher := range watchers {
-				watcher.add(event, c.timer, c.dispatchTimeoutBudget)
+				c.watchersBuffer = append(c.watchersBuffer, watcher)
 			}
 		}
 	}
 }
 
+// finishDispatching stops all the watchers that were supposed to be
+// stopped in the meantime, but whose stopping was deferred to avoid
+// closing the input channels of watchers that add() may still be
+// writing to. It also marks dispatching as false.
+func (c *Cacher) finishDispatching() {
+	c.Lock()
+	defer c.Unlock()
+	c.dispatching = false
+	for _, watcher := range c.watchersToStop {
+		watcher.stop()
+	}
+	c.watchersToStop = c.watchersToStop[:0]
+}
+
 func (c *Cacher) terminateAllWatchers() {
 	c.Lock()
 	defer c.Unlock()
-	c.watchers.terminateAll(c.objectType)
+	c.watchers.terminateAll(c.objectType, c.stopWatcherThreadUnsafe)
+}
+
+func (c *Cacher) stopWatcherThreadUnsafe(watcher *cacheWatcher) {
+	if c.dispatching {
+		c.watchersToStop = append(c.watchersToStop, watcher)
+	} else {
+		watcher.stop()
+	}
 }
 
 func (c *Cacher) isStopped() bool {
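This split is the core of the contention fix: the cacher lock is held only long enough to snapshot the interested watchers into the reused watchersBuffer, and the potentially blocking add() calls run with the lock released. A condensed sketch of the pattern, using hypothetical names (broadcaster, subscribers) rather than the real Cacher types:

    package sketch

    import "sync"

    // broadcaster snapshots interested subscribers under its lock, then
    // delivers the event unlocked, mirroring startDispatching and
    // finishDispatching above.
    type broadcaster struct {
        mu          sync.Mutex
        subscribers map[int]chan int
        buffer      []chan int // reused across events to avoid reallocations
    }

    func (b *broadcaster) dispatch(ev int) {
        b.mu.Lock()
        b.buffer = b.buffer[:0] // keep capacity, drop previous contents
        for _, ch := range b.subscribers {
            b.buffer = append(b.buffer, ch)
        }
        b.mu.Unlock()

        // Sends may block on slow consumers; doing them unlocked means
        // they no longer serialize with registering or removing other
        // subscribers.
        for _, ch := range b.buffer {
            ch <- ev
        }
    }

The price, as the comment in startDispatching notes, is that slice reuse can keep a few stale pointers alive behind the slice length, which is deliberately traded for zero per-event allocations.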
@@ -710,20 +768,15 @@ func (c *Cacher) Stop() {
 	c.stopWg.Wait()
 }
 
-func forgetWatcher(c *Cacher, index int, triggerValue string, triggerSupported bool) func(bool) {
-	return func(lock bool) {
-		if lock {
-			c.Lock()
-			defer c.Unlock()
-		} else {
-			// false is currently passed only if we are forcing watcher to close due
-			// to its unresponsiveness and blocking other watchers.
-			// TODO: Get this information in a cleaner way.
-			klog.V(1).Infof("Forcing watcher close due to unresponsiveness: %v", c.objectType.String())
-		}
+func forgetWatcher(c *Cacher, index int, triggerValue string, triggerSupported bool) func() {
+	return func() {
+		c.Lock()
+		defer c.Unlock()
 		// It's possible that the watcher is already not in the structure (e.g. in case of
-		// simultaneous Stop() and terminateAllWatchers()), but it doesn't break anything.
-		c.watchers.deleteWatcher(index, triggerValue, triggerSupported)
+		// simultaneous Stop() and terminateAllWatchers()), but it is safe to call stop()
+		// on a watcher multiple times.
+		c.watchers.deleteWatcher(index, triggerValue, triggerSupported, c.stopWatcherThreadUnsafe)
 	}
 }
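With the bool parameter gone, forgetWatcher always takes the lock, and every termination path funnels through stop(), so stop() must tolerate being called more than once. A sketch of one way to get that, assuming a stopped flag like the one cacheWatcher already carries:

    package sketch

    import "sync"

    type watcher struct {
        mu      sync.Mutex
        stopped bool
        input   chan int
    }

    // stop is safe to call multiple times: the channel is closed exactly
    // once, and later calls are no-ops.
    func (w *watcher) stop() {
        w.mu.Lock()
        defer w.mu.Unlock()
        if !w.stopped {
            w.stopped = true
            close(w.input)
        }
    }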
@@ -822,11 +875,11 @@ type cacheWatcher struct {
 	done      chan struct{}
 	filter    filterWithAttrsFunc
 	stopped   bool
-	forget    func(bool)
+	forget    func()
 	versioner storage.Versioner
 }
 
-func newCacheWatcher(resourceVersion uint64, chanSize int, initEvents []*watchCacheEvent, filter filterWithAttrsFunc, forget func(bool), versioner storage.Versioner) *cacheWatcher {
+func newCacheWatcher(resourceVersion uint64, chanSize int, initEvents []*watchCacheEvent, filter filterWithAttrsFunc, forget func(), versioner storage.Versioner) *cacheWatcher {
 	watcher := &cacheWatcher{
 		input:  make(chan *watchCacheEvent, chanSize),
 		result: make(chan watch.Event, chanSize),
@@ -847,8 +900,7 @@ func (c *cacheWatcher) ResultChan() <-chan watch.Event {
 
 // Implements watch.Interface.
 func (c *cacheWatcher) Stop() {
-	c.forget(true)
-	c.stop()
+	c.forget()
 }
 
 func (c *cacheWatcher) stop() {
@@ -888,8 +940,8 @@ func (c *cacheWatcher) add(event *watchCacheEvent, timer *time.Timer, budget *ti
 		// This means that we couldn't send event to that watcher.
 		// Since we don't want to block on it infinitely,
 		// we simply terminate it.
-		c.forget(false)
-		c.stop()
+		klog.V(1).Infof("Forcing watcher close due to unresponsiveness: %v", reflect.TypeOf(event.Object).String())
+		c.forget()
 	}
 
 	budget.returnUnused(timeout - time.Since(startTime))
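The unresponsiveness log moves into add() itself, next to the timed send that detects it. A simplified sketch of such a timed send with a caller-owned, reused timer (trySend is a hypothetical name; the real add() additionally draws its timeout from dispatchTimeoutBudget):

    package sketch

    import "time"

    // trySend attempts a delivery but gives up after timeout, reusing the
    // caller's timer across calls instead of allocating one per event.
    func trySend(ch chan<- int, ev int, timer *time.Timer, timeout time.Duration) bool {
        timer.Reset(timeout)
        select {
        case ch <- ev:
            // Sent in time; stop the timer, draining it if it already
            // fired, so the next Reset starts from a clean state.
            if !timer.Stop() {
                <-timer.C
            }
            return true
        case <-timer.C:
            return false // receiver unresponsive; the caller forgets the watcher
        }
    }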
@@ -982,11 +1034,7 @@ func (c *cacheWatcher) process(initEvents []*watchCacheEvent, resourceVersion ui
 	defer close(c.result)
 	defer c.Stop()
 
-	for {
-		event, ok := <-c.input
-		if !ok {
-			return
-		}
+	for event := range c.input {
 		// only send events newer than resourceVersion
 		if event.ResourceVersion > resourceVersion {
 			c.sendWatchCacheEvent(event)
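The loop rewrite is behavior-preserving: ranging over a channel receives values until the channel is closed and drained, which is exactly what the removed event, ok := <-c.input check did by hand. For example:

    package sketch

    // consume handles events until input is closed, the same termination
    // condition the old explicit ok-check implemented.
    func consume(input <-chan int, handle func(int)) {
        for ev := range input {
            handle(ev)
        }
    }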

View File

@@ -45,12 +45,17 @@ import (
 // the writes to cacheWatcher.result channel is blocked.
 func TestCacheWatcherCleanupNotBlockedByResult(t *testing.T) {
 	var lock sync.RWMutex
+	var w *cacheWatcher
 	count := 0
 	filter := func(string, labels.Set, fields.Set) bool { return true }
-	forget := func(bool) {
+	forget := func() {
 		lock.Lock()
 		defer lock.Unlock()
 		count++
+		// forget() has to stop the watcher, as only stopping the watcher
+		// triggers stopping the process() goroutine, which this test is
+		// ultimately waiting for.
+		w.stop()
 	}
 	initEvents := []*watchCacheEvent{
 		{Object: &v1.Pod{}},
@@ -58,7 +63,7 @@ func TestCacheWatcherCleanupNotBlockedByResult(t *testing.T) {
 	}
 	// set the size of the buffer of w.result to 0, so that the writes to
 	// w.result is blocked.
-	w := newCacheWatcher(0, 0, initEvents, filter, forget, testVersioner{})
+	w = newCacheWatcher(0, 0, initEvents, filter, forget, testVersioner{})
 	w.Stop()
 	if err := wait.PollImmediate(1*time.Second, 5*time.Second, func() (bool, error) {
 		lock.RLock()
@@ -73,7 +78,7 @@ func TestCacheWatcherHandlesFiltering(t *testing.T) {
 	filter := func(_ string, _ labels.Set, field fields.Set) bool {
 		return field["spec.nodeName"] == "host"
 	}
-	forget := func(bool) {}
+	forget := func() {}
 	testCases := []struct {
 		events []*watchCacheEvent