mirror of https://github.com/k3s-io/k3s
Merge pull request #3381 from a-robinson/events2
Improve client recording of events such that clients are…
commit 2bee4ac572
@@ -18,6 +18,7 @@ package record
 import (
 	"fmt"
+	"math/rand"
 	"time"
 
 	"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
 
@@ -30,8 +31,9 @@ import (
 	"github.com/golang/glog"
 )
 
-// retryEventSleep is the time between record failures to retry. Available for test alteration.
-var retryEventSleep = 1 * time.Second
+const maxTriesPerEvent = 12
+
+var sleepDuration = 10 * time.Second
 
 // EventRecorder knows how to store events (client.Client implements it.)
 // EventRecorder must respect the namespace that will be embedded in 'event'.
@@ -46,49 +48,68 @@ type EventRecorder interface {
 // or used to stop recording, if desired.
 // TODO: make me an object with parameterizable queue length and retry interval
 func StartRecording(recorder EventRecorder, source api.EventSource) watch.Interface {
+	// The default math/rand package functions aren't thread safe, so create a
+	// new Rand object for each StartRecording call.
+	randGen := rand.New(rand.NewSource(time.Now().UnixNano()))
 	return GetEvents(func(event *api.Event) {
 		// Make a copy before modification, because there could be multiple listeners.
 		// Events are safe to copy like this.
 		eventCopy := *event
 		event = &eventCopy
 		event.Source = source
-		try := 0
+
+		tries := 0
 		for {
-			try++
-			_, err := recorder.Create(event)
-			if err == nil {
+			if recordEvent(recorder, event) {
 				break
 			}
-			// If we can't contact the server, then hold everything while we keep trying.
-			// Otherwise, something about the event is malformed and we should abandon it.
-			giveUp := false
-			switch err.(type) {
-			case *client.RequestConstructionError:
-				// We will construct the request the same next time, so don't keep trying.
-				giveUp = true
-			case *errors.StatusError:
-				// This indicates that the server understood and rejected our request.
-				giveUp = true
-			case *errors.UnexpectedObjectError:
-				// We don't expect this; it implies the server's response didn't match a
-				// known pattern. Go ahead and retry.
-			default:
-				// This case includes actual http transport errors. Go ahead and retry.
-			}
-			if giveUp {
-				glog.Errorf("Unable to write event '%#v': '%v' (will not retry!)", event, err)
+			tries++
+			if tries >= maxTriesPerEvent {
+				glog.Errorf("Unable to write event '%#v' (retry limit exceeded!)", event)
 				break
 			}
-			if try >= 3 {
-				glog.Errorf("Unable to write event '%#v': '%v' (retry limit exceeded!)", event, err)
-				break
+			// Randomize the first sleep so that various clients won't all be
+			// synced up if the master goes down.
+			if tries == 1 {
+				time.Sleep(time.Duration(float64(sleepDuration) * randGen.Float64()))
+			} else {
+				time.Sleep(sleepDuration)
 			}
-			glog.Errorf("Unable to write event: '%v' (will retry in 1 second)", err)
-			time.Sleep(retryEventSleep)
 		}
 	})
 }
 
+// recordEvent attempts to write event to recorder. It returns true if the event
+// was successfully recorded or discarded, false if it should be retried.
+func recordEvent(recorder EventRecorder, event *api.Event) bool {
+	_, err := recorder.Create(event)
+	if err == nil {
+		return true
+	}
+	// If we can't contact the server, then hold everything while we keep trying.
+	// Otherwise, something about the event is malformed and we should abandon it.
+	giveUp := false
+	switch err.(type) {
+	case *client.RequestConstructionError:
+		// We will construct the request the same next time, so don't keep trying.
+		giveUp = true
+	case *errors.StatusError:
+		// This indicates that the server understood and rejected our request.
+		giveUp = true
+	case *errors.UnexpectedObjectError:
+		// We don't expect this; it implies the server's response didn't match a
+		// known pattern. Go ahead and retry.
+	default:
+		// This case includes actual http transport errors. Go ahead and retry.
+	}
+	if giveUp {
+		glog.Errorf("Unable to write event '%#v': '%v' (will not retry!)", event, err)
+		return true
+	}
+	glog.Errorf("Unable to write event: '%v' (may retry after sleeping)", err)
+	return false
+}
+
 // StartLogging just logs local events, using the given logging function. The
 // return value can be ignored or used to stop logging, if desired.
 func StartLogging(logf func(format string, args ...interface{})) watch.Interface {
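The new loop above caps writes at maxTriesPerEvent and randomizes only the first sleep so that many clients restarting at once don't retry in lockstep. A self-contained sketch of that pattern (the names and durations below are illustrative, not part of this package):

```go
package main

import (
	"fmt"
	"math/rand"
	"time"
)

// retryWithJitter is an illustrative stand-in for the loop above: writeOnce
// plays the role of recordEvent and returns true once the write succeeded or
// was permanently rejected. It returns false if the retry limit is exceeded.
func retryWithJitter(writeOnce func() bool, maxTries int, sleep time.Duration, randGen *rand.Rand) bool {
	for tries := 0; ; {
		if writeOnce() {
			return true
		}
		tries++
		if tries >= maxTries {
			return false // retry limit exceeded
		}
		// Jitter only the first sleep so clients that failed at the same
		// moment don't all hit the server again at the same moment.
		if tries == 1 {
			time.Sleep(time.Duration(float64(sleep) * randGen.Float64()))
		} else {
			time.Sleep(sleep)
		}
	}
}

func main() {
	randGen := rand.New(rand.NewSource(time.Now().UnixNano()))
	attempts := 0
	ok := retryWithJitter(func() bool {
		attempts++
		return attempts >= 3 // pretend the third attempt succeeds
	}, 12, 10*time.Millisecond, randGen)
	fmt.Println("succeeded:", ok, "after", attempts, "attempts")
}
```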
@@ -120,9 +141,9 @@ func GetEvents(f func(*api.Event)) watch.Interface {
 	return w
 }
 
-const queueLen = 1000
+const maxQueuedEvents = 1000
 
-var events = watch.NewBroadcaster(queueLen)
+var events = watch.NewBroadcaster(maxQueuedEvents, watch.DropIfChannelFull)
 
 // Event constructs an event from the given information and puts it in the queue for sending.
 // 'object' is the object this event is about. Event will make a reference-- or you may also

@@ -19,9 +19,9 @@ package record
 import (
 	"fmt"
 	"reflect"
+	"strconv"
 	"strings"
 	"testing"
-	"time"
 
 	"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
 	"github.com/GoogleCloudPlatform/kubernetes/pkg/api/errors"
@@ -31,7 +31,8 @@ import (
 )
 
 func init() {
-	retryEventSleep = 1 * time.Microsecond
+	// Don't bother sleeping between retries.
+	sleepDuration = 0
 }
 
 type testEventRecorder struct {
@@ -188,12 +189,12 @@ func TestWriteEventError(t *testing.T) {
 		},
 		"retry1": {
 			timesToSendError: 1000,
-			attemptsWanted:   3,
+			attemptsWanted:   12,
 			err:              &errors.UnexpectedObjectError{},
 		},
 		"retry2": {
 			timesToSendError: 1000,
-			attemptsWanted:   3,
+			attemptsWanted:   12,
 			err:              fmt.Errorf("A weird error"),
 		},
 		"succeedEventually": {
@@ -238,3 +239,54 @@ func TestWriteEventError(t *testing.T) {
 		}
 	}
 }
+
+func TestLotsOfEvents(t *testing.T) {
+	recorderCalled := make(chan struct{})
+	loggerCalled := make(chan struct{})
+
+	// Fail each event a few times to ensure there's some load on the tested code.
+	var counts [1000]int
+	testEvents := testEventRecorder{
+		OnEvent: func(event *api.Event) (*api.Event, error) {
+			num, err := strconv.Atoi(event.Message)
+			if err != nil {
+				t.Error(err)
+				return event, nil
+			}
+			counts[num]++
+			if counts[num] < 5 {
+				return nil, fmt.Errorf("fake error")
+			}
+			recorderCalled <- struct{}{}
+			return event, nil
+		},
+	}
+	recorder := StartRecording(&testEvents, api.EventSource{Component: "eventTest"})
+	logger := StartLogging(func(formatter string, args ...interface{}) {
+		loggerCalled <- struct{}{}
+	})
+
+	ref := &api.ObjectReference{
+		Kind:       "Pod",
+		Name:       "foo",
+		Namespace:  "baz",
+		UID:        "bar",
+		APIVersion: "v1beta1",
+	}
+	for i := 0; i < maxQueuedEvents; i++ {
+		go Event(ref, "Status", "Reason", strconv.Itoa(i))
+	}
+	// Make sure no events were dropped by either of the listeners.
+	for i := 0; i < maxQueuedEvents; i++ {
+		<-recorderCalled
+		<-loggerCalled
+	}
+	// Make sure that every event was attempted 5 times.
+	for i := 0; i < maxQueuedEvents; i++ {
+		if counts[i] < 5 {
+			t.Errorf("Only attempted to record event '%d' %d times.", i, counts[i])
+		}
+	}
+	recorder.Stop()
+	logger.Stop()
+}
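For orientation, here is how a caller might wire these pieces together, modeled on the test above. The record import path and the recorder argument are assumptions for illustration, not something this diff shows:

```go
package example

import (
	"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
	"github.com/GoogleCloudPlatform/kubernetes/pkg/client/record" // package path assumed from "package record"
	"github.com/golang/glog"
)

// wireEvents is a hypothetical caller. Anything implementing EventRecorder
// (for example an API client) can serve as the sink.
func wireEvents(recorder record.EventRecorder) {
	// Ship queued events to the sink, retrying each up to maxTriesPerEvent times.
	sink := record.StartRecording(recorder, api.EventSource{Component: "example"})
	defer sink.Stop()

	// Also log every event locally.
	logger := record.StartLogging(glog.Infof)
	defer logger.Stop()

	ref := &api.ObjectReference{Kind: "Pod", Name: "foo", Namespace: "baz", UID: "bar", APIVersion: "v1beta1"}
	record.Event(ref, "Status", "Reason", "something happened")
}
```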
@@ -39,7 +39,7 @@ type GenericRegistry struct {
 func NewGeneric(list runtime.Object) *GenericRegistry {
 	return &GenericRegistry{
 		ObjectList:  list,
-		Broadcaster: watch.NewBroadcaster(0),
+		Broadcaster: watch.NewBroadcaster(0, watch.WaitIfChannelFull),
 	}
 }
 
@@ -36,7 +36,7 @@ type PodRegistry struct {
 func NewPodRegistry(pods *api.PodList) *PodRegistry {
 	return &PodRegistry{
 		Pods:        pods,
-		broadcaster: watch.NewBroadcaster(0),
+		broadcaster: watch.NewBroadcaster(0, watch.WaitIfChannelFull),
 	}
 }
 
@@ -22,6 +22,20 @@ import (
 	"github.com/GoogleCloudPlatform/kubernetes/pkg/runtime"
 )
 
+// FullChannelBehavior controls how the Broadcaster reacts if a watcher's watch
+// channel is full.
+type FullChannelBehavior int
+
+const (
+	WaitIfChannelFull FullChannelBehavior = iota
+	DropIfChannelFull
+)
+
+// Buffer the incoming queue a little bit even though it should rarely ever accumulate
+// anything, just in case a few events are received in such a short window that
+// Broadcaster can't move them onto the watchers' queues fast enough.
+const incomingQueueLength = 25
+
 // Broadcaster distributes event notifications among any number of watchers. Every event
 // is delivered to every watcher.
 type Broadcaster struct {
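A sketch of how callers pick between the two behaviors, mirroring the call sites changed elsewhere in this commit; the pkg/watch import path is assumed:

```go
package example

import "github.com/GoogleCloudPlatform/kubernetes/pkg/watch" // import path assumed

// Event recording favors liveness: a watcher that falls behind by more than
// the queue length has new events dropped rather than blocking the senders.
var eventBroadcaster = watch.NewBroadcaster(1000, watch.DropIfChannelFull)

// Registry watches favor completeness: distribution blocks until every
// watcher has received the event.
var registryBroadcaster = watch.NewBroadcaster(0, watch.WaitIfChannelFull)
```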
@@ -31,17 +45,27 @@ type Broadcaster struct {
 	nextWatcher int64
 
 	incoming chan Event
+
+	// How large to make watcher's channel.
+	watchQueueLength int
+	// If one of the watch channels is full, don't wait for it to become empty.
+	// Instead just deliver it to the watchers that do have space in their
+	// channels and move on to the next event.
+	// It's more fair to do this on a per-watcher basis than to do it on the
+	// "incoming" channel, which would allow one slow watcher to prevent all
+	// other watchers from getting new events.
+	fullChannelBehavior FullChannelBehavior
 }
 
-// NewBroadcaster creates a new Broadcaster. queueLength is the maximum number of events to queue.
-// When queueLength is 0, Action will block until any prior event has been
-// completely distributed. It is guaranteed that events will be distibuted in the
-// order in which they ocurr, but the order in which a single event is distributed
-// among all of the watchers is unspecified.
-func NewBroadcaster(queueLength int) *Broadcaster {
+// NewBroadcaster creates a new Broadcaster. queueLength is the maximum number of events to queue per watcher.
+// It is guaranteed that events will be distibuted in the order in which they ocur,
+// but the order in which a single event is distributed among all of the watchers is unspecified.
+func NewBroadcaster(queueLength int, fullChannelBehavior FullChannelBehavior) *Broadcaster {
 	m := &Broadcaster{
-		watchers: map[int64]*broadcasterWatcher{},
-		incoming: make(chan Event, queueLength),
+		watchers:            map[int64]*broadcasterWatcher{},
+		incoming:            make(chan Event, incomingQueueLength),
+		watchQueueLength:    queueLength,
+		fullChannelBehavior: fullChannelBehavior,
 	}
 	go m.loop()
 	return m
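Because the chosen behavior applies per watcher (each watcher gets its own result channel of length queueLength), one stalled consumer cannot hold back the rest under DropIfChannelFull. A rough sketch of that property, again assuming the pkg/watch import path:

```go
package example

import (
	"fmt"

	"github.com/GoogleCloudPlatform/kubernetes/pkg/watch" // import path assumed
)

func fairnessSketch() {
	b := watch.NewBroadcaster(1, watch.DropIfChannelFull)

	slow := b.Watch() // never read from; its one-slot buffer fills after the first event
	fast := b.Watch()

	done := make(chan struct{})
	go func() {
		defer close(done)
		for ev := range fast.ResultChan() {
			fmt.Printf("fast watcher got %v %#v\n", ev.Type, ev.Object)
		}
	}()

	// Events queued beyond the slow watcher's buffer are dropped for it, but
	// the fast watcher keeps receiving as long as it drains its channel promptly.
	for i := 0; i < 5; i++ {
		b.Action(watch.Added, nil)
	}
	b.Shutdown()
	<-done
	_ = slow
}
```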
@@ -56,7 +80,7 @@ func (m *Broadcaster) Watch() Interface {
 	id := m.nextWatcher
 	m.nextWatcher++
 	w := &broadcasterWatcher{
-		result:  make(chan Event),
+		result:  make(chan Event, m.watchQueueLength),
 		stopped: make(chan struct{}),
 		id:      id,
 		m:       m,
@@ -119,10 +143,20 @@ func (m *Broadcaster) loop() {
 func (m *Broadcaster) distribute(event Event) {
 	m.lock.Lock()
 	defer m.lock.Unlock()
-	for _, w := range m.watchers {
-		select {
-		case w.result <- event:
-		case <-w.stopped:
+	if m.fullChannelBehavior == DropIfChannelFull {
+		for _, w := range m.watchers {
+			select {
+			case w.result <- event:
+			case <-w.stopped:
+			default: // Don't block if the event can't be queued.
+			}
+		}
+	} else {
+		for _, w := range m.watchers {
+			select {
+			case w.result <- event:
+			case <-w.stopped:
+			}
 		}
 	}
 }
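The DropIfChannelFull branch above relies on Go's non-blocking send: a select with a default case gives up immediately instead of waiting on a full channel. A minimal stdlib-only illustration:

```go
package example

// trySend delivers v if the channel has room and reports whether it did;
// if the channel is full (or no receiver is ready on an unbuffered channel),
// it returns false instead of blocking.
func trySend(ch chan<- int, v int) bool {
	select {
	case ch <- v:
		return true
	default:
		return false
	}
}
```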
@@ -39,7 +39,7 @@ func TestBroadcaster(t *testing.T) {
 	}
 
 	// The broadcaster we're testing
-	m := NewBroadcaster(0)
+	m := NewBroadcaster(0, WaitIfChannelFull)
 
 	// Add a bunch of watchers
 	const testWatchers = 2
@@ -77,7 +77,7 @@ func TestBroadcaster(t *testing.T) {
 }
 
 func TestBroadcasterWatcherClose(t *testing.T) {
-	m := NewBroadcaster(0)
+	m := NewBroadcaster(0, WaitIfChannelFull)
 	w := m.Watch()
 	w2 := m.Watch()
 	w.Stop()
@@ -95,7 +95,7 @@ func TestBroadcasterWatcherClose(t *testing.T) {
 
 func TestBroadcasterWatcherStopDeadlock(t *testing.T) {
 	done := make(chan bool)
-	m := NewBroadcaster(0)
+	m := NewBroadcaster(0, WaitIfChannelFull)
 	go func(w0, w1 Interface) {
 		// We know Broadcaster is in the distribute loop once one watcher receives
 		// an event. Stop the other watcher while distribute is trying to
@@ -116,3 +116,52 @@ func TestBroadcasterWatcherStopDeadlock(t *testing.T) {
 	}
 	m.Shutdown()
 }
+
+func TestBroadcasterDropIfChannelFull(t *testing.T) {
+	m := NewBroadcaster(1, DropIfChannelFull)
+
+	event1 := Event{Added, &myType{"foo", "hello world 1"}}
+	event2 := Event{Added, &myType{"bar", "hello world 2"}}
+
+	// Add a couple watchers
+	const testWatchers = 2
+	watches := make([]Interface, testWatchers)
+	for i := 0; i < testWatchers; i++ {
+		watches[i] = m.Watch()
+	}
+
+	// Send a couple events before closing the broadcast channel.
+	t.Log("Sending event 1")
+	m.Action(event1.Type, event1.Object)
+	t.Log("Sending event 2")
+	m.Action(event2.Type, event2.Object)
+	m.Shutdown()
+
+	// Pull events from the queue.
+	wg := sync.WaitGroup{}
+	wg.Add(testWatchers)
+	for i := 0; i < testWatchers; i++ {
+		// Verify that each watcher only gets the first event because its watch
+		// queue of length one was full from the first one.
+		go func(watcher int, w Interface) {
+			defer wg.Done()
+			e1, ok := <-w.ResultChan()
+			if !ok {
+				t.Error("Watcher %v failed to retrieve first event.")
+				return
+			}
+			if e, a := event1, e1; !reflect.DeepEqual(e, a) {
+				t.Errorf("Watcher %v: Expected (%v, %#v), got (%v, %#v)",
+					watcher, e.Type, e.Object, a.Type, a.Object)
+			} else {
+				t.Logf("Got (%v, %#v)", e1.Type, e1.Object)
+			}
+			e2, ok := <-w.ResultChan()
+			if ok {
+				t.Error("Watcher %v received second event (%v, %#v) even though it shouldn't have.",
+					watcher, e2.Type, e2.Object)
+			}
+		}(i, watches[i])
+	}
+	wg.Wait()
+}