k3s/pkg/storage/etcd/etcd_watcher.go

379 lines
11 KiB
Go
Raw Normal View History

2014-08-25 00:29:30 +00:00
/*
Copyright 2014 The Kubernetes Authors All rights reserved.
2014-08-25 00:29:30 +00:00
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
2015-07-30 11:27:18 +00:00
package etcd
2014-08-25 00:29:30 +00:00
import (
"sync"
"time"
2014-08-25 00:29:30 +00:00
2015-08-05 22:03:47 +00:00
"k8s.io/kubernetes/pkg/api"
"k8s.io/kubernetes/pkg/runtime"
"k8s.io/kubernetes/pkg/storage"
"k8s.io/kubernetes/pkg/tools"
"k8s.io/kubernetes/pkg/util"
"k8s.io/kubernetes/pkg/watch"
2014-08-25 00:29:30 +00:00
"github.com/coreos/go-etcd/etcd"
"github.com/golang/glog"
)
// Etcd watch event actions. These are the values of etcd.Response.Action that
// sendResult dispatches on; any other action is logged as unknown.
const (
// EtcdCreate and EtcdGet are reported through sendAdd.
EtcdCreate = "create"
EtcdGet = "get"
// EtcdSet and EtcdCAS are reported through sendModify.
EtcdSet = "set"
EtcdCAS = "compareAndSwap"
// EtcdDelete and EtcdExpire are reported through sendDelete.
EtcdDelete = "delete"
EtcdExpire = "expire"
)
// TransformFunc attempts to convert an object to another object for use with a watcher.
// The watcher applies it in decodeObject after the object has been decoded and
// versioned; if it returns an error, decodeObject fails and the event for that
// object is not emitted.
type TransformFunc func(runtime.Object) (runtime.Object, error)
2014-08-25 00:29:30 +00:00
// includeFunc decides whether an etcd key belongs to a watch: it returns true
// for keys that should be considered and false for keys that should be skipped.
type includeFunc func(key string) bool

// exceptKey builds an includeFunc that accepts every key other than except.
func exceptKey(except string) includeFunc {
	accept := func(candidate string) bool {
		return !(candidate == except)
	}
	return accept
}
2014-08-25 00:29:30 +00:00
// etcdWatcher converts a native etcd watch to a watch.Interface.
type etcdWatcher struct {
encoding runtime.Codec
2015-07-30 07:45:06 +00:00
versioner storage.Versioner
2014-08-25 00:29:30 +00:00
transform TransformFunc
list bool // If we're doing a recursive watch, should be true.
include includeFunc
2015-07-30 07:27:38 +00:00
filter storage.FilterFunc
2014-08-25 00:29:30 +00:00
etcdIncoming chan *etcd.Response
2014-09-22 23:12:32 +00:00
etcdError chan error
2014-08-25 00:29:30 +00:00
etcdStop chan bool
etcdCallEnded chan struct{}
outgoing chan watch.Event
userStop chan struct{}
stopped bool
stopLock sync.Mutex
// Injectable for testing. Send the event down the outgoing channel.
emit func(watch.Event)
cache etcdCache
2014-08-25 00:29:30 +00:00
}
// watchWaitDuration is the amount of time to wait for an error from watch.
// NOTE(review): nothing in the part of the file visible here reads this
// constant — confirm its users before changing the value.
const watchWaitDuration = 100 * time.Millisecond
2014-08-25 00:29:30 +00:00
// newEtcdWatcher returns a new etcdWatcher; if list is true, watch sub-nodes. If you provide a transform
// and a versioner, the versioner must be able to handle the objects that transform creates.
2015-07-30 07:45:06 +00:00
func newEtcdWatcher(list bool, include includeFunc, filter storage.FilterFunc, encoding runtime.Codec, versioner storage.Versioner, transform TransformFunc, cache etcdCache) *etcdWatcher {
2014-08-25 00:29:30 +00:00
w := &etcdWatcher{
2015-06-26 21:27:13 +00:00
encoding: encoding,
versioner: versioner,
transform: transform,
list: list,
include: include,
filter: filter,
// Buffer this channel, so that the etcd client is not forced
// to context switch with every object it gets, and so that a
// long time spent decoding an object won't block the *next*
// object. Basically, we see a lot of "401 window exceeded"
// errors from etcd, and that's due to the client not streaming
// results but rather getting them one at a time. So we really
// want to never block the etcd client, if possible. The 100 is
// mostly arbitrary--we know it goes as high as 50, though.
// There's a V(2) log message that prints the length so we can
// monitor how much of this buffer is actually used.
etcdIncoming: make(chan *etcd.Response, 100),
2014-09-22 23:12:32 +00:00
etcdError: make(chan error, 1),
etcdStop: make(chan bool),
outgoing: make(chan watch.Event),
userStop: make(chan struct{}),
cache: cache,
2014-08-25 00:29:30 +00:00
}
w.emit = func(e watch.Event) { w.outgoing <- e }
go w.translate()
return w
}
// etcdWatch calls etcd's Watch function, and handles any errors. Meant to be called
2014-09-22 23:12:32 +00:00
// as a goroutine.
2015-07-30 11:27:18 +00:00
func (w *etcdWatcher) etcdWatch(client tools.EtcdClient, key string, resourceVersion uint64) {
2014-08-25 00:29:30 +00:00
defer util.HandleCrash()
2014-09-22 23:12:32 +00:00
defer close(w.etcdError)
2014-08-25 00:29:30 +00:00
if resourceVersion == 0 {
2014-09-22 23:12:32 +00:00
latest, err := etcdGetInitialWatchState(client, key, w.list, w.etcdIncoming)
if err != nil {
w.etcdError <- err
2014-08-25 00:29:30 +00:00
return
}
resourceVersion = latest + 1
2014-08-25 00:29:30 +00:00
}
_, err := client.Watch(key, resourceVersion, w.list, w.etcdIncoming, w.etcdStop)
2014-09-22 23:12:32 +00:00
if err != nil && err != etcd.ErrWatchStoppedByUser {
w.etcdError <- err
2014-08-25 00:29:30 +00:00
}
}
// etcdGetInitialWatchState turns an etcd Get request into a watch equivalent
2015-07-30 11:27:18 +00:00
func etcdGetInitialWatchState(client tools.EtcdClient, key string, recursive bool, incoming chan<- *etcd.Response) (resourceVersion uint64, err error) {
2014-08-25 00:29:30 +00:00
resp, err := client.Get(key, false, recursive)
if err != nil {
if !IsEtcdNotFound(err) {
glog.Errorf("watch was unable to retrieve the current index for the provided key (%q): %v", key, err)
2014-09-22 23:12:32 +00:00
return resourceVersion, err
2014-08-25 00:29:30 +00:00
}
if index, ok := etcdErrorIndex(err); ok {
resourceVersion = index
2014-08-25 00:29:30 +00:00
}
2014-09-22 23:12:32 +00:00
return resourceVersion, nil
2014-08-25 00:29:30 +00:00
}
resourceVersion = resp.EtcdIndex
2014-08-25 00:29:30 +00:00
convertRecursiveResponse(resp.Node, resp, incoming)
return
}
// convertRecursiveResponse emulates a recursive watch on top of a recursive get:
// it walks the tree rooted at node and, for every non-directory node, sends a
// shallow copy of response on incoming with Action set to "get" and Node
// pointing at that node. Directory nodes produce no response of their own.
func convertRecursiveResponse(node *etcd.Node, response *etcd.Response, incoming chan<- *etcd.Response) {
	if !node.Dir {
		leaf := *response
		leaf.Action = "get"
		leaf.Node = node
		incoming <- &leaf
		return
	}
	for _, child := range node.Nodes {
		convertRecursiveResponse(child, response, incoming)
	}
}
// watchChannelHWM is a package-level value shared by every etcdWatcher.
// translate passes it the current length of etcdIncoming and logs at V(2)
// whenever Check reports true.
// NOTE(review): presumably Check reports true only when a new maximum is
// observed — confirm against util.HighWaterMark.
var (
watchChannelHWM util.HighWaterMark
)
2014-09-22 23:12:32 +00:00
// translate pulls stuff from etcd, converts, and pushes out the outgoing channel. Meant to be
2014-08-25 00:29:30 +00:00
// called as a goroutine.
func (w *etcdWatcher) translate() {
defer close(w.outgoing)
defer util.HandleCrash()
for {
select {
2014-09-22 23:12:32 +00:00
case err := <-w.etcdError:
if err != nil {
w.emit(watch.Event{
2015-08-08 01:52:23 +00:00
Type: watch.Error,
Object: &api.Status{
2014-09-22 23:12:32 +00:00
Status: api.StatusFailure,
Message: err.Error(),
},
})
}
2014-08-25 00:29:30 +00:00
return
case <-w.userStop:
w.etcdStop <- true
return
case res, ok := <-w.etcdIncoming:
2014-09-22 23:12:32 +00:00
if ok {
if curLen := int64(len(w.etcdIncoming)); watchChannelHWM.Check(curLen) {
2015-06-26 21:27:13 +00:00
// Monitor if this gets backed up, and how much.
glog.V(2).Infof("watch: %v objects queued in channel.", curLen)
2015-06-26 21:27:13 +00:00
}
2014-09-22 23:12:32 +00:00
w.sendResult(res)
2014-08-25 00:29:30 +00:00
}
2014-09-22 23:12:32 +00:00
// If !ok, don't return here-- must wait for etcdError channel
// to give an error or be closed.
2014-08-25 00:29:30 +00:00
}
}
}
func (w *etcdWatcher) decodeObject(node *etcd.Node) (runtime.Object, error) {
if obj, found := w.cache.getFromCache(node.ModifiedIndex); found {
return obj, nil
}
obj, err := w.encoding.Decode([]byte(node.Value))
2014-08-25 00:29:30 +00:00
if err != nil {
return nil, err
}
// ensure resource version is set on the object we load from etcd
if w.versioner != nil {
2015-06-19 00:42:01 +00:00
if err := w.versioner.UpdateObject(obj, node.Expiration, node.ModifiedIndex); err != nil {
glog.Errorf("failure to version api object (%d) %#v: %v", node.ModifiedIndex, obj, err)
2014-08-25 00:29:30 +00:00
}
}
// perform any necessary transformation
if w.transform != nil {
obj, err = w.transform(obj)
if err != nil {
glog.Errorf("failure to transform api object %#v: %v", obj, err)
return nil, err
}
}
if node.ModifiedIndex != 0 {
w.cache.addToCache(node.ModifiedIndex, obj)
}
2014-08-25 00:29:30 +00:00
return obj, nil
}
func (w *etcdWatcher) sendAdd(res *etcd.Response) {
2014-08-25 00:29:30 +00:00
if res.Node == nil {
glog.Errorf("unexpected nil node: %#v", res)
return
}
if w.include != nil && !w.include(res.Node.Key) {
return
}
obj, err := w.decodeObject(res.Node)
2014-08-25 00:29:30 +00:00
if err != nil {
glog.Errorf("failure to decode api object: '%v' from %#v %#v", string(res.Node.Value), res, res.Node)
2014-08-25 00:29:30 +00:00
// TODO: expose an error through watch.Interface?
// Ignore this value. If we stop the watch on a bad value, a client that uses
// the resourceVersion to resume will never be able to get past a bad value.
return
}
if !w.filter(obj) {
return
}
action := watch.Added
if res.Node.ModifiedIndex != res.Node.CreatedIndex {
action = watch.Modified
}
w.emit(watch.Event{
Type: action,
Object: obj,
})
}
func (w *etcdWatcher) sendModify(res *etcd.Response) {
if res.Node == nil {
glog.Errorf("unexpected nil node: %#v", res)
return
}
if w.include != nil && !w.include(res.Node.Key) {
return
}
curObj, err := w.decodeObject(res.Node)
2014-08-25 00:29:30 +00:00
if err != nil {
glog.Errorf("failure to decode api object: '%v' from %#v %#v", string(res.Node.Value), res, res.Node)
2014-08-25 00:29:30 +00:00
// TODO: expose an error through watch.Interface?
// Ignore this value. If we stop the watch on a bad value, a client that uses
// the resourceVersion to resume will never be able to get past a bad value.
return
}
curObjPasses := w.filter(curObj)
oldObjPasses := false
var oldObj runtime.Object
2014-08-25 00:29:30 +00:00
if res.PrevNode != nil && res.PrevNode.Value != "" {
// Ignore problems reading the old object.
if oldObj, err = w.decodeObject(res.PrevNode); err == nil {
2014-08-25 00:29:30 +00:00
oldObjPasses = w.filter(oldObj)
}
}
// Some changes to an object may cause it to start or stop matching a filter.
// We need to report those as adds/deletes. So we have to check both the previous
// and current value of the object.
switch {
case curObjPasses && oldObjPasses:
w.emit(watch.Event{
Type: watch.Modified,
Object: curObj,
})
case curObjPasses && !oldObjPasses:
w.emit(watch.Event{
Type: watch.Added,
Object: curObj,
})
case !curObjPasses && oldObjPasses:
w.emit(watch.Event{
Type: watch.Deleted,
Object: oldObj,
})
}
// Do nothing if neither new nor old object passed the filter.
}
func (w *etcdWatcher) sendDelete(res *etcd.Response) {
if res.PrevNode == nil {
glog.Errorf("unexpected nil prev node: %#v", res)
return
}
if w.include != nil && !w.include(res.PrevNode.Key) {
return
}
node := *res.PrevNode
if res.Node != nil {
// Note that this sends the *old* object with the etcd index for the time at
// which it gets deleted. This will allow users to restart the watch at the right
// index.
node.ModifiedIndex = res.Node.ModifiedIndex
}
obj, err := w.decodeObject(&node)
2014-08-25 00:29:30 +00:00
if err != nil {
glog.Errorf("failure to decode api object: '%v' from %#v %#v", string(res.PrevNode.Value), res, res.PrevNode)
2014-08-25 00:29:30 +00:00
// TODO: expose an error through watch.Interface?
// Ignore this value. If we stop the watch on a bad value, a client that uses
// the resourceVersion to resume will never be able to get past a bad value.
return
}
if !w.filter(obj) {
return
}
w.emit(watch.Event{
Type: watch.Deleted,
Object: obj,
})
}
func (w *etcdWatcher) sendResult(res *etcd.Response) {
switch res.Action {
case EtcdCreate, EtcdGet:
w.sendAdd(res)
case EtcdSet, EtcdCAS:
2014-08-25 00:29:30 +00:00
w.sendModify(res)
case EtcdDelete, EtcdExpire:
2014-08-25 00:29:30 +00:00
w.sendDelete(res)
default:
glog.Errorf("unknown action: %v", res.Action)
}
}
// ResultChan implements watch.Interface.
2014-08-25 00:29:30 +00:00
func (w *etcdWatcher) ResultChan() <-chan watch.Event {
return w.outgoing
}
// Stop implements watch.Interface. The first call closes userStop; every later
// call is a no-op, so Stop may be invoked any number of times.
func (w *etcdWatcher) Stop() {
	w.stopLock.Lock()
	defer w.stopLock.Unlock()

	// Closing an already-closed channel panics, so only the first call may close.
	if w.stopped {
		return
	}
	w.stopped = true
	close(w.userStop)
}