mirror of https://github.com/k3s-io/k3s
348 lines
9.6 KiB
Go
348 lines
9.6 KiB
Go
/*
|
|
Copyright 2016 The Kubernetes Authors.
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
you may not use this file except in compliance with the License.
|
|
You may obtain a copy of the License at
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
See the License for the specific language governing permissions and
|
|
limitations under the License.
|
|
*/
|
|
|
|
package etcd3
|
|
|
|
import (
|
|
"fmt"
|
|
"net/http"
|
|
"strings"
|
|
"sync"
|
|
|
|
"k8s.io/kubernetes/pkg/api/unversioned"
|
|
"k8s.io/kubernetes/pkg/runtime"
|
|
"k8s.io/kubernetes/pkg/storage"
|
|
"k8s.io/kubernetes/pkg/watch"
|
|
|
|
"github.com/coreos/etcd/clientv3"
|
|
etcdrpc "github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes"
|
|
"github.com/golang/glog"
|
|
"golang.org/x/net/context"
|
|
"google.golang.org/grpc"
|
|
"google.golang.org/grpc/codes"
|
|
)
|
|
|
|
const (
|
|
// We have set a buffer in order to reduce times of context switches.
|
|
incomingBufSize = 100
|
|
outgoingBufSize = 100
|
|
)
|
|
|
|
type watcher struct {
|
|
client *clientv3.Client
|
|
codec runtime.Codec
|
|
versioner storage.Versioner
|
|
}
|
|
|
|
// watchChan implements watch.Interface.
|
|
type watchChan struct {
|
|
watcher *watcher
|
|
key string
|
|
initialRev int64
|
|
recursive bool
|
|
filter storage.Filter
|
|
ctx context.Context
|
|
cancel context.CancelFunc
|
|
incomingEventChan chan *event
|
|
resultChan chan watch.Event
|
|
errChan chan error
|
|
}
|
|
|
|
func newWatcher(client *clientv3.Client, codec runtime.Codec, versioner storage.Versioner) *watcher {
|
|
return &watcher{
|
|
client: client,
|
|
codec: codec,
|
|
versioner: versioner,
|
|
}
|
|
}
|
|
|
|
// Watch watches on a key and returns a watch.Interface that transfers relevant notifications.
|
|
// If rev is zero, it will return the existing object(s) and then start watching from
|
|
// the maximum revision+1 from returned objects.
|
|
// If rev is non-zero, it will watch events happened after given revision.
|
|
// If recursive is false, it watches on given key.
|
|
// If recursive is true, it watches any children and directories under the key, excluding the root key itself.
|
|
// filter must be non-nil. Only if filter returns true will the changes be returned.
|
|
func (w *watcher) Watch(ctx context.Context, key string, rev int64, recursive bool, filter storage.Filter) (watch.Interface, error) {
|
|
if recursive && !strings.HasSuffix(key, "/") {
|
|
key += "/"
|
|
}
|
|
wc := w.createWatchChan(ctx, key, rev, recursive, filter)
|
|
go wc.run()
|
|
return wc, nil
|
|
}
|
|
|
|
func (w *watcher) createWatchChan(ctx context.Context, key string, rev int64, recursive bool, filter storage.Filter) *watchChan {
|
|
wc := &watchChan{
|
|
watcher: w,
|
|
key: key,
|
|
initialRev: rev,
|
|
recursive: recursive,
|
|
filter: filter,
|
|
incomingEventChan: make(chan *event, incomingBufSize),
|
|
resultChan: make(chan watch.Event, outgoingBufSize),
|
|
errChan: make(chan error, 1),
|
|
}
|
|
wc.ctx, wc.cancel = context.WithCancel(ctx)
|
|
return wc
|
|
}
|
|
|
|
func (wc *watchChan) run() {
|
|
go wc.startWatching()
|
|
|
|
var resultChanWG sync.WaitGroup
|
|
resultChanWG.Add(1)
|
|
go wc.processEvent(&resultChanWG)
|
|
|
|
select {
|
|
case err := <-wc.errChan:
|
|
errResult := parseError(err)
|
|
if errResult != nil {
|
|
// error result is guaranteed to be received by user before closing ResultChan.
|
|
select {
|
|
case wc.resultChan <- *errResult:
|
|
case <-wc.ctx.Done(): // user has given up all results
|
|
}
|
|
}
|
|
wc.cancel()
|
|
case <-wc.ctx.Done():
|
|
}
|
|
// we need to wait until resultChan wouldn't be sent to anymore
|
|
resultChanWG.Wait()
|
|
close(wc.resultChan)
|
|
}
|
|
|
|
func (wc *watchChan) Stop() {
|
|
wc.cancel()
|
|
}
|
|
|
|
func (wc *watchChan) ResultChan() <-chan watch.Event {
|
|
return wc.resultChan
|
|
}
|
|
|
|
// sync tries to retrieve existing data and send them to process.
|
|
// The revision to watch will be set to the revision in response.
|
|
func (wc *watchChan) sync() error {
|
|
opts := []clientv3.OpOption{}
|
|
if wc.recursive {
|
|
opts = append(opts, clientv3.WithPrefix())
|
|
}
|
|
getResp, err := wc.watcher.client.Get(wc.ctx, wc.key, opts...)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
wc.initialRev = getResp.Header.Revision
|
|
|
|
for _, kv := range getResp.Kvs {
|
|
wc.sendEvent(parseKV(kv))
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// startWatching does:
|
|
// - get current objects if initialRev=0; set initialRev to current rev
|
|
// - watch on given key and send events to process.
|
|
func (wc *watchChan) startWatching() {
|
|
if wc.initialRev == 0 {
|
|
if err := wc.sync(); err != nil {
|
|
wc.sendError(err)
|
|
return
|
|
}
|
|
}
|
|
opts := []clientv3.OpOption{clientv3.WithRev(wc.initialRev + 1)}
|
|
if wc.recursive {
|
|
opts = append(opts, clientv3.WithPrefix())
|
|
}
|
|
wch := wc.watcher.client.Watch(wc.ctx, wc.key, opts...)
|
|
for wres := range wch {
|
|
if wres.Err() != nil {
|
|
// If there is an error on server (e.g. compaction), the channel will return it before closed.
|
|
wc.sendError(wres.Err())
|
|
return
|
|
}
|
|
for _, e := range wres.Events {
|
|
wc.sendEvent(parseEvent(e))
|
|
}
|
|
}
|
|
}
|
|
|
|
// processEvent processes events from etcd watcher and sends results to resultChan.
|
|
func (wc *watchChan) processEvent(wg *sync.WaitGroup) {
|
|
defer wg.Done()
|
|
|
|
for {
|
|
select {
|
|
case e := <-wc.incomingEventChan:
|
|
res := wc.transform(e)
|
|
if res == nil {
|
|
continue
|
|
}
|
|
// If user couldn't receive results fast enough, we also block incoming events from watcher.
|
|
// Because storing events in local will cause more memory usage.
|
|
// The worst case would be closing the fast watcher.
|
|
select {
|
|
case wc.resultChan <- *res:
|
|
case <-wc.ctx.Done():
|
|
return
|
|
}
|
|
case <-wc.ctx.Done():
|
|
return
|
|
}
|
|
}
|
|
}
|
|
|
|
// transform transforms an event into a result for user if not filtered.
|
|
// TODO (Optimization):
|
|
// - Save remote round-trip.
|
|
// Currently, DELETE and PUT event don't contain the previous value.
|
|
// We need to do another Get() in order to get previous object and have logic upon it.
|
|
// We could potentially do some optimizations:
|
|
// - For PUT, we can save current and previous objects into the value.
|
|
// - For DELETE, See https://github.com/coreos/etcd/issues/4620
|
|
func (wc *watchChan) transform(e *event) (res *watch.Event) {
|
|
curObj, oldObj, err := prepareObjs(wc.ctx, e, wc.watcher.client, wc.watcher.codec, wc.watcher.versioner)
|
|
if err != nil {
|
|
wc.sendError(err)
|
|
return nil
|
|
}
|
|
|
|
switch {
|
|
case e.isDeleted:
|
|
if !wc.filter.Filter(oldObj) {
|
|
return nil
|
|
}
|
|
res = &watch.Event{
|
|
Type: watch.Deleted,
|
|
Object: oldObj,
|
|
}
|
|
case e.isCreated:
|
|
if !wc.filter.Filter(curObj) {
|
|
return nil
|
|
}
|
|
res = &watch.Event{
|
|
Type: watch.Added,
|
|
Object: curObj,
|
|
}
|
|
default:
|
|
curObjPasses := wc.filter.Filter(curObj)
|
|
oldObjPasses := wc.filter.Filter(oldObj)
|
|
switch {
|
|
case curObjPasses && oldObjPasses:
|
|
res = &watch.Event{
|
|
Type: watch.Modified,
|
|
Object: curObj,
|
|
}
|
|
case curObjPasses && !oldObjPasses:
|
|
res = &watch.Event{
|
|
Type: watch.Added,
|
|
Object: curObj,
|
|
}
|
|
case !curObjPasses && oldObjPasses:
|
|
res = &watch.Event{
|
|
Type: watch.Deleted,
|
|
Object: oldObj,
|
|
}
|
|
}
|
|
}
|
|
return res
|
|
}
|
|
|
|
func parseError(err error) *watch.Event {
|
|
var status *unversioned.Status
|
|
switch {
|
|
case err == etcdrpc.ErrCompacted:
|
|
status = &unversioned.Status{
|
|
Status: unversioned.StatusFailure,
|
|
Message: err.Error(),
|
|
Code: http.StatusGone,
|
|
Reason: unversioned.StatusReasonExpired,
|
|
}
|
|
default:
|
|
status = &unversioned.Status{
|
|
Status: unversioned.StatusFailure,
|
|
Message: err.Error(),
|
|
Code: http.StatusInternalServerError,
|
|
Reason: unversioned.StatusReasonInternalError,
|
|
}
|
|
}
|
|
|
|
return &watch.Event{
|
|
Type: watch.Error,
|
|
Object: status,
|
|
}
|
|
}
|
|
|
|
func (wc *watchChan) sendError(err error) {
|
|
// Context.canceled is an expected behavior.
|
|
// We should just stop all goroutines in watchChan without returning error.
|
|
// TODO: etcd client should return context.Canceled instead of grpc specific error.
|
|
if grpc.Code(err) == codes.Canceled || err == context.Canceled {
|
|
return
|
|
}
|
|
select {
|
|
case wc.errChan <- err:
|
|
case <-wc.ctx.Done():
|
|
}
|
|
}
|
|
|
|
func (wc *watchChan) sendEvent(e *event) {
|
|
if len(wc.incomingEventChan) == incomingBufSize {
|
|
glog.V(2).Infof("Fast watcher, slow processing. Number of buffered events: %d."+
|
|
"Probably caused by slow decoding, user not receiving fast, or other processing logic",
|
|
incomingBufSize)
|
|
}
|
|
select {
|
|
case wc.incomingEventChan <- e:
|
|
case <-wc.ctx.Done():
|
|
}
|
|
}
|
|
|
|
func prepareObjs(ctx context.Context, e *event, client *clientv3.Client, codec runtime.Codec, versioner storage.Versioner) (curObj runtime.Object, oldObj runtime.Object, err error) {
|
|
if !e.isDeleted {
|
|
curObj, err = decodeObj(codec, versioner, e.value, e.rev)
|
|
if err != nil {
|
|
return nil, nil, err
|
|
}
|
|
}
|
|
if e.isDeleted || !e.isCreated {
|
|
getResp, err := client.Get(ctx, e.key, clientv3.WithRev(e.rev-1))
|
|
if err != nil {
|
|
return nil, nil, err
|
|
}
|
|
// Note that this sends the *old* object with the etcd revision for the time at
|
|
// which it gets deleted.
|
|
// We assume old object is returned only in Deleted event. Users (e.g. cacher) need
|
|
// to have larger than previous rev to tell the ordering.
|
|
oldObj, err = decodeObj(codec, versioner, getResp.Kvs[0].Value, e.rev)
|
|
if err != nil {
|
|
return nil, nil, err
|
|
}
|
|
}
|
|
return curObj, oldObj, nil
|
|
}
|
|
|
|
func decodeObj(codec runtime.Codec, versioner storage.Versioner, data []byte, rev int64) (runtime.Object, error) {
|
|
obj, err := runtime.Decode(codec, []byte(data))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
// ensure resource version is set on the object we load from etcd
|
|
if err := versioner.UpdateObject(obj, uint64(rev)); err != nil {
|
|
return nil, fmt.Errorf("failure to version api object (%d) %#v: %v", rev, obj, err)
|
|
}
|
|
return obj, nil
|
|
}
|