2019-01-12 04:58:27 +00:00
|
|
|
/*
|
|
|
|
Copyright 2016 The Kubernetes Authors.
|
|
|
|
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
you may not use this file except in compliance with the License.
|
|
|
|
You may obtain a copy of the License at
|
|
|
|
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
See the License for the specific language governing permissions and
|
|
|
|
limitations under the License.
|
|
|
|
*/
|
|
|
|
|
|
|
|
package etcd3
|
|
|
|
|
|
|
|
import (
|
|
|
|
"bytes"
|
|
|
|
"context"
|
|
|
|
"encoding/base64"
|
|
|
|
"encoding/json"
|
|
|
|
"errors"
|
|
|
|
"fmt"
|
|
|
|
"k8s.io/klog"
|
|
|
|
"path"
|
|
|
|
"reflect"
|
|
|
|
"strings"
|
|
|
|
"time"
|
|
|
|
|
|
|
|
"github.com/ibuildthecloud/kvsql/clientv3"
|
|
|
|
apierrors "k8s.io/apimachinery/pkg/api/errors"
|
|
|
|
"k8s.io/apimachinery/pkg/api/meta"
|
|
|
|
"k8s.io/apimachinery/pkg/conversion"
|
|
|
|
"k8s.io/apimachinery/pkg/runtime"
|
|
|
|
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
|
|
|
|
"k8s.io/apimachinery/pkg/watch"
|
|
|
|
"k8s.io/apiserver/pkg/storage"
|
|
|
|
"k8s.io/apiserver/pkg/storage/etcd"
|
|
|
|
"k8s.io/apiserver/pkg/storage/value"
|
2019-04-07 17:07:55 +00:00
|
|
|
utiltrace "k8s.io/utils/trace"
|
2019-01-12 04:58:27 +00:00
|
|
|
)
|
|
|
|
|
|
|
|
// authenticatedDataString satisfies the value.Context interface. It uses the key to
|
|
|
|
// authenticate the stored data. This does not defend against reuse of previously
|
|
|
|
// encrypted values under the same key, but will prevent an attacker from using an
|
|
|
|
// encrypted value from a different key. A stronger authenticated data segment would
|
|
|
|
// include the etcd3 Version field (which is incremented on each write to a key and
|
|
|
|
// reset when the key is deleted), but an attacker with write access to etcd can
|
|
|
|
// force deletion and recreation of keys to weaken that angle.
|
|
|
|
type authenticatedDataString string
|
|
|
|
|
|
|
|
// AuthenticatedData implements the value.Context interface.
|
|
|
|
func (d authenticatedDataString) AuthenticatedData() []byte {
|
|
|
|
return []byte(string(d))
|
|
|
|
}
|
|
|
|
|
|
|
|
var _ value.Context = authenticatedDataString("")
|
|
|
|
|
|
|
|
type store struct {
|
|
|
|
client *clientv3.Client
|
|
|
|
// getOpts contains additional options that should be passed
|
|
|
|
// to all Get() calls.
|
|
|
|
getOps []clientv3.OpOption
|
|
|
|
codec runtime.Codec
|
|
|
|
versioner storage.Versioner
|
|
|
|
transformer value.Transformer
|
|
|
|
pathPrefix string
|
|
|
|
watcher *watcher
|
|
|
|
pagingEnabled bool
|
|
|
|
leaseManager *leaseManager
|
|
|
|
}
|
|
|
|
|
|
|
|
type objState struct {
|
|
|
|
obj runtime.Object
|
|
|
|
meta *storage.ResponseMeta
|
|
|
|
rev int64
|
|
|
|
data []byte
|
|
|
|
stale bool
|
|
|
|
}
|
|
|
|
|
|
|
|
// New returns an etcd3 implementation of storage.Interface.
|
|
|
|
func New(c *clientv3.Client, codec runtime.Codec, prefix string, transformer value.Transformer, pagingEnabled bool) storage.Interface {
|
|
|
|
return newStore(c, true, pagingEnabled, codec, prefix, transformer)
|
|
|
|
}
|
|
|
|
|
|
|
|
func newStore(c *clientv3.Client, quorumRead, pagingEnabled bool, codec runtime.Codec, prefix string, transformer value.Transformer) *store {
|
|
|
|
versioner := etcd.APIObjectVersioner{}
|
|
|
|
result := &store{
|
|
|
|
client: c,
|
|
|
|
codec: codec,
|
|
|
|
versioner: versioner,
|
|
|
|
transformer: transformer,
|
|
|
|
pagingEnabled: pagingEnabled,
|
|
|
|
// for compatibility with etcd2 impl.
|
|
|
|
// no-op for default prefix of '/registry'.
|
|
|
|
// keeps compatibility with etcd2 impl for custom prefixes that don't start with '/'
|
|
|
|
pathPrefix: path.Join("/", prefix),
|
|
|
|
watcher: newWatcher(c, codec, versioner, transformer),
|
|
|
|
leaseManager: newDefaultLeaseManager(c),
|
|
|
|
}
|
|
|
|
if !quorumRead {
|
|
|
|
// In case of non-quorum reads, we can set WithSerializable()
|
|
|
|
// options for all Get operations.
|
|
|
|
result.getOps = append(result.getOps, clientv3.WithSerializable())
|
|
|
|
}
|
|
|
|
return result
|
|
|
|
}
|
|
|
|
|
|
|
|
// Versioner implements storage.Interface.Versioner.
|
|
|
|
func (s *store) Versioner() storage.Versioner {
|
|
|
|
return s.versioner
|
|
|
|
}
|
|
|
|
|
|
|
|
// Get implements storage.Interface.Get.
|
|
|
|
func (s *store) Get(ctx context.Context, key string, resourceVersion string, out runtime.Object, ignoreNotFound bool) error {
|
|
|
|
key = path.Join(s.pathPrefix, key)
|
|
|
|
getResp, err := s.client.KV.Get(ctx, key, s.getOps...)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
if len(getResp.Kvs) == 0 {
|
|
|
|
if ignoreNotFound {
|
|
|
|
return runtime.SetZeroValue(out)
|
|
|
|
}
|
|
|
|
return storage.NewKeyNotFoundError(key, 0)
|
|
|
|
}
|
|
|
|
kv := getResp.Kvs[0]
|
|
|
|
|
|
|
|
data, _, err := s.transformer.TransformFromStorage(kv.Value, authenticatedDataString(key))
|
|
|
|
if err != nil {
|
|
|
|
return storage.NewInternalError(err.Error())
|
|
|
|
}
|
|
|
|
|
|
|
|
return decode(s.codec, s.versioner, data, out, kv.ModRevision)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Create implements storage.Interface.Create.
|
|
|
|
func (s *store) Create(ctx context.Context, key string, obj, out runtime.Object, ttl uint64) error {
|
|
|
|
if version, err := s.versioner.ObjectResourceVersion(obj); err == nil && version != 0 {
|
|
|
|
return errors.New("resourceVersion should not be set on objects to be created")
|
|
|
|
}
|
|
|
|
if err := s.versioner.PrepareObjectForStorage(obj); err != nil {
|
|
|
|
return fmt.Errorf("PrepareObjectForStorage failed: %v", err)
|
|
|
|
}
|
|
|
|
data, err := runtime.Encode(s.codec, obj)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
key = path.Join(s.pathPrefix, key)
|
|
|
|
|
|
|
|
opts, err := s.ttlOpts(ctx, int64(ttl))
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
newData, err := s.transformer.TransformToStorage(data, authenticatedDataString(key))
|
|
|
|
if err != nil {
|
|
|
|
return storage.NewInternalError(err.Error())
|
|
|
|
}
|
|
|
|
|
|
|
|
txnResp, err := s.client.KV.Txn(ctx).If(
|
|
|
|
notFound(key),
|
|
|
|
).Then(
|
|
|
|
clientv3.OpPut(key, string(newData), opts...),
|
|
|
|
).Commit()
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
if !txnResp.Succeeded {
|
|
|
|
return storage.NewKeyExistsError(key, 0)
|
|
|
|
}
|
|
|
|
|
|
|
|
if out != nil {
|
|
|
|
putResp := txnResp.Responses[0].GetResponsePut()
|
|
|
|
return decode(s.codec, s.versioner, data, out, putResp.Header.Revision)
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Delete implements storage.Interface.Delete.
|
|
|
|
func (s *store) Delete(ctx context.Context, key string, out runtime.Object, preconditions *storage.Preconditions) error {
|
|
|
|
v, err := conversion.EnforcePtr(out)
|
|
|
|
if err != nil {
|
|
|
|
panic("unable to convert output object to pointer")
|
|
|
|
}
|
|
|
|
key = path.Join(s.pathPrefix, key)
|
|
|
|
if preconditions == nil {
|
|
|
|
return s.unconditionalDelete(ctx, key, out)
|
|
|
|
}
|
|
|
|
return s.conditionalDelete(ctx, key, out, v, preconditions)
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s *store) unconditionalDelete(ctx context.Context, key string, out runtime.Object) error {
|
|
|
|
// We need to do get and delete in single transaction in order to
|
|
|
|
// know the value and revision before deleting it.
|
|
|
|
txnResp, err := s.client.KV.Txn(ctx).If().Then(
|
|
|
|
clientv3.OpGet(key),
|
|
|
|
clientv3.OpDelete(key),
|
|
|
|
).Commit()
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
getResp := txnResp.Responses[0].GetResponseRange()
|
|
|
|
if len(getResp.Kvs) == 0 {
|
|
|
|
return storage.NewKeyNotFoundError(key, 0)
|
|
|
|
}
|
|
|
|
|
|
|
|
kv := getResp.Kvs[0]
|
|
|
|
data, _, err := s.transformer.TransformFromStorage(kv.Value, authenticatedDataString(key))
|
|
|
|
if err != nil {
|
|
|
|
return storage.NewInternalError(err.Error())
|
|
|
|
}
|
|
|
|
return decode(s.codec, s.versioner, data, out, kv.ModRevision)
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s *store) conditionalDelete(ctx context.Context, key string, out runtime.Object, v reflect.Value, preconditions *storage.Preconditions) error {
|
|
|
|
getResp, err := s.client.KV.Get(ctx, key)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
for {
|
|
|
|
origState, err := s.getState(getResp, key, v, false)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
if err := preconditions.Check(key, origState.obj); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
txnResp, err := s.client.KV.Txn(ctx).If(
|
|
|
|
clientv3.Compare(clientv3.ModRevision(key), "=", origState.rev),
|
|
|
|
).Then(
|
|
|
|
clientv3.OpDelete(key),
|
|
|
|
).Else(
|
|
|
|
clientv3.OpGet(key),
|
|
|
|
).Commit()
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
if !txnResp.Succeeded {
|
|
|
|
getResp = (*clientv3.GetResponse)(txnResp.Responses[0].GetResponseRange())
|
|
|
|
klog.V(4).Infof("deletion of %s failed because of a conflict, going to retry", key)
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
return decode(s.codec, s.versioner, origState.data, out, origState.rev)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// GuaranteedUpdate implements storage.Interface.GuaranteedUpdate.
|
|
|
|
func (s *store) GuaranteedUpdate(
|
|
|
|
ctx context.Context, key string, out runtime.Object, ignoreNotFound bool,
|
|
|
|
preconditions *storage.Preconditions, tryUpdate storage.UpdateFunc, suggestion ...runtime.Object) error {
|
|
|
|
trace := utiltrace.New(fmt.Sprintf("GuaranteedUpdate etcd3: %s", reflect.TypeOf(out).String()))
|
|
|
|
defer trace.LogIfLong(500 * time.Millisecond)
|
|
|
|
|
|
|
|
v, err := conversion.EnforcePtr(out)
|
|
|
|
if err != nil {
|
|
|
|
panic("unable to convert output object to pointer")
|
|
|
|
}
|
|
|
|
key = path.Join(s.pathPrefix, key)
|
|
|
|
|
|
|
|
getCurrentState := func() (*objState, error) {
|
|
|
|
getResp, err := s.client.KV.Get(ctx, key, s.getOps...)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
return s.getState(getResp, key, v, ignoreNotFound)
|
|
|
|
}
|
|
|
|
|
|
|
|
var origState *objState
|
|
|
|
var mustCheckData bool
|
|
|
|
if len(suggestion) == 1 && suggestion[0] != nil {
|
|
|
|
origState, err = s.getStateFromObject(suggestion[0])
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
mustCheckData = true
|
|
|
|
} else {
|
|
|
|
origState, err = getCurrentState()
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
trace.Step("initial value restored")
|
|
|
|
|
|
|
|
transformContext := authenticatedDataString(key)
|
|
|
|
for {
|
|
|
|
if err := preconditions.Check(key, origState.obj); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
ret, ttl, err := s.updateState(origState, tryUpdate)
|
|
|
|
if err != nil {
|
|
|
|
// It's possible we were working with stale data
|
|
|
|
if mustCheckData && apierrors.IsConflict(err) {
|
|
|
|
// Actually fetch
|
|
|
|
origState, err = getCurrentState()
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
mustCheckData = false
|
|
|
|
// Retry
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
data, err := runtime.Encode(s.codec, ret)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
if !origState.stale && bytes.Equal(data, origState.data) {
|
|
|
|
// if we skipped the original Get in this loop, we must refresh from
|
|
|
|
// etcd in order to be sure the data in the store is equivalent to
|
|
|
|
// our desired serialization
|
|
|
|
if mustCheckData {
|
|
|
|
origState, err = getCurrentState()
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
mustCheckData = false
|
|
|
|
if !bytes.Equal(data, origState.data) {
|
|
|
|
// original data changed, restart loop
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// recheck that the data from etcd is not stale before short-circuiting a write
|
|
|
|
if !origState.stale {
|
|
|
|
return decode(s.codec, s.versioner, origState.data, out, origState.rev)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
newData, err := s.transformer.TransformToStorage(data, transformContext)
|
|
|
|
if err != nil {
|
|
|
|
return storage.NewInternalError(err.Error())
|
|
|
|
}
|
|
|
|
|
|
|
|
opts, err := s.ttlOpts(ctx, int64(ttl))
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
trace.Step("Transaction prepared")
|
|
|
|
|
|
|
|
txnResp, err := s.client.KV.Txn(ctx).If(
|
|
|
|
clientv3.Compare(clientv3.ModRevision(key), "=", origState.rev),
|
|
|
|
).Then(
|
|
|
|
clientv3.OpPut(key, string(newData), opts...),
|
|
|
|
).Else(
|
|
|
|
clientv3.OpGet(key),
|
|
|
|
).Commit()
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
trace.Step("Transaction committed")
|
|
|
|
if !txnResp.Succeeded {
|
|
|
|
getResp := (*clientv3.GetResponse)(txnResp.Responses[0].GetResponseRange())
|
|
|
|
klog.V(4).Infof("GuaranteedUpdate of %s failed because of a conflict, going to retry", key)
|
|
|
|
origState, err = s.getState(getResp, key, v, ignoreNotFound)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
trace.Step("Retry value restored")
|
|
|
|
mustCheckData = false
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
putResp := txnResp.Responses[0].GetResponsePut()
|
|
|
|
|
|
|
|
return decode(s.codec, s.versioner, data, out, putResp.Header.Revision)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// GetToList implements storage.Interface.GetToList.
|
|
|
|
func (s *store) GetToList(ctx context.Context, key string, resourceVersion string, pred storage.SelectionPredicate, listObj runtime.Object) error {
|
|
|
|
listPtr, err := meta.GetItemsPtr(listObj)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
v, err := conversion.EnforcePtr(listPtr)
|
|
|
|
if err != nil || v.Kind() != reflect.Slice {
|
|
|
|
panic("need ptr to slice")
|
|
|
|
}
|
|
|
|
|
|
|
|
key = path.Join(s.pathPrefix, key)
|
|
|
|
getResp, err := s.client.KV.Get(ctx, key, s.getOps...)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
if len(getResp.Kvs) > 0 {
|
|
|
|
data, _, err := s.transformer.TransformFromStorage(getResp.Kvs[0].Value, authenticatedDataString(key))
|
|
|
|
if err != nil {
|
|
|
|
return storage.NewInternalError(err.Error())
|
|
|
|
}
|
|
|
|
if err := appendListItem(v, data, uint64(getResp.Kvs[0].ModRevision), pred, s.codec, s.versioner); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// update version with cluster level revision
|
|
|
|
return s.versioner.UpdateList(listObj, uint64(getResp.Header.Revision), "")
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s *store) Count(key string) (int64, error) {
|
|
|
|
key = path.Join(s.pathPrefix, key)
|
|
|
|
getResp, err := s.client.KV.Get(context.Background(), key, clientv3.WithRange(clientv3.GetPrefixRangeEnd(key)), clientv3.WithCountOnly())
|
|
|
|
if err != nil {
|
|
|
|
return 0, err
|
|
|
|
}
|
|
|
|
return getResp.Count, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// continueToken is a simple structured object for encoding the state of a continue token.
|
|
|
|
// TODO: if we change the version of the encoded from, we can't start encoding the new version
|
|
|
|
// until all other servers are upgraded (i.e. we need to support rolling schema)
|
|
|
|
// This is a public API struct and cannot change.
|
|
|
|
type continueToken struct {
|
|
|
|
APIVersion string `json:"v"`
|
|
|
|
ResourceVersion int64 `json:"rv"`
|
|
|
|
StartKey string `json:"start"`
|
|
|
|
}
|
|
|
|
|
|
|
|
// parseFrom transforms an encoded predicate from into a versioned struct.
|
|
|
|
// TODO: return a typed error that instructs clients that they must relist
|
|
|
|
func decodeContinue(continueValue, keyPrefix string) (fromKey string, rv int64, err error) {
|
|
|
|
data, err := base64.RawURLEncoding.DecodeString(continueValue)
|
|
|
|
if err != nil {
|
|
|
|
return "", 0, fmt.Errorf("continue key is not valid: %v", err)
|
|
|
|
}
|
|
|
|
var c continueToken
|
|
|
|
if err := json.Unmarshal(data, &c); err != nil {
|
|
|
|
return "", 0, fmt.Errorf("continue key is not valid: %v", err)
|
|
|
|
}
|
|
|
|
switch c.APIVersion {
|
|
|
|
case "meta.k8s.io/v1":
|
|
|
|
if c.ResourceVersion == 0 {
|
|
|
|
return "", 0, fmt.Errorf("continue key is not valid: incorrect encoded start resourceVersion (version meta.k8s.io/v1)")
|
|
|
|
}
|
|
|
|
if len(c.StartKey) == 0 {
|
|
|
|
return "", 0, fmt.Errorf("continue key is not valid: encoded start key empty (version meta.k8s.io/v1)")
|
|
|
|
}
|
|
|
|
// defend against path traversal attacks by clients - path.Clean will ensure that startKey cannot
|
|
|
|
// be at a higher level of the hierarchy, and so when we append the key prefix we will end up with
|
|
|
|
// continue start key that is fully qualified and cannot range over anything less specific than
|
|
|
|
// keyPrefix.
|
|
|
|
key := c.StartKey
|
|
|
|
if !strings.HasPrefix(key, "/") {
|
|
|
|
key = "/" + key
|
|
|
|
}
|
|
|
|
cleaned := path.Clean(key)
|
|
|
|
if cleaned != key {
|
|
|
|
return "", 0, fmt.Errorf("continue key is not valid: %s", c.StartKey)
|
|
|
|
}
|
|
|
|
return keyPrefix + cleaned[1:], c.ResourceVersion, nil
|
|
|
|
default:
|
|
|
|
return "", 0, fmt.Errorf("continue key is not valid: server does not recognize this encoded version %q", c.APIVersion)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// encodeContinue returns a string representing the encoded continuation of the current query.
|
|
|
|
func encodeContinue(key, keyPrefix string, resourceVersion int64) (string, error) {
|
|
|
|
nextKey := strings.TrimPrefix(key, keyPrefix)
|
|
|
|
if nextKey == key {
|
|
|
|
return "", fmt.Errorf("unable to encode next field: the key and key prefix do not match")
|
|
|
|
}
|
|
|
|
out, err := json.Marshal(&continueToken{APIVersion: "meta.k8s.io/v1", ResourceVersion: resourceVersion, StartKey: nextKey})
|
|
|
|
if err != nil {
|
|
|
|
return "", err
|
|
|
|
}
|
|
|
|
return base64.RawURLEncoding.EncodeToString(out), nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// List implements storage.Interface.List.
|
|
|
|
func (s *store) List(ctx context.Context, key, resourceVersion string, pred storage.SelectionPredicate, listObj runtime.Object) error {
|
|
|
|
listPtr, err := meta.GetItemsPtr(listObj)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
v, err := conversion.EnforcePtr(listPtr)
|
|
|
|
if err != nil || v.Kind() != reflect.Slice {
|
|
|
|
panic("need ptr to slice")
|
|
|
|
}
|
|
|
|
|
|
|
|
if s.pathPrefix != "" {
|
|
|
|
key = path.Join(s.pathPrefix, key)
|
|
|
|
}
|
|
|
|
// We need to make sure the key ended with "/" so that we only get children "directories".
|
|
|
|
// e.g. if we have key "/a", "/a/b", "/ab", getting keys with prefix "/a" will return all three,
|
|
|
|
// while with prefix "/a/" will return only "/a/b" which is the correct answer.
|
|
|
|
if !strings.HasSuffix(key, "/") {
|
|
|
|
key += "/"
|
|
|
|
}
|
|
|
|
keyPrefix := key
|
|
|
|
|
|
|
|
// set the appropriate clientv3 options to filter the returned data set
|
|
|
|
var paging bool
|
|
|
|
options := make([]clientv3.OpOption, 0, 4)
|
|
|
|
if s.pagingEnabled && pred.Limit > 0 {
|
|
|
|
paging = true
|
|
|
|
options = append(options, clientv3.WithLimit(pred.Limit))
|
|
|
|
}
|
|
|
|
|
|
|
|
var returnedRV, continueRV int64
|
|
|
|
var continueKey string
|
|
|
|
switch {
|
|
|
|
case s.pagingEnabled && len(pred.Continue) > 0:
|
|
|
|
continueKey, continueRV, err = decodeContinue(pred.Continue, keyPrefix)
|
|
|
|
if err != nil {
|
|
|
|
return apierrors.NewBadRequest(fmt.Sprintf("invalid continue token: %v", err))
|
|
|
|
}
|
|
|
|
|
|
|
|
if len(resourceVersion) > 0 && resourceVersion != "0" {
|
|
|
|
return apierrors.NewBadRequest("specifying resource version is not allowed when using continue")
|
|
|
|
}
|
|
|
|
|
|
|
|
rangeEnd := clientv3.GetPrefixRangeEnd(keyPrefix)
|
|
|
|
options = append(options, clientv3.WithRange(rangeEnd))
|
|
|
|
key = continueKey
|
|
|
|
|
|
|
|
// If continueRV > 0, the LIST request needs a specific resource version.
|
|
|
|
// continueRV==0 is invalid.
|
|
|
|
// If continueRV < 0, the request is for the latest resource version.
|
|
|
|
if continueRV > 0 {
|
|
|
|
options = append(options, clientv3.WithRev(continueRV))
|
|
|
|
returnedRV = continueRV
|
|
|
|
}
|
|
|
|
case s.pagingEnabled && pred.Limit > 0:
|
|
|
|
if len(resourceVersion) > 0 {
|
|
|
|
fromRV, err := s.versioner.ParseResourceVersion(resourceVersion)
|
|
|
|
if err != nil {
|
|
|
|
return apierrors.NewBadRequest(fmt.Sprintf("invalid resource version: %v", err))
|
|
|
|
}
|
|
|
|
if fromRV > 0 {
|
|
|
|
options = append(options, clientv3.WithRev(int64(fromRV)))
|
|
|
|
}
|
|
|
|
returnedRV = int64(fromRV)
|
|
|
|
}
|
|
|
|
|
|
|
|
rangeEnd := clientv3.GetPrefixRangeEnd(keyPrefix)
|
|
|
|
options = append(options, clientv3.WithRange(rangeEnd))
|
|
|
|
|
|
|
|
default:
|
|
|
|
if len(resourceVersion) > 0 {
|
|
|
|
fromRV, err := s.versioner.ParseResourceVersion(resourceVersion)
|
|
|
|
if err != nil {
|
|
|
|
return apierrors.NewBadRequest(fmt.Sprintf("invalid resource version: %v", err))
|
|
|
|
}
|
|
|
|
if fromRV > 0 {
|
|
|
|
options = append(options, clientv3.WithRev(int64(fromRV)))
|
|
|
|
}
|
|
|
|
returnedRV = int64(fromRV)
|
|
|
|
}
|
|
|
|
|
|
|
|
options = append(options, clientv3.WithPrefix())
|
|
|
|
}
|
|
|
|
|
|
|
|
// loop until we have filled the requested limit from etcd or there are no more results
|
|
|
|
var lastKey []byte
|
|
|
|
var hasMore bool
|
|
|
|
for {
|
|
|
|
getResp, err := s.client.KV.Get(ctx, key, options...)
|
|
|
|
if err != nil {
|
|
|
|
return interpretListError(err, len(pred.Continue) > 0, continueKey, keyPrefix)
|
|
|
|
}
|
|
|
|
hasMore = getResp.More
|
|
|
|
|
|
|
|
if len(getResp.Kvs) == 0 && getResp.More {
|
|
|
|
return fmt.Errorf("no results were found, but etcd indicated there were more values remaining")
|
|
|
|
}
|
|
|
|
|
|
|
|
// avoid small allocations for the result slice, since this can be called in many
|
|
|
|
// different contexts and we don't know how significantly the result will be filtered
|
|
|
|
if pred.Empty() {
|
|
|
|
growSlice(v, len(getResp.Kvs))
|
|
|
|
} else {
|
|
|
|
growSlice(v, 2048, len(getResp.Kvs))
|
|
|
|
}
|
|
|
|
|
|
|
|
// take items from the response until the bucket is full, filtering as we go
|
|
|
|
for _, kv := range getResp.Kvs {
|
|
|
|
if paging && int64(v.Len()) >= pred.Limit {
|
|
|
|
hasMore = true
|
|
|
|
break
|
|
|
|
}
|
|
|
|
lastKey = kv.Key
|
|
|
|
|
|
|
|
data, _, err := s.transformer.TransformFromStorage(kv.Value, authenticatedDataString(kv.Key))
|
|
|
|
if err != nil {
|
|
|
|
utilruntime.HandleError(fmt.Errorf("unable to transform key %q: %v", kv.Key, err))
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
if err := appendListItem(v, data, uint64(kv.ModRevision), pred, s.codec, s.versioner); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// indicate to the client which resource version was returned
|
|
|
|
if returnedRV == 0 {
|
|
|
|
returnedRV = getResp.Header.Revision
|
|
|
|
}
|
|
|
|
|
|
|
|
// no more results remain or we didn't request paging
|
|
|
|
if !hasMore || !paging {
|
|
|
|
break
|
|
|
|
}
|
|
|
|
// we're paging but we have filled our bucket
|
|
|
|
if int64(v.Len()) >= pred.Limit {
|
|
|
|
break
|
|
|
|
}
|
|
|
|
key = string(lastKey) + "\x00"
|
|
|
|
}
|
|
|
|
|
|
|
|
// instruct the client to begin querying from immediately after the last key we returned
|
|
|
|
// we never return a key that the client wouldn't be allowed to see
|
|
|
|
if hasMore {
|
|
|
|
// we want to start immediately after the last key
|
|
|
|
next, err := encodeContinue(string(lastKey)+"\x00", keyPrefix, returnedRV)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
return s.versioner.UpdateList(listObj, uint64(returnedRV), next)
|
|
|
|
}
|
|
|
|
|
|
|
|
// no continuation
|
|
|
|
return s.versioner.UpdateList(listObj, uint64(returnedRV), "")
|
|
|
|
}
|
|
|
|
|
|
|
|
// growSlice takes a slice value and grows its capacity up
|
|
|
|
// to the maximum of the passed sizes or maxCapacity, whichever
|
|
|
|
// is smaller. Above maxCapacity decisions about allocation are left
|
|
|
|
// to the Go runtime on append. This allows a caller to make an
|
|
|
|
// educated guess about the potential size of the total list while
|
|
|
|
// still avoiding overly aggressive initial allocation. If sizes
|
|
|
|
// is empty maxCapacity will be used as the size to grow.
|
|
|
|
func growSlice(v reflect.Value, maxCapacity int, sizes ...int) {
|
|
|
|
cap := v.Cap()
|
|
|
|
max := cap
|
|
|
|
for _, size := range sizes {
|
|
|
|
if size > max {
|
|
|
|
max = size
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if len(sizes) == 0 || max > maxCapacity {
|
|
|
|
max = maxCapacity
|
|
|
|
}
|
|
|
|
if max <= cap {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
if v.Len() > 0 {
|
|
|
|
extra := reflect.MakeSlice(v.Type(), 0, max)
|
|
|
|
reflect.Copy(extra, v)
|
|
|
|
v.Set(extra)
|
|
|
|
} else {
|
|
|
|
extra := reflect.MakeSlice(v.Type(), 0, max)
|
|
|
|
v.Set(extra)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Watch implements storage.Interface.Watch.
|
|
|
|
func (s *store) Watch(ctx context.Context, key string, resourceVersion string, pred storage.SelectionPredicate) (watch.Interface, error) {
|
|
|
|
return s.watch(ctx, key, resourceVersion, pred, false)
|
|
|
|
}
|
|
|
|
|
|
|
|
// WatchList implements storage.Interface.WatchList.
|
|
|
|
func (s *store) WatchList(ctx context.Context, key string, resourceVersion string, pred storage.SelectionPredicate) (watch.Interface, error) {
|
|
|
|
return s.watch(ctx, key, resourceVersion, pred, true)
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s *store) watch(ctx context.Context, key string, rv string, pred storage.SelectionPredicate, recursive bool) (watch.Interface, error) {
|
|
|
|
rev, err := s.versioner.ParseResourceVersion(rv)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
key = path.Join(s.pathPrefix, key)
|
|
|
|
return s.watcher.Watch(ctx, key, int64(rev), recursive, pred)
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s *store) getState(getResp *clientv3.GetResponse, key string, v reflect.Value, ignoreNotFound bool) (*objState, error) {
|
|
|
|
state := &objState{
|
|
|
|
obj: reflect.New(v.Type()).Interface().(runtime.Object),
|
|
|
|
meta: &storage.ResponseMeta{},
|
|
|
|
}
|
|
|
|
if len(getResp.Kvs) == 0 {
|
|
|
|
if !ignoreNotFound {
|
|
|
|
return nil, storage.NewKeyNotFoundError(key, 0)
|
|
|
|
}
|
|
|
|
if err := runtime.SetZeroValue(state.obj); err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
data, stale, err := s.transformer.TransformFromStorage(getResp.Kvs[0].Value, authenticatedDataString(key))
|
|
|
|
if err != nil {
|
|
|
|
return nil, storage.NewInternalError(err.Error())
|
|
|
|
}
|
|
|
|
state.rev = getResp.Kvs[0].ModRevision
|
|
|
|
state.meta.ResourceVersion = uint64(state.rev)
|
|
|
|
state.data = data
|
|
|
|
state.stale = stale
|
|
|
|
if err := decode(s.codec, s.versioner, state.data, state.obj, state.rev); err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return state, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s *store) getStateFromObject(obj runtime.Object) (*objState, error) {
|
|
|
|
state := &objState{
|
|
|
|
obj: obj,
|
|
|
|
meta: &storage.ResponseMeta{},
|
|
|
|
}
|
|
|
|
|
|
|
|
rv, err := s.versioner.ObjectResourceVersion(obj)
|
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("couldn't get resource version: %v", err)
|
|
|
|
}
|
|
|
|
state.rev = int64(rv)
|
|
|
|
state.meta.ResourceVersion = uint64(state.rev)
|
|
|
|
|
|
|
|
// Compute the serialized form - for that we need to temporarily clean
|
|
|
|
// its resource version field (those are not stored in etcd).
|
|
|
|
if err := s.versioner.PrepareObjectForStorage(obj); err != nil {
|
|
|
|
return nil, fmt.Errorf("PrepareObjectForStorage failed: %v", err)
|
|
|
|
}
|
|
|
|
state.data, err = runtime.Encode(s.codec, obj)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
s.versioner.UpdateObject(state.obj, uint64(rv))
|
|
|
|
return state, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s *store) updateState(st *objState, userUpdate storage.UpdateFunc) (runtime.Object, uint64, error) {
|
|
|
|
ret, ttlPtr, err := userUpdate(st.obj, *st.meta)
|
|
|
|
if err != nil {
|
|
|
|
return nil, 0, err
|
|
|
|
}
|
|
|
|
|
|
|
|
if err := s.versioner.PrepareObjectForStorage(ret); err != nil {
|
|
|
|
return nil, 0, fmt.Errorf("PrepareObjectForStorage failed: %v", err)
|
|
|
|
}
|
|
|
|
var ttl uint64
|
|
|
|
if ttlPtr != nil {
|
|
|
|
ttl = *ttlPtr
|
|
|
|
}
|
|
|
|
return ret, ttl, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// ttlOpts returns client options based on given ttl.
|
|
|
|
// ttl: if ttl is non-zero, it will attach the key to a lease with ttl of roughly the same length
|
|
|
|
func (s *store) ttlOpts(ctx context.Context, ttl int64) ([]clientv3.OpOption, error) {
|
|
|
|
if ttl == 0 {
|
|
|
|
return nil, nil
|
|
|
|
}
|
|
|
|
id, err := s.leaseManager.GetLease(ctx, ttl)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
return []clientv3.OpOption{clientv3.WithLease(id)}, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// decode decodes value of bytes into object. It will also set the object resource version to rev.
|
|
|
|
// On success, objPtr would be set to the object.
|
|
|
|
func decode(codec runtime.Codec, versioner storage.Versioner, value []byte, objPtr runtime.Object, rev int64) error {
|
|
|
|
if _, err := conversion.EnforcePtr(objPtr); err != nil {
|
|
|
|
panic("unable to convert output object to pointer")
|
|
|
|
}
|
|
|
|
_, _, err := codec.Decode(value, nil, objPtr)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
// being unable to set the version does not prevent the object from being extracted
|
|
|
|
versioner.UpdateObject(objPtr, uint64(rev))
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// appendListItem decodes and appends the object (if it passes filter) to v, which must be a slice.
|
|
|
|
func appendListItem(v reflect.Value, data []byte, rev uint64, pred storage.SelectionPredicate, codec runtime.Codec, versioner storage.Versioner) error {
|
|
|
|
obj, _, err := codec.Decode(data, nil, reflect.New(v.Type().Elem()).Interface().(runtime.Object))
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
// being unable to set the version does not prevent the object from being extracted
|
|
|
|
versioner.UpdateObject(obj, rev)
|
|
|
|
if matched, err := pred.Matches(obj); err == nil && matched {
|
|
|
|
v.Set(reflect.Append(v, reflect.ValueOf(obj).Elem()))
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func notFound(key string) clientv3.Cmp {
|
|
|
|
return clientv3.Compare(clientv3.ModRevision(key), "=", 0)
|
|
|
|
}
|