Add support for SQL driver

k3s-v1.14.6
Darren Shepherd 2018-10-23 10:10:56 -07:00 committed by Erik Wilson
parent 80b9f6022f
commit 14b632c696
30 changed files with 4067 additions and 2 deletions

View File

@ -25,6 +25,7 @@ import (
const (
StorageTypeUnset = ""
StorageTypeKVSQL = "kvsql"
StorageTypeETCD3 = "etcd3"
DefaultCompactInterval = 5 * time.Minute

View File

@ -19,6 +19,7 @@ package factory
import (
"fmt"
"github.com/ibuildthecloud/kvsql"
"k8s.io/apiserver/pkg/storage"
"k8s.io/apiserver/pkg/storage/storagebackend"
)
@ -29,7 +30,13 @@ type DestroyFunc func()
// Create creates a storage backend based on given config.
func Create(c storagebackend.Config) (storage.Interface, DestroyFunc, error) {
switch c.Type {
case storagebackend.StorageTypeUnset, storagebackend.StorageTypeETCD3:
case storagebackend.StorageTypeUnset, storagebackend.StorageTypeKVSQL:
return factory.NewKVSQLStorage(c)
case storagebackend.StorageTypeETCD3:
// TODO: We have the following features to implement:
// - Support secure connection by using key, cert, and CA files.
// - Honor "https" scheme to support secure connection in gRPC.
// - Support non-quorum read.
return newETCD3Storage(c)
default:
return nil, nil, fmt.Errorf("unknown storage type: %s", c.Type)
@ -39,7 +46,9 @@ func Create(c storagebackend.Config) (storage.Interface, DestroyFunc, error) {
// CreateHealthCheck creates a healthcheck function based on given config.
func CreateHealthCheck(c storagebackend.Config) (func() error, error) {
switch c.Type {
case storagebackend.StorageTypeUnset, storagebackend.StorageTypeETCD3:
case storagebackend.StorageTypeUnset, storagebackend.StorageTypeKVSQL:
return factory.NewKVSQLHealthCheck(c)
case storagebackend.StorageTypeETCD3:
return newETCD3HealthCheck(c)
default:
return nil, fmt.Errorf("unknown storage type: %s", c.Type)

65
vendor/github.com/docker/docker/pkg/locker/README.md generated vendored Normal file
View File

@ -0,0 +1,65 @@
Locker
=====
locker provides a mechanism for creating finer-grained locking to help
free up more global locks to handle other tasks.
The implementation looks close to a sync.Mutex, however, the user must provide a
reference to use to refer to the underlying lock when locking and unlocking,
and unlock may generate an error.
If a lock with a given name does not exist when `Lock` is called, one is
created.
Lock references are automatically cleaned up on `Unlock` if nothing else is
waiting for the lock.
## Usage
```go
package important
import (
"sync"
"time"
"github.com/docker/docker/pkg/locker"
)
type important struct {
locks *locker.Locker
data map[string]interface{}
mu sync.Mutex
}
func (i *important) Get(name string) interface{} {
i.locks.Lock(name)
defer i.locks.Unlock(name)
return i.data[name]
}
func (i *important) Create(name string, data interface{}) {
i.locks.Lock(name)
defer i.locks.Unlock(name)
i.createImportant(data)
i.mu.Lock()
i.data[name] = data
i.mu.Unlock()
}
func (i *important) createImportant(data interface{}) {
time.Sleep(10 * time.Second)
}
```
For functions dealing with a given name, always lock at the beginning of the
function (or before doing anything with the underlying state), this ensures any
other function that is dealing with the same name will block.
When needing to modify the underlying data, use the global lock to ensure nothing
else is modifying it at the same time.
Since name lock is already in place, no reads will occur while the modification
is being performed.

112
vendor/github.com/docker/docker/pkg/locker/locker.go generated vendored Normal file
View File

@ -0,0 +1,112 @@
/*
Package locker provides a mechanism for creating finer-grained locking to help
free up more global locks to handle other tasks.
The implementation looks close to a sync.Mutex, however the user must provide a
reference to use to refer to the underlying lock when locking and unlocking,
and unlock may generate an error.
If a lock with a given name does not exist when `Lock` is called, one is
created.
Lock references are automatically cleaned up on `Unlock` if nothing else is
waiting for the lock.
*/
package locker // import "github.com/docker/docker/pkg/locker"
import (
"errors"
"sync"
"sync/atomic"
)
// ErrNoSuchLock is returned when the requested lock does not exist
var ErrNoSuchLock = errors.New("no such lock")
// Locker provides a locking mechanism based on the passed in reference name
type Locker struct {
mu sync.Mutex
locks map[string]*lockCtr
}
// lockCtr is used by Locker to represent a lock with a given name.
type lockCtr struct {
mu sync.Mutex
// waiters is the number of waiters waiting to acquire the lock
// this is int32 instead of uint32 so we can add `-1` in `dec()`
waiters int32
}
// inc increments the number of waiters waiting for the lock
func (l *lockCtr) inc() {
atomic.AddInt32(&l.waiters, 1)
}
// dec decrements the number of waiters waiting on the lock
func (l *lockCtr) dec() {
atomic.AddInt32(&l.waiters, -1)
}
// count gets the current number of waiters
func (l *lockCtr) count() int32 {
return atomic.LoadInt32(&l.waiters)
}
// Lock locks the mutex
func (l *lockCtr) Lock() {
l.mu.Lock()
}
// Unlock unlocks the mutex
func (l *lockCtr) Unlock() {
l.mu.Unlock()
}
// New creates a new Locker
func New() *Locker {
return &Locker{
locks: make(map[string]*lockCtr),
}
}
// Lock locks a mutex with the given name. If it doesn't exist, one is created
func (l *Locker) Lock(name string) {
l.mu.Lock()
if l.locks == nil {
l.locks = make(map[string]*lockCtr)
}
nameLock, exists := l.locks[name]
if !exists {
nameLock = &lockCtr{}
l.locks[name] = nameLock
}
// increment the nameLock waiters while inside the main mutex
// this makes sure that the lock isn't deleted if `Lock` and `Unlock` are called concurrently
nameLock.inc()
l.mu.Unlock()
// Lock the nameLock outside the main mutex so we don't block other operations
// once locked then we can decrement the number of waiters for this lock
nameLock.Lock()
nameLock.dec()
}
// Unlock unlocks the mutex with the given name
// If the given lock is not being waited on by any other callers, it is deleted
func (l *Locker) Unlock(name string) error {
l.mu.Lock()
nameLock, exists := l.locks[name]
if !exists {
l.mu.Unlock()
return ErrNoSuchLock
}
if nameLock.count() == 0 {
delete(l.locks, name)
}
nameLock.Unlock()
l.mu.Unlock()
return nil
}

View File

@ -0,0 +1,161 @@
package locker // import "github.com/docker/docker/pkg/locker"
import (
"math/rand"
"strconv"
"sync"
"testing"
"time"
)
func TestLockCounter(t *testing.T) {
l := &lockCtr{}
l.inc()
if l.waiters != 1 {
t.Fatal("counter inc failed")
}
l.dec()
if l.waiters != 0 {
t.Fatal("counter dec failed")
}
}
func TestLockerLock(t *testing.T) {
l := New()
l.Lock("test")
ctr := l.locks["test"]
if ctr.count() != 0 {
t.Fatalf("expected waiters to be 0, got :%d", ctr.waiters)
}
chDone := make(chan struct{})
go func() {
l.Lock("test")
close(chDone)
}()
chWaiting := make(chan struct{})
go func() {
for range time.Tick(1 * time.Millisecond) {
if ctr.count() == 1 {
close(chWaiting)
break
}
}
}()
select {
case <-chWaiting:
case <-time.After(3 * time.Second):
t.Fatal("timed out waiting for lock waiters to be incremented")
}
select {
case <-chDone:
t.Fatal("lock should not have returned while it was still held")
default:
}
if err := l.Unlock("test"); err != nil {
t.Fatal(err)
}
select {
case <-chDone:
case <-time.After(3 * time.Second):
t.Fatalf("lock should have completed")
}
if ctr.count() != 0 {
t.Fatalf("expected waiters to be 0, got: %d", ctr.count())
}
}
func TestLockerUnlock(t *testing.T) {
l := New()
l.Lock("test")
l.Unlock("test")
chDone := make(chan struct{})
go func() {
l.Lock("test")
close(chDone)
}()
select {
case <-chDone:
case <-time.After(3 * time.Second):
t.Fatalf("lock should not be blocked")
}
}
func TestLockerConcurrency(t *testing.T) {
l := New()
var wg sync.WaitGroup
for i := 0; i <= 10000; i++ {
wg.Add(1)
go func() {
l.Lock("test")
// if there is a concurrency issue, will very likely panic here
l.Unlock("test")
wg.Done()
}()
}
chDone := make(chan struct{})
go func() {
wg.Wait()
close(chDone)
}()
select {
case <-chDone:
case <-time.After(10 * time.Second):
t.Fatal("timeout waiting for locks to complete")
}
// Since everything has unlocked this should not exist anymore
if ctr, exists := l.locks["test"]; exists {
t.Fatalf("lock should not exist: %v", ctr)
}
}
func BenchmarkLocker(b *testing.B) {
l := New()
for i := 0; i < b.N; i++ {
l.Lock("test")
l.Unlock("test")
}
}
func BenchmarkLockerParallel(b *testing.B) {
l := New()
b.SetParallelism(128)
b.RunParallel(func(pb *testing.PB) {
for pb.Next() {
l.Lock("test")
l.Unlock("test")
}
})
}
func BenchmarkLockerMoreKeys(b *testing.B) {
l := New()
var keys []string
for i := 0; i < 64; i++ {
keys = append(keys, strconv.Itoa(i))
}
b.SetParallelism(128)
b.RunParallel(func(pb *testing.PB) {
for pb.Next() {
k := keys[rand.Intn(len(keys))]
l.Lock(k)
l.Unlock(k)
}
})
}

178
vendor/github.com/ibuildthecloud/kvsql/LICENSE generated vendored Normal file
View File

@ -0,0 +1,178 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS

View File

@ -0,0 +1,55 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package clientv3
import (
"google.golang.org/grpc"
)
// Client provides and manages an etcd v3 client session.
type Client struct {
Cluster
KV
Lease
Watcher
callOpts []grpc.CallOption
}
// New creates a new etcdv3 client from a given configuration.
func New(cfg Config) (*Client, error) {
c := &Client{
Lease: &lessor{},
}
kv, err := newKV(cfg)
if err != nil {
return nil, err
}
c.KV = kv
c.Watcher = kv
return c, nil
}
// Close shuts down the client's etcd connections.
func (c *Client) Close() error {
if c.Watcher != nil {
return c.Watcher.Close()
}
return nil
}
// Endpoints lists the registered endpoints for the client.
func (c *Client) Endpoints() (eps []string) {
return
}

View File

@ -0,0 +1,26 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package clientv3
import (
"golang.org/x/net/context"
)
type Cluster struct {
}
func (c *Cluster) MemberList(ctx context.Context) (interface{}, error) {
return nil, nil
}

View File

@ -0,0 +1,44 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package clientv3
import (
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
)
// CompactOp represents a compact operation.
type CompactOp struct {
revision int64
}
// CompactOption configures compact operation.
type CompactOption func(*CompactOp)
func (op *CompactOp) applyCompactOpts(opts []CompactOption) {
for _, opt := range opts {
opt(op)
}
}
// OpCompact wraps slice CompactOption to create a CompactOp.
func OpCompact(rev int64, opts ...CompactOption) CompactOp {
ret := CompactOp{revision: rev}
ret.applyCompactOpts(opts)
return ret
}
func (op CompactOp) toRequest() *pb.CompactionRequest {
return &pb.CompactionRequest{Revision: op.revision}
}

View File

@ -0,0 +1,86 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package clientv3
import (
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
)
type CompareTarget int
type CompareResult int
const (
CompareVersion CompareTarget = iota
CompareCreated
CompareModified
CompareValue
)
type Cmp pb.Compare
func Compare(cmp Cmp, result string, v interface{}) Cmp {
var r pb.Compare_CompareResult
switch result {
case "=":
r = pb.Compare_EQUAL
case "!=":
r = pb.Compare_NOT_EQUAL
case ">":
r = pb.Compare_GREATER
case "<":
r = pb.Compare_LESS
default:
panic("Unknown result op")
}
cmp.Result = r
switch cmp.Target {
case pb.Compare_VALUE:
val, ok := v.(string)
if !ok {
panic("bad compare value")
}
cmp.TargetUnion = &pb.Compare_Value{Value: []byte(val)}
case pb.Compare_VERSION:
cmp.TargetUnion = &pb.Compare_Version{Version: mustInt64(v)}
case pb.Compare_CREATE:
cmp.TargetUnion = &pb.Compare_CreateRevision{CreateRevision: mustInt64(v)}
case pb.Compare_MOD:
cmp.TargetUnion = &pb.Compare_ModRevision{ModRevision: mustInt64(v)}
default:
panic("Unknown compare type")
}
return cmp
}
func Version(key string) Cmp {
return Cmp{Key: []byte(key), Target: pb.Compare_VERSION}
}
func ModRevision(key string) Cmp {
return Cmp{Key: []byte(key), Target: pb.Compare_MOD}
}
// mustInt64 panics if val isn't an int or int64. It returns an int64 otherwise.
func mustInt64(val interface{}) int64 {
if v, ok := val.(int64); ok {
return v
}
if v, ok := val.(int); ok {
return int64(v)
}
panic("bad value")
}

View File

@ -0,0 +1,42 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package clientv3
import (
"crypto/tls"
"time"
"google.golang.org/grpc"
)
type Config struct {
// Endpoints is a list of URLs.
Endpoints []string `json:"endpoints"`
// DialKeepAliveTime is the time in seconds after which client pings the server to see if
// transport is alive.
DialKeepAliveTime time.Duration `json:"dial-keep-alive-time"`
// DialKeepAliveTimeout is the time in seconds that the client waits for a response for the
// keep-alive probe. If the response is not received in this time, the connection is closed.
DialKeepAliveTimeout time.Duration `json:"dial-keep-alive-timeout"`
// TLS holds the client secure credentials, if any.
TLS *tls.Config
DialTimeout time.Duration
DialOptions []grpc.DialOption
}

View File

@ -0,0 +1,24 @@
package driver
import (
"errors"
)
var (
ErrExists = errors.New("key exists")
ErrNotExists = errors.New("key and or Revision does not exists")
ErrRevisionMatch = errors.New("revision does not match")
)
type KeyValue struct {
ID int64
Key string
Value []byte
OldValue []byte
OldRevision int64
CreateRevision int64
Revision int64
TTL int64
Version int64
Del int64
}

View File

@ -0,0 +1,18 @@
package driver
import (
"context"
)
type Driver interface {
List(ctx context.Context, revision, limit int64, rangeKey, startKey string) (kvs []*KeyValue, listRevision int64, err error)
Delete(ctx context.Context, key string, revision int64) ([]*KeyValue, error)
// Update should return ErrNotExist when the key does not exist and ErrRevisionMatch when revision doesn't match
Update(ctx context.Context, key string, value []byte, revision, ttl int64) (oldKv *KeyValue, newKv *KeyValue, err error)
Watch(ctx context.Context, key string, revision int64) <-chan Event
Close() error
}

View File

@ -0,0 +1,293 @@
package driver
import (
"context"
"database/sql"
"fmt"
"strings"
"sync/atomic"
"time"
"github.com/ibuildthecloud/kvsql/pkg/broadcast"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
utiltrace "k8s.io/utils/trace"
)
type Generic struct {
db *sql.DB
CleanupSQL string
GetSQL string
ListSQL string
ListRevisionSQL string
ListResumeSQL string
ReplaySQL string
InsertSQL string
GetRevisionSQL string
ToDeleteSQL string
DeleteOldSQL string
revision int64
changes chan *KeyValue
broadcaster broadcast.Broadcaster
cancel func()
}
func (g *Generic) Start(ctx context.Context, db *sql.DB) error {
g.db = db
g.changes = make(chan *KeyValue, 1024)
row := db.QueryRowContext(ctx, g.GetRevisionSQL)
rev := sql.NullInt64{}
if err := row.Scan(&rev); err != nil {
return errors.Wrap(err, "Failed to initialize revision")
}
if rev.Int64 == 0 {
g.revision = 1
} else {
g.revision = rev.Int64
}
go func() {
for {
select {
case <-ctx.Done():
return
case <-time.After(time.Minute):
_, err := g.ExecContext(ctx, g.CleanupSQL, time.Now().Unix())
if err != nil {
logrus.Errorf("Failed to purge expired TTL entries")
}
err = g.cleanup(ctx)
if err != nil {
logrus.Errorf("Failed to cleanup duplicate entries")
}
}
}
}()
return nil
}
func (g *Generic) cleanup(ctx context.Context) error {
rows, err := g.QueryContext(ctx, g.ToDeleteSQL)
if err != nil {
return err
}
defer rows.Close()
toDelete := map[string]int64{}
for rows.Next() {
var (
count, revision int64
name string
)
err := rows.Scan(&count, &name, &revision)
if err != nil {
return err
}
toDelete[name] = revision
}
rows.Close()
for name, rev := range toDelete {
_, err = g.ExecContext(ctx, g.DeleteOldSQL, name, rev, rev)
if err != nil {
return err
}
}
return nil
}
func (g *Generic) Get(ctx context.Context, key string) (*KeyValue, error) {
kvs, _, err := g.List(ctx, 0, 1, key, "")
if err != nil {
return nil, err
}
if len(kvs) > 0 {
return kvs[0], nil
}
return nil, nil
}
func (g *Generic) replayEvents(ctx context.Context, key string, revision int64) ([]*KeyValue, error) {
rows, err := g.QueryContext(ctx, g.ReplaySQL, key, revision)
if err != nil {
return nil, err
}
defer rows.Close()
var resp []*KeyValue
for rows.Next() {
value := KeyValue{}
if err := scan(rows.Scan, &value); err != nil {
return nil, err
}
resp = append(resp, &value)
}
return resp, nil
}
func (g *Generic) List(ctx context.Context, revision, limit int64, rangeKey, startKey string) ([]*KeyValue, int64, error) {
var (
rows *sql.Rows
err error
)
if limit == 0 {
limit = 1000000
} else {
limit = limit + 1
}
listRevision := atomic.LoadInt64(&g.revision)
if !strings.HasSuffix(rangeKey, "%") && revision <= 0 {
rows, err = g.QueryContext(ctx, g.GetSQL, rangeKey, limit)
} else if revision <= 0 {
rows, err = g.QueryContext(ctx, g.ListSQL, rangeKey, limit)
} else if len(startKey) > 0 {
listRevision = revision
rows, err = g.QueryContext(ctx, g.ListResumeSQL, revision, rangeKey, startKey, limit)
} else {
rows, err = g.QueryContext(ctx, g.ListRevisionSQL, revision, rangeKey, limit)
}
if err != nil {
return nil, 0, err
}
defer rows.Close()
var resp []*KeyValue
for rows.Next() {
value := KeyValue{}
if err := scan(rows.Scan, &value); err != nil {
return nil, 0, err
}
if value.Revision > listRevision {
listRevision = value.Revision
}
if value.Del == 0 {
resp = append(resp, &value)
}
}
return resp, listRevision, nil
}
func (g *Generic) Delete(ctx context.Context, key string, revision int64) ([]*KeyValue, error) {
if strings.HasSuffix(key, "%") {
panic("can not delete list revision")
}
_, err := g.mod(ctx, true, key, []byte{}, revision, 0)
return nil, err
}
func (g *Generic) Update(ctx context.Context, key string, value []byte, revision, ttl int64) (*KeyValue, *KeyValue, error) {
kv, err := g.mod(ctx, false, key, value, revision, ttl)
if err != nil {
return nil, nil, err
}
if kv.Version == 1 {
return nil, kv, nil
}
oldKv := *kv
oldKv.Revision = oldKv.OldRevision
oldKv.Value = oldKv.OldValue
return &oldKv, kv, nil
}
func (g *Generic) ExecContext(ctx context.Context, query string, args ...interface{}) (sql.Result, error) {
trace := utiltrace.New(fmt.Sprintf("SQL DB ExecContext query: %s keys: %v", query, args))
defer trace.LogIfLong(500 * time.Millisecond)
return g.db.ExecContext(ctx, query, args...)
}
func (g *Generic) QueryContext(ctx context.Context, query string, args ...interface{}) (*sql.Rows, error) {
trace := utiltrace.New(fmt.Sprintf("SQL DB QueryContext query: %s keys: %v", query, args))
defer trace.LogIfLong(500 * time.Millisecond)
return g.db.QueryContext(ctx, query, args...)
}
func (g *Generic) mod(ctx context.Context, delete bool, key string, value []byte, revision int64, ttl int64) (*KeyValue, error) {
oldKv, err := g.Get(ctx, key)
if err != nil {
return nil, err
}
if revision > 0 && oldKv == nil {
return nil, ErrNotExists
}
if revision > 0 && oldKv.Revision != revision {
return nil, ErrRevisionMatch
}
if ttl > 0 {
ttl = int64(time.Now().Unix()) + ttl
}
newRevision := atomic.AddInt64(&g.revision, 1)
result := &KeyValue{
Key: key,
Value: value,
Revision: newRevision,
TTL: int64(ttl),
CreateRevision: newRevision,
Version: 1,
}
if oldKv != nil {
result.OldRevision = oldKv.Revision
result.OldValue = oldKv.Value
result.TTL = oldKv.TTL
result.CreateRevision = oldKv.CreateRevision
result.Version = oldKv.Version + 1
}
if delete {
result.Del = 1
}
_, err = g.ExecContext(ctx, g.InsertSQL,
result.Key,
result.Value,
result.OldValue,
result.OldRevision,
result.CreateRevision,
result.Revision,
result.TTL,
result.Version,
result.Del,
)
if err != nil {
return nil, err
}
g.changes <- result
return result, nil
}
type scanner func(dest ...interface{}) error
func scan(s scanner, out *KeyValue) error {
return s(
&out.ID,
&out.Key,
&out.Value,
&out.OldValue,
&out.OldRevision,
&out.CreateRevision,
&out.Revision,
&out.TTL,
&out.Version,
&out.Del)
}

View File

@ -0,0 +1,56 @@
package sqlite
import (
"database/sql"
"github.com/ibuildthecloud/kvsql/clientv3/driver"
"github.com/ibuildthecloud/kvsql/clientv3/driver/sqlite"
)
var (
schema = []string{
`create table if not exists key_value
(
name int not null,
value MEDIUMTEXT not null,
create_revision int not null,
revision int not null,
ttl int not null,
version int not null,
del int not null,
old_value MEDIUMTEXT not null,
id int auto_increment,
old_revision int not null,
constraint key_value_pk
primary key (id)
)`,
}
idx = []string{
"create index key_value__name_idx on key_value (name)",
"create index key_value__revision_idx on key_value (revision)",
}
)
func NewMYSQL() *driver.Generic {
return sqlite.NewSQLite()
}
func Open(dataSourceName string) (*sql.DB, error) {
db, err := sql.Open("mysql", dataSourceName)
if err != nil {
return nil, err
}
for _, stmt := range schema {
_, err := db.Exec(stmt)
if err != nil {
return nil, err
}
}
for _, stmt := range idx {
db.Exec(stmt)
}
return db, nil
}

View File

@ -0,0 +1,81 @@
package sqlite
import (
"database/sql"
"strings"
"github.com/ibuildthecloud/kvsql/clientv3/driver"
)
var (
fieldList = "name, value, old_value, old_revision, create_revision, revision, ttl, version, del"
baseList = `
SELECT kv.id, kv.name, kv.value, kv.old_value, kv.old_revision, kv.create_revision, kv.revision, kv.ttl, kv.version, kv.del
FROM key_value kv
INNER JOIN
(
SELECT MAX(revision) revision, kvi.name
FROM key_value kvi
%REV%
GROUP BY kvi.name
) AS r
ON r.name = kv.name AND r.revision = kv.revision
WHERE kv.name like ? %RES% ORDER BY kv.name ASC limit ?
`
insertSQL = `
INSERT INTO key_value(` + fieldList + `)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`
schema = []string{
`create table if not exists key_value
(
name INTEGER,
value BLOB,
create_revision INTEGER,
revision INTEGER,
ttl INTEGER,
version INTEGER,
del INTEGER,
old_value BLOB,
id INTEGER primary key autoincrement,
old_revision INTEGER
)`,
`create index if not exists name_idx on key_value (name)`,
`create index if not exists revision_idx on key_value (revision)`,
}
)
func NewSQLite() *driver.Generic {
return &driver.Generic{
CleanupSQL: "DELETE FROM key_value WHERE ttl > 0 AND ttl < ?",
GetSQL: "SELECT id, " + fieldList + " FROM key_value WHERE name = ? ORDER BY revision DESC limit ?",
ListSQL: strings.Replace(strings.Replace(baseList, "%REV%", "", -1), "%RES%", "", -1),
ListRevisionSQL: strings.Replace(strings.Replace(baseList, "%REV%", "WHERE kvi.revision >= ?", -1), "%RES%", "", -1),
ListResumeSQL: strings.Replace(strings.Replace(baseList, "%REV%", "WHERE kvi.revision <= ?", -1),
"%RES%", "and kv.name > ? ", -1),
InsertSQL: insertSQL,
ReplaySQL: "SELECT id, " + fieldList + " FROM key_value WHERE name like ? and revision > ? ORDER BY revision ASC",
GetRevisionSQL: "SELECT MAX(revision) FROM key_value",
ToDeleteSQL: "SELECT count(*) c, name, max(revision) FROM key_value GROUP BY name HAVING c > 1 or (c = 1 and del = 1)",
DeleteOldSQL: "DELETE FROM key_value WHERE name = ? AND (revision < ? OR (revision = ? AND del = 1))",
}
}
func Open(dataSourceName string) (*sql.DB, error) {
if dataSourceName == "" {
dataSourceName = "./state.db?_journal=WAL&cache=shared"
}
db, err := sql.Open("sqlite3", dataSourceName)
if err != nil {
return nil, err
}
for _, stmt := range schema {
_, err := db.Exec(stmt)
if err != nil {
return nil, err
}
}
return db, nil
}

View File

@ -0,0 +1,111 @@
package driver
import (
"context"
"io"
"strings"
)
type Event struct {
KV *KeyValue
Err error
Start bool
}
func matchesKey(prefix bool, key string, kv *KeyValue) bool {
if kv == nil {
return false
}
if prefix {
return strings.HasPrefix(kv.Key, key[:len(key)-1])
}
return kv.Key == key
}
func (g *Generic) globalWatcher() (chan map[string]interface{}, error) {
ctx, cancel := context.WithCancel(context.Background())
g.cancel = cancel
result := make(chan map[string]interface{}, 100)
go func() {
defer close(result)
for {
select {
case <-ctx.Done():
return
case e := <-g.changes:
result <- map[string]interface{}{
"data": e,
}
}
}
}()
return result, nil
}
func (g *Generic) Watch(ctx context.Context, key string, revision int64) <-chan Event {
ctx, parentCancel := context.WithCancel(ctx)
prefix := strings.HasSuffix(key, "%")
events, err := g.broadcaster.Subscribe(ctx, g.globalWatcher)
if err != nil {
panic(err)
}
watchChan := make(chan Event)
go func() (returnErr error) {
defer func() {
sendErrorAndClose(watchChan, returnErr)
parentCancel()
}()
start(watchChan)
if revision > 0 {
keys, err := g.replayEvents(ctx, key, revision)
if err != nil {
return err
}
for _, k := range keys {
watchChan <- Event{KV: k}
}
}
for e := range events {
k, ok := e["data"].(*KeyValue)
if ok && matchesKey(prefix, key, k) {
watchChan <- Event{KV: k}
}
}
return nil
}()
return watchChan
}
func start(watchResponses chan Event) {
watchResponses <- Event{
Start: true,
}
}
func sendErrorAndClose(watchResponses chan Event, err error) {
if err == nil {
err = io.EOF
}
watchResponses <- Event{Err: err}
close(watchResponses)
}
// Close closes the watcher and cancels all watch requests.
func (g *Generic) Close() error {
if g.cancel != nil {
g.cancel()
g.cancel = nil
}
return nil
}

269
vendor/github.com/ibuildthecloud/kvsql/clientv3/kv.go generated vendored Normal file
View File

@ -0,0 +1,269 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package clientv3
import (
"bytes"
"database/sql"
"fmt"
"strings"
"sync"
"github.com/ibuildthecloud/kvsql/clientv3/driver"
"github.com/ibuildthecloud/kvsql/clientv3/driver/sqlite"
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
"github.com/coreos/etcd/mvcc/mvccpb"
"github.com/docker/docker/pkg/locker"
"golang.org/x/net/context"
)
type (
CompactResponse pb.CompactionResponse
PutResponse pb.PutResponse
GetResponse pb.RangeResponse
DeleteResponse pb.DeleteRangeResponse
TxnResponse pb.TxnResponse
)
var (
connections map[string]*kv
connectionsCtx context.Context
CloseDB func()
connectionsLock sync.Mutex
)
type KV interface {
// Put puts a key-value pair into etcd.
// Note that key,value can be plain bytes array and string is
// an immutable representation of that bytes array.
// To get a string of bytes, do string([]byte{0x10, 0x20}).
Put(ctx context.Context, key, val string, opts ...OpOption) (*PutResponse, error)
// Get retrieves keys.
// By default, Get will return the value for "key", if any.
// When passed WithRange(end), Get will return the keys in the range [key, end).
// When passed WithFromKey(), Get returns keys greater than or equal to key.
// When passed WithRev(rev) with rev > 0, Get retrieves keys at the given revision;
// if the required revision is compacted, the request will fail with ErrCompacted .
// When passed WithLimit(limit), the number of returned keys is bounded by limit.
// When passed WithSort(), the keys will be sorted.
Get(ctx context.Context, key string, opts ...OpOption) (*GetResponse, error)
// Delete deletes a key, or optionally using WithRange(end), [key, end).
Delete(ctx context.Context, key string, opts ...OpOption) (*DeleteResponse, error)
// Compact compacts etcd KV history before the given rev.
Compact(ctx context.Context, rev int64, opts ...CompactOption) (*CompactResponse, error)
// Txn creates a transaction.
Txn(ctx context.Context) Txn
}
type kv struct {
l locker.Locker
d driver.Driver
}
func newKV(cfg Config) (*kv, error) {
connectionsLock.Lock()
defer connectionsLock.Unlock()
if len(cfg.Endpoints) != 1 {
return nil, fmt.Errorf("exactly one endpoint required for DB setting, got %v", cfg.Endpoints)
}
key := cfg.Endpoints[0]
if kv, ok := connections[key]; ok {
return kv, nil
}
if connections == nil {
connections = map[string]*kv{}
connectionsCtx, CloseDB = context.WithCancel(context.Background())
}
parts := strings.SplitN(key, "://", 2)
if len(parts) != 2 {
return nil, fmt.Errorf("invalid kvsql string")
}
var (
db *sql.DB
driver *driver.Generic
err error
)
switch parts[0] {
case "sqlite":
if db, err = sqlite.Open(parts[1]); err != nil {
return nil, err
}
driver = sqlite.NewSQLite()
}
if err := driver.Start(context.TODO(), db); err != nil {
db.Close()
return nil, err
}
kv := &kv{
d:driver,
}
connections[key] = kv
return kv, nil
}
func (k *kv) Put(ctx context.Context, key, val string, opts ...OpOption) (*PutResponse, error) {
//trace := utiltrace.New(fmt.Sprintf("SQL Put key: %s", key))
//defer trace.LogIfLong(500 * time.Millisecond)
k.l.Lock(key)
defer k.l.Unlock(key)
op := OpPut(key, val, opts...)
return k.opPut(ctx, op)
}
func (k *kv) opPut(ctx context.Context, op Op) (*PutResponse, error) {
oldR, r, err := k.d.Update(ctx, op.key, op.val, op.rev, int64(op.leaseID))
if err != nil {
return nil, err
}
return getPutResponse(oldR, r), nil
}
func (k *kv) Get(ctx context.Context, key string, opts ...OpOption) (*GetResponse, error) {
//trace := utiltrace.New(fmt.Sprintf("SQL Get key: %s", key))
//defer trace.LogIfLong(500 * time.Millisecond)
op := OpGet(key, opts...)
return k.opGet(ctx, op)
}
func (k *kv) opGet(ctx context.Context, op Op) (*GetResponse, error) {
var (
rangeKey string
startKey string
)
if op.boundingKey == "" {
rangeKey = op.key
startKey = ""
} else {
rangeKey = op.boundingKey
startKey = string(bytes.SplitN([]byte(op.key), []byte{'\x00'}, -1)[0])
}
kvs, rev, err := k.d.List(ctx, op.rev, op.limit, rangeKey, startKey)
if err != nil {
return nil, err
}
return getResponse(kvs, rev, op.limit, op.countOnly), nil
}
func getPutResponse(oldValue *driver.KeyValue, value *driver.KeyValue) *PutResponse {
return &PutResponse{
Header: &pb.ResponseHeader{
Revision: value.Revision,
},
PrevKv: toKeyValue(oldValue),
}
}
func toKeyValue(v *driver.KeyValue) *mvccpb.KeyValue {
if v == nil {
return nil
}
return &mvccpb.KeyValue{
Key: []byte(v.Key),
CreateRevision: v.CreateRevision,
ModRevision: v.Revision,
Version: v.Version,
Value: v.Value,
Lease: v.TTL,
}
}
func getDeleteResponse(values []*driver.KeyValue) *DeleteResponse {
gr := getResponse(values, 0, 0, false)
return &DeleteResponse{
Header: &pb.ResponseHeader{
Revision: gr.Header.Revision,
},
PrevKvs: gr.Kvs,
}
}
func getResponse(values []*driver.KeyValue, revision, limit int64, count bool) *GetResponse {
gr := &GetResponse{
Header: &pb.ResponseHeader{
Revision: revision,
},
}
for _, v := range values {
kv := toKeyValue(v)
if kv.ModRevision > gr.Header.Revision {
gr.Header.Revision = kv.ModRevision
}
gr.Kvs = append(gr.Kvs, kv)
}
gr.Count = int64(len(gr.Kvs))
if limit > 0 && gr.Count > limit {
gr.Kvs = gr.Kvs[:limit]
gr.More = true
}
if count {
gr.Kvs = nil
}
return gr
}
func (k *kv) Delete(ctx context.Context, key string, opts ...OpOption) (*DeleteResponse, error) {
//trace := utiltrace.New(fmt.Sprintf("SQL Delete key: %s", key))
//defer trace.LogIfLong(500 * time.Millisecond)
k.l.Lock(key)
defer k.l.Unlock(key)
op := OpDelete(key, opts...)
return k.opDelete(ctx, op)
}
func (k *kv) opDelete(ctx context.Context, op Op) (*DeleteResponse, error) {
r, err := k.d.Delete(ctx, op.key, op.rev)
if err != nil {
return nil, err
}
return getDeleteResponse(r), nil
}
func (k *kv) Compact(ctx context.Context, rev int64, opts ...CompactOption) (*CompactResponse, error) {
return &CompactResponse{
Header: &pb.ResponseHeader{},
}, nil
}
func (k *kv) Txn(ctx context.Context) Txn {
return &txn{
kv: k,
ctx: ctx,
}
}

View File

@ -0,0 +1,50 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package clientv3
import (
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
"golang.org/x/net/context"
)
type (
LeaseRevokeResponse pb.LeaseRevokeResponse
LeaseID int64
)
// LeaseGrantResponse wraps the protobuf message LeaseGrantResponse.
type LeaseGrantResponse struct {
*pb.ResponseHeader
ID LeaseID
TTL int64
Error string
}
type Lease interface {
// Grant creates a new lease.
Grant(ctx context.Context, ttl int64) (*LeaseGrantResponse, error)
}
type lessor struct {
}
func (l *lessor) Grant(ctx context.Context, ttl int64) (*LeaseGrantResponse, error) {
return &LeaseGrantResponse{
ResponseHeader: &pb.ResponseHeader{},
TTL: ttl,
ID: LeaseID(ttl),
}, nil
}

200
vendor/github.com/ibuildthecloud/kvsql/clientv3/op.go generated vendored Normal file
View File

@ -0,0 +1,200 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package clientv3
import pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
type opType int
const (
// A default Op has opType 0, which is invalid.
tRange opType = iota + 1
tPut
tDeleteRange
tTxn
)
var (
noPrefixEnd = []byte{0}
)
// Op represents an Operation that kv can execute.
type Op struct {
t opType
key string
boundingKey string
// for range
limit int64
countOnly bool
// for range, watch
rev int64
// for watch, put, delete
prevKV bool
// for put
val []byte
leaseID LeaseID
// txn
cmps []Cmp
thenOps []Op
elseOps []Op
}
// accessors / mutators
func (op Op) IsTxn() bool { return op.t == tTxn }
func (op Op) Txn() ([]Cmp, []Op, []Op) { return op.cmps, op.thenOps, op.elseOps }
// Rev returns the requested revision, if any.
func (op Op) Rev() int64 { return op.rev }
// IsPut returns true iff the operation is a Put.
func (op Op) IsPut() bool { return op.t == tPut }
// IsGet returns true iff the operation is a Get.
func (op Op) IsGet() bool { return op.t == tRange }
// IsDelete returns true iff the operation is a Delete.
func (op Op) IsDelete() bool { return op.t == tDeleteRange }
// IsCountOnly returns whether countOnly is set.
func (op Op) IsCountOnly() bool { return op.countOnly == true }
// ValueBytes returns the byte slice holding the Op's value, if any.
func (op Op) ValueBytes() []byte { return op.val }
// WithValueBytes sets the byte slice for the Op's value.
func (op *Op) WithValueBytes(v []byte) { op.val = v }
func (op Op) toRangeRequest() *pb.RangeRequest {
if op.t != tRange {
panic("op.t != tRange")
}
r := &pb.RangeRequest{
Key: []byte(op.key),
RangeEnd: []byte(op.boundingKey),
Limit: op.limit,
Revision: op.rev,
CountOnly: op.countOnly,
}
return r
}
func OpGet(key string, opts ...OpOption) Op {
ret := Op{t: tRange, key: key}
ret.applyOpts(opts)
return ret
}
func OpDelete(key string, opts ...OpOption) Op {
ret := Op{t: tDeleteRange, key: key}
ret.applyOpts(opts)
switch {
case ret.leaseID != 0:
panic("unexpected lease in delete")
case ret.limit != 0:
panic("unexpected limit in delete")
case ret.rev != 0:
panic("unexpected revision in delete")
case ret.countOnly:
panic("unexpected countOnly in delete")
}
return ret
}
func OpPut(key, val string, opts ...OpOption) Op {
ret := Op{t: tPut, key: key, val: []byte(val)}
ret.applyOpts(opts)
switch {
case len(ret.key) > 0 && ret.key[len(ret.key)-1] == '%':
panic("unexpected range in put")
case ret.limit != 0:
panic("unexpected limit in put")
case ret.rev != 0:
panic("unexpected revision in put")
case ret.countOnly:
panic("unexpected countOnly in put")
}
return ret
}
func (op *Op) applyOpts(opts []OpOption) {
for _, opt := range opts {
opt(op)
}
}
// OpOption configures Operations like Get, Put, Delete.
type OpOption func(*Op)
// WithLease attaches a lease ID to a key in 'Put' request.
func WithLease(leaseID LeaseID) OpOption {
return func(op *Op) { op.leaseID = leaseID }
}
// WithLimit limits the number of results to return from 'Get' request.
// If WithLimit is given a 0 limit, it is treated as no limit.
func WithLimit(n int64) OpOption { return func(op *Op) { op.limit = n } }
// WithRev specifies the store revision for 'Get' request.
// Or the start revision of 'Watch' request.
func WithRev(rev int64) OpOption { return func(op *Op) { op.rev = rev } }
// GetPrefixRangeEnd gets the range end of the prefix.
// 'Get(foo, WithPrefix())' is equal to 'Get(foo, WithRange(GetPrefixRangeEnd(foo))'.
func GetPrefixRangeEnd(prefix string) string {
return prefix + "%"
}
// WithPrefix enables 'Get', 'Delete', or 'Watch' requests to operate
// on the keys with matching prefix. For example, 'Get(foo, WithPrefix())'
// can return 'foo1', 'foo2', and so on.
func WithPrefix() OpOption {
return func(op *Op) {
op.key += "%"
}
}
// WithRange specifies the range of 'Get', 'Delete', 'Watch' requests.
// For example, 'Get' requests with 'WithRange(end)' returns
// the keys in the range [key, end).
// endKey must be lexicographically greater than start key.
func WithRange(endKey string) OpOption {
return func(op *Op) { op.boundingKey = endKey }
}
// WithSerializable makes 'Get' request serializable. By default,
// it's linearizable. Serializable requests are better for lower latency
// requirement.
func WithSerializable() OpOption {
return func(op *Op) {}
}
// WithCountOnly makes the 'Get' request return only the count of keys.
func WithCountOnly() OpOption {
return func(op *Op) { op.countOnly = true }
}
// WithPrevKV gets the previous key-value pair before the event happens. If the previous KV is already compacted,
// nothing will be returned.
func WithPrevKV() OpOption {
return func(op *Op) {
op.prevKV = true
}
}

224
vendor/github.com/ibuildthecloud/kvsql/clientv3/txn.go generated vendored Normal file
View File

@ -0,0 +1,224 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package clientv3
import (
"fmt"
"sync"
"time"
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
"golang.org/x/net/context"
utiltrace "k8s.io/utils/trace"
)
type Txn interface {
// If takes a list of comparison. If all comparisons passed in succeed,
// the operations passed into Then() will be executed. Or the operations
// passed into Else() will be executed.
If(cs ...Cmp) Txn
// Then takes a list of operations. The Ops list will be executed, if the
// comparisons passed in If() succeed.
Then(ops ...Op) Txn
// Else takes a list of operations. The Ops list will be executed, if the
// comparisons passed in If() fail.
Else(ops ...Op) Txn
// Commit tries to commit the transaction.
Commit() (*TxnResponse, error)
}
type txn struct {
kv *kv
ctx context.Context
mu sync.Mutex
cif bool
cthen bool
celse bool
cmps []*pb.Compare
sus []Op
fas []Op
}
func (txn *txn) If(cs ...Cmp) Txn {
txn.mu.Lock()
defer txn.mu.Unlock()
if txn.cif {
panic("cannot call If twice!")
}
if txn.cthen {
panic("cannot call If after Then!")
}
if txn.celse {
panic("cannot call If after Else!")
}
txn.cif = true
for i := range cs {
txn.cmps = append(txn.cmps, (*pb.Compare)(&cs[i]))
}
return txn
}
func (txn *txn) Then(ops ...Op) Txn {
txn.mu.Lock()
defer txn.mu.Unlock()
if txn.cthen {
panic("cannot call Then twice!")
}
if txn.celse {
panic("cannot call Then after Else!")
}
txn.cthen = true
for _, op := range ops {
txn.sus = append(txn.sus, op)
}
return txn
}
func (txn *txn) Else(ops ...Op) Txn {
txn.mu.Lock()
defer txn.mu.Unlock()
if txn.celse {
panic("cannot call Else twice!")
}
txn.celse = true
for _, op := range ops {
txn.fas = append(txn.fas, op)
}
return txn
}
func (txn *txn) do(op Op) (*pb.ResponseOp, error) {
switch op.t {
case tRange:
r, err := txn.kv.opGet(txn.ctx, op)
if err != nil {
return nil, err
}
return &pb.ResponseOp{
Response: &pb.ResponseOp_ResponseRange{
ResponseRange: (*pb.RangeResponse)(r),
},
}, nil
case tPut:
r, err := txn.kv.opPut(txn.ctx, op)
if err != nil {
return nil, err
}
return &pb.ResponseOp{
Response: &pb.ResponseOp_ResponsePut{
ResponsePut: (*pb.PutResponse)(r),
},
}, nil
case tDeleteRange:
r, err := txn.kv.opDelete(txn.ctx, op)
if err != nil {
return nil, err
}
return &pb.ResponseOp{
Response: &pb.ResponseOp_ResponseDeleteRange{
ResponseDeleteRange: (*pb.DeleteRangeResponse)(r),
},
}, nil
default:
return nil, fmt.Errorf("unknown op in txn: %#v", op)
}
}
func (txn *txn) Commit() (*TxnResponse, error) {
trace := utiltrace.New("SQL Commit")
defer trace.LogIfLong(500 * time.Millisecond)
locks := map[string]bool{}
resp := &TxnResponse{
Header: &pb.ResponseHeader{},
}
good := true
for _, c := range txn.cmps {
k := string(c.Key)
if !locks[k] {
txn.kv.l.Lock(k)
trace.Step(fmt.Sprintf("lock acquired: %s", k))
locks[k] = true
defer txn.kv.l.Unlock(k)
}
gr, err := txn.kv.Get(txn.ctx, k)
if err != nil {
return nil, err
}
switch c.Target {
case pb.Compare_VERSION:
ver := int64(0)
if len(gr.Kvs) > 0 {
ver = gr.Kvs[0].Version
}
cv, _ := c.TargetUnion.(*pb.Compare_Version)
if ver != cv.Version {
good = false
}
case pb.Compare_MOD:
mod := int64(0)
if len(gr.Kvs) > 0 {
mod = gr.Kvs[0].ModRevision
}
cv, _ := c.TargetUnion.(*pb.Compare_ModRevision)
if mod != cv.ModRevision {
good = false
}
default:
return nil, fmt.Errorf("unknown txn target %v", c.Target)
}
trace.Step(fmt.Sprintf("condition key %s good %v", k, good))
}
resp.Succeeded = good
ops := txn.sus
if !good {
ops = txn.fas
}
for _, op := range ops {
r, err := txn.do(op)
if err != nil {
return nil, err
}
resp.Responses = append(resp.Responses, r)
trace.Step(fmt.Sprintf("op key %s op %v", op.key, op.t))
}
return resp, nil
}

View File

@ -0,0 +1,148 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package clientv3
import (
v3rpc "github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes"
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
"github.com/coreos/etcd/mvcc/mvccpb"
"golang.org/x/net/context"
)
const (
EventTypeDelete = mvccpb.DELETE
EventTypePut = mvccpb.PUT
)
type Event mvccpb.Event
type WatchChan <-chan WatchResponse
type Watcher interface {
// Watch watches on a key or prefix. The watched events will be returned
// through the returned channel. If revisions waiting to be sent over the
// watch are compacted, then the watch will be canceled by the server, the
// client will post a compacted error watch response, and the channel will close.
Watch(ctx context.Context, key string, opts ...OpOption) WatchChan
// Close closes the watcher and cancels all watch requests.
Close() error
}
func (k *kv) Watch(ctx context.Context, key string, opts ...OpOption) WatchChan {
op := OpGet(key, opts...)
c := k.d.Watch(ctx, op.key, op.rev)
result := make(chan WatchResponse)
go func() {
defer close(result)
for e := range c {
if e.Err != nil {
result <- NewWatchResponseErr(e.Err)
continue
} else if e.Start {
result <- WatchResponse{
Created: true,
}
continue
}
k := e.KV
event := &Event{}
if k.Del == 0 {
event.Type = mvccpb.PUT
} else {
event.Type = mvccpb.DELETE
}
event.Kv = toKeyValue(k)
if event.Kv.Version > 1 && k.OldRevision > 0 {
oldKV := *event.Kv
oldKV.ModRevision = k.OldRevision
oldKV.Value = k.OldValue
event.PrevKv = &oldKV
}
wr := WatchResponse{
Header: pb.ResponseHeader{
Revision: event.Kv.ModRevision,
},
Events: []*Event{
event,
},
}
result <- wr
}
}()
return result
}
func (k *kv) Close() error {
return k.d.Close()
}
type WatchResponse struct {
Header pb.ResponseHeader
Events []*Event
// CompactRevision is the minimum revision the watcher may receive.
CompactRevision int64
// Canceled is used to indicate watch failure.
// If the watch failed and the stream was about to close, before the channel is closed,
// the channel sends a final response that has Canceled set to true with a non-nil Err().
Canceled bool
// Created is used to indicate the creation of the watcher.
Created bool
closeErr error
}
func NewWatchResponseErr(err error) WatchResponse {
return WatchResponse{
Canceled: true,
closeErr: err,
}
}
// IsCreate returns true if the event tells that the key is newly created.
func (e *Event) IsCreate() bool {
return e.Type == EventTypePut && e.Kv.CreateRevision == e.Kv.ModRevision
}
// IsModify returns true if the event tells that a new value is put on existing key.
func (e *Event) IsModify() bool {
return e.Type == EventTypePut && e.Kv.CreateRevision != e.Kv.ModRevision
}
// Err is the error value if this WatchResponse holds an error.
func (wr *WatchResponse) Err() error {
switch {
case wr.closeErr != nil:
return v3rpc.Error(wr.closeErr)
case wr.CompactRevision != 0:
return v3rpc.ErrCompacted
}
return nil
}
// IsProgressNotify returns true if the WatchResponse is progress notification.
func (wr *WatchResponse) IsProgressNotify() bool {
return len(wr.Events) == 0 && !wr.Canceled && !wr.Created && wr.CompactRevision == 0 && wr.Header.Revision != 0
}

97
vendor/github.com/ibuildthecloud/kvsql/factory.go generated vendored Normal file
View File

@ -0,0 +1,97 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package factory
import (
"context"
"fmt"
"sync/atomic"
"time"
"github.com/ibuildthecloud/kvsql/clientv3"
etcd3 "github.com/ibuildthecloud/kvsql/storage"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/apiserver/pkg/storage"
"k8s.io/apiserver/pkg/storage/storagebackend"
"k8s.io/apiserver/pkg/storage/value"
)
func NewKVSQLHealthCheck(c storagebackend.Config) (func() error, error) {
// constructing the etcd v3 client blocks and times out if etcd is not available.
// retry in a loop in the background until we successfully create the client, storing the client or error encountered
clientValue := &atomic.Value{}
clientErrMsg := &atomic.Value{}
clientErrMsg.Store("etcd client connection not yet established")
go wait.PollUntil(time.Second, func() (bool, error) {
client, err := newETCD3Client(c)
if err != nil {
clientErrMsg.Store(err.Error())
return false, nil
}
clientValue.Store(client)
clientErrMsg.Store("")
return true, nil
}, wait.NeverStop)
return func() error {
if errMsg := clientErrMsg.Load().(string); len(errMsg) > 0 {
return fmt.Errorf(errMsg)
}
client := clientValue.Load().(*clientv3.Client)
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
defer cancel()
if _, err := client.Cluster.MemberList(ctx); err != nil {
return fmt.Errorf("error listing etcd members: %v", err)
}
return nil
}, nil
}
func newETCD3Client(c storagebackend.Config) (*clientv3.Client, error) {
cfg := clientv3.Config{
Endpoints: c.Transport.ServerList,
}
if len(cfg.Endpoints) == 0 {
cfg.Endpoints = []string{"sqlite://"}
}
client, err := clientv3.New(cfg)
return client, err
}
func NewKVSQLStorage(c storagebackend.Config) (storage.Interface, func(), error) {
client, err := newETCD3Client(c)
if err != nil {
return nil, nil, err
}
ctx, cancel := context.WithCancel(context.Background())
etcd3.StartCompactor(ctx, client, c.CompactionInterval)
destroyFunc := func() {
cancel()
client.Close()
}
transformer := c.Transformer
if transformer == nil {
transformer = value.IdentityTransformer
}
return etcd3.New(client, c.Codec, c.Prefix, transformer, c.Paging), destroyFunc, nil
}

View File

@ -0,0 +1,126 @@
package broadcast
import (
"context"
"sync"
)
type ConnectFunc func() (chan map[string]interface{}, error)
type Broadcaster struct {
sync.Mutex
running bool
subs map[chan map[string]interface{}]struct{}
}
func (b *Broadcaster) Subscribe(ctx context.Context, connect ConnectFunc) (chan map[string]interface{}, error) {
b.Lock()
defer b.Unlock()
if !b.running {
if err := b.start(connect); err != nil {
return nil, err
}
}
sub := make(chan map[string]interface{}, 100)
if b.subs == nil {
b.subs = map[chan map[string]interface{}]struct{}{}
}
b.subs[sub] = struct{}{}
go func() {
<-ctx.Done()
b.unsub(sub, true)
}()
return sub, nil
}
func (b *Broadcaster) unsub(sub chan map[string]interface{}, lock bool) {
if lock {
b.Lock()
}
if _, ok := b.subs[sub]; ok {
close(sub)
delete(b.subs, sub)
}
if lock {
b.Unlock()
}
}
func (b *Broadcaster) start(connect ConnectFunc) error {
c, err := connect()
if err != nil {
return err
}
go b.stream(c)
b.running = true
return nil
}
func (b *Broadcaster) stream(input chan map[string]interface{}) {
for item := range input {
b.Lock()
for sub := range b.subs {
newItem := cloneMap(item)
select {
case sub <- newItem:
default:
// Slow consumer, drop
go b.unsub(sub, true)
}
}
b.Unlock()
}
b.Lock()
for sub := range b.subs {
b.unsub(sub, false)
}
b.running = false
b.Unlock()
}
func cloneMap(data map[string]interface{}) map[string]interface{} {
if data == nil {
return nil
}
result := map[string]interface{}{}
for k, v := range data {
result[k] = cloneValue(v)
}
return result
}
func cloneValue(v interface{}) interface{} {
switch t := v.(type) {
case []interface{}:
return cloneSlice(t)
case []map[string]interface{}:
return cloneMapSlice(t)
case map[string]interface{}:
return cloneMap(t)
default:
return v
}
}
func cloneMapSlice(data []map[string]interface{}) []interface{} {
result := make([]interface{}, len(data))
for i := range data {
result[i] = cloneValue(data[i])
}
return result
}
func cloneSlice(data []interface{}) []interface{} {
result := make([]interface{}, len(data))
for i := range data {
result[i] = cloneValue(data[i])
}
return result
}

View File

@ -0,0 +1,162 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package etcd3
import (
"context"
"strconv"
"sync"
"time"
"k8s.io/klog"
"github.com/ibuildthecloud/kvsql/clientv3"
)
const (
compactRevKey = "compact_rev_key"
)
var (
endpointsMapMu sync.Mutex
endpointsMap map[string]struct{}
)
func init() {
endpointsMap = make(map[string]struct{})
}
// StartCompactor starts a compactor in the background to compact old version of keys that's not needed.
// By default, we save the most recent 10 minutes data and compact versions > 10minutes ago.
// It should be enough for slow watchers and to tolerate burst.
// TODO: We might keep a longer history (12h) in the future once storage API can take advantage of past version of keys.
func StartCompactor(ctx context.Context, client *clientv3.Client, compactInterval time.Duration) {
endpointsMapMu.Lock()
defer endpointsMapMu.Unlock()
// In one process, we can have only one compactor for one cluster.
// Currently we rely on endpoints to differentiate clusters.
for _, ep := range client.Endpoints() {
if _, ok := endpointsMap[ep]; ok {
klog.V(4).Infof("compactor already exists for endpoints %v", client.Endpoints())
return
}
}
for _, ep := range client.Endpoints() {
endpointsMap[ep] = struct{}{}
}
if compactInterval != 0 {
go compactor(ctx, client, compactInterval)
}
}
// compactor periodically compacts historical versions of keys in etcd.
// It will compact keys with versions older than given interval.
// In other words, after compaction, it will only contain keys set during last interval.
// Any API call for the older versions of keys will return error.
// Interval is the time interval between each compaction. The first compaction happens after "interval".
func compactor(ctx context.Context, client *clientv3.Client, interval time.Duration) {
// Technical definitions:
// We have a special key in etcd defined as *compactRevKey*.
// compactRevKey's value will be set to the string of last compacted revision.
// compactRevKey's version will be used as logical time for comparison. THe version is referred as compact time.
// Initially, because the key doesn't exist, the compact time (version) is 0.
//
// Algorithm:
// - Compare to see if (local compact_time) = (remote compact_time).
// - If yes, increment both local and remote compact_time, and do a compaction.
// - If not, set local to remote compact_time.
//
// Technical details/insights:
//
// The protocol here is lease based. If one compactor CAS successfully, the others would know it when they fail in
// CAS later and would try again in 10 minutes. If an APIServer crashed, another one would "take over" the lease.
//
// For example, in the following diagram, we have a compactor C1 doing compaction in t1, t2. Another compactor C2
// at t1' (t1 < t1' < t2) would CAS fail, set its known oldRev to rev at t1', and try again in t2' (t2' > t2).
// If C1 crashed and wouldn't compact at t2, C2 would CAS successfully at t2'.
//
// oldRev(t2) curRev(t2)
// +
// oldRev curRev |
// + + |
// | | |
// | | t1' | t2'
// +---v-------------v----^---------v------^---->
// t0 t1 t2
//
// We have the guarantees:
// - in normal cases, the interval is 10 minutes.
// - in failover, the interval is >10m and <20m
//
// FAQ:
// - What if time is not accurate? We don't care as long as someone did the compaction. Atomicity is ensured using
// etcd API.
// - What happened under heavy load scenarios? Initially, each apiserver will do only one compaction
// every 10 minutes. This is very unlikely affecting or affected w.r.t. server load.
var compactTime int64
var rev int64
var err error
for {
select {
case <-time.After(interval):
case <-ctx.Done():
return
}
compactTime, rev, err = compact(ctx, client, compactTime, rev)
if err != nil {
klog.Errorf("etcd: endpoint (%v) compact failed: %v", client.Endpoints(), err)
continue
}
}
}
// compact compacts etcd store and returns current rev.
// It will return the current compact time and global revision if no error occurred.
// Note that CAS fail will not incur any error.
func compact(ctx context.Context, client *clientv3.Client, t, rev int64) (int64, int64, error) {
resp, err := client.KV.Txn(ctx).If(
clientv3.Compare(clientv3.Version(compactRevKey), "=", t),
).Then(
clientv3.OpPut(compactRevKey, strconv.FormatInt(rev, 10)), // Expect side effect: increment Version
).Else(
clientv3.OpGet(compactRevKey),
).Commit()
if err != nil {
return t, rev, err
}
curRev := resp.Header.Revision
if !resp.Succeeded {
curTime := resp.Responses[0].GetResponseRange().Kvs[0].Version
return curTime, curRev, nil
}
curTime := t + 1
if rev == 0 {
// We don't compact on bootstrap.
return curTime, curRev, nil
}
if _, err = client.Compact(ctx, rev); err != nil {
return curTime, curRev, err
}
klog.V(4).Infof("etcd: compacted rev (%d), endpoints (%v)", rev, client.Endpoints())
return curTime, curRev, nil
}

View File

@ -0,0 +1,71 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package etcd3
import (
"k8s.io/apimachinery/pkg/api/errors"
etcdrpc "github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
)
func interpretWatchError(err error) error {
switch {
case err == etcdrpc.ErrCompacted:
return errors.NewResourceExpired("The resourceVersion for the provided watch is too old.")
}
return err
}
const (
expired string = "The resourceVersion for the provided list is too old."
continueExpired string = "The provided continue parameter is too old " +
"to display a consistent list result. You can start a new list without " +
"the continue parameter."
inconsistentContinue string = "The provided continue parameter is too old " +
"to display a consistent list result. You can start a new list without " +
"the continue parameter, or use the continue token in this response to " +
"retrieve the remainder of the results. Continuing with the provided " +
"token results in an inconsistent list - objects that were created, " +
"modified, or deleted between the time the first chunk was returned " +
"and now may show up in the list."
)
func interpretListError(err error, paging bool, continueKey, keyPrefix string) error {
switch {
case err == etcdrpc.ErrCompacted:
if paging {
return handleCompactedErrorForPaging(continueKey, keyPrefix)
}
return errors.NewResourceExpired(expired)
}
return err
}
func handleCompactedErrorForPaging(continueKey, keyPrefix string) error {
// continueToken.ResoureVersion=-1 means that the apiserver can
// continue the list at the latest resource version. We don't use rv=0
// for this purpose to distinguish from a bad token that has empty rv.
newToken, err := encodeContinue(continueKey, keyPrefix, -1)
if err != nil {
utilruntime.HandleError(err)
return errors.NewResourceExpired(continueExpired)
}
statusError := errors.NewResourceExpired(inconsistentContinue)
statusError.ErrStatus.ListMeta.Continue = newToken
return statusError
}

View File

@ -0,0 +1,57 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package etcd3
import (
"github.com/ibuildthecloud/kvsql/clientv3"
"github.com/coreos/etcd/mvcc/mvccpb"
)
type event struct {
key string
value []byte
prevValue []byte
rev int64
isDeleted bool
isCreated bool
}
// parseKV converts a KeyValue retrieved from an initial sync() listing to a synthetic isCreated event.
func parseKV(kv *mvccpb.KeyValue) *event {
return &event{
key: string(kv.Key),
value: kv.Value,
prevValue: nil,
rev: kv.ModRevision,
isDeleted: false,
isCreated: true,
}
}
func parseEvent(e *clientv3.Event) *event {
ret := &event{
key: string(e.Kv.Key),
value: e.Kv.Value,
rev: e.Kv.ModRevision,
isDeleted: e.Type == clientv3.EventTypeDelete,
isCreated: e.IsCreate(),
}
if e.PrevKv != nil {
ret.prevValue = e.PrevKv.Value
}
return ret
}

View File

@ -0,0 +1,102 @@
/*
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package etcd3
import (
"context"
"sync"
"time"
"github.com/ibuildthecloud/kvsql/clientv3"
)
// leaseManager is used to manage leases requested from etcd. If a new write
// needs a lease that has similar expiration time to the previous one, the old
// lease will be reused to reduce the overhead of etcd, since lease operations
// are expensive. In the implementation, we only store one previous lease,
// since all the events have the same ttl.
type leaseManager struct {
client *clientv3.Client // etcd client used to grant leases
leaseMu sync.Mutex
prevLeaseID clientv3.LeaseID
prevLeaseExpirationTime time.Time
// The period of time in seconds and percent of TTL that each lease is
// reused. The minimum of them is used to avoid unreasonably large
// numbers. We use var instead of const for testing purposes.
leaseReuseDurationSeconds int64
leaseReuseDurationPercent float64
}
// newDefaultLeaseManager creates a new lease manager using default setting.
func newDefaultLeaseManager(client *clientv3.Client) *leaseManager {
return newLeaseManager(client, 60, 0.05)
}
// newLeaseManager creates a new lease manager with the number of buffered
// leases, lease reuse duration in seconds and percentage. The percentage
// value x means x*100%.
func newLeaseManager(client *clientv3.Client, leaseReuseDurationSeconds int64, leaseReuseDurationPercent float64) *leaseManager {
return &leaseManager{
client: client,
leaseReuseDurationSeconds: leaseReuseDurationSeconds,
leaseReuseDurationPercent: leaseReuseDurationPercent,
}
}
// setLeaseReuseDurationSeconds is used for testing purpose. It is used to
// reduce the extra lease duration to avoid unnecessary timeout in testing.
func (l *leaseManager) setLeaseReuseDurationSeconds(duration int64) {
l.leaseMu.Lock()
defer l.leaseMu.Unlock()
l.leaseReuseDurationSeconds = duration
}
// GetLease returns a lease based on requested ttl: if the cached previous
// lease can be reused, reuse it; otherwise request a new one from etcd.
func (l *leaseManager) GetLease(ctx context.Context, ttl int64) (clientv3.LeaseID, error) {
now := time.Now()
l.leaseMu.Lock()
defer l.leaseMu.Unlock()
// check if previous lease can be reused
reuseDurationSeconds := l.getReuseDurationSecondsLocked(ttl)
valid := now.Add(time.Duration(ttl) * time.Second).Before(l.prevLeaseExpirationTime)
sufficient := now.Add(time.Duration(ttl+reuseDurationSeconds) * time.Second).After(l.prevLeaseExpirationTime)
if valid && sufficient {
return l.prevLeaseID, nil
}
// request a lease with a little extra ttl from etcd
ttl += reuseDurationSeconds
lcr, err := l.client.Lease.Grant(ctx, ttl)
if err != nil {
return clientv3.LeaseID(0), err
}
// cache the new lease id
l.prevLeaseID = lcr.ID
l.prevLeaseExpirationTime = now.Add(time.Duration(ttl) * time.Second)
return lcr.ID, nil
}
// getReuseDurationSecondsLocked returns the reusable duration in seconds
// based on the configuration. Lock has to be acquired before calling this
// function.
func (l *leaseManager) getReuseDurationSecondsLocked(ttl int64) int64 {
reuseDurationSeconds := int64(l.leaseReuseDurationPercent * float64(ttl))
if reuseDurationSeconds > l.leaseReuseDurationSeconds {
reuseDurationSeconds = l.leaseReuseDurationSeconds
}
return reuseDurationSeconds
}

795
vendor/github.com/ibuildthecloud/kvsql/storage/store.go generated vendored Normal file
View File

@ -0,0 +1,795 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package etcd3
import (
"bytes"
"context"
"encoding/base64"
"encoding/json"
"errors"
"fmt"
"path"
"reflect"
"strings"
"time"
"k8s.io/klog"
"github.com/ibuildthecloud/kvsql/clientv3"
apierrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/api/meta"
"k8s.io/apimachinery/pkg/conversion"
"k8s.io/apimachinery/pkg/runtime"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
"k8s.io/apimachinery/pkg/watch"
"k8s.io/apiserver/pkg/storage"
"k8s.io/apiserver/pkg/storage/etcd"
"k8s.io/apiserver/pkg/storage/value"
utiltrace "k8s.io/utils/trace"
)
// authenticatedDataString satisfies the value.Context interface. It uses the key to
// authenticate the stored data. This does not defend against reuse of previously
// encrypted values under the same key, but will prevent an attacker from using an
// encrypted value from a different key. A stronger authenticated data segment would
// include the etcd3 Version field (which is incremented on each write to a key and
// reset when the key is deleted), but an attacker with write access to etcd can
// force deletion and recreation of keys to weaken that angle.
type authenticatedDataString string
// AuthenticatedData implements the value.Context interface.
func (d authenticatedDataString) AuthenticatedData() []byte {
return []byte(string(d))
}
var _ value.Context = authenticatedDataString("")
type store struct {
client *clientv3.Client
// getOpts contains additional options that should be passed
// to all Get() calls.
getOps []clientv3.OpOption
codec runtime.Codec
versioner storage.Versioner
transformer value.Transformer
pathPrefix string
watcher *watcher
pagingEnabled bool
leaseManager *leaseManager
}
type objState struct {
obj runtime.Object
meta *storage.ResponseMeta
rev int64
data []byte
stale bool
}
// New returns an etcd3 implementation of storage.Interface.
func New(c *clientv3.Client, codec runtime.Codec, prefix string, transformer value.Transformer, pagingEnabled bool) storage.Interface {
return newStore(c, true, pagingEnabled, codec, prefix, transformer)
}
func newStore(c *clientv3.Client, quorumRead, pagingEnabled bool, codec runtime.Codec, prefix string, transformer value.Transformer) *store {
versioner := etcd.APIObjectVersioner{}
result := &store{
client: c,
codec: codec,
versioner: versioner,
transformer: transformer,
pagingEnabled: pagingEnabled,
// for compatibility with etcd2 impl.
// no-op for default prefix of '/registry'.
// keeps compatibility with etcd2 impl for custom prefixes that don't start with '/'
pathPrefix: path.Join("/", prefix),
watcher: newWatcher(c, codec, versioner, transformer),
leaseManager: newDefaultLeaseManager(c),
}
if !quorumRead {
// In case of non-quorum reads, we can set WithSerializable()
// options for all Get operations.
result.getOps = append(result.getOps, clientv3.WithSerializable())
}
return result
}
// Versioner implements storage.Interface.Versioner.
func (s *store) Versioner() storage.Versioner {
return s.versioner
}
// Get implements storage.Interface.Get.
func (s *store) Get(ctx context.Context, key string, resourceVersion string, out runtime.Object, ignoreNotFound bool) error {
key = path.Join(s.pathPrefix, key)
getResp, err := s.client.KV.Get(ctx, key, s.getOps...)
if err != nil {
return err
}
if len(getResp.Kvs) == 0 {
if ignoreNotFound {
return runtime.SetZeroValue(out)
}
return storage.NewKeyNotFoundError(key, 0)
}
kv := getResp.Kvs[0]
data, _, err := s.transformer.TransformFromStorage(kv.Value, authenticatedDataString(key))
if err != nil {
return storage.NewInternalError(err.Error())
}
return decode(s.codec, s.versioner, data, out, kv.ModRevision)
}
// Create implements storage.Interface.Create.
func (s *store) Create(ctx context.Context, key string, obj, out runtime.Object, ttl uint64) error {
if version, err := s.versioner.ObjectResourceVersion(obj); err == nil && version != 0 {
return errors.New("resourceVersion should not be set on objects to be created")
}
if err := s.versioner.PrepareObjectForStorage(obj); err != nil {
return fmt.Errorf("PrepareObjectForStorage failed: %v", err)
}
data, err := runtime.Encode(s.codec, obj)
if err != nil {
return err
}
key = path.Join(s.pathPrefix, key)
opts, err := s.ttlOpts(ctx, int64(ttl))
if err != nil {
return err
}
newData, err := s.transformer.TransformToStorage(data, authenticatedDataString(key))
if err != nil {
return storage.NewInternalError(err.Error())
}
txnResp, err := s.client.KV.Txn(ctx).If(
notFound(key),
).Then(
clientv3.OpPut(key, string(newData), opts...),
).Commit()
if err != nil {
return err
}
if !txnResp.Succeeded {
return storage.NewKeyExistsError(key, 0)
}
if out != nil {
putResp := txnResp.Responses[0].GetResponsePut()
return decode(s.codec, s.versioner, data, out, putResp.Header.Revision)
}
return nil
}
// Delete implements storage.Interface.Delete.
func (s *store) Delete(ctx context.Context, key string, out runtime.Object, preconditions *storage.Preconditions) error {
v, err := conversion.EnforcePtr(out)
if err != nil {
panic("unable to convert output object to pointer")
}
key = path.Join(s.pathPrefix, key)
if preconditions == nil {
return s.unconditionalDelete(ctx, key, out)
}
return s.conditionalDelete(ctx, key, out, v, preconditions)
}
func (s *store) unconditionalDelete(ctx context.Context, key string, out runtime.Object) error {
// We need to do get and delete in single transaction in order to
// know the value and revision before deleting it.
txnResp, err := s.client.KV.Txn(ctx).If().Then(
clientv3.OpGet(key),
clientv3.OpDelete(key),
).Commit()
if err != nil {
return err
}
getResp := txnResp.Responses[0].GetResponseRange()
if len(getResp.Kvs) == 0 {
return storage.NewKeyNotFoundError(key, 0)
}
kv := getResp.Kvs[0]
data, _, err := s.transformer.TransformFromStorage(kv.Value, authenticatedDataString(key))
if err != nil {
return storage.NewInternalError(err.Error())
}
return decode(s.codec, s.versioner, data, out, kv.ModRevision)
}
func (s *store) conditionalDelete(ctx context.Context, key string, out runtime.Object, v reflect.Value, preconditions *storage.Preconditions) error {
getResp, err := s.client.KV.Get(ctx, key)
if err != nil {
return err
}
for {
origState, err := s.getState(getResp, key, v, false)
if err != nil {
return err
}
if err := preconditions.Check(key, origState.obj); err != nil {
return err
}
txnResp, err := s.client.KV.Txn(ctx).If(
clientv3.Compare(clientv3.ModRevision(key), "=", origState.rev),
).Then(
clientv3.OpDelete(key),
).Else(
clientv3.OpGet(key),
).Commit()
if err != nil {
return err
}
if !txnResp.Succeeded {
getResp = (*clientv3.GetResponse)(txnResp.Responses[0].GetResponseRange())
klog.V(4).Infof("deletion of %s failed because of a conflict, going to retry", key)
continue
}
return decode(s.codec, s.versioner, origState.data, out, origState.rev)
}
}
// GuaranteedUpdate implements storage.Interface.GuaranteedUpdate.
func (s *store) GuaranteedUpdate(
ctx context.Context, key string, out runtime.Object, ignoreNotFound bool,
preconditions *storage.Preconditions, tryUpdate storage.UpdateFunc, suggestion ...runtime.Object) error {
trace := utiltrace.New(fmt.Sprintf("GuaranteedUpdate etcd3: %s", reflect.TypeOf(out).String()))
defer trace.LogIfLong(500 * time.Millisecond)
v, err := conversion.EnforcePtr(out)
if err != nil {
panic("unable to convert output object to pointer")
}
key = path.Join(s.pathPrefix, key)
getCurrentState := func() (*objState, error) {
getResp, err := s.client.KV.Get(ctx, key, s.getOps...)
if err != nil {
return nil, err
}
return s.getState(getResp, key, v, ignoreNotFound)
}
var origState *objState
var mustCheckData bool
if len(suggestion) == 1 && suggestion[0] != nil {
origState, err = s.getStateFromObject(suggestion[0])
if err != nil {
return err
}
mustCheckData = true
} else {
origState, err = getCurrentState()
if err != nil {
return err
}
}
trace.Step("initial value restored")
transformContext := authenticatedDataString(key)
for {
if err := preconditions.Check(key, origState.obj); err != nil {
return err
}
ret, ttl, err := s.updateState(origState, tryUpdate)
if err != nil {
// It's possible we were working with stale data
if mustCheckData && apierrors.IsConflict(err) {
// Actually fetch
origState, err = getCurrentState()
if err != nil {
return err
}
mustCheckData = false
// Retry
continue
}
return err
}
data, err := runtime.Encode(s.codec, ret)
if err != nil {
return err
}
if !origState.stale && bytes.Equal(data, origState.data) {
// if we skipped the original Get in this loop, we must refresh from
// etcd in order to be sure the data in the store is equivalent to
// our desired serialization
if mustCheckData {
origState, err = getCurrentState()
if err != nil {
return err
}
mustCheckData = false
if !bytes.Equal(data, origState.data) {
// original data changed, restart loop
continue
}
}
// recheck that the data from etcd is not stale before short-circuiting a write
if !origState.stale {
return decode(s.codec, s.versioner, origState.data, out, origState.rev)
}
}
newData, err := s.transformer.TransformToStorage(data, transformContext)
if err != nil {
return storage.NewInternalError(err.Error())
}
opts, err := s.ttlOpts(ctx, int64(ttl))
if err != nil {
return err
}
trace.Step("Transaction prepared")
txnResp, err := s.client.KV.Txn(ctx).If(
clientv3.Compare(clientv3.ModRevision(key), "=", origState.rev),
).Then(
clientv3.OpPut(key, string(newData), opts...),
).Else(
clientv3.OpGet(key),
).Commit()
if err != nil {
return err
}
trace.Step("Transaction committed")
if !txnResp.Succeeded {
getResp := (*clientv3.GetResponse)(txnResp.Responses[0].GetResponseRange())
klog.V(4).Infof("GuaranteedUpdate of %s failed because of a conflict, going to retry", key)
origState, err = s.getState(getResp, key, v, ignoreNotFound)
if err != nil {
return err
}
trace.Step("Retry value restored")
mustCheckData = false
continue
}
putResp := txnResp.Responses[0].GetResponsePut()
return decode(s.codec, s.versioner, data, out, putResp.Header.Revision)
}
}
// GetToList implements storage.Interface.GetToList.
func (s *store) GetToList(ctx context.Context, key string, resourceVersion string, pred storage.SelectionPredicate, listObj runtime.Object) error {
listPtr, err := meta.GetItemsPtr(listObj)
if err != nil {
return err
}
v, err := conversion.EnforcePtr(listPtr)
if err != nil || v.Kind() != reflect.Slice {
panic("need ptr to slice")
}
key = path.Join(s.pathPrefix, key)
getResp, err := s.client.KV.Get(ctx, key, s.getOps...)
if err != nil {
return err
}
if len(getResp.Kvs) > 0 {
data, _, err := s.transformer.TransformFromStorage(getResp.Kvs[0].Value, authenticatedDataString(key))
if err != nil {
return storage.NewInternalError(err.Error())
}
if err := appendListItem(v, data, uint64(getResp.Kvs[0].ModRevision), pred, s.codec, s.versioner); err != nil {
return err
}
}
// update version with cluster level revision
return s.versioner.UpdateList(listObj, uint64(getResp.Header.Revision), "")
}
func (s *store) Count(key string) (int64, error) {
key = path.Join(s.pathPrefix, key)
getResp, err := s.client.KV.Get(context.Background(), key, clientv3.WithRange(clientv3.GetPrefixRangeEnd(key)), clientv3.WithCountOnly())
if err != nil {
return 0, err
}
return getResp.Count, nil
}
// continueToken is a simple structured object for encoding the state of a continue token.
// TODO: if we change the version of the encoded from, we can't start encoding the new version
// until all other servers are upgraded (i.e. we need to support rolling schema)
// This is a public API struct and cannot change.
type continueToken struct {
APIVersion string `json:"v"`
ResourceVersion int64 `json:"rv"`
StartKey string `json:"start"`
}
// parseFrom transforms an encoded predicate from into a versioned struct.
// TODO: return a typed error that instructs clients that they must relist
func decodeContinue(continueValue, keyPrefix string) (fromKey string, rv int64, err error) {
data, err := base64.RawURLEncoding.DecodeString(continueValue)
if err != nil {
return "", 0, fmt.Errorf("continue key is not valid: %v", err)
}
var c continueToken
if err := json.Unmarshal(data, &c); err != nil {
return "", 0, fmt.Errorf("continue key is not valid: %v", err)
}
switch c.APIVersion {
case "meta.k8s.io/v1":
if c.ResourceVersion == 0 {
return "", 0, fmt.Errorf("continue key is not valid: incorrect encoded start resourceVersion (version meta.k8s.io/v1)")
}
if len(c.StartKey) == 0 {
return "", 0, fmt.Errorf("continue key is not valid: encoded start key empty (version meta.k8s.io/v1)")
}
// defend against path traversal attacks by clients - path.Clean will ensure that startKey cannot
// be at a higher level of the hierarchy, and so when we append the key prefix we will end up with
// continue start key that is fully qualified and cannot range over anything less specific than
// keyPrefix.
key := c.StartKey
if !strings.HasPrefix(key, "/") {
key = "/" + key
}
cleaned := path.Clean(key)
if cleaned != key {
return "", 0, fmt.Errorf("continue key is not valid: %s", c.StartKey)
}
return keyPrefix + cleaned[1:], c.ResourceVersion, nil
default:
return "", 0, fmt.Errorf("continue key is not valid: server does not recognize this encoded version %q", c.APIVersion)
}
}
// encodeContinue returns a string representing the encoded continuation of the current query.
func encodeContinue(key, keyPrefix string, resourceVersion int64) (string, error) {
nextKey := strings.TrimPrefix(key, keyPrefix)
if nextKey == key {
return "", fmt.Errorf("unable to encode next field: the key and key prefix do not match")
}
out, err := json.Marshal(&continueToken{APIVersion: "meta.k8s.io/v1", ResourceVersion: resourceVersion, StartKey: nextKey})
if err != nil {
return "", err
}
return base64.RawURLEncoding.EncodeToString(out), nil
}
// List implements storage.Interface.List.
func (s *store) List(ctx context.Context, key, resourceVersion string, pred storage.SelectionPredicate, listObj runtime.Object) error {
listPtr, err := meta.GetItemsPtr(listObj)
if err != nil {
return err
}
v, err := conversion.EnforcePtr(listPtr)
if err != nil || v.Kind() != reflect.Slice {
panic("need ptr to slice")
}
if s.pathPrefix != "" {
key = path.Join(s.pathPrefix, key)
}
// We need to make sure the key ended with "/" so that we only get children "directories".
// e.g. if we have key "/a", "/a/b", "/ab", getting keys with prefix "/a" will return all three,
// while with prefix "/a/" will return only "/a/b" which is the correct answer.
if !strings.HasSuffix(key, "/") {
key += "/"
}
keyPrefix := key
// set the appropriate clientv3 options to filter the returned data set
var paging bool
options := make([]clientv3.OpOption, 0, 4)
if s.pagingEnabled && pred.Limit > 0 {
paging = true
options = append(options, clientv3.WithLimit(pred.Limit))
}
var returnedRV, continueRV int64
var continueKey string
switch {
case s.pagingEnabled && len(pred.Continue) > 0:
continueKey, continueRV, err = decodeContinue(pred.Continue, keyPrefix)
if err != nil {
return apierrors.NewBadRequest(fmt.Sprintf("invalid continue token: %v", err))
}
if len(resourceVersion) > 0 && resourceVersion != "0" {
return apierrors.NewBadRequest("specifying resource version is not allowed when using continue")
}
rangeEnd := clientv3.GetPrefixRangeEnd(keyPrefix)
options = append(options, clientv3.WithRange(rangeEnd))
key = continueKey
// If continueRV > 0, the LIST request needs a specific resource version.
// continueRV==0 is invalid.
// If continueRV < 0, the request is for the latest resource version.
if continueRV > 0 {
options = append(options, clientv3.WithRev(continueRV))
returnedRV = continueRV
}
case s.pagingEnabled && pred.Limit > 0:
if len(resourceVersion) > 0 {
fromRV, err := s.versioner.ParseResourceVersion(resourceVersion)
if err != nil {
return apierrors.NewBadRequest(fmt.Sprintf("invalid resource version: %v", err))
}
if fromRV > 0 {
options = append(options, clientv3.WithRev(int64(fromRV)))
}
returnedRV = int64(fromRV)
}
rangeEnd := clientv3.GetPrefixRangeEnd(keyPrefix)
options = append(options, clientv3.WithRange(rangeEnd))
default:
if len(resourceVersion) > 0 {
fromRV, err := s.versioner.ParseResourceVersion(resourceVersion)
if err != nil {
return apierrors.NewBadRequest(fmt.Sprintf("invalid resource version: %v", err))
}
if fromRV > 0 {
options = append(options, clientv3.WithRev(int64(fromRV)))
}
returnedRV = int64(fromRV)
}
options = append(options, clientv3.WithPrefix())
}
// loop until we have filled the requested limit from etcd or there are no more results
var lastKey []byte
var hasMore bool
for {
getResp, err := s.client.KV.Get(ctx, key, options...)
if err != nil {
return interpretListError(err, len(pred.Continue) > 0, continueKey, keyPrefix)
}
hasMore = getResp.More
if len(getResp.Kvs) == 0 && getResp.More {
return fmt.Errorf("no results were found, but etcd indicated there were more values remaining")
}
// avoid small allocations for the result slice, since this can be called in many
// different contexts and we don't know how significantly the result will be filtered
if pred.Empty() {
growSlice(v, len(getResp.Kvs))
} else {
growSlice(v, 2048, len(getResp.Kvs))
}
// take items from the response until the bucket is full, filtering as we go
for _, kv := range getResp.Kvs {
if paging && int64(v.Len()) >= pred.Limit {
hasMore = true
break
}
lastKey = kv.Key
data, _, err := s.transformer.TransformFromStorage(kv.Value, authenticatedDataString(kv.Key))
if err != nil {
utilruntime.HandleError(fmt.Errorf("unable to transform key %q: %v", kv.Key, err))
continue
}
if err := appendListItem(v, data, uint64(kv.ModRevision), pred, s.codec, s.versioner); err != nil {
return err
}
}
// indicate to the client which resource version was returned
if returnedRV == 0 {
returnedRV = getResp.Header.Revision
}
// no more results remain or we didn't request paging
if !hasMore || !paging {
break
}
// we're paging but we have filled our bucket
if int64(v.Len()) >= pred.Limit {
break
}
key = string(lastKey) + "\x00"
}
// instruct the client to begin querying from immediately after the last key we returned
// we never return a key that the client wouldn't be allowed to see
if hasMore {
// we want to start immediately after the last key
next, err := encodeContinue(string(lastKey)+"\x00", keyPrefix, returnedRV)
if err != nil {
return err
}
return s.versioner.UpdateList(listObj, uint64(returnedRV), next)
}
// no continuation
return s.versioner.UpdateList(listObj, uint64(returnedRV), "")
}
// growSlice takes a slice value and grows its capacity up
// to the maximum of the passed sizes or maxCapacity, whichever
// is smaller. Above maxCapacity decisions about allocation are left
// to the Go runtime on append. This allows a caller to make an
// educated guess about the potential size of the total list while
// still avoiding overly aggressive initial allocation. If sizes
// is empty maxCapacity will be used as the size to grow.
func growSlice(v reflect.Value, maxCapacity int, sizes ...int) {
cap := v.Cap()
max := cap
for _, size := range sizes {
if size > max {
max = size
}
}
if len(sizes) == 0 || max > maxCapacity {
max = maxCapacity
}
if max <= cap {
return
}
if v.Len() > 0 {
extra := reflect.MakeSlice(v.Type(), 0, max)
reflect.Copy(extra, v)
v.Set(extra)
} else {
extra := reflect.MakeSlice(v.Type(), 0, max)
v.Set(extra)
}
}
// Watch implements storage.Interface.Watch.
func (s *store) Watch(ctx context.Context, key string, resourceVersion string, pred storage.SelectionPredicate) (watch.Interface, error) {
return s.watch(ctx, key, resourceVersion, pred, false)
}
// WatchList implements storage.Interface.WatchList.
func (s *store) WatchList(ctx context.Context, key string, resourceVersion string, pred storage.SelectionPredicate) (watch.Interface, error) {
return s.watch(ctx, key, resourceVersion, pred, true)
}
func (s *store) watch(ctx context.Context, key string, rv string, pred storage.SelectionPredicate, recursive bool) (watch.Interface, error) {
rev, err := s.versioner.ParseResourceVersion(rv)
if err != nil {
return nil, err
}
key = path.Join(s.pathPrefix, key)
return s.watcher.Watch(ctx, key, int64(rev), recursive, pred)
}
func (s *store) getState(getResp *clientv3.GetResponse, key string, v reflect.Value, ignoreNotFound bool) (*objState, error) {
state := &objState{
obj: reflect.New(v.Type()).Interface().(runtime.Object),
meta: &storage.ResponseMeta{},
}
if len(getResp.Kvs) == 0 {
if !ignoreNotFound {
return nil, storage.NewKeyNotFoundError(key, 0)
}
if err := runtime.SetZeroValue(state.obj); err != nil {
return nil, err
}
} else {
data, stale, err := s.transformer.TransformFromStorage(getResp.Kvs[0].Value, authenticatedDataString(key))
if err != nil {
return nil, storage.NewInternalError(err.Error())
}
state.rev = getResp.Kvs[0].ModRevision
state.meta.ResourceVersion = uint64(state.rev)
state.data = data
state.stale = stale
if err := decode(s.codec, s.versioner, state.data, state.obj, state.rev); err != nil {
return nil, err
}
}
return state, nil
}
func (s *store) getStateFromObject(obj runtime.Object) (*objState, error) {
state := &objState{
obj: obj,
meta: &storage.ResponseMeta{},
}
rv, err := s.versioner.ObjectResourceVersion(obj)
if err != nil {
return nil, fmt.Errorf("couldn't get resource version: %v", err)
}
state.rev = int64(rv)
state.meta.ResourceVersion = uint64(state.rev)
// Compute the serialized form - for that we need to temporarily clean
// its resource version field (those are not stored in etcd).
if err := s.versioner.PrepareObjectForStorage(obj); err != nil {
return nil, fmt.Errorf("PrepareObjectForStorage failed: %v", err)
}
state.data, err = runtime.Encode(s.codec, obj)
if err != nil {
return nil, err
}
s.versioner.UpdateObject(state.obj, uint64(rv))
return state, nil
}
func (s *store) updateState(st *objState, userUpdate storage.UpdateFunc) (runtime.Object, uint64, error) {
ret, ttlPtr, err := userUpdate(st.obj, *st.meta)
if err != nil {
return nil, 0, err
}
if err := s.versioner.PrepareObjectForStorage(ret); err != nil {
return nil, 0, fmt.Errorf("PrepareObjectForStorage failed: %v", err)
}
var ttl uint64
if ttlPtr != nil {
ttl = *ttlPtr
}
return ret, ttl, nil
}
// ttlOpts returns client options based on given ttl.
// ttl: if ttl is non-zero, it will attach the key to a lease with ttl of roughly the same length
func (s *store) ttlOpts(ctx context.Context, ttl int64) ([]clientv3.OpOption, error) {
if ttl == 0 {
return nil, nil
}
id, err := s.leaseManager.GetLease(ctx, ttl)
if err != nil {
return nil, err
}
return []clientv3.OpOption{clientv3.WithLease(id)}, nil
}
// decode decodes value of bytes into object. It will also set the object resource version to rev.
// On success, objPtr would be set to the object.
func decode(codec runtime.Codec, versioner storage.Versioner, value []byte, objPtr runtime.Object, rev int64) error {
if _, err := conversion.EnforcePtr(objPtr); err != nil {
panic("unable to convert output object to pointer")
}
_, _, err := codec.Decode(value, nil, objPtr)
if err != nil {
return err
}
// being unable to set the version does not prevent the object from being extracted
versioner.UpdateObject(objPtr, uint64(rev))
return nil
}
// appendListItem decodes and appends the object (if it passes filter) to v, which must be a slice.
func appendListItem(v reflect.Value, data []byte, rev uint64, pred storage.SelectionPredicate, codec runtime.Codec, versioner storage.Versioner) error {
obj, _, err := codec.Decode(data, nil, reflect.New(v.Type().Elem()).Interface().(runtime.Object))
if err != nil {
return err
}
// being unable to set the version does not prevent the object from being extracted
versioner.UpdateObject(obj, rev)
if matched, err := pred.Matches(obj); err == nil && matched {
v.Set(reflect.Append(v, reflect.ValueOf(obj).Elem()))
}
return nil
}
func notFound(key string) clientv3.Cmp {
return clientv3.Compare(clientv3.ModRevision(key), "=", 0)
}

View File

@ -0,0 +1,402 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package etcd3
import (
"context"
"errors"
"fmt"
"k8s.io/klog"
"os"
"strconv"
"strings"
"sync"
apierrs "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/watch"
"k8s.io/apiserver/pkg/storage"
"k8s.io/apiserver/pkg/storage/value"
"github.com/ibuildthecloud/kvsql/clientv3"
)
const (
// We have set a buffer in order to reduce times of context switches.
incomingBufSize = 100
outgoingBufSize = 100
)
// fatalOnDecodeError is used during testing to panic the server if watcher encounters a decoding error
var fatalOnDecodeError = false
// errTestingDecode is the only error that testingDeferOnDecodeError catches during a panic
var errTestingDecode = errors.New("sentinel error only used during testing to indicate watch decoding error")
// testingDeferOnDecodeError is used during testing to recover from a panic caused by errTestingDecode, all other values continue to panic
func testingDeferOnDecodeError() {
if r := recover(); r != nil && r != errTestingDecode {
panic(r)
}
}
func init() {
// check to see if we are running in a test environment
fatalOnDecodeError, _ = strconv.ParseBool(os.Getenv("KUBE_PANIC_WATCH_DECODE_ERROR"))
}
type watcher struct {
client *clientv3.Client
codec runtime.Codec
versioner storage.Versioner
transformer value.Transformer
}
// watchChan implements watch.Interface.
type watchChan struct {
watcher *watcher
key string
initialRev int64
recursive bool
internalPred storage.SelectionPredicate
ctx context.Context
cancel context.CancelFunc
incomingEventChan chan *event
resultChan chan watch.Event
errChan chan error
}
func newWatcher(client *clientv3.Client, codec runtime.Codec, versioner storage.Versioner, transformer value.Transformer) *watcher {
return &watcher{
client: client,
codec: codec,
versioner: versioner,
transformer: transformer,
}
}
// Watch watches on a key and returns a watch.Interface that transfers relevant notifications.
// If rev is zero, it will return the existing object(s) and then start watching from
// the maximum revision+1 from returned objects.
// If rev is non-zero, it will watch events happened after given revision.
// If recursive is false, it watches on given key.
// If recursive is true, it watches any children and directories under the key, excluding the root key itself.
// pred must be non-nil. Only if pred matches the change, it will be returned.
func (w *watcher) Watch(ctx context.Context, key string, rev int64, recursive bool, pred storage.SelectionPredicate) (watch.Interface, error) {
if recursive && !strings.HasSuffix(key, "/") {
key += "/"
}
wc := w.createWatchChan(ctx, key, rev, recursive, pred)
go wc.run()
return wc, nil
}
func (w *watcher) createWatchChan(ctx context.Context, key string, rev int64, recursive bool, pred storage.SelectionPredicate) *watchChan {
wc := &watchChan{
watcher: w,
key: key,
initialRev: rev,
recursive: recursive,
internalPred: pred,
incomingEventChan: make(chan *event, incomingBufSize),
resultChan: make(chan watch.Event, outgoingBufSize),
errChan: make(chan error, 1),
}
if pred.Empty() {
// The filter doesn't filter out any object.
wc.internalPred = storage.Everything
}
wc.ctx, wc.cancel = context.WithCancel(ctx)
return wc
}
func (wc *watchChan) run() {
watchClosedCh := make(chan struct{})
go wc.startWatching(watchClosedCh)
var resultChanWG sync.WaitGroup
resultChanWG.Add(1)
go wc.processEvent(&resultChanWG)
select {
case err := <-wc.errChan:
if err == context.Canceled {
break
}
errResult := transformErrorToEvent(err)
if errResult != nil {
// error result is guaranteed to be received by user before closing ResultChan.
select {
case wc.resultChan <- *errResult:
case <-wc.ctx.Done(): // user has given up all results
}
}
case <-watchClosedCh:
case <-wc.ctx.Done(): // user cancel
}
// We use wc.ctx to reap all goroutines. Under whatever condition, we should stop them all.
// It's fine to double cancel.
wc.cancel()
// we need to wait until resultChan wouldn't be used anymore
resultChanWG.Wait()
close(wc.resultChan)
}
func (wc *watchChan) Stop() {
wc.cancel()
}
func (wc *watchChan) ResultChan() <-chan watch.Event {
return wc.resultChan
}
// sync tries to retrieve existing data and send them to process.
// The revision to watch will be set to the revision in response.
// All events sent will have isCreated=true
func (wc *watchChan) sync() error {
opts := []clientv3.OpOption{}
if wc.recursive {
opts = append(opts, clientv3.WithPrefix())
}
getResp, err := wc.watcher.client.Get(wc.ctx, wc.key, opts...)
if err != nil {
return err
}
wc.initialRev = getResp.Header.Revision
for _, kv := range getResp.Kvs {
wc.sendEvent(parseKV(kv))
}
return nil
}
// startWatching does:
// - get current objects if initialRev=0; set initialRev to current rev
// - watch on given key and send events to process.
func (wc *watchChan) startWatching(watchClosedCh chan struct{}) {
if wc.initialRev == 0 {
if err := wc.sync(); err != nil {
klog.Errorf("failed to sync with latest state: %v", err)
wc.sendError(err)
return
}
}
opts := []clientv3.OpOption{clientv3.WithRev(wc.initialRev + 1), clientv3.WithPrevKV()}
if wc.recursive {
opts = append(opts, clientv3.WithPrefix())
}
wch := wc.watcher.client.Watch(wc.ctx, wc.key, opts...)
for wres := range wch {
if wres.Err() != nil {
err := wres.Err()
// If there is an error on server (e.g. compaction), the channel will return it before closed.
klog.Errorf("watch chan error: %v", err)
wc.sendError(err)
return
}
for _, e := range wres.Events {
wc.sendEvent(parseEvent(e))
}
}
// When we come to this point, it's only possible that client side ends the watch.
// e.g. cancel the context, close the client.
// If this watch chan is broken and context isn't cancelled, other goroutines will still hang.
// We should notify the main thread that this goroutine has exited.
close(watchClosedCh)
}
// processEvent processes events from etcd watcher and sends results to resultChan.
func (wc *watchChan) processEvent(wg *sync.WaitGroup) {
defer wg.Done()
for {
select {
case e := <-wc.incomingEventChan:
res := wc.transform(e)
if res == nil {
continue
}
if len(wc.resultChan) == outgoingBufSize {
klog.V(3).Infof("Fast watcher, slow processing. Number of buffered events: %d."+
"Probably caused by slow dispatching events to watchers", outgoingBufSize)
}
// If user couldn't receive results fast enough, we also block incoming events from watcher.
// Because storing events in local will cause more memory usage.
// The worst case would be closing the fast watcher.
select {
case wc.resultChan <- *res:
case <-wc.ctx.Done():
return
}
case <-wc.ctx.Done():
return
}
}
}
func (wc *watchChan) filter(obj runtime.Object) bool {
if wc.internalPred.Empty() {
return true
}
matched, err := wc.internalPred.Matches(obj)
return err == nil && matched
}
func (wc *watchChan) acceptAll() bool {
return wc.internalPred.Empty()
}
// transform transforms an event into a result for user if not filtered.
func (wc *watchChan) transform(e *event) (res *watch.Event) {
curObj, oldObj, err := wc.prepareObjs(e)
if err != nil {
klog.Errorf("failed to prepare current and previous objects: %v", err)
wc.sendError(err)
return nil
}
switch {
case e.isDeleted:
if !wc.filter(oldObj) {
return nil
}
res = &watch.Event{
Type: watch.Deleted,
Object: oldObj,
}
case e.isCreated:
if !wc.filter(curObj) {
return nil
}
res = &watch.Event{
Type: watch.Added,
Object: curObj,
}
default:
if wc.acceptAll() {
res = &watch.Event{
Type: watch.Modified,
Object: curObj,
}
return res
}
curObjPasses := wc.filter(curObj)
oldObjPasses := wc.filter(oldObj)
switch {
case curObjPasses && oldObjPasses:
res = &watch.Event{
Type: watch.Modified,
Object: curObj,
}
case curObjPasses && !oldObjPasses:
res = &watch.Event{
Type: watch.Added,
Object: curObj,
}
case !curObjPasses && oldObjPasses:
res = &watch.Event{
Type: watch.Deleted,
Object: oldObj,
}
}
}
return res
}
func transformErrorToEvent(err error) *watch.Event {
err = interpretWatchError(err)
if _, ok := err.(apierrs.APIStatus); !ok {
err = apierrs.NewInternalError(err)
}
status := err.(apierrs.APIStatus).Status()
return &watch.Event{
Type: watch.Error,
Object: &status,
}
}
func (wc *watchChan) sendError(err error) {
select {
case wc.errChan <- err:
case <-wc.ctx.Done():
}
}
func (wc *watchChan) sendEvent(e *event) {
if len(wc.incomingEventChan) == incomingBufSize {
klog.V(3).Infof("Fast watcher, slow processing. Number of buffered events: %d."+
"Probably caused by slow decoding, user not receiving fast, or other processing logic",
incomingBufSize)
}
select {
case wc.incomingEventChan <- e:
case <-wc.ctx.Done():
}
}
func (wc *watchChan) prepareObjs(e *event) (curObj runtime.Object, oldObj runtime.Object, err error) {
if !e.isDeleted {
data, _, err := wc.watcher.transformer.TransformFromStorage(e.value, authenticatedDataString(e.key))
if err != nil {
return nil, nil, err
}
curObj, err = decodeObj(wc.watcher.codec, wc.watcher.versioner, data, e.rev)
if err != nil {
return nil, nil, err
}
}
// We need to decode prevValue, only if this is deletion event or
// the underlying filter doesn't accept all objects (otherwise we
// know that the filter for previous object will return true and
// we need the object only to compute whether it was filtered out
// before).
if len(e.prevValue) > 0 && (e.isDeleted || !wc.acceptAll()) {
data, _, err := wc.watcher.transformer.TransformFromStorage(e.prevValue, authenticatedDataString(e.key))
if err != nil {
return nil, nil, err
}
// Note that this sends the *old* object with the etcd revision for the time at
// which it gets deleted.
oldObj, err = decodeObj(wc.watcher.codec, wc.watcher.versioner, data, e.rev)
if err != nil {
return nil, nil, err
}
}
return curObj, oldObj, nil
}
func decodeObj(codec runtime.Codec, versioner storage.Versioner, data []byte, rev int64) (_ runtime.Object, err error) {
obj, err := runtime.Decode(codec, []byte(data))
if err != nil {
if fatalOnDecodeError {
// catch watch decode error iff we caused it on
// purpose during a unit test
defer testingDeferOnDecodeError()
// we are running in a test environment and thus an
// error here is due to a coder mistake if the defer
// does not catch it
panic(err)
}
return nil, err
}
// ensure resource version is set on the object we load from etcd
if err := versioner.UpdateObject(obj, uint64(rev)); err != nil {
return nil, fmt.Errorf("failure to version api object (%d) %#v: %v", rev, obj, err)
}
return obj, nil
}