mirror of https://github.com/k3s-io/k3s
Add support for SQL driver
parent
80b9f6022f
commit
14b632c696
|
@ -25,6 +25,7 @@ import (
|
|||
|
||||
const (
|
||||
StorageTypeUnset = ""
|
||||
StorageTypeKVSQL = "kvsql"
|
||||
StorageTypeETCD3 = "etcd3"
|
||||
|
||||
DefaultCompactInterval = 5 * time.Minute
|
||||
|
|
|
@ -19,6 +19,7 @@ package factory
|
|||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/ibuildthecloud/kvsql"
|
||||
"k8s.io/apiserver/pkg/storage"
|
||||
"k8s.io/apiserver/pkg/storage/storagebackend"
|
||||
)
|
||||
|
@ -29,7 +30,13 @@ type DestroyFunc func()
|
|||
// Create creates a storage backend based on given config.
|
||||
func Create(c storagebackend.Config) (storage.Interface, DestroyFunc, error) {
|
||||
switch c.Type {
|
||||
case storagebackend.StorageTypeUnset, storagebackend.StorageTypeETCD3:
|
||||
case storagebackend.StorageTypeUnset, storagebackend.StorageTypeKVSQL:
|
||||
return factory.NewKVSQLStorage(c)
|
||||
case storagebackend.StorageTypeETCD3:
|
||||
// TODO: We have the following features to implement:
|
||||
// - Support secure connection by using key, cert, and CA files.
|
||||
// - Honor "https" scheme to support secure connection in gRPC.
|
||||
// - Support non-quorum read.
|
||||
return newETCD3Storage(c)
|
||||
default:
|
||||
return nil, nil, fmt.Errorf("unknown storage type: %s", c.Type)
|
||||
|
@ -39,7 +46,9 @@ func Create(c storagebackend.Config) (storage.Interface, DestroyFunc, error) {
|
|||
// CreateHealthCheck creates a healthcheck function based on given config.
|
||||
func CreateHealthCheck(c storagebackend.Config) (func() error, error) {
|
||||
switch c.Type {
|
||||
case storagebackend.StorageTypeUnset, storagebackend.StorageTypeETCD3:
|
||||
case storagebackend.StorageTypeUnset, storagebackend.StorageTypeKVSQL:
|
||||
return factory.NewKVSQLHealthCheck(c)
|
||||
case storagebackend.StorageTypeETCD3:
|
||||
return newETCD3HealthCheck(c)
|
||||
default:
|
||||
return nil, fmt.Errorf("unknown storage type: %s", c.Type)
|
||||
|
|
|
@ -0,0 +1,65 @@
|
|||
Locker
|
||||
=====
|
||||
|
||||
locker provides a mechanism for creating finer-grained locking to help
|
||||
free up more global locks to handle other tasks.
|
||||
|
||||
The implementation looks close to a sync.Mutex, however, the user must provide a
|
||||
reference to use to refer to the underlying lock when locking and unlocking,
|
||||
and unlock may generate an error.
|
||||
|
||||
If a lock with a given name does not exist when `Lock` is called, one is
|
||||
created.
|
||||
Lock references are automatically cleaned up on `Unlock` if nothing else is
|
||||
waiting for the lock.
|
||||
|
||||
|
||||
## Usage
|
||||
|
||||
```go
|
||||
package important
|
||||
|
||||
import (
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/docker/docker/pkg/locker"
|
||||
)
|
||||
|
||||
type important struct {
|
||||
locks *locker.Locker
|
||||
data map[string]interface{}
|
||||
mu sync.Mutex
|
||||
}
|
||||
|
||||
func (i *important) Get(name string) interface{} {
|
||||
i.locks.Lock(name)
|
||||
defer i.locks.Unlock(name)
|
||||
return i.data[name]
|
||||
}
|
||||
|
||||
func (i *important) Create(name string, data interface{}) {
|
||||
i.locks.Lock(name)
|
||||
defer i.locks.Unlock(name)
|
||||
|
||||
i.createImportant(data)
|
||||
|
||||
i.mu.Lock()
|
||||
i.data[name] = data
|
||||
i.mu.Unlock()
|
||||
}
|
||||
|
||||
func (i *important) createImportant(data interface{}) {
|
||||
time.Sleep(10 * time.Second)
|
||||
}
|
||||
```
|
||||
|
||||
For functions dealing with a given name, always lock at the beginning of the
|
||||
function (or before doing anything with the underlying state), this ensures any
|
||||
other function that is dealing with the same name will block.
|
||||
|
||||
When needing to modify the underlying data, use the global lock to ensure nothing
|
||||
else is modifying it at the same time.
|
||||
Since the name lock is already in place, no reads of that name will occur while the modification
|
||||
is being performed.
|
||||
|
|
@ -0,0 +1,112 @@
|
|||
/*
|
||||
Package locker provides a mechanism for creating finer-grained locking to help
|
||||
free up more global locks to handle other tasks.
|
||||
|
||||
The implementation looks close to a sync.Mutex, however the user must provide a
|
||||
reference to use to refer to the underlying lock when locking and unlocking,
|
||||
and unlock may generate an error.
|
||||
|
||||
If a lock with a given name does not exist when `Lock` is called, one is
|
||||
created.
|
||||
Lock references are automatically cleaned up on `Unlock` if nothing else is
|
||||
waiting for the lock.
|
||||
*/
|
||||
package locker // import "github.com/docker/docker/pkg/locker"
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
)
|
||||
|
||||
// ErrNoSuchLock is the error Unlock reports when no lock is registered under
// the requested name.
var ErrNoSuchLock = errors.New("no such lock")

// Locker hands out mutexes that are addressed by a caller-supplied name.
// The zero value is usable: the name table is created on the first Lock.
type Locker struct {
	mu    sync.Mutex          // guards locks
	locks map[string]*lockCtr // one entry per name currently held or waited on
}

// lockCtr is the per-name lock: a mutex plus a count of goroutines that are
// queued on it.
type lockCtr struct {
	mu sync.Mutex
	// waiters is the number of callers currently trying to acquire mu.
	// It is an int32 rather than a uint32 so that dec can add -1.
	waiters int32
}

// inc records one more goroutine waiting for the lock.
func (lc *lockCtr) inc() {
	atomic.AddInt32(&lc.waiters, 1)
}

// dec records that one waiting goroutine is no longer waiting.
func (lc *lockCtr) dec() {
	atomic.AddInt32(&lc.waiters, -1)
}

// count reports how many goroutines are waiting right now.
func (lc *lockCtr) count() int32 {
	return atomic.LoadInt32(&lc.waiters)
}

// Lock acquires the underlying mutex.
func (lc *lockCtr) Lock() {
	lc.mu.Lock()
}

// Unlock releases the underlying mutex.
func (lc *lockCtr) Unlock() {
	lc.mu.Unlock()
}

// New returns a Locker with an empty, ready-to-use name table.
func New() *Locker {
	return &Locker{locks: map[string]*lockCtr{}}
}

// Lock acquires the mutex registered under name, creating it first when no
// such mutex exists yet. It blocks until the named mutex is available.
func (l *Locker) Lock(name string) {
	l.mu.Lock()
	if l.locks == nil {
		l.locks = make(map[string]*lockCtr)
	}

	entry, ok := l.locks[name]
	if !ok {
		entry = new(lockCtr)
		l.locks[name] = entry
	}

	// Register as a waiter while still holding the table mutex; a concurrent
	// Unlock then sees a non-zero count and leaves the entry in the table.
	entry.inc()
	l.mu.Unlock()

	// Block on the named mutex only after releasing the table mutex, so that
	// operations on other names are not held up. Once acquired, this caller
	// is a holder and no longer a waiter.
	entry.Lock()
	entry.dec()
}

// Unlock releases the mutex registered under name and returns nil, or returns
// ErrNoSuchLock when nothing is registered under that name. The entry is
// removed from the table when no other caller is waiting on it.
func (l *Locker) Unlock(name string) error {
	l.mu.Lock()
	defer l.mu.Unlock()

	entry, ok := l.locks[name]
	if !ok {
		return ErrNoSuchLock
	}

	if entry.count() == 0 {
		delete(l.locks, name)
	}
	entry.Unlock()
	return nil
}
|
|
@ -0,0 +1,161 @@
|
|||
package locker // import "github.com/docker/docker/pkg/locker"
|
||||
|
||||
import (
|
||||
"math/rand"
|
||||
"strconv"
|
||||
"sync"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestLockCounter(t *testing.T) {
|
||||
l := &lockCtr{}
|
||||
l.inc()
|
||||
|
||||
if l.waiters != 1 {
|
||||
t.Fatal("counter inc failed")
|
||||
}
|
||||
|
||||
l.dec()
|
||||
if l.waiters != 0 {
|
||||
t.Fatal("counter dec failed")
|
||||
}
|
||||
}
|
||||
|
||||
func TestLockerLock(t *testing.T) {
|
||||
l := New()
|
||||
l.Lock("test")
|
||||
ctr := l.locks["test"]
|
||||
|
||||
if ctr.count() != 0 {
|
||||
t.Fatalf("expected waiters to be 0, got :%d", ctr.waiters)
|
||||
}
|
||||
|
||||
chDone := make(chan struct{})
|
||||
go func() {
|
||||
l.Lock("test")
|
||||
close(chDone)
|
||||
}()
|
||||
|
||||
chWaiting := make(chan struct{})
|
||||
go func() {
|
||||
for range time.Tick(1 * time.Millisecond) {
|
||||
if ctr.count() == 1 {
|
||||
close(chWaiting)
|
||||
break
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
select {
|
||||
case <-chWaiting:
|
||||
case <-time.After(3 * time.Second):
|
||||
t.Fatal("timed out waiting for lock waiters to be incremented")
|
||||
}
|
||||
|
||||
select {
|
||||
case <-chDone:
|
||||
t.Fatal("lock should not have returned while it was still held")
|
||||
default:
|
||||
}
|
||||
|
||||
if err := l.Unlock("test"); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
select {
|
||||
case <-chDone:
|
||||
case <-time.After(3 * time.Second):
|
||||
t.Fatalf("lock should have completed")
|
||||
}
|
||||
|
||||
if ctr.count() != 0 {
|
||||
t.Fatalf("expected waiters to be 0, got: %d", ctr.count())
|
||||
}
|
||||
}
|
||||
|
||||
func TestLockerUnlock(t *testing.T) {
|
||||
l := New()
|
||||
|
||||
l.Lock("test")
|
||||
l.Unlock("test")
|
||||
|
||||
chDone := make(chan struct{})
|
||||
go func() {
|
||||
l.Lock("test")
|
||||
close(chDone)
|
||||
}()
|
||||
|
||||
select {
|
||||
case <-chDone:
|
||||
case <-time.After(3 * time.Second):
|
||||
t.Fatalf("lock should not be blocked")
|
||||
}
|
||||
}
|
||||
|
||||
func TestLockerConcurrency(t *testing.T) {
|
||||
l := New()
|
||||
|
||||
var wg sync.WaitGroup
|
||||
for i := 0; i <= 10000; i++ {
|
||||
wg.Add(1)
|
||||
go func() {
|
||||
l.Lock("test")
|
||||
// if there is a concurrency issue, will very likely panic here
|
||||
l.Unlock("test")
|
||||
wg.Done()
|
||||
}()
|
||||
}
|
||||
|
||||
chDone := make(chan struct{})
|
||||
go func() {
|
||||
wg.Wait()
|
||||
close(chDone)
|
||||
}()
|
||||
|
||||
select {
|
||||
case <-chDone:
|
||||
case <-time.After(10 * time.Second):
|
||||
t.Fatal("timeout waiting for locks to complete")
|
||||
}
|
||||
|
||||
// Since everything has unlocked this should not exist anymore
|
||||
if ctr, exists := l.locks["test"]; exists {
|
||||
t.Fatalf("lock should not exist: %v", ctr)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkLocker(b *testing.B) {
|
||||
l := New()
|
||||
for i := 0; i < b.N; i++ {
|
||||
l.Lock("test")
|
||||
l.Unlock("test")
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkLockerParallel(b *testing.B) {
|
||||
l := New()
|
||||
b.SetParallelism(128)
|
||||
b.RunParallel(func(pb *testing.PB) {
|
||||
for pb.Next() {
|
||||
l.Lock("test")
|
||||
l.Unlock("test")
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func BenchmarkLockerMoreKeys(b *testing.B) {
|
||||
l := New()
|
||||
var keys []string
|
||||
for i := 0; i < 64; i++ {
|
||||
keys = append(keys, strconv.Itoa(i))
|
||||
}
|
||||
b.SetParallelism(128)
|
||||
b.RunParallel(func(pb *testing.PB) {
|
||||
for pb.Next() {
|
||||
k := keys[rand.Intn(len(keys))]
|
||||
l.Lock(k)
|
||||
l.Unlock(k)
|
||||
}
|
||||
})
|
||||
}
|
|
@ -0,0 +1,178 @@
|
|||
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
|
@ -0,0 +1,55 @@
|
|||
// Copyright 2016 The etcd Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package clientv3
|
||||
|
||||
import (
|
||||
"google.golang.org/grpc"
|
||||
)
|
||||
|
||||
// Client provides and manages an etcd v3 client session.
|
||||
type Client struct {
|
||||
Cluster
|
||||
KV
|
||||
Lease
|
||||
Watcher
|
||||
callOpts []grpc.CallOption
|
||||
}
|
||||
|
||||
// New creates a new etcdv3 client from a given configuration.
|
||||
func New(cfg Config) (*Client, error) {
|
||||
c := &Client{
|
||||
Lease: &lessor{},
|
||||
}
|
||||
kv, err := newKV(cfg)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
c.KV = kv
|
||||
c.Watcher = kv
|
||||
return c, nil
|
||||
}
|
||||
|
||||
// Close shuts down the client's etcd connections.
|
||||
func (c *Client) Close() error {
|
||||
if c.Watcher != nil {
|
||||
return c.Watcher.Close()
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Endpoints lists the registered endpoints for the client.
|
||||
func (c *Client) Endpoints() (eps []string) {
|
||||
return
|
||||
}
|
|
@ -0,0 +1,26 @@
|
|||
// Copyright 2016 The etcd Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package clientv3
|
||||
|
||||
import (
|
||||
"golang.org/x/net/context"
|
||||
)
|
||||
|
||||
// Cluster is a stateless placeholder for the cluster API.
type Cluster struct{}

// MemberList is a stub: it ignores ctx and always returns a nil result with a
// nil error.
func (c *Cluster) MemberList(ctx context.Context) (interface{}, error) {
	var members interface{}
	return members, nil
}
|
|
@ -0,0 +1,44 @@
|
|||
// Copyright 2016 The etcd Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package clientv3
|
||||
|
||||
import (
|
||||
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
|
||||
)
|
||||
|
||||
// CompactOp describes a compact operation: the revision it targets.
type CompactOp struct {
	revision int64
}

// CompactOption is a function that adjusts a CompactOp in place.
type CompactOption func(*CompactOp)

// applyCompactOpts runs every option against op, in slice order.
func (op *CompactOp) applyCompactOpts(opts []CompactOption) {
	for i := 0; i < len(opts); i++ {
		opts[i](op)
	}
}

// OpCompact returns a CompactOp for revision rev after applying opts to it in
// the order given.
func OpCompact(rev int64, opts ...CompactOption) CompactOp {
	op := CompactOp{revision: rev}
	(&op).applyCompactOpts(opts)
	return op
}
|
||||
|
||||
func (op CompactOp) toRequest() *pb.CompactionRequest {
|
||||
return &pb.CompactionRequest{Revision: op.revision}
|
||||
}
|
|
@ -0,0 +1,86 @@
|
|||
// Copyright 2016 The etcd Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package clientv3
|
||||
|
||||
import (
|
||||
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
|
||||
)
|
||||
|
||||
type (
	// CompareTarget enumerates what a comparison can be made against; the
	// defined values are the Compare* constants below.
	CompareTarget int

	// CompareResult is an integer type for comparison results. No values of
	// this type are defined in this file.
	CompareResult int
)

// The CompareTarget values, numbered from zero in declaration order.
const (
	CompareVersion CompareTarget = iota
	CompareCreated
	CompareModified
	CompareValue
)
|
||||
|
||||
// Cmp is a comparison predicate. It is a distinct named type with the same
// underlying structure as pb.Compare.
type Cmp pb.Compare
|
||||
|
||||
func Compare(cmp Cmp, result string, v interface{}) Cmp {
|
||||
var r pb.Compare_CompareResult
|
||||
|
||||
switch result {
|
||||
case "=":
|
||||
r = pb.Compare_EQUAL
|
||||
case "!=":
|
||||
r = pb.Compare_NOT_EQUAL
|
||||
case ">":
|
||||
r = pb.Compare_GREATER
|
||||
case "<":
|
||||
r = pb.Compare_LESS
|
||||
default:
|
||||
panic("Unknown result op")
|
||||
}
|
||||
|
||||
cmp.Result = r
|
||||
switch cmp.Target {
|
||||
case pb.Compare_VALUE:
|
||||
val, ok := v.(string)
|
||||
if !ok {
|
||||
panic("bad compare value")
|
||||
}
|
||||
cmp.TargetUnion = &pb.Compare_Value{Value: []byte(val)}
|
||||
case pb.Compare_VERSION:
|
||||
cmp.TargetUnion = &pb.Compare_Version{Version: mustInt64(v)}
|
||||
case pb.Compare_CREATE:
|
||||
cmp.TargetUnion = &pb.Compare_CreateRevision{CreateRevision: mustInt64(v)}
|
||||
case pb.Compare_MOD:
|
||||
cmp.TargetUnion = &pb.Compare_ModRevision{ModRevision: mustInt64(v)}
|
||||
default:
|
||||
panic("Unknown compare type")
|
||||
}
|
||||
return cmp
|
||||
}
|
||||
|
||||
func Version(key string) Cmp {
|
||||
return Cmp{Key: []byte(key), Target: pb.Compare_VERSION}
|
||||
}
|
||||
|
||||
func ModRevision(key string) Cmp {
|
||||
return Cmp{Key: []byte(key), Target: pb.Compare_MOD}
|
||||
}
|
||||
|
||||
// mustInt64 converts val to an int64. It accepts exactly two dynamic types,
// int64 and int, and panics with "bad value" for anything else.
func mustInt64(val interface{}) int64 {
	switch n := val.(type) {
	case int64:
		return n
	case int:
		return int64(n)
	}
	panic("bad value")
}
|
|
@ -0,0 +1,42 @@
|
|||
// Copyright 2016 The etcd Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package clientv3
|
||||
|
||||
import (
|
||||
"crypto/tls"
|
||||
"time"
|
||||
|
||||
"google.golang.org/grpc"
|
||||
)
|
||||
|
||||
// Config holds the settings used to construct a client.
type Config struct {
|
||||
// Endpoints is a list of URLs.
|
||||
Endpoints []string `json:"endpoints"`
|
||||
|
||||
// DialKeepAliveTime is the interval after which the client pings the server to see if
|
||||
// the transport is alive. NOTE(review): the field is a time.Duration, not a count of seconds.
|
||||
DialKeepAliveTime time.Duration `json:"dial-keep-alive-time"`
|
||||
|
||||
// DialKeepAliveTimeout is how long the client waits for a response to the
|
||||
// keep-alive probe. If the response is not received in this time, the connection is closed.
|
||||
DialKeepAliveTimeout time.Duration `json:"dial-keep-alive-timeout"`
|
||||
|
||||
// TLS holds the client secure credentials, if any.
|
||||
TLS *tls.Config
|
||||
|
||||
// DialTimeout presumably bounds how long establishing a connection may take — TODO confirm against the code that reads it.
DialTimeout time.Duration
|
||||
|
||||
// DialOptions is a list of gRPC dial options; how they are applied is not visible here.
DialOptions []grpc.DialOption
|
||||
}
|
|
@ -0,0 +1,24 @@
|
|||
package driver
|
||||
|
||||
import (
|
||||
"errors"
|
||||
)
|
||||
|
||||
// Sentinel errors of the driver package.
var (
|
||||
// ErrExists reports that the key already exists.
ErrExists = errors.New("key exists")
|
||||
// ErrNotExists reports that the key and/or the revision does not exist.
ErrNotExists = errors.New("key and or Revision does not exists")
|
||||
// ErrRevisionMatch reports that a revision does not match.
ErrRevisionMatch = errors.New("revision does not match")
|
||||
)
|
||||
|
||||
// KeyValue is one stored record: its key, the current and previous value, and
// the integer bookkeeping columns that accompany them. Field order is part of
// the type's layout and is unchanged.
type KeyValue struct {
	ID              int64
	Key             string
	Value, OldValue []byte

	OldRevision, CreateRevision, Revision int64
	TTL, Version, Del                     int64
}
|
|
@ -0,0 +1,18 @@
|
|||
package driver
|
||||
|
||||
import (
|
||||
"context"
|
||||
)
|
||||
|
||||
// Driver is the set of storage operations a backend provides: listing,
// deleting, updating and watching keys, plus Close.
type Driver interface {
|
||||
// List returns the matching key-values together with a revision for the listing.
List(ctx context.Context, revision, limit int64, rangeKey, startKey string) (kvs []*KeyValue, listRevision int64, err error)
|
||||
|
||||
// Delete takes a key and a revision and returns the affected key-values.
Delete(ctx context.Context, key string, revision int64) ([]*KeyValue, error)
|
||||
|
||||
// Update should return ErrNotExists when the key does not exist and ErrRevisionMatch when revision doesn't match
|
||||
Update(ctx context.Context, key string, value []byte, revision, ttl int64) (oldKv *KeyValue, newKv *KeyValue, err error)
|
||||
|
||||
// Watch returns a receive-only channel of Events for key and revision.
Watch(ctx context.Context, key string, revision int64) <-chan Event
|
||||
|
||||
// Close returns an error on failure; what it releases depends on the implementation.
Close() error
|
||||
}
|
293
vendor/github.com/ibuildthecloud/kvsql/clientv3/driver/generic.go
generated
vendored
Normal file
293
vendor/github.com/ibuildthecloud/kvsql/clientv3/driver/generic.go
generated
vendored
Normal file
|
@ -0,0 +1,293 @@
|
|||
package driver
|
||||
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"fmt"
|
||||
"strings"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/ibuildthecloud/kvsql/pkg/broadcast"
|
||||
"github.com/pkg/errors"
|
||||
"github.com/sirupsen/logrus"
|
||||
utiltrace "k8s.io/utils/trace"
|
||||
)
|
||||
|
||||
type Generic struct {
|
||||
db *sql.DB
|
||||
|
||||
CleanupSQL string
|
||||
GetSQL string
|
||||
ListSQL string
|
||||
ListRevisionSQL string
|
||||
ListResumeSQL string
|
||||
ReplaySQL string
|
||||
InsertSQL string
|
||||
GetRevisionSQL string
|
||||
ToDeleteSQL string
|
||||
DeleteOldSQL string
|
||||
revision int64
|
||||
|
||||
changes chan *KeyValue
|
||||
broadcaster broadcast.Broadcaster
|
||||
cancel func()
|
||||
}
|
||||
|
||||
func (g *Generic) Start(ctx context.Context, db *sql.DB) error {
|
||||
g.db = db
|
||||
g.changes = make(chan *KeyValue, 1024)
|
||||
|
||||
row := db.QueryRowContext(ctx, g.GetRevisionSQL)
|
||||
rev := sql.NullInt64{}
|
||||
if err := row.Scan(&rev); err != nil {
|
||||
return errors.Wrap(err, "Failed to initialize revision")
|
||||
}
|
||||
if rev.Int64 == 0 {
|
||||
g.revision = 1
|
||||
} else {
|
||||
g.revision = rev.Int64
|
||||
}
|
||||
|
||||
go func() {
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
case <-time.After(time.Minute):
|
||||
_, err := g.ExecContext(ctx, g.CleanupSQL, time.Now().Unix())
|
||||
if err != nil {
|
||||
logrus.Errorf("Failed to purge expired TTL entries")
|
||||
}
|
||||
|
||||
err = g.cleanup(ctx)
|
||||
if err != nil {
|
||||
logrus.Errorf("Failed to cleanup duplicate entries")
|
||||
}
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (g *Generic) cleanup(ctx context.Context) error {
|
||||
rows, err := g.QueryContext(ctx, g.ToDeleteSQL)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
toDelete := map[string]int64{}
|
||||
for rows.Next() {
|
||||
var (
|
||||
count, revision int64
|
||||
name string
|
||||
)
|
||||
err := rows.Scan(&count, &name, &revision)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
toDelete[name] = revision
|
||||
}
|
||||
|
||||
rows.Close()
|
||||
|
||||
for name, rev := range toDelete {
|
||||
_, err = g.ExecContext(ctx, g.DeleteOldSQL, name, rev, rev)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (g *Generic) Get(ctx context.Context, key string) (*KeyValue, error) {
|
||||
kvs, _, err := g.List(ctx, 0, 1, key, "")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if len(kvs) > 0 {
|
||||
return kvs[0], nil
|
||||
}
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (g *Generic) replayEvents(ctx context.Context, key string, revision int64) ([]*KeyValue, error) {
|
||||
rows, err := g.QueryContext(ctx, g.ReplaySQL, key, revision)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
var resp []*KeyValue
|
||||
for rows.Next() {
|
||||
value := KeyValue{}
|
||||
if err := scan(rows.Scan, &value); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
resp = append(resp, &value)
|
||||
}
|
||||
|
||||
return resp, nil
|
||||
}
|
||||
|
||||
func (g *Generic) List(ctx context.Context, revision, limit int64, rangeKey, startKey string) ([]*KeyValue, int64, error) {
|
||||
var (
|
||||
rows *sql.Rows
|
||||
err error
|
||||
)
|
||||
|
||||
if limit == 0 {
|
||||
limit = 1000000
|
||||
} else {
|
||||
limit = limit + 1
|
||||
}
|
||||
|
||||
listRevision := atomic.LoadInt64(&g.revision)
|
||||
if !strings.HasSuffix(rangeKey, "%") && revision <= 0 {
|
||||
rows, err = g.QueryContext(ctx, g.GetSQL, rangeKey, limit)
|
||||
} else if revision <= 0 {
|
||||
rows, err = g.QueryContext(ctx, g.ListSQL, rangeKey, limit)
|
||||
} else if len(startKey) > 0 {
|
||||
listRevision = revision
|
||||
rows, err = g.QueryContext(ctx, g.ListResumeSQL, revision, rangeKey, startKey, limit)
|
||||
} else {
|
||||
rows, err = g.QueryContext(ctx, g.ListRevisionSQL, revision, rangeKey, limit)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
return nil, 0, err
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
var resp []*KeyValue
|
||||
for rows.Next() {
|
||||
value := KeyValue{}
|
||||
if err := scan(rows.Scan, &value); err != nil {
|
||||
return nil, 0, err
|
||||
}
|
||||
if value.Revision > listRevision {
|
||||
listRevision = value.Revision
|
||||
}
|
||||
if value.Del == 0 {
|
||||
resp = append(resp, &value)
|
||||
}
|
||||
}
|
||||
|
||||
return resp, listRevision, nil
|
||||
}
|
||||
|
||||
func (g *Generic) Delete(ctx context.Context, key string, revision int64) ([]*KeyValue, error) {
|
||||
if strings.HasSuffix(key, "%") {
|
||||
panic("can not delete list revision")
|
||||
}
|
||||
|
||||
_, err := g.mod(ctx, true, key, []byte{}, revision, 0)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
func (g *Generic) Update(ctx context.Context, key string, value []byte, revision, ttl int64) (*KeyValue, *KeyValue, error) {
|
||||
kv, err := g.mod(ctx, false, key, value, revision, ttl)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
if kv.Version == 1 {
|
||||
return nil, kv, nil
|
||||
}
|
||||
|
||||
oldKv := *kv
|
||||
oldKv.Revision = oldKv.OldRevision
|
||||
oldKv.Value = oldKv.OldValue
|
||||
return &oldKv, kv, nil
|
||||
}
|
||||
|
||||
func (g *Generic) ExecContext(ctx context.Context, query string, args ...interface{}) (sql.Result, error) {
|
||||
trace := utiltrace.New(fmt.Sprintf("SQL DB ExecContext query: %s keys: %v", query, args))
|
||||
defer trace.LogIfLong(500 * time.Millisecond)
|
||||
|
||||
return g.db.ExecContext(ctx, query, args...)
|
||||
}
|
||||
|
||||
func (g *Generic) QueryContext(ctx context.Context, query string, args ...interface{}) (*sql.Rows, error) {
|
||||
trace := utiltrace.New(fmt.Sprintf("SQL DB QueryContext query: %s keys: %v", query, args))
|
||||
defer trace.LogIfLong(500 * time.Millisecond)
|
||||
|
||||
return g.db.QueryContext(ctx, query, args...)
|
||||
}
|
||||
|
||||
func (g *Generic) mod(ctx context.Context, delete bool, key string, value []byte, revision int64, ttl int64) (*KeyValue, error) {
|
||||
oldKv, err := g.Get(ctx, key)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if revision > 0 && oldKv == nil {
|
||||
return nil, ErrNotExists
|
||||
}
|
||||
|
||||
if revision > 0 && oldKv.Revision != revision {
|
||||
return nil, ErrRevisionMatch
|
||||
}
|
||||
|
||||
if ttl > 0 {
|
||||
ttl = int64(time.Now().Unix()) + ttl
|
||||
}
|
||||
|
||||
newRevision := atomic.AddInt64(&g.revision, 1)
|
||||
result := &KeyValue{
|
||||
Key: key,
|
||||
Value: value,
|
||||
Revision: newRevision,
|
||||
TTL: int64(ttl),
|
||||
CreateRevision: newRevision,
|
||||
Version: 1,
|
||||
}
|
||||
if oldKv != nil {
|
||||
result.OldRevision = oldKv.Revision
|
||||
result.OldValue = oldKv.Value
|
||||
result.TTL = oldKv.TTL
|
||||
result.CreateRevision = oldKv.CreateRevision
|
||||
result.Version = oldKv.Version + 1
|
||||
}
|
||||
|
||||
if delete {
|
||||
result.Del = 1
|
||||
}
|
||||
|
||||
_, err = g.ExecContext(ctx, g.InsertSQL,
|
||||
result.Key,
|
||||
result.Value,
|
||||
result.OldValue,
|
||||
result.OldRevision,
|
||||
result.CreateRevision,
|
||||
result.Revision,
|
||||
result.TTL,
|
||||
result.Version,
|
||||
result.Del,
|
||||
)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
g.changes <- result
|
||||
return result, nil
|
||||
}
|
||||
|
||||
type scanner func(dest ...interface{}) error
|
||||
|
||||
func scan(s scanner, out *KeyValue) error {
|
||||
return s(
|
||||
&out.ID,
|
||||
&out.Key,
|
||||
&out.Value,
|
||||
&out.OldValue,
|
||||
&out.OldRevision,
|
||||
&out.CreateRevision,
|
||||
&out.Revision,
|
||||
&out.TTL,
|
||||
&out.Version,
|
||||
&out.Del)
|
||||
}
|
56
vendor/github.com/ibuildthecloud/kvsql/clientv3/driver/mysql/sqlite.go
generated
vendored
Normal file
56
vendor/github.com/ibuildthecloud/kvsql/clientv3/driver/mysql/sqlite.go
generated
vendored
Normal file
|
@ -0,0 +1,56 @@
|
|||
package sqlite
|
||||
|
||||
import (
|
||||
"database/sql"
|
||||
"github.com/ibuildthecloud/kvsql/clientv3/driver"
|
||||
"github.com/ibuildthecloud/kvsql/clientv3/driver/sqlite"
|
||||
)
|
||||
|
||||
var (
	// schema holds the statements that create the key_value table.
	//
	// NOTE(review): name is declared as int although the driver stores
	// string keys in it — confirm the intended column type.
	schema = []string{
		`create table if not exists key_value
(
name int not null,
value MEDIUMTEXT not null,
create_revision int not null,
revision int not null,
ttl int not null,
version int not null,
del int not null,
old_value MEDIUMTEXT not null,
id int auto_increment,
old_revision int not null,
constraint key_value_pk
primary key (id)
)`,
	}

	// idx holds the index definitions. They carry no "if not exists" guard.
	idx = []string{
		"create index key_value__name_idx on key_value (name)",
		"create index key_value__revision_idx on key_value (revision)",
	}
)
|
||||
|
||||
func NewMYSQL() *driver.Generic {
|
||||
return sqlite.NewSQLite()
|
||||
}
|
||||
|
||||
func Open(dataSourceName string) (*sql.DB, error) {
|
||||
db, err := sql.Open("mysql", dataSourceName)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
for _, stmt := range schema {
|
||||
_, err := db.Exec(stmt)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
for _, stmt := range idx {
|
||||
db.Exec(stmt)
|
||||
}
|
||||
|
||||
return db, nil
|
||||
}
|
81
vendor/github.com/ibuildthecloud/kvsql/clientv3/driver/sqlite/sqlite.go
generated
vendored
Normal file
81
vendor/github.com/ibuildthecloud/kvsql/clientv3/driver/sqlite/sqlite.go
generated
vendored
Normal file
|
@ -0,0 +1,81 @@
|
|||
package sqlite
|
||||
|
||||
import (
|
||||
"database/sql"
|
||||
"strings"
|
||||
|
||||
"github.com/ibuildthecloud/kvsql/clientv3/driver"
|
||||
)
|
||||
|
||||
var (
	// fieldList names the non-id columns in the order used by InsertSQL and
	// by the SELECT statements (which put id in front).
	fieldList = "name, value, old_value, old_revision, create_revision, revision, ttl, version, del"

	// baseList is the listing template: it selects, per name, the row with
	// the highest revision. %REV% is replaced by an optional revision
	// filter on the inner query and %RES% by an optional extra condition on
	// the outer query.
	baseList = `
SELECT kv.id, kv.name, kv.value, kv.old_value, kv.old_revision, kv.create_revision, kv.revision, kv.ttl, kv.version, kv.del
FROM key_value kv
INNER JOIN
(
SELECT MAX(revision) revision, kvi.name
FROM key_value kvi
%REV%
GROUP BY kvi.name
) AS r
ON r.name = kv.name AND r.revision = kv.revision
WHERE kv.name like ? %RES% ORDER BY kv.name ASC limit ?
`

	// insertSQL appends one revision row.
	insertSQL = `
INSERT INTO key_value(` + fieldList + `)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`

	// schema creates the key_value table and its indexes; every statement
	// is guarded by "if not exists".
	schema = []string{
		`create table if not exists key_value
(
name INTEGER,
value BLOB,
create_revision INTEGER,
revision INTEGER,
ttl INTEGER,
version INTEGER,
del INTEGER,
old_value BLOB,
id INTEGER primary key autoincrement,
old_revision INTEGER
)`,
		`create index if not exists name_idx on key_value (name)`,
		`create index if not exists revision_idx on key_value (revision)`,
	}
)
|
||||
|
||||
func NewSQLite() *driver.Generic {
|
||||
return &driver.Generic{
|
||||
CleanupSQL: "DELETE FROM key_value WHERE ttl > 0 AND ttl < ?",
|
||||
GetSQL: "SELECT id, " + fieldList + " FROM key_value WHERE name = ? ORDER BY revision DESC limit ?",
|
||||
ListSQL: strings.Replace(strings.Replace(baseList, "%REV%", "", -1), "%RES%", "", -1),
|
||||
ListRevisionSQL: strings.Replace(strings.Replace(baseList, "%REV%", "WHERE kvi.revision >= ?", -1), "%RES%", "", -1),
|
||||
ListResumeSQL: strings.Replace(strings.Replace(baseList, "%REV%", "WHERE kvi.revision <= ?", -1),
|
||||
"%RES%", "and kv.name > ? ", -1),
|
||||
InsertSQL: insertSQL,
|
||||
ReplaySQL: "SELECT id, " + fieldList + " FROM key_value WHERE name like ? and revision > ? ORDER BY revision ASC",
|
||||
GetRevisionSQL: "SELECT MAX(revision) FROM key_value",
|
||||
ToDeleteSQL: "SELECT count(*) c, name, max(revision) FROM key_value GROUP BY name HAVING c > 1 or (c = 1 and del = 1)",
|
||||
DeleteOldSQL: "DELETE FROM key_value WHERE name = ? AND (revision < ? OR (revision = ? AND del = 1))",
|
||||
}
|
||||
}
|
||||
|
||||
func Open(dataSourceName string) (*sql.DB, error) {
|
||||
if dataSourceName == "" {
|
||||
dataSourceName = "./state.db?_journal=WAL&cache=shared"
|
||||
}
|
||||
db, err := sql.Open("sqlite3", dataSourceName)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
for _, stmt := range schema {
|
||||
_, err := db.Exec(stmt)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
return db, nil
|
||||
}
|
111
vendor/github.com/ibuildthecloud/kvsql/clientv3/driver/watcher.go
generated
vendored
Normal file
111
vendor/github.com/ibuildthecloud/kvsql/clientv3/driver/watcher.go
generated
vendored
Normal file
|
@ -0,0 +1,111 @@
|
|||
package driver
|
||||
|
||||
import (
|
||||
"context"
|
||||
"io"
|
||||
"strings"
|
||||
)
|
||||
|
||||
type Event struct {
|
||||
KV *KeyValue
|
||||
Err error
|
||||
Start bool
|
||||
}
|
||||
|
||||
func matchesKey(prefix bool, key string, kv *KeyValue) bool {
|
||||
if kv == nil {
|
||||
return false
|
||||
}
|
||||
if prefix {
|
||||
return strings.HasPrefix(kv.Key, key[:len(key)-1])
|
||||
}
|
||||
return kv.Key == key
|
||||
}
|
||||
|
||||
func (g *Generic) globalWatcher() (chan map[string]interface{}, error) {
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
g.cancel = cancel
|
||||
result := make(chan map[string]interface{}, 100)
|
||||
|
||||
go func() {
|
||||
defer close(result)
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
case e := <-g.changes:
|
||||
result <- map[string]interface{}{
|
||||
"data": e,
|
||||
}
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
return result, nil
|
||||
}
|
||||
|
||||
func (g *Generic) Watch(ctx context.Context, key string, revision int64) <-chan Event {
|
||||
ctx, parentCancel := context.WithCancel(ctx)
|
||||
|
||||
prefix := strings.HasSuffix(key, "%")
|
||||
|
||||
events, err := g.broadcaster.Subscribe(ctx, g.globalWatcher)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
watchChan := make(chan Event)
|
||||
go func() (returnErr error) {
|
||||
defer func() {
|
||||
sendErrorAndClose(watchChan, returnErr)
|
||||
parentCancel()
|
||||
}()
|
||||
|
||||
start(watchChan)
|
||||
|
||||
if revision > 0 {
|
||||
keys, err := g.replayEvents(ctx, key, revision)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, k := range keys {
|
||||
watchChan <- Event{KV: k}
|
||||
}
|
||||
}
|
||||
|
||||
for e := range events {
|
||||
k, ok := e["data"].(*KeyValue)
|
||||
if ok && matchesKey(prefix, key, k) {
|
||||
watchChan <- Event{KV: k}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}()
|
||||
|
||||
return watchChan
|
||||
}
|
||||
|
||||
func start(watchResponses chan Event) {
|
||||
watchResponses <- Event{
|
||||
Start: true,
|
||||
}
|
||||
}
|
||||
|
||||
func sendErrorAndClose(watchResponses chan Event, err error) {
|
||||
if err == nil {
|
||||
err = io.EOF
|
||||
}
|
||||
watchResponses <- Event{Err: err}
|
||||
close(watchResponses)
|
||||
}
|
||||
|
||||
// Close closes the watcher and cancels all watch requests.
|
||||
func (g *Generic) Close() error {
|
||||
if g.cancel != nil {
|
||||
g.cancel()
|
||||
g.cancel = nil
|
||||
}
|
||||
return nil
|
||||
}
|
|
@ -0,0 +1,269 @@
|
|||
// Copyright 2015 The etcd Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package clientv3
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"database/sql"
|
||||
"fmt"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"github.com/ibuildthecloud/kvsql/clientv3/driver"
|
||||
"github.com/ibuildthecloud/kvsql/clientv3/driver/sqlite"
|
||||
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
|
||||
"github.com/coreos/etcd/mvcc/mvccpb"
|
||||
"github.com/docker/docker/pkg/locker"
|
||||
"golang.org/x/net/context"
|
||||
)
|
||||
|
||||
type (
|
||||
CompactResponse pb.CompactionResponse
|
||||
PutResponse pb.PutResponse
|
||||
GetResponse pb.RangeResponse
|
||||
DeleteResponse pb.DeleteRangeResponse
|
||||
TxnResponse pb.TxnResponse
|
||||
)
|
||||
|
||||
var (
|
||||
connections map[string]*kv
|
||||
connectionsCtx context.Context
|
||||
CloseDB func()
|
||||
connectionsLock sync.Mutex
|
||||
)
|
||||
|
||||
type KV interface {
|
||||
// Put puts a key-value pair into etcd.
|
||||
// Note that key,value can be plain bytes array and string is
|
||||
// an immutable representation of that bytes array.
|
||||
// To get a string of bytes, do string([]byte{0x10, 0x20}).
|
||||
Put(ctx context.Context, key, val string, opts ...OpOption) (*PutResponse, error)
|
||||
|
||||
// Get retrieves keys.
|
||||
// By default, Get will return the value for "key", if any.
|
||||
// When passed WithRange(end), Get will return the keys in the range [key, end).
|
||||
// When passed WithFromKey(), Get returns keys greater than or equal to key.
|
||||
// When passed WithRev(rev) with rev > 0, Get retrieves keys at the given revision;
|
||||
// if the required revision is compacted, the request will fail with ErrCompacted .
|
||||
// When passed WithLimit(limit), the number of returned keys is bounded by limit.
|
||||
// When passed WithSort(), the keys will be sorted.
|
||||
Get(ctx context.Context, key string, opts ...OpOption) (*GetResponse, error)
|
||||
|
||||
// Delete deletes a key, or optionally using WithRange(end), [key, end).
|
||||
Delete(ctx context.Context, key string, opts ...OpOption) (*DeleteResponse, error)
|
||||
|
||||
// Compact compacts etcd KV history before the given rev.
|
||||
Compact(ctx context.Context, rev int64, opts ...CompactOption) (*CompactResponse, error)
|
||||
|
||||
// Txn creates a transaction.
|
||||
Txn(ctx context.Context) Txn
|
||||
}
|
||||
|
||||
type kv struct {
|
||||
l locker.Locker
|
||||
d driver.Driver
|
||||
}
|
||||
|
||||
func newKV(cfg Config) (*kv, error) {
|
||||
connectionsLock.Lock()
|
||||
defer connectionsLock.Unlock()
|
||||
|
||||
if len(cfg.Endpoints) != 1 {
|
||||
return nil, fmt.Errorf("exactly one endpoint required for DB setting, got %v", cfg.Endpoints)
|
||||
}
|
||||
|
||||
key := cfg.Endpoints[0]
|
||||
|
||||
if kv, ok := connections[key]; ok {
|
||||
return kv, nil
|
||||
}
|
||||
|
||||
if connections == nil {
|
||||
connections = map[string]*kv{}
|
||||
connectionsCtx, CloseDB = context.WithCancel(context.Background())
|
||||
}
|
||||
|
||||
parts := strings.SplitN(key, "://", 2)
|
||||
if len(parts) != 2 {
|
||||
return nil, fmt.Errorf("invalid kvsql string")
|
||||
}
|
||||
|
||||
var (
|
||||
db *sql.DB
|
||||
driver *driver.Generic
|
||||
err error
|
||||
)
|
||||
|
||||
switch parts[0] {
|
||||
case "sqlite":
|
||||
if db, err = sqlite.Open(parts[1]); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
driver = sqlite.NewSQLite()
|
||||
}
|
||||
|
||||
if err := driver.Start(context.TODO(), db); err != nil {
|
||||
db.Close()
|
||||
return nil, err
|
||||
}
|
||||
|
||||
kv := &kv{
|
||||
d:driver,
|
||||
}
|
||||
connections[key] = kv
|
||||
|
||||
return kv, nil
|
||||
}
|
||||
|
||||
func (k *kv) Put(ctx context.Context, key, val string, opts ...OpOption) (*PutResponse, error) {
|
||||
//trace := utiltrace.New(fmt.Sprintf("SQL Put key: %s", key))
|
||||
//defer trace.LogIfLong(500 * time.Millisecond)
|
||||
k.l.Lock(key)
|
||||
defer k.l.Unlock(key)
|
||||
|
||||
op := OpPut(key, val, opts...)
|
||||
return k.opPut(ctx, op)
|
||||
}
|
||||
|
||||
func (k *kv) opPut(ctx context.Context, op Op) (*PutResponse, error) {
|
||||
oldR, r, err := k.d.Update(ctx, op.key, op.val, op.rev, int64(op.leaseID))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return getPutResponse(oldR, r), nil
|
||||
}
|
||||
|
||||
func (k *kv) Get(ctx context.Context, key string, opts ...OpOption) (*GetResponse, error) {
|
||||
//trace := utiltrace.New(fmt.Sprintf("SQL Get key: %s", key))
|
||||
//defer trace.LogIfLong(500 * time.Millisecond)
|
||||
op := OpGet(key, opts...)
|
||||
return k.opGet(ctx, op)
|
||||
}
|
||||
|
||||
func (k *kv) opGet(ctx context.Context, op Op) (*GetResponse, error) {
|
||||
var (
|
||||
rangeKey string
|
||||
startKey string
|
||||
)
|
||||
|
||||
if op.boundingKey == "" {
|
||||
rangeKey = op.key
|
||||
startKey = ""
|
||||
} else {
|
||||
rangeKey = op.boundingKey
|
||||
startKey = string(bytes.SplitN([]byte(op.key), []byte{'\x00'}, -1)[0])
|
||||
}
|
||||
|
||||
kvs, rev, err := k.d.List(ctx, op.rev, op.limit, rangeKey, startKey)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return getResponse(kvs, rev, op.limit, op.countOnly), nil
|
||||
}
|
||||
|
||||
func getPutResponse(oldValue *driver.KeyValue, value *driver.KeyValue) *PutResponse {
|
||||
return &PutResponse{
|
||||
Header: &pb.ResponseHeader{
|
||||
Revision: value.Revision,
|
||||
},
|
||||
PrevKv: toKeyValue(oldValue),
|
||||
}
|
||||
}
|
||||
|
||||
func toKeyValue(v *driver.KeyValue) *mvccpb.KeyValue {
|
||||
if v == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
return &mvccpb.KeyValue{
|
||||
Key: []byte(v.Key),
|
||||
CreateRevision: v.CreateRevision,
|
||||
ModRevision: v.Revision,
|
||||
Version: v.Version,
|
||||
Value: v.Value,
|
||||
Lease: v.TTL,
|
||||
}
|
||||
}
|
||||
|
||||
func getDeleteResponse(values []*driver.KeyValue) *DeleteResponse {
|
||||
gr := getResponse(values, 0, 0, false)
|
||||
return &DeleteResponse{
|
||||
Header: &pb.ResponseHeader{
|
||||
Revision: gr.Header.Revision,
|
||||
},
|
||||
PrevKvs: gr.Kvs,
|
||||
}
|
||||
}
|
||||
|
||||
func getResponse(values []*driver.KeyValue, revision, limit int64, count bool) *GetResponse {
|
||||
gr := &GetResponse{
|
||||
Header: &pb.ResponseHeader{
|
||||
Revision: revision,
|
||||
},
|
||||
}
|
||||
|
||||
for _, v := range values {
|
||||
kv := toKeyValue(v)
|
||||
if kv.ModRevision > gr.Header.Revision {
|
||||
gr.Header.Revision = kv.ModRevision
|
||||
}
|
||||
|
||||
gr.Kvs = append(gr.Kvs, kv)
|
||||
}
|
||||
|
||||
gr.Count = int64(len(gr.Kvs))
|
||||
if limit > 0 && gr.Count > limit {
|
||||
gr.Kvs = gr.Kvs[:limit]
|
||||
gr.More = true
|
||||
}
|
||||
|
||||
if count {
|
||||
gr.Kvs = nil
|
||||
}
|
||||
|
||||
return gr
|
||||
}
|
||||
|
||||
func (k *kv) Delete(ctx context.Context, key string, opts ...OpOption) (*DeleteResponse, error) {
|
||||
//trace := utiltrace.New(fmt.Sprintf("SQL Delete key: %s", key))
|
||||
//defer trace.LogIfLong(500 * time.Millisecond)
|
||||
k.l.Lock(key)
|
||||
defer k.l.Unlock(key)
|
||||
|
||||
op := OpDelete(key, opts...)
|
||||
return k.opDelete(ctx, op)
|
||||
}
|
||||
|
||||
func (k *kv) opDelete(ctx context.Context, op Op) (*DeleteResponse, error) {
|
||||
r, err := k.d.Delete(ctx, op.key, op.rev)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return getDeleteResponse(r), nil
|
||||
}
|
||||
|
||||
func (k *kv) Compact(ctx context.Context, rev int64, opts ...CompactOption) (*CompactResponse, error) {
|
||||
return &CompactResponse{
|
||||
Header: &pb.ResponseHeader{},
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (k *kv) Txn(ctx context.Context) Txn {
|
||||
return &txn{
|
||||
kv: k,
|
||||
ctx: ctx,
|
||||
}
|
||||
}
|
|
@ -0,0 +1,50 @@
|
|||
// Copyright 2016 The etcd Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package clientv3
|
||||
|
||||
import (
|
||||
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
|
||||
|
||||
"golang.org/x/net/context"
|
||||
)
|
||||
|
||||
type (
|
||||
LeaseRevokeResponse pb.LeaseRevokeResponse
|
||||
LeaseID int64
|
||||
)
|
||||
|
||||
// LeaseGrantResponse wraps the protobuf message LeaseGrantResponse.
|
||||
type LeaseGrantResponse struct {
|
||||
*pb.ResponseHeader
|
||||
ID LeaseID
|
||||
TTL int64
|
||||
Error string
|
||||
}
|
||||
|
||||
type Lease interface {
|
||||
// Grant creates a new lease.
|
||||
Grant(ctx context.Context, ttl int64) (*LeaseGrantResponse, error)
|
||||
}
|
||||
|
||||
// lessor is the stateless Lease implementation.
type lessor struct{}
|
||||
|
||||
func (l *lessor) Grant(ctx context.Context, ttl int64) (*LeaseGrantResponse, error) {
|
||||
return &LeaseGrantResponse{
|
||||
ResponseHeader: &pb.ResponseHeader{},
|
||||
TTL: ttl,
|
||||
ID: LeaseID(ttl),
|
||||
}, nil
|
||||
}
|
|
@ -0,0 +1,200 @@
|
|||
// Copyright 2016 The etcd Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package clientv3
|
||||
|
||||
import pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
|
||||
|
||||
// opType identifies the kind of operation an Op carries.
type opType int

// Operation kinds. Values start at 1 so that the zero Op has an invalid
// type.
const (
	tRange       opType = 1
	tPut         opType = 2
	tDeleteRange opType = 3
	tTxn         opType = 4
)
|
||||
|
||||
// noPrefixEnd is a single zero byte, kept from the etcd client where it
// marks a prefix without an upper bound.
var noPrefixEnd = []byte{0}
|
||||
|
||||
// Op represents an Operation that kv can execute.
|
||||
type Op struct {
|
||||
t opType
|
||||
key string
|
||||
boundingKey string
|
||||
|
||||
// for range
|
||||
limit int64
|
||||
countOnly bool
|
||||
|
||||
// for range, watch
|
||||
rev int64
|
||||
|
||||
// for watch, put, delete
|
||||
prevKV bool
|
||||
|
||||
// for put
|
||||
val []byte
|
||||
leaseID LeaseID
|
||||
|
||||
// txn
|
||||
cmps []Cmp
|
||||
thenOps []Op
|
||||
elseOps []Op
|
||||
}
|
||||
|
||||
// accessors / mutators
|
||||
|
||||
func (op Op) IsTxn() bool { return op.t == tTxn }
|
||||
func (op Op) Txn() ([]Cmp, []Op, []Op) { return op.cmps, op.thenOps, op.elseOps }
|
||||
|
||||
// Rev returns the requested revision, if any.
|
||||
func (op Op) Rev() int64 { return op.rev }
|
||||
|
||||
// IsPut returns true iff the operation is a Put.
|
||||
func (op Op) IsPut() bool { return op.t == tPut }
|
||||
|
||||
// IsGet returns true iff the operation is a Get.
|
||||
func (op Op) IsGet() bool { return op.t == tRange }
|
||||
|
||||
// IsDelete returns true iff the operation is a Delete.
|
||||
func (op Op) IsDelete() bool { return op.t == tDeleteRange }
|
||||
|
||||
// IsCountOnly returns whether countOnly is set.
|
||||
func (op Op) IsCountOnly() bool { return op.countOnly == true }
|
||||
|
||||
// ValueBytes returns the byte slice holding the Op's value, if any.
|
||||
func (op Op) ValueBytes() []byte { return op.val }
|
||||
|
||||
// WithValueBytes sets the byte slice for the Op's value.
|
||||
func (op *Op) WithValueBytes(v []byte) { op.val = v }
|
||||
|
||||
func (op Op) toRangeRequest() *pb.RangeRequest {
|
||||
if op.t != tRange {
|
||||
panic("op.t != tRange")
|
||||
}
|
||||
r := &pb.RangeRequest{
|
||||
Key: []byte(op.key),
|
||||
RangeEnd: []byte(op.boundingKey),
|
||||
Limit: op.limit,
|
||||
Revision: op.rev,
|
||||
CountOnly: op.countOnly,
|
||||
}
|
||||
return r
|
||||
}
|
||||
|
||||
func OpGet(key string, opts ...OpOption) Op {
|
||||
ret := Op{t: tRange, key: key}
|
||||
ret.applyOpts(opts)
|
||||
return ret
|
||||
}
|
||||
|
||||
func OpDelete(key string, opts ...OpOption) Op {
|
||||
ret := Op{t: tDeleteRange, key: key}
|
||||
ret.applyOpts(opts)
|
||||
switch {
|
||||
case ret.leaseID != 0:
|
||||
panic("unexpected lease in delete")
|
||||
case ret.limit != 0:
|
||||
panic("unexpected limit in delete")
|
||||
case ret.rev != 0:
|
||||
panic("unexpected revision in delete")
|
||||
case ret.countOnly:
|
||||
panic("unexpected countOnly in delete")
|
||||
}
|
||||
return ret
|
||||
}
|
||||
|
||||
func OpPut(key, val string, opts ...OpOption) Op {
|
||||
ret := Op{t: tPut, key: key, val: []byte(val)}
|
||||
ret.applyOpts(opts)
|
||||
switch {
|
||||
case len(ret.key) > 0 && ret.key[len(ret.key)-1] == '%':
|
||||
panic("unexpected range in put")
|
||||
case ret.limit != 0:
|
||||
panic("unexpected limit in put")
|
||||
case ret.rev != 0:
|
||||
panic("unexpected revision in put")
|
||||
case ret.countOnly:
|
||||
panic("unexpected countOnly in put")
|
||||
}
|
||||
return ret
|
||||
}
|
||||
|
||||
func (op *Op) applyOpts(opts []OpOption) {
|
||||
for _, opt := range opts {
|
||||
opt(op)
|
||||
}
|
||||
}
|
||||
|
||||
// OpOption configures Operations like Get, Put, Delete.
|
||||
type OpOption func(*Op)
|
||||
|
||||
// WithLease attaches a lease ID to a key in 'Put' request.
|
||||
func WithLease(leaseID LeaseID) OpOption {
|
||||
return func(op *Op) { op.leaseID = leaseID }
|
||||
}
|
||||
|
||||
// WithLimit limits the number of results to return from 'Get' request.
|
||||
// If WithLimit is given a 0 limit, it is treated as no limit.
|
||||
func WithLimit(n int64) OpOption { return func(op *Op) { op.limit = n } }
|
||||
|
||||
// WithRev specifies the store revision for 'Get' request.
|
||||
// Or the start revision of 'Watch' request.
|
||||
func WithRev(rev int64) OpOption { return func(op *Op) { op.rev = rev } }
|
||||
|
||||
// GetPrefixRangeEnd returns the range end that selects every key starting
// with prefix: the prefix followed by the "%" wildcard.
// 'Get(foo, WithPrefix())' is equal to 'Get(foo, WithRange(GetPrefixRangeEnd(foo))'.
func GetPrefixRangeEnd(prefix string) string {
	const wildcard = "%"
	return prefix + wildcard
}
|
||||
|
||||
// WithPrefix enables 'Get', 'Delete', or 'Watch' requests to operate
|
||||
// on the keys with matching prefix. For example, 'Get(foo, WithPrefix())'
|
||||
// can return 'foo1', 'foo2', and so on.
|
||||
func WithPrefix() OpOption {
|
||||
return func(op *Op) {
|
||||
op.key += "%"
|
||||
}
|
||||
}
|
||||
|
||||
// WithRange specifies the range of 'Get', 'Delete', 'Watch' requests.
|
||||
// For example, 'Get' requests with 'WithRange(end)' returns
|
||||
// the keys in the range [key, end).
|
||||
// endKey must be lexicographically greater than start key.
|
||||
func WithRange(endKey string) OpOption {
|
||||
return func(op *Op) { op.boundingKey = endKey }
|
||||
}
|
||||
|
||||
// WithSerializable makes 'Get' request serializable. By default,
|
||||
// it's linearizable. Serializable requests are better for lower latency
|
||||
// requirement.
|
||||
func WithSerializable() OpOption {
|
||||
return func(op *Op) {}
|
||||
}
|
||||
|
||||
// WithCountOnly makes the 'Get' request return only the count of keys.
|
||||
func WithCountOnly() OpOption {
|
||||
return func(op *Op) { op.countOnly = true }
|
||||
}
|
||||
|
||||
// WithPrevKV gets the previous key-value pair before the event happens. If the previous KV is already compacted,
|
||||
// nothing will be returned.
|
||||
func WithPrevKV() OpOption {
|
||||
return func(op *Op) {
|
||||
op.prevKV = true
|
||||
}
|
||||
}
|
|
@ -0,0 +1,224 @@
|
|||
// Copyright 2016 The etcd Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package clientv3
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
|
||||
"golang.org/x/net/context"
|
||||
utiltrace "k8s.io/utils/trace"
|
||||
)
|
||||
|
||||
type Txn interface {
|
||||
// If takes a list of comparison. If all comparisons passed in succeed,
|
||||
// the operations passed into Then() will be executed. Or the operations
|
||||
// passed into Else() will be executed.
|
||||
If(cs ...Cmp) Txn
|
||||
|
||||
// Then takes a list of operations. The Ops list will be executed, if the
|
||||
// comparisons passed in If() succeed.
|
||||
Then(ops ...Op) Txn
|
||||
|
||||
// Else takes a list of operations. The Ops list will be executed, if the
|
||||
// comparisons passed in If() fail.
|
||||
Else(ops ...Op) Txn
|
||||
|
||||
// Commit tries to commit the transaction.
|
||||
Commit() (*TxnResponse, error)
|
||||
}
|
||||
|
||||
type txn struct {
|
||||
kv *kv
|
||||
ctx context.Context
|
||||
|
||||
mu sync.Mutex
|
||||
cif bool
|
||||
cthen bool
|
||||
celse bool
|
||||
|
||||
cmps []*pb.Compare
|
||||
|
||||
sus []Op
|
||||
fas []Op
|
||||
}
|
||||
|
||||
func (txn *txn) If(cs ...Cmp) Txn {
|
||||
txn.mu.Lock()
|
||||
defer txn.mu.Unlock()
|
||||
|
||||
if txn.cif {
|
||||
panic("cannot call If twice!")
|
||||
}
|
||||
|
||||
if txn.cthen {
|
||||
panic("cannot call If after Then!")
|
||||
}
|
||||
|
||||
if txn.celse {
|
||||
panic("cannot call If after Else!")
|
||||
}
|
||||
|
||||
txn.cif = true
|
||||
|
||||
for i := range cs {
|
||||
txn.cmps = append(txn.cmps, (*pb.Compare)(&cs[i]))
|
||||
}
|
||||
|
||||
return txn
|
||||
}
|
||||
|
||||
func (txn *txn) Then(ops ...Op) Txn {
|
||||
txn.mu.Lock()
|
||||
defer txn.mu.Unlock()
|
||||
|
||||
if txn.cthen {
|
||||
panic("cannot call Then twice!")
|
||||
}
|
||||
if txn.celse {
|
||||
panic("cannot call Then after Else!")
|
||||
}
|
||||
|
||||
txn.cthen = true
|
||||
|
||||
for _, op := range ops {
|
||||
txn.sus = append(txn.sus, op)
|
||||
}
|
||||
|
||||
return txn
|
||||
}
|
||||
|
||||
func (txn *txn) Else(ops ...Op) Txn {
|
||||
txn.mu.Lock()
|
||||
defer txn.mu.Unlock()
|
||||
|
||||
if txn.celse {
|
||||
panic("cannot call Else twice!")
|
||||
}
|
||||
|
||||
txn.celse = true
|
||||
|
||||
for _, op := range ops {
|
||||
txn.fas = append(txn.fas, op)
|
||||
}
|
||||
|
||||
return txn
|
||||
}
|
||||
|
||||
func (txn *txn) do(op Op) (*pb.ResponseOp, error) {
|
||||
switch op.t {
|
||||
case tRange:
|
||||
r, err := txn.kv.opGet(txn.ctx, op)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &pb.ResponseOp{
|
||||
Response: &pb.ResponseOp_ResponseRange{
|
||||
ResponseRange: (*pb.RangeResponse)(r),
|
||||
},
|
||||
}, nil
|
||||
case tPut:
|
||||
r, err := txn.kv.opPut(txn.ctx, op)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &pb.ResponseOp{
|
||||
Response: &pb.ResponseOp_ResponsePut{
|
||||
ResponsePut: (*pb.PutResponse)(r),
|
||||
},
|
||||
}, nil
|
||||
case tDeleteRange:
|
||||
r, err := txn.kv.opDelete(txn.ctx, op)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &pb.ResponseOp{
|
||||
Response: &pb.ResponseOp_ResponseDeleteRange{
|
||||
ResponseDeleteRange: (*pb.DeleteRangeResponse)(r),
|
||||
},
|
||||
}, nil
|
||||
default:
|
||||
return nil, fmt.Errorf("unknown op in txn: %#v", op)
|
||||
}
|
||||
}
|
||||
|
||||
func (txn *txn) Commit() (*TxnResponse, error) {
|
||||
trace := utiltrace.New("SQL Commit")
|
||||
defer trace.LogIfLong(500 * time.Millisecond)
|
||||
|
||||
locks := map[string]bool{}
|
||||
resp := &TxnResponse{
|
||||
Header: &pb.ResponseHeader{},
|
||||
}
|
||||
|
||||
good := true
|
||||
for _, c := range txn.cmps {
|
||||
k := string(c.Key)
|
||||
if !locks[k] {
|
||||
txn.kv.l.Lock(k)
|
||||
trace.Step(fmt.Sprintf("lock acquired: %s", k))
|
||||
locks[k] = true
|
||||
defer txn.kv.l.Unlock(k)
|
||||
}
|
||||
gr, err := txn.kv.Get(txn.ctx, k)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
switch c.Target {
|
||||
case pb.Compare_VERSION:
|
||||
ver := int64(0)
|
||||
if len(gr.Kvs) > 0 {
|
||||
ver = gr.Kvs[0].Version
|
||||
}
|
||||
cv, _ := c.TargetUnion.(*pb.Compare_Version)
|
||||
if ver != cv.Version {
|
||||
good = false
|
||||
}
|
||||
case pb.Compare_MOD:
|
||||
mod := int64(0)
|
||||
if len(gr.Kvs) > 0 {
|
||||
mod = gr.Kvs[0].ModRevision
|
||||
}
|
||||
cv, _ := c.TargetUnion.(*pb.Compare_ModRevision)
|
||||
if mod != cv.ModRevision {
|
||||
good = false
|
||||
}
|
||||
default:
|
||||
return nil, fmt.Errorf("unknown txn target %v", c.Target)
|
||||
}
|
||||
|
||||
trace.Step(fmt.Sprintf("condition key %s good %v", k, good))
|
||||
}
|
||||
|
||||
resp.Succeeded = good
|
||||
ops := txn.sus
|
||||
if !good {
|
||||
ops = txn.fas
|
||||
}
|
||||
|
||||
for _, op := range ops {
|
||||
r, err := txn.do(op)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
resp.Responses = append(resp.Responses, r)
|
||||
trace.Step(fmt.Sprintf("op key %s op %v", op.key, op.t))
|
||||
}
|
||||
|
||||
return resp, nil
|
||||
}
|
|
@ -0,0 +1,148 @@
|
|||
// Copyright 2016 The etcd Authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package clientv3
|
||||
|
||||
import (
|
||||
v3rpc "github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes"
|
||||
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
|
||||
"github.com/coreos/etcd/mvcc/mvccpb"
|
||||
|
||||
"golang.org/x/net/context"
|
||||
)
|
||||
|
||||
const (
|
||||
EventTypeDelete = mvccpb.DELETE
|
||||
EventTypePut = mvccpb.PUT
|
||||
)
|
||||
|
||||
type Event mvccpb.Event
|
||||
|
||||
type WatchChan <-chan WatchResponse
|
||||
|
||||
type Watcher interface {
|
||||
// Watch watches on a key or prefix. The watched events will be returned
|
||||
// through the returned channel. If revisions waiting to be sent over the
|
||||
// watch are compacted, then the watch will be canceled by the server, the
|
||||
// client will post a compacted error watch response, and the channel will close.
|
||||
Watch(ctx context.Context, key string, opts ...OpOption) WatchChan
|
||||
|
||||
// Close closes the watcher and cancels all watch requests.
|
||||
Close() error
|
||||
}
|
||||
|
||||
func (k *kv) Watch(ctx context.Context, key string, opts ...OpOption) WatchChan {
|
||||
op := OpGet(key, opts...)
|
||||
c := k.d.Watch(ctx, op.key, op.rev)
|
||||
|
||||
result := make(chan WatchResponse)
|
||||
go func() {
|
||||
defer close(result)
|
||||
for e := range c {
|
||||
if e.Err != nil {
|
||||
result <- NewWatchResponseErr(e.Err)
|
||||
continue
|
||||
} else if e.Start {
|
||||
result <- WatchResponse{
|
||||
Created: true,
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
k := e.KV
|
||||
|
||||
event := &Event{}
|
||||
if k.Del == 0 {
|
||||
event.Type = mvccpb.PUT
|
||||
} else {
|
||||
event.Type = mvccpb.DELETE
|
||||
}
|
||||
|
||||
event.Kv = toKeyValue(k)
|
||||
if event.Kv.Version > 1 && k.OldRevision > 0 {
|
||||
oldKV := *event.Kv
|
||||
oldKV.ModRevision = k.OldRevision
|
||||
oldKV.Value = k.OldValue
|
||||
event.PrevKv = &oldKV
|
||||
}
|
||||
|
||||
wr := WatchResponse{
|
||||
Header: pb.ResponseHeader{
|
||||
Revision: event.Kv.ModRevision,
|
||||
},
|
||||
Events: []*Event{
|
||||
event,
|
||||
},
|
||||
}
|
||||
result <- wr
|
||||
}
|
||||
}()
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
func (k *kv) Close() error {
|
||||
return k.d.Close()
|
||||
}
|
||||
|
||||
type WatchResponse struct {
|
||||
Header pb.ResponseHeader
|
||||
Events []*Event
|
||||
|
||||
// CompactRevision is the minimum revision the watcher may receive.
|
||||
CompactRevision int64
|
||||
|
||||
// Canceled is used to indicate watch failure.
|
||||
// If the watch failed and the stream was about to close, before the channel is closed,
|
||||
// the channel sends a final response that has Canceled set to true with a non-nil Err().
|
||||
Canceled bool
|
||||
|
||||
// Created is used to indicate the creation of the watcher.
|
||||
Created bool
|
||||
|
||||
closeErr error
|
||||
}
|
||||
|
||||
func NewWatchResponseErr(err error) WatchResponse {
|
||||
return WatchResponse{
|
||||
Canceled: true,
|
||||
closeErr: err,
|
||||
}
|
||||
}
|
||||
|
||||
// IsCreate returns true if the event tells that the key is newly created.
|
||||
func (e *Event) IsCreate() bool {
|
||||
return e.Type == EventTypePut && e.Kv.CreateRevision == e.Kv.ModRevision
|
||||
}
|
||||
|
||||
// IsModify returns true if the event tells that a new value is put on existing key.
|
||||
func (e *Event) IsModify() bool {
|
||||
return e.Type == EventTypePut && e.Kv.CreateRevision != e.Kv.ModRevision
|
||||
}
|
||||
|
||||
// Err is the error value if this WatchResponse holds an error.
|
||||
func (wr *WatchResponse) Err() error {
|
||||
switch {
|
||||
case wr.closeErr != nil:
|
||||
return v3rpc.Error(wr.closeErr)
|
||||
case wr.CompactRevision != 0:
|
||||
return v3rpc.ErrCompacted
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// IsProgressNotify returns true if the WatchResponse is progress notification.
|
||||
func (wr *WatchResponse) IsProgressNotify() bool {
|
||||
return len(wr.Events) == 0 && !wr.Canceled && !wr.Created && wr.CompactRevision == 0 && wr.Header.Revision != 0
|
||||
}
|
|
@ -0,0 +1,97 @@
|
|||
/*
|
||||
Copyright 2016 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package factory
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/ibuildthecloud/kvsql/clientv3"
|
||||
etcd3 "github.com/ibuildthecloud/kvsql/storage"
|
||||
"k8s.io/apimachinery/pkg/util/wait"
|
||||
"k8s.io/apiserver/pkg/storage"
|
||||
"k8s.io/apiserver/pkg/storage/storagebackend"
|
||||
"k8s.io/apiserver/pkg/storage/value"
|
||||
)
|
||||
|
||||
func NewKVSQLHealthCheck(c storagebackend.Config) (func() error, error) {
|
||||
// constructing the etcd v3 client blocks and times out if etcd is not available.
|
||||
// retry in a loop in the background until we successfully create the client, storing the client or error encountered
|
||||
|
||||
clientValue := &atomic.Value{}
|
||||
|
||||
clientErrMsg := &atomic.Value{}
|
||||
clientErrMsg.Store("etcd client connection not yet established")
|
||||
|
||||
go wait.PollUntil(time.Second, func() (bool, error) {
|
||||
client, err := newETCD3Client(c)
|
||||
if err != nil {
|
||||
clientErrMsg.Store(err.Error())
|
||||
return false, nil
|
||||
}
|
||||
clientValue.Store(client)
|
||||
clientErrMsg.Store("")
|
||||
return true, nil
|
||||
}, wait.NeverStop)
|
||||
|
||||
return func() error {
|
||||
if errMsg := clientErrMsg.Load().(string); len(errMsg) > 0 {
|
||||
return fmt.Errorf(errMsg)
|
||||
}
|
||||
client := clientValue.Load().(*clientv3.Client)
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
|
||||
defer cancel()
|
||||
if _, err := client.Cluster.MemberList(ctx); err != nil {
|
||||
return fmt.Errorf("error listing etcd members: %v", err)
|
||||
}
|
||||
return nil
|
||||
}, nil
|
||||
}
|
||||
|
||||
func newETCD3Client(c storagebackend.Config) (*clientv3.Client, error) {
|
||||
cfg := clientv3.Config{
|
||||
Endpoints: c.Transport.ServerList,
|
||||
}
|
||||
|
||||
if len(cfg.Endpoints) == 0 {
|
||||
cfg.Endpoints = []string{"sqlite://"}
|
||||
}
|
||||
|
||||
client, err := clientv3.New(cfg)
|
||||
return client, err
|
||||
}
|
||||
|
||||
func NewKVSQLStorage(c storagebackend.Config) (storage.Interface, func(), error) {
|
||||
client, err := newETCD3Client(c)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
etcd3.StartCompactor(ctx, client, c.CompactionInterval)
|
||||
destroyFunc := func() {
|
||||
cancel()
|
||||
client.Close()
|
||||
}
|
||||
transformer := c.Transformer
|
||||
if transformer == nil {
|
||||
transformer = value.IdentityTransformer
|
||||
}
|
||||
|
||||
return etcd3.New(client, c.Codec, c.Prefix, transformer, c.Paging), destroyFunc, nil
|
||||
}
|
126
vendor/github.com/ibuildthecloud/kvsql/pkg/broadcast/broadcaster.go
generated
vendored
Normal file
126
vendor/github.com/ibuildthecloud/kvsql/pkg/broadcast/broadcaster.go
generated
vendored
Normal file
|
@ -0,0 +1,126 @@
|
|||
package broadcast
|
||||
|
||||
import (
|
||||
"context"
|
||||
"sync"
|
||||
)
|
||||
|
||||
// ConnectFunc opens the upstream source and returns the channel on which it
// produces items.
type ConnectFunc func() (chan map[string]interface{}, error)

// Broadcaster fans items from a single upstream channel out to any number of
// subscriber channels. The embedded mutex guards running and subs.
type Broadcaster struct {
	sync.Mutex
	// running is true while a stream goroutine is consuming the upstream.
	running bool
	// subs is the set of currently registered subscriber channels.
	subs map[chan map[string]interface{}]struct{}
}
|
||||
|
||||
func (b *Broadcaster) Subscribe(ctx context.Context, connect ConnectFunc) (chan map[string]interface{}, error) {
|
||||
b.Lock()
|
||||
defer b.Unlock()
|
||||
|
||||
if !b.running {
|
||||
if err := b.start(connect); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
sub := make(chan map[string]interface{}, 100)
|
||||
if b.subs == nil {
|
||||
b.subs = map[chan map[string]interface{}]struct{}{}
|
||||
}
|
||||
b.subs[sub] = struct{}{}
|
||||
go func() {
|
||||
<-ctx.Done()
|
||||
b.unsub(sub, true)
|
||||
}()
|
||||
|
||||
return sub, nil
|
||||
}
|
||||
|
||||
func (b *Broadcaster) unsub(sub chan map[string]interface{}, lock bool) {
|
||||
if lock {
|
||||
b.Lock()
|
||||
}
|
||||
if _, ok := b.subs[sub]; ok {
|
||||
close(sub)
|
||||
delete(b.subs, sub)
|
||||
}
|
||||
if lock {
|
||||
b.Unlock()
|
||||
}
|
||||
}
|
||||
|
||||
func (b *Broadcaster) start(connect ConnectFunc) error {
|
||||
c, err := connect()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
go b.stream(c)
|
||||
b.running = true
|
||||
return nil
|
||||
}
|
||||
|
||||
func (b *Broadcaster) stream(input chan map[string]interface{}) {
|
||||
for item := range input {
|
||||
b.Lock()
|
||||
for sub := range b.subs {
|
||||
newItem := cloneMap(item)
|
||||
select {
|
||||
case sub <- newItem:
|
||||
default:
|
||||
// Slow consumer, drop
|
||||
go b.unsub(sub, true)
|
||||
}
|
||||
}
|
||||
b.Unlock()
|
||||
}
|
||||
|
||||
b.Lock()
|
||||
for sub := range b.subs {
|
||||
b.unsub(sub, false)
|
||||
}
|
||||
b.running = false
|
||||
b.Unlock()
|
||||
}
|
||||
|
||||
// cloneMap returns a deep copy of data, or nil when data is nil. Nested
// maps and slices are copied recursively through cloneValue.
func cloneMap(data map[string]interface{}) map[string]interface{} {
	if data == nil {
		return nil
	}

	copied := make(map[string]interface{}, len(data))
	for key, val := range data {
		copied[key] = cloneValue(val)
	}
	return copied
}

// cloneValue deep-copies v when it is a map[string]interface{}, an
// []interface{} or a []map[string]interface{}; any other value is returned
// unchanged.
func cloneValue(v interface{}) interface{} {
	switch typed := v.(type) {
	case map[string]interface{}:
		return cloneMap(typed)
	case []map[string]interface{}:
		return cloneMapSlice(typed)
	case []interface{}:
		return cloneSlice(typed)
	}
	return v
}

// cloneMapSlice deep-copies a slice of maps. Note that the result is an
// []interface{} whose elements are the cloned maps.
func cloneMapSlice(data []map[string]interface{}) []interface{} {
	copied := make([]interface{}, 0, len(data))
	for _, elem := range data {
		copied = append(copied, cloneValue(elem))
	}
	return copied
}

// cloneSlice deep-copies a slice element by element.
func cloneSlice(data []interface{}) []interface{} {
	copied := make([]interface{}, 0, len(data))
	for _, elem := range data {
		copied = append(copied, cloneValue(elem))
	}
	return copied
}
|
|
@ -0,0 +1,162 @@
|
|||
/*
|
||||
Copyright 2016 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package etcd3
|
||||
|
||||
import (
|
||||
"context"
|
||||
"strconv"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"k8s.io/klog"
|
||||
"github.com/ibuildthecloud/kvsql/clientv3"
|
||||
)
|
||||
|
||||
// compactRevKey is the key whose value records the last compacted revision
// and whose version serves as the logical compaction time.
const compactRevKey = "compact_rev_key"

var (
	// endpointsMapMu guards endpointsMap.
	endpointsMapMu sync.Mutex
	// endpointsMap is the set of endpoints for which StartCompactor has
	// already been called in this process.
	endpointsMap = make(map[string]struct{})
)
|
||||
|
||||
// StartCompactor starts a compactor in the background to compact old version of keys that's not needed.
|
||||
// By default, we save the most recent 10 minutes data and compact versions > 10minutes ago.
|
||||
// It should be enough for slow watchers and to tolerate burst.
|
||||
// TODO: We might keep a longer history (12h) in the future once storage API can take advantage of past version of keys.
|
||||
func StartCompactor(ctx context.Context, client *clientv3.Client, compactInterval time.Duration) {
|
||||
endpointsMapMu.Lock()
|
||||
defer endpointsMapMu.Unlock()
|
||||
|
||||
// In one process, we can have only one compactor for one cluster.
|
||||
// Currently we rely on endpoints to differentiate clusters.
|
||||
for _, ep := range client.Endpoints() {
|
||||
if _, ok := endpointsMap[ep]; ok {
|
||||
klog.V(4).Infof("compactor already exists for endpoints %v", client.Endpoints())
|
||||
return
|
||||
}
|
||||
}
|
||||
for _, ep := range client.Endpoints() {
|
||||
endpointsMap[ep] = struct{}{}
|
||||
}
|
||||
|
||||
if compactInterval != 0 {
|
||||
go compactor(ctx, client, compactInterval)
|
||||
}
|
||||
}
|
||||
|
||||
// compactor periodically compacts historical versions of keys in etcd.
|
||||
// It will compact keys with versions older than given interval.
|
||||
// In other words, after compaction, it will only contain keys set during last interval.
|
||||
// Any API call for the older versions of keys will return error.
|
||||
// Interval is the time interval between each compaction. The first compaction happens after "interval".
|
||||
func compactor(ctx context.Context, client *clientv3.Client, interval time.Duration) {
|
||||
// Technical definitions:
|
||||
// We have a special key in etcd defined as *compactRevKey*.
|
||||
// compactRevKey's value will be set to the string of last compacted revision.
|
||||
// compactRevKey's version will be used as logical time for comparison. THe version is referred as compact time.
|
||||
// Initially, because the key doesn't exist, the compact time (version) is 0.
|
||||
//
|
||||
// Algorithm:
|
||||
// - Compare to see if (local compact_time) = (remote compact_time).
|
||||
// - If yes, increment both local and remote compact_time, and do a compaction.
|
||||
// - If not, set local to remote compact_time.
|
||||
//
|
||||
// Technical details/insights:
|
||||
//
|
||||
// The protocol here is lease based. If one compactor CAS successfully, the others would know it when they fail in
|
||||
// CAS later and would try again in 10 minutes. If an APIServer crashed, another one would "take over" the lease.
|
||||
//
|
||||
// For example, in the following diagram, we have a compactor C1 doing compaction in t1, t2. Another compactor C2
|
||||
// at t1' (t1 < t1' < t2) would CAS fail, set its known oldRev to rev at t1', and try again in t2' (t2' > t2).
|
||||
// If C1 crashed and wouldn't compact at t2, C2 would CAS successfully at t2'.
|
||||
//
|
||||
// oldRev(t2) curRev(t2)
|
||||
// +
|
||||
// oldRev curRev |
|
||||
// + + |
|
||||
// | | |
|
||||
// | | t1' | t2'
|
||||
// +---v-------------v----^---------v------^---->
|
||||
// t0 t1 t2
|
||||
//
|
||||
// We have the guarantees:
|
||||
// - in normal cases, the interval is 10 minutes.
|
||||
// - in failover, the interval is >10m and <20m
|
||||
//
|
||||
// FAQ:
|
||||
// - What if time is not accurate? We don't care as long as someone did the compaction. Atomicity is ensured using
|
||||
// etcd API.
|
||||
// - What happened under heavy load scenarios? Initially, each apiserver will do only one compaction
|
||||
// every 10 minutes. This is very unlikely affecting or affected w.r.t. server load.
|
||||
|
||||
var compactTime int64
|
||||
var rev int64
|
||||
var err error
|
||||
for {
|
||||
select {
|
||||
case <-time.After(interval):
|
||||
case <-ctx.Done():
|
||||
return
|
||||
}
|
||||
|
||||
compactTime, rev, err = compact(ctx, client, compactTime, rev)
|
||||
if err != nil {
|
||||
klog.Errorf("etcd: endpoint (%v) compact failed: %v", client.Endpoints(), err)
|
||||
continue
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// compact compacts etcd store and returns current rev.
|
||||
// It will return the current compact time and global revision if no error occurred.
|
||||
// Note that CAS fail will not incur any error.
|
||||
func compact(ctx context.Context, client *clientv3.Client, t, rev int64) (int64, int64, error) {
|
||||
resp, err := client.KV.Txn(ctx).If(
|
||||
clientv3.Compare(clientv3.Version(compactRevKey), "=", t),
|
||||
).Then(
|
||||
clientv3.OpPut(compactRevKey, strconv.FormatInt(rev, 10)), // Expect side effect: increment Version
|
||||
).Else(
|
||||
clientv3.OpGet(compactRevKey),
|
||||
).Commit()
|
||||
if err != nil {
|
||||
return t, rev, err
|
||||
}
|
||||
|
||||
curRev := resp.Header.Revision
|
||||
|
||||
if !resp.Succeeded {
|
||||
curTime := resp.Responses[0].GetResponseRange().Kvs[0].Version
|
||||
return curTime, curRev, nil
|
||||
}
|
||||
curTime := t + 1
|
||||
|
||||
if rev == 0 {
|
||||
// We don't compact on bootstrap.
|
||||
return curTime, curRev, nil
|
||||
}
|
||||
if _, err = client.Compact(ctx, rev); err != nil {
|
||||
return curTime, curRev, err
|
||||
}
|
||||
klog.V(4).Infof("etcd: compacted rev (%d), endpoints (%v)", rev, client.Endpoints())
|
||||
return curTime, curRev, nil
|
||||
}
|
|
@ -0,0 +1,71 @@
|
|||
/*
|
||||
Copyright 2016 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package etcd3
|
||||
|
||||
import (
|
||||
"k8s.io/apimachinery/pkg/api/errors"
|
||||
|
||||
etcdrpc "github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes"
|
||||
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
|
||||
)
|
||||
|
||||
func interpretWatchError(err error) error {
|
||||
switch {
|
||||
case err == etcdrpc.ErrCompacted:
|
||||
return errors.NewResourceExpired("The resourceVersion for the provided watch is too old.")
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
// Messages returned to clients when a list hits compacted history.
const (
	// expired is used for non-paged lists.
	expired string = "The resourceVersion for the provided list is too old."

	// continueExpired is used when no replacement continue token can be
	// produced.
	continueExpired string = "The provided continue parameter is too old to display a consistent " +
		"list result. You can start a new list without the continue parameter."

	// inconsistentContinue accompanies a replacement continue token.
	inconsistentContinue string = "The provided continue parameter is too old to display a consistent " +
		"list result. You can start a new list without the continue parameter, " +
		"or use the continue token in this response to retrieve the remainder " +
		"of the results. Continuing with the provided token results in an " +
		"inconsistent list - objects that were created, modified, or deleted " +
		"between the time the first chunk was returned and now may show up in " +
		"the list."
)
|
||||
|
||||
func interpretListError(err error, paging bool, continueKey, keyPrefix string) error {
|
||||
switch {
|
||||
case err == etcdrpc.ErrCompacted:
|
||||
if paging {
|
||||
return handleCompactedErrorForPaging(continueKey, keyPrefix)
|
||||
}
|
||||
return errors.NewResourceExpired(expired)
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
func handleCompactedErrorForPaging(continueKey, keyPrefix string) error {
|
||||
// continueToken.ResoureVersion=-1 means that the apiserver can
|
||||
// continue the list at the latest resource version. We don't use rv=0
|
||||
// for this purpose to distinguish from a bad token that has empty rv.
|
||||
newToken, err := encodeContinue(continueKey, keyPrefix, -1)
|
||||
if err != nil {
|
||||
utilruntime.HandleError(err)
|
||||
return errors.NewResourceExpired(continueExpired)
|
||||
}
|
||||
statusError := errors.NewResourceExpired(inconsistentContinue)
|
||||
statusError.ErrStatus.ListMeta.Continue = newToken
|
||||
return statusError
|
||||
}
|
|
@ -0,0 +1,57 @@
|
|||
/*
|
||||
Copyright 2016 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package etcd3
|
||||
|
||||
import (
|
||||
"github.com/ibuildthecloud/kvsql/clientv3"
|
||||
"github.com/coreos/etcd/mvcc/mvccpb"
|
||||
)
|
||||
|
||||
// event is the storage layer's internal view of a single key change.
type event struct {
	key string
	// value is the key's value at rev; prevValue is the value before the
	// change, when known.
	value, prevValue []byte
	// rev is the key's mod revision.
	rev int64
	// isDeleted and isCreated classify the change.
	isDeleted, isCreated bool
}
|
||||
|
||||
// parseKV converts a KeyValue retrieved from an initial sync() listing to a synthetic isCreated event.
|
||||
func parseKV(kv *mvccpb.KeyValue) *event {
|
||||
return &event{
|
||||
key: string(kv.Key),
|
||||
value: kv.Value,
|
||||
prevValue: nil,
|
||||
rev: kv.ModRevision,
|
||||
isDeleted: false,
|
||||
isCreated: true,
|
||||
}
|
||||
}
|
||||
|
||||
func parseEvent(e *clientv3.Event) *event {
|
||||
ret := &event{
|
||||
key: string(e.Kv.Key),
|
||||
value: e.Kv.Value,
|
||||
rev: e.Kv.ModRevision,
|
||||
isDeleted: e.Type == clientv3.EventTypeDelete,
|
||||
isCreated: e.IsCreate(),
|
||||
}
|
||||
if e.PrevKv != nil {
|
||||
ret.prevValue = e.PrevKv.Value
|
||||
}
|
||||
return ret
|
||||
}
|
|
@ -0,0 +1,102 @@
|
|||
/*
|
||||
Copyright 2018 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package etcd3
|
||||
|
||||
import (
|
||||
"context"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/ibuildthecloud/kvsql/clientv3"
|
||||
)
|
||||
|
||||
// leaseManager is used to manage leases requested from etcd. If a new write
|
||||
// needs a lease that has similar expiration time to the previous one, the old
|
||||
// lease will be reused to reduce the overhead of etcd, since lease operations
|
||||
// are expensive. In the implementation, we only store one previous lease,
|
||||
// since all the events have the same ttl.
|
||||
type leaseManager struct {
|
||||
client *clientv3.Client // etcd client used to grant leases
|
||||
leaseMu sync.Mutex
|
||||
prevLeaseID clientv3.LeaseID
|
||||
prevLeaseExpirationTime time.Time
|
||||
// The period of time in seconds and percent of TTL that each lease is
|
||||
// reused. The minimum of them is used to avoid unreasonably large
|
||||
// numbers. We use var instead of const for testing purposes.
|
||||
leaseReuseDurationSeconds int64
|
||||
leaseReuseDurationPercent float64
|
||||
}
|
||||
|
||||
// newDefaultLeaseManager creates a new lease manager using default setting.
|
||||
func newDefaultLeaseManager(client *clientv3.Client) *leaseManager {
|
||||
return newLeaseManager(client, 60, 0.05)
|
||||
}
|
||||
|
||||
// newLeaseManager creates a new lease manager with the number of buffered
|
||||
// leases, lease reuse duration in seconds and percentage. The percentage
|
||||
// value x means x*100%.
|
||||
func newLeaseManager(client *clientv3.Client, leaseReuseDurationSeconds int64, leaseReuseDurationPercent float64) *leaseManager {
|
||||
return &leaseManager{
|
||||
client: client,
|
||||
leaseReuseDurationSeconds: leaseReuseDurationSeconds,
|
||||
leaseReuseDurationPercent: leaseReuseDurationPercent,
|
||||
}
|
||||
}
|
||||
|
||||
// setLeaseReuseDurationSeconds is used for testing purpose. It is used to
|
||||
// reduce the extra lease duration to avoid unnecessary timeout in testing.
|
||||
func (l *leaseManager) setLeaseReuseDurationSeconds(duration int64) {
|
||||
l.leaseMu.Lock()
|
||||
defer l.leaseMu.Unlock()
|
||||
l.leaseReuseDurationSeconds = duration
|
||||
}
|
||||
|
||||
// GetLease returns a lease based on requested ttl: if the cached previous
|
||||
// lease can be reused, reuse it; otherwise request a new one from etcd.
|
||||
func (l *leaseManager) GetLease(ctx context.Context, ttl int64) (clientv3.LeaseID, error) {
|
||||
now := time.Now()
|
||||
l.leaseMu.Lock()
|
||||
defer l.leaseMu.Unlock()
|
||||
// check if previous lease can be reused
|
||||
reuseDurationSeconds := l.getReuseDurationSecondsLocked(ttl)
|
||||
valid := now.Add(time.Duration(ttl) * time.Second).Before(l.prevLeaseExpirationTime)
|
||||
sufficient := now.Add(time.Duration(ttl+reuseDurationSeconds) * time.Second).After(l.prevLeaseExpirationTime)
|
||||
if valid && sufficient {
|
||||
return l.prevLeaseID, nil
|
||||
}
|
||||
// request a lease with a little extra ttl from etcd
|
||||
ttl += reuseDurationSeconds
|
||||
lcr, err := l.client.Lease.Grant(ctx, ttl)
|
||||
if err != nil {
|
||||
return clientv3.LeaseID(0), err
|
||||
}
|
||||
// cache the new lease id
|
||||
l.prevLeaseID = lcr.ID
|
||||
l.prevLeaseExpirationTime = now.Add(time.Duration(ttl) * time.Second)
|
||||
return lcr.ID, nil
|
||||
}
|
||||
|
||||
// getReuseDurationSecondsLocked returns the reusable duration in seconds
|
||||
// based on the configuration. Lock has to be acquired before calling this
|
||||
// function.
|
||||
func (l *leaseManager) getReuseDurationSecondsLocked(ttl int64) int64 {
|
||||
reuseDurationSeconds := int64(l.leaseReuseDurationPercent * float64(ttl))
|
||||
if reuseDurationSeconds > l.leaseReuseDurationSeconds {
|
||||
reuseDurationSeconds = l.leaseReuseDurationSeconds
|
||||
}
|
||||
return reuseDurationSeconds
|
||||
}
|
|
@ -0,0 +1,795 @@
|
|||
/*
|
||||
Copyright 2016 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package etcd3
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/base64"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"path"
|
||||
"reflect"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"k8s.io/klog"
|
||||
|
||||
"github.com/ibuildthecloud/kvsql/clientv3"
|
||||
apierrors "k8s.io/apimachinery/pkg/api/errors"
|
||||
"k8s.io/apimachinery/pkg/api/meta"
|
||||
"k8s.io/apimachinery/pkg/conversion"
|
||||
"k8s.io/apimachinery/pkg/runtime"
|
||||
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
|
||||
"k8s.io/apimachinery/pkg/watch"
|
||||
"k8s.io/apiserver/pkg/storage"
|
||||
"k8s.io/apiserver/pkg/storage/etcd"
|
||||
"k8s.io/apiserver/pkg/storage/value"
|
||||
utiltrace "k8s.io/utils/trace"
|
||||
)
|
||||
|
||||
// authenticatedDataString is a string whose bytes are used as the
// authenticated data for a stored value; in this file it is always built from
// the storage key. Binding the data to the key prevents an attacker from
// substituting an encrypted value taken from a different key, but it does not
// defend against replay of an older value stored under the same key. Including
// the etcd3 Version field (incremented on every write, reset on delete) would
// be stronger, though an attacker with write access to etcd could still force
// a delete-and-recreate to weaken that.
type authenticatedDataString string

// AuthenticatedData returns the string's bytes, implementing value.Context.
func (d authenticatedDataString) AuthenticatedData() []byte {
	return []byte(d)
}
|
||||
|
||||
// Compile-time check that authenticatedDataString satisfies value.Context.
var _ value.Context = authenticatedDataString("")
|
||||
|
||||
type store struct {
|
||||
client *clientv3.Client
|
||||
// getOpts contains additional options that should be passed
|
||||
// to all Get() calls.
|
||||
getOps []clientv3.OpOption
|
||||
codec runtime.Codec
|
||||
versioner storage.Versioner
|
||||
transformer value.Transformer
|
||||
pathPrefix string
|
||||
watcher *watcher
|
||||
pagingEnabled bool
|
||||
leaseManager *leaseManager
|
||||
}
|
||||
|
||||
type objState struct {
|
||||
obj runtime.Object
|
||||
meta *storage.ResponseMeta
|
||||
rev int64
|
||||
data []byte
|
||||
stale bool
|
||||
}
|
||||
|
||||
// New returns an etcd3 implementation of storage.Interface.
|
||||
func New(c *clientv3.Client, codec runtime.Codec, prefix string, transformer value.Transformer, pagingEnabled bool) storage.Interface {
|
||||
return newStore(c, true, pagingEnabled, codec, prefix, transformer)
|
||||
}
|
||||
|
||||
func newStore(c *clientv3.Client, quorumRead, pagingEnabled bool, codec runtime.Codec, prefix string, transformer value.Transformer) *store {
|
||||
versioner := etcd.APIObjectVersioner{}
|
||||
result := &store{
|
||||
client: c,
|
||||
codec: codec,
|
||||
versioner: versioner,
|
||||
transformer: transformer,
|
||||
pagingEnabled: pagingEnabled,
|
||||
// for compatibility with etcd2 impl.
|
||||
// no-op for default prefix of '/registry'.
|
||||
// keeps compatibility with etcd2 impl for custom prefixes that don't start with '/'
|
||||
pathPrefix: path.Join("/", prefix),
|
||||
watcher: newWatcher(c, codec, versioner, transformer),
|
||||
leaseManager: newDefaultLeaseManager(c),
|
||||
}
|
||||
if !quorumRead {
|
||||
// In case of non-quorum reads, we can set WithSerializable()
|
||||
// options for all Get operations.
|
||||
result.getOps = append(result.getOps, clientv3.WithSerializable())
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
// Versioner implements storage.Interface.Versioner.
|
||||
func (s *store) Versioner() storage.Versioner {
|
||||
return s.versioner
|
||||
}
|
||||
|
||||
// Get implements storage.Interface.Get.
|
||||
func (s *store) Get(ctx context.Context, key string, resourceVersion string, out runtime.Object, ignoreNotFound bool) error {
|
||||
key = path.Join(s.pathPrefix, key)
|
||||
getResp, err := s.client.KV.Get(ctx, key, s.getOps...)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if len(getResp.Kvs) == 0 {
|
||||
if ignoreNotFound {
|
||||
return runtime.SetZeroValue(out)
|
||||
}
|
||||
return storage.NewKeyNotFoundError(key, 0)
|
||||
}
|
||||
kv := getResp.Kvs[0]
|
||||
|
||||
data, _, err := s.transformer.TransformFromStorage(kv.Value, authenticatedDataString(key))
|
||||
if err != nil {
|
||||
return storage.NewInternalError(err.Error())
|
||||
}
|
||||
|
||||
return decode(s.codec, s.versioner, data, out, kv.ModRevision)
|
||||
}
|
||||
|
||||
// Create implements storage.Interface.Create.
|
||||
func (s *store) Create(ctx context.Context, key string, obj, out runtime.Object, ttl uint64) error {
|
||||
if version, err := s.versioner.ObjectResourceVersion(obj); err == nil && version != 0 {
|
||||
return errors.New("resourceVersion should not be set on objects to be created")
|
||||
}
|
||||
if err := s.versioner.PrepareObjectForStorage(obj); err != nil {
|
||||
return fmt.Errorf("PrepareObjectForStorage failed: %v", err)
|
||||
}
|
||||
data, err := runtime.Encode(s.codec, obj)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
key = path.Join(s.pathPrefix, key)
|
||||
|
||||
opts, err := s.ttlOpts(ctx, int64(ttl))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
newData, err := s.transformer.TransformToStorage(data, authenticatedDataString(key))
|
||||
if err != nil {
|
||||
return storage.NewInternalError(err.Error())
|
||||
}
|
||||
|
||||
txnResp, err := s.client.KV.Txn(ctx).If(
|
||||
notFound(key),
|
||||
).Then(
|
||||
clientv3.OpPut(key, string(newData), opts...),
|
||||
).Commit()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if !txnResp.Succeeded {
|
||||
return storage.NewKeyExistsError(key, 0)
|
||||
}
|
||||
|
||||
if out != nil {
|
||||
putResp := txnResp.Responses[0].GetResponsePut()
|
||||
return decode(s.codec, s.versioner, data, out, putResp.Header.Revision)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Delete implements storage.Interface.Delete.
|
||||
func (s *store) Delete(ctx context.Context, key string, out runtime.Object, preconditions *storage.Preconditions) error {
|
||||
v, err := conversion.EnforcePtr(out)
|
||||
if err != nil {
|
||||
panic("unable to convert output object to pointer")
|
||||
}
|
||||
key = path.Join(s.pathPrefix, key)
|
||||
if preconditions == nil {
|
||||
return s.unconditionalDelete(ctx, key, out)
|
||||
}
|
||||
return s.conditionalDelete(ctx, key, out, v, preconditions)
|
||||
}
|
||||
|
||||
func (s *store) unconditionalDelete(ctx context.Context, key string, out runtime.Object) error {
|
||||
// We need to do get and delete in single transaction in order to
|
||||
// know the value and revision before deleting it.
|
||||
txnResp, err := s.client.KV.Txn(ctx).If().Then(
|
||||
clientv3.OpGet(key),
|
||||
clientv3.OpDelete(key),
|
||||
).Commit()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
getResp := txnResp.Responses[0].GetResponseRange()
|
||||
if len(getResp.Kvs) == 0 {
|
||||
return storage.NewKeyNotFoundError(key, 0)
|
||||
}
|
||||
|
||||
kv := getResp.Kvs[0]
|
||||
data, _, err := s.transformer.TransformFromStorage(kv.Value, authenticatedDataString(key))
|
||||
if err != nil {
|
||||
return storage.NewInternalError(err.Error())
|
||||
}
|
||||
return decode(s.codec, s.versioner, data, out, kv.ModRevision)
|
||||
}
|
||||
|
||||
func (s *store) conditionalDelete(ctx context.Context, key string, out runtime.Object, v reflect.Value, preconditions *storage.Preconditions) error {
|
||||
getResp, err := s.client.KV.Get(ctx, key)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
for {
|
||||
origState, err := s.getState(getResp, key, v, false)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err := preconditions.Check(key, origState.obj); err != nil {
|
||||
return err
|
||||
}
|
||||
txnResp, err := s.client.KV.Txn(ctx).If(
|
||||
clientv3.Compare(clientv3.ModRevision(key), "=", origState.rev),
|
||||
).Then(
|
||||
clientv3.OpDelete(key),
|
||||
).Else(
|
||||
clientv3.OpGet(key),
|
||||
).Commit()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if !txnResp.Succeeded {
|
||||
getResp = (*clientv3.GetResponse)(txnResp.Responses[0].GetResponseRange())
|
||||
klog.V(4).Infof("deletion of %s failed because of a conflict, going to retry", key)
|
||||
continue
|
||||
}
|
||||
return decode(s.codec, s.versioner, origState.data, out, origState.rev)
|
||||
}
|
||||
}
|
||||
|
||||
// GuaranteedUpdate implements storage.Interface.GuaranteedUpdate.
|
||||
func (s *store) GuaranteedUpdate(
|
||||
ctx context.Context, key string, out runtime.Object, ignoreNotFound bool,
|
||||
preconditions *storage.Preconditions, tryUpdate storage.UpdateFunc, suggestion ...runtime.Object) error {
|
||||
trace := utiltrace.New(fmt.Sprintf("GuaranteedUpdate etcd3: %s", reflect.TypeOf(out).String()))
|
||||
defer trace.LogIfLong(500 * time.Millisecond)
|
||||
|
||||
v, err := conversion.EnforcePtr(out)
|
||||
if err != nil {
|
||||
panic("unable to convert output object to pointer")
|
||||
}
|
||||
key = path.Join(s.pathPrefix, key)
|
||||
|
||||
getCurrentState := func() (*objState, error) {
|
||||
getResp, err := s.client.KV.Get(ctx, key, s.getOps...)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return s.getState(getResp, key, v, ignoreNotFound)
|
||||
}
|
||||
|
||||
var origState *objState
|
||||
var mustCheckData bool
|
||||
if len(suggestion) == 1 && suggestion[0] != nil {
|
||||
origState, err = s.getStateFromObject(suggestion[0])
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
mustCheckData = true
|
||||
} else {
|
||||
origState, err = getCurrentState()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
trace.Step("initial value restored")
|
||||
|
||||
transformContext := authenticatedDataString(key)
|
||||
for {
|
||||
if err := preconditions.Check(key, origState.obj); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
ret, ttl, err := s.updateState(origState, tryUpdate)
|
||||
if err != nil {
|
||||
// It's possible we were working with stale data
|
||||
if mustCheckData && apierrors.IsConflict(err) {
|
||||
// Actually fetch
|
||||
origState, err = getCurrentState()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
mustCheckData = false
|
||||
// Retry
|
||||
continue
|
||||
}
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
data, err := runtime.Encode(s.codec, ret)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if !origState.stale && bytes.Equal(data, origState.data) {
|
||||
// if we skipped the original Get in this loop, we must refresh from
|
||||
// etcd in order to be sure the data in the store is equivalent to
|
||||
// our desired serialization
|
||||
if mustCheckData {
|
||||
origState, err = getCurrentState()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
mustCheckData = false
|
||||
if !bytes.Equal(data, origState.data) {
|
||||
// original data changed, restart loop
|
||||
continue
|
||||
}
|
||||
}
|
||||
// recheck that the data from etcd is not stale before short-circuiting a write
|
||||
if !origState.stale {
|
||||
return decode(s.codec, s.versioner, origState.data, out, origState.rev)
|
||||
}
|
||||
}
|
||||
|
||||
newData, err := s.transformer.TransformToStorage(data, transformContext)
|
||||
if err != nil {
|
||||
return storage.NewInternalError(err.Error())
|
||||
}
|
||||
|
||||
opts, err := s.ttlOpts(ctx, int64(ttl))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
trace.Step("Transaction prepared")
|
||||
|
||||
txnResp, err := s.client.KV.Txn(ctx).If(
|
||||
clientv3.Compare(clientv3.ModRevision(key), "=", origState.rev),
|
||||
).Then(
|
||||
clientv3.OpPut(key, string(newData), opts...),
|
||||
).Else(
|
||||
clientv3.OpGet(key),
|
||||
).Commit()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
trace.Step("Transaction committed")
|
||||
if !txnResp.Succeeded {
|
||||
getResp := (*clientv3.GetResponse)(txnResp.Responses[0].GetResponseRange())
|
||||
klog.V(4).Infof("GuaranteedUpdate of %s failed because of a conflict, going to retry", key)
|
||||
origState, err = s.getState(getResp, key, v, ignoreNotFound)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
trace.Step("Retry value restored")
|
||||
mustCheckData = false
|
||||
continue
|
||||
}
|
||||
putResp := txnResp.Responses[0].GetResponsePut()
|
||||
|
||||
return decode(s.codec, s.versioner, data, out, putResp.Header.Revision)
|
||||
}
|
||||
}
|
||||
|
||||
// GetToList implements storage.Interface.GetToList.
|
||||
func (s *store) GetToList(ctx context.Context, key string, resourceVersion string, pred storage.SelectionPredicate, listObj runtime.Object) error {
|
||||
listPtr, err := meta.GetItemsPtr(listObj)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
v, err := conversion.EnforcePtr(listPtr)
|
||||
if err != nil || v.Kind() != reflect.Slice {
|
||||
panic("need ptr to slice")
|
||||
}
|
||||
|
||||
key = path.Join(s.pathPrefix, key)
|
||||
getResp, err := s.client.KV.Get(ctx, key, s.getOps...)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if len(getResp.Kvs) > 0 {
|
||||
data, _, err := s.transformer.TransformFromStorage(getResp.Kvs[0].Value, authenticatedDataString(key))
|
||||
if err != nil {
|
||||
return storage.NewInternalError(err.Error())
|
||||
}
|
||||
if err := appendListItem(v, data, uint64(getResp.Kvs[0].ModRevision), pred, s.codec, s.versioner); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
// update version with cluster level revision
|
||||
return s.versioner.UpdateList(listObj, uint64(getResp.Header.Revision), "")
|
||||
}
|
||||
|
||||
func (s *store) Count(key string) (int64, error) {
|
||||
key = path.Join(s.pathPrefix, key)
|
||||
getResp, err := s.client.KV.Get(context.Background(), key, clientv3.WithRange(clientv3.GetPrefixRangeEnd(key)), clientv3.WithCountOnly())
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
return getResp.Count, nil
|
||||
}
|
||||
|
||||
// continueToken is the JSON structure carried (base64-encoded) inside a list
// continue token.
//
// This is a public API struct and cannot change.
// TODO: if the version of the encoded form changes, the new version cannot be
// emitted until all other servers are upgraded (i.e. rolling schema support is
// needed).
type continueToken struct {
	// APIVersion identifies the encoding version of the token.
	APIVersion string `json:"v"`
	// ResourceVersion is the revision the paged list is pinned to.
	ResourceVersion int64 `json:"rv"`
	// StartKey is the key to resume from, relative to the list's key prefix.
	StartKey string `json:"start"`
}
|
||||
|
||||
// parseFrom transforms an encoded predicate from into a versioned struct.
|
||||
// TODO: return a typed error that instructs clients that they must relist
|
||||
func decodeContinue(continueValue, keyPrefix string) (fromKey string, rv int64, err error) {
|
||||
data, err := base64.RawURLEncoding.DecodeString(continueValue)
|
||||
if err != nil {
|
||||
return "", 0, fmt.Errorf("continue key is not valid: %v", err)
|
||||
}
|
||||
var c continueToken
|
||||
if err := json.Unmarshal(data, &c); err != nil {
|
||||
return "", 0, fmt.Errorf("continue key is not valid: %v", err)
|
||||
}
|
||||
switch c.APIVersion {
|
||||
case "meta.k8s.io/v1":
|
||||
if c.ResourceVersion == 0 {
|
||||
return "", 0, fmt.Errorf("continue key is not valid: incorrect encoded start resourceVersion (version meta.k8s.io/v1)")
|
||||
}
|
||||
if len(c.StartKey) == 0 {
|
||||
return "", 0, fmt.Errorf("continue key is not valid: encoded start key empty (version meta.k8s.io/v1)")
|
||||
}
|
||||
// defend against path traversal attacks by clients - path.Clean will ensure that startKey cannot
|
||||
// be at a higher level of the hierarchy, and so when we append the key prefix we will end up with
|
||||
// continue start key that is fully qualified and cannot range over anything less specific than
|
||||
// keyPrefix.
|
||||
key := c.StartKey
|
||||
if !strings.HasPrefix(key, "/") {
|
||||
key = "/" + key
|
||||
}
|
||||
cleaned := path.Clean(key)
|
||||
if cleaned != key {
|
||||
return "", 0, fmt.Errorf("continue key is not valid: %s", c.StartKey)
|
||||
}
|
||||
return keyPrefix + cleaned[1:], c.ResourceVersion, nil
|
||||
default:
|
||||
return "", 0, fmt.Errorf("continue key is not valid: server does not recognize this encoded version %q", c.APIVersion)
|
||||
}
|
||||
}
|
||||
|
||||
// encodeContinue returns a string representing the encoded continuation of the current query.
|
||||
func encodeContinue(key, keyPrefix string, resourceVersion int64) (string, error) {
|
||||
nextKey := strings.TrimPrefix(key, keyPrefix)
|
||||
if nextKey == key {
|
||||
return "", fmt.Errorf("unable to encode next field: the key and key prefix do not match")
|
||||
}
|
||||
out, err := json.Marshal(&continueToken{APIVersion: "meta.k8s.io/v1", ResourceVersion: resourceVersion, StartKey: nextKey})
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
return base64.RawURLEncoding.EncodeToString(out), nil
|
||||
}
|
||||
|
||||
// List implements storage.Interface.List.
|
||||
func (s *store) List(ctx context.Context, key, resourceVersion string, pred storage.SelectionPredicate, listObj runtime.Object) error {
|
||||
listPtr, err := meta.GetItemsPtr(listObj)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
v, err := conversion.EnforcePtr(listPtr)
|
||||
if err != nil || v.Kind() != reflect.Slice {
|
||||
panic("need ptr to slice")
|
||||
}
|
||||
|
||||
if s.pathPrefix != "" {
|
||||
key = path.Join(s.pathPrefix, key)
|
||||
}
|
||||
// We need to make sure the key ended with "/" so that we only get children "directories".
|
||||
// e.g. if we have key "/a", "/a/b", "/ab", getting keys with prefix "/a" will return all three,
|
||||
// while with prefix "/a/" will return only "/a/b" which is the correct answer.
|
||||
if !strings.HasSuffix(key, "/") {
|
||||
key += "/"
|
||||
}
|
||||
keyPrefix := key
|
||||
|
||||
// set the appropriate clientv3 options to filter the returned data set
|
||||
var paging bool
|
||||
options := make([]clientv3.OpOption, 0, 4)
|
||||
if s.pagingEnabled && pred.Limit > 0 {
|
||||
paging = true
|
||||
options = append(options, clientv3.WithLimit(pred.Limit))
|
||||
}
|
||||
|
||||
var returnedRV, continueRV int64
|
||||
var continueKey string
|
||||
switch {
|
||||
case s.pagingEnabled && len(pred.Continue) > 0:
|
||||
continueKey, continueRV, err = decodeContinue(pred.Continue, keyPrefix)
|
||||
if err != nil {
|
||||
return apierrors.NewBadRequest(fmt.Sprintf("invalid continue token: %v", err))
|
||||
}
|
||||
|
||||
if len(resourceVersion) > 0 && resourceVersion != "0" {
|
||||
return apierrors.NewBadRequest("specifying resource version is not allowed when using continue")
|
||||
}
|
||||
|
||||
rangeEnd := clientv3.GetPrefixRangeEnd(keyPrefix)
|
||||
options = append(options, clientv3.WithRange(rangeEnd))
|
||||
key = continueKey
|
||||
|
||||
// If continueRV > 0, the LIST request needs a specific resource version.
|
||||
// continueRV==0 is invalid.
|
||||
// If continueRV < 0, the request is for the latest resource version.
|
||||
if continueRV > 0 {
|
||||
options = append(options, clientv3.WithRev(continueRV))
|
||||
returnedRV = continueRV
|
||||
}
|
||||
case s.pagingEnabled && pred.Limit > 0:
|
||||
if len(resourceVersion) > 0 {
|
||||
fromRV, err := s.versioner.ParseResourceVersion(resourceVersion)
|
||||
if err != nil {
|
||||
return apierrors.NewBadRequest(fmt.Sprintf("invalid resource version: %v", err))
|
||||
}
|
||||
if fromRV > 0 {
|
||||
options = append(options, clientv3.WithRev(int64(fromRV)))
|
||||
}
|
||||
returnedRV = int64(fromRV)
|
||||
}
|
||||
|
||||
rangeEnd := clientv3.GetPrefixRangeEnd(keyPrefix)
|
||||
options = append(options, clientv3.WithRange(rangeEnd))
|
||||
|
||||
default:
|
||||
if len(resourceVersion) > 0 {
|
||||
fromRV, err := s.versioner.ParseResourceVersion(resourceVersion)
|
||||
if err != nil {
|
||||
return apierrors.NewBadRequest(fmt.Sprintf("invalid resource version: %v", err))
|
||||
}
|
||||
if fromRV > 0 {
|
||||
options = append(options, clientv3.WithRev(int64(fromRV)))
|
||||
}
|
||||
returnedRV = int64(fromRV)
|
||||
}
|
||||
|
||||
options = append(options, clientv3.WithPrefix())
|
||||
}
|
||||
|
||||
// loop until we have filled the requested limit from etcd or there are no more results
|
||||
var lastKey []byte
|
||||
var hasMore bool
|
||||
for {
|
||||
getResp, err := s.client.KV.Get(ctx, key, options...)
|
||||
if err != nil {
|
||||
return interpretListError(err, len(pred.Continue) > 0, continueKey, keyPrefix)
|
||||
}
|
||||
hasMore = getResp.More
|
||||
|
||||
if len(getResp.Kvs) == 0 && getResp.More {
|
||||
return fmt.Errorf("no results were found, but etcd indicated there were more values remaining")
|
||||
}
|
||||
|
||||
// avoid small allocations for the result slice, since this can be called in many
|
||||
// different contexts and we don't know how significantly the result will be filtered
|
||||
if pred.Empty() {
|
||||
growSlice(v, len(getResp.Kvs))
|
||||
} else {
|
||||
growSlice(v, 2048, len(getResp.Kvs))
|
||||
}
|
||||
|
||||
// take items from the response until the bucket is full, filtering as we go
|
||||
for _, kv := range getResp.Kvs {
|
||||
if paging && int64(v.Len()) >= pred.Limit {
|
||||
hasMore = true
|
||||
break
|
||||
}
|
||||
lastKey = kv.Key
|
||||
|
||||
data, _, err := s.transformer.TransformFromStorage(kv.Value, authenticatedDataString(kv.Key))
|
||||
if err != nil {
|
||||
utilruntime.HandleError(fmt.Errorf("unable to transform key %q: %v", kv.Key, err))
|
||||
continue
|
||||
}
|
||||
|
||||
if err := appendListItem(v, data, uint64(kv.ModRevision), pred, s.codec, s.versioner); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// indicate to the client which resource version was returned
|
||||
if returnedRV == 0 {
|
||||
returnedRV = getResp.Header.Revision
|
||||
}
|
||||
|
||||
// no more results remain or we didn't request paging
|
||||
if !hasMore || !paging {
|
||||
break
|
||||
}
|
||||
// we're paging but we have filled our bucket
|
||||
if int64(v.Len()) >= pred.Limit {
|
||||
break
|
||||
}
|
||||
key = string(lastKey) + "\x00"
|
||||
}
|
||||
|
||||
// instruct the client to begin querying from immediately after the last key we returned
|
||||
// we never return a key that the client wouldn't be allowed to see
|
||||
if hasMore {
|
||||
// we want to start immediately after the last key
|
||||
next, err := encodeContinue(string(lastKey)+"\x00", keyPrefix, returnedRV)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return s.versioner.UpdateList(listObj, uint64(returnedRV), next)
|
||||
}
|
||||
|
||||
// no continuation
|
||||
return s.versioner.UpdateList(listObj, uint64(returnedRV), "")
|
||||
}
|
||||
|
||||
// growSlice ensures the slice held by v has capacity for the largest of the
// passed sizes, bounded above by maxCapacity. Beyond maxCapacity, allocation
// decisions are left to the Go runtime on append. This lets a caller make an
// educated guess about the eventual size of a list while avoiding an overly
// aggressive initial allocation. If sizes is empty, maxCapacity is used as the
// size to grow to.
//
// v must be a settable slice value. Elements already present in the slice are
// preserved; the slice is never shrunk, and nothing happens when the current
// capacity already suffices.
func growSlice(v reflect.Value, maxCapacity int, sizes ...int) {
	current := v.Cap()
	target := current
	for _, size := range sizes {
		if size > target {
			target = size
		}
	}
	if len(sizes) == 0 || target > maxCapacity {
		target = maxCapacity
	}
	if target <= current {
		return
	}
	// Allocate with the existing length so reflect.Copy carries every element
	// over; a zero-length destination would copy nothing and drop the items
	// already collected. target exceeds the current capacity here, so it also
	// exceeds the current length.
	grown := reflect.MakeSlice(v.Type(), v.Len(), target)
	reflect.Copy(grown, v)
	v.Set(grown)
}
|
||||
|
||||
// Watch implements storage.Interface.Watch.
|
||||
func (s *store) Watch(ctx context.Context, key string, resourceVersion string, pred storage.SelectionPredicate) (watch.Interface, error) {
|
||||
return s.watch(ctx, key, resourceVersion, pred, false)
|
||||
}
|
||||
|
||||
// WatchList implements storage.Interface.WatchList.
|
||||
func (s *store) WatchList(ctx context.Context, key string, resourceVersion string, pred storage.SelectionPredicate) (watch.Interface, error) {
|
||||
return s.watch(ctx, key, resourceVersion, pred, true)
|
||||
}
|
||||
|
||||
func (s *store) watch(ctx context.Context, key string, rv string, pred storage.SelectionPredicate, recursive bool) (watch.Interface, error) {
|
||||
rev, err := s.versioner.ParseResourceVersion(rv)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
key = path.Join(s.pathPrefix, key)
|
||||
return s.watcher.Watch(ctx, key, int64(rev), recursive, pred)
|
||||
}
|
||||
|
||||
func (s *store) getState(getResp *clientv3.GetResponse, key string, v reflect.Value, ignoreNotFound bool) (*objState, error) {
|
||||
state := &objState{
|
||||
obj: reflect.New(v.Type()).Interface().(runtime.Object),
|
||||
meta: &storage.ResponseMeta{},
|
||||
}
|
||||
if len(getResp.Kvs) == 0 {
|
||||
if !ignoreNotFound {
|
||||
return nil, storage.NewKeyNotFoundError(key, 0)
|
||||
}
|
||||
if err := runtime.SetZeroValue(state.obj); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
} else {
|
||||
data, stale, err := s.transformer.TransformFromStorage(getResp.Kvs[0].Value, authenticatedDataString(key))
|
||||
if err != nil {
|
||||
return nil, storage.NewInternalError(err.Error())
|
||||
}
|
||||
state.rev = getResp.Kvs[0].ModRevision
|
||||
state.meta.ResourceVersion = uint64(state.rev)
|
||||
state.data = data
|
||||
state.stale = stale
|
||||
if err := decode(s.codec, s.versioner, state.data, state.obj, state.rev); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
return state, nil
|
||||
}
|
||||
|
||||
func (s *store) getStateFromObject(obj runtime.Object) (*objState, error) {
|
||||
state := &objState{
|
||||
obj: obj,
|
||||
meta: &storage.ResponseMeta{},
|
||||
}
|
||||
|
||||
rv, err := s.versioner.ObjectResourceVersion(obj)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("couldn't get resource version: %v", err)
|
||||
}
|
||||
state.rev = int64(rv)
|
||||
state.meta.ResourceVersion = uint64(state.rev)
|
||||
|
||||
// Compute the serialized form - for that we need to temporarily clean
|
||||
// its resource version field (those are not stored in etcd).
|
||||
if err := s.versioner.PrepareObjectForStorage(obj); err != nil {
|
||||
return nil, fmt.Errorf("PrepareObjectForStorage failed: %v", err)
|
||||
}
|
||||
state.data, err = runtime.Encode(s.codec, obj)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
s.versioner.UpdateObject(state.obj, uint64(rv))
|
||||
return state, nil
|
||||
}
|
||||
|
||||
func (s *store) updateState(st *objState, userUpdate storage.UpdateFunc) (runtime.Object, uint64, error) {
|
||||
ret, ttlPtr, err := userUpdate(st.obj, *st.meta)
|
||||
if err != nil {
|
||||
return nil, 0, err
|
||||
}
|
||||
|
||||
if err := s.versioner.PrepareObjectForStorage(ret); err != nil {
|
||||
return nil, 0, fmt.Errorf("PrepareObjectForStorage failed: %v", err)
|
||||
}
|
||||
var ttl uint64
|
||||
if ttlPtr != nil {
|
||||
ttl = *ttlPtr
|
||||
}
|
||||
return ret, ttl, nil
|
||||
}
|
||||
|
||||
// ttlOpts returns client options based on given ttl.
|
||||
// ttl: if ttl is non-zero, it will attach the key to a lease with ttl of roughly the same length
|
||||
func (s *store) ttlOpts(ctx context.Context, ttl int64) ([]clientv3.OpOption, error) {
|
||||
if ttl == 0 {
|
||||
return nil, nil
|
||||
}
|
||||
id, err := s.leaseManager.GetLease(ctx, ttl)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return []clientv3.OpOption{clientv3.WithLease(id)}, nil
|
||||
}
|
||||
|
||||
// decode decodes value of bytes into object. It will also set the object resource version to rev.
|
||||
// On success, objPtr would be set to the object.
|
||||
func decode(codec runtime.Codec, versioner storage.Versioner, value []byte, objPtr runtime.Object, rev int64) error {
|
||||
if _, err := conversion.EnforcePtr(objPtr); err != nil {
|
||||
panic("unable to convert output object to pointer")
|
||||
}
|
||||
_, _, err := codec.Decode(value, nil, objPtr)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// being unable to set the version does not prevent the object from being extracted
|
||||
versioner.UpdateObject(objPtr, uint64(rev))
|
||||
return nil
|
||||
}
|
||||
|
||||
// appendListItem decodes and appends the object (if it passes filter) to v, which must be a slice.
|
||||
func appendListItem(v reflect.Value, data []byte, rev uint64, pred storage.SelectionPredicate, codec runtime.Codec, versioner storage.Versioner) error {
|
||||
obj, _, err := codec.Decode(data, nil, reflect.New(v.Type().Elem()).Interface().(runtime.Object))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// being unable to set the version does not prevent the object from being extracted
|
||||
versioner.UpdateObject(obj, rev)
|
||||
if matched, err := pred.Matches(obj); err == nil && matched {
|
||||
v.Set(reflect.Append(v, reflect.ValueOf(obj).Elem()))
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func notFound(key string) clientv3.Cmp {
|
||||
return clientv3.Compare(clientv3.ModRevision(key), "=", 0)
|
||||
}
|
|
@ -0,0 +1,402 @@
|
|||
/*
|
||||
Copyright 2016 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package etcd3
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"k8s.io/klog"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
apierrs "k8s.io/apimachinery/pkg/api/errors"
|
||||
"k8s.io/apimachinery/pkg/runtime"
|
||||
"k8s.io/apimachinery/pkg/watch"
|
||||
"k8s.io/apiserver/pkg/storage"
|
||||
"k8s.io/apiserver/pkg/storage/value"
|
||||
|
||||
"github.com/ibuildthecloud/kvsql/clientv3"
|
||||
)
|
||||
|
||||
// Channel buffer sizes. A buffer is used in order to reduce the number of
// context switches.
// NOTE(review): presumably these size the watch event channels declared on
// watchChan; the code that uses them is not visible here, so confirm there.
const (
	incomingBufSize = 100
	outgoingBufSize = 100
)
|
||||
|
||||
var (
	// fatalOnDecodeError is used during testing to panic the server if the
	// watcher encounters a decoding error. It defaults to false.
	fatalOnDecodeError bool

	// errTestingDecode is the only panic value that testingDeferOnDecodeError
	// recovers from.
	errTestingDecode = errors.New("sentinel error only used during testing to indicate watch decoding error")
)
|
||||
|
||||
// testingDeferOnDecodeError is used during testing to recover from a panic caused by errTestingDecode, all other values continue to panic
|
||||
func testingDeferOnDecodeError() {
|
||||
if r := recover(); r != nil && r != errTestingDecode {
|
||||
panic(r)
|
||||
}
|
||||
}
|
||||
|
||||
func init() {
|
||||
// check to see if we are running in a test environment
|
||||
fatalOnDecodeError, _ = strconv.ParseBool(os.Getenv("KUBE_PANIC_WATCH_DECODE_ERROR"))
|
||||
}
|
||||
|
||||
// watcher bundles the dependencies shared by every watchChan it creates.
type watcher struct {
|
||||
// client issues the Get and Watch calls made by watchChan.
client *clientv3.Client
|
||||
// codec is passed to decodeObj to decode stored bytes into objects.
codec runtime.Codec
|
||||
// versioner is passed to decodeObj to stamp the revision onto decoded objects.
versioner storage.Versioner
|
||||
// transformer is applied to stored values (TransformFromStorage) before decoding.
transformer value.Transformer
|
||||
}
|
||||
|
||||
// watchChan implements watch.Interface.
// It holds the state of a single watch started by watcher.Watch.
|
||||
type watchChan struct {
|
||||
// watcher is the parent that created this watchChan.
watcher *watcher
|
||||
// key is the key passed to the client's Get and Watch calls.
key string
|
||||
// initialRev is the requested revision; when it is 0, startWatching calls sync,
// which overwrites it with the revision from the Get response. The watch starts at initialRev+1.
initialRev int64
|
||||
// recursive adds clientv3.WithPrefix() to the Get and Watch options.
recursive bool
|
||||
// internalPred is the predicate used by filter and acceptAll.
internalPred storage.SelectionPredicate
|
||||
// ctx is derived from the caller's context with context.WithCancel.
ctx context.Context
|
||||
// cancel cancels ctx; it is called by Stop and by run.
cancel context.CancelFunc
|
||||
// incomingEventChan carries events from sendEvent to processEvent.
incomingEventChan chan *event
|
||||
// resultChan is returned by ResultChan and closed by run.
resultChan chan watch.Event
|
||||
// errChan carries errors from sendError to run; it is created with capacity 1.
errChan chan error
|
||||
}
|
||||
|
||||
func newWatcher(client *clientv3.Client, codec runtime.Codec, versioner storage.Versioner, transformer value.Transformer) *watcher {
|
||||
return &watcher{
|
||||
client: client,
|
||||
codec: codec,
|
||||
versioner: versioner,
|
||||
transformer: transformer,
|
||||
}
|
||||
}
|
||||
|
||||
// Watch watches on a key and returns a watch.Interface that transfers relevant notifications.
|
||||
// If rev is zero, it will return the existing object(s) and then start watching from
|
||||
// the maximum revision+1 from returned objects.
|
||||
// If rev is non-zero, it will watch events happened after given revision.
|
||||
// If recursive is false, it watches on given key.
|
||||
// If recursive is true, it watches any children and directories under the key, excluding the root key itself.
|
||||
// pred must be non-nil. Only if pred matches the change, it will be returned.
|
||||
func (w *watcher) Watch(ctx context.Context, key string, rev int64, recursive bool, pred storage.SelectionPredicate) (watch.Interface, error) {
|
||||
if recursive && !strings.HasSuffix(key, "/") {
|
||||
key += "/"
|
||||
}
|
||||
wc := w.createWatchChan(ctx, key, rev, recursive, pred)
|
||||
go wc.run()
|
||||
return wc, nil
|
||||
}
|
||||
|
||||
func (w *watcher) createWatchChan(ctx context.Context, key string, rev int64, recursive bool, pred storage.SelectionPredicate) *watchChan {
|
||||
wc := &watchChan{
|
||||
watcher: w,
|
||||
key: key,
|
||||
initialRev: rev,
|
||||
recursive: recursive,
|
||||
internalPred: pred,
|
||||
incomingEventChan: make(chan *event, incomingBufSize),
|
||||
resultChan: make(chan watch.Event, outgoingBufSize),
|
||||
errChan: make(chan error, 1),
|
||||
}
|
||||
if pred.Empty() {
|
||||
// The filter doesn't filter out any object.
|
||||
wc.internalPred = storage.Everything
|
||||
}
|
||||
wc.ctx, wc.cancel = context.WithCancel(ctx)
|
||||
return wc
|
||||
}
|
||||
|
||||
func (wc *watchChan) run() {
|
||||
watchClosedCh := make(chan struct{})
|
||||
go wc.startWatching(watchClosedCh)
|
||||
|
||||
var resultChanWG sync.WaitGroup
|
||||
resultChanWG.Add(1)
|
||||
go wc.processEvent(&resultChanWG)
|
||||
|
||||
select {
|
||||
case err := <-wc.errChan:
|
||||
if err == context.Canceled {
|
||||
break
|
||||
}
|
||||
errResult := transformErrorToEvent(err)
|
||||
if errResult != nil {
|
||||
// error result is guaranteed to be received by user before closing ResultChan.
|
||||
select {
|
||||
case wc.resultChan <- *errResult:
|
||||
case <-wc.ctx.Done(): // user has given up all results
|
||||
}
|
||||
}
|
||||
case <-watchClosedCh:
|
||||
case <-wc.ctx.Done(): // user cancel
|
||||
}
|
||||
|
||||
// We use wc.ctx to reap all goroutines. Under whatever condition, we should stop them all.
|
||||
// It's fine to double cancel.
|
||||
wc.cancel()
|
||||
|
||||
// we need to wait until resultChan wouldn't be used anymore
|
||||
resultChanWG.Wait()
|
||||
close(wc.resultChan)
|
||||
}
|
||||
|
||||
func (wc *watchChan) Stop() {
|
||||
wc.cancel()
|
||||
}
|
||||
|
||||
func (wc *watchChan) ResultChan() <-chan watch.Event {
|
||||
return wc.resultChan
|
||||
}
|
||||
|
||||
// sync tries to retrieve existing data and send them to process.
|
||||
// The revision to watch will be set to the revision in response.
|
||||
// All events sent will have isCreated=true
|
||||
func (wc *watchChan) sync() error {
|
||||
opts := []clientv3.OpOption{}
|
||||
if wc.recursive {
|
||||
opts = append(opts, clientv3.WithPrefix())
|
||||
}
|
||||
getResp, err := wc.watcher.client.Get(wc.ctx, wc.key, opts...)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
wc.initialRev = getResp.Header.Revision
|
||||
for _, kv := range getResp.Kvs {
|
||||
wc.sendEvent(parseKV(kv))
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// startWatching does:
|
||||
// - get current objects if initialRev=0; set initialRev to current rev
|
||||
// - watch on given key and send events to process.
|
||||
func (wc *watchChan) startWatching(watchClosedCh chan struct{}) {
|
||||
if wc.initialRev == 0 {
|
||||
if err := wc.sync(); err != nil {
|
||||
klog.Errorf("failed to sync with latest state: %v", err)
|
||||
wc.sendError(err)
|
||||
return
|
||||
}
|
||||
}
|
||||
opts := []clientv3.OpOption{clientv3.WithRev(wc.initialRev + 1), clientv3.WithPrevKV()}
|
||||
if wc.recursive {
|
||||
opts = append(opts, clientv3.WithPrefix())
|
||||
}
|
||||
wch := wc.watcher.client.Watch(wc.ctx, wc.key, opts...)
|
||||
for wres := range wch {
|
||||
if wres.Err() != nil {
|
||||
err := wres.Err()
|
||||
// If there is an error on server (e.g. compaction), the channel will return it before closed.
|
||||
klog.Errorf("watch chan error: %v", err)
|
||||
wc.sendError(err)
|
||||
return
|
||||
}
|
||||
for _, e := range wres.Events {
|
||||
wc.sendEvent(parseEvent(e))
|
||||
}
|
||||
}
|
||||
// When we come to this point, it's only possible that client side ends the watch.
|
||||
// e.g. cancel the context, close the client.
|
||||
// If this watch chan is broken and context isn't cancelled, other goroutines will still hang.
|
||||
// We should notify the main thread that this goroutine has exited.
|
||||
close(watchClosedCh)
|
||||
}
|
||||
|
||||
// processEvent processes events from etcd watcher and sends results to resultChan.
|
||||
func (wc *watchChan) processEvent(wg *sync.WaitGroup) {
|
||||
defer wg.Done()
|
||||
|
||||
for {
|
||||
select {
|
||||
case e := <-wc.incomingEventChan:
|
||||
res := wc.transform(e)
|
||||
if res == nil {
|
||||
continue
|
||||
}
|
||||
if len(wc.resultChan) == outgoingBufSize {
|
||||
klog.V(3).Infof("Fast watcher, slow processing. Number of buffered events: %d."+
|
||||
"Probably caused by slow dispatching events to watchers", outgoingBufSize)
|
||||
}
|
||||
// If user couldn't receive results fast enough, we also block incoming events from watcher.
|
||||
// Because storing events in local will cause more memory usage.
|
||||
// The worst case would be closing the fast watcher.
|
||||
select {
|
||||
case wc.resultChan <- *res:
|
||||
case <-wc.ctx.Done():
|
||||
return
|
||||
}
|
||||
case <-wc.ctx.Done():
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (wc *watchChan) filter(obj runtime.Object) bool {
|
||||
if wc.internalPred.Empty() {
|
||||
return true
|
||||
}
|
||||
matched, err := wc.internalPred.Matches(obj)
|
||||
return err == nil && matched
|
||||
}
|
||||
|
||||
func (wc *watchChan) acceptAll() bool {
|
||||
return wc.internalPred.Empty()
|
||||
}
|
||||
|
||||
// transform transforms an event into a result for user if not filtered.
|
||||
func (wc *watchChan) transform(e *event) (res *watch.Event) {
|
||||
curObj, oldObj, err := wc.prepareObjs(e)
|
||||
if err != nil {
|
||||
klog.Errorf("failed to prepare current and previous objects: %v", err)
|
||||
wc.sendError(err)
|
||||
return nil
|
||||
}
|
||||
|
||||
switch {
|
||||
case e.isDeleted:
|
||||
if !wc.filter(oldObj) {
|
||||
return nil
|
||||
}
|
||||
res = &watch.Event{
|
||||
Type: watch.Deleted,
|
||||
Object: oldObj,
|
||||
}
|
||||
case e.isCreated:
|
||||
if !wc.filter(curObj) {
|
||||
return nil
|
||||
}
|
||||
res = &watch.Event{
|
||||
Type: watch.Added,
|
||||
Object: curObj,
|
||||
}
|
||||
default:
|
||||
if wc.acceptAll() {
|
||||
res = &watch.Event{
|
||||
Type: watch.Modified,
|
||||
Object: curObj,
|
||||
}
|
||||
return res
|
||||
}
|
||||
curObjPasses := wc.filter(curObj)
|
||||
oldObjPasses := wc.filter(oldObj)
|
||||
switch {
|
||||
case curObjPasses && oldObjPasses:
|
||||
res = &watch.Event{
|
||||
Type: watch.Modified,
|
||||
Object: curObj,
|
||||
}
|
||||
case curObjPasses && !oldObjPasses:
|
||||
res = &watch.Event{
|
||||
Type: watch.Added,
|
||||
Object: curObj,
|
||||
}
|
||||
case !curObjPasses && oldObjPasses:
|
||||
res = &watch.Event{
|
||||
Type: watch.Deleted,
|
||||
Object: oldObj,
|
||||
}
|
||||
}
|
||||
}
|
||||
return res
|
||||
}
|
||||
|
||||
func transformErrorToEvent(err error) *watch.Event {
|
||||
err = interpretWatchError(err)
|
||||
if _, ok := err.(apierrs.APIStatus); !ok {
|
||||
err = apierrs.NewInternalError(err)
|
||||
}
|
||||
status := err.(apierrs.APIStatus).Status()
|
||||
return &watch.Event{
|
||||
Type: watch.Error,
|
||||
Object: &status,
|
||||
}
|
||||
}
|
||||
|
||||
func (wc *watchChan) sendError(err error) {
|
||||
select {
|
||||
case wc.errChan <- err:
|
||||
case <-wc.ctx.Done():
|
||||
}
|
||||
}
|
||||
|
||||
func (wc *watchChan) sendEvent(e *event) {
|
||||
if len(wc.incomingEventChan) == incomingBufSize {
|
||||
klog.V(3).Infof("Fast watcher, slow processing. Number of buffered events: %d."+
|
||||
"Probably caused by slow decoding, user not receiving fast, or other processing logic",
|
||||
incomingBufSize)
|
||||
}
|
||||
select {
|
||||
case wc.incomingEventChan <- e:
|
||||
case <-wc.ctx.Done():
|
||||
}
|
||||
}
|
||||
|
||||
func (wc *watchChan) prepareObjs(e *event) (curObj runtime.Object, oldObj runtime.Object, err error) {
|
||||
if !e.isDeleted {
|
||||
data, _, err := wc.watcher.transformer.TransformFromStorage(e.value, authenticatedDataString(e.key))
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
curObj, err = decodeObj(wc.watcher.codec, wc.watcher.versioner, data, e.rev)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
}
|
||||
// We need to decode prevValue, only if this is deletion event or
|
||||
// the underlying filter doesn't accept all objects (otherwise we
|
||||
// know that the filter for previous object will return true and
|
||||
// we need the object only to compute whether it was filtered out
|
||||
// before).
|
||||
if len(e.prevValue) > 0 && (e.isDeleted || !wc.acceptAll()) {
|
||||
data, _, err := wc.watcher.transformer.TransformFromStorage(e.prevValue, authenticatedDataString(e.key))
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
// Note that this sends the *old* object with the etcd revision for the time at
|
||||
// which it gets deleted.
|
||||
oldObj, err = decodeObj(wc.watcher.codec, wc.watcher.versioner, data, e.rev)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
}
|
||||
return curObj, oldObj, nil
|
||||
}
|
||||
|
||||
func decodeObj(codec runtime.Codec, versioner storage.Versioner, data []byte, rev int64) (_ runtime.Object, err error) {
|
||||
obj, err := runtime.Decode(codec, []byte(data))
|
||||
if err != nil {
|
||||
if fatalOnDecodeError {
|
||||
// catch watch decode error iff we caused it on
|
||||
// purpose during a unit test
|
||||
defer testingDeferOnDecodeError()
|
||||
// we are running in a test environment and thus an
|
||||
// error here is due to a coder mistake if the defer
|
||||
// does not catch it
|
||||
panic(err)
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
// ensure resource version is set on the object we load from etcd
|
||||
if err := versioner.UpdateObject(obj, uint64(rev)); err != nil {
|
||||
return nil, fmt.Errorf("failure to version api object (%d) %#v: %v", rev, obj, err)
|
||||
}
|
||||
return obj, nil
|
||||
}
|
Loading…
Reference in New Issue