mirror of https://github.com/k3s-io/k3s
211 lines
5.5 KiB
Go
211 lines
5.5 KiB
Go
// Copyright 2017 The etcd Authors
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
package backend
|
|
|
|
import (
|
|
"bytes"
|
|
"math"
|
|
"sync"
|
|
|
|
bolt "go.etcd.io/bbolt"
|
|
)
|
|
|
|
// safeRangeBucket names the one bucket on which multi-key range reads are
// permitted. Range reads can surface duplicate keys when a bucket has had
// keys overwritten, so every other bucket must be read with limit=1; this
// bucket is known never to overwrite a key, which makes ranging over it safe.
var safeRangeBucket = []byte("key")
|
|
|
|
// ReadTx is the read-side transaction interface: a set of lock methods plus
// range and iteration reads over a named bucket.
//
// NOTE(review): the "Unsafe" prefix presumably means the caller must already
// hold the appropriate lock; confirm against the callers.
type ReadTx interface {
	// Lock and Unlock take and release the exclusive lock.
	Lock()
	Unlock()
	// RLock and RUnlock take and release the shared (read) lock.
	RLock()
	RUnlock()

	// UnsafeRange reads key (or the span delimited by key and endKey) from
	// bucketName, returning at most limit entries as parallel key and value
	// slices.
	UnsafeRange(bucketName, key, endKey []byte, limit int64) (keys [][]byte, vals [][]byte)
	// UnsafeForEach calls visitor for the entries of bucketName and returns
	// the first error encountered, if any.
	UnsafeForEach(bucketName []byte, visitor func(k, v []byte) error) error
}
|
|
|
|
type readTx struct {
|
|
// mu protects accesses to the txReadBuffer
|
|
mu sync.RWMutex
|
|
buf txReadBuffer
|
|
|
|
// TODO: group and encapsulate {txMu, tx, buckets, txWg}, as they share the same lifecycle.
|
|
// txMu protects accesses to buckets and tx on Range requests.
|
|
txMu sync.RWMutex
|
|
tx *bolt.Tx
|
|
buckets map[string]*bolt.Bucket
|
|
// txWg protects tx from being rolled back at the end of a batch interval until all reads using this tx are done.
|
|
txWg *sync.WaitGroup
|
|
}
|
|
|
|
// Lock takes the write lock on rt.mu, the mutex guarding the read buffer.
func (rt *readTx) Lock() {
	rt.mu.Lock()
}
|
|
// Unlock releases the write lock on rt.mu.
func (rt *readTx) Unlock() {
	rt.mu.Unlock()
}
|
|
// RLock takes the shared (read) lock on rt.mu.
func (rt *readTx) RLock() {
	rt.mu.RLock()
}
|
|
// RUnlock releases the shared (read) lock on rt.mu.
func (rt *readTx) RUnlock() {
	rt.mu.RUnlock()
}
|
|
|
|
func (rt *readTx) UnsafeRange(bucketName, key, endKey []byte, limit int64) ([][]byte, [][]byte) {
|
|
if endKey == nil {
|
|
// forbid duplicates for single keys
|
|
limit = 1
|
|
}
|
|
if limit <= 0 {
|
|
limit = math.MaxInt64
|
|
}
|
|
if limit > 1 && !bytes.Equal(bucketName, safeRangeBucket) {
|
|
panic("do not use unsafeRange on non-keys bucket")
|
|
}
|
|
keys, vals := rt.buf.Range(bucketName, key, endKey, limit)
|
|
if int64(len(keys)) == limit {
|
|
return keys, vals
|
|
}
|
|
|
|
// find/cache bucket
|
|
bn := string(bucketName)
|
|
rt.txMu.RLock()
|
|
bucket, ok := rt.buckets[bn]
|
|
rt.txMu.RUnlock()
|
|
if !ok {
|
|
rt.txMu.Lock()
|
|
bucket = rt.tx.Bucket(bucketName)
|
|
rt.buckets[bn] = bucket
|
|
rt.txMu.Unlock()
|
|
}
|
|
|
|
// ignore missing bucket since may have been created in this batch
|
|
if bucket == nil {
|
|
return keys, vals
|
|
}
|
|
rt.txMu.Lock()
|
|
c := bucket.Cursor()
|
|
rt.txMu.Unlock()
|
|
|
|
k2, v2 := unsafeRange(c, key, endKey, limit-int64(len(keys)))
|
|
return append(k2, keys...), append(v2, vals...)
|
|
}
|
|
|
|
func (rt *readTx) UnsafeForEach(bucketName []byte, visitor func(k, v []byte) error) error {
|
|
dups := make(map[string]struct{})
|
|
getDups := func(k, v []byte) error {
|
|
dups[string(k)] = struct{}{}
|
|
return nil
|
|
}
|
|
visitNoDup := func(k, v []byte) error {
|
|
if _, ok := dups[string(k)]; ok {
|
|
return nil
|
|
}
|
|
return visitor(k, v)
|
|
}
|
|
if err := rt.buf.ForEach(bucketName, getDups); err != nil {
|
|
return err
|
|
}
|
|
rt.txMu.Lock()
|
|
err := unsafeForEach(rt.tx, bucketName, visitNoDup)
|
|
rt.txMu.Unlock()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
return rt.buf.ForEach(bucketName, visitor)
|
|
}
|
|
|
|
func (rt *readTx) reset() {
|
|
rt.buf.reset()
|
|
rt.buckets = make(map[string]*bolt.Bucket)
|
|
rt.tx = nil
|
|
rt.txWg = new(sync.WaitGroup)
|
|
}
|
|
|
|
// TODO: create a base type for readTx and concurrentReadTx to avoid duplicated function implementation?
|
|
type concurrentReadTx struct {
|
|
buf txReadBuffer
|
|
txMu *sync.RWMutex
|
|
tx *bolt.Tx
|
|
buckets map[string]*bolt.Bucket
|
|
txWg *sync.WaitGroup
|
|
}
|
|
|
|
// Lock is a no-op; it exists so that concurrentReadTx provides the ReadTx method set.
func (rt *concurrentReadTx) Lock() {}
|
|
// Unlock is a no-op, mirroring Lock.
func (rt *concurrentReadTx) Unlock() {}
|
|
|
|
// RLock is no-op. concurrentReadTx does not need to be locked after it is created.
|
|
func (rt *concurrentReadTx) RLock() {}
|
|
|
|
// RUnlock signals the end of concurrentReadTx.
|
|
func (rt *concurrentReadTx) RUnlock() { rt.txWg.Done() }
|
|
|
|
func (rt *concurrentReadTx) UnsafeForEach(bucketName []byte, visitor func(k, v []byte) error) error {
|
|
dups := make(map[string]struct{})
|
|
getDups := func(k, v []byte) error {
|
|
dups[string(k)] = struct{}{}
|
|
return nil
|
|
}
|
|
visitNoDup := func(k, v []byte) error {
|
|
if _, ok := dups[string(k)]; ok {
|
|
return nil
|
|
}
|
|
return visitor(k, v)
|
|
}
|
|
if err := rt.buf.ForEach(bucketName, getDups); err != nil {
|
|
return err
|
|
}
|
|
rt.txMu.Lock()
|
|
err := unsafeForEach(rt.tx, bucketName, visitNoDup)
|
|
rt.txMu.Unlock()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
return rt.buf.ForEach(bucketName, visitor)
|
|
}
|
|
|
|
func (rt *concurrentReadTx) UnsafeRange(bucketName, key, endKey []byte, limit int64) ([][]byte, [][]byte) {
|
|
if endKey == nil {
|
|
// forbid duplicates for single keys
|
|
limit = 1
|
|
}
|
|
if limit <= 0 {
|
|
limit = math.MaxInt64
|
|
}
|
|
if limit > 1 && !bytes.Equal(bucketName, safeRangeBucket) {
|
|
panic("do not use unsafeRange on non-keys bucket")
|
|
}
|
|
keys, vals := rt.buf.Range(bucketName, key, endKey, limit)
|
|
if int64(len(keys)) == limit {
|
|
return keys, vals
|
|
}
|
|
|
|
// find/cache bucket
|
|
bn := string(bucketName)
|
|
rt.txMu.RLock()
|
|
bucket, ok := rt.buckets[bn]
|
|
rt.txMu.RUnlock()
|
|
if !ok {
|
|
rt.txMu.Lock()
|
|
bucket = rt.tx.Bucket(bucketName)
|
|
rt.buckets[bn] = bucket
|
|
rt.txMu.Unlock()
|
|
}
|
|
|
|
// ignore missing bucket since may have been created in this batch
|
|
if bucket == nil {
|
|
return keys, vals
|
|
}
|
|
rt.txMu.Lock()
|
|
c := bucket.Cursor()
|
|
rt.txMu.Unlock()
|
|
|
|
k2, v2 := unsafeRange(c, key, endKey, limit-int64(len(keys)))
|
|
return append(k2, keys...), append(v2, vals...)
|
|
}
|