mirror of https://github.com/prometheus/prometheus
663 lines
18 KiB
Go
663 lines
18 KiB
Go
package iradix
|
|
|
|
import (
|
|
"bytes"
|
|
"strings"
|
|
|
|
"github.com/hashicorp/golang-lru/simplelru"
|
|
)
|
|
|
|
// defaultModifiedCache bounds the per-transaction cache of modified nodes.
// Only updates to nodes near the root are worth caching; the leaves do not
// need to be, and without a bound a very large transaction would let the
// cache grow to be enormous. The same limit is used as the maximum size of
// the mutation notify map, which should be bounded in a similar way.
const defaultModifiedCache = 8192
|
|
|
|
// Tree implements an immutable radix tree. This can be treated as a
|
|
// Dictionary abstract data type. The main advantage over a standard
|
|
// hash map is prefix-based lookups and ordered iteration. The immutability
|
|
// means that it is safe to concurrently read from a Tree without any
|
|
// coordination.
|
|
type Tree struct {
|
|
root *Node
|
|
size int
|
|
}
|
|
|
|
// New returns an empty Tree
|
|
func New() *Tree {
|
|
t := &Tree{
|
|
root: &Node{
|
|
mutateCh: make(chan struct{}),
|
|
},
|
|
}
|
|
return t
|
|
}
|
|
|
|
// Len is used to return the number of elements in the tree
|
|
func (t *Tree) Len() int {
|
|
return t.size
|
|
}
|
|
|
|
// Txn is a transaction on the tree. This transaction is applied
|
|
// atomically and returns a new tree when committed. A transaction
|
|
// is not thread safe, and should only be used by a single goroutine.
|
|
type Txn struct {
|
|
// root is the modified root for the transaction.
|
|
root *Node
|
|
|
|
// snap is a snapshot of the root node for use if we have to run the
|
|
// slow notify algorithm.
|
|
snap *Node
|
|
|
|
// size tracks the size of the tree as it is modified during the
|
|
// transaction.
|
|
size int
|
|
|
|
// writable is a cache of writable nodes that have been created during
|
|
// the course of the transaction. This allows us to re-use the same
|
|
// nodes for further writes and avoid unnecessary copies of nodes that
|
|
// have never been exposed outside the transaction. This will only hold
|
|
// up to defaultModifiedCache number of entries.
|
|
writable *simplelru.LRU
|
|
|
|
// trackChannels is used to hold channels that need to be notified to
|
|
// signal mutation of the tree. This will only hold up to
|
|
// defaultModifiedCache number of entries, after which we will set the
|
|
// trackOverflow flag, which will cause us to use a more expensive
|
|
// algorithm to perform the notifications. Mutation tracking is only
|
|
// performed if trackMutate is true.
|
|
trackChannels map[chan struct{}]struct{}
|
|
trackOverflow bool
|
|
trackMutate bool
|
|
}
|
|
|
|
// Txn starts a new transaction that can be used to mutate the tree
|
|
func (t *Tree) Txn() *Txn {
|
|
txn := &Txn{
|
|
root: t.root,
|
|
snap: t.root,
|
|
size: t.size,
|
|
}
|
|
return txn
|
|
}
|
|
|
|
// TrackMutate can be used to toggle if mutations are tracked. If this is enabled
|
|
// then notifications will be issued for affected internal nodes and leaves when
|
|
// the transaction is committed.
|
|
func (t *Txn) TrackMutate(track bool) {
|
|
t.trackMutate = track
|
|
}
|
|
|
|
// trackChannel safely attempts to track the given mutation channel, setting the
|
|
// overflow flag if we can no longer track any more. This limits the amount of
|
|
// state that will accumulate during a transaction and we have a slower algorithm
|
|
// to switch to if we overflow.
|
|
func (t *Txn) trackChannel(ch chan struct{}) {
|
|
// In overflow, make sure we don't store any more objects.
|
|
if t.trackOverflow {
|
|
return
|
|
}
|
|
|
|
// If this would overflow the state we reject it and set the flag (since
|
|
// we aren't tracking everything that's required any longer).
|
|
if len(t.trackChannels) >= defaultModifiedCache {
|
|
// Mark that we are in the overflow state
|
|
t.trackOverflow = true
|
|
|
|
// Clear the map so that the channels can be garbage collected. It is
|
|
// safe to do this since we have already overflowed and will be using
|
|
// the slow notify algorithm.
|
|
t.trackChannels = nil
|
|
return
|
|
}
|
|
|
|
// Create the map on the fly when we need it.
|
|
if t.trackChannels == nil {
|
|
t.trackChannels = make(map[chan struct{}]struct{})
|
|
}
|
|
|
|
// Otherwise we are good to track it.
|
|
t.trackChannels[ch] = struct{}{}
|
|
}
|
|
|
|
// writeNode returns a node to be modified, if the current node has already been
|
|
// modified during the course of the transaction, it is used in-place. Set
|
|
// forLeafUpdate to true if you are getting a write node to update the leaf,
|
|
// which will set leaf mutation tracking appropriately as well.
|
|
func (t *Txn) writeNode(n *Node, forLeafUpdate bool) *Node {
|
|
// Ensure the writable set exists.
|
|
if t.writable == nil {
|
|
lru, err := simplelru.NewLRU(defaultModifiedCache, nil)
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
t.writable = lru
|
|
}
|
|
|
|
// If this node has already been modified, we can continue to use it
|
|
// during this transaction. We know that we don't need to track it for
|
|
// a node update since the node is writable, but if this is for a leaf
|
|
// update we track it, in case the initial write to this node didn't
|
|
// update the leaf.
|
|
if _, ok := t.writable.Get(n); ok {
|
|
if t.trackMutate && forLeafUpdate && n.leaf != nil {
|
|
t.trackChannel(n.leaf.mutateCh)
|
|
}
|
|
return n
|
|
}
|
|
|
|
// Mark this node as being mutated.
|
|
if t.trackMutate {
|
|
t.trackChannel(n.mutateCh)
|
|
}
|
|
|
|
// Mark its leaf as being mutated, if appropriate.
|
|
if t.trackMutate && forLeafUpdate && n.leaf != nil {
|
|
t.trackChannel(n.leaf.mutateCh)
|
|
}
|
|
|
|
// Copy the existing node. If you have set forLeafUpdate it will be
|
|
// safe to replace this leaf with another after you get your node for
|
|
// writing. You MUST replace it, because the channel associated with
|
|
// this leaf will be closed when this transaction is committed.
|
|
nc := &Node{
|
|
mutateCh: make(chan struct{}),
|
|
leaf: n.leaf,
|
|
}
|
|
if n.prefix != nil {
|
|
nc.prefix = make([]byte, len(n.prefix))
|
|
copy(nc.prefix, n.prefix)
|
|
}
|
|
if len(n.edges) != 0 {
|
|
nc.edges = make([]edge, len(n.edges))
|
|
copy(nc.edges, n.edges)
|
|
}
|
|
|
|
// Mark this node as writable.
|
|
t.writable.Add(nc, nil)
|
|
return nc
|
|
}
|
|
|
|
// Visit all the nodes in the tree under n, and add their mutateChannels to the transaction
|
|
// Returns the size of the subtree visited
|
|
func (t *Txn) trackChannelsAndCount(n *Node) int {
|
|
// Count only leaf nodes
|
|
leaves := 0
|
|
if n.leaf != nil {
|
|
leaves = 1
|
|
}
|
|
// Mark this node as being mutated.
|
|
if t.trackMutate {
|
|
t.trackChannel(n.mutateCh)
|
|
}
|
|
|
|
// Mark its leaf as being mutated, if appropriate.
|
|
if t.trackMutate && n.leaf != nil {
|
|
t.trackChannel(n.leaf.mutateCh)
|
|
}
|
|
|
|
// Recurse on the children
|
|
for _, e := range n.edges {
|
|
leaves += t.trackChannelsAndCount(e.node)
|
|
}
|
|
return leaves
|
|
}
|
|
|
|
// mergeChild is called to collapse the given node with its child. This is only
|
|
// called when the given node is not a leaf and has a single edge.
|
|
func (t *Txn) mergeChild(n *Node) {
|
|
// Mark the child node as being mutated since we are about to abandon
|
|
// it. We don't need to mark the leaf since we are retaining it if it
|
|
// is there.
|
|
e := n.edges[0]
|
|
child := e.node
|
|
if t.trackMutate {
|
|
t.trackChannel(child.mutateCh)
|
|
}
|
|
|
|
// Merge the nodes.
|
|
n.prefix = concat(n.prefix, child.prefix)
|
|
n.leaf = child.leaf
|
|
if len(child.edges) != 0 {
|
|
n.edges = make([]edge, len(child.edges))
|
|
copy(n.edges, child.edges)
|
|
} else {
|
|
n.edges = nil
|
|
}
|
|
}
|
|
|
|
// insert does a recursive insertion
|
|
func (t *Txn) insert(n *Node, k, search []byte, v interface{}) (*Node, interface{}, bool) {
|
|
// Handle key exhaustion
|
|
if len(search) == 0 {
|
|
var oldVal interface{}
|
|
didUpdate := false
|
|
if n.isLeaf() {
|
|
oldVal = n.leaf.val
|
|
didUpdate = true
|
|
}
|
|
|
|
nc := t.writeNode(n, true)
|
|
nc.leaf = &leafNode{
|
|
mutateCh: make(chan struct{}),
|
|
key: k,
|
|
val: v,
|
|
}
|
|
return nc, oldVal, didUpdate
|
|
}
|
|
|
|
// Look for the edge
|
|
idx, child := n.getEdge(search[0])
|
|
|
|
// No edge, create one
|
|
if child == nil {
|
|
e := edge{
|
|
label: search[0],
|
|
node: &Node{
|
|
mutateCh: make(chan struct{}),
|
|
leaf: &leafNode{
|
|
mutateCh: make(chan struct{}),
|
|
key: k,
|
|
val: v,
|
|
},
|
|
prefix: search,
|
|
},
|
|
}
|
|
nc := t.writeNode(n, false)
|
|
nc.addEdge(e)
|
|
return nc, nil, false
|
|
}
|
|
|
|
// Determine longest prefix of the search key on match
|
|
commonPrefix := longestPrefix(search, child.prefix)
|
|
if commonPrefix == len(child.prefix) {
|
|
search = search[commonPrefix:]
|
|
newChild, oldVal, didUpdate := t.insert(child, k, search, v)
|
|
if newChild != nil {
|
|
nc := t.writeNode(n, false)
|
|
nc.edges[idx].node = newChild
|
|
return nc, oldVal, didUpdate
|
|
}
|
|
return nil, oldVal, didUpdate
|
|
}
|
|
|
|
// Split the node
|
|
nc := t.writeNode(n, false)
|
|
splitNode := &Node{
|
|
mutateCh: make(chan struct{}),
|
|
prefix: search[:commonPrefix],
|
|
}
|
|
nc.replaceEdge(edge{
|
|
label: search[0],
|
|
node: splitNode,
|
|
})
|
|
|
|
// Restore the existing child node
|
|
modChild := t.writeNode(child, false)
|
|
splitNode.addEdge(edge{
|
|
label: modChild.prefix[commonPrefix],
|
|
node: modChild,
|
|
})
|
|
modChild.prefix = modChild.prefix[commonPrefix:]
|
|
|
|
// Create a new leaf node
|
|
leaf := &leafNode{
|
|
mutateCh: make(chan struct{}),
|
|
key: k,
|
|
val: v,
|
|
}
|
|
|
|
// If the new key is a subset, add to to this node
|
|
search = search[commonPrefix:]
|
|
if len(search) == 0 {
|
|
splitNode.leaf = leaf
|
|
return nc, nil, false
|
|
}
|
|
|
|
// Create a new edge for the node
|
|
splitNode.addEdge(edge{
|
|
label: search[0],
|
|
node: &Node{
|
|
mutateCh: make(chan struct{}),
|
|
leaf: leaf,
|
|
prefix: search,
|
|
},
|
|
})
|
|
return nc, nil, false
|
|
}
|
|
|
|
// delete does a recursive deletion
|
|
func (t *Txn) delete(parent, n *Node, search []byte) (*Node, *leafNode) {
|
|
// Check for key exhaustion
|
|
if len(search) == 0 {
|
|
if !n.isLeaf() {
|
|
return nil, nil
|
|
}
|
|
// Copy the pointer in case we are in a transaction that already
|
|
// modified this node since the node will be reused. Any changes
|
|
// made to the node will not affect returning the original leaf
|
|
// value.
|
|
oldLeaf := n.leaf
|
|
|
|
// Remove the leaf node
|
|
nc := t.writeNode(n, true)
|
|
nc.leaf = nil
|
|
|
|
// Check if this node should be merged
|
|
if n != t.root && len(nc.edges) == 1 {
|
|
t.mergeChild(nc)
|
|
}
|
|
return nc, oldLeaf
|
|
}
|
|
|
|
// Look for an edge
|
|
label := search[0]
|
|
idx, child := n.getEdge(label)
|
|
if child == nil || !bytes.HasPrefix(search, child.prefix) {
|
|
return nil, nil
|
|
}
|
|
|
|
// Consume the search prefix
|
|
search = search[len(child.prefix):]
|
|
newChild, leaf := t.delete(n, child, search)
|
|
if newChild == nil {
|
|
return nil, nil
|
|
}
|
|
|
|
// Copy this node. WATCH OUT - it's safe to pass "false" here because we
|
|
// will only ADD a leaf via nc.mergeChild() if there isn't one due to
|
|
// the !nc.isLeaf() check in the logic just below. This is pretty subtle,
|
|
// so be careful if you change any of the logic here.
|
|
nc := t.writeNode(n, false)
|
|
|
|
// Delete the edge if the node has no edges
|
|
if newChild.leaf == nil && len(newChild.edges) == 0 {
|
|
nc.delEdge(label)
|
|
if n != t.root && len(nc.edges) == 1 && !nc.isLeaf() {
|
|
t.mergeChild(nc)
|
|
}
|
|
} else {
|
|
nc.edges[idx].node = newChild
|
|
}
|
|
return nc, leaf
|
|
}
|
|
|
|
// delete does a recursive deletion
|
|
func (t *Txn) deletePrefix(parent, n *Node, search []byte) (*Node, int) {
|
|
// Check for key exhaustion
|
|
if len(search) == 0 {
|
|
nc := t.writeNode(n, true)
|
|
if n.isLeaf() {
|
|
nc.leaf = nil
|
|
}
|
|
nc.edges = nil
|
|
return nc, t.trackChannelsAndCount(n)
|
|
}
|
|
|
|
// Look for an edge
|
|
label := search[0]
|
|
idx, child := n.getEdge(label)
|
|
// We make sure that either the child node's prefix starts with the search term, or the search term starts with the child node's prefix
|
|
// Need to do both so that we can delete prefixes that don't correspond to any node in the tree
|
|
if child == nil || (!bytes.HasPrefix(child.prefix, search) && !bytes.HasPrefix(search, child.prefix)) {
|
|
return nil, 0
|
|
}
|
|
|
|
// Consume the search prefix
|
|
if len(child.prefix) > len(search) {
|
|
search = []byte("")
|
|
} else {
|
|
search = search[len(child.prefix):]
|
|
}
|
|
newChild, numDeletions := t.deletePrefix(n, child, search)
|
|
if newChild == nil {
|
|
return nil, 0
|
|
}
|
|
// Copy this node. WATCH OUT - it's safe to pass "false" here because we
|
|
// will only ADD a leaf via nc.mergeChild() if there isn't one due to
|
|
// the !nc.isLeaf() check in the logic just below. This is pretty subtle,
|
|
// so be careful if you change any of the logic here.
|
|
|
|
nc := t.writeNode(n, false)
|
|
|
|
// Delete the edge if the node has no edges
|
|
if newChild.leaf == nil && len(newChild.edges) == 0 {
|
|
nc.delEdge(label)
|
|
if n != t.root && len(nc.edges) == 1 && !nc.isLeaf() {
|
|
t.mergeChild(nc)
|
|
}
|
|
} else {
|
|
nc.edges[idx].node = newChild
|
|
}
|
|
return nc, numDeletions
|
|
}
|
|
|
|
// Insert is used to add or update a given key. The return provides
|
|
// the previous value and a bool indicating if any was set.
|
|
func (t *Txn) Insert(k []byte, v interface{}) (interface{}, bool) {
|
|
newRoot, oldVal, didUpdate := t.insert(t.root, k, k, v)
|
|
if newRoot != nil {
|
|
t.root = newRoot
|
|
}
|
|
if !didUpdate {
|
|
t.size++
|
|
}
|
|
return oldVal, didUpdate
|
|
}
|
|
|
|
// Delete is used to delete a given key. Returns the old value if any,
|
|
// and a bool indicating if the key was set.
|
|
func (t *Txn) Delete(k []byte) (interface{}, bool) {
|
|
newRoot, leaf := t.delete(nil, t.root, k)
|
|
if newRoot != nil {
|
|
t.root = newRoot
|
|
}
|
|
if leaf != nil {
|
|
t.size--
|
|
return leaf.val, true
|
|
}
|
|
return nil, false
|
|
}
|
|
|
|
// DeletePrefix is used to delete an entire subtree that matches the prefix
|
|
// This will delete all nodes under that prefix
|
|
func (t *Txn) DeletePrefix(prefix []byte) bool {
|
|
newRoot, numDeletions := t.deletePrefix(nil, t.root, prefix)
|
|
if newRoot != nil {
|
|
t.root = newRoot
|
|
t.size = t.size - numDeletions
|
|
return true
|
|
}
|
|
return false
|
|
|
|
}
|
|
|
|
// Root returns the current root of the radix tree within this
|
|
// transaction. The root is not safe across insert and delete operations,
|
|
// but can be used to read the current state during a transaction.
|
|
func (t *Txn) Root() *Node {
|
|
return t.root
|
|
}
|
|
|
|
// Get is used to lookup a specific key, returning
|
|
// the value and if it was found
|
|
func (t *Txn) Get(k []byte) (interface{}, bool) {
|
|
return t.root.Get(k)
|
|
}
|
|
|
|
// GetWatch is used to lookup a specific key, returning
|
|
// the watch channel, value and if it was found
|
|
func (t *Txn) GetWatch(k []byte) (<-chan struct{}, interface{}, bool) {
|
|
return t.root.GetWatch(k)
|
|
}
|
|
|
|
// Commit is used to finalize the transaction and return a new tree. If mutation
|
|
// tracking is turned on then notifications will also be issued.
|
|
func (t *Txn) Commit() *Tree {
|
|
nt := t.CommitOnly()
|
|
if t.trackMutate {
|
|
t.Notify()
|
|
}
|
|
return nt
|
|
}
|
|
|
|
// CommitOnly is used to finalize the transaction and return a new tree, but
|
|
// does not issue any notifications until Notify is called.
|
|
func (t *Txn) CommitOnly() *Tree {
|
|
nt := &Tree{t.root, t.size}
|
|
t.writable = nil
|
|
return nt
|
|
}
|
|
|
|
// slowNotify does a complete comparison of the before and after trees in order
|
|
// to trigger notifications. This doesn't require any additional state but it
|
|
// is very expensive to compute.
|
|
func (t *Txn) slowNotify() {
|
|
snapIter := t.snap.rawIterator()
|
|
rootIter := t.root.rawIterator()
|
|
for snapIter.Front() != nil || rootIter.Front() != nil {
|
|
// If we've exhausted the nodes in the old snapshot, we know
|
|
// there's nothing remaining to notify.
|
|
if snapIter.Front() == nil {
|
|
return
|
|
}
|
|
snapElem := snapIter.Front()
|
|
|
|
// If we've exhausted the nodes in the new root, we know we need
|
|
// to invalidate everything that remains in the old snapshot. We
|
|
// know from the loop condition there's something in the old
|
|
// snapshot.
|
|
if rootIter.Front() == nil {
|
|
close(snapElem.mutateCh)
|
|
if snapElem.isLeaf() {
|
|
close(snapElem.leaf.mutateCh)
|
|
}
|
|
snapIter.Next()
|
|
continue
|
|
}
|
|
|
|
// Do one string compare so we can check the various conditions
|
|
// below without repeating the compare.
|
|
cmp := strings.Compare(snapIter.Path(), rootIter.Path())
|
|
|
|
// If the snapshot is behind the root, then we must have deleted
|
|
// this node during the transaction.
|
|
if cmp < 0 {
|
|
close(snapElem.mutateCh)
|
|
if snapElem.isLeaf() {
|
|
close(snapElem.leaf.mutateCh)
|
|
}
|
|
snapIter.Next()
|
|
continue
|
|
}
|
|
|
|
// If the snapshot is ahead of the root, then we must have added
|
|
// this node during the transaction.
|
|
if cmp > 0 {
|
|
rootIter.Next()
|
|
continue
|
|
}
|
|
|
|
// If we have the same path, then we need to see if we mutated a
|
|
// node and possibly the leaf.
|
|
rootElem := rootIter.Front()
|
|
if snapElem != rootElem {
|
|
close(snapElem.mutateCh)
|
|
if snapElem.leaf != nil && (snapElem.leaf != rootElem.leaf) {
|
|
close(snapElem.leaf.mutateCh)
|
|
}
|
|
}
|
|
snapIter.Next()
|
|
rootIter.Next()
|
|
}
|
|
}
|
|
|
|
// Notify is used along with TrackMutate to trigger notifications. This must
|
|
// only be done once a transaction is committed via CommitOnly, and it is called
|
|
// automatically by Commit.
|
|
func (t *Txn) Notify() {
|
|
if !t.trackMutate {
|
|
return
|
|
}
|
|
|
|
// If we've overflowed the tracking state we can't use it in any way and
|
|
// need to do a full tree compare.
|
|
if t.trackOverflow {
|
|
t.slowNotify()
|
|
} else {
|
|
for ch := range t.trackChannels {
|
|
close(ch)
|
|
}
|
|
}
|
|
|
|
// Clean up the tracking state so that a re-notify is safe (will trigger
|
|
// the else clause above which will be a no-op).
|
|
t.trackChannels = nil
|
|
t.trackOverflow = false
|
|
}
|
|
|
|
// Insert is used to add or update a given key. The return provides
|
|
// the new tree, previous value and a bool indicating if any was set.
|
|
func (t *Tree) Insert(k []byte, v interface{}) (*Tree, interface{}, bool) {
|
|
txn := t.Txn()
|
|
old, ok := txn.Insert(k, v)
|
|
return txn.Commit(), old, ok
|
|
}
|
|
|
|
// Delete is used to delete a given key. Returns the new tree,
|
|
// old value if any, and a bool indicating if the key was set.
|
|
func (t *Tree) Delete(k []byte) (*Tree, interface{}, bool) {
|
|
txn := t.Txn()
|
|
old, ok := txn.Delete(k)
|
|
return txn.Commit(), old, ok
|
|
}
|
|
|
|
// DeletePrefix is used to delete all nodes starting with a given prefix. Returns the new tree,
|
|
// and a bool indicating if the prefix matched any nodes
|
|
func (t *Tree) DeletePrefix(k []byte) (*Tree, bool) {
|
|
txn := t.Txn()
|
|
ok := txn.DeletePrefix(k)
|
|
return txn.Commit(), ok
|
|
}
|
|
|
|
// Root returns the root node of the tree which can be used for richer
|
|
// query operations.
|
|
func (t *Tree) Root() *Node {
|
|
return t.root
|
|
}
|
|
|
|
// Get is used to lookup a specific key, returning
|
|
// the value and if it was found
|
|
func (t *Tree) Get(k []byte) (interface{}, bool) {
|
|
return t.root.Get(k)
|
|
}
|
|
|
|
// longestPrefix returns the length of the longest common prefix of the byte
// slices k1 and k2, i.e. the index of the first position at which they
// differ, or the length of the shorter slice when one is a prefix of the
// other.
func longestPrefix(k1, k2 []byte) int {
	// Only the overlapping part can match. The bound is deliberately not
	// called "max", which would shadow the predeclared function.
	limit := len(k1)
	if len(k2) < limit {
		limit = len(k2)
	}
	for i := 0; i < limit; i++ {
		if k1[i] != k2[i] {
			return i
		}
	}
	return limit
}
|
|
|
|
// concat joins a and b into a newly allocated byte slice, leaving both
// inputs untouched.
func concat(a, b []byte) []byte {
	total := len(a) + len(b)
	joined := make([]byte, total)
	n := copy(joined, a)
	copy(joined[n:], b)
	return joined
}
|