Merge pull request #13040 from bboreham/smaller-stripeseries

TSDB: make the global hash lookup table smaller
pull/13167/head
Bryan Boreham 1 year ago committed by GitHub
commit f13bc1a5c9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -1666,26 +1666,34 @@ func (h *Head) mmapHeadChunks() {
var count int var count int
for i := 0; i < h.series.size; i++ { for i := 0; i < h.series.size; i++ {
h.series.locks[i].RLock() h.series.locks[i].RLock()
for _, all := range h.series.hashes[i] { for _, series := range h.series.series[i] {
for _, series := range all { series.Lock()
series.Lock() count += series.mmapChunks(h.chunkDiskMapper)
count += series.mmapChunks(h.chunkDiskMapper) series.Unlock()
series.Unlock()
}
} }
h.series.locks[i].RUnlock() h.series.locks[i].RUnlock()
} }
h.metrics.mmapChunksTotal.Add(float64(count)) h.metrics.mmapChunksTotal.Add(float64(count))
} }
// seriesHashmap lets TSDB find a memSeries by its label set, via a 64-bit hash.
// There is one map for the common case where the hash value is unique, and a
// second map for the case that two series have the same hash value.
// Each series is in only one of the maps.
// Its methods require the hash to be submitted with it to avoid re-computations throughout
// the code.
type seriesHashmap struct {
	// unique maps a hash to the single series stored under it (the common case).
	unique map[uint64]*memSeries
	// conflicts holds any further series whose hash collides with an entry in
	// unique. It is nil until the first collision (initialized on demand in set()).
	conflicts map[uint64][]*memSeries
}
func (m seriesHashmap) get(hash uint64, lset labels.Labels) *memSeries { func (m *seriesHashmap) get(hash uint64, lset labels.Labels) *memSeries {
for _, s := range m[hash] { if s, found := m.unique[hash]; found {
if labels.Equal(s.lset, lset) {
return s
}
}
for _, s := range m.conflicts[hash] {
if labels.Equal(s.lset, lset) { if labels.Equal(s.lset, lset) {
return s return s
} }
@ -1694,27 +1702,49 @@ func (m seriesHashmap) get(hash uint64, lset labels.Labels) *memSeries {
} }
func (m seriesHashmap) set(hash uint64, s *memSeries) { func (m seriesHashmap) set(hash uint64, s *memSeries) {
l := m[hash] if existing, found := m.unique[hash]; !found || labels.Equal(existing.lset, s.lset) {
m.unique[hash] = s
return
}
if m.conflicts == nil {
m.conflicts = make(map[uint64][]*memSeries)
}
l := m.conflicts[hash]
for i, prev := range l { for i, prev := range l {
if labels.Equal(prev.lset, s.lset) { if labels.Equal(prev.lset, s.lset) {
l[i] = s l[i] = s
return return
} }
} }
m[hash] = append(l, s) m.conflicts[hash] = append(l, s)
} }
func (m seriesHashmap) del(hash uint64, lset labels.Labels) { func (m seriesHashmap) del(hash uint64, lset labels.Labels) {
var rem []*memSeries var rem []*memSeries
for _, s := range m[hash] { unique, found := m.unique[hash]
if !labels.Equal(s.lset, lset) { switch {
rem = append(rem, s) case !found:
return
case labels.Equal(unique.lset, lset):
conflicts := m.conflicts[hash]
if len(conflicts) == 0 {
delete(m.unique, hash)
return
}
rem = conflicts
default:
rem = append(rem, unique)
for _, s := range m.conflicts[hash] {
if !labels.Equal(s.lset, lset) {
rem = append(rem, s)
}
} }
} }
if len(rem) == 0 { m.unique[hash] = rem[0]
delete(m, hash) if len(rem) == 1 {
delete(m.conflicts, hash)
} else { } else {
m[hash] = rem m.conflicts[hash] = rem[1:]
} }
} }
@ -1756,7 +1786,10 @@ func newStripeSeries(stripeSize int, seriesCallback SeriesLifecycleCallback) *st
s.series[i] = map[chunks.HeadSeriesRef]*memSeries{} s.series[i] = map[chunks.HeadSeriesRef]*memSeries{}
} }
for i := range s.hashes { for i := range s.hashes {
s.hashes[i] = seriesHashmap{} s.hashes[i] = seriesHashmap{
unique: map[uint64]*memSeries{},
conflicts: nil, // Initialized on demand in set().
}
} }
return s return s
} }
@ -1776,70 +1809,72 @@ func (s *stripeSeries) gc(mint int64, minOOOMmapRef chunks.ChunkDiskMapperRef) (
deletedFromPrevStripe = 0 deletedFromPrevStripe = 0
) )
minMmapFile = math.MaxInt32 minMmapFile = math.MaxInt32
// Run through all series and truncate old chunks. Mark those with no
// chunks left as deleted and store their ID.
for i := 0; i < s.size; i++ {
deletedForCallback := make(map[chunks.HeadSeriesRef]labels.Labels, deletedFromPrevStripe)
s.locks[i].Lock()
for hash, all := range s.hashes[i] { // For one series, truncate old chunks and check if any chunks left. If not, mark as deleted and collect the ID.
for _, series := range all { check := func(hashShard int, hash uint64, series *memSeries, deletedForCallback map[chunks.HeadSeriesRef]labels.Labels) {
series.Lock() series.Lock()
rmChunks += series.truncateChunksBefore(mint, minOOOMmapRef) defer series.Unlock()
if len(series.mmappedChunks) > 0 {
seq, _ := series.mmappedChunks[0].ref.Unpack()
if seq < minMmapFile {
minMmapFile = seq
}
}
if series.ooo != nil && len(series.ooo.oooMmappedChunks) > 0 {
seq, _ := series.ooo.oooMmappedChunks[0].ref.Unpack()
if seq < minMmapFile {
minMmapFile = seq
}
for _, ch := range series.ooo.oooMmappedChunks {
if ch.minTime < minOOOTime {
minOOOTime = ch.minTime
}
}
}
if series.ooo != nil && series.ooo.oooHeadChunk != nil {
if series.ooo.oooHeadChunk.minTime < minOOOTime {
minOOOTime = series.ooo.oooHeadChunk.minTime
}
}
if len(series.mmappedChunks) > 0 || series.headChunks != nil || series.pendingCommit ||
(series.ooo != nil && (len(series.ooo.oooMmappedChunks) > 0 || series.ooo.oooHeadChunk != nil)) {
seriesMint := series.minTime()
if seriesMint < actualMint {
actualMint = seriesMint
}
series.Unlock()
continue
}
// The series is gone entirely. We need to keep the series lock rmChunks += series.truncateChunksBefore(mint, minOOOMmapRef)
// and make sure we have acquired the stripe locks for hash and ID of the
// series alike.
// If we don't hold them all, there's a very small chance that a series receives
// samples again while we are half-way into deleting it.
j := int(series.ref) & (s.size - 1)
if i != j { if len(series.mmappedChunks) > 0 {
s.locks[j].Lock() seq, _ := series.mmappedChunks[0].ref.Unpack()
if seq < minMmapFile {
minMmapFile = seq
}
}
if series.ooo != nil && len(series.ooo.oooMmappedChunks) > 0 {
seq, _ := series.ooo.oooMmappedChunks[0].ref.Unpack()
if seq < minMmapFile {
minMmapFile = seq
}
for _, ch := range series.ooo.oooMmappedChunks {
if ch.minTime < minOOOTime {
minOOOTime = ch.minTime
} }
}
}
if series.ooo != nil && series.ooo.oooHeadChunk != nil {
if series.ooo.oooHeadChunk.minTime < minOOOTime {
minOOOTime = series.ooo.oooHeadChunk.minTime
}
}
if len(series.mmappedChunks) > 0 || series.headChunks != nil || series.pendingCommit ||
(series.ooo != nil && (len(series.ooo.oooMmappedChunks) > 0 || series.ooo.oooHeadChunk != nil)) {
seriesMint := series.minTime()
if seriesMint < actualMint {
actualMint = seriesMint
}
return
}
// The series is gone entirely. We need to keep the series lock
// and make sure we have acquired the stripe locks for hash and ID of the
// series alike.
// If we don't hold them all, there's a very small chance that a series receives
// samples again while we are half-way into deleting it.
refShard := int(series.ref) & (s.size - 1)
if hashShard != refShard {
s.locks[refShard].Lock()
defer s.locks[refShard].Unlock()
}
deleted[storage.SeriesRef(series.ref)] = struct{}{} deleted[storage.SeriesRef(series.ref)] = struct{}{}
s.hashes[i].del(hash, series.lset) s.hashes[hashShard].del(hash, series.lset)
delete(s.series[j], series.ref) delete(s.series[refShard], series.ref)
deletedForCallback[series.ref] = series.lset deletedForCallback[series.ref] = series.lset
}
if i != j { // Run through all series shard by shard, checking which should be deleted.
s.locks[j].Unlock() for i := 0; i < s.size; i++ {
} deletedForCallback := make(map[chunks.HeadSeriesRef]labels.Labels, deletedFromPrevStripe)
s.locks[i].Lock()
series.Unlock() for hash, series := range s.hashes[i].unique {
check(i, hash, series, deletedForCallback)
}
for hash, all := range s.hashes[i].conflicts {
for _, series := range all {
check(i, hash, series, deletedForCallback)
} }
} }

Loading…
Cancel
Save