2020-03-19 16:33:44 +00:00
// Copyright 2020 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package chunks
import (
"bufio"
"bytes"
"encoding/binary"
2023-11-08 09:02:59 +00:00
"errors"
"fmt"
2020-03-19 16:33:44 +00:00
"hash"
"io"
"os"
"path/filepath"
"strconv"
"sync"
2022-01-10 13:36:45 +00:00
"github.com/dennwc/varint"
"github.com/prometheus/client_golang/prometheus"
2020-10-22 09:00:08 +00:00
"go.uber.org/atomic"
2022-09-30 14:33:56 +00:00
"golang.org/x/exp/slices"
2020-10-22 09:00:08 +00:00
2020-03-19 16:33:44 +00:00
"github.com/prometheus/prometheus/tsdb/chunkenc"
tsdb_errors "github.com/prometheus/prometheus/tsdb/errors"
"github.com/prometheus/prometheus/tsdb/fileutil"
)
// Head chunk file header fields constants.
const (
	// MagicHeadChunks is the 4-byte magic number expected at the very
	// beginning of every head chunk file.
	MagicHeadChunks = 0x0130BC91

	// headChunksFormatV1 is the format version handed to cutSegmentFile
	// whenever a new head chunk file is cut.
	headChunksFormatV1 = 1
)
2021-10-22 08:06:44 +00:00
// ErrChunkDiskMapperClosed is the sentinel error handed back by
// ChunkDiskMapper methods once the mapper has been closed.
var ErrChunkDiskMapperClosed error = errors.New("ChunkDiskMapper closed")
2020-03-19 16:33:44 +00:00
const (
// MintMaxtSize is the size of the mint/maxt for head chunk file and chunks.
MintMaxtSize = 8
// SeriesRefSize is the size of series reference on disk.
SeriesRefSize = 8
2021-11-17 10:21:27 +00:00
// HeadChunkFileHeaderSize is the total size of the header for a head chunk file.
2020-03-19 16:33:44 +00:00
HeadChunkFileHeaderSize = SegmentHeaderSize
// MaxHeadChunkFileSize is the max size of a head chunk file.
2020-05-29 14:38:41 +00:00
MaxHeadChunkFileSize = 128 * 1024 * 1024 // 128 MiB.
2020-03-19 16:33:44 +00:00
// CRCSize is the size of crc32 sum on disk.
CRCSize = 4
// MaxHeadChunkMetaSize is the max size of an mmapped chunks minus the chunks data.
// Max because the uvarint size can be smaller.
2022-01-19 14:50:35 +00:00
MaxHeadChunkMetaSize = SeriesRefSize + 2 * MintMaxtSize + ChunkEncodingSize + MaxChunkLengthFieldSize + CRCSize
2020-11-19 13:00:47 +00:00
// MinWriteBufferSize is the minimum write buffer size allowed.
MinWriteBufferSize = 64 * 1024 // 64KB.
// MaxWriteBufferSize is the maximum write buffer size allowed.
MaxWriteBufferSize = 8 * 1024 * 1024 // 8 MiB.
// DefaultWriteBufferSize is the default write buffer size.
DefaultWriteBufferSize = 4 * 1024 * 1024 // 4 MiB.
2022-01-10 13:36:45 +00:00
// DefaultWriteQueueSize is the default size of the in-memory queue used before flushing chunks to the disk.
2022-03-11 16:26:59 +00:00
// A value of 0 completely disables this feature.
DefaultWriteQueueSize = 0
2020-03-19 16:33:44 +00:00
)
2021-10-13 12:14:32 +00:00
// ChunkDiskMapperRef identifies where a head chunk lives on disk.
// The upper 4 bytes carry the index of the head chunk file and
// the lower 4 bytes carry the byte offset within that file at which the chunk starts.
type ChunkDiskMapperRef uint64

// newChunkDiskMapperRef packs a file sequence number and an in-file offset
// into a single reference. The offset is OR-ed in unmasked, so it is
// expected to fit into the lower 32 bits.
func newChunkDiskMapperRef(seq, offset uint64) ChunkDiskMapperRef {
	packed := seq<<32 | offset
	return ChunkDiskMapperRef(packed)
}
2022-01-10 13:36:45 +00:00
func ( ref ChunkDiskMapperRef ) Unpack ( ) ( seq , offset int ) {
seq = int ( ref >> 32 )
offset = int ( ( ref << 32 ) >> 32 )
return seq , offset
2021-10-13 12:14:32 +00:00
}
2022-09-20 17:05:50 +00:00
// GreaterThanOrEqualTo reports whether ref points at the same position as r
// or a later one, comparing the file sequence first and the offset second.
func (ref ChunkDiskMapperRef) GreaterThanOrEqualTo(r ChunkDiskMapperRef) bool {
	seq, off := ref.Unpack()
	otherSeq, otherOff := r.Unpack()
	if seq != otherSeq {
		return seq > otherSeq
	}
	return off >= otherOff
}
// GreaterThan reports whether ref points at a strictly later position than r,
// comparing the file sequence first and the offset second.
func (ref ChunkDiskMapperRef) GreaterThan(r ChunkDiskMapperRef) bool {
	seq, off := ref.Unpack()
	otherSeq, otherOff := r.Unpack()
	if seq != otherSeq {
		return seq > otherSeq
	}
	return off > otherOff
}
2021-01-06 07:54:35 +00:00
// CorruptionErr is an error that's returned when corruption is encountered.
type CorruptionErr struct {
	// Dir is the directory holding the head chunk files.
	Dir string
	// FileIndex is the index of the affected head chunk file.
	FileIndex int
	// Err is the underlying cause.
	Err error
}
2020-05-06 15:30:00 +00:00
func ( e * CorruptionErr ) Error ( ) string {
2023-11-08 09:02:59 +00:00
return fmt . Errorf ( "corruption in head chunk file %s: %w" , segmentFile ( e . Dir , e . FileIndex ) , e . Err ) . Error ( )
2020-03-19 16:33:44 +00:00
}
2023-11-16 18:54:41 +00:00
// Unwrap exposes the underlying cause so that errors.Is and errors.As
// can see through a CorruptionErr.
func (e *CorruptionErr) Unwrap() error { return e.Err }
2022-01-10 13:36:45 +00:00
// chunkPos keeps track of the position in the head chunk files.
// chunkPos is not thread-safe, a lock must be used to protect it.
type chunkPos struct {
	// seq is the index of the chunk file.
	seq uint64
	// offset is the byte offset within that chunk file.
	offset uint64
	// cutFile, when true, forces the next chunk to be written to a new file.
	cutFile bool
}
// getNextChunkRef takes a chunk and returns the chunk reference which will refer to it once it has been written.
// getNextChunkRef also decides whether a new file should be cut before writing this chunk, and it returns the decision via the second return value.
// The order of calling getNextChunkRef must be the order in which chunks are written to the disk.
func ( f * chunkPos ) getNextChunkRef ( chk chunkenc . Chunk ) ( chkRef ChunkDiskMapperRef , cutFile bool ) {
chkLen := uint64 ( len ( chk . Bytes ( ) ) )
bytesToWrite := f . bytesToWriteForChunk ( chkLen )
2022-01-19 14:50:35 +00:00
if f . shouldCutNewFile ( bytesToWrite ) {
2022-01-10 13:36:45 +00:00
f . toNewFile ( )
f . cutFile = false
cutFile = true
}
chkOffset := f . offset
f . offset += bytesToWrite
return newChunkDiskMapperRef ( f . seq , chkOffset ) , cutFile
}
// toNewFile moves the position to the start of the next chunk file:
// the sequence is incremented and the offset is placed right after the segment header.
func (f *chunkPos) toNewFile() {
	f.seq, f.offset = f.seq+1, SegmentHeaderSize
}
// cutFileOnNextChunk requests that the next chunk be written into a new file.
// Not thread safe, a lock must be held when calling this.
func (f *chunkPos) cutFileOnNextChunk() { f.cutFile = true }
2022-02-22 15:05:15 +00:00
// setSeq sets the sequence number of the head chunk file.
func ( f * chunkPos ) setSeq ( seq uint64 ) {
2022-01-10 13:36:45 +00:00
f . seq = seq
}
// shouldCutNewFile returns whether a new file should be cut based on the file size.
2022-01-19 14:50:35 +00:00
// Not thread safe, a lock must be held when calling this.
func ( f * chunkPos ) shouldCutNewFile ( bytesToWrite uint64 ) bool {
2022-01-10 13:36:45 +00:00
if f . cutFile {
return true
}
return f . offset == 0 || // First head chunk file.
2022-01-19 14:50:35 +00:00
f . offset + bytesToWrite > MaxHeadChunkFileSize // Exceeds the max head chunk file size.
2022-01-10 13:36:45 +00:00
}
// bytesToWriteForChunk returns how many bytes a chunk with chkLen bytes of data
// takes up on disk, counting the metadata written before and after the data.
// Head chunk format: https://github.com/prometheus/prometheus/blob/main/tsdb/docs/format/head_chunks.md#chunk
func (f *chunkPos) bytesToWriteForChunk(chkLen uint64) uint64 {
	// Fixed-size parts: series ref, mint, maxt and encoding up front, crc32 at the end.
	total := uint64(SeriesRefSize + 2*MintMaxtSize + ChunkEncodingSize + CRCSize)
	// The chunk length is stored as a uvarint, so its size depends on the value.
	total += uint64(varint.UvarintSize(chkLen))
	// The chunk data itself.
	total += chkLen
	return total
}
2020-03-19 16:33:44 +00:00
// ChunkDiskMapper is for writing the Head block chunks to the disk
// and access chunks via mmapped file.
type ChunkDiskMapper struct {
/// Writer.
2020-11-19 13:00:47 +00:00
dir * os . File
writeBufferSize int
2020-03-19 16:33:44 +00:00
2022-01-10 13:36:45 +00:00
curFile * os . File // File being written to.
2022-02-22 15:05:15 +00:00
curFileSequence int // Index of current open file being appended to. 0 if no file is active.
2022-01-10 13:36:45 +00:00
curFileOffset atomic . Uint64 // Bytes written in current open file.
curFileMaxt int64 // Used for the size retention.
// The values in evtlPos represent the file position which will eventually be
// reached once the content of the write queue has been fully processed.
evtlPosMtx sync . Mutex
evtlPos chunkPos
2020-03-19 16:33:44 +00:00
byteBuf [ MaxHeadChunkMetaSize ] byte // Buffer used to write the header of the chunk.
chkWriter * bufio . Writer // Writer for the current open file.
crc32 hash . Hash
2020-05-29 14:38:41 +00:00
writePathMtx sync . Mutex
2020-03-19 16:33:44 +00:00
/// Reader.
// The int key in the map is the file number on the disk.
mmappedChunkFiles map [ int ] * mmappedChunkFile // Contains the m-mapped files for each chunk file mapped with its index.
closers map [ int ] io . Closer // Closers for resources behind the byte slices.
readPathMtx sync . RWMutex // Mutex used to protect the above 2 maps.
pool chunkenc . Pool // This is used when fetching a chunk from the disk to allocate a chunk.
// Writer and Reader.
// We flush chunks to disk in batches. Hence, we store them in this buffer
// from which chunks are served till they are flushed and are ready for m-mapping.
chunkBuffer * chunkBuffer
2021-11-17 10:21:27 +00:00
// Whether the maxt field is set for all mmapped chunk files tracked within the mmappedChunkFiles map.
// This is done after iterating through all the chunks in those files using the IterateAllChunks method.
2020-03-19 16:33:44 +00:00
fileMaxtSet bool
2022-01-10 13:36:45 +00:00
writeQueue * chunkWriteQueue
2020-03-19 16:33:44 +00:00
closed bool
}
2021-11-17 10:21:27 +00:00
// mmappedChunkFile provides mmapp access to an entire head chunks file that holds many chunks.
2020-03-19 16:33:44 +00:00
type mmappedChunkFile struct {
byteSlice ByteSlice
2021-11-17 10:21:27 +00:00
maxt int64 // Max timestamp among all of this file's chunks.
2020-03-19 16:33:44 +00:00
}
2021-11-17 10:21:27 +00:00
// NewChunkDiskMapper returns a new ChunkDiskMapper against the given directory
2020-03-19 16:33:44 +00:00
// using the default head chunk file duration.
// NOTE: 'IterateAllChunks' method needs to be called at least once after creating ChunkDiskMapper
// to set the maxt of all the file.
2022-01-10 13:36:45 +00:00
func NewChunkDiskMapper ( reg prometheus . Registerer , dir string , pool chunkenc . Pool , writeBufferSize , writeQueueSize int ) ( * ChunkDiskMapper , error ) {
2020-11-19 13:00:47 +00:00
// Validate write buffer size.
if writeBufferSize < MinWriteBufferSize || writeBufferSize > MaxWriteBufferSize {
2023-11-08 09:02:59 +00:00
return nil , fmt . Errorf ( "ChunkDiskMapper write buffer size should be between %d and %d (actual: %d)" , MinWriteBufferSize , MaxWriteBufferSize , writeBufferSize )
2020-11-19 13:00:47 +00:00
}
if writeBufferSize % 1024 != 0 {
2023-11-08 09:02:59 +00:00
return nil , fmt . Errorf ( "ChunkDiskMapper write buffer size should be a multiple of 1024 (actual: %d)" , writeBufferSize )
2020-11-19 13:00:47 +00:00
}
2021-10-22 08:06:44 +00:00
if err := os . MkdirAll ( dir , 0 o777 ) ; err != nil {
2020-03-19 16:33:44 +00:00
return nil , err
}
dirFile , err := fileutil . OpenDir ( dir )
if err != nil {
return nil , err
}
m := & ChunkDiskMapper {
2020-11-19 13:00:47 +00:00
dir : dirFile ,
pool : pool ,
writeBufferSize : writeBufferSize ,
crc32 : newCRC32 ( ) ,
chunkBuffer : newChunkBuffer ( ) ,
2020-03-19 16:33:44 +00:00
}
2022-03-11 16:26:59 +00:00
if writeQueueSize > 0 {
m . writeQueue = newChunkWriteQueue ( reg , writeQueueSize , m . writeChunk )
}
2020-03-19 16:33:44 +00:00
if m . pool == nil {
m . pool = chunkenc . NewPool ( )
}
return m , m . openMMapFiles ( )
}
2023-03-16 10:23:47 +00:00
// Chunk encodings for out-of-order chunks.
// These encodings must be only used by the Head block for its internal bookkeeping.
const (
	// OutOfOrderMask is the top bit of the encoding byte; it flags a chunk as out-of-order.
	OutOfOrderMask = uint8(1 << 7)
)
// ApplyOutOfOrderMask returns sourceEncoding with the out-of-order bit set.
func (cdm *ChunkDiskMapper) ApplyOutOfOrderMask(sourceEncoding chunkenc.Encoding) chunkenc.Encoding {
	return chunkenc.Encoding(uint8(sourceEncoding) | OutOfOrderMask)
}
// IsOutOfOrderChunk reports whether the out-of-order bit is set in e.
func (cdm *ChunkDiskMapper) IsOutOfOrderChunk(e chunkenc.Encoding) bool {
	flagged := uint8(e) & OutOfOrderMask
	return flagged != 0
}
// RemoveMasks returns sourceEncoding with the out-of-order bit cleared,
// i.e. the plain chunk encoding.
func (cdm *ChunkDiskMapper) RemoveMasks(sourceEncoding chunkenc.Encoding) chunkenc.Encoding {
	return chunkenc.Encoding(uint8(sourceEncoding) &^ OutOfOrderMask)
}
2021-11-17 10:21:27 +00:00
// openMMapFiles opens all files within dir for mmapping.
2020-03-19 16:33:44 +00:00
func ( cdm * ChunkDiskMapper ) openMMapFiles ( ) ( returnErr error ) {
cdm . mmappedChunkFiles = map [ int ] * mmappedChunkFile { }
cdm . closers = map [ int ] io . Closer { }
defer func ( ) {
if returnErr != nil {
2020-10-28 15:24:58 +00:00
returnErr = tsdb_errors . NewMulti ( returnErr , closeAllFromMap ( cdm . closers ) ) . Err ( )
2020-03-19 16:33:44 +00:00
cdm . mmappedChunkFiles = nil
cdm . closers = nil
}
} ( )
files , err := listChunkFiles ( cdm . dir . Name ( ) )
if err != nil {
return err
}
2020-10-21 12:57:13 +00:00
files , err = repairLastChunkFile ( files )
if err != nil {
return err
}
2020-03-19 16:33:44 +00:00
chkFileIndices := make ( [ ] int , 0 , len ( files ) )
for seq , fn := range files {
f , err := fileutil . OpenMmapFile ( fn )
if err != nil {
2023-11-08 09:02:59 +00:00
return fmt . Errorf ( "mmap files, file: %s: %w" , fn , err )
2020-03-19 16:33:44 +00:00
}
cdm . closers [ seq ] = f
cdm . mmappedChunkFiles [ seq ] = & mmappedChunkFile { byteSlice : realByteSlice ( f . Bytes ( ) ) }
chkFileIndices = append ( chkFileIndices , seq )
}
// Check for gaps in the files.
2022-09-30 14:33:56 +00:00
slices . Sort ( chkFileIndices )
2020-03-19 16:33:44 +00:00
if len ( chkFileIndices ) == 0 {
return nil
}
lastSeq := chkFileIndices [ 0 ]
for _ , seq := range chkFileIndices [ 1 : ] {
if seq != lastSeq + 1 {
2023-11-08 09:02:59 +00:00
return fmt . Errorf ( "found unsequential head chunk files %s (index: %d) and %s (index: %d)" , files [ lastSeq ] , lastSeq , files [ seq ] , seq )
2020-03-19 16:33:44 +00:00
}
lastSeq = seq
}
for i , b := range cdm . mmappedChunkFiles {
if b . byteSlice . Len ( ) < HeadChunkFileHeaderSize {
2023-11-08 09:02:59 +00:00
return fmt . Errorf ( "%s: invalid head chunk file header: %w" , files [ i ] , errInvalidSize )
2020-03-19 16:33:44 +00:00
}
// Verify magic number.
if m := binary . BigEndian . Uint32 ( b . byteSlice . Range ( 0 , MagicChunksSize ) ) ; m != MagicHeadChunks {
2023-11-08 09:02:59 +00:00
return fmt . Errorf ( "%s: invalid magic number %x" , files [ i ] , m )
2020-03-19 16:33:44 +00:00
}
// Verify chunk format version.
if v := int ( b . byteSlice . Range ( MagicChunksSize , MagicChunksSize + ChunksFormatVersionSize ) [ 0 ] ) ; v != chunksFormatV1 {
2023-11-08 09:02:59 +00:00
return fmt . Errorf ( "%s: invalid chunk format version %d" , files [ i ] , v )
2020-03-19 16:33:44 +00:00
}
}
2022-02-22 15:05:15 +00:00
cdm . evtlPos . setSeq ( uint64 ( lastSeq ) )
2022-01-10 13:36:45 +00:00
2020-03-19 16:33:44 +00:00
return nil
}
// listChunkFiles returns the entries of dir whose name parses as an unsigned
// base-10 integer, keyed by that integer and mapped to the entry's full path.
// Entries with any other name are silently skipped.
func listChunkFiles(dir string) (map[int]string, error) {
	entries, err := os.ReadDir(dir)
	if err != nil {
		return nil, err
	}

	res := map[int]string{}
	for _, entry := range entries {
		name := entry.Name()
		if seq, err := strconv.ParseUint(name, 10, 64); err == nil {
			res[int(seq)] = filepath.Join(dir, name)
		}
	}
	return res, nil
}
2020-10-21 12:57:13 +00:00
// repairLastChunkFile deletes the last file if it's empty.
2021-11-17 10:21:27 +00:00
// Because we don't fsync when creating these files, we could end
2020-10-21 12:57:13 +00:00
// up with an empty file at the end during an abrupt shutdown.
func repairLastChunkFile ( files map [ int ] string ) ( _ map [ int ] string , returnErr error ) {
lastFile := - 1
for seq := range files {
if seq > lastFile {
lastFile = seq
}
}
if lastFile <= 0 {
return files , nil
}
2022-09-28 16:13:58 +00:00
f , err := os . Open ( files [ lastFile ] )
2020-10-21 12:57:13 +00:00
if err != nil {
2023-11-08 09:02:59 +00:00
return files , fmt . Errorf ( "open file during last head chunk file repair: %w" , err )
2020-10-21 12:57:13 +00:00
}
2022-09-28 16:13:58 +00:00
buf := make ( [ ] byte , MagicChunksSize )
size , err := f . Read ( buf )
2023-11-16 18:54:41 +00:00
if err != nil && ! errors . Is ( err , io . EOF ) {
2023-11-08 09:02:59 +00:00
return files , fmt . Errorf ( "failed to read magic number during last head chunk file repair: %w" , err )
2022-09-28 16:13:58 +00:00
}
if err := f . Close ( ) ; err != nil {
2023-11-08 09:02:59 +00:00
return files , fmt . Errorf ( "close file during last head chunk file repair: %w" , err )
2022-09-28 16:13:58 +00:00
}
// We either don't have enough bytes for the magic number or the magic number is 0.
// NOTE: we should not check for wrong magic number here because that error
// needs to be sent up the function called (already done elsewhere)
// for proper repair mechanism to happen in the Head.
if size < MagicChunksSize || binary . BigEndian . Uint32 ( buf ) == 0 {
2020-10-21 12:57:13 +00:00
// Corrupt file, hence remove it.
if err := os . RemoveAll ( files [ lastFile ] ) ; err != nil {
2023-11-08 09:02:59 +00:00
return files , fmt . Errorf ( "delete corrupted, empty head chunk file during last file repair: %w" , err )
2020-10-21 12:57:13 +00:00
}
delete ( files , lastFile )
}
return files , nil
}
2020-03-19 16:33:44 +00:00
// WriteChunk writes the chunk to the disk.
// The returned chunk ref is the reference from where the chunk encoding starts for the chunk.
2023-03-16 10:23:47 +00:00
func ( cdm * ChunkDiskMapper ) WriteChunk ( seriesRef HeadSeriesRef , mint , maxt int64 , chk chunkenc . Chunk , isOOO bool , callback func ( err error ) ) ( chkRef ChunkDiskMapperRef ) {
2022-03-11 16:26:59 +00:00
// cdm.evtlPosMtx must be held to serialize the calls to cdm.evtlPos.getNextChunkRef() and the writing of the chunk (either with or without queue).
2022-01-10 13:36:45 +00:00
cdm . evtlPosMtx . Lock ( )
defer cdm . evtlPosMtx . Unlock ( )
ref , cutFile := cdm . evtlPos . getNextChunkRef ( chk )
2022-03-11 16:26:59 +00:00
if cdm . writeQueue != nil {
2023-03-16 10:23:47 +00:00
return cdm . writeChunkViaQueue ( ref , isOOO , cutFile , seriesRef , mint , maxt , chk , callback )
2022-03-11 16:26:59 +00:00
}
2023-03-16 10:23:47 +00:00
err := cdm . writeChunk ( seriesRef , mint , maxt , chk , ref , isOOO , cutFile )
2022-03-11 16:26:59 +00:00
if callback != nil {
callback ( err )
}
return ref
}
2023-03-16 10:23:47 +00:00
func ( cdm * ChunkDiskMapper ) writeChunkViaQueue ( ref ChunkDiskMapperRef , isOOO , cutFile bool , seriesRef HeadSeriesRef , mint , maxt int64 , chk chunkenc . Chunk , callback func ( err error ) ) ( chkRef ChunkDiskMapperRef ) {
2022-03-11 16:26:59 +00:00
var err error
if callback != nil {
defer func ( ) {
if err != nil {
callback ( err )
}
} ( )
}
2022-01-10 13:36:45 +00:00
err = cdm . writeQueue . addJob ( chunkWriteJob {
cutFile : cutFile ,
seriesRef : seriesRef ,
mint : mint ,
maxt : maxt ,
chk : chk ,
ref : ref ,
2023-03-16 10:23:47 +00:00
isOOO : isOOO ,
2022-01-10 13:36:45 +00:00
callback : callback ,
} )
return ref
}
2023-03-16 10:23:47 +00:00
func ( cdm * ChunkDiskMapper ) writeChunk ( seriesRef HeadSeriesRef , mint , maxt int64 , chk chunkenc . Chunk , ref ChunkDiskMapperRef , isOOO , cutFile bool ) ( err error ) {
2020-03-19 16:33:44 +00:00
cdm . writePathMtx . Lock ( )
defer cdm . writePathMtx . Unlock ( )
if cdm . closed {
2022-01-10 13:36:45 +00:00
return ErrChunkDiskMapperClosed
2020-03-19 16:33:44 +00:00
}
2022-01-10 13:36:45 +00:00
if cutFile {
err := cdm . cutAndExpectRef ( ref )
if err != nil {
return err
2020-03-19 16:33:44 +00:00
}
}
// if len(chk.Bytes())+MaxHeadChunkMetaSize >= writeBufferSize, it means that chunk >= the buffer size;
// so no need to flush here, as we have to flush at the end (to not keep partial chunks in buffer).
2020-11-19 13:00:47 +00:00
if len ( chk . Bytes ( ) ) + MaxHeadChunkMetaSize < cdm . writeBufferSize && cdm . chkWriter . Available ( ) < MaxHeadChunkMetaSize + len ( chk . Bytes ( ) ) {
2020-03-19 16:33:44 +00:00
if err := cdm . flushBuffer ( ) ; err != nil {
2022-01-10 13:36:45 +00:00
return err
2020-03-19 16:33:44 +00:00
}
}
cdm . crc32 . Reset ( )
bytesWritten := 0
2021-11-06 10:10:04 +00:00
binary . BigEndian . PutUint64 ( cdm . byteBuf [ bytesWritten : ] , uint64 ( seriesRef ) )
2020-03-19 16:33:44 +00:00
bytesWritten += SeriesRefSize
binary . BigEndian . PutUint64 ( cdm . byteBuf [ bytesWritten : ] , uint64 ( mint ) )
bytesWritten += MintMaxtSize
binary . BigEndian . PutUint64 ( cdm . byteBuf [ bytesWritten : ] , uint64 ( maxt ) )
bytesWritten += MintMaxtSize
2023-03-16 10:23:47 +00:00
enc := chk . Encoding ( )
if isOOO {
enc = cdm . ApplyOutOfOrderMask ( enc )
}
cdm . byteBuf [ bytesWritten ] = byte ( enc )
2020-03-19 16:33:44 +00:00
bytesWritten += ChunkEncodingSize
n := binary . PutUvarint ( cdm . byteBuf [ bytesWritten : ] , uint64 ( len ( chk . Bytes ( ) ) ) )
bytesWritten += n
if err := cdm . writeAndAppendToCRC32 ( cdm . byteBuf [ : bytesWritten ] ) ; err != nil {
2022-01-10 13:36:45 +00:00
return err
2020-03-19 16:33:44 +00:00
}
if err := cdm . writeAndAppendToCRC32 ( chk . Bytes ( ) ) ; err != nil {
2022-01-10 13:36:45 +00:00
return err
2020-03-19 16:33:44 +00:00
}
if err := cdm . writeCRC32 ( ) ; err != nil {
2022-01-10 13:36:45 +00:00
return err
2020-03-19 16:33:44 +00:00
}
if maxt > cdm . curFileMaxt {
cdm . curFileMaxt = maxt
}
2022-01-10 13:36:45 +00:00
cdm . chunkBuffer . put ( ref , chk )
2020-03-19 16:33:44 +00:00
2020-11-19 13:00:47 +00:00
if len ( chk . Bytes ( ) ) + MaxHeadChunkMetaSize >= cdm . writeBufferSize {
2020-03-19 16:33:44 +00:00
// The chunk was bigger than the buffer itself.
// Flushing to not keep partial chunks in buffer.
if err := cdm . flushBuffer ( ) ; err != nil {
2022-01-10 13:36:45 +00:00
return err
2020-03-19 16:33:44 +00:00
}
}
2022-01-10 13:36:45 +00:00
return nil
2020-03-19 16:33:44 +00:00
}
2022-01-10 13:36:45 +00:00
// CutNewFile makes that a new file will be created the next time a chunk is written.
func ( cdm * ChunkDiskMapper ) CutNewFile ( ) {
cdm . evtlPosMtx . Lock ( )
defer cdm . evtlPosMtx . Unlock ( )
cdm . evtlPos . cutFileOnNextChunk ( )
2020-03-19 16:33:44 +00:00
}
2022-01-11 11:54:03 +00:00
func ( cdm * ChunkDiskMapper ) IsQueueEmpty ( ) bool {
2022-03-11 16:26:59 +00:00
if cdm . writeQueue == nil {
return true
}
2022-01-11 11:54:03 +00:00
return cdm . writeQueue . queueIsEmpty ( )
}
2022-01-10 13:36:45 +00:00
// cutAndExpectRef creates a new m-mapped file.
// The write lock should be held before calling this.
// It ensures that the position in the new file matches the given chunk reference, if not then it errors.
func ( cdm * ChunkDiskMapper ) cutAndExpectRef ( chkRef ChunkDiskMapperRef ) ( err error ) {
seq , offset , err := cdm . cut ( )
if err != nil {
return err
}
if expSeq , expOffset := chkRef . Unpack ( ) ; seq != expSeq || offset != expOffset {
2023-11-08 09:02:59 +00:00
return fmt . Errorf ( "expected newly cut file to have sequence:offset %d:%d, got %d:%d" , expSeq , expOffset , seq , offset )
2022-01-10 13:36:45 +00:00
}
2020-05-29 14:38:41 +00:00
2022-01-10 13:36:45 +00:00
return nil
2020-05-29 14:38:41 +00:00
}
// cut creates a new m-mapped file. The write lock should be held before calling this.
2022-01-10 13:36:45 +00:00
// It returns the file sequence and the offset in that file to start writing chunks.
func ( cdm * ChunkDiskMapper ) cut ( ) ( seq , offset int , returnErr error ) {
2020-03-19 16:33:44 +00:00
// Sync current tail to disk and close.
if err := cdm . finalizeCurFile ( ) ; err != nil {
2022-01-10 13:36:45 +00:00
return 0 , 0 , err
2020-03-19 16:33:44 +00:00
}
2022-01-10 13:36:45 +00:00
offset , newFile , seq , err := cutSegmentFile ( cdm . dir , MagicHeadChunks , headChunksFormatV1 , HeadChunkFilePreallocationSize )
2020-03-19 16:33:44 +00:00
if err != nil {
2022-01-10 13:36:45 +00:00
return 0 , 0 , err
2020-03-19 16:33:44 +00:00
}
2022-01-10 13:36:45 +00:00
2020-03-19 16:33:44 +00:00
defer func ( ) {
// The file should not be closed if there is no error,
// its kept open in the ChunkDiskMapper.
if returnErr != nil {
2020-10-28 15:24:58 +00:00
returnErr = tsdb_errors . NewMulti ( returnErr , newFile . Close ( ) ) . Err ( )
2020-03-19 16:33:44 +00:00
}
} ( )
2022-01-10 13:36:45 +00:00
cdm . curFileOffset . Store ( uint64 ( offset ) )
2020-03-19 16:33:44 +00:00
if cdm . curFile != nil {
cdm . readPathMtx . Lock ( )
cdm . mmappedChunkFiles [ cdm . curFileSequence ] . maxt = cdm . curFileMaxt
cdm . readPathMtx . Unlock ( )
}
2021-07-29 12:56:57 +00:00
mmapFile , err := fileutil . OpenMmapFileWithSize ( newFile . Name ( ) , MaxHeadChunkFileSize )
2020-03-19 16:33:44 +00:00
if err != nil {
2022-01-10 13:36:45 +00:00
return 0 , 0 , err
2020-03-19 16:33:44 +00:00
}
2020-08-26 08:52:48 +00:00
cdm . readPathMtx . Lock ( )
2020-03-19 16:33:44 +00:00
cdm . curFileSequence = seq
cdm . curFile = newFile
if cdm . chkWriter != nil {
cdm . chkWriter . Reset ( newFile )
} else {
2020-11-19 13:00:47 +00:00
cdm . chkWriter = bufio . NewWriterSize ( newFile , cdm . writeBufferSize )
2020-03-19 16:33:44 +00:00
}
cdm . closers [ cdm . curFileSequence ] = mmapFile
cdm . mmappedChunkFiles [ cdm . curFileSequence ] = & mmappedChunkFile { byteSlice : realByteSlice ( mmapFile . Bytes ( ) ) }
cdm . readPathMtx . Unlock ( )
cdm . curFileMaxt = 0
2022-01-10 13:36:45 +00:00
return seq , offset , nil
2020-03-19 16:33:44 +00:00
}
// finalizeCurFile flushes all pending data to the current tail file,
// syncs it to disk and closes it. It is a no-op when no file is open.
func (cdm *ChunkDiskMapper) finalizeCurFile() error {
	tail := cdm.curFile
	if tail == nil {
		return nil
	}

	if err := cdm.flushBuffer(); err != nil {
		return err
	}
	if err := tail.Sync(); err != nil {
		return err
	}
	return tail.Close()
}
func ( cdm * ChunkDiskMapper ) write ( b [ ] byte ) error {
n , err := cdm . chkWriter . Write ( b )
2022-01-10 13:36:45 +00:00
cdm . curFileOffset . Add ( uint64 ( n ) )
2020-03-19 16:33:44 +00:00
return err
}
// writeAndAppendToCRC32 writes b to the current file and, if that succeeded,
// feeds the same bytes into the running crc32.
func (cdm *ChunkDiskMapper) writeAndAppendToCRC32(b []byte) error {
	err := cdm.write(b)
	if err == nil {
		_, err = cdm.crc32.Write(b)
	}
	return err
}
// writeCRC32 writes the current crc32 sum to the file.
// byteBuf is reused as scratch space for the sum.
func (cdm *ChunkDiskMapper) writeCRC32() error {
	sum := cdm.crc32.Sum(cdm.byteBuf[:0])
	return cdm.write(sum)
}
// flushBuffer flushes the buffered writer to the current file and, on success,
// clears the in-memory chunk buffer.
// Assumes that writePathMtx is _write_ locked before calling this method.
func (cdm *ChunkDiskMapper) flushBuffer() error {
	err := cdm.chkWriter.Flush()
	if err == nil {
		cdm.chunkBuffer.clear()
	}
	return err
}
// Chunk returns a chunk from a given reference.
2021-10-13 12:14:32 +00:00
func ( cdm * ChunkDiskMapper ) Chunk ( ref ChunkDiskMapperRef ) ( chunkenc . Chunk , error ) {
2020-03-19 16:33:44 +00:00
cdm . readPathMtx . RLock ( )
2021-11-17 10:21:27 +00:00
// We hold this read lock for the entire duration because if Close()
2020-03-19 16:33:44 +00:00
// is called, the data in the byte slice will get corrupted as the mmapped
// file will be closed.
defer cdm . readPathMtx . RUnlock ( )
if cdm . closed {
return nil , ErrChunkDiskMapperClosed
}
2022-03-11 16:26:59 +00:00
if cdm . writeQueue != nil {
chunk := cdm . writeQueue . get ( ref )
if chunk != nil {
return chunk , nil
}
2022-01-10 13:36:45 +00:00
}
2021-10-13 12:14:32 +00:00
sgmIndex , chkStart := ref . Unpack ( )
// We skip the series ref and the mint/maxt beforehand.
chkStart += SeriesRefSize + ( 2 * MintMaxtSize )
chkCRC32 := newCRC32 ( )
2020-03-19 16:33:44 +00:00
// If it is the current open file, then the chunks can be in the buffer too.
if sgmIndex == cdm . curFileSequence {
chunk := cdm . chunkBuffer . get ( ref )
if chunk != nil {
return chunk , nil
}
}
mmapFile , ok := cdm . mmappedChunkFiles [ sgmIndex ]
if ! ok {
if sgmIndex > cdm . curFileSequence {
2020-05-22 09:03:23 +00:00
return nil , & CorruptionErr {
Dir : cdm . dir . Name ( ) ,
FileIndex : - 1 ,
2023-11-08 09:02:59 +00:00
Err : fmt . Errorf ( "head chunk file index %d more than current open file" , sgmIndex ) ,
2020-05-22 09:03:23 +00:00
}
}
return nil , & CorruptionErr {
Dir : cdm . dir . Name ( ) ,
FileIndex : sgmIndex ,
2023-11-08 09:02:59 +00:00
Err : fmt . Errorf ( "head chunk file index %d does not exist on disk" , sgmIndex ) ,
2020-03-19 16:33:44 +00:00
}
}
if chkStart + MaxChunkLengthFieldSize > mmapFile . byteSlice . Len ( ) {
2020-05-22 09:03:23 +00:00
return nil , & CorruptionErr {
Dir : cdm . dir . Name ( ) ,
FileIndex : sgmIndex ,
2023-11-08 09:02:59 +00:00
Err : fmt . Errorf ( "head chunk file doesn't include enough bytes to read the chunk size data field - required:%v, available:%v" , chkStart + MaxChunkLengthFieldSize , mmapFile . byteSlice . Len ( ) ) ,
2020-05-22 09:03:23 +00:00
}
2020-03-19 16:33:44 +00:00
}
// Encoding.
chkEnc := mmapFile . byteSlice . Range ( chkStart , chkStart + ChunkEncodingSize ) [ 0 ]
2023-03-16 10:23:47 +00:00
sourceChkEnc := chunkenc . Encoding ( chkEnc )
// Extract the encoding from the byte. ChunkDiskMapper uses only the last 7 bits for the encoding.
chkEnc = byte ( cdm . RemoveMasks ( sourceChkEnc ) )
2020-03-19 16:33:44 +00:00
// Data length.
// With the minimum chunk length this should never cause us reading
// over the end of the slice.
chkDataLenStart := chkStart + ChunkEncodingSize
c := mmapFile . byteSlice . Range ( chkDataLenStart , chkDataLenStart + MaxChunkLengthFieldSize )
chkDataLen , n := binary . Uvarint ( c )
if n <= 0 {
2020-05-22 09:03:23 +00:00
return nil , & CorruptionErr {
Dir : cdm . dir . Name ( ) ,
FileIndex : sgmIndex ,
2023-11-08 09:02:59 +00:00
Err : fmt . Errorf ( "reading chunk length failed with %d" , n ) ,
2020-05-22 09:03:23 +00:00
}
2020-03-19 16:33:44 +00:00
}
// Verify the chunk data end.
chkDataEnd := chkDataLenStart + n + int ( chkDataLen )
if chkDataEnd > mmapFile . byteSlice . Len ( ) {
2020-05-22 09:03:23 +00:00
return nil , & CorruptionErr {
Dir : cdm . dir . Name ( ) ,
FileIndex : sgmIndex ,
2023-11-08 09:02:59 +00:00
Err : fmt . Errorf ( "head chunk file doesn't include enough bytes to read the chunk - required:%v, available:%v" , chkDataEnd , mmapFile . byteSlice . Len ( ) ) ,
2020-05-22 09:03:23 +00:00
}
2020-03-19 16:33:44 +00:00
}
// Check the CRC.
sum := mmapFile . byteSlice . Range ( chkDataEnd , chkDataEnd + CRCSize )
if _ , err := chkCRC32 . Write ( mmapFile . byteSlice . Range ( chkStart - ( SeriesRefSize + 2 * MintMaxtSize ) , chkDataEnd ) ) ; err != nil {
2020-05-22 09:03:23 +00:00
return nil , & CorruptionErr {
Dir : cdm . dir . Name ( ) ,
FileIndex : sgmIndex ,
Err : err ,
}
2020-03-19 16:33:44 +00:00
}
if act := chkCRC32 . Sum ( nil ) ; ! bytes . Equal ( act , sum ) {
2020-05-22 09:03:23 +00:00
return nil , & CorruptionErr {
Dir : cdm . dir . Name ( ) ,
FileIndex : sgmIndex ,
2023-11-08 09:02:59 +00:00
Err : fmt . Errorf ( "checksum mismatch expected:%x, actual:%x" , sum , act ) ,
2020-05-22 09:03:23 +00:00
}
2020-03-19 16:33:44 +00:00
}
// The chunk data itself.
chkData := mmapFile . byteSlice . Range ( chkDataEnd - int ( chkDataLen ) , chkDataEnd )
2021-05-06 20:18:59 +00:00
// Make a copy of the chunk data to prevent a panic occurring because the returned
// chunk data slice references an mmap-ed file which could be closed after the
// function returns but while the chunk is still in use.
chkDataCopy := make ( [ ] byte , len ( chkData ) )
copy ( chkDataCopy , chkData )
chk , err := cdm . pool . Get ( chunkenc . Encoding ( chkEnc ) , chkDataCopy )
2020-05-22 09:03:23 +00:00
if err != nil {
return nil , & CorruptionErr {
Dir : cdm . dir . Name ( ) ,
FileIndex : sgmIndex ,
Err : err ,
}
}
return chk , nil
2020-03-19 16:33:44 +00:00
}
2021-11-17 10:21:27 +00:00
// IterateAllChunks iterates all mmappedChunkFiles (in order of head chunk file name/number) and all the chunks within it
// and runs the provided function with information about each chunk. It returns on the first error encountered.
2020-03-19 16:33:44 +00:00
// NOTE: This method needs to be called at least once after creating ChunkDiskMapper
// to set the maxt of all the file.
2023-03-16 10:23:47 +00:00
func ( cdm * ChunkDiskMapper ) IterateAllChunks ( f func ( seriesRef HeadSeriesRef , chunkRef ChunkDiskMapperRef , mint , maxt int64 , numSamples uint16 , encoding chunkenc . Encoding , isOOO bool ) error ) ( err error ) {
2020-03-19 16:33:44 +00:00
cdm . writePathMtx . Lock ( )
defer cdm . writePathMtx . Unlock ( )
defer func ( ) {
2020-08-26 17:59:18 +00:00
cdm . fileMaxtSet = true
2020-03-19 16:33:44 +00:00
} ( )
chkCRC32 := newCRC32 ( )
// Iterate files in ascending order.
segIDs := make ( [ ] int , 0 , len ( cdm . mmappedChunkFiles ) )
for seg := range cdm . mmappedChunkFiles {
segIDs = append ( segIDs , seg )
}
2022-09-30 14:33:56 +00:00
slices . Sort ( segIDs )
2020-03-19 16:33:44 +00:00
for _ , segID := range segIDs {
mmapFile := cdm . mmappedChunkFiles [ segID ]
fileEnd := mmapFile . byteSlice . Len ( )
if segID == cdm . curFileSequence {
2020-07-23 12:35:19 +00:00
fileEnd = int ( cdm . curFileSize ( ) )
2020-03-19 16:33:44 +00:00
}
idx := HeadChunkFileHeaderSize
for idx < fileEnd {
if fileEnd - idx < MaxHeadChunkMetaSize {
// Check for all 0s which marks the end of the file.
allZeros := true
for _ , b := range mmapFile . byteSlice . Range ( idx , fileEnd ) {
if b != byte ( 0 ) {
allZeros = false
break
}
}
if allZeros {
2021-02-25 09:08:12 +00:00
// End of segment chunk file content.
2020-03-19 16:33:44 +00:00
break
}
2020-05-06 15:30:00 +00:00
return & CorruptionErr {
2020-03-19 16:33:44 +00:00
Dir : cdm . dir . Name ( ) ,
FileIndex : segID ,
2023-11-08 09:02:59 +00:00
Err : fmt . Errorf ( "head chunk file has some unread data, but doesn't include enough bytes to read the chunk header" +
2021-02-25 09:08:12 +00:00
" - required:%v, available:%v, file:%d" , idx + MaxHeadChunkMetaSize , fileEnd , segID ) ,
2020-03-19 16:33:44 +00:00
}
}
chkCRC32 . Reset ( )
2021-10-13 12:14:32 +00:00
chunkRef := newChunkDiskMapperRef ( uint64 ( segID ) , uint64 ( idx ) )
2020-03-19 16:33:44 +00:00
startIdx := idx
2021-11-06 10:10:04 +00:00
seriesRef := HeadSeriesRef ( binary . BigEndian . Uint64 ( mmapFile . byteSlice . Range ( idx , idx + SeriesRefSize ) ) )
2020-03-19 16:33:44 +00:00
idx += SeriesRefSize
mint := int64 ( binary . BigEndian . Uint64 ( mmapFile . byteSlice . Range ( idx , idx + MintMaxtSize ) ) )
idx += MintMaxtSize
maxt := int64 ( binary . BigEndian . Uint64 ( mmapFile . byteSlice . Range ( idx , idx + MintMaxtSize ) ) )
idx += MintMaxtSize
// We preallocate file to help with m-mapping (especially windows systems).
// As series ref always starts from 1, we assume it being 0 to be the end of the actual file data.
// We are not considering possible file corruption that can cause it to be 0.
// Additionally we are checking mint and maxt just to be sure.
if seriesRef == 0 && mint == 0 && maxt == 0 {
break
}
2022-09-20 17:05:50 +00:00
chkEnc := chunkenc . Encoding ( mmapFile . byteSlice . Range ( idx , idx + ChunkEncodingSize ) [ 0 ] )
idx += ChunkEncodingSize
2020-03-19 16:33:44 +00:00
dataLen , n := binary . Uvarint ( mmapFile . byteSlice . Range ( idx , idx + MaxChunkLengthFieldSize ) )
2020-05-06 15:30:00 +00:00
idx += n
numSamples := binary . BigEndian . Uint16 ( mmapFile . byteSlice . Range ( idx , idx + 2 ) )
idx += int ( dataLen ) // Skip the data.
2020-03-19 16:33:44 +00:00
// In the beginning we only checked for the chunk meta size.
// Now that we have added the chunk data length, we check for sufficient bytes again.
if idx + CRCSize > fileEnd {
2020-05-06 15:30:00 +00:00
return & CorruptionErr {
2020-03-19 16:33:44 +00:00
Dir : cdm . dir . Name ( ) ,
FileIndex : segID ,
2023-11-08 09:02:59 +00:00
Err : fmt . Errorf ( "head chunk file doesn't include enough bytes to read the chunk header - required:%v, available:%v, file:%d" , idx + CRCSize , fileEnd , segID ) ,
2020-03-19 16:33:44 +00:00
}
}
// Check CRC.
sum := mmapFile . byteSlice . Range ( idx , idx + CRCSize )
if _ , err := chkCRC32 . Write ( mmapFile . byteSlice . Range ( startIdx , idx ) ) ; err != nil {
return err
}
if act := chkCRC32 . Sum ( nil ) ; ! bytes . Equal ( act , sum ) {
2020-05-06 15:30:00 +00:00
return & CorruptionErr {
2020-03-19 16:33:44 +00:00
Dir : cdm . dir . Name ( ) ,
FileIndex : segID ,
2023-11-08 09:02:59 +00:00
Err : fmt . Errorf ( "checksum mismatch expected:%x, actual:%x" , sum , act ) ,
2020-03-19 16:33:44 +00:00
}
}
idx += CRCSize
if maxt > mmapFile . maxt {
mmapFile . maxt = maxt
}
2023-03-16 10:23:47 +00:00
isOOO := cdm . IsOutOfOrderChunk ( chkEnc )
// Extract the encoding from the byte. ChunkDiskMapper uses only the last 7 bits for the encoding.
chkEnc = cdm . RemoveMasks ( chkEnc )
if err := f ( seriesRef , chunkRef , mint , maxt , numSamples , chkEnc , isOOO ) ; err != nil {
2023-11-16 18:54:41 +00:00
var cerr * CorruptionErr
if errors . As ( err , & cerr ) {
2020-08-26 15:06:27 +00:00
cerr . Dir = cdm . dir . Name ( )
cerr . FileIndex = segID
return cerr
}
2020-03-19 16:33:44 +00:00
return err
}
}
if idx > fileEnd {
// It should be equal to the slice length.
2020-05-06 15:30:00 +00:00
return & CorruptionErr {
2020-03-19 16:33:44 +00:00
Dir : cdm . dir . Name ( ) ,
FileIndex : segID ,
2023-11-08 09:02:59 +00:00
Err : fmt . Errorf ( "head chunk file doesn't include enough bytes to read the last chunk data - required:%v, available:%v, file:%d" , idx , fileEnd , segID ) ,
2020-03-19 16:33:44 +00:00
}
}
}
return nil
}
2022-09-20 17:05:50 +00:00
// Truncate deletes the head chunk files whose file number is less than given fileNo.
func ( cdm * ChunkDiskMapper ) Truncate ( fileNo uint32 ) error {
2020-03-19 16:33:44 +00:00
cdm . readPathMtx . RLock ( )
// Sort the file indices, else if files deletion fails in between,
// it can lead to unsequential files as the map is not sorted.
chkFileIndices := make ( [ ] int , 0 , len ( cdm . mmappedChunkFiles ) )
for seq := range cdm . mmappedChunkFiles {
chkFileIndices = append ( chkFileIndices , seq )
}
2022-09-30 14:33:56 +00:00
slices . Sort ( chkFileIndices )
2020-03-19 16:33:44 +00:00
var removedFiles [ ] int
for _ , seq := range chkFileIndices {
2022-09-20 17:05:50 +00:00
if seq == cdm . curFileSequence || uint32 ( seq ) >= fileNo {
2020-06-18 13:54:58 +00:00
break
2020-03-19 16:33:44 +00:00
}
2022-09-20 17:05:50 +00:00
removedFiles = append ( removedFiles , seq )
2020-03-19 16:33:44 +00:00
}
cdm . readPathMtx . RUnlock ( )
2020-10-28 15:24:58 +00:00
errs := tsdb_errors . NewMulti ( )
2020-06-18 13:54:58 +00:00
// Cut a new file only if the current file has some chunks.
2020-07-23 12:35:19 +00:00
if cdm . curFileSize ( ) > HeadChunkFileHeaderSize {
2022-01-10 13:36:45 +00:00
// There is a known race condition here because between the check of curFileSize() and the call to CutNewFile()
// a new file could already be cut, this is acceptable because it will simply result in an empty file which
// won't do any harm.
cdm . CutNewFile ( )
2020-06-18 13:54:58 +00:00
}
2022-02-22 15:05:15 +00:00
pendingDeletes , err := cdm . deleteFiles ( removedFiles )
errs . Add ( err )
if len ( chkFileIndices ) == len ( removedFiles ) {
// All files were deleted. Reset the current sequence.
cdm . evtlPosMtx . Lock ( )
2023-06-30 12:59:59 +00:00
// We can safely reset the sequence only if the write queue is empty. If it's not empty,
// then there may be a job in the queue that will create a new segment file with an ID
// generated before the sequence reset.
//
// The queueIsEmpty() function must be called while holding the cdm.evtlPosMtx to avoid
// a race condition with WriteChunk().
if cdm . writeQueue == nil || cdm . writeQueue . queueIsEmpty ( ) {
if err == nil {
cdm . evtlPos . setSeq ( 0 )
} else {
// In case of error, set it to the last file number on the disk that was not deleted.
cdm . evtlPos . setSeq ( uint64 ( pendingDeletes [ len ( pendingDeletes ) - 1 ] ) )
}
2022-02-22 15:05:15 +00:00
}
2023-06-30 12:59:59 +00:00
2022-02-22 15:05:15 +00:00
cdm . evtlPosMtx . Unlock ( )
}
2020-10-28 15:24:58 +00:00
return errs . Err ( )
2020-03-19 16:33:44 +00:00
}
2022-02-22 15:05:15 +00:00
// deleteFiles deletes the given file sequences in order of the sequence.
// In case of an error, it returns the sorted file sequences that were not deleted from the _disk_.
func ( cdm * ChunkDiskMapper ) deleteFiles ( removedFiles [ ] int ) ( [ ] int , error ) {
2022-09-30 14:33:56 +00:00
slices . Sort ( removedFiles ) // To delete them in order.
2020-03-19 16:33:44 +00:00
cdm . readPathMtx . Lock ( )
for _ , seq := range removedFiles {
if err := cdm . closers [ seq ] . Close ( ) ; err != nil {
cdm . readPathMtx . Unlock ( )
2022-02-22 15:05:15 +00:00
return removedFiles , err
2020-03-19 16:33:44 +00:00
}
delete ( cdm . mmappedChunkFiles , seq )
delete ( cdm . closers , seq )
}
cdm . readPathMtx . Unlock ( )
// We actually delete the files separately to not block the readPathMtx for long.
2022-02-22 15:05:15 +00:00
for i , seq := range removedFiles {
2020-03-19 16:33:44 +00:00
if err := os . Remove ( segmentFile ( cdm . dir . Name ( ) , seq ) ) ; err != nil {
2022-02-22 15:05:15 +00:00
return removedFiles [ i : ] , err
2020-03-19 16:33:44 +00:00
}
}
2022-02-22 15:05:15 +00:00
return nil , nil
2020-03-19 16:33:44 +00:00
}
2020-05-06 15:30:00 +00:00
// DeleteCorrupted deletes all the head chunk files after the one which had the corruption
2020-03-19 16:33:44 +00:00
// (including the corrupt file).
2020-05-06 15:30:00 +00:00
func ( cdm * ChunkDiskMapper ) DeleteCorrupted ( originalErr error ) error {
2023-11-08 09:02:59 +00:00
var cerr * CorruptionErr
if ! errors . As ( originalErr , & cerr ) {
return fmt . Errorf ( "cannot handle error: %w" , originalErr )
2020-03-19 16:33:44 +00:00
}
// Delete all the head chunk files following the corrupt head chunk file.
segs := [ ] int { }
2020-08-26 08:52:48 +00:00
cdm . readPathMtx . RLock ( )
2022-02-22 15:05:15 +00:00
lastSeq := 0
2020-03-19 16:33:44 +00:00
for seg := range cdm . mmappedChunkFiles {
style: Replace `else if` cascades with `switch`
Wiser coders than myself have come to the conclusion that a `switch`
statement is almost always superior to a statement that includes any
`else if`.
The exceptions that I have found in our codebase are just these two:
* The `if else` is followed by an additional statement before the next
condition (separated by a `;`).
* The whole thing is within a `for` loop and `break` statements are
used. In this case, using `switch` would require tagging the `for`
loop, which probably tips the balance.
Why are `switch` statements more readable?
For one, fewer curly braces. But more importantly, the conditions all
have the same alignment, so the whole thing follows the natural flow
of going down a list of conditions. With `else if`, in contrast, all
conditions but the first are "hidden" behind `} else if `, harder to
spot and (for no good reason) presented differently from the first
condition.
I'm sure the aforemention wise coders can list even more reasons.
In any case, I like it so much that I have found myself recommending
it in code reviews. I would like to make it a habit in our code base,
without making it a hard requirement that we would test on the CI. But
for that, there has to be a role model, so this commit eliminates all
`if else` occurrences, unless it is autogenerated code or fits one of
the exceptions above.
Signed-off-by: beorn7 <beorn@grafana.com>
2023-04-12 14:14:31 +00:00
switch {
case seg >= cerr . FileIndex :
2020-03-19 16:33:44 +00:00
segs = append ( segs , seg )
style: Replace `else if` cascades with `switch`
Wiser coders than myself have come to the conclusion that a `switch`
statement is almost always superior to a statement that includes any
`else if`.
The exceptions that I have found in our codebase are just these two:
* The `if else` is followed by an additional statement before the next
condition (separated by a `;`).
* The whole thing is within a `for` loop and `break` statements are
used. In this case, using `switch` would require tagging the `for`
loop, which probably tips the balance.
Why are `switch` statements more readable?
For one, fewer curly braces. But more importantly, the conditions all
have the same alignment, so the whole thing follows the natural flow
of going down a list of conditions. With `else if`, in contrast, all
conditions but the first are "hidden" behind `} else if `, harder to
spot and (for no good reason) presented differently from the first
condition.
I'm sure the aforemention wise coders can list even more reasons.
In any case, I like it so much that I have found myself recommending
it in code reviews. I would like to make it a habit in our code base,
without making it a hard requirement that we would test on the CI. But
for that, there has to be a role model, so this commit eliminates all
`if else` occurrences, unless it is autogenerated code or fits one of
the exceptions above.
Signed-off-by: beorn7 <beorn@grafana.com>
2023-04-12 14:14:31 +00:00
case seg > lastSeq :
2022-02-22 15:05:15 +00:00
lastSeq = seg
2020-03-19 16:33:44 +00:00
}
}
2020-08-26 08:52:48 +00:00
cdm . readPathMtx . RUnlock ( )
2022-02-22 15:05:15 +00:00
pendingDeletes , err := cdm . deleteFiles ( segs )
cdm . evtlPosMtx . Lock ( )
if err == nil {
cdm . evtlPos . setSeq ( uint64 ( lastSeq ) )
} else {
// In case of error, set it to the last file number on the disk that was not deleted.
cdm . evtlPos . setSeq ( uint64 ( pendingDeletes [ len ( pendingDeletes ) - 1 ] ) )
}
cdm . evtlPosMtx . Unlock ( )
return err
2020-03-19 16:33:44 +00:00
}
// Size returns the size of the chunk files.
2020-11-03 10:04:59 +00:00
func ( cdm * ChunkDiskMapper ) Size ( ) ( int64 , error ) {
return fileutil . DirSize ( cdm . dir . Name ( ) )
2020-07-23 12:35:19 +00:00
}
2022-01-10 13:36:45 +00:00
func ( cdm * ChunkDiskMapper ) curFileSize ( ) uint64 {
return cdm . curFileOffset . Load ( )
2020-03-19 16:33:44 +00:00
}
// Close closes all the open files in ChunkDiskMapper.
// It is not longer safe to access chunks from this struct after calling Close.
func ( cdm * ChunkDiskMapper ) Close ( ) error {
2022-01-10 13:36:45 +00:00
// Locking the eventual position lock blocks WriteChunk()
cdm . evtlPosMtx . Lock ( )
defer cdm . evtlPosMtx . Unlock ( )
2022-03-11 16:26:59 +00:00
if cdm . writeQueue != nil {
cdm . writeQueue . stop ( )
}
2022-01-10 13:36:45 +00:00
2020-03-19 16:33:44 +00:00
// 'WriteChunk' locks writePathMtx first and then readPathMtx for cutting head chunk file.
// The lock order should not be reversed here else it can cause deadlocks.
cdm . writePathMtx . Lock ( )
defer cdm . writePathMtx . Unlock ( )
cdm . readPathMtx . Lock ( )
defer cdm . readPathMtx . Unlock ( )
if cdm . closed {
return nil
}
cdm . closed = true
2020-10-28 15:24:58 +00:00
errs := tsdb_errors . NewMulti (
closeAllFromMap ( cdm . closers ) ,
cdm . finalizeCurFile ( ) ,
cdm . dir . Close ( ) ,
)
2020-03-19 16:33:44 +00:00
cdm . mmappedChunkFiles = map [ int ] * mmappedChunkFile { }
cdm . closers = map [ int ] io . Closer { }
2020-10-28 15:24:58 +00:00
return errs . Err ( )
2020-03-19 16:33:44 +00:00
}
func closeAllFromMap ( cs map [ int ] io . Closer ) error {
2020-10-28 15:24:58 +00:00
errs := tsdb_errors . NewMulti ( )
2020-03-19 16:33:44 +00:00
for _ , c := range cs {
2020-10-28 15:24:58 +00:00
errs . Add ( c . Close ( ) )
2020-03-19 16:33:44 +00:00
}
2020-10-28 15:24:58 +00:00
return errs . Err ( )
2020-03-19 16:33:44 +00:00
}
const inBufferShards = 128 // 128 is a randomly chosen number.
2021-11-17 10:21:27 +00:00
// chunkBuffer is a thread safe lookup table for chunks by their ref.
2020-03-19 16:33:44 +00:00
type chunkBuffer struct {
2021-10-13 12:14:32 +00:00
inBufferChunks [ inBufferShards ] map [ ChunkDiskMapperRef ] chunkenc . Chunk
2020-03-19 16:33:44 +00:00
inBufferChunksMtxs [ inBufferShards ] sync . RWMutex
}
func newChunkBuffer ( ) * chunkBuffer {
cb := & chunkBuffer { }
for i := 0 ; i < inBufferShards ; i ++ {
2021-10-13 12:14:32 +00:00
cb . inBufferChunks [ i ] = make ( map [ ChunkDiskMapperRef ] chunkenc . Chunk )
2020-03-19 16:33:44 +00:00
}
return cb
}
2021-10-13 12:14:32 +00:00
func ( cb * chunkBuffer ) put ( ref ChunkDiskMapperRef , chk chunkenc . Chunk ) {
2020-03-19 16:33:44 +00:00
shardIdx := ref % inBufferShards
cb . inBufferChunksMtxs [ shardIdx ] . Lock ( )
cb . inBufferChunks [ shardIdx ] [ ref ] = chk
cb . inBufferChunksMtxs [ shardIdx ] . Unlock ( )
}
2021-10-13 12:14:32 +00:00
func ( cb * chunkBuffer ) get ( ref ChunkDiskMapperRef ) chunkenc . Chunk {
2020-03-19 16:33:44 +00:00
shardIdx := ref % inBufferShards
cb . inBufferChunksMtxs [ shardIdx ] . RLock ( )
defer cb . inBufferChunksMtxs [ shardIdx ] . RUnlock ( )
return cb . inBufferChunks [ shardIdx ] [ ref ]
}
func ( cb * chunkBuffer ) clear ( ) {
for i := 0 ; i < inBufferShards ; i ++ {
cb . inBufferChunksMtxs [ i ] . Lock ( )
2021-10-13 12:14:32 +00:00
cb . inBufferChunks [ i ] = make ( map [ ChunkDiskMapperRef ] chunkenc . Chunk )
2020-03-19 16:33:44 +00:00
cb . inBufferChunksMtxs [ i ] . Unlock ( )
}
}