mirror of https://github.com/prometheus/prometheus
Test to corrupt segments mid-WAL, repair and check we can read the correct number of records. (#528)
Test to corrupt segments mid-WAL, repair and check we can read the correct number of records. Make segmentBufReader pad short segments with zeros, and only advance curr segment index after fully reading segment.pull/5805/head
parent
c59ed492b2
commit
bc3b0bd429
|
@ -0,0 +1,35 @@
|
|||
// Copyright 2019 The Prometheus Authors
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package testutil
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/go-kit/kit/log"
|
||||
)
|
||||
|
||||
type logger struct {
|
||||
t *testing.T
|
||||
}
|
||||
|
||||
// NewLogger returns a gokit compatible Logger which calls t.Log.
|
||||
func NewLogger(t *testing.T) log.Logger {
|
||||
return logger{t: t}
|
||||
}
|
||||
|
||||
// Log implements log.Logger.
|
||||
func (t logger) Log(keyvals ...interface{}) error {
|
||||
t.t.Log(keyvals...)
|
||||
return nil
|
||||
}
|
54
wal/wal.go
54
wal/wal.go
|
@ -681,20 +681,20 @@ func NewSegmentsRangeReader(sr ...SegmentRange) (io.ReadCloser, error) {
|
|||
|
||||
// segmentBufReader is a buffered reader that reads in multiples of pages.
|
||||
// The main purpose is that we are able to track segment and offset for
|
||||
// corruption reporting.
|
||||
// corruption reporting. We have to be careful not to increment curr too
|
||||
// early, as it is used by Reader.Err() to tell Repair which segment is corrupt.
|
||||
// As such we pad the end of non-page align segments with zeros.
|
||||
type segmentBufReader struct {
|
||||
buf *bufio.Reader
|
||||
segs []*Segment
|
||||
cur int
|
||||
off int
|
||||
more bool
|
||||
cur int // Index into segs.
|
||||
off int // Offset of read data into current segment.
|
||||
}
|
||||
|
||||
func newSegmentBufReader(segs ...*Segment) *segmentBufReader {
|
||||
return &segmentBufReader{
|
||||
buf: bufio.NewReaderSize(nil, 16*pageSize),
|
||||
buf: bufio.NewReaderSize(segs[0], 16*pageSize),
|
||||
segs: segs,
|
||||
cur: -1,
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -707,25 +707,39 @@ func (r *segmentBufReader) Close() (err error) {
|
|||
return err
|
||||
}
|
||||
|
||||
// Read implements io.Reader.
|
||||
func (r *segmentBufReader) Read(b []byte) (n int, err error) {
|
||||
if !r.more {
|
||||
if r.cur+1 >= len(r.segs) {
|
||||
return 0, io.EOF
|
||||
}
|
||||
r.cur++
|
||||
r.off = 0
|
||||
r.more = true
|
||||
r.buf.Reset(r.segs[r.cur])
|
||||
}
|
||||
n, err = r.buf.Read(b)
|
||||
r.off += n
|
||||
if err != io.EOF {
|
||||
|
||||
// If we succeeded, or hit a non-EOF, we can stop.
|
||||
if err == nil || err != io.EOF {
|
||||
return n, err
|
||||
}
|
||||
// Just return what we read so far, but don't signal EOF.
|
||||
// Only unset more so we don't invalidate the current segment and
|
||||
// offset before the next read.
|
||||
r.more = false
|
||||
|
||||
// We hit EOF; fake out zero padding at the end of short segments, so we
|
||||
// don't increment curr too early and report the wrong segment as corrupt.
|
||||
if r.off%pageSize != 0 {
|
||||
i := 0
|
||||
for ; n+i < len(b) && (r.off+i)%pageSize != 0; i++ {
|
||||
b[n+i] = 0
|
||||
}
|
||||
|
||||
// Return early, even if we didn't fill b.
|
||||
r.off += i
|
||||
return n + i, nil
|
||||
}
|
||||
|
||||
// There is no more deta left in the curr segment and there are no more
|
||||
// segments left. Return EOF.
|
||||
if r.cur+1 >= len(r.segs) {
|
||||
return n, io.EOF
|
||||
}
|
||||
|
||||
// Move to next segment.
|
||||
r.cur++
|
||||
r.off = 0
|
||||
r.buf.Reset(r.segs[r.cur])
|
||||
return n, nil
|
||||
}
|
||||
|
||||
|
|
126
wal/wal_test.go
126
wal/wal_test.go
|
@ -24,6 +24,7 @@ import (
|
|||
"math/rand"
|
||||
"os"
|
||||
"path"
|
||||
"path/filepath"
|
||||
"sync"
|
||||
"testing"
|
||||
"time"
|
||||
|
@ -225,7 +226,7 @@ func TestReader_Live(t *testing.T) {
|
|||
}
|
||||
|
||||
func TestWAL_FuzzWriteRead_Live(t *testing.T) {
|
||||
const count = 5000
|
||||
const count = 500
|
||||
var input [][]byte
|
||||
lock := sync.RWMutex{}
|
||||
var recs [][]byte
|
||||
|
@ -547,6 +548,129 @@ func TestWAL_Repair(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
// TestCorruptAndCarryOn writes a multi-segment WAL; corrupts the first segment and
|
||||
// ensures that an error during reading that segment are correctly repaired before
|
||||
// moving to write more records to the WAL.
|
||||
func TestCorruptAndCarryOn(t *testing.T) {
|
||||
dir, err := ioutil.TempDir("", "wal_repair")
|
||||
testutil.Ok(t, err)
|
||||
defer os.RemoveAll(dir)
|
||||
|
||||
var (
|
||||
logger = testutil.NewLogger(t)
|
||||
segmentSize = pageSize * 3
|
||||
recordSize = (pageSize / 3) - recordHeaderSize
|
||||
)
|
||||
|
||||
// Produce a WAL with a two segments of 3 pages with 3 records each,
|
||||
// so when we truncate the file we're guaranteed to split a record.
|
||||
{
|
||||
w, err := NewSize(logger, nil, dir, segmentSize)
|
||||
testutil.Ok(t, err)
|
||||
|
||||
for i := 0; i < 18; i++ {
|
||||
buf := make([]byte, recordSize)
|
||||
_, err := rand.Read(buf)
|
||||
testutil.Ok(t, err)
|
||||
|
||||
err = w.Log(buf)
|
||||
testutil.Ok(t, err)
|
||||
}
|
||||
|
||||
err = w.Close()
|
||||
testutil.Ok(t, err)
|
||||
}
|
||||
|
||||
// Check all the segments are the correct size.
|
||||
{
|
||||
segments, err := listSegments(dir)
|
||||
testutil.Ok(t, err)
|
||||
for _, segment := range segments {
|
||||
f, err := os.OpenFile(filepath.Join(dir, fmt.Sprintf("%08d", segment.index)), os.O_RDONLY, 0666)
|
||||
testutil.Ok(t, err)
|
||||
|
||||
fi, err := f.Stat()
|
||||
testutil.Ok(t, err)
|
||||
|
||||
t.Log("segment", segment.index, "size", fi.Size())
|
||||
testutil.Equals(t, int64(segmentSize), fi.Size())
|
||||
|
||||
err = f.Close()
|
||||
testutil.Ok(t, err)
|
||||
}
|
||||
}
|
||||
|
||||
// Truncate the first file, splitting the middle record in the second
|
||||
// page in half, leaving 4 valid records.
|
||||
{
|
||||
f, err := os.OpenFile(filepath.Join(dir, fmt.Sprintf("%08d", 0)), os.O_RDWR, 0666)
|
||||
testutil.Ok(t, err)
|
||||
|
||||
fi, err := f.Stat()
|
||||
testutil.Ok(t, err)
|
||||
testutil.Equals(t, int64(segmentSize), fi.Size())
|
||||
|
||||
err = f.Truncate(int64(segmentSize / 2))
|
||||
testutil.Ok(t, err)
|
||||
|
||||
err = f.Close()
|
||||
testutil.Ok(t, err)
|
||||
}
|
||||
|
||||
// Now try and repair this WAL, and write 5 more records to it.
|
||||
{
|
||||
sr, err := NewSegmentsReader(dir)
|
||||
testutil.Ok(t, err)
|
||||
|
||||
reader := NewReader(sr)
|
||||
i := 0
|
||||
for ; i < 4 && reader.Next(); i++ {
|
||||
testutil.Equals(t, recordSize, len(reader.Record()))
|
||||
}
|
||||
testutil.Equals(t, 4, i, "not enough records")
|
||||
testutil.Assert(t, !reader.Next(), "unexpected record")
|
||||
|
||||
corruptionErr := reader.Err()
|
||||
testutil.Assert(t, corruptionErr != nil, "expected error")
|
||||
|
||||
err = sr.Close()
|
||||
testutil.Ok(t, err)
|
||||
|
||||
w, err := NewSize(logger, nil, dir, segmentSize)
|
||||
testutil.Ok(t, err)
|
||||
|
||||
err = w.Repair(corruptionErr)
|
||||
testutil.Ok(t, err)
|
||||
|
||||
for i := 0; i < 5; i++ {
|
||||
buf := make([]byte, recordSize)
|
||||
_, err := rand.Read(buf)
|
||||
testutil.Ok(t, err)
|
||||
|
||||
err = w.Log(buf)
|
||||
testutil.Ok(t, err)
|
||||
}
|
||||
|
||||
err = w.Close()
|
||||
testutil.Ok(t, err)
|
||||
}
|
||||
|
||||
// Replay the WAL. Should get 9 records.
|
||||
{
|
||||
sr, err := NewSegmentsReader(dir)
|
||||
testutil.Ok(t, err)
|
||||
|
||||
reader := NewReader(sr)
|
||||
i := 0
|
||||
for ; i < 9 && reader.Next(); i++ {
|
||||
testutil.Equals(t, recordSize, len(reader.Record()))
|
||||
}
|
||||
testutil.Equals(t, 9, i, "wrong number of records")
|
||||
testutil.Assert(t, !reader.Next(), "unexpected record")
|
||||
testutil.Equals(t, nil, reader.Err())
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkWAL_LogBatched(b *testing.B) {
|
||||
dir, err := ioutil.TempDir("", "bench_logbatch")
|
||||
testutil.Ok(b, err)
|
||||
|
|
Loading…
Reference in New Issue