mirror of https://github.com/hashicorp/consul
141 lines
3.9 KiB
Go
141 lines
3.9 KiB
Go
package retry
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"math/rand"
|
|
"time"
|
|
)
|
|
|
|
// Jitter should return a new wait duration optionally with some time added or
// removed to create some randomness in wait time.
type Jitter func(baseTime time.Duration) time.Duration

// NewJitter returns a new random Jitter that is up to percent longer than the
// original wait time. A negative percent is treated as 0, which yields a
// Jitter that returns the base time unchanged.
//
// The returned Jitter also returns the base time unchanged whenever a positive
// jitter range cannot be computed: the base time is zero or negative, the
// range rounds down to zero, or the arithmetic would overflow.
func NewJitter(percent int64) Jitter {
	if percent < 0 {
		percent = 0
	}

	return func(baseTime time.Duration) time.Duration {
		if percent == 0 {
			return baseTime
		}

		base := int64(baseTime)
		scaled := base * percent
		// Signed multiplication wraps silently. Dividing back catches every
		// wraparound, including the ones that land on a non-negative value.
		if scaled/percent != base {
			return baseTime
		}

		limit := scaled / 100
		// rand.Int63n panics when its argument is not strictly positive, so a
		// zero or negative range means there is no jitter to add.
		if limit <= 0 {
			return baseTime
		}

		jittered := baseTime + time.Duration(rand.Int63n(limit))
		if jittered < baseTime { // the addition overflowed
			return baseTime
		}
		return jittered
	}
}
|
|
|
|
// Waiter records the number of failures and performs exponential backoff when
|
|
// there are consecutive failures.
|
|
type Waiter struct {
|
|
// MinFailures before exponential backoff starts. Any failures before
|
|
// MinFailures is reached will wait MinWait time.
|
|
MinFailures uint
|
|
// MinWait time. Returned after the first failure.
|
|
MinWait time.Duration
|
|
// MaxWait time applied before Jitter. Note that the actual maximum wait time
|
|
// is MaxWait + MaxWait * Jitter.
|
|
MaxWait time.Duration
|
|
// Jitter to add to each wait time. The Jitter is applied after MaxWait, which
|
|
// may cause the actual wait time to exceed MaxWait.
|
|
Jitter Jitter
|
|
// Factor is the multiplier to use when calculating the delay. Defaults to
|
|
// 1 second.
|
|
Factor time.Duration
|
|
failures uint
|
|
}
|
|
|
|
// delay calculates the time to wait based on the number of failures
|
|
func (w *Waiter) delay() time.Duration {
|
|
if w.failures <= w.MinFailures {
|
|
return w.MinWait
|
|
}
|
|
factor := w.Factor
|
|
if factor == 0 {
|
|
factor = time.Second
|
|
}
|
|
|
|
shift := w.failures - w.MinFailures - 1
|
|
waitTime := w.MaxWait
|
|
if shift < 31 {
|
|
waitTime = (1 << shift) * factor
|
|
}
|
|
// apply MaxWait before jitter so that multiple waiters with the same MaxWait
|
|
// do not converge when they hit their max.
|
|
if w.MaxWait != 0 && waitTime > w.MaxWait {
|
|
waitTime = w.MaxWait
|
|
}
|
|
if w.Jitter != nil {
|
|
waitTime = w.Jitter(waitTime)
|
|
}
|
|
if waitTime < w.MinWait {
|
|
return w.MinWait
|
|
}
|
|
return waitTime
|
|
}
|
|
|
|
// Reset the failure count to 0.
|
|
// Reset must be called if the operation done after Wait did not fail.
|
|
func (w *Waiter) Reset() {
|
|
w.failures = 0
|
|
}
|
|
|
|
// Failures returns the count of consecutive failures.
|
|
func (w *Waiter) Failures() int {
|
|
return int(w.failures)
|
|
}
|
|
|
|
// Wait increases the number of failures by one, and then blocks until the context
|
|
// is cancelled, or until the wait time is reached.
|
|
//
|
|
// The wait time increases exponentially as the number of failures increases.
|
|
// Every call to Wait increments the failures count, so Reset must be called
|
|
// after Wait when there wasn't a failure.
|
|
//
|
|
// The only non-nil error that Wait returns will come from ctx.Err(),
|
|
// such as when the context is canceled. This makes it suitable for
|
|
// long-running routines that do not get re-initialized, such as replication.
|
|
func (w *Waiter) Wait(ctx context.Context) error {
|
|
w.failures++
|
|
timer := time.NewTimer(w.delay())
|
|
select {
|
|
case <-ctx.Done():
|
|
timer.Stop()
|
|
return ctx.Err()
|
|
case <-timer.C:
|
|
return nil
|
|
}
|
|
}
|
|
|
|
// NextWait returns the period the next call to Wait with block for assuming
|
|
// it's context is not cancelled. It's useful for informing a user how long
|
|
// it will be before the next attempt is made.
|
|
func (w *Waiter) NextWait() time.Duration {
|
|
return w.delay()
|
|
}
|
|
|
|
// RetryLoop retries an operation until either operation completes without error
|
|
// or Waiter's context is canceled.
|
|
func (w *Waiter) RetryLoop(ctx context.Context, operation func() error) error {
|
|
var lastError error
|
|
for {
|
|
if err := w.Wait(ctx); err != nil {
|
|
// The error will only be non-nil if the context is canceled.
|
|
return fmt.Errorf("could not retry operation: %w", lastError)
|
|
}
|
|
|
|
if err := operation(); err == nil {
|
|
// Reset the failure count seen by the waiter if there was no error.
|
|
w.Reset()
|
|
return nil
|
|
} else {
|
|
lastError = err
|
|
}
|
|
}
|
|
}
|