mirror of https://github.com/hashicorp/consul
Adds monitor retries to the consul lock command.
parent
ca08ba3aee
commit
4afeddacc8
|
@ -69,17 +69,29 @@ Options:
|
|||
-pass-stdin Pass stdin to child process.
|
||||
-try=duration Make a single attempt to acquire the lock, waiting
|
||||
up to the given duration (eg. "15s").
|
||||
-monitor-retry=n Retry up to n times if Consul returns a 500 error
|
||||
while monitoring the lock. This allows riding out brief
|
||||
periods of unavailability without causing leader
|
||||
elections, but increases the amount of time required
|
||||
to detect a lost lock in some cases. Defaults to 0.
|
||||
-verbose Enables verbose output
|
||||
`
|
||||
return strings.TrimSpace(helpText)
|
||||
}
|
||||
|
||||
func (c *LockCommand) Run(args []string) int {
|
||||
var lu *LockUnlock
|
||||
return c.run(args, &lu)
|
||||
}
|
||||
|
||||
// run exposes the underlying lock for testing.
|
||||
func (c *LockCommand) run(args []string, lu **LockUnlock) int {
|
||||
var childDone chan struct{}
|
||||
var name, token string
|
||||
var limit int
|
||||
var passStdin bool
|
||||
var try string
|
||||
var retry int
|
||||
cmdFlags := flag.NewFlagSet("watch", flag.ContinueOnError)
|
||||
cmdFlags.Usage = func() { c.Ui.Output(c.Help()) }
|
||||
cmdFlags.IntVar(&limit, "n", 1, "")
|
||||
|
@ -87,6 +99,7 @@ func (c *LockCommand) Run(args []string) int {
|
|||
cmdFlags.StringVar(&token, "token", "", "")
|
||||
cmdFlags.BoolVar(&passStdin, "pass-stdin", false, "")
|
||||
cmdFlags.StringVar(&try, "try", "", "")
|
||||
cmdFlags.IntVar(&retry, "monitor-retry", 0, "")
|
||||
cmdFlags.BoolVar(&c.verbose, "verbose", false, "")
|
||||
httpAddr := HTTPAddrFlag(cmdFlags)
|
||||
if err := cmdFlags.Parse(args); err != nil {
|
||||
|
@ -135,6 +148,12 @@ func (c *LockCommand) Run(args []string) int {
|
|||
oneshot = true
|
||||
}
|
||||
|
||||
// Check the retry parameter
|
||||
if retry < 0 {
|
||||
c.Ui.Error("Number for 'monitor-retry' must be >= 0")
|
||||
return 1
|
||||
}
|
||||
|
||||
// Create and test the HTTP client
|
||||
conf := api.DefaultConfig()
|
||||
conf.Address = *httpAddr
|
||||
|
@ -151,11 +170,10 @@ func (c *LockCommand) Run(args []string) int {
|
|||
}
|
||||
|
||||
// Setup the lock or semaphore
|
||||
var lu *LockUnlock
|
||||
if limit == 1 {
|
||||
lu, err = c.setupLock(client, prefix, name, oneshot, wait)
|
||||
*lu, err = c.setupLock(client, prefix, name, oneshot, wait, retry)
|
||||
} else {
|
||||
lu, err = c.setupSemaphore(client, limit, prefix, name, oneshot, wait)
|
||||
*lu, err = c.setupSemaphore(client, limit, prefix, name, oneshot, wait, retry)
|
||||
}
|
||||
if err != nil {
|
||||
c.Ui.Error(fmt.Sprintf("Lock setup failed: %s", err))
|
||||
|
@ -166,7 +184,7 @@ func (c *LockCommand) Run(args []string) int {
|
|||
if c.verbose {
|
||||
c.Ui.Info("Attempting lock acquisition")
|
||||
}
|
||||
lockCh, err := lu.lockFn(c.ShutdownCh)
|
||||
lockCh, err := (*lu).lockFn(c.ShutdownCh)
|
||||
if lockCh == nil {
|
||||
if err == nil {
|
||||
c.Ui.Error("Shutdown triggered or timeout during lock acquisition")
|
||||
|
@ -219,14 +237,14 @@ func (c *LockCommand) Run(args []string) int {
|
|||
|
||||
RELEASE:
|
||||
// Release the lock before termination
|
||||
if err := lu.unlockFn(); err != nil {
|
||||
if err := (*lu).unlockFn(); err != nil {
|
||||
c.Ui.Error(fmt.Sprintf("Lock release failed: %s", err))
|
||||
return 1
|
||||
}
|
||||
|
||||
// Cleanup the lock if no longer in use
|
||||
if err := lu.cleanupFn(); err != nil {
|
||||
if err != lu.inUseErr {
|
||||
if err := (*lu).cleanupFn(); err != nil {
|
||||
if err != (*lu).inUseErr {
|
||||
c.Ui.Error(fmt.Sprintf("Lock cleanup failed: %s", err))
|
||||
return 1
|
||||
} else if c.verbose {
|
||||
|
@ -240,8 +258,11 @@ RELEASE:
|
|||
|
||||
// setupLock is used to setup a new Lock given the API client, the key prefix to
|
||||
// operate on, and an optional session name. If oneshot is true then we will set
|
||||
// up for a single attempt at acquisition, using the given wait time.
|
||||
func (c *LockCommand) setupLock(client *api.Client, prefix, name string, oneshot bool, wait time.Duration) (*LockUnlock, error) {
|
||||
// up for a single attempt at acquisition, using the given wait time. The retry
|
||||
// parameter sets how many 500 errors the lock monitor will tolerate before
|
||||
// giving up the lock.
|
||||
func (c *LockCommand) setupLock(client *api.Client, prefix, name string,
|
||||
oneshot bool, wait time.Duration, retry int) (*LockUnlock, error) {
|
||||
// Use the DefaultSemaphoreKey extension, this way if a lock and
|
||||
// semaphore are both used at the same prefix, we will get a conflict
|
||||
// which we can report to the user.
|
||||
|
@ -250,8 +271,9 @@ func (c *LockCommand) setupLock(client *api.Client, prefix, name string, oneshot
|
|||
c.Ui.Info(fmt.Sprintf("Setting up lock at path: %s", key))
|
||||
}
|
||||
opts := api.LockOptions{
|
||||
Key: key,
|
||||
SessionName: name,
|
||||
Key: key,
|
||||
SessionName: name,
|
||||
MonitorRetries: retry,
|
||||
}
|
||||
if oneshot {
|
||||
opts.LockTryOnce = true
|
||||
|
@ -266,21 +288,26 @@ func (c *LockCommand) setupLock(client *api.Client, prefix, name string, oneshot
|
|||
unlockFn: l.Unlock,
|
||||
cleanupFn: l.Destroy,
|
||||
inUseErr: api.ErrLockInUse,
|
||||
rawOpts: &opts,
|
||||
}
|
||||
return lu, nil
|
||||
}
|
||||
|
||||
// setupSemaphore is used to setup a new Semaphore given the API client, key
|
||||
// prefix, session name, and slot holder limit. If oneshot is true then we will
|
||||
// set up for a single attempt at acquisition, using the given wait time.
|
||||
func (c *LockCommand) setupSemaphore(client *api.Client, limit int, prefix, name string, oneshot bool, wait time.Duration) (*LockUnlock, error) {
|
||||
// set up for a single attempt at acquisition, using the given wait time. The
|
||||
// retry parameter sets how many 500 errors the lock monitor will tolerate
|
||||
// before giving up the semaphore.
|
||||
func (c *LockCommand) setupSemaphore(client *api.Client, limit int, prefix, name string,
|
||||
oneshot bool, wait time.Duration, retry int) (*LockUnlock, error) {
|
||||
if c.verbose {
|
||||
c.Ui.Info(fmt.Sprintf("Setting up semaphore (limit %d) at prefix: %s", limit, prefix))
|
||||
}
|
||||
opts := api.SemaphoreOptions{
|
||||
Prefix: prefix,
|
||||
Limit: limit,
|
||||
SessionName: name,
|
||||
Prefix: prefix,
|
||||
Limit: limit,
|
||||
SessionName: name,
|
||||
MonitorRetries: retry,
|
||||
}
|
||||
if oneshot {
|
||||
opts.SemaphoreTryOnce = true
|
||||
|
@ -295,6 +322,7 @@ func (c *LockCommand) setupSemaphore(client *api.Client, limit int, prefix, name
|
|||
unlockFn: s.Release,
|
||||
cleanupFn: s.Destroy,
|
||||
inUseErr: api.ErrSemaphoreInUse,
|
||||
rawOpts: &opts,
|
||||
}
|
||||
return lu, nil
|
||||
}
|
||||
|
@ -408,4 +436,5 @@ type LockUnlock struct {
|
|||
unlockFn func() error
|
||||
cleanupFn func() error
|
||||
inUseErr error
|
||||
rawOpts interface{}
|
||||
}
|
||||
|
|
|
@ -5,9 +5,10 @@ import (
|
|||
"io/ioutil"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"sync"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/hashicorp/consul/api"
|
||||
"github.com/mitchellh/cli"
|
||||
)
|
||||
|
||||
|
@ -15,17 +16,22 @@ func TestLockCommand_implements(t *testing.T) {
|
|||
var _ cli.Command = &LockCommand{}
|
||||
}
|
||||
|
||||
func TestLockCommand_BadArgs(t *testing.T) {
|
||||
func argFail(t *testing.T, args []string, expected string) {
|
||||
ui := new(cli.MockUi)
|
||||
c := &LockCommand{Ui: ui}
|
||||
|
||||
if code := c.Run([]string{"-try=blah"}); code != 1 {
|
||||
if code := c.Run(args); code != 1 {
|
||||
t.Fatalf("expected return code 1, got %d", code)
|
||||
}
|
||||
|
||||
if code := c.Run([]string{"-try=-10s"}); code != 1 {
|
||||
t.Fatalf("expected return code 1, got %d", code)
|
||||
if reason := ui.ErrorWriter.String(); !strings.Contains(reason, expected) {
|
||||
t.Fatalf("bad reason: got='%s', expected='%s'", reason, expected)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func TestLockCommand_BadArgs(t *testing.T) {
|
||||
argFail(t, []string{"-try=blah", "test/prefix", "date"}, "parsing duration")
|
||||
argFail(t, []string{"-try=-10s", "test/prefix", "date"}, "must be positive")
|
||||
argFail(t, []string{"-monitor-retry=-5", "test/prefix", "date"}, "must be >= 0")
|
||||
}
|
||||
|
||||
func TestLockCommand_Run(t *testing.T) {
|
||||
|
@ -51,68 +57,130 @@ func TestLockCommand_Run(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func runTry(t *testing.T, n int) {
|
||||
func TestLockCommand_Try_Lock(t *testing.T) {
|
||||
a1 := testAgent(t)
|
||||
defer a1.Shutdown()
|
||||
waitForLeader(t, a1.httpAddr)
|
||||
|
||||
// Define a long-running command.
|
||||
nArg := fmt.Sprintf("-n=%d", n)
|
||||
args := []string{"-http-addr=" + a1.httpAddr, nArg, "-try=250ms", "test/prefix", "sleep 2"}
|
||||
ui := new(cli.MockUi)
|
||||
c := &LockCommand{Ui: ui}
|
||||
filePath := filepath.Join(a1.dir, "test_touch")
|
||||
touchCmd := fmt.Sprintf("touch '%s'", filePath)
|
||||
args := []string{"-http-addr=" + a1.httpAddr, "-try=10s", "test/prefix", touchCmd}
|
||||
|
||||
// Run several commands at once.
|
||||
var wg sync.WaitGroup
|
||||
locked := make([]bool, n+1)
|
||||
tried := make([]bool, n+1)
|
||||
for i := 0; i < n+1; i++ {
|
||||
wg.Add(1)
|
||||
go func(index int) {
|
||||
ui := new(cli.MockUi)
|
||||
c := &LockCommand{Ui: ui}
|
||||
|
||||
code := c.Run(append([]string{"-try=250ms"}, args...))
|
||||
if code == 0 {
|
||||
locked[index] = true
|
||||
} else {
|
||||
reason := ui.ErrorWriter.String()
|
||||
if !strings.Contains(reason, "Shutdown triggered or timeout during lock acquisition") {
|
||||
t.Fatalf("bad reason: %s", reason)
|
||||
}
|
||||
tried[index] = true
|
||||
}
|
||||
wg.Done()
|
||||
}(i)
|
||||
// Run the command.
|
||||
var lu *LockUnlock
|
||||
code := c.run(args, &lu)
|
||||
if code != 0 {
|
||||
t.Fatalf("bad: %d. %#v", code, ui.ErrorWriter.String())
|
||||
}
|
||||
wg.Wait()
|
||||
|
||||
// Tally up the outcomes.
|
||||
totalLocked := 0
|
||||
totalTried := 0
|
||||
for i := 0; i < n+1; i++ {
|
||||
if locked[i] == tried[i] {
|
||||
t.Fatalf("command %d didn't lock or try, or did both", i+1)
|
||||
}
|
||||
if locked[i] {
|
||||
totalLocked++
|
||||
}
|
||||
if tried[i] {
|
||||
totalTried++
|
||||
}
|
||||
_, err := ioutil.ReadFile(filePath)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
|
||||
// We can't check exact counts because sometimes the try attempts may
|
||||
// fail because they get woken up but need to do another try, but we
|
||||
// should get one of each outcome.
|
||||
if totalLocked == 0 || totalTried == 0 {
|
||||
t.Fatalf("unexpected outcome: locked=%d, tried=%d", totalLocked, totalTried)
|
||||
// Make sure the try options were set correctly.
|
||||
opts, ok := lu.rawOpts.(*api.LockOptions)
|
||||
if !ok {
|
||||
t.Fatalf("bad type")
|
||||
}
|
||||
if !opts.LockTryOnce || opts.LockWaitTime != 10*time.Second {
|
||||
t.Fatalf("bad: %#v", opts)
|
||||
}
|
||||
}
|
||||
|
||||
func TestLockCommand_Try_Lock(t *testing.T) {
|
||||
runTry(t, 1)
|
||||
}
|
||||
|
||||
func TestLockCommand_Try_Semaphore(t *testing.T) {
|
||||
runTry(t, 2)
|
||||
runTry(t, 3)
|
||||
a1 := testAgent(t)
|
||||
defer a1.Shutdown()
|
||||
waitForLeader(t, a1.httpAddr)
|
||||
|
||||
ui := new(cli.MockUi)
|
||||
c := &LockCommand{Ui: ui}
|
||||
filePath := filepath.Join(a1.dir, "test_touch")
|
||||
touchCmd := fmt.Sprintf("touch '%s'", filePath)
|
||||
args := []string{"-http-addr=" + a1.httpAddr, "-n=3", "-try=10s", "test/prefix", touchCmd}
|
||||
|
||||
// Run the command.
|
||||
var lu *LockUnlock
|
||||
code := c.run(args, &lu)
|
||||
if code != 0 {
|
||||
t.Fatalf("bad: %d. %#v", code, ui.ErrorWriter.String())
|
||||
}
|
||||
_, err := ioutil.ReadFile(filePath)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
|
||||
// Make sure the try options were set correctly.
|
||||
opts, ok := lu.rawOpts.(*api.SemaphoreOptions)
|
||||
if !ok {
|
||||
t.Fatalf("bad type")
|
||||
}
|
||||
if !opts.SemaphoreTryOnce || opts.SemaphoreWaitTime != 10*time.Second {
|
||||
t.Fatalf("bad: %#v", opts)
|
||||
}
|
||||
}
|
||||
|
||||
func TestLockCommand_MonitorRetry_Lock(t *testing.T) {
|
||||
a1 := testAgent(t)
|
||||
defer a1.Shutdown()
|
||||
waitForLeader(t, a1.httpAddr)
|
||||
|
||||
ui := new(cli.MockUi)
|
||||
c := &LockCommand{Ui: ui}
|
||||
filePath := filepath.Join(a1.dir, "test_touch")
|
||||
touchCmd := fmt.Sprintf("touch '%s'", filePath)
|
||||
args := []string{"-http-addr=" + a1.httpAddr, "-monitor-retry=3", "test/prefix", touchCmd}
|
||||
|
||||
// Run the command.
|
||||
var lu *LockUnlock
|
||||
code := c.run(args, &lu)
|
||||
if code != 0 {
|
||||
t.Fatalf("bad: %d. %#v", code, ui.ErrorWriter.String())
|
||||
}
|
||||
_, err := ioutil.ReadFile(filePath)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
|
||||
// Make sure the monitor options were set correctly.
|
||||
opts, ok := lu.rawOpts.(*api.LockOptions)
|
||||
if !ok {
|
||||
t.Fatalf("bad type")
|
||||
}
|
||||
if opts.MonitorRetries != 3 {
|
||||
t.Fatalf("bad: %d", opts.MonitorRetries)
|
||||
}
|
||||
}
|
||||
|
||||
func TestLockCommand_MonitorRetry_Semaphore(t *testing.T) {
|
||||
a1 := testAgent(t)
|
||||
defer a1.Shutdown()
|
||||
waitForLeader(t, a1.httpAddr)
|
||||
|
||||
ui := new(cli.MockUi)
|
||||
c := &LockCommand{Ui: ui}
|
||||
filePath := filepath.Join(a1.dir, "test_touch")
|
||||
touchCmd := fmt.Sprintf("touch '%s'", filePath)
|
||||
args := []string{"-http-addr=" + a1.httpAddr, "-n=3", "-monitor-retry=3", "test/prefix", touchCmd}
|
||||
|
||||
// Run the command.
|
||||
var lu *LockUnlock
|
||||
code := c.run(args, &lu)
|
||||
if code != 0 {
|
||||
t.Fatalf("bad: %d. %#v", code, ui.ErrorWriter.String())
|
||||
}
|
||||
_, err := ioutil.ReadFile(filePath)
|
||||
if err != nil {
|
||||
t.Fatalf("err: %v", err)
|
||||
}
|
||||
|
||||
// Make sure the monitor options were set correctly.
|
||||
opts, ok := lu.rawOpts.(*api.SemaphoreOptions)
|
||||
if !ok {
|
||||
t.Fatalf("bad type")
|
||||
}
|
||||
if opts.MonitorRetries != 3 {
|
||||
t.Fatalf("bad: %d", opts.MonitorRetries)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -67,5 +67,10 @@ The list of available flags are:
|
|||
unit suffix, such as "500ms". Valid time units are "ns", "us" (or "µs"), "ms",
|
||||
"s", "m", "h".
|
||||
|
||||
* `-monitor-retry` - Retry up to this number of times if Consul returns a 500 error
|
||||
while monitoring the lock. This allows riding out brief periods of unavailability
|
||||
without causing leader elections, but increases the amount of time required
|
||||
to detect a lost lock in some cases. Defaults to 0.
|
||||
|
||||
* `-verbose` - Enables verbose output.
|
||||
|
||||
|
|
Loading…
Reference in New Issue