From 4afeddacc8b81d42361899ce72122394685cce24 Mon Sep 17 00:00:00 2001 From: James Phillips Date: Tue, 5 Jan 2016 17:59:58 -0800 Subject: [PATCH] Adds monitor retries to the consul lock command. --- command/lock.go | 61 ++++-- command/lock_test.go | 186 ++++++++++++------ .../source/docs/commands/lock.html.markdown | 5 + 3 files changed, 177 insertions(+), 75 deletions(-) diff --git a/command/lock.go b/command/lock.go index fcbf4bfe87..55ec9c5cc0 100644 --- a/command/lock.go +++ b/command/lock.go @@ -69,17 +69,29 @@ Options: -pass-stdin Pass stdin to child process. -try=duration Make a single attempt to acquire the lock, waiting up to the given duration (eg. "15s"). + -monitor-retry=n Retry up to n times if Consul returns a 500 error + while monitoring the lock. This allows riding out brief + periods of unavailability without causing leader + elections, but increases the amount of time required + to detect a lost lock in some cases. Defaults to 0. -verbose Enables verbose output ` return strings.TrimSpace(helpText) } func (c *LockCommand) Run(args []string) int { + var lu *LockUnlock + return c.run(args, &lu) +} + +// run exposes the underlying lock for testing. +func (c *LockCommand) run(args []string, lu **LockUnlock) int { var childDone chan struct{} var name, token string var limit int var passStdin bool var try string + var retry int cmdFlags := flag.NewFlagSet("watch", flag.ContinueOnError) cmdFlags.Usage = func() { c.Ui.Output(c.Help()) } cmdFlags.IntVar(&limit, "n", 1, "") @@ -87,6 +99,7 @@ func (c *LockCommand) Run(args []string) int { cmdFlags.StringVar(&token, "token", "", "") cmdFlags.BoolVar(&passStdin, "pass-stdin", false, "") cmdFlags.StringVar(&try, "try", "", "") + cmdFlags.IntVar(&retry, "monitor-retry", 0, "") cmdFlags.BoolVar(&c.verbose, "verbose", false, "") httpAddr := HTTPAddrFlag(cmdFlags) if err := cmdFlags.Parse(args); err != nil { @@ -135,6 +148,12 @@ func (c *LockCommand) Run(args []string) int { oneshot = true } + // Check the retry parameter + if retry < 0 { + c.Ui.Error("Number for 'monitor-retry' must be >= 0") + return 1 + } + // Create and test the HTTP client conf := api.DefaultConfig() conf.Address = *httpAddr @@ -151,11 +170,10 @@ func (c *LockCommand) Run(args []string) int { } // Setup the lock or semaphore - var lu *LockUnlock if limit == 1 { - lu, err = c.setupLock(client, prefix, name, oneshot, wait) + *lu, err = c.setupLock(client, prefix, name, oneshot, wait, retry) } else { - lu, err = c.setupSemaphore(client, limit, prefix, name, oneshot, wait) + *lu, err = c.setupSemaphore(client, limit, prefix, name, oneshot, wait, retry) } if err != nil { c.Ui.Error(fmt.Sprintf("Lock setup failed: %s", err)) @@ -166,7 +184,7 @@ func (c *LockCommand) Run(args []string) int { if c.verbose { c.Ui.Info("Attempting lock acquisition") } - lockCh, err := lu.lockFn(c.ShutdownCh) + lockCh, err := (*lu).lockFn(c.ShutdownCh) if lockCh == nil { if err == nil { c.Ui.Error("Shutdown triggered or timeout during lock acquisition") @@ -219,14 +237,14 @@ func (c *LockCommand) Run(args []string) int { RELEASE: // Release the lock before termination - if err := lu.unlockFn(); err != nil { + if err := (*lu).unlockFn(); err != nil { c.Ui.Error(fmt.Sprintf("Lock release failed: %s", err)) return 1 } // Cleanup the lock if no longer in use - if err := lu.cleanupFn(); err != nil { - if err != lu.inUseErr { + if err := (*lu).cleanupFn(); err != nil { + if err != (*lu).inUseErr { c.Ui.Error(fmt.Sprintf("Lock cleanup failed: %s", err)) return 1 } else if c.verbose { @@ -240,8 +258,11 @@ RELEASE: // setupLock is used to setup a new Lock given the API client, the key prefix to // operate on, and an optional session name. If oneshot is true then we will set -// up for a single attempt at acquisition, using the given wait time. -func (c *LockCommand) setupLock(client *api.Client, prefix, name string, oneshot bool, wait time.Duration) (*LockUnlock, error) { +// up for a single attempt at acquisition, using the given wait time. The retry +// parameter sets how many 500 errors the lock monitor will tolerate before +// giving up the lock. +func (c *LockCommand) setupLock(client *api.Client, prefix, name string, + oneshot bool, wait time.Duration, retry int) (*LockUnlock, error) { // Use the DefaultSemaphoreKey extension, this way if a lock and // semaphore are both used at the same prefix, we will get a conflict // which we can report to the user. @@ -250,8 +271,9 @@ func (c *LockCommand) setupLock(client *api.Client, prefix, name string, oneshot c.Ui.Info(fmt.Sprintf("Setting up lock at path: %s", key)) } opts := api.LockOptions{ - Key: key, - SessionName: name, + Key: key, + SessionName: name, + MonitorRetries: retry, } if oneshot { opts.LockTryOnce = true @@ -266,21 +288,26 @@ func (c *LockCommand) setupLock(client *api.Client, prefix, name string, oneshot unlockFn: l.Unlock, cleanupFn: l.Destroy, inUseErr: api.ErrLockInUse, + rawOpts: &opts, } return lu, nil } // setupSemaphore is used to setup a new Semaphore given the API client, key // prefix, session name, and slot holder limit. If oneshot is true then we will -// set up for a single attempt at acquisition, using the given wait time. -func (c *LockCommand) setupSemaphore(client *api.Client, limit int, prefix, name string, oneshot bool, wait time.Duration) (*LockUnlock, error) { +// set up for a single attempt at acquisition, using the given wait time. The +// retry parameter sets how many 500 errors the lock monitor will tolerate +// before giving up the semaphore. +func (c *LockCommand) setupSemaphore(client *api.Client, limit int, prefix, name string, + oneshot bool, wait time.Duration, retry int) (*LockUnlock, error) { if c.verbose { c.Ui.Info(fmt.Sprintf("Setting up semaphore (limit %d) at prefix: %s", limit, prefix)) } opts := api.SemaphoreOptions{ - Prefix: prefix, - Limit: limit, - SessionName: name, + Prefix: prefix, + Limit: limit, + SessionName: name, + MonitorRetries: retry, } if oneshot { opts.SemaphoreTryOnce = true @@ -295,6 +322,7 @@ func (c *LockCommand) setupSemaphore(client *api.Client, limit int, prefix, name unlockFn: s.Release, cleanupFn: s.Destroy, inUseErr: api.ErrSemaphoreInUse, + rawOpts: &opts, } return lu, nil } @@ -408,4 +436,5 @@ type LockUnlock struct { unlockFn func() error cleanupFn func() error inUseErr error + rawOpts interface{} } diff --git a/command/lock_test.go b/command/lock_test.go index 479ad7d5b8..f5f84ef9dd 100644 --- a/command/lock_test.go +++ b/command/lock_test.go @@ -5,9 +5,10 @@ import ( "io/ioutil" "path/filepath" "strings" - "sync" "testing" + "time" + "github.com/hashicorp/consul/api" "github.com/mitchellh/cli" ) @@ -15,17 +16,22 @@ func TestLockCommand_implements(t *testing.T) { var _ cli.Command = &LockCommand{} } -func TestLockCommand_BadArgs(t *testing.T) { +func argFail(t *testing.T, args []string, expected string) { ui := new(cli.MockUi) c := &LockCommand{Ui: ui} - - if code := c.Run([]string{"-try=blah"}); code != 1 { + if code := c.Run(args); code != 1 { t.Fatalf("expected return code 1, got %d", code) } - - if code := c.Run([]string{"-try=-10s"}); code != 1 { - t.Fatalf("expected return code 1, got %d", code) + if reason := ui.ErrorWriter.String(); !strings.Contains(reason, expected) { + t.Fatalf("bad reason: got='%s', expected='%s'", reason, expected) } + +} + +func TestLockCommand_BadArgs(t *testing.T) { + argFail(t, []string{"-try=blah", "test/prefix", "date"}, "parsing duration") + argFail(t, []string{"-try=-10s", "test/prefix", "date"}, "must be positive") + argFail(t, []string{"-monitor-retry=-5", "test/prefix", "date"}, "must be >= 0") } func TestLockCommand_Run(t *testing.T) { @@ -51,68 +57,130 @@ func TestLockCommand_Run(t *testing.T) { } } -func runTry(t *testing.T, n int) { +func TestLockCommand_Try_Lock(t *testing.T) { a1 := testAgent(t) defer a1.Shutdown() waitForLeader(t, a1.httpAddr) - // Define a long-running command. - nArg := fmt.Sprintf("-n=%d", n) - args := []string{"-http-addr=" + a1.httpAddr, nArg, "-try=250ms", "test/prefix", "sleep 2"} + ui := new(cli.MockUi) + c := &LockCommand{Ui: ui} + filePath := filepath.Join(a1.dir, "test_touch") + touchCmd := fmt.Sprintf("touch '%s'", filePath) + args := []string{"-http-addr=" + a1.httpAddr, "-try=10s", "test/prefix", touchCmd} - // Run several commands at once. - var wg sync.WaitGroup - locked := make([]bool, n+1) - tried := make([]bool, n+1) - for i := 0; i < n+1; i++ { - wg.Add(1) - go func(index int) { - ui := new(cli.MockUi) - c := &LockCommand{Ui: ui} - - code := c.Run(append([]string{"-try=250ms"}, args...)) - if code == 0 { - locked[index] = true - } else { - reason := ui.ErrorWriter.String() - if !strings.Contains(reason, "Shutdown triggered or timeout during lock acquisition") { - t.Fatalf("bad reason: %s", reason) - } - tried[index] = true - } - wg.Done() - }(i) + // Run the command. + var lu *LockUnlock + code := c.run(args, &lu) + if code != 0 { + t.Fatalf("bad: %d. %#v", code, ui.ErrorWriter.String()) } - wg.Wait() - - // Tally up the outcomes. - totalLocked := 0 - totalTried := 0 - for i := 0; i < n+1; i++ { - if locked[i] == tried[i] { - t.Fatalf("command %d didn't lock or try, or did both", i+1) - } - if locked[i] { - totalLocked++ - } - if tried[i] { - totalTried++ - } + _, err := ioutil.ReadFile(filePath) + if err != nil { + t.Fatalf("err: %v", err) } - // We can't check exact counts because sometimes the try attempts may - // fail because they get woken up but need to do another try, but we - // should get one of each outcome. - if totalLocked == 0 || totalTried == 0 { - t.Fatalf("unexpected outcome: locked=%d, tried=%d", totalLocked, totalTried) + // Make sure the try options were set correctly. + opts, ok := lu.rawOpts.(*api.LockOptions) + if !ok { + t.Fatalf("bad type") + } + if !opts.LockTryOnce || opts.LockWaitTime != 10*time.Second { + t.Fatalf("bad: %#v", opts) } -} - -func TestLockCommand_Try_Lock(t *testing.T) { - runTry(t, 1) } func TestLockCommand_Try_Semaphore(t *testing.T) { - runTry(t, 2) - runTry(t, 3) + a1 := testAgent(t) + defer a1.Shutdown() + waitForLeader(t, a1.httpAddr) + + ui := new(cli.MockUi) + c := &LockCommand{Ui: ui} + filePath := filepath.Join(a1.dir, "test_touch") + touchCmd := fmt.Sprintf("touch '%s'", filePath) + args := []string{"-http-addr=" + a1.httpAddr, "-n=3", "-try=10s", "test/prefix", touchCmd} + + // Run the command. + var lu *LockUnlock + code := c.run(args, &lu) + if code != 0 { + t.Fatalf("bad: %d. %#v", code, ui.ErrorWriter.String()) + } + _, err := ioutil.ReadFile(filePath) + if err != nil { + t.Fatalf("err: %v", err) + } + + // Make sure the try options were set correctly. + opts, ok := lu.rawOpts.(*api.SemaphoreOptions) + if !ok { + t.Fatalf("bad type") + } + if !opts.SemaphoreTryOnce || opts.SemaphoreWaitTime != 10*time.Second { + t.Fatalf("bad: %#v", opts) + } +} + +func TestLockCommand_MonitorRetry_Lock(t *testing.T) { + a1 := testAgent(t) + defer a1.Shutdown() + waitForLeader(t, a1.httpAddr) + + ui := new(cli.MockUi) + c := &LockCommand{Ui: ui} + filePath := filepath.Join(a1.dir, "test_touch") + touchCmd := fmt.Sprintf("touch '%s'", filePath) + args := []string{"-http-addr=" + a1.httpAddr, "-monitor-retry=3", "test/prefix", touchCmd} + + // Run the command. + var lu *LockUnlock + code := c.run(args, &lu) + if code != 0 { + t.Fatalf("bad: %d. %#v", code, ui.ErrorWriter.String()) + } + _, err := ioutil.ReadFile(filePath) + if err != nil { + t.Fatalf("err: %v", err) + } + + // Make sure the monitor options were set correctly. + opts, ok := lu.rawOpts.(*api.LockOptions) + if !ok { + t.Fatalf("bad type") + } + if opts.MonitorRetries != 3 { + t.Fatalf("bad: %d", opts.MonitorRetries) + } +} + +func TestLockCommand_MonitorRetry_Semaphore(t *testing.T) { + a1 := testAgent(t) + defer a1.Shutdown() + waitForLeader(t, a1.httpAddr) + + ui := new(cli.MockUi) + c := &LockCommand{Ui: ui} + filePath := filepath.Join(a1.dir, "test_touch") + touchCmd := fmt.Sprintf("touch '%s'", filePath) + args := []string{"-http-addr=" + a1.httpAddr, "-n=3", "-monitor-retry=3", "test/prefix", touchCmd} + + // Run the command. + var lu *LockUnlock + code := c.run(args, &lu) + if code != 0 { + t.Fatalf("bad: %d. %#v", code, ui.ErrorWriter.String()) + } + _, err := ioutil.ReadFile(filePath) + if err != nil { + t.Fatalf("err: %v", err) + } + + // Make sure the monitor options were set correctly. + opts, ok := lu.rawOpts.(*api.SemaphoreOptions) + if !ok { + t.Fatalf("bad type") + } + if opts.MonitorRetries != 3 { + t.Fatalf("bad: %d", opts.MonitorRetries) + } } diff --git a/website/source/docs/commands/lock.html.markdown b/website/source/docs/commands/lock.html.markdown index a5b63bf728..fe6f49f219 100644 --- a/website/source/docs/commands/lock.html.markdown +++ b/website/source/docs/commands/lock.html.markdown @@ -67,5 +67,10 @@ The list of available flags are: unit suffix, such as "500ms". Valid time units are "ns", "us" (or "µs"), "ms", "s", "m", "h". +* `-monitor-retry` - Retry up to this number of times if Consul returns a 500 error + while monitoring the lock. This allows riding out brief periods of unavailability + without causing leader elections, but increases the amount of time required + to detect a lost lock in some cases. Defaults to 0. + * `-verbose` - Enables verbose output.