Browse Source

retry locks on network errors (#3553)

* retry locks on network errors

When communicating with a local agent and watching a lock, a dropped
connection between the agent and server will show up as a server error
and immediately be retried. However if the client is connected to a
remote server, a dropped connection immediately aborts the lock.

* Updates comment about it being unsafe for writes.
pull/3573/head
James Bardin 7 years ago committed by James Phillips
parent
commit
fea32fb7d7
  1. 13
      api/api.go
  2. 19
      api/api_test.go
  3. 2
      api/lock.go
  4. 2
      api/semaphore.go

13
api/api.go

@ -555,13 +555,20 @@ func durToMsec(dur time.Duration) string {
// serverError is a string we look for to detect 500 errors. // serverError is a string we look for to detect 500 errors.
const serverError = "Unexpected response code: 500" const serverError = "Unexpected response code: 500"
// IsServerError returns true for 500 errors from the Consul servers, these are // IsRetryableError returns true for 500 errors from the Consul servers, and
// usually retryable at a later time. // network connection errors. These are usually retryable at a later time.
func IsServerError(err error) bool { // This applies to reads but NOT to writes. This may return true for errors
// on writes that may have still gone through, so do not use this to retry
// any write operations.
func IsRetryableError(err error) bool {
if err == nil { if err == nil {
return false return false
} }
if _, ok := err.(net.Error); ok {
return true
}
// TODO (slackpad) - Make a real error type here instead of using // TODO (slackpad) - Make a real error type here instead of using
// a string check. // a string check.
return strings.Contains(err.Error(), serverError) return strings.Contains(err.Error(), serverError)

19
api/api_test.go

@ -4,6 +4,7 @@ import (
crand "crypto/rand" crand "crypto/rand"
"crypto/tls" "crypto/tls"
"fmt" "fmt"
"net"
"net/http" "net/http"
"os" "os"
"path/filepath" "path/filepath"
@ -529,17 +530,21 @@ func TestAPI_durToMsec(t *testing.T) {
} }
} }
func TestAPI_IsServerError(t *testing.T) { func TestAPI_IsRetryableError(t *testing.T) {
t.Parallel() t.Parallel()
if IsServerError(nil) { if IsRetryableError(nil) {
t.Fatalf("should not be a server error") t.Fatal("should not be a retryable error")
} }
if IsServerError(fmt.Errorf("not the error you are looking for")) { if IsRetryableError(fmt.Errorf("not the error you are looking for")) {
t.Fatalf("should not be a server error") t.Fatal("should not be a retryable error")
} }
if !IsServerError(fmt.Errorf(serverError)) { if !IsRetryableError(fmt.Errorf(serverError)) {
t.Fatalf("should be a server error") t.Fatal("should be a retryable error")
}
if !IsRetryableError(&net.OpError{Err: fmt.Errorf("network conn error")}) {
t.Fatal("should be a retryable error")
} }
} }

2
api/lock.go

@ -370,7 +370,7 @@ RETRY:
// by doing retries. Note that we have to attempt the retry in a non- // by doing retries. Note that we have to attempt the retry in a non-
// blocking fashion so that we have a clean place to reset the retry // blocking fashion so that we have a clean place to reset the retry
// counter if service is restored. // counter if service is restored.
if retries > 0 && IsServerError(err) { if retries > 0 && IsRetryableError(err) {
time.Sleep(l.opts.MonitorRetryTime) time.Sleep(l.opts.MonitorRetryTime)
retries-- retries--
opts.WaitIndex = 0 opts.WaitIndex = 0

2
api/semaphore.go

@ -492,7 +492,7 @@ RETRY:
// by doing retries. Note that we have to attempt the retry in a non- // by doing retries. Note that we have to attempt the retry in a non-
// blocking fashion so that we have a clean place to reset the retry // blocking fashion so that we have a clean place to reset the retry
// counter if service is restored. // counter if service is restored.
if retries > 0 && IsServerError(err) { if retries > 0 && IsRetryableError(err) {
time.Sleep(s.opts.MonitorRetryTime) time.Sleep(s.opts.MonitorRetryTime)
retries-- retries--
opts.WaitIndex = 0 opts.WaitIndex = 0

Loading…
Cancel
Save