mirror of https://github.com/hashicorp/consul
retry locks on network errors (#3553)
* retry locks on network errors When communicating with a local agent and watching a lock, a dropped connection between the agent and server will show up as a server error and immediately be retried. However if the client is connected to a remote server, a dropped connection immediately aborts the lock. * Updates comment about it being unsafe for writes.pull/3573/head
parent
2e229617ce
commit
fea32fb7d7
13
api/api.go
13
api/api.go
|
@ -555,13 +555,20 @@ func durToMsec(dur time.Duration) string {
|
||||||
// serverError is a string we look for to detect 500 errors.
|
// serverError is a string we look for to detect 500 errors.
|
||||||
const serverError = "Unexpected response code: 500"
|
const serverError = "Unexpected response code: 500"
|
||||||
|
|
||||||
// IsServerError returns true for 500 errors from the Consul servers, these are
|
// IsRetryableError returns true for 500 errors from the Consul servers, and
|
||||||
// usually retryable at a later time.
|
// network connection errors. These are usually retryable at a later time.
|
||||||
func IsServerError(err error) bool {
|
// This applies to reads but NOT to writes. This may return true for errors
|
||||||
|
// on writes that may have still gone through, so do not use this to retry
|
||||||
|
// any write operations.
|
||||||
|
func IsRetryableError(err error) bool {
|
||||||
if err == nil {
|
if err == nil {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if _, ok := err.(net.Error); ok {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
// TODO (slackpad) - Make a real error type here instead of using
|
// TODO (slackpad) - Make a real error type here instead of using
|
||||||
// a string check.
|
// a string check.
|
||||||
return strings.Contains(err.Error(), serverError)
|
return strings.Contains(err.Error(), serverError)
|
||||||
|
|
|
@ -4,6 +4,7 @@ import (
|
||||||
crand "crypto/rand"
|
crand "crypto/rand"
|
||||||
"crypto/tls"
|
"crypto/tls"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"net"
|
||||||
"net/http"
|
"net/http"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
|
@ -529,17 +530,21 @@ func TestAPI_durToMsec(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestAPI_IsServerError(t *testing.T) {
|
func TestAPI_IsRetryableError(t *testing.T) {
|
||||||
t.Parallel()
|
t.Parallel()
|
||||||
if IsServerError(nil) {
|
if IsRetryableError(nil) {
|
||||||
t.Fatalf("should not be a server error")
|
t.Fatal("should not be a retryable error")
|
||||||
}
|
}
|
||||||
|
|
||||||
if IsServerError(fmt.Errorf("not the error you are looking for")) {
|
if IsRetryableError(fmt.Errorf("not the error you are looking for")) {
|
||||||
t.Fatalf("should not be a server error")
|
t.Fatal("should not be a retryable error")
|
||||||
}
|
}
|
||||||
|
|
||||||
if !IsServerError(fmt.Errorf(serverError)) {
|
if !IsRetryableError(fmt.Errorf(serverError)) {
|
||||||
t.Fatalf("should be a server error")
|
t.Fatal("should be a retryable error")
|
||||||
|
}
|
||||||
|
|
||||||
|
if !IsRetryableError(&net.OpError{Err: fmt.Errorf("network conn error")}) {
|
||||||
|
t.Fatal("should be a retryable error")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -370,7 +370,7 @@ RETRY:
|
||||||
// by doing retries. Note that we have to attempt the retry in a non-
|
// by doing retries. Note that we have to attempt the retry in a non-
|
||||||
// blocking fashion so that we have a clean place to reset the retry
|
// blocking fashion so that we have a clean place to reset the retry
|
||||||
// counter if service is restored.
|
// counter if service is restored.
|
||||||
if retries > 0 && IsServerError(err) {
|
if retries > 0 && IsRetryableError(err) {
|
||||||
time.Sleep(l.opts.MonitorRetryTime)
|
time.Sleep(l.opts.MonitorRetryTime)
|
||||||
retries--
|
retries--
|
||||||
opts.WaitIndex = 0
|
opts.WaitIndex = 0
|
||||||
|
|
|
@ -492,7 +492,7 @@ RETRY:
|
||||||
// by doing retries. Note that we have to attempt the retry in a non-
|
// by doing retries. Note that we have to attempt the retry in a non-
|
||||||
// blocking fashion so that we have a clean place to reset the retry
|
// blocking fashion so that we have a clean place to reset the retry
|
||||||
// counter if service is restored.
|
// counter if service is restored.
|
||||||
if retries > 0 && IsServerError(err) {
|
if retries > 0 && IsRetryableError(err) {
|
||||||
time.Sleep(s.opts.MonitorRetryTime)
|
time.Sleep(s.opts.MonitorRetryTime)
|
||||||
retries--
|
retries--
|
||||||
opts.WaitIndex = 0
|
opts.WaitIndex = 0
|
||||||
|
|
Loading…
Reference in New Issue