Pulls in latest Serf to get flap metric.

pull/2099/head
James Phillips 2016-06-07 09:22:41 -07:00
parent b83c4ffa84
commit a8bafda8b6
5 changed files with 19 additions and 6 deletions

9
Godeps/Godeps.json generated
View File

@ -1,6 +1,7 @@
{
"ImportPath": "github.com/hashicorp/consul",
"GoVersion": "go1.6",
"GodepVersion": "v74",
"Deps": [
{
"ImportPath": "github.com/DataDog/datadog-go/statsd",
@ -225,13 +226,13 @@
},
{
"ImportPath": "github.com/hashicorp/serf/coordinate",
"Comment": "v0.7.0-12-ge4ec8cc",
"Rev": "e4ec8cc423bbe20d26584b96efbeb9102e16d05f"
"Comment": "v0.7.0-62-gb60a6d9",
"Rev": "b60a6d928fe726a588f79a1d500582507f9d79de"
},
{
"ImportPath": "github.com/hashicorp/serf/serf",
"Comment": "v0.7.0-12-ge4ec8cc",
"Rev": "e4ec8cc423bbe20d26584b96efbeb9102e16d05f"
"Comment": "v0.7.0-62-gb60a6d9",
"Rev": "b60a6d928fe726a588f79a1d500582507f9d79de"
},
{
"ImportPath": "github.com/hashicorp/yamux",

View File

@ -1 +0,0 @@
# TODO - I'll beef this up as I implement each of the enhancements.

View File

@ -16,7 +16,7 @@ package coordinate
type Config struct {
// The dimensionality of the coordinate system. As discussed in [2], more
// dimensions improve the accuracy of the estimates up to a point. Per [2]
// we chose 4 dimensions plus a non-Euclidean height.
// we chose 8 dimensions plus a non-Euclidean height.
Dimensionality uint
// VivaldiErrorMax is the default error value when a node hasn't yet made

View File

@ -103,6 +103,13 @@ type Config struct {
ReconnectTimeout time.Duration
TombstoneTimeout time.Duration
// FlapTimeout is the window used for telemetry purposes: a node that
// fails and then rejoins in less than this amount of time is counted as
// a flap.
// This should be set less than a typical reboot time, but large enough
// to see actual events, given our expected detection times for a failed
// node.
FlapTimeout time.Duration
// QueueDepthWarning is used to generate warning message if the
// number of queued messages to broadcast exceeds this number. This
// is to provide the user feedback if events are being triggered
@ -241,6 +248,7 @@ func DefaultConfig() *Config {
QueueDepthWarning: 128,
MaxQueueDepth: 4096,
TombstoneTimeout: 24 * time.Hour,
FlapTimeout: 60 * time.Second,
MemberlistConfig: memberlist.DefaultLANConfig(),
QueryTimeoutMult: 16,
QueryResponseSizeLimit: 1024,

View File

@ -871,6 +871,11 @@ func (s *Serf) handleNodeJoin(n *memberlist.Node) {
s.members[n.Name] = member
} else {
oldStatus = member.Status
deadTime := time.Now().Sub(member.leaveTime)
if oldStatus == StatusFailed && deadTime < s.config.FlapTimeout {
metrics.IncrCounter([]string{"serf", "member", "flap"}, 1)
}
member.Status = StatusAlive
member.leaveTime = time.Time{}
member.Addr = net.IP(n.Addr)