Add an optional per-check timeout for HTTP health checks.

CheckType and CheckHTTP gain a Timeout field, FixupCheckType parses a
"timeout" duration string from the raw config map, and CheckHTTP.Start
uses the timeout for its HTTP client when it is positive and shorter
than the check interval; otherwise the previous interval/10s rule applies.

diff --git a/command/agent/agent.go b/command/agent/agent.go
index a0bd63539a..9de70594c5 100644
--- a/command/agent/agent.go
+++ b/command/agent/agent.go
@@ -721,6 +721,7 @@ func (a *Agent) AddCheck(check *structs.HealthCheck, chkType *CheckType, persist
 				CheckID:  check.CheckID,
 				HTTP:     chkType.HTTP,
 				Interval: chkType.Interval,
+				Timeout:  chkType.Timeout,
 				Logger:   a.logger,
 			}
 			http.Start()
diff --git a/command/agent/check.go b/command/agent/check.go
index 71aa0eba0e..352c986fe3 100644
--- a/command/agent/check.go
+++ b/command/agent/check.go
@@ -36,7 +36,8 @@ type CheckType struct {
 	HTTP     string
 	Interval time.Duration
 
-	TTL time.Duration
+	Timeout time.Duration
+	TTL     time.Duration
 
 	Notes string
 }
@@ -269,6 +270,7 @@ type CheckHTTP struct {
 	CheckID  string
 	HTTP     string
 	Interval time.Duration
+	Timeout  time.Duration
 	Logger   *log.Logger
 
 	httpClient *http.Client
@@ -287,7 +289,9 @@ func (c *CheckHTTP) Start() {
 		// For long (>10s) interval checks the http timeout is 10s, otherwise the
 		// timeout is the interval. This means that a check *should* return
 		// before the next check begins.
-		if c.Interval < 10*time.Second {
+		if c.Timeout > 0 && c.Timeout < c.Interval {
+			c.httpClient = &http.Client{Timeout: c.Timeout}
+		} else if c.Interval < 10*time.Second {
 			c.httpClient = &http.Client{Timeout: c.Interval}
 		} else {
 			c.httpClient = &http.Client{Timeout: 10 * time.Second}
diff --git a/command/agent/check_test.go b/command/agent/check_test.go
index 46746d35a3..b55d5a6580 100644
--- a/command/agent/check_test.go
+++ b/command/agent/check_test.go
@@ -260,3 +260,48 @@ func TestCheckHTTPWarning(t *testing.T) {
 	expectHTTPStatus(t, server.URL, "warning")
 	server.Close()
 }
+
+func mockSlowHTTPServer(responseCode int, sleep time.Duration) *httptest.Server {
+	mux := http.NewServeMux()
+	mux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
+		time.Sleep(sleep)
+		w.WriteHeader(responseCode)
+		return
+	})
+
+	return httptest.NewServer(mux)
+}
+
+func TestCheckHTTPTimeout(t *testing.T) {
+	server := mockSlowHTTPServer(200, 10*time.Millisecond)
+	defer server.Close()
+
+	mock := &MockNotify{
+		state:   make(map[string]string),
+		updates: make(map[string]int),
+		output:  make(map[string]string),
+	}
+
+	check := &CheckHTTP{
+		Notify:   mock,
+		CheckID:  "bar",
+		HTTP:     server.URL,
+		Timeout:  5 * time.Millisecond,
+		Interval: 10 * time.Millisecond,
+		Logger:   log.New(os.Stderr, "", log.LstdFlags),
+	}
+
+	check.Start()
+	defer check.Stop()
+
+	time.Sleep(50 * time.Millisecond)
+
+	// Should have at least 2 updates
+	if mock.updates["bar"] < 2 {
+		t.Fatalf("should have at least 2 updates %v", mock.updates)
+	}
+
+	if mock.state["bar"] != "critical" {
+		t.Fatalf("should be critical %v", mock.state)
+	}
+}
diff --git a/command/agent/config.go b/command/agent/config.go
index 208daf5640..6f457aa054 100644
--- a/command/agent/config.go
+++ b/command/agent/config.go
@@ -644,7 +644,7 @@ AFTER_FIX:
 }
 
 func FixupCheckType(raw interface{}) error {
-	var ttlKey, intervalKey string
+	var ttlKey, intervalKey, timeoutKey string
 
 	// Handle decoding of time durations
 	rawMap, ok := raw.(map[string]interface{})
@@ -658,6 +658,8 @@ func FixupCheckType(raw interface{}) error {
 			ttlKey = k
 		case "interval":
 			intervalKey = k
+		case "timeout":
+			timeoutKey = k
 		case "service_id":
 			rawMap["serviceid"] = v
 			delete(rawMap, "service_id")
@@ -685,6 +687,18 @@ func FixupCheckType(raw interface{}) error {
 			}
 		}
 	}
+
+	if timeout, ok := rawMap[timeoutKey]; ok {
+		timeoutS, ok := timeout.(string)
+		if ok {
+			if dur, err := time.ParseDuration(timeoutS); err != nil {
+				return err
+			} else {
+				rawMap[timeoutKey] = dur
+			}
+		}
+	}
+
 	return nil
 }
 
diff --git a/command/agent/config_test.go b/command/agent/config_test.go
index ee436dc7d0..3bc0f35370 100644
--- a/command/agent/config_test.go
+++ b/command/agent/config_test.go
@@ -682,6 +682,16 @@ func TestDecodeConfig_Services(t *testing.T) {
 					"interval": "30s",
 					"ttl": "60s"
 				}
+			},
+			{
+				"id": "es0",
+				"name": "elasticsearch",
+				"port": 9200,
+				"check": {
+					"HTTP": "http://localhost:9200/_cluster/health",
+					"interval": "10s",
+					"timeout": "100ms"
+				}
 			}
 		]
 	}`
@@ -730,6 +740,16 @@ func TestDecodeConfig_Services(t *testing.T) {
 				},
 				Port: 7000,
 			},
+			&ServiceDefinition{
+				Check: CheckType{
+					HTTP:     "http://localhost:9200/_cluster/health",
+					Interval: 10 * time.Second,
+					Timeout:  100 * time.Millisecond,
+				},
+				ID:   "es0",
+				Name: "elasticsearch",
+				Port: 9200,
+			},
 		},
 	}
 
@@ -759,6 +779,14 @@ func TestDecodeConfig_Checks(t *testing.T) {
 				"script": "/bin/check_redis_tx",
 				"interval": "1m",
 				"service_id": "redis"
+			},
+			{
+				"id": "chk4",
+				"name": "service:elasticsearch:health",
+				"HTTP": "http://localhost:9200/_cluster/health",
+				"interval": "10s",
+				"timeout": "100ms",
+				"service_id": "elasticsearch"
 			}
 		]
 	}`
@@ -795,6 +823,16 @@ func TestDecodeConfig_Checks(t *testing.T) {
 					Interval: time.Minute,
 				},
 			},
+			&CheckDefinition{
+				ID:        "chk4",
+				Name:      "service:elasticsearch:health",
+				ServiceID: "elasticsearch",
+				CheckType: CheckType{
+					HTTP:     "http://localhost:9200/_cluster/health",
+					Interval: 10 * time.Second,
+					Timeout:  100 * time.Millisecond,
+				},
+			},
 		},
 	}
 