From 9caa5b365329f9c11c8d6e7e93f453948775547a Mon Sep 17 00:00:00 2001 From: James Phillips Date: Mon, 27 Jul 2015 14:41:46 -0700 Subject: [PATCH] Adds distance sorting to health endpoint. Cleans up unit tests. --- command/agent/health_endpoint.go | 3 + command/agent/health_endpoint_test.go | 260 +++++++++++++++++- consul/catalog_endpoint_test.go | 73 ----- consul/health_endpoint.go | 16 +- consul/health_endpoint_test.go | 254 +++++++++++++++++ consul/rtt.go | 82 ++++++ consul/rtt_test.go | 189 +++++++++++-- consul/structs/structs.go | 5 +- .../docs/agent/http/health.html.markdown | 15 + 9 files changed, 793 insertions(+), 104 deletions(-) diff --git a/command/agent/health_endpoint.go b/command/agent/health_endpoint.go index 3ee02ac5e6..f95e4c5e72 100644 --- a/command/agent/health_endpoint.go +++ b/command/agent/health_endpoint.go @@ -9,6 +9,7 @@ import ( func (s *HTTPServer) HealthChecksInState(resp http.ResponseWriter, req *http.Request) (interface{}, error) { // Set default DC args := structs.ChecksInStateRequest{} + s.parseSource(req, &args.Source) if done := s.parse(resp, req, &args.Datacenter, &args.QueryOptions); done { return nil, nil } @@ -57,6 +58,7 @@ func (s *HTTPServer) HealthNodeChecks(resp http.ResponseWriter, req *http.Reques func (s *HTTPServer) HealthServiceChecks(resp http.ResponseWriter, req *http.Request) (interface{}, error) { // Set default DC args := structs.ServiceSpecificRequest{} + s.parseSource(req, &args.Source) if done := s.parse(resp, req, &args.Datacenter, &args.QueryOptions); done { return nil, nil } @@ -81,6 +83,7 @@ func (s *HTTPServer) HealthServiceChecks(resp http.ResponseWriter, req *http.Req func (s *HTTPServer) HealthServiceNodes(resp http.ResponseWriter, req *http.Request) (interface{}, error) { // Set default DC args := structs.ServiceSpecificRequest{} + s.parseSource(req, &args.Source) if done := s.parse(resp, req, &args.Datacenter, &args.QueryOptions); done { return nil, nil } diff --git a/command/agent/health_endpoint_test.go b/command/agent/health_endpoint_test.go index 40ceedb0f3..be3156979c 100644 --- a/command/agent/health_endpoint_test.go +++ b/command/agent/health_endpoint_test.go @@ -2,13 +2,16 @@ package agent import ( "fmt" - "github.com/hashicorp/consul/consul/structs" - "github.com/hashicorp/consul/testutil" "net/http" "net/http/httptest" "os" "reflect" "testing" + "time" + + "github.com/hashicorp/consul/consul/structs" + "github.com/hashicorp/consul/testutil" + "github.com/hashicorp/serf/coordinate" ) func TestHealthChecksInState(t *testing.T) { @@ -38,6 +41,87 @@ func TestHealthChecksInState(t *testing.T) { }) } +func TestHealthChecksInState_DistanceSort(t *testing.T) { + dir, srv := makeHTTPServer(t) + defer os.RemoveAll(dir) + defer srv.Shutdown() + defer srv.agent.Shutdown() + + testutil.WaitForLeader(t, srv.agent.RPC, "dc1") + + args := &structs.RegisterRequest{ + Datacenter: "dc1", + Node: "bar", + Address: "127.0.0.1", + Check: &structs.HealthCheck{ + Node: "bar", + Name: "node check", + Status: structs.HealthCritical, + }, + } + + var out struct{} + if err := srv.agent.RPC("Catalog.Register", args, &out); err != nil { + t.Fatalf("err: %v", err) + } + + args.Node, args.Check.Node = "foo", "foo" + if err := srv.agent.RPC("Catalog.Register", args, &out); err != nil { + t.Fatalf("err: %v", err) + } + + req, err := http.NewRequest("GET", "/v1/health/state/critical?dc=dc1&near=foo", nil) + if err != nil { + t.Fatalf("err: %v", err) + } + + resp := httptest.NewRecorder() + obj, err := srv.HealthChecksInState(resp, req) + if err != nil { + t.Fatalf("err: %v", err) + } + assertIndex(t, resp) + nodes := obj.(structs.HealthChecks) + if len(nodes) != 2 { + t.Fatalf("bad: %v", nodes) + } + if nodes[0].Node != "bar" { + t.Fatalf("bad: %v", nodes) + } + if nodes[1].Node != "foo" { + t.Fatalf("bad: %v", nodes) + } + + // Send an update for the node and wait for it to get applied. + arg := structs.CoordinateUpdateRequest{ + Datacenter: "dc1", + Node: "foo", + Coord: coordinate.NewCoordinate(coordinate.DefaultConfig()), + } + if err := srv.agent.RPC("Coordinate.Update", &arg, &out); err != nil { + t.Fatalf("err: %v", err) + } + time.Sleep(200 * time.Millisecond) + + // Query again and now foo should have moved to the front of the line. + resp = httptest.NewRecorder() + obj, err = srv.HealthChecksInState(resp, req) + if err != nil { + t.Fatalf("err: %v", err) + } + assertIndex(t, resp) + nodes = obj.(structs.HealthChecks) + if len(nodes) != 2 { + t.Fatalf("bad: %v", nodes) + } + if nodes[0].Node != "foo" { + t.Fatalf("bad: %v", nodes) + } + if nodes[1].Node != "bar" { + t.Fatalf("bad: %v", nodes) + } +} + func TestHealthNodeChecks(t *testing.T) { dir, srv := makeHTTPServer(t) defer os.RemoveAll(dir) @@ -110,6 +194,92 @@ func TestHealthServiceChecks(t *testing.T) { } } +func TestHealthServiceChecks_DistanceSort(t *testing.T) { + dir, srv := makeHTTPServer(t) + defer os.RemoveAll(dir) + defer srv.Shutdown() + defer srv.agent.Shutdown() + + testutil.WaitForLeader(t, srv.agent.RPC, "dc1") + + // Create a service check + args := &structs.RegisterRequest{ + Datacenter: "dc1", + Node: "bar", + Address: "127.0.0.1", + Service: &structs.NodeService{ + ID: "test", + Service: "test", + }, + Check: &structs.HealthCheck{ + Node: "bar", + Name: "test check", + ServiceID: "test", + }, + } + + var out struct{} + if err := srv.agent.RPC("Catalog.Register", args, &out); err != nil { + t.Fatalf("err: %v", err) + } + + args.Node, args.Check.Node = "foo", "foo" + if err := srv.agent.RPC("Catalog.Register", args, &out); err != nil { + t.Fatalf("err: %v", err) + } + + req, err := http.NewRequest("GET", "/v1/health/checks/test?dc=dc1&near=foo", nil) + if err != nil { + t.Fatalf("err: %v", err) + } + + resp := httptest.NewRecorder() + obj, err := srv.HealthServiceChecks(resp, req) + if err != nil { + t.Fatalf("err: %v", err) + } + assertIndex(t, resp) + nodes := obj.(structs.HealthChecks) + if len(nodes) != 2 { + t.Fatalf("bad: %v", obj) + } + if nodes[0].Node != "bar" { + t.Fatalf("bad: %v", nodes) + } + if nodes[1].Node != "foo" { + t.Fatalf("bad: %v", nodes) + } + + // Send an update for the node and wait for it to get applied. + arg := structs.CoordinateUpdateRequest{ + Datacenter: "dc1", + Node: "foo", + Coord: coordinate.NewCoordinate(coordinate.DefaultConfig()), + } + if err := srv.agent.RPC("Coordinate.Update", &arg, &out); err != nil { + t.Fatalf("err: %v", err) + } + time.Sleep(200 * time.Millisecond) + + // Query again and now foo should have moved to the front of the line. + resp = httptest.NewRecorder() + obj, err = srv.HealthServiceChecks(resp, req) + if err != nil { + t.Fatalf("err: %v", err) + } + assertIndex(t, resp) + nodes = obj.(structs.HealthChecks) + if len(nodes) != 2 { + t.Fatalf("bad: %v", obj) + } + if nodes[0].Node != "foo" { + t.Fatalf("bad: %v", nodes) + } + if nodes[1].Node != "bar" { + t.Fatalf("bad: %v", nodes) + } +} + func TestHealthServiceNodes(t *testing.T) { dir, srv := makeHTTPServer(t) defer os.RemoveAll(dir) @@ -138,6 +308,92 @@ func TestHealthServiceNodes(t *testing.T) { } } +func TestHealthServiceNodes_DistanceSort(t *testing.T) { + dir, srv := makeHTTPServer(t) + defer os.RemoveAll(dir) + defer srv.Shutdown() + defer srv.agent.Shutdown() + + testutil.WaitForLeader(t, srv.agent.RPC, "dc1") + + // Create a service check + args := &structs.RegisterRequest{ + Datacenter: "dc1", + Node: "bar", + Address: "127.0.0.1", + Service: &structs.NodeService{ + ID: "test", + Service: "test", + }, + Check: &structs.HealthCheck{ + Node: "bar", + Name: "test check", + ServiceID: "test", + }, + } + + var out struct{} + if err := srv.agent.RPC("Catalog.Register", args, &out); err != nil { + t.Fatalf("err: %v", err) + } + + args.Node, args.Check.Node = "foo", "foo" + if err := srv.agent.RPC("Catalog.Register", args, &out); err != nil { + t.Fatalf("err: %v", err) + } + + req, err := http.NewRequest("GET", "/v1/health/service/test?dc=dc1&near=foo", nil) + if err != nil { + t.Fatalf("err: %v", err) + } + + resp := httptest.NewRecorder() + obj, err := srv.HealthServiceNodes(resp, req) + if err != nil { + t.Fatalf("err: %v", err) + } + assertIndex(t, resp) + nodes := obj.(structs.CheckServiceNodes) + if len(nodes) != 2 { + t.Fatalf("bad: %v", obj) + } + if nodes[0].Node.Node != "bar" { + t.Fatalf("bad: %v", nodes) + } + if nodes[1].Node.Node != "foo" { + t.Fatalf("bad: %v", nodes) + } + + // Send an update for the node and wait for it to get applied. + arg := structs.CoordinateUpdateRequest{ + Datacenter: "dc1", + Node: "foo", + Coord: coordinate.NewCoordinate(coordinate.DefaultConfig()), + } + if err := srv.agent.RPC("Coordinate.Update", &arg, &out); err != nil { + t.Fatalf("err: %v", err) + } + time.Sleep(200 * time.Millisecond) + + // Query again and now foo should have moved to the front of the line. + resp = httptest.NewRecorder() + obj, err = srv.HealthServiceNodes(resp, req) + if err != nil { + t.Fatalf("err: %v", err) + } + assertIndex(t, resp) + nodes = obj.(structs.CheckServiceNodes) + if len(nodes) != 2 { + t.Fatalf("bad: %v", obj) + } + if nodes[0].Node.Node != "foo" { + t.Fatalf("bad: %v", nodes) + } + if nodes[1].Node.Node != "bar" { + t.Fatalf("bad: %v", nodes) + } +} + func TestHealthServiceNodes_PassingFilter(t *testing.T) { dir, srv := makeHTTPServer(t) defer os.RemoveAll(dir) diff --git a/consul/catalog_endpoint_test.go b/consul/catalog_endpoint_test.go index 5ce1115b4c..86ac975171 100644 --- a/consul/catalog_endpoint_test.go +++ b/consul/catalog_endpoint_test.go @@ -294,24 +294,6 @@ func TestCatalogListDatacenters_DistanceSort(t *testing.T) { if out[2] != "dc2" { t.Fatalf("bad: %v", out) } - - // Make sure we get the natural order if coordinates are disabled. - s1.config.DisableCoordinates = true - if err := client.Call("Catalog.ListDatacenters", struct{}{}, &out); err != nil { - t.Fatalf("err: %v", err) - } - if len(out) != 3 { - t.Fatalf("bad: %v", out) - } - if out[0] != "acdc" { - t.Fatalf("bad: %v", out) - } - if out[1] != "dc1" { - t.Fatalf("bad: %v", out) - } - if out[2] != "dc2" { - t.Fatalf("bad: %v", out) - } } func TestCatalogListNodes(t *testing.T) { @@ -529,7 +511,6 @@ func TestCatalogListNodes_DistanceSort(t *testing.T) { client := rpcClient(t, s1) defer client.Close() - // Add three nodes. testutil.WaitForLeader(t, client.Call, "dc1") if err := s1.fsm.State().EnsureNode(1, structs.Node{"aaa", "127.0.0.1"}); err != nil { t.Fatalf("err: %v", err) @@ -609,34 +590,6 @@ func TestCatalogListNodes_DistanceSort(t *testing.T) { if out.Nodes[4].Node != s1.config.NodeName { t.Fatalf("bad: %v", out) } - - // Make sure we get the natural order if coordinates are disabled. - s1.config.DisableCoordinates = true - args = structs.DCSpecificRequest{ - Datacenter: "dc1", - Source: structs.QuerySource{Datacenter: "dc1", Node: "foo"}, - } - testutil.WaitForResult(func() (bool, error) { - client.Call("Catalog.ListNodes", &args, &out) - return len(out.Nodes) == 5, nil - }, func(err error) { - t.Fatalf("err: %v", err) - }) - if out.Nodes[0].Node != "aaa" { - t.Fatalf("bad: %v", out) - } - if out.Nodes[1].Node != "bar" { - t.Fatalf("bad: %v", out) - } - if out.Nodes[2].Node != "baz" { - t.Fatalf("bad: %v", out) - } - if out.Nodes[3].Node != "foo" { - t.Fatalf("bad: %v", out) - } - if out.Nodes[4].Node != s1.config.NodeName { - t.Fatalf("bad: %v", out) - } } func BenchmarkCatalogListNodes(t *testing.B) { @@ -982,32 +935,6 @@ func TestCatalogListServiceNodes_DistanceSort(t *testing.T) { if out.ServiceNodes[3].Node != "aaa" { t.Fatalf("bad: %v", out) } - - // Make sure we get the natural order if coordinates are disabled. - s1.config.DisableCoordinates = true - args = structs.ServiceSpecificRequest{ - Datacenter: "dc1", - ServiceName: "db", - Source: structs.QuerySource{Datacenter: "dc1", Node: "foo"}, - } - if err := client.Call("Catalog.ServiceNodes", &args, &out); err != nil { - t.Fatalf("err: %v", err) - } - if len(out.ServiceNodes) != 4 { - t.Fatalf("bad: %v", out) - } - if out.ServiceNodes[0].Node != "aaa" { - t.Fatalf("bad: %v", out) - } - if out.ServiceNodes[1].Node != "foo" { - t.Fatalf("bad: %v", out) - } - if out.ServiceNodes[2].Node != "bar" { - t.Fatalf("bad: %v", out) - } - if out.ServiceNodes[3].Node != "baz" { - t.Fatalf("bad: %v", out) - } } func TestCatalogNodeServices(t *testing.T) { diff --git a/consul/health_endpoint.go b/consul/health_endpoint.go index 4bb6404c11..e5aa5fec24 100644 --- a/consul/health_endpoint.go +++ b/consul/health_endpoint.go @@ -30,7 +30,10 @@ func (h *Health) ChecksInState(args *structs.ChecksInStateRequest, return err } reply.Index, reply.HealthChecks = index, checks - return h.srv.filterACL(args.Token, reply) + if err := h.srv.filterACL(args.Token, reply); err != nil { + return err + } + return h.srv.sortNodesByDistanceFrom(args.Source, reply.HealthChecks) }) } @@ -82,7 +85,10 @@ func (h *Health) ServiceChecks(args *structs.ServiceSpecificRequest, return err } reply.Index, reply.HealthChecks = index, checks - return h.srv.filterACL(args.Token, reply) + if err := h.srv.filterACL(args.Token, reply); err != nil { + return err + } + return h.srv.sortNodesByDistanceFrom(args.Source, reply.HealthChecks) }) } @@ -115,8 +121,12 @@ func (h *Health) ServiceNodes(args *structs.ServiceSpecificRequest, reply *struc if err != nil { return err } + reply.Index, reply.Nodes = index, nodes - return h.srv.filterACL(args.Token, reply) + if err := h.srv.filterACL(args.Token, reply); err != nil { + return err + } + return h.srv.sortNodesByDistanceFrom(args.Source, reply.Nodes) }) // Provide some metrics diff --git a/consul/health_endpoint_test.go b/consul/health_endpoint_test.go index 77356983e2..13d85798d5 100644 --- a/consul/health_endpoint_test.go +++ b/consul/health_endpoint_test.go @@ -3,6 +3,7 @@ package consul import ( "os" "testing" + "time" "github.com/hashicorp/consul/consul/structs" "github.com/hashicorp/consul/testutil" @@ -55,6 +56,83 @@ func TestHealth_ChecksInState(t *testing.T) { } } +func TestHealth_ChecksInState_DistanceSort(t *testing.T) { + dir1, s1 := testServer(t) + defer os.RemoveAll(dir1) + defer s1.Shutdown() + client := rpcClient(t, s1) + defer client.Close() + + testutil.WaitForLeader(t, client.Call, "dc1") + if err := s1.fsm.State().EnsureNode(1, structs.Node{"foo", "127.0.0.2"}); err != nil { + t.Fatalf("err: %v", err) + } + if err := s1.fsm.State().EnsureNode(2, structs.Node{"bar", "127.0.0.3"}); err != nil { + t.Fatalf("err: %v", err) + } + updates := []structs.Coordinate{ + {"foo", generateCoordinate(1 * time.Millisecond)}, + {"bar", generateCoordinate(2 * time.Millisecond)}, + } + if err := s1.fsm.State().CoordinateBatchUpdate(3, updates); err != nil { + t.Fatalf("err: %v", err) + } + + arg := structs.RegisterRequest{ + Datacenter: "dc1", + Node: "foo", + Address: "127.0.0.1", + Check: &structs.HealthCheck{ + Name: "memory utilization", + Status: structs.HealthPassing, + }, + } + + var out struct{} + if err := client.Call("Catalog.Register", &arg, &out); err != nil { + t.Fatalf("err: %v", err) + } + + arg.Node = "bar" + if err := client.Call("Catalog.Register", &arg, &out); err != nil { + t.Fatalf("err: %v", err) + } + + // Query relative to foo to make sure it shows up first in the list. + var out2 structs.IndexedHealthChecks + inState := structs.ChecksInStateRequest{ + Datacenter: "dc1", + State: structs.HealthPassing, + Source: structs.QuerySource{ + Datacenter: "dc1", + Node: "foo", + }, + } + if err := client.Call("Health.ChecksInState", &inState, &out2); err != nil { + t.Fatalf("err: %v", err) + } + checks := out2.HealthChecks + if len(checks) != 3 { + t.Fatalf("Bad: %v", checks) + } + if checks[0].Node != "foo" { + t.Fatalf("Bad: %v", checks[1]) + } + + // Now query relative to bar to make sure it shows up first. + inState.Source.Node = "bar" + if err := client.Call("Health.ChecksInState", &inState, &out2); err != nil { + t.Fatalf("err: %v", err) + } + checks = out2.HealthChecks + if len(checks) != 3 { + t.Fatalf("Bad: %v", checks) + } + if checks[0].Node != "bar" { + t.Fatalf("Bad: %v", checks[1]) + } +} + func TestHealth_NodeChecks(t *testing.T) { dir1, s1 := testServer(t) defer os.RemoveAll(dir1) @@ -142,6 +220,94 @@ func TestHealth_ServiceChecks(t *testing.T) { } } +func TestHealth_ServiceChecks_DistanceSort(t *testing.T) { + dir1, s1 := testServer(t) + defer os.RemoveAll(dir1) + defer s1.Shutdown() + client := rpcClient(t, s1) + defer client.Close() + + testutil.WaitForLeader(t, client.Call, "dc1") + if err := s1.fsm.State().EnsureNode(1, structs.Node{"foo", "127.0.0.2"}); err != nil { + t.Fatalf("err: %v", err) + } + if err := s1.fsm.State().EnsureNode(2, structs.Node{"bar", "127.0.0.3"}); err != nil { + t.Fatalf("err: %v", err) + } + updates := []structs.Coordinate{ + {"foo", generateCoordinate(1 * time.Millisecond)}, + {"bar", generateCoordinate(2 * time.Millisecond)}, + } + if err := s1.fsm.State().CoordinateBatchUpdate(3, updates); err != nil { + t.Fatalf("err: %v", err) + } + + arg := structs.RegisterRequest{ + Datacenter: "dc1", + Node: "foo", + Address: "127.0.0.1", + Service: &structs.NodeService{ + ID: "db", + Service: "db", + }, + Check: &structs.HealthCheck{ + Name: "db connect", + Status: structs.HealthPassing, + ServiceID: "db", + }, + } + + var out struct{} + if err := client.Call("Catalog.Register", &arg, &out); err != nil { + t.Fatalf("err: %v", err) + } + + arg.Node = "bar" + if err := client.Call("Catalog.Register", &arg, &out); err != nil { + t.Fatalf("err: %v", err) + } + + // Query relative to foo to make sure it shows up first in the list. + var out2 structs.IndexedHealthChecks + node := structs.ServiceSpecificRequest{ + Datacenter: "dc1", + ServiceName: "db", + Source: structs.QuerySource{ + Datacenter: "dc1", + Node: "foo", + }, + } + if err := client.Call("Health.ServiceChecks", &node, &out2); err != nil { + t.Fatalf("err: %v", err) + } + checks := out2.HealthChecks + if len(checks) != 2 { + t.Fatalf("Bad: %v", checks) + } + if checks[0].Node != "foo" { + t.Fatalf("Bad: %v", checks) + } + if checks[1].Node != "bar" { + t.Fatalf("Bad: %v", checks) + } + + // Now query relative to bar to make sure it shows up first. + node.Source.Node = "bar" + if err := client.Call("Health.ServiceChecks", &node, &out2); err != nil { + t.Fatalf("err: %v", err) + } + checks = out2.HealthChecks + if len(checks) != 2 { + t.Fatalf("Bad: %v", checks) + } + if checks[0].Node != "bar" { + t.Fatalf("Bad: %v", checks) + } + if checks[1].Node != "foo" { + t.Fatalf("Bad: %v", checks) + } +} + func TestHealth_ServiceNodes(t *testing.T) { dir1, s1 := testServer(t) defer os.RemoveAll(dir1) @@ -225,6 +391,94 @@ func TestHealth_ServiceNodes(t *testing.T) { } } +func TestHealth_ServiceNodes_DistanceSort(t *testing.T) { + dir1, s1 := testServer(t) + defer os.RemoveAll(dir1) + defer s1.Shutdown() + client := rpcClient(t, s1) + defer client.Close() + + testutil.WaitForLeader(t, client.Call, "dc1") + if err := s1.fsm.State().EnsureNode(1, structs.Node{"foo", "127.0.0.2"}); err != nil { + t.Fatalf("err: %v", err) + } + if err := s1.fsm.State().EnsureNode(2, structs.Node{"bar", "127.0.0.3"}); err != nil { + t.Fatalf("err: %v", err) + } + updates := []structs.Coordinate{ + {"foo", generateCoordinate(1 * time.Millisecond)}, + {"bar", generateCoordinate(2 * time.Millisecond)}, + } + if err := s1.fsm.State().CoordinateBatchUpdate(3, updates); err != nil { + t.Fatalf("err: %v", err) + } + + arg := structs.RegisterRequest{ + Datacenter: "dc1", + Node: "foo", + Address: "127.0.0.1", + Service: &structs.NodeService{ + ID: "db", + Service: "db", + }, + Check: &structs.HealthCheck{ + Name: "db connect", + Status: structs.HealthPassing, + ServiceID: "db", + }, + } + + var out struct{} + if err := client.Call("Catalog.Register", &arg, &out); err != nil { + t.Fatalf("err: %v", err) + } + + arg.Node = "bar" + if err := client.Call("Catalog.Register", &arg, &out); err != nil { + t.Fatalf("err: %v", err) + } + + // Query relative to foo to make sure it shows up first in the list. + var out2 structs.IndexedCheckServiceNodes + req := structs.ServiceSpecificRequest{ + Datacenter: "dc1", + ServiceName: "db", + Source: structs.QuerySource{ + Datacenter: "dc1", + Node: "foo", + }, + } + if err := client.Call("Health.ServiceNodes", &req, &out2); err != nil { + t.Fatalf("err: %v", err) + } + nodes := out2.Nodes + if len(nodes) != 2 { + t.Fatalf("Bad: %v", nodes) + } + if nodes[0].Node.Node != "foo" { + t.Fatalf("Bad: %v", nodes[0]) + } + if nodes[1].Node.Node != "bar" { + t.Fatalf("Bad: %v", nodes[1]) + } + + // Now query relative to bar to make sure it shows up first. + req.Source.Node = "bar" + if err := client.Call("Health.ServiceNodes", &req, &out2); err != nil { + t.Fatalf("err: %v", err) + } + nodes = out2.Nodes + if len(nodes) != 2 { + t.Fatalf("Bad: %v", nodes) + } + if nodes[0].Node.Node != "bar" { + t.Fatalf("Bad: %v", nodes[0]) + } + if nodes[1].Node.Node != "foo" { + t.Fatalf("Bad: %v", nodes[1]) + } +} + func TestHealth_NodeChecks_FilterACL(t *testing.T) { dir, token, srv, codec := testACLFilterServer(t) defer os.RemoveAll(dir) diff --git a/consul/rtt.go b/consul/rtt.go index 596d1d3146..304712a96b 100644 --- a/consul/rtt.go +++ b/consul/rtt.go @@ -98,6 +98,84 @@ func (n *serviceNodeSorter) Less(i, j int) bool { return n.Vec[i] < n.Vec[j] } +// serviceNodeSorter takes a list of health checks and a parallel vector of +// distances and implements sort.Interface, keeping both structures coherent and +// sorting by distance. +type healthCheckSorter struct { + Checks structs.HealthChecks + Vec []float64 +} + +// newHealthCheckSorter returns a new sorter for the given source coordinate and +// set of health checks with nodes. +func (s *Server) newHealthCheckSorter(c *coordinate.Coordinate, checks structs.HealthChecks) (sort.Interface, error) { + state := s.fsm.State() + vec := make([]float64, len(checks)) + for i, check := range checks { + _, coord, err := state.CoordinateGet(check.Node) + if err != nil { + return nil, err + } + vec[i] = computeDistance(c, coord) + } + return &healthCheckSorter{checks, vec}, nil +} + +// See sort.Interface. +func (n *healthCheckSorter) Len() int { + return len(n.Checks) +} + +// See sort.Interface. +func (n *healthCheckSorter) Swap(i, j int) { + n.Checks[i], n.Checks[j] = n.Checks[j], n.Checks[i] + n.Vec[i], n.Vec[j] = n.Vec[j], n.Vec[i] +} + +// See sort.Interface. +func (n *healthCheckSorter) Less(i, j int) bool { + return n.Vec[i] < n.Vec[j] +} + +// checkServiceNodeSorter takes a list of service nodes and a parallel vector of +// distances and implements sort.Interface, keeping both structures coherent and +// sorting by distance. +type checkServiceNodeSorter struct { + Nodes structs.CheckServiceNodes + Vec []float64 +} + +// newCheckServiceNodeSorter returns a new sorter for the given source coordinate +// and set of nodes with health checks. +func (s *Server) newCheckServiceNodeSorter(c *coordinate.Coordinate, nodes structs.CheckServiceNodes) (sort.Interface, error) { + state := s.fsm.State() + vec := make([]float64, len(nodes)) + for i, node := range nodes { + _, coord, err := state.CoordinateGet(node.Node.Node) + if err != nil { + return nil, err + } + vec[i] = computeDistance(c, coord) + } + return &checkServiceNodeSorter{nodes, vec}, nil +} + +// See sort.Interface. +func (n *checkServiceNodeSorter) Len() int { + return len(n.Nodes) +} + +// See sort.Interface. +func (n *checkServiceNodeSorter) Swap(i, j int) { + n.Nodes[i], n.Nodes[j] = n.Nodes[j], n.Nodes[i] + n.Vec[i], n.Vec[j] = n.Vec[j], n.Vec[i] +} + +// See sort.Interface. +func (n *checkServiceNodeSorter) Less(i, j int) bool { + return n.Vec[i] < n.Vec[j] +} + // newSorterByDistanceFrom returns a sorter for the given type. func (s *Server) newSorterByDistanceFrom(c *coordinate.Coordinate, subj interface{}) (sort.Interface, error) { switch v := subj.(type) { @@ -105,6 +183,10 @@ func (s *Server) newSorterByDistanceFrom(c *coordinate.Coordinate, subj interfac return s.newNodeSorter(c, v) case structs.ServiceNodes: return s.newServiceNodeSorter(c, v) + case structs.HealthChecks: + return s.newHealthCheckSorter(c, v) + case structs.CheckServiceNodes: + return s.newCheckServiceNodeSorter(c, v) default: panic(fmt.Errorf("Unhandled type passed to newSorterByDistanceFrom: %#v", subj)) } diff --git a/consul/rtt_test.go b/consul/rtt_test.go index 4a882eaab7..ceff1fd976 100644 --- a/consul/rtt_test.go +++ b/consul/rtt_test.go @@ -47,6 +47,32 @@ func verifyServiceNodeSort(t *testing.T, nodes structs.ServiceNodes, expected st } } +// verifyHealthCheckSort makes sure the order of the nodes in the slice is the +// same as the expected order, expressed as a comma-separated string. +func verifyHealthCheckSort(t *testing.T, checks structs.HealthChecks, expected string) { + vec := make([]string, len(checks)) + for i, check := range checks { + vec[i] = check.Node + } + actual := strings.Join(vec, ",") + if actual != expected { + t.Fatalf("bad sort: %s != %s", actual, expected) + } +} + +// verifyCheckServiceNodeSort makes sure the order of the nodes in the slice is +// the same as the expected order, expressed as a comma-separated string. +func verifyCheckServiceNodeSort(t *testing.T, nodes structs.CheckServiceNodes, expected string) { + vec := make([]string, len(nodes)) + for i, node := range nodes { + vec[i] = node.Node.Node + } + actual := strings.Join(vec, ",") + if actual != expected { + t.Fatalf("bad sort: %s != %s", actual, expected) + } +} + // seedCoordinates uses the client to set up a set of nodes with a specific // set of distances from the origin. We also include the server so that we // can wait for the coordinates to get committed to the Raft log. @@ -97,7 +123,7 @@ func seedCoordinates(t *testing.T, client *rpc.Client, server *Server) { time.Sleep(2 * server.config.CoordinateUpdatePeriod) } -func TestRtt_sortNodesByDistanceFrom_Nodes(t *testing.T) { +func TestRtt_sortNodesByDistanceFrom(t *testing.T) { dir, server := testServer(t) defer os.RemoveAll(dir) defer server.Shutdown() @@ -139,9 +165,48 @@ func TestRtt_sortNodesByDistanceFrom_Nodes(t *testing.T) { } verifyNodeSort(t, nodes, "apple,node1,node2,node3,node4,node5") + // Set source to legit values relative to node1 but disable coordinates. + source.Node = "node1" + source.Datacenter = "dc1" + server.config.DisableCoordinates = true + if err := server.sortNodesByDistanceFrom(source, nodes); err != nil { + t.Fatalf("err: %v", err) + } + verifyNodeSort(t, nodes, "apple,node1,node2,node3,node4,node5") + + // Now enable coordinates and sort relative to node1, note that apple + // doesn't have any seeded coordinate info so it should end up at the + // end, despite its lexical hegemony. + server.config.DisableCoordinates = false + if err := server.sortNodesByDistanceFrom(source, nodes); err != nil { + t.Fatalf("err: %v", err) + } + verifyNodeSort(t, nodes, "node1,node4,node5,node2,node3,apple") +} + +func TestRtt_sortNodesByDistanceFrom_Nodes(t *testing.T) { + dir, server := testServer(t) + defer os.RemoveAll(dir) + defer server.Shutdown() + + client := rpcClient(t, server) + defer client.Close() + testutil.WaitForLeader(t, client.Call, "dc1") + seedCoordinates(t, client, server) + + nodes := structs.Nodes{ + structs.Node{Node: "apple"}, + structs.Node{Node: "node1"}, + structs.Node{Node: "node2"}, + structs.Node{Node: "node3"}, + structs.Node{Node: "node4"}, + structs.Node{Node: "node5"}, + } + // Now sort relative to node1, note that apple doesn't have any // seeded coordinate info so it should end up at the end, despite // its lexical hegemony. + var source structs.QuerySource source.Node = "node1" source.Datacenter = "dc1" if err := server.sortNodesByDistanceFrom(source, nodes); err != nil { @@ -187,32 +252,10 @@ func TestRtt_sortNodesByDistanceFrom_ServiceNodes(t *testing.T) { structs.ServiceNode{Node: "node5"}, } - // The zero value for the source should not trigger any sorting. - var source structs.QuerySource - if err := server.sortNodesByDistanceFrom(source, nodes); err != nil { - t.Fatalf("err: %v", err) - } - verifyServiceNodeSort(t, nodes, "apple,node1,node2,node3,node4,node5") - - // Same for a source in some other DC. - source.Node = "node1" - source.Datacenter = "dc2" - if err := server.sortNodesByDistanceFrom(source, nodes); err != nil { - t.Fatalf("err: %v", err) - } - verifyServiceNodeSort(t, nodes, "apple,node1,node2,node3,node4,node5") - - // Same for a source node in our DC that we have no coordinate for. - source.Node = "apple" - source.Datacenter = "dc1" - if err := server.sortNodesByDistanceFrom(source, nodes); err != nil { - t.Fatalf("err: %v", err) - } - verifyServiceNodeSort(t, nodes, "apple,node1,node2,node3,node4,node5") - // Now sort relative to node1, note that apple doesn't have any // seeded coordinate info so it should end up at the end, despite // its lexical hegemony. + var source structs.QuerySource source.Node = "node1" source.Datacenter = "dc1" if err := server.sortNodesByDistanceFrom(source, nodes); err != nil { @@ -239,6 +282,104 @@ func TestRtt_sortNodesByDistanceFrom_ServiceNodes(t *testing.T) { verifyServiceNodeSort(t, nodes, "node2,node3,node5,node4,node1,apple") } +func TestRtt_sortNodesByDistanceFrom_HealthChecks(t *testing.T) { + dir, server := testServer(t) + defer os.RemoveAll(dir) + defer server.Shutdown() + + client := rpcClient(t, server) + defer client.Close() + testutil.WaitForLeader(t, client.Call, "dc1") + seedCoordinates(t, client, server) + + checks := structs.HealthChecks{ + &structs.HealthCheck{Node: "apple"}, + &structs.HealthCheck{Node: "node1"}, + &structs.HealthCheck{Node: "node2"}, + &structs.HealthCheck{Node: "node3"}, + &structs.HealthCheck{Node: "node4"}, + &structs.HealthCheck{Node: "node5"}, + } + + // Now sort relative to node1, note that apple doesn't have any + // seeded coordinate info so it should end up at the end, despite + // its lexical hegemony. + var source structs.QuerySource + source.Node = "node1" + source.Datacenter = "dc1" + if err := server.sortNodesByDistanceFrom(source, checks); err != nil { + t.Fatalf("err: %v", err) + } + verifyHealthCheckSort(t, checks, "node1,node4,node5,node2,node3,apple") + + // Try another sort from node2. Note that node5 and node3 are the + // same distance away so the stable sort should preserve the order + // they were in from the previous sort. + source.Node = "node2" + source.Datacenter = "dc1" + if err := server.sortNodesByDistanceFrom(source, checks); err != nil { + t.Fatalf("err: %v", err) + } + verifyHealthCheckSort(t, checks, "node2,node5,node3,node4,node1,apple") + + // Let's exercise the stable sort explicitly to make sure we didn't + // just get lucky. + checks[1], checks[2] = checks[2], checks[1] + if err := server.sortNodesByDistanceFrom(source, checks); err != nil { + t.Fatalf("err: %v", err) + } + verifyHealthCheckSort(t, checks, "node2,node3,node5,node4,node1,apple") +} + +func TestRtt_sortNodesByDistanceFrom_CheckServiceNodes(t *testing.T) { + dir, server := testServer(t) + defer os.RemoveAll(dir) + defer server.Shutdown() + + client := rpcClient(t, server) + defer client.Close() + testutil.WaitForLeader(t, client.Call, "dc1") + seedCoordinates(t, client, server) + + nodes := structs.CheckServiceNodes{ + structs.CheckServiceNode{Node: structs.Node{Node: "apple"}}, + structs.CheckServiceNode{Node: structs.Node{Node: "node1"}}, + structs.CheckServiceNode{Node: structs.Node{Node: "node2"}}, + structs.CheckServiceNode{Node: structs.Node{Node: "node3"}}, + structs.CheckServiceNode{Node: structs.Node{Node: "node4"}}, + structs.CheckServiceNode{Node: structs.Node{Node: "node5"}}, + } + + // Now sort relative to node1, note that apple doesn't have any + // seeded coordinate info so it should end up at the end, despite + // its lexical hegemony. + var source structs.QuerySource + source.Node = "node1" + source.Datacenter = "dc1" + if err := server.sortNodesByDistanceFrom(source, nodes); err != nil { + t.Fatalf("err: %v", err) + } + verifyCheckServiceNodeSort(t, nodes, "node1,node4,node5,node2,node3,apple") + + // Try another sort from node2. Note that node5 and node3 are the + // same distance away so the stable sort should preserve the order + // they were in from the previous sort. + source.Node = "node2" + source.Datacenter = "dc1" + if err := server.sortNodesByDistanceFrom(source, nodes); err != nil { + t.Fatalf("err: %v", err) + } + verifyCheckServiceNodeSort(t, nodes, "node2,node5,node3,node4,node1,apple") + + // Let's exercise the stable sort explicitly to make sure we didn't + // just get lucky. + nodes[1], nodes[2] = nodes[2], nodes[1] + if err := server.sortNodesByDistanceFrom(source, nodes); err != nil { + t.Fatalf("err: %v", err) + } + verifyCheckServiceNodeSort(t, nodes, "node2,node3,node5,node4,node1,apple") +} + // mockNodeMap is keyed by node name and the values are the coordinates of the // node. type mockNodeMap map[string]*coordinate.Coordinate diff --git a/consul/structs/structs.go b/consul/structs/structs.go index 069e4bbca3..44c30f1c9f 100644 --- a/consul/structs/structs.go +++ b/consul/structs/structs.go @@ -203,7 +203,7 @@ func (r *DCSpecificRequest) RequestDatacenter() string { return r.Datacenter } -// ServiceSpecificRequest is used to query about a specific node +// ServiceSpecificRequest is used to query about a specific service type ServiceSpecificRequest struct { Datacenter string ServiceName string @@ -232,6 +232,7 @@ func (r *NodeSpecificRequest) RequestDatacenter() string { type ChecksInStateRequest struct { Datacenter string State string + Source QuerySource QueryOptions } @@ -356,7 +357,7 @@ type HealthCheck struct { type HealthChecks []*HealthCheck // CheckServiceNode is used to provide the node, its service -// definition, as well as a HealthCheck that is associated +// definition, as well as a HealthCheck that is associated. type CheckServiceNode struct { Node *Node Service *NodeService diff --git a/website/source/docs/agent/http/health.html.markdown b/website/source/docs/agent/http/health.html.markdown index ca965c99e8..e1321b3d7d 100644 --- a/website/source/docs/agent/http/health.html.markdown +++ b/website/source/docs/agent/http/health.html.markdown @@ -70,6 +70,11 @@ This endpoint is hit with a GET and returns the checks associated with the service provided on the path. By default, the datacenter of the agent is queried; however, the dc can be provided using the "?dc=" query parameter. +Adding the optional "?near=" parameter with a node name will sort +the node list in ascending order based on the estimated round trip +time from that node. Passing "?near=self" will use the agent's local +node for the sort. + It returns a JSON body like this: ```javascript @@ -95,6 +100,11 @@ This endpoint is hit with a GET and returns the nodes providing the service indicated on the path. By default, the datacenter of the agent is queried; however, the dc can be provided using the "?dc=" query parameter. +Adding the optional "?near=" parameter with a node name will sort +the node list in ascending order based on the estimated round trip +time from that node. Passing "?near=self" will use the agent's local +node for the sort. + By default, all nodes matching the service are returned. The list can be filtered by tag using the "?tag=" query parameter. @@ -159,6 +169,11 @@ This endpoint is hit with a GET and returns the checks in the state provided on the path. By default, the datacenter of the agent is queried; however, the dc can be provided using the "?dc=" query parameter. +Adding the optional "?near=" parameter with a node name will sort +the node list in ascending order based on the estimated round trip +time from that node. Passing "?near=self" will use the agent's local +node for the sort. + The supported states are `any`, `unknown`, `passing`, `warning`, or `critical`. The `any` state is a wildcard that can be used to return all checks.