Browse Source

make TestCatalogNodes_Blocking less flaky (#7074)

- Explicitly wait to start the test until the initial AE sync of the node.

- Run the blocking query in the main goroutine so that poor goroutine
scheduling is less likely to be the cause of delays.

- If the blocking query is woken up with no index change, rerun the
query. This may happen if the CI server is loaded and time dilation is
happening.
pull/7104/head
R.B. Boyer 5 years ago committed by GitHub
parent
commit
c91d0fa2c9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 72
      agent/catalog_endpoint_test.go

72
agent/catalog_endpoint_test.go

@@ -292,70 +292,74 @@ func TestCatalogNodes_Blocking(t *testing.T) {
t.Parallel() t.Parallel()
a := NewTestAgent(t, t.Name(), "") a := NewTestAgent(t, t.Name(), "")
defer a.Shutdown() defer a.Shutdown()
testrpc.WaitForTestAgent(t, a.RPC, "dc1") testrpc.WaitForTestAgent(t, a.RPC, "dc1", testrpc.WaitForAntiEntropySync())
// Register node // Run the query
args := &structs.DCSpecificRequest{ args := &structs.DCSpecificRequest{
Datacenter: "dc1", Datacenter: "dc1",
} }
var out structs.IndexedNodes var out structs.IndexedNodes
if err := a.RPC("Catalog.ListNodes", *args, &out); err != nil { if err := a.RPC("Catalog.ListNodes", *args, &out); err != nil {
t.Fatalf("err: %v", err) t.Fatalf("err: %v", err)
} }
// t.Fatal must be called from the main go routine // Async cause a change
// of the test. Because of this we cannot call waitIndex := out.Index
// t.Fatal from within the go routines and use
// an error channel instead.
errch := make(chan error, 2)
go func() {
testrpc.WaitForTestAgent(t, a.RPC, "dc1")
start := time.Now() start := time.Now()
go func() {
// register a service after the blocking call time.Sleep(100 * time.Millisecond)
// in order to unblock it.
time.AfterFunc(100*time.Millisecond, func() {
args := &structs.RegisterRequest{ args := &structs.RegisterRequest{
Datacenter: "dc1", Datacenter: "dc1",
Node: "foo", Node: "foo",
Address: "127.0.0.1", Address: "127.0.0.1",
} }
var out struct{} var out struct{}
errch <- a.RPC("Catalog.Register", args, &out) if err := a.RPC("Catalog.Register", args, &out); err != nil {
}) t.Fatalf("err: %v", err)
}
}()
// now block const waitDuration = 3 * time.Second
req, _ := http.NewRequest("GET", fmt.Sprintf("/v1/catalog/nodes?wait=3s&index=%d", out.Index+1), nil)
// Re-run the query, if errantly woken up with no change, resume blocking.
var elapsed time.Duration
RUN_BLOCKING_QUERY:
req, err := http.NewRequest("GET", fmt.Sprintf("/v1/catalog/nodes?wait=%s&index=%d",
waitDuration.String(),
waitIndex), nil)
if err != nil {
t.Fatalf("err: %v", err)
}
resp := httptest.NewRecorder() resp := httptest.NewRecorder()
obj, err := a.srv.CatalogNodes(resp, req) obj, err := a.srv.CatalogNodes(resp, req)
if err != nil { if err != nil {
errch <- err t.Fatalf("err: %v", err)
} }
// Should block for a while elapsed = time.Since(start)
if d := time.Since(start); d < 50*time.Millisecond {
errch <- fmt.Errorf("too fast: %v", d) idx := getIndex(t, resp)
if idx < waitIndex {
t.Fatalf("bad: %v", idx)
} else if idx == waitIndex {
if elapsed > waitDuration {
// This should prevent the loop from running longer than the
// waitDuration
t.Fatalf("too slow: %v", elapsed)
}
goto RUN_BLOCKING_QUERY
} }
if idx := getIndex(t, resp); idx <= out.Index { // Should block at least 100ms before getting the changed results
errch <- fmt.Errorf("bad: %v", idx) if elapsed < 100*time.Millisecond {
t.Fatalf("too fast: %v", elapsed)
} }
nodes := obj.(structs.Nodes) nodes := obj.(structs.Nodes)
if len(nodes) != 2 { if len(nodes) != 2 {
errch <- fmt.Errorf("bad: %v", obj) t.Fatalf("bad: %v", obj)
} }
errch <- nil
}()
// wait for both go routines to return
if err := <-errch; err != nil {
t.Fatal(err)
}
if err := <-errch; err != nil {
t.Fatal(err)
}
} }
func TestCatalogNodes_DistanceSort(t *testing.T) { func TestCatalogNodes_DistanceSort(t *testing.T) {

Loading…
Cancel
Save