From 911ed76e5b76f7fc888f7a798f0b214f15d64623 Mon Sep 17 00:00:00 2001 From: "R.B. Boyer" Date: Fri, 12 Jul 2019 11:12:56 -0500 Subject: [PATCH] tests: further reduce envoy integration test flakiness (#6112) In addition to waiting until s2 shows up healthy in the Catalog, wait until s2 endpoints show up healthy via EDS in the s1 upstream clusters. --- .../connect/envoy/case-badauthz/verify.bats | 4 ++ .../connect/envoy/case-basic/verify.bats | 4 ++ .../envoy/case-centralconf/verify.bats | 4 ++ .../envoy/case-dogstatsd-udp/verify.bats | 4 ++ .../connect/envoy/case-grpc/verify.bats | 4 ++ .../envoy/case-http-badauthz/verify.bats | 4 ++ .../connect/envoy/case-http/verify.bats | 4 ++ .../connect/envoy/case-http2/verify.bats | 4 ++ .../connect/envoy/case-prometheus/verify.bats | 4 ++ .../connect/envoy/case-statsd-udp/verify.bats | 4 ++ .../connect/envoy/case-zipkin/verify.bats | 4 ++ test/integration/connect/envoy/helpers.bash | 37 +++++++++++++++++-- 12 files changed, 78 insertions(+), 3 deletions(-) diff --git a/test/integration/connect/envoy/case-badauthz/verify.bats b/test/integration/connect/envoy/case-badauthz/verify.bats index 1a07f48d35..b479875582 100644 --- a/test/integration/connect/envoy/case-badauthz/verify.bats +++ b/test/integration/connect/envoy/case-badauthz/verify.bats @@ -22,6 +22,10 @@ load helpers assert_service_has_healthy_instances s2 1 } +@test "s1 upstream should have healthy endpoints for s2" { + assert_upstream_has_healthy_endpoints 127.0.0.1:19000 s2 1 +} + @test "s1 upstream should NOT be able to connect to s2" { run retry_default must_fail_tcp_connection localhost:5000 diff --git a/test/integration/connect/envoy/case-basic/verify.bats b/test/integration/connect/envoy/case-basic/verify.bats index bb31749524..5c41a62bf1 100644 --- a/test/integration/connect/envoy/case-basic/verify.bats +++ b/test/integration/connect/envoy/case-basic/verify.bats @@ -26,6 +26,10 @@ load helpers assert_service_has_healthy_instances s2 1 } +@test "s1 upstream should have healthy endpoints for s2" { + assert_upstream_has_healthy_endpoints 127.0.0.1:19000 s2 1 +} + @test "s1 upstream should be able to connect to s2" { run retry_default curl -s -f -d hello localhost:5000 [ "$status" -eq 0 ] diff --git a/test/integration/connect/envoy/case-centralconf/verify.bats b/test/integration/connect/envoy/case-centralconf/verify.bats index b9af6d7112..0c414afb09 100644 --- a/test/integration/connect/envoy/case-centralconf/verify.bats +++ b/test/integration/connect/envoy/case-centralconf/verify.bats @@ -22,6 +22,10 @@ load helpers assert_service_has_healthy_instances s2 1 } +@test "s1 upstream should have healthy endpoints for s2" { + assert_upstream_has_healthy_endpoints 127.0.0.1:19000 s2 1 +} + @test "s1 upstream should be able to connect to s2 with http/1.1" { run retry_default curl --http1.1 -s -f -d hello localhost:5000 [ "$status" -eq 0 ] diff --git a/test/integration/connect/envoy/case-dogstatsd-udp/verify.bats b/test/integration/connect/envoy/case-dogstatsd-udp/verify.bats index eb4ca3bad7..c66a497b1e 100644 --- a/test/integration/connect/envoy/case-dogstatsd-udp/verify.bats +++ b/test/integration/connect/envoy/case-dogstatsd-udp/verify.bats @@ -14,6 +14,10 @@ load helpers assert_service_has_healthy_instances s2 1 } +@test "s1 upstream should have healthy endpoints for s2" { + assert_upstream_has_healthy_endpoints 127.0.0.1:19000 s2 1 +} + @test "s1 upstream should be able to connect to s2" { run retry_default curl -s -f -d hello localhost:5000 diff --git a/test/integration/connect/envoy/case-grpc/verify.bats b/test/integration/connect/envoy/case-grpc/verify.bats index bccc27f238..ffce93c252 100644 --- a/test/integration/connect/envoy/case-grpc/verify.bats +++ b/test/integration/connect/envoy/case-grpc/verify.bats @@ -14,6 +14,10 @@ load helpers assert_service_has_healthy_instances s2 1 } +@test "s1 upstream should have healthy endpoints for s2" { + assert_upstream_has_healthy_endpoints 127.0.0.1:19000 s2 1 +} + @test "s1 upstream should be able to connect to s2 via grpc" { run fortio grpcping localhost:5000 diff --git a/test/integration/connect/envoy/case-http-badauthz/verify.bats b/test/integration/connect/envoy/case-http-badauthz/verify.bats index 597dfbb519..a746a08141 100644 --- a/test/integration/connect/envoy/case-http-badauthz/verify.bats +++ b/test/integration/connect/envoy/case-http-badauthz/verify.bats @@ -22,6 +22,10 @@ load helpers assert_service_has_healthy_instances s2 1 } +@test "s1 upstream should have healthy endpoints for s2" { + assert_upstream_has_healthy_endpoints 127.0.0.1:19000 s2 1 +} + @test "s1 upstream should NOT be able to connect to s2" { run retry_default must_fail_http_connection localhost:5000 diff --git a/test/integration/connect/envoy/case-http/verify.bats b/test/integration/connect/envoy/case-http/verify.bats index 2c7d2b8e45..00454902bf 100644 --- a/test/integration/connect/envoy/case-http/verify.bats +++ b/test/integration/connect/envoy/case-http/verify.bats @@ -22,6 +22,10 @@ load helpers assert_service_has_healthy_instances s2 1 } +@test "s1 upstream should have healthy endpoints for s2" { + assert_upstream_has_healthy_endpoints 127.0.0.1:19000 s2 1 +} + @test "s1 upstream should be able to connect to s2 with http/1.1" { run retry_default curl --http1.1 -s -f -d hello localhost:5000 [ "$status" -eq 0 ] diff --git a/test/integration/connect/envoy/case-http2/verify.bats b/test/integration/connect/envoy/case-http2/verify.bats index 00c1a47192..f3ee52b703 100644 --- a/test/integration/connect/envoy/case-http2/verify.bats +++ b/test/integration/connect/envoy/case-http2/verify.bats @@ -22,6 +22,10 @@ load helpers assert_service_has_healthy_instances s2 1 } +@test "s1 upstream should have healthy endpoints for s2" { + assert_upstream_has_healthy_endpoints 127.0.0.1:19000 s2 1 +} + @test "s1 upstream should be able to connect to s2 via http2" { # We use grpc here because it's the easiest way to test http2. The server # needs to support h2c since the proxy doesn't talk TLS to the local app. diff --git a/test/integration/connect/envoy/case-prometheus/verify.bats b/test/integration/connect/envoy/case-prometheus/verify.bats index b076022441..182eb439ed 100644 --- a/test/integration/connect/envoy/case-prometheus/verify.bats +++ b/test/integration/connect/envoy/case-prometheus/verify.bats @@ -22,6 +22,10 @@ load helpers assert_service_has_healthy_instances s2 1 } +@test "s1 upstream should have healthy endpoints for s2" { + assert_upstream_has_healthy_endpoints 127.0.0.1:19000 s2 1 +} + @test "s1 upstream should be able to connect to s2 with http/1.1" { run retry_default curl --http1.1 -s -f -d hello localhost:5000 [ "$status" -eq 0 ] diff --git a/test/integration/connect/envoy/case-statsd-udp/verify.bats b/test/integration/connect/envoy/case-statsd-udp/verify.bats index 57d1445033..ee678ad747 100644 --- a/test/integration/connect/envoy/case-statsd-udp/verify.bats +++ b/test/integration/connect/envoy/case-statsd-udp/verify.bats @@ -14,6 +14,10 @@ load helpers assert_service_has_healthy_instances s2 1 } +@test "s1 upstream should have healthy endpoints for s2" { + assert_upstream_has_healthy_endpoints 127.0.0.1:19000 s2 1 +} + @test "s1 upstream should be able to connect to s2" { run retry_default curl -s -f -d hello localhost:5000 [ "$status" == 0 ] diff --git a/test/integration/connect/envoy/case-zipkin/verify.bats b/test/integration/connect/envoy/case-zipkin/verify.bats index 88743051b6..bfc697cfd5 100644 --- a/test/integration/connect/envoy/case-zipkin/verify.bats +++ b/test/integration/connect/envoy/case-zipkin/verify.bats @@ -22,6 +22,10 @@ load helpers assert_service_has_healthy_instances s2 1 } +@test "s1 upstream should have healthy endpoints for s2" { + assert_upstream_has_healthy_endpoints 127.0.0.1:19000 s2 1 +} + @test "s1 upstream should be able to connect to s2" { run retry_default curl -s -f -d hello localhost:5000 [ "$status" == "0" ] diff --git a/test/integration/connect/envoy/helpers.bash b/test/integration/connect/envoy/helpers.bash index e5d51a9f6e..9b933df12e 100755 --- a/test/integration/connect/envoy/helpers.bash +++ b/test/integration/connect/envoy/helpers.bash @@ -117,7 +117,38 @@ function snapshot_envoy_admin { local ENVOY_NAME=$2 docker_wget "http://${HOSTPORT}/config_dump" -q -O - > "./workdir/envoy/${ENVOY_NAME}-config_dump.json" - docker_wget "http://${HOSTPORT}/clusters" -q -O - > "./workdir/envoy/${ENVOY_NAME}-clusters.out" + docker_wget "http://${HOSTPORT}/clusters?format=json" -q -O - > "./workdir/envoy/${ENVOY_NAME}-clusters.json" +} + +function get_healthy_upstream_endpoint_count { + local HOSTPORT=$1 + local CLUSTER_NAME=$2 + run retry_default curl -s -f "http://${HOSTPORT}/clusters?format=json" + [ "$status" -eq 0 ] + echo "$output" | jq --raw-output " +.cluster_statuses[] +| select(.name|startswith(\"${CLUSTER_NAME}.default.dc1.internal.\")) +| .host_statuses[].health_status +| select(.eds_health_status == \"HEALTHY\") +| length" +} + +function assert_upstream_has_healthy_endpoints_once { + local HOSTPORT=$1 + local CLUSTER_NAME=$2 + local EXPECT_COUNT=$3 + + GOT_COUNT=$(get_healthy_upstream_endpoint_count $HOSTPORT $CLUSTER_NAME) + + [ "$GOT_COUNT" -eq $EXPECT_COUNT ] +} + +function assert_upstream_has_healthy_endpoints { + local HOSTPORT=$1 + local CLUSTER_NAME=$2 + local EXPECT_COUNT=$3 + run retry 30 1 assert_upstream_has_healthy_endpoints_once $HOSTPORT $CLUSTER_NAME $EXPECT_COUNT + [ "$status" -eq 0 ] } function get_healthy_service_count { @@ -127,7 +158,7 @@ function get_healthy_service_count { echo "$output" | jq --raw-output '. | length' } -function health_service_count_matches { +function assert_service_has_healthy_instances_once { local SERVICE_NAME=$1 local EXPECT_COUNT=$2 @@ -140,7 +171,7 @@ function assert_service_has_healthy_instances { local SERVICE_NAME=$1 local EXPECT_COUNT=$2 - run retry 30 2 health_service_count_matches $SERVICE_NAME $EXPECT_COUNT + run retry 30 1 assert_service_has_healthy_instances_once $SERVICE_NAME $EXPECT_COUNT [ "$status" -eq 0 ] }