mirror of https://github.com/hashicorp/consul
Allow ignoring checks by ID when defining a PreparedQuery. Fixes #3727.
parent
464bad03cb
commit
0d8993e338
|
@ -6,6 +6,7 @@ import (
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
"github.com/hashicorp/consul/agent/structs"
|
"github.com/hashicorp/consul/agent/structs"
|
||||||
|
"github.com/hashicorp/consul/types"
|
||||||
"github.com/mitchellh/copystructure"
|
"github.com/mitchellh/copystructure"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -32,6 +33,15 @@ var (
|
||||||
"${agent.segment}",
|
"${agent.segment}",
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
IgnoreCheckIDs: []types.CheckID{
|
||||||
|
"${name.full}",
|
||||||
|
"${name.prefix}",
|
||||||
|
"${name.suffix}",
|
||||||
|
"${match(0)}",
|
||||||
|
"${match(1)}",
|
||||||
|
"${match(2)}",
|
||||||
|
"${agent.segment}",
|
||||||
|
},
|
||||||
Tags: []string{
|
Tags: []string{
|
||||||
"${name.full}",
|
"${name.full}",
|
||||||
"${name.prefix}",
|
"${name.prefix}",
|
||||||
|
@ -124,6 +134,7 @@ func TestTemplate_Compile(t *testing.T) {
|
||||||
query.Template.Type = structs.QueryTemplateTypeNamePrefixMatch
|
query.Template.Type = structs.QueryTemplateTypeNamePrefixMatch
|
||||||
query.Template.Regexp = "^(hello)there$"
|
query.Template.Regexp = "^(hello)there$"
|
||||||
query.Service.Service = "${name.full}"
|
query.Service.Service = "${name.full}"
|
||||||
|
query.Service.IgnoreCheckIDs = []types.CheckID{"${match(1)}", "${agent.segment}"}
|
||||||
query.Service.Tags = []string{"${match(1)}", "${agent.segment}"}
|
query.Service.Tags = []string{"${match(1)}", "${agent.segment}"}
|
||||||
backup, err := copystructure.Copy(query)
|
backup, err := copystructure.Copy(query)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -151,6 +162,10 @@ func TestTemplate_Compile(t *testing.T) {
|
||||||
},
|
},
|
||||||
Service: structs.ServiceQuery{
|
Service: structs.ServiceQuery{
|
||||||
Service: "hellothere",
|
Service: "hellothere",
|
||||||
|
IgnoreCheckIDs: []types.CheckID{
|
||||||
|
"hello",
|
||||||
|
"segment-foo",
|
||||||
|
},
|
||||||
Tags: []string{
|
Tags: []string{
|
||||||
"hello",
|
"hello",
|
||||||
"segment-foo",
|
"segment-foo",
|
||||||
|
|
|
@ -496,7 +496,8 @@ func (p *PreparedQuery) execute(query *structs.PreparedQuery,
|
||||||
}
|
}
|
||||||
|
|
||||||
// Filter out any unhealthy nodes.
|
// Filter out any unhealthy nodes.
|
||||||
nodes = nodes.Filter(query.Service.OnlyPassing)
|
nodes = nodes.FilterIgnore(query.Service.OnlyPassing,
|
||||||
|
query.Service.IgnoreCheckIDs)
|
||||||
|
|
||||||
// Apply the node metadata filters, if any.
|
// Apply the node metadata filters, if any.
|
||||||
if len(query.Service.NodeMeta) > 0 {
|
if len(query.Service.NodeMeta) > 0 {
|
||||||
|
|
|
@ -17,6 +17,7 @@ import (
|
||||||
"github.com/hashicorp/consul/api"
|
"github.com/hashicorp/consul/api"
|
||||||
"github.com/hashicorp/consul/testrpc"
|
"github.com/hashicorp/consul/testrpc"
|
||||||
"github.com/hashicorp/consul/testutil/retry"
|
"github.com/hashicorp/consul/testutil/retry"
|
||||||
|
"github.com/hashicorp/consul/types"
|
||||||
"github.com/hashicorp/net-rpc-msgpackrpc"
|
"github.com/hashicorp/net-rpc-msgpackrpc"
|
||||||
"github.com/hashicorp/serf/coordinate"
|
"github.com/hashicorp/serf/coordinate"
|
||||||
)
|
)
|
||||||
|
@ -2076,6 +2077,41 @@ func TestPreparedQuery_Execute(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Make the query ignore all our health checks (which have "failing" ID
|
||||||
|
// implicitly from their name).
|
||||||
|
query.Query.Service.IgnoreCheckIDs = []types.CheckID{"failing"}
|
||||||
|
if err := msgpackrpc.CallWithCodec(codec1, "PreparedQuery.Apply", &query, &query.Query.ID); err != nil {
|
||||||
|
t.Fatalf("err: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// We should end up with 10 nodes again
|
||||||
|
{
|
||||||
|
req := structs.PreparedQueryExecuteRequest{
|
||||||
|
Datacenter: "dc1",
|
||||||
|
QueryIDOrName: query.Query.ID,
|
||||||
|
QueryOptions: structs.QueryOptions{Token: execToken},
|
||||||
|
}
|
||||||
|
|
||||||
|
var reply structs.PreparedQueryExecuteResponse
|
||||||
|
if err := msgpackrpc.CallWithCodec(codec1, "PreparedQuery.Execute", &req, &reply); err != nil {
|
||||||
|
t.Fatalf("err: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(reply.Nodes) != 10 ||
|
||||||
|
reply.Datacenter != "dc1" ||
|
||||||
|
reply.Service != query.Query.Service.Service ||
|
||||||
|
!reflect.DeepEqual(reply.DNS, query.Query.DNS) ||
|
||||||
|
!reply.QueryMeta.KnownLeader {
|
||||||
|
t.Fatalf("bad: %v", reply)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Undo that so all the following tests aren't broken!
|
||||||
|
query.Query.Service.IgnoreCheckIDs = nil
|
||||||
|
if err := msgpackrpc.CallWithCodec(codec1, "PreparedQuery.Apply", &query, &query.Query.ID); err != nil {
|
||||||
|
t.Fatalf("err: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
// Make the query more picky by adding a tag filter. This just proves we
|
// Make the query more picky by adding a tag filter. This just proves we
|
||||||
// call into the tag filter, it is tested more thoroughly in a separate
|
// call into the tag filter, it is tested more thoroughly in a separate
|
||||||
// test.
|
// test.
|
||||||
|
|
|
@ -10,6 +10,7 @@ import (
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
"github.com/hashicorp/consul/agent/structs"
|
"github.com/hashicorp/consul/agent/structs"
|
||||||
|
"github.com/hashicorp/consul/types"
|
||||||
)
|
)
|
||||||
|
|
||||||
// MockPreparedQuery is a fake endpoint that we inject into the Consul server
|
// MockPreparedQuery is a fake endpoint that we inject into the Consul server
|
||||||
|
@ -87,9 +88,10 @@ func TestPreparedQuery_Create(t *testing.T) {
|
||||||
NearestN: 4,
|
NearestN: 4,
|
||||||
Datacenters: []string{"dc1", "dc2"},
|
Datacenters: []string{"dc1", "dc2"},
|
||||||
},
|
},
|
||||||
OnlyPassing: true,
|
IgnoreCheckIDs: []types.CheckID{"broken_check"},
|
||||||
Tags: []string{"foo", "bar"},
|
OnlyPassing: true,
|
||||||
NodeMeta: map[string]string{"somekey": "somevalue"},
|
Tags: []string{"foo", "bar"},
|
||||||
|
NodeMeta: map[string]string{"somekey": "somevalue"},
|
||||||
},
|
},
|
||||||
DNS: structs.QueryDNSOptions{
|
DNS: structs.QueryDNSOptions{
|
||||||
TTL: "10s",
|
TTL: "10s",
|
||||||
|
@ -122,9 +124,10 @@ func TestPreparedQuery_Create(t *testing.T) {
|
||||||
"NearestN": 4,
|
"NearestN": 4,
|
||||||
"Datacenters": []string{"dc1", "dc2"},
|
"Datacenters": []string{"dc1", "dc2"},
|
||||||
},
|
},
|
||||||
"OnlyPassing": true,
|
"IgnoreCheckIDs": []string{"broken_check"},
|
||||||
"Tags": []string{"foo", "bar"},
|
"OnlyPassing": true,
|
||||||
"NodeMeta": map[string]string{"somekey": "somevalue"},
|
"Tags": []string{"foo", "bar"},
|
||||||
|
"NodeMeta": map[string]string{"somekey": "somevalue"},
|
||||||
},
|
},
|
||||||
"DNS": map[string]interface{}{
|
"DNS": map[string]interface{}{
|
||||||
"TTL": "10s",
|
"TTL": "10s",
|
||||||
|
|
|
@ -1,5 +1,7 @@
|
||||||
package structs
|
package structs
|
||||||
|
|
||||||
|
import "github.com/hashicorp/consul/types"
|
||||||
|
|
||||||
// QueryDatacenterOptions sets options about how we fail over if there are no
|
// QueryDatacenterOptions sets options about how we fail over if there are no
|
||||||
// healthy nodes in the local datacenter.
|
// healthy nodes in the local datacenter.
|
||||||
type QueryDatacenterOptions struct {
|
type QueryDatacenterOptions struct {
|
||||||
|
@ -34,6 +36,12 @@ type ServiceQuery struct {
|
||||||
// discarded)
|
// discarded)
|
||||||
OnlyPassing bool
|
OnlyPassing bool
|
||||||
|
|
||||||
|
// IgnoreCheckIDs is an optional list of health check IDs to ignore when
|
||||||
|
// considering which nodes are healthy. It is useful as an emergency measure
|
||||||
|
// to temporarily override some health check that is producing false negatives
|
||||||
|
// for example.
|
||||||
|
IgnoreCheckIDs []types.CheckID
|
||||||
|
|
||||||
// Near allows the query to always prefer the node nearest the given
|
// Near allows the query to always prefer the node nearest the given
|
||||||
// node. If the node does not exist, results are returned in their
|
// node. If the node does not exist, results are returned in their
|
||||||
// normal randomly-shuffled order. Supplying the magic "_agent" value
|
// normal randomly-shuffled order. Supplying the magic "_agent" value
|
||||||
|
|
|
@ -580,16 +580,33 @@ func (nodes CheckServiceNodes) Shuffle() {
|
||||||
// check if that option is selected). Note that this returns the filtered
|
// check if that option is selected). Note that this returns the filtered
|
||||||
// results AND modifies the receiver for performance.
|
// results AND modifies the receiver for performance.
|
||||||
func (nodes CheckServiceNodes) Filter(onlyPassing bool) CheckServiceNodes {
|
func (nodes CheckServiceNodes) Filter(onlyPassing bool) CheckServiceNodes {
|
||||||
|
return nodes.FilterIgnore(onlyPassing, nil)
|
||||||
|
}
|
||||||
|
|
||||||
|
// FilterIgnore removes nodes that are failing health checks just like Filter.
|
||||||
|
// It also ignores the status of any check with an ID present in ignoreCheckIDs
|
||||||
|
// as if that check didn't exist. Note that this returns the filtered results
|
||||||
|
// AND modifies the receiver for performance.
|
||||||
|
func (nodes CheckServiceNodes) FilterIgnore(onlyPassing bool,
|
||||||
|
ignoreCheckIDs []types.CheckID) CheckServiceNodes {
|
||||||
n := len(nodes)
|
n := len(nodes)
|
||||||
OUTER:
|
OUTER:
|
||||||
for i := 0; i < n; i++ {
|
for i := 0; i < n; i++ {
|
||||||
node := nodes[i]
|
node := nodes[i]
|
||||||
|
INNER:
|
||||||
for _, check := range node.Checks {
|
for _, check := range node.Checks {
|
||||||
|
for _, ignore := range ignoreCheckIDs {
|
||||||
|
if check.CheckID == ignore {
|
||||||
|
// Skip this _check_ but keep looking at other checks for this node.
|
||||||
|
continue INNER
|
||||||
|
}
|
||||||
|
}
|
||||||
if check.Status == api.HealthCritical ||
|
if check.Status == api.HealthCritical ||
|
||||||
(onlyPassing && check.Status != api.HealthPassing) {
|
(onlyPassing && check.Status != api.HealthPassing) {
|
||||||
nodes[i], nodes[n-1] = nodes[n-1], CheckServiceNode{}
|
nodes[i], nodes[n-1] = nodes[n-1], CheckServiceNode{}
|
||||||
n--
|
n--
|
||||||
i--
|
i--
|
||||||
|
// Skip this _node_ now we've swapped it off the end of the list.
|
||||||
continue OUTER
|
continue OUTER
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -441,6 +441,20 @@ func TestStructs_CheckServiceNodes_Filter(t *testing.T) {
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
CheckServiceNode{
|
||||||
|
Node: &Node{
|
||||||
|
Node: "node4",
|
||||||
|
Address: "127.0.0.4",
|
||||||
|
},
|
||||||
|
Checks: HealthChecks{
|
||||||
|
// This check has a different ID to the others to ensure it is not
|
||||||
|
// ignored by accident
|
||||||
|
&HealthCheck{
|
||||||
|
CheckID: "failing2",
|
||||||
|
Status: api.HealthCritical,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
// Test the case where warnings are allowed.
|
// Test the case where warnings are allowed.
|
||||||
|
@ -473,6 +487,26 @@ func TestStructs_CheckServiceNodes_Filter(t *testing.T) {
|
||||||
t.Fatalf("bad: %v", filtered)
|
t.Fatalf("bad: %v", filtered)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Allow failing checks to be ignored (note that the test checks have empty
|
||||||
|
// CheckID which is valid).
|
||||||
|
{
|
||||||
|
twiddle := make(CheckServiceNodes, len(nodes))
|
||||||
|
if n := copy(twiddle, nodes); n != len(nodes) {
|
||||||
|
t.Fatalf("bad: %d", n)
|
||||||
|
}
|
||||||
|
filtered := twiddle.FilterIgnore(true, []types.CheckID{""})
|
||||||
|
expected := CheckServiceNodes{
|
||||||
|
nodes[0],
|
||||||
|
nodes[1],
|
||||||
|
nodes[2], // Node 3's critical check should be ignored.
|
||||||
|
// Node 4 should still be failing since it's got a critical check with a
|
||||||
|
// non-ignored ID.
|
||||||
|
}
|
||||||
|
if !reflect.DeepEqual(filtered, expected) {
|
||||||
|
t.Fatalf("bad: %v", filtered)
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestStructs_DirEntry_Clone(t *testing.T) {
|
func TestStructs_DirEntry_Clone(t *testing.T) {
|
||||||
|
|
|
@ -34,6 +34,12 @@ type ServiceQuery struct {
|
||||||
// local datacenter.
|
// local datacenter.
|
||||||
Failover QueryDatacenterOptions
|
Failover QueryDatacenterOptions
|
||||||
|
|
||||||
|
// IgnoreCheckIDs is an optional list of health check IDs to ignore when
|
||||||
|
// considering which nodes are healthy. It is useful as an emergency measure
|
||||||
|
// to temporarily override some health check that is producing false negatives
|
||||||
|
// for example.
|
||||||
|
IgnoreCheckIDs []string
|
||||||
|
|
||||||
// If OnlyPassing is true then we will only include nodes with passing
|
// If OnlyPassing is true then we will only include nodes with passing
|
||||||
// health checks (critical AND warning checks will cause a node to be
|
// health checks (critical AND warning checks will cause a node to be
|
||||||
// discarded)
|
// discarded)
|
||||||
|
|
|
@ -116,6 +116,53 @@ func TestAPI_PreparedQuery(t *testing.T) {
|
||||||
t.Fatalf("bad datacenter: %v", results)
|
t.Fatalf("bad datacenter: %v", results)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Add new node with failing health check.
|
||||||
|
reg2 := reg
|
||||||
|
reg2.Node = "failingnode"
|
||||||
|
reg2.Check = &AgentCheck{
|
||||||
|
Node: "failingnode",
|
||||||
|
ServiceID: "redis1",
|
||||||
|
ServiceName: "redis",
|
||||||
|
Name: "failingcheck",
|
||||||
|
Status: "critical",
|
||||||
|
}
|
||||||
|
retry.Run(t, func(r *retry.R) {
|
||||||
|
if _, err := catalog.Register(reg2, nil); err != nil {
|
||||||
|
r.Fatal(err)
|
||||||
|
}
|
||||||
|
if _, _, err := catalog.Node("failingnode", nil); err != nil {
|
||||||
|
r.Fatal(err)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
// Execute by ID. Should return only healthy node.
|
||||||
|
results, _, err = query.Execute(def.ID, nil)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("err: %s", err)
|
||||||
|
}
|
||||||
|
if len(results.Nodes) != 1 || results.Nodes[0].Node.Node != "foobar" {
|
||||||
|
t.Fatalf("bad: %v", results)
|
||||||
|
}
|
||||||
|
if wan, ok := results.Nodes[0].Node.TaggedAddresses["wan"]; !ok || wan != "127.0.0.1" {
|
||||||
|
t.Fatalf("bad: %v", results)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Update PQ with ignore rule for the failing check
|
||||||
|
def.Service.IgnoreCheckIDs = []string{"failingcheck"}
|
||||||
|
_, err = query.Update(def, nil)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("err: %s", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Execute by ID. Should return BOTH nodes ignoring the failing check.
|
||||||
|
results, _, err = query.Execute(def.ID, nil)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("err: %s", err)
|
||||||
|
}
|
||||||
|
if len(results.Nodes) != 2 {
|
||||||
|
t.Fatalf("got %d nodes, want 2", len(results.Nodes))
|
||||||
|
}
|
||||||
|
|
||||||
// Delete it.
|
// Delete it.
|
||||||
_, err = query.Delete(def.ID, nil)
|
_, err = query.Delete(def.ID, nil)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|
|
@ -25,7 +25,7 @@ section for more details about how prepared queries work with Consul's ACL syste
|
||||||
### Prepared Query Templates
|
### Prepared Query Templates
|
||||||
|
|
||||||
Consul 0.6.4 and later support prepared query templates. These are created
|
Consul 0.6.4 and later support prepared query templates. These are created
|
||||||
similar to static templates, except with some additional fields and features.
|
similar to static queries, except with some additional fields and features.
|
||||||
Here is an example prepared query template:
|
Here is an example prepared query template:
|
||||||
|
|
||||||
```json
|
```json
|
||||||
|
@ -206,6 +206,13 @@ The table below shows this endpoint's support for
|
||||||
failover, even if it is selected by both `NearestN` and is listed in
|
failover, even if it is selected by both `NearestN` and is listed in
|
||||||
`Datacenters`.
|
`Datacenters`.
|
||||||
|
|
||||||
|
- `IgnoreCheckIDs` `(array<string>: nil)` - Specifies a list of check IDs that
|
||||||
|
should be ignored when filtering unhealthy instances. This is mostly useful
|
||||||
|
in an emergency or as a temporary measure when a health check is found to be
|
||||||
|
unreliable. Being able to ignore it in centrally-defined queries can be
|
||||||
|
simpler than de-registering the check as an interim solution until the check
|
||||||
|
can be fixed.
|
||||||
|
|
||||||
- `OnlyPassing` `(bool: false)` - Specifies the behavior of the query's health
|
- `OnlyPassing` `(bool: false)` - Specifies the behavior of the query's health
|
||||||
check filtering. If this is set to false, the results will include nodes
|
check filtering. If this is set to false, the results will include nodes
|
||||||
with checks in the passing as well as the warning states. If this is set to
|
with checks in the passing as well as the warning states. If this is set to
|
||||||
|
|
Loading…
Reference in New Issue