From a44b9e84b167ffb0d8aab92f38c2e980ce38f374 Mon Sep 17 00:00:00 2001
From: Pierre Souchay
Date: Mon, 19 Mar 2018 14:14:03 +0100
Subject: [PATCH 1/6] =?UTF-8?q?[BUGFIX]=C2=A0When=20a=20node=20level=20che?=
=?UTF-8?q?ck=20is=20removed,=20ensure=20all=20services=20of=20node=20are?=
=?UTF-8?q?=20notified?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Bugfix for https://github.com/hashicorp/consul/pull/3899
When a node level check is removed (example: maintenance),
some watchers on services might have to recompute their state.
If those nodes are performing blocking queries, they have to be notified.
While their state was updated when node-level state did change or was added
this was not the case when the check was removed. This fixes it.
---
agent/consul/state/catalog.go | 85 +++++++++++++++---------------
agent/consul/state/catalog_test.go | 5 ++
2 files changed, 48 insertions(+), 42 deletions(-)
diff --git a/agent/consul/state/catalog.go b/agent/consul/state/catalog.go
index 38d59e8109..6fcb45955b 100644
--- a/agent/consul/state/catalog.go
+++ b/agent/consul/state/catalog.go
@@ -1086,6 +1086,43 @@ func (s *Store) EnsureCheck(idx uint64, hc *structs.HealthCheck) error {
return nil
}
+// This ensure all connected services to a check are updated properly
+func (s *Store) alterServicesFromCheck(tx *memdb.Txn, idx uint64, hc *structs.HealthCheck, checkServiceExists bool) error {
+ // If the check is associated with a service, check that we have
+ // a registration for the service.
+ if hc.ServiceID != "" {
+ service, err := tx.First("services", "id", hc.Node, hc.ServiceID)
+ // When deleting a service, service might have been removed already
+ if err != nil && checkServiceExists {
+ return fmt.Errorf("failed service lookup: %s", err)
+ }
+ if service == nil {
+ return ErrMissingService
+ }
+
+ // Copy in the service name and tags
+ svc := service.(*structs.ServiceNode)
+ hc.ServiceName = svc.ServiceName
+ hc.ServiceTags = svc.ServiceTags
+ if err = tx.Insert("index", &IndexEntry{serviceIndexName(svc.ServiceName), idx}); err != nil {
+ return fmt.Errorf("failed updating index: %s", err)
+ }
+ } else {
+ // Update the status for all the services associated with this node
+ services, err := tx.Get("services", "node", hc.Node)
+ if err != nil {
+ return fmt.Errorf("failed updating services for node %s: %s", hc.Node, err)
+ }
+ for service := services.Next(); service != nil; service = services.Next() {
+ svc := service.(*structs.ServiceNode).ToNodeService()
+ if err := tx.Insert("index", &IndexEntry{serviceIndexName(svc.Service), idx}); err != nil {
+ return fmt.Errorf("failed updating index: %s", err)
+ }
+ }
+ }
+ return nil
+}
+
// ensureCheckTransaction is used as the inner method to handle inserting
// a health check into the state store. It ensures safety against inserting
// checks with no matching node or service.
@@ -1119,36 +1156,9 @@ func (s *Store) ensureCheckTxn(tx *memdb.Txn, idx uint64, hc *structs.HealthChec
return ErrMissingNode
}
- // If the check is associated with a service, check that we have
- // a registration for the service.
- if hc.ServiceID != "" {
- service, err := tx.First("services", "id", hc.Node, hc.ServiceID)
- if err != nil {
- return fmt.Errorf("failed service lookup: %s", err)
- }
- if service == nil {
- return ErrMissingService
- }
-
- // Copy in the service name and tags
- svc := service.(*structs.ServiceNode)
- hc.ServiceName = svc.ServiceName
- hc.ServiceTags = svc.ServiceTags
- if err = tx.Insert("index", &IndexEntry{serviceIndexName(svc.ServiceName), idx}); err != nil {
- return fmt.Errorf("failed updating index: %s", err)
- }
- } else {
- // Update the status for all the services associated with this node
- services, err := tx.Get("services", "node", hc.Node)
- if err != nil {
- return fmt.Errorf("failed updating services for node %s: %s", hc.Node, err)
- }
- for service := services.Next(); service != nil; service = services.Next() {
- svc := service.(*structs.ServiceNode).ToNodeService()
- if err := tx.Insert("index", &IndexEntry{serviceIndexName(svc.Service), idx}); err != nil {
- return fmt.Errorf("failed updating index: %s", err)
- }
- }
+ err = s.alterServicesFromCheck(tx, idx, hc, true)
+ if err != nil {
+ return err
}
// Delete any sessions for this check if the health is critical.
@@ -1390,19 +1400,10 @@ func (s *Store) deleteCheckTxn(tx *memdb.Txn, idx uint64, node string, checkID t
return nil
}
existing := hc.(*structs.HealthCheck)
- if existing != nil && existing.ServiceID != "" {
- service, err := tx.First("services", "id", node, existing.ServiceID)
+ if existing != nil {
+ err = s.alterServicesFromCheck(tx, idx, existing, false)
if err != nil {
- return fmt.Errorf("failed service lookup: %s", err)
- }
- if service == nil {
- return ErrMissingService
- }
-
- // Updated index of service
- svc := service.(*structs.ServiceNode)
- if err = tx.Insert("index", &IndexEntry{serviceIndexName(svc.ServiceName), idx}); err != nil {
- return fmt.Errorf("failed updating index: %s", err)
+ return fmt.Errorf("Failed to update services linked to deleted healthcheck: %s", err)
}
}
diff --git a/agent/consul/state/catalog_test.go b/agent/consul/state/catalog_test.go
index b22b82fed6..1c3b1a8678 100644
--- a/agent/consul/state/catalog_test.go
+++ b/agent/consul/state/catalog_test.go
@@ -2119,6 +2119,7 @@ func TestStateStore_DeleteCheck(t *testing.T) {
// Register a node and a node-level health check.
testRegisterNode(t, s, 1, "node1")
testRegisterCheck(t, s, 2, "node1", "", "check1", api.HealthPassing)
+ testRegisterService(t, s, 2, "node1", "service1")
// Make sure the check is there.
ws := memdb.NewWatchSet()
@@ -2130,6 +2131,8 @@ func TestStateStore_DeleteCheck(t *testing.T) {
t.Fatalf("bad: %#v", checks)
}
+ ensureServiceVersion(t, s, ws, "service1", 2, 1)
+
// Delete the check.
if err := s.DeleteCheck(3, "node1", "check1"); err != nil {
t.Fatalf("err: %s", err)
@@ -2137,6 +2140,8 @@ func TestStateStore_DeleteCheck(t *testing.T) {
if !watchFired(ws) {
t.Fatalf("bad")
}
+ // All services linked to this node should have their index updated
+ ensureServiceVersion(t, s, ws, "service1", 3, 1)
// Check is gone
ws = memdb.NewWatchSet()
From 5e974843f1147daec145b2ff4d2b316a36035487 Mon Sep 17 00:00:00 2001
From: Pierre Souchay
Date: Mon, 19 Mar 2018 16:12:54 +0100
Subject: [PATCH 2/6] Refactoring to have clearer code without weird bool
---
agent/consul/state/catalog.go | 83 +++++++++++++++++++----------------
1 file changed, 46 insertions(+), 37 deletions(-)
diff --git a/agent/consul/state/catalog.go b/agent/consul/state/catalog.go
index 6fcb45955b..b83265a037 100644
--- a/agent/consul/state/catalog.go
+++ b/agent/consul/state/catalog.go
@@ -1086,39 +1086,17 @@ func (s *Store) EnsureCheck(idx uint64, hc *structs.HealthCheck) error {
return nil
}
-// This ensure all connected services to a check are updated properly
-func (s *Store) alterServicesFromCheck(tx *memdb.Txn, idx uint64, hc *structs.HealthCheck, checkServiceExists bool) error {
- // If the check is associated with a service, check that we have
- // a registration for the service.
- if hc.ServiceID != "" {
- service, err := tx.First("services", "id", hc.Node, hc.ServiceID)
- // When deleting a service, service might have been removed already
- if err != nil && checkServiceExists {
- return fmt.Errorf("failed service lookup: %s", err)
- }
- if service == nil {
- return ErrMissingService
- }
-
- // Copy in the service name and tags
- svc := service.(*structs.ServiceNode)
- hc.ServiceName = svc.ServiceName
- hc.ServiceTags = svc.ServiceTags
- if err = tx.Insert("index", &IndexEntry{serviceIndexName(svc.ServiceName), idx}); err != nil {
+// updateIndexForAllServicesOfNode updates the status for all the services associated with this node
+func (s *Store) updateAllServiceIndexesOfNode(tx *memdb.Txn, idx uint64, nodeID string) error {
+ services, err := tx.Get("services", "node", nodeID)
+ if err != nil {
+ return fmt.Errorf("failed updating services for node %s: %s", nodeID, err)
+ }
+ for service := services.Next(); service != nil; service = services.Next() {
+ svc := service.(*structs.ServiceNode).ToNodeService()
+ if err := tx.Insert("index", &IndexEntry{serviceIndexName(svc.Service), idx}); err != nil {
return fmt.Errorf("failed updating index: %s", err)
}
- } else {
- // Update the status for all the services associated with this node
- services, err := tx.Get("services", "node", hc.Node)
- if err != nil {
- return fmt.Errorf("failed updating services for node %s: %s", hc.Node, err)
- }
- for service := services.Next(); service != nil; service = services.Next() {
- svc := service.(*structs.ServiceNode).ToNodeService()
- if err := tx.Insert("index", &IndexEntry{serviceIndexName(svc.Service), idx}); err != nil {
- return fmt.Errorf("failed updating index: %s", err)
- }
- }
}
return nil
}
@@ -1156,9 +1134,30 @@ func (s *Store) ensureCheckTxn(tx *memdb.Txn, idx uint64, hc *structs.HealthChec
return ErrMissingNode
}
- err = s.alterServicesFromCheck(tx, idx, hc, true)
- if err != nil {
- return err
+ // If the check is associated with a service, check that we have
+ // a registration for the service.
+ if hc.ServiceID != "" {
+ service, err := tx.First("services", "id", hc.Node, hc.ServiceID)
+ if err != nil {
+ return fmt.Errorf("failed service lookup: %s", err)
+ }
+ if service == nil {
+ return ErrMissingService
+ }
+
+ // Copy in the service name and tags
+ svc := service.(*structs.ServiceNode)
+ hc.ServiceName = svc.ServiceName
+ hc.ServiceTags = svc.ServiceTags
+ if err = tx.Insert("index", &IndexEntry{serviceIndexName(svc.ServiceName), idx}); err != nil {
+ return fmt.Errorf("failed updating index: %s", err)
+ }
+ } else {
+ // Update the status for all the services associated with this node
+ err = s.updateAllServiceIndexesOfNode(tx, idx, hc.Node)
+ if err != nil {
+ return err
+ }
}
// Delete any sessions for this check if the health is critical.
@@ -1401,9 +1400,19 @@ func (s *Store) deleteCheckTxn(tx *memdb.Txn, idx uint64, node string, checkID t
}
existing := hc.(*structs.HealthCheck)
if existing != nil {
- err = s.alterServicesFromCheck(tx, idx, existing, false)
- if err != nil {
- return fmt.Errorf("Failed to update services linked to deleted healthcheck: %s", err)
+ // When no service is linked to this service, update all services of node
+ if existing.ServiceID != "" {
+ if err = tx.Insert("index", &IndexEntry{serviceIndexName(existing.ServiceName), idx}); err != nil {
+ return fmt.Errorf("failed updating index: %s", err)
+ }
+ } else {
+ err = s.updateAllServiceIndexesOfNode(tx, idx, existing.Node)
+ if err != nil {
+ return fmt.Errorf("Failed to update services linked to deleted healthcheck: %s", err)
+ }
+ if err := tx.Insert("index", &IndexEntry{"services", idx}); err != nil {
+ return fmt.Errorf("failed updating index: %s", err)
+ }
}
}
From b6914617d951f33a1c866cb981dc3779db73da13 Mon Sep 17 00:00:00 2001
From: Pierre Souchay
Date: Mon, 19 Mar 2018 17:12:08 +0100
Subject: [PATCH 3/6] Fixed typo in comments
---
agent/consul/state/catalog.go | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/agent/consul/state/catalog.go b/agent/consul/state/catalog.go
index b83265a037..d40143de6e 100644
--- a/agent/consul/state/catalog.go
+++ b/agent/consul/state/catalog.go
@@ -1086,7 +1086,7 @@ func (s *Store) EnsureCheck(idx uint64, hc *structs.HealthCheck) error {
return nil
}
-// updateIndexForAllServicesOfNode updates the status for all the services associated with this node
+// updateAllServiceIndexesOfNode updates the status for all the services associated with this node
func (s *Store) updateAllServiceIndexesOfNode(tx *memdb.Txn, idx uint64, nodeID string) error {
services, err := tx.Get("services", "node", nodeID)
if err != nil {
From dd9efb755a5eb4df426f91a67fa30083f5210d33 Mon Sep 17 00:00:00 2001
From: Pierre Souchay
Date: Thu, 22 Mar 2018 10:30:05 +0100
Subject: [PATCH 4/6] Fixed minor typo in comments
Might fix unstable travis build
---
agent/consul/state/catalog.go | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/agent/consul/state/catalog.go b/agent/consul/state/catalog.go
index d40143de6e..4b563cba93 100644
--- a/agent/consul/state/catalog.go
+++ b/agent/consul/state/catalog.go
@@ -1086,7 +1086,7 @@ func (s *Store) EnsureCheck(idx uint64, hc *structs.HealthCheck) error {
return nil
}
-// updateAllServiceIndexesOfNode updates the status for all the services associated with this node
+// updateAllServiceIndexesOfNode updates the Raft index of all the services associated with this node
func (s *Store) updateAllServiceIndexesOfNode(tx *memdb.Txn, idx uint64, nodeID string) error {
services, err := tx.Get("services", "node", nodeID)
if err != nil {
From 39a7b5c20daf3a16b7d265d17fbfedc8874bd048 Mon Sep 17 00:00:00 2001
From: Pierre Souchay
Date: Thu, 22 Mar 2018 12:20:25 +0100
Subject: [PATCH 5/6] Added new test regarding checks index
---
agent/consul/state/catalog_test.go | 3 +++
1 file changed, 3 insertions(+)
diff --git a/agent/consul/state/catalog_test.go b/agent/consul/state/catalog_test.go
index 1c3b1a8678..818670908a 100644
--- a/agent/consul/state/catalog_test.go
+++ b/agent/consul/state/catalog_test.go
@@ -2137,6 +2137,9 @@ func TestStateStore_DeleteCheck(t *testing.T) {
if err := s.DeleteCheck(3, "node1", "check1"); err != nil {
t.Fatalf("err: %s", err)
}
+ if idx := s.maxIndex("checks"); idx != 3 {
+ t.Fatalf("bad index: %d", idx)
+ }
if !watchFired(ws) {
t.Fatalf("bad")
}
From 5fb1b18073bf9d3b16afa28f8967fb4fa312e03d Mon Sep 17 00:00:00 2001
From: Pierre Souchay
Date: Thu, 22 Mar 2018 12:41:06 +0100
Subject: [PATCH 6/6] More test cases
---
agent/consul/state/catalog_test.go | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/agent/consul/state/catalog_test.go b/agent/consul/state/catalog_test.go
index 818670908a..4d4500d64f 100644
--- a/agent/consul/state/catalog_test.go
+++ b/agent/consul/state/catalog_test.go
@@ -2137,8 +2137,11 @@ func TestStateStore_DeleteCheck(t *testing.T) {
if err := s.DeleteCheck(3, "node1", "check1"); err != nil {
t.Fatalf("err: %s", err)
}
+ if idx, check, err := s.NodeCheck("node1", "check1"); idx != 3 || err != nil || check != nil {
+ t.Fatalf("Node check should have been deleted idx=%d, node=%v, err=%s", idx, check, err)
+ }
if idx := s.maxIndex("checks"); idx != 3 {
- t.Fatalf("bad index: %d", idx)
+ t.Fatalf("bad index for checks: %d", idx)
}
if !watchFired(ws) {
t.Fatalf("bad")