Add metrics to count service mesh Kind instance counts

This will add the counts of service mesh instances tagged by the
different ServiceKind's.
pull/11222/head
Connor Kelly 2021-09-30 16:15:26 -05:00
parent 5e7ef183ea
commit 46bf882620
No known key found for this signature in database
GPG Key ID: 588D34E7812BC0B7
5 changed files with 378 additions and 18 deletions

View File

@ -15,6 +15,13 @@ const (
tableUsage = "usage"
)
var allConnectKind = []string{
string(structs.ServiceKindConnectProxy),
string(structs.ServiceKindIngressGateway),
string(structs.ServiceKindMeshGateway),
string(structs.ServiceKindTerminatingGateway),
}
// usageTableSchema returns a new table schema used for tracking various indexes
// for the Raft log.
func usageTableSchema() *memdb.TableSchema {
@ -44,8 +51,9 @@ type UsageEntry struct {
// ServiceUsage contains all of the usage data related to services
type ServiceUsage struct {
Services int
ServiceInstances int
Services int
ServiceInstances int
ConnectServiceInstances map[string]int
EnterpriseServiceUsage
}
@ -89,8 +97,10 @@ func updateUsage(tx WriteTxn, changes Changes) error {
case tableServices:
svc := changeObject(change).(*structs.ServiceNode)
usageDeltas[change.Table] += delta
// ServiceKind is empty string for non connect services
addEnterpriseServiceInstanceUsage(usageDeltas, change)
connectDeltas(change, usageDeltas, delta)
// Construct a mapping of all of the various service names that were
// changed, in order to compare it with the finished memdb state.
// Make sure to account for the fact that services can change their names.
@ -101,6 +111,7 @@ func updateUsage(tx WriteTxn, changes Changes) error {
} else {
serviceNameChanges[svc.CompoundServiceName()] += delta
}
case "kvs":
usageDeltas[change.Table] += delta
addEnterpriseKVUsage(usageDeltas, change)
@ -118,7 +129,7 @@ func updateUsage(tx WriteTxn, changes Changes) error {
// of the tables we are tracking.
if idx == 0 {
// TODO(partitions? namespaces?)
idx = maxIndexTxn(tx, tableNodes, tableServices)
idx = maxIndexTxn(tx, tableNodes, tableServices, "kvs")
}
return writeUsageDeltas(tx, idx, usageDeltas)
@ -141,19 +152,19 @@ func updateServiceNameUsage(tx WriteTxn, usageDeltas map[string]int, serviceName
// added/removed during the transaction. This allows us to handle a single
// transaction committing multiple changes related to a single service
// name.
var svcCount int
var count int
for service := serviceIter.Next(); service != nil; service = serviceIter.Next() {
svcCount += 1
count += 1
}
var serviceState uniqueServiceState
switch {
case svcCount == 0:
case count == 0:
// If no services exist, we know we deleted the last service
// instance.
serviceState = Deleted
usageDeltas[serviceNamesUsageTable] -= 1
case svcCount == delta:
case count == delta:
// If the current number of service instances equals the number added,
// than we know we created a new service name.
serviceState = Created
@ -180,6 +191,32 @@ func serviceNameChanged(change memdb.Change) bool {
return false
}
func connectDeltas(change memdb.Change, usageDeltas map[string]int, delta int) {
// Connect metrics for updated services are more complicated. Check for:
// 1. Did ServiceKind change?
// 2. Is before ServiceKind typical? don't remove from old service kind
// 3. Is After ServiceKind typical? don't add to new service kind
// 4. Add and remove to both ServiceKind's
if change.Updated() {
before := change.Before.(*structs.ServiceNode)
after := change.After.(*structs.ServiceNode)
if before.ServiceKind != structs.ServiceKindTypical {
usageDeltas[string(before.ServiceKind)] -= 1
addEnterpriseConnectServiceInstanceUsage(usageDeltas, before, -1)
}
if after.ServiceKind != structs.ServiceKindTypical {
usageDeltas[string(after.ServiceKind)] += 1
addEnterpriseConnectServiceInstanceUsage(usageDeltas, after, 1)
}
} else {
svc := changeObject(change).(*structs.ServiceNode)
if svc.ServiceKind != structs.ServiceKindTypical {
usageDeltas[string(svc.ServiceKind)] += delta
}
addEnterpriseConnectServiceInstanceUsage(usageDeltas, svc, delta)
}
}
// writeUsageDeltas will take in a map of IDs to deltas and update each
// entry accordingly, checking for integer underflow. The index that is
// passed in will be recorded on the entry as well.
@ -266,9 +303,19 @@ func (s *Store) ServiceUsage() (uint64, ServiceUsage, error) {
return 0, ServiceUsage{}, fmt.Errorf("failed services lookup: %s", err)
}
serviceKindInstances := make(map[string]int)
for _, kind := range allConnectKind {
usage, err := firstUsageEntry(tx, kind)
if err != nil {
return 0, ServiceUsage{}, fmt.Errorf("failed services lookup: %s", err)
}
serviceKindInstances[kind] = usage.Count
}
usage := ServiceUsage{
ServiceInstances: serviceInstances.Count,
Services: services.Count,
ServiceInstances: serviceInstances.Count,
Services: services.Count,
ConnectServiceInstances: serviceKindInstances,
}
results, err := compileEnterpriseServiceUsage(tx, usage)
if err != nil {

View File

@ -18,6 +18,8 @@ func addEnterpriseServiceInstanceUsage(map[string]int, memdb.Change) {}
func addEnterpriseServiceUsage(map[string]int, map[structs.ServiceName]uniqueServiceState) {}
func addEnterpriseConnectServiceInstanceUsage(map[string]int, *structs.ServiceNode, int) {}
func addEnterpriseKVUsage(map[string]int, memdb.Change) {}
func compileEnterpriseServiceUsage(tx ReadTxn, usage ServiceUsage) (ServiceUsage, error) {

View File

@ -92,6 +92,9 @@ func TestStateStore_Usage_ServiceUsageEmpty(t *testing.T) {
require.Equal(t, idx, uint64(0))
require.Equal(t, usage.Services, 0)
require.Equal(t, usage.ServiceInstances, 0)
for k := range usage.ConnectServiceInstances {
require.Equal(t, 0, usage.ConnectServiceInstances[k])
}
}
func TestStateStore_Usage_ServiceUsage(t *testing.T) {
@ -102,12 +105,15 @@ func TestStateStore_Usage_ServiceUsage(t *testing.T) {
testRegisterService(t, s, 8, "node1", "service1")
testRegisterService(t, s, 9, "node2", "service1")
testRegisterService(t, s, 10, "node2", "service2")
testRegisterSidecarProxy(t, s, 11, "node1", "service1")
testRegisterSidecarProxy(t, s, 12, "node2", "service1")
idx, usage, err := s.ServiceUsage()
require.NoError(t, err)
require.Equal(t, idx, uint64(10))
require.Equal(t, 2, usage.Services)
require.Equal(t, 3, usage.ServiceInstances)
require.Equal(t, idx, uint64(12))
require.Equal(t, 3, usage.Services)
require.Equal(t, 5, usage.ServiceInstances)
require.Equal(t, 2, usage.ConnectServiceInstances[string(structs.ServiceKindConnectProxy)])
}
func TestStateStore_Usage_ServiceUsage_DeleteNode(t *testing.T) {
@ -123,20 +129,22 @@ func TestStateStore_Usage_ServiceUsage_DeleteNode(t *testing.T) {
svc2 := &structs.NodeService{
ID: "service2",
Service: "test",
Tags: []string{},
Address: "1.1.1.1",
Port: 1111,
}
// Register multiple instances on a single node to test that we do not
// double count deletions within the same transaction.
require.NoError(t, s.EnsureService(1, "node1", svc1))
require.NoError(t, s.EnsureService(2, "node1", svc2))
testRegisterSidecarProxy(t, s, 3, "node1", "service2")
idx, usage, err := s.ServiceUsage()
require.NoError(t, err)
require.Equal(t, idx, uint64(2))
require.Equal(t, usage.Services, 1)
require.Equal(t, usage.ServiceInstances, 2)
require.Equal(t, idx, uint64(3))
require.Equal(t, 2, usage.Services)
require.Equal(t, 3, usage.ServiceInstances)
require.Equal(t, 1, usage.ConnectServiceInstances[string(structs.ServiceKindConnectProxy)])
require.NoError(t, s.DeleteNode(3, "node1", nil))
@ -145,6 +153,9 @@ func TestStateStore_Usage_ServiceUsage_DeleteNode(t *testing.T) {
require.Equal(t, idx, uint64(3))
require.Equal(t, usage.Services, 0)
require.Equal(t, usage.ServiceInstances, 0)
for k := range usage.ConnectServiceInstances {
require.Equal(t, 0, usage.ConnectServiceInstances[k])
}
}
func TestStateStore_Usage_Restore(t *testing.T) {
@ -195,6 +206,15 @@ func TestStateStore_Usage_updateUsage_Underflow(t *testing.T) {
Before: &structs.Node{},
After: nil,
},
{
Table: tableServices,
Before: &structs.ServiceNode{
ID: "service-1-connect-proxy",
ServiceKind: structs.ServiceKindConnectProxy,
ServiceID: "service-1",
ServiceName: "service",
},
},
},
}
@ -206,6 +226,10 @@ func TestStateStore_Usage_updateUsage_Underflow(t *testing.T) {
require.NoError(t, err)
require.Equal(t, 0, u.(*UsageEntry).Count)
u, err = txn.First("usage", "id", string(structs.ServiceKindConnectProxy))
require.NoError(t, err)
require.Equal(t, 0, u.(*UsageEntry).Count)
// A insert a change to create a usage entry
changes = Changes{
Index: 1,
@ -307,3 +331,69 @@ func TestStateStore_Usage_ServiceUsage_updatingServiceName(t *testing.T) {
require.Equal(t, usage.ServiceInstances, 3)
})
}
func TestStateStore_Usage_ServiceUsage_updatingConnectProxy(t *testing.T) {
s := testStateStore(t)
testRegisterNode(t, s, 1, "node1")
testRegisterService(t, s, 1, "node1", "service1")
t.Run("change service to ConnectProxy", func(t *testing.T) {
svc := &structs.NodeService{
Kind: structs.ServiceKindConnectProxy,
ID: "service1",
Service: "after",
Address: "1.1.1.1",
Port: 1111,
}
require.NoError(t, s.EnsureService(2, "node1", svc))
// We renamed a service with a single instance, so we maintain 1 service.
idx, usage, err := s.ServiceUsage()
require.NoError(t, err)
require.Equal(t, idx, uint64(2))
require.Equal(t, usage.Services, 1)
require.Equal(t, usage.ServiceInstances, 1)
require.Equal(t, 1, usage.ConnectServiceInstances[string(structs.ServiceKindConnectProxy)])
})
t.Run("rename service with a multiple instances", func(t *testing.T) {
svc2 := &structs.NodeService{
ID: "service2",
Service: "before",
Address: "1.1.1.2",
Port: 1111,
}
require.NoError(t, s.EnsureService(3, "node1", svc2))
svc3 := &structs.NodeService{
Kind: structs.ServiceKindConnectProxy,
ID: "service3",
Service: "before",
Address: "1.1.1.3",
Port: 1111,
}
require.NoError(t, s.EnsureService(4, "node1", svc3))
idx, usage, err := s.ServiceUsage()
require.NoError(t, err)
require.Equal(t, idx, uint64(4))
require.Equal(t, usage.Services, 2)
require.Equal(t, usage.ServiceInstances, 3)
require.Equal(t, 2, usage.ConnectServiceInstances[string(structs.ServiceKindConnectProxy)])
update := &structs.NodeService{
ID: "service3",
Service: "another-name",
Address: "1.1.1.2",
Port: 1111,
}
require.NoError(t, s.EnsureService(5, "node1", update))
idx, usage, err = s.ServiceUsage()
require.NoError(t, err)
require.Equal(t, idx, uint64(5))
require.Equal(t, usage.Services, 3)
require.Equal(t, usage.ServiceInstances, 3)
require.Equal(t, 1, usage.ConnectServiceInstances[string(structs.ServiceKindConnectProxy)])
})
}

View File

@ -57,6 +57,14 @@ func (u *UsageMetricsReporter) emitServiceUsage(serviceUsage state.ServiceUsage)
float32(serviceUsage.ServiceInstances),
u.metricLabels,
)
for k, i := range serviceUsage.ConnectServiceInstances {
metrics.SetGaugeWithLabels(
[]string{"consul", "state", "connect_instances"},
float32(i),
append(u.metricLabels, metrics.Label{Name: "kind", Value: k}),
)
}
}
func (u *UsageMetricsReporter) emitKVUsage(kvUsage state.KVUsage) {

View File

@ -57,6 +57,40 @@ func TestUsageReporter_emitNodeUsage_OSS(t *testing.T) {
Value: 0,
Labels: []metrics.Label{{Name: "datacenter", Value: "dc1"}},
},
// --- service mesh ---
"consul.usage.test.consul.state.connect_instances;datacenter=dc1;kind=connect-proxy": {
Name: "consul.usage.test.consul.state.connect_instances",
Value: 0,
Labels: []metrics.Label{
{Name: "datacenter", Value: "dc1"},
{Name: "kind", Value: "connect-proxy"},
},
},
"consul.usage.test.consul.state.connect_instances;datacenter=dc1;kind=terminating-gateway": {
Name: "consul.usage.test.consul.state.connect_instances",
Value: 0,
Labels: []metrics.Label{
{Name: "datacenter", Value: "dc1"},
{Name: "kind", Value: "terminating-gateway"},
},
},
"consul.usage.test.consul.state.connect_instances;datacenter=dc1;kind=ingress-gateway": {
Name: "consul.usage.test.consul.state.connect_instances",
Value: 0,
Labels: []metrics.Label{
{Name: "datacenter", Value: "dc1"},
{Name: "kind", Value: "ingress-gateway"},
},
},
"consul.usage.test.consul.state.connect_instances;datacenter=dc1;kind=mesh-gateway": {
Name: "consul.usage.test.consul.state.connect_instances",
Value: 0,
Labels: []metrics.Label{
{Name: "datacenter", Value: "dc1"},
{Name: "kind", Value: "mesh-gateway"},
},
},
// --- kv ---
"consul.usage.test.consul.state.kv_entries;datacenter=dc1": {
Name: "consul.usage.test.consul.state.kv_entries",
Value: 0,
@ -119,6 +153,40 @@ func TestUsageReporter_emitNodeUsage_OSS(t *testing.T) {
Value: 0,
Labels: []metrics.Label{{Name: "datacenter", Value: "dc1"}},
},
// --- service mesh ---
"consul.usage.test.consul.state.connect_instances;datacenter=dc1;kind=connect-proxy": {
Name: "consul.usage.test.consul.state.connect_instances",
Value: 0,
Labels: []metrics.Label{
{Name: "datacenter", Value: "dc1"},
{Name: "kind", Value: "connect-proxy"},
},
},
"consul.usage.test.consul.state.connect_instances;datacenter=dc1;kind=terminating-gateway": {
Name: "consul.usage.test.consul.state.connect_instances",
Value: 0,
Labels: []metrics.Label{
{Name: "datacenter", Value: "dc1"},
{Name: "kind", Value: "terminating-gateway"},
},
},
"consul.usage.test.consul.state.connect_instances;datacenter=dc1;kind=ingress-gateway": {
Name: "consul.usage.test.consul.state.connect_instances",
Value: 0,
Labels: []metrics.Label{
{Name: "datacenter", Value: "dc1"},
{Name: "kind", Value: "ingress-gateway"},
},
},
"consul.usage.test.consul.state.connect_instances;datacenter=dc1;kind=mesh-gateway": {
Name: "consul.usage.test.consul.state.connect_instances",
Value: 0,
Labels: []metrics.Label{
{Name: "datacenter", Value: "dc1"},
{Name: "kind", Value: "mesh-gateway"},
},
},
// --- kv ---
"consul.usage.test.consul.state.kv_entries;datacenter=dc1": {
Name: "consul.usage.test.consul.state.kv_entries",
Value: 0,
@ -209,6 +277,40 @@ func TestUsageReporter_emitServiceUsage_OSS(t *testing.T) {
{Name: "datacenter", Value: "dc1"},
},
},
// --- service mesh ---
"consul.usage.test.consul.state.connect_instances;datacenter=dc1;kind=connect-proxy": {
Name: "consul.usage.test.consul.state.connect_instances",
Value: 0,
Labels: []metrics.Label{
{Name: "datacenter", Value: "dc1"},
{Name: "kind", Value: "connect-proxy"},
},
},
"consul.usage.test.consul.state.connect_instances;datacenter=dc1;kind=terminating-gateway": {
Name: "consul.usage.test.consul.state.connect_instances",
Value: 0,
Labels: []metrics.Label{
{Name: "datacenter", Value: "dc1"},
{Name: "kind", Value: "terminating-gateway"},
},
},
"consul.usage.test.consul.state.connect_instances;datacenter=dc1;kind=ingress-gateway": {
Name: "consul.usage.test.consul.state.connect_instances",
Value: 0,
Labels: []metrics.Label{
{Name: "datacenter", Value: "dc1"},
{Name: "kind", Value: "ingress-gateway"},
},
},
"consul.usage.test.consul.state.connect_instances;datacenter=dc1;kind=mesh-gateway": {
Name: "consul.usage.test.consul.state.connect_instances",
Value: 0,
Labels: []metrics.Label{
{Name: "datacenter", Value: "dc1"},
{Name: "kind", Value: "mesh-gateway"},
},
},
// --- kv ---
"consul.usage.test.consul.state.kv_entries;datacenter=dc1": {
Name: "consul.usage.test.consul.state.kv_entries",
Value: 0,
@ -224,11 +326,20 @@ func TestUsageReporter_emitServiceUsage_OSS(t *testing.T) {
require.NoError(t, s.EnsureNode(3, &structs.Node{Node: "baz", Address: "127.0.0.2"}))
require.NoError(t, s.EnsureNode(4, &structs.Node{Node: "qux", Address: "127.0.0.3"}))
mgw := structs.TestNodeServiceMeshGateway(t)
mgw.ID = "mesh-gateway"
tgw := structs.TestNodeServiceTerminatingGateway(t, "1.1.1.1")
tgw.ID = "terminating-gateway"
// Typical services and some consul services spread across two nodes
require.NoError(t, s.EnsureService(5, "foo", &structs.NodeService{ID: "db", Service: "db", Tags: nil, Address: "", Port: 5000}))
require.NoError(t, s.EnsureService(6, "bar", &structs.NodeService{ID: "api", Service: "api", Tags: nil, Address: "", Port: 5000}))
require.NoError(t, s.EnsureService(7, "foo", &structs.NodeService{ID: "consul", Service: "consul", Tags: nil}))
require.NoError(t, s.EnsureService(8, "bar", &structs.NodeService{ID: "consul", Service: "consul", Tags: nil}))
require.NoError(t, s.EnsureService(9, "foo", &structs.NodeService{ID: "db-connect-proxy", Service: "db-connect-proxy", Tags: nil, Address: "", Port: 5000, Kind: structs.ServiceKindConnectProxy}))
require.NoError(t, s.EnsureRegistration(10, structs.TestRegisterIngressGateway(t)))
require.NoError(t, s.EnsureService(11, "foo", mgw))
require.NoError(t, s.EnsureService(12, "foo", tgw))
},
getMembersFunc: func() []serf.Member {
return []serf.Member{
@ -279,18 +390,52 @@ func TestUsageReporter_emitServiceUsage_OSS(t *testing.T) {
// --- service ---
"consul.usage.test.consul.state.services;datacenter=dc1": {
Name: "consul.usage.test.consul.state.services",
Value: 3,
Value: 7,
Labels: []metrics.Label{
{Name: "datacenter", Value: "dc1"},
},
},
"consul.usage.test.consul.state.service_instances;datacenter=dc1": {
Name: "consul.usage.test.consul.state.service_instances",
Value: 4,
Value: 8,
Labels: []metrics.Label{
{Name: "datacenter", Value: "dc1"},
},
},
// --- service mesh ---
"consul.usage.test.consul.state.connect_instances;datacenter=dc1;kind=connect-proxy": {
Name: "consul.usage.test.consul.state.connect_instances",
Value: 1,
Labels: []metrics.Label{
{Name: "datacenter", Value: "dc1"},
{Name: "kind", Value: "connect-proxy"},
},
},
"consul.usage.test.consul.state.connect_instances;datacenter=dc1;kind=terminating-gateway": {
Name: "consul.usage.test.consul.state.connect_instances",
Value: 1,
Labels: []metrics.Label{
{Name: "datacenter", Value: "dc1"},
{Name: "kind", Value: "terminating-gateway"},
},
},
"consul.usage.test.consul.state.connect_instances;datacenter=dc1;kind=ingress-gateway": {
Name: "consul.usage.test.consul.state.connect_instances",
Value: 1,
Labels: []metrics.Label{
{Name: "datacenter", Value: "dc1"},
{Name: "kind", Value: "ingress-gateway"},
},
},
"consul.usage.test.consul.state.connect_instances;datacenter=dc1;kind=mesh-gateway": {
Name: "consul.usage.test.consul.state.connect_instances",
Value: 1,
Labels: []metrics.Label{
{Name: "datacenter", Value: "dc1"},
{Name: "kind", Value: "mesh-gateway"},
},
},
// --- kv ---
"consul.usage.test.consul.state.kv_entries;datacenter=dc1": {
Name: "consul.usage.test.consul.state.kv_entries",
Value: 0,
@ -372,6 +517,40 @@ func TestUsageReporter_emitKVUsage_OSS(t *testing.T) {
Value: 0,
Labels: []metrics.Label{{Name: "datacenter", Value: "dc1"}},
},
// --- service mesh ---
"consul.usage.test.consul.state.connect_instances;datacenter=dc1;kind=connect-proxy": {
Name: "consul.usage.test.consul.state.connect_instances",
Value: 0,
Labels: []metrics.Label{
{Name: "datacenter", Value: "dc1"},
{Name: "kind", Value: "connect-proxy"},
},
},
"consul.usage.test.consul.state.connect_instances;datacenter=dc1;kind=terminating-gateway": {
Name: "consul.usage.test.consul.state.connect_instances",
Value: 0,
Labels: []metrics.Label{
{Name: "datacenter", Value: "dc1"},
{Name: "kind", Value: "terminating-gateway"},
},
},
"consul.usage.test.consul.state.connect_instances;datacenter=dc1;kind=ingress-gateway": {
Name: "consul.usage.test.consul.state.connect_instances",
Value: 0,
Labels: []metrics.Label{
{Name: "datacenter", Value: "dc1"},
{Name: "kind", Value: "ingress-gateway"},
},
},
"consul.usage.test.consul.state.connect_instances;datacenter=dc1;kind=mesh-gateway": {
Name: "consul.usage.test.consul.state.connect_instances",
Value: 0,
Labels: []metrics.Label{
{Name: "datacenter", Value: "dc1"},
{Name: "kind", Value: "mesh-gateway"},
},
},
// --- kv ---
"consul.usage.test.consul.state.kv_entries;datacenter=dc1": {
Name: "consul.usage.test.consul.state.kv_entries",
Value: 0,
@ -443,6 +622,40 @@ func TestUsageReporter_emitKVUsage_OSS(t *testing.T) {
Value: 0,
Labels: []metrics.Label{{Name: "datacenter", Value: "dc1"}},
},
// --- service mesh ---
"consul.usage.test.consul.state.connect_instances;datacenter=dc1;kind=connect-proxy": {
Name: "consul.usage.test.consul.state.connect_instances",
Value: 0,
Labels: []metrics.Label{
{Name: "datacenter", Value: "dc1"},
{Name: "kind", Value: "connect-proxy"},
},
},
"consul.usage.test.consul.state.connect_instances;datacenter=dc1;kind=terminating-gateway": {
Name: "consul.usage.test.consul.state.connect_instances",
Value: 0,
Labels: []metrics.Label{
{Name: "datacenter", Value: "dc1"},
{Name: "kind", Value: "terminating-gateway"},
},
},
"consul.usage.test.consul.state.connect_instances;datacenter=dc1;kind=ingress-gateway": {
Name: "consul.usage.test.consul.state.connect_instances",
Value: 0,
Labels: []metrics.Label{
{Name: "datacenter", Value: "dc1"},
{Name: "kind", Value: "ingress-gateway"},
},
},
"consul.usage.test.consul.state.connect_instances;datacenter=dc1;kind=mesh-gateway": {
Name: "consul.usage.test.consul.state.connect_instances",
Value: 0,
Labels: []metrics.Label{
{Name: "datacenter", Value: "dc1"},
{Name: "kind", Value: "mesh-gateway"},
},
},
// --- kv ---
"consul.usage.test.consul.state.kv_entries;datacenter=dc1": {
Name: "consul.usage.test.consul.state.kv_entries",
Value: 4,