From 30540e406b0a7d4e1c853fc4220f2e62b84f4d1d Mon Sep 17 00:00:00 2001 From: Chris Piraino Date: Fri, 9 Oct 2020 11:01:45 -0500 Subject: [PATCH] Emit service usage metrics with correct labeling strategy (#8856) Previously, we would emit service usage metrics both with and without a namespace label attached. This is problematic in the case when you want to aggregate metrics together, i.e. "sum(consul.state.services)". This would cause services to be counted twice in that aggregate, once via the metric emitted with a namespace label, and once in the metric emited without any namespace label. --- agent/consul/usagemetrics/usagemetrics.go | 14 +-- agent/consul/usagemetrics/usagemetrics_oss.go | 19 ++- .../usagemetrics/usagemetrics_oss_test.go | 111 +++++++++++++++++- .../consul/usagemetrics/usagemetrics_test.go | 42 +------ 4 files changed, 132 insertions(+), 54 deletions(-) diff --git a/agent/consul/usagemetrics/usagemetrics.go b/agent/consul/usagemetrics/usagemetrics.go index 18b36cfd69..259c6646e1 100644 --- a/agent/consul/usagemetrics/usagemetrics.go +++ b/agent/consul/usagemetrics/usagemetrics.go @@ -119,17 +119,5 @@ func (u *UsageMetricsReporter) runOnce() { u.logger.Warn("failed to retrieve services from state store", "error", err) } - metrics.SetGaugeWithLabels( - []string{"consul", "state", "services"}, - float32(serviceUsage.Services), - u.metricLabels, - ) - - metrics.SetGaugeWithLabels( - []string{"consul", "state", "service_instances"}, - float32(serviceUsage.ServiceInstances), - u.metricLabels, - ) - - u.emitEnterpriseUsage(serviceUsage) + u.emitServiceUsage(serviceUsage) } diff --git a/agent/consul/usagemetrics/usagemetrics_oss.go b/agent/consul/usagemetrics/usagemetrics_oss.go index 37d71b83f8..1cf7e6bda2 100644 --- a/agent/consul/usagemetrics/usagemetrics_oss.go +++ b/agent/consul/usagemetrics/usagemetrics_oss.go @@ -2,6 +2,21 @@ package usagemetrics -import "github.com/hashicorp/consul/agent/consul/state" +import ( + "github.com/armon/go-metrics" + "github.com/hashicorp/consul/agent/consul/state" +) -func (u *UsageMetricsReporter) emitEnterpriseUsage(state.ServiceUsage) {} +func (u *UsageMetricsReporter) emitServiceUsage(serviceUsage state.ServiceUsage) { + metrics.SetGaugeWithLabels( + []string{"consul", "state", "services"}, + float32(serviceUsage.Services), + u.metricLabels, + ) + + metrics.SetGaugeWithLabels( + []string{"consul", "state", "service_instances"}, + float32(serviceUsage.ServiceInstances), + u.metricLabels, + ) +} diff --git a/agent/consul/usagemetrics/usagemetrics_oss_test.go b/agent/consul/usagemetrics/usagemetrics_oss_test.go index 3d5263c0b2..da62483589 100644 --- a/agent/consul/usagemetrics/usagemetrics_oss_test.go +++ b/agent/consul/usagemetrics/usagemetrics_oss_test.go @@ -2,8 +2,117 @@ package usagemetrics -import "github.com/hashicorp/consul/agent/consul/state" +import ( + "testing" + "time" + + "github.com/armon/go-metrics" + "github.com/hashicorp/consul/agent/consul/state" + "github.com/hashicorp/consul/agent/structs" + "github.com/hashicorp/consul/sdk/testutil" + "github.com/stretchr/testify/require" +) func newStateStore() (*state.Store, error) { return state.NewStateStore(nil) } + +func TestUsageReporter_emitServiceUsage_OSS(t *testing.T) { + type testCase struct { + modfiyStateStore func(t *testing.T, s *state.Store) + expectedGauges map[string]metrics.GaugeValue + } + cases := map[string]testCase{ + "empty-state": { + expectedGauges: map[string]metrics.GaugeValue{ + "consul.usage.test.consul.state.nodes;datacenter=dc1": { + Name: "consul.usage.test.consul.state.nodes", + Value: 0, + Labels: []metrics.Label{{Name: "datacenter", Value: "dc1"}}, + }, + "consul.usage.test.consul.state.services;datacenter=dc1": { + Name: "consul.usage.test.consul.state.services", + Value: 0, + Labels: []metrics.Label{ + {Name: "datacenter", Value: "dc1"}, + }, + }, + "consul.usage.test.consul.state.service_instances;datacenter=dc1": { + Name: "consul.usage.test.consul.state.service_instances", + Value: 0, + Labels: []metrics.Label{ + {Name: "datacenter", Value: "dc1"}, + }, + }, + }, + }, + "nodes-and-services": { + modfiyStateStore: func(t *testing.T, s *state.Store) { + require.Nil(t, s.EnsureNode(1, &structs.Node{Node: "foo", Address: "127.0.0.1"})) + require.Nil(t, s.EnsureNode(2, &structs.Node{Node: "bar", Address: "127.0.0.2"})) + require.Nil(t, s.EnsureNode(3, &structs.Node{Node: "baz", Address: "127.0.0.2"})) + + // Typical services and some consul services spread across two nodes + require.Nil(t, s.EnsureService(4, "foo", &structs.NodeService{ID: "db", Service: "db", Tags: nil, Address: "", Port: 5000})) + require.Nil(t, s.EnsureService(5, "bar", &structs.NodeService{ID: "api", Service: "api", Tags: nil, Address: "", Port: 5000})) + require.Nil(t, s.EnsureService(6, "foo", &structs.NodeService{ID: "consul", Service: "consul", Tags: nil})) + require.Nil(t, s.EnsureService(7, "bar", &structs.NodeService{ID: "consul", Service: "consul", Tags: nil})) + }, + expectedGauges: map[string]metrics.GaugeValue{ + "consul.usage.test.consul.state.nodes;datacenter=dc1": { + Name: "consul.usage.test.consul.state.nodes", + Value: 3, + Labels: []metrics.Label{{Name: "datacenter", Value: "dc1"}}, + }, + "consul.usage.test.consul.state.services;datacenter=dc1": { + Name: "consul.usage.test.consul.state.services", + Value: 3, + Labels: []metrics.Label{ + {Name: "datacenter", Value: "dc1"}, + }, + }, + "consul.usage.test.consul.state.service_instances;datacenter=dc1": { + Name: "consul.usage.test.consul.state.service_instances", + Value: 4, + Labels: []metrics.Label{ + {Name: "datacenter", Value: "dc1"}, + }, + }, + }, + }, + } + + for name, tcase := range cases { + t.Run(name, func(t *testing.T) { + // Only have a single interval for the test + sink := metrics.NewInmemSink(1*time.Minute, 1*time.Minute) + cfg := metrics.DefaultConfig("consul.usage.test") + cfg.EnableHostname = false + metrics.NewGlobal(cfg, sink) + + mockStateProvider := &mockStateProvider{} + s, err := newStateStore() + require.NoError(t, err) + if tcase.modfiyStateStore != nil { + tcase.modfiyStateStore(t, s) + } + mockStateProvider.On("State").Return(s) + + reporter, err := NewUsageMetricsReporter( + new(Config). + WithStateProvider(mockStateProvider). + WithLogger(testutil.Logger(t)). + WithDatacenter("dc1"), + ) + require.NoError(t, err) + + reporter.runOnce() + + intervals := sink.Data() + require.Len(t, intervals, 1) + intv := intervals[0] + + require.Equal(t, tcase.expectedGauges, intv.Gauges) + }) + } +} diff --git a/agent/consul/usagemetrics/usagemetrics_test.go b/agent/consul/usagemetrics/usagemetrics_test.go index c293cbb1de..a618386b23 100644 --- a/agent/consul/usagemetrics/usagemetrics_test.go +++ b/agent/consul/usagemetrics/usagemetrics_test.go @@ -21,7 +21,7 @@ func (m *mockStateProvider) State() *state.Store { return retValues.Get(0).(*state.Store) } -func TestUsageReporter_Run(t *testing.T) { +func TestUsageReporter_Run_Nodes(t *testing.T) { type testCase struct { modfiyStateStore func(t *testing.T, s *state.Store) expectedGauges map[string]metrics.GaugeValue @@ -34,33 +34,13 @@ func TestUsageReporter_Run(t *testing.T) { Value: 0, Labels: []metrics.Label{{Name: "datacenter", Value: "dc1"}}, }, - "consul.usage.test.consul.state.services;datacenter=dc1": { - Name: "consul.usage.test.consul.state.services", - Value: 0, - Labels: []metrics.Label{ - {Name: "datacenter", Value: "dc1"}, - }, - }, - "consul.usage.test.consul.state.service_instances;datacenter=dc1": { - Name: "consul.usage.test.consul.state.service_instances", - Value: 0, - Labels: []metrics.Label{ - {Name: "datacenter", Value: "dc1"}, - }, - }, }, }, - "nodes-and-services": { + "nodes": { modfiyStateStore: func(t *testing.T, s *state.Store) { require.Nil(t, s.EnsureNode(1, &structs.Node{Node: "foo", Address: "127.0.0.1"})) require.Nil(t, s.EnsureNode(2, &structs.Node{Node: "bar", Address: "127.0.0.2"})) require.Nil(t, s.EnsureNode(3, &structs.Node{Node: "baz", Address: "127.0.0.2"})) - - // Typical services and some consul services spread across two nodes - require.Nil(t, s.EnsureService(4, "foo", &structs.NodeService{ID: "db", Service: "db", Tags: nil, Address: "", Port: 5000})) - require.Nil(t, s.EnsureService(5, "bar", &structs.NodeService{ID: "api", Service: "api", Tags: nil, Address: "", Port: 5000})) - require.Nil(t, s.EnsureService(6, "foo", &structs.NodeService{ID: "consul", Service: "consul", Tags: nil})) - require.Nil(t, s.EnsureService(7, "bar", &structs.NodeService{ID: "consul", Service: "consul", Tags: nil})) }, expectedGauges: map[string]metrics.GaugeValue{ "consul.usage.test.consul.state.nodes;datacenter=dc1": { @@ -68,20 +48,6 @@ func TestUsageReporter_Run(t *testing.T) { Value: 3, Labels: []metrics.Label{{Name: "datacenter", Value: "dc1"}}, }, - "consul.usage.test.consul.state.services;datacenter=dc1": { - Name: "consul.usage.test.consul.state.services", - Value: 3, - Labels: []metrics.Label{ - {Name: "datacenter", Value: "dc1"}, - }, - }, - "consul.usage.test.consul.state.service_instances;datacenter=dc1": { - Name: "consul.usage.test.consul.state.service_instances", - Value: 4, - Labels: []metrics.Label{ - {Name: "datacenter", Value: "dc1"}, - }, - }, }, }, } @@ -118,8 +84,8 @@ func TestUsageReporter_Run(t *testing.T) { // Range over the expected values instead of just doing an Equal // comparison on the maps because of different metrics emitted between - // OSS and Ent. The enterprise tests have a full equality comparison on - // the maps. + // OSS and Ent. The enterprise and OSS tests have a full equality + // comparison on the maps. for key, expected := range tcase.expectedGauges { require.Equal(t, expected, intv.Gauges[key]) }