Remove redundant usage metrics (#20674)

* Remove redundant usage metrics

* Add the changelog

* Update website/content/docs/upgrading/upgrade-specific.mdx

Co-authored-by: Jeff Boruszak <104028618+boruszak@users.noreply.github.com>

* Update website/content/docs/upgrading/upgrade-specific.mdx

Co-authored-by: Jeff Boruszak <104028618+boruszak@users.noreply.github.com>

* Update website/content/docs/upgrading/upgrade-specific.mdx

Co-authored-by: Jeff Boruszak <104028618+boruszak@users.noreply.github.com>

* Update website/content/docs/upgrading/upgrade-specific.mdx

Co-authored-by: Jeff Boruszak <104028618+boruszak@users.noreply.github.com>

* Update website/content/docs/upgrading/upgrade-specific.mdx

Co-authored-by: Jeff Boruszak <104028618+boruszak@users.noreply.github.com>

---------

Co-authored-by: Jeff Boruszak <104028618+boruszak@users.noreply.github.com>
pull/20781/head^2
Matt Keeler 2024-03-05 14:09:47 -05:00 committed by GitHub
parent 4e7982a5b7
commit abe14f11e6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 99 additions and 632 deletions

7
.changelog/20674.txt Normal file
View File

@ -0,0 +1,7 @@
```release-note:breaking-change
telemetry: State store usage metrics with a double `consul` element in the metric name have been removed. Please use the same metric without the second `consul` instead. For example, use `consul.state.config_entries` instead of `consul.consul.state.config_entries`.
```
```release-note:improvement
telemetry: Improved the performance of usage metrics emission by not outputting redundant metrics.
```

View File

@ -20,74 +20,38 @@ import (
)
var Gauges = []prometheus.GaugeDefinition{
{
Name: []string{"consul", "state", "nodes"},
Help: "Deprecated - please use state_nodes instead.",
},
{
Name: []string{"state", "nodes"},
Help: "Measures the current number of nodes registered with Consul. It is only emitted by Consul servers. Added in v1.9.0.",
},
{
Name: []string{"consul", "state", "peerings"},
Help: "Deprecated - please use state_peerings instead.",
},
{
Name: []string{"state", "peerings"},
Help: "Measures the current number of peerings registered with Consul. It is only emitted by Consul servers. Added in v1.13.0.",
},
{
Name: []string{"consul", "state", "services"},
Help: "Deprecated - please use state_services instead.",
},
{
Name: []string{"state", "services"},
Help: "Measures the current number of unique services registered with Consul, based on service name. It is only emitted by Consul servers. Added in v1.9.0.",
},
{
Name: []string{"consul", "state", "service_instances"},
Help: "Deprecated - please use state_service_instances instead.",
},
{
Name: []string{"state", "service_instances"},
Help: "Measures the current number of unique services registered with Consul, based on service name. It is only emitted by Consul servers. Added in v1.9.0.",
},
{
Name: []string{"consul", "members", "clients"},
Help: "Deprecated - please use members_clients instead.",
},
{
Name: []string{"members", "clients"},
Help: "Measures the current number of client agents registered with Consul. It is only emitted by Consul servers. Added in v1.9.6.",
},
{
Name: []string{"consul", "members", "servers"},
Help: "Deprecated - please use members_servers instead.",
},
{
Name: []string{"members", "servers"},
Help: "Measures the current number of server agents registered with Consul. It is only emitted by Consul servers. Added in v1.9.6.",
},
{
Name: []string{"consul", "state", "kv_entries"},
Help: "Deprecated - please use kv_entries instead.",
},
{
Name: []string{"state", "kv_entries"},
Help: "Measures the current number of entries in the Consul KV store. It is only emitted by Consul servers. Added in v1.10.3.",
},
{
Name: []string{"consul", "state", "connect_instances"},
Help: "Deprecated - please use state_connect_instances instead.",
},
{
Name: []string{"state", "connect_instances"},
Help: "Measures the current number of unique connect service instances registered with Consul, labeled by Kind. It is only emitted by Consul servers. Added in v1.10.4.",
},
{
Name: []string{"consul", "state", "config_entries"},
Help: "Deprecated - please use state_config_entries instead.",
},
{
Name: []string{"state", "config_entries"},
Help: "Measures the current number of unique configuration entries registered with Consul, labeled by Kind. It is only emitted by Consul servers. Added in v1.10.4.",

View File

@ -15,11 +15,6 @@ import (
)
func (u *UsageMetricsReporter) emitNodeUsage(nodeUsage state.NodeUsage) {
metrics.SetGaugeWithLabels(
[]string{"consul", "state", "nodes"},
float32(nodeUsage.Nodes),
u.metricLabels,
)
metrics.SetGaugeWithLabels(
[]string{"state", "nodes"},
float32(nodeUsage.Nodes),
@ -28,11 +23,6 @@ func (u *UsageMetricsReporter) emitNodeUsage(nodeUsage state.NodeUsage) {
}
func (u *UsageMetricsReporter) emitPeeringUsage(peeringUsage state.PeeringUsage) {
metrics.SetGaugeWithLabels(
[]string{"consul", "state", "peerings"},
float32(peeringUsage.Peerings),
u.metricLabels,
)
metrics.SetGaugeWithLabels(
[]string{"state", "peerings"},
float32(peeringUsage.Peerings),
@ -54,22 +44,12 @@ func (u *UsageMetricsReporter) emitMemberUsage(members []serf.Member) {
}
}
metrics.SetGaugeWithLabels(
[]string{"consul", "members", "clients"},
float32(clients),
u.metricLabels,
)
metrics.SetGaugeWithLabels(
[]string{"members", "clients"},
float32(clients),
u.metricLabels,
)
metrics.SetGaugeWithLabels(
[]string{"consul", "members", "servers"},
float32(servers),
u.metricLabels,
)
metrics.SetGaugeWithLabels(
[]string{"members", "servers"},
float32(servers),
@ -78,22 +58,12 @@ func (u *UsageMetricsReporter) emitMemberUsage(members []serf.Member) {
}
func (u *UsageMetricsReporter) emitServiceUsage(serviceUsage structs.ServiceUsage) {
metrics.SetGaugeWithLabels(
[]string{"consul", "state", "services"},
float32(serviceUsage.Services),
u.metricLabels,
)
metrics.SetGaugeWithLabels(
[]string{"state", "services"},
float32(serviceUsage.Services),
u.metricLabels,
)
metrics.SetGaugeWithLabels(
[]string{"consul", "state", "service_instances"},
float32(serviceUsage.ServiceInstances),
u.metricLabels,
)
metrics.SetGaugeWithLabels(
[]string{"state", "service_instances"},
float32(serviceUsage.ServiceInstances),
@ -106,11 +76,6 @@ func (u *UsageMetricsReporter) emitServiceUsage(serviceUsage structs.ServiceUsag
)
for k, i := range serviceUsage.ConnectServiceInstances {
metrics.SetGaugeWithLabels(
[]string{"consul", "state", "connect_instances"},
float32(i),
append(u.metricLabels, metrics.Label{Name: "kind", Value: k}),
)
metrics.SetGaugeWithLabels(
[]string{"state", "connect_instances"},
float32(i),
@ -120,11 +85,6 @@ func (u *UsageMetricsReporter) emitServiceUsage(serviceUsage structs.ServiceUsag
}
func (u *UsageMetricsReporter) emitKVUsage(kvUsage state.KVUsage) {
metrics.SetGaugeWithLabels(
[]string{"consul", "state", "kv_entries"},
float32(kvUsage.KVCount),
u.metricLabels,
)
metrics.SetGaugeWithLabels(
[]string{"state", "kv_entries"},
float32(kvUsage.KVCount),
@ -134,11 +94,6 @@ func (u *UsageMetricsReporter) emitKVUsage(kvUsage state.KVUsage) {
func (u *UsageMetricsReporter) emitConfigEntryUsage(configUsage state.ConfigEntryUsage) {
for k, i := range configUsage.ConfigByKind {
metrics.SetGaugeWithLabels(
[]string{"consul", "state", "config_entries"},
float32(i),
append(u.metricLabels, metrics.Label{Name: "kind", Value: k}),
)
metrics.SetGaugeWithLabels(
[]string{"state", "config_entries"},
float32(i),

File diff suppressed because it is too large Load Diff

View File

@ -4,13 +4,21 @@
package usagemetrics
import (
"fmt"
"testing"
"time"
"github.com/armon/go-metrics"
"github.com/armon/go-metrics/prometheus"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/mock"
"github.com/stretchr/testify/require"
"github.com/hashicorp/consul/agent/consul/state"
"github.com/hashicorp/consul/agent/structs"
"github.com/hashicorp/consul/lib"
"github.com/hashicorp/go-hclog"
"github.com/hashicorp/serf/serf"
)
type mockStateProvider struct {
@ -39,3 +47,75 @@ func assertEqualGaugeMaps(t *testing.T, expectedMap, foundMap map[string]metrics
assert.Equal(t, expected, foundMap[key], "gauge key mismatch on %q", key)
}
}
func BenchmarkRunOnce(b *testing.B) {
const index = 123
store := state.NewStateStore(nil)
// This loop generates:
//
// 4 (service kind) * 100 (service) * 5 * (node) = 2000 proxy services. And 500 non-proxy services.
for _, kind := range []structs.ServiceKind{
// These will be included in the count.
structs.ServiceKindConnectProxy,
structs.ServiceKindIngressGateway,
structs.ServiceKindTerminatingGateway,
structs.ServiceKindMeshGateway,
// This one will not.
structs.ServiceKindTypical,
} {
for i := 0; i < 100; i++ {
serviceName := fmt.Sprintf("%s-%d", kind, i)
for j := 0; j < 5; j++ {
nodeName := fmt.Sprintf("%s-node-%d", serviceName, j)
require.NoError(b, store.EnsureRegistration(index, &structs.RegisterRequest{
Node: nodeName,
Service: &structs.NodeService{
ID: serviceName,
Service: serviceName,
Kind: kind,
},
}))
}
}
}
benchmarkRunOnce(b, store)
}
// benchmarkRunOnce initializes telemetry, builds a UsageMetricsReporter backed
// by the given store, and times b.N calls to runOnce. Setup work is excluded
// from the measurement by resetting the timer just before the loop.
func benchmarkRunOnce(b *testing.B, store *state.Store) {
	b.Helper()

	telemetryCfg := lib.TelemetryConfig{
		MetricsPrefix: "consul",
		FilterDefault: true,
		PrometheusOpts: prometheus.PrometheusOpts{
			Expiration: 30 * time.Second,
			Name:       "consul",
		},
	}
	lib.InitTelemetry(telemetryCfg, hclog.NewNullLogger())

	// The provider always returns the same pre-populated store.
	provider := benchStateProvider(func() *state.Store { return store })
	// No serf members are reported to the reporter.
	noMembers := func() []serf.Member { return nil }

	reporter, err := NewUsageMetricsReporter(&Config{
		stateProvider:  provider,
		logger:         hclog.NewNullLogger(),
		getMembersFunc: noMembers,
	})
	require.NoError(b, err)

	b.ResetTimer()
	for iter := 0; iter < b.N; iter++ {
		reporter.runOnce()
	}
}
// benchStateProvider adapts a plain function returning a *state.Store so it
// can be used wherever a value with a State method is expected.
type benchStateProvider func() *state.Store

// State invokes the underlying function and returns the store it yields.
func (fn benchStateProvider) State() *state.Store {
	store := fn()
	return store
}

View File

@ -14,6 +14,17 @@ provided for their upgrades as a result of new features or changed behavior.
This page is used to document those details separately from the standard
upgrade flow.
## Consul v1.19.x
### Metrics removal
In previous versions, Consul emitted redundant state store usage metrics that contained two instances of `consul` in the metric name. As an example, config entry usage counts were emitted as both:
- `consul.state.config_entries`
- `consul.consul.state.config_entries`
As of Consul v1.19, Consul no longer emits the redundant metric with the double `consul.consul` in its name. Any monitoring alerts or dashboards that use these metrics may need to be updated to the simplified metric name.
## Consul 1.17.x
### Known issues