add config toggle to disable host metrics and docs

pull/17038/head
Nick Ethier 2023-05-12 16:09:54 -04:00
parent fafb68b28d
commit baffef45bd
12 changed files with 53 additions and 16 deletions

View File

@ -941,6 +941,7 @@ func (b *builder) build() (rt RuntimeConfig, err error) {
CirconusCheckTags: stringVal(c.Telemetry.CirconusCheckTags), CirconusCheckTags: stringVal(c.Telemetry.CirconusCheckTags),
CirconusSubmissionInterval: stringVal(c.Telemetry.CirconusSubmissionInterval), CirconusSubmissionInterval: stringVal(c.Telemetry.CirconusSubmissionInterval),
CirconusSubmissionURL: stringVal(c.Telemetry.CirconusSubmissionURL), CirconusSubmissionURL: stringVal(c.Telemetry.CirconusSubmissionURL),
DisableHostMetrics: boolVal(c.Telemetry.DisableHostMetrics),
DisableHostname: boolVal(c.Telemetry.DisableHostname), DisableHostname: boolVal(c.Telemetry.DisableHostname),
DogstatsdAddr: stringVal(c.Telemetry.DogstatsdAddr), DogstatsdAddr: stringVal(c.Telemetry.DogstatsdAddr),
DogstatsdTags: c.Telemetry.DogstatsdTags, DogstatsdTags: c.Telemetry.DogstatsdTags,

View File

@ -690,6 +690,7 @@ type Telemetry struct {
CirconusCheckTags *string `mapstructure:"circonus_check_tags" json:"circonus_check_tags,omitempty"` CirconusCheckTags *string `mapstructure:"circonus_check_tags" json:"circonus_check_tags,omitempty"`
CirconusSubmissionInterval *string `mapstructure:"circonus_submission_interval" json:"circonus_submission_interval,omitempty"` CirconusSubmissionInterval *string `mapstructure:"circonus_submission_interval" json:"circonus_submission_interval,omitempty"`
CirconusSubmissionURL *string `mapstructure:"circonus_submission_url" json:"circonus_submission_url,omitempty"` CirconusSubmissionURL *string `mapstructure:"circonus_submission_url" json:"circonus_submission_url,omitempty"`
DisableHostMetrics *bool `mapstructure:"disable_host_metrics" json:"disable_host_metrics,omitempty"`
DisableHostname *bool `mapstructure:"disable_hostname" json:"disable_hostname,omitempty"` DisableHostname *bool `mapstructure:"disable_hostname" json:"disable_hostname,omitempty"`
DogstatsdAddr *string `mapstructure:"dogstatsd_addr" json:"dogstatsd_addr,omitempty"` DogstatsdAddr *string `mapstructure:"dogstatsd_addr" json:"dogstatsd_addr,omitempty"`
DogstatsdTags []string `mapstructure:"dogstatsd_tags" json:"dogstatsd_tags,omitempty"` DogstatsdTags []string `mapstructure:"dogstatsd_tags" json:"dogstatsd_tags,omitempty"`

View File

@ -6754,6 +6754,7 @@ func TestLoad_FullConfig(t *testing.T) {
Expiration: 15 * time.Second, Expiration: 15 * time.Second,
Name: "ftO6DySn", // notice this is the same as the metrics prefix Name: "ftO6DySn", // notice this is the same as the metrics prefix
}, },
DisableHostMetrics: true,
}, },
TLS: tlsutil.Config{ TLS: tlsutil.Config{
InternalRPC: tlsutil.ProtocolConfig{ InternalRPC: tlsutil.ProtocolConfig{

View File

@ -462,6 +462,7 @@
"CirconusSubmissionInterval": "", "CirconusSubmissionInterval": "",
"CirconusSubmissionURL": "", "CirconusSubmissionURL": "",
"Disable": false, "Disable": false,
"DisableHostMetrics": false,
"DisableHostname": false, "DisableHostname": false,
"DogstatsdAddr": "", "DogstatsdAddr": "",
"DogstatsdTags": [], "DogstatsdTags": [],

View File

@ -690,6 +690,7 @@ telemetry {
circonus_check_tags = "prvO4uBl" circonus_check_tags = "prvO4uBl"
circonus_submission_interval = "DolzaflP" circonus_submission_interval = "DolzaflP"
circonus_submission_url = "gTcbS93G" circonus_submission_url = "gTcbS93G"
disable_host_metrics = true
disable_hostname = true disable_hostname = true
dogstatsd_addr = "0wSndumK" dogstatsd_addr = "0wSndumK"
dogstatsd_tags = [ "3N81zSUB","Xtj8AnXZ" ] dogstatsd_tags = [ "3N81zSUB","Xtj8AnXZ" ]

View File

@ -808,6 +808,7 @@
"circonus_check_tags": "prvO4uBl", "circonus_check_tags": "prvO4uBl",
"circonus_submission_interval": "DolzaflP", "circonus_submission_interval": "DolzaflP",
"circonus_submission_url": "gTcbS93G", "circonus_submission_url": "gTcbS93G",
"disable_host_metrics": true,
"disable_hostname": true, "disable_hostname": true,
"dogstatsd_addr": "0wSndumK", "dogstatsd_addr": "0wSndumK",
"dogstatsd_tags": [ "dogstatsd_tags": [

View File

@ -119,7 +119,9 @@ func NewBaseDeps(configLoader ConfigLoader, logOut io.Writer, providedLogger hcl
if err != nil { if err != nil {
return d, fmt.Errorf("failed to initialize telemetry: %w", err) return d, fmt.Errorf("failed to initialize telemetry: %w", err)
} }
if !cfg.Telemetry.DisableHostMetrics {
hoststats.NewCollector(context.Background(), d.Logger, cfg.DataDir) hoststats.NewCollector(context.Background(), d.Logger, cfg.DataDir)
}
d.TLSConfigurator, err = tlsutil.NewConfigurator(cfg.TLS, d.Logger) d.TLSConfigurator, err = tlsutil.NewConfigurator(cfg.TLS, d.Logger)
if err != nil { if err != nil {

View File

@ -21,9 +21,9 @@ func (h *cpuStatsCalculator) calculate(times cpu.TimesStat) (idle float64, user
currentIdle := times.Idle currentIdle := times.Idle
currentUser := times.User currentUser := times.User
currentSystem := times.System currentSystem := times.System
currentTotal := times.Total()
currentBusy := times.User + times.System + times.Nice + times.Iowait + times.Irq + currentBusy := times.User + times.System + times.Nice + times.Iowait + times.Irq +
times.Softirq + times.Steal + times.Guest + times.GuestNice times.Softirq + times.Steal + times.Guest + times.GuestNice
currentTotal := currentBusy + currentIdle
deltaTotal := currentTotal - h.prevTotal deltaTotal := currentTotal - h.prevTotal
idle = ((currentIdle - h.prevIdle) / deltaTotal) * 100 idle = ((currentIdle - h.prevIdle) / deltaTotal) * 100

View File

@ -27,12 +27,9 @@ func TestCpuStats_percent(t *testing.T) {
} }
func TestHostStats_CPU(t *testing.T) { func TestHostStats_CPU(t *testing.T) {
assert := assert.New(t)
logger := testutil.Logger(t) logger := testutil.Logger(t)
cwd, err := os.Getwd() cwd, err := os.Getwd()
assert.Nil(err) assert.Nil(t, err)
hs := initCollector(logger, cwd) hs := initCollector(logger, cwd)
// Collect twice so we can calculate percents we need to generate some work // Collect twice so we can calculate percents we need to generate some work
@ -45,18 +42,18 @@ func TestHostStats_CPU(t *testing.T) {
} }
hs.collect() hs.collect()
stats := hs.Stats() stats := hs.Stats()
assert.NotZero(len(stats.CPU)) assert.NotZero(t, len(stats.CPU))
for _, cpu := range stats.CPU { for _, cpu := range stats.CPU {
assert.False(math.IsNaN(cpu.Idle)) assert.False(t, math.IsNaN(cpu.Idle))
assert.False(math.IsNaN(cpu.Total)) assert.False(t, math.IsNaN(cpu.Total))
assert.False(math.IsNaN(cpu.System)) assert.False(t, math.IsNaN(cpu.System))
assert.False(math.IsNaN(cpu.User)) assert.False(t, math.IsNaN(cpu.User))
assert.False(math.IsInf(cpu.Idle, 0)) assert.False(t, math.IsInf(cpu.Idle, 0))
assert.False(math.IsInf(cpu.Total, 0)) assert.False(t, math.IsInf(cpu.Total, 0))
assert.False(math.IsInf(cpu.System, 0)) assert.False(t, math.IsInf(cpu.System, 0))
assert.False(math.IsInf(cpu.User, 0)) assert.False(t, math.IsInf(cpu.User, 0))
} }
} }

View File

@ -210,6 +210,11 @@ type TelemetryConfig struct {
// //
// hcl: telemetry { prometheus_retention_time = "duration" } // hcl: telemetry { prometheus_retention_time = "duration" }
PrometheusOpts prometheus.PrometheusOpts PrometheusOpts prometheus.PrometheusOpts
// DisableHostMetrics disables the metrics collected about the host system, such as CPU, memory, and disk usage.
//
// hcl: telemetry { disable_host_metrics = (true|false) }
DisableHostMetrics bool
} }
// MetricsHandler provides an http.Handler for displaying metrics. // MetricsHandler provides an http.Handler for displaying metrics.

View File

@ -1817,6 +1817,9 @@ subsystem that provides Consul's service mesh capabilities.
be used based on **where** this particular instance is running (e.g. a specific be used based on **where** this particular instance is running (e.g. a specific
geo location or datacenter, dc:sfo). By default, this is left blank and not used. geo location or datacenter, dc:sfo). By default, this is left blank and not used.
- `disable_host_metrics` ((#telemetry-disable_host_metrics))
This disables reporting of host metrics about system resources, defaults to false.
- `disable_hostname` ((#telemetry-disable_hostname)) - `disable_hostname` ((#telemetry-disable_hostname))
This controls whether or not to prepend runtime telemetry with the machine's This controls whether or not to prepend runtime telemetry with the machine's
hostname, defaults to false. hostname, defaults to false.

View File

@ -755,3 +755,27 @@ Consul attaches the following labels to metric values.
| `peer_id` | The ID of a peer connected to the reporting cluster or leader. | Any UUID | | `peer_id` | The ID of a peer connected to the reporting cluster or leader. | Any UUID |
| `partition` | <EnterpriseAlert inline /> Name of the partition that the peering is created in. | Any defined partition name in the cluster | | `partition` | <EnterpriseAlert inline /> Name of the partition that the peering is created in. | Any defined partition name in the cluster |
## Server Host Metrics
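Consul servers report the following metrics about the host's system resources. To turn these metrics off, set `disable_host_metrics` to `true` in the agent's `telemetry` configuration block.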
**Requirements:**
- Consul 1.15.3+
| Metric | Description | Unit | Type |
| ----------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------- | ------- |
| `consul.host.memory.total` | The total physical memory in bytes | bytes | gauge |
| `consul.host.memory.available` | The available physical memory in bytes | bytes | gauge |
| `consul.host.memory.free` | The free physical memory in bytes | bytes | gauge |
| `consul.host.memory.used` | The used physical memory in bytes | bytes | gauge |
| `consul.host.memory.used_percent` | The used physical memory as a percentage of total physical memory | percentage | gauge |
| `consul.host.cpu.total` | The host's total CPU utilization | percentage | gauge |
| `consul.host.cpu.user` | The CPU utilization in user space | percentage | gauge |
| `consul.host.cpu.idle` | The CPU utilization in idle state | percentage | gauge |
| `consul.host.cpu.system` | The CPU utilization in system space | percentage | gauge |
| `consul.host.disk.size` | The size in bytes of the data_dir disk | bytes | gauge |
| `consul.host.disk.used` | The number of bytes used on the data_dir disk | bytes | gauge |
| `consul.host.disk.available` | The number of bytes available on the data_dir disk | bytes | gauge |
| `consul.host.disk.used_percent` | The percentage of disk space used on the data_dir disk | percentage | gauge |
| `consul.host.disk.inodes_percent` | The percentage of inode usage on the data_dir disk | percentage | gauge |
| `consul.host.uptime` | The uptime of the host in seconds | seconds | gauge |