diff --git a/agent/config/builder.go b/agent/config/builder.go index 87ee229406..845218b018 100644 --- a/agent/config/builder.go +++ b/agent/config/builder.go @@ -941,6 +941,7 @@ func (b *builder) build() (rt RuntimeConfig, err error) { CirconusCheckTags: stringVal(c.Telemetry.CirconusCheckTags), CirconusSubmissionInterval: stringVal(c.Telemetry.CirconusSubmissionInterval), CirconusSubmissionURL: stringVal(c.Telemetry.CirconusSubmissionURL), + DisableHostMetrics: boolVal(c.Telemetry.DisableHostMetrics), DisableHostname: boolVal(c.Telemetry.DisableHostname), DogstatsdAddr: stringVal(c.Telemetry.DogstatsdAddr), DogstatsdTags: c.Telemetry.DogstatsdTags, diff --git a/agent/config/config.go b/agent/config/config.go index a1f4145292..c9098ec78e 100644 --- a/agent/config/config.go +++ b/agent/config/config.go @@ -690,6 +690,7 @@ type Telemetry struct { CirconusCheckTags *string `mapstructure:"circonus_check_tags" json:"circonus_check_tags,omitempty"` CirconusSubmissionInterval *string `mapstructure:"circonus_submission_interval" json:"circonus_submission_interval,omitempty"` CirconusSubmissionURL *string `mapstructure:"circonus_submission_url" json:"circonus_submission_url,omitempty"` + DisableHostMetrics *bool `mapstructure:"disable_host_metrics" json:"disable_host_metrics,omitempty"` DisableHostname *bool `mapstructure:"disable_hostname" json:"disable_hostname,omitempty"` DogstatsdAddr *string `mapstructure:"dogstatsd_addr" json:"dogstatsd_addr,omitempty"` DogstatsdTags []string `mapstructure:"dogstatsd_tags" json:"dogstatsd_tags,omitempty"` diff --git a/agent/config/runtime_test.go b/agent/config/runtime_test.go index 1064829cd3..a078107e1e 100644 --- a/agent/config/runtime_test.go +++ b/agent/config/runtime_test.go @@ -6754,6 +6754,7 @@ func TestLoad_FullConfig(t *testing.T) { Expiration: 15 * time.Second, Name: "ftO6DySn", // notice this is the same as the metrics prefix }, + DisableHostMetrics: true, }, TLS: tlsutil.Config{ InternalRPC: tlsutil.ProtocolConfig{ diff 
--git a/agent/config/testdata/TestRuntimeConfig_Sanitize.golden b/agent/config/testdata/TestRuntimeConfig_Sanitize.golden index c17636eef7..5ebb8b178e 100644 --- a/agent/config/testdata/TestRuntimeConfig_Sanitize.golden +++ b/agent/config/testdata/TestRuntimeConfig_Sanitize.golden @@ -462,6 +462,7 @@ "CirconusSubmissionInterval": "", "CirconusSubmissionURL": "", "Disable": false, + "DisableHostMetrics": false, "DisableHostname": false, "DogstatsdAddr": "", "DogstatsdTags": [], @@ -510,4 +511,4 @@ "VersionPrerelease": "", "Watches": [], "XDSUpdateRateLimit": 0 -} \ No newline at end of file +} diff --git a/agent/config/testdata/full-config.hcl b/agent/config/testdata/full-config.hcl index c29c334b95..912f5dfe70 100644 --- a/agent/config/testdata/full-config.hcl +++ b/agent/config/testdata/full-config.hcl @@ -690,6 +690,7 @@ telemetry { circonus_check_tags = "prvO4uBl" circonus_submission_interval = "DolzaflP" circonus_submission_url = "gTcbS93G" + disable_host_metrics = true disable_hostname = true dogstatsd_addr = "0wSndumK" dogstatsd_tags = [ "3N81zSUB","Xtj8AnXZ" ] diff --git a/agent/config/testdata/full-config.json b/agent/config/testdata/full-config.json index 7640394a4f..58f2e54acb 100644 --- a/agent/config/testdata/full-config.json +++ b/agent/config/testdata/full-config.json @@ -808,6 +808,7 @@ "circonus_check_tags": "prvO4uBl", "circonus_submission_interval": "DolzaflP", "circonus_submission_url": "gTcbS93G", + "disable_host_metrics": true, "disable_hostname": true, "dogstatsd_addr": "0wSndumK", "dogstatsd_tags": [ diff --git a/agent/setup.go b/agent/setup.go index a978744704..2419b07868 100644 --- a/agent/setup.go +++ b/agent/setup.go @@ -119,7 +119,9 @@ func NewBaseDeps(configLoader ConfigLoader, logOut io.Writer, providedLogger hcl if err != nil { return d, fmt.Errorf("failed to initialize telemetry: %w", err) } - hoststats.NewCollector(context.Background(), d.Logger, cfg.DataDir) + if !cfg.Telemetry.DisableHostMetrics { + 
hoststats.NewCollector(context.Background(), d.Logger, cfg.DataDir) + } d.TLSConfigurator, err = tlsutil.NewConfigurator(cfg.TLS, d.Logger) if err != nil { diff --git a/lib/hoststats/cpu.go b/lib/hoststats/cpu.go index 0fc3fc28c4..f00c6a91df 100644 --- a/lib/hoststats/cpu.go +++ b/lib/hoststats/cpu.go @@ -21,9 +21,9 @@ func (h *cpuStatsCalculator) calculate(times cpu.TimesStat) (idle float64, user currentIdle := times.Idle currentUser := times.User currentSystem := times.System - currentTotal := times.Total() currentBusy := times.User + times.System + times.Nice + times.Iowait + times.Irq + times.Softirq + times.Steal + times.Guest + times.GuestNice + currentTotal := currentBusy + currentIdle deltaTotal := currentTotal - h.prevTotal idle = ((currentIdle - h.prevIdle) / deltaTotal) * 100 diff --git a/lib/hoststats/cpu_test.go b/lib/hoststats/cpu_test.go index 6de0823a96..421ec6fb7c 100644 --- a/lib/hoststats/cpu_test.go +++ b/lib/hoststats/cpu_test.go @@ -27,12 +27,9 @@ func TestCpuStats_percent(t *testing.T) { } func TestHostStats_CPU(t *testing.T) { - - assert := assert.New(t) - logger := testutil.Logger(t) cwd, err := os.Getwd() - assert.Nil(err) + assert.Nil(t, err) hs := initCollector(logger, cwd) // Collect twice so we can calculate percents we need to generate some work @@ -45,18 +42,18 @@ func TestHostStats_CPU(t *testing.T) { } hs.collect() stats := hs.Stats() - assert.NotZero(len(stats.CPU)) + assert.NotZero(t, len(stats.CPU)) for _, cpu := range stats.CPU { - assert.False(math.IsNaN(cpu.Idle)) - assert.False(math.IsNaN(cpu.Total)) - assert.False(math.IsNaN(cpu.System)) - assert.False(math.IsNaN(cpu.User)) + assert.False(t, math.IsNaN(cpu.Idle)) + assert.False(t, math.IsNaN(cpu.Total)) + assert.False(t, math.IsNaN(cpu.System)) + assert.False(t, math.IsNaN(cpu.User)) - assert.False(math.IsInf(cpu.Idle, 0)) - assert.False(math.IsInf(cpu.Total, 0)) - assert.False(math.IsInf(cpu.System, 0)) - assert.False(math.IsInf(cpu.User, 0)) + assert.False(t, 
math.IsInf(cpu.Idle, 0)) + assert.False(t, math.IsInf(cpu.Total, 0)) + assert.False(t, math.IsInf(cpu.System, 0)) + assert.False(t, math.IsInf(cpu.User, 0)) } } diff --git a/lib/telemetry.go b/lib/telemetry.go index 2d87707c33..9b853169ec 100644 --- a/lib/telemetry.go +++ b/lib/telemetry.go @@ -210,6 +210,11 @@ type TelemetryConfig struct { // // hcl: telemetry { prometheus_retention_time = "duration" } PrometheusOpts prometheus.PrometheusOpts + + // DisableHostMetrics will disable metrics collected about the host system such as CPU, memory, and disk usage. + // + // hcl: telemetry { disable_host_metrics = (true|false) } + DisableHostMetrics bool } // MetricsHandler provides an http.Handler for displaying metrics. diff --git a/website/content/docs/agent/config/config-files.mdx b/website/content/docs/agent/config/config-files.mdx index 3d6efc68f0..8e5dd9ed4d 100644 --- a/website/content/docs/agent/config/config-files.mdx +++ b/website/content/docs/agent/config/config-files.mdx @@ -1817,6 +1817,9 @@ subsystem that provides Consul's service mesh capabilities. be used based on **where** this particular instance is running (e.g. a specific geo location or datacenter, dc:sfo). By default, this is left blank and not used. + - `disable_host_metrics` ((#telemetry-disable_host_metrics)) + This disables reporting of host metrics about system resources, defaults to false. + - `disable_hostname` ((#telemetry-disable_hostname)) This controls whether or not to prepend runtime telemetry with the machine's hostname, defaults to false. diff --git a/website/content/docs/agent/telemetry.mdx b/website/content/docs/agent/telemetry.mdx index 27626d9be8..e9b56d0b03 100644 --- a/website/content/docs/agent/telemetry.mdx +++ b/website/content/docs/agent/telemetry.mdx @@ -755,3 +755,27 @@ Consul attaches the following labels to metric values. | `peer_id` | The ID of a peer connected to the reporting cluster or leader.
| Any UUID | | `partition` | Name of the partition that the peering is created in. | Any defined partition name in the cluster | +## Server Host Metrics + +Consul servers report the following metrics about the host's system resources. + +**Requirements:** +- Consul 1.15.3+ + +| Metric | Description | Unit | Type | +| ----------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------- | ------- | +| `consul.host.memory.total` | The total physical memory in bytes | mixed | mixed | +| `consul.host.memory.available` | The available physical memory in bytes | mixed | mixed | +| `consul.host.memory.free` | The free physical memory in bytes | mixed | mixed | +| `consul.host.memory.used` | The used physical memory in bytes | mixed | mixed | +| `consul.host.memory.used_percent` | The used physical memory as a percentage of total physical memory | mixed | mixed | +| `consul.host.cpu.total` | The host's total CPU utilization | | | +| `consul.host.cpu.user` | The CPU utilization in user space | | | +| `consul.host.cpu.idle` | The CPU utilization in idle state | | | +| `consul.host.cpu.system` | The CPU utilization in system space | | | +| `consul.host.disk.size` | The size in bytes of the data_dir disk | | | +| `consul.host.disk.used` | The number of bytes used on the data_dir disk | | | +| `consul.host.disk.available` | The number of bytes available on the data_dir disk | | | +| `consul.host.disk.used_percent` | The percentage of disk space used on the data_dir disk | | | +| `consul.host.disk.inodes_percent` | The percentage of inode usage on the data_dir disk | | | +| `consul.host.uptime` | The uptime of the host in seconds | | |