2018-05-08 10:10:29 +00:00
{
prometheusAlerts+:: {
groups+: [
{
2018-08-06 08:41:18 +00:00
name: 'node-exporter',
2018-05-08 10:10:29 +00:00
rules: [
{
  // Warning tier. Fires when all three conditions hold for 1h:
  //   * available space, as a percentage of size, is below
  //     fsSpaceFillingUpWarningThreshold;
  //   * a linear extrapolation over fsSpaceFillingUpPredictionWindow puts the
  //     available bytes below zero 24 hours from now;
  //   * the filesystem's readonly metric is 0.
  alert: 'NodeFilesystemSpaceFillingUp',
  expr: |||
    (
      node_filesystem_avail_bytes{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s} / node_filesystem_size_bytes{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s} * 100 < %(fsSpaceFillingUpWarningThreshold)d
    and
      predict_linear(node_filesystem_avail_bytes{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s}[%(fsSpaceFillingUpPredictionWindow)s], 24*60*60) < 0
    and
      node_filesystem_readonly{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s} == 0
    )
  ||| % $._config,
  'for': '1h',
  labels: { severity: 'warning' },
  annotations: {
    summary: 'Filesystem is predicted to run out of space within the next 24 hours.',
    description: 'Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint }}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left and is filling up.',
  },
},
{
  // Critical tier. Same shape as the warning tier of this alert, but compares
  // against fsSpaceFillingUpCriticalThreshold and extrapolates 4 hours ahead.
  // NOTE(review): the extrapolation window is fixed at 6h here, whereas the
  // warning tier reads it from fsSpaceFillingUpPredictionWindow - confirm
  // this difference is intended.
  alert: 'NodeFilesystemSpaceFillingUp',
  expr: |||
    (
      node_filesystem_avail_bytes{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s} / node_filesystem_size_bytes{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s} * 100 < %(fsSpaceFillingUpCriticalThreshold)d
    and
      predict_linear(node_filesystem_avail_bytes{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s}[6h], 4*60*60) < 0
    and
      node_filesystem_readonly{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s} == 0
    )
  ||| % $._config,
  'for': '1h',
  // Severity label is taken from configuration rather than hard-coded.
  labels: { severity: '%(nodeCriticalSeverity)s' % $._config },
  annotations: {
    summary: 'Filesystem is predicted to run out of space within the next 4 hours.',
    description: 'Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint }}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left and is filling up fast.',
  },
},
{
  // Warning tier. Fires when available space, as a percentage of size, stays
  // below fsSpaceAvailableWarningThreshold for 30m on a filesystem whose
  // readonly metric is 0. The same threshold is rendered into the summary.
  alert: 'NodeFilesystemAlmostOutOfSpace',
  expr: |||
    (
      node_filesystem_avail_bytes{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s} / node_filesystem_size_bytes{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s} * 100 < %(fsSpaceAvailableWarningThreshold)d
    and
      node_filesystem_readonly{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s} == 0
    )
  ||| % $._config,
  'for': '30m',
  labels: { severity: 'warning' },
  annotations: {
    // '%%' yields a literal percent sign after formatting.
    summary: 'Filesystem has less than %(fsSpaceAvailableWarningThreshold)d%% space left.' % $._config,
    description: 'Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint }}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left.',
  },
},
{
  // Critical tier. Fires when available space, as a percentage of size, stays
  // below fsSpaceAvailableCriticalThreshold for 30m on a filesystem whose
  // readonly metric is 0. The same threshold is rendered into the summary.
  alert: 'NodeFilesystemAlmostOutOfSpace',
  expr: |||
    (
      node_filesystem_avail_bytes{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s} / node_filesystem_size_bytes{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s} * 100 < %(fsSpaceAvailableCriticalThreshold)d
    and
      node_filesystem_readonly{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s} == 0
    )
  ||| % $._config,
  'for': '30m',
  // Severity label is taken from configuration rather than hard-coded.
  labels: { severity: '%(nodeCriticalSeverity)s' % $._config },
  annotations: {
    // '%%' yields a literal percent sign after formatting.
    summary: 'Filesystem has less than %(fsSpaceAvailableCriticalThreshold)d%% space left.' % $._config,
    description: 'Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint }}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left.',
  },
},
{
  // Warning tier for inode exhaustion. Fires when all three conditions hold
  // for 1h:
  //   * free inodes, as a percentage of total inodes, are below 40;
  //   * a linear extrapolation over the last 6h puts free inodes below zero
  //     24 hours from now;
  //   * the filesystem's readonly metric is 0.
  // The 40 and 6h values are literals in the expression, not config keys.
  alert: 'NodeFilesystemFilesFillingUp',
  expr: |||
    (
      node_filesystem_files_free{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s} / node_filesystem_files{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s} * 100 < 40
    and
      predict_linear(node_filesystem_files_free{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s}[6h], 24*60*60) < 0
    and
      node_filesystem_readonly{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s} == 0
    )
  ||| % $._config,
  'for': '1h',
  labels: { severity: 'warning' },
  annotations: {
    summary: 'Filesystem is predicted to run out of inodes within the next 24 hours.',
    description: 'Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint }}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left and is filling up.',
  },
},
{
  // Critical tier for inode exhaustion. Fires when all three conditions hold
  // for 1h:
  //   * free inodes, as a percentage of total inodes, are below 20;
  //   * a linear extrapolation over the last 6h puts free inodes below zero
  //     4 hours from now;
  //   * the filesystem's readonly metric is 0.
  // The 20 and 6h values are literals in the expression, not config keys.
  alert: 'NodeFilesystemFilesFillingUp',
  expr: |||
    (
      node_filesystem_files_free{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s} / node_filesystem_files{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s} * 100 < 20
    and
      predict_linear(node_filesystem_files_free{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s}[6h], 4*60*60) < 0
    and
      node_filesystem_readonly{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s} == 0
    )
  ||| % $._config,
  'for': '1h',
  // Severity label is taken from configuration rather than hard-coded.
  labels: { severity: '%(nodeCriticalSeverity)s' % $._config },
  annotations: {
    summary: 'Filesystem is predicted to run out of inodes within the next 4 hours.',
    description: 'Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint }}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left and is filling up fast.',
  },
},
{
  // Warning tier. Fires when free inodes, as a percentage of total inodes,
  // stay below 5 for 1h on a filesystem whose readonly metric is 0.
  // The threshold is a literal and is repeated in the summary text.
  alert: 'NodeFilesystemAlmostOutOfFiles',
  expr: |||
    (
      node_filesystem_files_free{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s} / node_filesystem_files{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s} * 100 < 5
    and
      node_filesystem_readonly{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s} == 0
    )
  ||| % $._config,
  'for': '1h',
  labels: { severity: 'warning' },
  annotations: {
    summary: 'Filesystem has less than 5% inodes left.',
    description: 'Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint }}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left.',
  },
},
{
  // Critical tier. Fires when free inodes, as a percentage of total inodes,
  // stay below 3 for 1h on a filesystem whose readonly metric is 0.
  // The threshold is a literal and is repeated in the summary text.
  alert: 'NodeFilesystemAlmostOutOfFiles',
  expr: |||
    (
      node_filesystem_files_free{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s} / node_filesystem_files{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s} * 100 < 3
    and
      node_filesystem_readonly{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s} == 0
    )
  ||| % $._config,
  'for': '1h',
  // Severity label is taken from configuration rather than hard-coded.
  labels: { severity: '%(nodeCriticalSeverity)s' % $._config },
  annotations: {
    summary: 'Filesystem has less than 3% inodes left.',
    description: 'Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint }}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left.',
  },
},
{
  // Fires when, for 1h, the 2m rate of receive errors divided by the 2m rate
  // of received packets exceeds 0.01 (i.e. more than 1% of packets errored).
  alert: 'NodeNetworkReceiveErrs',
  expr: |||
    rate(node_network_receive_errs_total{%(nodeExporterSelector)s}[2m]) / rate(node_network_receive_packets_total{%(nodeExporterSelector)s}[2m]) > 0.01
  ||| % $._config,
  'for': '1h',
  labels: {
    severity: 'warning',
  },
  annotations: {
    summary: 'Network interface is reporting many receive errors.',
    // $value is the error/packet ratio produced by the expression, not an
    // error count, so it is rendered as a percentage of packets. Formatting
    // it with "%.0f" as a count would print "0" for typical firing values.
    description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered receive errors on {{ $value | humanizePercentage }} of packets in the last two minutes.',
  },
},
{
  // Fires when, for 1h, the 2m rate of transmit errors divided by the 2m rate
  // of transmitted packets exceeds 0.01 (i.e. more than 1% of packets errored).
  alert: 'NodeNetworkTransmitErrs',
  expr: |||
    rate(node_network_transmit_errs_total{%(nodeExporterSelector)s}[2m]) / rate(node_network_transmit_packets_total{%(nodeExporterSelector)s}[2m]) > 0.01
  ||| % $._config,
  'for': '1h',
  labels: {
    severity: 'warning',
  },
  annotations: {
    summary: 'Network interface is reporting many transmit errors.',
    // $value is the error/packet ratio produced by the expression, not an
    // error count, so it is rendered as a percentage of packets. Formatting
    // it with "%.0f" as a count would print "0" for typical firing values.
    description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered transmit errors on {{ $value | humanizePercentage }} of packets in the last two minutes.',
  },
},
2020-03-05 06:55:11 +00:00
{
  // Fires as soon as the ratio of conntrack entries to the conntrack limit
  // exceeds 0.75 (no 'for' hold period is configured).
  alert: 'NodeHighNumberConntrackEntriesUsed',
  // Both sides carry the node-exporter selector so the limit series is scoped
  // to the same targets as the entries series, as in the other rules of this
  // group. The division still matches one-to-one on the full label set.
  expr: |||
    (node_nf_conntrack_entries{%(nodeExporterSelector)s} / node_nf_conntrack_entries_limit{%(nodeExporterSelector)s}) > 0.75
  ||| % $._config,
  labels: {
    severity: 'warning',
  },
  annotations: {
    summary: 'Number of conntrack are getting close to the limit.',
    // $value is a 0..1 ratio here, hence humanizePercentage.
    description: '{{ $value | humanizePercentage }} of conntrack entries are used.',
  },
},
{
  // Fires as soon as node_textfile_scrape_error equals 1 for a selected
  // target (no 'for' hold period is configured).
  alert: 'NodeTextFileCollectorScrapeError',
  expr: |||
    node_textfile_scrape_error{%(nodeExporterSelector)s} == 1
  ||| % $._config,
  labels: { severity: 'warning' },
  annotations: {
    summary: 'Node Exporter text file collector failed to scrape.',
    description: 'Node Exporter text file collector on {{ $labels.instance }} failed to scrape.',
  },
},
2019-09-10 14:52:12 +00:00
{
  // Fires after 10m when the clock offset is beyond +/-0.05s and not
  // shrinking: either the offset is above 0.05 with a non-negative 5m
  // derivative, or below -0.05 with a non-positive 5m derivative.
  alert: 'NodeClockSkewDetected',
  expr: |||
    (
      node_timex_offset_seconds{%(nodeExporterSelector)s} > 0.05
    and
      deriv(node_timex_offset_seconds{%(nodeExporterSelector)s}[5m]) >= 0
    )
    or
    (
      node_timex_offset_seconds{%(nodeExporterSelector)s} < -0.05
    and
      deriv(node_timex_offset_seconds{%(nodeExporterSelector)s}[5m]) <= 0
    )
  ||| % $._config,
  'for': '10m',
  labels: { severity: 'warning' },
  annotations: {
    summary: 'Clock skew detected.',
    description: 'Clock at {{ $labels.instance }} is out of sync by more than 0.05s. Ensure NTP is configured correctly on this host.',
  },
},
{
  // Fires after 10m when the minimum of node_timex_sync_status over the last
  // 5m is 0 and node_timex_maxerror_seconds is at least 16.
  alert: 'NodeClockNotSynchronising',
  expr: |||
    min_over_time(node_timex_sync_status{%(nodeExporterSelector)s}[5m]) == 0
    and
    node_timex_maxerror_seconds{%(nodeExporterSelector)s} >= 16
  ||| % $._config,
  'for': '10m',
  labels: { severity: 'warning' },
  annotations: {
    summary: 'Clock not synchronising.',
    description: 'Clock at {{ $labels.instance }} is not synchronising. Ensure NTP is configured on this host.',
  },
},
2020-07-27 09:58:36 +00:00
{
  // Fires after 15m when the number of required MD disks exceeds the number
  // of disks in state "active". The subtraction ignores the 'state' label so
  // the two series can be matched.
  // NOTE(review): severity is the literal 'critical' here, while other
  // critical-tier rules in this group read nodeCriticalSeverity from
  // $._config - confirm whether that is intentional.
  alert: 'NodeRAIDDegraded',
  expr: |||
    node_md_disks_required{%(nodeExporterSelector)s,%(diskDeviceSelector)s} - ignoring (state) (node_md_disks{state="active",%(nodeExporterSelector)s,%(diskDeviceSelector)s}) > 0
  ||| % $._config,
  'for': '15m',
  labels: { severity: 'critical' },
  annotations: {
    summary: 'RAID Array is degraded.',
    description: "RAID array '{{ $labels.device }}' at {{ $labels.instance }} is in degraded state due to one or more disks failures. Number of spare drives is insufficient to fix issue automatically.",
  },
},
{
  // Fires as soon as any selected MD array reports more than zero disks in
  // state "failed" (no 'for' hold period is configured).
  alert: 'NodeRAIDDiskFailure',
  expr: |||
    node_md_disks{state="failed",%(nodeExporterSelector)s,%(diskDeviceSelector)s} > 0
  ||| % $._config,
  labels: { severity: 'warning' },
  annotations: {
    summary: 'Failed device in RAID array.',
    description: "At least one device in RAID array at {{ $labels.instance }} failed. Array '{{ $labels.device }}' needs attention and possibly a disk swap.",
  },
},
2021-04-30 10:01:51 +00:00
{
  // Warning tier. Fires after 15m when allocated file descriptors, as a
  // percentage of the maximum, exceed 70.
  alert: 'NodeFileDescriptorLimit',
  expr: |||
    (
      node_filefd_allocated{%(nodeExporterSelector)s} * 100 / node_filefd_maximum{%(nodeExporterSelector)s} > 70
    )
  ||| % $._config,
  'for': '15m',
  labels: { severity: 'warning' },
  annotations: {
    summary: 'Kernel is predicted to exhaust file descriptors limit soon.',
    description: 'File descriptors limit at {{ $labels.instance }} is currently at {{ printf "%.2f" $value }}%.',
  },
},
{
  // Critical tier. Fires after 15m when allocated file descriptors, as a
  // percentage of the maximum, exceed 90.
  // NOTE(review): severity is the literal 'critical' here, while other
  // critical-tier rules in this group read nodeCriticalSeverity from
  // $._config - confirm whether that is intentional.
  alert: 'NodeFileDescriptorLimit',
  expr: |||
    (
      node_filefd_allocated{%(nodeExporterSelector)s} * 100 / node_filefd_maximum{%(nodeExporterSelector)s} > 90
    )
  ||| % $._config,
  'for': '15m',
  labels: { severity: 'critical' },
  annotations: {
    summary: 'Kernel is predicted to exhaust file descriptors limit soon.',
    description: 'File descriptors limit at {{ $labels.instance }} is currently at {{ printf "%.2f" $value }}%.',
  },
},
2023-03-27 18:57:02 +00:00
{
  // Fires after 15m when CPU usage exceeds cpuHighUsageThreshold. Usage is
  // computed as the 2m rate of non-idle CPU seconds, averaged across CPUs,
  // summed across modes, and scaled by 100.
  alert: 'NodeCPUHighUsage',
  expr: |||
    sum without(mode) (avg without (cpu) (rate(node_cpu_seconds_total{%(nodeExporterSelector)s, mode!="idle"}[2m]))) * 100 > %(cpuHighUsageThreshold)d
  ||| % $._config,
  'for': '15m',
  labels: { severity: 'info' },
  annotations: {
    summary: 'High CPU usage.',
    // The text is passed through '%' formatting, so percent signs meant for
    // the alert template (printf verb, literal '%') are doubled.
    description: |||
      CPU usage at {{ $labels.instance }} has been above %(cpuHighUsageThreshold)d%% for the last 15 minutes, is currently at {{ printf "%%.2f" $value }}%%.
    ||| % $._config,
  },
},
2023-03-27 22:58:17 +00:00
{
  // Fires after 15m when node_load1 divided by the number of CPUs exceeds
  // systemSaturationPerCoreThreshold. The CPU count is obtained by counting
  // the idle-mode node_cpu_seconds_total series per target.
  alert: 'NodeSystemSaturation',
  expr: |||
    node_load1{%(nodeExporterSelector)s}
    / count without (cpu, mode) (node_cpu_seconds_total{%(nodeExporterSelector)s, mode="idle"}) > %(systemSaturationPerCoreThreshold)d
  ||| % $._config,
  'for': '15m',
  labels: { severity: 'warning' },
  annotations: {
    summary: 'System saturated, load per core is very high.',
    // The text is passed through '%' formatting, so the printf verb meant
    // for the alert template is written with a doubled percent sign.
    description: |||
      System load per core at {{ $labels.instance }} has been above %(systemSaturationPerCoreThreshold)d for the last 15 minutes, is currently at {{ printf "%%.2f" $value }}.
      This might indicate this instance resources saturation and can cause it becoming unresponsive.
    ||| % $._config,
  },
},
{
  // Fires after 15m when the 5m rate of node_vmstat_pgmajfault exceeds
  // memoryMajorPagesFaultsThreshold.
  alert: 'NodeMemoryMajorPagesFaults',
  expr: |||
    rate(node_vmstat_pgmajfault{%(nodeExporterSelector)s}[5m]) > %(memoryMajorPagesFaultsThreshold)d
  ||| % $._config,
  'for': '15m',
  labels: { severity: 'warning' },
  annotations: {
    summary: 'Memory major page faults are occurring at very high rate.',
    // The text is passed through '%' formatting, so the printf verb meant
    // for the alert template is written with a doubled percent sign.
    description: |||
      Memory major pages are occurring at very high rate at {{ $labels.instance }}, %(memoryMajorPagesFaultsThreshold)d major page faults per second for the last 15 minutes, is currently at {{ printf "%%.2f" $value }}.
      Please check that there is enough memory available at this instance.
    ||| % $._config,
  },
},
2023-03-27 18:57:02 +00:00
{
  // Fires after 15m when memory utilization exceeds
  // memoryHighUtilizationThreshold. Utilization is computed as 100 minus
  // MemAvailable as a percentage of MemTotal.
  alert: 'NodeMemoryHighUtilization',
  expr: |||
    100 - (node_memory_MemAvailable_bytes{%(nodeExporterSelector)s} / node_memory_MemTotal_bytes{%(nodeExporterSelector)s} * 100) > %(memoryHighUtilizationThreshold)d
  ||| % $._config,
  'for': '15m',
  labels: { severity: 'warning' },
  annotations: {
    summary: 'Host is running out of memory.',
    // The text is passed through '%' formatting, so percent signs meant for
    // the alert template (printf verb, literal '%') are doubled.
    description: |||
      Memory is filling up at {{ $labels.instance }}, has been above %(memoryHighUtilizationThreshold)d%% for the last 15 minutes, is currently at {{ printf "%%.2f" $value }}%%.
    ||| % $._config,
  },
},
2023-03-27 21:26:30 +00:00
{
  // Fires after 30m when the 5m rate of
  // node_disk_io_time_weighted_seconds_total for a selected disk device
  // exceeds diskIOSaturationThreshold.
  alert: 'NodeDiskIOSaturation',
  expr: |||
    rate(node_disk_io_time_weighted_seconds_total{%(nodeExporterSelector)s, %(diskDeviceSelector)s}[5m]) > %(diskIOSaturationThreshold)d
  ||| % $._config,
  'for': '30m',
  labels: { severity: 'warning' },
  annotations: {
    summary: 'Disk IO queue is high.',
    // The text is passed through '%' formatting, so the printf verb meant
    // for the alert template is written with a doubled percent sign.
    description: |||
      Disk IO queue (aqu-sq) is high on {{ $labels.device }} at {{ $labels.instance }}, has been above %(diskIOSaturationThreshold)d for the last 30 minutes, is currently at {{ printf "%%.2f" $value }}.
      This symptom might indicate disk saturation.
    ||| % $._config,
  },
},
2023-03-27 19:35:41 +00:00
{
  // Fires after 5m when node_systemd_unit_state for state="failed" equals 1
  // on a selected target. The unit is reported via the 'name' label.
  alert: 'NodeSystemdServiceFailed',
  expr: |||
    node_systemd_unit_state{%(nodeExporterSelector)s, state="failed"} == 1
  ||| % $._config,
  'for': '5m',
  labels: { severity: 'warning' },
  annotations: {
    summary: 'Systemd service has entered failed state.',
    description: 'Systemd service {{ $labels.name }} has entered failed state at {{ $labels.instance }}',
  },
},
2023-11-12 23:36:30 +00:00
{
  // Fires after 5m when the number of configured bonding slaves differs from
  // the number of active ones for a bonding interface.
  alert: 'NodeBondingDegraded',
  // Both metrics carry the node-exporter selector so the rule is scoped to
  // the same targets as every other rule in this group. Previously the
  // template was formatted with $._config but contained no placeholders, so
  // the expression matched bonding series from any job.
  expr: |||
    (node_bonding_slaves{%(nodeExporterSelector)s} - node_bonding_active{%(nodeExporterSelector)s}) != 0
  ||| % $._config,
  'for': '5m',
  labels: {
    severity: 'warning',
  },
  annotations: {
    summary: 'Bonding interface is degraded',
    description: 'Bonding interface {{ $labels.master }} on {{ $labels.instance }} is in degraded state due to one or more slave failures.',
  },
},
2018-05-08 10:10:29 +00:00
],
},
],
},
}