Browse Source

Make interval configurable

Signed-off-by: Johannes 'fish' Ziemke <github@freigeist.org>
pull/2022/head
Johannes 'fish' Ziemke 4 years ago
parent
commit
a5908bf82b
  1. 2
      docs/node-mixin/config.libsonnet
  2. 12
      docs/node-mixin/dashboards/node.libsonnet
  3. 36
      docs/node-mixin/dashboards/use.libsonnet
  4. 32
      docs/node-mixin/rules/rules.libsonnet

2
docs/node-mixin/config.libsonnet

@ -53,5 +53,7 @@
fsSpaceAvailableWarningThreshold: 3, fsSpaceAvailableWarningThreshold: 3,
grafana_prefix: '', grafana_prefix: '',
rateInterval: '5m',
}, },
} }

12
docs/node-mixin/dashboards/node.libsonnet

@ -30,7 +30,7 @@ local gauge = promgrafonnet.gauge;
||| % $._config, ||| % $._config,
legendFormat='{{cpu}}', legendFormat='{{cpu}}',
intervalFactor=5, intervalFactor=5,
interval='5m', interval='$__rate_interval',
)); ));
local systemLoad = local systemLoad =
@ -101,17 +101,17 @@ local gauge = promgrafonnet.gauge;
.addTarget(prometheus.target( .addTarget(prometheus.target(
'rate(node_disk_read_bytes_total{%(nodeExporterSelector)s, instance="$instance", %(diskDeviceSelector)s}[$__interval])' % $._config, 'rate(node_disk_read_bytes_total{%(nodeExporterSelector)s, instance="$instance", %(diskDeviceSelector)s}[$__interval])' % $._config,
legendFormat='{{device}} read', legendFormat='{{device}} read',
interval='5m', interval='$__rate_interval',
)) ))
.addTarget(prometheus.target( .addTarget(prometheus.target(
'rate(node_disk_written_bytes_total{%(nodeExporterSelector)s, instance="$instance", %(diskDeviceSelector)s}[$__interval])' % $._config, 'rate(node_disk_written_bytes_total{%(nodeExporterSelector)s, instance="$instance", %(diskDeviceSelector)s}[$__interval])' % $._config,
legendFormat='{{device}} written', legendFormat='{{device}} written',
interval='5m', interval='$__rate_interval',
)) ))
.addTarget(prometheus.target( .addTarget(prometheus.target(
'rate(node_disk_io_time_seconds_total{%(nodeExporterSelector)s, instance="$instance", %(diskDeviceSelector)s}[$__interval])' % $._config, 'rate(node_disk_io_time_seconds_total{%(nodeExporterSelector)s, instance="$instance", %(diskDeviceSelector)s}[$__interval])' % $._config,
legendFormat='{{device}} io time', legendFormat='{{device}} io time',
interval='5m', interval='$__rate_interval',
)) + )) +
{ {
seriesOverrides: [ seriesOverrides: [
@ -188,7 +188,7 @@ local gauge = promgrafonnet.gauge;
.addTarget(prometheus.target( .addTarget(prometheus.target(
'rate(node_network_receive_bytes_total{%(nodeExporterSelector)s, instance="$instance", device!="lo"}[$__interval])' % $._config, 'rate(node_network_receive_bytes_total{%(nodeExporterSelector)s, instance="$instance", device!="lo"}[$__interval])' % $._config,
legendFormat='{{device}}', legendFormat='{{device}}',
interval='5m', interval='$__rate_interval',
)); ));
local networkTransmitted = local networkTransmitted =
@ -203,7 +203,7 @@ local gauge = promgrafonnet.gauge;
.addTarget(prometheus.target( .addTarget(prometheus.target(
'rate(node_network_transmit_bytes_total{%(nodeExporterSelector)s, instance="$instance", device!="lo"}[$__interval])' % $._config, 'rate(node_network_transmit_bytes_total{%(nodeExporterSelector)s, instance="$instance", device!="lo"}[$__interval])' % $._config,
legendFormat='{{device}}', legendFormat='{{device}}',
interval='5m', interval='$__rate_interval',
)); ));
dashboard.new('Nodes', time_from='now-1h') dashboard.new('Nodes', time_from='now-1h')

36
docs/node-mixin/dashboards/use.libsonnet

@ -12,7 +12,7 @@ local g = import 'github.com/grafana/jsonnet-libs/grafana-builder/grafana.libson
g.panel('CPU Utilisation') + g.panel('CPU Utilisation') +
g.queryPanel(||| g.queryPanel(|||
( (
instance:node_cpu_utilisation:rate5m{%(nodeExporterSelector)s} instance:node_cpu_utilisation:rate%(rateInterval)s{%(nodeExporterSelector)s}
* *
instance:node_num_cpu:sum{%(nodeExporterSelector)s} instance:node_num_cpu:sum{%(nodeExporterSelector)s}
) )
@ -47,7 +47,7 @@ local g = import 'github.com/grafana/jsonnet-libs/grafana-builder/grafana.libson
) )
.addPanel( .addPanel(
g.panel('Memory Saturation (Major Page Faults)') + g.panel('Memory Saturation (Major Page Faults)') +
g.queryPanel('instance:node_vmstat_pgmajfault:rate5m{%(nodeExporterSelector)s}' % $._config, '{{instance}}', legendLink) + g.queryPanel('instance:node_vmstat_pgmajfault:rate%(rateInterval)s{%(nodeExporterSelector)s}' % $._config, '{{instance}}', legendLink) +
g.stack + g.stack +
{ yaxes: g.yaxes('rps') }, { yaxes: g.yaxes('rps') },
) )
@ -58,8 +58,8 @@ local g = import 'github.com/grafana/jsonnet-libs/grafana-builder/grafana.libson
g.panel('Net Utilisation (Bytes Receive/Transmit)') + g.panel('Net Utilisation (Bytes Receive/Transmit)') +
g.queryPanel( g.queryPanel(
[ [
'instance:node_network_receive_bytes_excluding_lo:rate5m{%(nodeExporterSelector)s}' % $._config, 'instance:node_network_receive_bytes_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s}' % $._config,
'instance:node_network_transmit_bytes_excluding_lo:rate5m{%(nodeExporterSelector)s}' % $._config, 'instance:node_network_transmit_bytes_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s}' % $._config,
], ],
['{{instance}} Receive', '{{instance}} Transmit'], ['{{instance}} Receive', '{{instance}} Transmit'],
legendLink, legendLink,
@ -84,8 +84,8 @@ local g = import 'github.com/grafana/jsonnet-libs/grafana-builder/grafana.libson
g.panel('Net Saturation (Drops Receive/Transmit)') + g.panel('Net Saturation (Drops Receive/Transmit)') +
g.queryPanel( g.queryPanel(
[ [
'instance:node_network_receive_drop_excluding_lo:rate5m{%(nodeExporterSelector)s}' % $._config, 'instance:node_network_receive_drop_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s}' % $._config,
'instance:node_network_transmit_drop_excluding_lo:rate5m{%(nodeExporterSelector)s}' % $._config, 'instance:node_network_transmit_drop_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s}' % $._config,
], ],
['{{instance}} Receive', '{{instance}} Transmit'], ['{{instance}} Receive', '{{instance}} Transmit'],
legendLink, legendLink,
@ -116,8 +116,8 @@ local g = import 'github.com/grafana/jsonnet-libs/grafana-builder/grafana.libson
// TODO: Does the partition by device make sense? Using the most utilized device per // TODO: Does the partition by device make sense? Using the most utilized device per
// instance might make more sense. // instance might make more sense.
g.queryPanel(||| g.queryPanel(|||
instance_device:node_disk_io_time_seconds:rate5m{%(nodeExporterSelector)s} instance_device:node_disk_io_time_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s}
/ scalar(count(instance_device:node_disk_io_time_seconds:rate5m{%(nodeExporterSelector)s})) / scalar(count(instance_device:node_disk_io_time_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s}))
||| % $._config, '{{instance}} {{device}}', legendLink) + ||| % $._config, '{{instance}} {{device}}', legendLink) +
g.stack + g.stack +
{ yaxes: g.yaxes({ format: 'percentunit', max: 1 }) }, { yaxes: g.yaxes({ format: 'percentunit', max: 1 }) },
@ -125,8 +125,8 @@ local g = import 'github.com/grafana/jsonnet-libs/grafana-builder/grafana.libson
.addPanel( .addPanel(
g.panel('Disk IO Saturation') + g.panel('Disk IO Saturation') +
g.queryPanel(||| g.queryPanel(|||
instance_device:node_disk_io_time_weighted_seconds:rate5m{%(nodeExporterSelector)s} instance_device:node_disk_io_time_weighted_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s}
/ scalar(count(instance_device:node_disk_io_time_weighted_seconds:rate5m{%(nodeExporterSelector)s})) / scalar(count(instance_device:node_disk_io_time_weighted_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s}))
||| % $._config, '{{instance}} {{device}}', legendLink) + ||| % $._config, '{{instance}} {{device}}', legendLink) +
g.stack + g.stack +
{ yaxes: g.yaxes({ format: 'percentunit', max: 1 }) }, { yaxes: g.yaxes({ format: 'percentunit', max: 1 }) },
@ -156,7 +156,7 @@ local g = import 'github.com/grafana/jsonnet-libs/grafana-builder/grafana.libson
g.row('CPU') g.row('CPU')
.addPanel( .addPanel(
g.panel('CPU Utilisation') + g.panel('CPU Utilisation') +
g.queryPanel('instance:node_cpu_utilisation:rate5m{%(nodeExporterSelector)s, instance="$instance"}' % $._config, 'Utilisation') + g.queryPanel('instance:node_cpu_utilisation:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance"}' % $._config, 'Utilisation') +
{ {
yaxes: g.yaxes('percentunit'), yaxes: g.yaxes('percentunit'),
legend+: { show: false }, legend+: { show: false },
@ -182,7 +182,7 @@ local g = import 'github.com/grafana/jsonnet-libs/grafana-builder/grafana.libson
) )
.addPanel( .addPanel(
g.panel('Memory Saturation (Major Page Faults)') + g.panel('Memory Saturation (Major Page Faults)') +
g.queryPanel('instance:node_vmstat_pgmajfault:rate5m{%(nodeExporterSelector)s, instance="$instance"}' % $._config, 'Major page faults') + g.queryPanel('instance:node_vmstat_pgmajfault:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance"}' % $._config, 'Major page faults') +
{ {
yaxes: g.yaxes('short'), yaxes: g.yaxes('short'),
legend+: { show: false }, legend+: { show: false },
@ -195,8 +195,8 @@ local g = import 'github.com/grafana/jsonnet-libs/grafana-builder/grafana.libson
g.panel('Net Utilisation (Bytes Receive/Transmit)') + g.panel('Net Utilisation (Bytes Receive/Transmit)') +
g.queryPanel( g.queryPanel(
[ [
'instance:node_network_receive_bytes_excluding_lo:rate5m{%(nodeExporterSelector)s, instance="$instance"}' % $._config, 'instance:node_network_receive_bytes_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance"}' % $._config,
'instance:node_network_transmit_bytes_excluding_lo:rate5m{%(nodeExporterSelector)s, instance="$instance"}' % $._config, 'instance:node_network_transmit_bytes_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance"}' % $._config,
], ],
['Receive', 'Transmit'], ['Receive', 'Transmit'],
) + ) +
@ -219,8 +219,8 @@ local g = import 'github.com/grafana/jsonnet-libs/grafana-builder/grafana.libson
g.panel('Net Saturation (Drops Receive/Transmit)') + g.panel('Net Saturation (Drops Receive/Transmit)') +
g.queryPanel( g.queryPanel(
[ [
'instance:node_network_receive_drop_excluding_lo:rate5m{%(nodeExporterSelector)s, instance="$instance"}' % $._config, 'instance:node_network_receive_drop_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance"}' % $._config,
'instance:node_network_transmit_drop_excluding_lo:rate5m{%(nodeExporterSelector)s, instance="$instance"}' % $._config, 'instance:node_network_transmit_drop_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance"}' % $._config,
], ],
['Receive drops', 'Transmit drops'], ['Receive drops', 'Transmit drops'],
) + ) +
@ -244,12 +244,12 @@ local g = import 'github.com/grafana/jsonnet-libs/grafana-builder/grafana.libson
g.row('Disk IO') g.row('Disk IO')
.addPanel( .addPanel(
g.panel('Disk IO Utilisation') + g.panel('Disk IO Utilisation') +
g.queryPanel('instance_device:node_disk_io_time_seconds:rate5m{%(nodeExporterSelector)s, instance="$instance"}' % $._config, '{{device}}') + g.queryPanel('instance_device:node_disk_io_time_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance"}' % $._config, '{{device}}') +
{ yaxes: g.yaxes('percentunit') }, { yaxes: g.yaxes('percentunit') },
) )
.addPanel( .addPanel(
g.panel('Disk IO Saturation') + g.panel('Disk IO Saturation') +
g.queryPanel('instance_device:node_disk_io_time_weighted_seconds:rate5m{%(nodeExporterSelector)s, instance="$instance"}' % $._config, '{{device}}') + g.queryPanel('instance_device:node_disk_io_time_weighted_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance"}' % $._config, '{{device}}') +
{ yaxes: g.yaxes('percentunit') }, { yaxes: g.yaxes('percentunit') },
) )
) )

32
docs/node-mixin/rules/rules.libsonnet

@ -17,10 +17,10 @@
}, },
{ {
// CPU utilisation is % CPU is not idle. // CPU utilisation is % CPU is not idle.
record: 'instance:node_cpu_utilisation:rate5m', record: 'instance:node_cpu_utilisation:rate%(rateInterval)s' % $._config,
expr: ||| expr: |||
1 - avg without (cpu, mode) ( 1 - avg without (cpu, mode) (
rate(node_cpu_seconds_total{%(nodeExporterSelector)s, mode="idle"}[5m]) rate(node_cpu_seconds_total{%(nodeExporterSelector)s, mode="idle"}[%(rateInterval)s])
) )
||| % $._config, ||| % $._config,
}, },
@ -50,55 +50,55 @@
||| % $._config, ||| % $._config,
}, },
{ {
record: 'instance:node_vmstat_pgmajfault:rate5m', record: 'instance:node_vmstat_pgmajfault:rate%(rateInterval)s' % $._config,
expr: ||| expr: |||
rate(node_vmstat_pgmajfault{%(nodeExporterSelector)s}[5m]) rate(node_vmstat_pgmajfault{%(nodeExporterSelector)s}[%(rateInterval)s])
||| % $._config, ||| % $._config,
}, },
{ {
// Disk utilisation (seconds spent, 1 second rate). // Disk utilisation (seconds spent, 1 second rate).
record: 'instance_device:node_disk_io_time_seconds:rate5m', record: 'instance_device:node_disk_io_time_seconds:rate%(rateInterval)s' % $._config,
expr: ||| expr: |||
rate(node_disk_io_time_seconds_total{%(nodeExporterSelector)s, %(diskDeviceSelector)s}[5m]) rate(node_disk_io_time_seconds_total{%(nodeExporterSelector)s, %(diskDeviceSelector)s}[%(rateInterval)s])
||| % $._config, ||| % $._config,
}, },
{ {
// Disk saturation (weighted seconds spent, 1 second rate). // Disk saturation (weighted seconds spent, 1 second rate).
record: 'instance_device:node_disk_io_time_weighted_seconds:rate5m', record: 'instance_device:node_disk_io_time_weighted_seconds:rate%(rateInterval)s' % $._config,
expr: ||| expr: |||
rate(node_disk_io_time_weighted_seconds_total{%(nodeExporterSelector)s, %(diskDeviceSelector)s}[5m]) rate(node_disk_io_time_weighted_seconds_total{%(nodeExporterSelector)s, %(diskDeviceSelector)s}[%(rateInterval)s])
||| % $._config, ||| % $._config,
}, },
{ {
record: 'instance:node_network_receive_bytes_excluding_lo:rate5m', record: 'instance:node_network_receive_bytes_excluding_lo:rate%(rateInterval)s' % $._config,
expr: ||| expr: |||
sum without (device) ( sum without (device) (
rate(node_network_receive_bytes_total{%(nodeExporterSelector)s, device!="lo"}[5m]) rate(node_network_receive_bytes_total{%(nodeExporterSelector)s, device!="lo"}[%(rateInterval)s])
) )
||| % $._config, ||| % $._config,
}, },
{ {
record: 'instance:node_network_transmit_bytes_excluding_lo:rate5m', record: 'instance:node_network_transmit_bytes_excluding_lo:rate%(rateInterval)s' % $._config,
expr: ||| expr: |||
sum without (device) ( sum without (device) (
rate(node_network_transmit_bytes_total{%(nodeExporterSelector)s, device!="lo"}[5m]) rate(node_network_transmit_bytes_total{%(nodeExporterSelector)s, device!="lo"}[%(rateInterval)s])
) )
||| % $._config, ||| % $._config,
}, },
// TODO: Find out if those drops ever happen on modern switched networks. // TODO: Find out if those drops ever happen on modern switched networks.
{ {
record: 'instance:node_network_receive_drop_excluding_lo:rate5m', record: 'instance:node_network_receive_drop_excluding_lo:rate%(rateInterval)s' % $._config,
expr: ||| expr: |||
sum without (device) ( sum without (device) (
rate(node_network_receive_drop_total{%(nodeExporterSelector)s, device!="lo"}[5m]) rate(node_network_receive_drop_total{%(nodeExporterSelector)s, device!="lo"}[%(rateInterval)s])
) )
||| % $._config, ||| % $._config,
}, },
{ {
record: 'instance:node_network_transmit_drop_excluding_lo:rate5m', record: 'instance:node_network_transmit_drop_excluding_lo:rate%(rateInterval)s' % $._config,
expr: ||| expr: |||
sum without (device) ( sum without (device) (
rate(node_network_transmit_drop_total{%(nodeExporterSelector)s, device!="lo"}[5m]) rate(node_network_transmit_drop_total{%(nodeExporterSelector)s, device!="lo"}[%(rateInterval)s])
) )
||| % $._config, ||| % $._config,
}, },

Loading…
Cancel
Save