Fix up some of the USE metrics.

Signed-off-by: Tom Wilkie <tom.wilkie@gmail.com>
pull/941/head
Tom Wilkie 2018-05-10 11:35:48 +02:00 committed by Matthias Loibl
parent c34275d6e5
commit 642f67ffa1
No known key found for this signature in database
GPG Key ID: B1C7DF661ABB2C1A
2 changed files with 21 additions and 36 deletions

View File

@ -45,7 +45,7 @@ local g = import 'grafana-builder/grafana.libsonnet';
// Full utilisation would be all disks on each node spending an average of // Full utilisation would be all disks on each node spending an average of
// 1 sec per second doing I/O, normalize by node count for stacked charts // 1 sec per second doing I/O, normalize by node count for stacked charts
g.queryPanel(||| g.queryPanel(|||
instance:node_disk_utilisation:avg_irate / scalar(sum(up{%(nodeExporterSelector)s})) instance:node_disk_utilisation:sum_irate / scalar(sum(up{%(nodeExporterSelector)s}))
||| % $._config, '{{instance}}', legendLink) + ||| % $._config, '{{instance}}', legendLink) +
g.stack + g.stack +
{ yaxes: g.yaxes({ format: 'percentunit', max: 1 }) }, { yaxes: g.yaxes({ format: 'percentunit', max: 1 }) },
@ -53,7 +53,7 @@ local g = import 'grafana-builder/grafana.libsonnet';
.addPanel( .addPanel(
g.panel('Disk IO Saturation') + g.panel('Disk IO Saturation') +
g.queryPanel(||| g.queryPanel(|||
instance:node_disk_saturation:avg_irate / scalar(sum(up{%(nodeExporterSelector)s})) instance:node_disk_saturation:sum_irate / scalar(sum(up{%(nodeExporterSelector)s}))
||| % $._config, '{{instance}}', legendLink) + ||| % $._config, '{{instance}}', legendLink) +
g.stack + g.stack +
{ yaxes: g.yaxes({ format: 'percentunit', max: 1 }) }, { yaxes: g.yaxes({ format: 'percentunit', max: 1 }) },
@ -104,7 +104,7 @@ local g = import 'grafana-builder/grafana.libsonnet';
g.row('Memory') g.row('Memory')
.addPanel( .addPanel(
g.panel('Memory Utilisation') + g.panel('Memory Utilisation') +
g.queryPanel('instance:node_memory_utilisation:{instance="$instance"}', 'Memory') + g.queryPanel('instance:node_memory_utilisation:ratio{instance="$instance"}', 'Memory') +
{ yaxes: g.yaxes('percentunit') }, { yaxes: g.yaxes('percentunit') },
) )
.addPanel( .addPanel(
@ -117,12 +117,12 @@ local g = import 'grafana-builder/grafana.libsonnet';
g.row('Disk') g.row('Disk')
.addPanel( .addPanel(
g.panel('Disk IO Utilisation') + g.panel('Disk IO Utilisation') +
g.queryPanel('instance:node_disk_utilisation:avg_irate{instance="$instance"}', 'Utilisation') + g.queryPanel('instance:node_disk_utilisation:sum_irate{instance="$instance"}', 'Utilisation') +
{ yaxes: g.yaxes('percentunit') }, { yaxes: g.yaxes('percentunit') },
) )
.addPanel( .addPanel(
g.panel('Disk IO Saturation') + g.panel('Disk IO Saturation') +
g.queryPanel('instance:node_disk_saturation:avg_irate{instance="$instance"}', 'Saturation') + g.queryPanel('instance:node_disk_saturation:sum_irate{instance="$instance"}', 'Saturation') +
{ yaxes: g.yaxes('percentunit') }, { yaxes: g.yaxes('percentunit') },
) )
) )

View File

@ -29,20 +29,9 @@
// Can go over 100%. >100% is bad. // Can go over 100%. >100% is bad.
record: 'instance:node_cpu_saturation_load1:', record: 'instance:node_cpu_saturation_load1:',
expr: ||| expr: |||
sum by (instance) ( sum by (instance) (node_load1{%(nodeExporterSelector)s})
node_load1{%(nodeExporterSelector)s}
)
/ /
instance:node_num_cpu:sum instance:node_num_cpu:sum
||| % $._config,
},
{
// Available memory per node
record: 'instance:node_memory_bytes_available:sum',
expr: |||
sum by (instance) (
(node_memory_MemFree{%(nodeExporterSelector)s} + node_memory_Cached{%(nodeExporterSelector)s} + node_memory_Buffers{%(nodeExporterSelector)s})
)
||| % $._config, ||| % $._config,
}, },
{ {
@ -58,17 +47,13 @@
// Memory utilisation per node, normalized by per-node memory // Memory utilisation per node, normalized by per-node memory
record: 'instance:node_memory_utilisation:ratio', record: 'instance:node_memory_utilisation:ratio',
expr: ||| expr: |||
(instance:node_memory_bytes_total:sum - instance:node_memory_bytes_available:sum) 1 - (
/ node_memory_MemAvailable{%(nodeExporterSelector)s}
scalar(sum(instance:node_memory_bytes_total:sum)) /
node_memory_MemTotal{%(nodeExporterSelector)s}
)
|||, ||| % $._config,
}, },
{
record: 'instance:node_memory_utilisation:',
expr: |||
1 - (instance:node_memory_bytes_available:sum / instance:node_memory_bytes_total:sum)
||| % $._config,
},
{ {
record: 'instance:node_memory_swap_io_bytes:sum_rate', record: 'instance:node_memory_swap_io_bytes:sum_rate',
expr: ||| expr: |||
@ -79,19 +64,19 @@
||| % $._config, ||| % $._config,
}, },
{ {
// Disk utilisation (ms spent, by rate() it's bound by 1 second) // Disk utilisation (ms spent, 1 second irate())
record: 'instance:node_disk_utilisation:avg_irate', record: 'instance:node_disk_utilisation:sum_irate',
expr: ||| expr: |||
avg by (instance) ( sum by (instance) (
irate(node_disk_io_time_ms{%(nodeExporterSelector)s,device=~"(sd|xvd).+"}[1m]) / 1e3 irate(node_disk_io_time_ms{%(nodeExporterSelector)s,device=~"(sd|xvd).+"}[1m]) / 1e3
) )
||| % $._config, ||| % $._config,
}, },
{ {
// Disk saturation (ms spent, by rate() it's bound by 1 second) // Disk saturation (ms spent, by rate() it's bound by 1 second)
record: 'instance:node_disk_saturation:avg_irate', record: 'instance:node_disk_saturation:sum_irate',
expr: ||| expr: |||
avg by (instance) ( sum by (instance) (
irate(node_disk_io_time_weighted{%(nodeExporterSelector)s,device=~"(sd|xvd).+"}[1m]) / 1e3 irate(node_disk_io_time_weighted{%(nodeExporterSelector)s,device=~"(sd|xvd).+"}[1m]) / 1e3
) )
||| % $._config, ||| % $._config,
@ -100,8 +85,8 @@
record: 'instance:node_net_utilisation:sum_irate', record: 'instance:node_net_utilisation:sum_irate',
expr: ||| expr: |||
sum by (instance) ( sum by (instance) (
(irate(node_network_receive_bytes{%(nodeExporterSelector)s,device="eth0"}[1m]) + (irate(node_network_receive_bytes{%(nodeExporterSelector)s,device=~"eth[0-9]+"}[1m]) +
irate(node_network_transmit_bytes{%(nodeExporterSelector)s,device="eth0"}[1m])) irate(node_network_transmit_bytes{%(nodeExporterSelector)s,device=~"eth[0-9]+"}[1m]))
) )
||| % $._config, ||| % $._config,
}, },
@ -109,8 +94,8 @@
record: 'instance:node_net_saturation:sum_irate', record: 'instance:node_net_saturation:sum_irate',
expr: ||| expr: |||
sum by (instance) ( sum by (instance) (
(irate(node_network_receive_drop{%(nodeExporterSelector)s,device="eth0"}[1m]) + (irate(node_network_receive_drop{%(nodeExporterSelector)s,device=~"eth[0-9]+"}[1m]) +
irate(node_network_transmit_drop{%(nodeExporterSelector)s,device="eth0"}[1m])) irate(node_network_transmit_drop{%(nodeExporterSelector)s,device=~"eth[0-9]+"}[1m]))
) )
||| % $._config, ||| % $._config,
}, },