diff --git a/docs/node-mixin/config.libsonnet b/docs/node-mixin/config.libsonnet index b25c3939..fdea71d2 100644 --- a/docs/node-mixin/config.libsonnet +++ b/docs/node-mixin/config.libsonnet @@ -2,7 +2,12 @@ _config+:: { // Selectors are inserted between {} in Prometheus queries. - // Select the metrics coming from the node exporter. + // Select the metrics coming from the node exporter. Note that all + // the selected metrics are shown stacked on top of each other in + // the 'USE Method / Cluster' dashboard. Consider disabling that + // dashboard if mixing up all those metrics in the same dashboard + // doesn't make sense (e.g. because they are coming from different + // clusters). nodeExporterSelector: 'job="node"', // Select the fstype for filesystem-related queries. If left diff --git a/docs/node-mixin/dashboards/use.libsonnet b/docs/node-mixin/dashboards/use.libsonnet index d2a568f5..3d2e16d4 100644 --- a/docs/node-mixin/dashboards/use.libsonnet +++ b/docs/node-mixin/dashboards/use.libsonnet @@ -15,9 +15,8 @@ local g = import 'grafana-builder/grafana.libsonnet'; instance:node_cpu_utilisation:rate1m{%(nodeExporterSelector)s} * instance:node_num_cpu:sum{%(nodeExporterSelector)s} - / ignoring (instance) group_left - sum without (instance) (instance:node_num_cpu:sum{%(nodeExporterSelector)s}) ) + / scalar(sum(instance:node_num_cpu:sum{%(nodeExporterSelector)s})) ||| % $._config, '{{instance}}', legendLink) + g.stack + { yaxes: g.yaxes({ format: 'percentunit', max: 1 }) }, @@ -27,11 +26,8 @@ local g = import 'grafana-builder/grafana.libsonnet'; // average relates to the "CPU saturation" in the title. g.panel('CPU Saturation (load1 per CPU)') + g.queryPanel(||| - ( - instance:node_load1_per_cpu:ratio{%(nodeExporterSelector)s} - / ignoring (instance) group_left - count without (instance) (instance:node_load1_per_cpu:ratio{%(nodeExporterSelector)s}) - ) + instance:node_load1_per_cpu:ratio{%(nodeExporterSelector)s} + / scalar(count(instance:node_load1_per_cpu:ratio{%(nodeExporterSelector)s})) ||| % $._config, '{{instance}}', legendLink) + g.stack + // TODO: Does `max: 1` make sense? The stack can go over 1 in high-load scenarios. @@ -43,11 +39,8 @@ local g = import 'grafana-builder/grafana.libsonnet'; .addPanel( g.panel('Memory Utilisation') + g.queryPanel(||| - ( - instance:node_memory_utilisation:ratio{%(nodeExporterSelector)s} - / ignoring (instance) group_left - count without (instance) (instance:node_memory_utilisation:ratio{%(nodeExporterSelector)s}) - ) + instance:node_memory_utilisation:ratio{%(nodeExporterSelector)s} + / scalar(count(instance:node_memory_utilisation:ratio{%(nodeExporterSelector)s})) ||| % $._config, '{{instance}}', legendLink) + g.stack + { yaxes: g.yaxes({ format: 'percentunit', max: 1 }) }, @@ -123,11 +116,8 @@ local g = import 'grafana-builder/grafana.libsonnet'; // TODO: Does the partition by device make sense? Using the most utilized device per // instance might make more sense. g.queryPanel(||| - ( - instance_device:node_disk_io_time_seconds:rate1m{%(nodeExporterSelector)s} - / ignoring (instance, device) group_left - count without (instance, device) (instance_device:node_disk_io_time_seconds:rate1m{%(nodeExporterSelector)s}) - ) + instance_device:node_disk_io_time_seconds:rate1m{%(nodeExporterSelector)s} + / scalar(count(instance_device:node_disk_io_time_seconds:rate1m{%(nodeExporterSelector)s})) ||| % $._config, '{{instance}} {{device}}', legendLink) + g.stack + { yaxes: g.yaxes({ format: 'percentunit', max: 1 }) }, @@ -135,11 +125,8 @@ local g = import 'grafana-builder/grafana.libsonnet'; .addPanel( g.panel('Disk IO Saturation') + g.queryPanel(||| - ( - instance_device:node_disk_io_time_weighted_seconds:rate1m{%(nodeExporterSelector)s} - / ignoring (instance, device) group_left - count without (instance, device) (instance_device:node_disk_io_time_weighted_seconds:rate1m{%(nodeExporterSelector)s}) - ) + instance_device:node_disk_io_time_weighted_seconds:rate1m{%(nodeExporterSelector)s} + / scalar(count(instance_device:node_disk_io_time_weighted_seconds:rate1m{%(nodeExporterSelector)s})) ||| % $._config, '{{instance}} {{device}}', legendLink) + g.stack + { yaxes: g.yaxes({ format: 'percentunit', max: 1 }) }, @@ -150,19 +137,12 @@ local g = import 'grafana-builder/grafana.libsonnet'; .addPanel( g.panel('Disk Space Utilisation') + g.queryPanel(||| - ( - sum without (device) ( - max without (fstype, mountpoint) ( - node_filesystem_size_bytes{%(nodeExporterSelector)s, %(fsSelector)s} - node_filesystem_avail_bytes{%(nodeExporterSelector)s, %(fsSelector)s} - ) - ) - / ignoring (instance) group_left - sum without (instance, device) ( - max without (fstype, mountpoint) ( - node_filesystem_size_bytes{%(nodeExporterSelector)s, %(fsSelector)s} - ) + sum without (device) ( + max without (fstype, mountpoint) ( + node_filesystem_size_bytes{%(nodeExporterSelector)s, %(fsSelector)s} - node_filesystem_avail_bytes{%(nodeExporterSelector)s, %(fsSelector)s} ) - ) + ) + / scalar(sum(max without (fstype, mountpoint) (node_filesystem_size_bytes{%(nodeExporterSelector)s, %(fsSelector)s}))) ||| % $._config, '{{instance}}', legendLink) + g.stack + { yaxes: g.yaxes({ format: 'percentunit', max: 1 }) },