Migrate dashboards to new grafonnet library (#3147)

Migrated the dashboards away from the deprecated grafonnet-lib to the current grafonnet library. This replaces the Angular-based graph panels, which are disabled by default in Grafana 11 and will be unsupported in Grafana 12.

Fixes #3046
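
For reviewers who have not used the new library yet: the mechanical change throughout is that `graphPanel.new(...)` with keyword arguments and chained `.addTarget(...)` calls becomes `timeSeriesPanel.new(...)` plus option mixins composed with `+`. A minimal sketch of the before/after pattern (the query is shortened for illustration; in the actual change the shared options live in a `tsCommonPanelOptions` local):

```jsonnet
// Sketch of the migration pattern used in this change (query shortened for illustration).
local grafana = import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonnet';
local timeSeriesPanel = grafana.panel.timeSeries;
local tsStandardOptions = timeSeriesPanel.standardOptions;
local tsCustom = timeSeriesPanel.fieldConfig.defaults.custom;
local tsQueryOptions = timeSeriesPanel.queryOptions;
local prometheus = grafana.query.prometheus;

// Before (grafonnet-lib, Angular graph panel):
//   graphPanel.new('CPU Utilisation', datasource='$datasource', format='percentunit', stack=true, fill=10)
//   .addTarget(prometheus.target('instance:node_cpu_utilisation:rate5m != 0', legendFormat='Utilisation'))

// After (grafonnet, React time series panel): options are mixed in with '+'.
timeSeriesPanel.new('CPU Utilisation')
+ tsStandardOptions.withUnit('percentunit')
+ tsCustom.stacking.withMode('normal')
+ tsCustom.withFillOpacity(100)
+ tsQueryOptions.withTargets([
  prometheus.new('$datasource', 'instance:node_cpu_utilisation:rate5m != 0')
  + prometheus.withLegendFormat('Utilisation'),
])
```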

---------

Signed-off-by: Tom <12222103+critchtionary@users.noreply.github.com>
Tom 2024-12-19 15:49:22 +00:00 committed by GitHub
parent ff97e35a71
commit d0c1d00d18
3 changed files with 871 additions and 894 deletions


@@ -1,201 +1,178 @@
-local grafana = import 'github.com/grafana/grafonnet-lib/grafonnet/grafana.libsonnet';
+local grafana = import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonnet';
 local dashboard = grafana.dashboard;
-local row = grafana.row;
-local prometheus = grafana.prometheus;
-local template = grafana.template;
-local graphPanel = grafana.graphPanel;
+local variable = dashboard.variable;
+local row = grafana.panel.row;
+local prometheus = grafana.query.prometheus;
+local timeSeriesPanel = grafana.panel.timeSeries;
+local tsOptions = timeSeriesPanel.options;
+local tsStandardOptions = timeSeriesPanel.standardOptions;
+local tsQueryOptions = timeSeriesPanel.queryOptions;
+local tsCustom = timeSeriesPanel.fieldConfig.defaults.custom;
+local tsLegend = tsOptions.legend;
 local c = import '../config.libsonnet';
-local datasourceTemplate = {
-  current: {
-    text: 'default',
-    value: 'default',
-  },
-  hide: 0,
-  label: 'Data Source',
-  name: 'datasource',
-  options: [],
-  query: 'prometheus',
-  refresh: 1,
-  regex: '',
-  type: 'datasource',
-};
+local datasource = variable.datasource.new(
+  'datasource', 'prometheus'
+);
+local tsCommonPanelOptions =
+  variable.query.withDatasourceFromVariable(datasource)
+  + tsCustom.stacking.withMode('normal')
+  + tsCustom.withFillOpacity(100)
+  + tsCustom.withShowPoints('never')
+  + tsLegend.withShowLegend(false)
+  + tsOptions.tooltip.withMode('multi')
+  + tsOptions.tooltip.withSort('desc');
local CPUUtilisation = local CPUUtilisation =
graphPanel.new( timeSeriesPanel.new(
'CPU Utilisation', 'CPU Utilisation',
datasource='$datasource', )
span=6, + tsCommonPanelOptions
format='percentunit', + tsStandardOptions.withUnit('percentunit');
stack=true,
fill=10,
legend_show=false,
) { tooltip+: { sort: 2 } };
local CPUSaturation = local CPUSaturation =
// TODO: Is this a useful panel? At least there should be some explanation how load // TODO: Is this a useful panel? At least there should be some explanation how load
// average relates to the "CPU saturation" in the title. // average relates to the "CPU saturation" in the title.
graphPanel.new( timeSeriesPanel.new(
'CPU Saturation (Load1 per CPU)', 'CPU Saturation (Load1 per CPU)',
datasource='$datasource', )
span=6, + tsCommonPanelOptions
format='percentunit', + tsStandardOptions.withUnit('percentunit');
stack=true,
fill=10,
legend_show=false,
) { tooltip+: { sort: 2 } };
local memoryUtilisation = local memoryUtilisation =
graphPanel.new( timeSeriesPanel.new(
'Memory Utilisation', 'Memory Utilisation',
datasource='$datasource', )
span=6, + tsCommonPanelOptions
format='percentunit', + tsStandardOptions.withUnit('percentunit');
stack=true,
fill=10,
legend_show=false,
) { tooltip+: { sort: 2 } };
local memorySaturation = local memorySaturation =
graphPanel.new( timeSeriesPanel.new(
'Memory Saturation (Major Page Faults)', 'Memory Saturation (Major Page Faults)',
datasource='$datasource', )
span=6, + tsCommonPanelOptions
format='rds', + tsStandardOptions.withUnit('rds');
stack=true,
fill=10, local networkOverrides = tsStandardOptions.withOverrides(
legend_show=false, [
) { tooltip+: { sort: 2 } }; tsStandardOptions.override.byRegexp.new('/Transmit/')
+ tsStandardOptions.override.byRegexp.withPropertiesFromOptions(
tsCustom.withTransform('negative-Y')
),
]
);
local networkUtilisation = local networkUtilisation =
graphPanel.new( timeSeriesPanel.new(
'Network Utilisation (Bytes Receive/Transmit)', 'Network Utilisation (Bytes Receive/Transmit)',
datasource='$datasource',
span=6,
format='Bps',
stack=true,
fill=10,
legend_show=false,
) )
.addSeriesOverride({ alias: '/Receive/', stack: 'A' }) + tsCommonPanelOptions
.addSeriesOverride({ alias: '/Transmit/', stack: 'B', transform: 'negative-Y' }) + tsStandardOptions.withUnit('Bps')
{ tooltip+: { sort: 2 } }; + networkOverrides;
local networkSaturation = local networkSaturation =
graphPanel.new( timeSeriesPanel.new(
'Network Saturation (Drops Receive/Transmit)', 'Network Saturation (Drops Receive/Transmit)',
datasource='$datasource',
span=6,
format='Bps',
stack=true,
fill=10,
legend_show=false,
) )
.addSeriesOverride({ alias: '/ Receive/', stack: 'A' }) + tsCommonPanelOptions
.addSeriesOverride({ alias: '/ Transmit/', stack: 'B', transform: 'negative-Y' }) + tsStandardOptions.withUnit('Bps')
{ tooltip+: { sort: 2 } }; + networkOverrides;
local diskIOUtilisation = local diskIOUtilisation =
graphPanel.new( timeSeriesPanel.new(
'Disk IO Utilisation', 'Disk IO Utilisation',
datasource='$datasource', )
span=6, + tsCommonPanelOptions
format='percentunit', + tsStandardOptions.withUnit('percentunit');
stack=true,
fill=10,
legend_show=false,
) { tooltip+: { sort: 2 } };
local diskIOSaturation = local diskIOSaturation =
graphPanel.new( timeSeriesPanel.new(
'Disk IO Saturation', 'Disk IO Saturation',
datasource='$datasource', )
span=6, + tsCommonPanelOptions
format='percentunit', + tsStandardOptions.withUnit('percentunit');
stack=true,
fill=10,
legend_show=false,
) { tooltip+: { sort: 2 } };
local diskSpaceUtilisation = local diskSpaceUtilisation =
graphPanel.new( timeSeriesPanel.new(
'Disk Space Utilisation', 'Disk Space Utilisation',
datasource='$datasource', )
span=12, + tsCommonPanelOptions
format='percentunit', + tsStandardOptions.withUnit('percentunit');
stack=true,
fill=10,
legend_show=false,
) { tooltip+: { sort: 2 } };
 {
-  _clusterTemplate:: template.new(
-    name='cluster',
-    datasource='$datasource',
-    query='label_values(node_time_seconds, %s)' % $._config.clusterLabel,
-    current='',
-    hide=if $._config.showMultiCluster then '' else '2',
-    refresh=2,
-    includeAll=false,
-    sort=1
-  ),
+  _clusterVariable::
+    variable.query.new('cluster')
+    + variable.query.withDatasourceFromVariable(datasource)
+    + variable.query.queryTypes.withLabelValues(
+      $._config.clusterLabel,
+      'node_time_seconds',
+    )
+    + (if $._config.showMultiCluster then variable.query.generalOptions.showOnDashboard.withLabelAndValue() else variable.query.generalOptions.showOnDashboard.withNothing())
+    + variable.query.refresh.onTime()
+    + variable.query.selectionOptions.withIncludeAll(false)
+    + variable.query.withSort(asc=true),
grafanaDashboards+:: { grafanaDashboards+:: {
'node-rsrc-use.json': 'node-rsrc-use.json':
dashboard.new( dashboard.new(
'%sUSE Method / Node' % $._config.dashboardNamePrefix, '%sUSE Method / Node' % $._config.dashboardNamePrefix,
time_from='now-1h',
tags=($._config.dashboardTags),
timezone='utc',
refresh='30s',
graphTooltip='shared_crosshair',
uid=std.md5('node-rsrc-use.json')
) )
.addTemplate(datasourceTemplate) + dashboard.time.withFrom('now-1h')
.addTemplate($._clusterTemplate) + dashboard.withTags($._config.dashboardTags)
.addTemplate( + dashboard.withTimezone('utc')
template.new( + dashboard.withRefresh('30s')
+ dashboard.graphTooltip.withSharedCrosshair()
+ dashboard.withUid(std.md5('node-rsrc-use.json'))
+ dashboard.withVariables([
datasource,
$._clusterVariable,
variable.query.new('instance')
+ variable.query.withDatasourceFromVariable(datasource)
+ variable.query.queryTypes.withLabelValues(
'instance', 'instance',
'$datasource', 'node_exporter_build_info{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"}' % $._config,
'label_values(node_exporter_build_info{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"}, instance)' % $._config,
refresh='time',
sort=1
) )
) + variable.query.refresh.onTime()
.addRow( + variable.query.withSort(asc=true),
])
+ dashboard.withPanels(
grafana.util.grid.makeGrid([
row.new('CPU') row.new('CPU')
.addPanel(CPUUtilisation.addTarget(prometheus.target('instance:node_cpu_utilisation:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config, legendFormat='Utilisation'))) + row.withPanels([
.addPanel(CPUSaturation.addTarget(prometheus.target('instance:node_load1_per_cpu:ratio{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config, legendFormat='Saturation'))) CPUUtilisation + tsQueryOptions.withTargets([prometheus.new('$datasource', 'instance:node_cpu_utilisation:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config) + prometheus.withLegendFormat('Utilisation')]),
) CPUSaturation + tsQueryOptions.withTargets([prometheus.new('$datasource', 'instance:node_load1_per_cpu:ratio{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config) + prometheus.withLegendFormat('Saturation')]),
.addRow( ]),
row.new('Memory') row.new('Memory')
.addPanel(memoryUtilisation.addTarget(prometheus.target('instance:node_memory_utilisation:ratio{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config, legendFormat='Utilisation'))) + row.withPanels([
.addPanel(memorySaturation.addTarget(prometheus.target('instance:node_vmstat_pgmajfault:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config, legendFormat='Major page Faults'))) memoryUtilisation + tsQueryOptions.withTargets([prometheus.new('$datasource', 'instance:node_memory_utilisation:ratio{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config) + prometheus.withLegendFormat('Utilisation')]),
) memorySaturation + tsQueryOptions.withTargets([prometheus.new('$datasource', 'instance:node_vmstat_pgmajfault:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config) + prometheus.withLegendFormat('Major page Faults')]),
.addRow( ]),
row.new('Network') row.new('Network')
.addPanel( + row.withPanels([
networkUtilisation networkUtilisation + tsQueryOptions.withTargets([
.addTarget(prometheus.target('instance:node_network_receive_bytes_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config, legendFormat='Receive')) prometheus.new('$datasource', 'instance:node_network_receive_bytes_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config) + prometheus.withLegendFormat('Receive'),
.addTarget(prometheus.target('instance:node_network_transmit_bytes_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config, legendFormat='Transmit')) prometheus.new('$datasource', 'instance:node_network_transmit_bytes_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config) + prometheus.withLegendFormat('Transmit'),
) ]),
.addPanel( networkSaturation + tsQueryOptions.withTargets([
networkSaturation prometheus.new('$datasource', 'instance:node_network_receive_drop_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config) + prometheus.withLegendFormat('Receive'),
.addTarget(prometheus.target('instance:node_network_receive_drop_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config, legendFormat='Receive')) prometheus.new('$datasource', 'instance:node_network_transmit_drop_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config) + prometheus.withLegendFormat('Transmit'),
.addTarget(prometheus.target('instance:node_network_transmit_drop_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config, legendFormat='Transmit')) ]),
) ]),
)
.addRow(
row.new('Disk IO') row.new('Disk IO')
.addPanel(diskIOUtilisation.addTarget(prometheus.target('instance_device:node_disk_io_time_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config, legendFormat='{{device}}'))) + row.withPanels([
.addPanel(diskIOSaturation.addTarget(prometheus.target('instance_device:node_disk_io_time_weighted_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config, legendFormat='{{device}}'))) diskIOUtilisation + tsQueryOptions.withTargets([prometheus.new('$datasource', 'instance_device:node_disk_io_time_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config) + prometheus.withLegendFormat('{{device}}')]),
) diskIOSaturation + tsQueryOptions.withTargets([prometheus.new('$datasource', 'instance_device:node_disk_io_time_weighted_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config) + prometheus.withLegendFormat('{{device}}')]),
.addRow( ]),
], panelWidth=12, panelHeight=7)
+ grafana.util.grid.makeGrid([
row.new('Disk Space') row.new('Disk Space')
.addPanel( + row.withPanels([
diskSpaceUtilisation.addTarget(prometheus.target( diskSpaceUtilisation + tsQueryOptions.withTargets([
prometheus.new(
'$datasource',
||| |||
sort_desc(1 - sort_desc(1 -
( (
@@ -204,28 +181,36 @@ local diskSpaceUtilisation =
max without (mountpoint, fstype) (node_filesystem_size_bytes{%(nodeExporterSelector)s, fstype!="", instance="$instance", %(clusterLabel)s="$cluster"}) max without (mountpoint, fstype) (node_filesystem_size_bytes{%(nodeExporterSelector)s, fstype!="", instance="$instance", %(clusterLabel)s="$cluster"})
) != 0 ) != 0
) )
||| % $._config, legendFormat='{{device}}' ||| % $._config
)) ) + prometheus.withLegendFormat('{{device}}'),
) ]),
]),
], panelWidth=24, panelHeight=7, startY=34),
), ),
'node-cluster-rsrc-use.json': 'node-cluster-rsrc-use.json':
dashboard.new( dashboard.new(
'%sUSE Method / Cluster' % $._config.dashboardNamePrefix, '%sUSE Method / Cluster' % $._config.dashboardNamePrefix,
time_from='now-1h',
tags=($._config.dashboardTags),
timezone='utc',
refresh='30s',
graphTooltip='shared_crosshair',
uid=std.md5('node-cluster-rsrc-use.json')
) )
.addTemplate(datasourceTemplate) + dashboard.time.withFrom('now-1h')
.addTemplate($._clusterTemplate) + dashboard.withTags($._config.dashboardTags)
.addRow( + dashboard.withTimezone('utc')
+ dashboard.withRefresh('30s')
+ dashboard.graphTooltip.withSharedCrosshair()
+ dashboard.withUid(std.md5('node-cluster-rsrc-use.json'))
+ dashboard.withVariables([
datasource,
$._clusterVariable,
variable.query.withDatasourceFromVariable(datasource)
+ variable.query.refresh.onTime()
+ variable.query.withSort(asc=true),
])
+ dashboard.withPanels(
grafana.util.grid.makeGrid([
row.new('CPU') row.new('CPU')
.addPanel( + row.withPanels([
CPUUtilisation CPUUtilisation + tsQueryOptions.withTargets([
.addTarget(prometheus.target( prometheus.new(
'$datasource',
||| |||
(( ((
instance:node_cpu_utilisation:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} instance:node_cpu_utilisation:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"}
@@ -233,79 +218,90 @@ local diskSpaceUtilisation =
instance:node_num_cpu:sum{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} instance:node_num_cpu:sum{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"}
) != 0 ) ) != 0 )
/ scalar(sum(instance:node_num_cpu:sum{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"})) / scalar(sum(instance:node_num_cpu:sum{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"}))
||| % $._config, legendFormat='{{ instance }}' ||| % $._config
)) ) + prometheus.withLegendFormat('{{ instance }}'),
) ]),
.addPanel( CPUSaturation + tsQueryOptions.withTargets([
CPUSaturation prometheus.new(
.addTarget(prometheus.target( '$datasource',
||| |||
( (
instance:node_load1_per_cpu:ratio{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} instance:node_load1_per_cpu:ratio{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"}
/ scalar(count(instance:node_load1_per_cpu:ratio{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"})) / scalar(count(instance:node_load1_per_cpu:ratio{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"}))
) != 0 ) != 0
||| % $._config, legendFormat='{{instance}}' ||| % $._config
)) ) + prometheus.withLegendFormat('{{ instance }}'),
) ]),
) ]),
.addRow(
row.new('Memory') row.new('Memory')
.addPanel( + row.withPanels([
memoryUtilisation memoryUtilisation + tsQueryOptions.withTargets([
.addTarget(prometheus.target( prometheus.new(
'$datasource',
||| |||
( (
instance:node_memory_utilisation:ratio{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} instance:node_memory_utilisation:ratio{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"}
/ scalar(count(instance:node_memory_utilisation:ratio{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"})) / scalar(count(instance:node_memory_utilisation:ratio{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"}))
) != 0 ) != 0
||| % $._config, legendFormat='{{instance}}', ||| % $._config
)) ) + prometheus.withLegendFormat('{{ instance }}'),
) ]),
.addPanel(memorySaturation.addTarget(prometheus.target('instance:node_vmstat_pgmajfault:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"}' % $._config, legendFormat='{{instance}}'))) memorySaturation + tsQueryOptions.withTargets([
) prometheus.new(
.addRow( '$datasource',
'instance:node_vmstat_pgmajfault:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"}' % $._config
) + prometheus.withLegendFormat('{{ instance }}'),
]),
]),
row.new('Network') row.new('Network')
.addPanel( + row.withPanels([
networkUtilisation networkUtilisation + tsQueryOptions.withTargets([
.addTarget(prometheus.target('instance:node_network_receive_bytes_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} != 0' % $._config, legendFormat='{{instance}} Receive')) prometheus.new(
.addTarget(prometheus.target('instance:node_network_transmit_bytes_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} != 0' % $._config, legendFormat='{{instance}} Transmit')) '$datasource',
) 'instance:node_network_receive_bytes_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} != 0' % $._config
.addPanel( ) + prometheus.withLegendFormat('{{ instance }} Receive'),
networkSaturation prometheus.new(
.addTarget(prometheus.target('instance:node_network_receive_drop_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} != 0' % $._config, legendFormat='{{instance}} Receive')) '$datasource',
.addTarget(prometheus.target('instance:node_network_transmit_drop_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} != 0' % $._config, legendFormat='{{instance}} Transmit')) 'instance:node_network_transmit_bytes_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} != 0' % $._config
) ) + prometheus.withLegendFormat('{{ instance }} Transmit'),
) ]),
.addRow( networkSaturation + tsQueryOptions.withTargets([
prometheus.new(
'$datasource',
'instance:node_network_receive_drop_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} != 0' % $._config
) + prometheus.withLegendFormat('{{ instance }} Receive'),
prometheus.new(
'$datasource',
'instance:node_network_transmit_drop_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} != 0' % $._config
) + prometheus.withLegendFormat('{{ instance }} Transmit'),
]),
]),
row.new('Disk IO') row.new('Disk IO')
.addPanel( + row.withPanels([
diskIOUtilisation diskIOUtilisation + tsQueryOptions.withTargets([
.addTarget(prometheus.target( prometheus.new(
'$datasource',
||| |||
(
instance_device:node_disk_io_time_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} instance_device:node_disk_io_time_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"}
/ scalar(count(instance_device:node_disk_io_time_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"})) / scalar(count(instance_device:node_disk_io_time_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"}))
) != 0 ||| % $._config
||| % $._config, legendFormat='{{instance}} {{device}}' ) + prometheus.withLegendFormat('{{ instance }} {{device}}'),
)) ]),
) diskIOSaturation + tsQueryOptions.withTargets([prometheus.new(
.addPanel( '$datasource',
diskIOSaturation
.addTarget(prometheus.target(
||| |||
(
instance_device:node_disk_io_time_weighted_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} instance_device:node_disk_io_time_weighted_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"}
/ scalar(count(instance_device:node_disk_io_time_weighted_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"})) / scalar(count(instance_device:node_disk_io_time_weighted_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"}))
) != 0 ||| % $._config
||| % $._config, legendFormat='{{instance}} {{device}}' ) + prometheus.withLegendFormat('{{ instance }} {{device}}')]),
)) ]),
) ], panelWidth=12, panelHeight=7)
) + grafana.util.grid.makeGrid([
.addRow(
row.new('Disk Space') row.new('Disk Space')
.addPanel( + row.withPanels([
diskSpaceUtilisation diskSpaceUtilisation + tsQueryOptions.withTargets([
.addTarget(prometheus.target( prometheus.new(
'$datasource',
||| |||
sum without (device) ( sum without (device) (
max without (fstype, mountpoint) (( max without (fstype, mountpoint) ((
@@ -315,28 +311,37 @@ local diskSpaceUtilisation =
) != 0) ) != 0)
) )
/ scalar(sum(max without (fstype, mountpoint) (node_filesystem_size_bytes{%(nodeExporterSelector)s, %(fsSelector)s, %(fsMountpointSelector)s, %(clusterLabel)s="$cluster"}))) / scalar(sum(max without (fstype, mountpoint) (node_filesystem_size_bytes{%(nodeExporterSelector)s, %(fsSelector)s, %(fsMountpointSelector)s, %(clusterLabel)s="$cluster"})))
||| % $._config, legendFormat='{{instance}}' ||| % $._config
)) ) + prometheus.withLegendFormat('{{ instance }}'),
) ]),
]),
], panelWidth=24, panelHeight=7, startY=34),
), ),
} + } +
if $._config.showMultiCluster then { if $._config.showMultiCluster then {
'node-multicluster-rsrc-use.json': 'node-multicluster-rsrc-use.json':
dashboard.new( dashboard.new(
'%sUSE Method / Multi-cluster' % $._config.dashboardNamePrefix, '%sUSE Method / Multi-cluster' % $._config.dashboardNamePrefix,
time_from='now-1h',
tags=($._config.dashboardTags),
timezone='utc',
refresh='30s',
graphTooltip='shared_crosshair',
uid=std.md5('node-multicluster-rsrc-use.json')
) )
.addTemplate(datasourceTemplate) + dashboard.time.withFrom('now-1h')
.addRow( + dashboard.withTags($._config.dashboardTags)
+ dashboard.withTimezone('utc')
+ dashboard.withRefresh('30s')
+ dashboard.graphTooltip.withSharedCrosshair()
+ dashboard.withUid(std.md5('node-multicluster-rsrc-use.json'))
+ dashboard.withVariables([
datasource,
variable.query.withDatasourceFromVariable(datasource)
+ variable.query.refresh.onTime()
+ variable.query.withSort(asc=true),
])
+ dashboard.withPanels(
grafana.util.grid.makeGrid([
row.new('CPU') row.new('CPU')
.addPanel( + row.withPanels([
CPUUtilisation CPUUtilisation + tsQueryOptions.withTargets([
.addTarget(prometheus.target( prometheus.new(
'$datasource',
||| |||
sum( sum(
(( ((
@@ -346,112 +351,116 @@ local diskSpaceUtilisation =
) != 0) ) != 0)
/ scalar(sum(instance:node_num_cpu:sum{%(nodeExporterSelector)s})) / scalar(sum(instance:node_num_cpu:sum{%(nodeExporterSelector)s}))
) by (%(clusterLabel)s) ) by (%(clusterLabel)s)
||| % $._config, legendFormat='{{%(clusterLabel)s}}' % $._config ||| % $._config
)) ) + prometheus.withLegendFormat('{{%(clusterLabel)s}}'),
) ]),
.addPanel( CPUSaturation + tsQueryOptions.withTargets([
CPUSaturation prometheus.new(
.addTarget(prometheus.target( '$datasource',
||| |||
sum(( sum((
instance:node_load1_per_cpu:ratio{%(nodeExporterSelector)s} instance:node_load1_per_cpu:ratio{%(nodeExporterSelector)s}
/ scalar(count(instance:node_load1_per_cpu:ratio{%(nodeExporterSelector)s})) / scalar(count(instance:node_load1_per_cpu:ratio{%(nodeExporterSelector)s}))
) != 0) by (%(clusterLabel)s) ) != 0) by (%(clusterLabel)s)
||| % $._config, legendFormat='{{%(clusterLabel)s}}' % $._config ||| % $._config
)) ) + prometheus.withLegendFormat('{{%(clusterLabel)s}}'),
) ]),
) ]),
.addRow(
row.new('Memory') row.new('Memory')
.addPanel( + row.withPanels([
memoryUtilisation memoryUtilisation + tsQueryOptions.withTargets([
.addTarget(prometheus.target( prometheus.new(
'$datasource',
||| |||
sum(( sum((
instance:node_memory_utilisation:ratio{%(nodeExporterSelector)s} instance:node_memory_utilisation:ratio{%(nodeExporterSelector)s}
/ scalar(count(instance:node_memory_utilisation:ratio{%(nodeExporterSelector)s})) / scalar(count(instance:node_memory_utilisation:ratio{%(nodeExporterSelector)s}))
) != 0) by (%(clusterLabel)s) ) != 0) by (%(clusterLabel)s)
||| % $._config, legendFormat='{{%(clusterLabel)s}}' % $._config ||| % $._config
)) ) + prometheus.withLegendFormat('{{%(clusterLabel)s}}'),
) ]),
.addPanel( memorySaturation + tsQueryOptions.withTargets([
memorySaturation prometheus.new(
.addTarget(prometheus.target( '$datasource',
||| |||
sum(( sum((
instance:node_vmstat_pgmajfault:rate%(rateInterval)s{%(nodeExporterSelector)s} instance:node_vmstat_pgmajfault:rate%(rateInterval)s{%(nodeExporterSelector)s}
) != 0) by (%(clusterLabel)s) ) != 0) by (%(clusterLabel)s)
||| % $._config, legendFormat='{{%(clusterLabel)s}}' % $._config |||
)) % $._config
) ) + prometheus.withLegendFormat('{{%(clusterLabel)s}}'),
) ]),
.addRow( ]),
row.new('Network') row.new('Network')
.addPanel( + row.withPanels([
networkUtilisation networkUtilisation + tsQueryOptions.withTargets([
.addTarget(prometheus.target( prometheus.new(
'$datasource',
||| |||
sum(( sum((
instance:node_network_receive_bytes_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s} instance:node_network_receive_bytes_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s}
) != 0) by (%(clusterLabel)s) ) != 0) by (%(clusterLabel)s)
||| % $._config, legendFormat='{{%(clusterLabel)s}} Receive' % $._config ||| % $._config
)) ) + prometheus.withLegendFormat('{{%(clusterLabel)s}} Receive'),
.addTarget(prometheus.target( prometheus.new(
'$datasource',
||| |||
sum(( sum((
instance:node_network_transmit_bytes_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s} instance:node_network_transmit_bytes_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s}
) != 0) by (%(clusterLabel)s) ) != 0) by (%(clusterLabel)s)
||| % $._config, legendFormat='{{%(clusterLabel)s}} Transmit' % $._config ||| % $._config
)) ) + prometheus.withLegendFormat('{{%(clusterLabel)s}} Transmit'),
) ]),
.addPanel( networkSaturation + tsQueryOptions.withTargets([
networkSaturation prometheus.new(
.addTarget(prometheus.target( '$datasource',
||| |||
sum(( sum((
instance:node_network_receive_drop_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s} instance:node_network_receive_drop_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s}
) != 0) by (%(clusterLabel)s) ) != 0) by (%(clusterLabel)s)
||| % $._config, legendFormat='{{%(clusterLabel)s}} Receive' % $._config ||| % $._config
)) ) + prometheus.withLegendFormat('{{%(clusterLabel)s}} Receive'),
.addTarget(prometheus.target( prometheus.new(
'$datasource',
||| |||
sum(( sum((
instance:node_network_transmit_drop_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s} instance:node_network_transmit_drop_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s}
) != 0) by (%(clusterLabel)s) ) != 0) by (%(clusterLabel)s)
||| % $._config, legendFormat='{{%(clusterLabel)s}} Transmit' % $._config ||| % $._config
)) ) + prometheus.withLegendFormat('{{%(clusterLabel)s}} Transmit'),
) ]),
) ]),
.addRow(
row.new('Disk IO') row.new('Disk IO')
.addPanel( + row.withPanels([
diskIOUtilisation diskIOUtilisation + tsQueryOptions.withTargets([
.addTarget(prometheus.target( prometheus.new(
'$datasource',
||| |||
sum(( sum((
instance_device:node_disk_io_time_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s} instance_device:node_disk_io_time_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s}
/ scalar(count(instance_device:node_disk_io_time_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s})) / scalar(count(instance_device:node_disk_io_time_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s}))
) != 0) by (%(clusterLabel)s, device) ) != 0) by (%(clusterLabel)s, device)
||| % $._config, legendFormat='{{%(clusterLabel)s}} {{device}}' % $._config ||| % $._config
)) ) + prometheus.withLegendFormat('{{%(clusterLabel)s}} {{device}}'),
) ]),
.addPanel( diskIOSaturation + tsQueryOptions.withTargets([prometheus.new(
diskIOSaturation '$datasource',
.addTarget(prometheus.target(
||| |||
sum(( sum((
instance_device:node_disk_io_time_weighted_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s} instance_device:node_disk_io_time_weighted_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s}
/ scalar(count(instance_device:node_disk_io_time_weighted_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s})) / scalar(count(instance_device:node_disk_io_time_weighted_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s}))
) != 0) by (%(clusterLabel)s, device) ) != 0) by (%(clusterLabel)s, device)
||| % $._config, legendFormat='{{%(clusterLabel)s}} {{device}}' % $._config ||| % $._config
)) ) + prometheus.withLegendFormat('{{%(clusterLabel)s}} {{device}}')]),
) ]),
)
.addRow( ], panelWidth=12, panelHeight=7)
+ grafana.util.grid.makeGrid([
row.new('Disk Space') row.new('Disk Space')
.addPanel( + row.withPanels([
diskSpaceUtilisation diskSpaceUtilisation + tsQueryOptions.withTargets([
.addTarget(prometheus.target( prometheus.new(
'$datasource',
||| |||
sum ( sum (
sum without (device) ( sum without (device) (
@@ -461,9 +470,11 @@ local diskSpaceUtilisation =
) )
/ scalar(sum(max without (fstype, mountpoint) (node_filesystem_size_bytes{%(nodeExporterSelector)s, %(fsSelector)s, %(fsMountpointSelector)s}))) / scalar(sum(max without (fstype, mountpoint) (node_filesystem_size_bytes{%(nodeExporterSelector)s, %(fsSelector)s, %(fsMountpointSelector)s})))
) by (%(clusterLabel)s) ) by (%(clusterLabel)s)
||| % $._config, legendFormat='{{%(clusterLabel)s}}' % $._config ||| % $._config
)) ) + prometheus.withLegendFormat('{{%(clusterLabel)s}}'),
) ]),
]),
], panelWidth=24, panelHeight=7, startY=34),
), ),
} else {}, } else {},
} }
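
One structural difference worth calling out for the dashboards above: grafonnet-lib derived the layout from `.addRow()`/`.addPanel()`, whereas the new library places panels explicitly, here via `grafana.util.grid.makeGrid(rows, panelWidth, panelHeight, startY)`. That is why the full-width "Disk Space" row is appended as a second grid with `panelWidth=24` and an explicit `startY`. A small self-contained sketch of that layout approach (demo titles and the `startY` value are illustrative; the real rows are as in the diff above):

```jsonnet
// Layout sketch only; the actual rows/panels are defined as in the diff above.
local grafana = import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonnet';
local dashboard = grafana.dashboard;
local row = grafana.panel.row;
local timeSeriesPanel = grafana.panel.timeSeries;

local cpuRow =
  row.new('CPU')
  + row.withPanels([timeSeriesPanel.new('CPU Utilisation'), timeSeriesPanel.new('CPU Saturation')]);
local diskSpaceRow =
  row.new('Disk Space')
  + row.withPanels([timeSeriesPanel.new('Disk Space Utilisation')]);

dashboard.new('Layout demo')
+ dashboard.withPanels(
  // Two 12-unit-wide, 7-unit-tall panels per row ...
  grafana.util.grid.makeGrid([cpuRow], panelWidth=12, panelHeight=7)
  // ... then one full-width row, pushed below the first grid with startY.
  + grafana.util.grid.makeGrid([diskSpaceRow], panelWidth=24, panelHeight=7, startY=10)
)
```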


@@ -4,20 +4,11 @@
     {
       "source": {
         "git": {
-          "remote": "https://github.com/grafana/grafonnet-lib.git",
-          "subdir": "grafonnet"
+          "remote": "https://github.com/grafana/grafonnet.git",
+          "subdir": "gen/grafonnet-latest"
         }
       },
-      "version": "master"
-    },
-    {
-      "source": {
-        "git": {
-          "remote": "https://github.com/grafana/grafonnet-lib.git",
-          "subdir": "grafonnet-7.0"
-        }
-      },
-      "version": "master"
+      "version": "main"
     }
   ],
   "legacyImports": false


@@ -1,76 +1,85 @@
-local grafana = import 'github.com/grafana/grafonnet-lib/grafonnet/grafana.libsonnet';
+local grafana = import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonnet';
 local dashboard = grafana.dashboard;
-local row = grafana.row;
-local prometheus = grafana.prometheus;
-local template = grafana.template;
-local graphPanel = grafana.graphPanel;
-local grafana70 = import 'github.com/grafana/grafonnet-lib/grafonnet-7.0/grafana.libsonnet';
-local gaugePanel = grafana70.panel.gauge;
-local table = grafana70.panel.table;
+local row = grafana.panel.row;
+local prometheus = grafana.query.prometheus;
+local variable = dashboard.variable;
+local timeSeriesPanel = grafana.panel.timeSeries;
+local tsOptions = timeSeriesPanel.options;
+local tsStandardOptions = timeSeriesPanel.standardOptions;
+local tsQueryOptions = timeSeriesPanel.queryOptions;
+local tsCustom = timeSeriesPanel.fieldConfig.defaults.custom;
+local gaugePanel = grafana.panel.gauge;
+local gaugeStep = gaugePanel.standardOptions.threshold.step;
+local table = grafana.panel.table;
+local tableStep = table.standardOptions.threshold.step;
+local tableOverride = table.standardOptions.override;
+local tableTransformation = table.queryOptions.transformation;
{ {
new(config=null, platform=null, uid=null):: { new(config=null, platform=null, uid=null):: {
local prometheusDatasourceTemplate = { local prometheusDatasourceVariable = variable.datasource.new(
current: { 'datasource', 'prometheus'
text: 'default',
value: 'default',
},
hide: 0,
label: 'Data Source',
name: 'datasource',
options: [],
query: 'prometheus',
refresh: 1,
regex: '',
type: 'datasource',
},
local clusterTemplatePrototype =
template.new(
'cluster',
'$datasource',
'',
hide=if config.showMultiCluster then '' else '2',
refresh='time',
label='Cluster',
), ),
local clusterTemplate =
if platform == 'Darwin' then
clusterTemplatePrototype
{ query: 'label_values(node_uname_info{%(nodeExporterSelector)s, sysname="Darwin"}, %(clusterLabel)s)' % config }
else
clusterTemplatePrototype
{ query: 'label_values(node_uname_info{%(nodeExporterSelector)s, sysname!="Darwin"}, %(clusterLabel)s)' % config },
local instanceTemplatePrototype = local clusterVariablePrototype =
template.new( variable.query.new('cluster')
+ variable.query.withDatasourceFromVariable(prometheusDatasourceVariable)
+ (if config.showMultiCluster then variable.query.generalOptions.showOnDashboard.withLabelAndValue() else variable.query.generalOptions.showOnDashboard.withNothing())
+ variable.query.refresh.onTime()
+ variable.query.generalOptions.withLabel('Cluster'),
local clusterVariable =
if platform == 'Darwin' then
clusterVariablePrototype
+ variable.query.queryTypes.withLabelValues(
' %(clusterLabel)s' % config,
'node_uname_info{%(nodeExporterSelector)s, sysname="Darwin"}' % config,
)
else
clusterVariablePrototype
+ variable.query.queryTypes.withLabelValues(
'%(clusterLabel)s' % config,
'node_uname_info{%(nodeExporterSelector)s, sysname!="Darwin"}' % config,
),
local instanceVariablePrototype =
variable.query.new('instance')
+ variable.query.withDatasourceFromVariable(prometheusDatasourceVariable)
+ variable.query.refresh.onTime()
+ variable.query.generalOptions.withLabel('Instance'),
local instanceVariable =
if platform == 'Darwin' then
instanceVariablePrototype
+ variable.query.queryTypes.withLabelValues(
'instance', 'instance',
'$datasource', 'node_uname_info{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster", sysname="Darwin"}' % config,
'', )
refresh='time',
label='Instance',
),
local instanceTemplate =
if platform == 'Darwin' then
instanceTemplatePrototype
{ query: 'label_values(node_uname_info{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster", sysname="Darwin"}, instance)' % config }
else else
instanceTemplatePrototype instanceVariablePrototype
{ query: 'label_values(node_uname_info{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster", sysname!="Darwin"}, instance)' % config }, + variable.query.queryTypes.withLabelValues(
'instance',
'node_uname_info{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster", sysname!="Darwin"}' % config,
),
local idleCPU = local idleCPU =
graphPanel.new( timeSeriesPanel.new('CPU Usage')
'CPU Usage', + variable.query.withDatasourceFromVariable(prometheusDatasourceVariable)
datasource='$datasource', + tsStandardOptions.withUnit('percentunit')
span=6, + tsCustom.stacking.withMode('normal')
format='percentunit', + tsStandardOptions.withMax(1)
max=1, + tsStandardOptions.withMin(0)
min=0, + tsOptions.tooltip.withMode('multi')
stack=true, + tsCustom.withFillOpacity(10)
) + tsCustom.withShowPoints('never')
.addTarget(prometheus.target( + tsQueryOptions.withTargets([
prometheus.new(
'$datasource',
||| |||
( (
(1 - sum without (mode) (rate(node_cpu_seconds_total{%(nodeExporterSelector)s, mode=~"idle|iowait|steal", instance="$instance", %(clusterLabel)s="$cluster"}[$__rate_interval]))) (1 - sum without (mode) (rate(node_cpu_seconds_total{%(nodeExporterSelector)s, mode=~"idle|iowait|steal", instance="$instance", %(clusterLabel)s="$cluster"}[$__rate_interval])))
@@ -78,36 +87,42 @@ local table = grafana70.panel.table;
count without (cpu, mode) (node_cpu_seconds_total{%(nodeExporterSelector)s, mode="idle", instance="$instance", %(clusterLabel)s="$cluster"}) count without (cpu, mode) (node_cpu_seconds_total{%(nodeExporterSelector)s, mode="idle", instance="$instance", %(clusterLabel)s="$cluster"})
) )
||| % config, ||| % config,
legendFormat='{{cpu}}', )
intervalFactor=5, + prometheus.withLegendFormat('{{cpu}}')
)), + prometheus.withIntervalFactor(5),
]),
local systemLoad = local systemLoad =
graphPanel.new( timeSeriesPanel.new('Load Average')
'Load Average', + variable.query.withDatasourceFromVariable(prometheusDatasourceVariable)
datasource='$datasource', + tsStandardOptions.withUnit('short')
span=6, + tsStandardOptions.withMin(0)
format='short', + tsCustom.withFillOpacity(0)
min=0, + tsCustom.withShowPoints('never')
fill=0, + tsOptions.tooltip.withMode('multi')
) + tsQueryOptions.withTargets([
.addTarget(prometheus.target('node_load1{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config, legendFormat='1m load average')) prometheus.new('$datasource', 'node_load1{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config) + prometheus.withLegendFormat('1m load average'),
.addTarget(prometheus.target('node_load5{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config, legendFormat='5m load average')) prometheus.new('$datasource', 'node_load5{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config) + prometheus.withLegendFormat('5m load average'),
.addTarget(prometheus.target('node_load15{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config, legendFormat='15m load average')) prometheus.new('$datasource', 'node_load15{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config) + prometheus.withLegendFormat('15m load average'),
.addTarget(prometheus.target('count(node_cpu_seconds_total{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", mode="idle"})' % config, legendFormat='logical cores')), prometheus.new('$datasource', 'count(node_cpu_seconds_total{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", mode="idle"})' % config) + prometheus.withLegendFormat('logical cores'),
]),
local memoryGraphPanelPrototype = local memoryGraphPanelPrototype =
graphPanel.new( timeSeriesPanel.new('Memory Usage')
'Memory Usage', + variable.query.withDatasourceFromVariable(prometheusDatasourceVariable)
datasource='$datasource', + tsStandardOptions.withUnit('bytes')
span=9, + tsStandardOptions.withMin(0)
format='bytes', + tsOptions.tooltip.withMode('multi')
min=0, + tsCustom.withFillOpacity(10)
), + tsCustom.withShowPoints('never'),
local memoryGraph = local memoryGraph =
if platform == 'Linux' then if platform == 'Linux' then
memoryGraphPanelPrototype { stack: true } memoryGraphPanelPrototype
.addTarget(prometheus.target( + tsCustom.stacking.withMode('normal')
+ tsQueryOptions.withTargets([
prometheus.new(
'$datasource',
||| |||
( (
node_memory_MemTotal_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} node_memory_MemTotal_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}
@@ -119,16 +134,19 @@ local table = grafana70.panel.table;
node_memory_Cached_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} node_memory_Cached_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}
) )
||| % config, ||| % config,
legendFormat='memory used' ) + prometheus.withLegendFormat('memory used'),
)) prometheus.new('$datasource', 'node_memory_Buffers_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config) + prometheus.withLegendFormat('memory buffers'),
.addTarget(prometheus.target('node_memory_Buffers_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config, legendFormat='memory buffers')) prometheus.new('$datasource', 'node_memory_Cached_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config) + prometheus.withLegendFormat('memory cached'),
.addTarget(prometheus.target('node_memory_Cached_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config, legendFormat='memory cached')) prometheus.new('$datasource', 'node_memory_MemFree_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config) + prometheus.withLegendFormat('memory free'),
.addTarget(prometheus.target('node_memory_MemFree_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config, legendFormat='memory free')) ])
else if platform == 'Darwin' then else if platform == 'Darwin' then
// not useful to stack // not useful to stack
memoryGraphPanelPrototype { stack: false } memoryGraphPanelPrototype
.addTarget(prometheus.target('node_memory_total_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config, legendFormat='Physical Memory')) + tsCustom.stacking.withMode('none')
.addTarget(prometheus.target( + tsQueryOptions.withTargets([
prometheus.new('$datasource', 'node_memory_total_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config) + prometheus.withLegendFormat('Physical Memory'),
prometheus.new(
'$datasource',
||| |||
( (
node_memory_internal_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} - node_memory_internal_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} -
@@ -136,50 +154,61 @@ local table = grafana70.panel.table;
node_memory_wired_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} + node_memory_wired_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} +
node_memory_compressed_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} node_memory_compressed_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}
) )
||| % config, legendFormat='Memory Used' ||| % config
)) ) + prometheus.withLegendFormat(
.addTarget(prometheus.target( 'Memory Used'
),
prometheus.new(
'$datasource',
||| |||
( (
node_memory_internal_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} - node_memory_internal_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} -
node_memory_purgeable_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} node_memory_purgeable_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}
) )
||| % config, legendFormat='App Memory' ||| % config
)) ) + prometheus.withLegendFormat(
.addTarget(prometheus.target('node_memory_wired_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config, legendFormat='Wired Memory')) 'App Memory'
.addTarget(prometheus.target('node_memory_compressed_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config, legendFormat='Compressed')) ),
prometheus.new('$datasource', 'node_memory_wired_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config) + prometheus.withLegendFormat('Wired Memory'),
prometheus.new('$datasource', 'node_memory_compressed_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config) + prometheus.withLegendFormat('Compressed'),
])
else if platform == 'AIX' then else if platform == 'AIX' then
memoryGraphPanelPrototype { stack: false } memoryGraphPanelPrototype
.addTarget(prometheus.target('node_memory_total_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config, legendFormat='Physical Memory')) + tsCustom.stacking.withMode('none')
.addTarget(prometheus.target( + tsQueryOptions.withTargets([
prometheus.new('$datasource', 'node_memory_total_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}' % config) + prometheus.withLegendFormat('Physical Memory'),
prometheus.new(
'$datasource',
||| |||
( (
node_memory_total_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} - node_memory_total_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} -
node_memory_available_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} node_memory_available_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"}
) )
||| % config, legendFormat='Memory Used' ||| % config
)), ) + prometheus.withLegendFormat('Memory Used'),
]),
// NOTE: avg() is used to circumvent a label change caused by a node_exporter rollout. // NOTE: avg() is used to circumvent a label change caused by a node_exporter rollout.
local memoryGaugePanelPrototype = local memoryGaugePanelPrototype =
gaugePanel.new( gaugePanel.new('Memory Usage')
title='Memory Usage', + variable.query.withDatasourceFromVariable(prometheusDatasourceVariable)
datasource='$datasource', + gaugePanel.standardOptions.thresholds.withSteps([
) gaugeStep.withColor('rgba(50, 172, 45, 0.97)'),
.addThresholdStep('rgba(50, 172, 45, 0.97)') gaugeStep.withColor('rgba(237, 129, 40, 0.89)') + gaugeStep.withValue(80),
.addThresholdStep('rgba(237, 129, 40, 0.89)', 80) gaugeStep.withColor('rgba(245, 54, 54, 0.9)') + gaugeStep.withValue(90),
.addThresholdStep('rgba(245, 54, 54, 0.9)', 90) ])
.setFieldConfig(max=100, min=0, unit='percent') + gaugePanel.standardOptions.withMax(100)
+ { + gaugePanel.standardOptions.withMin(0)
span: 3, + gaugePanel.standardOptions.withUnit('percent'),
},
local memoryGauge = local memoryGauge =
if platform == 'Linux' then if platform == 'Linux' then
memoryGaugePanelPrototype memoryGaugePanelPrototype
+ gaugePanel.queryOptions.withTargets([
.addTarget(prometheus.target( prometheus.new(
'$datasource',
||| |||
100 - 100 -
( (
@@ -188,11 +217,14 @@ local table = grafana70.panel.table;
* 100 * 100
) )
||| % config, ||| % config,
)) ),
])
else if platform == 'Darwin' then else if platform == 'Darwin' then
memoryGaugePanelPrototype memoryGaugePanelPrototype
.addTarget(prometheus.target( + gaugePanel.queryOptions.withTargets([
prometheus.new(
'$datasource',
||| |||
( (
( (
@@ -206,10 +238,14 @@ local table = grafana70.panel.table;
* *
100 100
||| % config ||| % config
)) ),
])
else if platform == 'AIX' then else if platform == 'AIX' then
memoryGaugePanelPrototype memoryGaugePanelPrototype
.addTarget(prometheus.target( + gaugePanel.queryOptions.withTargets([
prometheus.new(
'$datasource',
||| |||
100 - 100 -
( (
@@ -218,156 +254,94 @@ local table = grafana70.panel.table;
* 100 * 100
) )
||| % config ||| % config
)), ),
]),
local diskIO = local diskIO =
graphPanel.new( timeSeriesPanel.new('Disk I/O')
'Disk I/O', + variable.query.withDatasourceFromVariable(prometheusDatasourceVariable)
datasource='$datasource', + tsStandardOptions.withMin(0)
span=6, + tsCustom.withFillOpacity(0)
min=0, + tsCustom.withShowPoints('never')
fill=0, + tsOptions.tooltip.withMode('multi')
) + tsQueryOptions.withTargets([
// TODO: Does it make sense to have those three in the same panel? // TODO: Does it make sense to have those three in the same panel?
.addTarget(prometheus.target( prometheus.new('$datasource', 'rate(node_disk_read_bytes_total{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", %(diskDeviceSelector)s}[$__rate_interval])' % config)
'rate(node_disk_read_bytes_total{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", %(diskDeviceSelector)s}[$__rate_interval])' % config, + prometheus.withLegendFormat('{{device}} read')
legendFormat='{{device}} read', + prometheus.withIntervalFactor(1),
intervalFactor=1, prometheus.new('$datasource', 'rate(node_disk_written_bytes_total{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", %(diskDeviceSelector)s}[$__rate_interval])' % config)
)) + prometheus.withLegendFormat('{{device}} written')
.addTarget(prometheus.target( + prometheus.withIntervalFactor(1),
'rate(node_disk_written_bytes_total{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", %(diskDeviceSelector)s}[$__rate_interval])' % config, prometheus.new('$datasource', 'rate(node_disk_io_time_seconds_total{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", %(diskDeviceSelector)s}[$__rate_interval])' % config)
legendFormat='{{device}} written', + prometheus.withLegendFormat('{{device}} io time')
intervalFactor=1, + prometheus.withIntervalFactor(1),
)) ])
.addTarget(prometheus.target( + tsStandardOptions.withOverrides(
'rate(node_disk_io_time_seconds_total{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", %(diskDeviceSelector)s}[$__rate_interval])' % config, [
legendFormat='{{device}} io time', tsStandardOptions.override.byRegexp.new('/ read| written/')
intervalFactor=1, + tsStandardOptions.override.byRegexp.withPropertiesFromOptions(
)) + tsStandardOptions.withUnit('Bps')
{ ),
seriesOverrides: [ tsStandardOptions.override.byRegexp.new('/ io time/')
{ + tsStandardOptions.override.byRegexp.withPropertiesFromOptions(tsStandardOptions.withUnit('percentunit')),
alias: '/ read| written/', ]
yaxis: 1, ),
},
{
alias: '/ io time/',
yaxis: 2,
},
],
yaxes: [
self.yaxe(format='Bps'),
self.yaxe(format='percentunit'),
],
},
local diskSpaceUsage = local diskSpaceUsage =
table.new( table.new('Disk Space Usage')
title='Disk Space Usage', + variable.query.withDatasourceFromVariable(prometheusDatasourceVariable)
datasource='$datasource', + table.standardOptions.withUnit('decbytes')
) + table.standardOptions.thresholds.withSteps(
.setFieldConfig(unit='decbytes') [
.addThresholdStep(color='green', value=null) tableStep.withColor('green'),
.addThresholdStep(color='yellow', value=0.8) tableStep.withColor('yellow') + gaugeStep.withValue(0.8),
.addThresholdStep(color='red', value=0.9) tableStep.withColor('red') + gaugeStep.withValue(0.9),
.addTarget(prometheus.target(
|||
max by (mountpoint) (node_filesystem_size_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", %(fsSelector)s, %(fsMountpointSelector)s})
||| % config,
legendFormat='',
instant=true,
format='table'
))
.addTarget(prometheus.target(
|||
max by (mountpoint) (node_filesystem_avail_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", %(fsSelector)s, %(fsMountpointSelector)s})
||| % config,
legendFormat='',
instant=true,
format='table'
))
.addOverride(
matcher={
id: 'byName',
options: 'Mounted on',
},
properties=[
{
id: 'custom.width',
value: 260,
},
],
)
.addOverride(
matcher={
id: 'byName',
options: 'Size',
},
properties=[
{
id: 'custom.width',
value: 93,
},
],
)
.addOverride(
matcher={
id: 'byName',
options: 'Used',
},
properties=[
{
id: 'custom.width',
value: 72,
},
],
)
.addOverride(
matcher={
id: 'byName',
options: 'Available',
},
properties=[
{
id: 'custom.width',
value: 88,
},
],
)
.addOverride(
matcher={
id: 'byName',
options: 'Used, %',
},
properties=[
{
id: 'unit',
value: 'percentunit',
},
{
id: 'custom.displayMode',
value: 'gradient-gauge',
},
{
id: 'max',
value: 1,
},
{
id: 'min',
value: 0,
},
] ]
) )
+ { span: 6 } + table.queryOptions.withTargets([
+ { prometheus.new(
transformations: [ '$datasource',
|||
max by (mountpoint) (node_filesystem_size_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", %(fsSelector)s, %(fsMountpointSelector)s})
||| % config
)
+ prometheus.withLegendFormat('')
+ prometheus.withInstant()
+ prometheus.withFormat('table'),
prometheus.new(
'$datasource',
|||
max by (mountpoint) (node_filesystem_avail_bytes{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", %(fsSelector)s, %(fsMountpointSelector)s})
||| % config
)
+ prometheus.withLegendFormat('')
+ prometheus.withInstant()
+ prometheus.withFormat('table'),
])
+ table.standardOptions.withOverrides([
tableOverride.byName.new('Mounted on')
+ tableOverride.byName.withProperty('custom.width', 260),
tableOverride.byName.new('Size')
+ tableOverride.byName.withProperty('custom.width', 93),
tableOverride.byName.new('Used')
+ tableOverride.byName.withProperty('custom.width', 72),
tableOverride.byName.new('Available')
+ tableOverride.byName.withProperty('custom.width', 88),
tableOverride.byName.new('Used, %')
+ tableOverride.byName.withProperty('unit', 'percentunit')
+ tableOverride.byName.withPropertiesFromOptions(
table.fieldConfig.defaults.custom.withCellOptions(
{ type: 'gauge' },
)
)
+ tableOverride.byName.withProperty('max', 1)
+ tableOverride.byName.withProperty('min', 0),
])
+ table.queryOptions.withTransformations([
tableTransformation.withId('groupBy')
+ tableTransformation.withOptions(
{ {
id: 'groupBy',
options: {
fields: { fields: {
'Value #A': { 'Value #A': {
aggregations: [ aggregations: [
@@ -386,15 +360,12 @@ local table = grafana70.panel.table;
operation: 'groupby', operation: 'groupby',
}, },
}, },
}, }
}, ),
tableTransformation.withId('merge'),
tableTransformation.withId('calculateField')
+ tableTransformation.withOptions(
{ {
id: 'merge',
options: {},
},
{
id: 'calculateField',
options: {
alias: 'Used', alias: 'Used',
binary: { binary: {
left: 'Value #A (lastNotNull)', left: 'Value #A (lastNotNull)',
@@ -406,11 +377,11 @@ local table = grafana70.panel.table;
reduce: { reduce: {
reducer: 'sum', reducer: 'sum',
}, },
}, }
}, ),
tableTransformation.withId('calculateField')
+ tableTransformation.withOptions(
{ {
id: 'calculateField',
options: {
alias: 'Used, %', alias: 'Used, %',
binary: { binary: {
left: 'Used', left: 'Used',
@@ -422,11 +393,11 @@ local table = grafana70.panel.table;
reduce: { reduce: {
reducer: 'sum', reducer: 'sum',
}, },
}, }
}, ),
tableTransformation.withId('organize')
+ tableTransformation.withOptions(
{ {
id: 'organize',
options: {
excludeByName: {}, excludeByName: {},
indexByName: {}, indexByName: {},
renameByName: { renameByName: {
@@ -434,127 +405,131 @@ local table = grafana70.panel.table;
'Value #B (lastNotNull)': 'Available', 'Value #B (lastNotNull)': 'Available',
mountpoint: 'Mounted on', mountpoint: 'Mounted on',
}, },
}, }
}, ),
tableTransformation.withId('sortBy')
+ tableTransformation.withOptions(
{ {
id: 'sortBy',
options: {
fields: {}, fields: {},
sort: [ sort: [
{ {
field: 'Mounted on', field: 'Mounted on',
}, },
], ],
}, }
}, ),
],
},
]),
local networkReceived = local networkReceived =
graphPanel.new( timeSeriesPanel.new('Network Received')
'Network Received', + timeSeriesPanel.panelOptions.withDescription('Network received (bits/s)')
description='Network received (bits/s)', + variable.query.withDatasourceFromVariable(prometheusDatasourceVariable)
datasource='$datasource', + tsStandardOptions.withUnit('bps')
span=6, + tsStandardOptions.withMin(0)
format='bps', + tsCustom.withFillOpacity(0)
min=0, + tsCustom.withShowPoints('never')
fill=0, + tsOptions.tooltip.withMode('multi')
) + tsQueryOptions.withTargets([
.addTarget(prometheus.target( prometheus.new('$datasource', 'rate(node_network_receive_bytes_total{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", device!="lo"}[$__rate_interval]) * 8' % config)
'rate(node_network_receive_bytes_total{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", device!="lo"}[$__rate_interval]) * 8' % config, + prometheus.withLegendFormat('{{device}}')
legendFormat='{{device}}', + prometheus.withIntervalFactor(1),
intervalFactor=1, ]),
)),
local networkTransmitted = local networkTransmitted =
graphPanel.new( timeSeriesPanel.new('Network Transmitted')
'Network Transmitted', + timeSeriesPanel.panelOptions.withDescription('Network transmitted (bits/s)')
description='Network transmitted (bits/s)', + variable.query.withDatasourceFromVariable(prometheusDatasourceVariable)
datasource='$datasource', + tsStandardOptions.withUnit('bps')
span=6, + tsStandardOptions.withMin(0)
format='bps', + tsCustom.withFillOpacity(0)
min=0, + tsOptions.tooltip.withMode('multi')
fill=0, + tsQueryOptions.withTargets([
) prometheus.new('$datasource', 'rate(node_network_transmit_bytes_total{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", device!="lo"}[$__rate_interval]) * 8' % config)
.addTarget(prometheus.target( + prometheus.withLegendFormat('{{device}}')
'rate(node_network_transmit_bytes_total{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster", device!="lo"}[$__rate_interval]) * 8' % config, + prometheus.withIntervalFactor(1),
legendFormat='{{device}}', ]),
intervalFactor=1,
)),
local cpuRow = local cpuRow =
row.new('CPU') row.new('CPU')
.addPanel(idleCPU) + row.withPanels([
.addPanel(systemLoad), idleCPU,
systemLoad,
]),
local memoryRow = local memoryRow = [
row.new('Memory') row.new('Memory') + row.gridPos.withY(8),
.addPanel(memoryGraph) memoryGraph + row.gridPos.withX(0) + row.gridPos.withY(9) + row.gridPos.withH(7) + row.gridPos.withW(18),
.addPanel(memoryGauge), memoryGauge + row.gridPos.withX(18) + row.gridPos.withY(9) + row.gridPos.withH(7) + row.gridPos.withW(6),
],
local diskRow = local diskRow =
row.new('Disk') row.new('Disk')
.addPanel(diskIO) + row.withPanels([
.addPanel(diskSpaceUsage), diskIO,
diskSpaceUsage,
]),
local networkRow = local networkRow =
row.new('Network') row.new('Network')
.addPanel(networkReceived) + row.withPanels([
.addPanel(networkTransmitted), networkReceived,
networkTransmitted,
]),
local rows = local panels =
[ grafana.util.grid.makeGrid([
cpuRow, cpuRow,
memoryRow, ], panelWidth=12, panelHeight=7)
+ memoryRow
+ grafana.util.grid.makeGrid([
diskRow, diskRow,
networkRow, networkRow,
], ], panelWidth=12, panelHeight=7, startY=18),
local templates = local variables =
[ [
prometheusDatasourceTemplate, prometheusDatasourceVariable,
clusterTemplate, clusterVariable,
instanceTemplate, instanceVariable,
], ],
dashboard: if platform == 'Linux' then dashboard: if platform == 'Linux' then
dashboard.new( dashboard.new(
'%sNodes' % config.dashboardNamePrefix, '%sNodes' % config.dashboardNamePrefix,
time_from='now-1h',
tags=(config.dashboardTags),
timezone='utc',
refresh='30s',
uid=std.md5(uid),
graphTooltip='shared_crosshair'
) )
.addTemplates(templates) + dashboard.time.withFrom('now-1h')
.addRows(rows) + dashboard.withTags(config.dashboardTags)
+ dashboard.withTimezone('utc')
+ dashboard.withRefresh('30s')
+ dashboard.withUid(std.md5(uid))
+ dashboard.graphTooltip.withSharedCrosshair()
+ dashboard.withVariables(variables)
+ dashboard.withPanels(panels)
else if platform == 'Darwin' then else if platform == 'Darwin' then
dashboard.new( dashboard.new(
'%sMacOS' % config.dashboardNamePrefix, '%sMacOS' % config.dashboardNamePrefix,
time_from='now-1h',
tags=(config.dashboardTags),
timezone='utc',
refresh='30s',
uid=std.md5(uid),
graphTooltip='shared_crosshair'
) )
.addTemplates(templates) + dashboard.time.withFrom('now-1h')
.addRows(rows) + dashboard.withTags(config.dashboardTags)
+ dashboard.withTimezone('utc')
+ dashboard.withRefresh('30s')
+ dashboard.withUid(std.md5(uid))
+ dashboard.graphTooltip.withSharedCrosshair()
+ dashboard.withVariables(variables)
+ dashboard.withPanels(panels)
else if platform == 'AIX' then else if platform == 'AIX' then
dashboard.new( dashboard.new(
'%sAIX' % config.dashboardNamePrefix, '%sAIX' % config.dashboardNamePrefix,
time_from='now-1h',
tags=(config.dashboardTags),
timezone='utc',
refresh='30s',
uid=std.md5(uid),
graphTooltip='shared_crosshair'
) )
.addTemplates(templates) + dashboard.time.withFrom('now-1h')
.addRows(rows), + dashboard.withTags(config.dashboardTags)
+ dashboard.withTimezone('utc')
+ dashboard.withRefresh('30s')
+ dashboard.withUid(std.md5(uid))
+ dashboard.graphTooltip.withSharedCrosshair()
+ dashboard.withVariables(variables)
+ dashboard.withPanels(panels),
}, },
} }
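
A quick way to eyeball the migrated output locally, assuming the mixin's usual `mixin.libsonnet` entry point and a populated `vendor/` directory (neither is part of this diff):

```jsonnet
// dump_dashboards.jsonnet: render the generated dashboards for inspection.
// Evaluate with: jsonnet -J vendor dump_dashboards.jsonnet > dashboards.json
// NOTE: 'mixin.libsonnet' is an assumed entry point; adjust to this repo's layout.
(import 'mixin.libsonnet').grafanaDashboards
```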