Migrate dashboards to new grafonnet library (#3147)

Migrated the dashboards from the deprecated grafonnet-lib to the new Grafonnet library. This replaces the Angular-based panels, which are disabled by default in Grafana 11 and will be unsupported in Grafana 12.
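
Every panel follows the same migration pattern. A minimal sketch of that pattern (simplified, using a placeholder query rather than code taken verbatim from this diff; it assumes both libraries are vendored under the import paths used in this repository):

// Old and new libraries, vendored side by side for illustration only.
local oldGrafana = import 'github.com/grafana/grafonnet-lib/grafonnet/grafana.libsonnet';
local newGrafana = import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonnet';

local graphPanel = oldGrafana.graphPanel;
local oldPrometheus = oldGrafana.prometheus;

local timeSeriesPanel = newGrafana.panel.timeSeries;
local prometheus = newGrafana.query.prometheus;
local tsStandardOptions = timeSeriesPanel.standardOptions;
local tsCustom = timeSeriesPanel.fieldConfig.defaults.custom;
local tsLegend = timeSeriesPanel.options.legend;
local tsQueryOptions = timeSeriesPanel.queryOptions;

{
  // Before: deprecated grafonnet-lib, which emits an Angular graph panel.
  before:: graphPanel.new(
    'CPU Utilisation',
    datasource='$datasource',
    format='percentunit',
    stack=true,
    fill=10,
    legend_show=false,
  ).addTarget(oldPrometheus.target('up', legendFormat='Utilisation')),

  // After: new Grafonnet, which builds a React time series panel by mixing in options.
  after:: timeSeriesPanel.new('CPU Utilisation')
          + tsStandardOptions.withUnit('percentunit')
          + tsCustom.stacking.withMode('normal')
          + tsCustom.withFillOpacity(100)
          + tsLegend.withShowLegend(false)
          + tsQueryOptions.withTargets([
            prometheus.new('$datasource', 'up')
            + prometheus.withLegendFormat('Utilisation'),
          ]),
}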

Fixes #3046

---------

Signed-off-by: Tom <12222103+critchtionary@users.noreply.github.com>
Tom 2024-12-19 15:49:22 +00:00 committed by GitHub
parent ff97e35a71
commit d0c1d00d18
3 changed files with 871 additions and 894 deletions


@@ -1,469 +1,480 @@
local grafana = import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonnet';
local dashboard = grafana.dashboard;
local variable = dashboard.variable;
local row = grafana.panel.row;
local prometheus = grafana.query.prometheus;

local timeSeriesPanel = grafana.panel.timeSeries;
local tsOptions = timeSeriesPanel.options;
local tsStandardOptions = timeSeriesPanel.standardOptions;
local tsQueryOptions = timeSeriesPanel.queryOptions;
local tsCustom = timeSeriesPanel.fieldConfig.defaults.custom;
local tsLegend = tsOptions.legend;

local c = import '../config.libsonnet';

local datasource = variable.datasource.new(
  'datasource', 'prometheus'
);

local tsCommonPanelOptions =
  variable.query.withDatasourceFromVariable(datasource)
  + tsCustom.stacking.withMode('normal')
  + tsCustom.withFillOpacity(100)
  + tsCustom.withShowPoints('never')
  + tsLegend.withShowLegend(false)
  + tsOptions.tooltip.withMode('multi')
  + tsOptions.tooltip.withSort('desc');

local CPUUtilisation =
  timeSeriesPanel.new(
    'CPU Utilisation',
  )
  + tsCommonPanelOptions
  + tsStandardOptions.withUnit('percentunit');

local CPUSaturation =
  // TODO: Is this a useful panel? At least there should be some explanation how load
  // average relates to the "CPU saturation" in the title.
  timeSeriesPanel.new(
    'CPU Saturation (Load1 per CPU)',
  )
  + tsCommonPanelOptions
  + tsStandardOptions.withUnit('percentunit');

local memoryUtilisation =
  timeSeriesPanel.new(
    'Memory Utilisation',
  )
  + tsCommonPanelOptions
  + tsStandardOptions.withUnit('percentunit');

local memorySaturation =
  timeSeriesPanel.new(
    'Memory Saturation (Major Page Faults)',
  )
  + tsCommonPanelOptions
  + tsStandardOptions.withUnit('rds');

local networkOverrides = tsStandardOptions.withOverrides(
  [
    tsStandardOptions.override.byRegexp.new('/Transmit/')
    + tsStandardOptions.override.byRegexp.withPropertiesFromOptions(
      tsCustom.withTransform('negative-Y')
    ),
  ]
);

local networkUtilisation =
  timeSeriesPanel.new(
    'Network Utilisation (Bytes Receive/Transmit)',
  )
  + tsCommonPanelOptions
  + tsStandardOptions.withUnit('Bps')
  + networkOverrides;

local networkSaturation =
  timeSeriesPanel.new(
    'Network Saturation (Drops Receive/Transmit)',
  )
  + tsCommonPanelOptions
  + tsStandardOptions.withUnit('Bps')
  + networkOverrides;

local diskIOUtilisation =
  timeSeriesPanel.new(
    'Disk IO Utilisation',
  )
  + tsCommonPanelOptions
  + tsStandardOptions.withUnit('percentunit');

local diskIOSaturation =
  timeSeriesPanel.new(
    'Disk IO Saturation',
  )
  + tsCommonPanelOptions
  + tsStandardOptions.withUnit('percentunit');

local diskSpaceUtilisation =
  timeSeriesPanel.new(
    'Disk Space Utilisation',
  )
  + tsCommonPanelOptions
  + tsStandardOptions.withUnit('percentunit');

{
  _clusterVariable::
    variable.query.new('cluster')
    + variable.query.withDatasourceFromVariable(datasource)
    + variable.query.queryTypes.withLabelValues(
      $._config.clusterLabel,
      'node_time_seconds',
    )
    + (if $._config.showMultiCluster then variable.query.generalOptions.showOnDashboard.withLabelAndValue() else variable.query.generalOptions.showOnDashboard.withNothing())
    + variable.query.refresh.onTime()
    + variable.query.selectionOptions.withIncludeAll(false)
    + variable.query.withSort(asc=true),

  grafanaDashboards+:: {
    'node-rsrc-use.json':
      dashboard.new(
        '%sUSE Method / Node' % $._config.dashboardNamePrefix,
      )
      + dashboard.time.withFrom('now-1h')
      + dashboard.withTags($._config.dashboardTags)
      + dashboard.withTimezone('utc')
      + dashboard.withRefresh('30s')
      + dashboard.graphTooltip.withSharedCrosshair()
      + dashboard.withUid(std.md5('node-rsrc-use.json'))
      + dashboard.withVariables([
        datasource,
        $._clusterVariable,
        variable.query.new('instance')
        + variable.query.withDatasourceFromVariable(datasource)
        + variable.query.queryTypes.withLabelValues(
          'instance',
          'node_exporter_build_info{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"}' % $._config,
        )
        + variable.query.refresh.onTime()
        + variable.query.withSort(asc=true),
      ])
      + dashboard.withPanels(
        grafana.util.grid.makeGrid([
          row.new('CPU')
          + row.withPanels([
            CPUUtilisation + tsQueryOptions.withTargets([prometheus.new('$datasource', 'instance:node_cpu_utilisation:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config) + prometheus.withLegendFormat('Utilisation')]),
            CPUSaturation + tsQueryOptions.withTargets([prometheus.new('$datasource', 'instance:node_load1_per_cpu:ratio{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config) + prometheus.withLegendFormat('Saturation')]),
          ]),
          row.new('Memory')
          + row.withPanels([
            memoryUtilisation + tsQueryOptions.withTargets([prometheus.new('$datasource', 'instance:node_memory_utilisation:ratio{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config) + prometheus.withLegendFormat('Utilisation')]),
            memorySaturation + tsQueryOptions.withTargets([prometheus.new('$datasource', 'instance:node_vmstat_pgmajfault:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config) + prometheus.withLegendFormat('Major page Faults')]),
          ]),
          row.new('Network')
          + row.withPanels([
            networkUtilisation + tsQueryOptions.withTargets([
              prometheus.new('$datasource', 'instance:node_network_receive_bytes_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config) + prometheus.withLegendFormat('Receive'),
              prometheus.new('$datasource', 'instance:node_network_transmit_bytes_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config) + prometheus.withLegendFormat('Transmit'),
            ]),
            networkSaturation + tsQueryOptions.withTargets([
              prometheus.new('$datasource', 'instance:node_network_receive_drop_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config) + prometheus.withLegendFormat('Receive'),
              prometheus.new('$datasource', 'instance:node_network_transmit_drop_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config) + prometheus.withLegendFormat('Transmit'),
            ]),
          ]),
          row.new('Disk IO')
          + row.withPanels([
            diskIOUtilisation + tsQueryOptions.withTargets([prometheus.new('$datasource', 'instance_device:node_disk_io_time_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config) + prometheus.withLegendFormat('{{device}}')]),
            diskIOSaturation + tsQueryOptions.withTargets([prometheus.new('$datasource', 'instance_device:node_disk_io_time_weighted_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance", %(clusterLabel)s="$cluster"} != 0' % $._config) + prometheus.withLegendFormat('{{device}}')]),
          ]),
        ], panelWidth=12, panelHeight=7)
        + grafana.util.grid.makeGrid([
          row.new('Disk Space')
          + row.withPanels([
            diskSpaceUtilisation + tsQueryOptions.withTargets([
              prometheus.new(
                '$datasource',
                |||
                  sort_desc(1 -
                    (
                      max without (mountpoint, fstype) (node_filesystem_avail_bytes{%(nodeExporterSelector)s, fstype!="", instance="$instance", %(clusterLabel)s="$cluster"})
                    /
                      max without (mountpoint, fstype) (node_filesystem_size_bytes{%(nodeExporterSelector)s, fstype!="", instance="$instance", %(clusterLabel)s="$cluster"})
                    ) != 0
                  )
                ||| % $._config
              ) + prometheus.withLegendFormat('{{device}}'),
            ]),
          ]),
        ], panelWidth=24, panelHeight=7, startY=34),
      ),

    'node-cluster-rsrc-use.json':
      dashboard.new(
        '%sUSE Method / Cluster' % $._config.dashboardNamePrefix,
      )
      + dashboard.time.withFrom('now-1h')
      + dashboard.withTags($._config.dashboardTags)
      + dashboard.withTimezone('utc')
      + dashboard.withRefresh('30s')
      + dashboard.graphTooltip.withSharedCrosshair()
      + dashboard.withUid(std.md5('node-cluster-rsrc-use.json'))
      + dashboard.withVariables([
        datasource,
        $._clusterVariable,
        variable.query.withDatasourceFromVariable(datasource)
        + variable.query.refresh.onTime()
        + variable.query.withSort(asc=true),
      ])
      + dashboard.withPanels(
        grafana.util.grid.makeGrid([
          row.new('CPU')
          + row.withPanels([
            CPUUtilisation + tsQueryOptions.withTargets([
              prometheus.new(
                '$datasource',
                |||
                  ((
                    instance:node_cpu_utilisation:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"}
                    *
                    instance:node_num_cpu:sum{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"}
                  ) != 0 )
                  / scalar(sum(instance:node_num_cpu:sum{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"}))
                ||| % $._config
              ) + prometheus.withLegendFormat('{{ instance }}'),
            ]),
            CPUSaturation + tsQueryOptions.withTargets([
              prometheus.new(
                '$datasource',
                |||
                  (
                    instance:node_load1_per_cpu:ratio{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"}
                    / scalar(count(instance:node_load1_per_cpu:ratio{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"}))
                  ) != 0
                ||| % $._config
              ) + prometheus.withLegendFormat('{{ instance }}'),
            ]),
          ]),
          row.new('Memory')
          + row.withPanels([
            memoryUtilisation + tsQueryOptions.withTargets([
              prometheus.new(
                '$datasource',
                |||
                  (
                    instance:node_memory_utilisation:ratio{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"}
                    / scalar(count(instance:node_memory_utilisation:ratio{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"}))
                  ) != 0
                ||| % $._config
              ) + prometheus.withLegendFormat('{{ instance }}'),
            ]),
            memorySaturation + tsQueryOptions.withTargets([
              prometheus.new(
                '$datasource',
                'instance:node_vmstat_pgmajfault:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"}' % $._config
              ) + prometheus.withLegendFormat('{{ instance }}'),
            ]),
          ]),
          row.new('Network')
          + row.withPanels([
            networkUtilisation + tsQueryOptions.withTargets([
              prometheus.new(
                '$datasource',
                'instance:node_network_receive_bytes_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} != 0' % $._config
              ) + prometheus.withLegendFormat('{{ instance }} Receive'),
              prometheus.new(
                '$datasource',
                'instance:node_network_transmit_bytes_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} != 0' % $._config
              ) + prometheus.withLegendFormat('{{ instance }} Transmit'),
            ]),
            networkSaturation + tsQueryOptions.withTargets([
              prometheus.new(
                '$datasource',
                'instance:node_network_receive_drop_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} != 0' % $._config
              ) + prometheus.withLegendFormat('{{ instance }} Receive'),
              prometheus.new(
                '$datasource',
                'instance:node_network_transmit_drop_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"} != 0' % $._config
              ) + prometheus.withLegendFormat('{{ instance }} Transmit'),
            ]),
          ]),
          row.new('Disk IO')
          + row.withPanels([
            diskIOUtilisation + tsQueryOptions.withTargets([
              prometheus.new(
                '$datasource',
                |||
                  instance_device:node_disk_io_time_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"}
                  / scalar(count(instance_device:node_disk_io_time_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"}))
                ||| % $._config
              ) + prometheus.withLegendFormat('{{ instance }} {{device}}'),
            ]),
            diskIOSaturation + tsQueryOptions.withTargets([prometheus.new(
              '$datasource',
              |||
                instance_device:node_disk_io_time_weighted_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"}
                / scalar(count(instance_device:node_disk_io_time_weighted_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s, %(clusterLabel)s="$cluster"}))
              ||| % $._config
            ) + prometheus.withLegendFormat('{{ instance }} {{device}}')]),
          ]),
        ], panelWidth=12, panelHeight=7)
        + grafana.util.grid.makeGrid([
          row.new('Disk Space')
          + row.withPanels([
            diskSpaceUtilisation + tsQueryOptions.withTargets([
              prometheus.new(
                '$datasource',
                |||
                  sum without (device) (
                    max without (fstype, mountpoint) ((
                      node_filesystem_size_bytes{%(nodeExporterSelector)s, %(fsSelector)s, %(fsMountpointSelector)s, %(clusterLabel)s="$cluster"}
                      -
                      node_filesystem_avail_bytes{%(nodeExporterSelector)s, %(fsSelector)s, %(fsMountpointSelector)s, %(clusterLabel)s="$cluster"}
                    ) != 0)
                  )
                  / scalar(sum(max without (fstype, mountpoint) (node_filesystem_size_bytes{%(nodeExporterSelector)s, %(fsSelector)s, %(fsMountpointSelector)s, %(clusterLabel)s="$cluster"})))
                ||| % $._config
              ) + prometheus.withLegendFormat('{{ instance }}'),
            ]),
          ]),
        ], panelWidth=24, panelHeight=7, startY=34),
      ),
  } +
  if $._config.showMultiCluster then {
    'node-multicluster-rsrc-use.json':
      dashboard.new(
        '%sUSE Method / Multi-cluster' % $._config.dashboardNamePrefix,
      )
      + dashboard.time.withFrom('now-1h')
      + dashboard.withTags($._config.dashboardTags)
      + dashboard.withTimezone('utc')
      + dashboard.withRefresh('30s')
      + dashboard.graphTooltip.withSharedCrosshair()
      + dashboard.withUid(std.md5('node-multicluster-rsrc-use.json'))
      + dashboard.withVariables([
        datasource,
        variable.query.withDatasourceFromVariable(datasource)
        + variable.query.refresh.onTime()
        + variable.query.withSort(asc=true),
      ])
      + dashboard.withPanels(
        grafana.util.grid.makeGrid([
          row.new('CPU')
          + row.withPanels([
            CPUUtilisation + tsQueryOptions.withTargets([
              prometheus.new(
                '$datasource',
                |||
                  sum(
                    ((
                      instance:node_cpu_utilisation:rate%(rateInterval)s{%(nodeExporterSelector)s}
                      *
                      instance:node_num_cpu:sum{%(nodeExporterSelector)s}
                    ) != 0)
                    / scalar(sum(instance:node_num_cpu:sum{%(nodeExporterSelector)s}))
                  ) by (%(clusterLabel)s)
                ||| % $._config
              ) + prometheus.withLegendFormat('{{%(clusterLabel)s}}'),
            ]),
            CPUSaturation + tsQueryOptions.withTargets([
              prometheus.new(
                '$datasource',
                |||
                  sum((
                    instance:node_load1_per_cpu:ratio{%(nodeExporterSelector)s}
                    / scalar(count(instance:node_load1_per_cpu:ratio{%(nodeExporterSelector)s}))
                  ) != 0) by (%(clusterLabel)s)
                ||| % $._config
              ) + prometheus.withLegendFormat('{{%(clusterLabel)s}}'),
            ]),
          ]),
          row.new('Memory')
          + row.withPanels([
            memoryUtilisation + tsQueryOptions.withTargets([
              prometheus.new(
                '$datasource',
                |||
                  sum((
                    instance:node_memory_utilisation:ratio{%(nodeExporterSelector)s}
                    / scalar(count(instance:node_memory_utilisation:ratio{%(nodeExporterSelector)s}))
                  ) != 0) by (%(clusterLabel)s)
                ||| % $._config
              ) + prometheus.withLegendFormat('{{%(clusterLabel)s}}'),
            ]),
            memorySaturation + tsQueryOptions.withTargets([
              prometheus.new(
                '$datasource',
                |||
                  sum((
                    instance:node_vmstat_pgmajfault:rate%(rateInterval)s{%(nodeExporterSelector)s}
                  ) != 0) by (%(clusterLabel)s)
                |||
                % $._config
              ) + prometheus.withLegendFormat('{{%(clusterLabel)s}}'),
            ]),
          ]),
          row.new('Network')
          + row.withPanels([
            networkUtilisation + tsQueryOptions.withTargets([
              prometheus.new(
                '$datasource',
                |||
                  sum((
                    instance:node_network_receive_bytes_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s}
                  ) != 0) by (%(clusterLabel)s)
                ||| % $._config
              ) + prometheus.withLegendFormat('{{%(clusterLabel)s}} Receive'),
              prometheus.new(
                '$datasource',
                |||
                  sum((
                    instance:node_network_transmit_bytes_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s}
                  ) != 0) by (%(clusterLabel)s)
                ||| % $._config
              ) + prometheus.withLegendFormat('{{%(clusterLabel)s}} Transmit'),
            ]),
            networkSaturation + tsQueryOptions.withTargets([
              prometheus.new(
                '$datasource',
                |||
                  sum((
                    instance:node_network_receive_drop_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s}
                  ) != 0) by (%(clusterLabel)s)
                ||| % $._config
              ) + prometheus.withLegendFormat('{{%(clusterLabel)s}} Receive'),
              prometheus.new(
                '$datasource',
                |||
                  sum((
                    instance:node_network_transmit_drop_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s}
                  ) != 0) by (%(clusterLabel)s)
                ||| % $._config
              ) + prometheus.withLegendFormat('{{%(clusterLabel)s}} Transmit'),
            ]),
          ]),
          row.new('Disk IO')
          + row.withPanels([
            diskIOUtilisation + tsQueryOptions.withTargets([
              prometheus.new(
                '$datasource',
                |||
                  sum((
                    instance_device:node_disk_io_time_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s}
                    / scalar(count(instance_device:node_disk_io_time_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s}))
                  ) != 0) by (%(clusterLabel)s, device)
                ||| % $._config
              ) + prometheus.withLegendFormat('{{%(clusterLabel)s}} {{device}}'),
            ]),
            diskIOSaturation + tsQueryOptions.withTargets([prometheus.new(
              '$datasource',
              |||
                sum((
                  instance_device:node_disk_io_time_weighted_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s}
                  / scalar(count(instance_device:node_disk_io_time_weighted_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s}))
                ) != 0) by (%(clusterLabel)s, device)
              ||| % $._config
            ) + prometheus.withLegendFormat('{{%(clusterLabel)s}} {{device}}')]),
          ]),
        ], panelWidth=12, panelHeight=7)
        + grafana.util.grid.makeGrid([
          row.new('Disk Space')
          + row.withPanels([
            diskSpaceUtilisation + tsQueryOptions.withTargets([
              prometheus.new(
                '$datasource',
                |||
                  sum (
                    sum without (device) (
                      max without (fstype, mountpoint, instance, pod) ((
                        node_filesystem_size_bytes{%(nodeExporterSelector)s, %(fsSelector)s, %(fsMountpointSelector)s} - node_filesystem_avail_bytes{%(nodeExporterSelector)s, %(fsSelector)s, %(fsMountpointSelector)s}
                      ) != 0)
                    )
                    / scalar(sum(max without (fstype, mountpoint) (node_filesystem_size_bytes{%(nodeExporterSelector)s, %(fsSelector)s, %(fsMountpointSelector)s})))
                  ) by (%(clusterLabel)s)
                ||| % $._config
              ) + prometheus.withLegendFormat('{{%(clusterLabel)s}}'),
            ]),
          ]),
        ], panelWidth=24, panelHeight=7, startY=34),
      ),
  } else {},
}
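
The multi-cluster dashboard above is only generated when showMultiCluster is set, and the queries are parameterised through the _config object imported from config.libsonnet. A hypothetical consumer override, using the field names referenced in the code above (the import path and the example values are assumptions, not part of this change):

// Hypothetical override of the mixin configuration consumed via $._config above.
(import 'mixin.libsonnet') {
  _config+:: {
    showMultiCluster: true,  // also emit node-multicluster-rsrc-use.json
    clusterLabel: 'cluster',  // label used for the $cluster variable and multi-cluster grouping
    dashboardNamePrefix: 'Node Exporter / ',
    dashboardTags: ['node-exporter-mixin'],
  },
}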


@@ -4,20 +4,11 @@
     {
       "source": {
         "git": {
-          "remote": "https://github.com/grafana/grafonnet-lib.git",
-          "subdir": "grafonnet"
+          "remote": "https://github.com/grafana/grafonnet.git",
+          "subdir": "gen/grafonnet-latest"
         }
       },
-      "version": "master"
-    },
-    {
-      "source": {
-        "git": {
-          "remote": "https://github.com/grafana/grafonnet-lib.git",
-          "subdir": "grafonnet-7.0"
-        }
-      },
-      "version": "master"
+      "version": "main"
     }
   ],
   "legacyImports": false

File diff suppressed because it is too large.