mirror of https://github.com/prometheus/prometheus
Merge pull request #6119 from cstyan/rw-dashboard-shards
Add additional shards/segment graphs to remote write dashboard.pull/6194/head
commit
b5e603ceb8
|
@ -1,5 +1,12 @@
|
||||||
local g = import 'grafana-builder/grafana.libsonnet';
|
local g = import 'grafana-builder/grafana.libsonnet';
|
||||||
|
local grafana = import 'grafonnet/grafana.libsonnet';
|
||||||
|
local dashboard = grafana.dashboard;
|
||||||
|
local row = grafana.row;
|
||||||
|
local singlestat = grafana.singlestat;
|
||||||
|
local prometheus = grafana.prometheus;
|
||||||
|
local graphPanel = grafana.graphPanel;
|
||||||
|
local tablePanel = grafana.tablePanel;
|
||||||
|
local template = grafana.template;
|
||||||
{
|
{
|
||||||
grafanaDashboards+:: {
|
grafanaDashboards+:: {
|
||||||
'prometheus.json':
|
'prometheus.json':
|
||||||
|
@ -92,57 +99,270 @@ local g = import 'grafana-builder/grafana.libsonnet';
|
||||||
),
|
),
|
||||||
// Remote write specific dashboard.
|
// Remote write specific dashboard.
|
||||||
'prometheus-remote-write.json':
|
'prometheus-remote-write.json':
|
||||||
g.dashboard('Prometheus Remote Write')
|
local timestampComparison =
|
||||||
.addMultiTemplate('instance', 'prometheus_build_info', 'instance')
|
graphPanel.new(
|
||||||
.addMultiTemplate('cluster', 'kube_pod_container_info{image=~".*prometheus.*"}', 'cluster')
|
'Highest Timestamp In vs. Highest Timestamp Sent',
|
||||||
.addRow(
|
datasource='$datasource',
|
||||||
g.row('Timestamps')
|
span=6,
|
||||||
.addPanel(
|
|
||||||
g.panel('Highest Timestamp In vs. Highest Timestamp Sent') +
|
|
||||||
g.queryPanel('prometheus_remote_storage_highest_timestamp_in_seconds{cluster=~"$cluster", instance=~"$instance"} - ignoring(queue) group_right(instance) prometheus_remote_storage_queue_highest_sent_timestamp_seconds{cluster=~"$cluster", instance=~"$instance"}', '{{cluster}}:{{instance}}-{{queue}}') +
|
|
||||||
{ yaxes: g.yaxes('s') }
|
|
||||||
)
|
)
|
||||||
.addPanel(
|
.addTarget(prometheus.target(
|
||||||
g.panel('Rate[5m]') +
|
|||
|
||||||
g.queryPanel('rate(prometheus_remote_storage_highest_timestamp_in_seconds{cluster=~"$cluster", instance=~"$instance"}[5m]) - ignoring (queue) group_right(instance) rate(prometheus_remote_storage_queue_highest_sent_timestamp_seconds{cluster=~"$cluster", instance=~"$instance"}[5m])', '{{cluster}}:{{instance}}-{{queue}}')
|
(
|
||||||
|
prometheus_remote_storage_highest_timestamp_in_seconds{cluster=~"$cluster", instance=~"$instance"}
|
||||||
|
-
|
||||||
|
ignoring(queue) group_right(instance) prometheus_remote_storage_queue_highest_sent_timestamp_seconds{cluster=~"$cluster", instance=~"$instance"}
|
||||||
|
)
|
||||||
|
|||,
|
||||||
|
legendFormat='{{cluster}}:{{instance}}-{{queue}}',
|
||||||
|
));
|
||||||
|
|
||||||
|
local timestampComparisonRate =
|
||||||
|
graphPanel.new(
|
||||||
|
'Rate[5m]',
|
||||||
|
datasource='$datasource',
|
||||||
|
span=6,
|
||||||
|
)
|
||||||
|
.addTarget(prometheus.target(
|
||||||
|
|||
|
||||||
|
(
|
||||||
|
rate(prometheus_remote_storage_highest_timestamp_in_seconds{cluster=~"$cluster", instance=~"$instance"}[5m])
|
||||||
|
-
|
||||||
|
ignoring (queue) group_right(instance) rate(prometheus_remote_storage_queue_highest_sent_timestamp_seconds{cluster=~"$cluster", instance=~"$instance"}[5m])
|
||||||
|
)
|
||||||
|
|||,
|
||||||
|
legendFormat='{{cluster}}:{{instance}}-{{queue}}',
|
||||||
|
));
|
||||||
|
|
||||||
|
local samplesRate =
|
||||||
|
graphPanel.new(
|
||||||
|
'Rate, in vs. succeeded or dropped [5m]',
|
||||||
|
datasource='$datasource',
|
||||||
|
span=12,
|
||||||
|
)
|
||||||
|
.addTarget(prometheus.target(
|
||||||
|
|||
|
||||||
|
rate(
|
||||||
|
prometheus_remote_storage_samples_in_total{cluster=~"$cluster", instance=~"$instance"}[5m])
|
||||||
|
-
|
||||||
|
ignoring(queue) group_right(instance) rate(prometheus_remote_storage_succeeded_samples_total{cluster=~"$cluster", instance=~"$instance"}[5m])
|
||||||
|
-
|
||||||
|
rate(prometheus_remote_storage_dropped_samples_total{cluster=~"$cluster", instance=~"$instance"}[5m])
|
||||||
|
|||,
|
||||||
|
legendFormat='{{cluster}}:{{instance}}-{{queue}}'
|
||||||
|
));
|
||||||
|
|
||||||
|
local shardsQueries =
|
||||||
|
graphPanel.new(
|
||||||
|
'Shards: $queue',
|
||||||
|
datasource='$datasource',
|
||||||
|
span=12,
|
||||||
|
min_span=6,
|
||||||
|
repeat='queue'
|
||||||
|
)
|
||||||
|
.addTarget(prometheus.target(
|
||||||
|
'prometheus_remote_storage_shards_max{cluster=~"$cluster", instance=~"$instance"}',
|
||||||
|
legendFormat='max_shards:{{queue}}'
|
||||||
|
))
|
||||||
|
.addTarget(prometheus.target(
|
||||||
|
'prometheus_remote_storage_shards_min{cluster=~"$cluster", instance=~"$instance"}',
|
||||||
|
legendFormat='min_shards:{{queue}}'
|
||||||
|
))
|
||||||
|
.addTarget(prometheus.target(
|
||||||
|
'prometheus_remote_storage_shards_desired{cluster=~"$cluster", instance=~"$instance"}',
|
||||||
|
legendFormat='desired_shards:{{queue}}'
|
||||||
|
))
|
||||||
|
.addTarget(prometheus.target(
|
||||||
|
'prometheus_remote_storage_shards{cluster=~"$cluster", instance=~"$instance"}',
|
||||||
|
legendFormat='current_shards:{{queue}}'
|
||||||
|
)) +
|
||||||
|
{
|
||||||
|
seriesOverrides: [
|
||||||
|
{
|
||||||
|
alias: '/max_shards/',
|
||||||
|
yaxis: 2,
|
||||||
|
},
|
||||||
|
],
|
||||||
|
};
|
||||||
|
|
||||||
|
local shardsCapacity =
|
||||||
|
graphPanel.new(
|
||||||
|
'Shard Capacity: $queue',
|
||||||
|
datasource='$datasource',
|
||||||
|
span=6,
|
||||||
|
repeat='queue'
|
||||||
|
)
|
||||||
|
.addTarget(prometheus.target(
|
||||||
|
'prometheus_remote_storage_shard_capacity{cluster=~"$cluster", instance=~"$instance"}',
|
||||||
|
legendFormat='{{cluster}}:{{instance}}-{{queue}}'
|
||||||
|
));
|
||||||
|
|
||||||
|
|
||||||
|
local pendingSamples =
|
||||||
|
graphPanel.new(
|
||||||
|
'Pending Samples: $queue',
|
||||||
|
datasource='$datasource',
|
||||||
|
span=6,
|
||||||
|
repeat='queue'
|
||||||
|
)
|
||||||
|
.addTarget(prometheus.target(
|
||||||
|
'prometheus_remote_storage_pending_samples{cluster=~"$cluster", instance=~"$instance"}',
|
||||||
|
legendFormat='{{cluster}}:{{instance}}-{{queue}}'
|
||||||
|
));
|
||||||
|
|
||||||
|
local walSegment =
|
||||||
|
graphPanel.new(
|
||||||
|
'TSDB Current Segment',
|
||||||
|
datasource='$datasource',
|
||||||
|
span=6,
|
||||||
|
formatY1='none',
|
||||||
|
)
|
||||||
|
.addTarget(prometheus.target(
|
||||||
|
'prometheus_tsdb_wal_segment_current{cluster=~"$cluster", instance=~"$instance"}',
|
||||||
|
legendFormat='{{cluster}}:{{instance}}'
|
||||||
|
));
|
||||||
|
|
||||||
|
local queueSegment =
|
||||||
|
graphPanel.new(
|
||||||
|
'Remote Write Current Segment',
|
||||||
|
datasource='$datasource',
|
||||||
|
span=6,
|
||||||
|
formatY1='none',
|
||||||
|
)
|
||||||
|
.addTarget(prometheus.target(
|
||||||
|
'prometheus_wal_watcher_current_segment{cluster=~"$cluster", instance=~"$instance"}',
|
||||||
|
legendFormat='{{cluster}}:{{instance}}-{{queue}}'
|
||||||
|
));
|
||||||
|
|
||||||
|
local droppedSamples =
|
||||||
|
graphPanel.new(
|
||||||
|
'Dropped Samples',
|
||||||
|
datasource='$datasource',
|
||||||
|
span=3,
|
||||||
|
)
|
||||||
|
.addTarget(prometheus.target(
|
||||||
|
'rate(prometheus_remote_storage_dropped_samples_total{cluster=~"$cluster", instance=~"$instance"}[5m])',
|
||||||
|
legendFormat='{{cluster}}:{{instance}}-{{queue}}'
|
||||||
|
));
|
||||||
|
|
||||||
|
local failedSamples =
|
||||||
|
graphPanel.new(
|
||||||
|
'Failed Samples',
|
||||||
|
datasource='$datasource',
|
||||||
|
span=3,
|
||||||
|
)
|
||||||
|
.addTarget(prometheus.target(
|
||||||
|
'rate(prometheus_remote_storage_failed_samples_total{cluster=~"$cluster", instance=~"$instance"}[5m])',
|
||||||
|
legendFormat='{{cluster}}:{{instance}}-{{queue}}'
|
||||||
|
));
|
||||||
|
|
||||||
|
local retriedSamples =
|
||||||
|
graphPanel.new(
|
||||||
|
'Retried Samples',
|
||||||
|
datasource='$datasource',
|
||||||
|
span=3,
|
||||||
|
)
|
||||||
|
.addTarget(prometheus.target(
|
||||||
|
'rate(prometheus_remote_storage_retried_samples_total{cluster=~"$cluster", instance=~"$instance"}[5m])',
|
||||||
|
legendFormat='{{cluster}}:{{instance}}-{{queue}}'
|
||||||
|
));
|
||||||
|
|
||||||
|
local enqueueRetries =
|
||||||
|
graphPanel.new(
|
||||||
|
'Enqueue Retries',
|
||||||
|
datasource='$datasource',
|
||||||
|
span=3,
|
||||||
|
)
|
||||||
|
.addTarget(prometheus.target(
|
||||||
|
'rate(prometheus_remote_storage_enqueue_retries_total{cluster=~"$cluster", instance=~"$instance"}[5m])',
|
||||||
|
legendFormat='{{cluster}}:{{instance}}-{{queue}}'
|
||||||
|
));
|
||||||
|
|
||||||
|
dashboard.new('Prometheus Remote Write',
|
||||||
|
editable=true)
|
||||||
|
.addTemplate(
|
||||||
|
{
|
||||||
|
hide: 0,
|
||||||
|
label: null,
|
||||||
|
name: 'datasource',
|
||||||
|
options: [],
|
||||||
|
query: 'prometheus',
|
||||||
|
refresh: 1,
|
||||||
|
regex: '',
|
||||||
|
type: 'datasource',
|
||||||
|
},
|
||||||
|
)
|
||||||
|
.addTemplate(
|
||||||
|
template.new(
|
||||||
|
'instance',
|
||||||
|
'$datasource',
|
||||||
|
'label_values(prometheus_build_info, instance)' % $._config,
|
||||||
|
refresh='time',
|
||||||
|
current={
|
||||||
|
selected: true,
|
||||||
|
text: 'All',
|
||||||
|
value: '$__all',
|
||||||
|
},
|
||||||
|
includeAll=true,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
.addTemplate(
|
||||||
|
template.new(
|
||||||
|
'cluster',
|
||||||
|
'$datasource',
|
||||||
|
'label_values(kube_pod_container_info{image=~".*prometheus.*"}, cluster)' % $._config,
|
||||||
|
refresh='time',
|
||||||
|
current={
|
||||||
|
selected: true,
|
||||||
|
text: 'All',
|
||||||
|
value: '$__all',
|
||||||
|
},
|
||||||
|
includeAll=true,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
.addTemplate(
|
||||||
|
template.new(
|
||||||
|
'queue',
|
||||||
|
'$datasource',
|
||||||
|
'label_values(prometheus_remote_storage_shards, queue)' % $._config,
|
||||||
|
refresh='time',
|
||||||
|
current={
|
||||||
|
selected: true,
|
||||||
|
text: 'All',
|
||||||
|
value: '$__all',
|
||||||
|
},
|
||||||
|
includeAll=true,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
.addRow(
|
.addRow(
|
||||||
g.row('Samples')
|
row.new('Timestamps')
|
||||||
.addPanel(
|
.addPanel(timestampComparison)
|
||||||
g.panel('Rate, in vs. succeeded or dropped [5m]') +
|
.addPanel(timestampComparisonRate)
|
||||||
g.queryPanel('rate(prometheus_remote_storage_samples_in_total{cluster=~"$cluster", instance=~"$instance"}[5m])- ignoring(queue) group_right(instance) rate(prometheus_remote_storage_succeeded_samples_total{cluster=~"$cluster", instance=~"$instance"}[5m]) - rate(prometheus_remote_storage_dropped_samples_total{cluster=~"$cluster", instance=~"$instance"}[5m])', '{{cluster}}:{{instance}}-{{queue}}')
|
|
||||||
)
|
|
||||||
)
|
)
|
||||||
.addRow(
|
.addRow(
|
||||||
g.row('Shards')
|
row.new('Samples')
|
||||||
.addPanel(
|
.addPanel(samplesRate)
|
||||||
g.panel('Num. Shards') +
|
|
||||||
g.queryPanel('prometheus_remote_storage_shards{cluster=~"$cluster", instance=~"$instance"}', '{{cluster}}:{{instance}}-{{queue}}')
|
|
||||||
)
|
|
||||||
.addPanel(
|
|
||||||
g.panel('Capacity') +
|
|
||||||
g.queryPanel('prometheus_remote_storage_shard_capacity{cluster=~"$cluster", instance=~"$instance"}', '{{cluster}}:{{instance}}-{{queue}}')
|
|
||||||
)
|
|
||||||
)
|
)
|
||||||
.addRow(
|
.addRow(
|
||||||
g.row('Misc Rates.')
|
row.new('Shards'
|
||||||
.addPanel(
|
|
||||||
g.panel('Dropped Samples') +
|
|
||||||
g.queryPanel('rate(prometheus_remote_storage_dropped_samples_total{cluster=~"$cluster", instance=~"$instance"}[5m])', '{{cluster}}:{{instance}}-{{queue}}')
|
|
||||||
)
|
)
|
||||||
.addPanel(
|
.addPanel(shardsQueries),
|
||||||
g.panel('Failed Samples') +
|
|
||||||
g.queryPanel('rate(prometheus_remote_storage_failed_samples_total{cluster=~"$cluster", instance=~"$instance"}[5m])', '{{cluster}}:{{instance}}-{{queue}}')
|
|
||||||
)
|
)
|
||||||
.addPanel(
|
.addRow(
|
||||||
g.panel('Retried Samples') +
|
row.new('Shard Details')
|
||||||
g.queryPanel('rate(prometheus_remote_storage_retried_samples_total{cluster=~"$cluster", instance=~"$instance"}[5m])', '{{cluster}}:{{instance}}-{{queue}}')
|
.addPanel(shardsCapacity)
|
||||||
|
.addPanel(pendingSamples)
|
||||||
)
|
)
|
||||||
.addPanel(
|
.addRow(
|
||||||
g.panel('Enqueue Retries') +
|
row.new('Segments')
|
||||||
g.queryPanel('rate(prometheus_remote_storage_enqueue_retries_total{cluster=~"$cluster", instance=~"$instance"}[5m])', '{{cluster}}:{{instance}}-{{queue}}')
|
.addPanel(walSegment)
|
||||||
|
.addPanel(queueSegment)
|
||||||
|
)
|
||||||
|
.addRow(
|
||||||
|
row.new('Misc. Rates')
|
||||||
|
.addPanel(droppedSamples)
|
||||||
|
.addPanel(failedSamples)
|
||||||
|
.addPanel(retriedSamples)
|
||||||
|
.addPanel(enqueueRetries)
|
||||||
)
|
)
|
||||||
),
|
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue