Address various comments from the review

Signed-off-by: beorn7 <beorn@grafana.com>
pull/4474/head
beorn7 5 years ago
parent 23c03207e9
commit e34af6d4d3

@ -87,9 +87,9 @@
{ {
alert: 'PrometheusTSDBReloadsFailing', alert: 'PrometheusTSDBReloadsFailing',
expr: ||| expr: |||
increase(prometheus_tsdb_reloads_failures_total{%(prometheusSelector)s}[2h]) > 0 increase(prometheus_tsdb_reloads_failures_total{%(prometheusSelector)s}[3h]) > 0
||| % $._config, ||| % $._config,
'for': '12h', 'for': '4h',
labels: { labels: {
severity: 'warning', severity: 'warning',
}, },
@ -100,9 +100,9 @@
{ {
alert: 'PrometheusTSDBCompactionsFailing', alert: 'PrometheusTSDBCompactionsFailing',
expr: ||| expr: |||
increase(prometheus_tsdb_compactions_failed_total{%(prometheusSelector)s}[2h]) > 0 increase(prometheus_tsdb_compactions_failed_total{%(prometheusSelector)s}[3h]) > 0
||| % $._config, ||| % $._config,
'for': '12h', 'for': '4h',
labels: { labels: {
severity: 'warning', severity: 'warning',
}, },
@ -113,7 +113,7 @@
{ {
alert: 'PrometheusTSDBWALCorruptions', alert: 'PrometheusTSDBWALCorruptions',
expr: ||| expr: |||
tsdb_wal_corruptions_total{%(prometheusSelector)s} > 0 increase(tsdb_wal_corruptions_total{%(prometheusSelector)s}[3h]) > 0
||| % $._config, ||| % $._config,
'for': '4h', 'for': '4h',
labels: { labels: {
@ -153,12 +153,12 @@
alert: 'PrometheusRemoteStorageFailures', alert: 'PrometheusRemoteStorageFailures',
expr: ||| expr: |||
( (
rate(prometheus_remote_storage_failed_samples_total{%(prometheusSelector)s}[1m]) rate(prometheus_remote_storage_failed_samples_total{%(prometheusSelector)s}[5m])
/ /
( (
rate(prometheus_remote_storage_failed_samples_total{%(prometheusSelector)s}[1m]) rate(prometheus_remote_storage_failed_samples_total{%(prometheusSelector)s}[5m])
+ +
rate(prometheus_remote_storage_succeeded_samples_total{%(prometheusSelector)s}[1m]) rate(prometheus_remote_storage_succeeded_samples_total{%(prometheusSelector)s}[5m])
) )
) )
* 100 * 100
@ -192,10 +192,10 @@
}, },
{ {
alert: 'PrometheusRuleFailures', alert: 'PrometheusRuleFailures',
'for': '15m',
expr: ||| expr: |||
rate(prometheus_rule_evaluation_failures_total{%(prometheusSelector)s}[1m]) > 0 rate(prometheus_rule_evaluation_failures_total{%(prometheusSelector)s}[5m]) > 0
||| % $._config, ||| % $._config,
'for': '15m',
labels: { labels: {
severity: 'critical', severity: 'critical',
}, },

@ -7,7 +7,7 @@ local g = import 'grafana-builder/grafana.libsonnet';
.addMultiTemplate('job', 'prometheus_build_info', 'job') .addMultiTemplate('job', 'prometheus_build_info', 'job')
.addMultiTemplate('instance', 'prometheus_build_info', 'instance') .addMultiTemplate('instance', 'prometheus_build_info', 'instance')
# Prometheus is quite commonly configured with honor_labels set to true; # Prometheus is quite commonly configured with honor_labels set to true;
# therefor job and instance is not the prometheus server in many queries!. # therefore job and instance is not the prometheus server in many queries!
.addRow( .addRow(
g.row('Prometheus Stats') g.row('Prometheus Stats')
.addPanel( .addPanel(
@ -18,7 +18,7 @@ local g = import 'grafana-builder/grafana.libsonnet';
], { ], {
job: { alias: 'Job' }, job: { alias: 'Job' },
instance: { alias: 'Instance' }, instance: { alias: 'Instance' },
verstion: { alias: 'Version' }, version: { alias: 'Version' },
'Value #A': { alias: 'Count', type: 'hidden' }, 'Value #A': { alias: 'Count', type: 'hidden' },
'Value #B': { alias: 'Uptime' }, 'Value #B': { alias: 'Uptime' },
}) })
@ -28,20 +28,20 @@ local g = import 'grafana-builder/grafana.libsonnet';
g.row('Discovery') g.row('Discovery')
.addPanel( .addPanel(
g.panel('Target Sync') + g.panel('Target Sync') +
g.queryPanel('sum(rate(prometheus_target_sync_length_seconds_sum{job=~"$job",instance=~"$instance"}[2m])) by (scrape_job) * 1e3', '{{scrape_job}}') + g.queryPanel('sum(rate(prometheus_target_sync_length_seconds_sum{job=~"$job",instance=~"$instance"}[5m])) by (scrape_job) * 1e3', '{{scrape_job}}') +
{ yaxes: g.yaxes('ms') } { yaxes: g.yaxes('ms') }
) )
.addPanel( .addPanel(
g.panel('Targets') + g.panel('Targets') +
g.queryPanel('count(up{})', 'Targets') + g.queryPanel('sum(prometheus_sd_discovered_targets{job=~"$job",instance=~"$instance"})', 'Targets') +
g.stack g.stack
) )
) )
.addRow( .addRow(
g.row('Retrieval') g.row('Retrieval')
.addPanel( .addPanel(
g.panel('Target Scrape Duration') + g.panel('Average Scrape Interval Duration') +
g.queryPanel('1e3 * sum(scrape_duration_seconds) / count(scrape_duration_seconds)', 'Average') + g.queryPanel('rate(prometheus_target_interval_length_seconds_sum{job=~"$job",instance=~"$instance"}[5m]) / rate(prometheus_target_interval_length_seconds_count{job=~"$job",instance=~"$instance"}[5m]) * 1e3', '{{interval}} configured') +
{ yaxes: g.yaxes('ms') } { yaxes: g.yaxes('ms') }
) )
.addPanel( .addPanel(
@ -61,7 +61,7 @@ local g = import 'grafana-builder/grafana.libsonnet';
) )
.addPanel( .addPanel(
g.panel('Appended Samples') + g.panel('Appended Samples') +
g.queryPanel('rate(prometheus_tsdb_head_samples_appended_total{job=~"$job",instance=~"$instance"}[1m])', '{{job}} {{instance}}') + g.queryPanel('rate(prometheus_tsdb_head_samples_appended_total{job=~"$job",instance=~"$instance"}[5m])', '{{job}} {{instance}}') +
g.stack g.stack
) )
) )
@ -82,7 +82,7 @@ local g = import 'grafana-builder/grafana.libsonnet';
g.row('Query') g.row('Query')
.addPanel( .addPanel(
g.panel('Query Rate') + g.panel('Query Rate') +
g.queryPanel('rate(prometheus_engine_query_duration_seconds_count{job=~"$job",instance=~"$instance",slice="inner_eval"}[1m])', '{{job}} {{instance}}') + g.queryPanel('rate(prometheus_engine_query_duration_seconds_count{job=~"$job",instance=~"$instance",slice="inner_eval"}[5m])', '{{job}} {{instance}}') +
g.stack, g.stack,
) )
.addPanel( .addPanel(

Loading…
Cancel
Save