From e34af6d4d3b580a2cf279d9295824fdf676d1a92 Mon Sep 17 00:00:00 2001
From: beorn7
Date: Wed, 26 Jun 2019 23:22:16 +0200
Subject: [PATCH] Address various comments from the review

Signed-off-by: beorn7
---
Review notes (this section is ignored when the patch is applied):
 * The added PrometheusTSDBWALCorruptions expression uses the
   prometheus_-prefixed metric name, consistent with the other TSDB
   alerts touched by this patch. The removed line is left untouched so
   the hunk still matches its base.
 * Line breaks and indentation were restored from a whitespace-collapsed
   copy of this patch. Verify them against the base revision (or apply
   with whitespace checks relaxed) before relying on them.

 .../prometheus-mixin/alerts.libsonnet         | 20 +++++++++----------
 .../prometheus-mixin/dashboards.libsonnet     | 16 +++++++--------
 2 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/documentation/prometheus-mixin/alerts.libsonnet b/documentation/prometheus-mixin/alerts.libsonnet
index 5ec1f4f03..ef604a159 100644
--- a/documentation/prometheus-mixin/alerts.libsonnet
+++ b/documentation/prometheus-mixin/alerts.libsonnet
@@ -87,9 +87,9 @@
           {
             alert: 'PrometheusTSDBReloadsFailing',
             expr: |||
-              increase(prometheus_tsdb_reloads_failures_total{%(prometheusSelector)s}[2h]) > 0
+              increase(prometheus_tsdb_reloads_failures_total{%(prometheusSelector)s}[3h]) > 0
             ||| % $._config,
-            'for': '12h',
+            'for': '4h',
             labels: {
               severity: 'warning',
             },
@@ -100,9 +100,9 @@
           {
             alert: 'PrometheusTSDBCompactionsFailing',
             expr: |||
-              increase(prometheus_tsdb_compactions_failed_total{%(prometheusSelector)s}[2h]) > 0
+              increase(prometheus_tsdb_compactions_failed_total{%(prometheusSelector)s}[3h]) > 0
             ||| % $._config,
-            'for': '12h',
+            'for': '4h',
             labels: {
               severity: 'warning',
             },
@@ -113,7 +113,7 @@
           {
             alert: 'PrometheusTSDBWALCorruptions',
             expr: |||
-              tsdb_wal_corruptions_total{%(prometheusSelector)s} > 0
+              increase(prometheus_tsdb_wal_corruptions_total{%(prometheusSelector)s}[3h]) > 0
             ||| % $._config,
             'for': '4h',
             labels: {
@@ -153,12 +153,12 @@
             alert: 'PrometheusRemoteStorageFailures',
             expr: |||
               (
-                rate(prometheus_remote_storage_failed_samples_total{%(prometheusSelector)s}[1m])
+                rate(prometheus_remote_storage_failed_samples_total{%(prometheusSelector)s}[5m])
               /
                 (
-                  rate(prometheus_remote_storage_failed_samples_total{%(prometheusSelector)s}[1m])
+                  rate(prometheus_remote_storage_failed_samples_total{%(prometheusSelector)s}[5m])
                 +
-                  rate(prometheus_remote_storage_succeeded_samples_total{%(prometheusSelector)s}[1m])
+                  rate(prometheus_remote_storage_succeeded_samples_total{%(prometheusSelector)s}[5m])
                 )
               )
               * 100
@@ -192,10 +192,10 @@
           },
           {
             alert: 'PrometheusRuleFailures',
-            'for': '15m',
             expr: |||
-              rate(prometheus_rule_evaluation_failures_total{%(prometheusSelector)s}[1m]) > 0
+              rate(prometheus_rule_evaluation_failures_total{%(prometheusSelector)s}[5m]) > 0
             ||| % $._config,
+            'for': '15m',
             labels: {
               severity: 'critical',
             },
diff --git a/documentation/prometheus-mixin/dashboards.libsonnet b/documentation/prometheus-mixin/dashboards.libsonnet
index 8cc00f6d6..c7df87013 100644
--- a/documentation/prometheus-mixin/dashboards.libsonnet
+++ b/documentation/prometheus-mixin/dashboards.libsonnet
@@ -7,7 +7,7 @@ local g = import 'grafana-builder/grafana.libsonnet';
       .addMultiTemplate('job', 'prometheus_build_info', 'job')
       .addMultiTemplate('instance', 'prometheus_build_info', 'instance')
       # Prometheus is quite commonly configured with honor_labels set to true;
-      # therefor job and instance is not the prometheus server in many queries!.
+      # therefore job and instance is not the prometheus server in many queries!
       .addRow(
         g.row('Prometheus Stats')
         .addPanel(
@@ -18,7 +18,7 @@ local g = import 'grafana-builder/grafana.libsonnet';
           ], {
             job: { alias: 'Job' },
             instance: { alias: 'Instance' },
-            verstion: { alias: 'Version' },
+            version: { alias: 'Version' },
             'Value #A': { alias: 'Count', type: 'hidden' },
             'Value #B': { alias: 'Uptime' },
           })
@@ -28,20 +28,20 @@ local g = import 'grafana-builder/grafana.libsonnet';
         g.row('Discovery')
         .addPanel(
           g.panel('Target Sync') +
-          g.queryPanel('sum(rate(prometheus_target_sync_length_seconds_sum{job=~"$job",instance=~"$instance"}[2m])) by (scrape_job) * 1e3', '{{scrape_job}}') +
+          g.queryPanel('sum(rate(prometheus_target_sync_length_seconds_sum{job=~"$job",instance=~"$instance"}[5m])) by (scrape_job) * 1e3', '{{scrape_job}}') +
           { yaxes: g.yaxes('ms') }
         )
         .addPanel(
           g.panel('Targets') +
-          g.queryPanel('count(up{})', 'Targets') +
+          g.queryPanel('sum(prometheus_sd_discovered_targets{job=~"$job",instance=~"$instance"})', 'Targets') +
           g.stack
         )
       )
       .addRow(
         g.row('Retrieval')
         .addPanel(
-          g.panel('Target Scrape Duration') +
-          g.queryPanel('1e3 * sum(scrape_duration_seconds) / count(scrape_duration_seconds)', 'Average') +
+          g.panel('Average Scrape Interval Duration') +
+          g.queryPanel('rate(prometheus_target_interval_length_seconds_sum{job=~"$job",instance=~"$instance"}[5m]) / rate(prometheus_target_interval_length_seconds_count{job=~"$job",instance=~"$instance"}[5m]) * 1e3', '{{interval}} configured') +
           { yaxes: g.yaxes('ms') }
         )
         .addPanel(
@@ -61,7 +61,7 @@ local g = import 'grafana-builder/grafana.libsonnet';
         )
         .addPanel(
           g.panel('Appended Samples') +
-          g.queryPanel('rate(prometheus_tsdb_head_samples_appended_total{job=~"$job",instance=~"$instance"}[1m])', '{{job}} {{instance}}') +
+          g.queryPanel('rate(prometheus_tsdb_head_samples_appended_total{job=~"$job",instance=~"$instance"}[5m])', '{{job}} {{instance}}') +
           g.stack
         )
       )
@@ -82,7 +82,7 @@ local g = import 'grafana-builder/grafana.libsonnet';
         g.row('Query')
         .addPanel(
           g.panel('Query Rate') +
-          g.queryPanel('rate(prometheus_engine_query_duration_seconds_count{job=~"$job",instance=~"$instance",slice="inner_eval"}[1m])', '{{job}} {{instance}}') +
+          g.queryPanel('rate(prometheus_engine_query_duration_seconds_count{job=~"$job",instance=~"$instance",slice="inner_eval"}[5m])', '{{job}} {{instance}}') +
           g.stack,
         )
         .addPanel(