fix(notifier): stop dropping known alertmanagers on each ApplyConfig and waiting on SD to update them.

Signed-off-by: machine424 <ayoubmrini424@gmail.com>
pull/14987/head
machine424 2 months ago
parent 3dc623d30b
commit 83ee57343a
No known key found for this signature in database
GPG Key ID: A4B001A4FDEE017D

@ -16,6 +16,8 @@ package notifier
import ( import (
"bytes" "bytes"
"context" "context"
"crypto/md5"
"encoding/hex"
"encoding/json" "encoding/json"
"fmt" "fmt"
"io" "io"
@ -35,6 +37,7 @@ import (
"github.com/prometheus/common/sigv4" "github.com/prometheus/common/sigv4"
"github.com/prometheus/common/version" "github.com/prometheus/common/version"
"go.uber.org/atomic" "go.uber.org/atomic"
"gopkg.in/yaml.v2"
"github.com/prometheus/prometheus/config" "github.com/prometheus/prometheus/config"
"github.com/prometheus/prometheus/discovery/targetgroup" "github.com/prometheus/prometheus/discovery/targetgroup"
@ -257,6 +260,16 @@ func (n *Manager) ApplyConfig(conf *config.Config) error {
n.opts.RelabelConfigs = conf.AlertingConfig.AlertRelabelConfigs n.opts.RelabelConfigs = conf.AlertingConfig.AlertRelabelConfigs
amSets := make(map[string]*alertmanagerSet) amSets := make(map[string]*alertmanagerSet)
// configToAlertmanagers maps alertmanager sets for each unique AlertmanagerConfig,
// helping to avoid dropping known alertmanagers and re-use them without waiting for SD updates when applying the config.
configToAlertmanagers := make(map[string]*alertmanagerSet, len(n.alertmanagers))
for _, oldAmSet := range n.alertmanagers {
hash, err := oldAmSet.configHash()
if err != nil {
return err
}
configToAlertmanagers[hash] = oldAmSet
}
for k, cfg := range conf.AlertingConfig.AlertmanagerConfigs.ToMap() { for k, cfg := range conf.AlertingConfig.AlertmanagerConfigs.ToMap() {
ams, err := newAlertmanagerSet(cfg, n.logger, n.metrics) ams, err := newAlertmanagerSet(cfg, n.logger, n.metrics)
@ -264,6 +277,16 @@ func (n *Manager) ApplyConfig(conf *config.Config) error {
return err return err
} }
hash, err := ams.configHash()
if err != nil {
return err
}
if oldAmSet, ok := configToAlertmanagers[hash]; ok {
ams.ams = oldAmSet.ams
ams.droppedAms = oldAmSet.droppedAms
}
amSets[k] = ams amSets[k] = ams
} }
@ -803,6 +826,15 @@ func (s *alertmanagerSet) sync(tgs []*targetgroup.Group) {
} }
} }
// configHash returns a hex-encoded MD5 digest of the YAML serialization of
// the set's AlertmanagerConfig (s.cfg). ApplyConfig uses the digest as a map
// key to match a newly built set against an existing one with the same config.
// The digest serves only as an equality fingerprint here.
//
// It returns an empty string and the marshalling error if s.cfg cannot be
// serialized to YAML.
func (s *alertmanagerSet) configHash() (string, error) {
	serialized, err := yaml.Marshal(s.cfg)
	if err != nil {
		return "", err
	}
	// md5.Sum yields a fixed-size array; slice it for the hex encoder.
	sum := md5.Sum(serialized)
	return hex.EncodeToString(sum[:]), nil
}
func postPath(pre string, v config.AlertmanagerAPIVersion) string { func postPath(pre string, v config.AlertmanagerAPIVersion) string {
alertPushEndpoint := fmt.Sprintf("/api/%v/alerts", string(v)) alertPushEndpoint := fmt.Sprintf("/api/%v/alerts", string(v))
return path.Join("/", pre, alertPushEndpoint) return path.Join("/", pre, alertPushEndpoint)

@ -1019,7 +1019,7 @@ func TestStop_DrainingEnabled(t *testing.T) {
require.Equal(t, int64(2), alertsReceived.Load()) require.Equal(t, int64(2), alertsReceived.Load())
} }
func TestAlertmanagersNotDroppedDuringApplyConfig(t *testing.T) { func TestApplyConfig(t *testing.T) {
targetURL := "alertmanager:9093" targetURL := "alertmanager:9093"
targetGroup := &targetgroup.Group{ targetGroup := &targetgroup.Group{
Targets: []model.LabelSet{ Targets: []model.LabelSet{
@ -1039,27 +1039,86 @@ alerting:
- files: - files:
- foo.json - foo.json
` `
// TODO: add order change test // 1. Ensure known alertmanagers are not dropped during ApplyConfig.
// TODO: add entry removed with DS manager require.NoError(t, yaml.UnmarshalStrict([]byte(s), cfg))
err := yaml.UnmarshalStrict([]byte(s), cfg)
require.NoError(t, err)
require.Len(t, cfg.AlertingConfig.AlertmanagerConfigs, 1) require.Len(t, cfg.AlertingConfig.AlertmanagerConfigs, 1)
yaml.Marshal(cfg.AlertingConfig.AlertmanagerConfigs) // First, apply the config and reload.
require.NoError(t, n.ApplyConfig(cfg))
// First apply config and reload.
err = n.ApplyConfig(cfg)
require.NoError(t, err)
tgs := map[string][]*targetgroup.Group{"config-0": {targetGroup}} tgs := map[string][]*targetgroup.Group{"config-0": {targetGroup}}
n.reload(tgs) n.reload(tgs)
require.Len(t, n.Alertmanagers(), 1) require.Len(t, n.Alertmanagers(), 1)
require.Equal(t, alertmanagerURL, n.Alertmanagers()[0].String()) require.Equal(t, alertmanagerURL, n.Alertmanagers()[0].String())
// Reapply the config. // Reapply the config.
err = n.ApplyConfig(cfg) require.NoError(t, n.ApplyConfig(cfg))
require.NoError(t, err) // Ensure the known alertmanagers are not dropped.
// The already known alertmanagers shouldn't get dropped.
require.Len(t, n.Alertmanagers(), 1) require.Len(t, n.Alertmanagers(), 1)
require.Equal(t, alertmanagerURL, n.Alertmanagers()[0].String()) require.Equal(t, alertmanagerURL, n.Alertmanagers()[0].String())
// 2. Ensure known alertmanagers are not dropped during ApplyConfig even when
// the config order changes.
s = `
alerting:
alertmanagers:
- static_configs:
- file_sd_configs:
- files:
- foo.json
`
require.NoError(t, yaml.UnmarshalStrict([]byte(s), cfg))
require.Len(t, cfg.AlertingConfig.AlertmanagerConfigs, 2)
require.NoError(t, n.ApplyConfig(cfg))
require.Len(t, n.Alertmanagers(), 1)
// Ensure no unnecessary alertmanagers are injected.
require.Empty(t, n.alertmanagers["config-0"].ams)
// Ensure the config order is taken into account.
ams := n.alertmanagers["config-1"].ams
require.Len(t, ams, 1)
require.Equal(t, alertmanagerURL, ams[0].url().String())
// 3. Ensure known alertmanagers are reused for new config with identical AlertmanagerConfig.
s = `
alerting:
alertmanagers:
- file_sd_configs:
- files:
- foo.json
- file_sd_configs:
- files:
- foo.json
`
require.NoError(t, yaml.UnmarshalStrict([]byte(s), cfg))
require.Len(t, cfg.AlertingConfig.AlertmanagerConfigs, 2)
require.NoError(t, n.ApplyConfig(cfg))
require.Len(t, n.Alertmanagers(), 2)
for cfgIdx := range 2 {
ams := n.alertmanagers[fmt.Sprintf("config-%d", cfgIdx)].ams
require.Len(t, ams, 1)
require.Equal(t, alertmanagerURL, ams[0].url().String())
}
// 4. Ensure known alertmanagers are reused only for identical AlertmanagerConfig.
s = `
alerting:
alertmanagers:
- file_sd_configs:
- files:
- foo.json
path_prefix: /bar
- file_sd_configs:
- files:
- foo.json
relabel_configs:
- source_labels: ['__address__']
regex: 'doesntmatter:1234'
action: drop
`
require.NoError(t, yaml.UnmarshalStrict([]byte(s), cfg))
require.Len(t, cfg.AlertingConfig.AlertmanagerConfigs, 2)
require.NoError(t, n.ApplyConfig(cfg))
require.Empty(t, n.Alertmanagers())
} }

Loading…
Cancel
Save