Browse Source

fix(notifier): stop dropping known alertmanagers on each ApplyConfig and waiting on SD to update them.

Signed-off-by: machine424 <ayoubmrini424@gmail.com>
pull/14987/head
machine424 2 months ago
parent
commit
83ee57343a
No known key found for this signature in database
GPG Key ID: A4B001A4FDEE017D
  1. 32
      notifier/notifier.go
  2. 87
      notifier/notifier_test.go

32
notifier/notifier.go

@ -16,6 +16,8 @@ package notifier
import (
"bytes"
"context"
"crypto/md5"
"encoding/hex"
"encoding/json"
"fmt"
"io"
@ -35,6 +37,7 @@ import (
"github.com/prometheus/common/sigv4"
"github.com/prometheus/common/version"
"go.uber.org/atomic"
"gopkg.in/yaml.v2"
"github.com/prometheus/prometheus/config"
"github.com/prometheus/prometheus/discovery/targetgroup"
@ -257,6 +260,16 @@ func (n *Manager) ApplyConfig(conf *config.Config) error {
n.opts.RelabelConfigs = conf.AlertingConfig.AlertRelabelConfigs
amSets := make(map[string]*alertmanagerSet)
// configToAlertmanagers maps alertmanager sets for each unique AlertmanagerConfig,
// helping to avoid dropping known alertmanagers and re-use them without waiting for SD updates when applying the config.
configToAlertmanagers := make(map[string]*alertmanagerSet, len(n.alertmanagers))
for _, oldAmSet := range n.alertmanagers {
hash, err := oldAmSet.configHash()
if err != nil {
return err
}
configToAlertmanagers[hash] = oldAmSet
}
for k, cfg := range conf.AlertingConfig.AlertmanagerConfigs.ToMap() {
ams, err := newAlertmanagerSet(cfg, n.logger, n.metrics)
@ -264,6 +277,16 @@ func (n *Manager) ApplyConfig(conf *config.Config) error {
return err
}
hash, err := ams.configHash()
if err != nil {
return err
}
if oldAmSet, ok := configToAlertmanagers[hash]; ok {
ams.ams = oldAmSet.ams
ams.droppedAms = oldAmSet.droppedAms
}
amSets[k] = ams
}
@ -803,6 +826,15 @@ func (s *alertmanagerSet) sync(tgs []*targetgroup.Group) {
}
}
func (s *alertmanagerSet) configHash() (string, error) {
b, err := yaml.Marshal(s.cfg)
if err != nil {
return "", err
}
hash := md5.Sum(b)
return hex.EncodeToString(hash[:]), nil
}
func postPath(pre string, v config.AlertmanagerAPIVersion) string {
alertPushEndpoint := fmt.Sprintf("/api/%v/alerts", string(v))
return path.Join("/", pre, alertPushEndpoint)

87
notifier/notifier_test.go

@ -1019,7 +1019,7 @@ func TestStop_DrainingEnabled(t *testing.T) {
require.Equal(t, int64(2), alertsReceived.Load())
}
func TestAlertmanagersNotDroppedDuringApplyConfig(t *testing.T) {
func TestApplyConfig(t *testing.T) {
targetURL := "alertmanager:9093"
targetGroup := &targetgroup.Group{
Targets: []model.LabelSet{
@ -1039,27 +1039,86 @@ alerting:
- files:
- foo.json
`
// TODO: add order change test
// TODO: add entry removed with DS manager
err := yaml.UnmarshalStrict([]byte(s), cfg)
require.NoError(t, err)
// 1. Ensure known alertmanagers are not dropped during ApplyConfig.
require.NoError(t, yaml.UnmarshalStrict([]byte(s), cfg))
require.Len(t, cfg.AlertingConfig.AlertmanagerConfigs, 1)
yaml.Marshal(cfg.AlertingConfig.AlertmanagerConfigs)
// First apply config and reload.
err = n.ApplyConfig(cfg)
require.NoError(t, err)
// First, apply the config and reload.
require.NoError(t, n.ApplyConfig(cfg))
tgs := map[string][]*targetgroup.Group{"config-0": {targetGroup}}
n.reload(tgs)
require.Len(t, n.Alertmanagers(), 1)
require.Equal(t, alertmanagerURL, n.Alertmanagers()[0].String())
// Reapply the config.
err = n.ApplyConfig(cfg)
require.NoError(t, err)
// The already known alertmanagers shouldn't get dropped.
require.NoError(t, n.ApplyConfig(cfg))
// Ensure the known alertmanagers are not dropped.
require.Len(t, n.Alertmanagers(), 1)
require.Equal(t, alertmanagerURL, n.Alertmanagers()[0].String())
// 2. Ensure known alertmanagers are not dropped during ApplyConfig even when
// the config order changes.
s = `
alerting:
alertmanagers:
- static_configs:
- file_sd_configs:
- files:
- foo.json
`
require.NoError(t, yaml.UnmarshalStrict([]byte(s), cfg))
require.Len(t, cfg.AlertingConfig.AlertmanagerConfigs, 2)
require.NoError(t, n.ApplyConfig(cfg))
require.Len(t, n.Alertmanagers(), 1)
// Ensure no unnecessary alertmanagers are injected.
require.Empty(t, n.alertmanagers["config-0"].ams)
// Ensure the config order is taken into account.
ams := n.alertmanagers["config-1"].ams
require.Len(t, ams, 1)
require.Equal(t, alertmanagerURL, ams[0].url().String())
// 3. Ensure known alertmanagers are reused for new config with identical AlertmanagerConfig.
s = `
alerting:
alertmanagers:
- file_sd_configs:
- files:
- foo.json
- file_sd_configs:
- files:
- foo.json
`
require.NoError(t, yaml.UnmarshalStrict([]byte(s), cfg))
require.Len(t, cfg.AlertingConfig.AlertmanagerConfigs, 2)
require.NoError(t, n.ApplyConfig(cfg))
require.Len(t, n.Alertmanagers(), 2)
for cfgIdx := range 2 {
ams := n.alertmanagers[fmt.Sprintf("config-%d", cfgIdx)].ams
require.Len(t, ams, 1)
require.Equal(t, alertmanagerURL, ams[0].url().String())
}
// 4. Ensure known alertmanagers are reused only for identical AlertmanagerConfig.
s = `
alerting:
alertmanagers:
- file_sd_configs:
- files:
- foo.json
path_prefix: /bar
- file_sd_configs:
- files:
- foo.json
relabel_configs:
- source_labels: ['__address__']
regex: 'doesntmatter:1234'
action: drop
`
require.NoError(t, yaml.UnmarshalStrict([]byte(s), cfg))
require.Len(t, cfg.AlertingConfig.AlertmanagerConfigs, 2)
require.NoError(t, n.ApplyConfig(cfg))
require.Empty(t, n.Alertmanagers())
}

Loading…
Cancel
Save