From b0ff51a903e1cb4e87d54f4ffe7bd1d99759c08d Mon Sep 17 00:00:00 2001 From: Yuri Tseretyan Date: Fri, 13 Jun 2025 10:32:23 -0400 Subject: [PATCH] Alerting: Support for Mimir configuration in Grafana Alertmanager (#106402) --- .../api/tooling/definitions/alertmanager.go | 83 ++++++++ .../tooling/definitions/alertmanager_test.go | 200 ++++++++++++++++++ pkg/services/ngalert/notifier/alertmanager.go | 47 +++- .../ngalert/notifier/alertmanager_test.go | 148 +++++++++++++ pkg/services/ngalert/notifier/compat.go | 14 -- .../provisioning/notification_policies.go | 5 + 6 files changed, 476 insertions(+), 21 deletions(-) diff --git a/pkg/services/ngalert/api/tooling/definitions/alertmanager.go b/pkg/services/ngalert/api/tooling/definitions/alertmanager.go index d1d16790ad0..2f8d0533238 100644 --- a/pkg/services/ngalert/api/tooling/definitions/alertmanager.go +++ b/pkg/services/ngalert/api/tooling/definitions/alertmanager.go @@ -3,13 +3,17 @@ package definitions import ( "context" "encoding/json" + "errors" "fmt" "time" "github.com/go-openapi/strfmt" + alertingTemplates "github.com/grafana/alerting/templates" amv2 "github.com/prometheus/alertmanager/api/v2/models" "github.com/prometheus/alertmanager/config" + "github.com/prometheus/alertmanager/pkg/labels" "github.com/prometheus/common/model" + "gopkg.in/yaml.v3" "github.com/grafana/alerting/definition" @@ -263,6 +267,7 @@ type ( PostableApiReceiver = definition.PostableApiReceiver PostableGrafanaReceivers = definition.PostableGrafanaReceivers ReceiverType = definition.ReceiverType + MergeResult = definition.MergeResult ) const ( @@ -643,13 +648,82 @@ type DatasourceUIDReference struct { DatasourceUID string } +type ExtraConfiguration struct { + Identifier string `yaml:"identifier" json:"identifier"` + MergeMatchers config.Matchers `yaml:"merge_matchers" json:"merge_matchers"` + TemplateFiles map[string]string `yaml:"template_files" json:"template_files"` + AlertmanagerConfig PostableApiAlertingConfig 
`yaml:"alertmanager_config" json:"alertmanager_config"` +} + +func (c ExtraConfiguration) Validate() error { + if c.Identifier == "" { + return errors.New("identifier is required") + } + if len(c.MergeMatchers) == 0 { + return errors.New("at least one matcher is required") + } + for _, m := range c.MergeMatchers { + if m.Type != labels.MatchEqual { + return errors.New("only matchers with type equal are supported") + } + } + err := c.AlertmanagerConfig.Validate() + if err != nil { + return fmt.Errorf("invalid alertmanager configuration: %w", err) + } + return nil +} + // swagger:model type PostableUserConfig struct { TemplateFiles map[string]string `yaml:"template_files" json:"template_files"` AlertmanagerConfig PostableApiAlertingConfig `yaml:"alertmanager_config" json:"alertmanager_config"` + ExtraConfigs []ExtraConfiguration `yaml:"extra_config,omitempty" json:"extra_config,omitempty"` amSimple map[string]interface{} `yaml:"-" json:"-"` } +func (c *PostableUserConfig) GetMergedAlertmanagerConfig() (MergeResult, error) { + if len(c.ExtraConfigs) == 0 { + return MergeResult{ + Config: c.AlertmanagerConfig, + }, nil + } + // support only one config for now + mimirCfg := c.ExtraConfigs[0] + opts := definition.MergeOpts{ + DedupSuffix: mimirCfg.Identifier, + SubtreeMatchers: mimirCfg.MergeMatchers, + } + if err := opts.Validate(); err != nil { + return MergeResult{}, fmt.Errorf("invalid merge options: %w", err) + } + return definition.Merge(c.AlertmanagerConfig, mimirCfg.AlertmanagerConfig, opts) // for now support only the first extra config +} + +// GetMergedTemplateDefinitions converts the given PostableUserConfig's TemplateFiles to a slice of TemplateDefinitions. 
+func (c *PostableUserConfig) GetMergedTemplateDefinitions() []alertingTemplates.TemplateDefinition { + out := make([]alertingTemplates.TemplateDefinition, 0, len(c.TemplateFiles)) + for name, tmpl := range c.TemplateFiles { + out = append(out, alertingTemplates.TemplateDefinition{ + Name: name, + Template: tmpl, + Kind: alertingTemplates.GrafanaKind, + }) + } + if len(c.ExtraConfigs) == 0 { + return out + } + // support only one config for now + for name, tmpl := range c.ExtraConfigs[0].TemplateFiles { + out = append(out, alertingTemplates.TemplateDefinition{ + Name: name, + Template: tmpl, + Kind: alertingTemplates.MimirKind, + }) + } + return out +} + func (c *PostableUserConfig) UnmarshalJSON(b []byte) error { type plain PostableUserConfig if err := json.Unmarshal(b, (*plain)(c)); err != nil { @@ -661,6 +735,15 @@ func (c *PostableUserConfig) UnmarshalJSON(b []byte) error { return err } + if len(c.ExtraConfigs) > 1 { + return errors.New("only one extra config is supported") + } + for _, extraConfig := range c.ExtraConfigs { + if err := extraConfig.Validate(); err != nil { + return fmt.Errorf("extra configuration is invalid: %w", err) + } + } + type intermediate struct { AlertmanagerConfig map[string]interface{} `yaml:"alertmanager_config" json:"alertmanager_config"` } diff --git a/pkg/services/ngalert/api/tooling/definitions/alertmanager_test.go b/pkg/services/ngalert/api/tooling/definitions/alertmanager_test.go index 7cc534e77d4..363e91a9b40 100644 --- a/pkg/services/ngalert/api/tooling/definitions/alertmanager_test.go +++ b/pkg/services/ngalert/api/tooling/definitions/alertmanager_test.go @@ -6,7 +6,9 @@ import ( "strings" "testing" + alertingTemplates "github.com/grafana/alerting/templates" "github.com/prometheus/alertmanager/config" + "github.com/prometheus/alertmanager/pkg/labels" "github.com/prometheus/common/model" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -215,3 +217,201 @@ func Test_RawMessageMarshaling(t *testing.T) 
{ assert.Equal(t, RawMessage(`{"data":"test"}`), n.Field) }) } + +func TestPostableUserConfig_GetMergedAlertmanagerConfig(t *testing.T) { + alertmanagerCfg := PostableApiAlertingConfig{ + Config: Config{ + Route: &Route{ + Receiver: "default", + }, + }, + Receivers: []*PostableApiReceiver{ + { + Receiver: config.Receiver{ + Name: "default", + }, + }, + }, + } + + testCases := []struct { + name string + config PostableUserConfig + expectedError string + }{ + { + name: "no extra configs", + config: PostableUserConfig{ + AlertmanagerConfig: alertmanagerCfg, + }, + }, + { + name: "valid mimir config", + config: PostableUserConfig{ + AlertmanagerConfig: alertmanagerCfg, + ExtraConfigs: []ExtraConfiguration{ + { + Identifier: "mimir-1", + MergeMatchers: config.Matchers{ + { + Type: labels.MatchEqual, + Name: "cluster", + Value: "prod", + }, + }, + AlertmanagerConfig: PostableApiAlertingConfig{ + Config: Config{ + Route: &Route{ + Receiver: "mimir-receiver", + }, + }, + Receivers: []*PostableApiReceiver{ + { + Receiver: config.Receiver{ + Name: "mimir-receiver", + }, + }, + }, + }, + }, + }, + }, + }, + { + name: "empty identifier", + config: PostableUserConfig{ + AlertmanagerConfig: alertmanagerCfg, + ExtraConfigs: []ExtraConfiguration{ + { + Identifier: "", + MergeMatchers: config.Matchers{}, + AlertmanagerConfig: PostableApiAlertingConfig{ + Config: Config{ + Route: &Route{ + Receiver: "test", + }, + }, + }, + }, + }, + }, + expectedError: "invalid merge options", + }, + { + name: "bad matcher type", + config: PostableUserConfig{ + AlertmanagerConfig: alertmanagerCfg, + ExtraConfigs: []ExtraConfiguration{ + { + Identifier: "test", + MergeMatchers: config.Matchers{ + { + Type: labels.MatchNotEqual, + Name: "cluster", + Value: "prod", + }, + }, + }, + }, + }, + expectedError: "only equality matchers are allowed", + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + result, err := tc.config.GetMergedAlertmanagerConfig() + if tc.expectedError 
!= "" { + require.Error(t, err) + require.ErrorContains(t, err, tc.expectedError) + } else { + require.NoError(t, err) + require.NotNil(t, result.Config) + } + }) + } +} + +func TestPostableUserConfig_GetMergedTemplateDefinitions(t *testing.T) { + testCases := []struct { + name string + config PostableUserConfig + expectedTemplates int + }{ + { + name: "no templates", + config: PostableUserConfig{ + TemplateFiles: map[string]string{}, + ExtraConfigs: []ExtraConfiguration{}, + }, + expectedTemplates: 0, + }, + { + name: "grafana templates only", + config: PostableUserConfig{ + TemplateFiles: map[string]string{ + "grafana-template1": "{{ define \"test\" }}Hello{{ end }}", + "grafana-template2": "{{ define \"test2\" }}World{{ end }}", + }, + ExtraConfigs: []ExtraConfiguration{}, + }, + expectedTemplates: 2, + }, + { + name: "mimir templates only", + config: PostableUserConfig{ + TemplateFiles: map[string]string{}, + ExtraConfigs: []ExtraConfiguration{ + { + TemplateFiles: map[string]string{ + "mimir-template": "{{ define \"mimir\" }}Mimir{{ end }}", + }, + }, + }, + }, + expectedTemplates: 1, + }, + { + name: "mixed templates", + config: PostableUserConfig{ + TemplateFiles: map[string]string{ + "grafana-template": "{{ define \"grafana\" }}Grafana{{ end }}", + }, + ExtraConfigs: []ExtraConfiguration{ + { + TemplateFiles: map[string]string{ + "mimir-template": "{{ define \"mimir\" }}Mimir{{ end }}", + }, + }, + }, + }, + expectedTemplates: 2, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + result := tc.config.GetMergedTemplateDefinitions() + require.Len(t, result, tc.expectedTemplates) + + templateMap := make(map[string]string) + kindMap := make(map[string]alertingTemplates.Kind) + for _, tmpl := range result { + templateMap[tmpl.Name] = tmpl.Template + kindMap[tmpl.Name] = tmpl.Kind + } + + for name, content := range tc.config.TemplateFiles { + require.Equal(t, content, templateMap[name]) + require.Equal(t, 
alertingTemplates.GrafanaKind, kindMap[name]) + } + + if len(tc.config.ExtraConfigs) > 0 { + for name, content := range tc.config.ExtraConfigs[0].TemplateFiles { + require.Equal(t, content, templateMap[name]) + require.Equal(t, alertingTemplates.MimirKind, kindMap[name]) + } + } + }) + } +} diff --git a/pkg/services/ngalert/notifier/alertmanager.go b/pkg/services/ngalert/notifier/alertmanager.go index 035e26dbdc8..25461277367 100644 --- a/pkg/services/ngalert/notifier/alertmanager.go +++ b/pkg/services/ngalert/notifier/alertmanager.go @@ -7,6 +7,7 @@ import ( "encoding/json" "fmt" "strconv" + "strings" "time" alertingNotify "github.com/grafana/alerting/notify" @@ -314,11 +315,45 @@ func (am *alertmanager) aggregateInhibitMatchers(rules []config.InhibitRule, amu } } +// logMergeResult logs at info level which receivers and time intervals were renamed while merging configurations. +// It logs nothing when the merge result contains no renames; each list is built only when its own map is non-empty. +func logMergeResult(l log.Logger, m apimodels.MergeResult) { + if len(m.RenamedReceivers) == 0 && len(m.RenamedTimeIntervals) == 0 { + return + } + + logCtx := make([]any, 0, 4) + if len(m.RenamedReceivers) > 0 { + rcvBuilder := strings.Builder{} + for from, to := range m.RenamedReceivers { + rcvBuilder.WriteString(fmt.Sprintf("'%s'->'%s',", from, to)) + } + logCtx = append(logCtx, "renamedReceivers", fmt.Sprintf("[%s]", rcvBuilder.String()[0:rcvBuilder.Len()-1])) + } + if len(m.RenamedTimeIntervals) > 0 { + rcvBuilder := strings.Builder{} + for from, to := range m.RenamedTimeIntervals { + rcvBuilder.WriteString(fmt.Sprintf("'%s'->'%s',", from, to)) + } + logCtx = append(logCtx, "renamedTimeIntervals", fmt.Sprintf("[%s]", rcvBuilder.String()[0:rcvBuilder.Len()-1])) + } + l.Info("Configurations merged successfully but some resources were renamed", logCtx...) +} + // applyConfig applies a new configuration by re-initializing all components using the configuration provided. // It returns a boolean indicating whether the user config was changed and an error. // It is not safe to call concurrently. 
func (am *alertmanager) applyConfig(ctx context.Context, cfg *apimodels.PostableUserConfig, skipInvalid bool) (bool, error) { - err := AddAutogenConfig(ctx, am.logger, am.Store, am.Base.TenantID(), &cfg.AlertmanagerConfig, skipInvalid) + mergeResult, err := cfg.GetMergedAlertmanagerConfig() + if err != nil { + return false, fmt.Errorf("failed to get full alertmanager configuration: %w", err) + } + logMergeResult(am.logger, mergeResult) + amConfig := mergeResult.Config + templates := cfg.GetMergedTemplateDefinitions() + + // Now add autogenerated config to the route. + err = AddAutogenConfig(ctx, am.logger, am.Store, am.Base.TenantID(), &amConfig, skipInvalid) if err != nil { return false, err } @@ -337,7 +370,7 @@ func (am *alertmanager) applyConfig(ctx context.Context, cfg *apimodels.Postable return false, nil } - receivers := PostableApiAlertingConfigToApiReceivers(cfg.AlertmanagerConfig) + receivers := PostableApiAlertingConfigToApiReceivers(amConfig) for _, recv := range receivers { err = patchNewSecureFields(ctx, recv, alertingNotify.DecodeSecretsFromBase64, am.decryptFn) if err != nil { @@ -347,11 +380,11 @@ func (am *alertmanager) applyConfig(ctx context.Context, cfg *apimodels.Postable am.logger.Info("Applying new configuration to Alertmanager", "configHash", fmt.Sprintf("%x", configHash)) err = am.Base.ApplyConfig(alertingNotify.NotificationsConfiguration{ - RoutingTree: cfg.AlertmanagerConfig.Route.AsAMRoute(), - InhibitRules: cfg.AlertmanagerConfig.InhibitRules, - MuteTimeIntervals: cfg.AlertmanagerConfig.MuteTimeIntervals, - TimeIntervals: cfg.AlertmanagerConfig.TimeIntervals, - Templates: ToTemplateDefinitions(cfg), + RoutingTree: amConfig.Route.AsAMRoute(), + InhibitRules: amConfig.InhibitRules, + MuteTimeIntervals: amConfig.MuteTimeIntervals, + TimeIntervals: amConfig.TimeIntervals, + Templates: templates, Receivers: receivers, DispatcherLimits: &nilLimits{}, Raw: rawConfig, diff --git a/pkg/services/ngalert/notifier/alertmanager_test.go 
b/pkg/services/ngalert/notifier/alertmanager_test.go index 7033e5f0b8a..eb7b8d9b06f 100644 --- a/pkg/services/ngalert/notifier/alertmanager_test.go +++ b/pkg/services/ngalert/notifier/alertmanager_test.go @@ -2,16 +2,22 @@ package notifier import ( "context" + "net/url" "testing" "time" + "github.com/prometheus/alertmanager/config" + "github.com/prometheus/alertmanager/pkg/labels" "github.com/prometheus/client_golang/prometheus" + promcfg "github.com/prometheus/common/config" + "github.com/prometheus/common/model" "github.com/stretchr/testify/require" "github.com/grafana/grafana/pkg/infra/db" "github.com/grafana/grafana/pkg/infra/log" "github.com/grafana/grafana/pkg/services/dashboards" "github.com/grafana/grafana/pkg/services/featuremgmt" + "github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions" "github.com/grafana/grafana/pkg/services/ngalert/metrics" "github.com/grafana/grafana/pkg/services/ngalert/store" "github.com/grafana/grafana/pkg/services/ngalert/tests/fakes" @@ -62,3 +68,145 @@ func TestAlertmanager_newAlertmanager(t *testing.T) { am := setupAMTest(t) require.False(t, am.Ready()) } + +func TestAlertmanager_ApplyConfig(t *testing.T) { + basicConfig := func() definitions.PostableApiAlertingConfig { + return definitions.PostableApiAlertingConfig{ + Config: definitions.Config{ + Route: &definitions.Route{ + Receiver: "default-receiver", + ObjectMatchers: definitions.ObjectMatchers{ + &labels.Matcher{ + Type: labels.MatchEqual, + Name: "__grafana_autogenerated__", + Value: "true", + }, + }, + }, + }, + Receivers: []*definitions.PostableApiReceiver{ + { + Receiver: config.Receiver{ + Name: "default-receiver", + }, + }, + }, + } + } + + testCases := []struct { + name string + config *definitions.PostableUserConfig + expectedError string + skipInvalid bool + }{ + { + name: "basic config", + config: &definitions.PostableUserConfig{ + AlertmanagerConfig: basicConfig(), + TemplateFiles: map[string]string{ + "grafana-template": "{{ define 
\"grafana.title\" }}Alert{{ end }}", + }, + }, + skipInvalid: false, + }, + { + name: "with mimir config", + config: &definitions.PostableUserConfig{ + AlertmanagerConfig: basicConfig(), + TemplateFiles: map[string]string{ + "grafana-template": "{{ define \"grafana.title\" }}Grafana Alert{{ end }}", + }, + ExtraConfigs: []definitions.ExtraConfiguration{ + { + Identifier: "mimir-prod", + MergeMatchers: config.Matchers{ + { + Type: labels.MatchEqual, + Name: "__mimir__", + Value: "true", + }, + }, + TemplateFiles: map[string]string{ + "mimir-template": "{{ define \"mimir.title\" }}Mimir Alert{{ end }}", + }, + AlertmanagerConfig: definitions.PostableApiAlertingConfig{ + Config: definitions.Config{ + Route: &definitions.Route{ + Receiver: "mimir-webhook", + GroupBy: []model.LabelName{"alertname", "cluster"}, + }, + }, + Receivers: []*definitions.PostableApiReceiver{ + { + Receiver: config.Receiver{ + Name: "mimir-webhook", + WebhookConfigs: []*config.WebhookConfig{ + { + URL: &config.SecretURL{ + URL: &url.URL{ + Scheme: "https", + Host: "webhook.example.com", + Path: "/alerts", + }, + }, + HTTPConfig: &promcfg.DefaultHTTPClientConfig, + }, + }, + }, + }, + }, + }, + }, + }, + }, + skipInvalid: false, + }, + { + name: "invalid config fails", + config: &definitions.PostableUserConfig{ + AlertmanagerConfig: basicConfig(), + ExtraConfigs: []definitions.ExtraConfiguration{ + { + Identifier: "", // invalid: empty identifier + MergeMatchers: config.Matchers{}, + AlertmanagerConfig: definitions.PostableApiAlertingConfig{ + Config: definitions.Config{ + Route: &definitions.Route{ + Receiver: "test-receiver", + }, + }, + }, + }, + }, + }, + expectedError: "failed to get full alertmanager configuration", + skipInvalid: false, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + am := setupAMTest(t) + ctx := context.Background() + + changed, err := am.applyConfig(ctx, tc.config, false) + + if tc.expectedError != "" { + require.Error(t, err) + 
require.ErrorContains(t, err, tc.expectedError) + require.False(t, changed) + } else { + require.NoError(t, err) + require.True(t, changed) + + templateDefs := tc.config.GetMergedTemplateDefinitions() + expectedTemplateCount := len(tc.config.TemplateFiles) + if len(tc.config.ExtraConfigs) > 0 { + expectedTemplateCount += len(tc.config.ExtraConfigs[0].TemplateFiles) + } + require.Len(t, templateDefs, expectedTemplateCount) + } + }) + } +} diff --git a/pkg/services/ngalert/notifier/compat.go b/pkg/services/ngalert/notifier/compat.go index c7488f3d4e9..20994df5e0b 100644 --- a/pkg/services/ngalert/notifier/compat.go +++ b/pkg/services/ngalert/notifier/compat.go @@ -5,7 +5,6 @@ import ( "fmt" alertingNotify "github.com/grafana/alerting/notify" - alertingTemplates "github.com/grafana/alerting/templates" apimodels "github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions" "github.com/grafana/grafana/pkg/services/ngalert/models" @@ -132,19 +131,6 @@ func PostableApiAlertingConfigToApiReceivers(c apimodels.PostableApiAlertingConf return apiReceivers } -// ToTemplateDefinitions converts the given PostableUserConfig's TemplateFiles to a slice of TemplateDefinitions. -func ToTemplateDefinitions(cfg *apimodels.PostableUserConfig) []alertingTemplates.TemplateDefinition { - out := make([]alertingTemplates.TemplateDefinition, 0, len(cfg.TemplateFiles)) - for name, tmpl := range cfg.TemplateFiles { - out = append(out, alertingTemplates.TemplateDefinition{ - Name: name, - Template: tmpl, - Kind: alertingTemplates.GrafanaKind, - }) - } - return out -} - // Silence-specific compat functions to convert between grafana/alerting and model types. 
func GettableSilenceToSilence(s alertingNotify.GettableSilence) *models.Silence { diff --git a/pkg/services/ngalert/provisioning/notification_policies.go b/pkg/services/ngalert/provisioning/notification_policies.go index 48235396938..fed23d7bc69 100644 --- a/pkg/services/ngalert/provisioning/notification_policies.go +++ b/pkg/services/ngalert/provisioning/notification_policies.go @@ -111,6 +111,11 @@ func (nps *NotificationPolicyService) UpdatePolicyTree(ctx context.Context, orgI revision.Config.AlertmanagerConfig.Route = &tree + _, err = revision.Config.GetMergedAlertmanagerConfig() + if err != nil { + return definitions.Route{}, "", fmt.Errorf("new routing tree is not compatible with extra configuration: %w", err) + } + err = nps.xact.InTransaction(ctx, func(ctx context.Context) error { if err := nps.configStore.Save(ctx, revision, orgID); err != nil { return err