Alerting: Extra dedup stage in Grafana Alertmanager (#99825)

* add feature flags

* update alerting module

* update Grafana Alertmanager to configure the extra dedup stage

---------

Co-authored-by: Santiago <santiagohernandez.1997@gmail.com>
This commit is contained in:
Yuri Tseretyan
2025-01-31 11:12:38 -05:00
committed by GitHub
parent d0703cfdbd
commit 0be6e1bb86
14 changed files with 87 additions and 14 deletions

2
go.mod
View File

@ -69,7 +69,7 @@ require (
github.com/googleapis/gax-go/v2 v2.14.1 // @grafana/grafana-backend-group
github.com/gorilla/mux v1.8.1 // @grafana/grafana-backend-group
github.com/gorilla/websocket v1.5.3 // @grafana/grafana-app-platform-squad
github.com/grafana/alerting v0.0.0-20250129195454-3e5b80036b7a // @grafana/alerting-backend
github.com/grafana/alerting v0.0.0-20250130152446-d49e2e0b7d65 // @grafana/alerting-backend
github.com/grafana/authlib v0.0.0-20250123104008-e99947858901 // @grafana/identity-access-team
github.com/grafana/authlib/types v0.0.0-20250120145936-5f0e28e7a87c // @grafana/identity-access-team
github.com/grafana/dataplane/examples v0.0.1 // @grafana/observability-metrics

4
go.sum
View File

@ -1498,8 +1498,8 @@ github.com/gorilla/sessions v1.2.1 h1:DHd3rPN5lE3Ts3D8rKkQ8x/0kqfeNmBAaiSi+o7Fsg
github.com/gorilla/sessions v1.2.1/go.mod h1:dk2InVEVJ0sfLlnXv9EAgkf6ecYs/i80K/zI+bUmuGM=
github.com/gorilla/websocket v1.5.3 h1:saDtZ6Pbx/0u+bgYQ3q96pZgCzfhKXGPqt7kZ72aNNg=
github.com/gorilla/websocket v1.5.3/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
github.com/grafana/alerting v0.0.0-20250129195454-3e5b80036b7a h1:44E+I3EPdh/W02Uyfyig86EJKPjvzcF3y0A+FEi1fBk=
github.com/grafana/alerting v0.0.0-20250129195454-3e5b80036b7a/go.mod h1:QsnoKX/iYZxA4Cv+H+wC7uxutBD8qi8ZW5UJvD2TYmU=
github.com/grafana/alerting v0.0.0-20250130152446-d49e2e0b7d65 h1:dmsycYQzl5JexuV8UxQpT3B79maSvhiIahid4/tezAM=
github.com/grafana/alerting v0.0.0-20250130152446-d49e2e0b7d65/go.mod h1:QsnoKX/iYZxA4Cv+H+wC7uxutBD8qi8ZW5UJvD2TYmU=
github.com/grafana/authlib v0.0.0-20250123104008-e99947858901 h1:nqV1YrtX+ZG+EYB5dcmFMWhg2Y038OMaAHAADbOC9RA=
github.com/grafana/authlib v0.0.0-20250123104008-e99947858901/go.mod h1:/gYfphsNu9v1qYWXxpv1NSvMEMSwvdf8qb8YlgwIRl8=
github.com/grafana/authlib/types v0.0.0-20250120145936-5f0e28e7a87c h1:b0sPDtt33uFdmvUJjSCld3kwE2E49dUvevuUDSJsEuo=

View File

@ -255,4 +255,6 @@ export interface FeatureToggles {
elasticsearchImprovedParsing?: boolean;
datasourceConnectionsTab?: boolean;
fetchRulesUsingPost?: boolean;
alertingAlertmanagerExtraDedupStage?: boolean;
alertingAlertmanagerExtraDedupStageStopPipeline?: boolean;
}

View File

@ -1772,6 +1772,24 @@ var (
HideFromAdminPage: true,
HideFromDocs: true,
},
{
Name: "alertingAlertmanagerExtraDedupStage",
Description: "enables extra deduplication stage in alertmanager that checks that timestamps of the pipeline and the current state are matching",
Stage: FeatureStageExperimental,
Owner: grafanaAlertingSquad,
HideFromAdminPage: true,
HideFromDocs: true,
RequiresRestart: true,
},
{
Name: "alertingAlertmanagerExtraDedupStageStopPipeline",
Description: "works together with alertingAlertmanagerExtraDedupStage, if enabled, it will stop the pipeline if the timestamps are not matching. Otherwise, it will emit a warning",
Stage: FeatureStageExperimental,
Owner: grafanaAlertingSquad,
HideFromAdminPage: true,
HideFromDocs: true,
RequiresRestart: true,
},
}
)

View File

@ -236,3 +236,5 @@ grafanaAdvisor,experimental,@grafana/plugins-platform-backend,false,false,false
elasticsearchImprovedParsing,experimental,@grafana/aws-datasources,false,false,false
datasourceConnectionsTab,experimental,@grafana/plugins-platform-backend,false,false,true
fetchRulesUsingPost,experimental,@grafana/alerting-squad,false,false,false
alertingAlertmanagerExtraDedupStage,experimental,@grafana/alerting-squad,false,true,false
alertingAlertmanagerExtraDedupStageStopPipeline,experimental,@grafana/alerting-squad,false,true,false

1 Name Stage Owner requiresDevMode RequiresRestart FrontendOnly
236 elasticsearchImprovedParsing experimental @grafana/aws-datasources false false false
237 datasourceConnectionsTab experimental @grafana/plugins-platform-backend false false true
238 fetchRulesUsingPost experimental @grafana/alerting-squad false false false
239 alertingAlertmanagerExtraDedupStage experimental @grafana/alerting-squad false true false
240 alertingAlertmanagerExtraDedupStageStopPipeline experimental @grafana/alerting-squad false true false

View File

@ -954,4 +954,12 @@ const (
// FlagFetchRulesUsingPost
// Use a POST request to list rules by passing down the namespaces user has access to
FlagFetchRulesUsingPost = "fetchRulesUsingPost"
// FlagAlertingAlertmanagerExtraDedupStage
// enables extra deduplication stage in alertmanager that checks that timestamps of the pipeline and the current state are matching
FlagAlertingAlertmanagerExtraDedupStage = "alertingAlertmanagerExtraDedupStage"
// FlagAlertingAlertmanagerExtraDedupStageStopPipeline
// works together with alertingAlertmanagerExtraDedupStage, if enabled, it will stop the pipeline if the timestamps are not matching. Otherwise, it will emit a warning
FlagAlertingAlertmanagerExtraDedupStageStopPipeline = "alertingAlertmanagerExtraDedupStageStopPipeline"
)

View File

@ -143,6 +143,36 @@
"codeowner": "@grafana/alerting-squad"
}
},
{
"metadata": {
"name": "alertingAlertmanagerExtraDedupStage",
"resourceVersion": "1738251165994",
"creationTimestamp": "2025-01-30T15:32:45Z"
},
"spec": {
"description": "enables extra deduplication stage in alertmanager that checks that timestamps of the pipeline and the current state are matching",
"stage": "experimental",
"codeowner": "@grafana/alerting-squad",
"requiresRestart": true,
"hideFromAdminPage": true,
"hideFromDocs": true
}
},
{
"metadata": {
"name": "alertingAlertmanagerExtraDedupStageStopPipeline",
"resourceVersion": "1738251165994",
"creationTimestamp": "2025-01-30T15:32:45Z"
},
"spec": {
"description": "works together with alertingAlertmanagerExtraDedupStage, if enabled, it will stop the pipeline if the timestamps are not matching. Otherwise, it will emit a warning",
"stage": "experimental",
"codeowner": "@grafana/alerting-squad",
"requiresRestart": true,
"hideFromAdminPage": true,
"hideFromDocs": true
}
},
{
"metadata": {
"name": "alertingApiServer",

View File

@ -10,6 +10,7 @@ import (
"time"
alertingNotify "github.com/grafana/alerting/notify"
"github.com/grafana/alerting/notify/stages"
"github.com/grafana/alerting/receivers"
alertingTemplates "github.com/grafana/alerting/templates"
"github.com/prometheus/alertmanager/config"
@ -17,6 +18,7 @@ import (
amv2 "github.com/prometheus/alertmanager/api/v2/models"
"github.com/grafana/grafana/pkg/infra/log"
"github.com/grafana/grafana/pkg/services/featuremgmt"
apimodels "github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions"
"github.com/grafana/grafana/pkg/services/ngalert/metrics"
ngmodels "github.com/grafana/grafana/pkg/services/ngalert/models"
@ -91,7 +93,7 @@ func (m maintenanceOptions) MaintenanceFunc(state alertingNotify.State) (int64,
func NewAlertmanager(ctx context.Context, orgID int64, cfg *setting.Cfg, store AlertingStore, stateStore stateStore,
peer alertingNotify.ClusterPeer, decryptFn alertingNotify.GetDecryptedValueFn, ns notifications.Service,
m *metrics.Alertmanager, withAutogen bool,
m *metrics.Alertmanager, featureToggles featuremgmt.FeatureToggles,
) (*alertmanager, error) {
nflog, err := stateStore.GetNotificationLog(ctx)
if err != nil {
@ -121,6 +123,16 @@ func NewAlertmanager(ctx context.Context, orgID int64, cfg *setting.Cfg, store A
return stateStore.SaveNotificationLog(context.Background(), state)
},
}
l := log.New("ngalert.notifier.alertmanager", "org", orgID)
action := stages.Disabled
if featureToggles.IsEnabledGlobally(featuremgmt.FlagAlertingAlertmanagerExtraDedupStage) {
if featureToggles.IsEnabledGlobally(featuremgmt.FlagAlertingAlertmanagerExtraDedupStageStopPipeline) {
action = stages.StopPipeline
} else {
action = stages.LogOnly
}
l.Info("Initializing Alertmanager", "extra_dedup_stage", action)
}
amcfg := &alertingNotify.GrafanaAlertmanagerConfig{
ExternalURL: cfg.AppURL,
@ -132,9 +144,9 @@ func NewAlertmanager(ctx context.Context, orgID int64, cfg *setting.Cfg, store A
MaxSilences: cfg.UnifiedAlerting.AlertmanagerMaxSilencesCount,
MaxSilenceSizeBytes: cfg.UnifiedAlerting.AlertmanagerMaxSilenceSizeBytes,
},
PipelineAndStateTimestampsMismatchAction: action,
}
l := log.New("ngalert.notifier.alertmanager", "org", orgID)
gam, err := alertingNotify.NewGrafanaAlertmanager("orgID", orgID, amcfg, peer, l, alertingNotify.NewGrafanaAlertmanagerMetrics(m.Registerer, l))
if err != nil {
return nil, err
@ -152,7 +164,7 @@ func NewAlertmanager(ctx context.Context, orgID int64, cfg *setting.Cfg, store A
logger: l,
// TODO: Preferably, logic around autogen would be outside of the specific alertmanager implementation so that remote alertmanager will get it for free.
withAutogen: withAutogen,
withAutogen: featureToggles.IsEnabled(ctx, featuremgmt.FlagAlertingSimplifiedRouting),
}
return am, nil

View File

@ -11,6 +11,7 @@ import (
"github.com/grafana/grafana/pkg/infra/db"
"github.com/grafana/grafana/pkg/infra/log"
"github.com/grafana/grafana/pkg/services/dashboards"
"github.com/grafana/grafana/pkg/services/featuremgmt"
"github.com/grafana/grafana/pkg/services/ngalert/metrics"
"github.com/grafana/grafana/pkg/services/ngalert/store"
"github.com/grafana/grafana/pkg/services/ngalert/tests/fakes"
@ -52,7 +53,7 @@ func setupAMTest(t *testing.T) *alertmanager {
orgID := 1
stateStore := NewFileStore(int64(orgID), kvStore)
am, err := NewAlertmanager(context.Background(), 1, cfg, s, stateStore, &NilPeer{}, decryptFn, nil, m, false)
am, err := NewAlertmanager(context.Background(), 1, cfg, s, stateStore, &NilPeer{}, decryptFn, nil, m, featuremgmt.WithFeatures())
require.NoError(t, err)
return am
}

View File

@ -160,7 +160,7 @@ func NewMultiOrgAlertmanager(
moa.factory = func(ctx context.Context, orgID int64) (Alertmanager, error) {
m := metrics.NewAlertmanagerMetrics(moa.metrics.GetOrCreateOrgRegistry(orgID), l)
stateStore := NewFileStore(orgID, kvStore)
return NewAlertmanager(ctx, orgID, moa.settings, moa.configStore, stateStore, moa.peer, moa.decryptFn, moa.ns, m, featureManager.IsEnabled(ctx, featuremgmt.FlagAlertingSimplifiedRouting))
return NewAlertmanager(ctx, orgID, moa.settings, moa.configStore, stateStore, moa.peer, moa.decryptFn, moa.ns, m, featureManager)
}
for _, opt := range opts {

View File

@ -170,7 +170,7 @@ require (
github.com/googleapis/enterprise-certificate-proxy v0.3.4 // indirect
github.com/googleapis/gax-go/v2 v2.14.1 // indirect
github.com/gorilla/mux v1.8.1 // indirect
github.com/grafana/alerting v0.0.0-20250129195454-3e5b80036b7a // indirect
github.com/grafana/alerting v0.0.0-20250130152446-d49e2e0b7d65 // indirect
github.com/grafana/authlib v0.0.0-20250123104008-e99947858901 // indirect
github.com/grafana/dataplane/sdata v0.0.9 // indirect
github.com/grafana/dskit v0.0.0-20241105154643-a6b453a88040 // indirect

View File

@ -547,8 +547,8 @@ github.com/gorilla/mux v1.8.1 h1:TuBL49tXwgrFYWhqrNgrUNEY92u81SPhu7sTdzQEiWY=
github.com/gorilla/mux v1.8.1/go.mod h1:AKf9I4AEqPTmMytcMc0KkNouC66V3BtZ4qD5fmWSiMQ=
github.com/gorilla/websocket v1.5.3 h1:saDtZ6Pbx/0u+bgYQ3q96pZgCzfhKXGPqt7kZ72aNNg=
github.com/gorilla/websocket v1.5.3/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
github.com/grafana/alerting v0.0.0-20250129195454-3e5b80036b7a h1:44E+I3EPdh/W02Uyfyig86EJKPjvzcF3y0A+FEi1fBk=
github.com/grafana/alerting v0.0.0-20250129195454-3e5b80036b7a/go.mod h1:QsnoKX/iYZxA4Cv+H+wC7uxutBD8qi8ZW5UJvD2TYmU=
github.com/grafana/alerting v0.0.0-20250130152446-d49e2e0b7d65 h1:dmsycYQzl5JexuV8UxQpT3B79maSvhiIahid4/tezAM=
github.com/grafana/alerting v0.0.0-20250130152446-d49e2e0b7d65/go.mod h1:QsnoKX/iYZxA4Cv+H+wC7uxutBD8qi8ZW5UJvD2TYmU=
github.com/grafana/authlib v0.0.0-20250123104008-e99947858901 h1:nqV1YrtX+ZG+EYB5dcmFMWhg2Y038OMaAHAADbOC9RA=
github.com/grafana/authlib v0.0.0-20250123104008-e99947858901/go.mod h1:/gYfphsNu9v1qYWXxpv1NSvMEMSwvdf8qb8YlgwIRl8=
github.com/grafana/authlib/types v0.0.0-20250120145936-5f0e28e7a87c h1:b0sPDtt33uFdmvUJjSCld3kwE2E49dUvevuUDSJsEuo=

View File

@ -115,7 +115,7 @@ require (
github.com/googleapis/enterprise-certificate-proxy v0.3.4 // indirect
github.com/googleapis/gax-go/v2 v2.14.1 // indirect
github.com/gorilla/mux v1.8.1 // indirect
github.com/grafana/alerting v0.0.0-20250129195454-3e5b80036b7a // indirect
github.com/grafana/alerting v0.0.0-20250130152446-d49e2e0b7d65 // indirect
github.com/grafana/dataplane/sdata v0.0.9 // indirect
github.com/grafana/grafana-app-sdk/logging v0.30.0 // indirect
github.com/grafana/grafana-aws-sdk v0.31.5 // indirect

View File

@ -403,8 +403,8 @@ github.com/gorilla/mux v1.6.2/go.mod h1:1lud6UwP+6orDFRuTfBEV8e9/aOM/c4fVVCaMa2z
github.com/gorilla/mux v1.7.1/go.mod h1:1lud6UwP+6orDFRuTfBEV8e9/aOM/c4fVVCaMa2zaAs=
github.com/gorilla/mux v1.8.1 h1:TuBL49tXwgrFYWhqrNgrUNEY92u81SPhu7sTdzQEiWY=
github.com/gorilla/mux v1.8.1/go.mod h1:AKf9I4AEqPTmMytcMc0KkNouC66V3BtZ4qD5fmWSiMQ=
github.com/grafana/alerting v0.0.0-20250129195454-3e5b80036b7a h1:44E+I3EPdh/W02Uyfyig86EJKPjvzcF3y0A+FEi1fBk=
github.com/grafana/alerting v0.0.0-20250129195454-3e5b80036b7a/go.mod h1:QsnoKX/iYZxA4Cv+H+wC7uxutBD8qi8ZW5UJvD2TYmU=
github.com/grafana/alerting v0.0.0-20250130152446-d49e2e0b7d65 h1:dmsycYQzl5JexuV8UxQpT3B79maSvhiIahid4/tezAM=
github.com/grafana/alerting v0.0.0-20250130152446-d49e2e0b7d65/go.mod h1:QsnoKX/iYZxA4Cv+H+wC7uxutBD8qi8ZW5UJvD2TYmU=
github.com/grafana/authlib v0.0.0-20250123104008-e99947858901 h1:nqV1YrtX+ZG+EYB5dcmFMWhg2Y038OMaAHAADbOC9RA=
github.com/grafana/authlib v0.0.0-20250123104008-e99947858901/go.mod h1:/gYfphsNu9v1qYWXxpv1NSvMEMSwvdf8qb8YlgwIRl8=
github.com/grafana/authlib/types v0.0.0-20250120145936-5f0e28e7a87c h1:b0sPDtt33uFdmvUJjSCld3kwE2E49dUvevuUDSJsEuo=