mirror of
https://github.com/grafana/grafana.git
synced 2025-08-06 05:30:12 +08:00
Alerting: Extra dedup stage in Grafana Alertmanager (#99825)
* add feature flags
* update alerting module
* update grafana alertmanager to configure the extra dedup stage

---------

Co-authored-by: Santiago <santiagohernandez.1997@gmail.com>
This commit is contained in:
2
go.mod
2
go.mod
@ -69,7 +69,7 @@ require (
|
||||
github.com/googleapis/gax-go/v2 v2.14.1 // @grafana/grafana-backend-group
|
||||
github.com/gorilla/mux v1.8.1 // @grafana/grafana-backend-group
|
||||
github.com/gorilla/websocket v1.5.3 // @grafana/grafana-app-platform-squad
|
||||
github.com/grafana/alerting v0.0.0-20250129195454-3e5b80036b7a // @grafana/alerting-backend
|
||||
github.com/grafana/alerting v0.0.0-20250130152446-d49e2e0b7d65 // @grafana/alerting-backend
|
||||
github.com/grafana/authlib v0.0.0-20250123104008-e99947858901 // @grafana/identity-access-team
|
||||
github.com/grafana/authlib/types v0.0.0-20250120145936-5f0e28e7a87c // @grafana/identity-access-team
|
||||
github.com/grafana/dataplane/examples v0.0.1 // @grafana/observability-metrics
|
||||
|
4
go.sum
4
go.sum
@ -1498,8 +1498,8 @@ github.com/gorilla/sessions v1.2.1 h1:DHd3rPN5lE3Ts3D8rKkQ8x/0kqfeNmBAaiSi+o7Fsg
|
||||
github.com/gorilla/sessions v1.2.1/go.mod h1:dk2InVEVJ0sfLlnXv9EAgkf6ecYs/i80K/zI+bUmuGM=
|
||||
github.com/gorilla/websocket v1.5.3 h1:saDtZ6Pbx/0u+bgYQ3q96pZgCzfhKXGPqt7kZ72aNNg=
|
||||
github.com/gorilla/websocket v1.5.3/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
|
||||
github.com/grafana/alerting v0.0.0-20250129195454-3e5b80036b7a h1:44E+I3EPdh/W02Uyfyig86EJKPjvzcF3y0A+FEi1fBk=
|
||||
github.com/grafana/alerting v0.0.0-20250129195454-3e5b80036b7a/go.mod h1:QsnoKX/iYZxA4Cv+H+wC7uxutBD8qi8ZW5UJvD2TYmU=
|
||||
github.com/grafana/alerting v0.0.0-20250130152446-d49e2e0b7d65 h1:dmsycYQzl5JexuV8UxQpT3B79maSvhiIahid4/tezAM=
|
||||
github.com/grafana/alerting v0.0.0-20250130152446-d49e2e0b7d65/go.mod h1:QsnoKX/iYZxA4Cv+H+wC7uxutBD8qi8ZW5UJvD2TYmU=
|
||||
github.com/grafana/authlib v0.0.0-20250123104008-e99947858901 h1:nqV1YrtX+ZG+EYB5dcmFMWhg2Y038OMaAHAADbOC9RA=
|
||||
github.com/grafana/authlib v0.0.0-20250123104008-e99947858901/go.mod h1:/gYfphsNu9v1qYWXxpv1NSvMEMSwvdf8qb8YlgwIRl8=
|
||||
github.com/grafana/authlib/types v0.0.0-20250120145936-5f0e28e7a87c h1:b0sPDtt33uFdmvUJjSCld3kwE2E49dUvevuUDSJsEuo=
|
||||
|
@ -255,4 +255,6 @@ export interface FeatureToggles {
|
||||
elasticsearchImprovedParsing?: boolean;
|
||||
datasourceConnectionsTab?: boolean;
|
||||
fetchRulesUsingPost?: boolean;
|
||||
alertingAlertmanagerExtraDedupStage?: boolean;
|
||||
alertingAlertmanagerExtraDedupStageStopPipeline?: boolean;
|
||||
}
|
||||
|
@ -1772,6 +1772,24 @@ var (
|
||||
HideFromAdminPage: true,
|
||||
HideFromDocs: true,
|
||||
},
|
||||
{
|
||||
Name: "alertingAlertmanagerExtraDedupStage",
|
||||
Description: "enables extra deduplication stage in alertmanager that checks that timestamps of the pipeline and the current state are matching",
|
||||
Stage: FeatureStageExperimental,
|
||||
Owner: grafanaAlertingSquad,
|
||||
HideFromAdminPage: true,
|
||||
HideFromDocs: true,
|
||||
RequiresRestart: true,
|
||||
},
|
||||
{
|
||||
Name: "alertingAlertmanagerExtraDedupStageStopPipeline",
|
||||
Description: "works together with alertingAlertmanagerExtraDedupStage, if enabled, it will stop the pipeline if the timestamps are not matching. Otherwise, it will emit a warning",
|
||||
Stage: FeatureStageExperimental,
|
||||
Owner: grafanaAlertingSquad,
|
||||
HideFromAdminPage: true,
|
||||
HideFromDocs: true,
|
||||
RequiresRestart: true,
|
||||
},
|
||||
}
|
||||
)
|
||||
|
||||
|
@ -236,3 +236,5 @@ grafanaAdvisor,experimental,@grafana/plugins-platform-backend,false,false,false
|
||||
elasticsearchImprovedParsing,experimental,@grafana/aws-datasources,false,false,false
|
||||
datasourceConnectionsTab,experimental,@grafana/plugins-platform-backend,false,false,true
|
||||
fetchRulesUsingPost,experimental,@grafana/alerting-squad,false,false,false
|
||||
alertingAlertmanagerExtraDedupStage,experimental,@grafana/alerting-squad,false,true,false
|
||||
alertingAlertmanagerExtraDedupStageStopPipeline,experimental,@grafana/alerting-squad,false,true,false
|
||||
|
|
@ -954,4 +954,12 @@ const (
|
||||
// FlagFetchRulesUsingPost
|
||||
// Use a POST request to list rules by passing down the namespaces user has access to
|
||||
FlagFetchRulesUsingPost = "fetchRulesUsingPost"
|
||||
|
||||
// FlagAlertingAlertmanagerExtraDedupStage
|
||||
// enables extra deduplication stage in alertmanager that checks that timestamps of the pipeline and the current state are matching
|
||||
FlagAlertingAlertmanagerExtraDedupStage = "alertingAlertmanagerExtraDedupStage"
|
||||
|
||||
// FlagAlertingAlertmanagerExtraDedupStageStopPipeline
|
||||
// works together with alertingAlertmanagerExtraDedupStage, if enabled, it will stop the pipeline if the timestamps are not matching. Otherwise, it will emit a warning
|
||||
FlagAlertingAlertmanagerExtraDedupStageStopPipeline = "alertingAlertmanagerExtraDedupStageStopPipeline"
|
||||
)
|
||||
|
@ -143,6 +143,36 @@
|
||||
"codeowner": "@grafana/alerting-squad"
|
||||
}
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"name": "alertingAlertmanagerExtraDedupStage",
|
||||
"resourceVersion": "1738251165994",
|
||||
"creationTimestamp": "2025-01-30T15:32:45Z"
|
||||
},
|
||||
"spec": {
|
||||
"description": "enables extra deduplication stage in alertmanager that checks that timestamps of the pipeline and the current state are matching",
|
||||
"stage": "experimental",
|
||||
"codeowner": "@grafana/alerting-squad",
|
||||
"requiresRestart": true,
|
||||
"hideFromAdminPage": true,
|
||||
"hideFromDocs": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"name": "alertingAlertmanagerExtraDedupStageStopPipeline",
|
||||
"resourceVersion": "1738251165994",
|
||||
"creationTimestamp": "2025-01-30T15:32:45Z"
|
||||
},
|
||||
"spec": {
|
||||
"description": "works together with alertingAlertmanagerExtraDedupStage, if enabled, it will stop the pipeline if the timestamps are not matching. Otherwise, it will emit a warning",
|
||||
"stage": "experimental",
|
||||
"codeowner": "@grafana/alerting-squad",
|
||||
"requiresRestart": true,
|
||||
"hideFromAdminPage": true,
|
||||
"hideFromDocs": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"name": "alertingApiServer",
|
||||
|
@ -10,6 +10,7 @@ import (
|
||||
"time"
|
||||
|
||||
alertingNotify "github.com/grafana/alerting/notify"
|
||||
"github.com/grafana/alerting/notify/stages"
|
||||
"github.com/grafana/alerting/receivers"
|
||||
alertingTemplates "github.com/grafana/alerting/templates"
|
||||
"github.com/prometheus/alertmanager/config"
|
||||
@ -17,6 +18,7 @@ import (
|
||||
amv2 "github.com/prometheus/alertmanager/api/v2/models"
|
||||
|
||||
"github.com/grafana/grafana/pkg/infra/log"
|
||||
"github.com/grafana/grafana/pkg/services/featuremgmt"
|
||||
apimodels "github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions"
|
||||
"github.com/grafana/grafana/pkg/services/ngalert/metrics"
|
||||
ngmodels "github.com/grafana/grafana/pkg/services/ngalert/models"
|
||||
@ -91,7 +93,7 @@ func (m maintenanceOptions) MaintenanceFunc(state alertingNotify.State) (int64,
|
||||
|
||||
func NewAlertmanager(ctx context.Context, orgID int64, cfg *setting.Cfg, store AlertingStore, stateStore stateStore,
|
||||
peer alertingNotify.ClusterPeer, decryptFn alertingNotify.GetDecryptedValueFn, ns notifications.Service,
|
||||
m *metrics.Alertmanager, withAutogen bool,
|
||||
m *metrics.Alertmanager, featureToggles featuremgmt.FeatureToggles,
|
||||
) (*alertmanager, error) {
|
||||
nflog, err := stateStore.GetNotificationLog(ctx)
|
||||
if err != nil {
|
||||
@ -121,6 +123,16 @@ func NewAlertmanager(ctx context.Context, orgID int64, cfg *setting.Cfg, store A
|
||||
return stateStore.SaveNotificationLog(context.Background(), state)
|
||||
},
|
||||
}
|
||||
l := log.New("ngalert.notifier.alertmanager", "org", orgID)
|
||||
action := stages.Disabled
|
||||
if featureToggles.IsEnabledGlobally(featuremgmt.FlagAlertingAlertmanagerExtraDedupStage) {
|
||||
if featureToggles.IsEnabledGlobally(featuremgmt.FlagAlertingAlertmanagerExtraDedupStageStopPipeline) {
|
||||
action = stages.StopPipeline
|
||||
} else {
|
||||
action = stages.LogOnly
|
||||
}
|
||||
l.Info("Initializing Alertmanager", "extra_dedup_stage", action)
|
||||
}
|
||||
|
||||
amcfg := &alertingNotify.GrafanaAlertmanagerConfig{
|
||||
ExternalURL: cfg.AppURL,
|
||||
@ -132,9 +144,9 @@ func NewAlertmanager(ctx context.Context, orgID int64, cfg *setting.Cfg, store A
|
||||
MaxSilences: cfg.UnifiedAlerting.AlertmanagerMaxSilencesCount,
|
||||
MaxSilenceSizeBytes: cfg.UnifiedAlerting.AlertmanagerMaxSilenceSizeBytes,
|
||||
},
|
||||
PipelineAndStateTimestampsMismatchAction: action,
|
||||
}
|
||||
|
||||
l := log.New("ngalert.notifier.alertmanager", "org", orgID)
|
||||
gam, err := alertingNotify.NewGrafanaAlertmanager("orgID", orgID, amcfg, peer, l, alertingNotify.NewGrafanaAlertmanagerMetrics(m.Registerer, l))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
@ -152,7 +164,7 @@ func NewAlertmanager(ctx context.Context, orgID int64, cfg *setting.Cfg, store A
|
||||
logger: l,
|
||||
|
||||
// TODO: Preferably, logic around autogen would be outside of the specific alertmanager implementation so that remote alertmanager will get it for free.
|
||||
withAutogen: withAutogen,
|
||||
withAutogen: featureToggles.IsEnabled(ctx, featuremgmt.FlagAlertingSimplifiedRouting),
|
||||
}
|
||||
|
||||
return am, nil
|
||||
|
@ -11,6 +11,7 @@ import (
|
||||
"github.com/grafana/grafana/pkg/infra/db"
|
||||
"github.com/grafana/grafana/pkg/infra/log"
|
||||
"github.com/grafana/grafana/pkg/services/dashboards"
|
||||
"github.com/grafana/grafana/pkg/services/featuremgmt"
|
||||
"github.com/grafana/grafana/pkg/services/ngalert/metrics"
|
||||
"github.com/grafana/grafana/pkg/services/ngalert/store"
|
||||
"github.com/grafana/grafana/pkg/services/ngalert/tests/fakes"
|
||||
@ -52,7 +53,7 @@ func setupAMTest(t *testing.T) *alertmanager {
|
||||
orgID := 1
|
||||
stateStore := NewFileStore(int64(orgID), kvStore)
|
||||
|
||||
am, err := NewAlertmanager(context.Background(), 1, cfg, s, stateStore, &NilPeer{}, decryptFn, nil, m, false)
|
||||
am, err := NewAlertmanager(context.Background(), 1, cfg, s, stateStore, &NilPeer{}, decryptFn, nil, m, featuremgmt.WithFeatures())
|
||||
require.NoError(t, err)
|
||||
return am
|
||||
}
|
||||
|
@ -160,7 +160,7 @@ func NewMultiOrgAlertmanager(
|
||||
moa.factory = func(ctx context.Context, orgID int64) (Alertmanager, error) {
|
||||
m := metrics.NewAlertmanagerMetrics(moa.metrics.GetOrCreateOrgRegistry(orgID), l)
|
||||
stateStore := NewFileStore(orgID, kvStore)
|
||||
return NewAlertmanager(ctx, orgID, moa.settings, moa.configStore, stateStore, moa.peer, moa.decryptFn, moa.ns, m, featureManager.IsEnabled(ctx, featuremgmt.FlagAlertingSimplifiedRouting))
|
||||
return NewAlertmanager(ctx, orgID, moa.settings, moa.configStore, stateStore, moa.peer, moa.decryptFn, moa.ns, m, featureManager)
|
||||
}
|
||||
|
||||
for _, opt := range opts {
|
||||
|
@ -170,7 +170,7 @@ require (
|
||||
github.com/googleapis/enterprise-certificate-proxy v0.3.4 // indirect
|
||||
github.com/googleapis/gax-go/v2 v2.14.1 // indirect
|
||||
github.com/gorilla/mux v1.8.1 // indirect
|
||||
github.com/grafana/alerting v0.0.0-20250129195454-3e5b80036b7a // indirect
|
||||
github.com/grafana/alerting v0.0.0-20250130152446-d49e2e0b7d65 // indirect
|
||||
github.com/grafana/authlib v0.0.0-20250123104008-e99947858901 // indirect
|
||||
github.com/grafana/dataplane/sdata v0.0.9 // indirect
|
||||
github.com/grafana/dskit v0.0.0-20241105154643-a6b453a88040 // indirect
|
||||
|
@ -547,8 +547,8 @@ github.com/gorilla/mux v1.8.1 h1:TuBL49tXwgrFYWhqrNgrUNEY92u81SPhu7sTdzQEiWY=
|
||||
github.com/gorilla/mux v1.8.1/go.mod h1:AKf9I4AEqPTmMytcMc0KkNouC66V3BtZ4qD5fmWSiMQ=
|
||||
github.com/gorilla/websocket v1.5.3 h1:saDtZ6Pbx/0u+bgYQ3q96pZgCzfhKXGPqt7kZ72aNNg=
|
||||
github.com/gorilla/websocket v1.5.3/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
|
||||
github.com/grafana/alerting v0.0.0-20250129195454-3e5b80036b7a h1:44E+I3EPdh/W02Uyfyig86EJKPjvzcF3y0A+FEi1fBk=
|
||||
github.com/grafana/alerting v0.0.0-20250129195454-3e5b80036b7a/go.mod h1:QsnoKX/iYZxA4Cv+H+wC7uxutBD8qi8ZW5UJvD2TYmU=
|
||||
github.com/grafana/alerting v0.0.0-20250130152446-d49e2e0b7d65 h1:dmsycYQzl5JexuV8UxQpT3B79maSvhiIahid4/tezAM=
|
||||
github.com/grafana/alerting v0.0.0-20250130152446-d49e2e0b7d65/go.mod h1:QsnoKX/iYZxA4Cv+H+wC7uxutBD8qi8ZW5UJvD2TYmU=
|
||||
github.com/grafana/authlib v0.0.0-20250123104008-e99947858901 h1:nqV1YrtX+ZG+EYB5dcmFMWhg2Y038OMaAHAADbOC9RA=
|
||||
github.com/grafana/authlib v0.0.0-20250123104008-e99947858901/go.mod h1:/gYfphsNu9v1qYWXxpv1NSvMEMSwvdf8qb8YlgwIRl8=
|
||||
github.com/grafana/authlib/types v0.0.0-20250120145936-5f0e28e7a87c h1:b0sPDtt33uFdmvUJjSCld3kwE2E49dUvevuUDSJsEuo=
|
||||
|
@ -115,7 +115,7 @@ require (
|
||||
github.com/googleapis/enterprise-certificate-proxy v0.3.4 // indirect
|
||||
github.com/googleapis/gax-go/v2 v2.14.1 // indirect
|
||||
github.com/gorilla/mux v1.8.1 // indirect
|
||||
github.com/grafana/alerting v0.0.0-20250129195454-3e5b80036b7a // indirect
|
||||
github.com/grafana/alerting v0.0.0-20250130152446-d49e2e0b7d65 // indirect
|
||||
github.com/grafana/dataplane/sdata v0.0.9 // indirect
|
||||
github.com/grafana/grafana-app-sdk/logging v0.30.0 // indirect
|
||||
github.com/grafana/grafana-aws-sdk v0.31.5 // indirect
|
||||
|
@ -403,8 +403,8 @@ github.com/gorilla/mux v1.6.2/go.mod h1:1lud6UwP+6orDFRuTfBEV8e9/aOM/c4fVVCaMa2z
|
||||
github.com/gorilla/mux v1.7.1/go.mod h1:1lud6UwP+6orDFRuTfBEV8e9/aOM/c4fVVCaMa2zaAs=
|
||||
github.com/gorilla/mux v1.8.1 h1:TuBL49tXwgrFYWhqrNgrUNEY92u81SPhu7sTdzQEiWY=
|
||||
github.com/gorilla/mux v1.8.1/go.mod h1:AKf9I4AEqPTmMytcMc0KkNouC66V3BtZ4qD5fmWSiMQ=
|
||||
github.com/grafana/alerting v0.0.0-20250129195454-3e5b80036b7a h1:44E+I3EPdh/W02Uyfyig86EJKPjvzcF3y0A+FEi1fBk=
|
||||
github.com/grafana/alerting v0.0.0-20250129195454-3e5b80036b7a/go.mod h1:QsnoKX/iYZxA4Cv+H+wC7uxutBD8qi8ZW5UJvD2TYmU=
|
||||
github.com/grafana/alerting v0.0.0-20250130152446-d49e2e0b7d65 h1:dmsycYQzl5JexuV8UxQpT3B79maSvhiIahid4/tezAM=
|
||||
github.com/grafana/alerting v0.0.0-20250130152446-d49e2e0b7d65/go.mod h1:QsnoKX/iYZxA4Cv+H+wC7uxutBD8qi8ZW5UJvD2TYmU=
|
||||
github.com/grafana/authlib v0.0.0-20250123104008-e99947858901 h1:nqV1YrtX+ZG+EYB5dcmFMWhg2Y038OMaAHAADbOC9RA=
|
||||
github.com/grafana/authlib v0.0.0-20250123104008-e99947858901/go.mod h1:/gYfphsNu9v1qYWXxpv1NSvMEMSwvdf8qb8YlgwIRl8=
|
||||
github.com/grafana/authlib/types v0.0.0-20250120145936-5f0e28e7a87c h1:b0sPDtt33uFdmvUJjSCld3kwE2E49dUvevuUDSJsEuo=
|
||||
|
Reference in New Issue
Block a user