diff --git a/pkg/services/ngalert/ngalert.go b/pkg/services/ngalert/ngalert.go index bdad7dd76ce..8baab25f734 100644 --- a/pkg/services/ngalert/ngalert.go +++ b/pkg/services/ngalert/ngalert.go @@ -200,11 +200,9 @@ func (ng *AlertNG) init() error { AlertSender: alertsRouter, } - var history state.Historian - if ng.Cfg.UnifiedAlerting.StateHistory.Enabled { - history = historian.NewAnnotationHistorian(ng.annotationsRepo, ng.dashboardService) - } else { - history = historian.NewNopHistorian() + history, err := configureHistorianBackend(ng.Cfg.UnifiedAlerting.StateHistory, ng.annotationsRepo, ng.dashboardService) + if err != nil { + return err } stateManager := state.NewManager(ng.Metrics.GetStateMetrics(), appUrl, store, ng.imageService, clk, history) scheduler := schedule.NewScheduler(schedCfg, stateManager) @@ -365,3 +363,21 @@ func readQuotaConfig(cfg *setting.Cfg) (*quota.Map, error) { limits.Set(orgQuotaTag, alertOrgQuota) return limits, nil } + +func configureHistorianBackend(cfg setting.UnifiedAlertingStateHistorySettings, ar annotations.Repository, ds dashboards.DashboardService) (state.Historian, error) { + if !cfg.Enabled { + return historian.NewNopHistorian(), nil + } + + if cfg.Backend == "annotations" { + return historian.NewAnnotationBackend(ar, ds), nil + } + if cfg.Backend == "loki" { + return historian.NewRemoteLokiBackend(), nil + } + if cfg.Backend == "sql" { + return historian.NewSqlBackend(), nil + } + + return nil, fmt.Errorf("unrecognized state history backend: %s", cfg.Backend) +} diff --git a/pkg/services/ngalert/state/historian/annotation.go b/pkg/services/ngalert/state/historian/annotation.go index a771ba0db0b..b49146ad424 100644 --- a/pkg/services/ngalert/state/historian/annotation.go +++ b/pkg/services/ngalert/state/historian/annotation.go @@ -4,12 +4,9 @@ import ( "context" "fmt" "sort" - "strconv" "strings" "time" - "github.com/grafana/grafana-plugin-sdk-go/data" - "github.com/grafana/grafana/pkg/components/simplejson" "github.com/grafana/grafana/pkg/infra/log" "github.com/grafana/grafana/pkg/services/annotations" @@ -19,15 +16,15 @@ import ( "github.com/grafana/grafana/pkg/services/ngalert/state" ) -// AnnotationStateHistorian is an implementation of state.Historian that uses Grafana Annotations as the backing datastore. -type AnnotationStateHistorian struct { +// AnnotationBackend is an implementation of state.Historian that uses Grafana Annotations as the backing datastore. +type AnnotationBackend struct { annotations annotations.Repository dashboards *dashboardResolver log log.Logger } -func NewAnnotationHistorian(annotations annotations.Repository, dashboards dashboards.DashboardService) *AnnotationStateHistorian { - return &AnnotationStateHistorian{ +func NewAnnotationBackend(annotations annotations.Repository, dashboards dashboards.DashboardService) *AnnotationBackend { + return &AnnotationBackend{ annotations: annotations, dashboards: newDashboardResolver(dashboards, defaultDashboardCacheExpiry), log: log.New("ngalert.state.historian"), @@ -35,7 +32,7 @@ func NewAnnotationHistorian(annotations annotations.Repository, dashboards dashb } // RecordStates writes a number of state transitions for a given rule to state history. -func (h *AnnotationStateHistorian) RecordStatesAsync(ctx context.Context, rule *ngmodels.AlertRule, states []state.StateTransition) { +func (h *AnnotationBackend) RecordStatesAsync(ctx context.Context, rule *ngmodels.AlertRule, states []state.StateTransition) { logger := h.log.FromContext(ctx) // Build annotations before starting goroutine, to make sure all data is copied and won't mutate underneath us. annotations := h.buildAnnotations(rule, states, logger) @@ -43,10 +40,10 @@ func (h *AnnotationStateHistorian) RecordStatesAsync(ctx context.Context, rule * go h.recordAnnotationsSync(ctx, panel, annotations, logger) } -func (h *AnnotationStateHistorian) buildAnnotations(rule *ngmodels.AlertRule, states []state.StateTransition, logger log.Logger) []annotations.Item { +func (h *AnnotationBackend) buildAnnotations(rule *ngmodels.AlertRule, states []state.StateTransition, logger log.Logger) []annotations.Item { items := make([]annotations.Item, 0, len(states)) for _, state := range states { - if !shouldAnnotate(state) { + if !shouldRecord(state) { continue } logger.Debug("Alert state changed creating annotation", "newState", state.Formatted(), "oldState", state.PreviousFormatted()) @@ -68,33 +65,7 @@ func (h *AnnotationStateHistorian) buildAnnotations(rule *ngmodels.AlertRule, st return items } -// panelKey uniquely identifies a panel. -type panelKey struct { - orgID int64 - dashUID string - panelID int64 -} - -// panelKey attempts to get the key of the panel attached to the given rule. Returns nil if the rule is not attached to a panel. -func parsePanelKey(rule *ngmodels.AlertRule, logger log.Logger) *panelKey { - dashUID, ok := rule.Annotations[ngmodels.DashboardUIDAnnotation] - if ok { - panelAnno := rule.Annotations[ngmodels.PanelIDAnnotation] - panelID, err := strconv.ParseInt(panelAnno, 10, 64) - if err != nil { - logger.Error("Error parsing panelUID for alert annotation", "actual", panelAnno, "error", err) - return nil - } - return &panelKey{ - orgID: rule.OrgID, - dashUID: dashUID, - panelID: panelID, - } - } - return nil -} - -func (h *AnnotationStateHistorian) recordAnnotationsSync(ctx context.Context, panel *panelKey, annotations []annotations.Item, logger log.Logger) { +func (h *AnnotationBackend) recordAnnotationsSync(ctx context.Context, panel *panelKey, annotations []annotations.Item, logger log.Logger) { if panel != nil { dashID, err := h.dashboards.getID(ctx, panel.orgID, panel.dashUID) if err != nil { @@ -148,21 +119,3 @@ func buildAnnotationTextAndData(rule *ngmodels.AlertRule, currentState *state.St labels := removePrivateLabels(currentState.Labels) return fmt.Sprintf("%s {%s} - %s", rule.Title, labels.String(), value), jsonData } - -func removePrivateLabels(labels data.Labels) data.Labels { - result := make(data.Labels) - for k, v := range labels { - if !strings.HasPrefix(k, "__") && !strings.HasSuffix(k, "__") { - result[k] = v - } - } - return result -} - -func shouldAnnotate(transition state.StateTransition) bool { - // Do not log not transitioned states normal states if it was marked as stale - if !transition.Changed() || transition.StateReason == ngmodels.StateReasonMissingSeries && transition.PreviousState == eval.Normal && transition.State.State == eval.Normal { - return false - } - return true -} diff --git a/pkg/services/ngalert/state/historian/core.go b/pkg/services/ngalert/state/historian/core.go new file mode 100644 index 00000000000..eef8e7e7236 --- /dev/null +++ b/pkg/services/ngalert/state/historian/core.go @@ -0,0 +1,56 @@ +package historian + +import ( + "strconv" + "strings" + + "github.com/grafana/grafana-plugin-sdk-go/data" + "github.com/grafana/grafana/pkg/infra/log" + "github.com/grafana/grafana/pkg/services/ngalert/eval" + "github.com/grafana/grafana/pkg/services/ngalert/models" + "github.com/grafana/grafana/pkg/services/ngalert/state" +) + +func shouldRecord(transition state.StateTransition) bool { + // Do not log not transitioned states normal states if it was marked as stale + if !transition.Changed() || transition.StateReason == models.StateReasonMissingSeries && transition.PreviousState == eval.Normal && transition.State.State == eval.Normal { + return false + } + return true +} + +func removePrivateLabels(labels data.Labels) data.Labels { + result := make(data.Labels) + for k, v := range labels { + if !strings.HasPrefix(k, "__") && !strings.HasSuffix(k, "__") { + result[k] = v + } + } + return result +} + +// panelKey uniquely identifies a panel. +type panelKey struct { + orgID int64 + dashUID string + panelID int64 +} + +// panelKey attempts to get the key of the panel attached to the given rule. Returns nil if the rule is not attached to a panel. +func parsePanelKey(rule *models.AlertRule, logger log.Logger) *panelKey { + dashUID, ok := rule.Annotations[models.DashboardUIDAnnotation] + if ok { + panelAnno := rule.Annotations[models.PanelIDAnnotation] + panelID, err := strconv.ParseInt(panelAnno, 10, 64) + if err != nil { + logger.Error("Error parsing panelUID for alert annotation", "actual", panelAnno, "error", err) + return nil + } + return &panelKey{ + orgID: rule.OrgID, + dashUID: dashUID, + panelID: panelID, + } + } + return nil +} diff --git a/pkg/services/ngalert/state/historian/annotation_test.go b/pkg/services/ngalert/state/historian/core_test.go similarity index 58% rename from pkg/services/ngalert/state/historian/annotation_test.go rename to pkg/services/ngalert/state/historian/core_test.go index eb008035c3c..b9a2cfd1108 100644 --- a/pkg/services/ngalert/state/historian/annotation_test.go +++ b/pkg/services/ngalert/state/historian/core_test.go @@ -6,12 +6,14 @@ import ( "github.com/stretchr/testify/require" + "github.com/grafana/grafana-plugin-sdk-go/data" + "github.com/grafana/grafana/pkg/infra/log" "github.com/grafana/grafana/pkg/services/ngalert/eval" "github.com/grafana/grafana/pkg/services/ngalert/models" "github.com/grafana/grafana/pkg/services/ngalert/state" ) -func TestShouldAnnotate(t *testing.T) { +func TestShouldRecord(t *testing.T) { allStates := []eval.State{ eval.Normal, eval.Alerting, @@ -94,7 +96,126 @@ func TestShouldAnnotate(t *testing.T) { } t.Run(fmt.Sprintf("%s -> %s should be %v", trans.PreviousFormatted(), trans.Formatted(), !ok), func(t *testing.T) { - require.Equal(t, !ok, shouldAnnotate(trans)) + require.Equal(t, !ok, shouldRecord(trans)) + }) + } +} + +func TestRemovePrivateLabels(t *testing.T) { + type testCase struct { + name string + in data.Labels + exp data.Labels + } + + cases := []testCase{ + { + name: "empty", + in: map[string]string{}, + exp: map[string]string{}, + }, + { + name: "nil", + in: nil, + exp: map[string]string{}, + }, + { + name: "prefix", + in: map[string]string{"__asdf": "one", "b": "c"}, + exp: map[string]string{"b": "c"}, + }, + { + name: "suffix", + in: map[string]string{"asdf__": "one", "b": "c"}, + exp: map[string]string{"b": "c"}, + }, + { + name: "both", + in: map[string]string{"__asdf__": "one", "b": "c"}, + exp: map[string]string{"b": "c"}, + }, + { + name: "all", + in: map[string]string{"__a__": "a", "__b": "b", "c__": "c"}, + exp: map[string]string{}, + }, + { + name: "whitespace", + in: map[string]string{" __asdf__ ": "one", "b": "c"}, + exp: map[string]string{" __asdf__ ": "one", "b": "c"}, + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + res := removePrivateLabels(tc.in) + require.Equal(t, tc.exp, res) + }) + } +} + +func TestParsePanelKey(t *testing.T) { + logger := log.NewNopLogger() + + type testCase struct { + name string + in models.AlertRule + exp *panelKey + } + + cases := []testCase{ + { + name: "no dash UID", + in: models.AlertRule{ + OrgID: 1, + Annotations: map[string]string{ + models.PanelIDAnnotation: "123", + }, + }, + exp: nil, + }, + { + name: "no panel ID", + in: models.AlertRule{ + OrgID: 1, + Annotations: map[string]string{ + models.DashboardUIDAnnotation: "abcd-uid", + }, + }, + exp: nil, + }, + { + name: "invalid panel ID", + in: models.AlertRule{ + OrgID: 1, + Annotations: map[string]string{ + models.DashboardUIDAnnotation: "abcd-uid", + models.PanelIDAnnotation: "bad-id", + }, + }, + exp: nil, + }, + { + name: "success", + in: models.AlertRule{ + OrgID: 1, + Annotations: map[string]string{ + models.DashboardUIDAnnotation: "abcd-uid", + models.PanelIDAnnotation: "123", + }, + }, + exp: &panelKey{ + orgID: 1, + dashUID: "abcd-uid", + panelID: 123, + }, + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + res := parsePanelKey(&tc.in, logger) + require.Equal(t, tc.exp, res) }) } } diff --git a/pkg/services/ngalert/state/historian/loki.go b/pkg/services/ngalert/state/historian/loki.go new file mode 100644 index 00000000000..a1ebacddc06 --- /dev/null +++ b/pkg/services/ngalert/state/historian/loki.go @@ -0,0 +1,22 @@ +package historian + +import ( + "context" + + "github.com/grafana/grafana/pkg/infra/log" + "github.com/grafana/grafana/pkg/services/ngalert/models" + "github.com/grafana/grafana/pkg/services/ngalert/state" +) + +type RemoteLokiBackend struct { + log log.Logger +} + +func NewRemoteLokiBackend() *RemoteLokiBackend { + return &RemoteLokiBackend{ + log: log.New("ngalert.state.historian"), + } +} + +func (h *RemoteLokiBackend) RecordStatesAsync(ctx context.Context, _ *models.AlertRule, _ []state.StateTransition) { +} diff --git a/pkg/services/ngalert/state/historian/sql.go b/pkg/services/ngalert/state/historian/sql.go new file mode 100644 index 00000000000..6e03f277044 --- /dev/null +++ b/pkg/services/ngalert/state/historian/sql.go @@ -0,0 +1,22 @@ +package historian + +import ( + "context" + + "github.com/grafana/grafana/pkg/infra/log" + "github.com/grafana/grafana/pkg/services/ngalert/models" + "github.com/grafana/grafana/pkg/services/ngalert/state" +) + +type SqlBackend struct { + log log.Logger +} + +func NewSqlBackend() *SqlBackend { + return &SqlBackend{ + log: log.New("ngalert.state.historian"), + } +} + +func (h *SqlBackend) RecordStatesAsync(ctx context.Context, _ *models.AlertRule, _ []state.StateTransition) { +} diff --git a/pkg/services/ngalert/state/manager_test.go b/pkg/services/ngalert/state/manager_test.go index 09f04ea7b29..bed9abc09f2 100644 --- a/pkg/services/ngalert/state/manager_test.go +++ b/pkg/services/ngalert/state/manager_test.go @@ -210,7 +210,7 @@ func TestDashboardAnnotations(t *testing.T) { _, dbstore := tests.SetupTestEnv(t, 1) fakeAnnoRepo := annotationstest.NewFakeAnnotationsRepo() - hist := historian.NewAnnotationHistorian(fakeAnnoRepo, &dashboards.FakeDashboardService{}) + hist := historian.NewAnnotationBackend(fakeAnnoRepo, &dashboards.FakeDashboardService{}) st := state.NewManager(testMetrics.GetStateMetrics(), nil, dbstore, &state.NoopImageService{}, clock.New(), hist) const mainOrgID int64 = 1 @@ -2191,7 +2191,7 @@ func TestProcessEvalResults(t *testing.T) { for _, tc := range testCases { fakeAnnoRepo := annotationstest.NewFakeAnnotationsRepo() - hist := historian.NewAnnotationHistorian(fakeAnnoRepo, &dashboards.FakeDashboardService{}) + hist := historian.NewAnnotationBackend(fakeAnnoRepo, &dashboards.FakeDashboardService{}) st := state.NewManager(testMetrics.GetStateMetrics(), nil, &state.FakeInstanceStore{}, &state.NotAvailableImageService{}, clock.New(), hist) t.Run(tc.desc, func(t *testing.T) { for _, res := range tc.evalResults { diff --git a/pkg/setting/setting_unified_alerting.go b/pkg/setting/setting_unified_alerting.go index b134e1810d9..b9441ca5d5a 100644 --- a/pkg/setting/setting_unified_alerting.go +++ b/pkg/setting/setting_unified_alerting.go @@ -102,6 +102,7 @@ type UnifiedAlertingReservedLabelSettings struct { type UnifiedAlertingStateHistorySettings struct { Enabled bool + Backend string } // IsEnabled returns true if UnifiedAlertingSettings.Enabled is either nil or true. @@ -313,6 +314,7 @@ func (cfg *Cfg) ReadUnifiedAlertingSettings(iniFile *ini.File) error { stateHistory := iniFile.Section("unified_alerting.state_history") uaCfgStateHistory := UnifiedAlertingStateHistorySettings{ Enabled: stateHistory.Key("enabled").MustBool(stateHistoryDefaultEnabled), + Backend: stateHistory.Key("backend").MustString("annotations"), } uaCfg.StateHistory = uaCfgStateHistory