mirror of
https://github.com/grafana/grafana.git
synced 2025-07-30 21:53:00 +08:00
Alerting: Add un-documented toggle for changing state history backend, add shells for remote loki and sql (#61072)
* Add toggle for state history backend and shells * Extract some shared logic and add tests
This commit is contained in:
@ -200,11 +200,9 @@ func (ng *AlertNG) init() error {
|
||||
AlertSender: alertsRouter,
|
||||
}
|
||||
|
||||
var history state.Historian
|
||||
if ng.Cfg.UnifiedAlerting.StateHistory.Enabled {
|
||||
history = historian.NewAnnotationHistorian(ng.annotationsRepo, ng.dashboardService)
|
||||
} else {
|
||||
history = historian.NewNopHistorian()
|
||||
history, err := configureHistorianBackend(ng.Cfg.UnifiedAlerting.StateHistory, ng.annotationsRepo, ng.dashboardService)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
stateManager := state.NewManager(ng.Metrics.GetStateMetrics(), appUrl, store, ng.imageService, clk, history)
|
||||
scheduler := schedule.NewScheduler(schedCfg, stateManager)
|
||||
@ -365,3 +363,21 @@ func readQuotaConfig(cfg *setting.Cfg) (*quota.Map, error) {
|
||||
limits.Set(orgQuotaTag, alertOrgQuota)
|
||||
return limits, nil
|
||||
}
|
||||
|
||||
func configureHistorianBackend(cfg setting.UnifiedAlertingStateHistorySettings, ar annotations.Repository, ds dashboards.DashboardService) (state.Historian, error) {
|
||||
if !cfg.Enabled {
|
||||
return historian.NewNopHistorian(), nil
|
||||
}
|
||||
|
||||
if cfg.Backend == "annotations" {
|
||||
return historian.NewAnnotationBackend(ar, ds), nil
|
||||
}
|
||||
if cfg.Backend == "loki" {
|
||||
return historian.NewRemoteLokiBackend(), nil
|
||||
}
|
||||
if cfg.Backend == "sql" {
|
||||
return historian.NewSqlBackend(), nil
|
||||
}
|
||||
|
||||
return nil, fmt.Errorf("unrecognized state history backend: %s", cfg.Backend)
|
||||
}
|
||||
|
@ -4,12 +4,9 @@ import (
|
||||
"context"
|
||||
"fmt"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/grafana/grafana-plugin-sdk-go/data"
|
||||
|
||||
"github.com/grafana/grafana/pkg/components/simplejson"
|
||||
"github.com/grafana/grafana/pkg/infra/log"
|
||||
"github.com/grafana/grafana/pkg/services/annotations"
|
||||
@ -19,15 +16,15 @@ import (
|
||||
"github.com/grafana/grafana/pkg/services/ngalert/state"
|
||||
)
|
||||
|
||||
// AnnotationStateHistorian is an implementation of state.Historian that uses Grafana Annotations as the backing datastore.
|
||||
type AnnotationStateHistorian struct {
|
||||
// AnnotationBackend is an implementation of state.Historian that uses Grafana Annotations as the backing datastore.
|
||||
type AnnotationBackend struct {
|
||||
annotations annotations.Repository
|
||||
dashboards *dashboardResolver
|
||||
log log.Logger
|
||||
}
|
||||
|
||||
func NewAnnotationHistorian(annotations annotations.Repository, dashboards dashboards.DashboardService) *AnnotationStateHistorian {
|
||||
return &AnnotationStateHistorian{
|
||||
func NewAnnotationBackend(annotations annotations.Repository, dashboards dashboards.DashboardService) *AnnotationBackend {
|
||||
return &AnnotationBackend{
|
||||
annotations: annotations,
|
||||
dashboards: newDashboardResolver(dashboards, defaultDashboardCacheExpiry),
|
||||
log: log.New("ngalert.state.historian"),
|
||||
@ -35,7 +32,7 @@ func NewAnnotationHistorian(annotations annotations.Repository, dashboards dashb
|
||||
}
|
||||
|
||||
// RecordStates writes a number of state transitions for a given rule to state history.
|
||||
func (h *AnnotationStateHistorian) RecordStatesAsync(ctx context.Context, rule *ngmodels.AlertRule, states []state.StateTransition) {
|
||||
func (h *AnnotationBackend) RecordStatesAsync(ctx context.Context, rule *ngmodels.AlertRule, states []state.StateTransition) {
|
||||
logger := h.log.FromContext(ctx)
|
||||
// Build annotations before starting goroutine, to make sure all data is copied and won't mutate underneath us.
|
||||
annotations := h.buildAnnotations(rule, states, logger)
|
||||
@ -43,10 +40,10 @@ func (h *AnnotationStateHistorian) RecordStatesAsync(ctx context.Context, rule *
|
||||
go h.recordAnnotationsSync(ctx, panel, annotations, logger)
|
||||
}
|
||||
|
||||
func (h *AnnotationStateHistorian) buildAnnotations(rule *ngmodels.AlertRule, states []state.StateTransition, logger log.Logger) []annotations.Item {
|
||||
func (h *AnnotationBackend) buildAnnotations(rule *ngmodels.AlertRule, states []state.StateTransition, logger log.Logger) []annotations.Item {
|
||||
items := make([]annotations.Item, 0, len(states))
|
||||
for _, state := range states {
|
||||
if !shouldAnnotate(state) {
|
||||
if !shouldRecord(state) {
|
||||
continue
|
||||
}
|
||||
logger.Debug("Alert state changed creating annotation", "newState", state.Formatted(), "oldState", state.PreviousFormatted())
|
||||
@ -68,33 +65,7 @@ func (h *AnnotationStateHistorian) buildAnnotations(rule *ngmodels.AlertRule, st
|
||||
return items
|
||||
}
|
||||
|
||||
// panelKey uniquely identifies a panel.
|
||||
type panelKey struct {
|
||||
orgID int64
|
||||
dashUID string
|
||||
panelID int64
|
||||
}
|
||||
|
||||
// panelKey attempts to get the key of the panel attached to the given rule. Returns nil if the rule is not attached to a panel.
|
||||
func parsePanelKey(rule *ngmodels.AlertRule, logger log.Logger) *panelKey {
|
||||
dashUID, ok := rule.Annotations[ngmodels.DashboardUIDAnnotation]
|
||||
if ok {
|
||||
panelAnno := rule.Annotations[ngmodels.PanelIDAnnotation]
|
||||
panelID, err := strconv.ParseInt(panelAnno, 10, 64)
|
||||
if err != nil {
|
||||
logger.Error("Error parsing panelUID for alert annotation", "actual", panelAnno, "error", err)
|
||||
return nil
|
||||
}
|
||||
return &panelKey{
|
||||
orgID: rule.OrgID,
|
||||
dashUID: dashUID,
|
||||
panelID: panelID,
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (h *AnnotationStateHistorian) recordAnnotationsSync(ctx context.Context, panel *panelKey, annotations []annotations.Item, logger log.Logger) {
|
||||
func (h *AnnotationBackend) recordAnnotationsSync(ctx context.Context, panel *panelKey, annotations []annotations.Item, logger log.Logger) {
|
||||
if panel != nil {
|
||||
dashID, err := h.dashboards.getID(ctx, panel.orgID, panel.dashUID)
|
||||
if err != nil {
|
||||
@ -148,21 +119,3 @@ func buildAnnotationTextAndData(rule *ngmodels.AlertRule, currentState *state.St
|
||||
labels := removePrivateLabels(currentState.Labels)
|
||||
return fmt.Sprintf("%s {%s} - %s", rule.Title, labels.String(), value), jsonData
|
||||
}
|
||||
|
||||
func removePrivateLabels(labels data.Labels) data.Labels {
|
||||
result := make(data.Labels)
|
||||
for k, v := range labels {
|
||||
if !strings.HasPrefix(k, "__") && !strings.HasSuffix(k, "__") {
|
||||
result[k] = v
|
||||
}
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
func shouldAnnotate(transition state.StateTransition) bool {
|
||||
// Do not log not transitioned states normal states if it was marked as stale
|
||||
if !transition.Changed() || transition.StateReason == ngmodels.StateReasonMissingSeries && transition.PreviousState == eval.Normal && transition.State.State == eval.Normal {
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
56
pkg/services/ngalert/state/historian/core.go
Normal file
56
pkg/services/ngalert/state/historian/core.go
Normal file
@ -0,0 +1,56 @@
|
||||
package historian
|
||||
|
||||
import (
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/grafana/grafana-plugin-sdk-go/data"
|
||||
"github.com/grafana/grafana/pkg/infra/log"
|
||||
"github.com/grafana/grafana/pkg/services/ngalert/eval"
|
||||
"github.com/grafana/grafana/pkg/services/ngalert/models"
|
||||
"github.com/grafana/grafana/pkg/services/ngalert/state"
|
||||
)
|
||||
|
||||
func shouldRecord(transition state.StateTransition) bool {
|
||||
// Do not log not transitioned states normal states if it was marked as stale
|
||||
if !transition.Changed() || transition.StateReason == models.StateReasonMissingSeries && transition.PreviousState == eval.Normal && transition.State.State == eval.Normal {
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func removePrivateLabels(labels data.Labels) data.Labels {
|
||||
result := make(data.Labels)
|
||||
for k, v := range labels {
|
||||
if !strings.HasPrefix(k, "__") && !strings.HasSuffix(k, "__") {
|
||||
result[k] = v
|
||||
}
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
// panelKey uniquely identifies a panel.
|
||||
type panelKey struct {
|
||||
orgID int64
|
||||
dashUID string
|
||||
panelID int64
|
||||
}
|
||||
|
||||
// panelKey attempts to get the key of the panel attached to the given rule. Returns nil if the rule is not attached to a panel.
|
||||
func parsePanelKey(rule *models.AlertRule, logger log.Logger) *panelKey {
|
||||
dashUID, ok := rule.Annotations[models.DashboardUIDAnnotation]
|
||||
if ok {
|
||||
panelAnno := rule.Annotations[models.PanelIDAnnotation]
|
||||
panelID, err := strconv.ParseInt(panelAnno, 10, 64)
|
||||
if err != nil {
|
||||
logger.Error("Error parsing panelUID for alert annotation", "actual", panelAnno, "error", err)
|
||||
return nil
|
||||
}
|
||||
return &panelKey{
|
||||
orgID: rule.OrgID,
|
||||
dashUID: dashUID,
|
||||
panelID: panelID,
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
@ -6,12 +6,14 @@ import (
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/grafana/grafana-plugin-sdk-go/data"
|
||||
"github.com/grafana/grafana/pkg/infra/log"
|
||||
"github.com/grafana/grafana/pkg/services/ngalert/eval"
|
||||
"github.com/grafana/grafana/pkg/services/ngalert/models"
|
||||
"github.com/grafana/grafana/pkg/services/ngalert/state"
|
||||
)
|
||||
|
||||
func TestShouldAnnotate(t *testing.T) {
|
||||
func TestShouldRecord(t *testing.T) {
|
||||
allStates := []eval.State{
|
||||
eval.Normal,
|
||||
eval.Alerting,
|
||||
@ -94,7 +96,126 @@ func TestShouldAnnotate(t *testing.T) {
|
||||
}
|
||||
|
||||
t.Run(fmt.Sprintf("%s -> %s should be %v", trans.PreviousFormatted(), trans.Formatted(), !ok), func(t *testing.T) {
|
||||
require.Equal(t, !ok, shouldAnnotate(trans))
|
||||
require.Equal(t, !ok, shouldRecord(trans))
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestRemovePrivateLabels(t *testing.T) {
|
||||
type testCase struct {
|
||||
name string
|
||||
in data.Labels
|
||||
exp data.Labels
|
||||
}
|
||||
|
||||
cases := []testCase{
|
||||
{
|
||||
name: "empty",
|
||||
in: map[string]string{},
|
||||
exp: map[string]string{},
|
||||
},
|
||||
{
|
||||
name: "nil",
|
||||
in: nil,
|
||||
exp: map[string]string{},
|
||||
},
|
||||
{
|
||||
name: "prefix",
|
||||
in: map[string]string{"__asdf": "one", "b": "c"},
|
||||
exp: map[string]string{"b": "c"},
|
||||
},
|
||||
{
|
||||
name: "suffix",
|
||||
in: map[string]string{"asdf__": "one", "b": "c"},
|
||||
exp: map[string]string{"b": "c"},
|
||||
},
|
||||
{
|
||||
name: "both",
|
||||
in: map[string]string{"__asdf__": "one", "b": "c"},
|
||||
exp: map[string]string{"b": "c"},
|
||||
},
|
||||
{
|
||||
name: "all",
|
||||
in: map[string]string{"__a__": "a", "__b": "b", "c__": "c"},
|
||||
exp: map[string]string{},
|
||||
},
|
||||
{
|
||||
name: "whitespace",
|
||||
in: map[string]string{" __asdf__ ": "one", "b": "c"},
|
||||
exp: map[string]string{" __asdf__ ": "one", "b": "c"},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range cases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
res := removePrivateLabels(tc.in)
|
||||
require.Equal(t, tc.exp, res)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestParsePanelKey(t *testing.T) {
|
||||
logger := log.NewNopLogger()
|
||||
|
||||
type testCase struct {
|
||||
name string
|
||||
in models.AlertRule
|
||||
exp *panelKey
|
||||
}
|
||||
|
||||
cases := []testCase{
|
||||
{
|
||||
name: "no dash UID",
|
||||
in: models.AlertRule{
|
||||
OrgID: 1,
|
||||
Annotations: map[string]string{
|
||||
models.PanelIDAnnotation: "123",
|
||||
},
|
||||
},
|
||||
exp: nil,
|
||||
},
|
||||
{
|
||||
name: "no panel ID",
|
||||
in: models.AlertRule{
|
||||
OrgID: 1,
|
||||
Annotations: map[string]string{
|
||||
models.DashboardUIDAnnotation: "abcd-uid",
|
||||
},
|
||||
},
|
||||
exp: nil,
|
||||
},
|
||||
{
|
||||
name: "invalid panel ID",
|
||||
in: models.AlertRule{
|
||||
OrgID: 1,
|
||||
Annotations: map[string]string{
|
||||
models.DashboardUIDAnnotation: "abcd-uid",
|
||||
models.PanelIDAnnotation: "bad-id",
|
||||
},
|
||||
},
|
||||
exp: nil,
|
||||
},
|
||||
{
|
||||
name: "success",
|
||||
in: models.AlertRule{
|
||||
OrgID: 1,
|
||||
Annotations: map[string]string{
|
||||
models.DashboardUIDAnnotation: "abcd-uid",
|
||||
models.PanelIDAnnotation: "123",
|
||||
},
|
||||
},
|
||||
exp: &panelKey{
|
||||
orgID: 1,
|
||||
dashUID: "abcd-uid",
|
||||
panelID: 123,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range cases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
res := parsePanelKey(&tc.in, logger)
|
||||
require.Equal(t, tc.exp, res)
|
||||
})
|
||||
}
|
||||
}
|
22
pkg/services/ngalert/state/historian/loki.go
Normal file
22
pkg/services/ngalert/state/historian/loki.go
Normal file
@ -0,0 +1,22 @@
|
||||
package historian
|
||||
|
||||
import (
|
||||
"context"
|
||||
|
||||
"github.com/grafana/grafana/pkg/infra/log"
|
||||
"github.com/grafana/grafana/pkg/services/ngalert/models"
|
||||
"github.com/grafana/grafana/pkg/services/ngalert/state"
|
||||
)
|
||||
|
||||
type RemoteLokiBackend struct {
|
||||
log log.Logger
|
||||
}
|
||||
|
||||
func NewRemoteLokiBackend() *RemoteLokiBackend {
|
||||
return &RemoteLokiBackend{
|
||||
log: log.New("ngalert.state.historian"),
|
||||
}
|
||||
}
|
||||
|
||||
func (h *RemoteLokiBackend) RecordStatesAsync(ctx context.Context, _ *models.AlertRule, _ []state.StateTransition) {
|
||||
}
|
22
pkg/services/ngalert/state/historian/sql.go
Normal file
22
pkg/services/ngalert/state/historian/sql.go
Normal file
@ -0,0 +1,22 @@
|
||||
package historian
|
||||
|
||||
import (
|
||||
"context"
|
||||
|
||||
"github.com/grafana/grafana/pkg/infra/log"
|
||||
"github.com/grafana/grafana/pkg/services/ngalert/models"
|
||||
"github.com/grafana/grafana/pkg/services/ngalert/state"
|
||||
)
|
||||
|
||||
type SqlBackend struct {
|
||||
log log.Logger
|
||||
}
|
||||
|
||||
func NewSqlBackend() *SqlBackend {
|
||||
return &SqlBackend{
|
||||
log: log.New("ngalert.state.historian"),
|
||||
}
|
||||
}
|
||||
|
||||
func (h *SqlBackend) RecordStatesAsync(ctx context.Context, _ *models.AlertRule, _ []state.StateTransition) {
|
||||
}
|
@ -210,7 +210,7 @@ func TestDashboardAnnotations(t *testing.T) {
|
||||
_, dbstore := tests.SetupTestEnv(t, 1)
|
||||
|
||||
fakeAnnoRepo := annotationstest.NewFakeAnnotationsRepo()
|
||||
hist := historian.NewAnnotationHistorian(fakeAnnoRepo, &dashboards.FakeDashboardService{})
|
||||
hist := historian.NewAnnotationBackend(fakeAnnoRepo, &dashboards.FakeDashboardService{})
|
||||
st := state.NewManager(testMetrics.GetStateMetrics(), nil, dbstore, &state.NoopImageService{}, clock.New(), hist)
|
||||
|
||||
const mainOrgID int64 = 1
|
||||
@ -2191,7 +2191,7 @@ func TestProcessEvalResults(t *testing.T) {
|
||||
|
||||
for _, tc := range testCases {
|
||||
fakeAnnoRepo := annotationstest.NewFakeAnnotationsRepo()
|
||||
hist := historian.NewAnnotationHistorian(fakeAnnoRepo, &dashboards.FakeDashboardService{})
|
||||
hist := historian.NewAnnotationBackend(fakeAnnoRepo, &dashboards.FakeDashboardService{})
|
||||
st := state.NewManager(testMetrics.GetStateMetrics(), nil, &state.FakeInstanceStore{}, &state.NotAvailableImageService{}, clock.New(), hist)
|
||||
t.Run(tc.desc, func(t *testing.T) {
|
||||
for _, res := range tc.evalResults {
|
||||
|
@ -102,6 +102,7 @@ type UnifiedAlertingReservedLabelSettings struct {
|
||||
|
||||
type UnifiedAlertingStateHistorySettings struct {
|
||||
Enabled bool
|
||||
Backend string
|
||||
}
|
||||
|
||||
// IsEnabled returns true if UnifiedAlertingSettings.Enabled is either nil or true.
|
||||
@ -313,6 +314,7 @@ func (cfg *Cfg) ReadUnifiedAlertingSettings(iniFile *ini.File) error {
|
||||
stateHistory := iniFile.Section("unified_alerting.state_history")
|
||||
uaCfgStateHistory := UnifiedAlertingStateHistorySettings{
|
||||
Enabled: stateHistory.Key("enabled").MustBool(stateHistoryDefaultEnabled),
|
||||
Backend: stateHistory.Key("backend").MustString("annotations"),
|
||||
}
|
||||
uaCfg.StateHistory = uaCfgStateHistory
|
||||
|
||||
|
Reference in New Issue
Block a user