Alerting: Fix state transition from Recovering back to Alerting (#103286)

What is this feature?

This PR fixes a state transition issue where alerts transitioning from the Recovering state back to the Alerting state incorrectly entered the Pending state first if the rule had a For duration configured.

Why do we need this feature?

When an alert goes from Alerting to Recovering (when using the Keep firing for) and then back to Alerting, the existing logic would incorrectly put the alert into Pending state while it should be alerting and still sending notifications to the Alertmanager.
This commit is contained in:
Alexander Akhmetov
2025-04-03 09:40:45 +01:00
committed by GitHub
parent 5bc053730b
commit b49c532999
3 changed files with 559 additions and 3 deletions

View File

@ -157,6 +157,7 @@ func TestProcessEvalResults_StateTransitions(t *testing.T) {
t2 := tN(2)
t3 := tN(3)
t4 := tN(4)
t5 := tN(5)
baseRule := &ngmodels.AlertRule{
OrgID: 1,
@ -856,7 +857,7 @@ func TestProcessEvalResults_StateTransitions(t *testing.T) {
},
},
{
desc: "t1[1alerting] t2[1:normal] t3[1:alerting] and 'keep_firing_for'>0 at t2,t3",
desc: "t1[1:alerting] t2[1:normal] t3[1:alerting] and 'keep_firing_for'>0 at t2,t3",
alertRule: baseRuleWith(ngmodels.RuleMuts.WithKeepFiringForNTimes(1), ngmodels.RuleMuts.WithFor(0)),
results: map[time.Time]eval.Results{
t1: {
@ -914,6 +915,68 @@ func TestProcessEvalResults_StateTransitions(t *testing.T) {
},
},
},
{
desc: "t1[1:alerting] t2[1:alerting] t3[1:normal] t4[1:alerting] and 'keep_firing_for'>0 and 'for'>0 at t2,t3,t4",
alertRule: baseRuleWith(ngmodels.RuleMuts.WithKeepFiringForNTimes(1), ngmodels.RuleMuts.WithForNTimes(1)),
results: map[time.Time]eval.Results{
t1: {
newResult(eval.WithState(eval.Alerting), eval.WithLabels(labels1)),
},
t2: {
newResult(eval.WithState(eval.Alerting), eval.WithLabels(labels1)),
},
t3: {
newResult(eval.WithState(eval.Normal), eval.WithLabels(labels1)),
},
t4: {
newResult(eval.WithState(eval.Alerting), eval.WithLabels(labels1)),
},
},
expectedTransitions: map[time.Time][]StateTransition{
t2: {
{
PreviousState: eval.Pending,
State: &State{
Labels: labels["system + rule + labels1"],
State: eval.Alerting,
LatestResult: newEvaluation(t2, eval.Alerting),
StartsAt: t2,
EndsAt: t2.Add(ResendDelay * 4),
LastEvaluationTime: t2,
LastSentAt: &t2,
},
},
},
t3: {
{
PreviousState: eval.Alerting,
State: &State{
Labels: labels["system + rule + labels1"],
State: eval.Recovering,
LatestResult: newEvaluation(t3, eval.Normal),
StartsAt: t3,
EndsAt: t3.Add(ResendDelay * 4),
LastEvaluationTime: t3,
LastSentAt: &t2,
},
},
},
t4: {
{
PreviousState: eval.Recovering,
State: &State{
Labels: labels["system + rule + labels1"],
State: eval.Alerting,
LatestResult: newEvaluation(t4, eval.Alerting),
StartsAt: t4,
EndsAt: t4.Add(ResendDelay * 4),
LastEvaluationTime: t4,
LastSentAt: &t2,
},
},
},
},
},
{
desc: "t1[1:alerting] t2[NoData] t3[NoData] at t2,t3",
alertRule: baseRule,
@ -2290,6 +2353,244 @@ func TestProcessEvalResults_StateTransitions(t *testing.T) {
},
},
},
{
desc: "t1[{}:normal] t2[NoData] t3[NoData] t4[{}:normal] t5[NoData] and 'keep_firing_for'=2 and 'for'=1 at t3,t4,t5",
ruleMutators: []ngmodels.AlertRuleMutator{ngmodels.RuleMuts.WithKeepFiringForNTimes(2), ngmodels.RuleMuts.WithForNTimes(1)},
results: map[time.Time]eval.Results{
t1: {
newResult(eval.WithState(eval.Normal)),
},
t2: {
newResult(eval.WithState(eval.NoData), eval.WithLabels(noDataLabels)),
},
t3: {
newResult(eval.WithState(eval.NoData), eval.WithLabels(noDataLabels)),
},
t4: {
newResult(eval.WithState(eval.Normal)),
},
t5: {
newResult(eval.WithState(eval.NoData), eval.WithLabels(noDataLabels)),
},
},
expectedTransitions: map[ngmodels.NoDataState]map[time.Time][]StateTransition{
ngmodels.NoData: {
t3: {
{
PreviousState: eval.NoData,
State: &State{
Labels: labels["system + rule + no-data"],
Annotations: baseRule.Annotations,
State: eval.NoData,
LatestResult: newEvaluation(t3, eval.NoData),
StartsAt: t2,
EndsAt: t3.Add(ResendDelay * 4),
LastEvaluationTime: t3,
LastSentAt: &t2,
},
},
{
PreviousState: eval.Normal,
State: &State{
Labels: labels["system + rule"],
Annotations: baseRule.Annotations,
State: eval.Normal,
StateReason: ngmodels.StateReasonMissingSeries,
LatestResult: newEvaluation(t1, eval.Normal),
StartsAt: t1,
EndsAt: t3,
LastEvaluationTime: t3,
},
},
},
t4: {
{
PreviousState: eval.Normal,
State: &State{
Labels: labels["system + rule"],
Annotations: baseRule.Annotations,
State: eval.Normal,
LatestResult: newEvaluation(t4, eval.Normal),
StartsAt: t4,
EndsAt: t4,
LastEvaluationTime: t4,
},
},
{
PreviousState: eval.Normal,
State: &State{
Labels: labels["system + rule"],
Annotations: baseRule.Annotations,
State: eval.Normal,
LatestResult: newEvaluation(t4, eval.Normal),
StartsAt: t4,
EndsAt: t4,
LastEvaluationTime: t4,
},
},
},
t5: {
{
PreviousState: eval.NoData,
State: &State{
Labels: labels["system + rule + no-data"],
Annotations: baseRule.Annotations,
State: eval.NoData,
LatestResult: newEvaluation(t5, eval.NoData),
StartsAt: t2,
EndsAt: t5.Add(ResendDelay * 4),
LastEvaluationTime: t5,
LastSentAt: &t5,
},
},
},
},
ngmodels.Alerting: {
t3: {
{
PreviousState: eval.Pending,
PreviousStateReason: eval.NoData.String(),
State: &State{
Labels: labels["system + rule"],
Annotations: mergeLabels(baseRule.Annotations, noDataAnnotations),
State: eval.Alerting,
StateReason: eval.NoData.String(),
LatestResult: newEvaluation(t3, eval.NoData),
StartsAt: t3,
EndsAt: t3.Add(ResendDelay * 4),
LastEvaluationTime: t3,
LastSentAt: &t3,
},
},
},
t4: {
{
PreviousState: eval.Alerting,
PreviousStateReason: eval.NoData.String(),
State: &State{
Labels: labels["system + rule"],
State: eval.Recovering,
LatestResult: newEvaluation(t4, eval.Normal),
StartsAt: t4,
EndsAt: t4.Add(ResendDelay * 4),
LastEvaluationTime: t4,
LastSentAt: &t3,
},
},
},
t5: {
{
PreviousState: eval.Recovering,
State: &State{
Labels: labels["system + rule"],
State: eval.Alerting,
StateReason: eval.NoData.String(),
LatestResult: newEvaluation(t5, eval.NoData),
StartsAt: t5,
EndsAt: t5.Add(ResendDelay * 4),
LastEvaluationTime: t5,
LastSentAt: &t3,
Annotations: mergeLabels(baseRule.Annotations, noDataAnnotations),
},
},
},
},
ngmodels.OK: {
t3: {
{
PreviousState: eval.Normal,
PreviousStateReason: eval.NoData.String(),
State: &State{
Labels: labels["system + rule"],
State: eval.Normal,
StateReason: eval.NoData.String(),
LatestResult: newEvaluation(t3, eval.NoData),
StartsAt: t1,
EndsAt: t1,
LastEvaluationTime: t3,
Annotations: mergeLabels(baseRule.Annotations, noDataAnnotations),
},
},
},
t4: {
{
PreviousState: eval.Normal,
PreviousStateReason: eval.NoData.String(),
State: &State{
Labels: labels["system + rule"],
State: eval.Normal,
LatestResult: newEvaluation(t4, eval.Normal),
StartsAt: t1,
EndsAt: t1,
LastEvaluationTime: t4,
},
},
},
t5: {
{
PreviousState: eval.Normal,
State: &State{
Labels: labels["system + rule"],
State: eval.Normal,
StateReason: eval.NoData.String(),
LatestResult: newEvaluation(t5, eval.NoData),
StartsAt: t1,
EndsAt: t1,
LastEvaluationTime: t5,
Annotations: mergeLabels(baseRule.Annotations, noDataAnnotations),
},
},
},
},
ngmodels.KeepLast: {
t3: {
{
PreviousState: eval.Normal,
PreviousStateReason: ngmodels.ConcatReasons(eval.NoData.String(), ngmodels.StateReasonKeepLast),
State: &State{
Labels: labels["system + rule"],
Annotations: mergeLabels(baseRule.Annotations, noDataAnnotations),
State: eval.Normal,
StateReason: ngmodels.ConcatReasons(eval.NoData.String(), ngmodels.StateReasonKeepLast),
LatestResult: newEvaluation(t3, eval.NoData),
StartsAt: t1,
EndsAt: t1,
LastEvaluationTime: t3,
},
},
},
t4: {
{
PreviousState: eval.Normal,
PreviousStateReason: ngmodels.ConcatReasons(eval.NoData.String(), ngmodels.StateReasonKeepLast),
State: &State{
Labels: labels["system + rule"],
State: eval.Normal,
LatestResult: newEvaluation(t4, eval.Normal),
StartsAt: t1,
EndsAt: t1,
LastEvaluationTime: t4,
},
},
},
t5: {
{
PreviousState: eval.Normal,
State: &State{
Labels: labels["system + rule"],
State: eval.Normal,
StateReason: ngmodels.ConcatReasons(eval.NoData.String(), ngmodels.StateReasonKeepLast),
LatestResult: newEvaluation(t5, eval.NoData),
StartsAt: t1,
EndsAt: t1,
LastEvaluationTime: t5,
Annotations: mergeLabels(baseRule.Annotations, noDataAnnotations),
},
},
},
},
},
},
{
desc: "t1[NoData] t2[1:normal] t3[1:normal] at t3",
results: map[time.Time]eval.Results{
@ -3532,6 +3833,226 @@ func TestProcessEvalResults_StateTransitions(t *testing.T) {
},
},
},
{
desc: "t1[{}:QueryError] t2[{}:QueryError] t3[{}:normal] t4[{}:QueryError] and 'keep_firing_for'=2 and 'for'=1 at t2,t3,t4",
ruleMutators: []ngmodels.AlertRuleMutator{ngmodels.RuleMuts.WithKeepFiringForNTimes(2), ngmodels.RuleMuts.WithForNTimes(1)},
results: map[time.Time]eval.Results{
t1: {
newResult(eval.WithError(datasourceError)),
},
t2: {
newResult(eval.WithError(datasourceError)),
},
t3: {
newResult(eval.WithState(eval.Normal)),
},
t4: {
newResult(eval.WithError(datasourceError)),
},
},
expectedTransitions: map[ngmodels.ExecutionErrorState]map[time.Time][]StateTransition{
ngmodels.ErrorErrState: {
t2: {
{
PreviousState: eval.Error,
State: &State{
CacheID: labels["system + rule"].Fingerprint(),
Labels: labels["system + rule + datasource-error"],
Error: datasourceError,
State: eval.Error,
LatestResult: newEvaluation(t2, eval.Error),
StartsAt: t1,
EndsAt: t2.Add(ResendDelay * 4),
LastEvaluationTime: t2,
LastSentAt: &t1,
Annotations: mergeLabels(baseRule.Annotations, data.Labels{
"Error": datasourceError.Error(),
}),
},
},
},
t3: {
{
PreviousState: eval.Error,
State: &State{
Labels: labels["system + rule"],
State: eval.Normal,
LatestResult: newEvaluation(t3, eval.Normal),
StartsAt: t3,
EndsAt: t3,
LastEvaluationTime: t3,
LastSentAt: &t1,
},
},
},
t4: {
{
PreviousState: eval.Normal,
State: &State{
CacheID: labels["system + rule"].Fingerprint(),
Labels: labels["system + rule + datasource-error"],
State: eval.Error,
LatestResult: newEvaluation(t4, eval.Error),
Error: datasourceError,
StartsAt: t4,
EndsAt: t4.Add(ResendDelay * 4),
LastEvaluationTime: t4,
LastSentAt: &t4,
Annotations: mergeLabels(baseRule.Annotations, data.Labels{
"Error": datasourceError.Error(),
}),
},
},
},
},
ngmodels.AlertingErrState: {
t2: {
{
PreviousState: eval.Pending,
PreviousStateReason: eval.Error.String(),
State: &State{
Labels: labels["system + rule"],
State: eval.Alerting,
StateReason: eval.Error.String(),
Error: datasourceError,
LatestResult: newEvaluation(t2, eval.Error),
StartsAt: t2,
EndsAt: t2.Add(ResendDelay * 4),
LastEvaluationTime: t2,
LastSentAt: &t2,
Annotations: datasourceErrorAnnotations,
},
},
},
t3: {
{
PreviousState: eval.Alerting,
PreviousStateReason: eval.Error.String(),
State: &State{
Labels: labels["system + rule"],
State: eval.Recovering,
LatestResult: newEvaluation(t3, eval.Normal),
StartsAt: t3,
EndsAt: t3.Add(ResendDelay * 4),
LastEvaluationTime: t3,
LastSentAt: &t2,
},
},
},
t4: {
{
PreviousState: eval.Recovering,
State: &State{
Labels: labels["system + rule"],
State: eval.Alerting,
StateReason: eval.Error.String(),
Error: datasourceError,
LatestResult: newEvaluation(t4, eval.Error),
StartsAt: t4,
EndsAt: t4.Add(ResendDelay * 4),
LastEvaluationTime: t4,
LastSentAt: &t2,
Annotations: datasourceErrorAnnotations,
},
},
},
},
ngmodels.OkErrState: {
t2: {
{
PreviousState: eval.Normal,
PreviousStateReason: eval.Error.String(),
State: &State{
Labels: labels["system + rule"],
State: eval.Normal,
StateReason: eval.Error.String(),
LatestResult: newEvaluation(t2, eval.Error),
StartsAt: t1,
EndsAt: t1,
LastEvaluationTime: t2,
Annotations: datasourceErrorAnnotations,
},
},
},
t3: {
{
PreviousState: eval.Normal,
PreviousStateReason: eval.Error.String(),
State: &State{
Labels: labels["system + rule"],
State: eval.Normal,
LatestResult: newEvaluation(t3, eval.Normal),
StartsAt: t1,
EndsAt: t1,
LastEvaluationTime: t3,
},
},
},
t4: {
{
PreviousState: eval.Normal,
State: &State{
Labels: labels["system + rule"],
State: eval.Normal,
StateReason: eval.Error.String(),
LatestResult: newEvaluation(t4, eval.Error),
StartsAt: t1,
EndsAt: t1,
LastEvaluationTime: t4,
Annotations: datasourceErrorAnnotations,
},
},
},
},
ngmodels.KeepLastErrState: {
t2: {
{
PreviousState: eval.Normal,
PreviousStateReason: ngmodels.ConcatReasons(eval.Error.String(), ngmodels.StateReasonKeepLast),
State: &State{
Labels: labels["system + rule"],
State: eval.Normal,
StateReason: ngmodels.ConcatReasons(eval.Error.String(), ngmodels.StateReasonKeepLast),
LatestResult: newEvaluation(t2, eval.Error),
StartsAt: t1,
EndsAt: t1,
LastEvaluationTime: t2,
Annotations: datasourceErrorAnnotations,
},
},
},
t3: {
{
PreviousState: eval.Normal,
PreviousStateReason: ngmodels.ConcatReasons(eval.Error.String(), ngmodels.StateReasonKeepLast),
State: &State{
Labels: labels["system + rule"],
State: eval.Normal,
LatestResult: newEvaluation(t3, eval.Normal),
StartsAt: t1,
EndsAt: t1,
LastEvaluationTime: t3,
},
},
},
t4: {
{
PreviousState: eval.Normal,
State: &State{
Labels: labels["system + rule"],
State: eval.Normal,
StateReason: ngmodels.ConcatReasons(eval.Error.String(), ngmodels.StateReasonKeepLast),
LatestResult: newEvaluation(t4, eval.Error),
StartsAt: t1,
EndsAt: t1,
LastEvaluationTime: t4,
Annotations: datasourceErrorAnnotations,
},
},
},
},
},
},
{
desc: "t1[{}:normal] t2[QueryError] at t2",
results: map[time.Time]eval.Results{

View File

@ -1201,6 +1201,40 @@ func TestProcessEvalResults(t *testing.T) {
},
},
},
{
desc: "normal -> alerting (pending) -> alerting -> normal (recovering) -> alerting when both KeepFiringFor and For are set",
alertRule: baseRuleWith(m.WithKeepFiringForNTimes(2), m.WithForNTimes(1)),
evalResults: map[time.Time]eval.Results{
t1: {
newResult(eval.WithState(eval.Normal), eval.WithLabels(labels1)),
},
t2: {
newResult(eval.WithState(eval.Alerting), eval.WithLabels(labels1)),
},
t3: {
newResult(eval.WithState(eval.Alerting), eval.WithLabels(labels1)),
},
tn(4): {
newResult(eval.WithState(eval.Normal), eval.WithLabels(labels1)),
},
tn(5): {
newResult(eval.WithState(eval.Alerting), eval.WithLabels(labels1)),
},
},
expectedAnnotations: 4,
expectedStates: []*state.State{
{
Labels: labels["system + rule + labels1"],
ResultFingerprint: labels1.Fingerprint(),
State: eval.Alerting,
LatestResult: newEvaluation(tn(5), eval.Alerting),
StartsAt: tn(5),
EndsAt: tn(5).Add(state.ResendDelay * 4),
LastEvaluationTime: tn(5),
LastSentAt: util.Pointer(t3),
},
},
},
}
for _, tc := range testCases {

View File

@ -417,8 +417,9 @@ func resultAlerting(state *State, rule *models.AlertRule, result eval.Result, lo
}
default:
nextEndsAt := nextEndsTime(rule.IntervalSeconds, result.EvaluatedAt)
if rule.For > 0 {
// If the alert rule has a For duration that should be observed then the state should be set to Pending
if state.State != eval.Recovering && rule.For > 0 {
// If the alert rule has a For duration that should be observed then the state should be set to Pending.
// If the alert is currently in the Recovering state then we skip Pending and set it directly to Alerting.
logger.Debug("Changing state",
"previous_state",
state.State,