mirror of
https://github.com/grafana/grafana.git
synced 2025-08-06 20:59:35 +08:00

What is this feature?

This PR introduces a new alert rule configuration option, keep_firing_for (see the Prometheus documentation). keep_firing_for prevents alerts from resolving immediately after the alert condition returns to normal. Instead, they transition into a "Recovering" state and are not considered resolved by the Alertmanager. Once the recovery period ends (or after the next evaluation, if the evaluation interval is longer than keep_firing_for), the alert transitions to "Normal" unless it starts alerting again:

Before

+----------+     +----------+
| Alerting |---->|  Normal  |
+----------+     +----------+

-----

After

+----------+      +------------+     +----------+
| Alerting |----->| Recovering |---->|  Normal  |
+----------+      +------------+     +----------+

Why do we need this feature?

This feature prevents flapping alerts by adding a recovery period. This helps avoid false resolutions caused by brief alert…
54 lines
1.2 KiB
JSON
54 lines
1.2 KiB
JSON
{
|
|
"name": "Group2",
|
|
"interval": "1m",
|
|
"rules": [
|
|
{
|
|
"expr": "",
|
|
"for": "5m",
|
|
"keep_firing_for": "0s",
|
|
"labels": {
|
|
"label1": "test-label"
|
|
},
|
|
"annotations": {
|
|
"annotation": "test-annotation"
|
|
},
|
|
"grafana_alert": {
|
|
"title": "Rule3",
|
|
"condition": "A",
|
|
"data": [
|
|
{
|
|
"refId": "A",
|
|
"queryType": "",
|
|
"relativeTimeRange": {
|
|
"from": 0,
|
|
"to": 0
|
|
},
|
|
"datasourceUid": "__expr__",
|
|
"model": {
|
|
"expression": "0/0",
|
|
"intervalMs": 1000,
|
|
"maxDataPoints": 43200,
|
|
"type": "math"
|
|
}
|
|
}
|
|
],
|
|
"updated": "2023-09-29T17:37:19Z",
|
|
"intervalSeconds": 60,
|
|
"version": 1,
|
|
"uid": "<dynamic>",
|
|
"namespace_uid": "<dynamic>",
|
|
"rule_group": "Group2",
|
|
"no_data_state": "NoData",
|
|
"exec_err_state": "Error",
|
|
"is_paused": false,
|
|
"metadata": {
|
|
"editor_settings": {
|
|
"simplified_query_and_expressions_section": false,
|
|
"simplified_notifications_section": false
|
|
}
|
|
}
|
|
}
|
|
}
|
|
]
|
|
}
|