mirror of
https://github.com/grafana/grafana.git
synced 2025-08-03 01:32:13 +08:00
Alerting: Metric to count imported from Prometheus rules (#100847)
This commit is contained in:

committed by
GitHub

parent
2638de6aeb
commit
d44728f4e5
@ -457,9 +457,9 @@ func grafanaRuleGroupToPrometheus(group string, rules []models.AlertRule) (apimo
|
||||
}
|
||||
|
||||
for i, rule := range rules {
|
||||
promDefinition := rule.PrometheusRuleDefinition()
|
||||
if promDefinition == "" {
|
||||
return apimodels.PrometheusRuleGroup{}, fmt.Errorf("failed to get the Prometheus definition of the rule with UID %s", rule.UID)
|
||||
promDefinition, err := rule.PrometheusRuleDefinition()
|
||||
if err != nil {
|
||||
return apimodels.PrometheusRuleGroup{}, fmt.Errorf("failed to get the Prometheus definition of the rule with UID %s: %w", rule.UID, err)
|
||||
}
|
||||
var r apimodels.PrometheusRule
|
||||
if err := yaml.Unmarshal([]byte(promDefinition), &r); err != nil {
|
||||
|
@ -115,7 +115,10 @@ func TestRouteConvertPrometheusPostRuleGroup(t *testing.T) {
|
||||
require.Equal(t, fmt.Sprintf("[%s] %s", simpleGroup.Name, simpleGroup.Rules[0].Alert), remaining[0].Title)
|
||||
promRuleYAML, err := yaml.Marshal(simpleGroup.Rules[0])
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, string(promRuleYAML), remaining[0].PrometheusRuleDefinition())
|
||||
|
||||
promDefinition, err := remaining[0].PrometheusRuleDefinition()
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, string(promRuleYAML), promDefinition)
|
||||
})
|
||||
|
||||
t.Run("should fail to replace a provisioned rule group", func(t *testing.T) {
|
||||
|
@ -32,6 +32,7 @@ type Scheduler struct {
|
||||
Ticker *ticker.Metrics
|
||||
EvaluationMissed *prometheus.CounterVec
|
||||
SimplifiedEditorRules *prometheus.GaugeVec
|
||||
PrometheusImportedRules *prometheus.GaugeVec
|
||||
}
|
||||
|
||||
func NewSchedulerMetrics(r prometheus.Registerer) *Scheduler {
|
||||
@ -192,5 +193,14 @@ func NewSchedulerMetrics(r prometheus.Registerer) *Scheduler {
|
||||
},
|
||||
[]string{"org", "setting"},
|
||||
),
|
||||
PrometheusImportedRules: promauto.With(r).NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Namespace: Namespace,
|
||||
Subsystem: Subsystem,
|
||||
Name: "prometheus_imported_rules",
|
||||
Help: "The number of rules imported from a Prometheus-compatible source.",
|
||||
},
|
||||
[]string{"org"},
|
||||
),
|
||||
}
|
||||
}
|
||||
|
@ -397,19 +397,18 @@ func WithoutInternalLabels() LabelOption {
|
||||
}
|
||||
|
||||
func (alertRule *AlertRule) ImportedFromPrometheus() bool {
|
||||
if alertRule.Metadata.PrometheusStyleRule == nil {
|
||||
return false
|
||||
}
|
||||
|
||||
return alertRule.Metadata.PrometheusStyleRule.OriginalRuleDefinition != ""
|
||||
_, err := alertRule.PrometheusRuleDefinition()
|
||||
return err == nil
|
||||
}
|
||||
|
||||
func (alertRule *AlertRule) PrometheusRuleDefinition() string {
|
||||
if !alertRule.ImportedFromPrometheus() {
|
||||
return ""
|
||||
func (alertRule *AlertRule) PrometheusRuleDefinition() (string, error) {
|
||||
if alertRule.Metadata.PrometheusStyleRule != nil {
|
||||
if alertRule.Metadata.PrometheusStyleRule.OriginalRuleDefinition != "" {
|
||||
return alertRule.Metadata.PrometheusStyleRule.OriginalRuleDefinition, nil
|
||||
}
|
||||
}
|
||||
|
||||
return alertRule.Metadata.PrometheusStyleRule.OriginalRuleDefinition
|
||||
return "", fmt.Errorf("prometheus rule definition is missing")
|
||||
}
|
||||
|
||||
// GetLabels returns the labels specified as part of the alert rule.
|
||||
|
@ -1017,3 +1017,70 @@ func TestGeneratorFillsAllFields(t *testing.T) {
|
||||
|
||||
require.FailNow(t, "AlertRule generator does not populate fields", "skipped fields: %v", maps.Keys(fields))
|
||||
}
|
||||
|
||||
func TestAlertRule_PrometheusRuleDefinition(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
rule AlertRule
|
||||
expectedResult string
|
||||
expectedErrorMsg string
|
||||
}{
|
||||
{
|
||||
name: "rule with prometheus definition",
|
||||
rule: AlertRule{
|
||||
Metadata: AlertRuleMetadata{
|
||||
PrometheusStyleRule: &PrometheusStyleRule{
|
||||
OriginalRuleDefinition: "groups:\n- name: example\n rules:\n - alert: HighRequestLatency\n expr: request_latency_seconds{job=\"myjob\"} > 0.5\n for: 10m\n labels:\n severity: page\n annotations:\n summary: High request latency",
|
||||
},
|
||||
},
|
||||
},
|
||||
expectedResult: "groups:\n- name: example\n rules:\n - alert: HighRequestLatency\n expr: request_latency_seconds{job=\"myjob\"} > 0.5\n for: 10m\n labels:\n severity: page\n annotations:\n summary: High request latency",
|
||||
expectedErrorMsg: "",
|
||||
},
|
||||
{
|
||||
name: "rule with empty prometheus definition",
|
||||
rule: AlertRule{
|
||||
Metadata: AlertRuleMetadata{
|
||||
PrometheusStyleRule: &PrometheusStyleRule{
|
||||
OriginalRuleDefinition: "",
|
||||
},
|
||||
},
|
||||
},
|
||||
expectedResult: "",
|
||||
expectedErrorMsg: "prometheus rule definition is missing",
|
||||
},
|
||||
{
|
||||
name: "rule with nil prometheus style rule",
|
||||
rule: AlertRule{
|
||||
Metadata: AlertRuleMetadata{
|
||||
PrometheusStyleRule: nil,
|
||||
},
|
||||
},
|
||||
expectedResult: "",
|
||||
expectedErrorMsg: "prometheus rule definition is missing",
|
||||
},
|
||||
{
|
||||
name: "rule with empty metadata",
|
||||
rule: AlertRule{},
|
||||
expectedResult: "",
|
||||
expectedErrorMsg: "prometheus rule definition is missing",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
result, err := tt.rule.PrometheusRuleDefinition()
|
||||
isPrometheusRule := tt.rule.ImportedFromPrometheus()
|
||||
|
||||
if tt.expectedErrorMsg != "" {
|
||||
require.Error(t, err)
|
||||
require.Equal(t, tt.expectedErrorMsg, err.Error())
|
||||
require.False(t, isPrometheusRule)
|
||||
} else {
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, tt.expectedResult, result)
|
||||
require.True(t, isPrometheusRule)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
@ -46,6 +46,8 @@ func (sch *schedule) updateRulesMetrics(alertRules []*models.AlertRule) {
|
||||
orgsNfSettings := make(map[int64]int64)
|
||||
// gauge for groups per org
|
||||
groupsPerOrg := make(map[int64]map[string]struct{})
|
||||
// gauge for rules imported from Prometheus per org
|
||||
orgsRulesPrometheusImported := make(map[int64]int64)
|
||||
|
||||
simplifiedEditorSettingsPerOrg := make(map[int64]map[string]int64) // orgID -> setting -> count
|
||||
|
||||
@ -86,6 +88,10 @@ func (sch *schedule) updateRulesMetrics(alertRules []*models.AlertRule) {
|
||||
}
|
||||
}
|
||||
|
||||
if rule.ImportedFromPrometheus() {
|
||||
orgsRulesPrometheusImported[rule.OrgID]++
|
||||
}
|
||||
|
||||
// Count groups per org
|
||||
orgGroups, ok := groupsPerOrg[rule.OrgID]
|
||||
if !ok {
|
||||
@ -100,6 +106,7 @@ func (sch *schedule) updateRulesMetrics(alertRules []*models.AlertRule) {
|
||||
sch.metrics.SimpleNotificationRules.Reset()
|
||||
sch.metrics.Groups.Reset()
|
||||
sch.metrics.SimplifiedEditorRules.Reset()
|
||||
sch.metrics.PrometheusImportedRules.Reset()
|
||||
|
||||
// Set metrics
|
||||
for key, count := range buckets {
|
||||
@ -111,6 +118,9 @@ func (sch *schedule) updateRulesMetrics(alertRules []*models.AlertRule) {
|
||||
for orgID, groups := range groupsPerOrg {
|
||||
sch.metrics.Groups.WithLabelValues(fmt.Sprint(orgID)).Set(float64(len(groups)))
|
||||
}
|
||||
for orgID, count := range orgsRulesPrometheusImported {
|
||||
sch.metrics.PrometheusImportedRules.WithLabelValues(fmt.Sprint(orgID)).Set(float64(count))
|
||||
}
|
||||
for orgID, settings := range simplifiedEditorSettingsPerOrg {
|
||||
for setting, count := range settings {
|
||||
sch.metrics.SimplifiedEditorRules.WithLabelValues(fmt.Sprint(orgID), setting).Set(float64(count))
|
||||
|
@ -562,6 +562,7 @@ func TestSchedule_updateRulesMetrics(t *testing.T) {
|
||||
sch := setupScheduler(t, ruleStore, nil, reg, nil, nil, nil)
|
||||
ctx := context.Background()
|
||||
const firstOrgID int64 = 1
|
||||
const secondOrgID int64 = 2
|
||||
|
||||
t.Run("grafana_alerting_rule_group_rules metric should reflect the current state", func(t *testing.T) {
|
||||
// Without any rules there are no metrics
|
||||
@ -640,6 +641,62 @@ func TestSchedule_updateRulesMetrics(t *testing.T) {
|
||||
})
|
||||
})
|
||||
|
||||
t.Run("prometheus_imported_rules metric should reflect the current state", func(t *testing.T) {
|
||||
// Without any imported rules there are no metrics
|
||||
t.Run("it should not show metrics", func(t *testing.T) {
|
||||
sch.updateRulesMetrics([]*models.AlertRule{})
|
||||
|
||||
expectedMetric := ""
|
||||
err := testutil.GatherAndCompare(reg, bytes.NewBufferString(expectedMetric), "grafana_alerting_prometheus_imported_rules")
|
||||
require.NoError(t, err)
|
||||
})
|
||||
|
||||
alertRule1 := models.RuleGen.With(
|
||||
models.RuleGen.WithOrgID(firstOrgID),
|
||||
models.RuleGen.WithPrometheusOriginalRuleDefinition("1"),
|
||||
).GenerateRef()
|
||||
|
||||
t.Run("it should show one imported rule in a single org", func(t *testing.T) {
|
||||
sch.updateRulesMetrics([]*models.AlertRule{alertRule1})
|
||||
|
||||
expectedMetric := fmt.Sprintf(
|
||||
`# HELP grafana_alerting_prometheus_imported_rules The number of rules imported from a Prometheus-compatible source.
|
||||
# TYPE grafana_alerting_prometheus_imported_rules gauge
|
||||
grafana_alerting_prometheus_imported_rules{org="%[1]d"} 1
|
||||
`, alertRule1.OrgID)
|
||||
|
||||
err := testutil.GatherAndCompare(reg, bytes.NewBufferString(expectedMetric), "grafana_alerting_prometheus_imported_rules")
|
||||
require.NoError(t, err)
|
||||
})
|
||||
|
||||
alertRule2 := models.RuleGen.With(
|
||||
models.RuleGen.WithOrgID(secondOrgID),
|
||||
models.RuleGen.WithPrometheusOriginalRuleDefinition("1"),
|
||||
).GenerateRef()
|
||||
|
||||
t.Run("it should show two imported rules in two orgs", func(t *testing.T) {
|
||||
sch.updateRulesMetrics([]*models.AlertRule{alertRule1, alertRule2})
|
||||
|
||||
expectedMetric := fmt.Sprintf(
|
||||
`# HELP grafana_alerting_prometheus_imported_rules The number of rules imported from a Prometheus-compatible source.
|
||||
# TYPE grafana_alerting_prometheus_imported_rules gauge
|
||||
grafana_alerting_prometheus_imported_rules{org="%[1]d"} 1
|
||||
grafana_alerting_prometheus_imported_rules{org="%[2]d"} 1
|
||||
`, alertRule1.OrgID, alertRule2.OrgID)
|
||||
|
||||
err := testutil.GatherAndCompare(reg, bytes.NewBufferString(expectedMetric), "grafana_alerting_prometheus_imported_rules")
|
||||
require.NoError(t, err)
|
||||
})
|
||||
|
||||
t.Run("after removing all rules it should not show any metrics", func(t *testing.T) {
|
||||
sch.updateRulesMetrics([]*models.AlertRule{})
|
||||
|
||||
expectedMetric := ""
|
||||
err := testutil.GatherAndCompare(reg, bytes.NewBufferString(expectedMetric), "grafana_alerting_prometheus_imported_rules")
|
||||
require.NoError(t, err)
|
||||
})
|
||||
})
|
||||
|
||||
t.Run("rule_groups metric should reflect the current state", func(t *testing.T) {
|
||||
const firstOrgID int64 = 1
|
||||
const secondOrgID int64 = 2
|
||||
|
@ -607,10 +607,7 @@ func (st DBstore) ListAlertRules(ctx context.Context, query *ngmodels.ListAlertR
|
||||
}
|
||||
}
|
||||
if query.ImportedPrometheusRule != nil { // remove false-positive hits from the result
|
||||
hasOriginalRuleDefinition := converted.Metadata.PrometheusStyleRule != nil && len(converted.Metadata.PrometheusStyleRule.OriginalRuleDefinition) > 0
|
||||
if *query.ImportedPrometheusRule && !hasOriginalRuleDefinition {
|
||||
continue
|
||||
} else if !*query.ImportedPrometheusRule && hasOriginalRuleDefinition {
|
||||
if *query.ImportedPrometheusRule != converted.ImportedFromPrometheus() {
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
@ -215,11 +215,7 @@ func (f *RuleStore) ListAlertRules(_ context.Context, q *models.ListAlertRulesQu
|
||||
continue
|
||||
}
|
||||
if q.ImportedPrometheusRule != nil {
|
||||
hasOriginalRuleDefinition := r.PrometheusRuleDefinition() != ""
|
||||
if *q.ImportedPrometheusRule && !hasOriginalRuleDefinition {
|
||||
continue
|
||||
}
|
||||
if !*q.ImportedPrometheusRule && hasOriginalRuleDefinition {
|
||||
if *q.ImportedPrometheusRule != r.ImportedFromPrometheus() {
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user