From f13b869aa410a1954e0642d69efff0b98fbdf58f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torkel=20=C3=96degaard?= Date: Thu, 14 Jul 2016 13:32:16 +0200 Subject: [PATCH] feat(alerting): work on alerting --- pkg/api/api.go | 32 +++++++++---------- pkg/models/alert.go | 3 ++ pkg/services/alerting/alertstates/states.go | 14 ++++---- pkg/services/alerting/engine.go | 23 +++++++------ pkg/services/alerting/handler.go | 28 +++++++++------- pkg/services/alerting/models.go | 17 +++++----- pkg/services/alerting/result_handler.go | 2 +- pkg/services/sqlstore/alert.go | 21 ++++++++---- .../app/features/alerting/alert_log_ctrl.ts | 4 +-- public/app/features/alerting/alerts_ctrl.ts | 2 +- .../alerting/notification_edit_ctrl.ts | 6 ++-- .../alerting/notifications_list_ctrl.ts | 4 +-- .../datasource/grafana-live/plugin.json | 7 ++++ .../panel/graph/partials/tab_alerting.html | 4 +-- 14 files changed, 95 insertions(+), 72 deletions(-) create mode 100644 public/app/plugins/datasource/grafana-live/plugin.json diff --git a/pkg/api/api.go b/pkg/api/api.go index 01ea3e09d5a..4d2430a7c35 100644 --- a/pkg/api/api.go +++ b/pkg/api/api.go @@ -245,25 +245,23 @@ func Register(r *macaron.Macaron) { // metrics r.Get("/metrics", wrap(GetInternalMetrics)) - r.Group("/alerting", func() { - r.Group("/rules", func() { - r.Get("/:alertId/states", wrap(GetAlertStates)) - //r.Put("/:alertId/state", bind(m.UpdateAlertStateCommand{}), wrap(PutAlertState)) - r.Get("/:alertId", ValidateOrgAlert, wrap(GetAlert)) - //r.Delete("/:alertId", ValidateOrgAlert, wrap(DelAlert)) disabled until we know how to handle it dashboard updates - r.Get("/", wrap(GetAlerts)) - }) - - r.Get("/notifications", wrap(GetAlertNotifications)) - - r.Group("/notification", func() { - r.Post("/", bind(m.CreateAlertNotificationCommand{}), wrap(CreateAlertNotification)) - r.Put("/:notificationId", bind(m.UpdateAlertNotificationCommand{}), wrap(UpdateAlertNotification)) - r.Get("/:notificationId", wrap(GetAlertNotificationById)) - 
r.Delete("/:notificationId", wrap(DeleteAlertNotification)) - }, reqOrgAdmin) + r.Group("/alerts", func() { + r.Get("/:alertId/states", wrap(GetAlertStates)) + //r.Put("/:alertId/state", bind(m.UpdateAlertStateCommand{}), wrap(PutAlertState)) + r.Get("/:alertId", ValidateOrgAlert, wrap(GetAlert)) + //r.Delete("/:alertId", ValidateOrgAlert, wrap(DelAlert)) disabled until we know how to handle it dashboard updates + r.Get("/", wrap(GetAlerts)) }) + r.Get("/alert-notifications", wrap(GetAlertNotifications)) + + r.Group("/alert-notifications", func() { + r.Post("/", bind(m.CreateAlertNotificationCommand{}), wrap(CreateAlertNotification)) + r.Put("/:notificationId", bind(m.UpdateAlertNotificationCommand{}), wrap(UpdateAlertNotification)) + r.Get("/:notificationId", wrap(GetAlertNotificationById)) + r.Delete("/:notificationId", wrap(DeleteAlertNotification)) + }, reqOrgAdmin) + // error test r.Get("/metrics/error", wrap(GenerateError)) diff --git a/pkg/models/alert.go b/pkg/models/alert.go index fa3f40f6069..a31d27096f6 100644 --- a/pkg/models/alert.go +++ b/pkg/models/alert.go @@ -18,6 +18,9 @@ type Alert struct { Enabled bool Frequency int64 + CreatedBy int64 + UpdatedBy int64 + Created time.Time Updated time.Time diff --git a/pkg/services/alerting/alertstates/states.go b/pkg/services/alerting/alertstates/states.go index 9989c223e16..cf2af121062 100644 --- a/pkg/services/alerting/alertstates/states.go +++ b/pkg/services/alerting/alertstates/states.go @@ -5,14 +5,12 @@ var ( Ok, Warn, Critical, - Acknowledged, - Maintenance, + Unknown, } - Ok = "OK" - Warn = "WARN" - Critical = "CRITICAL" - Acknowledged = "ACKNOWLEDGED" - Maintenance = "MAINTENANCE" - Pending = "PENDING" + Ok = "OK" + Warn = "WARN" + Critical = "CRITICAL" + Pending = "PENDING" + Unknown = "UNKNOWN" ) diff --git a/pkg/services/alerting/engine.go b/pkg/services/alerting/engine.go index 4e002e9eb0d..c16473f793e 100644 --- a/pkg/services/alerting/engine.go +++ b/pkg/services/alerting/engine.go @@ -19,6 
+19,7 @@ type Engine struct { ruleReader RuleReader log log.Logger responseHandler ResultHandler + alertJobTimeout time.Duration } func NewEngine() *Engine { @@ -31,6 +32,7 @@ func NewEngine() *Engine { ruleReader: NewRuleReader(), log: log.New("alerting.engine"), responseHandler: NewResultHandler(), + alertJobTimeout: time.Second * 5, } return e @@ -87,24 +89,24 @@ func (e *Engine) execDispatch() { } func (e *Engine) executeJob(job *AlertJob) { - now := time.Now() + startTime := time.Now() resultChan := make(chan *AlertResult, 1) go e.handler.Execute(job, resultChan) select { - case <-time.After(time.Second * 5): + case <-time.After(e.alertJobTimeout): e.resultQueue <- &AlertResult{ - State: alertstates.Pending, - Duration: float64(time.Since(now).Nanoseconds()) / float64(1000000), - Error: fmt.Errorf("Timeout"), - AlertJob: job, - ExeuctionTime: time.Now(), + State: alertstates.Pending, + Error: fmt.Errorf("Timeout"), + AlertJob: job, + StartTime: startTime, + EndTime: time.Now(), } e.log.Debug("Job Execution timeout", "alertRuleId", job.Rule.Id) case result := <-resultChan: - result.Duration = float64(time.Since(now).Nanoseconds()) / float64(1000000) - e.log.Debug("Job Execution done", "timeTakenMs", result.Duration, "ruleId", job.Rule.Id) + duration := float64(result.EndTime.Sub(result.StartTime).Nanoseconds()) / float64(1000000) + e.log.Debug("Job Execution done", "timeTakenMs", duration, "ruleId", job.Rule.Id) e.resultQueue <- result } } @@ -117,7 +119,7 @@ func (e *Engine) resultHandler() { }() for result := range e.resultQueue { - e.log.Debug("Alert Rule Result", "ruleId", result.AlertJob.Rule.Id, "state", result.State, "value", result.ActualValue, "retry", result.AlertJob.RetryCount) + e.log.Debug("Alert Rule Result", "ruleId", result.AlertJob.Rule.Id, "state", result.State, "retry", result.AlertJob.RetryCount) result.AlertJob.Running = false diff --git a/pkg/services/alerting/handler.go b/pkg/services/alerting/handler.go index 
1dfc6cd2cc4..fc7ff71e8dd 100644 --- a/pkg/services/alerting/handler.go +++ b/pkg/services/alerting/handler.go @@ -26,18 +26,25 @@ func NewHandler() *HandlerImpl { } func (e *HandlerImpl) Execute(job *AlertJob, resultQueue chan *AlertResult) { + startTime := time.Now() + timeSeries, err := e.executeQuery(job) if err != nil { resultQueue <- &AlertResult{ - Error: err, - State: alertstates.Pending, - AlertJob: job, - ExeuctionTime: time.Now(), + Error: err, + State: alertstates.Pending, + AlertJob: job, + StartTime: startTime, + EndTime: time.Now(), } + return } result := e.evaluateRule(job.Rule, timeSeries) result.AlertJob = job + result.StartTime = startTime + result.EndTime = time.Now() + resultQueue <- result } @@ -108,9 +115,9 @@ func (e *HandlerImpl) evaluateRule(rule *AlertRule, series tsdb.TimeSeriesSlice) e.log.Debug("Alert execution Crit", "name", serie.Name, "condition", condition2, "result", critResult) if critResult { triggeredAlert = append(triggeredAlert, &TriggeredAlert{ - State: alertstates.Critical, - ActualValue: transformedValue, - Name: serie.Name, + State: alertstates.Critical, + Value: transformedValue, + Metric: serie.Name, }) continue } @@ -120,9 +127,9 @@ func (e *HandlerImpl) evaluateRule(rule *AlertRule, series tsdb.TimeSeriesSlice) e.log.Debug("Alert execution Warn", "name", serie.Name, "condition", condition, "result", warnResult) if warnResult { triggeredAlert = append(triggeredAlert, &TriggeredAlert{ - State: alertstates.Warn, - ActualValue: transformedValue, - Name: serie.Name, + State: alertstates.Warn, + Value: transformedValue, + Metric: serie.Name, }) } } @@ -138,5 +145,5 @@ func (e *HandlerImpl) evaluateRule(rule *AlertRule, series tsdb.TimeSeriesSlice) } } - return &AlertResult{State: executionState, Description: "Returned " + executionState, TriggeredAlerts: triggeredAlert, ExeuctionTime: time.Now()} + return &AlertResult{State: executionState, TriggeredAlerts: triggeredAlert} } diff --git a/pkg/services/alerting/models.go 
b/pkg/services/alerting/models.go index e3b1722a689..eb7e60784c2 100644 --- a/pkg/services/alerting/models.go +++ b/pkg/services/alerting/models.go @@ -24,19 +24,20 @@ func (aj *AlertJob) IncRetry() { type AlertResult struct { State string - ActualValue float64 - Duration float64 TriggeredAlerts []*TriggeredAlert - Description string Error error - AlertJob *AlertJob - ExeuctionTime time.Time + Description string + StartTime time.Time + EndTime time.Time + + AlertJob *AlertJob } type TriggeredAlert struct { - ActualValue float64 - Name string - State string + Value float64 + Metric string + State string + Tags map[string]string } type Level struct { diff --git a/pkg/services/alerting/result_handler.go b/pkg/services/alerting/result_handler.go index cb890c7e1b6..d3af23b1416 100644 --- a/pkg/services/alerting/result_handler.go +++ b/pkg/services/alerting/result_handler.go @@ -60,7 +60,7 @@ func (handler *ResultHandlerImpl) shouldUpdateState(result *AlertResult) bool { } lastExecution := query.Result.Created - asdf := result.ExeuctionTime.Add(time.Minute * -15) + asdf := result.StartTime.Add(time.Minute * -15) olderThen15Min := lastExecution.Before(asdf) changedState := query.Result.NewState != result.State diff --git a/pkg/services/sqlstore/alert.go b/pkg/services/sqlstore/alert.go index 20e6a8f2b76..c4f88d7cb4b 100644 --- a/pkg/services/sqlstore/alert.go +++ b/pkg/services/sqlstore/alert.go @@ -158,24 +158,29 @@ func DeleteAlertDefinition(dashboardId int64, sess *xorm.Session) error { func SaveAlerts(cmd *m.SaveAlertsCommand) error { return inTransaction(func(sess *xorm.Session) error { - alerts, err := GetAlertsByDashboardId2(cmd.DashboardId, sess) + existingAlerts, err := GetAlertsByDashboardId2(cmd.DashboardId, sess) if err != nil { return err } - upsertAlerts(alerts, cmd, sess) - deleteMissingAlerts(alerts, cmd, sess) + if err := upsertAlerts(existingAlerts, cmd, sess); err != nil { + return err + } + + if err := deleteMissingAlerts(existingAlerts, cmd, sess); 
err != nil { + return err + } return nil }) } -func upsertAlerts(alerts []*m.Alert, cmd *m.SaveAlertsCommand, sess *xorm.Session) error { +func upsertAlerts(existingAlerts []*m.Alert, cmd *m.SaveAlertsCommand, sess *xorm.Session) error { for _, alert := range cmd.Alerts { update := false var alertToUpdate *m.Alert - for _, k := range alerts { + for _, k := range existingAlerts { if alert.PanelId == k.PanelId { update = true alert.Id = k.Id @@ -195,11 +200,13 @@ func upsertAlerts(alerts []*m.Alert, cmd *m.SaveAlertsCommand, sess *xorm.Sessio sqlog.Debug("Alert updated", "name", alert.Name, "id", alert.Id) } - } else { alert.Updated = time.Now() alert.Created = time.Now() - alert.State = "OK" + alert.State = "UNKNOWN" + alert.CreatedBy = cmd.UserId + alert.UpdatedBy = cmd.UserId + _, err := sess.Insert(alert) if err != nil { return err diff --git a/public/app/features/alerting/alert_log_ctrl.ts b/public/app/features/alerting/alert_log_ctrl.ts index 8b7a92c2f4e..2727f486604 100644 --- a/public/app/features/alerting/alert_log_ctrl.ts +++ b/public/app/features/alerting/alert_log_ctrl.ts @@ -20,7 +20,7 @@ export class AlertLogCtrl { } loadAlertLogs(alertId: number) { - this.backendSrv.get(`/api/alerts/rules/${alertId}/states`).then(result => { + this.backendSrv.get(`/api/alerts/${alertId}/states`).then(result => { this.alertLogs = _.map(result, log => { log.iconCss = alertDef.getCssForState(log.newState); log.humanTime = moment(log.created).format("YYYY-MM-DD HH:mm:ss"); @@ -28,7 +28,7 @@ export class AlertLogCtrl { }); }); - this.backendSrv.get(`/api/alerts/rules/${alertId}`).then(result => { + this.backendSrv.get(`/api/alerts/${alertId}`).then(result => { this.alert = result; }); } diff --git a/public/app/features/alerting/alerts_ctrl.ts b/public/app/features/alerting/alerts_ctrl.ts index 6cb5d668433..7294ce69166 100644 --- a/public/app/features/alerting/alerts_ctrl.ts +++ b/public/app/features/alerting/alerts_ctrl.ts @@ -49,7 +49,7 @@ export class AlertListCtrl { 
state: stats }; - this.backendSrv.get('/api/alerts/rules', params).then(result => { + this.backendSrv.get('/api/alerts', params).then(result => { this.alerts = _.map(result, alert => { alert.iconCss = alertDef.getCssForState(alert.state); return alert; diff --git a/public/app/features/alerting/notification_edit_ctrl.ts b/public/app/features/alerting/notification_edit_ctrl.ts index 9bca3cddbec..43dceea8dc2 100644 --- a/public/app/features/alerting/notification_edit_ctrl.ts +++ b/public/app/features/alerting/notification_edit_ctrl.ts @@ -24,7 +24,7 @@ export class AlertNotificationEditCtrl { } loadNotification(notificationId) { - this.backendSrv.get(`/api/alerts/notification/${notificationId}`).then(result => { + this.backendSrv.get(`/api/alert-notifications/${notificationId}`).then(result => { console.log(result); this.notification = result; }); @@ -37,7 +37,7 @@ export class AlertNotificationEditCtrl { save() { if (this.notification.id) { console.log('this.notification: ', this.notification); - this.backendSrv.put(`/api/alerts/notification/${this.notification.id}`, this.notification) + this.backendSrv.put(`/api/alert-notifications/${this.notification.id}`, this.notification) .then(result => { this.notification = result; this.$scope.appEvent('alert-success', ['Notification created!', '']); @@ -45,7 +45,7 @@ export class AlertNotificationEditCtrl { this.$scope.appEvent('alert-error', ['Unable to create notification.', '']); }); } else { - this.backendSrv.post(`/api/alerts/notification`, this.notification) + this.backendSrv.post(`/api/alert-notifications`, this.notification) .then(result => { this.notification = result; this.$scope.appEvent('alert-success', ['Notification updated!', '']); diff --git a/public/app/features/alerting/notifications_list_ctrl.ts b/public/app/features/alerting/notifications_list_ctrl.ts index 54362104b31..d5a05b3edca 100644 --- a/public/app/features/alerting/notifications_list_ctrl.ts +++ 
b/public/app/features/alerting/notifications_list_ctrl.ts @@ -15,13 +15,13 @@ export class AlertNotificationsListCtrl { } loadNotifications() { - this.backendSrv.get(`/api/alerts/notifications`).then(result => { + this.backendSrv.get(`/api/alert-notifications`).then(result => { this.notifications = result; }); } deleteNotification(notificationId) { - this.backendSrv.delete(`/api/alerts/notification/${notificationId}`) + this.backendSrv.delete(`/api/alert-notifications/${notificationId}`) .then(() => { this.notifications = this.notifications.filter(notification => { return notification.id !== notificationId; diff --git a/public/app/plugins/datasource/grafana-live/plugin.json b/public/app/plugins/datasource/grafana-live/plugin.json new file mode 100644 index 00000000000..1f2ec204949 --- /dev/null +++ b/public/app/plugins/datasource/grafana-live/plugin.json @@ -0,0 +1,7 @@ +{ + "type": "datasource", + "name": "Grafana Live", + "id": "grafana-live", + + "metrics": true +} diff --git a/public/app/plugins/panel/graph/partials/tab_alerting.html b/public/app/plugins/panel/graph/partials/tab_alerting.html index 7efc1d1c6c0..4b76648a845 100644 --- a/public/app/plugins/panel/graph/partials/tab_alerting.html +++ b/public/app/plugins/panel/graph/partials/tab_alerting.html @@ -123,14 +123,14 @@
Information
Alert name - +
Alert description
- +