Backend Plugins: Collect and expose metrics and plugin process health check (#21481)

Adds support for collecting metrics from backend plugins and
exposing them through Grafana's Prometheus metrics endpoint.
Enables checking the health of a backend plugin via the route
`/api/plugins/<plugin id>/health`.
Uses sdk v0.6.0.

Closes #20984
This commit is contained in:
Marcus Efraimsson
2020-01-15 13:10:48 +01:00
committed by GitHub
parent f56f54b1a3
commit 5c711bfb79
34 changed files with 2406 additions and 623 deletions

View File

@ -1,14 +1,25 @@
package backendplugin
import (
"bytes"
"context"
"errors"
"fmt"
"github.com/grafana/grafana-plugin-sdk-go/genproto/pluginv2"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/expfmt"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
datasourceV1 "github.com/grafana/grafana-plugin-model/go/datasource"
rendererV1 "github.com/grafana/grafana-plugin-model/go/renderer"
backend "github.com/grafana/grafana-plugin-sdk-go/backend"
"github.com/grafana/grafana/pkg/infra/log"
"github.com/grafana/grafana/pkg/plugins/backendplugin/collector"
"github.com/grafana/grafana/pkg/util/errutil"
plugin "github.com/hashicorp/go-plugin"
dto "github.com/prometheus/client_model/go"
)
// BackendPlugin is a registered backend plugin.
@ -20,6 +31,7 @@ type BackendPlugin struct {
client *plugin.Client
logger log.Logger
startFns PluginStartFuncs
diagnostics backend.DiagnosticsPlugin
}
func (p *BackendPlugin) start(ctx context.Context) error {
@ -33,6 +45,11 @@ func (p *BackendPlugin) start(ctx context.Context) error {
var client *Client
if p.client.NegotiatedVersion() > 1 {
rawDiagnostics, err := rpcClient.Dispense("diagnostics")
if err != nil {
return err
}
rawBackend, err := rpcClient.Dispense("backend")
if err != nil {
return err
@ -43,6 +60,12 @@ func (p *BackendPlugin) start(ctx context.Context) error {
return err
}
if rawDiagnostics != nil {
if plugin, ok := rawDiagnostics.(backend.DiagnosticsPlugin); ok {
p.diagnostics = plugin
}
}
client = &Client{}
if rawBackend != nil {
if plugin, ok := rawBackend.(backend.BackendPlugin); ok {
@ -96,3 +119,186 @@ func (p *BackendPlugin) stop() error {
}
return nil
}
// supportsDiagnostics reports whether the backend plugin supports diagnostics,
// i.e. metrics collection and health checks. It returns true when a
// diagnostics client is set on the plugin.
func (p *BackendPlugin) supportsDiagnostics() bool {
return p.diagnostics != nil
}
// CollectMetrics implements the collector.Collector interface.
//
// It requests metrics from the plugin's diagnostics client, parses the
// returned Prometheus text payload and forwards every metric family to
// convertMetricFamily, which sends the resulting metrics on ch.
//
// nil is returned without collecting anything when the plugin has no
// diagnostics client, when its client is missing or has exited, when the
// plugin answers with gRPC code Unimplemented, or when the response carries no
// Prometheus payload. Any other request error is returned as is; a parse
// failure is returned wrapped.
func (p *BackendPlugin) CollectMetrics(ctx context.Context, ch chan<- prometheus.Metric) error {
	if p.diagnostics == nil || p.client == nil || p.client.Exited() {
		return nil
	}

	resp, err := p.diagnostics.CollectMetrics(ctx, &pluginv2.CollectMetrics_Request{})
	if err != nil {
		// A plugin that does not implement metrics collection is not an error.
		if s, ok := status.FromError(err); ok && s.Code() == codes.Unimplemented {
			return nil
		}
		return err
	}
	if resp == nil || resp.Metrics == nil || resp.Metrics.Prometheus == nil {
		return nil
	}

	var textParser expfmt.TextParser
	metricFamilies, err := textParser.TextToMetricFamilies(bytes.NewReader(resp.Metrics.Prometheus))
	if err != nil {
		return errutil.Wrap("failed to parse collected metrics", err)
	}

	for _, family := range metricFamilies {
		// Give families without a help text a generic one before converting.
		if family.Help == nil {
			fallback := fmt.Sprintf("Metric read from %s plugin", p.id)
			family.Help = &fallback
		}
		convertMetricFamily(p.id, family, ch, p.logger)
	}

	return nil
}
// checkHealth asks the plugin's diagnostics client for the plugin's health.
//
// A response with status UNKNOWN and a nil error is returned when the plugin
// has no diagnostics client, when its client is missing or has exited, or when
// the plugin answers with gRPC code Unimplemented (in which case Info explains
// that the health check is not implemented). Any other error from the plugin
// is returned together with a nil response.
func (p *BackendPlugin) checkHealth(ctx context.Context) (*pluginv2.CheckHealth_Response, error) {
	unavailable := p.diagnostics == nil || p.client == nil || p.client.Exited()
	if unavailable {
		return &pluginv2.CheckHealth_Response{Status: pluginv2.CheckHealth_Response_UNKNOWN}, nil
	}

	resp, err := p.diagnostics.CheckHealth(ctx, &pluginv2.CheckHealth_Request{})
	if err == nil {
		return resp, nil
	}

	if s, ok := status.FromError(err); ok && s.Code() == codes.Unimplemented {
		return &pluginv2.CheckHealth_Response{
			Status: pluginv2.CheckHealth_Response_UNKNOWN,
			Info:   "Health check not implemented",
		}, nil
	}

	return nil, err
}
// convertMetricFamily converts every metric of metricFamily to a
// prometheus.Metric and sends it on ch.
// Based on https://github.com/prometheus/node_exporter/blob/3ddc82c2d8d11eec53ed5faa8db969a1bb81f8bb/collector/textfile.go#L66-L165
//
// The metric name is prefixed with collector.Namespace and every metric gets
// an additional "plugin_id" label holding pluginID. Label names that appear
// on some metrics of the family but not on the current one are added with an
// empty value, so all metrics of the family expose the same set of labels.
//
// Custom timestamps are not supported; they are logged and ignored. Metrics
// of an unknown type, and metrics that cannot be converted (for example
// because the plugin already uses a "plugin_id" label, which yields duplicate
// label names), are logged and skipped rather than causing a panic, since the
// input is produced by the plugin and must not be able to crash the caller.
func convertMetricFamily(pluginID string, metricFamily *dto.MetricFamily, ch chan<- prometheus.Metric, logger log.Logger) {
	// Name, type and help text are properties of the family; resolve them once.
	metricName := prometheus.BuildFQName(collector.Namespace, "", metricFamily.GetName())
	metricType := metricFamily.GetType()
	help := metricFamily.GetHelp()

	// Union of the label names used by any metric of the family.
	allLabelNames := map[string]struct{}{}
	for _, metric := range metricFamily.GetMetric() {
		for _, label := range metric.GetLabel() {
			allLabelNames[label.GetName()] = struct{}{}
		}
	}

	for _, metric := range metricFamily.GetMetric() {
		if metric.TimestampMs != nil {
			logger.Warn("Ignoring unsupported custom timestamp on metric", "metric", metric)
		}

		labels := metric.GetLabel()
		names := make([]string, 0, len(allLabelNames)+1)
		values := make([]string, 0, len(allLabelNames)+1)
		present := make(map[string]struct{}, len(labels)+1)
		for _, label := range labels {
			names = append(names, label.GetName())
			values = append(values, label.GetValue())
			present[label.GetName()] = struct{}{}
		}
		names = append(names, "plugin_id")
		values = append(values, pluginID)
		present["plugin_id"] = struct{}{}

		// Pad with the labels this metric lacks but its siblings have.
		for name := range allLabelNames {
			if _, ok := present[name]; !ok {
				names = append(names, name)
				values = append(values, "")
			}
		}

		desc := prometheus.NewDesc(metricName, help, names, nil)

		var (
			converted prometheus.Metric
			err       error
		)
		switch metricType {
		case dto.MetricType_COUNTER:
			converted, err = prometheus.NewConstMetric(desc, prometheus.CounterValue, metric.GetCounter().GetValue(), values...)
		case dto.MetricType_GAUGE:
			converted, err = prometheus.NewConstMetric(desc, prometheus.GaugeValue, metric.GetGauge().GetValue(), values...)
		case dto.MetricType_UNTYPED:
			converted, err = prometheus.NewConstMetric(desc, prometheus.UntypedValue, metric.GetUntyped().GetValue(), values...)
		case dto.MetricType_SUMMARY:
			summary := metric.GetSummary()
			quantiles := make(map[float64]float64, len(summary.GetQuantile()))
			for _, q := range summary.GetQuantile() {
				quantiles[q.GetQuantile()] = q.GetValue()
			}
			converted, err = prometheus.NewConstSummary(desc, summary.GetSampleCount(), summary.GetSampleSum(), quantiles, values...)
		case dto.MetricType_HISTOGRAM:
			histogram := metric.GetHistogram()
			buckets := make(map[float64]uint64, len(histogram.GetBucket()))
			for _, b := range histogram.GetBucket() {
				buckets[b.GetUpperBound()] = b.GetCumulativeCount()
			}
			converted, err = prometheus.NewConstHistogram(desc, histogram.GetSampleCount(), histogram.GetSampleSum(), buckets, values...)
		default:
			logger.Error("unknown metric type", "type", metricType)
			continue
		}

		if err != nil {
			logger.Error("Failed to convert plugin metric, skipping it", "pluginId", pluginID, "metric", metricName, "error", err)
			continue
		}
		ch <- converted
	}
}