Plugins: Add status_source label to plugin request metrics (#76236)

* Plugins: Chore: Renamed instrumentation middleware to metrics middleware

* Removed repeated logger attributes in middleware and contextual logger

* renamed loggerParams to logParams

* PR review suggestion

* Add pluginsInstrumentationStatusSource feature toggle

* Plugin error source prometheus metrics

* Add error_source to logs

* re-generate feature toggles

* fix compilation issues

* remove unwanted changes

* Removed logger middleware changes, implement error source using context

* Renamed pluginmeta to pluginrequestmeta, changed some method names

* Fix comment

* pluginrequestmeta.go -> plugin_request_meta.go

* Replaced plugin request meta with status source

* Add tests for pluginrequestmeta status source

* Fix potential nil pointer dereference in instrmentation middleware

* Add metrics middleware tests

* Sort imports in clienttest.go

* Add StatusSourceFromContext test

* Add error_source label to plugin_request_duration_seconds

* Re-generate feature flags

* lint

* Use StatusSourcePlugin by default

* re-generate feature flags
This commit is contained in:
Giuseppe Guerra
2023-10-17 10:27:45 +02:00
committed by GitHub
parent 8ebbe06377
commit f5076d1868
12 changed files with 363 additions and 24 deletions

View File

@ -16,20 +16,21 @@ import (
"github.com/grafana/grafana/pkg/plugins/backendplugin"
"github.com/grafana/grafana/pkg/plugins/manager/client/clienttest"
"github.com/grafana/grafana/pkg/plugins/manager/fakes"
"github.com/grafana/grafana/pkg/plugins/pluginrequestmeta"
"github.com/grafana/grafana/pkg/services/featuremgmt"
)
const (
pluginID = "plugin-id"
metricRequestTotal = "grafana_plugin_request_total"
metricRequestDurationMs = "grafana_plugin_request_duration_milliseconds"
metricRequestDurationS = "grafana_plugin_request_duration_seconds"
metricRequestSize = "grafana_plugin_request_size_bytes"
)
func TestInstrumentationMiddleware(t *testing.T) {
const (
pluginID = "plugin-id"
metricRequestTotal = "grafana_plugin_request_total"
metricRequestDurationMs = "grafana_plugin_request_duration_milliseconds"
metricRequestDurationS = "grafana_plugin_request_duration_seconds"
metricRequestSize = "grafana_plugin_request_size_bytes"
)
pCtx := backend.PluginContext{PluginID: pluginID}
t.Run("should instrument requests", func(t *testing.T) {
for _, tc := range []struct {
expEndpoint string
@ -75,7 +76,7 @@ func TestInstrumentationMiddleware(t *testing.T) {
JSONData: plugins.JSONData{ID: pluginID, Backend: true},
}))
mw := newMetricsMiddleware(promRegistry, pluginsRegistry)
mw := newMetricsMiddleware(promRegistry, pluginsRegistry, featuremgmt.WithFeatures())
cdt := clienttest.NewClientDecoratorTest(t, clienttest.WithMiddlewares(
plugins.ClientMiddlewareFunc(func(next plugins.Client) plugins.Client {
mw.next = next
@ -112,6 +113,173 @@ func TestInstrumentationMiddleware(t *testing.T) {
})
}
func TestInstrumentationMiddlewareStatusSource(t *testing.T) {
const labelStatusSource = "status_source"
queryDataCounterLabels := prometheus.Labels{
"plugin_id": pluginID,
"endpoint": endpointQueryData,
"status": statusOK,
"target": string(backendplugin.TargetUnknown),
}
downstreamErrorResponse := backend.DataResponse{
Frames: nil,
Error: errors.New("bad gateway"),
Status: 502,
ErrorSource: backend.ErrorSourceDownstream,
}
pluginErrorResponse := backend.DataResponse{
Frames: nil,
Error: errors.New("internal error"),
Status: 500,
ErrorSource: backend.ErrorSourcePlugin,
}
legacyErrorResponse := backend.DataResponse{
Frames: nil,
Error: errors.New("internal error"),
Status: 500,
ErrorSource: "",
}
okResponse := backend.DataResponse{
Frames: nil,
Error: nil,
Status: 200,
ErrorSource: "",
}
pCtx := backend.PluginContext{PluginID: pluginID}
promRegistry := prometheus.NewRegistry()
pluginsRegistry := fakes.NewFakePluginRegistry()
require.NoError(t, pluginsRegistry.Add(context.Background(), &plugins.Plugin{
JSONData: plugins.JSONData{ID: pluginID, Backend: true},
}))
features := featuremgmt.WithFeatures(featuremgmt.FlagPluginsInstrumentationStatusSource)
metricsMw := newMetricsMiddleware(promRegistry, pluginsRegistry, features)
cdt := clienttest.NewClientDecoratorTest(t, clienttest.WithMiddlewares(
plugins.ClientMiddlewareFunc(func(next plugins.Client) plugins.Client {
metricsMw.next = next
return metricsMw
}),
))
t.Run("Metrics", func(t *testing.T) {
t.Run("Should ignore ErrorSource if feature flag is disabled", func(t *testing.T) {
// Use different middleware without feature flag
metricsMw := newMetricsMiddleware(prometheus.NewRegistry(), pluginsRegistry, featuremgmt.WithFeatures())
cdt := clienttest.NewClientDecoratorTest(t, clienttest.WithMiddlewares(
plugins.ClientMiddlewareFunc(func(next plugins.Client) plugins.Client {
metricsMw.next = next
return metricsMw
}),
))
cdt.TestClient.QueryDataFunc = func(ctx context.Context, req *backend.QueryDataRequest) (*backend.QueryDataResponse, error) {
return &backend.QueryDataResponse{Responses: map[string]backend.DataResponse{"A": downstreamErrorResponse}}, nil
}
_, err := cdt.Decorator.QueryData(context.Background(), &backend.QueryDataRequest{PluginContext: pCtx})
require.NoError(t, err)
counter, err := metricsMw.pluginMetrics.pluginRequestCounter.GetMetricWith(newLabels(queryDataCounterLabels, nil))
require.NoError(t, err)
require.Equal(t, 1.0, testutil.ToFloat64(counter))
// error_source should not be defined at all
_, err = metricsMw.pluginMetrics.pluginRequestCounter.GetMetricWith(newLabels(
queryDataCounterLabels,
prometheus.Labels{
labelStatusSource: string(backend.ErrorSourceDownstream),
}),
)
require.Error(t, err)
require.ErrorContains(t, err, "inconsistent label cardinality")
})
t.Run("Should add error_source label if feature flag is enabled", func(t *testing.T) {
metricsMw.pluginMetrics.pluginRequestCounter.Reset()
cdt.TestClient.QueryDataFunc = func(ctx context.Context, req *backend.QueryDataRequest) (*backend.QueryDataResponse, error) {
return &backend.QueryDataResponse{Responses: map[string]backend.DataResponse{"A": downstreamErrorResponse}}, nil
}
_, err := cdt.Decorator.QueryData(context.Background(), &backend.QueryDataRequest{PluginContext: pCtx})
require.NoError(t, err)
counter, err := metricsMw.pluginMetrics.pluginRequestCounter.GetMetricWith(newLabels(
queryDataCounterLabels,
prometheus.Labels{
labelStatusSource: string(backend.ErrorSourceDownstream),
}),
)
require.NoError(t, err)
require.Equal(t, 1.0, testutil.ToFloat64(counter))
})
})
t.Run("Priority", func(t *testing.T) {
for _, tc := range []struct {
name string
responses map[string]backend.DataResponse
expStatusSource pluginrequestmeta.StatusSource
}{
{
"Default status source for ok responses should be plugin",
map[string]backend.DataResponse{"A": okResponse},
pluginrequestmeta.StatusSourcePlugin,
},
{
"Plugin errors should have higher priority than downstream errors",
map[string]backend.DataResponse{
"A": pluginErrorResponse,
"B": downstreamErrorResponse,
},
pluginrequestmeta.StatusSourcePlugin,
},
{
"Errors without ErrorSource should be reported as plugin status source",
map[string]backend.DataResponse{"A": legacyErrorResponse},
pluginrequestmeta.StatusSourcePlugin,
},
{
"Downstream errors should have higher priority than ok responses",
map[string]backend.DataResponse{
"A": okResponse,
"B": downstreamErrorResponse,
},
pluginrequestmeta.StatusSourceDownstream,
},
{
"Plugin errors should have higher priority than ok responses",
map[string]backend.DataResponse{
"A": okResponse,
"B": pluginErrorResponse,
},
pluginrequestmeta.StatusSourcePlugin,
},
{
"Legacy errors should have higher priority than ok responses",
map[string]backend.DataResponse{
"A": okResponse,
"B": legacyErrorResponse,
},
pluginrequestmeta.StatusSourcePlugin,
},
} {
t.Run(tc.name, func(t *testing.T) {
t.Cleanup(func() {
cdt.QueryDataCtx = nil
cdt.QueryDataReq = nil
})
cdt.TestClient.QueryDataFunc = func(ctx context.Context, req *backend.QueryDataRequest) (*backend.QueryDataResponse, error) {
cdt.QueryDataCtx = ctx
cdt.QueryDataReq = req
return &backend.QueryDataResponse{Responses: tc.responses}, nil
}
_, err := cdt.Decorator.QueryData(context.Background(), &backend.QueryDataRequest{PluginContext: pCtx})
require.NoError(t, err)
ctxStatusSource := pluginrequestmeta.StatusSourceFromContext(cdt.QueryDataCtx)
require.Equal(t, tc.expStatusSource, ctxStatusSource)
})
}
})
}
// checkHistogram is a utility function that checks if a histogram with the given name and label values exists
// and has been observed at least once.
func checkHistogram(promRegistry *prometheus.Registry, expMetricName string, expLabels map[string]string) error {
@ -162,3 +330,18 @@ func checkHistogram(promRegistry *prometheus.Registry, expMetricName string, exp
}
return nil
}
// newLabels creates a new prometheus.Labels from the given initial labels and additional labels.
// The additionalLabels are merged into the initial ones, and will overwrite a value if already set in initialLabels.
func newLabels(initialLabels prometheus.Labels, additional ...prometheus.Labels) prometheus.Labels {
r := make(prometheus.Labels, len(initialLabels)+len(additional)/2)
for k, v := range initialLabels {
r[k] = v
}
for _, l := range additional {
for k, v := range l {
r[k] = v
}
}
return r
}