From c7442c0fd2eb888b527630db181e37bb5b5456ca Mon Sep 17 00:00:00 2001 From: Marcus Efraimsson Date: Fri, 10 Nov 2023 16:55:01 +0100 Subject: [PATCH] Instrumentation: Check embedded errors in query data response for plugin metrics/logs status label (#77613) Check embedded errors in query data response for plugin metrics/logs status label. Plugin Request Completed log messages are now logged with info level if status=ok, otherwise error level. Fixes #76769 --- .../clientmiddleware/logger_middleware.go | 42 +++--- .../clientmiddleware/metrics_middleware.go | 37 +++-- .../metrics_middleware_test.go | 18 ++- .../clientmiddleware/utils.go | 61 +++++++- .../clientmiddleware/utils_test.go | 137 ++++++++++++++++++ 5 files changed, 244 insertions(+), 51 deletions(-) create mode 100644 pkg/services/pluginsintegration/clientmiddleware/utils_test.go diff --git a/pkg/services/pluginsintegration/clientmiddleware/logger_middleware.go b/pkg/services/pluginsintegration/clientmiddleware/logger_middleware.go index 74dc492fc8a..81d1e9da74d 100644 --- a/pkg/services/pluginsintegration/clientmiddleware/logger_middleware.go +++ b/pkg/services/pluginsintegration/clientmiddleware/logger_middleware.go @@ -2,7 +2,6 @@ package clientmiddleware import ( "context" - "errors" "time" "github.com/grafana/grafana-plugin-sdk-go/backend" @@ -37,31 +36,32 @@ type LoggerMiddleware struct { features featuremgmt.FeatureToggles } -func (m *LoggerMiddleware) logRequest(ctx context.Context, fn func(ctx context.Context) error) error { - status := statusOK +func (m *LoggerMiddleware) logRequest(ctx context.Context, fn func(ctx context.Context) (requestStatus, error)) error { start := time.Now() timeBeforePluginRequest := log.TimeSinceStart(ctx, start) - err := fn(ctx) - if err != nil { - status = statusError - if errors.Is(err, context.Canceled) { - status = statusCancelled - } - } + status, err := fn(ctx) logParams := []any{ "status", status, "duration", time.Since(start), "eventName", "grafana-data-egress", "time_before_plugin_request", timeBeforePluginRequest, } - if status == statusError { + if err != nil { logParams = append(logParams, "error", err) } if m.features.IsEnabled(featuremgmt.FlagPluginsInstrumentationStatusSource) { logParams = append(logParams, "status_source", pluginrequestmeta.StatusSourceFromContext(ctx)) } - m.logger.FromContext(ctx).Info("Plugin Request Completed", logParams...) + + ctxLogger := m.logger.FromContext(ctx) + logFunc := ctxLogger.Info + if status > requestStatusOK { + logFunc = ctxLogger.Error + } + + logFunc("Plugin Request Completed", logParams...) + return err } @@ -71,11 +71,11 @@ func (m *LoggerMiddleware) QueryData(ctx context.Context, req *backend.QueryData } var resp *backend.QueryDataResponse - err := m.logRequest(ctx, func(ctx context.Context) (innerErr error) { + err := m.logRequest(ctx, func(ctx context.Context) (status requestStatus, innerErr error) { resp, innerErr = m.next.QueryData(ctx, req) if innerErr != nil { - return innerErr + return requestStatusFromError(innerErr), innerErr } ctxLogger := m.logger.FromContext(ctx) @@ -85,7 +85,7 @@ func (m *LoggerMiddleware) QueryData(ctx context.Context, req *backend.QueryData } } - return nil + return requestStatusFromQueryDataResponse(resp, innerErr), innerErr }) return resp, err @@ -96,9 +96,9 @@ func (m *LoggerMiddleware) CallResource(ctx context.Context, req *backend.CallRe return m.next.CallResource(ctx, req, sender) } - err := m.logRequest(ctx, func(ctx context.Context) (innerErr error) { + err := m.logRequest(ctx, func(ctx context.Context) (status requestStatus, innerErr error) { innerErr = m.next.CallResource(ctx, req, sender) - return innerErr + return requestStatusFromError(innerErr), innerErr }) return err @@ -110,9 +110,9 @@ func (m *LoggerMiddleware) CheckHealth(ctx context.Context, req *backend.CheckHe } var resp *backend.CheckHealthResult - err := m.logRequest(ctx, func(ctx context.Context) (innerErr error) { + err := m.logRequest(ctx, func(ctx context.Context) (status requestStatus, innerErr error) { resp, innerErr = m.next.CheckHealth(ctx, req) - return innerErr + return requestStatusFromError(innerErr), innerErr }) return resp, err @@ -124,9 +124,9 @@ func (m *LoggerMiddleware) CollectMetrics(ctx context.Context, req *backend.Coll } var resp *backend.CollectMetricsResult - err := m.logRequest(ctx, func(ctx context.Context) (innerErr error) { + err := m.logRequest(ctx, func(ctx context.Context) (status requestStatus, innerErr error) { resp, innerErr = m.next.CollectMetrics(ctx, req) - return innerErr + return requestStatusFromError(innerErr), innerErr }) return resp, err diff --git a/pkg/services/pluginsintegration/clientmiddleware/metrics_middleware.go b/pkg/services/pluginsintegration/clientmiddleware/metrics_middleware.go index f0ed23ba21f..afd0efc9d3c 100644 --- a/pkg/services/pluginsintegration/clientmiddleware/metrics_middleware.go +++ b/pkg/services/pluginsintegration/clientmiddleware/metrics_middleware.go @@ -2,7 +2,6 @@ package clientmiddleware import ( "context" - "errors" "time" "github.com/grafana/grafana-plugin-sdk-go/backend" @@ -109,27 +108,20 @@ func (m *MetricsMiddleware) instrumentPluginRequestSize(ctx context.Context, plu } // instrumentPluginRequest increments the m.pluginRequestCounter metric and tracks the duration of the given request. -func (m *MetricsMiddleware) instrumentPluginRequest(ctx context.Context, pluginCtx backend.PluginContext, endpoint string, fn func(context.Context) error) error { +func (m *MetricsMiddleware) instrumentPluginRequest(ctx context.Context, pluginCtx backend.PluginContext, endpoint string, fn func(context.Context) (requestStatus, error)) error { target, err := m.pluginTarget(ctx, pluginCtx.PluginID) if err != nil { return err } - status := statusOK start := time.Now() - err = fn(ctx) - if err != nil { - status = statusError - if errors.Is(err, context.Canceled) { - status = statusCancelled - } - } + status, err := fn(ctx) elapsed := time.Since(start) pluginRequestDurationLabels := []string{pluginCtx.PluginID, endpoint, target} - pluginRequestCounterLabels := []string{pluginCtx.PluginID, endpoint, status, target} - pluginRequestDurationSecondsLabels := []string{"grafana-backend", pluginCtx.PluginID, endpoint, status, target} + pluginRequestCounterLabels := []string{pluginCtx.PluginID, endpoint, status.String(), target} + pluginRequestDurationSecondsLabels := []string{"grafana-backend", pluginCtx.PluginID, endpoint, status.String(), target} if m.features.IsEnabled(featuremgmt.FlagPluginsInstrumentationStatusSource) { statusSource := pluginrequestmeta.StatusSourceFromContext(ctx) pluginRequestDurationLabels = append(pluginRequestDurationLabels, string(statusSource)) @@ -163,14 +155,17 @@ func (m *MetricsMiddleware) QueryData(ctx context.Context, req *backend.QueryDat for _, v := range req.Queries { requestSize += float64(len(v.JSON)) } + if err := m.instrumentPluginRequestSize(ctx, req.PluginContext, endpointQueryData, requestSize); err != nil { return nil, err } + var resp *backend.QueryDataResponse - err := m.instrumentPluginRequest(ctx, req.PluginContext, endpointQueryData, func(ctx context.Context) (innerErr error) { + err := m.instrumentPluginRequest(ctx, req.PluginContext, endpointQueryData, func(ctx context.Context) (status requestStatus, innerErr error) { resp, innerErr = m.next.QueryData(ctx, req) - return + return requestStatusFromQueryDataResponse(resp, innerErr), innerErr }) + return resp, err } @@ -178,25 +173,27 @@ func (m *MetricsMiddleware) CallResource(ctx context.Context, req *backend.CallR if err := m.instrumentPluginRequestSize(ctx, req.PluginContext, endpointCallResource, float64(len(req.Body))); err != nil { return err } - return m.instrumentPluginRequest(ctx, req.PluginContext, endpointCallResource, func(ctx context.Context) error { - return m.next.CallResource(ctx, req, sender) + return m.instrumentPluginRequest(ctx, req.PluginContext, endpointCallResource, func(ctx context.Context) (requestStatus, error) { + innerErr := m.next.CallResource(ctx, req, sender) + return requestStatusFromError(innerErr), innerErr }) } func (m *MetricsMiddleware) CheckHealth(ctx context.Context, req *backend.CheckHealthRequest) (*backend.CheckHealthResult, error) { var result *backend.CheckHealthResult - err := m.instrumentPluginRequest(ctx, req.PluginContext, endpointCheckHealth, func(ctx context.Context) (innerErr error) { + err := m.instrumentPluginRequest(ctx, req.PluginContext, endpointCheckHealth, func(ctx context.Context) (status requestStatus, innerErr error) { result, innerErr = m.next.CheckHealth(ctx, req) - return + return requestStatusFromError(innerErr), innerErr }) + return result, err } func (m *MetricsMiddleware) CollectMetrics(ctx context.Context, req *backend.CollectMetricsRequest) (*backend.CollectMetricsResult, error) { var result *backend.CollectMetricsResult - err := m.instrumentPluginRequest(ctx, req.PluginContext, endpointCollectMetrics, func(ctx context.Context) (innerErr error) { + err := m.instrumentPluginRequest(ctx, req.PluginContext, endpointCollectMetrics, func(ctx context.Context) (status requestStatus, innerErr error) { result, innerErr = m.next.CollectMetrics(ctx, req) - return + return requestStatusFromError(innerErr), innerErr }) return result, err } diff --git a/pkg/services/pluginsintegration/clientmiddleware/metrics_middleware_test.go b/pkg/services/pluginsintegration/clientmiddleware/metrics_middleware_test.go index 0fe816bf9dc..66a1bc813d8 100644 --- a/pkg/services/pluginsintegration/clientmiddleware/metrics_middleware_test.go +++ b/pkg/services/pluginsintegration/clientmiddleware/metrics_middleware_test.go @@ -90,7 +90,7 @@ func TestInstrumentationMiddleware(t *testing.T) { require.Equal(t, 1, testutil.CollectAndCount(promRegistry, metricRequestDurationMs)) require.Equal(t, 1, testutil.CollectAndCount(promRegistry, metricRequestDurationS)) - counter := mw.pluginMetrics.pluginRequestCounter.WithLabelValues(pluginID, tc.expEndpoint, statusOK, string(backendplugin.TargetUnknown)) + counter := mw.pluginMetrics.pluginRequestCounter.WithLabelValues(pluginID, tc.expEndpoint, requestStatusOK.String(), string(backendplugin.TargetUnknown)) require.Equal(t, 1.0, testutil.ToFloat64(counter)) for _, m := range []string{metricRequestDurationMs, metricRequestDurationS} { require.NoError(t, checkHistogram(promRegistry, m, map[string]string{ @@ -115,10 +115,16 @@ func TestInstrumentationMiddleware(t *testing.T) { func TestInstrumentationMiddlewareStatusSource(t *testing.T) { const labelStatusSource = "status_source" - queryDataCounterLabels := prometheus.Labels{ + queryDataOKCounterLabels := prometheus.Labels{ "plugin_id": pluginID, "endpoint": endpointQueryData, - "status": statusOK, + "status": requestStatusOK.String(), + "target": string(backendplugin.TargetUnknown), + } + queryDataErrorCounterLabels := prometheus.Labels{ + "plugin_id": pluginID, + "endpoint": endpointQueryData, + "status": requestStatusError.String(), "target": string(backendplugin.TargetUnknown), } downstreamErrorResponse := backend.DataResponse{ @@ -180,13 +186,13 @@ func TestInstrumentationMiddlewareStatusSource(t *testing.T) { } _, err := cdt.Decorator.QueryData(context.Background(), &backend.QueryDataRequest{PluginContext: pCtx}) require.NoError(t, err) - counter, err := metricsMw.pluginMetrics.pluginRequestCounter.GetMetricWith(newLabels(queryDataCounterLabels, nil)) + counter, err := metricsMw.pluginMetrics.pluginRequestCounter.GetMetricWith(newLabels(queryDataErrorCounterLabels, nil)) require.NoError(t, err) require.Equal(t, 1.0, testutil.ToFloat64(counter)) // error_source should not be defined at all _, err = metricsMw.pluginMetrics.pluginRequestCounter.GetMetricWith(newLabels( - queryDataCounterLabels, + queryDataOKCounterLabels, prometheus.Labels{ labelStatusSource: string(backend.ErrorSourceDownstream), }), @@ -204,7 +210,7 @@ func TestInstrumentationMiddlewareStatusSource(t *testing.T) { _, err := cdt.Decorator.QueryData(context.Background(), &backend.QueryDataRequest{PluginContext: pCtx}) require.NoError(t, err) counter, err := metricsMw.pluginMetrics.pluginRequestCounter.GetMetricWith(newLabels( - queryDataCounterLabels, + queryDataErrorCounterLabels, prometheus.Labels{ labelStatusSource: string(backend.ErrorSourceDownstream), }), diff --git a/pkg/services/pluginsintegration/clientmiddleware/utils.go b/pkg/services/pluginsintegration/clientmiddleware/utils.go index 462d4250f1a..dc6342295cc 100644 --- a/pkg/services/pluginsintegration/clientmiddleware/utils.go +++ b/pkg/services/pluginsintegration/clientmiddleware/utils.go @@ -1,14 +1,30 @@ package clientmiddleware import ( + "context" + "errors" + "github.com/grafana/grafana-plugin-sdk-go/backend" ) -const ( - statusOK = "ok" - statusError = "error" - statusCancelled = "cancelled" +type requestStatus int +const ( + requestStatusOK requestStatus = iota + requestStatusCancelled + requestStatusError +) + +func (status requestStatus) String() string { + names := [...]string{"ok", "cancelled", "error"} + if status < requestStatusOK || status > requestStatusError { + return "" + } + + return names[status] +} + +const ( endpointCallResource = "callResource" endpointCheckHealth = "checkHealth" endpointCollectMetrics = "collectMetrics" @@ -23,3 +39,40 @@ type callResourceResponseSenderFunc func(res *backend.CallResourceResponse) erro func (fn callResourceResponseSenderFunc) Send(res *backend.CallResourceResponse) error { return fn(res) } + +func requestStatusFromError(err error) requestStatus { + status := requestStatusOK + if err != nil { + status = requestStatusError + if errors.Is(err, context.Canceled) { + status = requestStatusCancelled + } + } + + return status +} + +func requestStatusFromQueryDataResponse(res *backend.QueryDataResponse, err error) requestStatus { + if err != nil { + return requestStatusFromError(err) + } + + status := requestStatusOK + + if res != nil { + for _, dr := range res.Responses { + if dr.Error != nil { + s := requestStatusFromError(dr.Error) + if s > status { + status = s + } + + if status == requestStatusError { + break + } + } + } + } + + return status +} diff --git a/pkg/services/pluginsintegration/clientmiddleware/utils_test.go b/pkg/services/pluginsintegration/clientmiddleware/utils_test.go new file mode 100644 index 00000000000..818a52b4ca5 --- /dev/null +++ b/pkg/services/pluginsintegration/clientmiddleware/utils_test.go @@ -0,0 +1,137 @@ +package clientmiddleware + +import ( + "context" + "errors" + "fmt" + "testing" + + "github.com/grafana/grafana-plugin-sdk-go/backend" + "github.com/grafana/grafana-plugin-sdk-go/data" + "github.com/stretchr/testify/require" +) + +func TestRequestStatus(t *testing.T) { + tcs := []struct { + s requestStatus + expectedLabel string + }{ + { + s: requestStatusOK, + expectedLabel: "ok", + }, + { + s: requestStatusError, + expectedLabel: "error", + }, + { + s: requestStatusCancelled, + expectedLabel: "cancelled", + }, + } + + for _, tc := range tcs { + t.Run(tc.s.String(), func(t *testing.T) { + require.Equal(t, tc.expectedLabel, tc.s.String()) + require.Equal(t, tc.expectedLabel, fmt.Sprint(tc.s)) + }) + } +} + +func TestRequestStatusFromError(t *testing.T) { + tcs := []struct { + desc string + err error + expectedStatus requestStatus + }{ + { + desc: "no error should be status ok", + err: nil, + expectedStatus: requestStatusOK, + }, + { + desc: "error should be status error", + err: errors.New("boom"), + expectedStatus: requestStatusError, + }, + { + desc: "context canceled should be status cancelled", + err: context.Canceled, + expectedStatus: requestStatusCancelled, + }, + } + + for _, tc := range tcs { + t.Run(tc.desc, func(t *testing.T) { + status := requestStatusFromError(tc.err) + require.Equal(t, tc.expectedStatus, status) + }) + } +} + +func TestRequestStatusFromQueryDataResponse(t *testing.T) { + responseWithoutError := backend.NewQueryDataResponse() + responseWithoutError.Responses["A"] = backend.DataResponse{ + Frames: data.Frames{data.NewFrame("test")}, + } + + responseWithError := backend.NewQueryDataResponse() + responseWithError.Responses["A"] = backend.DataResponse{ + Error: errors.New("boom"), + } + responseWithMultipleErrors := backend.NewQueryDataResponse() + responseWithMultipleErrors.Responses["A"] = backend.DataResponse{ + Error: context.Canceled, + } + responseWithMultipleErrors.Responses["B"] = backend.DataResponse{ + Frames: data.Frames{data.NewFrame("test")}, + } + responseWithMultipleErrors.Responses["C"] = backend.DataResponse{ + Error: errors.New("boom"), + } + + tcs := []struct { + desc string + resp *backend.QueryDataResponse + err error + expectedStatus requestStatus + }{ + { + desc: "no error should be status ok", + err: nil, + expectedStatus: requestStatusOK, + }, + { + desc: "error should be status error", + err: errors.New("boom"), + expectedStatus: requestStatusError, + }, + { + desc: "context canceled should be status cancelled", + err: context.Canceled, + expectedStatus: requestStatusCancelled, + }, + { + desc: "response without error should be status ok", + resp: responseWithoutError, + expectedStatus: requestStatusOK, + }, + { + desc: "response with error should be status error", + resp: responseWithError, + expectedStatus: requestStatusError, + }, + { + desc: "response with multiple error should pick the highest status cancelled", + resp: responseWithMultipleErrors, + expectedStatus: requestStatusError, + }, + } + + for _, tc := range tcs { + t.Run(tc.desc, func(t *testing.T) { + status := requestStatusFromQueryDataResponse(tc.resp, tc.err) + require.Equal(t, tc.expectedStatus, status) + }) + } +}