diff --git a/docs/sources/setup-grafana/configure-grafana/feature-toggles/index.md b/docs/sources/setup-grafana/configure-grafana/feature-toggles/index.md index c511abf68a1..179b879be62 100644 --- a/docs/sources/setup-grafana/configure-grafana/feature-toggles/index.md +++ b/docs/sources/setup-grafana/configure-grafana/feature-toggles/index.md @@ -134,6 +134,7 @@ Experimental features might be changed or removed without prior notice. | `angularDeprecationUI` | Display new Angular deprecation-related UI features | | `dashgpt` | Enable AI powered features in dashboards | | `sseGroupByDatasource` | Send query to the same datasource in a single request when using server side expressions | +| `requestInstrumentationStatusSource` | Include a status source label for request metrics and logs | ## Development feature toggles diff --git a/packages/grafana-data/src/types/featureToggles.gen.ts b/packages/grafana-data/src/types/featureToggles.gen.ts index 69e7a1f4416..188c660696c 100644 --- a/packages/grafana-data/src/types/featureToggles.gen.ts +++ b/packages/grafana-data/src/types/featureToggles.gen.ts @@ -124,4 +124,5 @@ export interface FeatureToggles { reportingRetries?: boolean; newBrowseDashboards?: boolean; sseGroupByDatasource?: boolean; + requestInstrumentationStatusSource?: boolean; } diff --git a/pkg/api/metrics.go b/pkg/api/metrics.go index 90ce85e2514..518c2ca7ff2 100644 --- a/pkg/api/metrics.go +++ b/pkg/api/metrics.go @@ -1,6 +1,7 @@ package api import ( + "context" "errors" "fmt" "net/http" @@ -9,6 +10,7 @@ import ( "github.com/grafana/grafana/pkg/api/dtos" "github.com/grafana/grafana/pkg/api/response" + "github.com/grafana/grafana/pkg/middleware/requestmeta" contextmodel "github.com/grafana/grafana/pkg/services/contexthandler/model" "github.com/grafana/grafana/pkg/services/datasources" "github.com/grafana/grafana/pkg/services/featuremgmt" @@ -60,10 +62,10 @@ func (hs *HTTPServer) QueryMetricsV2(c *contextmodel.ReqContext) response.Respon if err != nil { return hs.handleQueryMetricsError(err) } - return hs.toJsonStreamingResponse(resp) + return hs.toJsonStreamingResponse(c.Req.Context(), resp) } -func (hs *HTTPServer) toJsonStreamingResponse(qdr *backend.QueryDataResponse) response.Response { +func (hs *HTTPServer) toJsonStreamingResponse(ctx context.Context, qdr *backend.QueryDataResponse) response.Response { statusWhenError := http.StatusBadRequest if hs.Features.IsEnabled(featuremgmt.FlagDatasourceQueryMultiStatus) { statusWhenError = http.StatusMultiStatus @@ -76,6 +78,11 @@ func (hs *HTTPServer) toJsonStreamingResponse(qdr *backend.QueryDataResponse) re } } + if statusCode == statusWhenError { + // an error in the response we treat as downstream. + requestmeta.WithDownstreamStatusSource(ctx) + } + return response.JSONStreaming(statusCode, qdr) } diff --git a/pkg/api/metrics_test.go b/pkg/api/metrics_test.go index 10321db19c0..279619bcabc 100644 --- a/pkg/api/metrics_test.go +++ b/pkg/api/metrics_test.go @@ -308,7 +308,6 @@ func TestDataSourceQueryError(t *testing.T) { resp, err := srv.SendJSON(req) require.NoError(t, err) - require.Equal(t, tc.expectedStatus, resp.StatusCode) require.Equal(t, tc.expectedStatus, resp.StatusCode) body, err := io.ReadAll(resp.Body) require.NoError(t, err) diff --git a/pkg/api/plugin_resource.go b/pkg/api/plugin_resource.go index 6f94a627ad2..d79a631545b 100644 --- a/pkg/api/plugin_resource.go +++ b/pkg/api/plugin_resource.go @@ -9,6 +9,8 @@ import ( "github.com/grafana/grafana-plugin-sdk-go/backend" + "github.com/grafana/grafana/pkg/api/response" + "github.com/grafana/grafana/pkg/middleware/requestmeta" "github.com/grafana/grafana/pkg/plugins/backendplugin" "github.com/grafana/grafana/pkg/plugins/httpresponsesender" contextmodel "github.com/grafana/grafana/pkg/services/contexthandler/model" @@ -44,7 +46,10 @@ func (hs *HTTPServer) callPluginResource(c *contextmodel.ReqContext, pluginID st if err = hs.makePluginResourceRequest(c.Resp, req, pCtx); err != nil { handleCallResourceError(err, c) + return } + + requestmeta.WithStatusSource(c.Req.Context(), c.Resp.Status()) } func (hs *HTTPServer) callPluginResourceWithDataSource(c *contextmodel.ReqContext, pluginID string, ds *datasources.DataSource) { @@ -77,7 +82,10 @@ func (hs *HTTPServer) callPluginResourceWithDataSource(c *contextmodel.ReqContex if err = hs.makePluginResourceRequest(c.Resp, req, pCtx); err != nil { handleCallResourceError(err, c) + return } + + requestmeta.WithStatusSource(c.Req.Context(), c.Resp.Status()) } func (hs *HTTPServer) pluginResourceRequest(c *contextmodel.ReqContext) (*http.Request, error) { @@ -118,14 +126,15 @@ func (hs *HTTPServer) makePluginResourceRequest(w http.ResponseWriter, req *http func handleCallResourceError(err error, reqCtx *contextmodel.ReqContext) { if errors.Is(err, backendplugin.ErrPluginUnavailable) { - reqCtx.JsonApiErr(503, "Plugin unavailable", err) + reqCtx.JsonApiErr(http.StatusServiceUnavailable, "Plugin unavailable", err) return } if errors.Is(err, backendplugin.ErrMethodNotImplemented) { - reqCtx.JsonApiErr(404, "Not found", err) + reqCtx.JsonApiErr(http.StatusNotFound, "Not found", err) return } - reqCtx.JsonApiErr(500, "Failed to call resource", err) + resp := response.ErrOrFallback(http.StatusInternalServerError, "Failed to call resource", err) + resp.WriteTo(reqCtx) } diff --git a/pkg/api/pluginproxy/ds_proxy.go b/pkg/api/pluginproxy/ds_proxy.go index 21ebadbd612..5e1d017d97a 100644 --- a/pkg/api/pluginproxy/ds_proxy.go +++ b/pkg/api/pluginproxy/ds_proxy.go @@ -120,6 +120,7 @@ func (proxy *DataSourceProxy) HandleRequest() { Body: io.NopCloser(strings.NewReader(msg)), ContentLength: int64(len(msg)), Header: http.Header{}, + Request: resp.Request, } } return nil diff --git a/pkg/api/plugins.go b/pkg/api/plugins.go index 5757295c632..99cf2967ccb 100644 --- a/pkg/api/plugins.go +++ b/pkg/api/plugins.go @@ -395,9 +395,9 @@ func (hs *HTTPServer) CheckHealth(c *contextmodel.ReqContext) response.Response pCtx, err := hs.pluginContextProvider.Get(c.Req.Context(), pluginID, c.SignedInUser, c.OrgID) if err != nil { if errors.Is(err, plugincontext.ErrPluginNotFound) { - return response.Error(404, "Plugin not found", nil) + return response.Error(http.StatusNotFound, "Plugin not found", nil) } - return response.Error(500, "Failed to get plugin settings", err) + return response.Error(http.StatusInternalServerError, "Failed to get plugin settings", err) } resp, err := hs.pluginClient.CheckHealth(c.Req.Context(), &backend.CheckHealthRequest{ PluginContext: pCtx, @@ -417,14 +417,14 @@ func (hs *HTTPServer) CheckHealth(c *contextmodel.ReqContext) response.Response var jsonDetails map[string]any err = json.Unmarshal(resp.JSONDetails, &jsonDetails) if err != nil { - return response.Error(500, "Failed to unmarshal detailed response from backend plugin", err) + return response.Error(http.StatusInternalServerError, "Failed to unmarshal detailed response from backend plugin", err) } payload["details"] = jsonDetails } if resp.Status != backend.HealthStatusOk { - return response.JSON(503, payload) + return response.JSON(http.StatusBadRequest, payload) } return response.JSON(http.StatusOK, payload) @@ -492,22 +492,18 @@ func (hs *HTTPServer) UninstallPlugin(c *contextmodel.ReqContext) response.Respo func translatePluginRequestErrorToAPIError(err error) response.Response { if errors.Is(err, backendplugin.ErrPluginNotRegistered) { - return response.Error(404, "Plugin not found", err) + return response.Error(http.StatusNotFound, "Plugin not found", err) } if errors.Is(err, backendplugin.ErrMethodNotImplemented) { - return response.Error(404, "Not found", err) - } - - if errors.Is(err, backendplugin.ErrHealthCheckFailed) { - return response.Error(500, "Plugin health check failed", err) + return response.Error(http.StatusNotFound, "Not found", err) } if errors.Is(err, backendplugin.ErrPluginUnavailable) { - return response.Error(503, "Plugin unavailable", err) + return response.Error(http.StatusServiceUnavailable, "Plugin unavailable", err) } - return response.Error(500, "Plugin request failed", err) + return response.ErrOrFallback(http.StatusInternalServerError, "Plugin request failed", err) } func (hs *HTTPServer) pluginMarkdown(ctx context.Context, pluginID string, name string) ([]byte, error) { diff --git a/pkg/api/response/response.go b/pkg/api/response/response.go index 71142ef19ff..68e32dc1a11 100644 --- a/pkg/api/response/response.go +++ b/pkg/api/response/response.go @@ -12,6 +12,7 @@ import ( "gopkg.in/yaml.v3" "github.com/grafana/grafana/pkg/infra/tracing" + "github.com/grafana/grafana/pkg/middleware/requestmeta" contextmodel "github.com/grafana/grafana/pkg/services/contexthandler/model" "github.com/grafana/grafana/pkg/setting" "github.com/grafana/grafana/pkg/util/errutil" @@ -80,6 +81,11 @@ func (r *NormalResponse) ErrMessage() string { func (r *NormalResponse) WriteTo(ctx *contextmodel.ReqContext) { if r.err != nil { + grafanaErr := errutil.Error{} + if errors.As(r.err, &grafanaErr) && grafanaErr.Source.IsDownstream() { + requestmeta.WithDownstreamStatusSource(ctx.Req.Context()) + } + if errutil.HasUnifiedLogging(ctx.Req.Context()) { ctx.Error = r.err } else { diff --git a/pkg/middleware/loggermw/logger.go b/pkg/middleware/loggermw/logger.go index 493d7a0e968..89254828442 100644 --- a/pkg/middleware/loggermw/logger.go +++ b/pkg/middleware/loggermw/logger.go @@ -23,6 +23,7 @@ import ( "time" "github.com/grafana/grafana/pkg/middleware" + "github.com/grafana/grafana/pkg/middleware/requestmeta" "github.com/grafana/grafana/pkg/util/errutil" @@ -127,6 +128,11 @@ func (l *loggerImpl) prepareLogParams(c *contextmodel.ReqContext, duration time. logParams = append(logParams, "handler", handler) } + if l.flags.IsEnabled(featuremgmt.FlagRequestInstrumentationStatusSource) { + rmd := requestmeta.GetRequestMetaData(c.Req.Context()) + logParams = append(logParams, "status_source", rmd.StatusSource) + } + logParams = append(logParams, errorLogParams(c.Error)...) return logParams, lvl diff --git a/pkg/middleware/request_metrics.go b/pkg/middleware/request_metrics.go index 554a799510e..bc3dba002e8 100644 --- a/pkg/middleware/request_metrics.go +++ b/pkg/middleware/request_metrics.go @@ -36,6 +36,11 @@ func RequestMetrics(features featuremgmt.FeatureToggles, cfg *setting.Cfg, promR ) histogramLabels := []string{"handler", "status_code", "method"} + + if features.IsEnabled(featuremgmt.FlagRequestInstrumentationStatusSource) { + histogramLabels = append(histogramLabels, "status_source") + } + if cfg.MetricsIncludeTeamLabel { histogramLabels = append(histogramLabels, "team") } @@ -80,8 +85,13 @@ func RequestMetrics(features featuremgmt.FeatureToggles, cfg *setting.Cfg, promR } labelValues := []string{handler, code, r.Method} + rmd := requestmeta.GetRequestMetaData(r.Context()) + + if features.IsEnabled(featuremgmt.FlagRequestInstrumentationStatusSource) { + labelValues = append(labelValues, string(rmd.StatusSource)) + } + if cfg.MetricsIncludeTeamLabel { - rmd := requestmeta.GetRequestMetaData(r.Context()) labelValues = append(labelValues, rmd.Team) } diff --git a/pkg/middleware/requestmeta/request_metadata.go b/pkg/middleware/requestmeta/request_metadata.go index a62b4eee16e..a7ede04ff6d 100644 --- a/pkg/middleware/requestmeta/request_metadata.go +++ b/pkg/middleware/requestmeta/request_metadata.go @@ -13,10 +13,18 @@ const ( TeamCore = "core" ) +type StatusSource string + +const ( + StatusSourceServer StatusSource = "server" + StatusSourceDownstream StatusSource = "downstream" +) + type rMDContextKey struct{} type RequestMetaData struct { - Team string + Team string + StatusSource StatusSource } var requestMetaDataContextKey = rMDContextKey{} @@ -27,8 +35,7 @@ func SetupRequestMetadata() web.Middleware { return func(next http.Handler) http.Handler { return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { rmd := defaultRequestMetadata() - - ctx := context.WithValue(r.Context(), requestMetaDataContextKey, rmd) + ctx := SetRequestMetaData(r.Context(), rmd) *r = *r.WithContext(ctx) next.ServeHTTP(w, r) @@ -46,7 +53,13 @@ func GetRequestMetaData(ctx context.Context) *RequestMetaData { return value } - return defaultRequestMetadata() + rmd := defaultRequestMetadata() + return &rmd +} + +// SetRequestMetaData sets the request metadata for the context. +func SetRequestMetaData(ctx context.Context, rmd RequestMetaData) context.Context { + return context.WithValue(ctx, requestMetaDataContextKey, &rmd) } // SetOwner returns an `web.Handler` that sets the team name for an request. @@ -57,8 +70,31 @@ func SetOwner(team string) web.Handler { } } -func defaultRequestMetadata() *RequestMetaData { - return &RequestMetaData{ - Team: TeamCore, +// WithDownstreamStatusSource sets the StatusSource field of the [RequestMetaData] for the +// context to [StatusSourceDownstream]. +func WithDownstreamStatusSource(ctx context.Context) { + v := GetRequestMetaData(ctx) + v.StatusSource = StatusSourceDownstream +} + +// WithStatusSource sets the StatusSource field of the [RequestMetaData] for the +// context based on the provided statusCode. +// If statusCode >= 500 then [StatusSourceDownstream]. +// If statusCode < 500 then [StatusSourceServer]. +func WithStatusSource(ctx context.Context, statusCode int) { + v := GetRequestMetaData(ctx) + + if statusCode >= 500 { + v.StatusSource = StatusSourceDownstream + return + } + + v.StatusSource = StatusSourceServer +} + +func defaultRequestMetadata() RequestMetaData { + return RequestMetaData{ + Team: TeamCore, + StatusSource: StatusSourceServer, } } diff --git a/pkg/middleware/requestmeta/request_metadata_test.go b/pkg/middleware/requestmeta/request_metadata_test.go new file mode 100644 index 00000000000..49a5cf46f65 --- /dev/null +++ b/pkg/middleware/requestmeta/request_metadata_test.go @@ -0,0 +1,47 @@ +package requestmeta + +import ( + "context" + "fmt" + "net/http" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestStatusSource(t *testing.T) { + ctx := context.Background() + ctx = SetRequestMetaData(ctx, defaultRequestMetadata()) + rmd := GetRequestMetaData(ctx) + require.Equal(t, StatusSourceServer, rmd.StatusSource) + + WithDownstreamStatusSource(ctx) + rmd = GetRequestMetaData(ctx) + require.Equal(t, StatusSourceDownstream, rmd.StatusSource) +} + +func TestWithStatusSource(t *testing.T) { + tcs := []struct { + status int + expectedSource StatusSource + }{ + {status: http.StatusOK, expectedSource: StatusSourceServer}, + {status: http.StatusBadRequest, expectedSource: StatusSourceServer}, + {status: http.StatusForbidden, expectedSource: StatusSourceServer}, + {status: http.StatusUnauthorized, expectedSource: StatusSourceServer}, + {status: http.StatusInternalServerError, expectedSource: StatusSourceDownstream}, + {status: http.StatusBadGateway, expectedSource: StatusSourceDownstream}, + {status: http.StatusGatewayTimeout, expectedSource: StatusSourceDownstream}, + {status: 599, expectedSource: StatusSourceDownstream}, + } + + for _, tc := range tcs { + t.Run(fmt.Sprintf("status %d => source %s ", tc.status, tc.expectedSource), func(t *testing.T) { + ctx := context.Background() + ctx = SetRequestMetaData(ctx, defaultRequestMetadata()) + WithStatusSource(ctx, tc.status) + rmd := GetRequestMetaData(ctx) + require.Equal(t, tc.expectedSource, rmd.StatusSource) + }) + } +} diff --git a/pkg/plugins/backendplugin/errors.go b/pkg/plugins/backendplugin/errors.go index 9d9fcb70e4c..59067ce29c3 100644 --- a/pkg/plugins/backendplugin/errors.go +++ b/pkg/plugins/backendplugin/errors.go @@ -1,12 +1,12 @@ package backendplugin -import "errors" +import ( + "errors" +) var ( // ErrPluginNotRegistered error returned when plugin is not registered. ErrPluginNotRegistered = errors.New("plugin not registered") - // ErrHealthCheckFailed error returned when health check failed. - ErrHealthCheckFailed = errors.New("health check failed") // ErrPluginUnavailable error returned when plugin is unavailable. ErrPluginUnavailable = errors.New("plugin unavailable") // ErrMethodNotImplemented error returned when plugin method not implemented. diff --git a/pkg/plugins/errors.go b/pkg/plugins/errors.go index 371747cb478..661531aa899 100644 --- a/pkg/plugins/errors.go +++ b/pkg/plugins/errors.go @@ -11,7 +11,8 @@ var ( ErrPluginUnavailable = errutil.Internal("plugin.unavailable") // ErrMethodNotImplemented error returned when a plugin method is not implemented. ErrMethodNotImplemented = errutil.NotImplemented("plugin.notImplemented") - // ErrPluginDownstreamError error returned when a plugin method is not implemented. + // ErrPluginDownstreamError error returned when a plugin request fails. ErrPluginDownstreamError = errutil.Internal("plugin.downstreamError", - errutil.WithPublicMessage("An error occurred within the plugin")) + errutil.WithPublicMessage("An error occurred within the plugin"), + errutil.WithDownstream()) ) diff --git a/pkg/plugins/manager/client/client.go b/pkg/plugins/manager/client/client.go index dc36164c19d..3c1db334d1b 100644 --- a/pkg/plugins/manager/client/client.go +++ b/pkg/plugins/manager/client/client.go @@ -3,7 +3,6 @@ package client import ( "context" "errors" - "fmt" "net/http" "net/textproto" "strings" @@ -75,7 +74,7 @@ func (s *Service) QueryData(ctx context.Context, req *backend.QueryDataRequest) return nil, plugins.ErrPluginUnavailable.Errorf("%w", backendplugin.ErrPluginUnavailable) } - return nil, plugins.ErrPluginDownstreamError.Errorf("%v: %w", "failed to query data", err) + return nil, plugins.ErrPluginDownstreamError.Errorf("client: failed to query data: %w", err) } for refID, res := range resp.Responses { @@ -108,7 +107,7 @@ func (s *Service) CallResource(ctx context.Context, req *backend.CallResourceReq err := instrumentation.InstrumentCallResourceRequest(ctx, &req.PluginContext, instrumentation.Cfg{ LogDatasourceRequests: s.cfg.LogDatasourceRequests, Target: p.Target(), - }, totalBytes, func(ctx context.Context) error { + }, totalBytes, func(ctx context.Context) (innerErr error) { removeConnectionHeaders(req.Headers) removeHopByHopHeaders(req.Headers) removeNonAllowedHeaders(req.Headers) @@ -130,14 +129,12 @@ func (s *Service) CallResource(ctx context.Context, req *backend.CallResourceReq return sender.Send(res) }) - if err := p.CallResource(ctx, req, wrappedSender); err != nil { - return err - } - return nil + innerErr = p.CallResource(ctx, req, wrappedSender) + return }) if err != nil { - return err + return plugins.ErrPluginDownstreamError.Errorf("client: failed to call resources: %w", err) } return nil @@ -162,7 +159,7 @@ func (s *Service) CollectMetrics(ctx context.Context, req *backend.CollectMetric return }) if err != nil { - return nil, err + return nil, plugins.ErrPluginDownstreamError.Errorf("client: failed to collect metrics: %w", err) } return resp, nil @@ -196,7 +193,7 @@ func (s *Service) CheckHealth(ctx context.Context, req *backend.CheckHealthReque return nil, err } - return nil, fmt.Errorf("%w: %w", backendplugin.ErrHealthCheckFailed, err) + return nil, plugins.ErrPluginDownstreamError.Errorf("client: failed to check health: %w", err) } return resp, nil diff --git a/pkg/plugins/manager/client/client_test.go b/pkg/plugins/manager/client/client_test.go index 8388ef02d22..c009e7ac02f 100644 --- a/pkg/plugins/manager/client/client_test.go +++ b/pkg/plugins/manager/client/client_test.go @@ -97,7 +97,7 @@ func TestCheckHealth(t *testing.T) { }, { err: errors.New("surprise surprise"), - expectedError: backendplugin.ErrHealthCheckFailed, + expectedError: plugins.ErrPluginDownstreamError, }, } diff --git a/pkg/server/test_env.go b/pkg/server/test_env.go index 723c3d5659e..2d54760fd5a 100644 --- a/pkg/server/test_env.go +++ b/pkg/server/test_env.go @@ -7,6 +7,7 @@ import ( "github.com/grafana/grafana/pkg/services/notifications" "github.com/grafana/grafana/pkg/services/oauthtoken/oauthtokentest" "github.com/grafana/grafana/pkg/services/sqlstore" + "github.com/grafana/grafana/pkg/web" ) func ProvideTestEnv( @@ -19,13 +20,13 @@ func ProvideTestEnv( oAuthTokenService *oauthtokentest.Service, ) (*TestEnv, error) { return &TestEnv{ - server, - store, - ns, - grpcServer, - pluginRegistry, - httpClientProvider, - oAuthTokenService, + Server: server, + SQLStore: store, + NotificationService: ns, + GRPCServer: grpcServer, + PluginRegistry: pluginRegistry, + HTTPClientProvider: httpClientProvider, + OAuthTokenService: oAuthTokenService, }, nil } @@ -37,4 +38,5 @@ type TestEnv struct { PluginRegistry registry.Service HTTPClientProvider httpclient.Provider OAuthTokenService *oauthtokentest.Service + RequestMiddleware web.Middleware } diff --git a/pkg/services/featuremgmt/registry.go b/pkg/services/featuremgmt/registry.go index 4c569d15101..a066286bd0e 100644 --- a/pkg/services/featuremgmt/registry.go +++ b/pkg/services/featuremgmt/registry.go @@ -737,5 +737,12 @@ var ( Stage: FeatureStageExperimental, Owner: grafanaObservabilityMetricsSquad, }, + { + Name: "requestInstrumentationStatusSource", + Description: "Include a status source label for request metrics and logs", + Stage: FeatureStageExperimental, + FrontendOnly: false, + Owner: grafanaPluginsPlatformSquad, + }, } ) diff --git a/pkg/services/featuremgmt/toggles_gen.csv b/pkg/services/featuremgmt/toggles_gen.csv index 309f43b7ace..2066b2a47f0 100644 --- a/pkg/services/featuremgmt/toggles_gen.csv +++ b/pkg/services/featuremgmt/toggles_gen.csv @@ -105,3 +105,4 @@ dashgpt,experimental,@grafana/dashboards-squad,false,false,false,true reportingRetries,preview,@grafana/sharing-squad,false,false,true,false newBrowseDashboards,preview,@grafana/grafana-frontend-platform,false,false,false,true sseGroupByDatasource,experimental,@grafana/observability-metrics,false,false,false,false +requestInstrumentationStatusSource,experimental,@grafana/plugins-platform-backend,false,false,false,false diff --git a/pkg/services/featuremgmt/toggles_gen.go b/pkg/services/featuremgmt/toggles_gen.go index be0c3910f0e..e11d207b2bd 100644 --- a/pkg/services/featuremgmt/toggles_gen.go +++ b/pkg/services/featuremgmt/toggles_gen.go @@ -430,4 +430,8 @@ const ( // FlagSseGroupByDatasource // Send query to the same datasource in a single request when using server side expressions FlagSseGroupByDatasource = "sseGroupByDatasource" + + // FlagRequestInstrumentationStatusSource + // Include a status source label for request metrics and logs + FlagRequestInstrumentationStatusSource = "requestInstrumentationStatusSource" ) diff --git a/pkg/tests/api/plugins/backendplugin/backendplugin_test.go b/pkg/tests/api/plugins/backendplugin/backendplugin_test.go index 2a3806bf901..e7015d14a5a 100644 --- a/pkg/tests/api/plugins/backendplugin/backendplugin_test.go +++ b/pkg/tests/api/plugins/backendplugin/backendplugin_test.go @@ -12,11 +12,13 @@ import ( "time" "github.com/grafana/grafana-plugin-sdk-go/backend" + "github.com/grafana/grafana-plugin-sdk-go/data" "github.com/stretchr/testify/require" "golang.org/x/oauth2" "github.com/grafana/grafana/pkg/api/dtos" "github.com/grafana/grafana/pkg/components/simplejson" + "github.com/grafana/grafana/pkg/middleware/requestmeta" "github.com/grafana/grafana/pkg/plugins" "github.com/grafana/grafana/pkg/plugins/backendplugin" "github.com/grafana/grafana/pkg/plugins/log" @@ -262,6 +264,123 @@ func TestIntegrationBackendPlugins(t *testing.T) { require.Equal(t, "msg 1\r\nmsg 2\r\n", string(bytes)) }) }) + + newTestScenario(t, "Query data error should return expected status code and marked with downstream status", + options(), + func(t *testing.T, tsCtx *testScenarioContext) { + tsCtx.backendTestPlugin.QueryDataHandler = backend.QueryDataHandlerFunc(func(ctx context.Context, req *backend.QueryDataRequest) (*backend.QueryDataResponse, error) { + return nil, fmt.Errorf("BOOM") + }) + + req := createQueryDataHTTPRequest(t, tsCtx, createRegularQuery(t, tsCtx)) + resp, err := http.DefaultClient.Do(req) + require.NoError(t, err) + require.Equal(t, http.StatusInternalServerError, resp.StatusCode) + require.NotNil(t, tsCtx.incomingRequest) + require.Equal(t, "/api/ds/query", tsCtx.incomingRequest.URL.Path) + rmd := requestmeta.GetRequestMetaData(tsCtx.incomingRequest.Context()) + require.Equal(t, requestmeta.StatusSourceDownstream, rmd.StatusSource) + t.Cleanup(func() { + err := resp.Body.Close() + require.NoError(t, err) + }) + }) + + newTestScenario(t, "Call resource error should return expected status code and marked with downstream status", + options(), + func(t *testing.T, tsCtx *testScenarioContext) { + tsCtx.backendTestPlugin.CallResourceHandler = backend.CallResourceHandlerFunc(func(ctx context.Context, req *backend.CallResourceRequest, sender backend.CallResourceResponseSender) error { + return fmt.Errorf("BOOM") + }) + + req := createCallResourceHTTPRequest(t, tsCtx) + resp, err := http.DefaultClient.Do(req) + require.NoError(t, err) + require.Equal(t, http.StatusInternalServerError, resp.StatusCode) + require.NotNil(t, tsCtx.incomingRequest) + require.Equal(t, "/api/datasources/uid/test-plugin/resources", tsCtx.incomingRequest.URL.Path) + rmd := requestmeta.GetRequestMetaData(tsCtx.incomingRequest.Context()) + require.Equal(t, requestmeta.StatusSourceDownstream, rmd.StatusSource) + t.Cleanup(func() { + err := resp.Body.Close() + require.NoError(t, err) + }) + }) + + newTestScenario(t, "Check health error should return expected status code and marked with downstream status", + options(), + func(t *testing.T, tsCtx *testScenarioContext) { + tsCtx.backendTestPlugin.CheckHealthHandler = backend.CheckHealthHandlerFunc(func(ctx context.Context, req *backend.CheckHealthRequest) (*backend.CheckHealthResult, error) { + return nil, fmt.Errorf("BOOM") + }) + + req := createCheckHealthHTTPRequest(t, tsCtx) + resp, err := http.DefaultClient.Do(req) + require.NoError(t, err) + require.Equal(t, http.StatusInternalServerError, resp.StatusCode) + require.NotNil(t, tsCtx.incomingRequest) + require.Equal(t, "/api/datasources/uid/test-plugin/health", tsCtx.incomingRequest.URL.Path) + rmd := requestmeta.GetRequestMetaData(tsCtx.incomingRequest.Context()) + require.Equal(t, requestmeta.StatusSourceDownstream, rmd.StatusSource) + t.Cleanup(func() { + err := resp.Body.Close() + require.NoError(t, err) + }) + }) + + newTestScenario(t, "Call resource response with 502 status code should be marked with downstream status", + options(), + func(t *testing.T, tsCtx *testScenarioContext) { + tsCtx.backendTestPlugin.CallResourceHandler = backend.CallResourceHandlerFunc(func(ctx context.Context, req *backend.CallResourceRequest, sender backend.CallResourceResponseSender) error { + return sender.Send(&backend.CallResourceResponse{ + Status: http.StatusBadGateway, + Headers: map[string][]string{}, + }) + }) + + req := createCallResourceHTTPRequest(t, tsCtx) + resp, err := http.DefaultClient.Do(req) + require.NoError(t, err) + b, err := io.ReadAll(resp.Body) + require.NoError(t, err) + require.Equal(t, http.StatusBadGateway, resp.StatusCode, string(b)) + require.NotNil(t, tsCtx.incomingRequest) + require.Equal(t, "/api/datasources/uid/test-plugin/resources", tsCtx.incomingRequest.URL.Path) + rmd := requestmeta.GetRequestMetaData(tsCtx.incomingRequest.Context()) + require.Equal(t, requestmeta.StatusSourceDownstream, rmd.StatusSource) + t.Cleanup(func() { + err := resp.Body.Close() + require.NoError(t, err) + }) + }) + + newTestScenario(t, "Query data response that includes a query data response error should return expected status code and marked with downstream status", + options(), + func(t *testing.T, tsCtx *testScenarioContext) { + tsCtx.backendTestPlugin.QueryDataHandler = backend.QueryDataHandlerFunc(func(ctx context.Context, req *backend.QueryDataRequest) (*backend.QueryDataResponse, error) { + resp := backend.NewQueryDataResponse() + resp.Responses["A"] = backend.DataResponse{ + Frames: data.Frames{}, + } + resp.Responses["B"] = backend.DataResponse{ + Error: fmt.Errorf("BOOM"), + } + return resp, nil + }) + + req := createQueryDataHTTPRequest(t, tsCtx, createRegularQuery(t, tsCtx)) + resp, err := http.DefaultClient.Do(req) + require.NoError(t, err) + require.Equal(t, http.StatusBadRequest, resp.StatusCode) + require.NotNil(t, tsCtx.incomingRequest) + require.Equal(t, "/api/ds/query", tsCtx.incomingRequest.URL.Path) + rmd := requestmeta.GetRequestMetaData(tsCtx.incomingRequest.Context()) + require.Equal(t, requestmeta.StatusSourceDownstream, rmd.StatusSource) + t.Cleanup(func() { + err := resp.Body.Close() + require.NoError(t, err) + }) + }) } type testScenarioContext struct { @@ -275,6 +394,7 @@ type testScenarioContext struct { rt http.RoundTripper modifyIncomingRequest func(req *http.Request) modifyCallResourceResponse func(sender backend.CallResourceResponseSender) error + incomingRequest *http.Request } type testScenarioInput struct { @@ -350,6 +470,12 @@ func newTestScenario(t *testing.T, name string, opts []testScenarioOption, callb }) grafanaListeningAddr, testEnv := testinfra.StartGrafanaEnv(t, dir, path) + testEnv.RequestMiddleware = func(next http.Handler) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + tsCtx.incomingRequest = r + next.ServeHTTP(w, r) + }) + } tsCtx.grafanaListeningAddr = grafanaListeningAddr testEnv.SQLStore.Cfg.LoginCookieName = loginCookieName tsCtx.testEnv = testEnv @@ -495,20 +621,13 @@ func (tsCtx *testScenarioContext) runQueryDataTest(t *testing.T, mr dtos.MetricR return &backend.QueryDataResponse{}, nil }) - buf1 := &bytes.Buffer{} - err := json.NewEncoder(buf1).Encode(mr) - require.NoError(t, err) - u := fmt.Sprintf("http://admin:admin@%s/api/ds/query", tsCtx.grafanaListeningAddr) - - req, err := http.NewRequest(http.MethodPost, u, buf1) - req.Header.Set("Content-Type", "application/json") + req := createQueryDataHTTPRequest(t, tsCtx, mr) req.Header.Set("User-Agent", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36") if tsCtx.modifyIncomingRequest != nil { tsCtx.modifyIncomingRequest(req) } - require.NoError(t, err) resp, err := http.DefaultClient.Do(req) require.NoError(t, err) b, err := io.ReadAll(resp.Body) @@ -559,9 +678,7 @@ func (tsCtx *testScenarioContext) runCheckHealthTest(t *testing.T, callback func }, nil }) - u := fmt.Sprintf("http://admin:admin@%s/api/datasources/uid/%s/health", tsCtx.grafanaListeningAddr, tsCtx.uid) - - req, err := http.NewRequest(http.MethodGet, u, nil) + req := createCheckHealthHTTPRequest(t, tsCtx) req.Header.Set("Content-Type", "application/json") req.Header.Set("User-Agent", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36") @@ -569,7 +686,6 @@ func (tsCtx *testScenarioContext) runCheckHealthTest(t *testing.T, callback func tsCtx.modifyIncomingRequest(req) } - require.NoError(t, err) resp, err := http.DefaultClient.Do(req) require.NoError(t, err) b, err := io.ReadAll(resp.Body) @@ -624,9 +740,7 @@ func (tsCtx *testScenarioContext) runCallResourceTest(t *testing.T, callback fun return tsCtx.modifyCallResourceResponse(sender) }) - u := fmt.Sprintf("http://admin:admin@%s/api/datasources/uid/%s/resources", tsCtx.grafanaListeningAddr, tsCtx.uid) - - req, err := http.NewRequest(http.MethodGet, u, nil) + req := createCallResourceHTTPRequest(t, tsCtx) req.Header.Set("Content-Type", "application/json") req.Header.Set("Connection", "X-Some-Conn-Header") req.Header.Set("X-Some-Conn-Header", "should be deleted") @@ -637,7 +751,6 @@ func (tsCtx *testScenarioContext) runCallResourceTest(t *testing.T, callback fun tsCtx.modifyIncomingRequest(req) } - require.NoError(t, err) resp, err := http.DefaultClient.Do(req) require.NoError(t, err) t.Cleanup(func() { @@ -667,6 +780,39 @@ func (tsCtx *testScenarioContext) runCallResourceTest(t *testing.T, callback fun }) } +func createCheckHealthHTTPRequest(t *testing.T, tsCtx *testScenarioContext) *http.Request { + t.Helper() + + u := fmt.Sprintf("http://admin:admin@%s/api/datasources/uid/%s/health", tsCtx.grafanaListeningAddr, tsCtx.uid) + req, err := http.NewRequest(http.MethodGet, u, nil) + require.NoError(t, err) + return req +} + +func createCallResourceHTTPRequest(t *testing.T, tsCtx *testScenarioContext) *http.Request { + t.Helper() + + u := fmt.Sprintf("http://admin:admin@%s/api/datasources/uid/%s/resources", tsCtx.grafanaListeningAddr, tsCtx.uid) + req, err := http.NewRequest(http.MethodGet, u, nil) + require.NoError(t, err) + return req +} + +func createQueryDataHTTPRequest(t *testing.T, tsCtx *testScenarioContext, mr dtos.MetricRequest) *http.Request { + t.Helper() + + buf1 := &bytes.Buffer{} + err := json.NewEncoder(buf1).Encode(mr) + require.NoError(t, err) + u := fmt.Sprintf("http://admin:admin@%s/api/ds/query", tsCtx.grafanaListeningAddr) + + req, err := http.NewRequest(http.MethodPost, u, buf1) + require.NoError(t, err) + req.Header.Set("Content-Type", "application/json") + + return req +} + func createTestPlugin(id string) (*plugins.Plugin, *testPlugin) { p := &plugins.Plugin{ JSONData: plugins.JSONData{ diff --git a/pkg/tests/testinfra/testinfra.go b/pkg/tests/testinfra/testinfra.go index 04b6dd6eaa6..33cdcb4325b 100644 --- a/pkg/tests/testinfra/testinfra.go +++ b/pkg/tests/testinfra/testinfra.go @@ -59,6 +59,18 @@ func StartGrafanaEnv(t *testing.T, grafDir, cfgPath string) (string, *server.Tes require.NoError(t, err) assert.Greater(t, dbSec.Key("query_retries").MustInt(), 0) + env.Server.HTTPServer.AddMiddleware(func(next http.Handler) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if env.RequestMiddleware != nil { + h := env.RequestMiddleware(next) + h.ServeHTTP(w, r) + return + } + + next.ServeHTTP(w, r) + }) + }) + go func() { // When the server runs, it will also build and initialize the service graph if err := env.Server.Run(); err != nil { diff --git a/pkg/util/errutil/errors.go b/pkg/util/errutil/errors.go index 43a45292efd..4a0029f2982 100644 --- a/pkg/util/errutil/errors.go +++ b/pkg/util/errutil/errors.go @@ -15,6 +15,7 @@ type Base struct { messageID string publicMessage string logLevel LogLevel + source Source } // NewBase initializes a [Base] that is used to construct [Error]. @@ -32,6 +33,7 @@ func NewBase(reason StatusReason, msgID string, opts ...BaseOpt) Base { reason: reason, messageID: msgID, logLevel: reason.Status().LogLevel(), + source: SourceServer, } for _, opt := range opts { @@ -143,6 +145,32 @@ func NotImplemented(msgID string, opts ...BaseOpt) Base { return NewBase(StatusNotImplemented, msgID, opts...) } +// BadGateway initializes a new [Base] error with reason StatusBadGateway +// and source SourceDownstream that is used to construct [Error]. The msgID +// is passed to the caller to serve as the base for user facing error messages. +// +// msgID should be structured as component.errorBrief, for example +// +// area.downstreamError +func BadGateway(msgID string, opts ...BaseOpt) Base { + newOpts := []BaseOpt{WithDownstream()} + newOpts = append(newOpts, opts...) + return NewBase(StatusBadGateway, msgID, newOpts...) +} + +// GatewayTimeout initializes a new [Base] error with reason StatusGatewayTimeout +// and source SourceDownstream that is used to construct [Error]. The msgID +// is passed to the caller to serve as the base for user facing error messages. +// +// msgID should be structured as component.errorBrief, for example +// +// area.downstreamTimeout +func GatewayTimeout(msgID string, opts ...BaseOpt) Base { + newOpts := []BaseOpt{WithDownstream()} + newOpts = append(newOpts, opts...) + return NewBase(StatusGatewayTimeout, msgID, newOpts...) +} + type BaseOpt func(Base) Base // WithLogLevel sets a custom log level for all errors instantiated from @@ -167,6 +195,17 @@ func WithPublicMessage(message string) BaseOpt { } } +// WithDownstream sets the source as SourceDownstream that will be used +// for errors based on this [Base]. +// +// Used as a functional option to [NewBase]. +func WithDownstream() BaseOpt { + return func(b Base) Base { + b.source = SourceDownstream + return b + } +} + // Errorf creates a new [Error] with Reason and MessageID from [Base], // and Message and Underlying will be populated using the rules of // [fmt.Errorf]. @@ -180,6 +219,7 @@ func (b Base) Errorf(format string, args ...any) Error { MessageID: b.messageID, Underlying: errors.Unwrap(err), LogLevel: b.logLevel, + Source: b.source, } } @@ -273,6 +313,8 @@ type Error struct { PublicPayload map[string]any // LogLevel provides a suggested level of logging for the error. LogLevel LogLevel + // Source identifies from where the error originates. + Source Source } // MarshalJSON returns an error, we do not want raw [Error]s being diff --git a/pkg/util/errutil/source.go b/pkg/util/errutil/source.go new file mode 100644 index 00000000000..f11f19bb546 --- /dev/null +++ b/pkg/util/errutil/source.go @@ -0,0 +1,18 @@ +package errutil + +// Source identifies from where an error originates. +type Source string + +const ( + // SourceServer implies error originates from within the server, i.e. this application. + SourceServer Source = "server" + + // SourceDownstream implies error originates from response error while server acting + // as a proxy, i.e. from a downstream service. + SourceDownstream Source = "downstream" +) + +// IsDownstream checks if Source is SourceDownstream. +func (s Source) IsDownstream() bool { + return s == SourceDownstream +} diff --git a/pkg/util/errutil/status.go b/pkg/util/errutil/status.go index 338cc0349a2..778f333a29e 100644 --- a/pkg/util/errutil/status.go +++ b/pkg/util/errutil/status.go @@ -46,6 +46,15 @@ const ( // features. // HTTP status code 501. StatusNotImplemented CoreStatus = "Not implemented" + // StatusBadGateway means that the server, while acting as a proxy, + // received an invalid response from the downstream server. + // HTTP status code 502. + StatusBadGateway CoreStatus = "Bad gateway" + // StatusGatewayTimeout means that the server, while acting as a proxy, + // did not receive a timely response from a downstream server it needed + // to access in order to complete the request. + // HTTP status code 504. + StatusGatewayTimeout CoreStatus = "Gateway timeout" ) // StatusReason allows for wrapping of CoreStatus. @@ -69,7 +78,7 @@ func (s CoreStatus) HTTPStatus() int { return http.StatusForbidden case StatusNotFound: return http.StatusNotFound - case StatusTimeout: + case StatusTimeout, StatusGatewayTimeout: return http.StatusGatewayTimeout case StatusTooManyRequests: return http.StatusTooManyRequests @@ -77,6 +86,8 @@ func (s CoreStatus) HTTPStatus() int { return http.StatusBadRequest case StatusNotImplemented: return http.StatusNotImplemented + case StatusBadGateway: + return http.StatusBadGateway case StatusUnknown, StatusInternal: return http.StatusInternalServerError default: diff --git a/pkg/util/proxyutil/reverse_proxy.go b/pkg/util/proxyutil/reverse_proxy.go index fd3c8ea95ec..d077d792251 100644 --- a/pkg/util/proxyutil/reverse_proxy.go +++ b/pkg/util/proxyutil/reverse_proxy.go @@ -10,6 +10,7 @@ import ( "time" glog "github.com/grafana/grafana/pkg/infra/log" + "github.com/grafana/grafana/pkg/middleware/requestmeta" "github.com/grafana/grafana/pkg/services/contexthandler" ) @@ -103,6 +104,8 @@ func modifyResponse(logger glog.Logger) func(resp *http.Response) error { SetProxyResponseHeaders(resp.Header) SetViaHeader(resp.Header, resp.ProtoMajor, resp.ProtoMinor) + + requestmeta.WithStatusSource(resp.Request.Context(), resp.StatusCode) return nil } } @@ -120,6 +123,7 @@ type timeoutError interface { func errorHandler(logger glog.Logger) func(http.ResponseWriter, *http.Request, error) { return func(w http.ResponseWriter, r *http.Request, err error) { ctxLogger := logger.FromContext(r.Context()) + requestmeta.WithDownstreamStatusSource(r.Context()) if errors.Is(err, context.Canceled) { ctxLogger.Debug("Proxy request cancelled by client") diff --git a/pkg/util/proxyutil/reverse_proxy_test.go b/pkg/util/proxyutil/reverse_proxy_test.go index ada4609f981..8c739fa8ed8 100644 --- a/pkg/util/proxyutil/reverse_proxy_test.go +++ b/pkg/util/proxyutil/reverse_proxy_test.go @@ -3,6 +3,7 @@ package proxyutil import ( "context" "errors" + "fmt" "net/http" "net/http/httptest" "testing" @@ -11,6 +12,7 @@ import ( "github.com/stretchr/testify/require" "github.com/grafana/grafana/pkg/infra/log" + "github.com/grafana/grafana/pkg/middleware/requestmeta" "github.com/grafana/grafana/pkg/services/contexthandler" "github.com/grafana/grafana/pkg/setting" ) @@ -97,7 +99,7 @@ func TestReverseProxy(t *testing.T) { require.NoError(t, resp.Body.Close()) }) - t.Run("Error handling should convert status codes depending on what kind of error it is", func(t *testing.T) { + t.Run("Error handling should convert status codes depending on what kind of error it is and set downstream status source", func(t *testing.T) { timedOutTransport := http.DefaultTransport.(*http.Transport) timedOutTransport.ResponseHeaderTimeout = time.Millisecond @@ -136,7 +138,12 @@ func TestReverseProxy(t *testing.T) { })) t.Cleanup(upstream.Close) rec := httptest.NewRecorder() + + ctx := requestmeta.SetRequestMetaData(context.Background(), requestmeta.RequestMetaData{ + StatusSource: requestmeta.StatusSourceServer, + }) req := httptest.NewRequest(http.MethodGet, upstream.URL, nil) + req = req.WithContext(ctx) rp := NewReverseProxy( log.New("test"), @@ -151,6 +158,55 @@ func TestReverseProxy(t *testing.T) { resp := rec.Result() require.Equal(t, tc.expectedStatusCode, resp.StatusCode) require.NoError(t, resp.Body.Close()) + + rmd := requestmeta.GetRequestMetaData(ctx) + require.Equal(t, requestmeta.StatusSourceDownstream, rmd.StatusSource) + }) + } + }) + + t.Run("5xx response status codes should set downstream status source", func(t *testing.T) { + testCases := []struct { + status int + expectedSource requestmeta.StatusSource + }{ + {status: http.StatusOK, expectedSource: requestmeta.StatusSourceServer}, + {status: http.StatusBadRequest, expectedSource: requestmeta.StatusSourceServer}, + {status: http.StatusForbidden, expectedSource: requestmeta.StatusSourceServer}, + {status: http.StatusUnauthorized, expectedSource: requestmeta.StatusSourceServer}, + {status: http.StatusInternalServerError, expectedSource: requestmeta.StatusSourceDownstream}, + {status: http.StatusBadGateway, expectedSource: requestmeta.StatusSourceDownstream}, + {status: http.StatusGatewayTimeout, expectedSource: requestmeta.StatusSourceDownstream}, + {status: 599, expectedSource: requestmeta.StatusSourceDownstream}, + } + + for _, tc := range testCases { + t.Run(fmt.Sprintf("status %d => source %s ", tc.status, tc.expectedSource), func(t *testing.T) { + upstream := newUpstreamServer(t, http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) { + w.WriteHeader(tc.status) + })) + t.Cleanup(upstream.Close) + rec := httptest.NewRecorder() + + ctx := requestmeta.SetRequestMetaData(context.Background(), requestmeta.RequestMetaData{ + StatusSource: requestmeta.StatusSourceServer, + }) + req := httptest.NewRequest(http.MethodGet, upstream.URL, nil) + req = req.WithContext(ctx) + + rp := NewReverseProxy( + log.New("test"), + func(req *http.Request) {}, + ) + require.NotNil(t, rp) + rp.ServeHTTP(rec, req) + + resp := rec.Result() + require.Equal(t, tc.status, resp.StatusCode) + require.NoError(t, resp.Body.Close()) + + rmd := requestmeta.GetRequestMetaData(ctx) + require.Equal(t, tc.expectedSource, rmd.StatusSource) }) } })