Files
grafana/pkg/tsdb/cloudwatch/cloudwatch.go
Nathan Vērzemnieks a18ea34688 CloudWatch: Backport aws-sdk-go-v2 update from external plugin (#107136)
* CloudWatch: Backport aws-sdk-go-v2 update from external plugin

* Review feedback & cleaning up a couple typos
2025-06-26 15:56:50 +02:00

273 lines
8.8 KiB
Go

package cloudwatch
import (
"context"
"encoding/json"
"fmt"
"slices"
"time"
"github.com/aws/aws-sdk-go-v2/aws"
"github.com/aws/aws-sdk-go-v2/service/cloudwatch"
"github.com/aws/aws-sdk-go-v2/service/cloudwatchlogs"
cloudwatchlogstypes "github.com/aws/aws-sdk-go-v2/service/cloudwatchlogs/types"
"github.com/aws/aws-sdk-go-v2/service/resourcegroupstaggingapi"
"github.com/grafana/grafana-aws-sdk/pkg/awsauth"
"github.com/grafana/grafana-plugin-sdk-go/backend"
"github.com/grafana/grafana-plugin-sdk-go/backend/instancemgmt"
"github.com/grafana/grafana-plugin-sdk-go/backend/log"
"github.com/grafana/grafana-plugin-sdk-go/backend/proxy"
"github.com/grafana/grafana-plugin-sdk-go/backend/resource/httpadapter"
"github.com/grafana/grafana/pkg/tsdb/cloudwatch/clients"
"github.com/grafana/grafana/pkg/tsdb/cloudwatch/kinds/dataquery"
"github.com/grafana/grafana/pkg/tsdb/cloudwatch/models"
"github.com/patrickmn/go-cache"
)
// Constants shared by the CloudWatch datasource in this file.
const (
// tagValueCacheExpiration is passed to cache.New in NewDatasource as the expiration for the
// tag value cache (and, multiplied by five, as the second argument).
tagValueCacheExpiration = time.Hour * 24
// headerFromExpression is used by datasources to identify expression queries
headerFromExpression = "X-Grafana-From-Expr"
// headerFromAlert is used by datasources to identify alert queries
headerFromAlert = "FromAlert"
// defaultRegion is the placeholder region that newAWSConfig replaces with the region
// configured in the datasource settings.
defaultRegion = "default"
// logsQueryMode is the query mode value QueryData compares against to detect logs queries.
logsQueryMode = "Logs"
// Query types: the values of the query's `type` field that QueryData switches on.
annotationQuery = "annotationQuery"
logAction = "logAction"
timeSeriesQuery = "timeSeriesQuery"
)
// DataQueryJson is the model QueryData unmarshals the first query's JSON into. It embeds the
// generated annotation query model and adds the query `type` field.
type DataQueryJson struct {
dataquery.CloudWatchAnnotationQuery
// Type is the query type QueryData dispatches on (annotationQuery, logAction or
// timeSeriesQuery); it may be empty.
Type string `json:"type,omitempty"`
}
// DataSource is the CloudWatch datasource instance created by NewDatasource. Its methods in
// this file handle data queries, resource calls and health checks.
type DataSource struct {
// Settings holds the CloudWatch settings loaded from the instance settings in NewDatasource.
Settings models.CloudWatchSettings
// ProxyOpts is attached to the auth settings in newAWSConfig when the secure socks proxy is
// enabled in both the Grafana settings and the datasource settings.
ProxyOpts *proxy.Options
// AWSConfigProvider produces the aws.Config values used to build AWS clients (see newAWSConfig).
AWSConfigProvider awsauth.ConfigProvider
logger log.Logger
// tagValueCache is created in NewDatasource using tagValueCacheExpiration.
tagValueCache *cache.Cache
// resourceHandler serves CallResource requests; NewDatasource builds it from ds.newResourceMux().
resourceHandler backend.CallResourceHandler
}
// newAWSConfig builds an aws.Config for the requested region from the datasource's auth
// settings. The defaultRegion placeholder is resolved to the region configured on the
// datasource; models.ErrMissingRegion is returned when that region is empty. On any error
// a zero-valued aws.Config is returned.
func (ds *DataSource) newAWSConfig(ctx context.Context, region string) (aws.Config, error) {
	// Swap the "default" placeholder for the datasource's own region.
	if region == defaultRegion {
		region = ds.Settings.Region
		if len(region) == 0 {
			return aws.Config{}, models.ErrMissingRegion
		}
	}

	authSettings := awsauth.Settings{
		CredentialsProfile: ds.Settings.Profile,
		LegacyAuthType:     ds.Settings.AuthType,
		AssumeRoleARN:      ds.Settings.AssumeRoleARN,
		ExternalID:         ds.Settings.ExternalID,
		Endpoint:           ds.Settings.Endpoint,
		Region:             region,
		AccessKey:          ds.Settings.AccessKey,
		SecretKey:          ds.Settings.SecretKey,
	}

	// The proxy is only used when it is enabled both globally and for this datasource.
	useProxy := ds.Settings.GrafanaSettings.SecureSocksDSProxyEnabled && ds.Settings.SecureSocksProxyEnabled
	if useProxy {
		authSettings.ProxyOptions = ds.ProxyOpts
	}

	awsCfg, err := ds.AWSConfigProvider.GetConfig(ctx, authSettings)
	if err != nil {
		return aws.Config{}, err
	}
	return awsCfg, nil
}
// NewDatasource creates a CloudWatch DataSource from the given instance settings.
//
// It loads the CloudWatch settings, reads the HTTP client options (for their proxy options),
// creates the AWS config provider, logger and tag value cache, and finally wires up the
// resource handler. An error is returned, wrapped with context, when the settings or the
// HTTP client options cannot be read.
func NewDatasource(ctx context.Context, settings backend.DataSourceInstanceSettings) (instancemgmt.Instance, error) {
	instanceSettings, err := models.LoadCloudWatchSettings(ctx, settings)
	if err != nil {
		return nil, fmt.Errorf("error reading settings: %w", err)
	}

	opts, err := settings.HTTPClientOptions(ctx)
	if err != nil {
		// Wrapped like the settings error above so the caller can tell which step failed.
		return nil, fmt.Errorf("error reading http client options: %w", err)
	}

	ds := &DataSource{
		Settings: instanceSettings,
		// this is used to build a custom dialer when secure socks proxy is enabled
		ProxyOpts:         opts.ProxyOptions,
		AWSConfigProvider: awsauth.NewConfigProvider(),
		logger:            backend.NewLoggerWith("logger", "grafana-cloudwatch-datasource"),
		tagValueCache:     cache.New(tagValueCacheExpiration, tagValueCacheExpiration*5),
	}
	// The resource mux needs the datasource itself, so it is attached after construction.
	ds.resourceHandler = httpadapter.New(ds.newResourceMux())
	return ds, nil
}
// instrumentContext returns a context carrying plugin key-value pairs as contextual log
// attributes, so that a logger obtained later via logger.FromContext(ctx) includes them in
// its output. The endpoint and plugin ID are always added; datasource name/UID and the user
// login are added only when the corresponding plugin context fields are set.
// TODO: move this into the sdk (see https://github.com/grafana/grafana/issues/82033)
func instrumentContext(ctx context.Context, endpoint string, pCtx backend.PluginContext) context.Context {
	attrs := []any{"endpoint", endpoint, "pluginId", pCtx.PluginID}

	if dsSettings := pCtx.DataSourceInstanceSettings; dsSettings != nil {
		attrs = append(attrs, "dsName", dsSettings.Name, "dsUID", dsSettings.UID)
	}

	if user := pCtx.User; user != nil {
		attrs = append(attrs, "uname", user.Login)
	}

	return log.WithContextualAttributes(ctx, attrs)
}
// CallResource instruments the context with call-resource logging attributes and delegates
// the request to the datasource's resource handler.
func (ds *DataSource) CallResource(ctx context.Context, req *backend.CallResourceRequest, sender backend.CallResourceResponseSender) error {
	instrumented := instrumentContext(ctx, string(backend.EndpointCallResource), req.PluginContext)
	return ds.resourceHandler.CallResource(instrumented, req, sender)
}
// QueryData handles a data query request by dispatching it to the matching executor.
//
// The first query in the request decides how the whole request is handled:
//   - logs queries coming from alerting, expressions or public dashboards are executed
//     synchronously via executeSyncLogQuery;
//   - otherwise the query `type` selects the annotation, log action or (by default) time
//     series executor.
//
// An error is returned when the request contains no queries or when the first query's JSON
// cannot be unmarshalled; errors from the executors are returned as-is.
func (ds *DataSource) QueryData(ctx context.Context, req *backend.QueryDataRequest) (*backend.QueryDataResponse, error) {
	ctx = instrumentContext(ctx, string(backend.EndpointQueryData), req.PluginContext)

	// Guard against an empty request: indexing Queries[0] below would otherwise panic.
	if len(req.Queries) == 0 {
		return nil, fmt.Errorf("query data request contains no queries")
	}
	q := req.Queries[0]

	var model DataQueryJson
	if err := json.Unmarshal(q.JSON, &model); err != nil {
		return nil, err
	}

	_, fromAlert := req.Headers[headerFromAlert]
	fromExpression := req.GetHTTPHeader(headerFromExpression) != ""

	// Public dashboard queries execute like alert queries, i.e. they run on the backend, so
	// they have to be handled synchronously. `model.Type` is set by the frontend query runner
	// at execution time and is not saved with the query, so a logs query that is missing
	// `model.Type` is treated as a public dashboard query.
	queryMode := string(model.QueryMode)
	fromPublicDashboard := model.Type == "" && queryMode == logsQueryMode

	isSyncLogQuery := ((fromAlert || fromExpression) && queryMode == logsQueryMode) || fromPublicDashboard
	if isSyncLogQuery {
		return executeSyncLogQuery(ctx, ds, req)
	}

	switch model.Type {
	case annotationQuery:
		return ds.executeAnnotationQuery(ctx, model, q)
	case logAction:
		return ds.executeLogActions(ctx, req)
	case timeSeriesQuery:
		fallthrough
	default:
		// Unknown or missing types are handled as time series queries.
		return ds.executeTimeSeriesQuery(ctx, req)
	}
}
// CheckHealth probes the CloudWatch metrics API and the CloudWatch logs API and reports the
// outcome of both in a single two-line message. The status is an error when either probe
// fails; the returned Go error is always nil.
func (ds *DataSource) CheckHealth(ctx context.Context, req *backend.CheckHealthRequest) (*backend.CheckHealthResult, error) {
	ctx = instrumentContext(ctx, string(backend.EndpointCheckHealth), req.PluginContext)

	result := &backend.CheckHealthResult{Status: backend.HealthStatusOk}

	metricsMsg := "Successfully queried the CloudWatch metrics API."
	if metricsErr := ds.checkHealthMetrics(ctx, req.PluginContext); metricsErr != nil {
		result.Status = backend.HealthStatusError
		metricsMsg = fmt.Sprintf("CloudWatch metrics query failed: %s", metricsErr.Error())
	}

	// The logs probe always runs, even when the metrics probe has already failed.
	logsMsg := "Successfully queried the CloudWatch logs API."
	if logsErr := ds.checkHealthLogs(ctx); logsErr != nil {
		result.Status = backend.HealthStatusError
		logsMsg = fmt.Sprintf("CloudWatch logs query failed: %s", logsErr.Error())
	}

	result.Message = fmt.Sprintf("1. %s\n2. %s", metricsMsg, logsMsg)
	return result, nil
}
// checkHealthMetrics verifies access to the CloudWatch metrics API by listing the
// AWS/Billing EstimatedCharges metric in the datasource's default region. Only the error
// of the call is reported; the listed metrics are discarded. The plugin context argument
// is unused.
func (ds *DataSource) checkHealthMetrics(ctx context.Context, _ backend.PluginContext) error {
	awsCfg, err := ds.newAWSConfig(ctx, defaultRegion)
	if err != nil {
		return err
	}

	input := &cloudwatch.ListMetricsInput{
		Namespace:  aws.String("AWS/Billing"),
		MetricName: aws.String("EstimatedCharges"),
	}
	client := clients.NewMetricsClient(NewCWClient(awsCfg), ds.Settings.GrafanaSettings.ListMetricsPageLimit)
	_, err = client.ListMetricsWithPageLimit(ctx, input)
	return err
}
// checkHealthLogs verifies access to the CloudWatch logs API by describing at most one log
// group in the datasource's default region. Only the error of the call is reported.
func (ds *DataSource) checkHealthLogs(ctx context.Context) error {
	awsCfg, err := ds.getAWSConfig(ctx, defaultRegion)
	if err != nil {
		return err
	}

	input := &cloudwatchlogs.DescribeLogGroupsInput{Limit: aws.Int32(1)}
	_, err = NewLogsAPI(awsCfg).DescribeLogGroups(ctx, input)
	return err
}
// getAWSConfig returns the aws.Config for the given region. It is a thin wrapper around
// newAWSConfig and is the accessor the client getters in this file go through.
func (ds *DataSource) getAWSConfig(ctx context.Context, region string) (aws.Config, error) {
	awsCfg, err := ds.newAWSConfig(ctx, region)
	return awsCfg, err
}
// getCWClient returns a CloudWatch client built from the AWS config for the given region,
// or the error from resolving that config.
func (ds *DataSource) getCWClient(ctx context.Context, region string) (models.CWClient, error) {
	awsCfg, err := ds.getAWSConfig(ctx, region)
	if err != nil {
		return nil, err
	}

	client := NewCWClient(awsCfg)
	return client, nil
}
// getCWLogsClient returns a CloudWatch Logs client built from the AWS config for the given
// region, or the error from resolving that config.
func (ds *DataSource) getCWLogsClient(ctx context.Context, region string) (models.CWLogsClient, error) {
	awsCfg, err := ds.getAWSConfig(ctx, region)
	if err != nil {
		return nil, err
	}
	return NewCWLogsClient(awsCfg), nil
}
// getEC2Client returns an EC2 API provider built from the AWS config for the given region,
// or the error from resolving that config.
func (ds *DataSource) getEC2Client(ctx context.Context, region string) (models.EC2APIProvider, error) {
	awsCfg, err := ds.getAWSConfig(ctx, region)
	if err != nil {
		return nil, err
	}

	ec2API := NewEC2API(awsCfg)
	return ec2API, nil
}
// getRGTAClient returns a Resource Groups Tagging API client built from the AWS config for
// the given region, or the error from resolving that config.
func (ds *DataSource) getRGTAClient(ctx context.Context, region string) (resourcegroupstaggingapi.GetResourcesAPIClient, error) {
	awsCfg, err := ds.getAWSConfig(ctx, region)
	if err != nil {
		return nil, err
	}

	rgtaClient := NewRGTAClient(awsCfg)
	return rgtaClient, nil
}
// terminatedStates lists the CloudWatch Logs query statuses that isTerminated reports as
// terminated: complete, cancelled, failed and timeout.
var terminatedStates = []cloudwatchlogstypes.QueryStatus{
cloudwatchlogstypes.QueryStatusComplete,
cloudwatchlogstypes.QueryStatusCancelled,
cloudwatchlogstypes.QueryStatusFailed,
cloudwatchlogstypes.QueryStatusTimeout,
}
// isTerminated reports whether queryStatus is one of the statuses listed in terminatedStates.
func isTerminated(queryStatus cloudwatchlogstypes.QueryStatus) bool {
	position := slices.Index(terminatedStates, queryStatus)
	return position >= 0
}