Advisor: Allow to retry checks for a single element (#104279)

This commit is contained in:
Andres Martinez Gotor
2025-04-24 12:00:32 +02:00
committed by GitHub
parent 10df5d6f23
commit edeff68645
16 changed files with 255 additions and 13 deletions

View File

@ -36,6 +36,8 @@ check: {
stepID: string
// Human readable identifier of the item that failed
item: string
// ID of the item that failed
itemID: string
// Links to actions that can be taken to resolve the failure
links: [...#ErrorLink]
}

View File

@ -23,6 +23,8 @@ type CheckReportFailure struct {
StepID string `json:"stepID"`
// Human readable identifier of the item that failed
Item string `json:"item"`
// ID of the item that failed
ItemID string `json:"itemID"`
// Links to actions that can be taken to resolve the failure
Links []CheckErrorLink `json:"links"`
}

View File

@ -183,6 +183,14 @@ func schema_pkg_apis_advisor_v0alpha1_CheckReportFailure(ref common.ReferenceCal
Format: "",
},
},
"itemID": {
SchemaProps: spec.SchemaProps{
Description: "ID of the item that failed",
Default: "",
Type: []string{"string"},
Format: "",
},
},
"links": {
SchemaProps: spec.SchemaProps{
Description: "Links to actions that can be taken to resolve the failure",
@ -198,7 +206,7 @@ func schema_pkg_apis_advisor_v0alpha1_CheckReportFailure(ref common.ReferenceCal
},
},
},
Required: []string{"severity", "stepID", "item", "links"},
Required: []string{"severity", "stepID", "item", "itemID", "links"},
},
},
Dependencies: []string{

View File

@ -12,7 +12,7 @@ import (
)
var (
rawSchemaCheckv0alpha1 = []byte(`{"spec":{"properties":{"data":{"additionalProperties":{"type":"string"},"description":"Generic data input that a check can receive","type":"object"}},"type":"object"},"status":{"properties":{"additionalFields":{"description":"additionalFields is reserved for future use","type":"object","x-kubernetes-preserve-unknown-fields":true},"operatorStates":{"additionalProperties":{"properties":{"descriptiveState":{"description":"descriptiveState is an optional more descriptive state field which has no requirements on format","type":"string"},"details":{"description":"details contains any extra information that is operator-specific","type":"object","x-kubernetes-preserve-unknown-fields":true},"lastEvaluation":{"description":"lastEvaluation is the ResourceVersion last evaluated","type":"string"},"state":{"description":"state describes the state of the lastEvaluation.\nIt is limited to three possible states for machine evaluation.","enum":["success","in_progress","failed"],"type":"string"}},"required":["lastEvaluation","state"],"type":"object"},"description":"operatorStates is a map of operator ID to operator state evaluations.\nAny operator which consumes this kind SHOULD add its state evaluation information to this field.","type":"object"},"report":{"properties":{"count":{"description":"Number of elements analyzed","type":"integer"},"failures":{"description":"List of failures","items":{"properties":{"item":{"description":"Human readable identifier of the item that failed","type":"string"},"links":{"description":"Links to actions that can be taken to resolve the failure","items":{"properties":{"message":{"description":"Human readable error message","type":"string"},"url":{"description":"URL to a page with more information about the error","type":"string"}},"required":["url","message"],"type":"object"},"type":"array"},"severity":{"description":"Severity of the failure","enum":["high","low"],"type":"string"},"stepID":{"description":"Step ID that the failure is associated with","type":"string"}},"required":["severity","stepID","item","links"],"type":"object"},"type":"array"}},"required":["count","failures"],"type":"object"}},"required":["report"],"type":"object","x-kubernetes-preserve-unknown-fields":true}}`)
rawSchemaCheckv0alpha1 = []byte(`{"spec":{"properties":{"data":{"additionalProperties":{"type":"string"},"description":"Generic data input that a check can receive","type":"object"}},"type":"object"},"status":{"properties":{"additionalFields":{"description":"additionalFields is reserved for future use","type":"object","x-kubernetes-preserve-unknown-fields":true},"operatorStates":{"additionalProperties":{"properties":{"descriptiveState":{"description":"descriptiveState is an optional more descriptive state field which has no requirements on format","type":"string"},"details":{"description":"details contains any extra information that is operator-specific","type":"object","x-kubernetes-preserve-unknown-fields":true},"lastEvaluation":{"description":"lastEvaluation is the ResourceVersion last evaluated","type":"string"},"state":{"description":"state describes the state of the lastEvaluation.\nIt is limited to three possible states for machine evaluation.","enum":["success","in_progress","failed"],"type":"string"}},"required":["lastEvaluation","state"],"type":"object"},"description":"operatorStates is a map of operator ID to operator state evaluations.\nAny operator which consumes this kind SHOULD add its state evaluation information to this field.","type":"object"},"report":{"properties":{"count":{"description":"Number of elements analyzed","type":"integer"},"failures":{"description":"List of failures","items":{"properties":{"item":{"description":"Human readable identifier of the item that failed","type":"string"},"itemID":{"description":"ID of the item that failed","type":"string"},"links":{"description":"Links to actions that can be taken to resolve the failure","items":{"properties":{"message":{"description":"Human readable error message","type":"string"},"url":{"description":"URL to a page with more information about the error","type":"string"}},"required":["url","message"],"type":"object"},"type":"array"},"severity":{"description":"Severity of the failure","enum":["high","low"],"type":"string"},"stepID":{"description":"Step ID that the failure is associated with","type":"string"}},"required":["severity","stepID","item","itemID","links"],"type":"object"},"type":"array"}},"required":["count","failures"],"type":"object"}},"required":["report"],"type":"object","x-kubernetes-preserve-unknown-fields":true}}`)
versionSchemaCheckv0alpha1 app.VersionSchema
_ = json.Unmarshal(rawSchemaCheckv0alpha1, &versionSchemaCheckv0alpha1)
rawSchemaCheckTypev0alpha1 = []byte(`{"spec":{"properties":{"name":{"type":"string"},"steps":{"items":{"properties":{"description":{"type":"string"},"resolution":{"type":"string"},"stepID":{"type":"string"},"title":{"type":"string"}},"required":["title","description","stepID","resolution"],"type":"object"},"type":"array"}},"required":["name","steps"],"type":"object"},"status":{"properties":{"additionalFields":{"description":"additionalFields is reserved for future use","type":"object","x-kubernetes-preserve-unknown-fields":true},"operatorStates":{"additionalProperties":{"properties":{"descriptiveState":{"description":"descriptiveState is an optional more descriptive state field which has no requirements on format","type":"string"},"details":{"description":"details contains any extra information that is operator-specific","type":"object","x-kubernetes-preserve-unknown-fields":true},"lastEvaluation":{"description":"lastEvaluation is the ResourceVersion last evaluated","type":"string"},"state":{"description":"state describes the state of the lastEvaluation.\nIt is limited to three possible states for machine evaluation.","enum":["success","in_progress","failed"],"type":"string"}},"required":["lastEvaluation","state"],"type":"object"},"description":"operatorStates is a map of operator ID to operator state evaluations.\nAny operator which consumes this kind SHOULD add its state evaluation information to this field.","type":"object"}},"type":"object","x-kubernetes-preserve-unknown-fields":true}}`)

View File

@ -74,6 +74,21 @@ func New(cfg app.Config) (app.App, error) {
}
}()
}
if req.Action == resource.AdmissionActionUpdate {
go func() {
log.Debug("Updating check", "namespace", req.Object.GetNamespace(), "name", req.Object.GetName())
requester, err := identity.GetRequester(ctx)
if err != nil {
log.Error("Error getting requester", "error", err)
return
}
ctx = identity.WithRequester(context.Background(), requester)
err = processCheckRetry(ctx, client, req.Object, check)
if err != nil {
log.Error("Error processing check retry", "error", err)
}
}()
}
}
return nil
},

View File

@ -62,6 +62,17 @@ func (c *check) Items(ctx context.Context) ([]any, error) {
return res, nil
}
func (c *check) Item(ctx context.Context, id string) (any, error) {
requester, err := identity.GetRequester(ctx)
if err != nil {
return nil, err
}
return c.DatasourceSvc.GetDataSource(ctx, &datasources.GetDataSourceQuery{
UID: id,
OrgID: requester.GetOrgID(),
})
}
func (c *check) ID() string {
return CheckID
}
@ -113,6 +124,7 @@ func (s *uidValidationStep) Run(ctx context.Context, obj *advisor.CheckSpec, i a
advisor.CheckReportFailureSeverityLow,
s.ID(),
fmt.Sprintf("%s (%s)", ds.Name, ds.UID),
ds.UID,
[]advisor.CheckErrorLink{},
), nil
}
@ -181,6 +193,7 @@ func (s *healthCheckStep) Run(ctx context.Context, obj *advisor.CheckSpec, i any
advisor.CheckReportFailureSeverityHigh,
s.ID(),
ds.Name,
ds.UID,
[]advisor.CheckErrorLink{
{
Message: "Fix me",
@ -241,6 +254,7 @@ func (s *missingPluginStep) Run(ctx context.Context, obj *advisor.CheckSpec, i a
advisor.CheckReportFailureSeverityHigh,
s.ID(),
ds.Name,
ds.UID,
links,
), nil
}

View File

@ -10,6 +10,8 @@ import (
type Check interface {
// ID returns the unique identifier of the check
ID() string
// Item returns the item that will be checked
Item(ctx context.Context, id string) (any, error)
// Items returns the list of items that will be checked
Items(ctx context.Context) ([]any, error)
// Steps returns the list of steps that will be executed

View File

@ -61,6 +61,14 @@ func (c *check) Items(ctx context.Context) ([]any, error) {
return res, nil
}
func (c *check) Item(ctx context.Context, id string) (any, error) {
p, exists := c.PluginStore.Plugin(ctx, id)
if !exists {
return nil, fmt.Errorf("plugin %s not found", id)
}
return p, nil
}
func (c *check) Steps() []checks.Step {
return []checks.Step{
&deprecationStep{
@ -118,6 +126,7 @@ func (s *deprecationStep) Run(ctx context.Context, _ *advisor.CheckSpec, it any)
return checks.NewCheckReportFailure(
advisor.CheckReportFailureSeverityHigh,
s.ID(),
p.Name,
p.ID,
[]advisor.CheckErrorLink{
{
@ -190,6 +199,7 @@ func (s *updateStep) Run(ctx context.Context, _ *advisor.CheckSpec, i any) (*adv
return checks.NewCheckReportFailure(
advisor.CheckReportFailureSeverityLow,
s.ID(),
p.Name,
p.ID,
[]advisor.CheckErrorLink{
{

View File

@ -33,7 +33,7 @@ func TestRun(t *testing.T) {
{
name: "Deprecated plugin",
plugins: []pluginstore.Plugin{
{JSONData: plugins.JSONData{ID: "plugin1", Info: plugins.Info{Version: "1.0.0"}}},
{JSONData: plugins.JSONData{ID: "plugin1", Name: "Plugin 1", Info: plugins.Info{Version: "1.0.0"}}},
},
pluginInfo: map[string]*repo.PluginInfo{
"plugin1": {Status: "deprecated"},
@ -45,7 +45,8 @@ func TestRun(t *testing.T) {
{
Severity: advisor.CheckReportFailureSeverityHigh,
StepID: "deprecation",
Item: "plugin1",
Item: "Plugin 1",
ItemID: "plugin1",
Links: []advisor.CheckErrorLink{
{
Url: "/plugins/plugin1",
@ -58,7 +59,7 @@ func TestRun(t *testing.T) {
{
name: "Plugin with update",
plugins: []pluginstore.Plugin{
{JSONData: plugins.JSONData{ID: "plugin2", Info: plugins.Info{Version: "1.0.0"}}},
{JSONData: plugins.JSONData{ID: "plugin2", Name: "Plugin 2", Info: plugins.Info{Version: "1.0.0"}}},
},
pluginInfo: map[string]*repo.PluginInfo{
"plugin2": {Status: "active"},
@ -70,7 +71,8 @@ func TestRun(t *testing.T) {
{
Severity: advisor.CheckReportFailureSeverityLow,
StepID: "update",
Item: "plugin2",
Item: "Plugin 2",
ItemID: "plugin2",
Links: []advisor.CheckErrorLink{
{
Url: "/plugins/plugin2?page=version-history",
@ -83,7 +85,7 @@ func TestRun(t *testing.T) {
{
name: "Plugin with update (non semver)",
plugins: []pluginstore.Plugin{
{JSONData: plugins.JSONData{ID: "plugin2", Info: plugins.Info{Version: "alpha"}}},
{JSONData: plugins.JSONData{ID: "plugin2", Name: "Plugin 2", Info: plugins.Info{Version: "alpha"}}},
},
pluginInfo: map[string]*repo.PluginInfo{
"plugin2": {Status: "active"},
@ -95,7 +97,8 @@ func TestRun(t *testing.T) {
{
Severity: advisor.CheckReportFailureSeverityLow,
StepID: "update",
Item: "plugin2",
Item: "Plugin 2",
ItemID: "plugin2",
Links: []advisor.CheckErrorLink{
{
Url: "/plugins/plugin2?page=version-history",
@ -108,7 +111,7 @@ func TestRun(t *testing.T) {
{
name: "Plugin pinned",
plugins: []pluginstore.Plugin{
{JSONData: plugins.JSONData{ID: "plugin3", Info: plugins.Info{Version: "1.0.0"}}},
{JSONData: plugins.JSONData{ID: "plugin3", Name: "Plugin 3", Info: plugins.Info{Version: "1.0.0"}}},
},
pluginInfo: map[string]*repo.PluginInfo{
"plugin3": {Status: "active"},
@ -122,7 +125,7 @@ func TestRun(t *testing.T) {
{
name: "Managed plugin",
plugins: []pluginstore.Plugin{
{JSONData: plugins.JSONData{ID: "plugin4", Info: plugins.Info{Version: "1.0.0"}}},
{JSONData: plugins.JSONData{ID: "plugin4", Name: "Plugin 4", Info: plugins.Info{Version: "1.0.0"}}},
},
pluginInfo: map[string]*repo.PluginInfo{
"plugin4": {Status: "active"},
@ -136,7 +139,7 @@ func TestRun(t *testing.T) {
{
name: "Provisioned plugin",
plugins: []pluginstore.Plugin{
{JSONData: plugins.JSONData{ID: "plugin5", Info: plugins.Info{Version: "1.0.0"}}},
{JSONData: plugins.JSONData{ID: "plugin5", Name: "Plugin 5", Info: plugins.Info{Version: "1.0.0"}}},
},
pluginInfo: map[string]*repo.PluginInfo{
"plugin5": {Status: "active"},

View File

@ -14,6 +14,7 @@ import (
const (
TypeLabel = "advisor.grafana.app/type"
StatusAnnotation = "advisor.grafana.app/status"
RetryAnnotation = "advisor.grafana.app/retry"
StatusAnnotationError = "error"
StatusAnnotationProcessed = "processed"
)
@ -22,12 +23,14 @@ func NewCheckReportFailure(
severity advisor.CheckReportFailureSeverity,
stepID string,
item string,
itemID string,
links []advisor.CheckErrorLink,
) *advisor.CheckReportFailure {
return &advisor.CheckReportFailure{
Severity: severity,
StepID: stepID,
Item: item,
ItemID: itemID,
Links: links,
}
}
@ -47,6 +50,10 @@ func GetStatusAnnotation(obj resource.Object) string {
return obj.GetAnnotations()[StatusAnnotation]
}
func GetRetryAnnotation(obj resource.Object) string {
return obj.GetAnnotations()[RetryAnnotation]
}
func SetStatusAnnotation(ctx context.Context, client resource.Client, obj resource.Object, status string) error {
annotations := obj.GetAnnotations()
if annotations == nil {

View File

@ -94,6 +94,10 @@ func (r *Runner) Run(ctx context.Context) error {
ObjectMeta: metav1.ObjectMeta{
Name: t.ID(),
Namespace: r.namespace,
Annotations: map[string]string{
// Flag to indicate feature availability
checks.RetryAnnotation: "1",
},
},
Spec: advisorv0alpha1.CheckTypeSpec{
Name: t.ID(),

View File

@ -4,6 +4,7 @@ import (
"context"
"errors"
"fmt"
"slices"
"sync"
"github.com/grafana/grafana-app-sdk/resource"
@ -76,6 +77,72 @@ func processCheck(ctx context.Context, client resource.Client, obj resource.Obje
}, resource.PatchOptions{}, obj)
}
func processCheckRetry(ctx context.Context, client resource.Client, obj resource.Object, check checks.Check) error {
status := checks.GetStatusAnnotation(obj)
if status == "" || status == checks.StatusAnnotationError {
// Check not processed yet or errored
return nil
}
// Get the item to retry from the annotation
itemToRetry := checks.GetRetryAnnotation(obj)
if itemToRetry == "" {
// No item to retry, nothing to do
return nil
}
c, ok := obj.(*advisorv0alpha1.Check)
if !ok {
return fmt.Errorf("invalid object type")
}
// Get the items to check
item, err := check.Item(ctx, itemToRetry)
if err != nil {
setErr := checks.SetStatusAnnotation(ctx, client, obj, checks.StatusAnnotationError)
if setErr != nil {
return setErr
}
return fmt.Errorf("error initializing check: %w", err)
}
// Run the steps
steps := check.Steps()
failures, err := runStepsInParallel(ctx, &c.Spec, steps, []any{item})
if err != nil {
setErr := checks.SetStatusAnnotation(ctx, client, obj, checks.StatusAnnotationError)
if setErr != nil {
return setErr
}
return fmt.Errorf("error running steps: %w", err)
}
// Pull failures from the report for the items to retry
c.CheckStatus.Report.Failures = slices.DeleteFunc(c.CheckStatus.Report.Failures, func(f advisorv0alpha1.CheckReportFailure) bool {
if f.ItemID == itemToRetry {
for _, newFailure := range failures {
if newFailure.StepID == f.StepID {
// Same failure found, keep it
return false
}
}
// Failure no longer found, remove it
return true
}
// Failure not in the list of items to retry, keep it
return false
})
// Delete the retry annotation to mark the check as processed
annotations := obj.GetAnnotations()
delete(annotations, checks.RetryAnnotation)
return client.PatchInto(ctx, obj.GetStaticMetadata().Identifier(), resource.PatchRequest{
Operations: []resource.PatchOperation{{
Operation: resource.PatchOpAdd,
Path: "/status/report",
Value: c.CheckStatus.Report,
}, {
Operation: resource.PatchOpAdd,
Path: "/metadata/annotations",
Value: annotations,
}},
}, resource.PatchOptions{}, obj)
}
func runStepsInParallel(ctx context.Context, spec *advisorv0alpha1.CheckSpec, steps []checks.Step, items []any) ([]advisorv0alpha1.CheckReportFailure, error) {
reportFailures := []advisorv0alpha1.CheckReportFailure{}
var internalErr error

View File

@ -129,6 +129,78 @@ func TestProcessCheck_RunError(t *testing.T) {
assert.Equal(t, "error", obj.GetAnnotations()[checks.StatusAnnotation])
}
func TestProcessCheckRetry_NoRetry(t *testing.T) {
obj := &advisorv0alpha1.Check{}
obj.SetAnnotations(map[string]string{})
meta, err := utils.MetaAccessor(obj)
if err != nil {
t.Fatal(err)
}
meta.SetCreatedBy("user:1")
client := &mockClient{}
ctx := context.TODO()
check := &mockCheck{}
err = processCheckRetry(ctx, client, obj, check)
assert.NoError(t, err)
}
func TestProcessCheckRetry_RetryError(t *testing.T) {
obj := &advisorv0alpha1.Check{}
obj.SetAnnotations(map[string]string{
checks.RetryAnnotation: "item",
checks.StatusAnnotation: "processed",
})
meta, err := utils.MetaAccessor(obj)
if err != nil {
t.Fatal(err)
}
meta.SetCreatedBy("user:1")
client := &mockClient{}
ctx := context.TODO()
check := &mockCheck{
items: []any{"item"},
err: errors.New("retry error"),
}
err = processCheckRetry(ctx, client, obj, check)
assert.Error(t, err)
assert.Equal(t, "error", obj.GetAnnotations()[checks.StatusAnnotation])
}
func TestProcessCheckRetry_Success(t *testing.T) {
obj := &advisorv0alpha1.Check{}
obj.SetAnnotations(map[string]string{
checks.RetryAnnotation: "item",
checks.StatusAnnotation: "processed",
})
obj.CheckStatus.Report.Failures = []advisorv0alpha1.CheckReportFailure{
{
ItemID: "item",
StepID: "step",
},
}
meta, err := utils.MetaAccessor(obj)
if err != nil {
t.Fatal(err)
}
meta.SetCreatedBy("user:1")
client := &mockClient{}
ctx := context.TODO()
check := &mockCheck{
items: []any{"item"},
}
err = processCheckRetry(ctx, client, obj, check)
assert.NoError(t, err)
assert.Equal(t, "processed", obj.GetAnnotations()[checks.StatusAnnotation])
assert.Empty(t, obj.GetAnnotations()[checks.RetryAnnotation])
assert.Empty(t, obj.CheckStatus.Report.Failures)
}
type mockClient struct {
resource.Client
lastValue any
@ -152,6 +224,10 @@ func (m *mockCheck) Items(ctx context.Context) ([]any, error) {
return m.items, nil
}
func (m *mockCheck) Item(ctx context.Context, id string) (any, error) {
return m.items[0], nil
}
func (m *mockCheck) Steps() []checks.Step {
return []checks.Step{
&mockStep{err: m.err},

View File

@ -1825,6 +1825,7 @@
"severity",
"stepID",
"item",
"itemID",
"links"
],
"properties": {
@ -1833,6 +1834,11 @@
"type": "string",
"default": ""
},
"itemID": {
"description": "ID of the item that failed",
"type": "string",
"default": ""
},
"links": {
"description": "Links to actions that can be taken to resolve the failure",
"type": "array",

View File

@ -355,6 +355,8 @@ export type CheckErrorLink = {
export type CheckReportFailure = {
/** Human readable identifier of the item that failed */
item: string;
/** ID of the item that failed */
itemID: string;
/** Links to actions that can be taken to resolve the failure */
links: CheckErrorLink[];
/** Severity of the failure */

View File

@ -1,4 +1,28 @@
import { generatedAPI } from './endpoints.gen';
export const advisorAPI = generatedAPI;
export const { useGetCheckQuery, useCreateCheckMutation, useDeleteCheckMutation, useUpdateCheckMutation } = advisorAPI;
export const advisorAPI = generatedAPI.enhanceEndpoints({
endpoints: {
// Need to mutate the generated query to set the Content-Type header correctly
updateCheck: (endpointDefinition) => {
const originalQuery = endpointDefinition.query;
if (originalQuery) {
endpointDefinition.query = (requestOptions) => ({
...originalQuery(requestOptions),
headers: {
'Content-Type': 'application/json-patch+json',
},
body: JSON.stringify(requestOptions.patch),
});
}
},
},
});
export const {
useGetCheckQuery,
useListCheckQuery,
useCreateCheckMutation,
useDeleteCheckMutation,
useUpdateCheckMutation,
useListCheckTypeQuery,
} = advisorAPI;
export { type Check, type CheckType } from './endpoints.gen'; // eslint-disable-line