diff --git a/apps/advisor/pkg/app/checkscheduler/checkscheduler.go b/apps/advisor/pkg/app/checkscheduler/checkscheduler.go index 2bcb8d43f19..cdca40cbd77 100644 --- a/apps/advisor/pkg/app/checkscheduler/checkscheduler.go +++ b/apps/advisor/pkg/app/checkscheduler/checkscheduler.go @@ -14,6 +14,7 @@ import ( advisorv0alpha1 "github.com/grafana/grafana/apps/advisor/pkg/apis/advisor/v0alpha1" "github.com/grafana/grafana/apps/advisor/pkg/app/checkregistry" "github.com/grafana/grafana/apps/advisor/pkg/app/checks" + "github.com/grafana/grafana/pkg/infra/log" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/klog/v2" ) @@ -30,6 +31,7 @@ type Runner struct { evaluationInterval time.Duration maxHistory int namespace string + log log.Logger } // NewRunner creates a new Runner. @@ -66,22 +68,25 @@ func New(cfg app.Config) (app.Runnable, error) { evaluationInterval: evalInterval, maxHistory: maxHistory, namespace: namespace, + log: log.New("advisor.checkscheduler"), }, nil } func (r *Runner) Run(ctx context.Context) error { lastCreated, err := r.checkLastCreated(ctx) if err != nil { - return err - } - - // do an initial creation if necessary - if lastCreated.IsZero() { - err = r.createChecks(ctx) - if err != nil { - klog.Error("Error creating new check reports", "error", err) - } else { - lastCreated = time.Now() + r.log.Error("Error getting last check creation time", "error", err) + // Wait for interval to create the next scheduled check + lastCreated = time.Now() + } else { + // do an initial creation if necessary + if lastCreated.IsZero() { + err = r.createChecks(ctx) + if err != nil { + klog.Error("Error creating new check reports", "error", err) + } else { + lastCreated = time.Now() + } } } diff --git a/apps/advisor/pkg/app/checkscheduler/checkscheduler_test.go b/apps/advisor/pkg/app/checkscheduler/checkscheduler_test.go index 4fd5876b9ff..23ce113df4a 100644 --- a/apps/advisor/pkg/app/checkscheduler/checkscheduler_test.go +++ b/apps/advisor/pkg/app/checkscheduler/checkscheduler_test.go @@ -11,28 +11,35 @@ import ( "github.com/grafana/grafana-app-sdk/resource" advisorv0alpha1 "github.com/grafana/grafana/apps/advisor/pkg/apis/advisor/v0alpha1" "github.com/grafana/grafana/apps/advisor/pkg/app/checks" + "github.com/grafana/grafana/pkg/infra/log" "github.com/stretchr/testify/assert" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) -func TestRunner_Run_ErrorOnList(t *testing.T) { - mockCheckService := &MockCheckService{} - mockClient := &MockClient{ - listFunc: func(ctx context.Context, namespace string, options resource.ListOptions) (resource.ListObject, error) { - return nil, errors.New("list error") - }, - createFunc: func(ctx context.Context, id resource.Identifier, obj resource.Object, opts resource.CreateOptions) (resource.Object, error) { - return &advisorv0alpha1.Check{}, nil - }, - } +func TestRunner_Run(t *testing.T) { + t.Run("does not crash when error on list", func(t *testing.T) { + mockCheckService := &MockCheckService{} + mockClient := &MockClient{ + listFunc: func(ctx context.Context, namespace string, options resource.ListOptions) (resource.ListObject, error) { + return nil, errors.New("list error") + }, + createFunc: func(ctx context.Context, id resource.Identifier, obj resource.Object, opts resource.CreateOptions) (resource.Object, error) { + return &advisorv0alpha1.Check{}, nil + }, + } - runner := &Runner{ - checkRegistry: mockCheckService, - client: mockClient, - } + runner := &Runner{ + checkRegistry: mockCheckService, + client: mockClient, + log: log.NewNopLogger(), + evaluationInterval: 1 * time.Hour, + } - err := runner.Run(context.Background()) - assert.Error(t, err) + ctx, cancel := context.WithCancel(context.Background()) + cancel() + err := runner.Run(ctx) + assert.ErrorAs(t, err, &context.Canceled) + }) } func TestRunner_checkLastCreated_ErrorOnList(t *testing.T) { @@ -44,6 +51,7 @@ func TestRunner_checkLastCreated_ErrorOnList(t *testing.T) { runner := &Runner{ client: mockClient, + log: log.NewNopLogger(), } lastCreated, err := runner.checkLastCreated(context.Background()) @@ -68,6 +76,7 @@ func TestRunner_createChecks_ErrorOnCreate(t *testing.T) { runner := &Runner{ checkRegistry: mockCheckService, client: mockClient, + log: log.NewNopLogger(), } err := runner.createChecks(context.Background()) @@ -91,6 +100,7 @@ func TestRunner_createChecks_Success(t *testing.T) { runner := &Runner{ checkRegistry: mockCheckService, client: mockClient, + log: log.NewNopLogger(), } err := runner.createChecks(context.Background()) @@ -106,6 +116,7 @@ func TestRunner_cleanupChecks_ErrorOnList(t *testing.T) { runner := &Runner{ client: mockClient, + log: log.NewNopLogger(), } err := runner.cleanupChecks(context.Background()) @@ -126,6 +137,7 @@ func TestRunner_cleanupChecks_WithinMax(t *testing.T) { runner := &Runner{ client: mockClient, + log: log.NewNopLogger(), } err := runner.cleanupChecks(context.Background()) @@ -155,6 +167,7 @@ func TestRunner_cleanupChecks_ErrorOnDelete(t *testing.T) { runner := &Runner{ client: mockClient, maxHistory: defaultMaxHistory, + log: log.NewNopLogger(), } err := runner.cleanupChecks(context.Background()) assert.ErrorContains(t, err, "delete error") @@ -190,6 +203,7 @@ func TestRunner_cleanupChecks_Success(t *testing.T) { runner := &Runner{ client: mockClient, maxHistory: defaultMaxHistory, + log: log.NewNopLogger(), } err := runner.cleanupChecks(context.Background()) assert.NoError(t, err) diff --git a/apps/advisor/pkg/app/checktyperegisterer/checktyperegisterer.go b/apps/advisor/pkg/app/checktyperegisterer/checktyperegisterer.go index 2ed38e51ecb..3e563b3f88f 100644 --- a/apps/advisor/pkg/app/checktyperegisterer/checktyperegisterer.go +++ b/apps/advisor/pkg/app/checktyperegisterer/checktyperegisterer.go @@ -3,6 +3,7 @@ package checktyperegisterer import ( "context" "fmt" + "time" "github.com/grafana/grafana-app-sdk/app" "github.com/grafana/grafana-app-sdk/k8s" @@ -10,6 +11,7 @@ import ( advisorv0alpha1 "github.com/grafana/grafana/apps/advisor/pkg/apis/advisor/v0alpha1" "github.com/grafana/grafana/apps/advisor/pkg/app/checkregistry" "github.com/grafana/grafana/apps/advisor/pkg/app/checks" + "github.com/grafana/grafana/pkg/infra/log" "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) @@ -21,6 +23,9 @@ type Runner struct { checkRegistry checkregistry.CheckService client resource.Client namespace string + log log.Logger + retryAttempts int + retryDelay time.Duration } // NewRunner creates a new Runner. @@ -47,9 +52,32 @@ func New(cfg app.Config) (app.Runnable, error) { checkRegistry: checkRegistry, client: client, namespace: namespace, + log: log.New("advisor.checktyperegisterer"), + retryAttempts: 3, + retryDelay: time.Second * 5, }, nil } +func (r *Runner) createOrUpdate(ctx context.Context, obj resource.Object) error { + id := obj.GetStaticMetadata().Identifier() + _, err := r.client.Create(ctx, id, obj, resource.CreateOptions{}) + if err != nil { + if errors.IsAlreadyExists(err) { + // Already exists, update + r.log.Debug("Check type already exists, updating", "identifier", id) + _, err = r.client.Update(ctx, id, obj, resource.UpdateOptions{}) + if err != nil { + // Ignore the error, it's probably due to a race condition + r.log.Error("Error updating check type", "error", err) + } + return nil + } + return err + } + r.log.Debug("Check type registered successfully", "identifier", id) + return nil +} + func (r *Runner) Run(ctx context.Context) error { for _, t := range r.checkRegistry.Checks() { steps := t.Steps() @@ -72,19 +100,19 @@ func (r *Runner) Run(ctx context.Context) error { Steps: stepTypes, }, } - id := obj.GetStaticMetadata().Identifier() - _, err := r.client.Create(ctx, id, obj, resource.CreateOptions{}) - if err != nil { - if errors.IsAlreadyExists(err) { - // Already exists, update - _, err = r.client.Update(ctx, id, obj, resource.UpdateOptions{}) - if err != nil { - return err + for i := 0; i < r.retryAttempts; i++ { + err := r.createOrUpdate(ctx, obj) + if err != nil { + r.log.Error("Error creating check type, retrying", "error", err, "attempt", i+1) + if i == r.retryAttempts-1 { + r.log.Error("Unable to register check type") } else { - continue + time.Sleep(r.retryDelay) } + continue } - return err + r.log.Debug("Check type registered successfully", "check_type", t.ID()) + break } } return nil diff --git a/apps/advisor/pkg/app/checktyperegisterer/checktyperegisterer_test.go b/apps/advisor/pkg/app/checktyperegisterer/checktyperegisterer_test.go index 97fc7742504..4d4bab9adf3 100644 --- a/apps/advisor/pkg/app/checktyperegisterer/checktyperegisterer_test.go +++ b/apps/advisor/pkg/app/checktyperegisterer/checktyperegisterer_test.go @@ -9,6 +9,7 @@ import ( "github.com/grafana/grafana-app-sdk/resource" advisorv0alpha1 "github.com/grafana/grafana/apps/advisor/pkg/apis/advisor/v0alpha1" "github.com/grafana/grafana/apps/advisor/pkg/app/checks" + "github.com/grafana/grafana/pkg/infra/log" k8sErrs "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/runtime/schema" ) @@ -117,7 +118,10 @@ func TestCheckTypesRegisterer_Run(t *testing.T) { createFunc: tt.createFunc, updateFunc: tt.updateFunc, }, - namespace: "custom-namespace", + namespace: "custom-namespace", + log: log.New("test"), + retryAttempts: 1, + retryDelay: 0, } err := r.Run(context.Background()) if err != nil {