CloudMigrations: Bulk update local resources (#96002)

* wip

* make tests pass

* get all tests passing

* fixes

* some small cleanup

* fix test

* convert delimiter keys to struct keys

* dont execute empty sql statement

* remove printlns

* fix unit test

* a bit more cleanup

* whoops
This commit is contained in:
Michael Mandrus
2025-01-10 14:42:18 -05:00
committed by GitHub
parent 99a0eb825d
commit c9d22f06c3
9 changed files with 139 additions and 63 deletions

View File

@ -615,7 +615,7 @@ func (s *Service) GetSnapshot(ctx context.Context, query cloudmigration.GetSnaps
UID: snapshot.UID, UID: snapshot.UID,
SessionID: sessionUid, SessionID: sessionUid,
Status: localStatus, Status: localStatus,
Resources: resources, CloudResourcesToUpdate: resources,
}); err != nil { }); err != nil {
return nil, fmt.Errorf("error updating snapshot status: %w", err) return nil, fmt.Errorf("error updating snapshot status: %w", err)
} }

View File

@ -114,11 +114,19 @@ func Test_GetSnapshotStatusFromGMS(t *testing.T) {
assert.Equal(t, cloudmigration.SnapshotStatusCreating, snapshot.Status) assert.Equal(t, cloudmigration.SnapshotStatusCreating, snapshot.Status)
assert.Never(t, func() bool { return gmsClientFake.GetSnapshotStatusCallCount() > 0 }, time.Second, 10*time.Millisecond) assert.Never(t, func() bool { return gmsClientFake.GetSnapshotStatusCallCount() > 0 }, time.Second, 10*time.Millisecond)
// Make the status pending processing and ensure GMS gets called // Make the status pending processing to ensure GMS gets called and initialize a resource
err = s.store.UpdateSnapshot(ctx, cloudmigration.UpdateSnapshotCmd{ err = s.store.UpdateSnapshot(ctx, cloudmigration.UpdateSnapshotCmd{
UID: uid, UID: uid,
SessionID: sess.UID, SessionID: sess.UID,
Status: cloudmigration.SnapshotStatusPendingProcessing, Status: cloudmigration.SnapshotStatusPendingProcessing,
LocalResourcesToCreate: []cloudmigration.CloudMigrationResource{
{
Name: "A name",
Type: cloudmigration.DatasourceDataType,
RefID: "A",
Status: cloudmigration.ItemStatusPending,
},
},
}) })
assert.NoError(t, err) assert.NoError(t, err)

View File

@ -572,7 +572,7 @@ func (s *Service) buildSnapshot(ctx context.Context, signedInUser *user.SignedIn
UID: snapshotMeta.UID, UID: snapshotMeta.UID,
SessionID: snapshotMeta.SessionUID, SessionID: snapshotMeta.SessionUID,
Status: cloudmigration.SnapshotStatusPendingUpload, Status: cloudmigration.SnapshotStatusPendingUpload,
Resources: localSnapshotResource, LocalResourcesToCreate: localSnapshotResource,
}); err != nil { }); err != nil {
return err return err
} }
@ -714,7 +714,7 @@ func (s *Service) updateSnapshotWithRetries(ctx context.Context, cmd cloudmigrat
} }
return retryer.FuncComplete, nil return retryer.FuncComplete, nil
}, maxRetries, time.Millisecond*10, time.Second*5); err != nil { }, maxRetries, time.Millisecond*10, time.Second*5); err != nil {
s.log.Error("failed to update snapshot status", "snapshotUid", cmd.UID, "status", cmd.Status, "num_resources", len(cmd.Resources), "error", err.Error()) s.log.Error("failed to update snapshot status", "snapshotUid", cmd.UID, "status", cmd.Status, "num_local_resources", len(cmd.LocalResourcesToCreate), "num_cloud_resources", len(cmd.CloudResourcesToUpdate), "error", err.Error())
return fmt.Errorf("failed to update snapshot status: %w", err) return fmt.Errorf("failed to update snapshot status: %w", err)
} }
return nil return nil

View File

@ -10,19 +10,10 @@ type store interface {
CreateMigrationSession(ctx context.Context, session cloudmigration.CloudMigrationSession) (*cloudmigration.CloudMigrationSession, error) CreateMigrationSession(ctx context.Context, session cloudmigration.CloudMigrationSession) (*cloudmigration.CloudMigrationSession, error)
GetMigrationSessionByUID(ctx context.Context, orgID int64, uid string) (*cloudmigration.CloudMigrationSession, error) GetMigrationSessionByUID(ctx context.Context, orgID int64, uid string) (*cloudmigration.CloudMigrationSession, error)
GetCloudMigrationSessionList(ctx context.Context, orgID int64) ([]*cloudmigration.CloudMigrationSession, error) GetCloudMigrationSessionList(ctx context.Context, orgID int64) ([]*cloudmigration.CloudMigrationSession, error)
// DeleteMigrationSessionByUID deletes the migration session, and all the related snapshot and resources.
// the work is done in a transaction.
DeleteMigrationSessionByUID(ctx context.Context, orgID int64, uid string) (*cloudmigration.CloudMigrationSession, []cloudmigration.CloudMigrationSnapshot, error) DeleteMigrationSessionByUID(ctx context.Context, orgID int64, uid string) (*cloudmigration.CloudMigrationSession, []cloudmigration.CloudMigrationSnapshot, error)
CreateSnapshot(ctx context.Context, snapshot cloudmigration.CloudMigrationSnapshot) (string, error) CreateSnapshot(ctx context.Context, snapshot cloudmigration.CloudMigrationSnapshot) (string, error)
UpdateSnapshot(ctx context.Context, snapshot cloudmigration.UpdateSnapshotCmd) error UpdateSnapshot(ctx context.Context, snapshot cloudmigration.UpdateSnapshotCmd) error
GetSnapshotByUID(ctx context.Context, orgID int64, sessUid, id string, resultPage int, resultLimit int) (*cloudmigration.CloudMigrationSnapshot, error) GetSnapshotByUID(ctx context.Context, orgID int64, sessUid, id string, resultPage int, resultLimit int) (*cloudmigration.CloudMigrationSnapshot, error)
GetSnapshotList(ctx context.Context, query cloudmigration.ListSnapshotsQuery) ([]cloudmigration.CloudMigrationSnapshot, error) GetSnapshotList(ctx context.Context, query cloudmigration.ListSnapshotsQuery) ([]cloudmigration.CloudMigrationSnapshot, error)
// Deleted because were not used externally
// - DeleteSnapshot(ctx context.Context, snapshotUid string) error
// - CreateUpdateSnapshotResources(ctx context.Context, snapshotUid string, resources []cloudmigration.CloudMigrationResource) error
// - GetSnapshotResources(ctx context.Context, snapshotUid string, page int, limit int) ([]cloudmigration.CloudMigrationResource, error)
// - GetSnapshotResourceStats(ctx context.Context, snapshotUid string) (*cloudmigration.SnapshotResourceStats, error)
// - DeleteSnapshotResources(ctx context.Context, snapshotUid string) error
} }

View File

@ -4,6 +4,7 @@ import (
"context" "context"
"encoding/base64" "encoding/base64"
"fmt" "fmt"
"strings"
"time" "time"
"github.com/grafana/grafana/pkg/infra/db" "github.com/grafana/grafana/pkg/infra/db"
@ -94,6 +95,7 @@ func (ss *sqlStore) GetCloudMigrationSessionList(ctx context.Context, orgID int6
return migrations, nil return migrations, nil
} }
// DeleteMigrationSessionByUID deletes the migration session, and all the related snapshot and resources the work is done in a transaction.
func (ss *sqlStore) DeleteMigrationSessionByUID(ctx context.Context, orgID int64, uid string) (*cloudmigration.CloudMigrationSession, []cloudmigration.CloudMigrationSnapshot, error) { func (ss *sqlStore) DeleteMigrationSessionByUID(ctx context.Context, orgID int64, uid string) (*cloudmigration.CloudMigrationSession, []cloudmigration.CloudMigrationSnapshot, error) {
var c cloudmigration.CloudMigrationSession var c cloudmigration.CloudMigrationSession
err := ss.db.WithDbSession(ctx, func(sess *db.Session) error { err := ss.db.WithDbSession(ctx, func(sess *db.Session) error {
@ -127,7 +129,7 @@ func (ss *sqlStore) DeleteMigrationSessionByUID(ctx context.Context, orgID int64
if err != nil { if err != nil {
return fmt.Errorf("deleting snapshot resource from db: %w", err) return fmt.Errorf("deleting snapshot resource from db: %w", err)
} }
err = ss.deleteSnapshot(ctx, orgID, snapshot.UID) err = ss.deleteSnapshot(ctx, snapshot.UID)
if err != nil { if err != nil {
return fmt.Errorf("deleting snapshot from db: %w", err) return fmt.Errorf("deleting snapshot from db: %w", err)
} }
@ -211,19 +213,26 @@ func (ss *sqlStore) UpdateSnapshot(ctx context.Context, update cloudmigration.Up
} }
} }
// Update resources if set // If local resources are set, it means we have to create them for the first time
if len(update.Resources) > 0 { if len(update.LocalResourcesToCreate) > 0 {
if err := ss.createUpdateSnapshotResources(ctx, update.UID, update.Resources); err != nil { if err := ss.CreateSnapshotResources(ctx, update.UID, update.LocalResourcesToCreate); err != nil {
return err return err
} }
} }
// If cloud resources are set, it means we have to update our resource local state
if len(update.CloudResourcesToUpdate) > 0 {
if err := ss.UpdateSnapshotResources(ctx, update.UID, update.CloudResourcesToUpdate); err != nil {
return err
}
}
return nil return nil
}) })
return err return err
} }
func (ss *sqlStore) deleteSnapshot(ctx context.Context, orgID int64, snapshotUid string) error { func (ss *sqlStore) deleteSnapshot(ctx context.Context, snapshotUid string) error {
return ss.db.WithDbSession(ctx, func(sess *sqlstore.DBSession) error { return ss.db.WithDbSession(ctx, func(sess *sqlstore.DBSession) error {
_, err := sess.Delete(cloudmigration.CloudMigrationSnapshot{ _, err := sess.Delete(cloudmigration.CloudMigrationSnapshot{
UID: snapshotUid, UID: snapshotUid,
@ -316,30 +325,90 @@ func (ss *sqlStore) GetSnapshotList(ctx context.Context, query cloudmigration.Li
return snapshots, nil return snapshots, nil
} }
// CreateUpdateSnapshotResources either updates a migration resource for a snapshot, or creates it if it does not exist // CreateSnapshotResources initializes the local state of a resources belonging to a snapshot
// If the uid is not known, it uses snapshot_uid + resource_uid as a lookup func (ss *sqlStore) CreateSnapshotResources(ctx context.Context, snapshotUid string, resources []cloudmigration.CloudMigrationResource) error {
func (ss *sqlStore) createUpdateSnapshotResources(ctx context.Context, snapshotUid string, resources []cloudmigration.CloudMigrationResource) error { for i := 0; i < len(resources); i++ {
return ss.db.InTransaction(ctx, func(ctx context.Context) error { resources[i].UID = util.GenerateShortUID()
sql := "UPDATE cloud_migration_resource SET status=?, error_string=?, error_code=? WHERE uid=? OR (snapshot_uid=? AND resource_uid=?)" // ensure snapshot_uids are consistent so that we can use in conjunction with refID for lookup later
resources[i].SnapshotUID = snapshotUid
}
err := ss.db.WithDbSession(ctx, func(sess *sqlstore.DBSession) error { err := ss.db.WithDbSession(ctx, func(sess *sqlstore.DBSession) error {
_, err := sess.Insert(resources)
if err != nil {
return err
}
return nil
})
if err != nil {
return fmt.Errorf("creating resources: %w", err)
}
return nil
}
// UpdateSnapshotResources updates a migration resource for a snapshot, using snapshot_uid + resource_uid as a lookup
// It does preprocessing on the results in order to minimize the sql queries executed.
func (ss *sqlStore) UpdateSnapshotResources(ctx context.Context, snapshotUid string, resources []cloudmigration.CloudMigrationResource) error {
// refIds of resources that migrated successfully in order to update in bulk
okIds := make([]any, 0, len(resources))
// group any failed resources by errCode and errStr
type errId struct {
errCode cloudmigration.ResourceErrorCode
errStr string
}
errorIds := make(map[errId][]any)
for _, r := range resources { for _, r := range resources {
// try an update first if r.Status == cloudmigration.ItemStatusOK {
result, err := sess.Exec(sql, r.Status, r.Error, r.ErrorCode, r.UID, snapshotUid, r.RefID) okIds = append(okIds, r.RefID)
if err != nil { } else if r.Status == cloudmigration.ItemStatusError {
key := errId{errCode: r.ErrorCode, errStr: r.Error}
if ids, ok := errorIds[key]; ok {
errorIds[key] = append(ids, r.RefID)
} else {
errorIds[key] = []any{r.RefID}
}
}
}
type statement struct {
sql string
args []any
}
// Prepare a sql statement for all of the OK statuses
var okUpdateStatement *statement
if len(okIds) > 0 {
okUpdateStatement = &statement{
sql: fmt.Sprintf("UPDATE cloud_migration_resource SET status=? WHERE snapshot_uid=? AND resource_uid IN (?%s)", strings.Repeat(", ?", len(okIds)-1)),
args: append([]any{cloudmigration.ItemStatusOK, snapshotUid}, okIds...),
}
}
// Prepare however many sql statements are necessary for the error statuses
errorStatements := make([]statement, 0, len(errorIds))
for k, ids := range errorIds {
errorStatements = append(errorStatements, statement{
sql: fmt.Sprintf("UPDATE cloud_migration_resource SET status=?, error_code=?, error_string=? WHERE snapshot_uid=? AND resource_uid IN (?%s)", strings.Repeat(", ?", len(ids)-1)),
args: append([]any{cloudmigration.ItemStatusError, k.errCode, k.errStr, snapshotUid}, ids...),
})
}
// Execute the minimum number of required statements!
return ss.db.InTransaction(ctx, func(ctx context.Context) error {
err := ss.db.WithDbSession(ctx, func(sess *sqlstore.DBSession) error {
if okUpdateStatement != nil {
if _, err := sess.Exec(append([]any{okUpdateStatement.sql}, okUpdateStatement.args...)...); err != nil {
return err return err
} }
// if this had no effect, assign a uid and insert instead
n, err := result.RowsAffected()
if err != nil {
return err
} else if n == 0 {
r.UID = util.GenerateShortUID()
// ensure snapshot_uids are consistent so that we can use them to query when uid isn't known
r.SnapshotUID = snapshotUid
_, err := sess.Insert(r)
if err != nil {
return err
} }
for _, q := range errorStatements {
if _, err := sess.Exec(append([]any{q.sql}, q.args...)...); err != nil {
return err
} }
} }
return nil return nil
@ -364,7 +433,7 @@ func (ss *sqlStore) getSnapshotResources(ctx context.Context, snapshotUid string
err := ss.db.WithDbSession(ctx, func(sess *db.Session) error { err := ss.db.WithDbSession(ctx, func(sess *db.Session) error {
offset := (page - 1) * limit offset := (page - 1) * limit
sess.Limit(limit, offset) sess.Limit(limit, offset)
return sess.Find(&resources, &cloudmigration.CloudMigrationResource{ return sess.OrderBy("id ASC").Find(&resources, &cloudmigration.CloudMigrationResource{
SnapshotUID: snapshotUid, SnapshotUID: snapshotUid,
}) })
}) })

View File

@ -155,7 +155,7 @@ func Test_SnapshotManagement(t *testing.T) {
require.Equal(t, *snapshot, snapshots[0]) require.Equal(t, *snapshot, snapshots[0])
// delete snapshot // delete snapshot
err = s.deleteSnapshot(ctx, 1, snapshotUid) err = s.deleteSnapshot(ctx, snapshotUid)
require.NoError(t, err) require.NoError(t, err)
// now we expect not to find the snapshot // now we expect not to find the snapshot
@ -174,16 +174,25 @@ func Test_SnapshotResources(t *testing.T) {
resources, err := s.getSnapshotResources(ctx, "poiuy", 0, 100) resources, err := s.getSnapshotResources(ctx, "poiuy", 0, 100)
assert.NoError(t, err) assert.NoError(t, err)
assert.Len(t, resources, 3) assert.Len(t, resources, 3)
for _, r := range resources {
if r.RefID == "ejcx4d" {
assert.Equal(t, cloudmigration.ItemStatusError, r.Status)
break
}
}
// create a new resource and update an existing resource // create a new resource
err = s.createUpdateSnapshotResources(ctx, "poiuy", []cloudmigration.CloudMigrationResource{ err = s.CreateSnapshotResources(ctx, "poiuy", []cloudmigration.CloudMigrationResource{
{ {
Type: cloudmigration.DatasourceDataType, Type: cloudmigration.DatasourceDataType,
RefID: "mi39fj", RefID: "mi39fj",
Status: cloudmigration.ItemStatusOK, Status: cloudmigration.ItemStatusOK,
}, },
})
assert.NoError(t, err)
err = s.UpdateSnapshotResources(ctx, "poiuy", []cloudmigration.CloudMigrationResource{
{ {
UID: "qwerty", RefID: "ejcx4d",
Status: cloudmigration.ItemStatusOK, Status: cloudmigration.ItemStatusOK,
}, },
}) })
@ -193,16 +202,16 @@ func Test_SnapshotResources(t *testing.T) {
resources, err = s.getSnapshotResources(ctx, "poiuy", 0, 100) resources, err = s.getSnapshotResources(ctx, "poiuy", 0, 100)
assert.NoError(t, err) assert.NoError(t, err)
assert.Len(t, resources, 4) assert.Len(t, resources, 4)
// ensure existing resource was updated // ensure existing resource was updated from ERROR
for _, r := range resources { for _, r := range resources {
if r.UID == "querty" { if r.RefID == "ejcx4d" {
assert.Equal(t, cloudmigration.ItemStatusOK, r.Status) assert.Equal(t, cloudmigration.ItemStatusOK, r.Status)
break break
} }
} }
// ensure a new one was made // ensure a new one was made
for _, r := range resources { for _, r := range resources {
if r.UID == "mi39fj" { if r.RefID == "mi39fj" {
assert.Equal(t, cloudmigration.ItemStatusOK, r.Status) assert.Equal(t, cloudmigration.ItemStatusOK, r.Status)
break break
} }

View File

@ -105,12 +105,6 @@ func (c *memoryClientImpl) GetSnapshotStatus(ctx context.Context, session cloudm
RefID: "folder1", RefID: "folder1",
Status: cloudmigration.ItemStatusOK, Status: cloudmigration.ItemStatusOK,
}, },
{
Type: cloudmigration.DatasourceDataType,
RefID: "ds2",
Status: cloudmigration.ItemStatusWarning,
Error: "Only core data sources are supported. Please ensure the plugin is installed on the cloud stack.",
},
}, },
} }

View File

@ -67,7 +67,7 @@ const (
type CloudMigrationResource struct { type CloudMigrationResource struct {
ID int64 `xorm:"pk autoincr 'id'"` ID int64 `xorm:"pk autoincr 'id'"`
UID string `xorm:"uid"` UID string `xorm:"uid" json:"uid"`
Name string `xorm:"name" json:"name"` Name string `xorm:"name" json:"name"`
Type MigrateDataType `xorm:"resource_type" json:"type"` Type MigrateDataType `xorm:"resource_type" json:"type"`
@ -98,9 +98,10 @@ const (
type ItemStatus string type ItemStatus string
const ( const (
// Returned by GMS
ItemStatusOK ItemStatus = "OK" ItemStatusOK ItemStatus = "OK"
ItemStatusWarning ItemStatus = "WARNING"
ItemStatusError ItemStatus = "ERROR" ItemStatusError ItemStatus = "ERROR"
// Used by default while awaiting GMS results
ItemStatusPending ItemStatus = "PENDING" ItemStatusPending ItemStatus = "PENDING"
) )
@ -180,7 +181,11 @@ type UpdateSnapshotCmd struct {
UID string UID string
SessionID string SessionID string
Status SnapshotStatus Status SnapshotStatus
Resources []CloudMigrationResource
// LocalResourcesToCreate represents the local state of a resource before it has been uploaded to GMS
LocalResourcesToCreate []CloudMigrationResource
// CloudResourcesToUpdate represents resource state from GMS, to be merged with the local state
CloudResourcesToUpdate []CloudMigrationResource
} }
// access token // access token